From 8b138de89fc73c41f163c87382ee144fb7c8fa25 Mon Sep 17 00:00:00 2001
From: Lukas Stadler <lukas.stadler@oracle.com>
Date: Sun, 15 Oct 2017 11:01:56 +0200
Subject: [PATCH] handle multi-char, empty and "byte" replacements in iconv

---
 .../truffle/r/nodes/builtin/base/IConv.java   | 20 ++++++++++++++++---
 .../truffle/r/test/ExpectedTestOutput.test    | 20 +++++++++++++++++++
 .../r/test/builtins/TestBuiltin_iconv.java    |  5 +++++
 3 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IConv.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IConv.java
index 7a80372f4e..35a9acd0cb 100644
--- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IConv.java
+++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IConv.java
@@ -90,14 +90,28 @@ public abstract class IConv extends RBuiltinNode.Arg6 {
             CharsetDecoder decoder = toCharset.newDecoder();
             if (RRuntime.isNA(sub)) {
                 encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+                encoder.onMalformedInput(CodingErrorAction.REPORT);
                 decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
                 decoder.onMalformedInput(CodingErrorAction.REPORT);
+            } else if ("byte".equals(sub)) {
+                // TODO: special "byte" mode should insert <hexcode> per unconvertible byte; for now they are dropped
+                encoder.onUnmappableCharacter(CodingErrorAction.IGNORE);
+                encoder.onMalformedInput(CodingErrorAction.IGNORE);
+                decoder.onUnmappableCharacter(CodingErrorAction.IGNORE);
+                decoder.onMalformedInput(CodingErrorAction.IGNORE);
+            } else if (sub.isEmpty()) {
+                encoder.onUnmappableCharacter(CodingErrorAction.IGNORE);
+                encoder.onMalformedInput(CodingErrorAction.IGNORE);
+                decoder.onUnmappableCharacter(CodingErrorAction.IGNORE);
+                decoder.onMalformedInput(CodingErrorAction.IGNORE);
             } else {
-                decoder.replaceWith(sub);
+                // ignore encoding errors
+                encoder.onUnmappableCharacter(CodingErrorAction.IGNORE);
+                encoder.onMalformedInput(CodingErrorAction.IGNORE);
+                // TODO: support more than one character in "replacement"; only the first character of sub is used
+                decoder.replaceWith(sub.substring(0, 1));
                 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
                 decoder.onMalformedInput(CodingErrorAction.REPLACE);
-                encoder.replaceWith(sub.getBytes(toCharset));
-                encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
             }
             int length = x.getLength();
             String[] data = new String[length];
diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test
index 19b3efcc60..06d54805ff 100644
--- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test
+++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test
@@ -28170,6 +28170,26 @@ attr(,"Rd_tag")
 [1] "UTF-8"
 [1] "²a²²" "b"
 
+##com.oracle.truffle.r.test.builtins.TestBuiltin_iconv.testIconv#
+#iconv('foo²²', 'UTF8', 'ASCII')
+[1] NA
+
+##com.oracle.truffle.r.test.builtins.TestBuiltin_iconv.testIconv#
+#iconv('foo²²', 'UTF8', 'ASCII', sub='')
+[1] "foo"
+
+##com.oracle.truffle.r.test.builtins.TestBuiltin_iconv.testIconv#Ignored.Unimplemented#
+#iconv('foo²²', 'UTF8', 'ASCII', sub='byte')
+[1] "foo<c2><b2><c2><b2>"
+
+##com.oracle.truffle.r.test.builtins.TestBuiltin_iconv.testIconv#
+#iconv('foo²²', 'UTF8', 'ASCII', sub='f')
+[1] "fooffff"
+
+##com.oracle.truffle.r.test.builtins.TestBuiltin_iconv.testIconv#Ignored.Unimplemented#
+#iconv('foo²²', 'UTF8', 'ASCII', sub='fooooo')
+[1] "foofooooofooooofooooofooooo"
+
 ##com.oracle.truffle.r.test.builtins.TestBuiltin_iconv.testIconv#
 #{ .Internal(iconv("7", "latin1", "ASCII", 42, T, F)) }
 Error: invalid 'sub' argument
diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_iconv.java b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_iconv.java
index 6d4e384ed3..9bb79a5737 100644
--- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_iconv.java
+++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_iconv.java
@@ -80,5 +80,10 @@ public class TestBuiltin_iconv extends TestBase {
         assertEval("{ .Internal(iconv(\"7\", \"latin1\", \"ASCII\", 42, T, F)) }");
         assertEval("{ .Internal(iconv(\"7\", \"latin1\", \"ASCII\", character(), T, F)) }");
         assertEval("Sys.setlocale('LC_CTYPE', 'C'); iconv(c('²a²²','b')); Sys.setlocale('LC_CTYPE', 'UTF-8'); iconv(c('²a²²','b'))");
+        assertEval("iconv('foo²²', 'UTF8', 'ASCII')");
+        assertEval(Ignored.Unimplemented, "iconv('foo²²', 'UTF8', 'ASCII', sub='byte')");
+        assertEval(Ignored.Unimplemented, "iconv('foo²²', 'UTF8', 'ASCII', sub='fooooo')");
+        assertEval("iconv('foo²²', 'UTF8', 'ASCII', sub='f')");
+        assertEval("iconv('foo²²', 'UTF8', 'ASCII', sub='')");
     }
 }
-- 
GitLab