diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IConv.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IConv.java index 7a80372f4ee911e03e2fc49215e42bc2c3f70980..35a9acd0cb58ae18dfc044b702b0fc6a2980f163 100644 --- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IConv.java +++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IConv.java @@ -90,14 +90,28 @@ public abstract class IConv extends RBuiltinNode.Arg6 { CharsetDecoder decoder = toCharset.newDecoder(); if (RRuntime.isNA(sub)) { encoder.onUnmappableCharacter(CodingErrorAction.REPORT); + encoder.onMalformedInput(CodingErrorAction.REPORT); decoder.onUnmappableCharacter(CodingErrorAction.REPORT); decoder.onMalformedInput(CodingErrorAction.REPORT); + } else if ("byte".equals(sub)) { + // TODO: special mode that inserts <hexcode> + encoder.onUnmappableCharacter(CodingErrorAction.IGNORE); + encoder.onMalformedInput(CodingErrorAction.IGNORE); + decoder.onUnmappableCharacter(CodingErrorAction.IGNORE); + decoder.onMalformedInput(CodingErrorAction.IGNORE); + } else if (sub.isEmpty()) { + encoder.onUnmappableCharacter(CodingErrorAction.IGNORE); + encoder.onMalformedInput(CodingErrorAction.IGNORE); + decoder.onUnmappableCharacter(CodingErrorAction.IGNORE); + decoder.onMalformedInput(CodingErrorAction.IGNORE); } else { - decoder.replaceWith(sub); + // ignore encoding errors + encoder.onUnmappableCharacter(CodingErrorAction.IGNORE); + encoder.onMalformedInput(CodingErrorAction.IGNORE); + // TODO: support more than one character in "replacement" + decoder.replaceWith(sub.substring(0, 1)); decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); decoder.onMalformedInput(CodingErrorAction.REPLACE); - encoder.replaceWith(sub.getBytes(toCharset)); - encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); } int length = x.getLength(); String[] data = new String[length]; diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test index 19b3efcc60a0206ac10698ccb3193ee1115a7707..06d54805ff27dfbfbc043ccfef202f211626b0d3 100644 --- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test +++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test @@ -28170,6 +28170,26 @@ attr(,"Rd_tag") [1] "UTF-8" [1] "²a²²" "b" +##com.oracle.truffle.r.test.builtins.TestBuiltin_iconv.testIconv# +#iconv('foo²²', 'UTF8', 'ASCII') +[1] NA + +##com.oracle.truffle.r.test.builtins.TestBuiltin_iconv.testIconv# +#iconv('foo²²', 'UTF8', 'ASCII', sub='') +[1] "foo" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_iconv.testIconv#Ignored.Unimplemented# +#iconv('foo²²', 'UTF8', 'ASCII', sub='byte') +[1] "foo<c2><b2><c2><b2>" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_iconv.testIconv# +#iconv('foo²²', 'UTF8', 'ASCII', sub='f') +[1] "fooffff" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_iconv.testIconv#Ignored.Unimplemented# +#iconv('foo²²', 'UTF8', 'ASCII', sub='fooooo') +[1] "foofooooofooooofooooofooooo" + ##com.oracle.truffle.r.test.builtins.TestBuiltin_iconv.testIconv# #{ .Internal(iconv("7", "latin1", "ASCII", 42, T, F)) } Error: invalid 'sub' argument diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_iconv.java b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_iconv.java index 6d4e384ed366f5a300cee713da3147dca844cb1b..9bb79a5737fe67f7dd476bf9b6253da18a1fe47f 100644 --- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_iconv.java +++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_iconv.java @@ -80,5 +80,10 @@ public class TestBuiltin_iconv extends TestBase { assertEval("{ .Internal(iconv(\"7\", \"latin1\", \"ASCII\", 42, T, F)) }"); assertEval("{ .Internal(iconv(\"7\", \"latin1\", \"ASCII\", character(), T, F)) }"); assertEval("Sys.setlocale('LC_CTYPE', 'C'); iconv(c('²a²²','b')); Sys.setlocale('LC_CTYPE', 'UTF-8'); iconv(c('²a²²','b'))"); + assertEval("iconv('foo²²', 'UTF8', 'ASCII')"); + assertEval(Ignored.Unimplemented, "iconv('foo²²', 'UTF8', 'ASCII', sub='byte')"); + assertEval(Ignored.Unimplemented, "iconv('foo²²', 'UTF8', 'ASCII', sub='fooooo')"); + assertEval("iconv('foo²²', 'UTF8', 'ASCII', sub='f')"); + assertEval("iconv('foo²²', 'UTF8', 'ASCII', sub='')"); } }