From 8b138de89fc73c41f163c87382ee144fb7c8fa25 Mon Sep 17 00:00:00 2001 From: Lukas Stadler <lukas.stadler@oracle.com> Date: Sun, 15 Oct 2017 11:01:56 +0200 Subject: [PATCH] handle multi-char, empty and "byte" replacements in iconv --- .../truffle/r/nodes/builtin/base/IConv.java | 20 ++++++++++++++++--- .../truffle/r/test/ExpectedTestOutput.test | 20 +++++++++++++++++++ .../r/test/builtins/TestBuiltin_iconv.java | 5 +++++ 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IConv.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IConv.java index 7a80372f4e..35a9acd0cb 100644 --- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IConv.java +++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IConv.java @@ -90,14 +90,28 @@ public abstract class IConv extends RBuiltinNode.Arg6 { CharsetDecoder decoder = toCharset.newDecoder(); if (RRuntime.isNA(sub)) { encoder.onUnmappableCharacter(CodingErrorAction.REPORT); + encoder.onMalformedInput(CodingErrorAction.REPORT); decoder.onUnmappableCharacter(CodingErrorAction.REPORT); decoder.onMalformedInput(CodingErrorAction.REPORT); + } else if ("byte".equals(sub)) { + // TODO: special mode that inserts <hexcode> + encoder.onUnmappableCharacter(CodingErrorAction.IGNORE); + encoder.onMalformedInput(CodingErrorAction.IGNORE); + decoder.onUnmappableCharacter(CodingErrorAction.IGNORE); + decoder.onMalformedInput(CodingErrorAction.IGNORE); + } else if (sub.isEmpty()) { + encoder.onUnmappableCharacter(CodingErrorAction.IGNORE); + encoder.onMalformedInput(CodingErrorAction.IGNORE); + decoder.onUnmappableCharacter(CodingErrorAction.IGNORE); + decoder.onMalformedInput(CodingErrorAction.IGNORE); } else { - decoder.replaceWith(sub); + // ignore encoding errors + encoder.onUnmappableCharacter(CodingErrorAction.IGNORE); + encoder.onMalformedInput(CodingErrorAction.IGNORE); + // TODO: support more than one character in "replacement" + decoder.replaceWith(sub.substring(0, 1)); decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); decoder.onMalformedInput(CodingErrorAction.REPLACE); - encoder.replaceWith(sub.getBytes(toCharset)); - encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); } int length = x.getLength(); String[] data = new String[length]; diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test index 19b3efcc60..06d54805ff 100644 --- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test +++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test @@ -28170,6 +28170,26 @@ attr(,"Rd_tag") [1] "UTF-8" [1] "²a²²" "b" +##com.oracle.truffle.r.test.builtins.TestBuiltin_iconv.testIconv# +#iconv('foo²²', 'UTF8', 'ASCII') +[1] NA + +##com.oracle.truffle.r.test.builtins.TestBuiltin_iconv.testIconv# +#iconv('foo²²', 'UTF8', 'ASCII', sub='') +[1] "foo" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_iconv.testIconv#Ignored.Unimplemented# +#iconv('foo²²', 'UTF8', 'ASCII', sub='byte') +[1] "foo<c2><b2><c2><b2>" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_iconv.testIconv# +#iconv('foo²²', 'UTF8', 'ASCII', sub='f') +[1] "fooffff" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_iconv.testIconv#Ignored.Unimplemented# +#iconv('foo²²', 'UTF8', 'ASCII', sub='fooooo') +[1] "foofooooofooooofooooofooooo" + ##com.oracle.truffle.r.test.builtins.TestBuiltin_iconv.testIconv# #{ .Internal(iconv("7", "latin1", "ASCII", 42, T, F)) } Error: invalid 'sub' argument diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_iconv.java b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_iconv.java index 6d4e384ed3..9bb79a5737 100644 --- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_iconv.java +++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_iconv.java @@ -80,5 +80,10 @@ public class TestBuiltin_iconv extends TestBase { assertEval("{ .Internal(iconv(\"7\", \"latin1\", \"ASCII\", 42, T, F)) }"); assertEval("{ .Internal(iconv(\"7\", \"latin1\", \"ASCII\", character(), T, F)) }"); assertEval("Sys.setlocale('LC_CTYPE', 'C'); iconv(c('²a²²','b')); Sys.setlocale('LC_CTYPE', 'UTF-8'); iconv(c('²a²²','b'))"); + assertEval("iconv('foo²²', 'UTF8', 'ASCII')"); + assertEval(Ignored.Unimplemented, "iconv('foo²²', 'UTF8', 'ASCII', sub='byte')"); + assertEval(Ignored.Unimplemented, "iconv('foo²²', 'UTF8', 'ASCII', sub='fooooo')"); + assertEval("iconv('foo²²', 'UTF8', 'ASCII', sub='f')"); + assertEval("iconv('foo²²', 'UTF8', 'ASCII', sub='')"); } } -- GitLab