diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Capabilities.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Capabilities.java
index 015ffea85340b0fbdc899feed463a5a35a43a9dd..acb1b4cd0453de8efff4bf35ef022c694c8327e4 100644
--- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Capabilities.java
+++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Capabilities.java
@@ -48,7 +48,7 @@ public abstract class Capabilities extends RBuiltinNode.Arg0 {
         libxml(false, null),
         fifo(true, null),
         cledit(false, null),
-        iconv(false, null),
+        iconv(true, null),
         nls(false, "NLS"),
         profmem(false, null),
         cairo(false, null),
diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IConv.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IConv.java
index c4a74c677561cd9277671d5b3fb91cd98eeb0034..4c2723335e7a3b107a808e7298a1ed1638dcd821 100644
--- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IConv.java
+++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IConv.java
@@ -22,17 +22,30 @@
  */
 package com.oracle.truffle.r.nodes.builtin.base;
 
-import com.oracle.truffle.api.dsl.Cached;
 import static com.oracle.truffle.r.nodes.builtin.CastBuilder.Predef.size;
 import static com.oracle.truffle.r.nodes.builtin.CastBuilder.Predef.stringValue;
+import static com.oracle.truffle.r.nodes.builtin.CastBuilder.Predef.toBoolean;
 import static com.oracle.truffle.r.runtime.builtins.RBehavior.PURE;
 import static com.oracle.truffle.r.runtime.builtins.RBuiltinKind.INTERNAL;
 
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.StandardCharsets;
+import java.nio.charset.UnsupportedCharsetException;
+
+import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
+import com.oracle.truffle.api.dsl.Cached;
 import com.oracle.truffle.api.dsl.Specialization;
-import com.oracle.truffle.api.profiles.ConditionProfile;
 import com.oracle.truffle.r.nodes.attributes.UnaryCopyAttributesNode;
 import com.oracle.truffle.r.nodes.builtin.RBuiltinNode;
 import com.oracle.truffle.r.runtime.RError;
+import com.oracle.truffle.r.runtime.RError.Message;
+import com.oracle.truffle.r.runtime.RInternalError;
 import com.oracle.truffle.r.runtime.RRuntime;
 import com.oracle.truffle.r.runtime.builtins.RBuiltin;
 import com.oracle.truffle.r.runtime.data.RDataFactory;
@@ -46,25 +59,97 @@ public abstract class IConv extends RBuiltinNode.Arg6 {
         Casts casts = new Casts(IConv.class);
         casts.arg("x").mustBe(stringValue(), RError.Message.NOT_CHARACTER_VECTOR, "x");
         // with default error message, NO_CALLER does not work
-        casts.arg("from").defaultError(RError.Message.INVALID_ARGUMENT, "from").mustBe(stringValue()).asStringVector().mustBe(size(1));
-        casts.arg("to").defaultError(RError.Message.INVALID_ARGUMENT, "to").mustBe(stringValue()).asStringVector().mustBe(size(1));
-        casts.arg("sub").defaultError(RError.Message.INVALID_ARGUMENT, "sub").mustBe(stringValue()).asStringVector().mustBe(size(1));
-        casts.arg("mark").asLogicalVector().findFirst(RRuntime.LOGICAL_FALSE);
-        casts.arg("toRaw").asLogicalVector().findFirst(RRuntime.LOGICAL_FALSE);
+        casts.arg("from").defaultError(RError.Message.INVALID_ARGUMENT, "from").mustBe(stringValue()).asStringVector().mustBe(size(1)).findFirst();
+        casts.arg("to").defaultError(RError.Message.INVALID_ARGUMENT, "to").mustBe(stringValue()).asStringVector().mustBe(size(1)).findFirst();
+        casts.arg("sub").defaultError(RError.Message.INVALID_ARGUMENT, "sub").mustBe(stringValue()).asStringVector().mustBe(size(1)).findFirst();
+        casts.arg("mark").asLogicalVector().findFirst(RRuntime.LOGICAL_FALSE).map(toBoolean());
+        casts.arg("toRaw").asLogicalVector().findFirst(RRuntime.LOGICAL_FALSE).map(toBoolean());
     }
 
-    @SuppressWarnings("unused")
+    /**
+     * Converts each element of {@code x} by encoding it with the {@code from} charset and
+     * decoding the resulting bytes with the {@code to} charset. Elements that cannot be
+     * converted become NA when {@code sub} is NA; otherwise the offending parts are replaced
+     * by {@code sub}, or dropped when {@code sub} is the empty string. NA elements stay NA.
+     * {@code toRaw=TRUE} is not implemented.
+     */
     @Specialization
-    protected RStringVector doIConv(RAbstractStringVector x, Object from, Object to, Object sub, byte mark, byte toRaw,
-                    @Cached("create()") UnaryCopyAttributesNode copyAttributesNode,
-                    @Cached("createBinaryProfile()") ConditionProfile xLengthProfile) {
-        // TODO implement
-        RStringVector xv = x.materialize();
-        RStringVector result = RDataFactory.createStringVector(xv.getDataCopy(), RDataFactory.COMPLETE_VECTOR);
-        if (xLengthProfile.profile(result.getLength() == x.getLength())) {
+    @TruffleBoundary
+    protected RAbstractStringVector doIConv(RAbstractStringVector x, String from, String to, String sub, @SuppressWarnings("unused") boolean mark, boolean toRaw,
+                    @Cached("create()") UnaryCopyAttributesNode copyAttributesNode) {
+
+        if (toRaw) {
+            throw RInternalError.unimplemented("iconv with toRaw=TRUE");
+        }
+
+        Charset fromCharset = getCharset(from, from, to);
+        Charset toCharset = getCharset(to, from, to);
+        boolean complete = x.isComplete();
+        if (fromCharset == StandardCharsets.UTF_8 && toCharset == StandardCharsets.UTF_8) {
+            // this conversion cannot change anything
+            return x;
+        } else {
+            // simulate the results of charset conversion
+            CharsetEncoder encoder = fromCharset.newEncoder();
+            CharsetDecoder decoder = toCharset.newDecoder();
+            if (RRuntime.isNA(sub)) {
+                encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+                decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+                decoder.onMalformedInput(CodingErrorAction.REPORT);
+            } else if (sub.isEmpty()) {
+                // replaceWith() rejects an empty replacement with an IllegalArgumentException;
+                // an empty 'sub' means that whatever cannot be converted is dropped
+                encoder.onUnmappableCharacter(CodingErrorAction.IGNORE);
+                decoder.onUnmappableCharacter(CodingErrorAction.IGNORE);
+                decoder.onMalformedInput(CodingErrorAction.IGNORE);
+            } else {
+                decoder.replaceWith(sub);
+                decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+                decoder.onMalformedInput(CodingErrorAction.REPLACE);
+                encoder.replaceWith(sub.getBytes(toCharset));
+                encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+            }
+            int length = x.getLength();
+            String[] data = new String[length];
+            for (int i = 0; i < length; i++) {
+                String value = x.getDataAt(i);
+                if (!RRuntime.isNA(value)) {
+                    try {
+                        data[i] = decoder.decode(encoder.encode(CharBuffer.wrap(value))).toString();
+                    } catch (CharacterCodingException e) {
+                        complete = false;
+                        data[i] = RRuntime.STRING_NA;
+                    }
+                } else {
+                    // keep NA elements as NA instead of leaving a null entry in the result
+                    complete = false;
+                    data[i] = RRuntime.STRING_NA;
+                }
+            }
+            RStringVector result = RDataFactory.createStringVector(data, complete);
             copyAttributesNode.execute(result, x);
+            return result;
+        }
+    }
+
+    /**
+     * Resolves an encoding name to a {@link Charset}: {@code ""} stands for the current
+     * {@code LC_CTYPE} value and {@code "C"} for US-ASCII. {@code from} and {@code to} are only
+     * used to build the error message for an unknown or illegal name.
+     */
+    private Charset getCharset(String name, String from, String to) {
+        String charsetName = "".equals(name) ? LocaleFunctions.LC.CTYPE.getValue() : name;
+        Charset charset;
+        if ("C".equals(charsetName)) {
+            charset = StandardCharsets.US_ASCII;
+        } else {
+            try {
+                charset = Charset.forName(charsetName);
+            } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
+                throw error(Message.UNSUPPORTED_ENCODING_CONVERSION, from, to);
+            }
         }
-        return result;
+        return charset;
     }
 
 }
diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_iconv.java b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_iconv.java
index b06ef8da33a6fdabcc15fc22b44759eca66e6e7d..6d4e384ed366f5a300cee713da3147dca844cb1b 100644
--- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_iconv.java
+++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_iconv.java
@@ -79,5 +79,6 @@ public class TestBuiltin_iconv extends TestBase {
         assertEval("{ .Internal(iconv(\"7\", \"latin1\", character(), \"42\", T, F)) }");
         assertEval("{ .Internal(iconv(\"7\", \"latin1\", \"ASCII\", 42, T, F)) }");
         assertEval("{ .Internal(iconv(\"7\", \"latin1\", \"ASCII\", character(), T, F)) }");
+        assertEval("Sys.setlocale('LC_CTYPE', 'C'); iconv(c('²a²²','b')); Sys.setlocale('LC_CTYPE', 'UTF-8'); iconv(c('²a²²','b'))");
     }
 }