diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/BasePackage.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/BasePackage.java
index 7af620d538d851bee1c8a1feaf42165f1fc28326..baacddbab3725f335f394f2db72375a66038040c 100644
--- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/BasePackage.java
+++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/BasePackage.java
@@ -430,6 +430,7 @@ public class BasePackage extends RBuiltinPackage {
         add(Interactive.class, InteractiveNodeGen::create);
         add(Internal.class, InternalNodeGen::create);
         add(IntToBits.class, IntToBitsNodeGen::create);
+        add(IntToUtf8.class, IntToUtf8NodeGen::create);
         add(Invisible.class, InvisibleNodeGen::create);
         add(IsATTY.class, IsATTYNodeGen::create);
         add(IsFiniteFunctions.IsFinite.class, IsFiniteFunctionsFactory.IsFiniteNodeGen::create);
diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IntToUtf8.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IntToUtf8.java
new file mode 100644
index 0000000000000000000000000000000000000000..701d18fb0ddb15d9b19b868a399503934125b904
--- /dev/null
+++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/IntToUtf8.java
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.truffle.r.nodes.builtin.base;
+
+import static com.oracle.truffle.r.nodes.builtin.CastBuilder.Predef.toBoolean;
+import static com.oracle.truffle.r.runtime.builtins.RBehavior.PURE;
+import static com.oracle.truffle.r.runtime.builtins.RBuiltinKind.INTERNAL;
+
+import com.oracle.truffle.api.dsl.Cached;
+import com.oracle.truffle.api.dsl.Specialization;
+import com.oracle.truffle.api.profiles.ConditionProfile;
+import com.oracle.truffle.r.nodes.builtin.CastBuilder;
+import com.oracle.truffle.r.nodes.builtin.RBuiltinNode;
+import com.oracle.truffle.r.runtime.RError;
+import com.oracle.truffle.r.runtime.RError.Message;
+import com.oracle.truffle.r.runtime.RRuntime;
+import com.oracle.truffle.r.runtime.builtins.RBuiltin;
+import com.oracle.truffle.r.runtime.data.RDataFactory;
+import com.oracle.truffle.r.runtime.data.RNull;
+import com.oracle.truffle.r.runtime.data.model.RAbstractIntVector;
+import com.oracle.truffle.r.runtime.data.model.RAbstractStringVector;
+import com.oracle.truffle.r.runtime.ops.na.NACheck;
+
+/**
+ * Implements the {@code intToUtf8(x, multiple)} internal builtin, which turns integer Unicode code
+ * points into text: one string built from all of {@code x} when {@code multiple} is false, or a
+ * string vector with one element per code point when it is true.
+ */
+@RBuiltin(name = "intToUtf8", kind = INTERNAL, parameterNames = {"x", "multiple"}, behavior = PURE)
+public abstract class IntToUtf8 extends RBuiltinNode {
+
+    /**
+     * Casts {@code x} to an integer vector (letting {@code NULL} through) and reduces
+     * {@code multiple}, which must not be {@code NULL}, to the boolean value of its first element.
+     */
+    @Override
+    protected void createCasts(CastBuilder casts) {
+        casts.arg("x").allowNull().asIntegerVector();
+        casts.arg("multiple").mustNotBeNull().asLogicalVector().findFirst().map(toBoolean());
+    }
+
+    /** A {@code NULL} input yields the empty string, whatever the value of {@code multiple}. */
+    @Specialization
+    protected String intToUtf8(@SuppressWarnings("unused") RNull x, @SuppressWarnings("unused") boolean multiple) {
+        return "";
+    }
+
+    /**
+     * Converts every element of {@code x} separately: {@code NA} stays {@code NA}, zero becomes the
+     * empty string, and any other value becomes a string holding that single code point. Raises
+     * the R error "illegal unicode code point" when an element is not a valid code point.
+     */
+    @Specialization(guards = "multiple")
+    protected RAbstractStringVector intToUtf8Multiple(RAbstractIntVector x, @SuppressWarnings("unused") boolean multiple,
+                    @Cached("create()") NACheck na,
+                    @Cached("createBinaryProfile()") ConditionProfile zeroProfile) {
+
+        String[] result = new String[x.getLength()];
+        na.enable(x);
+        for (int j = 0; j < x.getLength(); j++) {
+            int temp = x.getDataAt(j);
+            if (na.check(temp)) {
+                result[j] = RRuntime.STRING_NA;
+            } else if (zeroProfile.profile(temp == 0)) {
+                result[j] = "";
+            } else {
+                try {
+                    result[j] = new String(new int[]{temp}, 0, 1);
+                } catch (IllegalArgumentException e) {
+                    // the String constructor rejects values that are not valid code points
+                    throw RError.error(RError.SHOW_CALLER, Message.GENERIC, "illegal unicode code point");
+                }
+            }
+        }
+        return RDataFactory.createStringVector(result, na.neverSeenNA());
+    }
+
+    /**
+     * Concatenates the code points in {@code x} into one string, skipping zeros. A single
+     * {@code NA} element makes the whole result {@code NA}. Raises the R error "illegal unicode
+     * code point" when an element is not a valid code point.
+     */
+    @Specialization(guards = "!multiple")
+    protected String intToUtf8(RAbstractIntVector x, @SuppressWarnings("unused") boolean multiple,
+                    @Cached("create()") NACheck na,
+                    @Cached("createBinaryProfile()") ConditionProfile zeroProfile) {
+
+        int[] result = new int[x.getLength()];
+        na.enable(x);
+        int pos = 0; // number of non-zero code points collected so far
+        for (int j = 0; j < x.getLength(); j++) {
+            int temp = x.getDataAt(j);
+            if (na.check(temp)) {
+                return RRuntime.STRING_NA;
+            } else if (zeroProfile.profile(temp != 0)) {
+                result[pos++] = temp;
+            }
+        }
+        try {
+            return new String(result, 0, pos);
+        } catch (IllegalArgumentException e) {
+            // the String constructor rejects values that are not valid code points
+            throw RError.error(RError.SHOW_CALLER, Message.GENERIC, "illegal unicode code point");
+        }
+    }
+}
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RRuntime.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RRuntime.java
index 842ef249347c51061cea9fbad90832d5a0fa8155..6ef2b515dd3a750b615ce9328c6884b38ab79cf5 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RRuntime.java
+++ 
b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RRuntime.java @@ -691,11 +691,15 @@ public class RRuntime { break; default: if (codepoint < 32 || codepoint == 0x7f) { - str.append("\\").append(codepoint / 64).append((codepoint / 8) % 8).append(codepoint % 8); + str.append("\\").append(codepoint >>> 6).append((codepoint >>> 3) & 0x7).append(codepoint & 0x7); } else if (encodeNonASCII && codepoint > 0x7f && codepoint <= 0xff) { str.append("\\x" + Integer.toHexString(codepoint)); - // } else if (codepoint > 0x7f && codepoint <= 0xff) { - // str.append("\\u" + Integer.toHexString(codepoint)); + } else if (codepoint > 64967) { // determined by experimentation + if (codepoint < 0x10000) { + str.append("\\u").append(String.format("%04x", codepoint)); + } else { + str.append("\\U").append(String.format("%08x", codepoint)); + } } else { str.appendCodePoint(codepoint); } diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test index 24dc084fd56b15e6cc5d45c6861106068aa5bbcb..5b0d957fc0ad13689c8b25b38808bd3ded89ef5a 100644 --- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test +++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test @@ -22526,6 +22526,87 @@ raw(0) #argv <- list(NULL); .Internal(intToBits(argv[[1]])) raw(0) +##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf8 +#intToUtf8(-100) +Error in intToUtf8(-100) : embedded nul in string: '\0\0\0\0\0\0\0' + +##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf8 +#intToUtf8(0) +[1] "" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf8 +#intToUtf8(1) +[1] "\001" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf8 +#intToUtf8(1:100) +[1] "\001\002\003\004\005\006\a\b\t\n\v\f\r\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcd" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf8 +#intToUtf8(1:100, FALSE) +[1] "\001\002\003\004\005\006\a\b\t\n\v\f\r\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcd" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf8 +#intToUtf8(1:100, TRUE) + [1] "\001" "\002" "\003" "\004" "\005" "\006" "\a" "\b" "\t" "\n" + [11] "\v" "\f" "\r" "\016" "\017" "\020" "\021" "\022" "\023" "\024" + [21] "\025" "\026" "\027" "\030" "\031" "\032" "\033" "\034" "\035" "\036" + [31] "\037" " " "!" "\"" "#" "$" "%" "&" "'" "(" + [41] ")" "*" "+" "," "-" "." "/" "0" "1" "2" + [51] "3" "4" "5" "6" "7" "8" "9" ":" ";" "<" + [61] "=" ">" "?" "@" "A" "B" "C" "D" "E" "F" + [71] "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" + [81] "Q" "R" "S" "T" "U" "V" "W" "X" "Y" "Z" + [91] "[" "\\" "]" "^" "_" "`" "a" "b" "c" "d" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf8 +#intToUtf8(2000) +[1] "\u07d0" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf8 +#intToUtf8(200000) +[1] "\U00030d40" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf8 +#intToUtf8(200L) +[1] "È" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf8 +#intToUtf8(32) +[1] " " + +##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf8 +#intToUtf8(55) +[1] "7" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf8 +#intToUtf8(55.5) +[1] "7" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf8 +#intToUtf8(65535) +[1] "\uffff" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf8 +#intToUtf8(65536) +[1] "\U00010000" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf8 +#intToUtf8(c(100,101,0,102)) +[1] "def" + 
+##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf8 +#intToUtf8(c(100,101,0,102), TRUE) +[1] "d" "e" "" "f" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf8 +#nchar(intToUtf8(c(100,101,0,102))) +[1] 3 + +##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf8 +#nchar(intToUtf8(c(100,101,0,102), TRUE)) +[1] 1 1 0 1 + ##com.oracle.truffle.r.test.builtins.TestBuiltin_intToUtf8.testintToUtf81 #argv <- list(NULL, FALSE); .Internal(intToUtf8(argv[[1]], argv[[2]])) [1] "" diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_intToUtf8.java b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_intToUtf8.java index 47e5402f3bed36e7948fa2f6f18b8f4f7a9301ba..a3d05c4a0fceb8a7630e1ba6d1c822edc83ade56 100644 --- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_intToUtf8.java +++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_intToUtf8.java @@ -19,26 +19,49 @@ public class TestBuiltin_intToUtf8 extends TestBase { @Test public void testintToUtf81() { - assertEval(Ignored.Unknown, "argv <- list(NULL, FALSE); .Internal(intToUtf8(argv[[1]], argv[[2]]))"); + assertEval("argv <- list(NULL, FALSE); .Internal(intToUtf8(argv[[1]], argv[[2]]))"); } @Test public void testintToUtf82() { - assertEval(Ignored.Unknown, "argv <- list(list(), FALSE); .Internal(intToUtf8(argv[[1]], argv[[2]]))"); + assertEval("argv <- list(list(), FALSE); .Internal(intToUtf8(argv[[1]], argv[[2]]))"); } @Test public void testintToUtf83() { - assertEval(Ignored.Unknown, "argv <- list(FALSE, FALSE); .Internal(intToUtf8(argv[[1]], argv[[2]]))"); + assertEval("argv <- list(FALSE, FALSE); .Internal(intToUtf8(argv[[1]], argv[[2]]))"); } @Test public void testintToUtf85() { - assertEval(Ignored.Unknown, "argv <- structure(list(x = NA_integer_, multiple = TRUE), .Names = c('x', 'multiple'));do.call('intToUtf8', argv)"); + 
assertEval("argv <- structure(list(x = NA_integer_, multiple = TRUE), .Names = c('x', 'multiple'));do.call('intToUtf8', argv)"); } @Test public void testintToUtf86() { - assertEval(Ignored.Unknown, "argv <- structure(list(x = NA_integer_), .Names = 'x');do.call('intToUtf8', argv)"); + assertEval("argv <- structure(list(x = NA_integer_), .Names = 'x');do.call('intToUtf8', argv)"); + } + + @Test + public void testintToUtf8() { + assertEval("intToUtf8(0)"); + assertEval(Output.IgnoreErrorMessage, "intToUtf8(-100)"); + assertEval("intToUtf8(1)"); + assertEval("intToUtf8(c(100,101,0,102))"); + assertEval("intToUtf8(c(100,101,0,102), TRUE)"); + assertEval("nchar(intToUtf8(c(100,101,0,102)))"); + assertEval("nchar(intToUtf8(c(100,101,0,102), TRUE))"); + assertEval("intToUtf8(32)"); + assertEval("intToUtf8(55)"); + assertEval("intToUtf8(55.5)"); + assertEval("intToUtf8(200L)"); + // it's not clear why GNUR does not print these characters + assertEval(Ignored.ReferenceError, "intToUtf8(2000)"); + assertEval("intToUtf8(65535)"); + assertEval("intToUtf8(65536)"); + assertEval("intToUtf8(200000)"); + assertEval("intToUtf8(1:100)"); + assertEval("intToUtf8(1:100, FALSE)"); + assertEval("intToUtf8(1:100, TRUE)"); } }