From afa9699ad2d59ab7770485e6b7a9b70237545eec Mon Sep 17 00:00:00 2001 From: stepan <stepan.sindelar@oracle.com> Date: Fri, 9 Feb 2018 11:55:10 +0100 Subject: [PATCH] Treat CharSXP as byte array w.r.t. e.g. length --- .../r/ffi/impl/interop/CharSXPWrapperMR.java | 5 ++-- .../truffle/r/ffi/impl/nodes/MiscNodes.java | 2 +- .../r/runtime/data/CharSXPWrapper.java | 22 ++++++++++++++-- .../r/runtime/data/NativeDataAccess.java | 25 +++++++++++++------ .../packages/testrffi/testrffi/R/testrffi.R | 4 +++ .../packages/testrffi/testrffi/src/init.c | 1 + .../packages/testrffi/testrffi/src/testrffi.c | 14 +++++++++++ .../packages/testrffi/testrffi/src/testrffi.h | 4 ++- .../testrffi/testrffi/tests/simpleTests.R | 9 ++++++- 9 files changed, 71 insertions(+), 15 deletions(-) diff --git a/com.oracle.truffle.r.ffi.impl/src/com/oracle/truffle/r/ffi/impl/interop/CharSXPWrapperMR.java b/com.oracle.truffle.r.ffi.impl/src/com/oracle/truffle/r/ffi/impl/interop/CharSXPWrapperMR.java index 5ba2936057..95c06bb5e6 100644 --- a/com.oracle.truffle.r.ffi.impl/src/com/oracle/truffle/r/ffi/impl/interop/CharSXPWrapperMR.java +++ b/com.oracle.truffle.r.ffi.impl/src/com/oracle/truffle/r/ffi/impl/interop/CharSXPWrapperMR.java @@ -71,10 +71,9 @@ public class CharSXPWrapperMR { protected Object access(CharSXPWrapper receiver, Number indexNum) { int index = indexNum.intValue(); - String contents = receiver.getContents(); - int len = contents.length(); + int len = receiver.getLength(); if (prof1.profile(index < len)) { - return contents.charAt(index); + return receiver.getByteAt(index); } else if (prof2.profile(index == len)) { return 0; } else { diff --git a/com.oracle.truffle.r.ffi.impl/src/com/oracle/truffle/r/ffi/impl/nodes/MiscNodes.java b/com.oracle.truffle.r.ffi.impl/src/com/oracle/truffle/r/ffi/impl/nodes/MiscNodes.java index c42616fccf..25e4a650a1 100644 --- a/com.oracle.truffle.r.ffi.impl/src/com/oracle/truffle/r/ffi/impl/nodes/MiscNodes.java +++ b/com.oracle.truffle.r.ffi.impl/src/com/oracle/truffle/r/ffi/impl/nodes/MiscNodes.java @@ -91,7 +91,7 @@ public final class MiscNodes { @Specialization protected int length(CharSXPWrapper obj) { - return obj.getContents().length(); + return obj.getLength(); } @Specialization diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/CharSXPWrapper.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/CharSXPWrapper.java index b7291cb6a3..d1d4cab104 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/CharSXPWrapper.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/CharSXPWrapper.java @@ -22,6 +22,8 @@ */ package com.oracle.truffle.r.runtime.data; +import java.nio.charset.StandardCharsets; + import com.oracle.truffle.r.runtime.RRuntime; /** @@ -31,12 +33,16 @@ import com.oracle.truffle.r.runtime.RRuntime; * FastR already uses {@code String} to denote a length-1 string vector, it cannot be used to * represent a {@code CHARSXP}, so this class exists to do so. * + * As opposed to Strings on the Java side, the native side "Strings" should be treated as array of + * bytes. {@link CharSXPWrapper} wraps the byte array, but does not add the '\0' at the end of it. + * * N.B. Use limited to RFFI implementations. */ public final class CharSXPWrapper extends RObject implements RTruffleObject { private static final CharSXPWrapper NA = new CharSXPWrapper(RRuntime.STRING_NA); private String contents; + private byte[] bytes; private CharSXPWrapper(String contents) { this.contents = contents; @@ -52,8 +58,12 @@ public final class CharSXPWrapper extends RObject implements RTruffleObject { return NativeDataAccess.getData(this, contents); } + public byte getByteAt(int index) { + return NativeDataAccess.getDataAt(this, getBytes(), index); + } + public int getLength() { - return NativeDataAccess.getDataLength(this, contents); + return NativeDataAccess.getDataLength(this, getBytes()); } @Override @@ -71,9 +81,17 @@ public final class CharSXPWrapper extends RObject implements RTruffleObject { public long allocateNativeContents() { try { - return NativeDataAccess.allocateNativeContents(this, contents); + return NativeDataAccess.allocateNativeContents(this, getBytes()); } finally { contents = null; + bytes = null; + } + } + + private byte[] getBytes() { + if (bytes == null && contents != null) { + bytes = contents.getBytes(StandardCharsets.UTF_8); } + return bytes; } } diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/NativeDataAccess.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/NativeDataAccess.java index 0f60201c90..a1e49b53ab 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/NativeDataAccess.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/NativeDataAccess.java @@ -147,9 +147,8 @@ public final class NativeDataAccess { } @TruffleBoundary - void allocateNative(String source) { + void allocateNativeString(byte[] bytes) { assert dataAddress == 0; - byte[] bytes = source.getBytes(StandardCharsets.US_ASCII); dataAddress = UnsafeAdapter.UNSAFE.allocateMemory(bytes.length + 1); UnsafeAdapter.UNSAFE.copyMemory(bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, null, dataAddress, bytes.length); UnsafeAdapter.UNSAFE.putByte(dataAddress + bytes.length, (byte) 0); // C strings @@ -570,9 +569,21 @@ public final class NativeDataAccess { } } - static int getDataLength(CharSXPWrapper vector, String data) { + static byte getDataAt(CharSXPWrapper vector, byte[] data, int index) { if (noCharSXPNative.isValid() || data != null) { - return data.length(); + return data[index]; + } else { + NativeMirror mirror = (NativeMirror) vector.getNativeMirror(); + long address = mirror.dataAddress; + assert address != 0; + assert index < mirror.length; + return UnsafeAdapter.UNSAFE.getByte(address + index); + } + } + + static int getDataLength(CharSXPWrapper vector, byte[] data) { + if (noCharSXPNative.isValid() || data != null) { + return data.length; } else { NativeMirror mirror = (NativeMirror) vector.getNativeMirror(); long address = mirror.dataAddress; @@ -644,13 +655,13 @@ public final class NativeDataAccess { return mirror.dataAddress; } - static long allocateNativeContents(CharSXPWrapper vector, String contents) { + static long allocateNativeContents(CharSXPWrapper vector, byte[] data) { NativeMirror mirror = (NativeMirror) vector.getNativeMirror(); assert mirror != null; - assert mirror.dataAddress == 0 ^ contents == null; + assert mirror.dataAddress == 0 ^ data == null; if (mirror.dataAddress == 0) { noCharSXPNative.invalidate(); - mirror.allocateNative(contents); + mirror.allocateNativeString(data); } return mirror.dataAddress; } diff --git a/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/R/testrffi.R b/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/R/testrffi.R index 2eef27c513..4f7c1c6af7 100644 --- a/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/R/testrffi.R +++ b/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/R/testrffi.R @@ -207,4 +207,8 @@ rffi.RfEvalWithPromiseInPairList <- function() { rffi.isNAString <- function(x) { .Call('test_isNAString', x) +} + +rffi.getBytes <- function(x) { + .Call('test_getBytes', x) } \ No newline at end of file diff --git a/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/src/init.c b/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/src/init.c index d73d58a523..5fe277273e 100644 --- a/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/src/init.c +++ b/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/src/init.c @@ -83,6 +83,7 @@ static const R_CallMethodDef CallEntries[] = { CALLDEF(test_ParseVector, 1), CALLDEF(test_RfEvalWithPromiseInPairList, 0), CALLDEF(test_isNAString, 1), + CALLDEF(test_getBytes, 1), CALLDEF(test_setStringElt, 2), {NULL, NULL, 0} }; diff --git a/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/src/testrffi.c b/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/src/testrffi.c index d4be694519..e0d8cc1c03 100644 --- a/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/src/testrffi.c +++ b/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/src/testrffi.c @@ -370,6 +370,20 @@ SEXP test_isNAString(SEXP vec) { } } +SEXP test_getBytes(SEXP vec) { + char* bytes = R_CHAR(STRING_ELT(vec, 0)); + SEXP result; + PROTECT(result = allocVector(RAWSXP, Rf_length(STRING_ELT(vec, 0)))); + unsigned char* resData = RAW(result); + int i = 0; + while (*bytes != '\0') { + resData[i++] = (unsigned char) *bytes; + bytes++; + } + UNPROTECT(1); + return result; +} + // This function is expected to be called only with environment that has single // promise value in the '...' variable and this is asserted inside this function. // The return value is list with the promises' expression and environment. diff --git a/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/src/testrffi.h b/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/src/testrffi.h index 9dddb807ee..44f6cd6d6a 100644 --- a/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/src/testrffi.h +++ b/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/src/testrffi.h @@ -112,4 +112,6 @@ extern SEXP test_RfEvalWithPromiseInPairList(void); extern SEXP test_isNAString(SEXP vec); -extern SEXP test_setStringElt(SEXP vec, SEXP elt); \ No newline at end of file +extern SEXP test_setStringElt(SEXP vec, SEXP elt); + +extern SEXP test_getBytes(SEXP vec); \ No newline at end of file diff --git a/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/tests/simpleTests.R b/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/tests/simpleTests.R index 03204306df..d2df53a2af 100644 --- a/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/tests/simpleTests.R +++ b/com.oracle.truffle.r.test.native/packages/testrffi/testrffi/tests/simpleTests.R @@ -31,9 +31,16 @@ rffi.LENGTH(strVec) rffi.char_length(strVec) strVec <- rffi.setStringElt(c('hello'), as.character(NA)) stopifnot(anyNA(strVec)) - stopifnot(rffi.isNAString(as.character(NA))) +# Encoding tests +rffi.getBytes('\u1F602\n') +# ignored: FastR does not support explicit encoding yet +# latinEncStr <- '\xFD\xDD\xD6\xF0\n' +# Encoding(latinEncStr) <- "latin1" +# rffi.getBytes(latinEncStr) +rffi.getBytes('hello ascii') + x <- list(1) attr(x, 'myattr') <- 'hello'; attrs <- rffi.ATTRIB(x) -- GitLab