Skip to content
Snippets Groups Projects
Commit afa9699a authored by stepan's avatar stepan
Browse files

Treat CharSXP as byte array w.r.t. e.g. length

parent 0da13657
No related branches found
Tags 1.17.0
No related merge requests found
Showing
with 71 additions and 15 deletions
......@@ -71,10 +71,9 @@ public class CharSXPWrapperMR {
protected Object access(CharSXPWrapper receiver, Number indexNum) {
int index = indexNum.intValue();
String contents = receiver.getContents();
int len = contents.length();
int len = receiver.getLength();
if (prof1.profile(index < len)) {
return contents.charAt(index);
return receiver.getByteAt(index);
} else if (prof2.profile(index == len)) {
return 0;
} else {
......
......@@ -91,7 +91,7 @@ public final class MiscNodes {
@Specialization
protected int length(CharSXPWrapper obj) {
return obj.getContents().length();
return obj.getLength();
}
@Specialization
......
......@@ -22,6 +22,8 @@
*/
package com.oracle.truffle.r.runtime.data;
import java.nio.charset.StandardCharsets;
import com.oracle.truffle.r.runtime.RRuntime;
/**
......@@ -31,12 +33,16 @@ import com.oracle.truffle.r.runtime.RRuntime;
* FastR already uses {@code String} to denote a length-1 string vector, it cannot be used to
* represent a {@code CHARSXP}, so this class exists to do so.
*
* Unlike {@code String}s on the Java side, native-side "strings" must be treated as arrays of
* bytes. {@link CharSXPWrapper} wraps such a byte array, but does not append the terminating '\0'.
*
* N.B. Use limited to RFFI implementations.
*/
public final class CharSXPWrapper extends RObject implements RTruffleObject {
private static final CharSXPWrapper NA = new CharSXPWrapper(RRuntime.STRING_NA);
private String contents;
private byte[] bytes;
private CharSXPWrapper(String contents) {
this.contents = contents;
......@@ -52,8 +58,12 @@ public final class CharSXPWrapper extends RObject implements RTruffleObject {
return NativeDataAccess.getData(this, contents);
}
/**
 * Returns the single byte stored at position {@code index} of this wrapper's data.
 *
 * The lookup is delegated to {@link NativeDataAccess}, which is handed the (possibly
 * {@code null}) result of {@code getBytes()}. No bounds check is performed in this method.
 *
 * @param index zero-based byte offset
 * @return the byte at {@code index}
 */
public byte getByteAt(int index) {
    final byte[] data = getBytes();
    return NativeDataAccess.getDataAt(this, data, index);
}
public int getLength() {
return NativeDataAccess.getDataLength(this, contents);
return NativeDataAccess.getDataLength(this, getBytes());
}
@Override
......@@ -71,9 +81,17 @@ public final class CharSXPWrapper extends RObject implements RTruffleObject {
public long allocateNativeContents() {
try {
return NativeDataAccess.allocateNativeContents(this, contents);
return NativeDataAccess.allocateNativeContents(this, getBytes());
} finally {
contents = null;
bytes = null;
}
}
/**
 * Returns the UTF-8 encoded bytes of {@code contents}, encoding them on first use and caching
 * the result in {@code bytes}.
 *
 * @return the cached byte array, or {@code null} when neither {@code bytes} nor
 *         {@code contents} is set
 */
private byte[] getBytes() {
    // Nothing to compute: either the bytes are already cached or there is no string to encode.
    if (bytes != null || contents == null) {
        return bytes;
    }
    bytes = contents.getBytes(StandardCharsets.UTF_8);
    return bytes;
}
}
......@@ -147,9 +147,8 @@ public final class NativeDataAccess {
}
@TruffleBoundary
void allocateNative(String source) {
void allocateNativeString(byte[] bytes) {
assert dataAddress == 0;
byte[] bytes = source.getBytes(StandardCharsets.US_ASCII);
dataAddress = UnsafeAdapter.UNSAFE.allocateMemory(bytes.length + 1);
UnsafeAdapter.UNSAFE.copyMemory(bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, null, dataAddress, bytes.length);
UnsafeAdapter.UNSAFE.putByte(dataAddress + bytes.length, (byte) 0); // C strings
......@@ -570,9 +569,21 @@ public final class NativeDataAccess {
}
}
static int getDataLength(CharSXPWrapper vector, String data) {
static byte getDataAt(CharSXPWrapper vector, byte[] data, int index) {
if (noCharSXPNative.isValid() || data != null) {
return data.length();
return data[index];
} else {
NativeMirror mirror = (NativeMirror) vector.getNativeMirror();
long address = mirror.dataAddress;
assert address != 0;
assert index < mirror.length;
return UnsafeAdapter.UNSAFE.getByte(address + index);
}
}
static int getDataLength(CharSXPWrapper vector, byte[] data) {
if (noCharSXPNative.isValid() || data != null) {
return data.length;
} else {
NativeMirror mirror = (NativeMirror) vector.getNativeMirror();
long address = mirror.dataAddress;
......@@ -644,13 +655,13 @@ public final class NativeDataAccess {
return mirror.dataAddress;
}
static long allocateNativeContents(CharSXPWrapper vector, String contents) {
static long allocateNativeContents(CharSXPWrapper vector, byte[] data) {
NativeMirror mirror = (NativeMirror) vector.getNativeMirror();
assert mirror != null;
assert mirror.dataAddress == 0 ^ contents == null;
assert mirror.dataAddress == 0 ^ data == null;
if (mirror.dataAddress == 0) {
noCharSXPNative.invalidate();
mirror.allocateNative(contents);
mirror.allocateNativeString(data);
}
return mirror.dataAddress;
}
......
......@@ -207,4 +207,8 @@ rffi.RfEvalWithPromiseInPairList <- function() {
# Test helper: forwards 'x' to the native routine registered as 'test_isNAString'
# and returns whatever that routine returns.
rffi.isNAString <- function(x) {
.Call('test_isNAString', x)
}
# Test helper: forwards 'x' to the native routine registered as 'test_getBytes'
# and returns whatever that routine returns.
rffi.getBytes <- function(x) {
.Call('test_getBytes', x)
}
\ No newline at end of file
......@@ -83,6 +83,7 @@ static const R_CallMethodDef CallEntries[] = {
CALLDEF(test_ParseVector, 1),
CALLDEF(test_RfEvalWithPromiseInPairList, 0),
CALLDEF(test_isNAString, 1),
CALLDEF(test_getBytes, 1),
CALLDEF(test_setStringElt, 2),
{NULL, NULL, 0}
};
......
......@@ -370,6 +370,20 @@ SEXP test_isNAString(SEXP vec) {
}
}
// Returns the bytes of the first element of the character vector 'vec' as a raw vector.
// The raw vector's size is the length reported for that element (a CHARSXP); bytes are
// copied from its C string up to that length.
SEXP test_getBytes(SEXP vec) {
    SEXP elt = STRING_ELT(vec, 0);
    // The character data must not be modified, so keep the pointer const-qualified.
    const char* bytes = R_CHAR(elt);
    int len = Rf_length(elt);
    SEXP result;
    PROTECT(result = allocVector(RAWSXP, len));
    unsigned char* resData = RAW(result);
    int i = 0;
    // Bound the copy by the allocated length as well as by the terminating '\0', so a
    // mismatch between the reported length and the C string can never overrun 'result'.
    while (i < len && bytes[i] != '\0') {
        resData[i] = (unsigned char) bytes[i];
        i++;
    }
    // If the C string ended early, zero the remainder instead of returning
    // uninitialized memory.
    while (i < len) {
        resData[i++] = 0;
    }
    UNPROTECT(1);
    return result;
}
// This function is expected to be called only with environment that has single
// promise value in the '...' variable and this is asserted inside this function.
// The return value is list with the promises' expression and environment.
......
......@@ -112,4 +112,6 @@ extern SEXP test_RfEvalWithPromiseInPairList(void);
extern SEXP test_isNAString(SEXP vec);
extern SEXP test_setStringElt(SEXP vec, SEXP elt);
\ No newline at end of file
extern SEXP test_setStringElt(SEXP vec, SEXP elt);
extern SEXP test_getBytes(SEXP vec);
\ No newline at end of file
......@@ -31,9 +31,16 @@ rffi.LENGTH(strVec)
rffi.char_length(strVec)
strVec <- rffi.setStringElt(c('hello'), as.character(NA))
stopifnot(anyNA(strVec))
stopifnot(rffi.isNAString(as.character(NA)))
# Encoding tests
rffi.getBytes('\u1F602\n')
# ignored: FastR does not support explicit encoding yet
# latinEncStr <- '\xFD\xDD\xD6\xF0\n'
# Encoding(latinEncStr) <- "latin1"
# rffi.getBytes(latinEncStr)
rffi.getBytes('hello ascii')
x <- list(1)
attr(x, 'myattr') <- 'hello';
attrs <- rffi.ATTRIB(x)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment