From 432314a8170498fb44a80a8cdae48d1db1a67837 Mon Sep 17 00:00:00 2001 From: Adam Welc <adam.welc@oracle.com> Date: Mon, 6 Oct 2014 18:33:22 -0700 Subject: [PATCH] Partial implementation of the "scan" function. --- .../builtin/base/ConnectionFunctions.java | 7 +- .../truffle/r/nodes/builtin/base/R/scan.R | 56 +++ .../truffle/r/nodes/builtin/base/Scan.java | 349 ++++++++++++++++++ .../com/oracle/truffle/r/runtime/RError.java | 4 + .../oracle/truffle/r/runtime/RRuntime.java | 9 + .../com/oracle/truffle/r/runtime/Utils.java | 4 + .../r/runtime/data/RComplexVector.java | 6 + .../r/runtime/data/RDoubleSequence.java | 7 +- .../truffle/r/runtime/data/RDoubleVector.java | 6 + .../truffle/r/runtime/data/RIntSequence.java | 8 +- .../truffle/r/runtime/data/RIntVector.java | 6 + .../oracle/truffle/r/runtime/data/RList.java | 6 + .../r/runtime/data/RLogicalVector.java | 6 + .../truffle/r/runtime/data/RRawVector.java | 6 + .../truffle/r/runtime/data/RStringVector.java | 6 + .../truffle/r/runtime/data/RVector.java | 14 +- .../data/closures/RToVectorClosure.java | 7 +- .../r/runtime/data/model/RAbstractVector.java | 4 +- mx.fastr/copyrights/gnu_r_scan.copyright.star | 11 + .../gnu_r_scan.copyright.star.regex | 1 + mx.fastr/copyrights/overrides | 1 + 21 files changed, 516 insertions(+), 8 deletions(-) create mode 100644 com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/R/scan.R create mode 100644 com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Scan.java create mode 100644 mx.fastr/copyrights/gnu_r_scan.copyright.star create mode 100644 mx.fastr/copyrights/gnu_r_scan.copyright.star.regex diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/ConnectionFunctions.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/ConnectionFunctions.java index 64de1c38f6..403f1b1229 100644 --- 
a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/ConnectionFunctions.java +++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/ConnectionFunctions.java @@ -101,8 +101,11 @@ public abstract class ConnectionFunctions { break; } } - String[] result = new String[lines.size()]; - lines.toArray(result); + String[] result = null; + if (lines.size() > 0) { + result = new String[lines.size()]; + lines.toArray(result); + } return result; } diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/R/scan.R b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/R/scan.R new file mode 100644 index 0000000000..620c6f3074 --- /dev/null +++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/R/scan.R @@ -0,0 +1,56 @@ +# File src/library/base/R/scan.R +# Part of the R package, http://www.R-project.org +# +# Copyright (C) 1995-2014 The R Core Team +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# A copy of the GNU General Public License is available at +# http://www.r-project.org/Licenses/ + +scan <- +function(file = "", what = double(), nmax = -1L, n = -1L, sep = "", + quote = if(identical(sep, "\n")) "" else "'\"", + dec = ".", skip = 0L, nlines = 0L, + na.strings = "NA", flush = FALSE, fill = FALSE, + strip.white = FALSE, quiet = FALSE, blank.lines.skip = TRUE, + multi.line = TRUE, comment.char = "", allowEscapes = FALSE, + fileEncoding = "", encoding = "unknown", text, skipNul = FALSE) +{ + na.strings <- as.character(na.strings)# allow it to be NULL + if(!missing(n)) { + if(missing(nmax)) + nmax <- n / pmax(length(what), 1L) + else + stop("either specify 'nmax' or 'n', but not both.") + } + if (missing(file) && !missing(text)) { + file <- textConnection(text, encoding = "UTF-8") + encoding <- "UTF-8" + on.exit(close(file)) + } + + if(is.character(file)) + if(file == "") file <- stdin() + else { + file <- if(nzchar(fileEncoding)) + file(file, "r", encoding = fileEncoding) else file(file, "r") + # TODO: on.exit does not work properly (connection is currently closed in the internal implementation of scan) +# on.exit(close(file)) + } + # TODO: uncomment once RConnection is more faithful to how connections are implemented in GNU R +# if(!inherits(file, "connection")) +# stop("'file' must be a character string or connection") + .Internal(scan(file, what, nmax, sep, dec, quote, skip, nlines, + na.strings, flush, fill, strip.white, quiet, + blank.lines.skip, multi.line, comment.char, + allowEscapes, encoding, skipNul)) +} diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Scan.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Scan.java new file mode 100644 index 0000000000..a95400fdf7 --- /dev/null +++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Scan.java @@ -0,0 +1,349 @@ +/* + * This material is distributed under the GNU General Public 
License + * Version 2. You may review the terms of this license at + * http://www.gnu.org/licenses/gpl-2.0.html + * + * Copyright (c) 1995, 1996, Robert Gentleman and Ross Ihaka + * Copyright (c) 1998-2013, The R Core Team + * Copyright (c) 2014, 2014, Oracle and/or its affiliates + * + * All rights reserved. + */ + +package com.oracle.truffle.r.nodes.builtin.base; + +import static com.oracle.truffle.r.runtime.RBuiltinKind.*; + +import java.io.*; + +import com.oracle.truffle.api.*; +import com.oracle.truffle.api.dsl.*; +import com.oracle.truffle.api.frame.*; +import com.oracle.truffle.r.nodes.*; +import com.oracle.truffle.r.nodes.access.*; +import com.oracle.truffle.r.nodes.builtin.*; +import com.oracle.truffle.r.nodes.unary.*; +import com.oracle.truffle.r.runtime.*; +import com.oracle.truffle.r.runtime.data.*; +import com.oracle.truffle.r.runtime.data.model.*; +import com.oracle.truffle.r.runtime.ops.na.*; + +@SuppressWarnings("unused") +@RBuiltin(name = "scan", kind = INTERNAL, parameterNames = {"file", "what", "nmax", "sep", "dec", "quote", "skip", "nlines", "na.strings", "flush", "fill", "strip.white", "quiet", "blank.lines.skip", + "multi.line", "comment.char", "allowEscapes", "encoding", "skipNull"}) +public abstract class Scan extends RBuiltinNode { + + private static final int SCAN_BLOCKSIZE = 1000; + private static final int NO_COMCHAR = 100000; /* won't occur even in Unicode */ + + private final NACheck naCheck = new NACheck(); + + private static class LocalData { + RAbstractStringVector naStrings = null; + boolean quiet = false; + String sepchar = null; + char decchar = '.'; + String quoteset = null; + int comchar = NO_COMCHAR; + // connection-related (currently not supported) + // int ttyflag = 0; + RConnection con = null; + // connection-related (currently not supported) + // boolean wasopen = false; + boolean escapes = false; + int save = 0; + boolean isLatin1 = false; + boolean isUTF8 = false; + boolean atStart = false; + boolean embedWarn = false; + 
boolean skipNull = false; + } + + @CreateCast({"arguments"}) + public RNode[] createCastValue(RNode[] children) { + RNode file = children[0]; + RNode what = CastToVectorNodeFactory.create(children[1], false, false, false, false); + RNode nmax = CastToVectorNodeFactory.create(CastIntegerNodeFactory.create(children[2], false, false, false), false, false, false, false); + RNode sep = CastToVectorNodeFactory.create(children[3], false, false, false, false); + RNode dec = CastToVectorNodeFactory.create(children[4], false, false, false, false); + RNode quotes = CastToVectorNodeFactory.create(children[5], false, false, false, false); + RNode nskip = CastToVectorNodeFactory.create(CastIntegerNodeFactory.create(children[6], false, false, false), false, false, false, false); + RNode nlines = CastToVectorNodeFactory.create(CastIntegerNodeFactory.create(children[7], false, false, false), false, false, false, false); + RNode naStrings = CastToVectorNodeFactory.create(children[8], false, false, false, false); + RNode flush = CastToVectorNodeFactory.create(CastLogicalNodeFactory.create(children[9], false, false, false), false, false, false, false); + RNode fill = CastToVectorNodeFactory.create(CastLogicalNodeFactory.create(children[10], false, false, false), false, false, false, false); + RNode stripWhite = CastToVectorNodeFactory.create(children[11], false, false, false, false); + RNode quiet = CastToVectorNodeFactory.create(CastLogicalNodeFactory.create(children[12], false, false, false), false, false, false, false); + RNode blSkip = CastToVectorNodeFactory.create(CastLogicalNodeFactory.create(children[13], false, false, false), false, false, false, false); + RNode multiLine = CastToVectorNodeFactory.create(CastLogicalNodeFactory.create(children[14], false, false, false), false, false, false, false); + RNode commentChar = CastToVectorNodeFactory.create(children[15], false, false, false, false); + RNode allowEscapes = 
CastToVectorNodeFactory.create(CastLogicalNodeFactory.create(children[16], false, false, false), false, false, false, false); + RNode encoding = CastToVectorNodeFactory.create(children[17], false, false, false, false); + RNode skipNull = CastToVectorNodeFactory.create(CastLogicalNodeFactory.create(children[18], false, false, false), false, false, false, false); + + return new RNode[]{file, what, nmax, sep, dec, quotes, nskip, nlines, naStrings, flush, fill, stripWhite, quiet, blSkip, multiLine, commentChar, allowEscapes, encoding, skipNull}; + } + + @Specialization + Object doScan(RConnection file, RAbstractVector what, RAbstractIntVector nmaxVec, RAbstractVector sepVec, RAbstractVector decVec, RAbstractVector quotesVec, RAbstractIntVector nskipVec, + RAbstractIntVector nlinesVec, RAbstractVector naStringsVec, RAbstractLogicalVector flushVec, RAbstractLogicalVector fillVec, RAbstractVector stripVec, + RAbstractLogicalVector dataQuietVec, RAbstractLogicalVector blSkipVec, RAbstractLogicalVector multiLineVec, RAbstractVector commentCharVec, RAbstractLogicalVector escapesVec, + RAbstractVector encodingVec, RAbstractLogicalVector skipNullVec) { + + LocalData data = new LocalData(); + + int nmax = nmaxVec.getLength() == 0 ? RRuntime.INT_NA : nmaxVec.getDataAt(0); + + if (sepVec.getLength() == 0) { + data.sepchar = null; + } else if (sepVec.getElementClass() != RString.class) { + throw RError.error(RError.Message.INVALID_ARGUMENT, "sep"); + } + // TODO: some sort of character translation happens here? + String sep = sepVec.getLength() == 0 ? "" : ((RAbstractStringVector) sepVec).getDataAt(0); // empty vector: no separator, never index it + if (sep.length() > 1) { + throw RError.error(RError.Message.MUST_BE_ONE_BYTE, "'sep' value"); + } + data.sepchar = sep.length() == 0 ? null : sep.substring(0, 1); + + if (decVec.getLength() == 0) { + data.decchar = '.'; + } else if (decVec.getElementClass() != RString.class) { + throw RError.error(RError.Message.INVALID_DECIMAL_SEP); + } + // TODO: some sort of character translation happens here? 
+ String dec = decVec.getLength() == 0 ? "." : ((RAbstractStringVector) decVec).getDataAt(0); // empty vector: keep the default '.' + if (dec.length() != 1) { + throw RError.error(RError.Message.MUST_BE_ONE_BYTE, "decimal separator"); + } + data.decchar = dec.charAt(0); + + if (quotesVec.getLength() == 0) { + data.quoteset = ""; + } else if (quotesVec.getElementClass() != RString.class) { + throw RError.error(RError.Message.INVALID_QUOTE_SYMBOL); + } + // TODO: some sort of character translation happens here? + data.quoteset = quotesVec.getLength() == 0 ? "" : ((RAbstractStringVector) quotesVec).getDataAt(0); + + int nskip = nskipVec.getLength() == 0 ? RRuntime.INT_NA : nskipVec.getDataAt(0); + + int nlines = nlinesVec.getLength() == 0 ? RRuntime.INT_NA : nlinesVec.getDataAt(0); + + if (naStringsVec.getElementClass() != RString.class) { + throw RError.error(RError.Message.INVALID_ARGUMENT, "na.strings"); + } + data.naStrings = (RAbstractStringVector) naStringsVec; + + byte flush = flushVec.getLength() == 0 ? RRuntime.LOGICAL_NA : flushVec.getDataAt(0); + + byte fill = fillVec.getLength() == 0 ? RRuntime.LOGICAL_NA : fillVec.getDataAt(0); + + if (stripVec.getElementClass() != RLogical.class) { + throw RError.error(RError.Message.INVALID_ARGUMENT, "strip.white"); + } + if (stripVec.getLength() != 1 && stripVec.getLength() != what.getLength()) { + throw RError.error(RError.Message.INVALID_LENGTH, "strip.white"); + } + byte strip = ((RAbstractLogicalVector) stripVec).getDataAt(0); + + data.quiet = dataQuietVec.getLength() == 0 || RRuntime.isNA(dataQuietVec.getDataAt(0)) ? false : dataQuietVec.getDataAt(0) == RRuntime.LOGICAL_TRUE; + + byte blSkip = blSkipVec.getLength() == 0 ? RRuntime.LOGICAL_NA : blSkipVec.getDataAt(0); + + byte multiLine = multiLineVec.getLength() == 0 ? 
RRuntime.LOGICAL_NA : multiLineVec.getDataAt(0); + + if (commentCharVec.getElementClass() != RString.class || commentCharVec.getLength() != 1) { + throw RError.error(RError.Message.INVALID_ARGUMENT, "comment.char"); + } + String commentChar = ((RAbstractStringVector) commentCharVec).getDataAt(0); + data.comchar = NO_COMCHAR; + if (commentChar.length() > 1) { + throw RError.error(RError.Message.INVALID_ARGUMENT, "comment.char"); + } else if (commentChar.length() == 1) { + data.comchar = commentChar.charAt(0); + } + + byte escapes = escapesVec.getLength() == 0 ? RRuntime.LOGICAL_NA : escapesVec.getDataAt(0); + if (RRuntime.isNA(escapes)) { + throw RError.error(RError.Message.INVALID_ARGUMENT, "allowEscapes"); + } + data.escapes = escapes != RRuntime.LOGICAL_FALSE; + + if (encodingVec.getElementClass() != RString.class || encodingVec.getLength() != 1) { + throw RError.error(RError.Message.INVALID_ARGUMENT, "encoding"); + } + String encoding = ((RAbstractStringVector) encodingVec).getDataAt(0); + if (encoding.equals("latin1")) { + data.isLatin1 = true; + } + if (encoding.equals("UTF-8")) { + data.isUTF8 = true; + } + + byte skipNull = skipNullVec.getLength() == 0 ? 
RRuntime.LOGICAL_NA : skipNullVec.getDataAt(0); + if (RRuntime.isNA(skipNull)) { + throw RError.error(RError.Message.INVALID_ARGUMENT, "skipNull"); + } + data.skipNull = skipNull != RRuntime.LOGICAL_FALSE; + + if (blSkip == RRuntime.LOGICAL_NA) { + blSkip = 1; + } + if (multiLine == RRuntime.LOGICAL_NA) { + multiLine = 1; + } + if (nskip < 0 || nskip == RRuntime.INT_NA) { + nskip = 0; + } + if (nlines < 0 || nlines == RRuntime.INT_NA) { + nlines = 0; + } + if (nmax < 0 || nmax == RRuntime.INT_NA) { + nmax = 0; + } + + // TODO: quite a few more things happen in GNU R around connections + data.con = file; + + Object result = RNull.instance; + data.save = 0; + + try { + if (nskip > 0) { + data.con.readLines(nskip); + } + if (what.getElementClass() != Object.class) { + return scanVector(what, nmax, nlines, flush, strip, blSkip, data); + } + + } catch (IOException x) { + throw RError.error(RError.Message.CANNOT_READ_CONNECTION); + } finally { + try { + data.con.close(); + } catch (IOException ex) { + } + } + + return result; + } + + private RVector scanVector(RAbstractVector what, int maxItems, int maxLines, int flush, byte stripWhite, int blSkip, LocalData data) throws IOException { + int blockSize = maxItems > 0 ? maxItems : SCAN_BLOCKSIZE; + RVector vec = what.createEmptySameType(blockSize, RDataFactory.COMPLETE_VECTOR); + naCheck.enable(true); + + int n = 0; + int lines = 0; + while (true) { + // TODO: does not do any fancy stuff, like handling comments + String[] str = data.con.readLines(1); + if (str == null || str.length == 0) { + break; + } + String[] strItems = data.sepchar == null ? 
str[0].trim().split("\\s+") : str[0].trim().split(java.util.regex.Pattern.quote(data.sepchar)); + + boolean done = false; + for (int i = 0; i < strItems.length; i++) { + + Object item = extractItem(what, strItems[i], data); + + if (n == blockSize) { + // enlarge the vector (copyResized returns a new vector, so the result must be kept) + blockSize = blockSize * 2; + vec = vec.copyResized(blockSize, false); + } + + vec.updateDataAtAsObject(n, item, naCheck); + n++; + if (n == maxItems) { + done = true; + break; + } + } + if (done) { + break; + } + lines++; + if (lines == maxLines) { + break; + } + + } + if (!data.quiet) { + RContext.getInstance().getConsoleHandler().printf("Read %d item%s\n", n, (n == 1) ? "" : "s"); + } + // trim vector if necessary + return vec.getLength() > n ? vec.copyResized(n, false) : vec; + } + + // If mode = 0 use for numeric fields where "" is NA + // If mode = 1 use for character fields where "" is verbatim unless + // na.strings includes "" + private static boolean isNaString(String buffer, int mode, LocalData data) { + int i; + + if (mode == 0 && buffer.length() == 0) { + return true; + } + for (i = 0; i < data.naStrings.getLength(); i++) { + if (data.naStrings.getDataAt(i).equals(buffer)) { + return true; + } + } + return false; + } + + private static Object extractItem(RAbstractVector what, String buffer, LocalData data) { + if (what.getElementClass() == RLogical.class) { + if (isNaString(buffer, 0, data)) { + return RRuntime.LOGICAL_NA; + } else { + return RRuntime.string2logicalNoCheck(buffer); + } + } + if (what.getElementClass() == RInt.class) { + if (isNaString(buffer, 0, data)) { + return RRuntime.INT_NA; + } else { + return RRuntime.string2intNoCheck(buffer); + } + } + + if (what.getElementClass() == RDouble.class) { + if (isNaString(buffer, 0, data)) { + return RRuntime.DOUBLE_NA; + } else { + return RRuntime.string2doubleNoCheck(buffer); + } + } + + if (what.getElementClass() == RComplex.class) { + if (isNaString(buffer, 0, data)) { + return RRuntime.createComplexNA(); + } else { + return 
RRuntime.string2complexNoCheck(buffer); + } + } + + if (what.getElementClass() == RString.class) { + if (isNaString(buffer, 1, data)) { // mode 1: "" stays verbatim for character fields + return RRuntime.STRING_NA; + } else { + return buffer; + } + } + + if (what.getElementClass() == RRaw.class) { + if (isNaString(buffer, 0, data)) { + return RDataFactory.createRaw((byte) 0); + } else { + return RRuntime.string2raw(buffer); + } + } + + throw RInternalError.shouldNotReachHere(); + } + +} diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RError.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RError.java index e73169627c..cc1ecf32a1 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RError.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RError.java @@ -271,6 +271,7 @@ public final class RError extends RuntimeException { INCORRECT_SUBSCRIPTS("incorrect number of subscripts"), INCORRECT_SUBSCRIPTS_MATRIX("incorrect number of subscripts on matrix"), INVALID_SEP("invalid 'sep' specification"), + INVALID_LENGTH("invalid '%s' length"), // below: GNU R gives also expression for the argument NOT_FUNCTION("argument is not a function, character or symbol"), NON_NUMERIC_MATH("non-numeric argument to mathematical function"), @@ -459,6 +460,9 @@ public final class RError extends RuntimeException { INCORRECT_NUM_PROB("incorrect number of probabilities"), NA_IN_PROB_VECTOR("NA in probability vector"), NEGATIVE_PROBABILITY("non-positive probability"), + MUST_BE_ONE_BYTE("invalid %s: must be one byte"), + INVALID_DECIMAL_SEP("invalid decimal separator"), + INVALID_QUOTE_SYMBOL("invalid quote symbol set"), // below: not exactly GNU-R message TOO_FEW_POSITIVE_PROBABILITY("too few positive probabilities"), DOTS_BOUNDS("The ... 
list does not contain %s elements"), diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RRuntime.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RRuntime.java index 76099c23be..7ce57708a2 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RRuntime.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RRuntime.java @@ -374,6 +374,15 @@ public class RRuntime { return isNA(v) ? createComplexNA() : string2complexNoCheck(v); } + @SlowPath + public static RRaw string2raw(String v) { + if (v.length() == 2 && Character.digit(v.charAt(0), 16) >= 0 && Character.digit(v.charAt(1), 16) >= 0) { + return RDataFactory.createRaw((byte) Integer.parseInt(v, 16)); // Byte.parseByte would reject 80..ff + } else { + return RDataFactory.createRaw((byte) 0); + } + } + // conversions from int public static double int2doubleNoCheck(int i) { diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/Utils.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/Utils.java index 62ea5d8599..afc0941bdd 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/Utils.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/Utils.java @@ -68,6 +68,10 @@ public final class Utils { return c >= '\u0030' && c <= '\u0039'; } + public static boolean isRomanLetter(char c) { + return (/* lower case */c >= '\u00DF' && c <= '\u00FF') || (/* upper case */c >= '\u00C0' && c <= '\u00DE'); + } + public static int incMod(int value, int mod) { int result = (value + 1); if (result == mod) { diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RComplexVector.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RComplexVector.java index 03cf4b7ea3..201a6d22dd 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RComplexVector.java +++ 
b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RComplexVector.java @@ -138,6 +138,12 @@ public final class RComplexVector extends RVector implements RAbstractComplexVec return this; } + @Override + public RComplexVector updateDataAtAsObject(int i, Object o, NACheck naCheck) { + return updateDataAt(i, (RComplex) o, naCheck); + + } + private double[] copyResizedData(int size, boolean fillNA) { int csize = size << 1; double[] newData = Arrays.copyOf(data, csize); diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RDoubleSequence.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RDoubleSequence.java index 7a277630d2..0d60e8f005 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RDoubleSequence.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RDoubleSequence.java @@ -102,10 +102,15 @@ public final class RDoubleSequence extends RSequence implements RAbstractDoubleV } @Override - public RAbstractVector copyResized(int size, boolean fillNA) { + public RDoubleVector copyResized(int size, boolean fillNA) { double[] data = new double[size]; populateVectorData(data); RDoubleVector.resizeData(data, data, getLength(), fillNA); return RDataFactory.createDoubleVector(data, !(fillNA && size > getLength())); } + + @Override + public RDoubleVector createEmptySameType(int newLength, boolean newIsComplete) { + return RDataFactory.createDoubleVector(new double[newLength], newIsComplete); + } } diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RDoubleVector.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RDoubleVector.java index d45c31d5f1..a158953cc6 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RDoubleVector.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RDoubleVector.java @@ -168,6 +168,12 @@ public final class RDoubleVector extends 
RVector implements RAbstractDoubleVecto return this; } + @Override + public RDoubleVector updateDataAtAsObject(int i, Object o, NACheck naCheck) { + return updateDataAt(i, (Double) o, naCheck); + + } + public static double[] resizeData(double[] newData, double[] oldData, int oldDataLength, boolean fillNA) { if (newData.length > oldDataLength) { if (fillNA) { diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RIntSequence.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RIntSequence.java index cfe868729c..8f6f1a89d4 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RIntSequence.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RIntSequence.java @@ -103,10 +103,16 @@ public final class RIntSequence extends RSequence implements RAbstractIntVector } @Override - public RAbstractVector copyResized(int size, boolean fillNA) { + public RIntVector copyResized(int size, boolean fillNA) { int[] data = new int[size]; populateVectorData(data); RIntVector.resizeData(data, data, getLength(), fillNA); return RDataFactory.createIntVector(data, !(fillNA && size > getLength())); } + + @Override + public RIntVector createEmptySameType(int newLength, boolean newIsComplete) { + return RDataFactory.createIntVector(new int[newLength], newIsComplete); + } + } diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RIntVector.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RIntVector.java index e1544b49fc..97452f3784 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RIntVector.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RIntVector.java @@ -156,6 +156,12 @@ public final class RIntVector extends RVector implements RAbstractIntVector { return this; } + @Override + public RIntVector updateDataAtAsObject(int i, Object o, NACheck naCheck) { + return updateDataAt(i, (Integer) 
o, naCheck); + + } + public static int[] resizeData(int[] newData, int[] oldData, int oldDataLength, boolean fillNA) { if (newData.length > oldDataLength) { if (fillNA) { diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RList.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RList.java index c1c9a527c8..429d229b60 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RList.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RList.java @@ -135,6 +135,12 @@ public final class RList extends RVector implements RAbstractVector { return this; } + @Override + public RList updateDataAtAsObject(int i, Object o, NACheck naCheck) { + return updateDataAt(i, o, naCheck); + + } + @Override public RList createEmptySameType(int newLength, boolean newIsComplete) { return RDataFactory.createList(new Object[newLength]); diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RLogicalVector.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RLogicalVector.java index 4f02a17937..afb3cfb544 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RLogicalVector.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RLogicalVector.java @@ -103,6 +103,12 @@ public final class RLogicalVector extends RVector implements RAbstractLogicalVec return this; } + @Override + public RLogicalVector updateDataAtAsObject(int i, Object o, NACheck naCheck) { + return updateDataAt(i, (Byte) o, naCheck); + + } + private byte[] copyResizedData(int size, boolean fillNA) { byte[] newData = Arrays.copyOf(data, size); if (size > this.getLength()) { diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RRawVector.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RRawVector.java index 1e5a1a8430..627023dc61 100644 --- 
a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RRawVector.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RRawVector.java @@ -27,6 +27,7 @@ import java.util.*; import com.oracle.truffle.api.CompilerDirectives.SlowPath; import com.oracle.truffle.r.runtime.*; import com.oracle.truffle.r.runtime.data.model.*; +import com.oracle.truffle.r.runtime.ops.na.*; public final class RRawVector extends RVector implements RAbstractRawVector { @@ -132,6 +133,11 @@ public final class RRawVector extends RVector implements RAbstractRawVector { return this; } + @Override + public RRawVector updateDataAtAsObject(int i, Object o, NACheck naCheck) { + return updateDataAt(i, (RRaw) o); + } + private byte[] copyResizedData(int size, boolean fillNA) { byte[] newData = Arrays.copyOf(data, size); if (!fillNA) { diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RStringVector.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RStringVector.java index a2c2aa510a..4f77350ae2 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RStringVector.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RStringVector.java @@ -118,6 +118,12 @@ public final class RStringVector extends RVector implements RAbstractStringVecto return this; } + @Override + public RStringVector updateDataAtAsObject(int i, Object o, NACheck naCheck) { + return updateDataAt(i, (String) o, naCheck); + + } + private String[] copyResizedData(int size, String fill) { String[] newData = Arrays.copyOf(data, size); if (size > this.getLength()) { diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RVector.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RVector.java index 56cf5dc211..b68bef7daf 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RVector.java +++ 
b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RVector.java @@ -29,6 +29,7 @@ import com.oracle.truffle.api.source.*; import com.oracle.truffle.r.runtime.*; import com.oracle.truffle.r.runtime.data.RAttributes.RAttribute; import com.oracle.truffle.r.runtime.data.model.*; +import com.oracle.truffle.r.runtime.ops.na.*; import edu.umd.cs.findbugs.annotations.*; @@ -461,6 +462,17 @@ public abstract class RVector extends RBounded implements RShareable, RAbstractV protected abstract boolean internalVerify(); + /** + * Update a data item in the vector. Possibly not as efficient as type-specific methods, but in + * some cases it likely does not matter (e.g. if used alongside I/O operations). + * + * @param i index of the vector item to be updated + * @param o updated value + * @param naCheck NA check used to change vector's mode in case value is NA + * @return updated vector + */ + public abstract RVector updateDataAtAsObject(int i, Object o, NACheck naCheck); + public final RStringVector toStringVector() { String[] values = new String[getLength()]; for (int i = 0; i < getLength(); ++i) { @@ -469,8 +481,6 @@ public abstract class RVector extends RBounded implements RShareable, RAbstractV return RDataFactory.createStringVector(values, this.isComplete()); } - public abstract RVector createEmptySameType(int newLength, boolean newIsComplete); - public abstract void transferElementSameType(int toIndex, RVector fromVector, int fromIndex); public final boolean isInBounds(int firstPosition, int secondPosition) { diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/closures/RToVectorClosure.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/closures/RToVectorClosure.java index 6449f86ce8..dc6c4cae6e 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/closures/RToVectorClosure.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/closures/RToVectorClosure.java 
@@ -86,7 +86,7 @@ public abstract class RToVectorClosure implements RAbstractVector { } @Override - public RAbstractVector copyResized(int size, boolean fillNA) { + public RVector copyResized(int size, boolean fillNA) { return vector.copyResized(size, fillNA); } @@ -121,4 +121,9 @@ public abstract class RToVectorClosure implements RAbstractVector { return vector.materialize(); } + @Override + public RVector createEmptySameType(int newLength, boolean newIsComplete) { + return vector.createEmptySameType(newLength, newIsComplete); + } + } diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/model/RAbstractVector.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/model/RAbstractVector.java index 262357a893..8b3f6a798e 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/model/RAbstractVector.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/model/RAbstractVector.java @@ -41,12 +41,14 @@ public interface RAbstractVector extends RAbstractContainer { */ RAbstractVector copy(); - RAbstractVector copyResized(int size, boolean fillNA); + RVector copyResized(int size, boolean fillNA); RAbstractVector copyWithNewDimensions(int[] newDimensions); RAbstractVector copyDropAttributes(); + RVector createEmptySameType(int newLength, boolean newIsComplete); + /** * May throw errors, so needs a {@link VirtualFrame}. */ diff --git a/mx.fastr/copyrights/gnu_r_scan.copyright.star b/mx.fastr/copyrights/gnu_r_scan.copyright.star new file mode 100644 index 0000000000..0a1b1ef591 --- /dev/null +++ b/mx.fastr/copyrights/gnu_r_scan.copyright.star @@ -0,0 +1,11 @@ +/* + * This material is distributed under the GNU General Public License + * Version 2. 
You may review the terms of this license at + * http://www.gnu.org/licenses/gpl-2.0.html + * + * Copyright (c) 1995, 1996, Robert Gentleman and Ross Ihaka + * Copyright (c) 1998-2013, The R Core Team + * Copyright (c) 2014, 2014, Oracle and/or its affiliates + * + * All rights reserved. + */ diff --git a/mx.fastr/copyrights/gnu_r_scan.copyright.star.regex b/mx.fastr/copyrights/gnu_r_scan.copyright.star.regex new file mode 100644 index 0000000000..249eb964ce --- /dev/null +++ b/mx.fastr/copyrights/gnu_r_scan.copyright.star.regex @@ -0,0 +1 @@ +/\*\n \* This material is distributed under the GNU General Public License\n \* Version 2. You may review the terms of this license at\n \* http://www.gnu.org/licenses/gpl-2.0.html\n \*\n \* Copyright \(c\) 1995, 1996, Robert Gentleman and Ross Ihaka\n \* Copyright \(c\) 1998-2013, The R Core Team\n \* Copyright \(c\) (?:(20[0-9][0-9]), )?(20[0-9][0-9]), Oracle and/or its affiliates\n \*\n \* All rights reserved.\n \*/\n.* diff --git a/mx.fastr/copyrights/overrides b/mx.fastr/copyrights/overrides index 2fc370c18b..a6fb6a219b 100644 --- a/mx.fastr/copyrights/overrides +++ b/mx.fastr/copyrights/overrides @@ -53,6 +53,7 @@ com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/R com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/RowMeans.java,purdue.copyright com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/RowSums.java,purdue.copyright com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Sample.java,gnu_r_sample.copyright +com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Scan.java,gnu_r_scan.copyright com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Tabulate.java,purdue.copyright com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/UnClass.java,purdue.copyright 
com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Unlist.java,gnu_r.copyright -- GitLab