From 432314a8170498fb44a80a8cdae48d1db1a67837 Mon Sep 17 00:00:00 2001
From: Adam Welc <adam.welc@oracle.com>
Date: Mon, 6 Oct 2014 18:33:22 -0700
Subject: [PATCH] Partial implementation of the "scan" function.

---
 .../builtin/base/ConnectionFunctions.java     |   7 +-
 .../truffle/r/nodes/builtin/base/R/scan.R     |  56 +++
 .../truffle/r/nodes/builtin/base/Scan.java    | 349 ++++++++++++++++++
 .../com/oracle/truffle/r/runtime/RError.java  |   4 +
 .../oracle/truffle/r/runtime/RRuntime.java    |   9 +
 .../com/oracle/truffle/r/runtime/Utils.java   |   4 +
 .../r/runtime/data/RComplexVector.java        |   6 +
 .../r/runtime/data/RDoubleSequence.java       |   7 +-
 .../truffle/r/runtime/data/RDoubleVector.java |   6 +
 .../truffle/r/runtime/data/RIntSequence.java  |   8 +-
 .../truffle/r/runtime/data/RIntVector.java    |   6 +
 .../oracle/truffle/r/runtime/data/RList.java  |   6 +
 .../r/runtime/data/RLogicalVector.java        |   6 +
 .../truffle/r/runtime/data/RRawVector.java    |   6 +
 .../truffle/r/runtime/data/RStringVector.java |   6 +
 .../truffle/r/runtime/data/RVector.java       |  14 +-
 .../data/closures/RToVectorClosure.java       |   7 +-
 .../r/runtime/data/model/RAbstractVector.java |   4 +-
 mx.fastr/copyrights/gnu_r_scan.copyright.star |  11 +
 .../gnu_r_scan.copyright.star.regex           |   1 +
 mx.fastr/copyrights/overrides                 |   1 +
 21 files changed, 516 insertions(+), 8 deletions(-)
 create mode 100644 com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/R/scan.R
 create mode 100644 com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Scan.java
 create mode 100644 mx.fastr/copyrights/gnu_r_scan.copyright.star
 create mode 100644 mx.fastr/copyrights/gnu_r_scan.copyright.star.regex

diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/ConnectionFunctions.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/ConnectionFunctions.java
index 64de1c38f6..403f1b1229 100644
--- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/ConnectionFunctions.java
+++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/ConnectionFunctions.java
@@ -101,8 +101,11 @@ public abstract class ConnectionFunctions {
                     break;
                 }
             }
-            String[] result = new String[lines.size()];
-            lines.toArray(result);
+            String[] result = null;
+            if (lines.size() > 0) {
+                result = new String[lines.size()];
+                lines.toArray(result);
+            }
             return result;
         }
 
diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/R/scan.R b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/R/scan.R
new file mode 100644
index 0000000000..620c6f3074
--- /dev/null
+++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/R/scan.R
@@ -0,0 +1,56 @@
+#  File src/library/base/R/scan.R
+#  Part of the R package, http://www.R-project.org
+#
+#  Copyright (C) 1995-2014 The R Core Team
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  A copy of the GNU General Public License is available at
+#  http://www.r-project.org/Licenses/
+
+scan <-
+function(file = "", what = double(), nmax = -1L, n = -1L, sep = "",
+         quote = if(identical(sep, "\n")) "" else "'\"",
+         dec = ".", skip = 0L, nlines = 0L,
+         na.strings = "NA", flush = FALSE, fill = FALSE,
+         strip.white = FALSE, quiet = FALSE, blank.lines.skip = TRUE,
+         multi.line = TRUE, comment.char = "", allowEscapes = FALSE,
+         fileEncoding = "", encoding = "unknown", text, skipNul = FALSE)
+{
+    na.strings <- as.character(na.strings)# allow it to be NULL
+    if(!missing(n)) {
+        if(missing(nmax))
+            nmax <- n / pmax(length(what), 1L)
+        else
+            stop("either specify 'nmax' or 'n', but not both.")
+    }
+    if (missing(file) && !missing(text)) {
+	file <- textConnection(text, encoding = "UTF-8")
+	encoding <- "UTF-8"
+	on.exit(close(file))
+    }
+
+    if(is.character(file))
+        if(file == "") file <- stdin()
+        else {
+            file <- if(nzchar(fileEncoding))
+                file(file, "r", encoding = fileEncoding) else file(file, "r")
+	# TODO: on.exit does not work properly (connection is currently closed in the internal implementation of scan)
+#            on.exit(close(file))
+        }
+	# TODO: uncomment once RConnection is more faithful to how connections are implemented in GNU R 
+#    if(!inherits(file, "connection"))
+#        stop("'file' must be a character string or connection")
+    .Internal(scan(file, what, nmax, sep, dec, quote, skip, nlines,
+                   na.strings, flush, fill, strip.white, quiet,
+                   blank.lines.skip, multi.line, comment.char,
+                   allowEscapes, encoding, skipNul))
+}
diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Scan.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Scan.java
new file mode 100644
index 0000000000..a95400fdf7
--- /dev/null
+++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Scan.java
@@ -0,0 +1,349 @@
+/*
+ * This material is distributed under the GNU General Public License
+ * Version 2. You may review the terms of this license at
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * Copyright (c) 1995, 1996, Robert Gentleman and Ross Ihaka
+ * Copyright (c) 1998-2013, The R Core Team
+ * Copyright (c) 2014, 2014, Oracle and/or its affiliates
+ *
+ * All rights reserved.
+ */
+
+package com.oracle.truffle.r.nodes.builtin.base;
+
+import static com.oracle.truffle.r.runtime.RBuiltinKind.*;
+
+import java.io.*;
+
+import com.oracle.truffle.api.*;
+import com.oracle.truffle.api.dsl.*;
+import com.oracle.truffle.api.frame.*;
+import com.oracle.truffle.r.nodes.*;
+import com.oracle.truffle.r.nodes.access.*;
+import com.oracle.truffle.r.nodes.builtin.*;
+import com.oracle.truffle.r.nodes.unary.*;
+import com.oracle.truffle.r.runtime.*;
+import com.oracle.truffle.r.runtime.data.*;
+import com.oracle.truffle.r.runtime.data.model.*;
+import com.oracle.truffle.r.runtime.ops.na.*;
+
+@SuppressWarnings("unused")
+@RBuiltin(name = "scan", kind = INTERNAL, parameterNames = {"file", "what", "nmax", "sep", "dec", "quote", "skip", "nlines", "na.strings", "flush", "fill", "strip.white", "quiet", "blank.lines.skip",
+                "multi.line", "comment.char", "allowEscapes", "encoding", "skipNull"})
+public abstract class Scan extends RBuiltinNode {
+
+    private static final int SCAN_BLOCKSIZE = 1000;
+    private static final int NO_COMCHAR = 100000; /* won't occur even in Unicode */
+
+    private final NACheck naCheck = new NACheck();
+
+    // Settings of a single scan call; doScan fills them in from the builtin's arguments.
+    // Several fields (e.g. quoteset, comchar, save) are only assigned, never read, in this class so far.
+    private static class LocalData {
+        RAbstractStringVector naStrings = null; // tokens that isNaString treats as NA
+        boolean quiet = false; // when true, scanVector does not print "Read n items"
+        String sepchar = null; // item separator; null makes scanVector split on white space
+        char decchar = '.';
+        String quoteset = null;
+        int comchar = NO_COMCHAR;
+        // int ttyflag = 0; // connection-related (currently not supported)
+        RConnection con = null; // connection the lines are read from
+        // boolean wasopen = false; // connection-related (currently not supported)
+        boolean escapes = false;
+        int save = 0;
+        boolean isLatin1 = false;
+        boolean isUTF8 = false;
+        boolean atStart = false;
+        boolean embedWarn = false;
+        boolean skipNull = false;
+    }
+
+    // Wraps the arguments in integer/logical casts where needed and then in a cast to vector; 'file' is passed through unchanged.
+    @CreateCast({"arguments"})
+    public RNode[] createCastValue(RNode[] children) {
+        RNode file = children[0];
+        RNode what = CastToVectorNodeFactory.create(children[1], false, false, false, false);
+        RNode nmax = CastToVectorNodeFactory.create(CastIntegerNodeFactory.create(children[2], false, false, false), false, false, false, false);
+        RNode sep = CastToVectorNodeFactory.create(children[3], false, false, false, false);
+        RNode dec = CastToVectorNodeFactory.create(children[4], false, false, false, false);
+        RNode quotes = CastToVectorNodeFactory.create(children[5], false, false, false, false);
+        RNode nskip = CastToVectorNodeFactory.create(CastIntegerNodeFactory.create(children[6], false, false, false), false, false, false, false);
+        RNode nlines = CastToVectorNodeFactory.create(CastIntegerNodeFactory.create(children[7], false, false, false), false, false, false, false);
+        RNode naStrings = CastToVectorNodeFactory.create(children[8], false, false, false, false);
+        RNode flush = CastToVectorNodeFactory.create(CastLogicalNodeFactory.create(children[9], false, false, false), false, false, false, false);
+        RNode fill = CastToVectorNodeFactory.create(CastLogicalNodeFactory.create(children[10], false, false, false), false, false, false, false);
+        RNode stripWhite = CastToVectorNodeFactory.create(children[11], false, false, false, false);
+        RNode quiet = CastToVectorNodeFactory.create(CastLogicalNodeFactory.create(children[12], false, false, false), false, false, false, false);
+        RNode blSkip = CastToVectorNodeFactory.create(CastLogicalNodeFactory.create(children[13], false, false, false), false, false, false, false);
+        RNode multiLine = CastToVectorNodeFactory.create(CastLogicalNodeFactory.create(children[14], false, false, false), false, false, false, false);
+        RNode commentChar = CastToVectorNodeFactory.create(children[15], false, false, false, false);
+        RNode allowEscapes = CastToVectorNodeFactory.create(CastLogicalNodeFactory.create(children[16], false, false, false), false, false, false, false);
+        RNode encoding = CastToVectorNodeFactory.create(children[17], false, false, false, false);
+        RNode skipNull = CastToVectorNodeFactory.create(CastLogicalNodeFactory.create(children[18], false, false, false), false, false, false, false);
+        return new RNode[]{file, what, nmax, sep, dec, quotes, nskip, nlines, naStrings, flush, fill, stripWhite, quiet, blSkip, multiLine, commentChar, allowEscapes, encoding, skipNull};
+    }
+
+    /**
+     * Validates the internal scan arguments, records them in a LocalData and reads the items from
+     * 'file', which is closed on exit; if the element class of 'what' is Object, RNull is returned.
+     */
+    @Specialization
+    Object doScan(RConnection file, RAbstractVector what, RAbstractIntVector nmaxVec, RAbstractVector sepVec, RAbstractVector decVec, RAbstractVector quotesVec, RAbstractIntVector nskipVec,
+                    RAbstractIntVector nlinesVec, RAbstractVector naStringsVec, RAbstractLogicalVector flushVec, RAbstractLogicalVector fillVec, RAbstractVector stripVec,
+                    RAbstractLogicalVector dataQuietVec, RAbstractLogicalVector blSkipVec, RAbstractLogicalVector multiLineVec, RAbstractVector commentCharVec, RAbstractLogicalVector escapesVec,
+                    RAbstractVector encodingVec, RAbstractLogicalVector skipNullVec) {
+        LocalData data = new LocalData();
+        int nmax = nmaxVec.getLength() == 0 ? RRuntime.INT_NA : nmaxVec.getDataAt(0);
+
+        // an empty 'sep' leaves sepchar null, i.e. items are separated by white space
+        if (sepVec.getLength() > 0) {
+            if (sepVec.getElementClass() != RString.class) {
+                throw RError.error(RError.Message.INVALID_ARGUMENT, "sep");
+            }
+            // TODO: some sort of character translation happens here?
+            String sep = ((RAbstractStringVector) sepVec).getDataAt(0);
+            if (sep.length() > 1) {
+                throw RError.error(RError.Message.MUST_BE_ONE_BYTE, "'sep' value");
+            }
+            data.sepchar = sep.length() == 0 ? null : sep;
+        }
+
+        // an empty 'dec' keeps the default '.'; "" is rejected because charAt(0) would throw
+        if (decVec.getLength() > 0) {
+            if (decVec.getElementClass() != RString.class) {
+                throw RError.error(RError.Message.INVALID_DECIMAL_SEP);
+            }
+            // TODO: some sort of character translation happens here?
+            String dec = ((RAbstractStringVector) decVec).getDataAt(0);
+            if (dec.length() != 1) {
+                throw RError.error(RError.Message.MUST_BE_ONE_BYTE, "decimal separator");
+            }
+            data.decchar = dec.charAt(0);
+        }
+
+        if (quotesVec.getLength() == 0) {
+            data.quoteset = "";
+        } else if (quotesVec.getElementClass() != RString.class) {
+            throw RError.error(RError.Message.INVALID_QUOTE_SYMBOL);
+        } else {
+            // TODO: some sort of character translation happens here?
+            data.quoteset = ((RAbstractStringVector) quotesVec).getDataAt(0);
+        }
+
+        int nskip = nskipVec.getLength() == 0 ? RRuntime.INT_NA : nskipVec.getDataAt(0);
+        int nlines = nlinesVec.getLength() == 0 ? RRuntime.INT_NA : nlinesVec.getDataAt(0);
+
+        if (naStringsVec.getElementClass() != RString.class) {
+            throw RError.error(RError.Message.INVALID_ARGUMENT, "na.strings");
+        }
+        data.naStrings = (RAbstractStringVector) naStringsVec;
+
+        byte flush = flushVec.getLength() == 0 ? RRuntime.LOGICAL_NA : flushVec.getDataAt(0);
+        byte fill = fillVec.getLength() == 0 ? RRuntime.LOGICAL_NA : fillVec.getDataAt(0);
+
+        if (stripVec.getElementClass() != RLogical.class) {
+            throw RError.error(RError.Message.INVALID_ARGUMENT, "strip.white");
+        }
+        if (stripVec.getLength() != 1 && stripVec.getLength() != what.getLength()) {
+            throw RError.error(RError.Message.INVALID_LENGTH, "strip.white");
+        }
+        // the length check above lets an empty 'strip.white' through when 'what' is empty as well
+        byte strip = stripVec.getLength() == 0 ? RRuntime.LOGICAL_NA : ((RAbstractLogicalVector) stripVec).getDataAt(0);
+
+        data.quiet = dataQuietVec.getLength() == 0 || RRuntime.isNA(dataQuietVec.getDataAt(0)) ? false : dataQuietVec.getDataAt(0) == RRuntime.LOGICAL_TRUE;
+
+        byte blSkip = blSkipVec.getLength() == 0 ? RRuntime.LOGICAL_NA : blSkipVec.getDataAt(0);
+        byte multiLine = multiLineVec.getLength() == 0 ? RRuntime.LOGICAL_NA : multiLineVec.getDataAt(0);
+
+        if (commentCharVec.getElementClass() != RString.class || commentCharVec.getLength() != 1) {
+            throw RError.error(RError.Message.INVALID_ARGUMENT, "comment.char");
+        }
+        String commentChar = ((RAbstractStringVector) commentCharVec).getDataAt(0);
+        data.comchar = NO_COMCHAR;
+        if (commentChar.length() > 1) {
+            throw RError.error(RError.Message.INVALID_ARGUMENT, "comment.char");
+        } else if (commentChar.length() == 1) {
+            data.comchar = commentChar.charAt(0);
+        }
+
+        byte escapes = escapesVec.getLength() == 0 ? RRuntime.LOGICAL_NA : escapesVec.getDataAt(0);
+        if (RRuntime.isNA(escapes)) {
+            throw RError.error(RError.Message.INVALID_ARGUMENT, "allowEscapes");
+        }
+        data.escapes = escapes != RRuntime.LOGICAL_FALSE;
+
+        if (encodingVec.getElementClass() != RString.class || encodingVec.getLength() != 1) {
+            throw RError.error(RError.Message.INVALID_ARGUMENT, "encoding");
+        }
+        String encoding = ((RAbstractStringVector) encodingVec).getDataAt(0);
+        if (encoding.equals("latin1")) {
+            data.isLatin1 = true;
+        }
+        if (encoding.equals("UTF-8")) {
+            data.isUTF8 = true;
+        }
+
+        byte skipNull = skipNullVec.getLength() == 0 ? RRuntime.LOGICAL_NA : skipNullVec.getDataAt(0);
+        if (RRuntime.isNA(skipNull)) {
+            throw RError.error(RError.Message.INVALID_ARGUMENT, "skipNull");
+        }
+        data.skipNull = skipNull != RRuntime.LOGICAL_FALSE;
+
+        if (blSkip == RRuntime.LOGICAL_NA) {
+            blSkip = 1;
+        }
+        if (multiLine == RRuntime.LOGICAL_NA) {
+            multiLine = 1;
+        }
+        if (nskip < 0 || nskip == RRuntime.INT_NA) {
+            nskip = 0;
+        }
+        if (nlines < 0 || nlines == RRuntime.INT_NA) {
+            nlines = 0;
+        }
+        if (nmax < 0 || nmax == RRuntime.INT_NA) {
+            nmax = 0;
+        }
+
+        // TODO: quite a few more things happen in GNU R around connections
+        data.con = file;
+        data.save = 0;
+
+        try {
+            if (nskip > 0) {
+                data.con.readLines(nskip);
+            }
+            if (what.getElementClass() != Object.class) {
+                return scanVector(what, nmax, nlines, flush, strip, blSkip, data);
+            }
+        } catch (IOException x) {
+            throw RError.error(RError.Message.CANNOT_READ_CONNECTION);
+        } finally {
+            try {
+                data.con.close();
+            } catch (IOException ignored) {
+                // best effort: a failure to close must not replace the result or an earlier error
+            }
+        }
+        return RNull.instance;
+    }
+
+    /**
+     * Reads items line by line into a vector of the same type as 'what' until the connection is
+     * exhausted or maxItems items or maxLines lines have been read (0 means no limit for either).
+     */
+    private RVector scanVector(RAbstractVector what, int maxItems, int maxLines, int flush, byte stripWhite, int blSkip, LocalData data) throws IOException {
+        int blockSize = maxItems > 0 ? maxItems : SCAN_BLOCKSIZE;
+        RVector vec = what.createEmptySameType(blockSize, RDataFactory.COMPLETE_VECTOR);
+        naCheck.enable(true);
+        // sepchar is a literal character: quote it so that split does not treat it as a regex
+        String delimiter = data.sepchar == null ? "\\s+" : java.util.regex.Pattern.quote(data.sepchar);
+        int n = 0;
+        int lines = 0;
+        while (true) {
+            // TODO: does not do any fancy stuff, like handling comments
+            String[] str = data.con.readLines(1);
+            if (str == null || str.length == 0) {
+                break;
+            }
+            String[] strItems = str[0].trim().split(delimiter);
+            boolean done = false;
+            for (int i = 0; i < strItems.length; i++) {
+                Object item = extractItem(what, strItems[i], data);
+                if (n == blockSize) {
+                    // enlarge the vector; the result of copyResized is the vector to continue with
+                    blockSize = blockSize * 2;
+                    vec = vec.copyResized(blockSize, false);
+                }
+                vec.updateDataAtAsObject(n, item, naCheck);
+                n++;
+                if (n == maxItems) {
+                    done = true;
+                    break;
+                }
+            }
+            if (done) {
+                break;
+            }
+            lines++;
+            if (lines == maxLines) {
+                break;
+            }
+        }
+        if (!data.quiet) {
+            RContext.getInstance().getConsoleHandler().printf("Read %d item%s\n", n, (n == 1) ? "" : "s");
+        }
+        // trim vector if necessary
+        return vec.getLength() > n ? vec.copyResized(n, false) : vec;
+    }
+
+    /**
+     * Tells whether a token denotes NA: it does if it equals one of data.naStrings and, with mode 0
+     * (numeric fields), also if it is empty. Mode 1 is for character fields, where "" is verbatim.
+     */
+    private static boolean isNaString(String buffer, int mode, LocalData data) {
+        if (mode == 0 && buffer.isEmpty()) {
+            return true;
+        }
+        RAbstractStringVector candidates = data.naStrings;
+        for (int idx = 0; idx < candidates.getLength(); idx++) {
+            if (candidates.getDataAt(idx).equals(buffer)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Converts one token to an element of the type of 'what'. Tokens accepted by isNaString yield
+     * the NA value of that type (a zero byte for raw); any other element class is an internal error.
+     */
+    private static Object extractItem(RAbstractVector what, String buffer, LocalData data) {
+        if (what.getElementClass() == RLogical.class) {
+            if (isNaString(buffer, 0, data)) {
+                return RRuntime.LOGICAL_NA;
+            } else {
+                return RRuntime.string2logicalNoCheck(buffer);
+            }
+        }
+        if (what.getElementClass() == RInt.class) {
+            if (isNaString(buffer, 0, data)) {
+                return RRuntime.INT_NA;
+            } else {
+                return RRuntime.string2intNoCheck(buffer);
+            }
+        }
+        if (what.getElementClass() == RDouble.class) {
+            if (isNaString(buffer, 0, data)) {
+                return RRuntime.DOUBLE_NA;
+            } else {
+                return RRuntime.string2doubleNoCheck(buffer);
+            }
+        }
+        if (what.getElementClass() == RComplex.class) {
+            if (isNaString(buffer, 0, data)) {
+                return RRuntime.createComplexNA();
+            } else {
+                return RRuntime.string2complexNoCheck(buffer);
+            }
+        }
+        if (what.getElementClass() == RString.class) {
+            // mode 1: an empty character field is kept verbatim unless na.strings contains ""
+            if (isNaString(buffer, 1, data)) {
+                return RRuntime.STRING_NA;
+            } else {
+                return buffer;
+            }
+        }
+        if (what.getElementClass() == RRaw.class) {
+            if (isNaString(buffer, 0, data)) {
+                return RDataFactory.createRaw((byte) 0);
+            } else {
+                return RRuntime.string2raw(buffer);
+            }
+        }
+        throw RInternalError.shouldNotReachHere();
+    }
+
+}
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RError.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RError.java
index e73169627c..cc1ecf32a1 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RError.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RError.java
@@ -271,6 +271,7 @@ public final class RError extends RuntimeException {
         INCORRECT_SUBSCRIPTS("incorrect number of subscripts"),
         INCORRECT_SUBSCRIPTS_MATRIX("incorrect number of subscripts on matrix"),
         INVALID_SEP("invalid 'sep' specification"),
+        INVALID_LENGTH("invalid '%s' length"),
         // below: GNU R gives also expression for the argument
         NOT_FUNCTION("argument is not a function, character or symbol"),
         NON_NUMERIC_MATH("non-numeric argument to mathematical function"),
@@ -459,6 +460,9 @@ public final class RError extends RuntimeException {
         INCORRECT_NUM_PROB("incorrect number of probabilities"),
         NA_IN_PROB_VECTOR("NA in probability vector"),
         NEGATIVE_PROBABILITY("non-positive probability"),
+        MUST_BE_ONE_BYTE("invalid %s: must be one byte"),
+        INVALID_DECIMAL_SEP("invalid decimal separator"),
+        INVALID_QUOTE_SYMBOL("invalid quote symbol set"),
         // below: not exactly GNU-R message
         TOO_FEW_POSITIVE_PROBABILITY("too few positive probabilities"),
         DOTS_BOUNDS("The ... list does not contain %s elements"),
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RRuntime.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RRuntime.java
index 76099c23be..7ce57708a2 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RRuntime.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RRuntime.java
@@ -374,6 +374,15 @@ public class RRuntime {
         return isNA(v) ? createComplexNA() : string2complexNoCheck(v);
     }
 
+    /** Parses exactly two hexadecimal digits ("00" to "ff") into a raw value; other input yields 0. */
+    @SlowPath
+    public static RRaw string2raw(String v) {
+        boolean twoHexDigits = v.length() == 2 && Character.digit(v.charAt(0), 16) >= 0 && Character.digit(v.charAt(1), 16) >= 0;
+        // Integer.parseInt, because Byte.parseByte throws for values above 0x7f
+        byte value = twoHexDigits ? (byte) Integer.parseInt(v, 16) : 0;
+        return RDataFactory.createRaw(value);
+    }
+
     // conversions from int
 
     public static double int2doubleNoCheck(int i) {
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/Utils.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/Utils.java
index 62ea5d8599..afc0941bdd 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/Utils.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/Utils.java
@@ -68,6 +68,10 @@ public final class Utils {
         return c >= '\u0030' && c <= '\u0039';
     }
 
+    public static boolean isRomanLetter(char c) {
+        return /* upper case U+00C0..U+00DE, lower case U+00DF..U+00FF */c >= '\u00C0' && c <= '\u00FF';
+    }
+
     public static int incMod(int value, int mod) {
         int result = (value + 1);
         if (result == mod) {
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RComplexVector.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RComplexVector.java
index 03cf4b7ea3..201a6d22dd 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RComplexVector.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RComplexVector.java
@@ -138,6 +138,12 @@ public final class RComplexVector extends RVector implements RAbstractComplexVec
         return this;
     }
 
+    @Override
+    public RComplexVector updateDataAtAsObject(int i, Object o, NACheck naCheck) {
+        // o must be an RComplex; the cast fails for any other non-null type
+        return updateDataAt(i, (RComplex) o, naCheck);
+    }
+
     private double[] copyResizedData(int size, boolean fillNA) {
         int csize = size << 1;
         double[] newData = Arrays.copyOf(data, csize);
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RDoubleSequence.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RDoubleSequence.java
index 7a277630d2..0d60e8f005 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RDoubleSequence.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RDoubleSequence.java
@@ -102,10 +102,15 @@ public final class RDoubleSequence extends RSequence implements RAbstractDoubleV
     }
 
     @Override
-    public RAbstractVector copyResized(int size, boolean fillNA) {
+    public RDoubleVector copyResized(int size, boolean fillNA) {
         double[] data = new double[size];
         populateVectorData(data);
         RDoubleVector.resizeData(data, data, getLength(), fillNA);
         return RDataFactory.createDoubleVector(data, !(fillNA && size > getLength()));
     }
+
+    @Override
+    public RDoubleVector createEmptySameType(int newLength, boolean newIsComplete) {
+        return RDataFactory.createDoubleVector(new double[newLength], newIsComplete); // zero-filled vector, not a sequence
+    }
 }
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RDoubleVector.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RDoubleVector.java
index d45c31d5f1..a158953cc6 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RDoubleVector.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RDoubleVector.java
@@ -168,6 +168,12 @@ public final class RDoubleVector extends RVector implements RAbstractDoubleVecto
         return this;
     }
 
+    @Override
+    public RDoubleVector updateDataAtAsObject(int i, Object o, NACheck naCheck) {
+        // o must be a Double; the cast fails for any other non-null type
+        return updateDataAt(i, (Double) o, naCheck);
+    }
+
     public static double[] resizeData(double[] newData, double[] oldData, int oldDataLength, boolean fillNA) {
         if (newData.length > oldDataLength) {
             if (fillNA) {
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RIntSequence.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RIntSequence.java
index cfe868729c..8f6f1a89d4 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RIntSequence.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RIntSequence.java
@@ -103,10 +103,16 @@ public final class RIntSequence extends RSequence implements RAbstractIntVector
     }
 
     @Override
-    public RAbstractVector copyResized(int size, boolean fillNA) {
+    public RIntVector copyResized(int size, boolean fillNA) {
         int[] data = new int[size];
         populateVectorData(data);
         RIntVector.resizeData(data, data, getLength(), fillNA);
         return RDataFactory.createIntVector(data, !(fillNA && size > getLength()));
     }
+
+    // Returns a zero-filled int vector of the requested length rather than a sequence.
+    @Override
+    public RIntVector createEmptySameType(int newLength, boolean newIsComplete) {
+        return RDataFactory.createIntVector(new int[newLength], newIsComplete);
+    }
 }
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RIntVector.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RIntVector.java
index e1544b49fc..97452f3784 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RIntVector.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RIntVector.java
@@ -156,6 +156,12 @@ public final class RIntVector extends RVector implements RAbstractIntVector {
         return this;
     }
 
+    @Override
+    public RIntVector updateDataAtAsObject(int i, Object o, NACheck naCheck) {
+        // o must be an Integer; the cast fails for any other non-null type
+        return updateDataAt(i, (Integer) o, naCheck);
+    }
+
     public static int[] resizeData(int[] newData, int[] oldData, int oldDataLength, boolean fillNA) {
         if (newData.length > oldDataLength) {
             if (fillNA) {
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RList.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RList.java
index c1c9a527c8..429d229b60 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RList.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RList.java
@@ -135,6 +135,12 @@ public final class RList extends RVector implements RAbstractVector {
         return this;
     }
 
+    @Override
+    public RList updateDataAtAsObject(int i, Object o, NACheck naCheck) {
+        // o is handed to updateDataAt as is, without a cast
+        return updateDataAt(i, o, naCheck);
+    }
+
     @Override
     public RList createEmptySameType(int newLength, boolean newIsComplete) {
         return RDataFactory.createList(new Object[newLength]);
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RLogicalVector.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RLogicalVector.java
index 4f02a17937..afb3cfb544 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RLogicalVector.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RLogicalVector.java
@@ -103,6 +103,12 @@ public final class RLogicalVector extends RVector implements RAbstractLogicalVec
         return this;
     }
 
+    @Override
+    public RLogicalVector updateDataAtAsObject(int i, Object o, NACheck naCheck) {
+        // o must be a Byte; the cast fails for any other non-null type
+        return updateDataAt(i, (Byte) o, naCheck);
+    }
+
     private byte[] copyResizedData(int size, boolean fillNA) {
         byte[] newData = Arrays.copyOf(data, size);
         if (size > this.getLength()) {
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RRawVector.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RRawVector.java
index 1e5a1a8430..627023dc61 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RRawVector.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RRawVector.java
@@ -27,6 +27,7 @@ import java.util.*;
 import com.oracle.truffle.api.CompilerDirectives.SlowPath;
 import com.oracle.truffle.r.runtime.*;
 import com.oracle.truffle.r.runtime.data.model.*;
+import com.oracle.truffle.r.runtime.ops.na.*;
 
 public final class RRawVector extends RVector implements RAbstractRawVector {
 
@@ -132,6 +133,11 @@ public final class RRawVector extends RVector implements RAbstractRawVector {
         return this;
     }
 
+    @Override
+    public RRawVector updateDataAtAsObject(int i, Object o, NACheck naCheck) {
+        return updateDataAt(i, (RRaw) o); // o must be an RRaw; naCheck is not used here
+    }
+
     private byte[] copyResizedData(int size, boolean fillNA) {
         byte[] newData = Arrays.copyOf(data, size);
         if (!fillNA) {
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RStringVector.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RStringVector.java
index a2c2aa510a..4f77350ae2 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RStringVector.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RStringVector.java
@@ -118,6 +118,12 @@ public final class RStringVector extends RVector implements RAbstractStringVecto
         return this;
     }
 
+    @Override
+    public RStringVector updateDataAtAsObject(int i, Object o, NACheck naCheck) {
+        final String stringValue = (String) o; // o must be a String, otherwise the cast fails
+        return updateDataAt(i, stringValue, naCheck);
+    }
+
     private String[] copyResizedData(int size, String fill) {
         String[] newData = Arrays.copyOf(data, size);
         if (size > this.getLength()) {
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RVector.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RVector.java
index 56cf5dc211..b68bef7daf 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RVector.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/RVector.java
@@ -29,6 +29,7 @@ import com.oracle.truffle.api.source.*;
 import com.oracle.truffle.r.runtime.*;
 import com.oracle.truffle.r.runtime.data.RAttributes.RAttribute;
 import com.oracle.truffle.r.runtime.data.model.*;
+import com.oracle.truffle.r.runtime.ops.na.*;
 
 import edu.umd.cs.findbugs.annotations.*;
 
@@ -461,6 +462,17 @@ public abstract class RVector extends RBounded implements RShareable, RAbstractV
 
     protected abstract boolean internalVerify();
 
+    /**
+     * Update a data item in the vector. Possibly not as efficient as type-specific methods, but in
+     * some cases it likely does not matter (e.g. if used alongside I/O operations).
+     *
+     * @param i index of the vector item to be updated
+     * @param o updated value; implementations cast it to their own element type (e.g. Byte, RRaw, String)
+     * @param naCheck NA check used to change vector's mode in case value is NA
+     * @return updated vector
+     */
+    public abstract RVector updateDataAtAsObject(int i, Object o, NACheck naCheck);
+
     public final RStringVector toStringVector() {
         String[] values = new String[getLength()];
         for (int i = 0; i < getLength(); ++i) {
@@ -469,8 +481,6 @@ public abstract class RVector extends RBounded implements RShareable, RAbstractV
         return RDataFactory.createStringVector(values, this.isComplete());
     }
 
-    public abstract RVector createEmptySameType(int newLength, boolean newIsComplete);
-
     public abstract void transferElementSameType(int toIndex, RVector fromVector, int fromIndex);
 
     public final boolean isInBounds(int firstPosition, int secondPosition) {
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/closures/RToVectorClosure.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/closures/RToVectorClosure.java
index 6449f86ce8..dc6c4cae6e 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/closures/RToVectorClosure.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/closures/RToVectorClosure.java
@@ -86,7 +86,7 @@ public abstract class RToVectorClosure implements RAbstractVector {
     }
 
     @Override
-    public RAbstractVector copyResized(int size, boolean fillNA) {
+    public RVector copyResized(int size, boolean fillNA) {
         return vector.copyResized(size, fillNA);
     }
 
@@ -121,4 +121,9 @@ public abstract class RToVectorClosure implements RAbstractVector {
         return vector.materialize();
     }
 
+    @Override
+    public RVector createEmptySameType(int newLength, boolean newIsComplete) {
+        return vector.createEmptySameType(newLength, newIsComplete); // pure delegation to the vector field
+    }
+
 }
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/model/RAbstractVector.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/model/RAbstractVector.java
index 262357a893..8b3f6a798e 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/model/RAbstractVector.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/data/model/RAbstractVector.java
@@ -41,12 +41,14 @@ public interface RAbstractVector extends RAbstractContainer {
      */
     RAbstractVector copy();
 
-    RAbstractVector copyResized(int size, boolean fillNA);
+    RVector copyResized(int size, boolean fillNA);
 
     RAbstractVector copyWithNewDimensions(int[] newDimensions);
 
     RAbstractVector copyDropAttributes();
 
+    RVector createEmptySameType(int newLength, boolean newIsComplete);
+
     /**
      * May throw errors, so needs a {@link VirtualFrame}.
      */
diff --git a/mx.fastr/copyrights/gnu_r_scan.copyright.star b/mx.fastr/copyrights/gnu_r_scan.copyright.star
new file mode 100644
index 0000000000..0a1b1ef591
--- /dev/null
+++ b/mx.fastr/copyrights/gnu_r_scan.copyright.star
@@ -0,0 +1,11 @@
+/*
+ * This material is distributed under the GNU General Public License
+ * Version 2. You may review the terms of this license at
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * Copyright (c) 1995, 1996, Robert Gentleman and Ross Ihaka
+ * Copyright (c) 1998-2013, The R Core Team
+ * Copyright (c) 2014, 2014, Oracle and/or its affiliates
+ *
+ * All rights reserved.
+ */
diff --git a/mx.fastr/copyrights/gnu_r_scan.copyright.star.regex b/mx.fastr/copyrights/gnu_r_scan.copyright.star.regex
new file mode 100644
index 0000000000..249eb964ce
--- /dev/null
+++ b/mx.fastr/copyrights/gnu_r_scan.copyright.star.regex
@@ -0,0 +1 @@
+/\*\n \* This material is distributed under the GNU General Public License\n \* Version 2. You may review the terms of this license at\n \* http://www.gnu.org/licenses/gpl-2.0.html\n \*\n \* Copyright \(c\) 1995, 1996, Robert Gentleman and Ross Ihaka\n \* Copyright \(c\) 1998-2013, The R Core Team\n \* Copyright \(c\) (?:(20[0-9][0-9]), )?(20[0-9][0-9]), Oracle and/or its affiliates\n \*\n \* All rights reserved.\n \*/\n.*
diff --git a/mx.fastr/copyrights/overrides b/mx.fastr/copyrights/overrides
index 2fc370c18b..a6fb6a219b 100644
--- a/mx.fastr/copyrights/overrides
+++ b/mx.fastr/copyrights/overrides
@@ -53,6 +53,7 @@ com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/R
 com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/RowMeans.java,purdue.copyright
 com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/RowSums.java,purdue.copyright
 com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Sample.java,gnu_r_sample.copyright
+com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Scan.java,gnu_r_scan.copyright
 com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Tabulate.java,purdue.copyright
 com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/UnClass.java,purdue.copyright
 com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Unlist.java,gnu_r.copyright
-- 
GitLab