diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/foreign/ReadTableHead.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/foreign/ReadTableHead.java index 8d14c507c30459f83ad60a679890af47693e47be..700ccf32f90886dc4f1ced03ab5681c83b63e18a 100644 --- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/foreign/ReadTableHead.java +++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/foreign/ReadTableHead.java @@ -46,38 +46,236 @@ public abstract class ReadTableHead extends RExternalBuiltinNode.Arg7 { String quote, String sep, boolean skipNull) { // TODO This is quite incomplete and just uses readLines, which works for some inputs try (RConnection openConn = RConnection.fromIndex(con).forceOpen("r")) { - List<String> lines = new ArrayList<>(nlines); - int totalLines = 0; - while (totalLines < nlines) { - String[] readLines = openConn.readLines(nlines - totalLines, EnumSet.of(ReadLineWarning.EMBEDDED_NUL, ReadLineWarning.INCOMPLETE_LAST_LINE), skipNull); - if (readLines.length == 0) { - break; + ReadState readState = new ReadState(openConn, nlines, + (commentChar.length() > 0) ? commentChar.charAt(0) : -1, quote, skipNull); + int sepChar = (sep.length() > 0) ? sep.charAt(0) : -1; + int quoteChar = -1; + StringBuilder lineBuilder = new StringBuilder(); + while (readState.resultLines.size() < nlines) { + boolean empty = true; + boolean skip = false; + boolean firstNonWhite = true; + int c; + while ((c = readState.nextChar()) != -1) { + if (quoteChar != -1) { + if (sepChar != -1 && c == '\\') { + lineBuilder.append('\\'); + c = readState.nextChar(); + if (c == -1) { + error(Message.EOF_AFTER_BACKSLASH); + } + lineBuilder.append((char) c); + continue; + } else if (c == quoteChar) { + if (sepChar == -1) { + quoteChar = -1; + } else { + int c2 = readState.nextChar(); + if (c2 == quoteChar) { + lineBuilder.append((char) c); + } else { + readState.pushBack(c2); + quoteChar = -1; + } + } + } + } else if (!skip && firstNonWhite && quote.indexOf((char) c) != -1) { + quoteChar = c; + } else if (Character.isWhitespace((char) c) || c == sepChar) { + firstNonWhite = true; + } else { + firstNonWhite = false; + } + if (empty && !skip && c != '\n' && c != readState.commentChar) { + empty = false; + } + if (quoteChar == -1 && !skip && c == readState.commentChar) { + skip = true; + } + if (quoteChar != -1 || c != '\n') { + lineBuilder.append((char) c); + } else { + break; + } } - - for (int i = 0; i < readLines.length; i++) { - postprocessLine(lines, readLines[i], commentChar, blankLinesSkip, quote, sep); + if (!empty || (c != -1 && !blankLinesSkip)) { + readState.addResultLine(lineBuilder); + } + if (c == -1) { + break; } - totalLines += lines.size(); } - - return RDataFactory.createStringVector(lines.toArray(new String[0]), RDataFactory.COMPLETE_VECTOR); + return RDataFactory.createStringVector(readState.resultLines.toArray(new String[0]), RDataFactory.COMPLETE_VECTOR); } catch (IOException ex) { throw error(RError.Message.ERROR_READING_CONNECTION, ex.getMessage()); } } - private static void postprocessLine(List<String> lines, String string, String commentChar, boolean blankLinesSkip, @SuppressWarnings("unused") String quote, - @SuppressWarnings("unused") String sep) { - // TODO quote, sep - if (blankLinesSkip && string.isEmpty()) { - return; + private final class ReadState { + + private final RConnection openConn; + + private final int nlines; + + final int commentChar; + + private final String quotes; + + private final boolean skipNull; + + final List<String> resultLines; + + private String[] readLines; + + private int readLineIndex; + + private int colIndex; + + private int pushBackChar = -1; + + private boolean inQuote; + + private int nullCnt; + + ReadState(RConnection openConn, int nlines, int commentChar, String quotes, boolean skipNull) { + this.openConn = openConn; + this.nlines = nlines; + this.commentChar = commentChar; + this.quotes = quotes; + this.skipNull = skipNull; + this.resultLines = new ArrayList<String>(nlines); } - if (commentChar != null && !commentChar.isEmpty() && string.startsWith(commentChar)) { - return; + int nextChar() throws IOException { + int c; + if (pushBackChar != -1) { + c = pushBackChar; + if (c == 0) { + nullCnt++; + } + pushBackChar = -1; + } else { + c = readChar(); + } + if (!inQuote && commentChar != -1 && c == commentChar) { + do { + c = readChar(); + } while (c != -1 && c != '\n'); + } + // The allowEscapes flag is currently not propagated into table head reading which + // causes e.g. a header line duplication - see tests + if (false && c == '\\') { // Assuming escapes never allowed + c = readChar(); + if ('0' <= c && c <= '8') { + int octal = c - '0'; + if ('0' <= (c = readChar()) && c <= '8') { + octal = 8 * octal + c - '0'; + if ('0' <= (c = readChar()) && c <= '8') { + octal = 8 * octal + c - '0'; + } else { + pushBack(c); + } + } else { + pushBack(c); + } + c = octal; + } else if (c == -1) { + c = '\\'; + } else { + switch ((char) c) { + case 'a': + c = '\u0007'; + break; + case 'b': + c = '\b'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\u000B'; + break; + case 'x': + int hex = 0; + int ext; + for (int i = 0; i < 2; i++) { + c = readChar(); + if (c >= '0' && c <= '9') { + ext = c - '0'; + } else if (c >= 'A' && c <= 'F') { + ext = c - 'A' + 10; + } else if (c >= 'a' && c <= 'f') { + ext = c - 'a' + 10; + } else { + pushBack(c); + break; + } + hex = 16 * hex + ext; + } + c = hex; + break; + default: + if (inQuote && quotes.indexOf(c) != -1) { + pushBack(c); + c = '\\'; + } + break; + } + } + } + return c; + } + + int readChar() throws IOException { + int c; + do { + if (readLines == null || readLineIndex >= readLines.length) { + readLineIndex = 0; + readLines = openConn.readLines(nlines - resultLines.size(), EnumSet.of(ReadLineWarning.INCOMPLETE_LAST_LINE), false); + if (readLines == null || readLines.length == 0) { + return -1; + } + } + String readLine = readLines[readLineIndex]; + if (colIndex < readLine.length()) { + c = readLine.charAt(colIndex++); + } else { + c = '\n'; + readLineIndex++; + colIndex = 0; + } + } while (c == 0 && skipNull); + if (c == 0) { + nullCnt++; + } + return c; + } + + void pushBack(int c) { + pushBackChar = c; + if (c == 0) { + nullCnt--; + } + } + + void addResultLine(StringBuilder lineBuilder) { + resultLines.add(lineBuilder.toString()); + lineBuilder.setLength(0); + if (nullCnt > 0) { + nullCnt = 0; + warning(Message.LINE_CONTAINS_EMBEDDED_NULLS, resultLines.size()); + } } - // no reason why not to add - lines.add(string); } + } diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RError.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RError.java index a4cc7250eb6f3bc3bf9fd94fa9592e862351bd29..531d285ffd8af586e24062ab1bda8a3a70e48fd0 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RError.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RError.java @@ -945,7 +945,8 @@ public final class RError extends RuntimeException implements TruffleException { ATOMIC_VECTOR_ARGUMENTS_ONLY("atomic vector arguments only"), MUST_BE_COMPLEX_MATRIX("'%s' must be a complex matrix"), INVALID_FORMAL_ARG_LIST("invalid formal argument list for \"%s\""), - SINGULAR_BACKSOLVE("singular matrix in 'backsolve'. First zero in diagonal [%d]"); + SINGULAR_BACKSOLVE("singular matrix in 'backsolve'. First zero in diagonal [%d]"), + EOF_AFTER_BACKSLASH("\\ followed by EOF"); public final String message; final boolean hasArgs; diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test index 4c72153998acf3103c52b7ba99ba17c9eaba3104..38955f0d5d51209d884f310d4dc15a09ec383c31 100644 --- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test +++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test @@ -169187,6 +169187,85 @@ debug at #1: cat("foo exit\n") foo exit exiting from: foo(5) +##com.oracle.truffle.r.test.library.utils.TestReadTable.testParams# +#read.table(header = TRUE, allowEscapes=FALSE, text = 'a b c\\nx\n1 2 3\n4 5 6\n') + a b c.nx +1 1 2 3 +2 4 5 6 + +##com.oracle.truffle.r.test.library.utils.TestReadTable.testParams#Ignored.ReferenceError# +#read.table(header = TRUE, allowEscapes=TRUE, text = 'a b c\\nx y z\n1 2 3\n4 5 6\n') + a b c +1 a b c +2 x y z +3 1 2 3 +4 4 5 6 + +##com.oracle.truffle.r.test.library.utils.TestReadTable.testParams# +#read.table(header = TRUE, blank.lines.skip=FALSE, text = 'a b c\n\n1 2 3\n4 5 6') + a b c +1 NA NA NA +2 1 2 3 +3 4 5 6 + +##com.oracle.truffle.r.test.library.utils.TestReadTable.testParams# +#read.table(header = TRUE, blank.lines.skip=TRUE, text = 'a b c\n\n1 2 3\n4 5 6') + a b c +1 1 2 3 +2 4 5 6 + +##com.oracle.truffle.r.test.library.utils.TestReadTable.testParams# +#read.table(header = TRUE, comment.char='/', text = 'a b c /comment\n1 2 3\n4 5 6\n') + a b c +1 1 2 3 +2 4 5 6 + +##com.oracle.truffle.r.test.library.utils.TestReadTable.testParams# +#read.table(header = TRUE, comment.char='/', text = 'a b c/"quotes-in-comment"\n1 2 3\n4 5 6\n') + a b c +1 1 2 3 +2 4 5 6 + +##com.oracle.truffle.r.test.library.utils.TestReadTable.testParams# +#read.table(header = TRUE, quote='|', text = 'a b c\n1 2 3||x') + a b c +1 1 2 3||x + +##com.oracle.truffle.r.test.library.utils.TestReadTable.testParams# +#read.table(header = TRUE, quote='|', text = 'a b c\n1 2|x 3') + a b c +1 1 2|x 3 + +##com.oracle.truffle.r.test.library.utils.TestReadTable.testParams# +#read.table(header = TRUE, quote='|', text = 'a b c\n1 |2 22 222| 3\n4 5 6\n') + a b c +1 1 2 22 222 3 +2 4 5 6 + +##com.oracle.truffle.r.test.library.utils.TestReadTable.testParams# +#read.table(header = TRUE, quote='|', text = 'a b |c ccc|\n1 2 3\n4 5 6\n') + a b c.ccc +1 1 2 3 +2 4 5 6 + +##com.oracle.truffle.r.test.library.utils.TestReadTable.testParams# +#read.table(header = TRUE, text = 'a b "c ccc"\n1 2 3\n4 5 6\n') + a b c.ccc +1 1 2 3 +2 4 5 6 + +##com.oracle.truffle.r.test.library.utils.TestReadTable.testParams# +#read.table(header = TRUE, text = 'a b c\n1 "2 22 222" 3\n4 5 6\n') + a b c +1 1 2 22 222 3 +2 4 5 6 + +##com.oracle.truffle.r.test.library.utils.TestReadTable.testParams# +#read.table(header = TRUE, text = 'a b\n1 2\n3 4\n') + a b +1 1 2 +2 3 4 + ##com.oracle.truffle.r.test.library.utils.TestTrace.testCondTrace# #f <- function(x) {}; (if (exists('.fastr.trace')) .fastr.trace else trace)(f, tracer=quote(if (x == 3 || x == 7) print(x))); g <- function() for (i in 1:10) f(i); g() [1] "f" diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/library/utils/TestReadTable.java b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/library/utils/TestReadTable.java new file mode 100644 index 0000000000000000000000000000000000000000..b22a5f07e43699c68d461d75e9c04738cd219ad6 --- /dev/null +++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/library/utils/TestReadTable.java @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package com.oracle.truffle.r.test.library.utils; + +import com.oracle.truffle.r.test.TestBase; +import org.junit.Test; + +public class TestReadTable extends TestBase { + + @Test + public void testParams() { + assertEval("read.table(header = TRUE, comment.char='/', text = 'a b c /comment\\n1 2 3\\n4 5 6\\n')"); + assertEval("read.table(header = TRUE, text = 'a b\\n1 2\\n3 4\\n')"); + assertEval("read.table(header = TRUE, allowEscapes=FALSE, text = 'a b c\\\\nx\\n1 2 3\\n4 5 6\\n')"); + // GnuR 3.2.3 duplicates header as a first table row (pushback treating extra line + // incorrectly) + assertEval(Ignored.ReferenceError, "read.table(header = TRUE, allowEscapes=TRUE, text = 'a b c\\\\nx y z\\n1 2 3\\n4 5 6\\n')"); + assertEval("read.table(header = TRUE, text = 'a b c\\n1 \"2 22 222\" 3\\n4 5 6\\n')"); + assertEval("read.table(header = TRUE, text = 'a b \"c ccc\"\\n1 2 3\\n4 5 6\\n')"); + assertEval("read.table(header = TRUE, quote='|', text = 'a b c\\n1 |2 22 222| 3\\n4 5 6\\n')"); + assertEval("read.table(header = TRUE, quote='|', text = 'a b |c ccc|\\n1 2 3\\n4 5 6\\n')"); + assertEval("read.table(header = TRUE, quote='|', text = 'a b c\\n1 2|x 3')"); + assertEval("read.table(header = TRUE, quote='|', text = 'a b c\\n1 2 3||x')"); + assertEval("read.table(header = TRUE, comment.char='/', text = 'a b c/\"quotes-in-comment\"\\n1 2 3\\n4 5 6\\n')"); + assertEval("read.table(header = TRUE, blank.lines.skip=TRUE, text = 'a b c\\n\\n1 2 3\\n4 5 6')"); + assertEval("read.table(header = TRUE, blank.lines.skip=FALSE, text = 'a b c\\n\\n1 2 3\\n4 5 6')"); + } + +}