From 41339a76811af0c7d8b90be241e1b287b5313317 Mon Sep 17 00:00:00 2001 From: Lukas Stadler <lukas.stadler@oracle.com> Date: Thu, 23 Aug 2018 16:27:57 +0200 Subject: [PATCH] treat CR and LF in readLine similar to GNU R --- .../r/runtime/conn/ConnectionSupport.java | 6 +- .../r/runtime/conn/TextConnections.java | 55 +++++++++++++++++- .../truffle/r/test/ExpectedTestOutput.test | 56 +++++++++++++++++++ .../r/test/library/base/TestConnections.java | 4 ++ 4 files changed, 116 insertions(+), 5 deletions(-) diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java index 27aa8990f6..17d31db470 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java @@ -902,7 +902,7 @@ public class ConnectionSupport { if (s == null) { return null; } else { - String[] lines = s.split("\n", 2); + String[] lines = TextConnections.splitLines(s, 2); if (lines.length == 2) { // we hit end of the line if (lines[1].length() != 0) { @@ -923,7 +923,7 @@ public class ConnectionSupport { break; } - lines = s.split("\n", 2); + lines = TextConnections.splitLines(s, 2); if (lines.length == 2) { // we hit end of the line if (lines[1].length() != 0) { @@ -947,7 +947,7 @@ public class ConnectionSupport { */ @TruffleBoundary private String[] readLinesWithPushBack(int n, EnumSet<ReadLineWarning> warn, boolean skipNul) throws IOException { - // NOTE: 'n' may be negative indicating to read as much lines as available + // NOTE: 'n' may be negative indicating to read as many lines as available final List<String> res; if (n >= 0) { res = new ArrayList<>(n); diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/TextConnections.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/TextConnections.java index 7695a0d5be..418df719ce 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/TextConnections.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/TextConnections.java @@ -41,6 +41,55 @@ import com.oracle.truffle.r.runtime.env.REnvironment; import com.oracle.truffle.r.runtime.env.REnvironment.PutException; public class TextConnections { + + public static String[] splitLines(String value, int limit) { + assert limit != 0; + ArrayList<String> strings = null; + + int lastPos = 0; + int pos = 0; + while (pos < value.length()) { + char c = value.charAt(pos); + if (c == '\n' || c == '\r') { + if (strings == null) { + strings = new ArrayList<>(); + } + if (limit != -1 && strings.size() == limit - 1) { + strings.add(value.substring(lastPos)); + return strings.toArray(new String[strings.size()]); + } + strings.add(value.substring(lastPos, pos)); + // skip "\r\n" combination + if (c == '\r') { + if ((pos + 1) < value.length()) { + c = value.charAt(pos + 1); + if (c == '\r') { + // bug in GNU R: a second "\r" is immediately treated as a EOL + if (limit != -1 && strings.size() == limit - 1) { + strings.add(value.substring(pos + 2)); + return strings.toArray(new String[strings.size()]); + } + strings.add(""); + } else if (c == '\n') { + pos++; + } + } else { + // bug in GNU R: a final "\r" will be ignored + return strings.toArray(new String[strings.size()]); + } + } + lastPos = pos + 1; + } + pos++; + } + if (strings == null) { + return new String[]{value}; + } else { + strings.add(value.substring(lastPos)); + return strings.toArray(new String[strings.size()]); + } + } + public static class TextRConnection extends BaseRConnection { protected String description; private final RAbstractStringVector object; @@ -97,11 +146,13 @@ public class TextConnections { assert object != null; StringBuilder sb = new StringBuilder(); for (int i = 0; i < object.getLength(); i++) { + if (i > 0) { + sb.append('\n'); + } sb.append(object.getDataAt(i)); // vector elements are implicitly terminated with a newline - sb.append('\n'); } - lines = sb.toString().split("\\n"); + lines = splitLines(sb.toString(), -1); } @Override diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test index bdb20bc4c5..32b76d9697 100644 --- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test +++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test @@ -86035,6 +86035,62 @@ In writeChar(x, zz, nc, eos = NULL) : #{ s <- "äöüß"; rc <- rawConnection(raw(0), "w"); writeChar(s, rc); rawConnectionValue(rc) } [1] c3 a4 c3 b6 c3 bc c3 9f 00 +##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines# +#readLines(textConnection("foo")) +[1] "foo" + +##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines# +#readLines(textConnection("foo\n")) +[1] "foo" "" + +##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines# +#readLines(textConnection("foo\n\n\r")) +[1] "foo" "" "" + +##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines# +#readLines(textConnection("foo\n\n\rfoo")) +[1] "foo" "" "" "foo" + +##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines# +#readLines(textConnection("foo\n\r")) +[1] "foo" "" + +##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines# +#readLines(textConnection("foo\n\rfoo")) +[1] "foo" "" "foo" + +##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines# +#readLines(textConnection("foo\nfoo")) +[1] "foo" "foo" + +##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines# +#readLines(textConnection("foo\r")) +[1] "foo" + +##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines# +#readLines(textConnection("foo\r\n")) +[1] "foo" "" + +##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines# +#readLines(textConnection("foo\r\nfoo")) +[1] "foo" "foo" + +##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines# +#readLines(textConnection("foo\r\r\n")) +[1] "foo" "" "" "" + +##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines# +#readLines(textConnection("foo\r\r\nfoo")) +[1] "foo" "" "" "foo" + +##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines# +#readLines(textConnection("foo\rfoo")) +[1] "foo" "foo" + +##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines# +#readLines(textConnection("foofoo")) +[1] "foofoo" + ##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines#Output.MayIgnoreWarningContext# #{ zz <- file('',"w+b", blocking=F); writeBin(as.raw(c(97,98,99,100,0,101)), zz, useBytes=T); seek(zz, 0); res <- readLines(zz, 2, warn=F, skipNul=F); close(zz); res } [1] "abcd" diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/library/base/TestConnections.java b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/library/base/TestConnections.java index 55b38976ab..6601a88357 100644 --- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/library/base/TestConnections.java +++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/library/base/TestConnections.java @@ -208,6 +208,10 @@ public class TestConnections extends TestRBase { assertEval(Output.MayIgnoreWarningContext, TestBase.template( "{ zz <- file('',\"w+b\", blocking=%0); writeBin(as.raw(%1), zz, useBytes=T); seek(zz, 0); res <- readLines(zz, 2, warn=%2, skipNul=%3); close(zz); res }", LVAL, arr(lineWithNul, twoLinesOneNul, lineWithNulIncomp, twoLinesOneNulIncomp), LVAL, LVAL)); + + String[] endings = new String[]{"", "\\n", "\\r", "\\n\\r", "\\r\\n", "\\n\\n\\r", "\\r\\r\\n"}; + String[] text = new String[]{"", "foo"}; + assertEval(template("readLines(textConnection(\"foo%0%1\"))", endings, text)); } @Test -- GitLab