From 41339a76811af0c7d8b90be241e1b287b5313317 Mon Sep 17 00:00:00 2001
From: Lukas Stadler <lukas.stadler@oracle.com>
Date: Thu, 23 Aug 2018 16:27:57 +0200
Subject: [PATCH] treat CR and LF in readLine similar to GNU R

---
 .../r/runtime/conn/ConnectionSupport.java     |  6 +-
 .../r/runtime/conn/TextConnections.java       | 55 +++++++++++++++++-
 .../truffle/r/test/ExpectedTestOutput.test    | 56 +++++++++++++++++++
 .../r/test/library/base/TestConnections.java  |  4 ++
 4 files changed, 116 insertions(+), 5 deletions(-)

diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java
index 27aa8990f6..17d31db470 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/ConnectionSupport.java
@@ -902,7 +902,7 @@ public class ConnectionSupport {
             if (s == null) {
                 return null;
             } else {
-                String[] lines = s.split("\n", 2);
+                String[] lines = TextConnections.splitLines(s, 2);
                 if (lines.length == 2) {
                     // we hit end of the line
                     if (lines[1].length() != 0) {
@@ -923,7 +923,7 @@ public class ConnectionSupport {
                             break;
                         }
 
-                        lines = s.split("\n", 2);
+                        lines = TextConnections.splitLines(s, 2);
                         if (lines.length == 2) {
                             // we hit end of the line
                             if (lines[1].length() != 0) {
@@ -947,7 +947,7 @@ public class ConnectionSupport {
          */
         @TruffleBoundary
         private String[] readLinesWithPushBack(int n, EnumSet<ReadLineWarning> warn, boolean skipNul) throws IOException {
-            // NOTE: 'n' may be negative indicating to read as much lines as available
+            // NOTE: 'n' may be negative indicating to read as many lines as available
             final List<String> res;
             if (n >= 0) {
                 res = new ArrayList<>(n);
diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/TextConnections.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/TextConnections.java
index 7695a0d5be..418df719ce 100644
--- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/TextConnections.java
+++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/conn/TextConnections.java
@@ -41,6 +41,55 @@ import com.oracle.truffle.r.runtime.env.REnvironment;
 import com.oracle.truffle.r.runtime.env.REnvironment.PutException;
 
 public class TextConnections {
+
+    public static String[] splitLines(String value, int limit) {
+        assert limit != 0;
+        ArrayList<String> strings = null;
+
+        int lastPos = 0;
+        int pos = 0;
+        while (pos < value.length()) {
+            char c = value.charAt(pos);
+            if (c == '\n' || c == '\r') {
+                if (strings == null) {
+                    strings = new ArrayList<>();
+                }
+                if (limit != -1 && strings.size() == limit - 1) {
+                    strings.add(value.substring(lastPos));
+                    return strings.toArray(new String[strings.size()]);
+                }
+                strings.add(value.substring(lastPos, pos));
+                // a "\r\n" pair counts as a single line terminator
+                if (c == '\r') {
+                    if ((pos + 1) < value.length()) {
+                        c = value.charAt(pos + 1);
+                        if (c == '\r') {
+                            // bug in GNU R: a second "\r" is immediately treated as an EOL
+                            if (limit != -1 && strings.size() == limit - 1) {
+                                strings.add(value.substring(pos + 2));
+                                return strings.toArray(new String[strings.size()]);
+                            }
+                            strings.add("");
+                        } else if (c == '\n') {
+                            pos++;
+                        }
+                    } else {
+                        // bug in GNU R: a final "\r" ends the line but adds no trailing empty string
+                        return strings.toArray(new String[strings.size()]);
+                    }
+                }
+                lastPos = pos + 1;
+            }
+            pos++;
+        }
+        if (strings == null) {
+            return new String[]{value};
+        } else {
+            strings.add(value.substring(lastPos));
+            return strings.toArray(new String[strings.size()]);
+        }
+    }
+
     public static class TextRConnection extends BaseRConnection {
         protected String description;
         private final RAbstractStringVector object;
@@ -97,11 +146,13 @@ public class TextConnections {
             assert object != null;
             StringBuilder sb = new StringBuilder();
             for (int i = 0; i < object.getLength(); i++) {
+                if (i > 0) {
+                    sb.append('\n');
+                }
                 sb.append(object.getDataAt(i));
                 // vector elements are implicitly terminated with a newline
-                sb.append('\n');
             }
-            lines = sb.toString().split("\\n");
+            lines = splitLines(sb.toString(), -1);
         }
 
         @Override
diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test
index bdb20bc4c5..32b76d9697 100644
--- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test
+++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test
@@ -86035,6 +86035,62 @@ In writeChar(x, zz, nc, eos = NULL) :
 #{ s <- "äöüß"; rc <- rawConnection(raw(0), "w"); writeChar(s, rc); rawConnectionValue(rc) }
 [1] c3 a4 c3 b6 c3 bc c3 9f 00
 
+##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines#
+#readLines(textConnection("foo"))
+[1] "foo"
+
+##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines#
+#readLines(textConnection("foo\n"))
+[1] "foo" ""
+
+##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines#
+#readLines(textConnection("foo\n\n\r"))
+[1] "foo" ""    ""
+
+##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines#
+#readLines(textConnection("foo\n\n\rfoo"))
+[1] "foo" ""    ""    "foo"
+
+##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines#
+#readLines(textConnection("foo\n\r"))
+[1] "foo" ""
+
+##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines#
+#readLines(textConnection("foo\n\rfoo"))
+[1] "foo" ""    "foo"
+
+##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines#
+#readLines(textConnection("foo\nfoo"))
+[1] "foo" "foo"
+
+##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines#
+#readLines(textConnection("foo\r"))
+[1] "foo"
+
+##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines#
+#readLines(textConnection("foo\r\n"))
+[1] "foo" ""
+
+##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines#
+#readLines(textConnection("foo\r\nfoo"))
+[1] "foo" "foo"
+
+##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines#
+#readLines(textConnection("foo\r\r\n"))
+[1] "foo" ""    ""    ""
+
+##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines#
+#readLines(textConnection("foo\r\r\nfoo"))
+[1] "foo" ""    ""    "foo"
+
+##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines#
+#readLines(textConnection("foo\rfoo"))
+[1] "foo" "foo"
+
+##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines#
+#readLines(textConnection("foofoo"))
+[1] "foofoo"
+
 ##com.oracle.truffle.r.test.library.base.TestConnections.testReadLines#Output.MayIgnoreWarningContext#
 #{ zz <- file('',"w+b", blocking=F); writeBin(as.raw(c(97,98,99,100,0,101)), zz, useBytes=T); seek(zz, 0); res <- readLines(zz, 2, warn=F, skipNul=F); close(zz); res }
 [1] "abcd"
diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/library/base/TestConnections.java b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/library/base/TestConnections.java
index 55b38976ab..6601a88357 100644
--- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/library/base/TestConnections.java
+++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/library/base/TestConnections.java
@@ -208,6 +208,10 @@ public class TestConnections extends TestRBase {
         assertEval(Output.MayIgnoreWarningContext, TestBase.template(
                         "{ zz <- file('',\"w+b\", blocking=%0); writeBin(as.raw(%1), zz, useBytes=T); seek(zz, 0); res <- readLines(zz, 2, warn=%2, skipNul=%3); close(zz); res }",
                         LVAL, arr(lineWithNul, twoLinesOneNul, lineWithNulIncomp, twoLinesOneNulIncomp), LVAL, LVAL));
+
+        String[] endings = new String[]{"", "\\n", "\\r", "\\n\\r", "\\r\\n", "\\n\\n\\r", "\\r\\r\\n"};
+        String[] text = new String[]{"", "foo"};
+        assertEval(template("readLines(textConnection(\"foo%0%1\"))", endings, text));
     }
 
     @Test
-- 
GitLab