From 4346955ef2c6963e2f9c30714102fd876c284351 Mon Sep 17 00:00:00 2001
From: Tomas Stupka <tomas.stupka@oracle.com>
Date: Thu, 25 May 2017 17:42:05 +0200
Subject: [PATCH] fixed quotes handling in scan

---
 .../truffle/r/nodes/builtin/base/Scan.java    | 11 +++++-----
 .../r/test/builtins/TestBuiltin_scan.java     | 22 +++++++++++++++++++
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Scan.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Scan.java
index db79d22096..5640789929 100644
--- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Scan.java
+++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Scan.java
@@ -202,10 +202,10 @@ public abstract class Scan extends RBuiltinNode.Arg19 {
         }
     }
 
-    private static int getFirstQuoteInd(String str, char sepChar) {
-        int quoteInd = str.indexOf(sepChar);
+    private static int getFirstQuoteInd(String str, char quotechar, Character sepChar) {
+        int quoteInd = str.indexOf(quotechar);
         if (quoteInd >= 0) {
-            if (quoteInd == 0 || str.charAt(quoteInd - 1) == ' ' || str.charAt(quoteInd - 1) == '\t') {
+            if (quoteInd == 0 || (str.charAt(quoteInd - 1) == ' ' || str.charAt(quoteInd - 1) == '\t') || sepChar != null && str.charAt(quoteInd - 1) == sepChar) { // also accepted when directly preceded by the separator (if one is set)
                 // it's a quote character if it starts the string or is preceded by a blank space
                 return quoteInd;
             }
@@ -236,10 +236,11 @@ public abstract class Scan extends RBuiltinNode.Arg19 {
                 sepInd = str.indexOf(data.sepchar.charAt(0));
             }
 
-            int quoteInd = getFirstQuoteInd(str, data.quoteset.charAt(0));
+            Character sepChar = data.sepchar != null ? data.sepchar.charAt(0) : null;
+            int quoteInd = getFirstQuoteInd(str, data.quoteset.charAt(0), sepChar);
             char quoteChar = data.quoteset.charAt(0);
             for (int i = 1; i < data.quoteset.length(); i++) {
-                int ind = getFirstQuoteInd(str, data.quoteset.charAt(i));
+                int ind = getFirstQuoteInd(str, data.quoteset.charAt(i), sepChar);
                 if (ind >= 0 && (quoteInd == -1 || (quoteInd >= 0 && ind < quoteInd))) {
                     // update quoteInd if either the new index is smaller or the previous one (for
                     // another separator) was not found
diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_scan.java b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_scan.java
index fd9a88eff6..3c3508a978 100644
--- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_scan.java
+++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_scan.java
@@ -13,11 +13,23 @@ package com.oracle.truffle.r.test.builtins;
 import org.junit.Test;
 
 import com.oracle.truffle.r.test.TestBase;
+import java.io.File;
+import org.junit.After;
 
 // Checkstyle: stop line length check
 
 public class TestBuiltin_scan extends TestBase {
 
+    private static final String TEST_CVS_FILE = "__TestBuiltin_scan_testReadCsvTestFile.cvs";
+
+    /** Deletes the scratch file that testReadCsv writes; runs after every test. */
+    @After
+    public void cleanup() {
+        File csvFile = new File(TEST_CVS_FILE);
+        // delete() just returns false if the file was never created
+        csvFile.delete();
+    }
+
     @Test
     public void testScan() {
         // from scan's documentation
@@ -45,12 +57,22 @@ public class TestBuiltin_scan extends TestBase {
         assertEval("{ con<-textConnection(c(\"foo faz\", \"bar \\\"baz\\\"\")); scan(con, what=list(\"\", \"\")) }");
         assertEval("{ con<-textConnection(c(\"foo, faz\", \"bar, baz\")); scan(con, what=list(\"\", \"\"), sep=\",\") }");
 
+        assertEval("con<-textConnection(c(\"foo,\\\"bar,bar\\\"\")); scan(con, what=list(\"\"), sep=',')");
+        assertEval("con<-textConnection(c(\"foo,'bar,bar'\")); scan(con, what=list(\"\"), sep=',')");
+
         assertEval("{ con<-textConnection(c(\"bar'foo'\")); scan(con, what=list(\"\")) }");
         assertEval("{ con<-textConnection(c(\"'foo'\")); scan(con, what=list(\"\")) }");
         assertEval("{ con<-textConnection(c(\"bar 'foo'\")); scan(con, what=list(\"\")) }");
 
         // sep should not be treated as a regex:
         assertEval("con <- textConnection(\"A|B|C\\n1|2|3\\n4|5|6\"); read.csv(con, sep=\"|\")");
+
+    }
+
+    @Test
+    public void testReadCsv() {
+        // the quoted second field contains the separator; close() destroys the connection that writeLines only opens temporarily
+        assertEval("fileConn<-file('" + TEST_CVS_FILE + "'); writeLines(c('n1,n2\nv1,\"v5, v5\"\n'), fileConn); close(fileConn); m <- read.csv('" + TEST_CVS_FILE + "'); m");
+    }
 
     @Test
-- 
GitLab