From 4346955ef2c6963e2f9c30714102fd876c284351 Mon Sep 17 00:00:00 2001
From: Tomas Stupka <tomas.stupka@oracle.com>
Date: Thu, 25 May 2017 17:42:05 +0200
Subject: [PATCH] fixed quotes handling in scan

---
Review notes (free text placed after the three-dash line; not part of the
commit message and not part of any hunk):

* Scan.getFirstQuoteInd() gains a nullable sepChar parameter, and its second
  parameter is renamed from sepChar to quotechar. A quote character is now
  also reported when the character directly before it equals sepChar, in
  addition to the two existing cases (quote at index 0, or preceded by ' ' or
  '\t'). The unchanged context comment inside that hunk still mentions only
  the existing cases.
* The caller computes sepChar once as the first character of data.sepchar, or
  null when data.sepchar is null, and passes it for every character of
  data.quoteset.
* TestBuiltin_scan adds two scan() cases with sep=',' in which a quoted field
  contains the separator (once with double quotes, once with single quotes),
  and a new testReadCsv() that writes testData to TEST_CVS_FILE with
  writeLines() and reads it back with read.csv(). The new @After method
  cleanup() deletes that file if it exists.
* The last hunk also adds an empty line in front of the closing brace of
  testScan().
* NOTE(review): "CVS" in TEST_CVS_FILE and the ".cvs" file extension look like
  a typo for CSV. Confirm before renaming; the name is part of the R code
  string passed to assertEval().
* NOTE(review): this copy of the patch had its line breaks collapsed. Line
  structure and indentation below are reconstructed; blank context lines were
  placed so that each hunk's line counts match its @@ header. Verify the
  whitespace of context lines against the tree before applying.

 .../truffle/r/nodes/builtin/base/Scan.java    | 11 +++++-----
 .../r/test/builtins/TestBuiltin_scan.java     | 22 +++++++++++++++++++
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Scan.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Scan.java
index db79d22096..5640789929 100644
--- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Scan.java
+++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Scan.java
@@ -202,10 +202,10 @@ public abstract class Scan extends RBuiltinNode.Arg19 {
         }
     }
 
-    private static int getFirstQuoteInd(String str, char sepChar) {
-        int quoteInd = str.indexOf(sepChar);
+    private static int getFirstQuoteInd(String str, char quotechar, Character sepChar) {
+        int quoteInd = str.indexOf(quotechar);
         if (quoteInd >= 0) {
-            if (quoteInd == 0 || str.charAt(quoteInd - 1) == ' ' || str.charAt(quoteInd - 1) == '\t') {
+            if (quoteInd == 0 || (str.charAt(quoteInd - 1) == ' ' || str.charAt(quoteInd - 1) == '\t') || sepChar != null && str.charAt(quoteInd - 1) == sepChar) {
                 // it's a quote character if it starts the string or is preceded by a blank space
                 return quoteInd;
             }
@@ -236,10 +236,11 @@ public abstract class Scan extends RBuiltinNode.Arg19 {
                 sepInd = str.indexOf(data.sepchar.charAt(0));
             }
 
-            int quoteInd = getFirstQuoteInd(str, data.quoteset.charAt(0));
+            Character sepChar = data.sepchar != null ? data.sepchar.charAt(0) : null;
+            int quoteInd = getFirstQuoteInd(str, data.quoteset.charAt(0), sepChar);
             char quoteChar = data.quoteset.charAt(0);
             for (int i = 1; i < data.quoteset.length(); i++) {
-                int ind = getFirstQuoteInd(str, data.quoteset.charAt(i));
+                int ind = getFirstQuoteInd(str, data.quoteset.charAt(i), sepChar);
                 if (ind >= 0 && (quoteInd == -1 || (quoteInd >= 0 && ind < quoteInd))) {
                     // update quoteInd if either the new index is smaller or the previous one (for
                     // another separator) was not found
diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_scan.java b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_scan.java
index fd9a88eff6..3c3508a978 100644
--- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_scan.java
+++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_scan.java
@@ -13,11 +13,23 @@ package com.oracle.truffle.r.test.builtins;
 
 import org.junit.Test;
 
 import com.oracle.truffle.r.test.TestBase;
+import java.io.File;
+import org.junit.After;
 
 // Checkstyle: stop line length check
 public class TestBuiltin_scan extends TestBase {
 
+    private static final String TEST_CVS_FILE = "__TestBuiltin_scan_testReadCsvTestFile.cvs";
+
+    @After
+    public void cleanup() {
+        File f = new File(TEST_CVS_FILE);
+        if (f.exists()) {
+            f.delete();
+        }
+    }
+
     @Test
     public void testScan() {
         // from scan's documentation
@@ -45,12 +57,22 @@ public class TestBuiltin_scan extends TestBase {
         assertEval("{ con<-textConnection(c(\"foo faz\", \"bar \\\"baz\\\"\")); scan(con, what=list(\"\", \"\")) }");
         assertEval("{ con<-textConnection(c(\"foo, faz\", \"bar, baz\")); scan(con, what=list(\"\", \"\"), sep=\",\") }");
 
+        assertEval("con<-textConnection(c(\"foo,\\\"bar,bar\\\"\")); scan(con, what=list(\"\"), sep=',')");
+        assertEval("con<-textConnection(c(\"foo,'bar,bar'\")); scan(con, what=list(\"\"), sep=',')");
+
         assertEval("{ con<-textConnection(c(\"bar'foo'\")); scan(con, what=list(\"\")) }");
         assertEval("{ con<-textConnection(c(\"'foo'\")); scan(con, what=list(\"\")) }");
         assertEval("{ con<-textConnection(c(\"bar 'foo'\")); scan(con, what=list(\"\")) }");
 
         // sep should not be treated as a regex:
         assertEval("con <- textConnection(\"A|B|C\\n1|2|3\\n4|5|6\"); read.csv(con, sep=\"|\")");
+
+    }
+
+    @Test
+    public void testReadCsv() {
+        String testData = "n1,n2\nv1,\"v5, v5\"\n";
+        assertEval("fileConn<-file('" + TEST_CVS_FILE + "'); writeLines(c('" + testData + "'), fileConn); m <- read.csv('" + TEST_CVS_FILE + "'); m");
     }
 
     @Test
-- 
GitLab