Skip to content
Snippets Groups Projects
Commit 4346955e authored by Tomas Stupka's avatar Tomas Stupka
Browse files

fixed quotes handling in scan

parent 4cbb70bc
Branches
No related tags found
No related merge requests found
...@@ -202,10 +202,10 @@ public abstract class Scan extends RBuiltinNode.Arg19 { ...@@ -202,10 +202,10 @@ public abstract class Scan extends RBuiltinNode.Arg19 {
} }
} }
private static int getFirstQuoteInd(String str, char sepChar) { private static int getFirstQuoteInd(String str, char quotechar, Character sepChar) {
int quoteInd = str.indexOf(sepChar); int quoteInd = str.indexOf(quotechar);
if (quoteInd >= 0) { if (quoteInd >= 0) {
if (quoteInd == 0 || str.charAt(quoteInd - 1) == ' ' || str.charAt(quoteInd - 1) == '\t') { if (quoteInd == 0 || (str.charAt(quoteInd - 1) == ' ' || str.charAt(quoteInd - 1) == '\t') || sepChar != null && str.charAt(quoteInd - 1) == sepChar) {
// it's a quote character if it starts the string or is preceded by a blank space // it's a quote character if it starts the string or is preceded by a blank space
return quoteInd; return quoteInd;
} }
...@@ -236,10 +236,11 @@ public abstract class Scan extends RBuiltinNode.Arg19 { ...@@ -236,10 +236,11 @@ public abstract class Scan extends RBuiltinNode.Arg19 {
sepInd = str.indexOf(data.sepchar.charAt(0)); sepInd = str.indexOf(data.sepchar.charAt(0));
} }
int quoteInd = getFirstQuoteInd(str, data.quoteset.charAt(0)); Character sepChar = data.sepchar != null ? data.sepchar.charAt(0) : null;
int quoteInd = getFirstQuoteInd(str, data.quoteset.charAt(0), sepChar);
char quoteChar = data.quoteset.charAt(0); char quoteChar = data.quoteset.charAt(0);
for (int i = 1; i < data.quoteset.length(); i++) { for (int i = 1; i < data.quoteset.length(); i++) {
int ind = getFirstQuoteInd(str, data.quoteset.charAt(i)); int ind = getFirstQuoteInd(str, data.quoteset.charAt(i), sepChar);
if (ind >= 0 && (quoteInd == -1 || (quoteInd >= 0 && ind < quoteInd))) { if (ind >= 0 && (quoteInd == -1 || (quoteInd >= 0 && ind < quoteInd))) {
// update quoteInd if either the new index is smaller or the previous one (for // update quoteInd if either the new index is smaller or the previous one (for
// another separator) was not found // another separator) was not found
......
...@@ -13,11 +13,23 @@ package com.oracle.truffle.r.test.builtins; ...@@ -13,11 +13,23 @@ package com.oracle.truffle.r.test.builtins;
import org.junit.Test; import org.junit.Test;
import com.oracle.truffle.r.test.TestBase; import com.oracle.truffle.r.test.TestBase;
import java.io.File;
import org.junit.After;
// Checkstyle: stop line length check // Checkstyle: stop line length check
public class TestBuiltin_scan extends TestBase { public class TestBuiltin_scan extends TestBase {
private static final String TEST_CVS_FILE = "__TestBuiltin_scan_testReadCsvTestFile.cvs";
/**
 * Removes the temporary file named by TEST_CVS_FILE after each test so that
 * a file written by one test does not linger in the working directory.
 */
@After
public void cleanup() {
File f = new File(TEST_CVS_FILE);
// File.delete() reports failure through its return value rather than an exception;
// if the file cannot be removed right now, schedule removal for JVM shutdown instead
// of silently leaving it behind.
if (f.exists() && !f.delete()) {
f.deleteOnExit();
}
}
@Test @Test
public void testScan() { public void testScan() {
// from scan's documentation // from scan's documentation
...@@ -45,12 +57,22 @@ public class TestBuiltin_scan extends TestBase { ...@@ -45,12 +57,22 @@ public class TestBuiltin_scan extends TestBase {
assertEval("{ con<-textConnection(c(\"foo faz\", \"bar \\\"baz\\\"\")); scan(con, what=list(\"\", \"\")) }"); assertEval("{ con<-textConnection(c(\"foo faz\", \"bar \\\"baz\\\"\")); scan(con, what=list(\"\", \"\")) }");
assertEval("{ con<-textConnection(c(\"foo, faz\", \"bar, baz\")); scan(con, what=list(\"\", \"\"), sep=\",\") }"); assertEval("{ con<-textConnection(c(\"foo, faz\", \"bar, baz\")); scan(con, what=list(\"\", \"\"), sep=\",\") }");
assertEval("con<-textConnection(c(\"foo,\\\"bar,bar\\\"\")); scan(con, what=list(\"\"), sep=',')");
assertEval("con<-textConnection(c(\"foo,'bar,bar'\")); scan(con, what=list(\"\"), sep=',')");
assertEval("{ con<-textConnection(c(\"bar'foo'\")); scan(con, what=list(\"\")) }"); assertEval("{ con<-textConnection(c(\"bar'foo'\")); scan(con, what=list(\"\")) }");
assertEval("{ con<-textConnection(c(\"'foo'\")); scan(con, what=list(\"\")) }"); assertEval("{ con<-textConnection(c(\"'foo'\")); scan(con, what=list(\"\")) }");
assertEval("{ con<-textConnection(c(\"bar 'foo'\")); scan(con, what=list(\"\")) }"); assertEval("{ con<-textConnection(c(\"bar 'foo'\")); scan(con, what=list(\"\")) }");
// sep should not be treated as a regex: // sep should not be treated as a regex:
assertEval("con <- textConnection(\"A|B|C\\n1|2|3\\n4|5|6\"); read.csv(con, sep=\"|\")"); assertEval("con <- textConnection(\"A|B|C\\n1|2|3\\n4|5|6\"); read.csv(con, sep=\"|\")");
}
// Exercises read.csv on a file whose second data field is double-quoted and
// itself contains the separator character (a comma).
@Test
public void testReadCsv() {
// header line "n1,n2" followed by a single data row: v1 and the quoted field "v5, v5"
String testData = "n1,n2\nv1,\"v5, v5\"\n";
// the R snippet writes testData to TEST_CVS_FILE through a file connection, reads the
// file back with read.csv and evaluates the result
// NOTE(review): fileConn is never explicitly closed in the snippet - confirm this is intended
assertEval("fileConn<-file('" + TEST_CVS_FILE + "'); writeLines(c('" + testData + "'), fileConn); m <- read.csv('" + TEST_CVS_FILE + "'); m");
} }
@Test @Test
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment