diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/GrepFunctions.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/GrepFunctions.java
index e4eabf251c08f14a3b4c68cae04097fbc6f3fcd1..fde3ead9ca1f7e7420c22d151b7c30ec134b0af7 100644
--- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/GrepFunctions.java
+++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/GrepFunctions.java
@@ -1203,7 +1203,7 @@ public class GrepFunctions {
                             if (perl) {
                                 resultItem = splitPerl(data, pcreSplits[i % splits.length]);
                             } else {
-                                resultItem = splitIntl(data, currentSplit);
+                                resultItem = splitIntl(data, currentSplit, fixed);
                             }
                             if (resultItem.getLength() == 0) {
                                 if (fixed) {
@@ -1234,9 +1234,52 @@ public class GrepFunctions {
             }
         }
 
-        private static RStringVector splitIntl(String input, String separator) {
+        /**
+         * Splits {@code input} at every occurrence of {@code separator}.
+         *
+         * @param input the string to split; asserted not to be NA
+         * @param separator the text to split at: matched literally when {@code fixed} is set, otherwise handed to
+         *            {@link String#split(String)} as a regular expression
+         * @param fixed whether {@code separator} is literal text rather than a regular expression
+         * @return a string vector holding the pieces of {@code input} found around the separator matches
+         */
+        private static RStringVector splitIntl(String input, String separator, boolean fixed) {
             assert !RRuntime.isNA(input);
-            return RDataFactory.createStringVector(input.split(separator), true);
+
+            if (fixed) {
+                if (separator.isEmpty()) {
+                    // indexOf("") matches at every index without advancing, so the scan below would never terminate.
+                    // NOTE(review): assumes emptySplitIntl is the intended handler for an empty separator - confirm.
+                    return emptySplitIntl(input);
+                }
+                ArrayList<String> matches = new ArrayList<>();
+                int idx = input.indexOf(separator);
+                if (idx < 0) {
+                    // no occurrence at all: the result is the unsplit input
+                    return RDataFactory.createStringVector(input);
+                }
+                int lastIdx = 0;
+                while (idx > -1) {
+                    matches.add(input.substring(lastIdx, idx));
+                    // a match lies entirely inside input, so lastIdx can never run past input.length()
+                    lastIdx = idx + separator.length();
+                    idx = input.indexOf(separator, lastIdx);
+                }
+                // the piece after the last match is only reported when it is non-empty
+                String m = input.substring(lastIdx);
+                if (!m.isEmpty()) {
+                    matches.add(m);
+                }
+                return RDataFactory.createStringVector(matches.toArray(new String[matches.size()]), false);
+            } else {
+                // NOTE(review): this compares input with the pattern text, not with what the pattern matches; a
+                // separator containing regex metacharacters (e.g. "a+" for input "a+") also takes this shortcut.
+                if (input.equals(separator)) {
+                    return RDataFactory.createStringVector("");
+                } else {
+                    return RDataFactory.createStringVector(input.split(separator), true);
+                }
+            }
         }
 
         private static RStringVector emptySplitIntl(String input) {
diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_strsplit.java b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_strsplit.java
index f6d29dc8ce187711d5ca431f2ccac8c2555620b8..6dd39400b3cc162c225bc84ca80d3ae84b44b96e 100644
--- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_strsplit.java
+++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_strsplit.java
@@ -121,5 +121,30 @@ public class TestBuiltin_strsplit extends TestBase {
         assertEval("strsplit('oo bar baz', '[f z]', perl=TRUE)");
         assertEval("strsplit('foo \u1010ÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄbar baz ', '[f z]', perl=TRUE)");
         assertEval("strsplit('Ä Ä', '[ ]', perl=TRUE)");
+
+        assertEval("strsplit('1', '1', fixed=TRUE)");
+        assertEval("strsplit('11', '11', fixed=TRUE)");
+        assertEval("strsplit(c('1', '11'), c('1', '11'), fixed=TRUE)");
+        assertEval("strsplit('Ä', 'Ä', fixed=TRUE)");
+        assertEval("strsplit('ÄÄ', 'Ä', fixed=TRUE)");
+
+        assertEval("strsplit('1', '1', fixed=FALSE)");
+        assertEval("strsplit('11', '11', fixed=FALSE)");
+        assertEval("strsplit(c('1', '11'), c('1', '11'), fixed=FALSE)");
+        assertEval("strsplit('Ä', 'Ä', fixed=FALSE)");
+        assertEval("strsplit('ÄÄ', 'Ä', fixed=FALSE)");
+
+        assertEval("strsplit(c('111', '1'), c('111', '1'), fixed=TRUE)");
+        assertEval("strsplit(c('1', ''), c('1', ''), fixed=TRUE)");
+        assertEval("strsplit(c('1', 'b'), c('1', 'b'), fixed=TRUE)");
+        assertEval("strsplit(c('a1a', 'a1b'), c('1', '1'), fixed=TRUE)");
+        assertEval("strsplit(c('a1a', 'a1b'), '1', fixed=TRUE)");
+
+        assertEval("strsplit(c('111', '1'), c('111', '1'), fixed=FALSE)");
+        assertEval("strsplit(c('1', ''), c('1', ''), fixed=FALSE)");
+        assertEval("strsplit(c('1', 'b'), c('1', 'b'), fixed=FALSE)");
+        assertEval("strsplit(c('a1a', 'a1b'), c('1', '1'), fixed=FALSE)");
+        assertEval("strsplit(c('a1a', 'a1b'), '1', fixed=FALSE)");
+
     }
 }