From 3b73898a52eaeea38d49432746d42d15f28b2ace Mon Sep 17 00:00:00 2001
From: Adam Welc <adam.welc@oracle.com>
Date: Tue, 1 Dec 2015 21:46:54 -0800
Subject: [PATCH] Added handling of fixed string matching in regexp.

---
 .../r/nodes/builtin/base/GrepFunctions.java  | 42 +++++++++++++------
 .../r/test/builtins/TestBuiltin_regexpr.java |  9 ++++
 2 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/GrepFunctions.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/GrepFunctions.java
index 3172c03387..22d90022d2 100644
--- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/GrepFunctions.java
+++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/GrepFunctions.java
@@ -560,27 +560,45 @@ public class GrepFunctions {
         @TruffleBoundary
         protected Object regexp(RAbstractStringVector patternArg, RAbstractStringVector vector, byte ignoreCaseL, byte perlL, byte fixedL, byte useBytesL) {
             controlVisibility();
-            checkExtraArgs(RRuntime.LOGICAL_FALSE, perlL, fixedL, useBytesL, RRuntime.LOGICAL_FALSE);
+            checkExtraArgs(RRuntime.LOGICAL_FALSE, perlL, RRuntime.LOGICAL_FALSE, useBytesL, RRuntime.LOGICAL_FALSE);
             boolean ignoreCase = RRuntime.fromLogical(ignoreCaseL);
             String pattern = RegExp.checkPreDefinedClasses(patternArg.getDataAt(0));
             int[] result = new int[vector.getLength()];
             for (int i = 0; i < vector.getLength(); i++) {
-                result[i] = findIndex(pattern, vector.getDataAt(i), ignoreCase).get(0);
+                result[i] = findIndex(pattern, vector.getDataAt(i), ignoreCase, fixedL == RRuntime.LOGICAL_TRUE).get(0);
             }
             return RDataFactory.createIntVector(result, RDataFactory.COMPLETE_VECTOR);
         }
 
-        protected static List<Integer> findIndex(String pattern, String text, boolean ignoreCase) {
-            Matcher m = getPatternMatcher(pattern, text, ignoreCase);
+        /**
+         * Finds the 1-based start positions of {@code pattern} in {@code text}.
+         *
+         * @param fixed if {@code true}, {@code pattern} is searched for as a literal string and only
+         *            the first occurrence is reported; otherwise the matcher obtained from
+         *            {@code getPatternMatcher} is used and every match is reported
+         * @return the match positions, or a single {@code -1} if there is no match
+         */
+        protected static List<Integer> findIndex(String pattern, String text, boolean ignoreCase, boolean fixed) {
             List<Integer> list = new ArrayList<>();
-            while (m.find()) {
-                // R starts counting at index 1
-                list.add(m.start() + 1);
-            }
-            if (list.size() > 0) {
-                return list;
+            if (fixed) {
+                int index;
+                if (ignoreCase) {
+                    index = text.toLowerCase().indexOf(pattern.toLowerCase());
+                } else {
+                    index = text.indexOf(pattern);
+                }
+                list.add(index == -1 ? index : index + 1);
+            } else {
+                Matcher m = getPatternMatcher(pattern, text, ignoreCase);
+                while (m.find()) {
+                    // R starts counting at index 1
+                    list.add(m.start() + 1);
+                }
+                if (list.size() > 0) {
+                    return list;
+                }
+                list.add(-1);
             }
-            list.add(-1);
             return list;
         }
 
@@ -603,7 +621,7 @@ public class GrepFunctions {
             String pattern = RegExp.checkPreDefinedClasses(patternArg.getDataAt(0));
             Object[] result = new Object[vector.getLength()];
             for (int i = 0; i < vector.getLength(); i++) {
-                int[] data = toIntArray(findIndex(pattern, vector.getDataAt(i), ignoreCase));
+                int[] data = toIntArray(findIndex(pattern, vector.getDataAt(i), ignoreCase, false));
                 result[i] = RDataFactory.createIntVector(data, RDataFactory.COMPLETE_VECTOR);
             }
             return RDataFactory.createList(result);
diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_regexpr.java b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_regexpr.java
index 4d526c2000..55610a73ca 100644
--- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_regexpr.java
+++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_regexpr.java
@@ -88,5 +88,14 @@ public class TestBuiltin_regexpr extends TestBase {
         assertEval(Ignored.Unknown, "regexpr(\"e\",c(\"arm\",\"foot\",\"lefroo\", \"bafoobar\"))");
         // NOTE: this is without attributes
         assertEval(Ignored.Unknown, "regexpr(\"(a)[^a]\\\\1\", c(\"andrea apart\", \"amadeus\", NA))");
+
+        // FIXME: missing attributes
+        assertEval(Ignored.Unknown, "{ regexpr(\"aaa\", \"bbbaaaccc\", fixed=TRUE) }");
+        assertEval(Ignored.Unknown, "{ regexpr(\"aaa\", c(\"bbbaaaccc\", \"haaah\"), fixed=TRUE) }");
+        assertEval(Ignored.Unknown, "{ regexpr(\"aaa\", c(\"bbbaaaccc\", \"hah\"), fixed=TRUE) }");
+
+        assertEval("{ x<-regexpr(\"aaa\", \"bbbaaaccc\", fixed=TRUE); c(x[1]) }");
+        assertEval("{ x<-regexpr(\"aaa\", c(\"bbbaaaccc\", \"haaah\"), fixed=TRUE); c(x[1], x[2]) }");
+        assertEval("{ x<-regexpr(\"aaa\", c(\"bbbaaaccc\", \"hah\"), fixed=TRUE); c(x[1], x[2]) }");
     }
 }
-- 
GitLab