From 9881ef5b12202eb4ad0b6da2cac0ecfbbd765972 Mon Sep 17 00:00:00 2001 From: Zbynek Slajchrt <zbynek.slajchrt@oracle.com> Date: Mon, 12 Feb 2018 19:56:22 +0100 Subject: [PATCH] '.' matches a new line for non-Perl regexp --- .../truffle/r/nodes/builtin/base/GrepFunctions.java | 9 +++++---- .../truffle/r/test/builtins/TestBuiltin_grepl.java | 4 ++++ .../oracle/truffle/r/test/builtins/TestBuiltin_gsub.java | 5 +++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/GrepFunctions.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/GrepFunctions.java index 2799b9c753..c81bcc02d8 100644 --- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/GrepFunctions.java +++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/GrepFunctions.java @@ -535,10 +535,11 @@ public class GrepFunctions { } else { replacement = convertGroups(replacement); + Matcher matcher = Pattern.compile(pattern, Pattern.DOTALL).matcher(input); if (gsub) { - value = input.replaceAll(pattern, replacement); + value = matcher.replaceAll(replacement); } else { - value = input.replaceFirst(pattern, replacement); + value = matcher.replaceFirst(replacement); } } result[i] = value; @@ -958,7 +959,7 @@ public class GrepFunctions { if (pattern.length() > 0 && pattern.charAt(0) == '*') { actualPattern = pattern.substring(1); } - return Pattern.compile(actualPattern, ignoreCase ? Pattern.CASE_INSENSITIVE : 0).matcher(text); + return Pattern.compile(actualPattern, Pattern.DOTALL | (ignoreCase ? Pattern.CASE_INSENSITIVE : 0)).matcher(text); } } @@ -1069,7 +1070,7 @@ public class GrepFunctions { @TruffleBoundary private static Matcher getPatternMatcher(String pattern, String text, boolean ignoreCase) { - return Pattern.compile(pattern, ignoreCase ? 
Pattern.CASE_INSENSITIVE : 0).matcher(text); + return Pattern.compile(pattern, Pattern.DOTALL | (ignoreCase ? Pattern.CASE_INSENSITIVE : 0)).matcher(text); } } diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_grepl.java b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_grepl.java index 1e2e745005..be8aa3cbe9 100644 --- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_grepl.java +++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_grepl.java @@ -79,5 +79,9 @@ public class TestBuiltin_grepl extends TestBase { assertEval("{ .Internal(grepl(7, \"7\", F, F, F, F, F, F)) }"); assertEval("{ .Internal(grepl(character(), \"7\", F, F, F, F, F, F)) }"); assertEval("{ .Internal(grepl(\"7\", 7, F, F, F, F, F, F)) }"); + // the dot matches the new line in a non-Perl regexp + assertEval("{ .Internal(grepl('.+X', 'a\nXb', F, F, F, F, F, F)) }"); + // the dot does not match the new line in a Perl regexp + assertEval("{ .Internal(grepl('.+X', 'a\nXb', F, F, T, F, F, F)) }"); } } diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_gsub.java b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_gsub.java index 05e8cbecd2..e070cbb031 100644 --- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_gsub.java +++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_gsub.java @@ -200,6 +200,11 @@ public class TestBuiltin_gsub extends TestBase { assertEval("{ gsub(pattern = 'a*', replacement = 'x', x = 'ÄaÄ', perl = TRUE) }"); assertEval("{ gsub(pattern = 'a*', replacement = 'x', x = 'ÄaaaaÄ', perl = TRUE) }"); + // the dot matches the new line in a non-Perl regexp + assertEval("{ gsub('.*X', '', 'a\nXb', perl = FALSE) }"); + // the dot does not match the new line in a Perl regexp + assertEval("{ gsub('.*X', '', 'a\nXb', perl = TRUE) }"); + // 
Expected output: [1] "xaxbx" // FastR output: [1] "axxxxxb" assertEval(Ignored.ImplementationError, "{ gsub(pattern = 'Ä*', replacement = 'x', x = 'aÄÄÄÄÄb', perl = TRUE) }"); -- GitLab