From 00db58e07da83f152d444a0c28dacc4591ee2e3d Mon Sep 17 00:00:00 2001 From: Zbynek Slajchrt <zbynek.slajchrt@oracle.com> Date: Thu, 8 Feb 2018 12:16:42 +0100 Subject: [PATCH] Parse data attached to the 'srcfile' attribute --- .../truffle/r/nodes/builtin/base/Parse.java | 179 ++++++++++++++++++ .../oracle/truffle/r/runtime/RDeparse.java | 6 +- .../truffle/r/test/ExpectedTestOutput.test | 37 ++++ .../r/test/builtins/TestBuiltin_parse.java | 11 +- 4 files changed, 229 insertions(+), 4 deletions(-) diff --git a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Parse.java b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Parse.java index 78a6313f14..bf81f8a617 100644 --- a/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Parse.java +++ b/com.oracle.truffle.r.nodes.builtin/src/com/oracle/truffle/r/nodes/builtin/base/Parse.java @@ -30,7 +30,9 @@ import static com.oracle.truffle.r.runtime.builtins.RBuiltinKind.INTERNAL; import java.io.File; import java.io.IOException; import java.net.URISyntaxException; +import java.util.ArrayList; import java.util.EnumSet; +import java.util.List; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.dsl.Specialization; @@ -41,6 +43,7 @@ import com.oracle.truffle.r.nodes.builtin.RBuiltinNode; import com.oracle.truffle.r.nodes.unary.CastIntegerNode; import com.oracle.truffle.r.nodes.unary.CastStringNode; import com.oracle.truffle.r.nodes.unary.CastToVectorNode; +import com.oracle.truffle.r.runtime.RDeparse; import com.oracle.truffle.r.runtime.RError; import com.oracle.truffle.r.runtime.RInternalError; import com.oracle.truffle.r.runtime.RRuntime; @@ -57,12 +60,19 @@ import com.oracle.truffle.r.runtime.context.RContext; import com.oracle.truffle.r.runtime.data.RComplex; import com.oracle.truffle.r.runtime.data.RDataFactory; import com.oracle.truffle.r.runtime.data.RExpression; +import 
com.oracle.truffle.r.runtime.data.RIntVector; import com.oracle.truffle.r.runtime.data.RLanguage; import com.oracle.truffle.r.runtime.data.RNull; import com.oracle.truffle.r.runtime.data.RSymbol; import com.oracle.truffle.r.runtime.data.model.RAbstractStringVector; import com.oracle.truffle.r.runtime.env.REnvironment; import com.oracle.truffle.r.runtime.nodes.RBaseNode; +import com.oracle.truffle.r.runtime.nodes.RSyntaxCall; +import com.oracle.truffle.r.runtime.nodes.RSyntaxConstant; +import com.oracle.truffle.r.runtime.nodes.RSyntaxElement; +import com.oracle.truffle.r.runtime.nodes.RSyntaxFunction; +import com.oracle.truffle.r.runtime.nodes.RSyntaxLookup; +import com.oracle.truffle.r.runtime.nodes.RSyntaxVisitor; /** * Internal component of the {@code parse} base package function. @@ -269,5 +279,174 @@ public abstract class Parse extends RBuiltinNode.Arg6 { setWholeSrcRefAttrNode.execute(exprs, RDataFactory.createIntVector(wholeSrcrefData, RDataFactory.COMPLETE_VECTOR)); setSrcFileAttrNode.execute(exprs, srcFile); + + RIntVector parseData = new ParseDataVisitor(exprs).getParseData(); + srcFile.safePut("parseData", parseData); + } + + /** + * This class aspires to reconstruct the original parse tree by visiting the parsed + * expression(s). + * + * The current implementation does not reconstruct the parse tree completely. Instead, it deals + * with symbols only (i.e. a flat tree is produced), which should suffice for templating + * packages, such as knitr. + */ + static class ParseDataVisitor extends RSyntaxVisitor<Object> { + + private final RExpression exprs; + + /** + * A list of parse data octets. Every octet corresponds to one terminal/non-terminal. 
The octet is + * composed as follows: + * + * <pre> + * line1 col1 line2 col2 terminal token id parent + * </pre> + * + */ + private final List<Integer> data = new ArrayList<>(); + private boolean containsNA = false; + + private final List<String> tokens = new ArrayList<>(); + + private final List<String> text = new ArrayList<>(); + + private int idCounter = 0; + + ParseDataVisitor(RExpression exprs) { + this.exprs = exprs; + } + + /** + * This enum mimics the <code>yytokentype</code> enum from <code>src/main/gram.c</code>. + */ + enum TokenType { + SYMBOL(263, true), + SYMBOL_FUNCTION_CALL(296, true), + SPECIAL(304, true); + + final int code; + final boolean terminal; + + TokenType(int c, boolean term) { + this.code = c; + this.terminal = term; + } + } + + private void addOctet(RSyntaxElement element, TokenType tokenType, String txt) { + addOctet(element.getSourceSection().getStartLine(), element.getSourceSection().getStartColumn(), element.getSourceSection().getEndLine(), element.getSourceSection().getEndColumn(), + tokenType, txt); + } + + private void addOctet(RSymbol symbol) { + addOctet(RRuntime.INT_NA, RRuntime.INT_NA, RRuntime.INT_NA, RRuntime.INT_NA, TokenType.SYMBOL, symbol.getName()); + containsNA = true; + } + + private void addOctet(int startLine, int startColumn, int endLine, int endColumn, TokenType tokenType, String txt) { + // TODO: adjust the parentId correctly + int parentId = 0; + + data.add(startLine); + data.add(startColumn); + data.add(endLine); + data.add(endColumn); + data.add(tokenType.terminal ? 
1 : 0); + data.add(tokenType.code); + data.add(idCounter); + data.add(parentId); + + tokens.add(tokenType.name()); + text.add(txt); + + idCounter++; + } + + @TruffleBoundary + RIntVector getParseData() { + int exprLen = exprs.getLength(); + for (int i = 0; i < exprLen; i++) { + Object x = exprs.getDataAt(i); + if (x instanceof RLanguage) { + RBaseNode rep = ((RLanguage) x).getRep(); + assert rep instanceof RSyntaxElement; + accept((RSyntaxElement) rep); + } else if (x instanceof RSymbol) { + addOctet((RSymbol) x); + } else { + // TODO: primitives + } + } + + int[] dataArray = new int[data.size()]; + for (int i = 0; i < dataArray.length; i++) { + dataArray[i] = data.get(i); + } + RIntVector parseData = RDataFactory.createIntVector(dataArray, !containsNA); + + String[] textArray = new String[text.size()]; + for (int i = 0; i < textArray.length; i++) { + textArray[i] = text.get(i); + } + + String[] tokensArray = new String[tokens.size()]; + for (int i = 0; i < tokensArray.length; i++) { + tokensArray[i] = tokens.get(i); + } + + parseData.setAttr("text", RDataFactory.createStringVector(textArray, true)); + parseData.setAttr("tokens", RDataFactory.createStringVector(tokensArray, true)); + parseData.setClassAttr(RDataFactory.createStringVector("parseData")); + parseData.setDimensions(new int[]{8, idCounter}); + return parseData; + } + + @Override + protected Object visit(RSyntaxCall element) { + RSyntaxElement lhs = element.getSyntaxLHS(); + if (lhs instanceof RSyntaxLookup) { + String symbol = ((RSyntaxLookup) lhs).getIdentifier(); + RDeparse.Func func = RDeparse.getFunc(symbol); + if (func == null) { + addOctet(element, TokenType.SYMBOL_FUNCTION_CALL, symbol); + } + } + + RSyntaxElement[] args = element.getSyntaxArguments(); + for (int i = 0; i < args.length; i++) { + accept(args[i]); + } + + return null; + } + + @Override + protected Object visit(RSyntaxConstant element) { + // TODO: recognize constants + return null; + } + + @Override + protected Object 
visit(RSyntaxLookup element) { + String symbol = element.getIdentifier(); + addOctet(element, TokenType.SYMBOL, symbol); + return null; + } + + @Override + protected Object visit(RSyntaxFunction element) { + for (RSyntaxElement arg : element.getSyntaxArgumentDefaults()) { + if (arg != null) { + accept(arg); + } + } + + accept(element.getSyntaxBody()); + + return null; + } + } } diff --git a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RDeparse.java b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RDeparse.java index 51628b9c63..4c065da548 100644 --- a/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RDeparse.java +++ b/com.oracle.truffle.r.runtime/src/com/oracle/truffle/r/runtime/RDeparse.java @@ -137,7 +137,7 @@ public class RDeparse { public static final int PREC_NS = 16; public static final int PREC_SUBSET = 17; - private static class PPInfo { + public static class PPInfo { public final PP kind; public final int prec; public final boolean rightassoc; @@ -153,7 +153,7 @@ public class RDeparse { } } - private static class Func { + public static class Func { public final String op; public final String closeOp; public final PPInfo info; @@ -209,7 +209,7 @@ public class RDeparse { private static final PPInfo USERBINOP = new PPInfo(PP.BINARY, PREC_PERCENT, false); - private static Func getFunc(String op) { + public static Func getFunc(String op) { for (Func func : FUNCTAB) { if (func.op.equals(op)) { return func; diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test index a71b1f6021..c4da647600 100644 --- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test +++ b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/ExpectedTestOutput.test @@ -47130,6 +47130,43 @@ expression(NULL) #.Internal(parse(stdin(), c(1,2), c('expr1', 'expr2'), '?', '<weird-text', 'unknown')) 
expression(expr1) +##com.oracle.truffle.r.test.builtins.TestBuiltin_parse.testParseData# +#if (!any(R.version$engine == "FastR")) { structure(c(1L, 1L, 1L, 1L, 1L, 263L, 0L, 0L), text = "x", tokens = "SYMBOL", class = "parseData", .Dim = c(8L, 1L)) } else { p <- parse(text = 'x = 1', keep.source = TRUE); attr(p, 'srcfile')$parseData } + [,1] +[1,] 1 +[2,] 1 +[3,] 1 +[4,] 1 +[5,] 1 +[6,] 263 +[7,] 0 +[8,] 0 +attr(,"text") +[1] "x" +attr(,"tokens") +[1] "SYMBOL" +attr(,"class") +[1] "parseData" + +##com.oracle.truffle.r.test.builtins.TestBuiltin_parse.testParseData# +#if (!any(R.version$engine == "FastR")) { structure(c(1L, 1L, 1L, 1L, 1L, 263L, 0L, 0L, 1L, 5L, 1L, 5L, 1L, 263L, 1L, 0L, 1L, 12L, 1L, 28L, 1L, 296L, 2L, 0L, 1L, 27L, 1L, 27L, 1L, 263L, 3L, 0L, 1L, 31L, 1L, 32L, 1L, 263L, 4L, 0L, 1L, 51L, 1L, 51L, 1L, 263L, 5L, 0L), text = c("x", "x", "rnorm", "z", "f2", "a"), tokens = c("SYMBOL", "SYMBOL", "SYMBOL_FUNCTION_CALL", "SYMBOL", "SYMBOL", "SYMBOL"), class = "parseData", .Dim = c(8L, 6L)) } else { p <- parse(text = 'x = x + 1; rnorm(1, std = z); f2 <- function(a=1) a', keep.source = TRUE); attr(p, 'srcfile')$parseData } + [,1] [,2] [,3] [,4] [,5] [,6] +[1,] 1 1 1 1 1 1 +[2,] 1 5 12 27 31 51 +[3,] 1 1 1 1 1 1 +[4,] 1 5 28 27 32 51 +[5,] 1 1 1 1 1 1 +[6,] 263 263 296 263 263 263 +[7,] 0 1 2 3 4 5 +[8,] 0 0 0 0 0 0 +attr(,"text") +[1] "x" "x" "rnorm" "z" "f2" "a" +attr(,"tokens") +[1] "SYMBOL" "SYMBOL" "SYMBOL_FUNCTION_CALL" +[4] "SYMBOL" "SYMBOL" "SYMBOL" +attr(,"class") +[1] "parseData" + ##com.oracle.truffle.r.test.builtins.TestBuiltin_parse.testParseDataFrame# #eval(parse(text=deparse(data.frame(x=c(1))))) x diff --git a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_parse.java b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_parse.java index dad626ba6f..4d9589c26e 100644 --- a/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_parse.java +++ 
b/com.oracle.truffle.r.test/src/com/oracle/truffle/r/test/builtins/TestBuiltin_parse.java @@ -4,7 +4,7 @@ * http://www.gnu.org/licenses/gpl-2.0.html * * Copyright (c) 2012-2014, Purdue University - * Copyright (c) 2013, 2017, Oracle and/or its affiliates + * Copyright (c) 2013, 2018, Oracle and/or its affiliates * * All rights reserved. */ @@ -66,4 +66,13 @@ public class TestBuiltin_parse extends TestBase { public void testSrcfile() { assertEval("parse(text='', srcfile=srcfile(system.file('testfile')))"); } + + @Test + public void testParseData() { + assertEvalFastR("p <- parse(text = 'x = 1', keep.source = TRUE); attr(p, 'srcfile')$parseData", + "structure(c(1L, 1L, 1L, 1L, 1L, 263L, 0L, 0L), text = \"x\", tokens = \"SYMBOL\", class = \"parseData\", .Dim = c(8L, 1L))"); + assertEvalFastR("p <- parse(text = 'x = x + 1; rnorm(1, std = z); f2 <- function(a=1) a', keep.source = TRUE); attr(p, 'srcfile')$parseData", + "structure(c(1L, 1L, 1L, 1L, 1L, 263L, 0L, 0L, 1L, 5L, 1L, 5L, 1L, 263L, 1L, 0L, 1L, 12L, 1L, 28L, 1L, 296L, 2L, 0L, 1L, 27L, 1L, 27L, 1L, 263L, 3L, 0L, 1L, 31L, 1L, 32L, 1L, 263L, 4L, 0L, 1L, 51L, 1L, 51L, 1L, 263L, 5L, 0L), text = c(\"x\", \"x\", \"rnorm\", \"z\", \"f2\", \"a\"), tokens = c(\"SYMBOL\", \"SYMBOL\", \"SYMBOL_FUNCTION_CALL\", \"SYMBOL\", \"SYMBOL\", \"SYMBOL\"), class = \"parseData\", .Dim = c(8L, 6L))"); + } + } -- GitLab