Skip to content
Snippets Groups Projects
Commit 22d06c76 authored by Zbyněk Šlajchrt
Browse files

[GR-2798] Parse data attached to the 'srcfile' attribute.

PullRequest: fastr/1388
parents 723e096b 00db58e0
No related branches found
No related tags found
No related merge requests found
......@@ -30,7 +30,9 @@ import static com.oracle.truffle.r.runtime.builtins.RBuiltinKind.INTERNAL;
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
import com.oracle.truffle.api.dsl.Specialization;
......@@ -41,6 +43,7 @@ import com.oracle.truffle.r.nodes.builtin.RBuiltinNode;
import com.oracle.truffle.r.nodes.unary.CastIntegerNode;
import com.oracle.truffle.r.nodes.unary.CastStringNode;
import com.oracle.truffle.r.nodes.unary.CastToVectorNode;
import com.oracle.truffle.r.runtime.RDeparse;
import com.oracle.truffle.r.runtime.RError;
import com.oracle.truffle.r.runtime.RInternalError;
import com.oracle.truffle.r.runtime.RRuntime;
......@@ -57,12 +60,19 @@ import com.oracle.truffle.r.runtime.context.RContext;
import com.oracle.truffle.r.runtime.data.RComplex;
import com.oracle.truffle.r.runtime.data.RDataFactory;
import com.oracle.truffle.r.runtime.data.RExpression;
import com.oracle.truffle.r.runtime.data.RIntVector;
import com.oracle.truffle.r.runtime.data.RLanguage;
import com.oracle.truffle.r.runtime.data.RNull;
import com.oracle.truffle.r.runtime.data.RSymbol;
import com.oracle.truffle.r.runtime.data.model.RAbstractStringVector;
import com.oracle.truffle.r.runtime.env.REnvironment;
import com.oracle.truffle.r.runtime.nodes.RBaseNode;
import com.oracle.truffle.r.runtime.nodes.RSyntaxCall;
import com.oracle.truffle.r.runtime.nodes.RSyntaxConstant;
import com.oracle.truffle.r.runtime.nodes.RSyntaxElement;
import com.oracle.truffle.r.runtime.nodes.RSyntaxFunction;
import com.oracle.truffle.r.runtime.nodes.RSyntaxLookup;
import com.oracle.truffle.r.runtime.nodes.RSyntaxVisitor;
/**
* Internal component of the {@code parse} base package function.
......@@ -269,5 +279,174 @@ public abstract class Parse extends RBuiltinNode.Arg6 {
setWholeSrcRefAttrNode.execute(exprs, RDataFactory.createIntVector(wholeSrcrefData, RDataFactory.COMPLETE_VECTOR));
setSrcFileAttrNode.execute(exprs, srcFile);
RIntVector parseData = new ParseDataVisitor(exprs).getParseData();
srcFile.safePut("parseData", parseData);
}
/**
 * Visitor that walks parsed expression(s) and collects a "parse data" table describing the
 * tokens it encounters.
 *
 * The original parse tree is not reconstructed completely: only symbols are recorded, every
 * entry gets the parent id 0 and the result is therefore a flat table. That should suffice for
 * templating packages, such as knitr.
 */
static class ParseDataVisitor extends RSyntaxVisitor<Object> {

    /** The parsed expressions to be inspected by {@link #getParseData()}. */
    private final RExpression exprs;

    /**
     * Flat list of parse data octets, one octet per recorded terminal or non-terminal. The
     * eight values of an octet are stored in this order:
     *
     * <pre>
     * line1 col1 line2 col2 terminal token id parent
     * </pre>
     */
    private final List<Integer> data = new ArrayList<>();

    /** Set once an octet with NA positions has been recorded. */
    private boolean containsNA = false;

    /** Token type names, one per octet. */
    private final List<String> tokens = new ArrayList<>();

    /** Token texts, one per octet. */
    private final List<String> text = new ArrayList<>();

    /** Id assigned to the next octet; also the number of octets recorded so far. */
    private int idCounter = 0;

    ParseDataVisitor(RExpression exprs) {
        this.exprs = exprs;
    }

    /**
     * This enum mimics the <code>yytokentype</code> enum from <code>src/main/gram.c</code>.
     */
    enum TokenType {
        SYMBOL(263, true),
        SYMBOL_FUNCTION_CALL(296, true),
        SPECIAL(304, true);

        /** Numeric token code written into the octet. */
        final int code;

        /** Whether the token is a terminal; written into the octet as 1 or 0. */
        final boolean terminal;

        TokenType(int c, boolean term) {
            this.code = c;
            this.terminal = term;
        }
    }

    /**
     * Records an octet whose position is taken from the source section of {@code element}.
     */
    private void addOctet(RSyntaxElement element, TokenType tokenType, String txt) {
        int firstLine = element.getSourceSection().getStartLine();
        int firstColumn = element.getSourceSection().getStartColumn();
        int lastLine = element.getSourceSection().getEndLine();
        int lastColumn = element.getSourceSection().getEndColumn();
        addOctet(firstLine, firstColumn, lastLine, lastColumn, tokenType, txt);
    }

    /**
     * Records a bare symbol. All four position values are NA, so the resulting vector must not
     * be flagged as complete.
     */
    private void addOctet(RSymbol symbol) {
        final int na = RRuntime.INT_NA;
        containsNA = true;
        addOctet(na, na, na, na, TokenType.SYMBOL, symbol.getName());
    }

    /**
     * Appends one octet together with its token name and text, and advances the id counter.
     */
    private void addOctet(int startLine, int startColumn, int endLine, int endColumn, TokenType tokenType, String txt) {
        // TODO: adjust the parentId correctly
        int parentId = 0;
        int terminalFlag = tokenType.terminal ? 1 : 0;
        int[] octet = {startLine, startColumn, endLine, endColumn, terminalFlag, tokenType.code, idCounter, parentId};
        for (int value : octet) {
            data.add(value);
        }
        tokens.add(tokenType.name());
        text.add(txt);
        idCounter++;
    }

    /**
     * Visits every element of the expression vector and packs the collected octets into an
     * integer matrix with 8 rows and one column per octet. The matrix carries the attributes
     * {@code text} and {@code tokens} and the class {@code parseData}.
     */
    @TruffleBoundary
    RIntVector getParseData() {
        for (int idx = 0, count = exprs.getLength(); idx < count; idx++) {
            Object item = exprs.getDataAt(idx);
            if (item instanceof RLanguage) {
                RBaseNode rep = ((RLanguage) item).getRep();
                assert rep instanceof RSyntaxElement;
                accept((RSyntaxElement) rep);
            } else if (item instanceof RSymbol) {
                addOctet((RSymbol) item);
            }
            // TODO: primitives (anything else is currently ignored)
        }

        int[] octets = data.stream().mapToInt(Integer::intValue).toArray();
        RIntVector parseData = RDataFactory.createIntVector(octets, !containsNA);

        String[] textArray = text.toArray(new String[0]);
        String[] tokensArray = tokens.toArray(new String[0]);

        parseData.setAttr("text", RDataFactory.createStringVector(textArray, true));
        parseData.setAttr("tokens", RDataFactory.createStringVector(tokensArray, true));
        parseData.setClassAttr(RDataFactory.createStringVector("parseData"));
        parseData.setDimensions(new int[]{8, idCounter});
        return parseData;
    }

    /**
     * Records a call as {@link TokenType#SYMBOL_FUNCTION_CALL} when its target is a plain
     * lookup for which {@link RDeparse#getFunc(String)} returns {@code null}; the position
     * recorded is that of the whole call element. The arguments are visited afterwards.
     */
    @Override
    protected Object visit(RSyntaxCall element) {
        RSyntaxElement target = element.getSyntaxLHS();
        if (target instanceof RSyntaxLookup) {
            String name = ((RSyntaxLookup) target).getIdentifier();
            if (RDeparse.getFunc(name) == null) {
                addOctet(element, TokenType.SYMBOL_FUNCTION_CALL, name);
            }
        }
        for (RSyntaxElement argument : element.getSyntaxArguments()) {
            accept(argument);
        }
        return null;
    }

    /**
     * Constants do not contribute to the parse data yet.
     */
    @Override
    protected Object visit(RSyntaxConstant element) {
        // TODO: recognize constants
        return null;
    }

    /**
     * Records a variable lookup as a {@link TokenType#SYMBOL}.
     */
    @Override
    protected Object visit(RSyntaxLookup element) {
        addOctet(element, TokenType.SYMBOL, element.getIdentifier());
        return null;
    }

    /**
     * Visits the non-null argument defaults of a function definition, then its body.
     */
    @Override
    protected Object visit(RSyntaxFunction element) {
        RSyntaxElement[] defaults = element.getSyntaxArgumentDefaults();
        for (int idx = 0; idx < defaults.length; idx++) {
            if (defaults[idx] == null) {
                // argument without a default value
                continue;
            }
            accept(defaults[idx]);
        }
        accept(element.getSyntaxBody());
        return null;
    }
}
}
......@@ -137,7 +137,7 @@ public class RDeparse {
public static final int PREC_NS = 16;
public static final int PREC_SUBSET = 17;
private static class PPInfo {
public static class PPInfo {
public final PP kind;
public final int prec;
public final boolean rightassoc;
......@@ -153,7 +153,7 @@ public class RDeparse {
}
}
private static class Func {
public static class Func {
public final String op;
public final String closeOp;
public final PPInfo info;
......@@ -209,7 +209,7 @@ public class RDeparse {
private static final PPInfo USERBINOP = new PPInfo(PP.BINARY, PREC_PERCENT, false);
private static Func getFunc(String op) {
public static Func getFunc(String op) {
for (Func func : FUNCTAB) {
if (func.op.equals(op)) {
return func;
......
......@@ -47130,6 +47130,43 @@ expression(NULL)
#.Internal(parse(stdin(), c(1,2), c('expr1', 'expr2'), '?', '<weird-text', 'unknown'))
expression(expr1)
 
##com.oracle.truffle.r.test.builtins.TestBuiltin_parse.testParseData#
#if (!any(R.version$engine == "FastR")) { structure(c(1L, 1L, 1L, 1L, 1L, 263L, 0L, 0L), text = "x", tokens = "SYMBOL", class = "parseData", .Dim = c(8L, 1L)) } else { p <- parse(text = 'x = 1', keep.source = TRUE); attr(p, 'srcfile')$parseData }
[,1]
[1,] 1
[2,] 1
[3,] 1
[4,] 1
[5,] 1
[6,] 263
[7,] 0
[8,] 0
attr(,"text")
[1] "x"
attr(,"tokens")
[1] "SYMBOL"
attr(,"class")
[1] "parseData"
##com.oracle.truffle.r.test.builtins.TestBuiltin_parse.testParseData#
#if (!any(R.version$engine == "FastR")) { structure(c(1L, 1L, 1L, 1L, 1L, 263L, 0L, 0L, 1L, 5L, 1L, 5L, 1L, 263L, 1L, 0L, 1L, 12L, 1L, 28L, 1L, 296L, 2L, 0L, 1L, 27L, 1L, 27L, 1L, 263L, 3L, 0L, 1L, 31L, 1L, 32L, 1L, 263L, 4L, 0L, 1L, 51L, 1L, 51L, 1L, 263L, 5L, 0L), text = c("x", "x", "rnorm", "z", "f2", "a"), tokens = c("SYMBOL", "SYMBOL", "SYMBOL_FUNCTION_CALL", "SYMBOL", "SYMBOL", "SYMBOL"), class = "parseData", .Dim = c(8L, 6L)) } else { p <- parse(text = 'x = x + 1; rnorm(1, std = z); f2 <- function(a=1) a', keep.source = TRUE); attr(p, 'srcfile')$parseData }
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 1 1 1 1 1
[2,] 1 5 12 27 31 51
[3,] 1 1 1 1 1 1
[4,] 1 5 28 27 32 51
[5,] 1 1 1 1 1 1
[6,] 263 263 296 263 263 263
[7,] 0 1 2 3 4 5
[8,] 0 0 0 0 0 0
attr(,"text")
[1] "x" "x" "rnorm" "z" "f2" "a"
attr(,"tokens")
[1] "SYMBOL" "SYMBOL" "SYMBOL_FUNCTION_CALL"
[4] "SYMBOL" "SYMBOL" "SYMBOL"
attr(,"class")
[1] "parseData"
##com.oracle.truffle.r.test.builtins.TestBuiltin_parse.testParseDataFrame#
#eval(parse(text=deparse(data.frame(x=c(1)))))
x
......@@ -4,7 +4,7 @@
* http://www.gnu.org/licenses/gpl-2.0.html
*
* Copyright (c) 2012-2014, Purdue University
* Copyright (c) 2013, 2017, Oracle and/or its affiliates
* Copyright (c) 2013, 2018, Oracle and/or its affiliates
*
* All rights reserved.
*/
......@@ -66,4 +66,13 @@ public class TestBuiltin_parse extends TestBase {
public void testSrcfile() {
    // Parses empty text while passing an explicit srcfile object.
    String code = "parse(text='', srcfile=srcfile(system.file('testfile')))";
    assertEval(code);
}
@Test
public void testParseData() {
    // Each check pairs the R code producing the parse data with a literal structure(...)
    // expression describing the expected matrix.

    // A single assignment yields exactly one SYMBOL octet.
    String singleSymbolCode = "p <- parse(text = 'x = 1', keep.source = TRUE); attr(p, 'srcfile')$parseData";
    String singleSymbolExpected = "structure(c(1L, 1L, 1L, 1L, 1L, 263L, 0L, 0L), text = \"x\", tokens = \"SYMBOL\", class = \"parseData\", .Dim = c(8L, 1L))";
    assertEvalFastR(singleSymbolCode, singleSymbolExpected);

    // Several statements: symbols, a function call and a function definition with a default.
    String mixedCode = "p <- parse(text = 'x = x + 1; rnorm(1, std = z); f2 <- function(a=1) a', keep.source = TRUE); attr(p, 'srcfile')$parseData";
    String mixedExpected = "structure(c(1L, 1L, 1L, 1L, 1L, 263L, 0L, 0L, 1L, 5L, 1L, 5L, 1L, 263L, 1L, 0L, 1L, 12L, 1L, 28L, 1L, 296L, 2L, 0L, 1L, 27L, 1L, 27L, 1L, 263L, 3L, 0L, 1L, 31L, 1L, 32L, 1L, 263L, 4L, 0L, 1L, 51L, 1L, 51L, 1L, 263L, 5L, 0L), text = c(\"x\", \"x\", \"rnorm\", \"z\", \"f2\", \"a\"), tokens = c(\"SYMBOL\", \"SYMBOL\", \"SYMBOL_FUNCTION_CALL\", \"SYMBOL\", \"SYMBOL\", \"SYMBOL\"), class = \"parseData\", .Dim = c(8L, 6L))";
    assertEvalFastR(mixedCode, mixedExpected);
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment