Skip to content

Commit

Permalink
Merge pull request #2034 from usethesource/recovery/testing
Browse files Browse the repository at this point in the history
Test support for error recovery
  • Loading branch information
PieterOlivier authored Oct 2, 2024
2 parents 55229cf + e093964 commit ad5f036
Show file tree
Hide file tree
Showing 18 changed files with 1,520 additions and 41 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.dot diff=-astextplain
48 changes: 10 additions & 38 deletions src/org/rascalmpl/library/ParseTree.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ extend Message;
extend List;
import String;
import Node;
import Set;
@synopsis{The Tree data type as produced by the parser.}
@description{
Expand Down Expand Up @@ -807,20 +807,19 @@ str getErrorText(appl(error(_, _, _), [*_, appl(skipped(_), chars)])) = stringCh
This filter removes error trees until no ambiguities caused by error recovery are left.
Note that regular ambiguous trees remain in the parse forest.
}
Tree defaultErrorDisambiguationFilter(t: appl(Production prod, args)) {
Tree result = appl(prod, [defaultErrorDisambiguationFilter(arg) | arg <- args]);
return setKeywordParameters(result, getKeywordParameters(t));
Tree defaultErrorDisambiguationFilter(Tree t) {
return visit(t) {
case a:amb(_) => ambDisambiguation(a)
};
}
Tree defaultErrorDisambiguationFilter(amb(set[Tree] alternatives)) {
private Tree ambDisambiguation(amb(set[Tree] alternatives)) {
// Go depth-first
set[Tree] disambiguatedAlts = { defaultErrorDisambiguationFilter(alt) | Tree alt <- alternatives };
set[Tree] errorTrees = { alt | Tree alt <- disambiguatedAlts, /appl(error(_,_,_), _) := alt };
set[Tree] nonErrorTrees = { alt | Tree alt <- disambiguatedAlts, /appl(error(_,_,_), _) !:= alt };
rel[int score, Tree alt] scoredErrorTrees = { <scoreErrors(alt), alt> | Tree alt <- alternatives };
set[Tree] nonErrorTrees = scoredErrorTrees[0];
if (nonErrorTrees == {}) {
return getBestErrorTree(errorTrees);
return (getFirstFrom(scoredErrorTrees) | it.score > c.score ? c : it | c <- scoredErrorTrees).alt;
}
if ({Tree single} := nonErrorTrees) {
Expand All @@ -832,34 +831,7 @@ Tree defaultErrorDisambiguationFilter(amb(set[Tree] alternatives)) {
return amb(nonErrorTrees);
}
private Tree getBestErrorTree(set[Tree] trees) {
Tree best = char(0);
int bestErrorCount = -1;
int bestErrorLength = 0;
for (tree <- trees) {
list[Tree] errors = findAllErrors(tree);
int errorCount = size(errors);
int errorLength = 0;
for (err <- errors) {
errorLength += getSkipped(err).src.length;
}
if (bestErrorCount == -1 || errorCount < bestErrorCount || (errorCount == bestErrorCount && errorLength < bestErrorLength)) {
best = tree;
bestErrorCount = errorCount;
bestErrorLength = errorLength;
}
}
if (bestErrorCount != -1) {
return best;
}
// trees must have been empty
fail;
}
// Computes the error score of a tree: for every error application found
// anywhere inside `t`, the `src.length` of its `getSkipped` result is added up.
// A tree without any error application scores 0.
private int scoreErrors(Tree t) {
    int total = 0;
    for (/err:appl(error(_,_,_),_) := t) {
        total += getSkipped(err).src.length;
    }
    return total;
}
// Handle char and cycle nodes
default Tree defaultErrorDisambiguationFilter(Tree t) = t;
19 changes: 19 additions & 0 deletions src/org/rascalmpl/library/lang/c90/examples/hello-world.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

int print(const char *text);

/* Prints the greeting "Hello <name>!" as three consecutive calls to print(). */
void printHello(char *name) {
    const char *prefix = "Hello ";
    const char *suffix = "!";

    print(prefix);
    print(name);
    print(suffix);
}

/*
 * Entry point: greets the name passed as the first command-line argument,
 * or "World" when no argument is given. Always returns 0.
 */
int main(int argc, char *argv[]) {
    char *name;

    if (argc > 1) {
        name = argv[1];
    } else {
        name = "World";
    }

    printHello(name);

    /* C90 leaves the exit status undefined when main falls off its end. */
    return 0;
}
2 changes: 2 additions & 0 deletions src/org/rascalmpl/library/lang/diff/unified/UnifiedDiff.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
@contributor{Tijs van der Storm - [email protected] (CWI)}
module lang::diff::unified::UnifiedDiff

start syntax DiffFile = Diff;

syntax Diff
= Header old Header new Chunk* chunks
;
Expand Down
39 changes: 39 additions & 0 deletions src/org/rascalmpl/library/lang/diff/unified/examples/example.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
--- a/src/org/rascalmpl/parser/uptr/UPTRNodeFactory.java
+++ b/src/org/rascalmpl/parser/uptr/UPTRNodeFactory.java
@@ -1,6 +1,7 @@
package org.rascalmpl.parser.uptr;

import java.net.URI;
+import java.util.Arrays;
import java.util.IdentityHashMap;
import java.util.Map;

@@ -21,7 +22,9 @@ import org.rascalmpl.values.parsetrees.ProductionAdapter;
import org.rascalmpl.values.parsetrees.TreeAdapter;

public class UPTRNodeFactory implements INodeConstructorFactory<ITree, ISourceLocation>{
- private final static RascalValueFactory VF = (RascalValueFactory) ValueFactoryFactory.getValueFactory();
+ private static final RascalValueFactory VF = (RascalValueFactory) ValueFactoryFactory.getValueFactory();
+ private static final IConstructor SKIPPED = VF.constructor(RascalValueFactory.Production_Skipped, VF.constructor(RascalValueFactory.Symbol_IterStar, VF.constructor(RascalValueFactory.Symbol_CharClass, VF.list(VF.constructor(RascalValueFactory.CharRange_Range, VF.integer(1), VF.integer(Character.MAX_CODE_POINT))))));
+
private boolean allowAmb;

public UPTRNodeFactory(boolean allowAmbiguity){
@@ -141,7 +144,14 @@ public class UPTRNodeFactory implements INodeConstructorFactory<ITree, ISourceLo
}

@Override
- public ITree createRecoveryNode(int[] characters) {
- throw new UnsupportedOperationException();
- }
+ public ITree createSkippedNode(int[] characters) {
+ return createLiteralNode(characters, SKIPPED);
+ }
+
+ public ITree createErrorNode(ArrayList<ITree> children, Object production) {
+ IConstructor prod = (IConstructor) production;
+ IConstructor errorProd = VF.constructor(RascalValueFactory.Production_Error, prod.get(0), prod, VF.integer(children.size()-1));
+ return buildAppl(children, errorProd);
+ }
+
}
65 changes: 65 additions & 0 deletions src/org/rascalmpl/library/lang/dot/examples/parser-state.dot
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
digraph Parser {
"Parser"["label"="Parser\nInput: \"void f(){if(1){}}\"\nLocation: 0 ('v')\nStep 5: Reducing terminals"];
"todo-1"["label"="<0> 0", "shape"="record"];
"-2"["label"="Epsilon: \n.0@0 ,matchable,end\n?\nin: 'lex(\"LAYOUT\") -> regular(\iter-star(lex(\"LAYOUT\")))'"];
"7226"["label"="List: 7226\n.0@0 ,expandable,end\n7226\nin: 'LAYOUTLIST -> \iter-star(lex(\"LAYOUT\"))'"];
"12860"["label"="NonTerminal: LAYOUTLIST\n.1@0 \nlayouts_LAYOUTLIST\nin: Tags Visibility Signature '=' Expression 'when' 12878 ';'"];
"-1"["label"="NonTerminal: FunctionDeclaration\n.0@-1 \nFunctionDeclaration"];
"12860" -> "-1";
"7226" -> "12860";
"-2" -> "7226";
"todo-1":"0":sw -> "-2"["label"="Stack"];
"46484886"["shape"="octagon", "label"="Epsilon"];
"todo-1":"0":se -> "46484886"["label"="Node"];
"todoLists":"1" -> "todo-1";
"todoLists"["label"="<0> 0 | <1> 1 | <2> 2 | <3> 3 | <4> 4 | <5> 5 | <6> 6 | <7> 7 | <8> 8 | <9> 9 | <10> 10 | <11> 11 | <12> 12 | <13> 13 | <14> 14 | <15> 15", "shape"="record"];
"Parser" -> "todoLists"["label"="todo lists"];
"stacksToExpand"["label"="", "shape"="record"];
"Parser" -> "stacksToExpand"["label"="stacks to expand"];
"terminalsToReduce"["label"="<0> 0", "shape"="record", "color"="red"];
"terminalsToReduce":"0":sw -> "-2"["label"="Stack"];
"terminalsToReduce":"0":se -> "46484886"["label"="Node"];
"Parser" -> "terminalsToReduce"["label"="terminals to reduce"];
"nonTerminalsToReduce"["label"="", "shape"="record"];
"Parser" -> "nonTerminalsToReduce"["label"="non-terminals to reduce"];
"122"["label"="NonTerminal: Tag\n.0@0 ,end\nTag\nin: 'sort(\"Tag\") -> regular(\iter-star-seps(sort(\"Tag\"),[layouts(\"LAYOUTLIST\")]))'"];
"124"["label"="SeparatedList: 124\n.0@0 ,expandable,end\n124\nin: 'default -> tags'"];
"12858"["label"="NonTerminal: Tags\n.0@0 \nTags\nin: Tags Visibility Signature '=' Expression 'when' 12878 ';'"];
"12858" -> "-1";
"124" -> "12858";
"122" -> "124";
"unexpandableNodes":"0" -> "122";
"13120"["label"="NonTerminal: Comment\n.0@0 ,end\nComment\nin: 'LAYOUT -> Comment'"];
"7221"["label"="NonTerminal: LAYOUT\n.0@0 ,end\nLAYOUT\nin: 'lex(\"LAYOUT\") -> regular(\iter-star(lex(\"LAYOUT\")))'"];
"7221" -> "7226";
"13120" -> "7221";
"unexpandableNodes":"1" -> "13120";
"unexpandableNodes"["label"="<0> 0 | <1> 1", "shape"="record"];
"12824"["label"="Char: \n.0@-1 ,matchable\n0\nin: 0 'sort(\"FunctionDeclaration\")' ':' 12828 0"];
"unmatchableLeafNodes":"0" -> "12824";
"128"["label"="Char: \n.0@-1 ,matchable\n0\nin: 0 'sort(\"Tags\")' ':' 132 0"];
"unmatchableLeafNodes":"1" -> "128";
"2043"["label"="Literal: \n.0@-1 ,matchable\n'@'\nin: '@' Name '=' Expression"];
"unmatchableLeafNodes":"2" -> "2043";
"2065"["label"="Char: \n.0@-1 ,matchable\n0\nin: 0 '\iter-star(sort(\"Tag\"))' ':' 2069 0"];
"unmatchableLeafNodes":"3" -> "2065";
"13122"["label"="Char: \n.0@-1 ,matchable,end\n9-13,32,133,160,5760,6158,8192-8202,8232-8233,8239,8287,12288\nin: 'LAYOUT -> [range(9,13),range(32,32),range(133,133),range(160,160),range(5760,5760),range(6158,6158),range(8192,8202),range(8232,8233),range(8239,8239),range(8287,8287),range(12288,12288)]'"];
"unmatchableLeafNodes":"4" -> "13122";
"13125"["label"="Char: \n.0@-1 ,matchable\n0\nin: 0 '\iter-star(sort(\"LAYOUT\"))' ':' 13129 0"];
"unmatchableLeafNodes":"5" -> "13125";
"7373"["label"="Literal: \n.0@-1 ,matchable\n'/*'\nin: '/*' 7379 '*/'"];
"unmatchableLeafNodes":"6" -> "7373";
"7382"["label"="Literal: \n.0@-1 ,matchable\n'//'\nin: '//' 7386"];
"unmatchableLeafNodes":"7" -> "7382";
"7389"["label"="Char: \n.0@-1 ,matchable\n0\nin: 0 'sort(\"Comment\")' ':' 7393 0"];
"unmatchableLeafNodes":"8" -> "7389";
"unmatchableLeafNodes"["label"="<0> 0 | <1> 1 | <2> 2 | <3> 3 | <4> 4 | <5> 5 | <6> 6 | <7> 7 | <8> 8", "shape"="record"];
"unmatchableMidProductionNodes"["shape"="record", "label"=""];
"filteredNodes"["label"="", "shape"="record"];
"error"["label"="Errors"];
"Parser" -> "error"["label"="error tracking"];
"error" -> "unexpandableNodes"["label"="unexpandable"];
"error" -> "unmatchableLeafNodes"["label"="unmatchable leafs"];
"error" -> "unmatchableMidProductionNodes"["label"="unmatchable mid-prod"];
"error" -> "filteredNodes"["label"="filtered"];
}
2 changes: 1 addition & 1 deletion src/org/rascalmpl/library/lang/dot/syntax/Dot.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ syntax NodeId
| Id Port
;

syntax Port = ":" Id Id?
syntax Port = ":" Id (":" Id)?
// | ":" Id
// | ":" CompassPt
;
Expand Down
18 changes: 18 additions & 0 deletions src/org/rascalmpl/library/lang/pico/examples/fac.pico
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
begin declare input : natural,
output : natural,
repnr : natural,
rep : natural,
s1 : string,
s2 : string;
%% s1 and s2 are declared but never assigned or read below.
input := 14;
output := 1;
%% Outer loop: one pass per value of input; input is decremented at the end of each pass.
%% NOTE(review): presumably a Pico while-loop runs as long as its expression is non-zero - confirm.
while input - 1 do
rep := output;
repnr := input;
%% Inner loop: adds rep to output on every pass, i.e. multiplication by repeated addition.
while repnr - 1 do
output := output + rep;
repnr := repnr - 1
od;
input := input - 1
od
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
module lang::rascal::tests::concrete::recovery::ErrorRecoveryBenchmark

import lang::rascal::tests::concrete::recovery::RecoveryTestSupport;

import IO;
import util::Benchmark;
import String;
import List;

// Convenience wrappers, one per language: each calls the matching
// testRecovery function below and discards the FileStats it returns.
void runTestC() { testRecoveryC(); }
void runTestDiff() { testRecoveryDiff(); }
void runTestDot() { testRecoveryDot(); }
void runTestJava() { testRecoveryJava(); }
void runTestJson() { testRecoveryJson(); }
void runTestPico() { testRecoveryPico(); }
void runTestRascal() { testRecoveryRascal(); }

// Per-language recovery tests. Each one calls testErrorRecovery with the location
// of a syntax module, a nonterminal name and an input file, and returns the
// resulting FileStats. The Java input is taken from a zip archive via zippedFile.
// NOTE(review): the second argument is presumably the top-level sort used for
// parsing the input - confirm in RecoveryTestSupport.
FileStats testRecoveryC() = testErrorRecovery(|std:///lang/c90/syntax/C.rsc|, "TranslationUnit", |std:///lang/c90/examples/hello-world.c|);
FileStats testRecoveryDiff() = testErrorRecovery(|std:///lang/diff/unified/UnifiedDiff.rsc|, "DiffFile", |std:///lang/diff/unified/examples/example.diff|);
FileStats testRecoveryDot() = testErrorRecovery(|std:///lang/dot/syntax/Dot.rsc|, "DOT", |std:///lang/dot/examples/parser-state.dot|);
FileStats testRecoveryJava() = testErrorRecovery(|std:///lang/java/syntax/Java15.rsc|, "CompilationUnit", zippedFile("m3/snakes-and-ladders-project-source.zip", "src/snakes/LastSquare.java"));
FileStats testRecoveryJson() = testErrorRecovery(|std:///lang/json/syntax/JSON.rsc|, "JSONText", |std:///lang/json/examples/ex01.json|);
FileStats testRecoveryPico() = testErrorRecovery(|std:///lang/pico/syntax/Main.rsc|, "Program", |std:///lang/pico/examples/fac.pico|);
FileStats testRecoveryRascal() = testErrorRecovery(|std:///lang/rascal/syntax/Rascal.rsc|, "Module", |std:///lang/rascal/vis/ImportGraph.rsc|);

// Runs the single-file recovery test of every supported language, in the order
// C, Diff, Dot, Java, Json, Pico, Rascal. The individual results are discarded.
void runLanguageTests() {
    runTestC();
    runTestDiff();
    runTestDot();
    runTestJava();
    runTestJson();
    runTestPico();
    runTestRascal();
}

// Runs batchRecoveryTest for the Rascal grammar (nonterminal "Module") over the
// ".rsc" files under |std:///|, passing `maxFiles` and `maxFileSize` through
// unchanged, then prints the elapsed time and the collected statistics.
// NOTE(review): `maxFiles`/`maxFileSize` presumably cap the number and size of the
// files that are tested - confirm in RecoveryTestSupport.
void runRascalBatchTest(int maxFiles=1000, int maxFileSize=4000) {
    int startTime = realTime();
    TestStats stats = batchRecoveryTest(|std:///lang/rascal/syntax/Rascal.rsc|, "Module", |std:///|, ".rsc", maxFiles, maxFileSize);
    // The difference of two realTime() readings is divided by 1000 below to report seconds.
    int duration = realTime() - startTime;
    println();
    println("================================================================");
    println("Rascal batch test done in <duration/1000> seconds, total result:");
    printStats(stats);
}
// Command-line entry point of the benchmark.
// Accepts either no arguments (defaults: maxFiles=1000, maxFileSize=4000) or
// exactly two integer arguments: the maximum file count and the maximum file size.
// Returns 0 after running the batch test, or 1 when the argument count is wrong.
int main(list[str] args) {
    int maxFiles = 1000;
    int maxFileSize = 4000;
    if (size(args) == 2) {
        maxFiles = toInt(args[0]);
        maxFileSize = toInt(args[1]);
    } else if (size(args) != 0) {
        // Angle brackets must be escaped: an unescaped '<' starts string interpolation.
        println("Usage: ErrorRecoveryBenchmark \<max-files\> \<max-file-size\>");
        // Wrong argument count: report failure instead of running with the defaults.
        return 1;
    }
    runRascalBatchTest(maxFiles=maxFiles, maxFileSize=maxFileSize);
    return 0;
}
Loading

0 comments on commit ad5f036

Please sign in to comment.