diff --git a/coverage/pom.xml b/coverage/pom.xml index 9ab5110b7..b0cd31f17 100644 --- a/coverage/pom.xml +++ b/coverage/pom.xml @@ -6,7 +6,7 @@ fr.insee.trevas trevas-parent - 1.7.0 + 1.8.0 coverage @@ -22,32 +22,32 @@ fr.insee.trevas vtl-engine - 1.7.0 + 1.8.0 fr.insee.trevas vtl-jackson - 1.7.0 + 1.8.0 fr.insee.trevas vtl-jdbc - 1.7.0 + 1.8.0 fr.insee.trevas vtl-model - 1.7.0 + 1.8.0 fr.insee.trevas vtl-parser - 1.7.0 + 1.8.0 fr.insee.trevas vtl-spark - 1.7.0 + 1.8.0 diff --git a/docs/yarn.lock b/docs/yarn.lock index 1b4781105..557f310dd 100644 --- a/docs/yarn.lock +++ b/docs/yarn.lock @@ -4711,9 +4711,9 @@ elkjs@^0.9.0: integrity sha512-f/ZeWvW/BCXbhGEf1Ujp29EASo/lk1FDnETgNKwJrsVvGZhUWCZyg3xLJjAsxfOmt8KjswHmI5EwCQcPMpOYhQ== elliptic@^6.5.3, elliptic@^6.5.5: - version "6.5.7" - resolved "https://registry.yarnpkg.com/elliptic/-/elliptic-6.5.7.tgz#8ec4da2cb2939926a1b9a73619d768207e647c8b" - integrity sha512-ESVCtTwiA+XhY3wyh24QqRGBoP3rEdDUl3EDUUo9tft074fi19IrdpH7hLCMMP3CIj7jb3W96rn8lt/BqIlt5Q== + version "6.6.0" + resolved "https://registry.yarnpkg.com/elliptic/-/elliptic-6.6.0.tgz#5919ec723286c1edf28685aa89261d4761afa210" + integrity sha512-dpwoQcLc/2WLQvJvLRHKZ+f9FgOdjnq11rurqwekGQygGPsYSK29OMMD2WalatiqQ+XGFDglTNixpPfI+lpaAA== dependencies: bn.js "^4.11.9" brorand "^1.1.0" @@ -6164,9 +6164,9 @@ http-parser-js@>=0.5.1: integrity sha512-SGeBX54F94Wgu5RH3X5jsDtf4eHyRogWX1XGT3b4HuW3tQPM4AaBzoUji/4AAJNXCEOWZ5O0DgZmJw1947gD5Q== http-proxy-middleware@^2.0.3: - version "2.0.6" - resolved "https://registry.yarnpkg.com/http-proxy-middleware/-/http-proxy-middleware-2.0.6.tgz#e1a4dd6979572c7ab5a4e4b55095d1f32a74963f" - integrity sha512-ya/UeJ6HVBYxrgYotAZo1KvPWlgB48kUJLDePFeneHsVujFaW5WNj2NgWCAE//B1Dl02BIfYlpNgBy8Kf8Rjmw== + version "2.0.7" + resolved "https://registry.yarnpkg.com/http-proxy-middleware/-/http-proxy-middleware-2.0.7.tgz#915f236d92ae98ef48278a95dedf17e991936ec6" + integrity sha512-fgVY8AV7qU7z/MmXJ/rxwbrtQH4jBQ9m7kp3llF0liB7glmFeVZFBepQb32T3y8n8k2+AEYuMPCpinYW+/CuRA== dependencies: "@types/http-proxy" "^1.17.8" http-proxy "^1.18.1" @@ -7321,9 +7321,9 @@ merge2@^1.3.0, merge2@^1.4.1: integrity sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg== mermaid@^10.4.0: - version "10.9.2" - resolved "https://registry.yarnpkg.com/mermaid/-/mermaid-10.9.2.tgz#108fe98060e6fba6bc826e5b454674aa2d32b817" - integrity sha512-UkZyMSuIYcI1Q0H+2pv/5CiY84sOwQ2XlKoDZMl9Y/MtrLEtxQtyA6LWGkMxnZxj0dJqI+7nw51bYjNnrbdFsQ== + version "10.9.3" + resolved "https://registry.yarnpkg.com/mermaid/-/mermaid-10.9.3.tgz#90bc6f15c33dbe5d9507fed31592cc0d88fee9f7" + integrity sha512-V80X1isSEvAewIL3xhmz/rVmc27CVljcsbWxkxlWJWY/1kQa4XOABqpDl2qQLGKzpKm6WbTfUEKImBlUfFYArw== dependencies: "@braintree/sanitize-url" "^6.0.1" "@types/d3-scale" "^4.0.3" diff --git a/pom.xml b/pom.xml index 98cef9395..80cdf0b4e 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ fr.insee.trevas trevas-parent pom - 1.7.0 + 1.8.0 vtl-parser vtl-model diff --git a/vtl-csv/pom.xml b/vtl-csv/pom.xml index 9c0af0afc..411d0e894 100644 --- a/vtl-csv/pom.xml +++ b/vtl-csv/pom.xml @@ -7,13 +7,13 @@ fr.insee.trevas trevas-parent - 1.7.0 + 1.8.0 vtl-csv VTL CSV CSV module for the VTL model - 1.7.0 + 1.8.0 @@ -25,7 +25,7 @@ fr.insee.trevas vtl-model - 1.7.0 + 1.8.0 net.sf.supercsv diff --git a/vtl-engine/pom.xml b/vtl-engine/pom.xml index dc429e899..a50226771 100644 --- a/vtl-engine/pom.xml +++ b/vtl-engine/pom.xml @@ -7,13 +7,13 @@ fr.insee.trevas trevas-parent - 1.7.0 + 1.8.0 vtl-engine VTL Engine VTL engine framework for Trevas - 1.7.0 + 1.8.0 @@ -25,13 +25,13 @@ fr.insee.trevas vtl-parser - 1.7.0 + 1.8.0 compile fr.insee.trevas vtl-model - 1.7.0 + 1.8.0 compile diff --git a/vtl-engine/src/main/java/fr/insee/vtl/engine/VtlNativeMethods.java b/vtl-engine/src/main/java/fr/insee/vtl/engine/VtlNativeMethods.java index cc9abe1f1..aa348012a 100644 --- a/vtl-engine/src/main/java/fr/insee/vtl/engine/VtlNativeMethods.java +++ b/vtl-engine/src/main/java/fr/insee/vtl/engine/VtlNativeMethods.java @@ -47,10 +47,6 @@ public class VtlNativeMethods { Fun.toMethod(ConditionalVisitor::ifThenElse), Fun.toMethod(ConditionalVisitor::ifThenElse), Fun.toMethod(ConditionalVisitor::ifThenElse), - Fun.toMethod(ConditionalVisitor::caseFn), - Fun.toMethod(ConditionalVisitor::caseFn), - Fun.toMethod(ConditionalVisitor::caseFn), - Fun.toMethod(ConditionalVisitor::caseFn), Fun.toMethod(ConditionalVisitor::nvl), Fun.toMethod(ConditionalVisitor::nvl), Fun.toMethod(ConditionalVisitor::nvl), diff --git a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/expression/ConditionalVisitor.java b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/expression/ConditionalVisitor.java index 02165ec74..2eca83cb2 100644 --- a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/expression/ConditionalVisitor.java +++ b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/expression/ConditionalVisitor.java @@ -10,14 +10,10 @@ import fr.insee.vtl.parser.VtlBaseVisitor; import fr.insee.vtl.parser.VtlParser; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Objects; +import java.util.*; import java.util.stream.Collectors; import static fr.insee.vtl.engine.VtlScriptEngine.fromContext; -import static fr.insee.vtl.engine.utils.TypeChecking.assertBoolean; import static fr.insee.vtl.engine.utils.TypeChecking.hasSameTypeOrNull; /** @@ -68,34 +64,6 @@ public static Boolean ifThenElse(Boolean condition, Boolean thenExpr, Boolean el return condition ? thenExpr : elseExpr; } - public static Long caseFn(Boolean condition, Long thenExpr) { - if (condition == null) { - return null; - } - return condition ? thenExpr : null; - } - - public static Double caseFn(Boolean condition, Double thenExpr) { - if (condition == null) { - return null; - } - return condition ? thenExpr : null; - } - - public static String caseFn(Boolean condition, String thenExpr) { - if (condition == null) { - return null; - } - return condition ? thenExpr : null; - } - - public static Boolean caseFn(Boolean condition, Boolean thenExpr) { - if (condition == null) { - return null; - } - return condition ? thenExpr : null; - } - public static Long nvl(Long value, Long defaultValue) { return value == null ? defaultValue : value; } @@ -149,92 +117,59 @@ public ResolvableExpression visitIfExpr(VtlParser.IfExprContext ctx) { */ @Override public ResolvableExpression visitCaseExpr(VtlParser.CaseExprContext ctx) { - Positioned pos = fromContext(ctx); - List exprs = ctx.expr(); - List whenExprs = new ArrayList<>(); - List thenExprs = new ArrayList<>(); - for (int i = 0; i < exprs.size() - 1; i = i + 2) { - whenExprs.add(exprs.get(i)); - thenExprs.add(exprs.get(i + 1)); - } - List whenExpressions = whenExprs.stream() - .map(e -> assertBoolean(exprVisitor.visit(e), e)) - .collect(Collectors.toList()); - List thenExpressions = thenExprs.stream() - .map(exprVisitor::visit) - .collect(Collectors.toList()); - ResolvableExpression elseExpression = exprVisitor.visit(exprs.get(exprs.size() - 1)); - List forTypeCheck = (new ArrayList<>(thenExpressions)); - forTypeCheck.add(elseExpression); - // TODO: handle better the default element position - if (!hasSameTypeOrNull(forTypeCheck)) { - try { - throw new InvalidTypeException( - forTypeCheck.get(0).getClass(), - Boolean.class, - fromContext(ctx.expr(0)) - ); - } catch (InvalidTypeException e) { - throw new RuntimeException(e); + try { + Positioned pos = fromContext(ctx); + List exprs = ctx.expr(); + List whenExprs = new ArrayList<>(); + List thenExprs = new ArrayList<>(); + for (int i = 0; i < exprs.size() - 1; i = i + 2) { + whenExprs.add(exprs.get(i)); + thenExprs.add(exprs.get(i + 1)); + } + List whenExpressions = whenExprs.stream() + .map(exprVisitor::visit) + .collect(Collectors.toList()); + List thenExpressions = thenExprs.stream() + .map(exprVisitor::visit) + .collect(Collectors.toList()); + ResolvableExpression elseExpression = exprVisitor.visit(exprs.get(exprs.size() - 1)); + List forTypeCheck = (new ArrayList<>(thenExpressions)); + forTypeCheck.add(elseExpression); + // TODO: handle better the default element position + if (!hasSameTypeOrNull(forTypeCheck)) { + try { + throw new InvalidTypeException( + forTypeCheck.get(0).getClass(), + Boolean.class, + fromContext(ctx.expr(0)) + ); + } catch (InvalidTypeException e) { + throw new RuntimeException(e); + } } - } - - Class outputType = elseExpression.getType(); - if (outputType.equals(String.class)) { - return ResolvableExpression.withType(String.class) - .withPosition(pos) - .using(context -> { - for (int i = 0; i < whenExprs.size(); i++) { - Boolean condition = (Boolean) whenExpressions.get(i).resolve(context); - if (condition) { - return (String) (new CastExpression(pos, thenExpressions.get(i), outputType)).resolve(context); - } - } - return (String) (new CastExpression(pos, elseExpression, outputType)).resolve(context); - }); - } - if (outputType.equals(Double.class)) { - return ResolvableExpression.withType(Double.class) - .withPosition(pos) - .using(context -> { - for (int i = 0; i < whenExprs.size(); i++) { - Boolean condition = (Boolean) whenExpressions.get(i).resolve(context); - if (condition) { - return (Double) (new CastExpression(pos, thenExpressions.get(i), outputType)).resolve(context); - } - } - return (Double) (new CastExpression(pos, elseExpression, outputType)).resolve(context); - }); + Class outputType = elseExpression.getType(); + return new CastExpression(pos, caseToIfIt(whenExpressions.listIterator(), thenExpressions.listIterator(), elseExpression), outputType); + } catch (VtlScriptException e) { + throw new VtlRuntimeException(e); } - if (outputType.equals(Long.class)) { - return ResolvableExpression.withType(Long.class) - .withPosition(pos) - .using(context -> { - for (int i = 0; i < whenExprs.size(); i++) { - Boolean condition = (Boolean) whenExpressions.get(i).resolve(context); - if (condition) { - return (Long) (new CastExpression(pos, thenExpressions.get(i), outputType)).resolve(context); - } - } - return (Long) (new CastExpression(pos, elseExpression, outputType)).resolve(context); - }); + } + + private ResolvableExpression caseToIfIt(ListIterator whenExpr, ListIterator thenExpr, ResolvableExpression elseExpression) throws VtlScriptException { + if (!whenExpr.hasNext() || !thenExpr.hasNext()) { + return elseExpression; } - if (outputType.equals(Boolean.class)) { - return ResolvableExpression.withType(Boolean.class) - .withPosition(pos) - .using(context -> { - for (int i = 0; i < whenExprs.size(); i++) { - Boolean condition = (Boolean) whenExpressions.get(i).resolve(context); - if (condition) { - return (Boolean) (new CastExpression(pos, thenExpressions.get(i), outputType)).resolve(context); - } - } - return (Boolean) (new CastExpression(pos, elseExpression, outputType)).resolve(context); - }); - } else return null; + + ResolvableExpression nextWhen = whenExpr.next(); + + return genericFunctionsVisitor.invokeFunction("ifThenElse", Java8Helpers.listOf( + nextWhen, + thenExpr.next(), + caseToIfIt(whenExpr, thenExpr, elseExpression) + ), nextWhen); } + /** * Visits nvl expressions. * diff --git a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/expression/functions/GenericFunctionsVisitor.java b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/expression/functions/GenericFunctionsVisitor.java index f89b5bee0..564d9ade2 100644 --- a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/expression/functions/GenericFunctionsVisitor.java +++ b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/expression/functions/GenericFunctionsVisitor.java @@ -7,10 +7,10 @@ import fr.insee.vtl.engine.expressions.CastExpression; import fr.insee.vtl.engine.expressions.ComponentExpression; import fr.insee.vtl.engine.expressions.FunctionExpression; -import fr.insee.vtl.model.utils.Java8Helpers; import fr.insee.vtl.engine.visitors.expression.ExpressionVisitor; import fr.insee.vtl.model.*; import fr.insee.vtl.model.exceptions.VtlScriptException; +import fr.insee.vtl.model.utils.Java8Helpers; import fr.insee.vtl.parser.VtlBaseVisitor; import fr.insee.vtl.parser.VtlParser; import org.antlr.v4.runtime.Token; @@ -169,7 +169,8 @@ private DatasetExpression invokeFunctionOnDataset(String funcName, List "arg" + e.hashCode(), e -> e)); if (measureNames.size() != 1) { throw new VtlRuntimeException( - new InvalidArgumentException("mono-measure datasets don't contain same measures (number or names)", position) + new InvalidArgumentException("Variables in the mono-measure datasets are not named the same: " + + measureNames + " found", position) ); } DatasetExpression ds = proc.executeInnerJoin(dsExprs); diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java index a53c5e12e..18776c802 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/expression/ConditionalExprTest.java @@ -2,8 +2,8 @@ import fr.insee.vtl.engine.exceptions.FunctionNotFoundException; import fr.insee.vtl.engine.samples.DatasetSamples; -import fr.insee.vtl.model.utils.Java8Helpers; import fr.insee.vtl.model.Dataset; +import fr.insee.vtl.model.utils.Java8Helpers; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -92,6 +92,15 @@ public void testCaseExpr() throws ScriptException { Java8Helpers.mapOf("id", "Franck", "c", 1L) ); assertThat(((Dataset) res1).getDataStructure().get("c").getType()).isEqualTo(Long.class); + engine.eval("ds1 := ds_1[keep id, long1][rename long1 to bool_var];" + + "ds2 := ds_2[keep id, long1][rename long1 to bool_var]; " + + "res_ds <- case when ds1 < 30 then ds1 else ds2;"); + Object res_ds = engine.getContext().getAttribute("res_ds"); + assertThat(((Dataset) res_ds).getDataAsMap()).containsExactlyInAnyOrder( + Java8Helpers.mapOf("id", "Hadrien", "bool_var", 10L), + Java8Helpers.mapOf("id", "Nico", "bool_var", 20L), + Java8Helpers.mapOf("id", "Franck", "bool_var", 100L) + ); } @Test diff --git a/vtl-jackson/pom.xml b/vtl-jackson/pom.xml index 700f79375..bb44f6c2a 100644 --- a/vtl-jackson/pom.xml +++ b/vtl-jackson/pom.xml @@ -7,13 +7,13 @@ fr.insee.trevas trevas-parent - 1.7.0 + 1.8.0 vtl-jackson VTL Jackson Jackson module for the VTL model - 1.7.0 + 1.8.0 @@ -25,7 +25,7 @@ fr.insee.trevas vtl-model - 1.7.0 + 1.8.0 compile diff --git a/vtl-jdbc/pom.xml b/vtl-jdbc/pom.xml index 596b66cd6..f64365aa3 100644 --- a/vtl-jdbc/pom.xml +++ b/vtl-jdbc/pom.xml @@ -7,13 +7,13 @@ fr.insee.trevas trevas-parent - 1.7.0 + 1.8.0 vtl-jdbc VTL JDBC Dataset wrapper around JDBC result set. Useful to expose SQL tables as VTL datasets - 1.7.0 + 1.8.0 @@ -25,14 +25,14 @@ fr.insee.trevas vtl-model - 1.7.0 + 1.8.0 compile fr.insee.trevas vtl-engine - 1.7.0 + 1.8.0 test diff --git a/vtl-model/pom.xml b/vtl-model/pom.xml index 995da2e1c..ac1a0dd26 100644 --- a/vtl-model/pom.xml +++ b/vtl-model/pom.xml @@ -7,7 +7,7 @@ fr.insee.trevas trevas-parent - 1.7.0 + 1.8.0 @@ -21,7 +21,7 @@ vtl-model VTL Model Model describing VTL expressions and bindings - 1.7.0 + 1.8.0 diff --git a/vtl-parser/pom.xml b/vtl-parser/pom.xml index 2bbcc55cf..38ce8531a 100644 --- a/vtl-parser/pom.xml +++ b/vtl-parser/pom.xml @@ -7,13 +7,13 @@ fr.insee.trevas trevas-parent - 1.7.0 + 1.8.0 vtl-parser VTL Parser Artifacts generated by Antlr from the VTL grammar files - 1.7.0 + 1.8.0 diff --git a/vtl-prov/pom.xml b/vtl-prov/pom.xml index 6df502fb8..9af676e4b 100644 --- a/vtl-prov/pom.xml +++ b/vtl-prov/pom.xml @@ -7,14 +7,14 @@ fr.insee.trevas trevas-parent - 1.7.0 + 1.8.0 VTL Provenance Provenance static analysis vtl-prov - 1.7.0 + 1.8.0 @@ -32,7 +32,22 @@ fr.insee.trevas vtl-parser - 1.7.0 + 1.8.0 + + + fr.insee.trevas + vtl-engine + 1.8.0 + + + fr.insee.trevas + vtl-spark + 1.8.0 + + + fr.insee.trevas + vtl-model + 1.8.0 @@ -45,4 +60,17 @@ + + + + org.apache.maven.plugins + maven-surefire-plugin + 3.3.0 + + --add-exports java.base/sun.nio.ch=ALL-UNNAMED + + + + + \ No newline at end of file diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceListener.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceListener.java index c44fdc8e5..f4c42df53 100644 --- a/vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceListener.java +++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceListener.java @@ -1,5 +1,6 @@ package fr.insee.vtl.prov; +import fr.insee.vtl.model.Dataset; import fr.insee.vtl.parser.VtlBaseListener; import fr.insee.vtl.parser.VtlLexer; import fr.insee.vtl.parser.VtlParser; @@ -7,11 +8,15 @@ import fr.insee.vtl.prov.prov.Program; import fr.insee.vtl.prov.prov.ProgramStep; import fr.insee.vtl.prov.prov.VariableInstance; +import fr.insee.vtl.prov.utils.ProvenanceUtils; import org.antlr.v4.runtime.*; import org.antlr.v4.runtime.misc.Interval; import org.antlr.v4.runtime.tree.ParseTreeWalker; -import java.util.Set; +import javax.script.ScriptEngine; +import javax.script.ScriptException; +import java.util.*; +import java.util.concurrent.atomic.AtomicInteger; /** * ANTLR Listener that create provenance objects. @@ -26,6 +31,22 @@ public class ProvenanceListener extends VtlBaseListener { private String currentComponentID; + private String currentDataframeID; + + private int stepIndex = 1; + + private boolean rootAssignment = true; + + // Map of label/UUID id + private final Map availableDataframeUUID = new HashMap<>(); + + private Map currentAvailableDataframeUUID = new HashMap<>(); + + // Map of label/UUID id + private final Map availableVariableUUID = new HashMap<>(); + + private Map currentAvailableVariableUUID = new HashMap<>(); + public ProvenanceListener(String id, String programName) { program.setId(id); program.setLabel(programName); @@ -44,11 +65,14 @@ public void enterStart(VtlParser.StartContext ctx) { @Override public void enterTemporaryAssignment(VtlParser.TemporaryAssignmentContext ctx) { - String id = getText(ctx.varID()); + String label = getText(ctx.varID()); String sourceCode = getText(ctx); - currentProgramStep = id; - ProgramStep programStep = new ProgramStep(id, id, sourceCode); - DataframeInstance df = new DataframeInstance(id, id); + currentProgramStep = label; + ProgramStep programStep = new ProgramStep(label, sourceCode, stepIndex); + stepIndex++; + String dfId = UUID.randomUUID().toString(); + currentAvailableDataframeUUID.put(label, dfId); + DataframeInstance df = new DataframeInstance(dfId, label); programStep.setProducedDataframe(df); program.getProgramSteps().add(programStep); } @@ -56,15 +80,21 @@ public void enterTemporaryAssignment(VtlParser.TemporaryAssignmentContext ctx) { @Override public void exitTemporaryAssignment(VtlParser.TemporaryAssignmentContext ctx) { currentProgramStep = null; + availableDataframeUUID.putAll(currentAvailableDataframeUUID); + currentAvailableDataframeUUID = new HashMap<>(); + rootAssignment = true; } @Override public void enterPersistAssignment(VtlParser.PersistAssignmentContext ctx) { - String id = getText(ctx.varID()); + String label = getText(ctx.varID()); String sourceCode = getText(ctx); - currentProgramStep = id; - ProgramStep programStep = new ProgramStep(id, id, sourceCode); - DataframeInstance df = new DataframeInstance(id, id); + currentProgramStep = label; + ProgramStep programStep = new ProgramStep(label, sourceCode, stepIndex); + stepIndex++; + String dfId = UUID.randomUUID().toString(); + currentAvailableDataframeUUID.put(label, dfId); + DataframeInstance df = new DataframeInstance(dfId, label); programStep.setProducedDataframe(df); program.getProgramSteps().add(programStep); } @@ -72,23 +102,33 @@ public void enterPersistAssignment(VtlParser.PersistAssignmentContext ctx) { @Override public void exitPersistAssignment(VtlParser.PersistAssignmentContext ctx) { currentProgramStep = null; + availableDataframeUUID.putAll(currentAvailableDataframeUUID); + currentAvailableDataframeUUID = new HashMap<>(); + rootAssignment = true; } @Override public void enterVarID(VtlParser.VarIDContext ctx) { - String id = ctx.IDENTIFIER().getText(); - if (!id.equals(currentProgramStep)) { - ProgramStep programStep = program.getProgramStepById(currentProgramStep); + String label = ctx.IDENTIFIER().getText(); + if (!rootAssignment) { + ProgramStep programStep = program.getProgramStepByLabel(currentProgramStep); if (!isInDatasetClause) { Set consumedDataframe = programStep.getConsumedDataframe(); - DataframeInstance df = new DataframeInstance(id, id); + String dfId = ProvenanceUtils.getOrBuildUUID(availableDataframeUUID, label); + DataframeInstance df = new DataframeInstance(dfId, label); consumedDataframe.add(df); + // Certainly don't need to reset? To check! + currentDataframeID = label; } if (isInDatasetClause && null != currentComponentID) { Set usedVariables = programStep.getUsedVariables(); - VariableInstance v = new VariableInstance(id, id); + String varUUID = ProvenanceUtils.getOrBuildUUID(availableVariableUUID, currentDataframeID + "|" + label); + VariableInstance v = new VariableInstance(varUUID, label); + v.setParentDataframe(currentDataframeID); usedVariables.add(v); } + } else { + rootAssignment = false; } } @@ -104,11 +144,18 @@ public void exitDatasetClause(VtlParser.DatasetClauseContext ctx) { @Override public void enterComponentID(VtlParser.ComponentIDContext ctx) { - String id = ctx.getText(); - ProgramStep programStep = program.getProgramStepById(currentProgramStep); + String label = ctx.getText(); + ProgramStep programStep = program.getProgramStepByLabel(currentProgramStep); Set assignedVariables = programStep.getAssignedVariables(); - VariableInstance v = new VariableInstance(id, id); + String variableUUID = ProvenanceUtils.getOrBuildUUID(availableDataframeUUID, label); + VariableInstance v = new VariableInstance(variableUUID, label); assignedVariables.add(v); + currentAvailableVariableUUID.put(currentDataframeID + "|" + label, variableUUID); + } + + @Override + public void exitComponentID(VtlParser.ComponentIDContext ctx) { + System.out.println(ctx.IDENTIFIER()); } @Override @@ -119,6 +166,8 @@ public void enterCalcClauseItem(VtlParser.CalcClauseItemContext ctx) { @Override public void exitCalcClauseItem(VtlParser.CalcClauseItemContext ctx) { currentComponentID = null; + availableVariableUUID.putAll(currentAvailableVariableUUID); + currentAvailableVariableUUID = new HashMap<>(); } @Override @@ -148,4 +197,63 @@ public static Program run(String expr, String id, String programName) { return provenanceListener.getProgram(); } + public static Program runWithBindings(ScriptEngine engine, String expr, String id, String programName) { + Program program = run(expr, id, programName); + // 0 check if input dataset are empty? + // Keep already handled dataset + List dsHandled = new ArrayList<>(); + // Split script to loop over program steps and run them + AtomicInteger index = new AtomicInteger(1); + Arrays.stream(expr.split(";")) + .map(e -> e + ";") + .forEach(stepScript -> { + int i = index.getAndIncrement(); + // 1 - Handle input dataset + ProgramStep step = program.getProgramStepByIndex(i); + Set consumedDataframe = step.getConsumedDataframe(); + consumedDataframe.forEach(d -> { + if (!dsHandled.contains(d.getLabel())) { + Dataset ds = (Dataset) engine.getContext().getAttribute(d.getLabel()); + ds.getDataStructure().values().forEach(c -> { + VariableInstance variableInstance = step.getUsedVariables() + .stream() + .filter(v -> + v.getParentDataframe().equals(d.getLabel()) && + v.getLabel().equals(c.getName())) + .findFirst() + .orElse(new VariableInstance(c.getName())); + variableInstance.setRole(c.getRole()); + variableInstance.setType(c.getType()); + d.getHasVariableInstances().add(variableInstance); + }); + } + }); + try { + engine.eval(stepScript); + } catch (ScriptException e) { + throw new RuntimeException(e); + } + // Improve built variables attributes + DataframeInstance producedDataframe = step.getProducedDataframe(); + Dataset ds = (Dataset) engine.getContext().getAttribute(producedDataframe.getLabel()); + + ds.getDataStructure().values().forEach(c -> { + VariableInstance variableInstance = step.getAssignedVariables() + .stream() + .filter(v -> v.getLabel().equals(c.getName())) + .findFirst() + // TODO: refine variable detection in usedVariable + .orElse(new VariableInstance(c.getName())); + variableInstance.setRole(c.getRole()); + variableInstance.setType(c.getType()); + producedDataframe.getHasVariableInstances().add(variableInstance); + }); + dsHandled.add((producedDataframe.getLabel())); + // Correct usedVariables ID checking ds/var of last assignment + }); + + + return program; + } + } diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/DataframeInstance.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/DataframeInstance.java index 2860dc6e4..c25c4218b 100644 --- a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/DataframeInstance.java +++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/DataframeInstance.java @@ -1,9 +1,14 @@ package fr.insee.vtl.prov.prov; +import java.util.HashSet; +import java.util.Set; + public class DataframeInstance { String id; String label; + Set hasVariableInstances = new HashSet<>(); + public DataframeInstance(String id, String label) { this.id = id; this.label = label; @@ -24,4 +29,14 @@ public String getLabel() { public void setLabel(String label) { this.label = label; } + + public Set getHasVariableInstances() { + return hasVariableInstances; + } + + public void setHasVariableInstances(Set hasVariableInstances) { + this.hasVariableInstances = hasVariableInstances; + } + + } diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/Program.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/Program.java index ad11c7204..50a7571b5 100644 --- a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/Program.java +++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/Program.java @@ -3,12 +3,16 @@ import java.util.HashSet; import java.util.Set; +/* Filled thanks to listener, except for dataframInstances */ public class Program { String id; String label; Set programSteps = new HashSet<>(); + /* Provided running preview mode */ + Set dataframeInstances = new HashSet<>(); + String sourceCode; public Program() { @@ -43,6 +47,14 @@ public void setProgramSteps(Set programSteps) { this.programSteps = programSteps; } + public Set getDataframeInstances() { + return dataframeInstances; + } + + public void setDataframeInstances(Set dataframeInstances) { + this.dataframeInstances = dataframeInstances; + } + public String getSourceCode() { return sourceCode; } @@ -51,9 +63,16 @@ public void setSourceCode(String sourceCode) { this.sourceCode = sourceCode; } - public ProgramStep getProgramStepById(String id) { + public ProgramStep getProgramStepByLabel(String label) { + return programSteps.stream() + .filter(p -> p.getLabel().equals(label)) + .findFirst() + .orElse(null); + } + + public ProgramStep getProgramStepByIndex(int index) { return programSteps.stream() - .filter(p -> p.getId().equals(id)) + .filter(p -> p.getIndex() == index) .findFirst() .orElse(null); } diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/ProgramStep.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/ProgramStep.java index 421999d72..8f2cad975 100644 --- a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/ProgramStep.java +++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/ProgramStep.java @@ -2,26 +2,24 @@ import java.util.HashSet; import java.util.Set; +import java.util.UUID; public class ProgramStep { String id; String label; String sourceCode; - Set usedVariables = new HashSet<>();; - Set assignedVariables = new HashSet<>();; - - Set consumedDataframe = new HashSet<>();; + int index; + Set usedVariables = new HashSet<>(); + Set assignedVariables = new HashSet<>(); + Set consumedDataframe = new HashSet<>(); DataframeInstance producedDataframe; - - public ProgramStep() { - } - - public ProgramStep(String id, String label, String sourceCode) { - this.id = id; + public ProgramStep(String label, String sourceCode, int index) { + this.id = UUID.randomUUID().toString(); this.label = label; this.sourceCode = sourceCode; + this.index = index; } public String getId() { @@ -44,6 +42,14 @@ public String getSourceCode() { return sourceCode; } + public int getIndex() { + return index; + } + + public void setIndex(int index) { + this.index = index; + } + public void setSourceCode(String sourceCode) { this.sourceCode = sourceCode; } diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/VariableInstance.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/VariableInstance.java index b7cc00707..92e150f6d 100644 --- a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/VariableInstance.java +++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/VariableInstance.java @@ -1,8 +1,20 @@ package fr.insee.vtl.prov.prov; +import fr.insee.vtl.model.Dataset; + +import java.util.UUID; + public class VariableInstance { String id; String label; + Dataset.Role role; + String parentDataframe; + Class type; + + public VariableInstance(String label) { + this.id = UUID.randomUUID().toString(); + this.label = label; + } public VariableInstance(String id, String label) { this.id = id; @@ -24,4 +36,29 @@ public String getLabel() { public void setLabel(String label) { this.label = label; } + + public Dataset.Role getRole() { + return role; + } + + public void setRole(Dataset.Role role) { + this.role = role; + } + + public Class getType() { + return type; + } + + public void setType(Class type) { + this.type = type; + } + + + public String getParentDataframe() { + return parentDataframe; + } + + public void setParentDataframe(String parentDataframe) { + this.parentDataframe = parentDataframe; + } } diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceUtils.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/utils/ProvenanceUtils.java similarity index 68% rename from vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceUtils.java rename to vtl-prov/src/main/java/fr/insee/vtl/prov/utils/ProvenanceUtils.java index 80651142c..1a6b624bb 100644 --- a/vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceUtils.java +++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/utils/ProvenanceUtils.java @@ -1,17 +1,17 @@ -package fr.insee.vtl.prov; +package fr.insee.vtl.prov.utils; -import fr.insee.vtl.parser.VtlLexer; -import fr.insee.vtl.parser.VtlParser; -import fr.insee.vtl.prov.model.VTLDataset; -import org.antlr.v4.runtime.CharStreams; -import org.antlr.v4.runtime.CodePointCharStream; -import org.antlr.v4.runtime.CommonTokenStream; -import org.antlr.v4.runtime.tree.ParseTreeWalker; - -import java.util.*; +import java.util.Map; +import java.util.UUID; public class ProvenanceUtils { + public static String getOrBuildUUID(Map availableUUID, String label) { + if (null != availableUUID.get(label)) { + return availableUUID.get(label); + } + return UUID.randomUUID().toString(); + } + //public static List toBusinessModel(ProvenanceListener listener) { // // TODO: @nico te graph needs to be refactored. I'll try to fix it before monday. // @@ -25,12 +25,12 @@ public class ProvenanceUtils { // }); // return model; //} -// + // //public static void toJSON(ProvenanceListener.Node node) { -// + // //} -// + // //public static void toRDF(ProvenanceListener.Node node) { -// + // //} } diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/RDFUtils.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/utils/RDFUtils.java similarity index 85% rename from vtl-prov/src/main/java/fr/insee/vtl/prov/RDFUtils.java rename to vtl-prov/src/main/java/fr/insee/vtl/prov/utils/RDFUtils.java index 26fb91409..eede0af40 100644 --- a/vtl-prov/src/main/java/fr/insee/vtl/prov/RDFUtils.java +++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/utils/RDFUtils.java @@ -1,10 +1,9 @@ -package fr.insee.vtl.prov; +package fr.insee.vtl.prov.utils; import fr.insee.vtl.prov.prov.DataframeInstance; import fr.insee.vtl.prov.prov.Program; import fr.insee.vtl.prov.prov.ProgramStep; import fr.insee.vtl.prov.prov.VariableInstance; -import fr.insee.vtl.prov.utils.PROV; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.ModelFactory; import org.apache.jena.rdf.model.Property; @@ -13,6 +12,7 @@ import org.apache.jena.rdfconnection.RDFConnectionFactory; import org.apache.jena.vocabulary.RDF; import org.apache.jena.vocabulary.RDFS; +import org.apache.jena.vocabulary.XSD; import java.io.IOException; import java.io.StringWriter; @@ -60,7 +60,7 @@ public static void handleProgramStep(Model model, ProgramStep programStep) { Resource programStepURI = model.createResource(TREVAS_BASE_URI + "program-step/" + id); Resource SDTH_PROGRAM_STEP = model.createResource(SDTH_BASE_URI + "ProgramStep"); programStepURI.addProperty(RDF.type, SDTH_PROGRAM_STEP); - programStepURI.addProperty(RDFS.label, "Create " + id + " dataset"); + programStepURI.addProperty(RDFS.label, "Step " + programStep.getIndex()); String sourceCode = programStep.getSourceCode(); Property SDTH_HAS_SOURCE_CODE = model.createProperty(SDTH_BASE_URI + "hasSourceCode"); programStepURI.addProperty(SDTH_HAS_SOURCE_CODE, sourceCode); @@ -102,6 +102,12 @@ public static void handleDataframeInstance(Model model, DataframeInstance dfInst dfURI.addProperty(RDF.type, SDTH_DATAFRAME); String label = dfInstance.getLabel(); dfURI.addProperty(RDFS.label, label); + Property SDTH_USED_VARIABLE = model.createProperty(SDTH_BASE_URI + "hasVariableInstance"); + dfInstance.getHasVariableInstances().forEach(v -> { + Resource varAssignedURI = model.createResource(TREVAS_BASE_URI + "variable/" + v.getId()); + dfURI.addProperty(SDTH_USED_VARIABLE, varAssignedURI); + handleVariableInstance(model, v); + }); } public static void handleVariableInstance(Model model, VariableInstance varInstance) { @@ -112,6 +118,18 @@ public static void handleVariableInstance(Model model, VariableInstance varInsta varURI.addProperty(RDF.type, SDTH_VARIABLE); String label = varInstance.getLabel(); varURI.addProperty(RDFS.label, label); + if (null != varInstance.getRole()) { + String role = varInstance.getRole().toString(); + // TO EXTRACT + Property hasRole = model.createProperty("http://id.making-sense.info/vtl/component/hasRole"); + varURI.addProperty(hasRole, role); + } + if (null != varInstance.getType()) { + Class type = varInstance.getType(); + // TO EXTRACT + Property hasType = model.createProperty("http://id.making-sense.info/vtl/component/hasType"); + varURI.addProperty(hasType, VtlTypes.getVtlType(type)); + } } public static Model initModel(String baseFilePath) { diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/utils/VtlTypes.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/utils/VtlTypes.java new file mode 100644 index 000000000..463b5d167 --- /dev/null +++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/utils/VtlTypes.java @@ -0,0 +1,34 @@ +package fr.insee.vtl.prov.utils; + +import org.threeten.extra.Interval; +import org.threeten.extra.PeriodDuration; + +import java.time.Instant; + +public class VtlTypes { + + /** + * Method to map Java classes and VTL basic scalar types. + * + * @param clazz Java type. + * @return Basic scalar type + */ + public static String getVtlType(Class clazz) { + if (clazz.equals(String.class)) { + return "STRING"; + } else if (clazz.equals(Long.class)) { + return "INTEGER"; + } else if (clazz.equals(Double.class)) { + return "NUMBER"; + } else if (clazz.equals(Boolean.class)) { + return "BOOLEAN"; + } else if (clazz.equals(Instant.class)) { + return "DATE"; + } else if (clazz.equals(PeriodDuration.class)) { + return "DURATION"; + } else if (clazz.equals(Interval.class)) { + return "TIME_PERIOD"; + } + throw new UnsupportedOperationException("class " + clazz + " unsupported"); + } +} diff --git a/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java b/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java index 315071592..a999b29dc 100644 --- a/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java +++ b/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java @@ -1,11 +1,19 @@ package fr.insee.vtl.prov; +import fr.insee.vtl.engine.VtlScriptEngine; +import fr.insee.vtl.model.Dataset; +import fr.insee.vtl.model.InMemoryDataset; +import fr.insee.vtl.model.utils.Java8Helpers; import fr.insee.vtl.prov.prov.Program; import fr.insee.vtl.prov.utils.PropertiesLoader; +import fr.insee.vtl.prov.utils.RDFUtils; import org.apache.jena.rdf.model.Model; +import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import javax.script.ScriptEngine; +import javax.script.ScriptEngineManager; import java.io.File; import java.io.IOException; import java.util.Properties; @@ -70,6 +78,54 @@ public void simpleTest() throws IOException { assertThat(program.getProgramSteps()).hasSize(3); } + @Test + public void simpleTestWithBindings() throws IOException { + SparkSession spark = SparkSession.builder() + .appName("test") + .master("local") + .getOrCreate(); + + ScriptEngineManager mgr = new ScriptEngineManager(); + ScriptEngine engine = mgr.getEngineByExtension("vtl"); + engine.put(VtlScriptEngine.PROCESSING_ENGINE_NAMES, "spark"); + + InMemoryDataset ds1 = new InMemoryDataset( + Java8Helpers.listOf( + Java8Helpers.mapOf("id1", "A", "var1", 0L, "var2", 100L), + Java8Helpers.mapOf("id1", "B", "var1", 1L, "var2", 200L), + Java8Helpers.mapOf("id1", "C", "var1", 2L, "var2", 300L) + ), + Java8Helpers.mapOf("id1", String.class, "var1", Long.class, "var2", Long.class), + Java8Helpers.mapOf("id1", Dataset.Role.IDENTIFIER, "var1", Dataset.Role.MEASURE, "var2", Dataset.Role.MEASURE) + ); + InMemoryDataset ds2 = new InMemoryDataset( + Java8Helpers.listOf( + Java8Helpers.mapOf("id1", "A", "var1", 10L, "var2", 1L), + Java8Helpers.mapOf("id1", "B", "var1", 11L, "var2", 2L), + Java8Helpers.mapOf("id1", "D", "var1", 12L, "var2", 3L) + ), + Java8Helpers.mapOf("id1", String.class, "var1", Long.class, "var2", Long.class), + Java8Helpers.mapOf("id1", Dataset.Role.IDENTIFIER, "var1", Dataset.Role.MEASURE, "var2", Dataset.Role.MEASURE) + ); + + engine.put("ds1", ds1); + engine.put("ds2", ds2); + + String script = "ds1 := ds1[calc identifier id1 := id1, var1 := cast(var1, integer), var2 := cast(var2, integer)];\n" + + "ds2 := ds2[calc identifier id1 := id1, var1 := cast(var1, integer), var2 := cast(var2, integer)];\n" + + "ds_sum := ds1 + ds2;\n" + + "ds_mul := ds_sum * 3; \n" + + "ds_res <- ds_mul[filter mod(var1, 2) = 0][calc var_sum := var1 + var2];"; + + Program program = ProvenanceListener.runWithBindings(engine, script, "trevas-simple-test", "Simple test from Trevas tests"); + Model model = RDFUtils.buildModel(program); + String content = RDFUtils.serialize(model, "JSON-LD"); + assertThat(content).isNotEmpty(); + RDFUtils.loadModelWithCredentials(model, sparqlEndpoint, sparqlEndpointUser, sparlqEndpointPassword); + RDFUtils.writeJsonLdToFile(model, "src/test/resources/output/test-simple-with-bindings.json"); + assertThat(program.getProgramSteps()).hasSize(5); + } + @Test public void bpeTest() throws IOException { diff --git a/vtl-sdmx/pom.xml b/vtl-sdmx/pom.xml index d118eb258..3f6abf6d4 100644 --- a/vtl-sdmx/pom.xml +++ b/vtl-sdmx/pom.xml @@ -7,13 +7,13 @@ fr.insee.trevas trevas-parent - 1.7.0 + 1.8.0 vtl-sdmx VTL SDMX Dataset wrapper around SDMX - 1.7.0 + 1.8.0 @@ -40,24 +40,24 @@ fr.insee.trevas vtl-model - 1.7.0 + 1.8.0 fr.insee.trevas vtl-engine - 1.7.0 + 1.8.0 test fr.insee.trevas vtl-csv - 1.7.0 + 1.8.0 test fr.insee.trevas vtl-spark - 1.7.0 + 1.8.0 test diff --git a/vtl-spark/pom.xml b/vtl-spark/pom.xml index 5406c2d0d..4e87204ac 100644 --- a/vtl-spark/pom.xml +++ b/vtl-spark/pom.xml @@ -7,13 +7,13 @@ fr.insee.trevas trevas-parent - 1.7.0 + 1.8.0 vtl-spark VTL Spark Trevas engine for Apache Spark - 1.7.0 + 1.8.0 @@ -25,13 +25,13 @@ fr.insee.trevas vtl-model - 1.7.0 + 1.8.0 compile fr.insee.trevas vtl-engine - 1.7.0 + 1.8.0 test