From 18df95bd11da545d442d9a12924ad86321f85d91 Mon Sep 17 00:00:00 2001 From: shirly121 Date: Fri, 13 Dec 2024 16:16:11 +0800 Subject: [PATCH 1/4] fix bugs of aggregate column order mismatch --- .../cypher/antlr4/visitor/ColumnOrder.java | 96 +++++++++++++++++++ .../antlr4/visitor/GraphBuilderVisitor.java | 74 +++++++------- .../common/ir/planner/cbo/BITest.java | 4 +- .../common/ir/planner/cbo/LdbcTest.java | 22 +++-- .../graphscope/cypher/antlr4/MatchTest.java | 18 ++++ 5 files changed, 161 insertions(+), 53 deletions(-) create mode 100644 interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/antlr4/visitor/ColumnOrder.java diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/antlr4/visitor/ColumnOrder.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/antlr4/visitor/ColumnOrder.java new file mode 100644 index 000000000000..10ed8eebe337 --- /dev/null +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/antlr4/visitor/ColumnOrder.java @@ -0,0 +1,96 @@ +/* + * + * * Copyright 2020 Alibaba Group Holding Limited. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.alibaba.graphscope.cypher.antlr4.visitor; + +import com.alibaba.graphscope.common.ir.tools.GraphBuilder; +import com.google.common.base.Objects; + +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexNode; +import org.checkerframework.checker.nullness.qual.Nullable; + +import java.util.List; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +/** + * ColumnOrder keeps fields as the same order with RETURN clause + */ +public class ColumnOrder { + public static class Field { + private final RexNode expr; + private final String alias; + + public Field(RexNode expr, String alias) { + this.expr = expr; + this.alias = alias; + } + + public RexNode getExpr() { + return expr; + } + + public String getAlias() { + return alias; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Field field = (Field) o; + return Objects.equal(expr, field.expr) && Objects.equal(alias, field.alias); + } + + @Override + public int hashCode() { + return Objects.hashCode(expr, alias); + } + } + + public interface FieldSupplier { + Field get(RelDataType inputType); + + class Default implements FieldSupplier { + private final GraphBuilder builder; + private final Supplier ordinalSupplier; + + public Default(GraphBuilder builder, Supplier ordinalSupplier) { + this.builder = builder; + this.ordinalSupplier = ordinalSupplier; + } + + @Override + public Field get(RelDataType inputType) { + String aliasName = inputType.getFieldList().get(ordinalSupplier.get()).getName(); + return new Field(this.builder.variable(aliasName), aliasName); + } + } + } + + private final List fieldSuppliers; + + public ColumnOrder(List fieldSuppliers) { + this.fieldSuppliers = fieldSuppliers; + } + + public @Nullable List getFields(RelDataType inputType) { + return this.fieldSuppliers.stream().map(k -> k.get(inputType)).collect(Collectors.toList()); + } +} diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/antlr4/visitor/GraphBuilderVisitor.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/antlr4/visitor/GraphBuilderVisitor.java index cab36fdc1348..ee094b7a22ab 100644 --- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/antlr4/visitor/GraphBuilderVisitor.java +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/antlr4/visitor/GraphBuilderVisitor.java @@ -18,13 +18,11 @@ import com.alibaba.graphscope.common.antlr4.ExprUniqueAliasInfer; import com.alibaba.graphscope.common.antlr4.ExprVisitorResult; -import com.alibaba.graphscope.common.ir.rel.GraphLogicalAggregate; import com.alibaba.graphscope.common.ir.rel.GraphProcedureCall; import com.alibaba.graphscope.common.ir.rel.graph.GraphLogicalGetV; import com.alibaba.graphscope.common.ir.rel.graph.GraphLogicalPathExpand; import com.alibaba.graphscope.common.ir.rel.type.group.GraphAggCall; import com.alibaba.graphscope.common.ir.rex.RexTmpVariableConverter; -import com.alibaba.graphscope.common.ir.rex.RexVariableAliasCollector; import com.alibaba.graphscope.common.ir.tools.GraphBuilder; import com.alibaba.graphscope.common.ir.tools.config.GraphOpt; import com.alibaba.graphscope.grammar.CypherGSBaseVisitor; @@ -39,6 +37,7 @@ import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexSubQuery; @@ -49,6 +48,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Objects; +import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; public class GraphBuilderVisitor extends CypherGSBaseVisitor { @@ -271,9 +271,8 @@ public GraphBuilder visitOC_ProjectionBody(CypherGSParser.OC_ProjectionBodyConte List keyExprs = new ArrayList<>(); List keyAliases = new ArrayList<>(); List aggCalls = new ArrayList<>(); - List extraExprs = new ArrayList<>(); - List extraAliases = new ArrayList<>(); - if (isGroupPattern(ctx, keyExprs, keyAliases, aggCalls, extraExprs, extraAliases)) { + AtomicReference columnManagerRef = new AtomicReference<>(); + if (isGroupPattern(ctx, keyExprs, keyAliases, aggCalls, columnManagerRef)) { RelBuilder.GroupKey groupKey; if (keyExprs.isEmpty()) { groupKey = builder.groupKey(); @@ -281,39 +280,25 @@ public GraphBuilder visitOC_ProjectionBody(CypherGSParser.OC_ProjectionBodyConte groupKey = builder.groupKey(keyExprs, keyAliases); } builder.aggregate(groupKey, aggCalls); - if (!extraExprs.isEmpty()) { + RelDataType inputType = builder.peek().getRowType(); + List originalFields = + inputType.getFieldList().stream() + .map( + k -> + new ColumnOrder.Field( + builder.variable(k.getName()), k.getName())) + .collect(Collectors.toList()); + List newFields = columnManagerRef.get().getFields(inputType); + if (!originalFields.equals(newFields)) { + List extraExprs = new ArrayList<>(); + List<@Nullable String> extraAliases = new ArrayList<>(); RexTmpVariableConverter converter = new RexTmpVariableConverter(true, builder); - extraExprs = - extraExprs.stream() - .map(k -> k.accept(converter)) - .collect(Collectors.toList()); - List projectExprs = Lists.newArrayList(); - List projectAliases = Lists.newArrayList(); - List extraVarNames = Lists.newArrayList(); - RexVariableAliasCollector varNameCollector = - new RexVariableAliasCollector<>( - true, - v -> { - String[] splits = v.getName().split("\\."); - return splits[0]; - }); - extraExprs.forEach(k -> extraVarNames.addAll(k.accept(varNameCollector))); - GraphLogicalAggregate aggregate = (GraphLogicalAggregate) builder.peek(); - aggregate - .getRowType() - .getFieldList() - .forEach( - field -> { - if (!extraVarNames.contains(field.getName())) { - projectExprs.add(builder.variable(field.getName())); - projectAliases.add(field.getName()); - } - }); - for (int i = 0; i < extraExprs.size(); ++i) { - projectExprs.add(extraExprs.get(i)); - projectAliases.add(extraAliases.get(i)); - } - builder.project(projectExprs, projectAliases, false); + newFields.forEach( + k -> { + extraExprs.add(k.getExpr().accept(converter)); + extraAliases.add(k.getAlias()); + }); + builder.project(extraExprs, extraAliases, false); } } else if (isDistinct) { builder.aggregate(builder.groupKey(keyExprs, keyAliases)); @@ -334,21 +319,27 @@ private boolean isGroupPattern( List keyExprs, List keyAliases, List aggCalls, - List extraExprs, - List extraAliases) { + AtomicReference columnManagerRef) { + List fieldSuppliers = Lists.newArrayList(); for (CypherGSParser.OC_ProjectionItemContext itemCtx : ctx.oC_ProjectionItems().oC_ProjectionItem()) { ExprVisitorResult item = expressionVisitor.visitOC_Expression(itemCtx.oC_Expression()); String alias = (itemCtx.AS() == null) ? null : itemCtx.oC_Variable().getText(); if (item.getAggCalls().isEmpty()) { + int ordinal = keyExprs.size(); + fieldSuppliers.add(new ColumnOrder.FieldSupplier.Default(builder, () -> ordinal)); keyExprs.add(item.getExpr()); keyAliases.add(alias); } else { if (item.getExpr() instanceof RexCall) { - extraExprs.add(item.getExpr()); - extraAliases.add(alias); + fieldSuppliers.add( + (RelDataType type) -> new ColumnOrder.Field(item.getExpr(), alias)); aggCalls.addAll(item.getAggCalls()); } else if (item.getAggCalls().size() == 1) { // count(a.name) + int ordinal = aggCalls.size(); + fieldSuppliers.add( + new ColumnOrder.FieldSupplier.Default( + builder, () -> keyExprs.size() + ordinal)); GraphAggCall original = (GraphAggCall) item.getAggCalls().get(0); aggCalls.add( new GraphAggCall( @@ -362,6 +353,7 @@ private boolean isGroupPattern( } } } + columnManagerRef.set(new ColumnOrder(fieldSuppliers)); return !aggCalls.isEmpty(); } diff --git a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/common/ir/planner/cbo/BITest.java b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/common/ir/planner/cbo/BITest.java index 788d5e19909f..563cebe096b6 100644 --- a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/common/ir/planner/cbo/BITest.java +++ b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/common/ir/planner/cbo/BITest.java @@ -124,8 +124,8 @@ public void bi1_test() { + " totalMessageCount)], isAppend=[false])\n" + " GraphLogicalProject(totalMessageCount=[totalMessageCount], year=[year]," + " isComment=[isComment], lengthCategory=[lengthCategory]," - + " messageCount=[messageCount], sumMessageLength=[sumMessageLength]," - + " averageMessageLength=[/(EXPR$2, EXPR$3)], isAppend=[false])\n" + + " messageCount=[messageCount], averageMessageLength=[/(EXPR$2, EXPR$3)]," + + " sumMessageLength=[sumMessageLength], isAppend=[false])\n" + " GraphLogicalAggregate(keys=[{variables=[totalMessageCount, year, $f0," + " $f1], aliases=[totalMessageCount, year, isComment, lengthCategory]}]," + " values=[[{operands=[message], aggFunction=COUNT, alias='messageCount'," diff --git a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/common/ir/planner/cbo/LdbcTest.java b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/common/ir/planner/cbo/LdbcTest.java index ea08d443639f..1402ebaa1aec 100644 --- a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/common/ir/planner/cbo/LdbcTest.java +++ b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/common/ir/planner/cbo/LdbcTest.java @@ -511,30 +511,32 @@ public void ldbc7_test() { + " messageContent=[message.content], messageImageFile=[message.imageFile]," + " minutesLatency=[/(/(-(likeTime, message.creationDate), 1000), 60)]," + " isNew=[isNew], isAppend=[false])\n" - + " GraphLogicalAggregate(keys=[{variables=[liker, person, isNew]," + + " GraphLogicalProject(liker=[liker], person=[person], message=[message]," + + " likeTime=[likeTime], isNew=[isNew], isAppend=[false])\n" + + " GraphLogicalAggregate(keys=[{variables=[liker, person, isNew]," + " aliases=[liker, person, isNew]}], values=[[{operands=[message]," + " aggFunction=FIRST_VALUE, alias='message', distinct=false}," + " {operands=[likeTime], aggFunction=FIRST_VALUE, alias='likeTime'," + " distinct=false}]])\n" - + " GraphLogicalSort(sort0=[likeTime], sort1=[message.id], dir0=[DESC]," + + " GraphLogicalSort(sort0=[likeTime], sort1=[message.id], dir0=[DESC]," + " dir1=[ASC])\n" - + " GraphLogicalProject(liker=[liker], message=[message]," + + " GraphLogicalProject(liker=[liker], message=[message]," + " likeTime=[like.creationDate], person=[person], isNew=[IS NULL(k)]," + " isAppend=[false])\n" - + " MultiJoin(joinFilter=[=(liker, liker)], isFullOuterJoin=[false]," + + " MultiJoin(joinFilter=[=(liker, liker)], isFullOuterJoin=[false]," + " joinTypes=[[INNER, INNER]], outerJoinConditions=[[NULL, NULL]]," + " projFields=[[ALL, ALL]])\n" - + " GraphLogicalGetV(tableConfig=[{isAll=false, tables=[PERSON]}]," + + " GraphLogicalGetV(tableConfig=[{isAll=false, tables=[PERSON]}]," + " alias=[liker], opt=[START])\n" - + " GraphLogicalExpand(tableConfig=[{isAll=false," + + " GraphLogicalExpand(tableConfig=[{isAll=false," + " tables=[LIKES]}], alias=[like], startAlias=[message], opt=[IN])\n" - + " CommonTableScan(table=[[common#378747223]])\n" - + " GraphLogicalGetV(tableConfig=[{isAll=false, tables=[PERSON]}]," + + " CommonTableScan(table=[[common#378747223]])\n" + + " GraphLogicalGetV(tableConfig=[{isAll=false, tables=[PERSON]}]," + " alias=[liker], opt=[OTHER])\n" - + " GraphLogicalExpand(tableConfig=[{isAll=false," + + " GraphLogicalExpand(tableConfig=[{isAll=false," + " tables=[KNOWS]}], alias=[k], startAlias=[person], opt=[BOTH]," + " optional=[true])\n" - + " CommonTableScan(table=[[common#378747223]])\n" + + " CommonTableScan(table=[[common#378747223]])\n" + "common#378747223:\n" + "GraphPhysicalExpand(tableConfig=[{isAll=false, tables=[HASCREATOR]}]," + " alias=[message], startAlias=[person], opt=[IN], physicalOpt=[VERTEX])\n" diff --git a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/antlr4/MatchTest.java b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/antlr4/MatchTest.java index 68e3318dfe42..66daabe3b943 100644 --- a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/antlr4/MatchTest.java +++ b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/antlr4/MatchTest.java @@ -713,4 +713,22 @@ public void optional_shortest_path_test() { + " alias=[p1], opt=[VERTEX], uniqueKeyFilters=[=(_.id, ?0)])", after.explain().trim()); } + + // the return column order should align with the query given + @Test + public void aggregate_column_order_test() { + GraphBuilder builder = + com.alibaba.graphscope.common.ir.Utils.mockGraphBuilder(optimizer, irMeta); + RelNode node = + Utils.eval("Match (n:person) Return count(n), n, sum(n.age)", builder).build(); + RelNode after = optimizer.optimize(node, new GraphIOProcessor(builder, irMeta)); + Assert.assertEquals( + "GraphLogicalProject($f1=[$f1], n=[n], $f2=[$f2], isAppend=[false])\n" + + " GraphLogicalAggregate(keys=[{variables=[n], aliases=[n]}]," + + " values=[[{operands=[n], aggFunction=COUNT, alias='$f1', distinct=false}," + + " {operands=[n.age], aggFunction=SUM, alias='$f2', distinct=false}]])\n" + + " GraphLogicalSource(tableConfig=[{isAll=false, tables=[person]}]," + + " alias=[n], opt=[VERTEX])", + after.explain().trim()); + } } From d614b4841fbd4203f559dd8029a92ca51036f400 Mon Sep 17 00:00:00 2001 From: BingqingLyu Date: Wed, 25 Dec 2024 11:52:34 +0800 Subject: [PATCH 2/4] minor fix --- .github/workflows/gaia.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/gaia.yml b/.github/workflows/gaia.yml index 6007d9e37a87..bc135601dfb6 100644 --- a/.github/workflows/gaia.yml +++ b/.github/workflows/gaia.yml @@ -52,6 +52,12 @@ jobs: ~/.cache/sccache key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + - name: Install Rust + uses: actions-rs/toolchain@v1 + with: + toolchain: 1.81.0 + override: true + - name: Rust Format Check run: | cd ${GITHUB_WORKSPACE}/interactive_engine/executor && ./check_format.sh From 5168e03322d70ef1a61316e105a8818b9f09cae5 Mon Sep 17 00:00:00 2001 From: shirly121 Date: Mon, 30 Dec 2024 17:52:54 +0800 Subject: [PATCH 3/4] skip movie tests on ir core based IR --- .../compiler/ir_experimental_ci.sh | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/interactive_engine/compiler/ir_experimental_ci.sh b/interactive_engine/compiler/ir_experimental_ci.sh index 66e470afa0b6..f46ff680d703 100755 --- a/interactive_engine/compiler/ir_experimental_ci.sh +++ b/interactive_engine/compiler/ir_experimental_ci.sh @@ -57,23 +57,23 @@ if [ $exit_code -ne 0 ]; then fi unset DISTRIBUTED_ENV -# Test4: run cypher movie tests on experimental store via ir-core -cd ${base_dir}/../executor/ir/target/release && DATA_PATH=/tmp/gstest/movie_graph_exp_bin RUST_LOG=info ./start_rpc_server --config ${base_dir}/../executor/ir/integrated/config & -sleep 5s -# start compiler service -cd ${base_dir} && make run graph.schema:=../executor/ir/core/resource/movie_schema.json & -sleep 10s -export ENGINE_TYPE=pegasus -# run cypher movie tests -cd ${base_dir} && make cypher_test -exit_code=$? -# clean service -ps -ef | grep "com.alibaba.graphscope.GraphServer" | grep -v grep | awk '{print $2}' | xargs kill -9 || true -# report test result -if [ $exit_code -ne 0 ]; then - echo "ir cypher movie integration test on experimental store fail" - exit 1 -fi +## Test4: run cypher movie tests on experimental store via ir-core +#cd ${base_dir}/../executor/ir/target/release && DATA_PATH=/tmp/gstest/movie_graph_exp_bin RUST_LOG=info ./start_rpc_server --config ${base_dir}/../executor/ir/integrated/config & +#sleep 5s +## start compiler service +#cd ${base_dir} && make run graph.schema:=../executor/ir/core/resource/movie_schema.json & +#sleep 10s +#export ENGINE_TYPE=pegasus +## run cypher movie tests +#cd ${base_dir} && make cypher_test +#exit_code=$? +## clean service +#ps -ef | grep "com.alibaba.graphscope.GraphServer" | grep -v grep | awk '{print $2}' | xargs kill -9 || true +## report test result +#if [ $exit_code -ne 0 ]; then +# echo "ir cypher movie integration test on experimental store fail" +# exit 1 +#fi # Test5: run cypher movie tests on experimental store via calcite-based ir From ce50d846080097bc679a917be08aae1de708bcf1 Mon Sep 17 00:00:00 2001 From: shirly121 Date: Tue, 31 Dec 2024 19:00:33 +0800 Subject: [PATCH 4/4] fix ci tests --- interactive_engine/compiler/ir_experimental_ci.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/interactive_engine/compiler/ir_experimental_ci.sh b/interactive_engine/compiler/ir_experimental_ci.sh index f46ff680d703..510efe100654 100755 --- a/interactive_engine/compiler/ir_experimental_ci.sh +++ b/interactive_engine/compiler/ir_experimental_ci.sh @@ -77,6 +77,9 @@ unset DISTRIBUTED_ENV # Test5: run cypher movie tests on experimental store via calcite-based ir +# start engine service and load movie graph +cd ${base_dir}/../executor/ir/target/release && DATA_PATH=/tmp/gstest/movie_graph_exp_bin RUST_LOG=info ./start_rpc_server --config ${base_dir}/../executor/ir/integrated/config & +sleep 5s # restart compiler service cd ${base_dir} && make run graph.schema:=../executor/ir/core/resource/movie_schema.json graph.planner.opt=CBO graph.statistics:=./src/test/resources/statistics/movie_statistics.json graph.physical.opt=proto graph.planner.rules=FilterIntoJoinRule,FilterMatchRule,ExtendIntersectRule,ExpandGetVFusionRule & sleep 10s