From 20c10ce80892c2ca1588ab104e1ba737d29685e1 Mon Sep 17 00:00:00 2001 From: xzj7019 <131111794+xzj7019@users.noreply.github.com> Date: Fri, 15 Mar 2024 11:49:16 +0800 Subject: [PATCH] [opt](Nereids) remove group by key eliminate rule option (#32238) --- .../doris/nereids/jobs/executor/Rewriter.java | 2 +- .../nereids/rules/rewrite/ColumnPruning.java | 29 +- .../rules/rewrite/EliminateGroupByKey.java | 5 - .../eliminate_gby_key/eliminate_gby_key.out | 164 ++++++++++++ .../eliminate_gby_key.groovy | 251 ++++++++++++++++++ 5 files changed, 417 insertions(+), 34 deletions(-) create mode 100644 regression-test/data/nereids_rules_p0/eliminate_gby_key/eliminate_gby_key.out create mode 100644 regression-test/suites/nereids_rules_p0/eliminate_gby_key/eliminate_gby_key.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java index 16ae4a31435d2a..6dee5ef18f8329 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java @@ -328,7 +328,7 @@ public class Rewriter extends AbstractBatchJobExecutor { custom(RuleType.ELIMINATE_UNNECESSARY_PROJECT, EliminateUnnecessaryProject::new) ), - // this rule should invoke after topic "Join pull up" + // this rule should be invoked after topic "Join pull up" topic("eliminate group by keys according to fd items", topDown(new EliminateGroupByKey()) ), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java index 4d0c9be368d1fa..f33f1658c32e29 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java @@ -18,7 +18,6 @@ package org.apache.doris.nereids.rules.rewrite; import org.apache.doris.nereids.jobs.JobContext; -import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.rules.rewrite.ColumnPruning.PruneContext; import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.Expression; @@ -40,7 +39,6 @@ import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; import org.apache.doris.nereids.util.ExpressionUtils; -import org.apache.doris.qe.ConnectContext; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -207,16 +205,8 @@ public Plan visitLogicalRepeat(LogicalRepeat repeat, PruneContex private Plan pruneAggregate(Aggregate agg, PruneContext context) { // first try to prune group by and aggregate functions Aggregate prunedOutputAgg = pruneOutput(agg, agg.getOutputs(), agg::pruneOutputs, context); - Set enableNereidsRules = ConnectContext.get().getSessionVariable().getEnableNereidsRules(); - Aggregate fillUpAggr; - if (!enableNereidsRules.contains(RuleType.ELIMINATE_GROUP_BY_KEY.type())) { - fillUpAggr = fillUpGroupByToOutput(prunedOutputAgg) - .map(fullOutput -> prunedOutputAgg.withAggOutput(fullOutput)) - .orElse(prunedOutputAgg); - } else { - fillUpAggr = fillUpGroupByAndOutput(prunedOutputAgg); - } + Aggregate fillUpAggr = fillUpGroupByAndOutput(prunedOutputAgg); return pruneChildren(fillUpAggr); } @@ -229,23 +219,6 @@ private Plan skipPruneThisAndFirstLevelChildren(Plan plan) { return pruneChildren(plan, requireAllOutputOfChildren); } - private static Optional> fillUpGroupByToOutput(Aggregate prunedOutputAgg) { - List groupBy = prunedOutputAgg.getGroupByExpressions(); - List output = prunedOutputAgg.getOutputExpressions(); - - if (output.containsAll(groupBy)) { - return Optional.empty(); - } - - List aggFunctions = Lists.newArrayList(output); - aggFunctions.removeAll(groupBy); - - return Optional.of(ImmutableList.builder() - .addAll((List) groupBy) - .addAll(aggFunctions) - .build()); - } - private static Aggregate fillUpGroupByAndOutput(Aggregate prunedOutputAgg) { List groupBy = prunedOutputAgg.getGroupByExpressions(); List output = prunedOutputAgg.getOutputExpressions(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKey.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKey.java index 69a34a680ec51c..9853b7e6ff9ac3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKey.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKey.java @@ -25,7 +25,6 @@ import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; -import org.apache.doris.qe.ConnectContext; import com.google.common.collect.ImmutableSet; @@ -42,10 +41,6 @@ public class EliminateGroupByKey extends OneRewriteRuleFactory { @Override public Rule build() { return logicalAggregate(logicalProject()).then(agg -> { - Set enableNereidsRules = ConnectContext.get().getSessionVariable().getEnableNereidsRules(); - if (!enableNereidsRules.contains(RuleType.ELIMINATE_GROUP_BY_KEY.type())) { - return null; - } LogicalPlan childPlan = agg.child(); List uniqueFdItems = new ArrayList<>(); List nonUniqueFdItems = new ArrayList<>(); diff --git a/regression-test/data/nereids_rules_p0/eliminate_gby_key/eliminate_gby_key.out b/regression-test/data/nereids_rules_p0/eliminate_gby_key/eliminate_gby_key.out new file mode 100644 index 00000000000000..a7e82975a539e5 --- /dev/null +++ b/regression-test/data/nereids_rules_p0/eliminate_gby_key/eliminate_gby_key.out @@ -0,0 +1,164 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !1 -- +cost = 12.09 +PhysicalResultSink[358] ( outputExprs=[t2_c1#10] ) ++--PhysicalDistribute[355]@7 ( stats=1, distributionSpec=DistributionSpecGather ) + +--PhysicalProject[352]@7 ( stats=1, projects=[c1#13 AS `t2_c1`#10] ) + +--PhysicalHashAggregate[349]@6 ( aggPhase=LOCAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[c1#13, c3#18], outputExpr=[c1#13, c3#18], partitionExpr=Optional[[c1#13, c3#18]], requireProperties=[DistributionSpecHash ( orderedShuffledColumns=[13, 18], shuffleType=REQUIRE, tableId=-1, selectedIndexId=-1, partitionIds=[], equivalenceExprIds=[[13], [18]], exprIdToEquivalenceSet={13=0, 18=1} ) Order: ([])], stats=1 ) + +--PhysicalProject[346]@5 ( stats=1, projects=[c1#13, c3#18] ) + +--PhysicalHashJoin[343]@4 ( type=INNER_JOIN, stats=1, hashCondition=[(c3#15 = c2#17)], otherCondition=[], markCondition=[] ) + |--PhysicalProject[333]@1 ( stats=1, projects=[c1#13, c3#15] ) + | +--PhysicalOlapScan[t2]@0 ( stats=1 ) + +--PhysicalDistribute[340]@3 ( stats=1, distributionSpec=DistributionSpecReplicated ) + +--PhysicalProject[337]@3 ( stats=1, projects=[c2#17, c3#18] ) + +--PhysicalOlapScan[t1]@2 ( stats=1 ) + +-- !2 -- +cost = 11.09 +PhysicalResultSink[334] ( outputExprs=[t2_c2#9] ) ++--PhysicalDistribute[331]@6 ( stats=1, distributionSpec=DistributionSpecGather ) + +--PhysicalProject[328]@6 ( stats=1, projects=[t2_c2#19 AS `t2_c2`#9] ) + +--PhysicalHashAggregate[325]@5 ( aggPhase=LOCAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[c1#13, c3#18, t2_c2#19], outputExpr=[c1#13, c3#18, t2_c2#19], partitionExpr=Optional[[c1#13, c3#18, t2_c2#19]], requireProperties=[DistributionSpecHash ( orderedShuffledColumns=[13, 18, 19], shuffleType=REQUIRE, tableId=-1, selectedIndexId=-1, partitionIds=[], equivalenceExprIds=[[13], [18], [19]], exprIdToEquivalenceSet={13=0, 18=1, 19=2} ) Order: ([])], stats=1 ) + +--PhysicalProject[322]@4 ( stats=1, projects=[substring(c2#14, 1, 3) AS `t2_c2`#19, c1#13, c3#18] ) + +--PhysicalHashJoin[319]@3 ( type=INNER_JOIN, stats=1, hashCondition=[(c3#15 = c2#17)], otherCondition=[], markCondition=[] ) + |--PhysicalOlapScan[t2]@0 ( stats=1 ) + +--PhysicalDistribute[316]@2 ( stats=1, distributionSpec=DistributionSpecReplicated ) + +--PhysicalProject[313]@2 ( stats=1, projects=[c2#17, c3#18] ) + +--PhysicalOlapScan[t1]@1 ( stats=1 ) + +-- !3 -- +cost = 12.09 +PhysicalResultSink[358] ( outputExprs=[c3#11] ) ++--PhysicalDistribute[355]@7 ( stats=1, distributionSpec=DistributionSpecGather ) + +--PhysicalProject[352]@7 ( stats=1, projects=[c3#18 AS `c3`#11] ) + +--PhysicalHashAggregate[349]@6 ( aggPhase=LOCAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[c1#13, c3#18], outputExpr=[c1#13, c3#18], partitionExpr=Optional[[c1#13, c3#18]], requireProperties=[DistributionSpecHash ( orderedShuffledColumns=[13, 18], shuffleType=REQUIRE, tableId=-1, selectedIndexId=-1, partitionIds=[], equivalenceExprIds=[[13], [18]], exprIdToEquivalenceSet={13=0, 18=1} ) Order: ([])], stats=1 ) + +--PhysicalProject[346]@5 ( stats=1, projects=[c1#13, c3#18] ) + +--PhysicalHashJoin[343]@4 ( type=INNER_JOIN, stats=1, hashCondition=[(c3#15 = c2#17)], otherCondition=[], markCondition=[] ) + |--PhysicalProject[333]@1 ( stats=1, projects=[c1#13, c3#15] ) + | +--PhysicalOlapScan[t2]@0 ( stats=1 ) + +--PhysicalDistribute[340]@3 ( stats=1, distributionSpec=DistributionSpecReplicated ) + +--PhysicalProject[337]@3 ( stats=1, projects=[c2#17, c3#18] ) + +--PhysicalOlapScan[t1]@2 ( stats=1 ) + +-- !4 -- +cost = 12.09 +PhysicalResultSink[358] ( outputExprs=[cnt#12] ) ++--PhysicalDistribute[355]@7 ( stats=1, distributionSpec=DistributionSpecGather ) + +--PhysicalProject[352]@7 ( stats=1, projects=[cnt#20 AS `cnt`#12] ) + +--PhysicalHashAggregate[349]@6 ( aggPhase=LOCAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[c1#13, c3#18], outputExpr=[c1#13, c3#18, count(*) AS `cnt`#20], partitionExpr=Optional[[c1#13, c3#18]], requireProperties=[DistributionSpecHash ( orderedShuffledColumns=[13, 18], shuffleType=REQUIRE, tableId=-1, selectedIndexId=-1, partitionIds=[], equivalenceExprIds=[[13], [18]], exprIdToEquivalenceSet={13=0, 18=1} ) Order: ([])], stats=1 ) + +--PhysicalProject[346]@5 ( stats=1, projects=[c1#13, c3#18] ) + +--PhysicalHashJoin[343]@4 ( type=INNER_JOIN, stats=1, hashCondition=[(c3#15 = c2#17)], otherCondition=[], markCondition=[] ) + |--PhysicalProject[333]@1 ( stats=1, projects=[c1#13, c3#15] ) + | +--PhysicalOlapScan[t2]@0 ( stats=1 ) + +--PhysicalDistribute[340]@3 ( stats=1, distributionSpec=DistributionSpecReplicated ) + +--PhysicalProject[337]@3 ( stats=1, projects=[c2#17, c3#18] ) + +--PhysicalOlapScan[t1]@2 ( stats=1 ) + +-- !5 -- +cost = 11.09 +PhysicalResultSink[334] ( outputExprs=[t2_c2#9, t2_c1#10] ) ++--PhysicalDistribute[331]@6 ( stats=1, distributionSpec=DistributionSpecGather ) + +--PhysicalProject[328]@6 ( stats=1, projects=[t2_c2#19 AS `t2_c2`#9, c1#13 AS `t2_c1`#10] ) + +--PhysicalHashAggregate[325]@5 ( aggPhase=LOCAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[c1#13, c3#18, t2_c2#19], outputExpr=[c1#13, c3#18, t2_c2#19], partitionExpr=Optional[[c1#13, c3#18, t2_c2#19]], requireProperties=[DistributionSpecHash ( orderedShuffledColumns=[13, 18, 19], shuffleType=REQUIRE, tableId=-1, selectedIndexId=-1, partitionIds=[], equivalenceExprIds=[[13], [18], [19]], exprIdToEquivalenceSet={13=0, 18=1, 19=2} ) Order: ([])], stats=1 ) + +--PhysicalProject[322]@4 ( stats=1, projects=[substring(c2#14, 1, 3) AS `t2_c2`#19, c1#13, c3#18] ) + +--PhysicalHashJoin[319]@3 ( type=INNER_JOIN, stats=1, hashCondition=[(c3#15 = c2#17)], otherCondition=[], markCondition=[] ) + |--PhysicalOlapScan[t2]@0 ( stats=1 ) + +--PhysicalDistribute[316]@2 ( stats=1, distributionSpec=DistributionSpecReplicated ) + +--PhysicalProject[313]@2 ( stats=1, projects=[c2#17, c3#18] ) + +--PhysicalOlapScan[t1]@1 ( stats=1 ) + +-- !6 -- +cost = 12.09 +PhysicalResultSink[358] ( outputExprs=[c3#11, t2_c1#10] ) ++--PhysicalDistribute[355]@7 ( stats=1, distributionSpec=DistributionSpecGather ) + +--PhysicalProject[352]@7 ( stats=1, projects=[c3#18 AS `c3`#11, c1#13 AS `t2_c1`#10] ) + +--PhysicalHashAggregate[349]@6 ( aggPhase=LOCAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[c1#13, c3#18], outputExpr=[c1#13, c3#18], partitionExpr=Optional[[c1#13, c3#18]], requireProperties=[DistributionSpecHash ( orderedShuffledColumns=[13, 18], shuffleType=REQUIRE, tableId=-1, selectedIndexId=-1, partitionIds=[], equivalenceExprIds=[[13], [18]], exprIdToEquivalenceSet={13=0, 18=1} ) Order: ([])], stats=1 ) + +--PhysicalProject[346]@5 ( stats=1, projects=[c1#13, c3#18] ) + +--PhysicalHashJoin[343]@4 ( type=INNER_JOIN, stats=1, hashCondition=[(c3#15 = c2#17)], otherCondition=[], markCondition=[] ) + |--PhysicalProject[333]@1 ( stats=1, projects=[c1#13, c3#15] ) + | +--PhysicalOlapScan[t2]@0 ( stats=1 ) + +--PhysicalDistribute[340]@3 ( stats=1, distributionSpec=DistributionSpecReplicated ) + +--PhysicalProject[337]@3 ( stats=1, projects=[c2#17, c3#18] ) + +--PhysicalOlapScan[t1]@2 ( stats=1 ) + +-- !7 -- +cost = 11.09 +PhysicalResultSink[334] ( outputExprs=[c3#11, t2_c2#9] ) ++--PhysicalDistribute[331]@6 ( stats=1, distributionSpec=DistributionSpecGather ) + +--PhysicalProject[328]@6 ( stats=1, projects=[c3#18 AS `c3`#11, t2_c2#19 AS `t2_c2`#9] ) + +--PhysicalHashAggregate[325]@5 ( aggPhase=LOCAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[c1#13, c3#18, t2_c2#19], outputExpr=[c1#13, c3#18, t2_c2#19], partitionExpr=Optional[[c1#13, c3#18, t2_c2#19]], requireProperties=[DistributionSpecHash ( orderedShuffledColumns=[13, 18, 19], shuffleType=REQUIRE, tableId=-1, selectedIndexId=-1, partitionIds=[], equivalenceExprIds=[[13], [18], [19]], exprIdToEquivalenceSet={13=0, 18=1, 19=2} ) Order: ([])], stats=1 ) + +--PhysicalProject[322]@4 ( stats=1, projects=[substring(c2#14, 1, 3) AS `t2_c2`#19, c1#13, c3#18] ) + +--PhysicalHashJoin[319]@3 ( type=INNER_JOIN, stats=1, hashCondition=[(c3#15 = c2#17)], otherCondition=[], markCondition=[] ) + |--PhysicalOlapScan[t2]@0 ( stats=1 ) + +--PhysicalDistribute[316]@2 ( stats=1, distributionSpec=DistributionSpecReplicated ) + +--PhysicalProject[313]@2 ( stats=1, projects=[c2#17, c3#18] ) + +--PhysicalOlapScan[t1]@1 ( stats=1 ) + +-- !8 -- +cost = 12.09 +PhysicalResultSink[358] ( outputExprs=[t2_c1#10, cnt#12] ) ++--PhysicalDistribute[355]@7 ( stats=1, distributionSpec=DistributionSpecGather ) + +--PhysicalProject[352]@7 ( stats=1, projects=[c1#13 AS `t2_c1`#10, cnt#20 AS `cnt`#12] ) + +--PhysicalHashAggregate[349]@6 ( aggPhase=LOCAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[c1#13, c3#18], outputExpr=[c1#13, c3#18, count(*) AS `cnt`#20], partitionExpr=Optional[[c1#13, c3#18]], requireProperties=[DistributionSpecHash ( orderedShuffledColumns=[13, 18], shuffleType=REQUIRE, tableId=-1, selectedIndexId=-1, partitionIds=[], equivalenceExprIds=[[13], [18]], exprIdToEquivalenceSet={13=0, 18=1} ) Order: ([])], stats=1 ) + +--PhysicalProject[346]@5 ( stats=1, projects=[c1#13, c3#18] ) + +--PhysicalHashJoin[343]@4 ( type=INNER_JOIN, stats=1, hashCondition=[(c3#15 = c2#17)], otherCondition=[], markCondition=[] ) + |--PhysicalProject[333]@1 ( stats=1, projects=[c1#13, c3#15] ) + | +--PhysicalOlapScan[t2]@0 ( stats=1 ) + +--PhysicalDistribute[340]@3 ( stats=1, distributionSpec=DistributionSpecReplicated ) + +--PhysicalProject[337]@3 ( stats=1, projects=[c2#17, c3#18] ) + +--PhysicalOlapScan[t1]@2 ( stats=1 ) + +-- !9 -- +cost = 12.09 +PhysicalResultSink[358] ( outputExprs=[c3#11, cnt#12] ) ++--PhysicalDistribute[355]@7 ( stats=1, distributionSpec=DistributionSpecGather ) + +--PhysicalProject[352]@7 ( stats=1, projects=[c3#18 AS `c3`#11, cnt#20 AS `cnt`#12] ) + +--PhysicalHashAggregate[349]@6 ( aggPhase=LOCAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[c1#13, c3#18], outputExpr=[c1#13, c3#18, count(*) AS `cnt`#20], partitionExpr=Optional[[c1#13, c3#18]], requireProperties=[DistributionSpecHash ( orderedShuffledColumns=[13, 18], shuffleType=REQUIRE, tableId=-1, selectedIndexId=-1, partitionIds=[], equivalenceExprIds=[[13], [18]], exprIdToEquivalenceSet={13=0, 18=1} ) Order: ([])], stats=1 ) + +--PhysicalProject[346]@5 ( stats=1, projects=[c1#13, c3#18] ) + +--PhysicalHashJoin[343]@4 ( type=INNER_JOIN, stats=1, hashCondition=[(c3#15 = c2#17)], otherCondition=[], markCondition=[] ) + |--PhysicalProject[333]@1 ( stats=1, projects=[c1#13, c3#15] ) + | +--PhysicalOlapScan[t2]@0 ( stats=1 ) + +--PhysicalDistribute[340]@3 ( stats=1, distributionSpec=DistributionSpecReplicated ) + +--PhysicalProject[337]@3 ( stats=1, projects=[c2#17, c3#18] ) + +--PhysicalOlapScan[t1]@2 ( stats=1 ) + +-- !10 -- +cost = 12.09 +PhysicalResultSink[358] ( outputExprs=[t2_c1#10, c3#11, cnt#12] ) ++--PhysicalDistribute[355]@7 ( stats=1, distributionSpec=DistributionSpecGather ) + +--PhysicalProject[352]@7 ( stats=1, projects=[c1#13 AS `t2_c1`#10, c3#18 AS `c3`#11, cnt#20 AS `cnt`#12] ) + +--PhysicalHashAggregate[349]@6 ( aggPhase=LOCAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[c1#13, c3#18], outputExpr=[c1#13, c3#18, count(*) AS `cnt`#20], partitionExpr=Optional[[c1#13, c3#18]], requireProperties=[DistributionSpecHash ( orderedShuffledColumns=[13, 18], shuffleType=REQUIRE, tableId=-1, selectedIndexId=-1, partitionIds=[], equivalenceExprIds=[[13], [18]], exprIdToEquivalenceSet={13=0, 18=1} ) Order: ([])], stats=1 ) + +--PhysicalProject[346]@5 ( stats=1, projects=[c1#13, c3#18] ) + +--PhysicalHashJoin[343]@4 ( type=INNER_JOIN, stats=1, hashCondition=[(c3#15 = c2#17)], otherCondition=[], markCondition=[] ) + |--PhysicalProject[333]@1 ( stats=1, projects=[c1#13, c3#15] ) + | +--PhysicalOlapScan[t2]@0 ( stats=1 ) + +--PhysicalDistribute[340]@3 ( stats=1, distributionSpec=DistributionSpecReplicated ) + +--PhysicalProject[337]@3 ( stats=1, projects=[c2#17, c3#18] ) + +--PhysicalOlapScan[t1]@2 ( stats=1 ) + +-- !11 -- +cost = 11.09 +PhysicalResultSink[332] ( outputExprs=[t2_c2#9, c3#11, t2_c1#10] ) ++--PhysicalDistribute[329]@6 ( stats=1, distributionSpec=DistributionSpecGather ) + +--PhysicalProject[326]@6 ( stats=1, projects=[t2_c2#19 AS `t2_c2`#9, c3#18 AS `c3`#11, c1#13 AS `t2_c1`#10] ) + +--PhysicalHashAggregate[323]@5 ( aggPhase=LOCAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[c1#13, c3#18, t2_c2#19], outputExpr=[c1#13, c3#18, t2_c2#19], partitionExpr=Optional[[c1#13, c3#18, t2_c2#19]], requireProperties=[DistributionSpecHash ( orderedShuffledColumns=[13, 18, 19], shuffleType=REQUIRE, tableId=-1, selectedIndexId=-1, partitionIds=[], equivalenceExprIds=[[13], [18], [19]], exprIdToEquivalenceSet={13=0, 18=1, 19=2} ) Order: ([])], stats=1 ) + +--PhysicalProject[320]@4 ( stats=1, projects=[substring(c2#14, 1, 3) AS `t2_c2`#19, c1#13, c3#18] ) + +--PhysicalHashJoin[317]@3 ( type=INNER_JOIN, stats=1, hashCondition=[(c3#15 = c2#17)], otherCondition=[], markCondition=[] ) + |--PhysicalOlapScan[t2]@0 ( stats=1 ) + +--PhysicalDistribute[314]@2 ( stats=1, distributionSpec=DistributionSpecReplicated ) + +--PhysicalProject[311]@2 ( stats=1, projects=[c2#17, c3#18] ) + +--PhysicalOlapScan[t1]@1 ( stats=1 ) + +-- !12 -- +cost = 11.09 +PhysicalResultSink[330] ( outputExprs=[t2_c2#9, c3#11, t2_c1#10, cnt#12] ) ++--PhysicalDistribute[327]@6 ( stats=1, distributionSpec=DistributionSpecGather ) + +--PhysicalProject[324]@6 ( stats=1, projects=[t2_c2#19 AS `t2_c2`#9, c3#18 AS `c3`#11, c1#13 AS `t2_c1`#10, cnt#20 AS `cnt`#12] ) + +--PhysicalHashAggregate[321]@5 ( aggPhase=LOCAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[c1#13, c3#18, t2_c2#19], outputExpr=[c1#13, c3#18, t2_c2#19, count(*) AS `cnt`#20], partitionExpr=Optional[[c1#13, c3#18, t2_c2#19]], requireProperties=[DistributionSpecHash ( orderedShuffledColumns=[13, 18, 19], shuffleType=REQUIRE, tableId=-1, selectedIndexId=-1, partitionIds=[], equivalenceExprIds=[[13], [18], [19]], exprIdToEquivalenceSet={13=0, 18=1, 19=2} ) Order: ([])], stats=1 ) + +--PhysicalProject[318]@4 ( stats=1, projects=[substring(c2#14, 1, 3) AS `t2_c2`#19, c1#13, c3#18] ) + +--PhysicalHashJoin[315]@3 ( type=INNER_JOIN, stats=1, hashCondition=[(c3#15 = c2#17)], otherCondition=[], markCondition=[] ) + |--PhysicalOlapScan[t2]@0 ( stats=1 ) + +--PhysicalDistribute[312]@2 ( stats=1, distributionSpec=DistributionSpecReplicated ) + +--PhysicalProject[309]@2 ( stats=1, projects=[c2#17, c3#18] ) + +--PhysicalOlapScan[t1]@1 ( stats=1 ) + diff --git a/regression-test/suites/nereids_rules_p0/eliminate_gby_key/eliminate_gby_key.groovy b/regression-test/suites/nereids_rules_p0/eliminate_gby_key/eliminate_gby_key.groovy new file mode 100644 index 00000000000000..831e5e6eb3f206 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/eliminate_gby_key/eliminate_gby_key.groovy @@ -0,0 +1,251 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("eliminate_gby_key") { + sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" + + sql """DROP TABLE IF EXISTS t1;""" + sql """DROP TABLE IF EXISTS t2;""" + + sql """ + CREATE TABLE `t1` ( + `c1` int(20) DEFAULT NULL, + `c2` int(20) DEFAULT NULL, + `c3` int(20) DEFAULT NULL + ) + DUPLICATE KEY (`c1`) + DISTRIBUTED BY HASH(`c1`) BUCKETS 3 PROPERTIES("replication_num"="1"); + """ + + sql """ + CREATE TABLE `t2` ( + `c1` int(20) DEFAULT NULL, + `c2` varchar(20) DEFAULT NULL, + `c3` int(20) DEFAULT NULL + ) + DUPLICATE KEY (`c1`) + DISTRIBUTED BY HASH(`c1`) BUCKETS 3 PROPERTIES("replication_num"="1"); + """ + + sql """ + alter table t2 add constraint t2_c1_pk primary key (c1); + """ + + qt_1 """ + explain physical plan + with temp + as (select substr(t2.c2, 1, 3) t2_c2, + t2.c1 t2_c1, + t1.c3, + count(*) cnt + from t2 + join t1 + on t2.c3 = t1.c2 + group by substr(t2.c2, 1, 3), + t2.c1, + t1.c3) + select t2_c1 + from temp; + """ + + qt_2 """ + explain physical plan + with temp + as (select substr(t2.c2, 1, 3) t2_c2, + t2.c1 t2_c1, + t1.c3, + count(*) cnt + from t2 + join t1 + on t2.c3 = t1.c2 + group by substr(t2.c2, 1, 3), + t2.c1, + t1.c3) + select t2_c2 + from temp; + """ + + qt_3 """ + explain physical plan + with temp + as (select substr(t2.c2, 1, 3) t2_c2, + t2.c1 t2_c1, + t1.c3, + count(*) cnt + from t2 + join t1 + on t2.c3 = t1.c2 + group by substr(t2.c2, 1, 3), + t2.c1, + t1.c3) + select c3 + from temp; + """ + + qt_4 """ + explain physical plan + with temp + as (select substr(t2.c2, 1, 3) t2_c2, + t2.c1 t2_c1, + t1.c3, + count(*) cnt + from t2 + join t1 + on t2.c3 = t1.c2 + group by substr(t2.c2, 1, 3), + t2.c1, + t1.c3) + select cnt + from temp; + """ + + qt_5 """ + explain physical plan + with temp + as (select substr(t2.c2, 1, 3) t2_c2, + t2.c1 t2_c1, + t1.c3, + count(*) cnt + from t2 + join t1 + on t2.c3 = t1.c2 + group by substr(t2.c2, 1, 3), + t2.c1, + t1.c3) + select t2_c2, t2_c1 + from temp; + """ + + qt_6 """ + explain physical plan + with temp + as (select substr(t2.c2, 1, 3) t2_c2, + t2.c1 t2_c1, + t1.c3, + count(*) cnt + from t2 + join t1 + on t2.c3 = t1.c2 + group by substr(t2.c2, 1, 3), + t2.c1, + t1.c3) + select c3, t2_c1 + from temp; + """ + + qt_7 """ + explain physical plan + with temp + as (select substr(t2.c2, 1, 3) t2_c2, + t2.c1 t2_c1, + t1.c3, + count(*) cnt + from t2 + join t1 + on t2.c3 = t1.c2 + group by substr(t2.c2, 1, 3), + t2.c1, + t1.c3) + select c3, t2_c2 + from temp; + """ + + qt_8 """ + explain physical plan + with temp + as (select substr(t2.c2, 1, 3) t2_c2, + t2.c1 t2_c1, + t1.c3, + count(*) cnt + from t2 + join t1 + on t2.c3 = t1.c2 + group by substr(t2.c2, 1, 3), + t2.c1, + t1.c3) + select t2_c1, cnt + from temp; + """ + + qt_9 """ + explain physical plan + with temp + as (select substr(t2.c2, 1, 3) t2_c2, + t2.c1 t2_c1, + t1.c3, + count(*) cnt + from t2 + join t1 + on t2.c3 = t1.c2 + group by substr(t2.c2, 1, 3), + t2.c1, + t1.c3) + select c3, cnt + from temp; + """ + + qt_10 """ + explain physical plan + with temp + as (select substr(t2.c2, 1, 3) t2_c2, + t2.c1 t2_c1, + t1.c3, + count(*) cnt + from t2 + join t1 + on t2.c3 = t1.c2 + group by substr(t2.c2, 1, 3), + t2.c1, + t1.c3) + select t2_c1, c3, cnt + from temp; + """ + + qt_11 """ + explain physical plan + with temp + as (select substr(t2.c2, 1, 3) t2_c2, + t2.c1 t2_c1, + t1.c3, + count(*) cnt + from t2 + join t1 + on t2.c3 = t1.c2 + group by substr(t2.c2, 1, 3), + t2.c1, + t1.c3) + select t2_c2, c3, t2_c1 + from temp; + """ + + qt_12 """ + explain physical plan + with temp + as (select substr(t2.c2, 1, 3) t2_c2, + t2.c1 t2_c1, + t1.c3, + count(*) cnt + from t2 + join t1 + on t2.c3 = t1.c2 + group by substr(t2.c2, 1, 3), + t2.c1, + t1.c3) + select t2_c2, c3, t2_c1, cnt + from temp; + """ +}