diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java index c6bb6e6fdff2d91..8b2a09ca07b8043 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java @@ -67,6 +67,7 @@ class CostModelV1 extends PlanVisitor { // the penalty factor is no more than BROADCAST_JOIN_SKEW_PENALTY_LIMIT static final double BROADCAST_JOIN_SKEW_RATIO = 30.0; static final double BROADCAST_JOIN_SKEW_PENALTY_LIMIT = 2.0; + static final double RANDOM_SHUFFLE_TO_HASH_SHUFFLE_FACTOR = 0.1; private final int beNumber; public CostModelV1() { @@ -226,10 +227,11 @@ public Cost visitPhysicalDistribute( } // any + // cost of randome shuffle is lower than hash shuffle. return CostV1.of( - intputRowCount, 0, - 0); + 0, + intputRowCount * childStatistics.dataSizeFactor() * RANDOM_SHUFFLE_TO_HASH_SHUFFLE_FACTOR / beNumber); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java index 93f6e947894c832..4a2ecdd098b11d9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java @@ -41,6 +41,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalPartitionTopN; import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; import org.apache.doris.nereids.trees.plans.physical.PhysicalSetOperation; +import org.apache.doris.nereids.trees.plans.physical.PhysicalUnion; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; import org.apache.doris.nereids.util.JoinUtils; @@ -115,6 +116,16 @@ public Boolean visitPhysicalHashAggregate(PhysicalHashAggregate && children.get(0).getPlan() instanceof PhysicalDistribute) { return false; } + + // agg(group by x)-union all(A, B) + // no matter x.ndv is high or not, it is not worthwhile to shuffle A and B by x + // and hence we forbid one phase agg + if (agg.getAggMode() == AggMode.INPUT_TO_RESULT + && requiredProperties.get(0).getDistributionSpec() instanceof DistributionSpecHash + && children.get(0).getPlan() instanceof PhysicalUnion + && !((PhysicalUnion) children.get(0).getPlan()).isDistinct()) { + return false; + } // forbid multi distinct opt that bad than multi-stage version when multi-stage can be executed in one fragment if (agg.getAggMode() == AggMode.INPUT_TO_BUFFER || agg.getAggMode() == AggMode.INPUT_TO_RESULT) { List multiDistinctions = agg.getOutputExpressions().stream() diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalSetOperation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalSetOperation.java index 83f15e3f6688d97..ec9537e7cb981a0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalSetOperation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalSetOperation.java @@ -194,4 +194,8 @@ public List computeOutput() { .map(NamedExpression::toSlot) .collect(ImmutableList.toImmutableList()); } + + public boolean isDistinct() { + return qualifier == Qualifier.DISTINCT; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index c6b019f669b65d3..7fb81065c206e1a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -38,7 +38,7 @@ public class ColumnStatistic { public static final double STATS_ERROR = 0.1D; - + public static final double ALMOST_UNIQUE_FACTOR = 0.9; public static final StatsType NDV = StatsType.NDV; public static final StatsType AVG_SIZE = StatsType.AVG_SIZE; public static final StatsType MAX_SIZE = StatsType.MAX_SIZE; @@ -202,7 +202,7 @@ public static ColumnStatistic fromResultRow(ResultRow row) { } public static boolean isAlmostUnique(double ndv, double rowCount) { - return rowCount * 0.9 < ndv && ndv < rowCount * 1.1; + return rowCount * ALMOST_UNIQUE_FACTOR < ndv; } public ColumnStatistic updateByLimit(long limit, double rowCount) {