From d77250b238e54a504248b341c6ff3657df8f0902 Mon Sep 17 00:00:00 2001 From: minghong Date: Wed, 8 Nov 2023 17:15:53 +0800 Subject: [PATCH] agg on union all --- .../doris/nereids/cost/CostModelV1.java | 7 +- .../ChildrenPropertiesRegulator.java | 10 ++ .../plans/physical/PhysicalSetOperation.java | 4 + .../doris/statistics/ColumnStatistic.java | 4 +- .../shape/query1.out | 43 +++++++++ .../shape/query49.out | 89 ++++++++++++++++++ .../shape/query75.out | 81 ++++++++++++++++ .../shape/query1.out | 13 +-- .../shape/query49.out | 92 +++++++++++++++++-- .../shape/query75.out | 70 ++++++++++++-- .../aggregate/agg_union_random.groovy | 54 +++++++++++ 11 files changed, 437 insertions(+), 30 deletions(-) create mode 100644 regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query1.out create mode 100644 regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query49.out create mode 100644 regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query75.out create mode 100644 regression-test/suites/nereids_p0/aggregate/agg_union_random.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java index aa8f4d6cc7cfda..fb8d2b9f921a11 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java @@ -67,7 +67,8 @@ class CostModelV1 extends PlanVisitor { // the penalty factor is no more than BROADCAST_JOIN_SKEW_PENALTY_LIMIT static final double BROADCAST_JOIN_SKEW_RATIO = 30.0; static final double BROADCAST_JOIN_SKEW_PENALTY_LIMIT = 2.0; - private int beNumber = 1; + static final double RANDOM_SHUFFLE_TO_HASH_SHUFFLE_FACTOR = 0.1; + private final int beNumber; public CostModelV1() { if (ConnectContext.get().getSessionVariable().isPlayNereidsDump()) { @@ -236,9 +237,9 @@ public Cost visitPhysicalDistribute( // any return CostV1.of( - intputRowCount, 0, - 0); + 0, + 
intputRowCount * childStatistics.dataSizeFactor() * RANDOM_SHUFFLE_TO_HASH_SHUFFLE_FACTOR / beNumber); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java index a3a91880c5179a..e969e7e878b0cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java @@ -40,6 +40,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalNestedLoopJoin; import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; import org.apache.doris.nereids.trees.plans.physical.PhysicalSetOperation; +import org.apache.doris.nereids.trees.plans.physical.PhysicalUnion; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; import org.apache.doris.nereids.util.JoinUtils; @@ -114,6 +115,15 @@ public Boolean visitPhysicalHashAggregate(PhysicalHashAggregate && children.get(0).getPlan() instanceof PhysicalDistribute) { return false; } + + // agg(group by x)-union all(A, B) + // no matter x.ndv is high or not, it is not worthwhile to shuffle A and B by x + // and hence we forbid one phase agg + if (agg.getAggMode() == AggMode.INPUT_TO_RESULT + && children.get(0).getPlan() instanceof PhysicalUnion + && !((PhysicalUnion) children.get(0).getPlan()).isDistinct()) { + return false; + } // forbid multi distinct opt that bad than multi-stage version when multi-stage can be executed in one fragment if (agg.getAggMode() == AggMode.INPUT_TO_BUFFER || agg.getAggMode() == AggMode.INPUT_TO_RESULT) { List multiDistinctions = agg.getOutputExpressions().stream() diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalSetOperation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalSetOperation.java index 
b9358c5e205c73..07326a8afb4d1c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalSetOperation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalSetOperation.java @@ -143,4 +143,8 @@ public List computeOutput() { .map(NamedExpression::toSlot) .collect(ImmutableList.toImmutableList()); } + + public boolean isDistinct() { + return qualifier == Qualifier.DISTINCT; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index 82e0efdac1784d..1ec22cbc47b013 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -40,7 +40,7 @@ public class ColumnStatistic { public static final double STATS_ERROR = 0.1D; - + public static final double ALMOST_UNIQUE_FACTOR = 0.9; public static final StatsType NDV = StatsType.NDV; public static final StatsType AVG_SIZE = StatsType.AVG_SIZE; public static final StatsType MAX_SIZE = StatsType.MAX_SIZE; @@ -211,7 +211,7 @@ public static ColumnStatistic fromResultRow(ResultRow row) { } public static boolean isAlmostUnique(double ndv, double rowCount) { - return rowCount * 0.9 < ndv && ndv < rowCount * 1.1; + return rowCount * ALMOST_UNIQUE_FACTOR < ndv; } public ColumnStatistic updateByLimit(long limit, double rowCount) { diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query1.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query1.out new file mode 100644 index 00000000000000..50d0c4bce6773a --- /dev/null +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query1.out @@ -0,0 +1,43 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_1 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((store_returns.sr_returned_date_sk = date_dim.d_date_sk))otherCondition=() +----------------PhysicalProject +------------------PhysicalOlapScan[store_returns] +----------------PhysicalDistribute +------------------PhysicalProject +--------------------filter((date_dim.d_year = 2000)) +----------------------PhysicalOlapScan[date_dim] +--PhysicalResultSink +----PhysicalTopN +------PhysicalDistribute +--------PhysicalTopN +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk))otherCondition=() +--------------PhysicalDistribute +----------------PhysicalProject +------------------PhysicalOlapScan[customer] +--------------PhysicalDistribute +----------------hashJoin[INNER_JOIN] hashCondition=((ctr1.ctr_store_sk = ctr2.ctr_store_sk))otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN] hashCondition=((store.s_store_sk = ctr1.ctr_store_sk))otherCondition=() +----------------------PhysicalDistribute +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------PhysicalDistribute +------------------------PhysicalProject +--------------------------filter((store.s_state = 'TN')) +----------------------------PhysicalOlapScan[store] +------------------PhysicalDistribute +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute +------------------------hashAgg[LOCAL] +--------------------------PhysicalDistribute +----------------------------PhysicalProject 
+------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) + diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query49.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query49.out new file mode 100644 index 00000000000000..a8ab13bb0f8d30 --- /dev/null +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query49.out @@ -0,0 +1,89 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_49 -- +PhysicalResultSink +--PhysicalTopN +----PhysicalDistribute +------PhysicalTopN +--------hashAgg[GLOBAL] +----------PhysicalDistribute +------------hashAgg[LOCAL] +--------------PhysicalUnion +----------------PhysicalDistribute +------------------PhysicalProject +--------------------filter(((return_rank <= 10) OR (currency_rank <= 10))) +----------------------PhysicalWindow +------------------------PhysicalQuickSort +--------------------------PhysicalWindow +----------------------------PhysicalQuickSort +------------------------------PhysicalDistribute +--------------------------------PhysicalQuickSort +----------------------------------PhysicalProject +------------------------------------hashAgg[GLOBAL] +--------------------------------------PhysicalDistribute +----------------------------------------hashAgg[LOCAL] +------------------------------------------PhysicalProject +--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((ws.ws_item_sk = wr.wr_item_sk) and (ws.ws_order_number = wr.wr_order_number))otherCondition=() +----------------------------------------------PhysicalProject +------------------------------------------------filter((wr.wr_return_amt > 10000.00)) +--------------------------------------------------PhysicalOlapScan[web_returns] +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((ws.ws_sold_date_sk = date_dim.d_date_sk))otherCondition=() 
+------------------------------------------------PhysicalProject +--------------------------------------------------filter((ws.ws_net_paid > 0.00) and (ws.ws_net_profit > 1.00) and (ws.ws_quantity > 0)) +----------------------------------------------------PhysicalOlapScan[web_sales] +------------------------------------------------PhysicalDistribute +--------------------------------------------------PhysicalProject +----------------------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1998)) +------------------------------------------------------PhysicalOlapScan[date_dim] +----------------PhysicalDistribute +------------------PhysicalProject +--------------------filter(((return_rank <= 10) OR (currency_rank <= 10))) +----------------------PhysicalWindow +------------------------PhysicalQuickSort +--------------------------PhysicalWindow +----------------------------PhysicalQuickSort +------------------------------PhysicalDistribute +--------------------------------PhysicalQuickSort +----------------------------------PhysicalProject +------------------------------------hashAgg[GLOBAL] +--------------------------------------PhysicalDistribute +----------------------------------------hashAgg[LOCAL] +------------------------------------------PhysicalProject +--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((cs.cs_item_sk = cr.cr_item_sk) and (cs.cs_order_number = cr.cr_order_number))otherCondition=() +----------------------------------------------PhysicalProject +------------------------------------------------filter((cr.cr_return_amount > 10000.00)) +--------------------------------------------------PhysicalOlapScan[catalog_returns] +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((cs.cs_sold_date_sk = date_dim.d_date_sk))otherCondition=() +------------------------------------------------PhysicalProject 
+--------------------------------------------------filter((cs.cs_net_paid > 0.00) and (cs.cs_net_profit > 1.00) and (cs.cs_quantity > 0)) +----------------------------------------------------PhysicalOlapScan[catalog_sales] +------------------------------------------------PhysicalDistribute +--------------------------------------------------PhysicalProject +----------------------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1998)) +------------------------------------------------------PhysicalOlapScan[date_dim] +----------------PhysicalDistribute +------------------PhysicalProject +--------------------filter(((return_rank <= 10) OR (currency_rank <= 10))) +----------------------PhysicalWindow +------------------------PhysicalQuickSort +--------------------------PhysicalWindow +----------------------------PhysicalQuickSort +------------------------------PhysicalDistribute +--------------------------------PhysicalQuickSort +----------------------------------PhysicalProject +------------------------------------hashAgg[GLOBAL] +--------------------------------------PhysicalDistribute +----------------------------------------hashAgg[LOCAL] +------------------------------------------PhysicalProject +--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((sts.ss_item_sk = sr.sr_item_sk) and (sts.ss_ticket_number = sr.sr_ticket_number))otherCondition=() +----------------------------------------------PhysicalProject +------------------------------------------------filter((sr.sr_return_amt > 10000.00)) +--------------------------------------------------PhysicalOlapScan[store_returns] +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((sts.ss_sold_date_sk = date_dim.d_date_sk))otherCondition=() +------------------------------------------------PhysicalProject +--------------------------------------------------filter((sts.ss_net_paid > 0.00) and (sts.ss_net_profit > 1.00) and 
(sts.ss_quantity > 0)) +----------------------------------------------------PhysicalOlapScan[store_sales] +------------------------------------------------PhysicalDistribute +--------------------------------------------------PhysicalProject +----------------------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1998)) +------------------------------------------------------PhysicalOlapScan[date_dim] + diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query75.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query75.out new file mode 100644 index 00000000000000..fb9d10e30ff233 --- /dev/null +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query75.out @@ -0,0 +1,81 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_75 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----hashAgg[GLOBAL] +------PhysicalDistribute +--------hashAgg[LOCAL] +----------hashAgg[GLOBAL] +------------PhysicalDistribute +--------------hashAgg[LOCAL] +----------------PhysicalUnion +------------------PhysicalDistribute +--------------------PhysicalProject +----------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number))otherCondition=() +------------------------PhysicalProject +--------------------------PhysicalOlapScan[catalog_returns] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk))otherCondition=() +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN] hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk))otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_sales] 
+--------------------------------PhysicalDistribute +----------------------------------PhysicalProject +------------------------------------filter((item.i_category = 'Sports')) +--------------------------------------PhysicalOlapScan[item] +----------------------------PhysicalDistribute +------------------------------PhysicalProject +--------------------------------filter(d_year IN (2001, 2002)) +----------------------------------PhysicalOlapScan[date_dim] +------------------PhysicalDistribute +--------------------PhysicalProject +----------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number))otherCondition=() +------------------------PhysicalProject +--------------------------PhysicalOlapScan[store_returns] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))otherCondition=() +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN] hashCondition=((item.i_item_sk = store_sales.ss_item_sk))otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] +--------------------------------PhysicalDistribute +----------------------------------PhysicalProject +------------------------------------filter((item.i_category = 'Sports')) +--------------------------------------PhysicalOlapScan[item] +----------------------------PhysicalDistribute +------------------------------PhysicalProject +--------------------------------filter(d_year IN (2001, 2002)) +----------------------------------PhysicalOlapScan[date_dim] +------------------PhysicalDistribute +--------------------PhysicalProject +----------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and (web_sales.ws_order_number = 
web_returns.wr_order_number))otherCondition=() +------------------------PhysicalProject +--------------------------PhysicalOlapScan[web_returns] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk))otherCondition=() +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN] hashCondition=((item.i_item_sk = web_sales.ws_item_sk))otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[web_sales] +--------------------------------PhysicalDistribute +----------------------------------PhysicalProject +------------------------------------filter((item.i_category = 'Sports')) +--------------------------------------PhysicalOlapScan[item] +----------------------------PhysicalDistribute +------------------------------PhysicalProject +--------------------------------filter(d_year IN (2001, 2002)) +----------------------------------PhysicalOlapScan[date_dim] +--PhysicalResultSink +----PhysicalTopN +------PhysicalDistribute +--------PhysicalTopN +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((curr_yr.i_brand_id = prev_yr.i_brand_id) and (curr_yr.i_category_id = prev_yr.i_category_id) and (curr_yr.i_class_id = prev_yr.i_class_id) and (curr_yr.i_manufact_id = prev_yr.i_manufact_id))otherCondition=(((cast(cast(sales_cnt as DECIMALV3(17, 2)) as DECIMALV3(23, 8)) / cast(sales_cnt as DECIMALV3(17, 2))) < 0.900000)) +--------------PhysicalDistribute +----------------filter((curr_yr.d_year = 2002)) +------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------PhysicalDistribute +----------------filter((prev_yr.d_year = 2001)) +------------------PhysicalCteConsumer ( cteId=CTEId#0 ) + diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query1.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query1.out index 
d872d6969a8f36..700186ae78d70a 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query1.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query1.out @@ -33,10 +33,11 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------------PhysicalProject --------------------------filter((store.s_state = 'SD')) ----------------------------PhysicalOlapScan[store] -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute -----------------------hashAgg[LOCAL] -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------PhysicalDistribute +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute +------------------------hashAgg[LOCAL] +--------------------------PhysicalDistribute +----------------------------PhysicalProject +------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query49.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query49.out index e1b6cfabc3a104..935417c5f54b0b 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query49.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query49.out @@ -4,17 +4,18 @@ PhysicalResultSink --PhysicalTopN ----PhysicalDistribute ------PhysicalTopN ---------hashAgg[LOCAL] -----------PhysicalUnion -------------PhysicalDistribute ---------------PhysicalProject -----------------filter(((return_rank <= 10) OR (currency_rank <= 10))) -------------------PhysicalWindow ---------------------PhysicalQuickSort +--------hashAgg[GLOBAL] +----------PhysicalDistribute +------------hashAgg[LOCAL] +--------------PhysicalUnion +----------------PhysicalDistribute +------------------PhysicalProject +--------------------filter(((return_rank <= 10) OR (currency_rank <= 10))) ----------------------PhysicalWindow 
------------------------PhysicalQuickSort ---------------------------PhysicalDistribute +--------------------------PhysicalWindow ----------------------------PhysicalQuickSort +<<<<<<< HEAD ------------------------------PhysicalProject --------------------------------hashAgg[GLOBAL] ----------------------------------PhysicalDistribute @@ -37,10 +38,35 @@ PhysicalResultSink ----------------filter(((return_rank <= 10) OR (currency_rank <= 10))) ------------------PhysicalWindow --------------------PhysicalQuickSort +======= +------------------------------PhysicalDistribute +--------------------------------PhysicalQuickSort +----------------------------------PhysicalProject +------------------------------------hashAgg[GLOBAL] +--------------------------------------PhysicalDistribute +----------------------------------------hashAgg[LOCAL] +------------------------------------------PhysicalProject +--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((ws.ws_item_sk = wr.wr_item_sk) and (ws.ws_order_number = wr.wr_order_number))otherCondition=() +----------------------------------------------PhysicalProject +------------------------------------------------filter((wr.wr_return_amt > 10000.00)) +--------------------------------------------------PhysicalOlapScan[web_returns] +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((ws.ws_sold_date_sk = date_dim.d_date_sk))otherCondition=() +------------------------------------------------PhysicalProject +--------------------------------------------------filter((ws.ws_net_paid > 0.00) and (ws.ws_net_profit > 1.00) and (ws.ws_quantity > 0)) +----------------------------------------------------PhysicalOlapScan[web_sales] +------------------------------------------------PhysicalDistribute +--------------------------------------------------PhysicalProject +----------------------------------------------------filter((date_dim.d_moy = 12) and (date_dim.d_year = 1999)) 
+------------------------------------------------------PhysicalOlapScan[date_dim] +----------------PhysicalDistribute +------------------PhysicalProject +--------------------filter(((return_rank <= 10) OR (currency_rank <= 10))) +>>>>>>> a6d2013802... [opt](nereids) use 2 phase agg above union all (#26245) ----------------------PhysicalWindow ------------------------PhysicalQuickSort ---------------------------PhysicalDistribute +--------------------------PhysicalWindow ----------------------------PhysicalQuickSort +<<<<<<< HEAD ------------------------------PhysicalProject --------------------------------hashAgg[GLOBAL] ----------------------------------PhysicalDistribute @@ -63,10 +89,35 @@ PhysicalResultSink ----------------filter(((return_rank <= 10) OR (currency_rank <= 10))) ------------------PhysicalWindow --------------------PhysicalQuickSort +======= +------------------------------PhysicalDistribute +--------------------------------PhysicalQuickSort +----------------------------------PhysicalProject +------------------------------------hashAgg[GLOBAL] +--------------------------------------PhysicalDistribute +----------------------------------------hashAgg[LOCAL] +------------------------------------------PhysicalProject +--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((cs.cs_item_sk = cr.cr_item_sk) and (cs.cs_order_number = cr.cr_order_number))otherCondition=() +----------------------------------------------PhysicalProject +------------------------------------------------filter((cr.cr_return_amount > 10000.00)) +--------------------------------------------------PhysicalOlapScan[catalog_returns] +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((cs.cs_sold_date_sk = date_dim.d_date_sk))otherCondition=() +------------------------------------------------PhysicalProject +--------------------------------------------------filter((cs.cs_net_paid > 0.00) and (cs.cs_net_profit > 1.00) and 
(cs.cs_quantity > 0)) +----------------------------------------------------PhysicalOlapScan[catalog_sales] +------------------------------------------------PhysicalDistribute +--------------------------------------------------PhysicalProject +----------------------------------------------------filter((date_dim.d_moy = 12) and (date_dim.d_year = 1999)) +------------------------------------------------------PhysicalOlapScan[date_dim] +----------------PhysicalDistribute +------------------PhysicalProject +--------------------filter(((return_rank <= 10) OR (currency_rank <= 10))) +>>>>>>> a6d2013802... [opt](nereids) use 2 phase agg above union all (#26245) ----------------------PhysicalWindow ------------------------PhysicalQuickSort ---------------------------PhysicalDistribute +--------------------------PhysicalWindow ----------------------------PhysicalQuickSort +<<<<<<< HEAD ------------------------------PhysicalProject --------------------------------hashAgg[GLOBAL] ----------------------------------PhysicalDistribute @@ -84,4 +135,25 @@ PhysicalResultSink ----------------------------------------------PhysicalProject ------------------------------------------------filter((date_dim.d_moy = 12)(date_dim.d_year = 1999)) --------------------------------------------------PhysicalOlapScan[date_dim] +======= +------------------------------PhysicalDistribute +--------------------------------PhysicalQuickSort +----------------------------------PhysicalProject +------------------------------------hashAgg[GLOBAL] +--------------------------------------PhysicalDistribute +----------------------------------------hashAgg[LOCAL] +------------------------------------------PhysicalProject +--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((sts.ss_item_sk = sr.sr_item_sk) and (sts.ss_ticket_number = sr.sr_ticket_number))otherCondition=() +----------------------------------------------PhysicalProject 
+------------------------------------------------filter((sr.sr_return_amt > 10000.00)) +--------------------------------------------------PhysicalOlapScan[store_returns] +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((sts.ss_sold_date_sk = date_dim.d_date_sk))otherCondition=() +------------------------------------------------PhysicalProject +--------------------------------------------------filter((sts.ss_net_paid > 0.00) and (sts.ss_net_profit > 1.00) and (sts.ss_quantity > 0)) +----------------------------------------------------PhysicalOlapScan[store_sales] +------------------------------------------------PhysicalDistribute +--------------------------------------------------PhysicalProject +----------------------------------------------------filter((date_dim.d_moy = 12) and (date_dim.d_year = 1999)) +------------------------------------------------------PhysicalOlapScan[date_dim] +>>>>>>> a6d2013802... [opt](nereids) use 2 phase agg above union all (#26245) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query75.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query75.out index 59b860b0f73b3c..957059de65dd9f 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query75.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query75.out @@ -5,6 +5,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----hashAgg[GLOBAL] ------PhysicalDistribute --------hashAgg[LOCAL] +<<<<<<< HEAD ----------hashAgg[LOCAL] ------------PhysicalUnion --------------PhysicalDistribute @@ -16,10 +17,30 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------hashJoin[INNER_JOIN](date_dim.d_date_sk = catalog_sales.cs_sold_date_sk) ------------------------PhysicalProject --------------------------hashJoin[INNER_JOIN](item.i_item_sk = catalog_sales.cs_item_sk) +======= +----------hashAgg[GLOBAL] +------------PhysicalDistribute +--------------hashAgg[LOCAL] +----------------PhysicalUnion 
+------------------PhysicalDistribute +--------------------PhysicalProject +----------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number))otherCondition=() +------------------------PhysicalProject +--------------------------PhysicalOlapScan[catalog_returns] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk))otherCondition=() +>>>>>>> a6d2013802... [opt](nereids) use 2 phase agg above union all (#26245) ----------------------------PhysicalProject -------------------------------PhysicalOlapScan[catalog_sales] +------------------------------hashJoin[INNER_JOIN] hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk))otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_sales] +--------------------------------PhysicalDistribute +----------------------------------PhysicalProject +------------------------------------filter((item.i_category = 'Home')) +--------------------------------------PhysicalOlapScan[item] ----------------------------PhysicalDistribute ------------------------------PhysicalProject +<<<<<<< HEAD --------------------------------filter((item.i_category = 'Home')) ----------------------------------PhysicalOlapScan[item] ------------------------PhysicalDistribute @@ -35,10 +56,28 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------hashJoin[INNER_JOIN](date_dim.d_date_sk = store_sales.ss_sold_date_sk) ------------------------PhysicalProject --------------------------hashJoin[INNER_JOIN](item.i_item_sk = store_sales.ss_item_sk) +======= +--------------------------------filter(d_year IN (1998, 1999)) +----------------------------------PhysicalOlapScan[date_dim] +------------------PhysicalDistribute +--------------------PhysicalProject 
+----------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number))otherCondition=() +------------------------PhysicalProject +--------------------------PhysicalOlapScan[store_returns] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))otherCondition=() +>>>>>>> a6d2013802... [opt](nereids) use 2 phase agg above union all (#26245) ----------------------------PhysicalProject -------------------------------PhysicalOlapScan[store_sales] +------------------------------hashJoin[INNER_JOIN] hashCondition=((item.i_item_sk = store_sales.ss_item_sk))otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] +--------------------------------PhysicalDistribute +----------------------------------PhysicalProject +------------------------------------filter((item.i_category = 'Home')) +--------------------------------------PhysicalOlapScan[item] ----------------------------PhysicalDistribute ------------------------------PhysicalProject +<<<<<<< HEAD --------------------------------filter((item.i_category = 'Home')) ----------------------------------PhysicalOlapScan[item] ------------------------PhysicalDistribute @@ -54,16 +93,29 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------hashJoin[INNER_JOIN](date_dim.d_date_sk = web_sales.ws_sold_date_sk) ------------------------PhysicalProject --------------------------hashJoin[INNER_JOIN](item.i_item_sk = web_sales.ws_item_sk) +======= +--------------------------------filter(d_year IN (1998, 1999)) +----------------------------------PhysicalOlapScan[date_dim] +------------------PhysicalDistribute +--------------------PhysicalProject +----------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((web_sales.ws_item_sk = 
web_returns.wr_item_sk) and (web_sales.ws_order_number = web_returns.wr_order_number))otherCondition=() +------------------------PhysicalProject +--------------------------PhysicalOlapScan[web_returns] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk))otherCondition=() +>>>>>>> a6d2013802... [opt](nereids) use 2 phase agg above union all (#26245) ----------------------------PhysicalProject -------------------------------PhysicalOlapScan[web_sales] +------------------------------hashJoin[INNER_JOIN] hashCondition=((item.i_item_sk = web_sales.ws_item_sk))otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[web_sales] +--------------------------------PhysicalDistribute +----------------------------------PhysicalProject +------------------------------------filter((item.i_category = 'Home')) +--------------------------------------PhysicalOlapScan[item] ----------------------------PhysicalDistribute ------------------------------PhysicalProject ---------------------------------filter((item.i_category = 'Home')) -----------------------------------PhysicalOlapScan[item] -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------filter(d_year IN (1998, 1999)) -------------------------------PhysicalOlapScan[date_dim] +--------------------------------filter(d_year IN (1998, 1999)) +----------------------------------PhysicalOlapScan[date_dim] --PhysicalResultSink ----PhysicalTopN ------PhysicalDistribute diff --git a/regression-test/suites/nereids_p0/aggregate/agg_union_random.groovy b/regression-test/suites/nereids_p0/aggregate/agg_union_random.groovy new file mode 100644 index 00000000000000..f233c80dac62ac --- /dev/null +++ b/regression-test/suites/nereids_p0/aggregate/agg_union_random.groovy @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("agg_union_random") { + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + sql "DROP TABLE IF EXISTS test_random;" + sql """ + create table test_random + ( + a varchar(100) null, + b decimalv3(18,10) null + ) ENGINE=OLAP + DUPLICATE KEY(`a`) + DISTRIBUTED BY HASH(`a`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + explain{ + sql "select a from (select * from test_random union all (select * from test_random))t group by a" + /** Illustrative EXPLAIN fragment for the group-by over union-all query above; the check below only asserts that the plan text contains RANDOM. + STREAM DATA SINK + EXCHANGE ID: 258 + RANDOM + + 252:VOlapScanNode + TABLE: default_cluster:regression_test_nereids_p0_aggregate.test_random(test_random), PREAGGREGATION: ON + partitions=0/1, tablets=0/0, tabletList= + cardinality=1, avgRowSize=0.0, numNodes=1 + pushAggOp=NONE + **/ + contains "RANDOM" + } + + sql "DROP TABLE IF EXISTS test_random;" +}