diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PushTopnToAgg.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PushTopnToAgg.java index 128dd61b487b53..d3161a0dba36b6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PushTopnToAgg.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PushTopnToAgg.java @@ -26,6 +26,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalDistribute; import org.apache.doris.nereids.trees.plans.physical.PhysicalHashAggregate; import org.apache.doris.nereids.trees.plans.physical.PhysicalHashAggregate.TopnPushInfo; +import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; import org.apache.doris.nereids.trees.plans.physical.PhysicalTopN; import org.apache.doris.qe.ConnectContext; @@ -36,9 +37,12 @@ * plan: topn(1) -> aggGlobal -> shuffle -> aggLocal -> scan * optimization: aggLocal and aggGlobal only need to generate the smallest row with respect to o_clerk. * - * This rule only applies to the pattern that - * 1. aggregate is the child of topN (there is no project between topN and aggregate). - * 2. aggregate is not scalar agg, and there is no distinct arguments + * This rule only applies to the patterns + * 1. topn->project->agg, or + * 2. topn->agg + * that + * 1. orderKeys and groupkeys are one-one mapping + * 2. aggregate is not scalar agg * Refer to LimitAggToTopNAgg rule. */ public class PushTopnToAgg extends PlanPostProcessor { @@ -50,6 +54,9 @@ public Plan visitPhysicalTopN(PhysicalTopN topN, CascadesContext return topN; } Plan topNChild = topN.child(); + if (topNChild instanceof PhysicalProject) { + topNChild = topNChild.child(0); + } if (topNChild instanceof PhysicalHashAggregate) { PhysicalHashAggregate upperAgg = (PhysicalHashAggregate) topNChild; if (isGroupKeyIdenticalToOrderKey(topN, upperAgg)) { diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query64.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query64.out index 8d1b2834a000ec..ebc2519e119218 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query64.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query64.out @@ -7,86 +7,85 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) --------PhysicalDistribute[DistributionSpecHash] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF19 i_item_sk->[cr_item_sk,cs_item_sk,sr_item_sk,ss_item_sk] +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_shipto_date_sk = d3.d_date_sk)) otherCondition=() build RFs:RF19 d_date_sk->[c_first_shipto_date_sk] ----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF18 ib_income_band_sk->[hd_income_band_sk] +------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF18 c_customer_sk->[ss_customer_sk] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) otherCondition=() build RFs:RF17 ib_income_band_sk->[hd_income_band_sk] +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF17 p_promo_sk->[ss_promo_sk] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() build RFs:RF16 ca_address_sk->[c_current_addr_sk] +--------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF16 ss_addr_sk->[ca_address_sk] ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF15 ca_address_sk->[ss_addr_sk] +------------------------------PhysicalOlapScan[customer_address] apply RFs: RF16 +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF14 ss_item_sk->[sr_item_sk];RF15 ss_ticket_number->[sr_ticket_number] --------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() build RFs:RF14 hd_demo_sk->[c_current_hdemo_sk] -------------------------------------PhysicalProject ---------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() build RFs:RF13 hd_demo_sk->[ss_hdemo_sk] -----------------------------------------PhysicalProject -------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF12 p_promo_sk->[ss_promo_sk] ---------------------------------------------PhysicalProject -----------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF11 cd_demo_sk->[c_current_cdemo_sk] -------------------------------------------------PhysicalProject ---------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF10 cd_demo_sk->[ss_cdemo_sk] -----------------------------------------------------PhysicalProject -------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_shipto_date_sk = d3.d_date_sk)) otherCondition=() build RFs:RF9 d_date_sk->[c_first_shipto_date_sk] ---------------------------------------------------------PhysicalProject -----------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF8 d_date_sk->[c_first_sales_date_sk] -------------------------------------------------------------PhysicalProject ---------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF7 c_customer_sk->[ss_customer_sk] +----------------------------------PhysicalOlapScan[store_returns] apply RFs: RF14 RF15 +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF13 cd_demo_sk->[ss_cdemo_sk] +------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF12 i_item_sk->[cr_item_sk,cs_item_sk,ss_item_sk] +--------------------------------------PhysicalProject +----------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF11 s_store_sk->[ss_store_sk] +------------------------------------------PhysicalProject +--------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) otherCondition=() build RFs:RF10 ib_income_band_sk->[hd_income_band_sk] +----------------------------------------------PhysicalProject +------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() build RFs:RF9 hd_demo_sk->[ss_hdemo_sk] +--------------------------------------------------PhysicalProject +----------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF8 cs_item_sk->[ss_item_sk] +------------------------------------------------------PhysicalProject +--------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF7 d_date_sk->[ss_sold_date_sk] +----------------------------------------------------------PhysicalProject +------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF7 RF8 RF9 RF11 RF12 RF13 RF17 RF18 +----------------------------------------------------------PhysicalProject +------------------------------------------------------------filter(d_year IN (1999, 2000)) +--------------------------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------------------------PhysicalProject +--------------------------------------------------------filter((sale > (2 * refund))) +----------------------------------------------------------hashAgg[GLOBAL] +------------------------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------------------------hashAgg[LOCAL] ----------------------------------------------------------------PhysicalProject -------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF6 s_store_sk->[ss_store_sk] +------------------------------------------------------------------hashJoin[INNER_JOIN colocated] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() build RFs:RF5 cr_item_sk->[cs_item_sk];RF6 cr_order_number->[cs_order_number] --------------------------------------------------------------------PhysicalProject -----------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ss_sold_date_sk] -------------------------------------------------------------------------PhysicalProject ---------------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF4 cs_item_sk->[sr_item_sk,ss_item_sk] -----------------------------------------------------------------------------PhysicalProject -------------------------------------------------------------------------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF2 sr_item_sk->[ss_item_sk];RF3 sr_ticket_number->[ss_ticket_number] ---------------------------------------------------------------------------------PhysicalProject -----------------------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 RF4 RF5 RF6 RF7 RF10 RF12 RF13 RF15 RF19 ---------------------------------------------------------------------------------PhysicalProject -----------------------------------------------------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF4 RF19 -----------------------------------------------------------------------------PhysicalProject -------------------------------------------------------------------------------filter((sale > (2 * refund))) ---------------------------------------------------------------------------------hashAgg[GLOBAL] -----------------------------------------------------------------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------------------------------------------------------------hashAgg[LOCAL] ---------------------------------------------------------------------------------------PhysicalProject -----------------------------------------------------------------------------------------hashJoin[INNER_JOIN colocated] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() build RFs:RF0 cr_item_sk->[cs_item_sk];RF1 cr_order_number->[cs_order_number] -------------------------------------------------------------------------------------------PhysicalProject ---------------------------------------------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF19 -------------------------------------------------------------------------------------------PhysicalProject ---------------------------------------------------------------------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF19 -------------------------------------------------------------------------PhysicalProject ---------------------------------------------------------------------------filter(d_year IN (1999, 2000)) -----------------------------------------------------------------------------PhysicalOlapScan[date_dim] +----------------------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF5 RF6 RF12 --------------------------------------------------------------------PhysicalProject -----------------------------------------------------------------------PhysicalOlapScan[store] -----------------------------------------------------------------PhysicalProject -------------------------------------------------------------------PhysicalOlapScan[customer] apply RFs: RF8 RF9 RF11 RF14 RF16 -------------------------------------------------------------PhysicalProject ---------------------------------------------------------------PhysicalOlapScan[date_dim] ---------------------------------------------------------PhysicalProject -----------------------------------------------------------PhysicalOlapScan[date_dim] -----------------------------------------------------PhysicalProject -------------------------------------------------------PhysicalOlapScan[customer_demographics] -------------------------------------------------PhysicalProject ---------------------------------------------------PhysicalOlapScan[customer_demographics] ---------------------------------------------PhysicalProject -----------------------------------------------PhysicalOlapScan[promotion] -----------------------------------------PhysicalProject -------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF17 +----------------------------------------------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF12 +--------------------------------------------------PhysicalProject +----------------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF10 +----------------------------------------------PhysicalProject +------------------------------------------------PhysicalOlapScan[income_band] +------------------------------------------PhysicalProject +--------------------------------------------PhysicalOlapScan[store] +--------------------------------------PhysicalProject +----------------------------------------filter((item.i_current_price <= 58.00) and (item.i_current_price >= 49.00) and i_color IN ('blush', 'lace', 'lawn', 'misty', 'orange', 'pink')) +------------------------------------------PhysicalOlapScan[item] ------------------------------------PhysicalProject ---------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF18 ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[customer_address] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[customer_address] +--------------------------------------PhysicalOlapScan[customer_demographics] ------------------------PhysicalProject ---------------------------PhysicalOlapScan[income_band] +--------------------------PhysicalOlapScan[promotion] --------------------PhysicalProject -----------------------PhysicalOlapScan[income_band] +----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() build RFs:RF4 c_current_addr_sk->[ca_address_sk] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[customer_address] apply RFs: RF4 +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=() build RFs:RF3 cd_demo_sk->[c_current_cdemo_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[c_first_sales_date_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() build RFs:RF1 hd_demo_sk->[c_current_hdemo_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[customer] apply RFs: RF1 RF2 RF3 RF19 +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF0 ib_income_band_sk->[hd_income_band_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF0 +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[income_band] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[customer_demographics] ----------------PhysicalProject -------------------filter((item.i_current_price <= 58.00) and (item.i_current_price >= 49.00) and i_color IN ('blush', 'lace', 'lawn', 'misty', 'orange', 'pink')) ---------------------PhysicalOlapScan[item] +------------------PhysicalOlapScan[date_dim] --PhysicalResultSink ----PhysicalQuickSort[MERGE_SORT] ------PhysicalDistribute[DistributionSpecGather]