From 9b075bc873c288cda058ab939b2336b464d6b3ab Mon Sep 17 00:00:00 2001 From: minghong Date: Tue, 9 Jul 2024 17:46:57 +0800 Subject: [PATCH] [fix](nereids) derive column stats for 'expr and A is not null' (#37235) (#37498) Pick from #37235. The algorithm for computing stats for an "expr1 and expr2" predicate is as follows: 1. Compute the output stats of expr1 based on the input stats; the resulting stats are denoted by leftStats. 2. Compute the stats of expr2 based on leftStats. After step 1, leftStats should be normalized to avoid abnormal cases, such as ndv > rowCount or numNulls > rowCount. Issue Number: close #xxx ## Proposed changes Issue Number: close #xxx --- .../doris/nereids/stats/FilterEstimation.java | 1 + .../apache/doris/statistics/Statistics.java | 18 +++ .../nereids/stats/FilterEstimationTest.java | 35 +++++- .../rf_prune/query64.out | 102 ++++++++-------- .../shape/query64.out | 112 +++++++++--------- 5 files changed, 160 insertions(+), 108 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index d66b342a16add3..b8e08086d63301 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -110,6 +110,7 @@ public Statistics visitCompoundPredicate(CompoundPredicate predicate, Estimation Expression leftExpr = predicate.child(0); Expression rightExpr = predicate.child(1); Statistics leftStats = leftExpr.accept(this, context); + leftStats = leftStats.normalizeByRatio(context.statistics.getRowCount()); Statistics andStats = rightExpr.accept(this, new EstimationContext(leftStats)); if (predicate instanceof And) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java index 3d961982d161be..b6bbebdd37118b 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java @@ -204,4 +204,22 @@ public String detail(String prefix) { public int getWidthInJoinCluster() { return widthInJoinCluster; } + + public Statistics normalizeByRatio(double originRowCount) { + if (rowCount >= originRowCount || rowCount <= 0) { + return this; + } + StatisticsBuilder builder = new StatisticsBuilder(this); + double ratio = rowCount / originRowCount; + for (Entry<Expression, ColumnStatistic> entry : expressionToColumnStats.entrySet()) { + ColumnStatistic colStats = entry.getValue(); + if (colStats.numNulls != 0 || colStats.ndv > rowCount) { + ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(colStats); + colStatsBuilder.setNumNulls(colStats.numNulls * ratio); + colStatsBuilder.setNdv(Math.min(rowCount - colStatsBuilder.getNumNulls(), colStats.ndv)); + builder.putColumnStatistics(entry.getKey(), colStatsBuilder.build()); + } + } + return builder.build(); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java index 08aced49e14276..dd5a38a4a62caa 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java @@ -206,7 +206,7 @@ public void test1() { Statistics stat = new Statistics(1000, slotToColumnStat); FilterEstimation filterEstimation = new FilterEstimation(); Statistics expected = filterEstimation.estimate(or, stat); - Assertions.assertEquals(51.9, expected.getRowCount(), 0.1); + Assertions.assertEquals(51, expected.getRowCount(), 1); } // a > 500 and b < 100 or a > c @@ -1059,6 +1059,39 @@ public void testNumNullsAnd() { Assertions.assertEquals(result.getRowCount(), 2.0, 0.01); } + /** + * a = 1 and b is not null + */ + @Test + public void testNumNullsAndTwoCol() { + 
SlotReference a = new SlotReference("a", IntegerType.INSTANCE); + ColumnStatisticBuilder builderA = new ColumnStatisticBuilder() + .setNdv(2) + .setAvgSizeByte(4) + .setNumNulls(0) + .setMaxValue(2) + .setMinValue(1) + .setCount(10); + IntegerLiteral int1 = new IntegerLiteral(1); + EqualTo equalTo = new EqualTo(a, int1); + SlotReference b = new SlotReference("a", IntegerType.INSTANCE); + ColumnStatisticBuilder builderB = new ColumnStatisticBuilder() + .setNdv(2) + .setAvgSizeByte(4) + .setNumNulls(8) + .setMaxValue(2) + .setMinValue(1) + .setCount(10); + Not isNotNull = new Not(new IsNull(b)); + And and = new And(equalTo, isNotNull); + Statistics stats = new Statistics(10, new HashMap<>()); + stats.addColumnStats(a, builderA.build()); + stats.addColumnStats(b, builderB.build()); + FilterEstimation filterEstimation = new FilterEstimation(); + Statistics result = filterEstimation.estimate(and, stats); + Assertions.assertEquals(result.getRowCount(), 1.0, 0.01); + } + /** * a >= 1 or a <= 2 */ diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query64.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query64.out index 60bd76330b733c..3639f4ef241dd1 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query64.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query64.out @@ -11,27 +11,52 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) ----------------PhysicalProject ------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) otherCondition=() --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) +----------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF17 
ss_customer_sk->[c_customer_sk] ------------------------PhysicalDistribute[DistributionSpecHash] --------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF15 ss_addr_sk->[ca_address_sk] -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[customer_address] apply RFs: RF15 -----------------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------------PhysicalProject ---------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF13 ss_item_sk->[sr_item_sk];RF14 ss_ticket_number->[sr_ticket_number] -----------------------------------------PhysicalProject -------------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF13 RF14 -----------------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() +------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=() +------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------PhysicalProject +----------------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() 
------------------------------------------PhysicalProject ---------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF12 ss_cdemo_sk->[cd_demo_sk] -----------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------PhysicalOlapScan[customer] apply RFs: RF17 +------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------------PhysicalProject +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() ------------------------------------------------PhysicalProject ---------------------------------------------------PhysicalOlapScan[customer_demographics] apply RFs: RF12 -----------------------------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF11 i_item_sk->[cr_item_sk,cs_item_sk,ss_item_sk] +--------------------------------------------------PhysicalOlapScan[household_demographics] +------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------------------PhysicalProject +----------------------------------------------------PhysicalOlapScan[income_band] +------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[customer_demographics] +------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[customer_address] +------------------------PhysicalDistribute[DistributionSpecHash] 
+--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF11 ss_item_sk->[sr_item_sk];RF12 ss_ticket_number->[sr_ticket_number] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_returns] apply RFs: RF11 RF12 +------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF10 ss_cdemo_sk->[cd_demo_sk] +------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[customer_demographics] apply RFs: RF10 +------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------PhysicalProject +----------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF9 ss_addr_sk->[ca_address_sk] +------------------------------------------PhysicalProject +--------------------------------------------PhysicalOlapScan[customer_address] apply RFs: RF9 +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF8 i_item_sk->[cr_item_sk,cs_item_sk,ss_item_sk] +----------------------------------------------PhysicalProject +------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() 
--------------------------------------------------PhysicalProject ----------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() ------------------------------------------------------PhysicalProject @@ -39,11 +64,11 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) ----------------------------------------------------------PhysicalProject ------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() --------------------------------------------------------------PhysicalProject -----------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF7 d_date_sk->[ss_sold_date_sk] +----------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk] ------------------------------------------------------------------PhysicalProject ---------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF6 cs_item_sk->[ss_item_sk] +--------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF2 cs_item_sk->[ss_item_sk] ----------------------------------------------------------------------PhysicalProject -------------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF6 RF7 RF11 +------------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 RF8 
----------------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------------------------------------------------------PhysicalProject --------------------------------------------------------------------------filter((sale > (2 * refund))) @@ -53,9 +78,9 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) ----------------------------------------------------------------------------------PhysicalProject ------------------------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() --------------------------------------------------------------------------------------PhysicalProject -----------------------------------------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF11 +----------------------------------------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF8 --------------------------------------------------------------------------------------PhysicalProject -----------------------------------------------------------------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF11 +----------------------------------------------------------------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF8 ------------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] --------------------------------------------------------------------PhysicalProject ----------------------------------------------------------------------filter(d_year IN (2001, 2002)) @@ -71,36 +96,11 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) ----------------------------------------------------------PhysicalOlapScan[store] 
--------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------------------------------PhysicalProject -------------------------------------------------------filter((item.i_current_price <= 33.00) and (item.i_current_price >= 24.00) and i_color IN ('blanched', 'brown', 'burlywood', 'chocolate', 'drab', 'medium')) ---------------------------------------------------------PhysicalOlapScan[item] -------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[promotion] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() -------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=() -------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------PhysicalProject -----------------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() -------------------------------------------PhysicalProject ---------------------------------------------PhysicalOlapScan[customer] -------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------------------PhysicalProject -----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() +------------------------------------------------------PhysicalOlapScan[promotion] 
+----------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------------------------------PhysicalProject ---------------------------------------------------PhysicalOlapScan[household_demographics] -------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------------------------PhysicalProject -----------------------------------------------------PhysicalOlapScan[income_band] -------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[customer_demographics] -------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[customer_address] +--------------------------------------------------filter((item.i_current_price <= 33.00) and (item.i_current_price >= 24.00) and i_color IN ('blanched', 'brown', 'burlywood', 'chocolate', 'drab', 'medium')) +----------------------------------------------------PhysicalOlapScan[item] --------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------PhysicalProject ------------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query64.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query64.out index cfaff549bd1364..8abcdf47671024 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query64.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query64.out @@ -11,39 +11,64 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) ----------------PhysicalProject ------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF18 d_date_sk->[c_first_sales_date_sk] --------------------PhysicalProject 
-----------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF17 c_customer_sk->[ss_customer_sk] +----------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF17 ss_customer_sk->[c_customer_sk] ------------------------PhysicalDistribute[DistributionSpecHash] --------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF16 p_promo_sk->[ss_promo_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF15 ss_addr_sk->[ca_address_sk] -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[customer_address] apply RFs: RF15 -----------------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------------PhysicalProject ---------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF13 ss_item_sk->[sr_item_sk];RF14 ss_ticket_number->[sr_ticket_number] -----------------------------------------PhysicalProject -------------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF13 RF14 -----------------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() build RFs:RF16 ca_address_sk->[c_current_addr_sk] 
+------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=() build RFs:RF15 cd_demo_sk->[c_current_cdemo_sk] +------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------PhysicalProject +----------------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() build RFs:RF14 hd_demo_sk->[c_current_hdemo_sk] ------------------------------------------PhysicalProject ---------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF12 ss_cdemo_sk->[cd_demo_sk] -----------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------PhysicalOlapScan[customer] apply RFs: RF14 RF15 RF16 RF17 RF18 RF19 +------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------------PhysicalProject +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF13 ib_income_band_sk->[hd_income_band_sk] ------------------------------------------------PhysicalProject ---------------------------------------------------PhysicalOlapScan[customer_demographics] apply RFs: RF12 -----------------------------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF11 i_item_sk->[cr_item_sk,cs_item_sk,ss_item_sk] +--------------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: 
RF13 +------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] --------------------------------------------------PhysicalProject -----------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF10 s_store_sk->[ss_store_sk] +----------------------------------------------------PhysicalOlapScan[income_band] +------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[customer_demographics] +------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[customer_address] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF11 ss_item_sk->[sr_item_sk];RF12 ss_ticket_number->[sr_ticket_number] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_returns] apply RFs: RF11 RF12 +------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF10 ss_cdemo_sk->[cd_demo_sk] +------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[customer_demographics] apply RFs: RF10 +------------------------------------PhysicalDistribute[DistributionSpecHash] 
+--------------------------------------PhysicalProject +----------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF9 ss_addr_sk->[ca_address_sk] +------------------------------------------PhysicalProject +--------------------------------------------PhysicalOlapScan[customer_address] apply RFs: RF9 +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF8 i_item_sk->[cr_item_sk,cs_item_sk,ss_item_sk] +----------------------------------------------PhysicalProject +------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF7 p_promo_sk->[ss_promo_sk] +--------------------------------------------------PhysicalProject +----------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF6 s_store_sk->[ss_store_sk] ------------------------------------------------------PhysicalProject ---------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) otherCondition=() build RFs:RF9 ib_income_band_sk->[hd_income_band_sk] +--------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) otherCondition=() build RFs:RF5 ib_income_band_sk->[hd_income_band_sk] ----------------------------------------------------------PhysicalProject -------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() build RFs:RF8 hd_demo_sk->[ss_hdemo_sk] 
+------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() build RFs:RF4 hd_demo_sk->[ss_hdemo_sk] --------------------------------------------------------------PhysicalProject -----------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF7 d_date_sk->[ss_sold_date_sk] +----------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk] ------------------------------------------------------------------PhysicalProject ---------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF6 cs_item_sk->[ss_item_sk] +--------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF2 cs_item_sk->[ss_item_sk] ----------------------------------------------------------------------PhysicalProject -------------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF6 RF7 RF8 RF10 RF11 RF16 RF17 +------------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 RF4 RF6 RF7 RF8 ----------------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------------------------------------------------------PhysicalProject --------------------------------------------------------------------------filter((sale > (2 * refund))) @@ -51,18 +76,18 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) 
------------------------------------------------------------------------------PhysicalDistribute[DistributionSpecHash] --------------------------------------------------------------------------------hashAgg[LOCAL] ----------------------------------------------------------------------------------PhysicalProject -------------------------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() build RFs:RF4 cr_item_sk->[cs_item_sk];RF5 cr_order_number->[cs_order_number] +------------------------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() build RFs:RF0 cr_item_sk->[cs_item_sk];RF1 cr_order_number->[cs_order_number] --------------------------------------------------------------------------------------PhysicalProject -----------------------------------------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4 RF5 RF11 +----------------------------------------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF8 --------------------------------------------------------------------------------------PhysicalProject -----------------------------------------------------------------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF11 +----------------------------------------------------------------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF8 ------------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] --------------------------------------------------------------------PhysicalProject 
----------------------------------------------------------------------filter(d_year IN (2001, 2002)) ------------------------------------------------------------------------PhysicalOlapScan[date_dim] --------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------------------------------------------PhysicalProject -------------------------------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF9 +------------------------------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF5 ----------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------------------------------------------PhysicalProject --------------------------------------------------------------PhysicalOlapScan[income_band] @@ -71,36 +96,11 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) ----------------------------------------------------------PhysicalOlapScan[store] --------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------------------------------PhysicalProject -------------------------------------------------------filter((item.i_current_price <= 33.00) and (item.i_current_price >= 24.00) and i_color IN ('blanched', 'brown', 'burlywood', 'chocolate', 'drab', 'medium')) ---------------------------------------------------------PhysicalOlapScan[item] -------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[promotion] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] 
-------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=() build RFs:RF2 cd_demo_sk->[c_current_cdemo_sk] -------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------PhysicalProject -----------------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() build RFs:RF1 hd_demo_sk->[c_current_hdemo_sk] -------------------------------------------PhysicalProject ---------------------------------------------PhysicalOlapScan[customer] apply RFs: RF1 RF2 RF3 RF18 RF19 -------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------------------PhysicalProject -----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF0 ib_income_band_sk->[hd_income_band_sk] +------------------------------------------------------PhysicalOlapScan[promotion] +----------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------------------------------PhysicalProject ---------------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF0 -------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------------------------PhysicalProject -----------------------------------------------------PhysicalOlapScan[income_band] -------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[customer_demographics] 
-------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[customer_address] +--------------------------------------------------filter((item.i_current_price <= 33.00) and (item.i_current_price >= 24.00) and i_color IN ('blanched', 'brown', 'burlywood', 'chocolate', 'drab', 'medium')) +----------------------------------------------------PhysicalOlapScan[item] --------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------PhysicalProject ------------------------PhysicalOlapScan[date_dim]