From e6ec13d54251331d6b690c1dedffc6abcd6da153 Mon Sep 17 00:00:00 2001 From: minghong Date: Thu, 26 Sep 2024 16:52:02 +0800 Subject: [PATCH] fix --- .../rules/expression/rules/OrToIn.java | 34 +++++-------------- .../nereids/rules/rewrite/OrToInTest.java | 2 +- .../shape/query15.out | 2 +- .../shape/query41.out | 2 +- .../shape/query47.out | 2 +- .../shape/query48.out | 18 +++++----- .../shape/query53.out | 2 +- .../shape/query57.out | 2 +- .../shape/query63.out | 2 +- .../shape/query85.out | 21 ++++++------ .../shape/query88.out | 16 ++++----- .../shape/query89.out | 2 +- .../shape/query91.out | 2 +- .../shape/q19.out | 2 +- 14 files changed, 46 insertions(+), 63 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/OrToIn.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/OrToIn.java index 4e99101b29d6628..1f8a73278d46ea1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/OrToIn.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/OrToIn.java @@ -44,21 +44,6 @@ /** * dependends on SimplifyRange rule * - * Used to convert multi equalTo which has same slot and compare to a literal of disjunction to a InPredicate so that - * it could be push down to storage engine. - * example: - * col1 = 1 or col1 = 2 or col1 = 3 and (col2 = 4) - * col1 = 1 and col1 = 3 and col2 = 3 or col2 = 4 - * (col1 = 1 or col1 = 2) and (col2 = 3 or col2 = 4) - *

- * would be converted to: - * col1 in (1, 2) or col1 = 3 and (col2 = 4) - * col1 = 1 and col1 = 3 and col2 = 3 or col2 = 4 - * (col1 in (1, 2) and (col2 in (3, 4))) - * The generic type declaration and the overridden 'rewrite' function in this class may appear unconventional - * because we need to maintain a map passed between methods in this class. But the owner of this module prohibits - * adding any additional rule-specific fields to the default ExpressionRewriteContext. However, the entire expression - * rewrite framework always passes an ExpressionRewriteContext of type context to all rules. */ public class OrToIn implements ExpressionPatternRuleFactory { @@ -99,14 +84,14 @@ private Expression rewrite(Or or) { } } - Map> candidates = getCandidate(disjuncts.get(0)); + Map> candidates = getCandidates(disjuncts.get(0)); if (candidates.isEmpty()) { return or; } // verify each candidate for (int i = 1; i < disjuncts.size(); i++) { - Map> otherCandidates = getCandidate(disjuncts.get(i)); + Map> otherCandidates = getCandidates(disjuncts.get(i)); if (otherCandidates.isEmpty()) { return or; } @@ -117,8 +102,8 @@ private Expression rewrite(Or or) { } if (!candidates.isEmpty()) { Expression conjunct = candidatesToFinalResult(candidates); - boolean hasOtherExpr = hasOtherExpressionExceptCandidates(disjuncts, candidates.keySet()); - if (hasOtherExpr) { + boolean keep = keepOriginalOrExpression(disjuncts); + if (keep) { return new And(conjunct, or); } else { return conjunct; @@ -127,15 +112,12 @@ private Expression rewrite(Or or) { return or; } - private boolean hasOtherExpressionExceptCandidates(List disjuncts, Set candidateKeys) { + private boolean keepOriginalOrExpression(List disjuncts) { for (Expression disjunct : disjuncts) { List conjuncts = ExpressionUtils.extractConjunction(disjunct); - for (Expression conjunct : conjuncts) { - if (!containsAny(conjunct.getInputSlots(), candidateKeys)) { - return true; - } + if (conjuncts.size() > 1) { + return true; } - } return false; } @@ -215,7 +197,7 @@ private boolean independentConjunct(int idx, List conjuncts) { return true; } - private Map> getCandidate(Expression disjunct) { + private Map> getCandidates(Expression disjunct) { List conjuncts = ExpressionUtils.extractConjunction(disjunct); Map> candidates = new LinkedHashMap<>(); // collect candidates from the first disjunction diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/OrToInTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/OrToInTest.java index 9decea50376e02e..67126e09ff14b65 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/OrToInTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/OrToInTest.java @@ -153,7 +153,7 @@ void test11() { String expr = "(a=1 and b=2 and c=3) or (a=2 and b=2 and c=4)"; Expression expression = PARSER.parseExpression(expr); Expression rewritten = OrToIn.INSTANCE.rewriteTree(expression, context); - Assertions.assertEquals("((a IN (1, 2) AND (b = 2)) AND c IN (3, 4))", + Assertions.assertEquals("(((a IN (1, 2) AND (b = 2)) AND c IN (3, 4)) AND ((((a = 1) AND (b = 2)) AND (c = 3)) OR (((a = 2) AND (b = 2)) AND (c = 4))))", rewritten.toSql()); } diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query15.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query15.out index ed2ea026a241c9d..685f61ffed3387b 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query15.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query15.out @@ -8,7 +8,7 @@ PhysicalResultSink ----------PhysicalDistribute[DistributionSpecHash] ------------hashAgg[LOCAL] --------------PhysicalProject -----------------hashJoin[INNER_JOIN shuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=(((ca_state IN ('CA', 'GA', 'WA') OR substring(ca_zip, 1, 5) IN ('80348', '81792', '83405', '85392', '85460', '85669', '86197', '86475', '88274')) OR (catalog_sales.cs_sales_price > 500.00))) build RFs:RF2 c_customer_sk->[cs_bill_customer_sk] +----------------hashJoin[INNER_JOIN shuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=(((substring(ca_zip, 1, 5) IN ('80348', '81792', '83405', '85392', '85460', '85669', '86197', '86475', '88274') OR ca_state IN ('CA', 'GA', 'WA')) OR (catalog_sales.cs_sales_price > 500.00))) build RFs:RF2 c_customer_sk->[cs_bill_customer_sk] ------------------PhysicalProject --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk] ----------------------PhysicalProject diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query41.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query41.out index d20341c931a06da..4cac3a912dbe190 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query41.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query41.out @@ -18,6 +18,6 @@ PhysicalResultSink ------------------------PhysicalDistribute[DistributionSpecHash] --------------------------hashAgg[LOCAL] ----------------------------PhysicalProject -------------------------------filter((((item.i_category = 'Men') AND (((((i_size IN ('economy', 'medium') AND i_color IN ('dodger', 'tan')) AND i_units IN ('Bunch', 'Tsp')) OR ((i_size IN ('economy', 'medium') AND i_color IN ('indian', 'spring')) AND i_units IN ('Carton', 'Unknown'))) OR ((i_color IN ('blue', 'chartreuse') AND i_units IN ('Each', 'Oz')) AND i_size IN ('N/A', 'large'))) OR ((i_color IN ('peru', 'saddle') AND i_units IN ('Gram', 'Pallet')) AND i_size IN ('N/A', 'large')))) OR ((item.i_category = 'Women') AND (((((i_color IN ('aquamarine', 'gainsboro') AND i_units IN ('Dozen', 'Ounce')) AND i_size IN ('economy', 'medium')) OR ((i_color IN ('chiffon', 'violet') AND i_units IN ('Pound', 'Ton')) AND i_size IN ('extra large', 'small'))) OR ((i_color IN ('blanched', 'tomato') AND i_units IN ('Case', 'Tbl')) AND i_size IN ('economy', 'medium'))) OR ((i_color IN ('almond', 'lime') AND i_units IN ('Box', 'Dram')) AND i_size IN ('extra large', 'small')))))) +------------------------------filter((((i_color IN ('aquamarine', 'blue', 'chartreuse', 'chiffon', 'dodger', 'gainsboro', 'tan', 'violet') AND i_units IN ('Bunch', 'Dozen', 'Each', 'Ounce', 'Oz', 'Pound', 'Ton', 'Tsp')) AND ((((((item.i_category = 'Women') AND i_color IN ('aquamarine', 'gainsboro')) AND i_units IN ('Dozen', 'Ounce')) AND i_size IN ('economy', 'medium')) OR ((((item.i_category = 'Women') AND i_color IN ('chiffon', 'violet')) AND i_units IN ('Pound', 'Ton')) AND i_size IN ('extra large', 'small'))) OR (((((item.i_category = 'Men') AND i_color IN ('blue', 'chartreuse')) AND i_units IN ('Each', 'Oz')) AND i_size IN ('N/A', 'large')) OR ((((item.i_category = 'Men') AND i_color IN ('dodger', 'tan')) AND i_units IN ('Bunch', 'Tsp')) AND i_size IN ('economy', 'medium'))))) OR ((i_color IN ('almond', 'blanched', 'indian', 'lime', 'peru', 'saddle', 'spring', 'tomato') AND i_units IN ('Box', 'Carton', 'Case', 'Dram', 'Gram', 'Pallet', 'Tbl', 'Unknown')) AND ((((((item.i_category = 'Women') AND i_color IN ('blanched', 'tomato')) AND i_units IN ('Case', 'Tbl')) AND i_size IN ('economy', 'medium')) OR ((((item.i_category = 'Women') AND i_color IN ('almond', 'lime')) AND i_units IN ('Box', 'Dram')) AND i_size IN ('extra large', 'small'))) OR (((((item.i_category = 'Men') AND i_color IN ('peru', 'saddle')) AND i_units IN ('Gram', 'Pallet')) AND i_size IN ('N/A', 'large')) OR ((((item.i_category = 'Men') AND i_color IN ('indian', 'spring')) AND i_units IN ('Carton', 'Unknown')) AND i_size IN ('economy', 'medium')))))) and i_category IN ('Men', 'Women') and i_size IN ('N/A', 'economy', 'extra large', 'large', 'medium', 'small')) --------------------------------PhysicalOlapScan[item] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query47.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query47.out index f33181c085733c0..7a668b06c433e5e 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query47.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query47.out @@ -21,7 +21,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------------------------PhysicalProject --------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 ------------------------------------PhysicalProject ---------------------------------------filter((((date_dim.d_year = 2001) OR ((date_dim.d_year = 2000) AND (date_dim.d_moy = 12))) OR ((date_dim.d_year = 2002) AND (date_dim.d_moy = 1)))) +--------------------------------------filter((((date_dim.d_year = 2001) OR ((date_dim.d_year = 2000) AND (date_dim.d_moy = 12))) OR ((date_dim.d_year = 2002) AND (date_dim.d_moy = 1))) and d_year IN (2000, 2001, 2002)) ----------------------------------------PhysicalOlapScan[date_dim] --------------------------------PhysicalProject ----------------------------------PhysicalOlapScan[item] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query48.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query48.out index 0bb694c50c5abea..69b59b55915bcde 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query48.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query48.out @@ -7,23 +7,23 @@ PhysicalResultSink --------PhysicalProject ----------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk] ------------PhysicalProject ---------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=((((ca_state IN ('IA', 'MD', 'MN') AND ((store_sales.ss_net_profit >= 0.00) AND (store_sales.ss_net_profit <= 2000.00))) OR (ca_state IN ('IL', 'TX', 'VA') AND ((store_sales.ss_net_profit >= 150.00) AND (store_sales.ss_net_profit <= 3000.00)))) OR (ca_state IN ('IN', 'MI', 'WI') AND ((store_sales.ss_net_profit >= 50.00) AND (store_sales.ss_net_profit <= 25000.00))))) build RFs:RF2 ss_addr_sk->[ca_address_sk] +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk] ----------------PhysicalProject -------------------filter((customer_address.ca_country = 'United States') and ca_state IN ('IA', 'IL', 'IN', 'MD', 'MI', 'MN', 'TX', 'VA', 'WI')) ---------------------PhysicalOlapScan[customer_address] apply RFs: RF2 -----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=((((ca_state IN ('IA', 'MD', 'MN') AND ((store_sales.ss_net_profit >= 0.00) AND (store_sales.ss_net_profit <= 2000.00))) OR (ca_state IN ('IL', 'TX', 'VA') AND ((store_sales.ss_net_profit >= 150.00) AND (store_sales.ss_net_profit <= 3000.00)))) OR (ca_state IN ('IN', 'MI', 'WI') AND ((store_sales.ss_net_profit >= 50.00) AND (store_sales.ss_net_profit <= 25000.00))))) build RFs:RF1 ca_address_sk->[ss_addr_sk] --------------------PhysicalProject ----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk)) otherCondition=((((((customer_demographics.cd_marital_status = 'U') AND (customer_demographics.cd_education_status = 'Primary')) AND ((store_sales.ss_sales_price >= 100.00) AND (store_sales.ss_sales_price <= 150.00))) OR (((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = 'College')) AND ((store_sales.ss_sales_price >= 50.00) AND (store_sales.ss_sales_price <= 100.00)))) OR (((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = '2 yr Degree')) AND ((store_sales.ss_sales_price >= 150.00) AND (store_sales.ss_sales_price <= 200.00))))) build RFs:RF0 cd_demo_sk->[ss_cdemo_sk] ------------------------PhysicalProject --------------------------filter((store_sales.ss_net_profit <= 25000.00) and (store_sales.ss_net_profit >= 0.00) and (store_sales.ss_sales_price <= 200.00) and (store_sales.ss_sales_price >= 50.00)) -----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF3 +----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3 ------------------------PhysicalProject ---------------------------filter(((((customer_demographics.cd_marital_status = 'U') AND (customer_demographics.cd_education_status = 'Primary')) OR ((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = 'College'))) OR ((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = '2 yr Degree')))) +--------------------------filter(((((customer_demographics.cd_marital_status = 'U') AND (customer_demographics.cd_education_status = 'Primary')) OR ((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = 'College'))) OR ((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = '2 yr Degree'))) and cd_education_status IN ('2 yr Degree', 'College', 'Primary') and cd_marital_status IN ('D', 'U', 'W')) ----------------------------PhysicalOlapScan[customer_demographics] --------------------PhysicalProject -----------------------filter((date_dim.d_year = 1999)) -------------------------PhysicalOlapScan[date_dim] +----------------------filter(((((customer_address.ca_country = 'United States') AND ca_state IN ('IA', 'MD', 'MN')) OR ((customer_address.ca_country = 'United States') AND ca_state IN ('IL', 'TX', 'VA'))) OR ((customer_address.ca_country = 'United States') AND ca_state IN ('IN', 'MI', 'WI'))) and (customer_address.ca_country = 'United States') and ca_state IN ('IA', 'IL', 'IN', 'MD', 'MI', 'MN', 'TX', 'VA', 'WI')) +------------------------PhysicalOlapScan[customer_address] +----------------PhysicalProject +------------------filter((date_dim.d_year = 1999)) +--------------------PhysicalOlapScan[date_dim] ------------PhysicalProject --------------PhysicalOlapScan[store] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query53.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query53.out index 7dbd34d228fe3fa..81e6091b663bf7b 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query53.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query53.out @@ -22,7 +22,7 @@ PhysicalResultSink --------------------------------------PhysicalProject ----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 --------------------------------------PhysicalProject -----------------------------------------filter((((i_category IN ('Books', 'Children', 'Electronics') AND i_class IN ('personal', 'portable', 'reference', 'self-help')) AND i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')) OR ((i_category IN ('Men', 'Music', 'Women') AND i_class IN ('accessories', 'classical', 'fragrances', 'pants')) AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')))) +----------------------------------------filter((((i_category IN ('Books', 'Children', 'Electronics') AND i_class IN ('personal', 'portable', 'reference', 'self-help')) AND i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')) OR ((i_category IN ('Men', 'Music', 'Women') AND i_class IN ('accessories', 'classical', 'fragrances', 'pants')) AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) ------------------------------------------PhysicalOlapScan[item] ----------------------------------PhysicalProject ------------------------------------filter(d_month_seq IN (1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211)) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query57.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query57.out index 15dda452ebc9842..7bbcf019701d4d7 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query57.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query57.out @@ -21,7 +21,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------------------------PhysicalProject --------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 ------------------------------------PhysicalProject ---------------------------------------filter((((date_dim.d_year = 1999) OR ((date_dim.d_year = 1998) AND (date_dim.d_moy = 12))) OR ((date_dim.d_year = 2000) AND (date_dim.d_moy = 1)))) +--------------------------------------filter((((date_dim.d_year = 1999) OR ((date_dim.d_year = 1998) AND (date_dim.d_moy = 12))) OR ((date_dim.d_year = 2000) AND (date_dim.d_moy = 1))) and d_year IN (1998, 1999, 2000) and d_year IN (1998, 1999, 2000)) ----------------------------------------PhysicalOlapScan[date_dim] --------------------------------PhysicalProject ----------------------------------PhysicalOlapScan[item] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query63.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query63.out index c5cea55eb7b009b..2cd49bd8c407861 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query63.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query63.out @@ -22,7 +22,7 @@ PhysicalResultSink --------------------------------------PhysicalProject ----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 --------------------------------------PhysicalProject -----------------------------------------filter((((i_category IN ('Books', 'Children', 'Electronics') AND i_class IN ('personal', 'portable', 'reference', 'self-help')) AND i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')) OR ((i_category IN ('Men', 'Music', 'Women') AND i_class IN ('accessories', 'classical', 'fragrances', 'pants')) AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')))) +----------------------------------------filter((((i_category IN ('Books', 'Children', 'Electronics') AND i_class IN ('personal', 'portable', 'reference', 'self-help')) AND i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')) OR ((i_category IN ('Men', 'Music', 'Women') AND i_class IN ('accessories', 'classical', 'fragrances', 'pants')) AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) ------------------------------------------PhysicalOlapScan[item] ----------------------------------PhysicalProject ------------------------------------filter(d_month_seq IN (1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192)) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query85.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query85.out index da46d54e2f8be5b..32de47c16d87ecd 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query85.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query85.out @@ -13,18 +13,19 @@ PhysicalResultSink --------------------PhysicalProject ----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cd1.cd_education_status = cd2.cd_education_status) and (cd1.cd_marital_status = cd2.cd_marital_status) and (cd2.cd_demo_sk = web_returns.wr_returning_cdemo_sk)) otherCondition=() build RFs:RF6 wr_returning_cdemo_sk->[cd_demo_sk];RF7 cd_marital_status->[cd_marital_status];RF8 cd_education_status->[cd_education_status] ------------------------PhysicalProject ---------------------------PhysicalOlapScan[customer_demographics] apply RFs: RF6 RF7 RF8 +--------------------------filter(cd_education_status IN ('4 yr Degree', 'Advanced Degree', 'Secondary') and cd_marital_status IN ('M', 'S', 'W')) +----------------------------PhysicalOlapScan[customer_demographics] apply RFs: RF6 RF7 RF8 ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_web_page_sk = web_page.wp_web_page_sk)) otherCondition=() build RFs:RF5 wp_web_page_sk->[ws_web_page_sk] +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cd1.cd_demo_sk = web_returns.wr_refunded_cdemo_sk)) otherCondition=((((((cd1.cd_marital_status = 'M') AND (cd1.cd_education_status = '4 yr Degree')) AND ((web_sales.ws_sales_price >= 100.00) AND (web_sales.ws_sales_price <= 150.00))) OR (((cd1.cd_marital_status = 'S') AND (cd1.cd_education_status = 'Secondary')) AND ((web_sales.ws_sales_price >= 50.00) AND (web_sales.ws_sales_price <= 100.00)))) OR (((cd1.cd_marital_status = 'W') AND (cd1.cd_education_status = 'Advanced Degree')) AND ((web_sales.ws_sales_price >= 150.00) AND (web_sales.ws_sales_price <= 200.00))))) build RFs:RF5 wr_refunded_cdemo_sk->[cd_demo_sk] ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cd1.cd_demo_sk = web_returns.wr_refunded_cdemo_sk)) otherCondition=((((((cd1.cd_marital_status = 'M') AND (cd1.cd_education_status = '4 yr Degree')) AND ((web_sales.ws_sales_price >= 100.00) AND (web_sales.ws_sales_price <= 150.00))) OR (((cd1.cd_marital_status = 'S') AND (cd1.cd_education_status = 'Secondary')) AND ((web_sales.ws_sales_price >= 50.00) AND (web_sales.ws_sales_price <= 100.00)))) OR (((cd1.cd_marital_status = 'W') AND (cd1.cd_education_status = 'Advanced Degree')) AND ((web_sales.ws_sales_price >= 150.00) AND (web_sales.ws_sales_price <= 200.00))))) build RFs:RF4 wr_refunded_cdemo_sk->[cd_demo_sk] ---------------------------------PhysicalProject -----------------------------------filter(((((cd1.cd_marital_status = 'M') AND (cd1.cd_education_status = '4 yr Degree')) OR ((cd1.cd_marital_status = 'S') AND (cd1.cd_education_status = 'Secondary'))) OR ((cd1.cd_marital_status = 'W') AND (cd1.cd_education_status = 'Advanced Degree')))) -------------------------------------PhysicalOlapScan[customer_demographics] apply RFs: RF4 +------------------------------filter(((((cd1.cd_marital_status = 'M') AND (cd1.cd_education_status = '4 yr Degree')) OR ((cd1.cd_marital_status = 'S') AND (cd1.cd_education_status = 'Secondary'))) OR ((cd1.cd_marital_status = 'W') AND (cd1.cd_education_status = 'Advanced Degree'))) and cd_education_status IN ('4 yr Degree', 'Advanced Degree', 'Secondary') and cd_marital_status IN ('M', 'S', 'W')) +--------------------------------PhysicalOlapScan[customer_demographics] apply RFs: RF5 +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_web_page_sk = web_page.wp_web_page_sk)) otherCondition=() build RFs:RF4 wp_web_page_sk->[ws_web_page_sk] --------------------------------PhysicalProject ----------------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = web_returns.wr_refunded_addr_sk)) otherCondition=((((ca_state IN ('DE', 'FL', 'TX') AND ((web_sales.ws_net_profit >= 100.00) AND (web_sales.ws_net_profit <= 200.00))) OR (ca_state IN ('ID', 'IN', 'ND') AND ((web_sales.ws_net_profit >= 150.00) AND (web_sales.ws_net_profit <= 300.00)))) OR (ca_state IN ('IL', 'MT', 'OH') AND ((web_sales.ws_net_profit >= 50.00) AND (web_sales.ws_net_profit <= 250.00))))) build RFs:RF3 wr_refunded_addr_sk->[ca_address_sk] ------------------------------------PhysicalProject ---------------------------------------filter((customer_address.ca_country = 'United States') and ca_state IN ('DE', 'FL', 'ID', 'IL', 'IN', 'MT', 'ND', 'OH', 'TX')) +--------------------------------------filter(((((customer_address.ca_country = 'United States') AND ca_state IN ('DE', 'FL', 'TX')) OR ((customer_address.ca_country = 'United States') AND ca_state IN ('ID', 'IN', 'ND'))) OR ((customer_address.ca_country = 'United States') AND ca_state IN ('IL', 'MT', 'OH'))) and (customer_address.ca_country = 'United States') and ca_state IN ('DE', 'FL', 'ID', 'IL', 'IN', 'MT', 'ND', 'OH', 'TX')) ----------------------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 ------------------------------------PhysicalProject --------------------------------------hashJoin[INNER_JOIN colocated] hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and (web_sales.ws_order_number = web_returns.wr_order_number)) otherCondition=() build RFs:RF1 ws_item_sk->[wr_item_sk];RF2 ws_order_number->[wr_order_number] @@ -34,12 +35,12 @@ PhysicalResultSink ------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] --------------------------------------------PhysicalProject ----------------------------------------------filter((web_sales.ws_net_profit <= 300.00) and (web_sales.ws_net_profit >= 50.00) and (web_sales.ws_sales_price <= 200.00) and (web_sales.ws_sales_price >= 50.00)) -------------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF5 +------------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF4 --------------------------------------------PhysicalProject ----------------------------------------------filter((date_dim.d_year = 2000)) ------------------------------------------------PhysicalOlapScan[date_dim] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[web_page] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[web_page] --------------------PhysicalProject ----------------------PhysicalOlapScan[reason] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query88.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query88.out index af35685d6e46ac0..bca8b0d069014bb 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query88.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query88.out @@ -23,7 +23,7 @@ PhysicalResultSink ------------------------------------filter((time_dim.t_hour = 8) and (time_dim.t_minute >= 30)) --------------------------------------PhysicalOlapScan[time_dim] ------------------------------PhysicalProject ---------------------------------filter(((((household_demographics.hd_dep_count = -1) AND (household_demographics.hd_vehicle_count <= 1)) OR ((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= 6))) OR ((household_demographics.hd_dep_count = 3) AND (household_demographics.hd_vehicle_count <= 5)))) +--------------------------------filter(((((household_demographics.hd_dep_count = -1) AND (household_demographics.hd_vehicle_count <= 1)) OR ((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= 6))) OR ((household_demographics.hd_dep_count = 3) AND (household_demographics.hd_vehicle_count <= 5))) and hd_dep_count IN (-1, 3, 4)) ----------------------------------PhysicalOlapScan[household_demographics] --------------------------PhysicalProject ----------------------------filter((store.s_store_name = 'ese')) @@ -43,7 +43,7 @@ PhysicalResultSink ------------------------------------filter((time_dim.t_hour = 9) and (time_dim.t_minute < 30)) --------------------------------------PhysicalOlapScan[time_dim] ------------------------------PhysicalProject ---------------------------------filter(((((household_demographics.hd_dep_count = -1) AND (household_demographics.hd_vehicle_count <= 1)) OR ((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= 6))) OR ((household_demographics.hd_dep_count = 3) AND (household_demographics.hd_vehicle_count <= 5)))) +--------------------------------filter(((((household_demographics.hd_dep_count = -1) AND (household_demographics.hd_vehicle_count <= 1)) OR ((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= 6))) OR ((household_demographics.hd_dep_count = 3) AND (household_demographics.hd_vehicle_count <= 5))) and hd_dep_count IN (-1, 3, 4)) ----------------------------------PhysicalOlapScan[household_demographics] --------------------------PhysicalProject ----------------------------filter((store.s_store_name = 'ese')) @@ -63,7 +63,7 @@ PhysicalResultSink ----------------------------------filter((time_dim.t_hour = 9) and (time_dim.t_minute >= 30)) ------------------------------------PhysicalOlapScan[time_dim] ----------------------------PhysicalProject -------------------------------filter(((((household_demographics.hd_dep_count = -1) AND (household_demographics.hd_vehicle_count <= 1)) OR ((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= 6))) OR ((household_demographics.hd_dep_count = 3) AND (household_demographics.hd_vehicle_count <= 5)))) +------------------------------filter(((((household_demographics.hd_dep_count = -1) AND (household_demographics.hd_vehicle_count <= 1)) OR ((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= 6))) OR ((household_demographics.hd_dep_count = 3) AND (household_demographics.hd_vehicle_count <= 5))) and hd_dep_count IN (-1, 3, 4)) --------------------------------PhysicalOlapScan[household_demographics] ------------------------PhysicalProject --------------------------filter((store.s_store_name = 'ese')) @@ -83,7 +83,7 @@ PhysicalResultSink --------------------------------filter((time_dim.t_hour = 10) and (time_dim.t_minute < 30)) ----------------------------------PhysicalOlapScan[time_dim] --------------------------PhysicalProject -----------------------------filter(((((household_demographics.hd_dep_count = -1) AND (household_demographics.hd_vehicle_count <= 1)) OR ((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= 6))) OR ((household_demographics.hd_dep_count = 3) AND (household_demographics.hd_vehicle_count <= 5)))) +----------------------------filter(((((household_demographics.hd_dep_count = -1) AND (household_demographics.hd_vehicle_count <= 1)) OR ((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= 6))) OR ((household_demographics.hd_dep_count = 3) AND (household_demographics.hd_vehicle_count <= 5))) and hd_dep_count IN (-1, 3, 4)) ------------------------------PhysicalOlapScan[household_demographics] ----------------------PhysicalProject ------------------------filter((store.s_store_name = 'ese')) @@ -103,7 +103,7 @@ PhysicalResultSink ------------------------------filter((time_dim.t_hour = 10) and (time_dim.t_minute >= 30)) --------------------------------PhysicalOlapScan[time_dim] ------------------------PhysicalProject ---------------------------filter(((((household_demographics.hd_dep_count = -1) AND (household_demographics.hd_vehicle_count <= 1)) OR ((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= 6))) OR ((household_demographics.hd_dep_count = 3) AND (household_demographics.hd_vehicle_count <= 5)))) +--------------------------filter(((((household_demographics.hd_dep_count = -1) AND (household_demographics.hd_vehicle_count <= 1)) OR ((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= 6))) OR ((household_demographics.hd_dep_count = 3) AND (household_demographics.hd_vehicle_count <= 5))) and hd_dep_count IN (-1, 3, 4)) ----------------------------PhysicalOlapScan[household_demographics] --------------------PhysicalProject ----------------------filter((store.s_store_name = 'ese')) @@ -123,7 +123,7 @@ PhysicalResultSink ----------------------------filter((time_dim.t_hour = 11) and (time_dim.t_minute < 30)) ------------------------------PhysicalOlapScan[time_dim] ----------------------PhysicalProject -------------------------filter(((((household_demographics.hd_dep_count = -1) AND (household_demographics.hd_vehicle_count <= 1)) OR ((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= 6))) OR ((household_demographics.hd_dep_count = 3) AND (household_demographics.hd_vehicle_count <= 5)))) +------------------------filter(((((household_demographics.hd_dep_count = -1) AND (household_demographics.hd_vehicle_count <= 1)) OR ((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= 6))) OR ((household_demographics.hd_dep_count = 3) AND (household_demographics.hd_vehicle_count <= 5))) and hd_dep_count IN (-1, 3, 4)) --------------------------PhysicalOlapScan[household_demographics] ------------------PhysicalProject --------------------filter((store.s_store_name = 'ese')) @@ -143,7 +143,7 @@ PhysicalResultSink --------------------------filter((time_dim.t_hour = 11) and (time_dim.t_minute >= 30)) ----------------------------PhysicalOlapScan[time_dim] --------------------PhysicalProject -----------------------filter(((((household_demographics.hd_dep_count = -1) AND (household_demographics.hd_vehicle_count <= 1)) OR ((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= 6))) OR ((household_demographics.hd_dep_count = 3) AND (household_demographics.hd_vehicle_count <= 5)))) +----------------------filter(((((household_demographics.hd_dep_count = -1) AND (household_demographics.hd_vehicle_count <= 1)) OR ((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= 6))) OR ((household_demographics.hd_dep_count = 3) AND (household_demographics.hd_vehicle_count <= 5))) and hd_dep_count IN (-1, 3, 4)) ------------------------PhysicalOlapScan[household_demographics] ----------------PhysicalProject ------------------filter((store.s_store_name = 'ese')) @@ -163,7 +163,7 @@ PhysicalResultSink ------------------------filter((time_dim.t_hour = 12) and (time_dim.t_minute < 30)) --------------------------PhysicalOlapScan[time_dim] ------------------PhysicalProject ---------------------filter(((((household_demographics.hd_dep_count = -1) AND (household_demographics.hd_vehicle_count <= 1)) OR ((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= 6))) OR ((household_demographics.hd_dep_count = 3) AND (household_demographics.hd_vehicle_count <= 5)))) +--------------------filter(((((household_demographics.hd_dep_count = -1) AND (household_demographics.hd_vehicle_count <= 1)) OR ((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= 6))) OR ((household_demographics.hd_dep_count = 3) AND (household_demographics.hd_vehicle_count <= 5))) and hd_dep_count IN (-1, 3, 4)) ----------------------PhysicalOlapScan[household_demographics] --------------PhysicalProject ----------------filter((store.s_store_name = 'ese')) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query89.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query89.out index 35a90cb94290b79..661b47d0ea980ab 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query89.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query89.out @@ -23,7 +23,7 @@ PhysicalResultSink ----------------------------------------PhysicalProject ------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 ----------------------------------------PhysicalProject -------------------------------------------filter(((i_category IN ('Electronics', 'Jewelry', 'Shoes') AND i_class IN ('athletic', 'portable', 'semi-precious')) OR (i_category IN ('Men', 'Music', 'Women') AND i_class IN ('accessories', 'maternity', 'rock')))) +------------------------------------------filter(((i_category IN ('Electronics', 'Jewelry', 'Shoes') AND i_class IN ('athletic', 'portable', 'semi-precious')) OR (i_category IN ('Men', 'Music', 'Women') AND i_class IN ('accessories', 'maternity', 'rock'))) and i_category IN ('Electronics', 'Jewelry', 'Men', 'Music', 'Shoes', 'Women') and i_class IN ('accessories', 'athletic', 'maternity', 'portable', 'rock', 'semi-precious')) --------------------------------------------PhysicalOlapScan[item] ------------------------------------PhysicalProject --------------------------------------filter((date_dim.d_year = 1999)) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query91.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query91.out index ca8fac70d693aac..6f3970cbb29d50e 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query91.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query91.out @@ -28,7 +28,7 @@ PhysicalResultSink ----------------------------------------PhysicalProject ------------------------------------------PhysicalOlapScan[customer] apply RFs: RF0 RF1 ----------------------------------------PhysicalProject -------------------------------------------filter((((customer_demographics.cd_marital_status = 'M') AND (customer_demographics.cd_education_status = 'Unknown')) OR ((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = 'Advanced Degree')))) +------------------------------------------filter((((customer_demographics.cd_marital_status = 'M') AND (customer_demographics.cd_education_status = 'Unknown')) OR ((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = 'Advanced Degree'))) and cd_education_status IN ('Advanced Degree', 'Unknown') and cd_marital_status IN ('M', 'W')) --------------------------------------------PhysicalOlapScan[customer_demographics] ------------------------------------PhysicalProject --------------------------------------filter((hd_buy_potential like '1001-5000%')) diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q19.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q19.out index 60e7b8446bd1a06..bf86e68a7f98372 100644 --- a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q19.out +++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q19.out @@ -10,6 +10,6 @@ PhysicalResultSink --------------filter((lineitem.l_quantity <= 30.00) and (lineitem.l_quantity >= 1.00) and (lineitem.l_shipinstruct = 'DELIVER IN PERSON') and l_shipmode IN ('AIR REG', 'AIR')) ----------------PhysicalOlapScan[lineitem] apply RFs: RF0 ------------PhysicalProject ---------------filter((((((part.p_brand = 'Brand#12') AND p_container IN ('SM BOX', 'SM CASE', 'SM PACK', 'SM PKG')) AND (part.p_size <= 5)) OR (((part.p_brand = 'Brand#23') AND p_container IN ('MED BAG', 'MED BOX', 'MED PACK', 'MED PKG')) AND (part.p_size <= 10))) OR (((part.p_brand = 'Brand#34') AND p_container IN ('LG BOX', 'LG CASE', 'LG PACK', 'LG PKG')) AND (part.p_size <= 15))) and (part.p_size >= 1)) +--------------filter((((((part.p_brand = 'Brand#12') AND p_container IN ('SM BOX', 'SM CASE', 'SM PACK', 'SM PKG')) AND (part.p_size <= 5)) OR (((part.p_brand = 'Brand#23') AND p_container IN ('MED BAG', 'MED BOX', 'MED PACK', 'MED PKG')) AND (part.p_size <= 10))) OR (((part.p_brand = 'Brand#34') AND p_container IN ('LG BOX', 'LG CASE', 'LG PACK', 'LG PKG')) AND (part.p_size <= 15))) and (part.p_size >= 1) and p_brand IN ('Brand#12', 'Brand#23', 'Brand#34') and p_container IN ('LG BOX', 'LG CASE', 'LG PACK', 'LG PKG', 'MED BAG', 'MED BOX', 'MED PACK', 'MED PKG', 'SM BOX', 'SM CASE', 'SM PACK', 'SM PKG')) ----------------PhysicalOlapScan[part]