Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
englefly committed Sep 26, 2024
1 parent a5d5d13 commit e6ec13d
Show file tree
Hide file tree
Showing 14 changed files with 46 additions and 63 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,21 +44,6 @@
/**
* depends on SimplifyRange rule
*
* Used to convert multiple equalTo predicates in a disjunction, which share the same slot and compare it to a literal,
* into an InPredicate so that it can be pushed down to the storage engine.
* example:
* col1 = 1 or col1 = 2 or col1 = 3 and (col2 = 4)
* col1 = 1 and col1 = 3 and col2 = 3 or col2 = 4
* (col1 = 1 or col1 = 2) and (col2 = 3 or col2 = 4)
* <p>
* would be converted to:
* col1 in (1, 2) or col1 = 3 and (col2 = 4)
* col1 = 1 and col1 = 3 and col2 = 3 or col2 = 4
* (col1 in (1, 2) and (col2 in (3, 4)))
* The generic type declaration and the overridden 'rewrite' function in this class may appear unconventional
* because we need to maintain a map passed between methods in this class. But the owner of this module prohibits
* adding any additional rule-specific fields to the default ExpressionRewriteContext. However, the entire expression
* rewrite framework always passes an ExpressionRewriteContext of type context to all rules.
*/
public class OrToIn implements ExpressionPatternRuleFactory {

Expand Down Expand Up @@ -99,14 +84,14 @@ private Expression rewrite(Or or) {
}
}

Map<Expression, Set<Literal>> candidates = getCandidate(disjuncts.get(0));
Map<Expression, Set<Literal>> candidates = getCandidates(disjuncts.get(0));
if (candidates.isEmpty()) {
return or;
}

// verify each candidate
for (int i = 1; i < disjuncts.size(); i++) {
Map<Expression, Set<Literal>> otherCandidates = getCandidate(disjuncts.get(i));
Map<Expression, Set<Literal>> otherCandidates = getCandidates(disjuncts.get(i));
if (otherCandidates.isEmpty()) {
return or;
}
Expand All @@ -117,8 +102,8 @@ private Expression rewrite(Or or) {
}
if (!candidates.isEmpty()) {
Expression conjunct = candidatesToFinalResult(candidates);
boolean hasOtherExpr = hasOtherExpressionExceptCandidates(disjuncts, candidates.keySet());
if (hasOtherExpr) {
boolean keep = keepOriginalOrExpression(disjuncts);
if (keep) {
return new And(conjunct, or);
} else {
return conjunct;
Expand All @@ -127,15 +112,12 @@ private Expression rewrite(Or or) {
return or;
}

private boolean hasOtherExpressionExceptCandidates(List<Expression> disjuncts, Set<Expression> candidateKeys) {
private boolean keepOriginalOrExpression(List<Expression> disjuncts) {
for (Expression disjunct : disjuncts) {
List<Expression> conjuncts = ExpressionUtils.extractConjunction(disjunct);
for (Expression conjunct : conjuncts) {
if (!containsAny(conjunct.getInputSlots(), candidateKeys)) {
return true;
}
if (conjuncts.size() > 1) {
return true;
}

}
return false;
}
Expand Down Expand Up @@ -215,7 +197,7 @@ private boolean independentConjunct(int idx, List<Expression> conjuncts) {
return true;
}

private Map<Expression, Set<Literal>> getCandidate(Expression disjunct) {
private Map<Expression, Set<Literal>> getCandidates(Expression disjunct) {
List<Expression> conjuncts = ExpressionUtils.extractConjunction(disjunct);
Map<Expression, Set<Literal>> candidates = new LinkedHashMap<>();
// collect candidates from the first disjunction
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ void test11() {
String expr = "(a=1 and b=2 and c=3) or (a=2 and b=2 and c=4)";
Expression expression = PARSER.parseExpression(expr);
Expression rewritten = OrToIn.INSTANCE.rewriteTree(expression, context);
Assertions.assertEquals("((a IN (1, 2) AND (b = 2)) AND c IN (3, 4))",
Assertions.assertEquals("(((a IN (1, 2) AND (b = 2)) AND c IN (3, 4)) AND ((((a = 1) AND (b = 2)) AND (c = 3)) OR (((a = 2) AND (b = 2)) AND (c = 4))))",
rewritten.toSql());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ PhysicalResultSink
----------PhysicalDistribute[DistributionSpecHash]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN shuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=(((ca_state IN ('CA', 'GA', 'WA') OR substring(ca_zip, 1, 5) IN ('80348', '81792', '83405', '85392', '85460', '85669', '86197', '86475', '88274')) OR (catalog_sales.cs_sales_price > 500.00))) build RFs:RF2 c_customer_sk->[cs_bill_customer_sk]
----------------hashJoin[INNER_JOIN shuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=(((substring(ca_zip, 1, 5) IN ('80348', '81792', '83405', '85392', '85460', '85669', '86197', '86475', '88274') OR ca_state IN ('CA', 'GA', 'WA')) OR (catalog_sales.cs_sales_price > 500.00))) build RFs:RF2 c_customer_sk->[cs_bill_customer_sk]
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk]
----------------------PhysicalProject
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ PhysicalResultSink
------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
------------------------------filter((((item.i_category = 'Men') AND (((((i_size IN ('economy', 'medium') AND i_color IN ('dodger', 'tan')) AND i_units IN ('Bunch', 'Tsp')) OR ((i_size IN ('economy', 'medium') AND i_color IN ('indian', 'spring')) AND i_units IN ('Carton', 'Unknown'))) OR ((i_color IN ('blue', 'chartreuse') AND i_units IN ('Each', 'Oz')) AND i_size IN ('N/A', 'large'))) OR ((i_color IN ('peru', 'saddle') AND i_units IN ('Gram', 'Pallet')) AND i_size IN ('N/A', 'large')))) OR ((item.i_category = 'Women') AND (((((i_color IN ('aquamarine', 'gainsboro') AND i_units IN ('Dozen', 'Ounce')) AND i_size IN ('economy', 'medium')) OR ((i_color IN ('chiffon', 'violet') AND i_units IN ('Pound', 'Ton')) AND i_size IN ('extra large', 'small'))) OR ((i_color IN ('blanched', 'tomato') AND i_units IN ('Case', 'Tbl')) AND i_size IN ('economy', 'medium'))) OR ((i_color IN ('almond', 'lime') AND i_units IN ('Box', 'Dram')) AND i_size IN ('extra large', 'small'))))))
------------------------------filter((((i_color IN ('aquamarine', 'blue', 'chartreuse', 'chiffon', 'dodger', 'gainsboro', 'tan', 'violet') AND i_units IN ('Bunch', 'Dozen', 'Each', 'Ounce', 'Oz', 'Pound', 'Ton', 'Tsp')) AND ((((((item.i_category = 'Women') AND i_color IN ('aquamarine', 'gainsboro')) AND i_units IN ('Dozen', 'Ounce')) AND i_size IN ('economy', 'medium')) OR ((((item.i_category = 'Women') AND i_color IN ('chiffon', 'violet')) AND i_units IN ('Pound', 'Ton')) AND i_size IN ('extra large', 'small'))) OR (((((item.i_category = 'Men') AND i_color IN ('blue', 'chartreuse')) AND i_units IN ('Each', 'Oz')) AND i_size IN ('N/A', 'large')) OR ((((item.i_category = 'Men') AND i_color IN ('dodger', 'tan')) AND i_units IN ('Bunch', 'Tsp')) AND i_size IN ('economy', 'medium'))))) OR ((i_color IN ('almond', 'blanched', 'indian', 'lime', 'peru', 'saddle', 'spring', 'tomato') AND i_units IN ('Box', 'Carton', 'Case', 'Dram', 'Gram', 'Pallet', 'Tbl', 'Unknown')) AND ((((((item.i_category = 'Women') AND i_color IN ('blanched', 'tomato')) AND i_units IN ('Case', 'Tbl')) AND i_size IN ('economy', 'medium')) OR ((((item.i_category = 'Women') AND i_color IN ('almond', 'lime')) AND i_units IN ('Box', 'Dram')) AND i_size IN ('extra large', 'small'))) OR (((((item.i_category = 'Men') AND i_color IN ('peru', 'saddle')) AND i_units IN ('Gram', 'Pallet')) AND i_size IN ('N/A', 'large')) OR ((((item.i_category = 'Men') AND i_color IN ('indian', 'spring')) AND i_units IN ('Carton', 'Unknown')) AND i_size IN ('economy', 'medium')))))) and i_category IN ('Men', 'Women') and i_size IN ('N/A', 'economy', 'extra large', 'large', 'medium', 'small'))
--------------------------------PhysicalOlapScan[item]

Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------------------------------------PhysicalProject
--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2
------------------------------------PhysicalProject
--------------------------------------filter((((date_dim.d_year = 2001) OR ((date_dim.d_year = 2000) AND (date_dim.d_moy = 12))) OR ((date_dim.d_year = 2002) AND (date_dim.d_moy = 1))))
--------------------------------------filter((((date_dim.d_year = 2001) OR ((date_dim.d_year = 2000) AND (date_dim.d_moy = 12))) OR ((date_dim.d_year = 2002) AND (date_dim.d_moy = 1))) and d_year IN (2000, 2001, 2002))
----------------------------------------PhysicalOlapScan[date_dim]
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[item]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,23 @@ PhysicalResultSink
--------PhysicalProject
----------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk]
------------PhysicalProject
--------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=((((ca_state IN ('IA', 'MD', 'MN') AND ((store_sales.ss_net_profit >= 0.00) AND (store_sales.ss_net_profit <= 2000.00))) OR (ca_state IN ('IL', 'TX', 'VA') AND ((store_sales.ss_net_profit >= 150.00) AND (store_sales.ss_net_profit <= 3000.00)))) OR (ca_state IN ('IN', 'MI', 'WI') AND ((store_sales.ss_net_profit >= 50.00) AND (store_sales.ss_net_profit <= 25000.00))))) build RFs:RF2 ss_addr_sk->[ca_address_sk]
--------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
----------------PhysicalProject
------------------filter((customer_address.ca_country = 'United States') and ca_state IN ('IA', 'IL', 'IN', 'MD', 'MI', 'MN', 'TX', 'VA', 'WI'))
--------------------PhysicalOlapScan[customer_address] apply RFs: RF2
----------------PhysicalProject
------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk]
------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=((((ca_state IN ('IA', 'MD', 'MN') AND ((store_sales.ss_net_profit >= 0.00) AND (store_sales.ss_net_profit <= 2000.00))) OR (ca_state IN ('IL', 'TX', 'VA') AND ((store_sales.ss_net_profit >= 150.00) AND (store_sales.ss_net_profit <= 3000.00)))) OR (ca_state IN ('IN', 'MI', 'WI') AND ((store_sales.ss_net_profit >= 50.00) AND (store_sales.ss_net_profit <= 25000.00))))) build RFs:RF1 ca_address_sk->[ss_addr_sk]
--------------------PhysicalProject
----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk)) otherCondition=((((((customer_demographics.cd_marital_status = 'U') AND (customer_demographics.cd_education_status = 'Primary')) AND ((store_sales.ss_sales_price >= 100.00) AND (store_sales.ss_sales_price <= 150.00))) OR (((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = 'College')) AND ((store_sales.ss_sales_price >= 50.00) AND (store_sales.ss_sales_price <= 100.00)))) OR (((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = '2 yr Degree')) AND ((store_sales.ss_sales_price >= 150.00) AND (store_sales.ss_sales_price <= 200.00))))) build RFs:RF0 cd_demo_sk->[ss_cdemo_sk]
------------------------PhysicalProject
--------------------------filter((store_sales.ss_net_profit <= 25000.00) and (store_sales.ss_net_profit >= 0.00) and (store_sales.ss_sales_price <= 200.00) and (store_sales.ss_sales_price >= 50.00))
----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF3
----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3
------------------------PhysicalProject
--------------------------filter(((((customer_demographics.cd_marital_status = 'U') AND (customer_demographics.cd_education_status = 'Primary')) OR ((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = 'College'))) OR ((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = '2 yr Degree'))))
--------------------------filter(((((customer_demographics.cd_marital_status = 'U') AND (customer_demographics.cd_education_status = 'Primary')) OR ((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = 'College'))) OR ((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = '2 yr Degree'))) and cd_education_status IN ('2 yr Degree', 'College', 'Primary') and cd_marital_status IN ('D', 'U', 'W'))
----------------------------PhysicalOlapScan[customer_demographics]
--------------------PhysicalProject
----------------------filter((date_dim.d_year = 1999))
------------------------PhysicalOlapScan[date_dim]
----------------------filter(((((customer_address.ca_country = 'United States') AND ca_state IN ('IA', 'MD', 'MN')) OR ((customer_address.ca_country = 'United States') AND ca_state IN ('IL', 'TX', 'VA'))) OR ((customer_address.ca_country = 'United States') AND ca_state IN ('IN', 'MI', 'WI'))) and (customer_address.ca_country = 'United States') and ca_state IN ('IA', 'IL', 'IN', 'MD', 'MI', 'MN', 'TX', 'VA', 'WI'))
------------------------PhysicalOlapScan[customer_address]
----------------PhysicalProject
------------------filter((date_dim.d_year = 1999))
--------------------PhysicalOlapScan[date_dim]
------------PhysicalProject
--------------PhysicalOlapScan[store]

Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ PhysicalResultSink
--------------------------------------PhysicalProject
----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2
--------------------------------------PhysicalProject
----------------------------------------filter((((i_category IN ('Books', 'Children', 'Electronics') AND i_class IN ('personal', 'portable', 'reference', 'self-help')) AND i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')) OR ((i_category IN ('Men', 'Music', 'Women') AND i_class IN ('accessories', 'classical', 'fragrances', 'pants')) AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))))
----------------------------------------filter((((i_category IN ('Books', 'Children', 'Electronics') AND i_class IN ('personal', 'portable', 'reference', 'self-help')) AND i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')) OR ((i_category IN ('Men', 'Music', 'Women') AND i_class IN ('accessories', 'classical', 'fragrances', 'pants')) AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help'))
------------------------------------------PhysicalOlapScan[item]
----------------------------------PhysicalProject
------------------------------------filter(d_month_seq IN (1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------------------------------------PhysicalProject
--------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2
------------------------------------PhysicalProject
--------------------------------------filter((((date_dim.d_year = 1999) OR ((date_dim.d_year = 1998) AND (date_dim.d_moy = 12))) OR ((date_dim.d_year = 2000) AND (date_dim.d_moy = 1))))
--------------------------------------filter((((date_dim.d_year = 1999) OR ((date_dim.d_year = 1998) AND (date_dim.d_moy = 12))) OR ((date_dim.d_year = 2000) AND (date_dim.d_moy = 1))) and d_year IN (1998, 1999, 2000) and d_year IN (1998, 1999, 2000))
----------------------------------------PhysicalOlapScan[date_dim]
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[item]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ PhysicalResultSink
--------------------------------------PhysicalProject
----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2
--------------------------------------PhysicalProject
----------------------------------------filter((((i_category IN ('Books', 'Children', 'Electronics') AND i_class IN ('personal', 'portable', 'reference', 'self-help')) AND i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')) OR ((i_category IN ('Men', 'Music', 'Women') AND i_class IN ('accessories', 'classical', 'fragrances', 'pants')) AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))))
----------------------------------------filter((((i_category IN ('Books', 'Children', 'Electronics') AND i_class IN ('personal', 'portable', 'reference', 'self-help')) AND i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')) OR ((i_category IN ('Men', 'Music', 'Women') AND i_class IN ('accessories', 'classical', 'fragrances', 'pants')) AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help'))
------------------------------------------PhysicalOlapScan[item]
----------------------------------PhysicalProject
------------------------------------filter(d_month_seq IN (1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192))
Expand Down
Loading

0 comments on commit e6ec13d

Please sign in to comment.