Skip to content

Commit

Permalink
Revert "[opt](nereids) reimplement or-to-in rule (apache#41222)"
Browse files Browse the repository at this point in the history
This reverts commit cfe1506.
  • Loading branch information
englefly committed Oct 17, 2024
1 parent 34d3406 commit 1b78f8b
Show file tree
Hide file tree
Showing 152 changed files with 472 additions and 716 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,20 +41,19 @@
public class ExpressionOptimization extends ExpressionRewrite {
public static final List<ExpressionRewriteRule> OPTIMIZE_REWRITE_RULES = ImmutableList.of(
bottomUp(
ExtractCommonFactorRule.INSTANCE,
DistinctPredicatesRule.INSTANCE,
SimplifyComparisonPredicate.INSTANCE,
SimplifyInPredicate.INSTANCE,
SimplifyDecimalV3Comparison.INSTANCE,
SimplifyRange.INSTANCE,
OrToIn.INSTANCE,
SimplifyRange.INSTANCE,
DateFunctionRewrite.INSTANCE,
ArrayContainToArrayOverlap.INSTANCE,
CaseWhenToIf.INSTANCE,
TopnToMax.INSTANCE,
NullSafeEqualToEqual.INSTANCE,
LikeToEqualRewrite.INSTANCE
ExtractCommonFactorRule.INSTANCE,
DistinctPredicatesRule.INSTANCE,
SimplifyComparisonPredicate.INSTANCE,
SimplifyInPredicate.INSTANCE,
SimplifyDecimalV3Comparison.INSTANCE,
OrToIn.INSTANCE,
SimplifyRange.INSTANCE,
DateFunctionRewrite.INSTANCE,
ArrayContainToArrayOverlap.INSTANCE,
CaseWhenToIf.INSTANCE,
TopnToMax.INSTANCE,
NullSafeEqualToEqual.INSTANCE,
LikeToEqualRewrite.INSTANCE
)
);
private static final ExpressionRuleExecutor EXECUTOR = new ExpressionRuleExecutor(OPTIMIZE_REWRITE_RULES);
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,6 @@ public interface MutableState {
String KEY_PARENT = "parent";
String KEY_RF_JUMP = "rf-jump";
String KEY_PUSH_TOPN_TO_AGG = "pushTopnToAgg";

String KEY_OR_TO_IN = "or_to_in";

<T> Optional<T> get(String key);

MutableState set(String key, Object value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import org.apache.doris.nereids.rules.expression.ExpressionRewriteTestHelper;
import org.apache.doris.nereids.rules.expression.rules.OrToIn;
import org.apache.doris.nereids.trees.expressions.And;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.InPredicate;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
Expand All @@ -38,16 +39,30 @@ void test1() {
String expr = "col1 = 1 or col1 = 2 or col1 = 3 and (col2 = 4)";
Expression expression = PARSER.parseExpression(expr);
Expression rewritten = OrToIn.INSTANCE.rewriteTree(expression, context);
Assertions.assertEquals("(col1 IN (1, 2, 3) AND (col1 IN (1, 2) OR ((col1 = 3) AND (col2 = 4))))",
rewritten.toSql());
Set<InPredicate> inPredicates = rewritten.collect(e -> e instanceof InPredicate);
Assertions.assertEquals(1, inPredicates.size());
InPredicate inPredicate = inPredicates.iterator().next();
NamedExpression namedExpression = (NamedExpression) inPredicate.getCompareExpr();
Assertions.assertEquals("col1", namedExpression.getName());
List<Expression> options = inPredicate.getOptions();
Assertions.assertEquals(2, options.size());
Set<Integer> opVals = ImmutableSet.of(1, 2);
for (Expression op : options) {
Literal literal = (Literal) op;
Assertions.assertTrue(opVals.contains(((Byte) literal.getValue()).intValue()));
}
Set<And> ands = rewritten.collect(e -> e instanceof And);
Assertions.assertEquals(1, ands.size());
And and = ands.iterator().next();
Assertions.assertEquals("((col1 = 3) AND (col2 = 4))", and.toSql());
}

@Test
void test2() {
String expr = "col1 = 1 and col1 = 3 and col2 = 3 or col2 = 4";
Expression expression = PARSER.parseExpression(expr);
Expression rewritten = OrToIn.INSTANCE.rewriteTree(expression, context);
Assertions.assertEquals("(col2 = 4)",
Assertions.assertEquals("((((col1 = 1) AND (col1 = 3)) AND (col2 = 3)) OR (col2 = 4))",
rewritten.toSql());
}

Expand Down Expand Up @@ -89,7 +104,7 @@ void test5() {
String expr = "col = 1 or (col = 2 and (col = 3 or col = 4 or col = 5))";
Expression expression = PARSER.parseExpression(expr);
Expression rewritten = OrToIn.INSTANCE.rewriteTree(expression, context);
Assertions.assertEquals("(col = 1)",
Assertions.assertEquals("((col = 1) OR ((col = 2) AND col IN (3, 4, 5)))",
rewritten.toSql());
}

Expand All @@ -106,7 +121,7 @@ void test7() {
String expr = "A = 1 or A = 2 or abs(A)=5 or A in (1, 2, 3) or B = 1 or B = 2 or B in (1, 2, 3) or B+1 in (4, 5, 7)";
Expression expression = PARSER.parseExpression(expr);
Expression rewritten = OrToIn.INSTANCE.rewriteTree(expression, context);
Assertions.assertEquals("(((A IN (1, 2, 3) OR (abs(A) = 5)) OR B IN (1, 2, 3)) OR (B + 1) IN (4, 5, 7))", rewritten.toSql());
Assertions.assertEquals("(((A IN (1, 2, 3) OR B IN (1, 2, 3)) OR (abs(A) = 5)) OR (B + 1) IN (4, 5, 7))", rewritten.toSql());
}

@Test
Expand All @@ -127,82 +142,4 @@ void testEnsureOrder() {
Assertions.assertEquals("(col1 IN (1, 2) OR col2 IN (1, 2))",
rewritten.toSql());
}

@Test
void test9() {
String expr = "col1=1 and (col2=1 or col2=2)";
Expression expression = PARSER.parseExpression(expr);
Expression rewritten = OrToIn.INSTANCE.rewriteTree(expression, context);
Assertions.assertEquals("((col1 = 1) AND col2 IN (1, 2))",
rewritten.toSql());
}

@Test
void test10() {
// recursive rewrites
String expr = "col1=1 or (col2 = 2 and (col3=4 or col3=5))";
Expression expression = PARSER.parseExpression(expr);
Expression rewritten = OrToIn.INSTANCE.rewriteTree(expression, context);
Assertions.assertEquals("((col1 = 1) OR ((col2 = 2) AND col3 IN (4, 5)))",
rewritten.toSql());
}

@Test
void test11() {
// rewrite multi-inPredicates
String expr = "(a=1 and b=2 and c=3) or (a=2 and b=2 and c=4)";
Expression expression = PARSER.parseExpression(expr);
Expression rewritten = OrToIn.INSTANCE.rewriteTree(expression, context);
Assertions.assertEquals("((b = 2) AND ((a IN (1, 2) AND c IN (3, 4)) AND (((a = 1) AND (c = 3)) OR ((a = 2) AND (c = 4)))))",
rewritten.toSql());
}

@Test
void test12() {
// no rewrite
String expr = "a in (1, 2) and a in (3, 4)";
Expression expression = PARSER.parseExpression(expr);
Expression rewritten = OrToIn.INSTANCE.rewriteTree(expression, context);
Assertions.assertEquals("FALSE",
rewritten.toSql());
}

@Test
void test13() {
// no rewrite, because of "a like 'xyz'"
String expr = "a like 'xyz% or a=1 or a=2': no extract";
Expression expression = PARSER.parseExpression(expr);
Expression rewritten = OrToIn.INSTANCE.rewriteTree(expression, context);
Assertions.assertEquals("(a like 'xyz% or a=1 or a=2')",
rewritten.toSql());
}

@Test
void test14() {
// no rewrite, because of "f(a)"
String expr = "(a=1 and f(a)=2) or a=3";
Expression expression = PARSER.parseExpression(expr);
Expression rewritten = OrToIn.INSTANCE.rewriteTree(expression, context);
Assertions.assertEquals("(((a = 1) AND (f(a) = 2)) OR (a = 3))",
rewritten.toSql());
}

@Test
void test15() {
// no rewrite, because of "a like 'xyz'"
String expr = "x=1 or (a=1 and b=2) or (a=2 and c=3)";
Expression expression = PARSER.parseExpression(expr);
Expression rewritten = OrToIn.INSTANCE.rewriteTree(expression, context);
Assertions.assertEquals("((x = 1) OR (((a = 1) AND (b = 2)) OR ((a = 2) AND (c = 3))))",
rewritten.toSql());
}

@Test
void test16() {
String expr = "a=1 or a=1 or a=1";
Expression expression = PARSER.parseExpression(expr);
Expression rewritten = OrToIn.INSTANCE.rewriteTree(expression, context);
Assertions.assertEquals("(a = 1)",
rewritten.toSql());
}
}
4 changes: 2 additions & 2 deletions regression-test/data/nereids_hint_tpcds_p0/shape/query13.out
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ PhysicalResultSink
--------PhysicalProject
----------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF4 s_store_sk->[ss_store_sk]
------------PhysicalProject
--------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk)) otherCondition=((((((household_demographics.hd_dep_count = 1) AND cd_marital_status IN ('D', 'W')) AND cd_education_status IN ('2 yr Degree', 'Primary')) AND ((cd_marital_status IN ('D', 'W') AND cd_education_status IN ('2 yr Degree', 'Primary')) AND ((((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = 'Primary')) AND ((store_sales.ss_sales_price >= 50.00) AND (store_sales.ss_sales_price <= 100.00))) OR (((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = '2 yr Degree')) AND ((store_sales.ss_sales_price >= 150.00) AND (store_sales.ss_sales_price <= 200.00)))))) OR ((((customer_demographics.cd_marital_status = 'M') AND (customer_demographics.cd_education_status = 'College')) AND ((store_sales.ss_sales_price >= 100.00) AND (store_sales.ss_sales_price <= 150.00))) AND (household_demographics.hd_dep_count = 3)))) build RFs:RF3 ss_cdemo_sk->[cd_demo_sk]
--------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk)) otherCondition=((((household_demographics.hd_dep_count = 1) AND ((((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = 'Primary')) AND ((store_sales.ss_sales_price >= 50.00) AND (store_sales.ss_sales_price <= 100.00))) OR (((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = '2 yr Degree')) AND ((store_sales.ss_sales_price >= 150.00) AND (store_sales.ss_sales_price <= 200.00))))) OR ((((customer_demographics.cd_marital_status = 'M') AND (customer_demographics.cd_education_status = 'College')) AND ((store_sales.ss_sales_price >= 100.00) AND (store_sales.ss_sales_price <= 150.00))) AND (household_demographics.hd_dep_count = 3)))) build RFs:RF3 ss_cdemo_sk->[cd_demo_sk]
----------------PhysicalProject
------------------filter(((((customer_demographics.cd_marital_status = 'M') AND (customer_demographics.cd_education_status = 'College')) OR ((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = 'Primary'))) OR ((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = '2 yr Degree'))) and cd_education_status IN ('2 yr Degree', 'College', 'Primary') and cd_marital_status IN ('D', 'M', 'W'))
------------------filter(((((customer_demographics.cd_marital_status = 'M') AND (customer_demographics.cd_education_status = 'College')) OR ((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = 'Primary'))) OR ((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = '2 yr Degree'))))
--------------------PhysicalOlapScan[customer_demographics] apply RFs: RF3
----------------PhysicalProject
------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ PhysicalResultSink
----------PhysicalDistribute[DistributionSpecHash]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN shuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=(((substring(ca_zip, 1, 5) IN ('80348', '81792', '83405', '85392', '85460', '85669', '86197', '86475', '88274') OR ca_state IN ('CA', 'GA', 'WA')) OR (catalog_sales.cs_sales_price > 500.00))) build RFs:RF2 c_customer_sk->[cs_bill_customer_sk]
----------------hashJoin[INNER_JOIN shuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=(((ca_state IN ('CA', 'GA', 'WA') OR substring(ca_zip, 1, 5) IN ('80348', '81792', '83405', '85392', '85460', '85669', '86197', '86475', '88274')) OR (catalog_sales.cs_sales_price > 500.00))) build RFs:RF2 c_customer_sk->[cs_bill_customer_sk]
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk]
----------------------PhysicalProject
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ PhysicalResultSink
------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
------------------------------filter((((i_color IN ('forest', 'lime', 'maroon', 'navy', 'powder', 'sky', 'slate', 'smoke') AND i_units IN ('Bunch', 'Case', 'Dozen', 'Gross', 'Lb', 'Ounce', 'Pallet', 'Pound')) AND ((((((item.i_category = 'Women') AND i_color IN ('forest', 'lime')) AND i_units IN ('Pallet', 'Pound')) AND i_size IN ('economy', 'small')) OR ((((item.i_category = 'Women') AND i_color IN ('navy', 'slate')) AND i_units IN ('Bunch', 'Gross')) AND i_size IN ('extra large', 'petite'))) OR (((((item.i_category = 'Men') AND i_color IN ('powder', 'sky')) AND i_units IN ('Dozen', 'Lb')) AND i_size IN ('N/A', 'large')) OR ((((item.i_category = 'Men') AND i_color IN ('maroon', 'smoke')) AND i_units IN ('Case', 'Ounce')) AND i_size IN ('economy', 'small'))))) OR ((i_color IN ('aquamarine', 'dark', 'firebrick', 'frosted', 'papaya', 'peach', 'plum', 'sienna') AND i_units IN ('Box', 'Bundle', 'Carton', 'Cup', 'Dram', 'Each', 'Tbl', 'Ton')) AND ((((((item.i_category = 'Women') AND i_color IN ('aquamarine', 'dark')) AND i_units IN ('Tbl', 'Ton')) AND i_size IN ('economy', 'small')) OR ((((item.i_category = 'Women') AND i_color IN ('frosted', 'plum')) AND i_units IN ('Box', 'Dram')) AND i_size IN ('extra large', 'petite'))) OR (((((item.i_category = 'Men') AND i_color IN ('papaya', 'peach')) AND i_units IN ('Bundle', 'Carton')) AND i_size IN ('N/A', 'large')) OR ((((item.i_category = 'Men') AND i_color IN ('firebrick', 'sienna')) AND i_units IN ('Cup', 'Each')) AND i_size IN ('economy', 'small')))))) and i_category IN ('Men', 'Women') and i_size IN ('N/A', 'economy', 'extra large', 'large', 'petite', 'small'))
------------------------------filter((((item.i_category = 'Men') AND (((((i_size IN ('economy', 'small') AND i_color IN ('maroon', 'smoke')) AND i_units IN ('Case', 'Ounce')) OR ((i_size IN ('economy', 'small') AND i_color IN ('firebrick', 'sienna')) AND i_units IN ('Cup', 'Each'))) OR ((i_color IN ('powder', 'sky') AND i_units IN ('Dozen', 'Lb')) AND i_size IN ('N/A', 'large'))) OR ((i_color IN ('papaya', 'peach') AND i_units IN ('Bundle', 'Carton')) AND i_size IN ('N/A', 'large')))) OR ((item.i_category = 'Women') AND (((((i_color IN ('forest', 'lime') AND i_units IN ('Pallet', 'Pound')) AND i_size IN ('economy', 'small')) OR ((i_color IN ('navy', 'slate') AND i_units IN ('Bunch', 'Gross')) AND i_size IN ('extra large', 'petite'))) OR ((i_color IN ('aquamarine', 'dark') AND i_units IN ('Tbl', 'Ton')) AND i_size IN ('economy', 'small'))) OR ((i_color IN ('frosted', 'plum') AND i_units IN ('Box', 'Dram')) AND i_size IN ('extra large', 'petite'))))))
--------------------------------PhysicalOlapScan[item]

Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------------------------------------PhysicalProject
--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2
------------------------------------PhysicalProject
--------------------------------------filter((((date_dim.d_year = 2000) OR ((date_dim.d_year = 1999) AND (date_dim.d_moy = 12))) OR ((date_dim.d_year = 2001) AND (date_dim.d_moy = 1))) and d_year IN (1999, 2000, 2001) and d_year IN (1999, 2000, 2001))
--------------------------------------filter((((date_dim.d_year = 2000) OR ((date_dim.d_year = 1999) AND (date_dim.d_moy = 12))) OR ((date_dim.d_year = 2001) AND (date_dim.d_moy = 1))))
----------------------------------------PhysicalOlapScan[date_dim]
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[store]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ PhysicalResultSink
--------------------------filter((store_sales.ss_net_profit <= 25000.00) and (store_sales.ss_net_profit >= 0.00) and (store_sales.ss_sales_price <= 200.00) and (store_sales.ss_sales_price >= 50.00))
----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3
------------------------PhysicalProject
--------------------------filter(((((customer_demographics.cd_marital_status = 'S') AND (customer_demographics.cd_education_status = 'Secondary')) OR ((customer_demographics.cd_marital_status = 'M') AND (customer_demographics.cd_education_status = '2 yr Degree'))) OR ((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = 'Advanced Degree'))) and cd_education_status IN ('2 yr Degree', 'Advanced Degree', 'Secondary') and cd_marital_status IN ('D', 'M', 'S'))
--------------------------filter(((((customer_demographics.cd_marital_status = 'S') AND (customer_demographics.cd_education_status = 'Secondary')) OR ((customer_demographics.cd_marital_status = 'M') AND (customer_demographics.cd_education_status = '2 yr Degree'))) OR ((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = 'Advanced Degree'))))
----------------------------PhysicalOlapScan[customer_demographics]
--------------------PhysicalProject
----------------------filter((customer_address.ca_country = 'United States') and ca_state IN ('CO', 'GA', 'KS', 'MD', 'MN', 'NC', 'ND', 'NY', 'SD'))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ PhysicalResultSink
--------------------------------------PhysicalProject
----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2
--------------------------------------PhysicalProject
----------------------------------------filter((((i_category IN ('Books', 'Children', 'Electronics') AND i_class IN ('personal', 'portable', 'reference', 'self-help')) AND i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')) OR ((i_category IN ('Men', 'Music', 'Women') AND i_class IN ('accessories', 'classical', 'fragrances', 'pants')) AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help'))
----------------------------------------filter((((i_category IN ('Books', 'Children', 'Electronics') AND i_class IN ('personal', 'portable', 'reference', 'self-help')) AND i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')) OR ((i_category IN ('Men', 'Music', 'Women') AND i_class IN ('accessories', 'classical', 'fragrances', 'pants')) AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))))
------------------------------------------PhysicalOlapScan[item]
----------------------------------PhysicalProject
------------------------------------filter(d_month_seq IN (1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197))
Expand Down
Loading

0 comments on commit 1b78f8b

Please sign in to comment.