Skip to content

Commit

Permalink
[CALCITE-6632] Wrong optimization because window missing constants in digest
Browse files Browse the repository at this point in the history
  • Loading branch information
JiajunBernoulli committed Oct 17, 2024
1 parent 659ea98 commit 5341730
Showing 5 changed files with 59 additions and 10 deletions.
3 changes: 3 additions & 0 deletions core/src/main/java/org/apache/calcite/rel/core/Window.java
Original file line number Diff line number Diff line change
@@ -163,6 +163,9 @@ public Window(RelOptCluster cluster, RelTraitSet traitSet, RelNode input,
for (Ord<Group> window : Ord.zip(groups)) {
pw.item("window#" + window.i, window.e.toString());
}
if (this.constants != null && this.constants.size() > 0) {
pw.item("constants", constants);
}
return pw;
}

2 changes: 1 addition & 1 deletion core/src/test/java/org/apache/calcite/test/JdbcTest.java
Original file line number Diff line number Diff line change
@@ -4123,7 +4123,7 @@ void testOrderByOnSortedTable2(String format) {
"[deptno INTEGER NOT NULL, empid INTEGER NOT NULL, S REAL, FIVE INTEGER NOT NULL, M REAL, C BIGINT NOT NULL]")
.explainContains(""
+ "EnumerableCalc(expr#0..7=[{inputs}], expr#8=[0:BIGINT], expr#9=[>($t4, $t8)], expr#10=[null:JavaType(class java.lang.Float)], expr#11=[CASE($t9, $t5, $t10)], expr#12=[5], deptno=[$t1], empid=[$t0], S=[$t11], FIVE=[$t12], M=[$t6], C=[$t7])\n"
+ " EnumerableWindow(window#0=[window(partition {1} order by [0] rows between $4 PRECEDING and CURRENT ROW aggs [COUNT($3), $SUM0($3), MIN($2), COUNT()])])\n"
+ " EnumerableWindow(window#0=[window(partition {1} order by [0] rows between $4 PRECEDING and CURRENT ROW aggs [COUNT($3), $SUM0($3), MIN($2), COUNT()])], constants=[[1]])\n"
+ " EnumerableCalc(expr#0..4=[{inputs}], expr#5=[+($t3, $t0)], proj#0..1=[{exprs}], salary=[$t3], $3=[$t5])\n"
+ " EnumerableTableScan(table=[[hr, emps]])\n")
.returnsUnordered(
14 changes: 14 additions & 0 deletions core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
Original file line number Diff line number Diff line change
@@ -5892,6 +5892,20 @@ private HepProgram getTransitiveProgram() {
.check();
}

/**
 * Test case for
 * <a href="https://issues.apache.org/jira/browse/CALCITE-6632">[CALCITE-6632]
 * Wrong optimization because window missing constants in digest</a>.
 *
 * <p>The query unions two branches that use the same window specification
 * and differ only in the literal being summed (100 versus 1000). The expected
 * plan keeps one window per branch, each listing its own constant.
 */
@Test void testWindowMissingConstantInDigest() {
  // Each branch is spelled out separately so the single differing literal is easy to spot.
  final String firstBranch =
      "select sum(100) over (partition by deptno order by sal) as s\n"
          + "from emp\n";
  final String secondBranch =
      "select sum(1000) over(partition by deptno order by sal) as s\n"
          + "from emp\n";
  final String query = firstBranch + "union all\n" + secondBranch;
  sql(query).withRule(CoreRules.PROJECT_TO_LOGICAL_PROJECT_AND_WINDOW).check();
}

/** While it's probably valid relational algebra for a Project to contain
* a RexOver inside a RexOver, ProjectMergeRule should not bring it about. */
@Test void testProjectMergeShouldIgnoreOver() {
Original file line number Diff line number Diff line change
@@ -4441,7 +4441,7 @@ ROWS BETWEEN 5 + 5 PRECEDING AND 1 PRECEDING) AS w_count from emp
<Resource name="planBefore">
<![CDATA[
LogicalProject($0=[$1])
LogicalWindow(window#0=[window(order by [0] rows between $1 PRECEDING and $2 PRECEDING aggs [COUNT()])])
LogicalWindow(window#0=[window(order by [0] rows between $1 PRECEDING and $2 PRECEDING aggs [COUNT()])], constants=[[10, 1]])
LogicalProject(EMPNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
@@ -5328,7 +5328,7 @@ LogicalProject(EMPNO=[$0], DEPTNO=[$1], W_COUNT=[$2])
<![CDATA[
LogicalProject(EMPNO=[$0], DEPTNO=[$1], W_COUNT=[$2])
LogicalFilter(condition=[IS NULL($2)])
LogicalWindow(window#0=[window(rows between $2 PRECEDING and $3 PRECEDING aggs [COUNT($0)])])
LogicalWindow(window#0=[window(rows between $2 PRECEDING and $3 PRECEDING aggs [COUNT($0)])], constants=[[10, 1]])
LogicalProject(EMPNO=[$0], DEPTNO=[$7])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
@@ -8409,7 +8409,7 @@ LogicalProject(EXPR$0=[CAST(/(CASE(>(COUNT($5) OVER (ORDER BY $0 ROWS 3 PRECEDIN
<Resource name="planAfter">
<![CDATA[
LogicalProject(EXPR$0=[CAST(/(CASE(>($2, 0), $3, null:INTEGER), $2)):INTEGER])
LogicalWindow(window#0=[window(order by [0] rows between $2 PRECEDING and CURRENT ROW aggs [COUNT($1), $SUM0($1)])])
LogicalWindow(window#0=[window(order by [0] rows between $2 PRECEDING and CURRENT ROW aggs [COUNT($1), $SUM0($1)])], constants=[[3]])
LogicalProject(EMPNO=[$0], SAL=[$5])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
@@ -8509,7 +8509,7 @@ from (
<Resource name="planAfter">
<![CDATA[
LogicalProject($0=[$2], $1=[$3])
LogicalWindow(window#0=[window(partition {1} order by [0] aggs [SUM($2), SUM($3)])])
LogicalWindow(window#0=[window(partition {1} order by [0] aggs [SUM($2), SUM($3)])], constants=[[100, 1000]])
LogicalProject(SAL=[$5], DEPTNO=[$7])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
@@ -10603,15 +10603,15 @@ where DEPTNO = 1]]>
LogicalProject(NAME=[$0], DEPTNO=[$1], CNT=[$2], ALL_SUM=[$3])
LogicalFilter(condition=[=($1, 1)])
LogicalProject(NAME=[$1], DEPTNO=[$0], CNT=[$2], ALL_SUM=[$3])
LogicalWindow(window#0=[window(partition {0, 1} aggs [COUNT()])], window#1=[window(partition {0} aggs [SUM($2)])])
LogicalWindow(window#0=[window(partition {0, 1} aggs [COUNT()])], window#1=[window(partition {0} aggs [SUM($2)])], constants=[[1]])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalProject(NAME=[$1], DEPTNO=[$0], CNT=[$2], ALL_SUM=[$3])
LogicalFilter(condition=[=($0, 1)])
LogicalWindow(window#0=[window(partition {0, 1} aggs [COUNT()])], window#1=[window(partition {0} aggs [SUM($2)])])
LogicalWindow(window#0=[window(partition {0, 1} aggs [COUNT()])], window#1=[window(partition {0} aggs [SUM($2)])], constants=[[1]])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
@@ -13055,7 +13055,7 @@ from (
<Resource name="planAfter">
<![CDATA[
LogicalProject($0=[$2], $1=[$3], $2=[$4])
LogicalWindow(window#0=[window(partition {1} range between UNBOUNDED PRECEDING and CURRENT ROW aggs [SUM($2)])], window#1=[window(order by [1] aggs [SUM($2)])], window#2=[window(partition {1} range between UNBOUNDED PRECEDING and CURRENT ROW aggs [SUM(5000)])])
LogicalWindow(window#0=[window(partition {1} range between UNBOUNDED PRECEDING and CURRENT ROW aggs [SUM($2)])], window#1=[window(order by [1] aggs [SUM($2)])], window#2=[window(partition {1} range between UNBOUNDED PRECEDING and CURRENT ROW aggs [SUM(5000)])], constants=[[100]])
LogicalProject(SAL=[$5], DEPTNO=[$7])
LogicalFilter(condition=[=($5, 5000)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
@@ -13088,7 +13088,7 @@ LogicalProject(COL1=[SUM(100) OVER (ORDER BY $7, $0 RANGE BETWEEN CURRENT ROW AN
<Resource name="planAfter">
<![CDATA[
LogicalProject($0=[$3], $1=[$4])
LogicalWindow(window#0=[window(order by [2, 0] range between CURRENT ROW and UNBOUNDED FOLLOWING aggs [SUM($3)])], window#1=[window(partition {2} order by [2, 0] range between UNBOUNDED PRECEDING and UNBOUNDED FOLLOWING aggs [SUM($3)])])
LogicalWindow(window#0=[window(order by [2, 0] range between CURRENT ROW and UNBOUNDED FOLLOWING aggs [SUM($3)])], window#1=[window(partition {2} order by [2, 0] range between UNBOUNDED PRECEDING and UNBOUNDED FOLLOWING aggs [SUM($3)])], constants=[[100]])
LogicalProject(EMPNO=[$0], SAL=[$5], DEPTNO=[$7])
LogicalFilter(condition=[=($5, 5000)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
@@ -17232,6 +17232,38 @@ LogicalProject(EXPR$0=[$9], EXPR$1=[$9])
<![CDATA[
LogicalProject(EXPR$0=[COUNT() OVER (PARTITION BY $0 ORDER BY $0)], EXPR$1=[COUNT() OVER (PARTITION BY $0 ORDER BY $0)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testWindowMissingConstantInDigest">
<Resource name="sql">
<![CDATA[select sum(100) over (partition by deptno order by sal) as s
from emp
union all
select sum(1000) over(partition by deptno order by sal) as s
from emp
]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalUnion(all=[true])
LogicalProject(S=[SUM(100) OVER (PARTITION BY $7 ORDER BY $5)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalProject(S=[SUM(1000) OVER (PARTITION BY $7 ORDER BY $5)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalUnion(all=[true])
LogicalProject($0=[$2])
LogicalWindow(window#0=[window(partition {1} order by [0] aggs [SUM($2)])], constants=[[100]])
LogicalProject(SAL=[$5], DEPTNO=[$7])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalProject($0=[$2])
LogicalWindow(window#0=[window(partition {1} order by [0] aggs [SUM($2)])], constants=[[1000]])
LogicalProject(SAL=[$5], DEPTNO=[$7])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
2 changes: 1 addition & 1 deletion core/src/test/resources/sql/winagg.iq
Original file line number Diff line number Diff line change
@@ -760,7 +760,7 @@ select emp."ENAME", emp."DEPTNO",
nth_value(emp."DEPTNO", 10) over() as "tenth_value"
from emp order by emp."ENAME";
EnumerableSort(sort0=[$0], dir0=[ASC])
EnumerableWindow(window#0=[window(aggs [NTH_VALUE($1, $2), NTH_VALUE($1, $3), NTH_VALUE($1, $4), NTH_VALUE($1, $5), NTH_VALUE($1, $6)])])
EnumerableWindow(window#0=[window(aggs [NTH_VALUE($1, $2), NTH_VALUE($1, $3), NTH_VALUE($1, $4), NTH_VALUE($1, $5), NTH_VALUE($1, $6)])], constants=[[1, 2, 5, 8, 10]])
EnumerableValues(tuples=[[{ 'Jane ', 10 }, { 'Bob ', 10 }, { 'Eric ', 20 }, { 'Susan', 30 }, { 'Alice', 30 }, { 'Adam ', 50 }, { 'Eve ', 50 }, { 'Grace', 60 }, { 'Wilma', null }]])
!plan
+-------+--------+-------------+--------------+-------------+--------------+-------------+

0 comments on commit 5341730

Please sign in to comment.