Skip to content

Commit

Permalink
[fix](nereids)exists subquery should handle top level scalar agg cor…
Browse files Browse the repository at this point in the history
…rectly (apache#29135)
  • Loading branch information
starocean999 authored Dec 29, 2023
1 parent 61677d1 commit d2dc12b
Show file tree
Hide file tree
Showing 3 changed files with 193 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,10 @@ private LogicalPlan subqueryToApply(List<SubqueryExpr> subqueryExprs, LogicalPla
LogicalPlan tmpPlan = childPlan;
for (int i = 0; i < subqueryExprs.size(); ++i) {
SubqueryExpr subqueryExpr = subqueryExprs.get(i);
if (nonMarkJoinExistsWithAgg(subqueryExpr, subqueryToMarkJoinSlot)) {
if (subqueryExpr instanceof Exists && hasTopLevelScalarAgg(subqueryExpr.getQueryPlan())) {
// a top-level scalar agg always returns a value, or null for empty input,
// so Exists and Not Exists conjuncts always evaluate to the TRUE and FALSE literals respectively;
// therefore we don't create an apply node for it
continue;
}

Expand All @@ -338,18 +341,11 @@ private LogicalPlan subqueryToApply(List<SubqueryExpr> subqueryExprs, LogicalPla
return tmpPlan;
}

private boolean nonMarkJoinExistsWithAgg(SubqueryExpr exists,
Map<SubqueryExpr, Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot) {
return exists instanceof Exists
&& !subqueryToMarkJoinSlot.get(exists).isPresent()
&& hasTopLevelAggWithoutGroupBy(exists.getQueryPlan());
}

private static boolean hasTopLevelAggWithoutGroupBy(Plan plan) {
private static boolean hasTopLevelScalarAgg(Plan plan) {
if (plan instanceof LogicalAggregate) {
return ((LogicalAggregate) plan).getGroupByExpressions().isEmpty();
} else if (plan instanceof LogicalProject || plan instanceof LogicalSort) {
return hasTopLevelAggWithoutGroupBy(plan.child(0));
return hasTopLevelScalarAgg(plan.child(0));
}
return false;
}
Expand Down Expand Up @@ -427,19 +423,30 @@ public Expression visitExistsSubquery(Exists exists, SubqueryContext context) {
// The result set when NULL is specified in the subquery and still evaluates to TRUE by using EXISTS
// When the input is empty, agg will still return null, so if there is an agg,
// the returned result will always be considered true
boolean needCreateMarkJoinSlot = isMarkJoin || shouldOutputMarkJoinSlot;
MarkJoinSlotReference markJoinSlotReference = null;
if (hasTopLevelAggWithoutGroupBy(exists.getQueryPlan()) && needCreateMarkJoinSlot) {
markJoinSlotReference =
new MarkJoinSlotReference(statementContext.generateColumnName(), true);
} else if (needCreateMarkJoinSlot) {
markJoinSlotReference =
new MarkJoinSlotReference(statementContext.generateColumnName());
}
if (needCreateMarkJoinSlot) {
context.setSubqueryToMarkJoinSlot(exists, Optional.of(markJoinSlotReference));
if (hasTopLevelScalarAgg(exists.getQueryPlan())) {
/*
a top-level scalar agg always returns a value, or null for empty input,
so Exists and Not Exists conjuncts always evaluate to the True and False literals respectively, e.g.
SELECT *
FROM t1
WHERE EXISTS (
SELECT SUM(a)
FROM t2
WHERE t1.a = t2.b and t1.a = 1
);
*/
return exists.isNot() ? BooleanLiteral.FALSE : BooleanLiteral.TRUE;
} else {
boolean needCreateMarkJoinSlot = isMarkJoin || shouldOutputMarkJoinSlot;
if (needCreateMarkJoinSlot) {
MarkJoinSlotReference markJoinSlotReference =
new MarkJoinSlotReference(statementContext.generateColumnName());
context.setSubqueryToMarkJoinSlot(exists, Optional.of(markJoinSlotReference));
return markJoinSlotReference;
} else {
return BooleanLiteral.TRUE;
}
}
return needCreateMarkJoinSlot ? markJoinSlotReference : BooleanLiteral.TRUE;
}

@Override
Expand Down
81 changes: 81 additions & 0 deletions regression-test/data/nereids_syntax_p0/sub_query_correlated.out
Original file line number Diff line number Diff line change
Expand Up @@ -489,3 +489,84 @@ true
-- !doris_7643 --
3 3

-- !select_exists1 --
\N \N 0
\N \N 100

-- !select_exists2 --

-- !select_exists3 --
\N \N 0
\N \N 100

-- !select_exists4 --
\N \N 100

-- !select_exists5 --
true
true

-- !select_exists6 --
false
false

-- !select_exists7 --
true
true

-- !select_exists8 --
false
true

-- !select_exists9 --
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100

-- !select_exists10 --

-- !select_exists11 --
\N \N 0
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100

-- !select_exists12 --
\N \N 0

Original file line number Diff line number Diff line change
Expand Up @@ -679,4 +679,87 @@ suite ("sub_query_correlated") {
// order_qt_doris_6937_2 """
// select * from sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 not in (select sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 > sub_query_correlated_subquery1.k2) or k1 < 10 order by k1, k2;
// """
sql """drop table if exists table_21_undef_partitions2_keys3;"""
sql """drop table if exists table_1_undef_partitions2_keys3;"""
sql """create table table_21_undef_partitions2_keys3 (
`col_int_undef_signed` int ,
`col_varchar_10__undef_signed` varchar(10) ,
`pk` int
) engine=olap
distributed by hash(pk) buckets 10
properties('replication_num' = '1');"""
sql """create table table_1_undef_partitions2_keys3 (
`col_int_undef_signed` int ,
`col_varchar_10__undef_signed` varchar(10) ,
`pk` int
) engine=olap
distributed by hash(pk) buckets 10
properties('replication_num' = '1');"""
sql """insert into table_21_undef_partitions2_keys3(pk,col_int_undef_signed,col_varchar_10__undef_signed) values (0,null,null),(1,6,'c'),(2,7,'m'),(3,null,null),(4,null,'b'),(5,4,null),(6,3,null),(7,0,'z'),(8,null,"me"),(9,6,null),(10,0,""),(11,null,'e'),(12,null,"up"),(13,null,""),(14,7,'s'),(15,null,""),(16,3,null),(17,null,""),(18,null,""),(19,4,""),(20,7,null);"""
sql """insert into table_1_undef_partitions2_keys3(pk,col_int_undef_signed,col_varchar_10__undef_signed) values (0,null,null),(100,null,null);"""

qt_select_exists1 """SELECT *
FROM table_1_undef_partitions2_keys3 AS t1
WHERE EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9
) order by t1.pk;"""
qt_select_exists2 """SELECT *
FROM table_1_undef_partitions2_keys3 AS t1
WHERE not EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9
) order by t1.pk;"""
qt_select_exists3 """SELECT *
FROM table_1_undef_partitions2_keys3 AS t1
WHERE EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = 9
) or t1.pk = 100 order by t1.pk;"""
qt_select_exists4 """SELECT *
FROM table_1_undef_partitions2_keys3 AS t1
WHERE not EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = 9
) or t1.pk = 100 order by t1.pk;"""
qt_select_exists5 """select EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9
) from table_1_undef_partitions2_keys3 AS t1;"""
qt_select_exists6 """select not EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9
) from table_1_undef_partitions2_keys3 AS t1 order by t1.pk;"""
qt_select_exists7 """select EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9
) or t1.pk = 100 from table_1_undef_partitions2_keys3 AS t1 order by t1.pk;"""
qt_select_exists8 """select EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9
) and t1.pk = 100 from table_1_undef_partitions2_keys3 AS t1 order by t1.pk;"""
qt_select_exists9 """select t1.* from table_1_undef_partitions2_keys3 AS t1 join table_21_undef_partitions2_keys3 AS t2
on t1.pk = t2.pk and not exists ( SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9 ) or t1.pk = 100 order by t1.pk;"""
qt_select_exists10 """select t1.* from table_1_undef_partitions2_keys3 AS t1 join table_21_undef_partitions2_keys3 AS t2
on t1.pk = t2.pk and not exists ( SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9 ) or t1.pk > 100 order by t1.pk;"""
qt_select_exists11 """select t1.* from table_1_undef_partitions2_keys3 AS t1 join table_21_undef_partitions2_keys3 AS t2
on t1.pk = t2.pk and exists ( SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9 ) or t1.pk = 100 order by t1.pk;"""
qt_select_exists12 """select t1.* from table_1_undef_partitions2_keys3 AS t1 join table_21_undef_partitions2_keys3 AS t2
on t1.pk = t2.pk and exists ( SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9 ) or t1.pk > 100 order by t1.pk;"""
}

0 comments on commit d2dc12b

Please sign in to comment.