diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java index 5f55f86cc91257..675575ad298b38 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java @@ -325,7 +325,10 @@ private LogicalPlan subqueryToApply(List subqueryExprs, LogicalPla LogicalPlan tmpPlan = childPlan; for (int i = 0; i < subqueryExprs.size(); ++i) { SubqueryExpr subqueryExpr = subqueryExprs.get(i); - if (nonMarkJoinExistsWithAgg(subqueryExpr, subqueryToMarkJoinSlot)) { + if (subqueryExpr instanceof Exists && hasTopLevelScalarAgg(subqueryExpr.getQueryPlan())) { + // because top level scalar agg always returns a value or null(for empty input) + // so Exists and Not Exists conjunct are always evaluated to True and false literals respectively + // we don't create apply node for it continue; } @@ -338,18 +341,11 @@ private LogicalPlan subqueryToApply(List subqueryExprs, LogicalPla return tmpPlan; } - private boolean nonMarkJoinExistsWithAgg(SubqueryExpr exists, - Map> subqueryToMarkJoinSlot) { - return exists instanceof Exists - && !subqueryToMarkJoinSlot.get(exists).isPresent() - && hasTopLevelAggWithoutGroupBy(exists.getQueryPlan()); - } - - private static boolean hasTopLevelAggWithoutGroupBy(Plan plan) { + private static boolean hasTopLevelScalarAgg(Plan plan) { if (plan instanceof LogicalAggregate) { return ((LogicalAggregate) plan).getGroupByExpressions().isEmpty(); } else if (plan instanceof LogicalProject || plan instanceof LogicalSort) { - return hasTopLevelAggWithoutGroupBy(plan.child(0)); + return hasTopLevelScalarAgg(plan.child(0)); } return false; } @@ -427,19 +423,30 @@ public Expression visitExistsSubquery(Exists exists, SubqueryContext context) { // The result set when NULL is specified in the subquery and still evaluates to TRUE by using EXISTS // When the number of rows returned is empty, agg will return null, so if there is more agg, // it will always consider the returned result to be true - boolean needCreateMarkJoinSlot = isMarkJoin || shouldOutputMarkJoinSlot; - MarkJoinSlotReference markJoinSlotReference = null; - if (hasTopLevelAggWithoutGroupBy(exists.getQueryPlan()) && needCreateMarkJoinSlot) { - markJoinSlotReference = - new MarkJoinSlotReference(statementContext.generateColumnName(), true); - } else if (needCreateMarkJoinSlot) { - markJoinSlotReference = - new MarkJoinSlotReference(statementContext.generateColumnName()); - } - if (needCreateMarkJoinSlot) { - context.setSubqueryToMarkJoinSlot(exists, Optional.of(markJoinSlotReference)); + if (hasTopLevelScalarAgg(exists.getQueryPlan())) { + /* + top level scalar agg and always return a value or null for empty input + so Exists and Not Exists conjunct are always evaluated to True and False literals respectively + SELECT * + FROM t1 + WHERE EXISTS ( + SELECT SUM(a) + FROM t2 + WHERE t1.a = t2.b and t1.a = 1; + ); + */ + return exists.isNot() ? BooleanLiteral.FALSE : BooleanLiteral.TRUE; + } else { + boolean needCreateMarkJoinSlot = isMarkJoin || shouldOutputMarkJoinSlot; + if (needCreateMarkJoinSlot) { + MarkJoinSlotReference markJoinSlotReference = + new MarkJoinSlotReference(statementContext.generateColumnName()); + context.setSubqueryToMarkJoinSlot(exists, Optional.of(markJoinSlotReference)); + return markJoinSlotReference; + } else { + return BooleanLiteral.TRUE; + } } - return needCreateMarkJoinSlot ? markJoinSlotReference : BooleanLiteral.TRUE; } @Override diff --git a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out index 0564d1884d9c05..e7758d02a5f660 100644 --- a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out +++ b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out @@ -489,3 +489,84 @@ true -- !doris_7643 -- 3 3 +-- !select_exists1 -- +\N \N 0 +\N \N 100 + +-- !select_exists2 -- + +-- !select_exists3 -- +\N \N 0 +\N \N 100 + +-- !select_exists4 -- +\N \N 100 + +-- !select_exists5 -- +true +true + +-- !select_exists6 -- +false +false + +-- !select_exists7 -- +true +true + +-- !select_exists8 -- +false +true + +-- !select_exists9 -- +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 + +-- !select_exists10 -- + +-- !select_exists11 -- +\N \N 0 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 +\N \N 100 + +-- !select_exists12 -- +\N \N 0 + diff --git a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy index ac87bbc813fe3e..09ba6c31becc8c 100644 --- a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy +++ b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy @@ -679,4 +679,87 @@ suite ("sub_query_correlated") { // order_qt_doris_6937_2 """ // select * from sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 not in (select sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 > sub_query_correlated_subquery1.k2) or k1 < 10 order by k1, k2; // """ + sql """drop table if exists table_21_undef_partitions2_keys3;""" + sql """drop table if exists table_1_undef_partitions2_keys3;""" + sql """create table table_21_undef_partitions2_keys3 ( + `col_int_undef_signed` int , + `col_varchar_10__undef_signed` varchar(10) , + `pk` int + ) engine=olap + distributed by hash(pk) buckets 10 + properties('replication_num' = '1');""" + sql """create table table_1_undef_partitions2_keys3 ( + `col_int_undef_signed` int , + `col_varchar_10__undef_signed` varchar(10) , + `pk` int + ) engine=olap + distributed by hash(pk) buckets 10 + properties('replication_num' = '1');""" + sql """insert into table_21_undef_partitions2_keys3(pk,col_int_undef_signed,col_varchar_10__undef_signed) values (0,null,null),(1,6,'c'),(2,7,'m'),(3,null,null),(4,null,'b'),(5,4,null),(6,3,null),(7,0,'z'),(8,null,"me"),(9,6,null),(10,0,""),(11,null,'e'),(12,null,"up"),(13,null,""),(14,7,'s'),(15,null,""),(16,3,null),(17,null,""),(18,null,""),(19,4,""),(20,7,null);""" + sql """insert into table_1_undef_partitions2_keys3(pk,col_int_undef_signed,col_varchar_10__undef_signed) values (0,null,null),(100,null,null);""" + + qt_select_exists1 """SELECT * + FROM table_1_undef_partitions2_keys3 AS t1 + WHERE EXISTS ( + SELECT SUM(`pk`) + FROM table_21_undef_partitions2_keys3 AS t2 + WHERE t1.pk = t2.pk and t1.pk = 9 + ) order by t1.pk;""" + qt_select_exists2 """SELECT * + FROM table_1_undef_partitions2_keys3 AS t1 + WHERE not EXISTS ( + SELECT SUM(`pk`) + FROM table_21_undef_partitions2_keys3 AS t2 + WHERE t1.pk = t2.pk and t1.pk = 9 + ) order by t1.pk;""" + qt_select_exists3 """SELECT * + FROM table_1_undef_partitions2_keys3 AS t1 + WHERE EXISTS ( + SELECT SUM(`pk`) + FROM table_21_undef_partitions2_keys3 AS t2 + WHERE t1.pk = 9 + ) or t1.pk = 100 order by t1.pk;""" + qt_select_exists4 """SELECT * + FROM table_1_undef_partitions2_keys3 AS t1 + WHERE not EXISTS ( + SELECT SUM(`pk`) + FROM table_21_undef_partitions2_keys3 AS t2 + WHERE t1.pk = 9 + ) or t1.pk = 100 order by t1.pk;""" + qt_select_exists5 """select EXISTS ( + SELECT SUM(`pk`) + FROM table_21_undef_partitions2_keys3 AS t2 + WHERE t1.pk = t2.pk and t1.pk = 9 + ) from table_1_undef_partitions2_keys3 AS t1;""" + qt_select_exists6 """select not EXISTS ( + SELECT SUM(`pk`) + FROM table_21_undef_partitions2_keys3 AS t2 + WHERE t1.pk = t2.pk and t1.pk = 9 + ) from table_1_undef_partitions2_keys3 AS t1 order by t1.pk;""" + qt_select_exists7 """select EXISTS ( + SELECT SUM(`pk`) + FROM table_21_undef_partitions2_keys3 AS t2 + WHERE t1.pk = t2.pk and t1.pk = 9 + ) or t1.pk = 100 from table_1_undef_partitions2_keys3 AS t1 order by t1.pk;""" + qt_select_exists8 """select EXISTS ( + SELECT SUM(`pk`) + FROM table_21_undef_partitions2_keys3 AS t2 + WHERE t1.pk = t2.pk and t1.pk = 9 + ) and t1.pk = 100 from table_1_undef_partitions2_keys3 AS t1 order by t1.pk;""" + qt_select_exists9 """select t1.* from table_1_undef_partitions2_keys3 AS t1 join table_21_undef_partitions2_keys3 AS t2 + on t1.pk = t2.pk and not exists ( SELECT SUM(`pk`) + FROM table_21_undef_partitions2_keys3 AS t2 + WHERE t1.pk = t2.pk and t1.pk = 9 ) or t1.pk = 100 order by t1.pk;""" + qt_select_exists10 """select t1.* from table_1_undef_partitions2_keys3 AS t1 join table_21_undef_partitions2_keys3 AS t2 + on t1.pk = t2.pk and not exists ( SELECT SUM(`pk`) + FROM table_21_undef_partitions2_keys3 AS t2 + WHERE t1.pk = t2.pk and t1.pk = 9 ) or t1.pk > 100 order by t1.pk;""" + qt_select_exists11 """select t1.* from table_1_undef_partitions2_keys3 AS t1 join table_21_undef_partitions2_keys3 AS t2 + on t1.pk = t2.pk and exists ( SELECT SUM(`pk`) + FROM table_21_undef_partitions2_keys3 AS t2 + WHERE t1.pk = t2.pk and t1.pk = 9 ) or t1.pk = 100 order by t1.pk;""" + qt_select_exists12 """select t1.* from table_1_undef_partitions2_keys3 AS t1 join table_21_undef_partitions2_keys3 AS t2 + on t1.pk = t2.pk and exists ( SELECT SUM(`pk`) + FROM table_21_undef_partitions2_keys3 AS t2 + WHERE t1.pk = t2.pk and t1.pk = 9 ) or t1.pk > 100 order by t1.pk;""" }