From 5b3dfde6bf5aaffe71a334a321eb52bc5320d311 Mon Sep 17 00:00:00 2001 From: minghong Date: Fri, 8 Nov 2024 15:20:19 +0800 Subject: [PATCH] add regression case: disable join reorder if there are invalid stats --- .../data/nereids_p0/stats/invalid_stats.out | 46 ++++++ .../nereids_p0/stats/invalid_stats.groovy | 132 +++++++----------- 2 files changed, 95 insertions(+), 83 deletions(-) create mode 100644 regression-test/data/nereids_p0/stats/invalid_stats.out diff --git a/regression-test/data/nereids_p0/stats/invalid_stats.out b/regression-test/data/nereids_p0/stats/invalid_stats.out new file mode 100644 index 000000000000000..1c9cd8b2f94592b --- /dev/null +++ b/regression-test/data/nereids_p0/stats/invalid_stats.out @@ -0,0 +1,46 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !reorder_1 -- +PhysicalResultSink +--PhysicalProject +----hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +------PhysicalProject +--------PhysicalOlapScan[nation] +------PhysicalProject +--------PhysicalOlapScan[region] + +-- !ndv_min_max_invalid -- +PhysicalResultSink +--PhysicalProject +----hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +------PhysicalProject +--------PhysicalOlapScan[region] +------PhysicalProject +--------PhysicalOlapScan[nation] + +-- !reorder_2 -- +PhysicalResultSink +--PhysicalProject +----hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +------PhysicalProject +--------PhysicalOlapScan[nation] +------PhysicalProject +--------PhysicalOlapScan[region] + +-- !order_3 -- +PhysicalResultSink +--PhysicalProject +----hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +------PhysicalProject +--------PhysicalOlapScan[nation] +------PhysicalProject +--------PhysicalOlapScan[region] + +-- !ndv_row_invalid -- +PhysicalResultSink +--PhysicalProject +----hashJoin[INNER_JOIN broadcast] hashCondition=((region.r_regionkey = nation.n_regionkey)) otherCondition=() +------PhysicalProject +--------PhysicalOlapScan[region] +------PhysicalProject +--------PhysicalOlapScan[nation] + diff --git a/regression-test/suites/nereids_p0/stats/invalid_stats.groovy b/regression-test/suites/nereids_p0/stats/invalid_stats.groovy index 5304cd8c2c1fa97..042b591e0d088dc 100644 --- a/regression-test/suites/nereids_p0/stats/invalid_stats.groovy +++ b/regression-test/suites/nereids_p0/stats/invalid_stats.groovy @@ -16,96 +16,62 @@ // under the License. suite("invalid_stats") { - // multi_sql """ - // set global enable_auto_analyze=false; - // SET enable_nereids_planner=true; - // SET enable_fallback_to_original_planner=false; - // set disable_nereids_rules=PRUNE_EMPTY_PARTITION; + multi_sql """ + set global enable_auto_analyze=false; + SET enable_nereids_planner=true; + SET enable_fallback_to_original_planner=false; + set disable_nereids_rules=PRUNE_EMPTY_PARTITION; - // drop table if exists region; - // CREATE TABLE region ( - // r_regionkey int NOT NULL, - // r_name VARCHAR(25) NOT NULL, - // r_comment VARCHAR(152) - // )ENGINE=OLAP - // DUPLICATE KEY(`r_regionkey`) - // COMMENT "OLAP" - // DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1 - // PROPERTIES ( - // "replication_num" = "1" - // ); + drop table if exists region; + CREATE TABLE region ( + r_regionkey int NOT NULL, + r_name VARCHAR(25) NOT NULL, + r_comment VARCHAR(152) + )ENGINE=OLAP + DUPLICATE KEY(`r_regionkey`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ); - // drop table if exists nation; - // CREATE TABLE `nation` ( - // `n_nationkey` int(11) NOT NULL, - // `n_name` varchar(25) NOT NULL, - // `n_regionkey` int(11) NOT NULL, - // `n_comment` varchar(152) NULL - // ) ENGINE=OLAP - // DUPLICATE KEY(`N_NATIONKEY`) - // COMMENT "OLAP" - // DISTRIBUTED BY HASH(`N_NATIONKEY`) BUCKETS 1 - // PROPERTIES ( - // "replication_num" = "1" - // ); - // alter table nation modify column n_nationkey set stats ('ndv'='25', 'num_nulls'='0', 'min_value'='0', 'max_value'='24', 'row_count'='25'); + drop table if exists nation; + CREATE TABLE `nation` ( + `n_nationkey` int(11) NOT NULL, + `n_name` varchar(25) NOT NULL, + `n_regionkey` int(11) NOT NULL, + `n_comment` varchar(152) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`N_NATIONKEY`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`N_NATIONKEY`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ); + alter table nation modify column n_nationkey set stats ('ndv'='25', 'num_nulls'='0', 'min_value'='0', 'max_value'='24', 'row_count'='25'); - // alter table nation modify column n_regionkey set stats ('ndv'='5', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='25'); + alter table region modify column r_regionkey set stats ('ndv'='5', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='5'); - // """ + """ - // explain { - // sql "select * from region" - // notContains("join reorder with unknown column statistics") - // } - - // explain { - // sql "select * from region where r_regionkey=1" - // contains("join reorder with unknown column statistics") - // } - - // explain { - // sql "select r_regionkey from region group by r_regionkey" - // contains("join reorder with unknown column statistics") - // } - - // explain { - // sql "select r_regionkey from region join nation on r_regionkey=n_regionkey" - // contains("join reorder with unknown column statistics") - // } - - // sql "alter table region modify column r_regionkey set stats ('ndv'='5', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='5');" + qt_reorder_1 "explain shape plan select r_regionkey from region join nation on r_regionkey=n_regionkey" + + sql "alter table region modify column r_regionkey set stats ('ndv'='0', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='0');" - // explain { - // sql "select * from region where r_regionkey=1" - // notContains("join reorder with unknown column statistics") - // } - - // explain { - // sql "select r_regionkey from region group by r_regionkey" - // notContains("join reorder with unknown column statistics") - // } - - // explain { - // sql "select r_regionkey from region join nation on r_regionkey=n_regionkey" - // notContains("join reorder with unknown column statistics") - // } + // r_regionkey stats invalid: ndv=0, but min or max is not null + qt_ndv_min_max_invalid "explain shape plan select r_regionkey from region join nation on r_regionkey=n_regionkey" + + // inject normal stats and check join order is nation-region + sql "alter table region modify column r_regionkey set stats ('ndv'='5', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='5');" + + qt_reorder_2 "explain shape plan select r_regionkey from region join nation on r_regionkey=n_regionkey" - // explain { - // sql "select r_name from region join nation on r_regionkey=n_regionkey" - // notContains("join reorder with unknown column statistics") - // } + // r_regionkey stats invalid: ndv > 10*row + sql "alter table region modify column r_regionkey set stats ('ndv'='10', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='1');" + qt_order_3 "explain shape plan select r_regionkey from region join nation on r_regionkey=n_regionkey" + + sql "alter table region modify column r_regionkey set stats ('ndv'='11', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='1');" + qt_ndv_row_invalid "explain shape plan select r_regionkey from region join nation on r_regionkey=n_regionkey" - // explain { - // sql """ - // select r_name - // from (select r_name, r_regionkey + 1 x from region) T join nation on T.x=n_regionkey - // """ - // notContains("join reorder with unknown column statistics") - // } } -// disable jo: alter table region modify column r_regionkey set stats ('ndv'='0', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='0'); -// disable jo: alter table region modify column r_regionkey set stats ('ndv'='11', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='1'); - -// alter table region modify column r_regionkey set stats ('ndv'='10', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='1');