Skip to content

Commit

Permalink
[test](mtmv) Add inject statistic when mv rewrite regression test to …
Browse files Browse the repository at this point in the history
…make sure rewrite result stable (apache#43785)

Whether the CBO optimizer successfully rewrites a query to use a
materialized view depends on the statistics.
The optimizer consumes statistics in the following descending order of
priority:
1. the injected statistics
2. the statistics reported by the BE
3. the statistics collected by actively analyzing the table.

When the pipeline runs a test case, the statistics reported by the BE may
not be up to date. As a result, whether the CBO optimizer chooses the
rewritten plan is not deterministic, so the statistics are now injected
manually in the test cases.
  • Loading branch information
seawinde committed Dec 6, 2024
1 parent ec9a648 commit f59403b
Show file tree
Hide file tree
Showing 169 changed files with 497 additions and 228 deletions.
7 changes: 5 additions & 2 deletions regression-test/data/mv_p0/ssb/q_1_1/q_1_1.out
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@
19930101 1 1 1 1 1 1 1 1 1 1 100 1 1 1 2023-06-09 shipmode name address city nation AMERICA phone mktsegment name address city nation AMERICA phone name MFGR#1 category brand color type 4 container
19930101 1 1 1 1 1 1 1 1 1 1 100 1 1 1 2023-06-09 shipmode name address city nation AMERICA phone mktsegment name address city nation AMERICA phone name MFGR#1 category brand color type 4 container
19930101 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2023-06-09 shipmode name address city nation region phone mktsegment name address city nation region phone name mfgr category brand color type 4 container
19930101 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2023-06-09 shipmode name address city nation region phone mktsegment name address city nation region phone name mfgr category brand color type 4 container
19930101 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2023-06-09 shipmode name address city nation region phone mktsegment name address city nation region phone name mfgr category brand color type 4 container
19930101 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2023-06-09 shipmode name address city nation region phone mktsegment name address city nation region phone name mfgr category brand color type 4 container

-- !select_mv --
4
16

-- !select --
4
16

Original file line number Diff line number Diff line change
Expand Up @@ -1617,9 +1617,8 @@ class Suite implements GroovyInterceptable {
check { result ->
boolean success = true;
for (String mv_name : mv_names) {
success = success && result.contains("${mv_name} chose")
Assert.assertEquals(true, result.contains("${mv_name} chose"))
}
Assert.assertEquals(true, success)
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ suite ("agg_have_dup_base") {
qt_select_mv "select unix_timestamp(k1) tmp,sum(k2) from d_table group by tmp order by tmp;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k4 set stats ('row_count'='5');"""

mv_rewrite_success("select k1,sum(k2),max(k2) from d_table group by k1;", "k12s3m")

mv_rewrite_success("select k1,sum(k2) from d_table group by k1;", "k12s3m")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ suite ("test_agg_state_max_by") {
qt_select_star "select * from d_table order by 1,2;"
mv_rewrite_success("select k1,max_by(k2,k3) from d_table group by k1 order by 1,2;", "k1mb")
sql """set enable_stats=true;"""
sql """alter table d_table modify column k4 set stats ('row_count'='8');"""
mv_rewrite_success("select k1,max_by(k2,k3) from d_table group by k1 order by 1,2;", "k1mb")
qt_select_mv "select k1,max_by(k2,k3) from d_table group by k1 order by 1,2;"

Expand Down Expand Up @@ -101,6 +102,7 @@ suite ("test_agg_state_max_by") {
qt_select_star "select * from d_table order by 1,2;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k4 set stats ('row_count'='8');"""
sql "analyze table d_table with sync;"
sql """set enable_stats=false;"""

Expand All @@ -114,6 +116,7 @@ suite ("test_agg_state_max_by") {
qt_select_mv "select k1,max_by(k2,abs(k3)) from d_table group by k1 order by 1,2;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k4 set stats ('row_count'='8');"""
mv_rewrite_success("select k1,max_by(k2+k3,abs(k3)) from d_table group by k1 order by 1,2;", "k1mbcp1")
mv_rewrite_success("select k1,max_by(k2+k3,k3) from d_table group by k1 order by 1,2;", "k1mbcp2")
mv_rewrite_success("select k1,max_by(k2,abs(k3)) from d_table group by k1 order by 1,2;", "k1mbcp3")
Expand Down
3 changes: 3 additions & 0 deletions regression-test/suites/mv_p0/case_ignore/case_ignore.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ suite ("case_ignore") {
sql "insert into d_table select 2,2,2,'b';"
sql "insert into d_table select 3,-3,null,'c';"

sql """alter table d_table modify column k4 set stats ('row_count'='4');"""

createMV ("create materialized view k12a as select K1,abs(K2) from d_table;")

sql "insert into d_table select -4,-4,-4,'d';"
Expand All @@ -53,6 +55,7 @@ suite ("case_ignore") {
qt_select_mv "select K1,abs(K2) from d_table order by K1;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k4 set stats ('row_count'='8');"""
mv_rewrite_success("select k1,abs(k2) from d_table order by k1;", "k12a")
mv_rewrite_success("select K1,abs(K2) from d_table order by K1;", "k12a")

Expand Down
2 changes: 2 additions & 0 deletions regression-test/suites/mv_p0/count_star/count_star.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ suite ("count_star") {
qt_select_mv "select count(*) from d_table where k3=1;"

sql """set enable_stats=true;"""

sql """alter table d_table modify column k4 set stats ('row_count'='8');"""
mv_rewrite_success("select k1,k4,count(*) from d_table group by k1,k4;", "kstar")
mv_rewrite_success("select k1,k4,count(*) from d_table where k1=1 group by k1,k4;", "kstar")
mv_rewrite_fail("select k1,k4,count(*) from d_table where k3=1 group by k1,k4;", "kstar")
Expand Down
2 changes: 2 additions & 0 deletions regression-test/suites/mv_p0/dis_26495/dis_26495.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ suite ("dis_26495") {

sql """insert into doris_test values (1,2,max_by_state(1,2));"""

sql """alter table doris_test modify column agg_st_1 set stats ('row_count'='1');"""

streamLoad {
table "doris_test"
set 'column_separator', ','
Expand Down
2 changes: 2 additions & 0 deletions regression-test/suites/mv_p0/k1ap2spa/k1ap2spa.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ suite ("k1ap2spa") {

sql """set enable_stats=true;"""

sql """alter table d_table modify column k1 set stats ('row_count'='5');"""

mv_rewrite_success("select abs(k1)+1 t,sum(abs(k2+1)) from d_table group by t order by t;", "k1ap2spa")

}
1 change: 1 addition & 0 deletions regression-test/suites/mv_p0/k1s2m3/k1s2m3.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ suite ("k1s2m3") {
sql "insert into d_table select 2,2,2,'b';"
sql "insert into d_table select 3,-3,null,'c';"

sql """alter table d_table modify column k1 set stats ('row_count'='6');"""
createMV("create materialized view k1s2m3 as select k1,sum(k2*k3) from d_table group by k1;")

sql "insert into d_table select -4,-4,-4,'d';"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,6 @@ suite ("k1s2m3_auto_inc") {
qt_select_mv "select k3,sum(abs(k2+1)) from d_table group by k3 order by 1;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k1 set stats ('row_count'='2');"""
mv_rewrite_success("select k3,sum(abs(k2+1)) from d_table group by k3 order by 1;", "k3ap2spa")
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ suite ("multi_agg_with_same_slot") {

sql """set enable_stats=true;"""

sql """alter table d_table modify column k1 set stats ('row_count'='5');"""
mv_rewrite_success("select k1,k2,avg(k3),max(k3) from d_table group by k1,k2 order by 1,2;", "kmv")
mv_rewrite_success("select k1,k2,avg(k3)+max(k3) from d_table group by k1,k2 order by 1,2;", "kmv")
mv_rewrite_success("select k1,k2,avg(k3)+max(k3) from d_table group by grouping sets((k1),(k1,k2),()) order by 1,2;", "kmv")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,5 +66,6 @@ suite ("multi_slot_k123p") {
qt_select_mv "select k1,version() from d_table order by k1;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k1 set stats ('row_count'='5');"""
mv_rewrite_success("select k1,k2+k3 from d_table order by k1;", "k123p")
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ suite ("multi_slot_k1a2p2ap3p") {
qt_select_mv "select abs(k1)+k2+1,abs(k2+2)+k3+3 from d_table order by abs(k1)+k2+1,abs(k2+2)+k3+3;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k1 set stats ('row_count'='7');"""
mv_rewrite_success("select abs(k1)+k2+1,abs(k2+2)+k3+3 from d_table order by abs(k1)+k2+1,abs(k2+2)+k3+3", "k1a2p2ap3p")

}
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ suite ("multi_slot_k1a2p2ap3ps") {
qt_select_base "select abs(k1)+k2+1,sum(abs(k2+2)+k3+3) from d_table group by abs(k1)+k2 order by abs(k1)+k2;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k1 set stats ('row_count'='4');"""
mv_rewrite_success("select abs(k1)+k2+1,sum(abs(k2+2)+k3+3) from d_table group by abs(k1)+k2+1 order by abs(k1)+k2+1", "k1a2p2ap3ps")

mv_rewrite_fail("select abs(k1)+k2+1,sum(abs(k2+2)+k3+3) from d_table group by abs(k1)+k2 order by abs(k1)+k2", "k1a2p2ap3ps")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,6 @@ suite ("multi_slot_k1p2ap3p") {
qt_select_mv "select k1+1,abs(k2+2)+k3+3 from d_table order by k1+1;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k1 set stats ('row_count'='4');"""
mv_rewrite_success("select k1+1,abs(k2+2)+k3+3 from d_table order by k1+1;", "k1p2ap3p")
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,6 @@ suite ("multi_slot_k1p2ap3ps") {
qt_select_mv "select k1+1,sum(abs(k2+2)+k3+3) from d_table group by k1+1 order by k1+1;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k1 set stats ('row_count'='5');"""
mv_rewrite_success("select k1+1,sum(abs(k2+2)+k3+3) from d_table group by k1+1 order by k1+1;", "k1p2ap3ps")
}
26 changes: 10 additions & 16 deletions regression-test/suites/mv_p0/mv_with_view/mv_with_view.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -36,28 +36,25 @@ suite ("mv_with_view") {
sql """insert into d_table select 1,1,1,'a';"""
sql """insert into d_table select 2,2,2,'b';"""

createMV("create materialized view k132 as select k1,k3,k2 from d_table;")
createMV("create materialized view k312 as select k3,k1,k2 from d_table;")

sql """insert into d_table select 3,-3,null,'c';"""

explain {
sql("select * from d_table order by k1;")
contains "(d_table)"
}
sql "analyze table d_table with sync;"
sql """set enable_stats=false;"""

mv_rewrite_fail("select * from d_table order by k1;", "k312")
qt_select_star "select * from d_table order by k1;"

sql """
drop view if exists v_k132;
drop view if exists v_k312;
"""

sql """
create view v_k132 as select k1,k3,k2 from d_table where k1 = 1;
create view v_k312 as select k1,k3,k2 from d_table where k3 = 1;
"""
explain {
sql("select * from v_k132 order by k1;")
contains "(k132)"
}
qt_select_mv "select * from v_k132 order by k1;"
mv_rewrite_success("select * from v_k312 order by k1;", "k312")
qt_select_mv "select * from v_k312 order by k1;"

sql """
drop view if exists v_k124;
Expand All @@ -66,9 +63,6 @@ suite ("mv_with_view") {
sql """
create view v_k124 as select k1,k2,k4 from d_table where k1 = 1;
"""
explain {
sql("select * from v_k124 order by k1;")
contains "(d_table)"
}
mv_rewrite_fail("select * from v_k124 order by k1;", "k312")
qt_select_mv "select * from v_k124 order by k1;"
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ suite ("null_insert") {
GROUP BY date,vid,os,ver,ip_country;"""

sql """set enable_stats=true;"""
sql """alter table test modify column date set stats ('row_count'='3');"""
mv_rewrite_success("""SELECT date, vid, os, ver, ip_country, hll_union(hll_hash(uid))
FROM test
GROUP BY date,vid,os,ver,ip_country;""", "mv_test")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,6 @@ suite ("routine_load_hll") {
qt_select_mv "select time_stamp, hll_union_agg(device_id) from test group by time_stamp order by 1;"

sql """set enable_stats=true;"""
sql """alter table test modify column event_id set stats ('row_count'='2');"""
mv_rewrite_success("select time_stamp, hll_union_agg(device_id) from test group by time_stamp order by 1;", "m_view")
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,6 @@ suite ("multiple_ssb_between") {
) ENGINE=OLAP
DUPLICATE KEY(`LO_ORDERDATE`, `LO_ORDERKEY`)
COMMENT "OLAP"
PARTITION BY RANGE(`LO_ORDERDATE`)
(PARTITION p1992 VALUES [("-2147483648"), ("19930101")),
PARTITION p1993 VALUES [("19930101"), ("19940101")),
PARTITION p1994 VALUES [("19940101"), ("19950101")),
PARTITION p1995 VALUES [("19950101"), ("19960101")),
PARTITION p1996 VALUES [("19960101"), ("19970101")),
PARTITION p1997 VALUES [("19970101"), ("19980101")),
PARTITION p1998 VALUES [("19980101"), ("19990101")))
DISTRIBUTED BY HASH(`LO_ORDERKEY`) BUCKETS 48
PROPERTIES (
"replication_num" = "1",
Expand Down Expand Up @@ -153,17 +145,22 @@ suite ("multiple_ssb_between") {
sql """INSERT INTO lineorder_flat (LO_ORDERDATE, LO_ORDERKEY, LO_LINENUMBER, LO_CUSTKEY, LO_PARTKEY, LO_SUPPKEY, LO_ORDERPRIORITY, LO_SHIPPRIORITY, LO_QUANTITY, LO_EXTENDEDPRICE, LO_ORDTOTALPRICE, LO_DISCOUNT, LO_REVENUE, LO_SUPPLYCOST, LO_TAX, LO_COMMITDATE, LO_SHIPMODE, C_NAME, C_ADDRESS, C_CITY, C_NATION, C_REGION, C_PHONE, C_MKTSEGMENT, S_NAME, S_ADDRESS, S_CITY, S_NATION, S_REGION, S_PHONE, P_NAME, P_MFGR, P_CATEGORY, P_BRAND, P_COLOR,P_TYPE,P_SIZE,P_CONTAINER) VALUES (1 , 1 , 1 , 1 , 1 , 1 , '1' , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , '2023-06-09' , 'shipmode' , 'name' , 'address' , 'city' , 'nation' , 'AMERICA' , 'phone' , 'mktsegment' , 'name' , 'address' , 'city' , 'nation' , 'AMERICA' ,'phone', 'name', 'MFGR#1', 'category', 'brand', 'color', 'type', 4 ,'container');"""

qt_select_star "select * from lineorder_flat order by 1,2, P_MFGR;"

sql "analyze table lineorder_flat with sync;"
sql """set enable_stats=true;"""

sql """alter table lineorder_flat modify column LO_ORDERDATE set stats ('row_count'='8');"""

explain {
sql("""SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
mv_rewrite_success("""SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
WHERE
LO_ORDERDATE >= 19930101
AND LO_ORDERDATE <= 19931231
AND LO_DISCOUNT BETWEEN 1 AND 3
AND LO_QUANTITY < 25;""")
contains "(lineorder_q_1_1)"
}
AND LO_QUANTITY < 25;""",
"lineorder_q_1_1"
)

qt_select_q_1_1 """SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
WHERE
Expand All @@ -172,16 +169,16 @@ suite ("multiple_ssb_between") {
AND LO_DISCOUNT BETWEEN 1 AND 3
AND LO_QUANTITY < 25;"""

explain {
sql("""SELECT
mv_rewrite_success("""SELECT
SUM(LO_REVENUE), (LO_ORDERDATE DIV 10000) AS YEAR,
P_BRAND
FROM lineorder_flat
WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA'
GROUP BY (LO_ORDERDATE DIV 10000), P_BRAND
ORDER BY YEAR, P_BRAND;""")
contains "(lineorder_q_2_1)"
}
ORDER BY YEAR, P_BRAND;""",
"lineorder_q_2_1"
)

qt_select_q_2_1 """SELECT
SUM(LO_REVENUE), (LO_ORDERDATE DIV 10000) AS YEAR,
P_BRAND
Expand All @@ -190,8 +187,7 @@ suite ("multiple_ssb_between") {
GROUP BY YEAR, P_BRAND
ORDER BY YEAR, P_BRAND;"""

explain {
sql("""SELECT
mv_rewrite_success("""SELECT
C_NATION,
S_NATION, (LO_ORDERDATE DIV 10000) AS YEAR,
SUM(LO_REVENUE) AS revenue
Expand All @@ -202,9 +198,9 @@ suite ("multiple_ssb_between") {
AND LO_ORDERDATE >= 19920101
AND LO_ORDERDATE <= 19971231
GROUP BY C_NATION, S_NATION, YEAR
ORDER BY YEAR ASC, revenue DESC;""")
contains "(lineorder_q_3_1)"
}
ORDER BY YEAR ASC, revenue DESC;""",
"lineorder_q_3_1")

qt_select_q_3_1 """SELECT
C_NATION,
S_NATION, (LO_ORDERDATE DIV 10000) AS YEAR,
Expand All @@ -218,8 +214,7 @@ suite ("multiple_ssb_between") {
GROUP BY C_NATION, S_NATION, YEAR
ORDER BY YEAR ASC, revenue DESC;"""

explain {
sql("""SELECT (LO_ORDERDATE DIV 10000) AS YEAR,
mv_rewrite_success("""SELECT (LO_ORDERDATE DIV 10000) AS YEAR,
C_NATION,
SUM(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM lineorder_flat
Expand All @@ -228,9 +223,9 @@ suite ("multiple_ssb_between") {
AND S_REGION = 'AMERICA'
AND P_MFGR IN ('MFGR#1', 'MFGR#2')
GROUP BY YEAR, C_NATION
ORDER BY YEAR ASC, C_NATION ASC;""")
contains "(lineorder_q_4_1)"
}
ORDER BY YEAR ASC, C_NATION ASC;""",
"lineorder_q_4_1")

qt_select_q_4_1 """SELECT (LO_ORDERDATE DIV 10000) AS YEAR,
C_NATION,
SUM(LO_REVENUE - LO_SUPPLYCOST) AS profit
Expand Down
Loading

0 comments on commit f59403b

Please sign in to comment.