Skip to content

Commit

Permalink
fix eliminate_outer_join
Browse files Browse the repository at this point in the history
  • Loading branch information
englefly committed Sep 12, 2023
1 parent d3f1388 commit 7dac52e
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 87 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,16 @@ PhysicalResultSink
----PhysicalProject
------hashJoin[INNER_JOIN](partsupp.ps_suppkey = supplier.s_suppkey)
--------hashJoin[INNER_JOIN](nation.n_nationkey = supplier.s_suppkey)
----------PhysicalDistribute
------------hashJoin[INNER_JOIN](region.r_regionkey = nation.n_regionkey)
--------------PhysicalOlapScan[region]
--------------PhysicalDistribute
----------------filter(( not n_nationkey IS NULL)(nation.n_nationkey > 1))
------------------PhysicalOlapScan[nation]
----------hashJoin[INNER_JOIN](region.r_regionkey = nation.n_regionkey)
------------PhysicalOlapScan[region]
------------PhysicalDistribute
--------------filter(( not n_nationkey IS NULL)(nation.n_nationkey > 1))
----------------PhysicalOlapScan[nation]
----------PhysicalDistribute
------------filter((supplier.s_suppkey > 1)( not s_suppkey IS NULL))
--------------PhysicalOlapScan[supplier]
--------PhysicalDistribute
----------filter((partsupp.ps_suppkey > 1)( not ps_suppkey IS NULL))
----------filter(( not ps_suppkey IS NULL)(partsupp.ps_suppkey > 1))
------------PhysicalOlapScan[partsupp]

-- !2 --
Expand All @@ -28,13 +27,14 @@ PhysicalResultSink
------------hashJoin[FULL_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey)
--------------PhysicalDistribute
----------------hashJoin[FULL_OUTER_JOIN](region.r_regionkey = nation.n_regionkey)
------------------PhysicalOlapScan[region]
------------------PhysicalDistribute
--------------------PhysicalOlapScan[region]
------------------PhysicalDistribute
--------------------PhysicalOlapScan[nation]
--------------PhysicalDistribute
----------------PhysicalOlapScan[supplier]
--------PhysicalDistribute
----------filter((partsupp.ps_suppkey > 1)( not ps_suppkey IS NULL))
----------filter(( not ps_suppkey IS NULL)(partsupp.ps_suppkey > 1))
------------PhysicalOlapScan[partsupp]

-- !3 --
Expand All @@ -45,15 +45,15 @@ PhysicalResultSink
--------PhysicalDistribute
----------filter((supplier.s_suppkey > 1))
------------hashJoin[LEFT_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey)
--------------PhysicalDistribute
----------------hashJoin[FULL_OUTER_JOIN](region.r_regionkey = nation.n_regionkey)
--------------hashJoin[FULL_OUTER_JOIN](region.r_regionkey = nation.n_regionkey)
----------------PhysicalDistribute
------------------PhysicalOlapScan[region]
------------------PhysicalDistribute
--------------------PhysicalOlapScan[nation]
----------------PhysicalDistribute
------------------PhysicalOlapScan[nation]
--------------PhysicalDistribute
----------------PhysicalOlapScan[supplier]
--------PhysicalDistribute
----------filter((partsupp.ps_suppkey > 1)( not ps_suppkey IS NULL))
----------filter(( not ps_suppkey IS NULL)(partsupp.ps_suppkey > 1))
------------PhysicalOlapScan[partsupp]

-- !4 --
Expand All @@ -71,12 +71,11 @@ PhysicalResultSink
--PhysicalDistribute
----PhysicalProject
------hashJoin[LEFT_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey)
--------PhysicalDistribute
----------hashJoin[LEFT_OUTER_JOIN](region.r_regionkey = nation.n_regionkey)
------------filter(( not r_name IS NULL)(region.r_name = ''))
--------------PhysicalOlapScan[region]
------------PhysicalDistribute
--------------PhysicalOlapScan[nation]
--------hashJoin[LEFT_OUTER_JOIN](region.r_regionkey = nation.n_regionkey)
----------filter(( not r_name IS NULL)(region.r_name = ''))
------------PhysicalOlapScan[region]
----------PhysicalDistribute
------------PhysicalOlapScan[nation]
--------PhysicalDistribute
----------PhysicalOlapScan[supplier]

Expand All @@ -85,16 +84,14 @@ PhysicalResultSink
--PhysicalDistribute
----PhysicalProject
------hashJoin[LEFT_OUTER_JOIN](partsupp.ps_suppkey = supplier.s_suppkey)
--------PhysicalDistribute
----------hashJoin[LEFT_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey)
------------PhysicalDistribute
--------------hashJoin[LEFT_OUTER_JOIN](region.r_regionkey = nation.n_regionkey)
----------------filter(( not r_name IS NULL)(region.r_name = ''))
------------------PhysicalOlapScan[region]
----------------PhysicalDistribute
------------------PhysicalOlapScan[nation]
--------hashJoin[LEFT_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey)
----------hashJoin[LEFT_OUTER_JOIN](region.r_regionkey = nation.n_regionkey)
------------filter(( not r_name IS NULL)(region.r_name = ''))
--------------PhysicalOlapScan[region]
------------PhysicalDistribute
--------------PhysicalOlapScan[supplier]
--------------PhysicalOlapScan[nation]
----------PhysicalDistribute
------------PhysicalOlapScan[supplier]
--------PhysicalDistribute
----------PhysicalOlapScan[partsupp]

Expand All @@ -105,13 +102,12 @@ PhysicalResultSink
------hashJoin[FULL_OUTER_JOIN](partsupp.ps_suppkey = supplier.s_suppkey)
--------PhysicalDistribute
----------hashJoin[LEFT_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey)
------------PhysicalDistribute
--------------hashJoin[INNER_JOIN](region.r_regionkey = nation.n_regionkey)
----------------filter(( not r_regionkey IS NULL))
------------------PhysicalOlapScan[region]
----------------PhysicalDistribute
------------------filter(( not n_regionkey IS NULL))
--------------------PhysicalOlapScan[nation]
------------hashJoin[INNER_JOIN](region.r_regionkey = nation.n_regionkey)
--------------filter(( not r_regionkey IS NULL))
----------------PhysicalOlapScan[region]
--------------PhysicalDistribute
----------------filter(( not n_regionkey IS NULL))
------------------PhysicalOlapScan[nation]
------------PhysicalDistribute
--------------PhysicalOlapScan[supplier]
--------PhysicalDistribute
Expand All @@ -122,17 +118,16 @@ PhysicalResultSink
--PhysicalDistribute
----PhysicalProject
------hashJoin[LEFT_OUTER_JOIN](partsupp.ps_suppkey = supplier.s_suppkey)
--------PhysicalDistribute
----------hashJoin[LEFT_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey)
--------hashJoin[LEFT_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey)
----------hashJoin[INNER_JOIN](region.r_regionkey = nation.n_regionkey)
------------PhysicalDistribute
--------------hashJoin[INNER_JOIN](region.r_regionkey = nation.n_regionkey)
----------------filter(( not r_regionkey IS NULL)( not r_name IS NULL)(region.r_name = ''))
------------------PhysicalOlapScan[region]
----------------PhysicalDistribute
------------------filter(( not n_regionkey IS NULL))
--------------------PhysicalOlapScan[nation]
--------------filter(( not r_regionkey IS NULL)( not r_name IS NULL)(region.r_name = ''))
----------------PhysicalOlapScan[region]
------------PhysicalDistribute
--------------PhysicalOlapScan[supplier]
--------------filter(( not n_regionkey IS NULL))
----------------PhysicalOlapScan[nation]
----------PhysicalDistribute
------------PhysicalOlapScan[supplier]
--------PhysicalDistribute
----------PhysicalOlapScan[partsupp]

Expand All @@ -142,13 +137,12 @@ PhysicalResultSink
----PhysicalProject
------hashJoin[LEFT_OUTER_JOIN](partsupp.ps_suppkey = supplier.s_suppkey)
--------hashJoin[INNER_JOIN](nation.n_nationkey = supplier.s_suppkey)
----------PhysicalDistribute
------------hashJoin[INNER_JOIN](region.r_regionkey = nation.n_regionkey)
--------------filter(( not r_regionkey IS NULL)( not r_name IS NULL)(region.r_name = ''))
----------------PhysicalOlapScan[region]
--------------PhysicalDistribute
----------------filter(( not n_regionkey IS NULL))
------------------PhysicalOlapScan[nation]
----------hashJoin[INNER_JOIN](region.r_regionkey = nation.n_regionkey)
------------filter(( not r_regionkey IS NULL)( not r_name IS NULL)(region.r_name = ''))
--------------PhysicalOlapScan[region]
------------PhysicalDistribute
--------------filter(( not n_regionkey IS NULL))
----------------PhysicalOlapScan[nation]
----------PhysicalDistribute
------------PhysicalOlapScan[supplier]
--------PhysicalDistribute
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ suite("eliminate_outer_join") {
sql "set disable_nereids_rules='ELIMINATE_NOT_NULL'"
sql "set disable_join_reorder=true"
sql "set forbid_unknown_col_stats=false"

sql "set enable_bucket_shuffle_join=false"

String database = context.config.getDbNameByFile(context.file)
sql "drop database if exists ${database}"
sql "create database ${database}"
Expand All @@ -33,15 +34,16 @@ suite("eliminate_outer_join") {

sql '''
CREATE TABLE partsupp (
ps_id int,
ps_partkey int NOT NULL,
ps_suppkey int NOT NULL,
ps_availqty int NOT NULL,
ps_supplycost decimal(15, 2) NOT NULL,
ps_comment VARCHAR(199) NOT NULL
)ENGINE=OLAP
DUPLICATE KEY(`ps_partkey`)
DUPLICATE KEY(`ps_id`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`ps_partkey`) BUCKETS 24
DISTRIBUTED BY HASH(`ps_id`) BUCKETS 24
PROPERTIES (
"replication_num" = "1",
"colocate_with" = "part_partsupp"
Expand All @@ -54,6 +56,7 @@ suite("eliminate_outer_join") {

sql '''
CREATE TABLE supplier (
s_id int,
s_suppkey int NOT NULL,
s_name VARCHAR(25) NOT NULL,
s_address VARCHAR(40) NOT NULL,
Expand All @@ -62,9 +65,9 @@ suite("eliminate_outer_join") {
s_acctbal decimal(15, 2) NOT NULL,
s_comment VARCHAR(101) NOT NULL
)ENGINE=OLAP
DUPLICATE KEY(`s_suppkey`)
DUPLICATE KEY(`s_id`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 12
DISTRIBUTED BY HASH(`s_id`) BUCKETS 12
PROPERTIES (
"replication_num" = "1"
);
Expand All @@ -76,14 +79,15 @@ suite("eliminate_outer_join") {

sql '''
CREATE TABLE `nation` (
n_id int(11),
`n_nationkey` int(11) NOT NULL,
`n_name` varchar(25) NOT NULL,
`n_regionkey` int(11) NOT NULL,
`n_comment` varchar(152) NULL
) ENGINE=OLAP
DUPLICATE KEY(`N_NATIONKEY`)
DUPLICATE KEY(`n_id`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`N_NATIONKEY`) BUCKETS 1
DISTRIBUTED BY HASH(`n_id`) BUCKETS 1
PROPERTIES (
"replication_num" = "1"
);
Expand All @@ -95,103 +99,104 @@ suite("eliminate_outer_join") {

sql '''
CREATE TABLE region (
r_id int,
r_regionkey int NOT NULL,
r_name VARCHAR(25) NOT NULL,
r_comment VARCHAR(152)
)ENGINE=OLAP
DUPLICATE KEY(`r_regionkey`)
DUPLICATE KEY(`r_id`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1
DISTRIBUTED BY HASH(`r_id`) BUCKETS 1
PROPERTIES (
"replication_num" = "1"
);
'''

// eliminate outer joins in cascading
qt_1 '''
--eliminate outer joins in cascading
explain shape plan
select *
from region
left join nation on r_regionkey=n_regionkey -->inner
left join supplier on n_nationkey=s_suppkey -->inner
left join partsupp on ps_suppkey=s_suppkey -->inner
left join [broadcast] nation on r_regionkey=n_regionkey -->inner
left join [broadcast] supplier on n_nationkey=s_suppkey -->inner
left join [broadcast] partsupp on ps_suppkey=s_suppkey -->inner
where ps_suppkey > 1
'''

// full join ps => right join ps, other outer joins are not eliminated
qt_2 '''
-- full join ps => right join ps, other outer joins are not eliminated
explain shape plan
select *
from region
full join nation on r_regionkey=n_regionkey -->full
full join supplier on n_nationkey=s_suppkey -->full
full join partsupp on ps_suppkey=s_suppkey -->right
full join [broadcast] nation on r_regionkey=n_regionkey -->full
full join [broadcast] supplier on n_nationkey=s_suppkey -->full
full join [broadcast] partsupp on ps_suppkey=s_suppkey -->right
where ps_suppkey > 1;
'''

qt_3 '''
explain shape plan
select *
from region
full join nation on r_regionkey=n_regionkey -- full
left join supplier on n_nationkey=s_suppkey -- left
full join partsupp on ps_suppkey=s_suppkey -- right
full join [broadcast] nation on r_regionkey=n_regionkey -- full
left join [broadcast] supplier on n_nationkey=s_suppkey -- left
full join [broadcast] partsupp on ps_suppkey=s_suppkey -- right
where ps_suppkey > 1;
'''

qt_4 '''
explain shape plan
select *
from region
full join nation on r_regionkey=n_regionkey -- left
full join [broadcast] nation on r_regionkey=n_regionkey -- left
where r_name = "";
'''

qt_5 '''
explain shape plan
select *
from region
full join nation on r_regionkey=n_regionkey -- left
left join supplier on n_nationkey=s_suppkey -- left
full join [broadcast] nation on r_regionkey=n_regionkey -- left
left join [broadcast] supplier on n_nationkey=s_suppkey -- left
where r_name = "";
'''

qt_6 '''
explain shape plan
select *
from region
full join nation on r_regionkey=n_regionkey -- left
left join supplier on n_nationkey=s_suppkey -- left
full join partsupp on ps_suppkey=s_suppkey -- left
full join [broadcast] nation on r_regionkey=n_regionkey -- left
left join [broadcast] supplier on n_nationkey=s_suppkey -- left
full join [broadcast] partsupp on ps_suppkey=s_suppkey -- left
where r_name = "";
'''

qt_7'''
explain shape plan
select *
from region
join nation on r_regionkey=n_regionkey -- inner
left join supplier on n_nationkey=s_suppkey -- left
full join partsupp on ps_suppkey=s_suppkey; -- full
join [broadcast] nation on r_regionkey=n_regionkey -- inner
left join [broadcast] supplier on n_nationkey=s_suppkey -- left
full join [broadcast] partsupp on ps_suppkey=s_suppkey; -- full
'''

qt_8'''
explain shape plan
select *
from region
join nation on r_regionkey=n_regionkey --inner
left join supplier on n_nationkey=s_suppkey --left
full join partsupp on ps_suppkey=s_suppkey --left
join nation [broadcast] on r_regionkey=n_regionkey --inner
left join [broadcast] supplier on n_nationkey=s_suppkey --left
full join [broadcast] partsupp on ps_suppkey=s_suppkey --left
where r_name = "";
'''

qt_9 '''
explain shape plan
select *
from region
join nation on r_regionkey=n_regionkey --inner
right join supplier on n_nationkey=s_suppkey --inner
full join partsupp on ps_suppkey=s_suppkey --left
join [broadcast] nation on r_regionkey=n_regionkey --inner
right join [broadcast] supplier on n_nationkey=s_suppkey --inner
full join [broadcast] partsupp on ps_suppkey=s_suppkey --left
where r_name = "";
'''
}

0 comments on commit 7dac52e

Please sign in to comment.