Skip to content

Commit

Permalink
[opt](nereids) refine expression estimation
Browse files Browse the repository at this point in the history
  • Loading branch information
zhongjian.xzj authored and zhongjian.xzj committed Sep 19, 2024
1 parent 368facd commit 7ec79a1
Show file tree
Hide file tree
Showing 86 changed files with 814 additions and 832 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ PhysicalResultSink
--------------hashAgg[LOCAL]
----------------PhysicalProject
------------------filter((ifnull($c$1, FALSE) OR ifnull($c$2, FALSE)))
--------------------hashJoin[LEFT_SEMI_JOIN shuffle] hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) otherCondition=()
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=() build RFs:RF5 ca_address_sk->[c_current_addr_sk]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk)) otherCondition=()
------------------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=()
--------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle] hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) otherCondition=()
----------------------hashJoin[LEFT_SEMI_JOIN shuffle] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=()
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk)) otherCondition=()
----------------------------PhysicalProject
------------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=() build RFs:RF4 ca_address_sk->[c_current_addr_sk]
--------------------------------hashJoin[RIGHT_SEMI_JOIN shuffle] hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[ss_customer_sk]
----------------------------------PhysicalProject
------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
Expand All @@ -25,18 +25,18 @@ PhysicalResultSink
----------------------------------------filter((date_dim.d_moy <= 4) and (date_dim.d_moy >= 1) and (date_dim.d_year = 2001))
------------------------------------------PhysicalOlapScan[date_dim]
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[customer] apply RFs: RF5
------------------------------------PhysicalOlapScan[customer] apply RFs: RF4
--------------------------------PhysicalProject
----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
------------------------------------PhysicalProject
--------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1
------------------------------------PhysicalProject
--------------------------------------filter((date_dim.d_moy <= 4) and (date_dim.d_moy >= 1) and (date_dim.d_year = 2001))
----------------------------------------PhysicalOlapScan[date_dim]
------------------------------PhysicalOlapScan[customer_demographics]
--------------------------PhysicalProject
----------------------------filter(ca_county IN ('Cochran County', 'Kandiyohi County', 'Marquette County', 'Storey County', 'Warren County'))
------------------------------PhysicalOlapScan[customer_address]
----------------------------------filter(ca_county IN ('Cochran County', 'Kandiyohi County', 'Marquette County', 'Storey County', 'Warren County'))
------------------------------------PhysicalOlapScan[customer_address]
----------------------------PhysicalOlapScan[customer_demographics]
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
----------------------------PhysicalProject
------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1
----------------------------PhysicalProject
------------------------------filter((date_dim.d_moy <= 4) and (date_dim.d_moy >= 1) and (date_dim.d_year = 2001))
--------------------------------PhysicalOlapScan[date_dim]
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk]
--------------------------PhysicalProject
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
PhysicalCteAnchor ( cteId=CTEId#0 )
--PhysicalCteProducer ( cteId=CTEId#0 )
----PhysicalProject
------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_brand_id = t.brand_id) and (item.i_category_id = t.category_id) and (item.i_class_id = t.class_id)) otherCondition=()
------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((item.i_brand_id = t.brand_id) and (item.i_category_id = t.category_id) and (item.i_class_id = t.class_id)) otherCondition=()
--------PhysicalIntersect
----------hashAgg[GLOBAL]
------------PhysicalDistribute[DistributionSpecHash]
Expand Down Expand Up @@ -122,7 +122,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------------------------------------PhysicalProject
--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF17 d_date_sk->[cs_sold_date_sk]
----------------------------------------PhysicalProject
------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=()
------------------------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=()
--------------------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] hashCondition=((catalog_sales.cs_item_sk = cross_items.ss_item_sk)) otherCondition=()
----------------------------------------------PhysicalProject
------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF17
Expand All @@ -145,7 +145,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------------------------------------PhysicalProject
--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF20 d_date_sk->[ws_sold_date_sk]
----------------------------------------PhysicalProject
------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=()
------------------------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=()
--------------------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk)) otherCondition=()
----------------------------------------------PhysicalProject
------------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF20
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,28 @@ PhysicalResultSink
--------hashAgg[DISTINCT_LOCAL]
----------hashAgg[GLOBAL]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN broadcast] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk]
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF2 ca_address_sk->[cs_ship_addr_sk]
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_ship_date_sk]
--------------------------hashJoin[LEFT_ANTI_JOIN bucketShuffle] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=()
--------------hashJoin[LEFT_ANTI_JOIN bucketShuffle] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=()
----------------PhysicalProject
------------------hashJoin[LEFT_SEMI_JOIN shuffle] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk)))
--------------------PhysicalProject
----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk]
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[cs_ship_addr_sk]
----------------------------PhysicalProject
------------------------------hashJoin[RIGHT_SEMI_JOIN shuffle] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF0 cs_order_number->[cs_order_number]
------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_ship_date_sk]
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF1 RF2 RF3
----------------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01'))
------------------------------------PhysicalOlapScan[date_dim]
----------------------------PhysicalProject
------------------------------PhysicalOlapScan[catalog_returns]
--------------------------PhysicalProject
----------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01'))
------------------------------PhysicalOlapScan[date_dim]
----------------------PhysicalProject
------------------------filter((customer_address.ca_state = 'WV'))
--------------------------PhysicalOlapScan[customer_address]
------------------PhysicalProject
--------------------filter(cc_county IN ('Barrow County', 'Daviess County', 'Luce County', 'Richland County', 'Ziebach County'))
----------------------PhysicalOlapScan[call_center]
------------------------------filter((customer_address.ca_state = 'WV'))
--------------------------------PhysicalOlapScan[customer_address]
------------------------PhysicalProject
--------------------------filter(cc_county IN ('Barrow County', 'Daviess County', 'Luce County', 'Richland County', 'Ziebach County'))
----------------------------PhysicalOlapScan[call_center]
--------------------PhysicalProject
----------------------PhysicalOlapScan[catalog_sales]
----------------PhysicalProject
------------------PhysicalOlapScan[catalog_returns]

Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,21 @@ PhysicalResultSink
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk]
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=()
--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF2 cd_demo_sk->[cs_bill_cdemo_sk]
----------------------------------PhysicalProject
------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[cs_bill_customer_sk]
------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=()
--------------------------------------PhysicalProject
----------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF0 cd_demo_sk->[cs_bill_cdemo_sk]
----------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF0 c_customer_sk->[cs_bill_customer_sk]
------------------------------------------PhysicalProject
--------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF5
--------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF2 RF5
------------------------------------------PhysicalProject
--------------------------------------------filter((cd1.cd_education_status = 'Advanced Degree') and (cd1.cd_gender = 'F'))
----------------------------------------------PhysicalOlapScan[customer_demographics]
--------------------------------------------filter(c_birth_month IN (1, 10, 2, 4, 7, 8))
----------------------------------------------PhysicalOlapScan[customer] apply RFs: RF3
--------------------------------------PhysicalProject
----------------------------------------filter(c_birth_month IN (1, 10, 2, 4, 7, 8))
------------------------------------------PhysicalOlapScan[customer] apply RFs: RF3
----------------------------------------PhysicalOlapScan[customer_demographics]
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[customer_demographics]
------------------------------------filter((cd1.cd_education_status = 'Advanced Degree') and (cd1.cd_gender = 'F'))
--------------------------------------PhysicalOlapScan[customer_demographics]
------------------------------PhysicalProject
--------------------------------filter(ca_state IN ('GA', 'IN', 'ME', 'NC', 'OK', 'WA', 'WY'))
----------------------------------PhysicalOlapScan[customer_address]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------PhysicalProject
------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE)))
--------------PhysicalProject
----------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk]
----------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk]
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ctr_customer_sk]
----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,20 @@ PhysicalResultSink
----------PhysicalDistribute[DistributionSpecHash]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN shuffle] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[cs_item_sk]
----------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[cs_item_sk]
------------------PhysicalProject
--------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[inv_date_sk]
--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[inv_item_sk]
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[inv_item_sk]
------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[inv_date_sk]
--------------------------PhysicalProject
----------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100))
------------------------------PhysicalOlapScan[inventory] apply RFs: RF0 RF1
--------------------------PhysicalProject
----------------------------filter((item.i_current_price <= 75.00) and (item.i_current_price >= 45.00) and i_manufact_id IN (1000, 707, 747, 856))
------------------------------PhysicalOlapScan[item]
----------------------------filter((date_dim.d_date <= '1999-04-22') and (date_dim.d_date >= '1999-02-21'))
------------------------------PhysicalOlapScan[date_dim]
----------------------PhysicalProject
------------------------filter((date_dim.d_date <= '1999-04-22') and (date_dim.d_date >= '1999-02-21'))
--------------------------PhysicalOlapScan[date_dim]
------------------------filter((item.i_current_price <= 75.00) and (item.i_current_price >= 45.00) and i_manufact_id IN (1000, 707, 747, 856))
--------------------------PhysicalOlapScan[item]

Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,21 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
--PhysicalCteProducer ( cteId=CTEId#0 )
----PhysicalProject
------filter((if((mean = 0.0), 0.0, (stdev / mean)) > 1.0))
--------hashAgg[GLOBAL]
----------PhysicalDistribute[DistributionSpecHash]
------------hashAgg[LOCAL]
--------hashAgg[LOCAL]
----------PhysicalProject
------------hashJoin[INNER_JOIN shuffle] hashCondition=((inventory.inv_warehouse_sk = warehouse.w_warehouse_sk)) otherCondition=()
--------------PhysicalProject
----------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[inv_date_sk]
----------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[inv_date_sk]
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_warehouse_sk = warehouse.w_warehouse_sk)) otherCondition=()
--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=()
----------------------PhysicalOlapScan[inventory] apply RFs: RF1
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=()
--------------------------PhysicalOlapScan[inventory] apply RFs: RF2
--------------------------PhysicalProject
----------------------------PhysicalOlapScan[item]
----------------------PhysicalProject
------------------------PhysicalOlapScan[warehouse]
------------------------PhysicalOlapScan[item]
------------------PhysicalProject
--------------------filter((date_dim.d_year = 1998) and d_moy IN (1, 2))
----------------------PhysicalOlapScan[date_dim]
--------------PhysicalProject
----------------PhysicalOlapScan[warehouse]
--PhysicalResultSink
----PhysicalQuickSort[MERGE_SORT]
------PhysicalDistribute[DistributionSpecGather]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ PhysicalResultSink
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------PhysicalProject
----------hashJoin[INNER_JOIN broadcast] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() build RFs:RF1 item_sk->[i_item_sk]
----------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() build RFs:RF1 item_sk->[i_item_sk]
------------PhysicalProject
--------------PhysicalOlapScan[item] apply RFs: RF1
------------PhysicalProject
Expand Down
Loading

0 comments on commit 7ec79a1

Please sign in to comment.