Skip to content

Commit

Permalink
rf prune tpcds
Browse files Browse the repository at this point in the history
  • Loading branch information
englefly committed Sep 12, 2023
1 parent 3fe6280 commit 075bed9
Show file tree
Hide file tree
Showing 203 changed files with 11,752 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,12 @@ public PhysicalProject visitPhysicalProject(PhysicalProject<? extends Plan> proj
Plan newChild = child.accept(this, ctx);
if (newChild instanceof PhysicalProject) {
List<NamedExpression> projections = project.mergeProjections((PhysicalProject) newChild);
return project.withProjectionsAndChild(projections, newChild.child(0));
return project.withProjectionsAndChild(projections, newChild.child(0))
.withPhysicalPropertiesAndStats(project.getPhysicalProperties(), project.getStats());
}
return child != newChild ? project.withChildren(Lists.newArrayList(newChild)) : project;
return child != newChild
? project.withChildren(Lists.newArrayList(newChild))
.withPhysicalPropertiesAndStats(project.getPhysicalProperties(), project.getStats())
: project;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,44 @@
package org.apache.doris.nereids.processor.post;

import org.apache.doris.nereids.CascadesContext;
import org.apache.doris.nereids.properties.PhysicalProperties;
import org.apache.doris.nereids.trees.plans.AbstractPlan;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.physical.PhysicalFilter;
import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan;
import org.apache.doris.nereids.trees.plans.physical.PhysicalProject;
import org.apache.doris.nereids.util.ExpressionUtils;
import org.apache.doris.statistics.Statistics;

/**
* merge consecutive projects
*/
public class PushdownFilterThroughProject extends PlanPostProcessor {
@Override
public Plan visitPhysicalFilter(PhysicalFilter<? extends Plan> filter, CascadesContext context) {
PhysicalProperties properties = filter.getPhysicalProperties();
Statistics stats = filter.getStats();
Plan child = filter.child();
if (!(child instanceof PhysicalProject)) {
return filter.withChildren(child.accept(this, context));
PhysicalPlan newChild = (PhysicalPlan) child.accept(this, context);
if (child != newChild) {
newChild = newChild.withPhysicalPropertiesAndStats(
((PhysicalPlan) child).getPhysicalProperties(),
((AbstractPlan) child).getStats());
return ((PhysicalPlan) (filter.withChildren(newChild)))
.withPhysicalPropertiesAndStats(properties, stats);
}
return filter;
}

PhysicalProject<? extends Plan> project = (PhysicalProject<? extends Plan>) child;
PhysicalFilter<? extends Plan> newFilter = filter.withConjunctsAndChild(
ExpressionUtils.replace(filter.getConjuncts(), project.getAliasToProducer()),
project.child());

return project.withChildren(newFilter.accept(this, context));
project.child()).withPhysicalPropertiesAndStats(properties, stats);
PhysicalPlan newChild = (PhysicalPlan) newFilter.accept(this, context);
newChild = newChild.withPhysicalPropertiesAndStats(properties, stats);
PhysicalPlan ret = ((PhysicalPlan) (project.withChildren(newChild)))
.withPhysicalPropertiesAndStats(properties, stats);
return ret;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
with open('rf.tmpl', 'r') as f:
tmpl = f.read()
for i in range(1,100):
with open('../../../../tools/tpcds-tools/queries/sf100/query'+str(i)+'.sql', 'r') as fi:
with open('./queries/query'+str(i)+'.ini', 'r') as fi:
casei = tmpl.replace('{--}', str(i))
casei = casei.replace('{query}', fi.read())
casei = casei.replace('{query}', fi.read().split(";")[0])
# with open('../rf/ds_rf'+str(i)+'.groovy', 'w') as out:
# out.write(casei)
with open('rf/rf.'+str(i), 'r') as rf_file:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# // Licensed to the Apache Software Foundation (ASF) under one
# // or more contributor license agreements. See the NOTICE file
# // distributed with this work for additional information
# // regarding copyright ownership. The ASF licenses this file
# // to you under the Apache License, Version 2.0 (the
# // "License"); you may not use this file except in compliance
# // with the License. You may obtain a copy of the License at
# //
# // http://www.apache.org/licenses/LICENSE-2.0
# //
# // Unless required by applicable law or agreed to in writing,
# // software distributed under the License is distributed on an
# // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# // KIND, either express or implied. See the License for the
# // specific language governing permissions and limitations
# // under the License.


def ini_prune_count():
    """Return the prune-count table as a dict of int query id -> int count.

    Only the query ids listed here have an entry; every other id is absent
    from the returned dict, so callers must supply their own default.
    """
    return {
        2: 2,
        4: 3,
        5: 4,
        6: 4,
        7: 1,
        8: 1,
        11: 2,
        13: 0,
        14: 12,
        15: 2,
        17: 4,
        19: 1,
        21: 1,
        22: 1,
        23: 3,
        24: 4,
        25: 2,
        26: 1,
        27: 1,
        30: 1,
        31: 2,
        35: 3,
        36: 1,
        38: 3,
        39: 2,
        40: 1,
        45: 3,
        46: 2,
        47: 2,
        48: 1,
        50: 1,
        53: 1,
        54: 1,
        57: 2,
        58: 1,
        59: 3,
        62: 3,
        63: 1,
        64: 3,
        65: 3,
        66: 2,
        67: 2,
        70: 1,
        72: 5,
        74: 2,
        76: 6,
        77: 4,
        79: 1,
        80: 3,
        81: 1,
        83: 1,
        85: 0,
        86: 1,
        87: 3,
        89: 1,
        91: 1,
        95: 2,
        99: 3,
    }


if __name__ == '__main__':
    # Render one groovy case per query id 1..99 from the shared template.
    prune_counts = ini_prune_count()
    with open('rf_prune.tmpl', 'r') as tmpl_file:
        template = tmpl_file.read()
    for query_id in range(1, 100):
        # Query ids missing from the table are rendered with a count of 0.
        pruned = prune_counts.get(query_id, 0)
        rendered = template.replace('{query_id}', str(query_id)).replace('{pruned}', str(pruned))
        target = '../rf_prune/ds' + str(query_id) + '_rf_prune.groovy'
        with open(target, 'w') as out:
            out.write(rendered)

Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
-- Returns up to 100 customer ids, in ascending order, for customers whose summed
-- SR_FEE on store returns dated in year 2000 at a store is more than 1.2 times the
-- average of that per-customer total for the same store. Only stores with
-- s_state = 'SD' are considered.
-- customer_total_return: one row per (customer, store) with the summed SR_FEE.
with customer_total_return as
(select sr_customer_sk as ctr_customer_sk
,sr_store_sk as ctr_store_sk
,sum(SR_FEE) as ctr_total_return
from store_returns
,date_dim
where sr_returned_date_sk = d_date_sk
and d_year =2000
group by sr_customer_sk
,sr_store_sk)
select c_customer_id
from customer_total_return ctr1
,store
,customer
-- The subquery is correlated on the store key, so the average is taken per store.
where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
from customer_total_return ctr2
where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
and s_store_sk = ctr1.ctr_store_sk
and s_state = 'SD'
and ctr1.ctr_customer_sk = c_customer_sk
order by c_customer_id
limit 100;
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
-- Counts customers per combination of eight demographic attributes.
-- A customer qualifies when all of the following hold:
--   * the current address is in one of the five listed counties
--   * a store sale exists for the customer in year 2001, months 1 through 1+3
--   * a web sale (by bill customer) or a catalog sale (by ship customer) exists
--     for the customer in the same year and month range
-- cnt1 through cnt6 are all count(*) over the same group, so they carry the same value.
-- Output is grouped and ordered by the eight demographic columns, first 100 rows only.
select
cd_gender,
cd_marital_status,
cd_education_status,
count(*) cnt1,
cd_purchase_estimate,
count(*) cnt2,
cd_credit_rating,
count(*) cnt3,
cd_dep_count,
count(*) cnt4,
cd_dep_employed_count,
count(*) cnt5,
cd_dep_college_count,
count(*) cnt6
from
customer c,customer_address ca,customer_demographics
where
c.c_current_addr_sk = ca.ca_address_sk and
ca_county in ('Storey County','Marquette County','Warren County','Cochran County','Kandiyohi County') and
cd_demo_sk = c.c_current_cdemo_sk and
exists (select *
from store_sales,date_dim
where c.c_customer_sk = ss_customer_sk and
ss_sold_date_sk = d_date_sk and
d_year = 2001 and
d_moy between 1 and 1+3) and
(exists (select *
from web_sales,date_dim
where c.c_customer_sk = ws_bill_customer_sk and
ws_sold_date_sk = d_date_sk and
d_year = 2001 and
d_moy between 1 ANd 1+3) or
exists (select *
from catalog_sales,date_dim
where c.c_customer_sk = cs_ship_customer_sk and
cs_sold_date_sk = d_date_sk and
d_year = 2001 and
d_moy between 1 and 1+3))
group by cd_gender,
cd_marital_status,
cd_education_status,
cd_purchase_estimate,
cd_credit_rating,
cd_dep_count,
cd_dep_employed_count,
cd_dep_college_count
order by cd_gender,
cd_marital_status,
cd_education_status,
cd_purchase_estimate,
cd_credit_rating,
cd_dep_count,
cd_dep_employed_count,
cd_dep_college_count
limit 100;
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
-- Finds customers whose web-sales total grew faster from 2001 to 2001+1 than their
-- store-sales total did over the same two years. Returns customer id, first name,
-- last name and birth country, ordered by those four columns, first 100 rows only.
-- year_total: per customer and d_year, the sum of (ext_list_price - ext_discount_amt),
-- computed once over store_sales (sale_type 's') and once over web_sales
-- (sale_type 'w'), combined with union all.
with year_total as (
select c_customer_id customer_id
,c_first_name customer_first_name
,c_last_name customer_last_name
,c_preferred_cust_flag customer_preferred_cust_flag
,c_birth_country customer_birth_country
,c_login customer_login
,c_email_address customer_email_address
,d_year dyear
,sum(ss_ext_list_price-ss_ext_discount_amt) year_total
,'s' sale_type
from customer
,store_sales
,date_dim
where c_customer_sk = ss_customer_sk
and ss_sold_date_sk = d_date_sk
group by c_customer_id
,c_first_name
,c_last_name
,c_preferred_cust_flag
,c_birth_country
,c_login
,c_email_address
,d_year
union all
select c_customer_id customer_id
,c_first_name customer_first_name
,c_last_name customer_last_name
,c_preferred_cust_flag customer_preferred_cust_flag
,c_birth_country customer_birth_country
,c_login customer_login
,c_email_address customer_email_address
,d_year dyear
,sum(ws_ext_list_price-ws_ext_discount_amt) year_total
,'w' sale_type
from customer
,web_sales
,date_dim
where c_customer_sk = ws_bill_customer_sk
and ws_sold_date_sk = d_date_sk
group by c_customer_id
,c_first_name
,c_last_name
,c_preferred_cust_flag
,c_birth_country
,c_login
,c_email_address
,d_year
)
select
t_s_secyear.customer_id
,t_s_secyear.customer_first_name
,t_s_secyear.customer_last_name
,t_s_secyear.customer_birth_country
-- year_total is joined to itself four times: store and web, first and second year.
from year_total t_s_firstyear
,year_total t_s_secyear
,year_total t_w_firstyear
,year_total t_w_secyear
where t_s_secyear.customer_id = t_s_firstyear.customer_id
and t_s_firstyear.customer_id = t_w_secyear.customer_id
and t_s_firstyear.customer_id = t_w_firstyear.customer_id
and t_s_firstyear.sale_type = 's'
and t_w_firstyear.sale_type = 'w'
and t_s_secyear.sale_type = 's'
and t_w_secyear.sale_type = 'w'
and t_s_firstyear.dyear = 2001
and t_s_secyear.dyear = 2001+1
and t_w_firstyear.dyear = 2001
and t_w_secyear.dyear = 2001+1
and t_s_firstyear.year_total > 0
and t_w_firstyear.year_total > 0
-- Compares the web ratio (second year / first year) with the store ratio. Both
-- first-year totals are already required to be > 0 above, so the else 0.0 branches
-- only guard the division.
and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end
> case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end
order by t_s_secyear.customer_id
,t_s_secyear.customer_first_name
,t_s_secyear.customer_last_name
,t_s_secyear.customer_birth_country
limit 100;
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
-- Per-item web sales revenue for items in categories Books, Sports and Men, for
-- sales dated from 1998-04-06 through 30 days later.
-- itemrevenue is the summed ws_ext_sales_price of the item group. revenueratio is
-- that revenue times 100 divided by the total revenue of all groups sharing the
-- same i_class, computed with a window sum over the aggregated rows.
-- Rows are ordered by category, class, item id, item description and revenueratio,
-- first 100 rows only.
select i_item_id
,i_item_desc
,i_category
,i_class
,i_current_price
,sum(ws_ext_sales_price) as itemrevenue
,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over
(partition by i_class) as revenueratio
from
web_sales
,item
,date_dim
where
ws_item_sk = i_item_sk
and i_category in ('Books', 'Sports', 'Men')
and ws_sold_date_sk = d_date_sk
and d_date between cast('1998-04-06' as date)
and (cast('1998-04-06' as date) + interval 30 day)
group by
i_item_id
,i_item_desc
,i_category
,i_class
,i_current_price
order by
i_category
,i_class
,i_item_id
,i_item_desc
,revenueratio
limit 100;
Loading

0 comments on commit 075bed9

Please sign in to comment.