Skip to content

Commit

Permalink
1. condition order: filter/hashCondition/otherCondition,
Browse files Browse the repository at this point in the history
2. update regression out
3. remove tpch_sf500 shape case (covered by tpch sf1000)
4. implement is-null stats estimation
  • Loading branch information
englefly committed Sep 25, 2023
1 parent 129ffb7 commit 1637919
Show file tree
Hide file tree
Showing 169 changed files with 898 additions and 3,404 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.doris.nereids.trees.expressions.GreaterThan;
import org.apache.doris.nereids.trees.expressions.GreaterThanEqual;
import org.apache.doris.nereids.trees.expressions.InPredicate;
import org.apache.doris.nereids.trees.expressions.IsNull;
import org.apache.doris.nereids.trees.expressions.LessThan;
import org.apache.doris.nereids.trees.expressions.LessThanEqual;
import org.apache.doris.nereids.trees.expressions.Like;
Expand Down Expand Up @@ -386,6 +387,22 @@ public Statistics visitNot(Not not, EstimationContext context) {
return statisticsBuilder.build();
}

/**
 * Estimates the statistics produced by an {@code IS NULL} predicate.
 *
 * <p>The operand's column statistics are estimated against {@code context.statistics}. When they
 * are unknown, the input statistics are rebuilt and returned without any column change. Otherwise
 * a column statistic whose count and null count both equal the operand's {@code numNulls} is
 * registered under the operand expression.
 *
 * @param isNull the IS NULL predicate being estimated
 * @param context estimation context supplying the input statistics
 * @return statistics rebuilt from the input, with the operand column adjusted when it is known
 */
@Override
public Statistics visitIsNull(IsNull isNull, EstimationContext context) {
    ColumnStatistic operandStats = ExpressionEstimation.estimate(isNull.child(), context.statistics);
    StatisticsBuilder resultBuilder = new StatisticsBuilder(context.statistics);
    if (!operandStats.isUnKnown()) {
        double nullRows = operandStats.numNulls;
        ColumnStatisticBuilder operandBuilder = new ColumnStatisticBuilder(operandStats);
        // Leave ndv/min/max as they are so that is-not-null estimation keeps working.
        operandBuilder.setCount(nullRows).setNumNulls(nullRows);
        resultBuilder.putColumnStatistics(isNull.child(), operandBuilder.build());
        // TODO: updateRowCountOnly() is deliberately not called here so that is-not-null keeps
        // working; this needs refactoring.
    }
    return resultBuilder.build();
}

static class EstimationContext {
private final Statistics statistics;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ public String toString() {
/**
 * Renders this predicate as SQL text: the compare expression's SQL, then {@code " IN "}, then the
 * options' SQL joined by {@code ", "} inside one pair of parentheses.
 */
@Override
public String toSql() {
return compareExpr.toSql() + " IN " + options.stream()
// NOTE(review): the two .map lines below look like the before/after sides of one diff hunk;
// the second one additionally sorts the rendered option strings. Confirm which line is live.
.map(Expression::toSql)
.map(Expression::toSql).sorted()
.collect(Collectors.joining(", ", "(", ")"));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.doris.nereids.trees.plans.physical;

import com.google.common.collect.Lists;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.properties.LogicalProperties;
import org.apache.doris.nereids.properties.PhysicalProperties;
Expand All @@ -37,6 +38,7 @@
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

/**
* Physical filter plan.
Expand Down Expand Up @@ -136,9 +138,14 @@ public PhysicalFilter<Plan> withConjunctsAndChild(Set<Expression> conjuncts, Pla
/**
 * Builds the shape string of this filter node from the {@code shapeInfo()} of its conjuncts.
 */
@Override
public String shapeInfo() {
StringBuilder builder = new StringBuilder();
// NOTE(review): the next three statements and the "filter" + sorted-join statements after them
// look like the before/after sides of one diff hunk. Confirm which variant is live.
builder.append("filter(");
conjuncts.forEach(conjunct -> builder.append(conjunct.shapeInfo()));
builder.append(")");
builder.append("filter");
// Conjunct strings are sorted and joined with " and " inside one pair of parentheses.
builder.append(
conjuncts.stream().map(conjunct -> conjunct.shapeInfo())
.sorted()
.collect(Collectors.joining(" and ", "(", ")")));
return builder.toString();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,43 +274,15 @@ public boolean pushDownRuntimeFilter(CascadesContext context, IdGenerator<Runtim
return pushedDown;
}

private class ExprComparator implements Comparator<Expression> {
@Override
public int compare(Expression e1, Expression e2) {
List<ExprId> ids1 = e1.getInputSlotExprIds()
.stream().sorted(Comparator.comparing(ExprId::asInt))
.collect(Collectors.toList());
List<ExprId> ids2 = e2.getInputSlotExprIds()
.stream().sorted(Comparator.comparing(ExprId::asInt))
.collect(Collectors.toList());
if (ids1.size() > ids2.size()) {
return 1;
} else if (ids1.size() < ids2.size()) {
return -1;
} else {
for (int i = 0; i < ids1.size(); i++) {
if (ids1.get(i).asInt() > ids2.get(i).asInt()) {
return 1;
} else if (ids1.get(i).asInt() < ids2.get(i).asInt()) {
return -1;
}
}
return 0;
}
}
}

/**
 * Builds the shape string of this join: {@code hashJoin[<joinType>]} followed by the
 * {@code shapeInfo()} of the hash conjuncts and then of the other conjuncts.
 */
@Override
public String shapeInfo() {
StringBuilder builder = new StringBuilder();
builder.append("hashJoin[").append(joinType).append("]");
// print sorted hash conjuncts for plan check
// NOTE(review): the two ExprComparator-sorted loops and the two labelled joins after them look
// like the before/after sides of one diff hunk. Confirm which variant is live.
hashJoinConjuncts.stream().sorted(new ExprComparator()).forEach(expr -> {
builder.append(expr.shapeInfo());
});
otherJoinConjuncts.stream().sorted(new ExprComparator()).forEach(expr -> {
builder.append(expr.shapeInfo());
});
// Each group is rendered to strings, sorted, and joined with " and ". The hash group is wrapped
// as " hashCondition=(...)" and the other group as "otherCondition=(...)"; no separator is
// emitted between the two groups.
builder.append(hashJoinConjuncts.stream().map(conjunct -> conjunct.shapeInfo())
.sorted().collect(Collectors.joining(" and ", " hashCondition=(", ")")));
builder.append(otherJoinConjuncts.stream().map(cond -> cond.shapeInfo())
.sorted().collect(Collectors.joining(" and ", "otherCondition=(", ")")));
return builder.toString();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------PhysicalDistribute
--------hashAgg[LOCAL]
----------PhysicalProject
------------hashJoin[INNER_JOIN](store_returns.sr_returned_date_sk = date_dim.d_date_sk)
------------hashJoin[INNER_JOIN] hashCondition=((store_returns.sr_returned_date_sk = date_dim.d_date_sk))otherCondition=()
--------------PhysicalProject
----------------PhysicalOlapScan[store_returns]
--------------PhysicalDistribute
Expand All @@ -18,14 +18,14 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------PhysicalDistribute
--------PhysicalTopN
----------PhysicalProject
------------hashJoin[INNER_JOIN](ctr1.ctr_customer_sk = customer.c_customer_sk)
------------hashJoin[INNER_JOIN] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk))otherCondition=()
--------------PhysicalDistribute
----------------PhysicalProject
------------------PhysicalOlapScan[customer]
--------------PhysicalDistribute
----------------hashJoin[INNER_JOIN](ctr1.ctr_store_sk = ctr2.ctr_store_sk)(cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))
----------------hashJoin[INNER_JOIN] hashCondition=((ctr1.ctr_store_sk = ctr2.ctr_store_sk))otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE)))
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN](store.s_store_sk = ctr1.ctr_store_sk)
--------------------hashJoin[INNER_JOIN] hashCondition=((store.s_store_sk = ctr1.ctr_store_sk))otherCondition=()
----------------------PhysicalDistribute
------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------PhysicalDistribute
Expand Down
24 changes: 12 additions & 12 deletions regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query10.out
Original file line number Diff line number Diff line change
Expand Up @@ -9,49 +9,49 @@ PhysicalResultSink
------------PhysicalDistribute
--------------hashAgg[LOCAL]
----------------PhysicalProject
------------------hashJoin[RIGHT_SEMI_JOIN](c.c_customer_sk = store_sales.ss_customer_sk)
------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk))otherCondition=()
--------------------PhysicalDistribute
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN](store_sales.ss_sold_date_sk = date_dim.d_date_sk)
------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))otherCondition=()
--------------------------PhysicalProject
----------------------------PhysicalOlapScan[store_sales]
--------------------------PhysicalDistribute
----------------------------PhysicalProject
------------------------------filter((date_dim.d_moy >= 1)(date_dim.d_moy <= 4)(date_dim.d_year = 2001))
------------------------------filter((date_dim.d_moy <= 4) and (date_dim.d_moy >= 1) and (date_dim.d_year = 2001))
--------------------------------PhysicalOlapScan[date_dim]
--------------------PhysicalProject
----------------------filter(($c$1 OR $c$2))
------------------------hashJoin[LEFT_SEMI_JOIN](c.c_customer_sk = catalog_sales.cs_ship_customer_sk)
--------------------------hashJoin[LEFT_SEMI_JOIN](c.c_customer_sk = web_sales.ws_bill_customer_sk)
------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk))otherCondition=()
--------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk))otherCondition=()
----------------------------PhysicalDistribute
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN](customer_demographics.cd_demo_sk = c.c_current_cdemo_sk)
--------------------------------hashJoin[INNER_JOIN] hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk))otherCondition=()
----------------------------------PhysicalOlapScan[customer_demographics]
----------------------------------PhysicalDistribute
------------------------------------PhysicalProject
--------------------------------------hashJoin[INNER_JOIN](c.c_current_addr_sk = ca.ca_address_sk)
--------------------------------------hashJoin[INNER_JOIN] hashCondition=((c.c_current_addr_sk = ca.ca_address_sk))otherCondition=()
----------------------------------------PhysicalProject
------------------------------------------PhysicalOlapScan[customer]
----------------------------------------PhysicalDistribute
------------------------------------------PhysicalProject
--------------------------------------------filter(ca_county IN ('Storey County', 'Marquette County', 'Warren County', 'Cochran County', 'Kandiyohi County'))
--------------------------------------------filter(ca_county IN ('Cochran County', 'Kandiyohi County', 'Marquette County', 'Storey County', 'Warren County'))
----------------------------------------------PhysicalOlapScan[customer_address]
----------------------------PhysicalDistribute
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = date_dim.d_date_sk)
--------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))otherCondition=()
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[web_sales]
----------------------------------PhysicalDistribute
------------------------------------PhysicalProject
--------------------------------------filter((date_dim.d_moy >= 1)(date_dim.d_year = 2001)(date_dim.d_moy <= 4))
--------------------------------------filter((date_dim.d_moy <= 4) and (date_dim.d_moy >= 1) and (date_dim.d_year = 2001))
----------------------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalDistribute
----------------------------PhysicalProject
------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
------------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))otherCondition=()
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[catalog_sales]
--------------------------------PhysicalDistribute
----------------------------------PhysicalProject
------------------------------------filter((date_dim.d_moy >= 1)(date_dim.d_moy <= 4)(date_dim.d_year = 2001))
------------------------------------filter((date_dim.d_moy <= 4) and (date_dim.d_moy >= 1) and (date_dim.d_year = 2001))
--------------------------------------PhysicalOlapScan[date_dim]

22 changes: 11 additions & 11 deletions regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query11.out
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN](customer.c_customer_sk = store_sales.ss_customer_sk)
----------------hashJoin[INNER_JOIN] hashCondition=((customer.c_customer_sk = store_sales.ss_customer_sk))otherCondition=()
------------------PhysicalDistribute
--------------------hashJoin[INNER_JOIN](store_sales.ss_sold_date_sk = date_dim.d_date_sk)
--------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))otherCondition=()
----------------------PhysicalProject
------------------------PhysicalOlapScan[store_sales]
----------------------PhysicalDistribute
Expand All @@ -25,9 +25,9 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN](customer.c_customer_sk = web_sales.ws_bill_customer_sk)
----------------hashJoin[INNER_JOIN] hashCondition=((customer.c_customer_sk = web_sales.ws_bill_customer_sk))otherCondition=()
------------------PhysicalDistribute
--------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = date_dim.d_date_sk)
--------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))otherCondition=()
----------------------PhysicalProject
------------------------PhysicalOlapScan[web_sales]
----------------------PhysicalDistribute
Expand All @@ -42,23 +42,23 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------PhysicalDistribute
--------PhysicalTopN
----------PhysicalProject
------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_secyear.customer_id)(if((year_total > 0.00), (cast(year_total as DOUBLE) / cast(year_total as DOUBLE)), 0) > if((year_total > 0.00), (cast(year_total as DOUBLE) / cast(year_total as DOUBLE)), 0))
--------------hashJoin[INNER_JOIN](t_s_secyear.customer_id = t_s_firstyear.customer_id)
----------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_firstyear.customer_id)
------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id))otherCondition=((if((year_total > 0.00), (cast(year_total as DOUBLE) / cast(year_total as DOUBLE)), 0) > if((year_total > 0.00), (cast(year_total as DOUBLE) / cast(year_total as DOUBLE)), 0)))
--------------hashJoin[INNER_JOIN] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id))otherCondition=()
----------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id))otherCondition=()
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------filter((t_s_firstyear.dyear = 2001)(t_s_firstyear.sale_type = 's')(t_s_firstyear.year_total > 0.00))
----------------------filter((t_s_firstyear.dyear = 2001) and (t_s_firstyear.sale_type = 's') and (t_s_firstyear.year_total > 0.00))
------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------filter((t_w_firstyear.year_total > 0.00)(t_w_firstyear.sale_type = 'w')(t_w_firstyear.dyear = 2001))
----------------------filter((t_w_firstyear.dyear = 2001) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.00))
------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------PhysicalDistribute
------------------PhysicalProject
--------------------filter((t_s_secyear.sale_type = 's')(t_s_secyear.dyear = 2002))
--------------------filter((t_s_secyear.dyear = 2002) and (t_s_secyear.sale_type = 's'))
----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------PhysicalDistribute
----------------PhysicalProject
------------------filter((t_w_secyear.dyear = 2002)(t_w_secyear.sale_type = 'w'))
------------------filter((t_w_secyear.dyear = 2002) and (t_w_secyear.sale_type = 'w'))
--------------------PhysicalCteConsumer ( cteId=CTEId#0 )

Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,17 @@ PhysicalResultSink
------------------PhysicalDistribute
--------------------hashAgg[LOCAL]
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN](web_sales.ws_item_sk = item.i_item_sk)
------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk))otherCondition=()
--------------------------PhysicalDistribute
----------------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = date_dim.d_date_sk)
----------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))otherCondition=()
------------------------------PhysicalProject
--------------------------------PhysicalOlapScan[web_sales]
------------------------------PhysicalDistribute
--------------------------------PhysicalProject
----------------------------------filter((date_dim.d_date <= 1998-05-06)(date_dim.d_date >= 1998-04-06))
----------------------------------filter((date_dim.d_date <= 1998-05-06) and (date_dim.d_date >= 1998-04-06))
------------------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalDistribute
----------------------------PhysicalProject
------------------------------filter(i_category IN ('Books', 'Sports', 'Men'))
------------------------------filter(i_category IN ('Books', 'Men', 'Sports'))
--------------------------------PhysicalOlapScan[item]

Loading

0 comments on commit 1637919

Please sign in to comment.