Skip to content

Commit

Permalink
1. condition order: filter/hashCondition/otherCondition,
Browse files Browse the repository at this point in the history
2. update regression out
3. remove tpch_sf500 shape case (covered by tpch sf1000)
4. implement is-null stats estimation
5. update ssb shape
  • Loading branch information
englefly committed Sep 25, 2023
1 parent 129ffb7 commit 5d1ad11
Show file tree
Hide file tree
Showing 182 changed files with 945 additions and 3,453 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.doris.nereids.trees.expressions.GreaterThan;
import org.apache.doris.nereids.trees.expressions.GreaterThanEqual;
import org.apache.doris.nereids.trees.expressions.InPredicate;
import org.apache.doris.nereids.trees.expressions.IsNull;
import org.apache.doris.nereids.trees.expressions.LessThan;
import org.apache.doris.nereids.trees.expressions.LessThanEqual;
import org.apache.doris.nereids.trees.expressions.Like;
Expand Down Expand Up @@ -386,6 +387,22 @@ public Statistics visitNot(Not not, EstimationContext context) {
return statisticsBuilder.build();
}

/**
 * Estimates statistics for an {@code IS NULL} predicate.
 *
 * <p>The operand's column statistics are estimated from the context's statistics. If they are
 * unknown, the result is built directly from the context's statistics with no column changes.
 * Otherwise the operand's column statistics are re-registered with both {@code count} and
 * {@code numNulls} set to the operand's null count; ndv/min/max are left as estimated.
 *
 * @param isNull  the IS NULL predicate being estimated
 * @param context estimation context carrying the input statistics
 * @return the statistics built after applying the predicate
 */
@Override
public Statistics visitIsNull(IsNull isNull, EstimationContext context) {
    final Statistics inputStats = context.statistics;
    final ColumnStatistic operandStats = ExpressionEstimation.estimate(isNull.child(), inputStats);
    final StatisticsBuilder resultBuilder = new StatisticsBuilder(inputStats);
    if (!operandStats.isUnKnown()) {
        final double nullCount = operandStats.numNulls;
        final ColumnStatisticBuilder operandBuilder = new ColumnStatisticBuilder(operandStats);
        // ndv/min/max are intentionally left untouched so that is-not-null keeps working
        operandBuilder.setCount(nullCount).setNumNulls(nullCount);
        resultBuilder.putColumnStatistics(isNull.child(), operandBuilder.build());
        // TODO: updateRowCountOnly() is deliberately not called so that is-not-null keeps working;
        //  this needs refactoring
    }
    return resultBuilder.build();
}

static class EstimationContext {
private final Statistics statistics;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ public String toString() {
/**
 * Renders this predicate as SQL text: the compared expression's SQL, the literal " IN ",
 * and the options' SQL strings joined by ", " inside parentheses.
 *
 * NOTE(review): this hunk shows the pre-change and post-change {@code .map(...)} lines back to
 * back without +/- markers. The second line, which additionally sorts the rendered option
 * strings, appears to be the replacement for the first — confirm against the repository.
 */
@Override
public String toSql() {
return compareExpr.toSql() + " IN " + options.stream()
.map(Expression::toSql)
.map(Expression::toSql).sorted()
.collect(Collectors.joining(", ", "(", ")"));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

/**
* Physical filter plan.
Expand Down Expand Up @@ -136,9 +137,14 @@ public PhysicalFilter<Plan> withConjunctsAndChild(Set<Expression> conjuncts, Pla
/**
 * Builds the textual shape description of this filter from its conjuncts' shape strings.
 *
 * NOTE(review): this hunk shows removed and added statements together without +/- markers.
 * The first three appends (unsorted, no separator) look like the removed version; the
 * following appends emit "filter" plus the conjunct shape strings sorted and joined by
 * " and " inside parentheses — confirm against the repository.
 */
@Override
public String shapeInfo() {
StringBuilder builder = new StringBuilder();
builder.append("filter(");
conjuncts.forEach(conjunct -> builder.append(conjunct.shapeInfo()));
builder.append(")");
builder.append("filter");
// Sort the rendered conjuncts so the output does not depend on the set's iteration order.
builder.append(
conjuncts.stream().map(conjunct -> conjunct.shapeInfo())
.sorted()
.collect(Collectors.joining(" and ", "(", ")")));
return builder.toString();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
Expand Down Expand Up @@ -274,43 +273,15 @@ public boolean pushDownRuntimeFilter(CascadesContext context, IdGenerator<Runtim
return pushedDown;
}

private class ExprComparator implements Comparator<Expression> {
@Override
public int compare(Expression e1, Expression e2) {
List<ExprId> ids1 = e1.getInputSlotExprIds()
.stream().sorted(Comparator.comparing(ExprId::asInt))
.collect(Collectors.toList());
List<ExprId> ids2 = e2.getInputSlotExprIds()
.stream().sorted(Comparator.comparing(ExprId::asInt))
.collect(Collectors.toList());
if (ids1.size() > ids2.size()) {
return 1;
} else if (ids1.size() < ids2.size()) {
return -1;
} else {
for (int i = 0; i < ids1.size(); i++) {
if (ids1.get(i).asInt() > ids2.get(i).asInt()) {
return 1;
} else if (ids1.get(i).asInt() < ids2.get(i).asInt()) {
return -1;
}
}
return 0;
}
}
}

/**
 * Builds the textual shape description of this hash join: "hashJoin[" + join type + "]"
 * followed by the shape strings of the hash conjuncts and the other join conjuncts.
 *
 * NOTE(review): this hunk shows removed and added statements together without +/- markers.
 * The two {@code forEach} statements using {@code ExprComparator} look like the removed
 * version; the two {@code builder.append(...joining...)} statements look like the
 * replacement — confirm against the repository.
 */
@Override
public String shapeInfo() {
StringBuilder builder = new StringBuilder();
builder.append("hashJoin[").append(joinType).append("]");
// print sorted hash conjuncts for plan check
hashJoinConjuncts.stream().sorted(new ExprComparator()).forEach(expr -> {
builder.append(expr.shapeInfo());
});
otherJoinConjuncts.stream().sorted(new ExprComparator()).forEach(expr -> {
builder.append(expr.shapeInfo());
});
// Conjunct shape strings are sorted as text and joined by " and "; the prefixes are
// " hashCondition=(" (with a leading space) and "otherCondition=(" (without one).
builder.append(hashJoinConjuncts.stream().map(conjunct -> conjunct.shapeInfo())
.sorted().collect(Collectors.joining(" and ", " hashCondition=(", ")")));
builder.append(otherJoinConjuncts.stream().map(cond -> cond.shapeInfo())
.sorted().collect(Collectors.joining(" and ", "otherCondition=(", ")")));
return builder.toString();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ PhysicalResultSink
----PhysicalDistribute
------hashAgg[LOCAL]
--------PhysicalProject
----------hashJoin[INNER_JOIN](lineorder.lo_orderdate = dates.d_datekey)
----------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_orderdate = dates.d_datekey))otherCondition=()
------------PhysicalProject
--------------filter((lineorder.lo_discount <= 3)(lineorder.lo_discount >= 1)(lineorder.lo_quantity < 25))
--------------filter((lineorder.lo_discount <= 3) and (lineorder.lo_discount >= 1) and (lineorder.lo_quantity < 25))
----------------PhysicalOlapScan[lineorder]
------------PhysicalDistribute
--------------PhysicalProject
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ PhysicalResultSink
----PhysicalDistribute
------hashAgg[LOCAL]
--------PhysicalProject
----------hashJoin[INNER_JOIN](lineorder.lo_orderdate = dates.d_datekey)
----------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_orderdate = dates.d_datekey))otherCondition=()
------------PhysicalProject
--------------filter((lineorder.lo_quantity <= 35)(lineorder.lo_discount <= 6)(lineorder.lo_discount >= 4)(lineorder.lo_quantity >= 26))
--------------filter((lineorder.lo_discount <= 6) and (lineorder.lo_discount >= 4) and (lineorder.lo_quantity <= 35) and (lineorder.lo_quantity >= 26))
----------------PhysicalOlapScan[lineorder]
------------PhysicalDistribute
--------------PhysicalProject
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ PhysicalResultSink
----PhysicalDistribute
------hashAgg[LOCAL]
--------PhysicalProject
----------hashJoin[INNER_JOIN](lineorder.lo_orderdate = dates.d_datekey)
----------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_orderdate = dates.d_datekey))otherCondition=()
------------PhysicalProject
--------------filter((lineorder.lo_discount <= 7)(lineorder.lo_discount >= 5)(lineorder.lo_quantity <= 35)(lineorder.lo_quantity >= 26))
--------------filter((lineorder.lo_discount <= 7) and (lineorder.lo_discount >= 5) and (lineorder.lo_quantity <= 35) and (lineorder.lo_quantity >= 26))
----------------PhysicalOlapScan[lineorder]
------------PhysicalDistribute
--------------PhysicalProject
----------------filter((dates.d_year = 1994)(dates.d_weeknuminyear = 6))
----------------filter((dates.d_weeknuminyear = 6) and (dates.d_year = 1994))
------------------PhysicalOlapScan[dates]

Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ PhysicalResultSink
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN](lineorder.lo_orderdate = dates.d_datekey)
------------------hashJoin[INNER_JOIN](lineorder.lo_suppkey = supplier.s_suppkey)
--------------------hashJoin[INNER_JOIN](lineorder.lo_partkey = part.p_partkey)
----------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_orderdate = dates.d_datekey))otherCondition=()
------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_suppkey = supplier.s_suppkey))otherCondition=()
--------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_partkey = part.p_partkey))otherCondition=()
----------------------PhysicalProject
------------------------PhysicalOlapScan[lineorder]
----------------------PhysicalDistribute
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,17 @@ PhysicalResultSink
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN](lineorder.lo_orderdate = dates.d_datekey)
----------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_orderdate = dates.d_datekey))otherCondition=()
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------hashJoin[INNER_JOIN](lineorder.lo_suppkey = supplier.s_suppkey)
----------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_suppkey = supplier.s_suppkey))otherCondition=()
------------------------PhysicalDistribute
--------------------------hashJoin[INNER_JOIN](lineorder.lo_partkey = part.p_partkey)
--------------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_partkey = part.p_partkey))otherCondition=()
----------------------------PhysicalProject
------------------------------PhysicalOlapScan[lineorder]
----------------------------PhysicalDistribute
------------------------------PhysicalProject
--------------------------------filter((part.p_brand >= 'MFGR#2221')(part.p_brand <= 'MFGR#2228'))
--------------------------------filter((part.p_brand <= 'MFGR#2228') and (part.p_brand >= 'MFGR#2221'))
----------------------------------PhysicalOlapScan[part]
------------------------PhysicalDistribute
--------------------------PhysicalProject
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ PhysicalResultSink
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN](lineorder.lo_orderdate = dates.d_datekey)
----------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_orderdate = dates.d_datekey))otherCondition=()
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------hashJoin[INNER_JOIN](lineorder.lo_suppkey = supplier.s_suppkey)
----------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_suppkey = supplier.s_suppkey))otherCondition=()
------------------------PhysicalDistribute
--------------------------hashJoin[INNER_JOIN](lineorder.lo_partkey = part.p_partkey)
--------------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_partkey = part.p_partkey))otherCondition=()
----------------------------PhysicalProject
------------------------------PhysicalOlapScan[lineorder]
----------------------------PhysicalDistribute
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ PhysicalResultSink
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN](lineorder.lo_orderdate = dates.d_datekey)
----------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_orderdate = dates.d_datekey))otherCondition=()
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN](lineorder.lo_custkey = customer.c_custkey)
--------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_custkey = customer.c_custkey))otherCondition=()
----------------------PhysicalDistribute
------------------------hashJoin[INNER_JOIN](lineorder.lo_suppkey = supplier.s_suppkey)
------------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_suppkey = supplier.s_suppkey))otherCondition=()
--------------------------PhysicalProject
----------------------------PhysicalOlapScan[lineorder]
--------------------------PhysicalDistribute
Expand All @@ -25,6 +25,6 @@ PhysicalResultSink
----------------------------PhysicalOlapScan[customer]
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------filter((dates.d_year <= 1997)(dates.d_year >= 1992))
----------------------filter((dates.d_year <= 1997) and (dates.d_year >= 1992))
------------------------PhysicalOlapScan[dates]

Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ PhysicalResultSink
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN](lineorder.lo_orderdate = dates.d_datekey)
----------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_orderdate = dates.d_datekey))otherCondition=()
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN](lineorder.lo_custkey = customer.c_custkey)
--------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_custkey = customer.c_custkey))otherCondition=()
----------------------PhysicalDistribute
------------------------hashJoin[INNER_JOIN](lineorder.lo_suppkey = supplier.s_suppkey)
------------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_suppkey = supplier.s_suppkey))otherCondition=()
--------------------------PhysicalProject
----------------------------PhysicalOlapScan[lineorder]
--------------------------PhysicalDistribute
Expand All @@ -25,6 +25,6 @@ PhysicalResultSink
----------------------------PhysicalOlapScan[customer]
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------filter((dates.d_year <= 1997)(dates.d_year >= 1992))
----------------------filter((dates.d_year <= 1997) and (dates.d_year >= 1992))
------------------------PhysicalOlapScan[dates]

12 changes: 6 additions & 6 deletions regression-test/data/nereids_ssb_shape_sf100_p0/shape/q3.3.out
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,24 @@ PhysicalResultSink
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN](lineorder.lo_orderdate = dates.d_datekey)
----------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_orderdate = dates.d_datekey))otherCondition=()
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------hashJoin[INNER_JOIN](lineorder.lo_custkey = customer.c_custkey)
----------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_custkey = customer.c_custkey))otherCondition=()
------------------------PhysicalDistribute
--------------------------hashJoin[INNER_JOIN](lineorder.lo_suppkey = supplier.s_suppkey)
--------------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_suppkey = supplier.s_suppkey))otherCondition=()
----------------------------PhysicalProject
------------------------------PhysicalOlapScan[lineorder]
----------------------------PhysicalDistribute
------------------------------PhysicalProject
--------------------------------filter(s_city IN ('UNITED KI5', 'UNITED KI1'))
--------------------------------filter(s_city IN ('UNITED KI1', 'UNITED KI5'))
----------------------------------PhysicalOlapScan[supplier]
------------------------PhysicalDistribute
--------------------------PhysicalProject
----------------------------filter(c_city IN ('UNITED KI5', 'UNITED KI1'))
----------------------------filter(c_city IN ('UNITED KI1', 'UNITED KI5'))
------------------------------PhysicalOlapScan[customer]
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------filter((dates.d_year <= 1997)(dates.d_year >= 1992))
----------------------filter((dates.d_year <= 1997) and (dates.d_year >= 1992))
------------------------PhysicalOlapScan[dates]

10 changes: 5 additions & 5 deletions regression-test/data/nereids_ssb_shape_sf100_p0/shape/q3.4.out
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,23 @@ PhysicalResultSink
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN](lineorder.lo_custkey = customer.c_custkey)
----------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_custkey = customer.c_custkey))otherCondition=()
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------hashJoin[INNER_JOIN](lineorder.lo_orderdate = dates.d_datekey)
------------------------hashJoin[INNER_JOIN](lineorder.lo_suppkey = supplier.s_suppkey)
----------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_orderdate = dates.d_datekey))otherCondition=()
------------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_suppkey = supplier.s_suppkey))otherCondition=()
--------------------------PhysicalProject
----------------------------PhysicalOlapScan[lineorder]
--------------------------PhysicalDistribute
----------------------------PhysicalProject
------------------------------filter(s_city IN ('UNITED KI5', 'UNITED KI1'))
------------------------------filter(s_city IN ('UNITED KI1', 'UNITED KI5'))
--------------------------------PhysicalOlapScan[supplier]
------------------------PhysicalDistribute
--------------------------PhysicalProject
----------------------------filter((dates.d_yearmonth = 'Dec1997'))
------------------------------PhysicalOlapScan[dates]
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------filter(c_city IN ('UNITED KI5', 'UNITED KI1'))
----------------------filter(c_city IN ('UNITED KI1', 'UNITED KI5'))
------------------------PhysicalOlapScan[customer]

Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ PhysicalResultSink
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN](lineorder.lo_orderdate = dates.d_datekey)
------------------hashJoin[INNER_JOIN](lineorder.lo_partkey = part.p_partkey)
----------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_orderdate = dates.d_datekey))otherCondition=()
------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_partkey = part.p_partkey))otherCondition=()
--------------------PhysicalDistribute
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN](lineorder.lo_custkey = customer.c_custkey)
------------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_custkey = customer.c_custkey))otherCondition=()
--------------------------PhysicalDistribute
----------------------------hashJoin[INNER_JOIN](lineorder.lo_suppkey = supplier.s_suppkey)
----------------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_suppkey = supplier.s_suppkey))otherCondition=()
------------------------------PhysicalProject
--------------------------------PhysicalOlapScan[lineorder]
------------------------------PhysicalDistribute
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ PhysicalResultSink
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN](lineorder.lo_partkey = part.p_partkey)
----------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_partkey = part.p_partkey))otherCondition=()
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------hashJoin[INNER_JOIN](lineorder.lo_custkey = customer.c_custkey)
----------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_custkey = customer.c_custkey))otherCondition=()
------------------------PhysicalDistribute
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN](lineorder.lo_orderdate = dates.d_datekey)
------------------------------hashJoin[INNER_JOIN](lineorder.lo_suppkey = supplier.s_suppkey)
----------------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_orderdate = dates.d_datekey))otherCondition=()
------------------------------hashJoin[INNER_JOIN] hashCondition=((lineorder.lo_suppkey = supplier.s_suppkey))otherCondition=()
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[lineorder]
--------------------------------PhysicalDistribute
Expand Down
Loading

0 comments on commit 5d1ad11

Please sign in to comment.