Skip to content

Commit

Permalink
fix bug for A>n, where A.max is infinity
Browse files Browse the repository at this point in the history
  • Loading branch information
englefly committed Sep 3, 2024
1 parent d580a0a commit 0c671a0
Show file tree
Hide file tree
Showing 16 changed files with 125 additions and 76 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -600,11 +600,13 @@ private Statistics estimateBinaryComparisonFilter(Expression leftExpr, DataType
.setMaxExpr(intersectRange.getHighExpr())
.setNdv(intersectRange.getDistinctValues())
.setNumNulls(0);
double sel = leftRange.overlapPercentWith(rightRange);
double sel = leftRange.getDistinctValues() == 0
? 1.0
: intersectRange.getDistinctValues() / leftRange.getDistinctValues();
if (!(dataType instanceof RangeScalable) && (sel != 0.0 && sel != 1.0)) {
sel = DEFAULT_INEQUALITY_COEFFICIENT;
} else if (sel < RANGE_SELECTIVITY_THRESHOLD) {
sel = RANGE_SELECTIVITY_THRESHOLD;
} else {
sel = Math.max(sel, RANGE_SELECTIVITY_THRESHOLD);
}
sel = getNotNullSelectivity(leftStats, sel);
updatedStatistics = context.statistics.withSel(sel);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ public boolean isInfinite() {
return Double.isInfinite(low) || Double.isInfinite(high);
}

/**
 * Reports whether this range is infinite, but not on both sides.
 *
 * @return {@code true} when {@code isInfinite()} holds and {@code isBothInfinite()} does not
 */
public boolean isOneSideInfinite() {
    // A range with no infinite bound at all can never qualify.
    if (!isInfinite()) {
        return false;
    }
    return !isBothInfinite();
}

/**
 * Reports whether both bounds are finite numbers.
 *
 * @return {@code true} only if neither {@code low} nor {@code high} is infinite or NaN
 */
public boolean isFinite() {
    // Double.isFinite rejects NaN as well as positive/negative infinity.
    if (!Double.isFinite(low)) {
        return false;
    }
    return Double.isFinite(high);
}
Expand Down Expand Up @@ -175,8 +179,7 @@ public Pair<Double, LiteralExpr> maxPair(double r1, LiteralExpr e1, double r2, L
}

public StatisticRange cover(StatisticRange other) {
// double newLow = Math.max(low, other.low);
// double newHigh = Math.min(high, other.high);
StatisticRange resultRange;
Pair<Double, LiteralExpr> biggerLow = maxPair(low, lowExpr, other.low, other.lowExpr);
double newLow = biggerLow.first;
LiteralExpr newLowExpr = biggerLow.second;
Expand All @@ -188,9 +191,18 @@ public StatisticRange cover(StatisticRange other) {
double overlapPercentOfLeft = overlapPercentWith(other);
double overlapDistinctValuesLeft = overlapPercentOfLeft * distinctValues;
double coveredDistinctValues = minExcludeNaN(distinctValues, overlapDistinctValuesLeft);
return new StatisticRange(newLow, newLowExpr, newHigh, newHighExpr, coveredDistinctValues, dataType);
if (this.isBothInfinite() && other.isOneSideInfinite()) {
resultRange = new StatisticRange(newLow, newLowExpr, newHigh, newHighExpr,
distinctValues * INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR,
dataType);
} else {
resultRange = new StatisticRange(newLow, newLowExpr, newHigh, newHighExpr, coveredDistinctValues,
dataType);
}
} else {
resultRange = empty(dataType);
}
return empty(dataType);
return resultRange;
}

public StatisticRange union(StatisticRange other) {
Expand Down Expand Up @@ -241,6 +253,6 @@ public double getDistinctValues() {

@Override
public String toString() {
return "(" + lowExpr + "," + highExpr + ")";
return "range=(" + lowExpr + "," + highExpr + "), ndv=" + distinctValues;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1394,4 +1394,39 @@ public void testLargeRange() {
out = estimation.estimate(greater, stats);
Assertions.assertEquals(out.getRowCount(), row * FilterEstimation.RANGE_SELECTIVITY_THRESHOLD);
}

@Test
void testAndWithInfinity() {
    // Neither column below is given an explicit min/max; only ndv, average size,
    // null count and row count are set.
    // NOTE(review): presumably this leaves both column ranges unbounded, which is
    // the "max is infinity" case the commit title refers to -- confirm against
    // ColumnStatisticBuilder defaults.
    Double totalRows = 1000.0;

    // Column "a": varchar(25) with 10 distinct values.
    SlotReference strSlot = new SlotReference("a", new VarcharType(25));
    ColumnStatisticBuilder strColumnStats = new ColumnStatisticBuilder()
            .setNdv(10)
            .setAvgSizeByte(4)
            .setNumNulls(0)
            .setCount(totalRows);

    // Column "b": integer with 488 distinct values.
    SlotReference intSlot = new SlotReference("b", IntegerType.INSTANCE);
    ColumnStatisticBuilder intColumnStats = new ColumnStatisticBuilder()
            .setNdv(488)
            .setAvgSizeByte(25)
            .setNumNulls(0)
            .setCount(totalRows);

    StatisticsBuilder inputStatsBuilder = new StatisticsBuilder();
    inputStatsBuilder.setRowCount(totalRows);
    inputStatsBuilder.putColumnStatistics(strSlot, strColumnStats.build());
    inputStatsBuilder.putColumnStatistics(intSlot, intColumnStats.build());

    // a >= '2024-05-14' alone is expected to keep half of the rows.
    Expression strGE = new GreaterThanEqual(strSlot,
            new org.apache.doris.nereids.trees.expressions.literal.StringLiteral("2024-05-14"));
    Statistics strOnlyResult = new FilterEstimation().estimate(strGE, inputStatsBuilder.build());
    Assertions.assertEquals(500, strOnlyResult.getRowCount());

    // b > 0 alone is expected to keep half of the rows as well.
    Expression intGT = new GreaterThan(intSlot, new IntegerLiteral(0));
    Statistics intOnlyResult = new FilterEstimation().estimate(intGT, inputStatsBuilder.build());
    Assertions.assertEquals(500, intOnlyResult.getRowCount());

    // The conjunction of both predicates is expected to keep a quarter of the rows.
    Expression conjunction = new And(strGE, intGT);
    Statistics conjunctionResult = new FilterEstimation().estimate(conjunction, inputStatsBuilder.build());
    Assertions.assertEquals(250, conjunctionResult.getRowCount());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,15 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------------filter((t_w_secyear.dyear = 2002) and (t_w_secyear.sale_type = 'w'))
------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------PhysicalProject
----------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=()
------------------PhysicalProject
--------------------filter((t_w_firstyear.dyear = 2001) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.00))
----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=()
------------------hashJoin[INNER_JOIN shuffle] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=()
--------------------PhysicalProject
----------------------filter((t_s_secyear.dyear = 2002) and (t_s_secyear.sale_type = 's'))
------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------PhysicalProject
----------------------filter((t_s_firstyear.dyear = 2001) and (t_s_firstyear.sale_type = 's') and (t_s_firstyear.year_total > 0.00))
------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------------PhysicalProject
--------------------filter((t_w_firstyear.dyear = 2001) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.00))
----------------------PhysicalCteConsumer ( cteId=CTEId#0 )

Original file line number Diff line number Diff line change
Expand Up @@ -51,25 +51,25 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------------filter((t_w_secyear.dyear = 2000) and (t_w_secyear.sale_type = 'w'))
------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------PhysicalProject
----------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=()
------------------PhysicalProject
--------------------filter((t_w_firstyear.dyear = 1999) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.000000))
----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=()
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((t_s_firstyear.customer_id = t_c_secyear.customer_id)) otherCondition=((if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL) > if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL)))
----------------------PhysicalProject
------------------------filter((t_c_secyear.dyear = 2000) and (t_c_secyear.sale_type = 'c'))
--------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((t_s_firstyear.customer_id = t_c_firstyear.customer_id)) otherCondition=()
--------------------------PhysicalProject
----------------------------filter((t_c_firstyear.dyear = 1999) and (t_c_firstyear.sale_type = 'c') and (t_c_firstyear.year_total > 0.000000))
------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_firstyear.customer_id = t_c_firstyear.customer_id)) otherCondition=()
--------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=()
----------------------------PhysicalProject
------------------------------filter((t_s_secyear.dyear = 2000) and (t_s_secyear.sale_type = 's'))
--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------------PhysicalProject
------------------------------filter((t_s_firstyear.dyear = 1999) and (t_s_firstyear.sale_type = 's') and (t_s_firstyear.year_total > 0.000000))
--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------------PhysicalProject
----------------------------filter((t_c_firstyear.dyear = 1999) and (t_c_firstyear.sale_type = 'c') and (t_c_firstyear.year_total > 0.000000))
------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------------PhysicalProject
--------------------filter((t_w_firstyear.dyear = 1999) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.000000))
----------------------PhysicalCteConsumer ( cteId=CTEId#0 )

Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,15 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------------filter((t_w_secyear.dyear = 2002) and (t_w_secyear.sale_type = 'w'))
------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------PhysicalProject
----------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=()
------------------PhysicalProject
--------------------filter((t_w_firstyear.dyear = 2001) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.00))
----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=()
------------------hashJoin[INNER_JOIN shuffle] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=()
--------------------PhysicalProject
----------------------filter((t_s_secyear.dyear = 2002) and (t_s_secyear.sale_type = 's'))
------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------PhysicalProject
----------------------filter((t_s_firstyear.dyear = 2001) and (t_s_firstyear.sale_type = 's') and (t_s_firstyear.year_total > 0.00))
------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------------PhysicalProject
--------------------filter((t_w_firstyear.dyear = 2001) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.00))
----------------------PhysicalCteConsumer ( cteId=CTEId#0 )

Original file line number Diff line number Diff line change
Expand Up @@ -51,25 +51,25 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------------filter((t_w_secyear.dyear = 2000) and (t_w_secyear.sale_type = 'w'))
------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------PhysicalProject
----------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=()
------------------PhysicalProject
--------------------filter((t_w_firstyear.dyear = 1999) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.000000))
----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=()
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((t_s_firstyear.customer_id = t_c_secyear.customer_id)) otherCondition=((if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL) > if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL)))
----------------------PhysicalProject
------------------------filter((t_c_secyear.dyear = 2000) and (t_c_secyear.sale_type = 'c'))
--------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((t_s_firstyear.customer_id = t_c_firstyear.customer_id)) otherCondition=()
--------------------------PhysicalProject
----------------------------filter((t_c_firstyear.dyear = 1999) and (t_c_firstyear.sale_type = 'c') and (t_c_firstyear.year_total > 0.000000))
------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_firstyear.customer_id = t_c_firstyear.customer_id)) otherCondition=()
--------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=()
----------------------------PhysicalProject
------------------------------filter((t_s_secyear.dyear = 2000) and (t_s_secyear.sale_type = 's'))
--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------------PhysicalProject
------------------------------filter((t_s_firstyear.dyear = 1999) and (t_s_firstyear.sale_type = 's') and (t_s_firstyear.year_total > 0.000000))
--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------------PhysicalProject
----------------------------filter((t_c_firstyear.dyear = 1999) and (t_c_firstyear.sale_type = 'c') and (t_c_firstyear.year_total > 0.000000))
------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------------PhysicalProject
--------------------filter((t_w_firstyear.dyear = 1999) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.000000))
----------------------PhysicalCteConsumer ( cteId=CTEId#0 )

Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,10 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
--------------------filter((t_w_firstyear.dyear = 1999) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.000000))
----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_firstyear.customer_id = t_c_secyear.customer_id)) otherCondition=((if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL) > if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL)))
--------------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((t_s_firstyear.customer_id = t_c_secyear.customer_id)) otherCondition=((if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL) > if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL)))
----------------------PhysicalProject
------------------------filter((t_c_secyear.dyear = 2000) and (t_c_secyear.sale_type = 'c'))
--------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_firstyear.customer_id = t_c_firstyear.customer_id)) otherCondition=()
--------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=()
Expand All @@ -69,7 +72,4 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
--------------------------PhysicalProject
----------------------------filter((t_c_firstyear.dyear = 1999) and (t_c_firstyear.sale_type = 'c') and (t_c_firstyear.year_total > 0.000000))
------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------PhysicalProject
------------------------filter((t_c_secyear.dyear = 2000) and (t_c_secyear.sale_type = 'c'))
--------------------------PhysicalCteConsumer ( cteId=CTEId#0 )

Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,10 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
--------------------filter((t_w_firstyear.dyear = 1999) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.000000))
----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_firstyear.customer_id = t_c_secyear.customer_id)) otherCondition=((if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL) > if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL)))
--------------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((t_s_firstyear.customer_id = t_c_secyear.customer_id)) otherCondition=((if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL) > if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL)))
----------------------PhysicalProject
------------------------filter((t_c_secyear.dyear = 2000) and (t_c_secyear.sale_type = 'c'))
--------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_firstyear.customer_id = t_c_firstyear.customer_id)) otherCondition=()
--------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=()
Expand All @@ -69,7 +72,4 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
--------------------------PhysicalProject
----------------------------filter((t_c_firstyear.dyear = 1999) and (t_c_firstyear.sale_type = 'c') and (t_c_firstyear.year_total > 0.000000))
------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------PhysicalProject
------------------------filter((t_c_secyear.dyear = 2000) and (t_c_secyear.sale_type = 'c'))
--------------------------PhysicalCteConsumer ( cteId=CTEId#0 )

Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,15 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------------filter((t_w_secyear.dyear = 2002) and (t_w_secyear.sale_type = 'w'))
------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------PhysicalProject
----------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=()
------------------PhysicalProject
--------------------filter((t_w_firstyear.dyear = 2001) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.00))
----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=()
------------------hashJoin[INNER_JOIN shuffle] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=()
--------------------PhysicalProject
----------------------filter((t_s_secyear.dyear = 2002) and (t_s_secyear.sale_type = 's'))
------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------PhysicalProject
----------------------filter((t_s_firstyear.dyear = 2001) and (t_s_firstyear.sale_type = 's') and (t_s_firstyear.year_total > 0.00))
------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------------PhysicalProject
--------------------filter((t_w_firstyear.dyear = 2001) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.00))
----------------------PhysicalCteConsumer ( cteId=CTEId#0 )

Loading

0 comments on commit 0c671a0

Please sign in to comment.