Skip to content

Commit

Permalink
[opt](nereids) set lower bound for range-selectivity (apache#40089)
Browse files Browse the repository at this point in the history
## Proposed changes
Range selectivity estimation is prone to producing outliers, so this change
adds a lower-bound threshold for it.
The threshold value is based on the selectivity of selecting one month out
of fifty years.

Issue Number: close #xxx

<!--Describe your changes.-->
  • Loading branch information
englefly authored Aug 30, 2024
1 parent 20af746 commit d580a0a
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@
*/
public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationContext> {
public static final double DEFAULT_INEQUALITY_COEFFICIENT = 0.5;
// Range selectivity is prone to producing outliers, so this value is used as a lower bound.
// It approximates the selectivity of selecting one month out of fifty years (1 / 600).
public static final double RANGE_SELECTIVITY_THRESHOLD = 0.0016;
public static final double DEFAULT_IN_COEFFICIENT = 1.0 / 3.0;

public static final double DEFAULT_LIKE_COMPARISON_SELECTIVITY = 0.2;
Expand Down Expand Up @@ -600,6 +603,8 @@ private Statistics estimateBinaryComparisonFilter(Expression leftExpr, DataType
double sel = leftRange.overlapPercentWith(rightRange);
if (!(dataType instanceof RangeScalable) && (sel != 0.0 && sel != 1.0)) {
sel = DEFAULT_INEQUALITY_COEFFICIENT;
} else if (sel < RANGE_SELECTIVITY_THRESHOLD) {
sel = RANGE_SELECTIVITY_THRESHOLD;
}
sel = getNotNullSelectivity(leftStats, sel);
updatedStatistics = context.statistics.withSel(sel);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.apache.doris.nereids.trees.expressions.Or;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Left;
import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral;
import org.apache.doris.nereids.trees.expressions.literal.DateLiteral;
import org.apache.doris.nereids.trees.expressions.literal.DoubleLiteral;
import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
Expand Down Expand Up @@ -1365,4 +1366,32 @@ public void testStringRangeColToCol() {
Statistics agrtc = new FilterEstimation().estimate(new GreaterThan(a, c), baseStats);
Assertions.assertEquals(50, agrtc.getRowCount());
}

/**
 * Verifies that very narrow range predicates on a wide integer column are estimated at
 * {@link FilterEstimation#RANGE_SELECTIVITY_THRESHOLD} instead of a much smaller value.
 *
 * <p>The column statistics describe values in [0, 1_000_000_000]; both predicates below
 * cover only a tiny slice at one end of that range, so the expected row count is
 * {@code rowCount * RANGE_SELECTIVITY_THRESHOLD}.
 */
@Test
public void testLargeRange() {
    SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
    long oneBillion = 1000000000;
    long row = 1600000000;
    ColumnStatistic colStats = new ColumnStatisticBuilder()
            .setAvgSizeByte(10)
            .setCount(row)
            .setNdv(10000)
            .setMinExpr(new IntLiteral(0))
            .setMinValue(0)
            .setMaxExpr(new IntLiteral(oneBillion))
            .setMaxValue(oneBillion)
            .build();
    Statistics stats = new StatisticsBuilder()
            .setRowCount(row)
            .putColumnStatistics(a, colStats)
            .build();

    // Both predicates are expected to be clamped to the same lower bound.
    double expectedRows = row * FilterEstimation.RANGE_SELECTIVITY_THRESHOLD;
    // Row counts are doubles; tolerate rounding noise instead of requiring bit equality.
    double delta = 1e-6;
    FilterEstimation estimation = new FilterEstimation();

    // a < 50000: a narrow slice at the low end of the column's value range.
    Expression less = new LessThan(a, new IntegerLiteral(50000));
    Statistics out = estimation.estimate(less, stats);
    // JUnit argument order is (expected, actual, delta).
    Assertions.assertEquals(expectedRows, out.getRowCount(), delta);

    // a > max - 5000: a narrow slice at the high end of the column's value range.
    Expression greater = new GreaterThan(a, new BigIntLiteral(oneBillion - 5000L));
    out = estimation.estimate(greater, stats);
    Assertions.assertEquals(expectedRows, out.getRowCount(), delta);
}
}

0 comments on commit d580a0a

Please sign in to comment.