Skip to content

Commit

Permalink
set lower bound for range-selectivity
Browse files Browse the repository at this point in the history
  • Loading branch information
englefly committed Aug 29, 2024
1 parent d7eb343 commit 547828d
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@
*/
public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationContext> {
public static final double DEFAULT_INEQUALITY_COEFFICIENT = 0.5;
// Range selectivity is prone to producing outliers, so we clamp it to this lower bound.
// The bound approximates selecting one month out of fifty years: 1 / (50 * 12), roughly 0.0016.
public static final double RANGE_SELECTIVITY_THRESHOLD = 0.0016;
public static final double DEFAULT_IN_COEFFICIENT = 1.0 / 3.0;

public static final double DEFAULT_LIKE_COMPARISON_SELECTIVITY = 0.2;
Expand Down Expand Up @@ -600,6 +603,8 @@ private Statistics estimateBinaryComparisonFilter(Expression leftExpr, DataType
double sel = leftRange.overlapPercentWith(rightRange);
if (!(dataType instanceof RangeScalable) && (sel != 0.0 && sel != 1.0)) {
sel = DEFAULT_INEQUALITY_COEFFICIENT;
} else if (sel < RANGE_SELECTIVITY_THRESHOLD) {
sel = RANGE_SELECTIVITY_THRESHOLD;
}
sel = getNotNullSelectivity(leftStats, sel);
updatedStatistics = context.statistics.withSel(sel);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.apache.doris.nereids.trees.expressions.Or;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Left;
import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral;
import org.apache.doris.nereids.trees.expressions.literal.DateLiteral;
import org.apache.doris.nereids.trees.expressions.literal.DoubleLiteral;
import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
Expand Down Expand Up @@ -1365,4 +1366,32 @@ public void testStringRangeColToCol() {
Statistics agrtc = new FilterEstimation().estimate(new GreaterThan(a, c), baseStats);
Assertions.assertEquals(50, agrtc.getRowCount());
}

/**
 * Checks that a range predicate selecting only a tiny slice of a very wide column
 * range is estimated at the {@link FilterEstimation#RANGE_SELECTIVITY_THRESHOLD}
 * floor rather than at its raw (much smaller) overlap ratio.
 *
 * <p>Column {@code a} is given min = 0 and max = 1,000,000,000 over 1,600,000,000 rows.
 * Both {@code a < 50000} and {@code a > max - 5000} are expected to produce
 * {@code rowCount * RANGE_SELECTIVITY_THRESHOLD} output rows.
 */
@Test
public void testLargeRange() {
    SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
    // Upper bound of the column range: one billion.
    long oneBillion = 1000000000;
    long row = 1600000000;
    ColumnStatistic colStats = new ColumnStatisticBuilder()
            .setAvgSizeByte(10)
            .setCount(row)
            .setNdv(10000)
            .setMinExpr(new IntLiteral(0))
            .setMinValue(0)
            .setMaxExpr(new IntLiteral(oneBillion))
            .setMaxValue(oneBillion)
            .build();
    Statistics stats = new StatisticsBuilder()
            .setRowCount(row)
            .putColumnStatistics(a, colStats)
            .build();

    // Both predicates below are expected to be clamped to the same lower bound.
    double expectedRowCount = row * FilterEstimation.RANGE_SELECTIVITY_THRESHOLD;
    // Tolerance for floating-point rounding in the estimator's arithmetic.
    double delta = 1e-6;

    FilterEstimation estimation = new FilterEstimation();

    // Narrow slice at the low end of the range.
    Expression less = new LessThan(a, new IntegerLiteral(50000));
    Statistics out = estimation.estimate(less, stats);
    // JUnit argument order is (expected, actual, delta).
    Assertions.assertEquals(expectedRowCount, out.getRowCount(), delta);

    // Narrow slice at the high end of the range.
    Expression greater = new GreaterThan(a, new BigIntLiteral(oneBillion - 5000L));
    out = estimation.estimate(greater, stats);
    Assertions.assertEquals(expectedRowCount, out.getRowCount(), delta);
}
}

0 comments on commit 547828d

Please sign in to comment.