diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index 2839a817700794..1628c3b7d72e3f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -67,6 +67,9 @@
  */
 public class FilterEstimation extends ExpressionVisitor {
     public static final double DEFAULT_INEQUALITY_COEFFICIENT = 0.5;
+    // Range selectivity estimates are prone to extreme outliers, so they are clamped to this lower bound.
+    // The bound models selecting one month out of fifty years: 1 / (12 * 50), rounded down to 0.0016.
+    public static final double RANGE_SELECTIVITY_THRESHOLD = 0.0016;
     public static final double DEFAULT_IN_COEFFICIENT = 1.0 / 3.0;
     public static final double DEFAULT_LIKE_COMPARISON_SELECTIVITY = 0.2;
 
@@ -600,6 +603,8 @@ private Statistics estimateBinaryComparisonFilter(Expression leftExpr, DataType
             double sel = leftRange.overlapPercentWith(rightRange);
             if (!(dataType instanceof RangeScalable) && (sel != 0.0 && sel != 1.0)) {
                 sel = DEFAULT_INEQUALITY_COEFFICIENT;
+            } else if (sel < RANGE_SELECTIVITY_THRESHOLD) {
+                sel = RANGE_SELECTIVITY_THRESHOLD;
             }
             sel = getNotNullSelectivity(leftStats, sel);
             updatedStatistics = context.statistics.withSel(sel);
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index d7c44e082cf52e..0158dd9587c70b 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -34,6 +34,7 @@
 import org.apache.doris.nereids.trees.expressions.Or;
 import org.apache.doris.nereids.trees.expressions.SlotReference;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Left;
+import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.DateLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.DoubleLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
@@ -1365,4 +1366,34 @@ public void testStringRangeColToCol() {
         Statistics agrtc = new FilterEstimation().estimate(new GreaterThan(a, c), baseStats);
         Assertions.assertEquals(50, agrtc.getRowCount());
     }
+
+    @Test
+    public void testLargeRange() {
+        SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+        long oneBillion = 1_000_000_000L;
+        long row = 1_600_000_000L;
+        ColumnStatistic colStats = new ColumnStatisticBuilder()
+                .setAvgSizeByte(10)
+                .setCount(row)
+                .setNdv(10000)
+                .setMinExpr(new IntLiteral(0))
+                .setMinValue(0)
+                .setMaxExpr(new IntLiteral(oneBillion))
+                .setMaxValue(oneBillion)
+                .build();
+        Statistics stats = new StatisticsBuilder()
+                .setRowCount(row)
+                .putColumnStatistics(a, colStats)
+                .build();
+        Expression less = new LessThan(a, new IntegerLiteral(50000));
+        FilterEstimation estimation = new FilterEstimation();
+        Statistics out = estimation.estimate(less, stats);
+        // a < 50000 is a tiny slice of the column range [0, 1e9]; the estimate is expected to be the threshold.
+        Assertions.assertEquals(row * FilterEstimation.RANGE_SELECTIVITY_THRESHOLD, out.getRowCount());
+
+        Expression greater = new GreaterThan(a, new BigIntLiteral(oneBillion - 5000L));
+        out = estimation.estimate(greater, stats);
+        // a > 1e9 - 5000 is a tiny slice at the top of the range; the same threshold estimate is expected.
+        Assertions.assertEquals(row * FilterEstimation.RANGE_SELECTIVITY_THRESHOLD, out.getRowCount());
+    }
 }