diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java index f46533b14843686..c592cfeb0869b2b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java @@ -580,7 +580,7 @@ public ColumnStatistic visitToDate(ToDate toDate, Statistics context) { ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(childColumnStats) .setAvgSizeByte(toDate.getDataType().width()) .setDataSize(toDate.getDataType().width() * context.getRowCount()); - if (childColumnStats.minOrMaxIsInf()) { + if (childColumnStats.isMinMaxInvalid()) { return columnStatisticBuilder.build(); } double minValue; @@ -611,7 +611,7 @@ public ColumnStatistic visitToDays(ToDays toDays, Statistics context) { ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(childColumnStats) .setAvgSizeByte(toDays.getDataType().width()) .setDataSize(toDays.getDataType().width() * context.getRowCount()); - if (childColumnStats.minOrMaxIsInf()) { + if (childColumnStats.isMinMaxInvalid()) { return columnStatisticBuilder.build(); } double minValue; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index 36b26d74559f90d..dd02b86c9f17b1c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -360,20 +360,14 @@ private Statistics estimateColumnToColumn(ComparisonPredicate cp, EstimationCont } } - @Override - public Statistics visitInPredicate(InPredicate inPredicate, EstimationContext context) { - Expression compareExpr = inPredicate.getCompareExpr(); - ColumnStatistic compareExprStats = ExpressionEstimation.estimate(compareExpr, context.statistics); - if (compareExprStats.isUnKnown || compareExpr instanceof Function) { - return context.statistics.withSel(DEFAULT_IN_COEFFICIENT); - } + private ColumnStatistic updateInPredicateColumnStatistics(InPredicate inPredicate, EstimationContext context, + ColumnStatistic compareExprStats) { List options = inPredicate.getOptions(); + Expression compareExpr = inPredicate.getCompareExpr(); ColumnStatisticBuilder compareExprStatsBuilder = new ColumnStatisticBuilder(compareExprStats); - double selectivity = 1.0; - - // for string type, not use min/max to calc selectivity; other type can use them. DataType compareColumnType = compareExpr.getDataType(); - if (compareColumnType.isMinMaxSensitiveType()) { + + if (compareColumnType instanceof RangeScalable && !compareExprStats.isMinMaxInvalid()) { // init minOption and maxOption by compareExpr.max and compareExpr.min respectively, // and then adjust min/max by options double minOptionValue = compareExprStats.maxValue; @@ -428,9 +422,6 @@ A not in (1, 2, 3, 100): int newNdv = nonLiteralOptionCount + validInOptCount; if (newNdv < compareExprStats.ndv) { compareExprStatsBuilder.setNdv(newNdv); - selectivity = StatsMathUtil.divide(newNdv, compareExprStats.ndv); - } else { - selectivity = 1.0; } } else { maxOptionValue = Math.min(maxOptionValue, compareExprStats.maxValue); @@ -441,25 +432,42 @@ A not in (1, 2, 3, 100): compareExprStatsBuilder.setMinExpr(minOptionLiteral); if (validInOptCount < compareExprStats.ndv) { compareExprStatsBuilder.setNdv(validInOptCount); - selectivity = StatsMathUtil.divide(validInOptCount, compareExprStats.ndv); - } else { - selectivity = 1.0; } } } else { // other types, such as string type, using option's size to estimate // min/max will not be updated - selectivity = Math.min(options.size() / compareExprStats.getOriginalNdv(), 1); - selectivity = Statistics.getValidSelectivity(selectivity); compareExprStatsBuilder.setNdv(Math.min(options.size(), compareExprStats.getOriginalNdv())); } - compareExprStatsBuilder.setNumNulls(0); + return compareExprStatsBuilder.build(); + } + + @Override + public Statistics visitInPredicate(InPredicate inPredicate, EstimationContext context) { + Expression compareExpr = inPredicate.getCompareExpr(); + ColumnStatistic compareExprStats = ExpressionEstimation.estimate(compareExpr, context.statistics); + if (compareExprStats.isUnKnown || compareExpr instanceof Function) { + return context.statistics.withSel(DEFAULT_IN_COEFFICIENT); + } + + DataType compareColumnType = compareExpr.getDataType(); + List options = inPredicate.getOptions(); + double selectivity; + ColumnStatistic newCompareExprStats = updateInPredicateColumnStatistics(inPredicate, context, compareExprStats); + // TODO: need to calc nonNullSelectivity + if (compareColumnType instanceof RangeScalable && !newCompareExprStats.isMinMaxInvalid()) { + selectivity = Statistics.getValidSelectivity( + Math.min(StatsMathUtil.divide(newCompareExprStats.ndv, compareExprStats.ndv), 1)); + } else { + selectivity = Statistics.getValidSelectivity( + Math.min(options.size() / compareExprStats.getOriginalNdv(), 1)); + } + Statistics estimated = new StatisticsBuilder(context.statistics).build(); - ColumnStatistic stats = compareExprStatsBuilder.build(); - selectivity = getNotNullSelectivity(stats, selectivity); + selectivity = getNotNullSelectivity(newCompareExprStats, selectivity); estimated = estimated.withSel(selectivity); - estimated.addColumnStats(compareExpr, stats); + estimated.addColumnStats(compareExpr, newCompareExprStats); context.addKeyIfSlot(compareExpr); return estimated; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java index 0062fd4a0e54cb0..0c83098bf8f1f2d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java @@ -632,10 +632,6 @@ public boolean isObjectType() { return isHllType() || isBitmapType() || isQuantileStateType(); } - public boolean isMinMaxSensitiveType() { - return isNumericType() || isDateLikeType() || isTimeLikeType(); - } - public DataType conversion() { return this; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index 9713d2d30e1453b..aebf758527ef673 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -356,7 +356,7 @@ public static ColumnStatistic fromJson(String statJson) { ); } - public boolean minOrMaxIsInf() { + public boolean isMinMaxInvalid() { return Double.isInfinite(maxValue) || Double.isInfinite(minValue); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticRange.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticRange.java index b7639186c403e5a..7011372302135dd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticRange.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticRange.java @@ -196,8 +196,7 @@ public StatisticRange union(StatisticRange other) { private double overlappingDistinctValues(StatisticRange other) { double overlapDistinctValuesLeft; double overlapDistinctValuesRight; - // FIXME: what does it mean? - if (this.isBothInfinite() && other.isOneSideInfinite()) { + if (this.isInfinite() || other.isInfinite()) { overlapDistinctValuesRight = distinctValues * INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR; } else if (Math.abs(other.low - other.high) < 1e-6) { // other is constant @@ -207,7 +206,7 @@ private double overlappingDistinctValues(StatisticRange other) { overlapDistinctValuesRight = overlapPercentOfRight * other.distinctValues; } - if (other.isBothInfinite() && this.isOneSideInfinite()) { + if (other.isInfinite() || this.isInfinite()) { overlapDistinctValuesLeft = distinctValues * INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR; } else if (Math.abs(this.low - this.high) < 1e-6) { overlapDistinctValuesLeft = distinctValues;