Skip to content

Commit

Permalink
[opt](nereids) refine operator estimation
Browse files Browse the repository at this point in the history
  • Loading branch information
zhongjian.xzj authored and zhongjian.xzj committed Sep 24, 2024
1 parent e4d4e91 commit cc5f410
Showing 1 changed file with 15 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ public Statistics visitCompoundPredicate(CompoundPredicate predicate, Estimation
leftStats.normalizeColumnStatistics(context.statistics.getRowCount(), true);
Statistics andStats = rightExpr.accept(this, new EstimationContext(leftStats));
if (predicate instanceof And) {
andStats.normalizeColumnStatistics();
// TODO: this will cause estimation change
//andStats.normalizeColumnStatistics();
return andStats;
} else if (predicate instanceof Or) {
Statistics rightStats = rightExpr.accept(this, context);
Expand Down Expand Up @@ -344,7 +345,8 @@ private Statistics estimateColumnEqualToConstant(ComparisonPredicate cp, ColumnS
if (!(left instanceof SlotReference)) {
left.accept(new ColumnStatsAdjustVisitor(), equalStats);
}
equalStats.normalizeColumnStatistics();
// TODO: normalizeColumnStatistics() will have problem after ColumnStatsAdjustVisitor
//equalStats.normalizeColumnStatistics();
return equalStats;
}

Expand Down Expand Up @@ -605,6 +607,9 @@ private Statistics estimateColumnToConstantRange(Expression leftExpr, DataType d
Statistics updatedStatistics;

StatisticRange intersectRange = leftRange.intersect(rightRange);
double sel = leftRange.getDistinctValues() == 0
? 1.0
: intersectRange.getDistinctValues() / leftRange.getDistinctValues();
if (intersectRange.isEmpty()) {
leftColumnStatisticBuilder = new ColumnStatisticBuilder(leftStats)
.setMinValue(Double.NEGATIVE_INFINITY)
Expand All @@ -614,35 +619,34 @@ private Statistics estimateColumnToConstantRange(Expression leftExpr, DataType d
.setNdv(0)
.setNumNulls(0);
updatedStatistics = context.statistics.withRowCount(0);
} else if (dataType instanceof RangeScalable) {
} else if (dataType instanceof RangeScalable || sel == 0 || sel == 1) {
leftColumnStatisticBuilder = new ColumnStatisticBuilder(leftStats)
.setMinValue(intersectRange.getLow())
.setMinExpr(intersectRange.getLowExpr())
.setMaxValue(intersectRange.getHigh())
.setMaxExpr(intersectRange.getHighExpr())
.setNdv(intersectRange.getDistinctValues())
.setNumNulls(0);
double sel = leftRange.getDistinctValues() == 0
? 1.0
: intersectRange.getDistinctValues() / leftRange.getDistinctValues();
sel = Math.max(sel, RANGE_SELECTIVITY_THRESHOLD);
sel = getNotNullSelectivity(leftStats.numNulls, context.statistics.getRowCount(), leftStats.ndv, sel);
updatedStatistics = context.statistics.withSel(sel);
} else {
double sel = DEFAULT_INEQUALITY_COEFFICIENT;
sel = DEFAULT_INEQUALITY_COEFFICIENT;
sel = getNotNullSelectivity(leftStats.numNulls, context.statistics.getRowCount(), leftStats.ndv, sel);
leftColumnStatisticBuilder = new ColumnStatisticBuilder(leftStats)
.setMinValue(intersectRange.getLow())
.setMinExpr(intersectRange.getLowExpr())
.setMaxValue(intersectRange.getHigh())
.setMaxExpr(intersectRange.getHighExpr())
.setNdv(Math.max(leftStats.ndv * sel, 1))
.setNdv(Math.max(1, Math.min(leftStats.ndv * sel, intersectRange.getDistinctValues())))
.setNumNulls(0);
updatedStatistics = context.statistics.withSel(sel);
}
updatedStatistics.addColumnStats(leftExpr, leftColumnStatisticBuilder.build());
context.addKeyIfSlot(leftExpr);
leftExpr.accept(new ColumnStatsAdjustVisitor(), updatedStatistics);
updatedStatistics.normalizeColumnStatistics();
// TODO: normalizeColumnStatistics() will have problem after ColumnStatsAdjustVisitor
//updatedStatistics.normalizeColumnStatistics();

return updatedStatistics;
}
Expand All @@ -660,7 +664,7 @@ private Statistics estimateColumnEqualToColumn(Expression leftExpr, ColumnStatis
&& !leftStats.isMinMaxInvalid() && !rightStats.isMinMaxInvalid()) {
intersectBuilder.setNdv(intersect.getDistinctValues());
} else {
// intersect 's ndv uses min ndv but selectivity computing use the max ndv
// the intersect's ndv uses the min ndv, but the selectivity computation uses the max ndv
intersectBuilder.setNdv(Math.min(leftStats.ndv, rightStats.ndv));
}
double numNull = keepNull ? Math.min(leftStats.numNulls, rightStats.numNulls) : 0;
Expand Down Expand Up @@ -693,7 +697,7 @@ private Statistics estimateColumnLessThanColumn(Expression leftExpr, ColumnStati
StatisticRange intersect = leftRange.intersect(rightRange);

if (leftExpr.getDataType() instanceof RangeScalable && rightExpr.getDataType() instanceof RangeScalable
&& !leftStats.isMinMaxInvalid() && !rightStats.isMinMaxInvalid()) {
&& !leftStats.isMinMaxInvalid() && !rightStats.isMinMaxInvalid()) {
// TODO: use intersect interface to refine this to avoid this kind of left-dominating style
Statistics statistics;
// Left always less than Right
Expand Down

0 comments on commit cc5f410

Please sign in to comment.