Skip to content

Commit

Permalink
[opt](nereids) refine operator estimation
Browse files Browse the repository at this point in the history
  • Loading branch information
zhongjian.xzj authored and zhongjian.xzj committed Sep 12, 2024
1 parent e20030c commit 2a258f5
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 87 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -171,42 +171,32 @@ public Statistics visitComparisonPredicate(ComparisonPredicate cp, EstimationCon
ColumnStatistic statsForLeft = ExpressionEstimation.estimate(left, context.statistics);
ColumnStatistic statsForRight = ExpressionEstimation.estimate(right, context.statistics);
if (!left.isConstant() && !right.isConstant()) {
return calculateWhenBothColumn(cp, context, statsForLeft, statsForRight);
return estimateColumnToColumn(cp, context, statsForLeft, statsForRight);
} else {
// For a literal, its max and min are the same value.
return calculateWhenLiteralRight(cp,
statsForLeft,
statsForRight,
context);
return estimateColumnToConstant(cp, statsForLeft, statsForRight, context);
}
}

// Diff-view note: this span shows two versions of the same helper back to back. The
// updateLessThanLiteral lines appear to be the pre-change text and the
// estimateColumnLessThanConstant lines its replacement; both share the final closing brace.
//
// Pre-change version: builds a range running from the left column's minimum
// (statsForLeft.minValue/minExpr) to the right side's maximum (statsForRight.maxValue/maxExpr),
// carrying the left column's ndv, and hands it to estimateBinaryComparisonFilter.
private Statistics updateLessThanLiteral(Expression leftExpr, DataType dataType, ColumnStatistic statsForLeft,
ColumnStatistic statsForRight, EstimationContext context) {
StatisticRange rightRange = new StatisticRange(statsForLeft.minValue, statsForLeft.minExpr,
statsForRight.maxValue, statsForRight.maxExpr,
statsForLeft.ndv, dataType);
return estimateBinaryComparisonFilter(leftExpr, dataType,
statsForLeft,
rightRange, context);
// Replacement version: builds the range from the right-hand statistics only
// (statsForRight min..max) with ndv fixed at 1, and hands it to estimateColumnToConstantRange.
// NOTE(review): this body is token-for-token the same as estimateColumnGreaterThanConstant's,
// so "<" and ">" against a constant would be estimated identically -- confirm this is intended.
private Statistics estimateColumnLessThanConstant(Expression leftExpr, DataType dataType,
ColumnStatistic statsForLeft, ColumnStatistic statsForRight, EstimationContext context) {
StatisticRange constantRange = new StatisticRange(statsForRight.minValue, statsForRight.minExpr,
statsForRight.maxValue, statsForRight.maxExpr, 1, dataType);
return estimateColumnToConstantRange(leftExpr, dataType, statsForLeft, constantRange, context);
}

// Diff-view note: this span shows two versions of the same helper back to back. The
// updateGreaterThanLiteral lines appear to be the pre-change text and the
// estimateColumnGreaterThanConstant lines its replacement; both share the final closing brace.
//
// Pre-change version: builds a range running from the right side's minimum
// (statsForRight.minValue/minExpr) to the left column's maximum (statsForLeft.maxValue/maxExpr),
// carrying the left column's ndv, and hands it to estimateBinaryComparisonFilter.
private Statistics updateGreaterThanLiteral(Expression leftExpr, DataType dataType, ColumnStatistic statsForLeft,
ColumnStatistic statsForRight, EstimationContext context) {
StatisticRange rightRange = new StatisticRange(statsForRight.minValue, statsForRight.minExpr,
statsForLeft.maxValue, statsForLeft.maxExpr,
statsForLeft.ndv, dataType);
return estimateBinaryComparisonFilter(leftExpr, dataType, statsForLeft, rightRange, context);
// Replacement version: builds the range from the right-hand statistics only
// (statsForRight min..max) with ndv fixed at 1, and hands it to estimateColumnToConstantRange.
// NOTE(review): this body is token-for-token the same as estimateColumnLessThanConstant's,
// so ">" and "<" against a constant would be estimated identically -- confirm this is intended.
private Statistics estimateColumnGreaterThanConstant(Expression leftExpr, DataType dataType,
ColumnStatistic statsForLeft, ColumnStatistic statsForRight, EstimationContext context) {
StatisticRange constantRange = new StatisticRange(statsForRight.minValue, statsForRight.minExpr,
statsForRight.maxValue, statsForRight.maxExpr, 1, dataType);
return estimateColumnToConstantRange(leftExpr, dataType, statsForLeft, constantRange, context);
}

private Statistics calculateWhenLiteralRight(ComparisonPredicate cp,
private Statistics estimateColumnToConstant(ComparisonPredicate cp,
ColumnStatistic statsForLeft, ColumnStatistic statsForRight, EstimationContext context) {
if (statsForLeft.isUnKnown) {
return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT);
}

if (cp instanceof EqualPredicate) {
return estimateEqualTo(cp, statsForLeft, statsForRight, context);
} else if (cp instanceof EqualPredicate) {
return estimateColumnEqualToConstant(cp, statsForLeft, statsForRight, context);
} else {
// literal map used to convert a dateLiteral back to its stringLiteral
Map<DateLiteral, StringLiteral> literalMap = new HashMap<>();
Expand All @@ -229,12 +219,13 @@ private Statistics calculateWhenLiteralRight(ComparisonPredicate cp,
statsForLeftMayConverted = statsForLeftMayConvertedOpt.get();
statsForRightMayConverted = statsForRightMayConvertedOpt.get();
}

Statistics result = null;
if (cp instanceof LessThan || cp instanceof LessThanEqual) {
result = updateLessThanLiteral(cp.left(), compareType, statsForLeftMayConverted,
result = estimateColumnLessThanConstant(cp.left(), compareType, statsForLeftMayConverted,
statsForRightMayConverted, context);
} else if (cp instanceof GreaterThan || cp instanceof GreaterThanEqual) {
result = updateGreaterThanLiteral(cp.left(), compareType, statsForLeftMayConverted,
result = estimateColumnGreaterThanConstant(cp.left(), compareType, statsForLeftMayConverted,
statsForRightMayConverted, context);
} else {
throw new RuntimeException(String.format("Unexpected expression : %s", cp.toSql()));
Expand Down Expand Up @@ -315,7 +306,7 @@ private Optional<DateLiteral> tryConvertStrLiteralToDateLiteral(LiteralExpr lite
return dt == null ? Optional.empty() : Optional.of(dt);
}

private Statistics estimateEqualTo(ComparisonPredicate cp, ColumnStatistic statsForLeft,
private Statistics estimateColumnEqualToConstant(ComparisonPredicate cp, ColumnStatistic statsForLeft,
ColumnStatistic statsForRight,
EstimationContext context) {
double selectivity;
Expand Down Expand Up @@ -351,21 +342,20 @@ private Statistics estimateEqualTo(ComparisonPredicate cp, ColumnStatistic stats
return equalStats;
}

// Diff-view note: old and new lines of this method are interleaved here. The first signature
// line (calculateWhenBothColumn) appears to be the pre-change name and the second
// (estimateColumnToColumn) its replacement; likewise each bare "}" / "if (...)" pair is
// followed by the "} else if (...)" line that supersedes it.
//
// Purpose: dispatches a comparison predicate on the concrete type of cp:
//   - EqualPredicate                -> estimateColumnEqualToColumn (null-keeping when cp is NullSafeEqual)
//   - GreaterThan/GreaterThanEqual  -> estimateColumnLessThanColumn with the operands swapped
//   - LessThan/LessThanEqual        -> estimateColumnLessThanColumn with the operands as written
//   - anything else                 -> the input statistics, unchanged
private Statistics calculateWhenBothColumn(ComparisonPredicate cp, EstimationContext context,
private Statistics estimateColumnToColumn(ComparisonPredicate cp, EstimationContext context,
ColumnStatistic statsForLeft, ColumnStatistic statsForRight) {
Expression left = cp.left();
Expression right = cp.right();
if (cp instanceof EqualPredicate) {
// The boolean argument is true only for NullSafeEqual.
return estimateColumnEqualToColumn(left, statsForLeft, right, statsForRight,
cp instanceof NullSafeEqual, context);
}
if (cp instanceof GreaterThan || cp instanceof GreaterThanEqual) {
} else if (cp instanceof GreaterThan || cp instanceof GreaterThanEqual) {
// "left > right" is estimated as "right < left": note the swapped argument order.
return estimateColumnLessThanColumn(right, statsForRight, left, statsForLeft, context);
}
if (cp instanceof LessThan || cp instanceof LessThanEqual) {
} else if (cp instanceof LessThan || cp instanceof LessThanEqual) {
return estimateColumnLessThanColumn(left, statsForLeft, right, statsForRight, context);
} else {
// No estimate for other predicate kinds: fall back to the unmodified input statistics.
return context.statistics;
}
return context.statistics;
}

@Override
Expand Down Expand Up @@ -580,15 +570,14 @@ public boolean isKeySlot(Expression expr) {
}
}

private Statistics estimateBinaryComparisonFilter(Expression leftExpr, DataType dataType, ColumnStatistic leftStats,
private Statistics estimateColumnToConstantRange(Expression leftExpr, DataType dataType, ColumnStatistic leftStats,
StatisticRange rightRange, EstimationContext context) {
StatisticRange leftRange =
new StatisticRange(leftStats.minValue, leftStats.minExpr, leftStats.maxValue, leftStats.maxExpr,
leftStats.ndv, dataType);
StatisticRange intersectRange = leftRange.cover(rightRange);

StatisticRange leftRange = new StatisticRange(leftStats.minValue, leftStats.minExpr,
leftStats.maxValue, leftStats.maxExpr, leftStats.ndv, dataType);
StatisticRange intersectRange = leftRange.intersect(rightRange);
ColumnStatisticBuilder leftColumnStatisticBuilder;
Statistics updatedStatistics;

if (intersectRange.isEmpty()) {
updatedStatistics = context.statistics.withRowCount(0);
leftColumnStatisticBuilder = new ColumnStatisticBuilder(leftStats)
Expand Down Expand Up @@ -629,21 +618,28 @@ private Statistics estimateColumnEqualToColumn(Expression leftExpr, ColumnStatis
Expression rightExpr, ColumnStatistic rightStats, boolean keepNull, EstimationContext context) {
StatisticRange leftRange = StatisticRange.from(leftStats, leftExpr.getDataType());
StatisticRange rightRange = StatisticRange.from(rightStats, rightExpr.getDataType());
StatisticRange leftIntersectRight = leftRange.intersect(rightRange);
StatisticRange intersect = rightRange.intersect(leftIntersectRight);
StatisticRange intersect = leftRange.intersect(rightRange);

ColumnStatisticBuilder intersectBuilder = new ColumnStatisticBuilder(leftStats);
intersectBuilder.setNdv(intersect.getDistinctValues());
intersectBuilder.setMinValue(intersect.getLow());
intersectBuilder.setMaxValue(intersect.getHigh());
double numNull = 0;
if (keepNull) {
numNull = Math.min(leftStats.numNulls, rightStats.numNulls);
}
double numNull = keepNull ? Math.min(leftStats.numNulls, rightStats.numNulls) : 0;
intersectBuilder.setNumNulls(numNull);
double sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv));
Statistics updatedStatistics = context.statistics.withSel(sel, numNull);
updatedStatistics.addColumnStats(leftExpr, intersectBuilder.build());
updatedStatistics.addColumnStats(rightExpr, intersectBuilder.build());

double origRowCount = context.statistics.getRowCount();
double leftNotNullSel = Statistics.getValidSelectivity(1 - (leftStats.numNulls / origRowCount));
double rightNotNullSel = Statistics.getValidSelectivity(1 - (rightStats.numNulls / origRowCount));
double notNullSel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv))
* (keepNull ? 1 : leftNotNullSel * rightNotNullSel);

Statistics updatedStatistics = context.statistics.withSel(notNullSel, numNull);
ColumnStatistic newLeftStatistics = intersectBuilder
.setAvgSizeByte(leftStats.avgSizeByte).build();
ColumnStatistic newRightStatistics = intersectBuilder
.setAvgSizeByte(rightStats.avgSizeByte).build();
updatedStatistics.addColumnStats(leftExpr, newLeftStatistics);
updatedStatistics.addColumnStats(rightExpr, newRightStatistics);

context.addKeyIfSlot(leftExpr);
context.addKeyIfSlot(rightExpr);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,9 @@ public StatisticRange intersect(StatisticRange other) {
double newHigh = smallerHigh.first;
LiteralExpr newHighExpr = smallerHigh.second;
if (newLow <= newHigh) {
double distinctValues = overlappingDistinctValues(other);
return new StatisticRange(newLow, newLowExpr, newHigh, newHighExpr,
overlappingDistinctValues(other), dataType);
distinctValues, dataType);
}
return empty(dataType);
}
Expand All @@ -178,33 +179,6 @@ public Pair<Double, LiteralExpr> maxPair(double r1, LiteralExpr e1, double r2, L
return Pair.of(r2, e2);
}

/**
 * Builds the range obtained by restricting this range to {@code other}.
 *
 * <p>The candidate bounds are the pair returned by {@code maxPair} for the two lows and the
 * pair returned by {@code minPair} for the two highs. When the resulting low is not
 * less than or equal to the resulting high, {@code empty(dataType)} is returned.
 * Otherwise the result keeps those bounds and carries a distinct-value count of:
 * <ul>
 *   <li>{@code distinctValues * INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR}
 *       when this range is infinite on both sides and {@code other} is infinite on one side;</li>
 *   <li>otherwise {@code minExcludeNaN(distinctValues, overlapPercentWith(other) * distinctValues)}.</li>
 * </ul>
 *
 * @param other the range to restrict this range to
 * @return the restricted range, or the empty range when the bounds do not overlap
 */
public StatisticRange cover(StatisticRange other) {
    Pair<Double, LiteralExpr> lowerBound = maxPair(low, lowExpr, other.low, other.lowExpr);
    double coveredLow = lowerBound.first;
    LiteralExpr coveredLowExpr = lowerBound.second;
    Pair<Double, LiteralExpr> upperBound = minPair(high, highExpr, other.high, other.highExpr);
    double coveredHigh = upperBound.first;
    LiteralExpr coveredHighExpr = upperBound.second;

    // Written as a negated "<=" so a NaN bound still takes the empty path.
    if (!(coveredLow <= coveredHigh)) {
        return empty(dataType);
    }

    // Share of this range's distinct values that falls inside the overlap.
    double overlapFraction = overlapPercentWith(other);
    double overlappedNdv = overlapFraction * distinctValues;
    double cappedNdv = minExcludeNaN(distinctValues, overlappedNdv);

    boolean useInfiniteHeuristic = this.isBothInfinite() && other.isOneSideInfinite();
    double resultNdv = useInfiniteHeuristic
            ? distinctValues * INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR
            : cappedNdv;
    return new StatisticRange(coveredLow, coveredLowExpr, coveredHigh, coveredHighExpr, resultNdv, dataType);
}

public StatisticRange union(StatisticRange other) {
double overlapPercentThis = this.overlapPercentWith(other);
double overlapPercentOther = other.overlapPercentWith(this);
Expand All @@ -220,10 +194,28 @@ public StatisticRange union(StatisticRange other) {
}

/**
 * Estimates how many distinct values lie in the overlap between this range and {@code other}.
 * Two candidate estimates are computed (a "left" one and a "right" one) and the result is
 * {@code minExcludeNaN} of the two.
 *
 * <p>Diff-view note: the first four body lines (overlapPercentOfLeft ... overlapDistinctValuesRight
 * initialised directly from overlapPercentWith) appear to be the pre-change text; the two
 * if/else-if/else chains that follow replace them.
 */
private double overlappingDistinctValues(StatisticRange other) {
double overlapPercentOfLeft = overlapPercentWith(other);
double overlapPercentOfRight = other.overlapPercentWith(this);
double overlapDistinctValuesLeft = overlapPercentOfLeft * distinctValues;
double overlapDistinctValuesRight = overlapPercentOfRight * other.distinctValues;
double overlapDistinctValuesLeft;
double overlapDistinctValuesRight;
// FIXME: the rationale for applying the heuristic factor in this case is undocumented.
// NOTE(review): in the first two branches below the "Right" estimate is derived from this
// range's distinctValues, not other.distinctValues -- confirm that is intended.
if (this.isBothInfinite() && other.isOneSideInfinite()) {
overlapDistinctValuesRight = distinctValues * INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR;
} else if (Math.abs(other.low - other.high) < 1e-6) {
// other is treated as a constant: its low and high differ by less than 1e-6
overlapDistinctValuesRight = distinctValues;
} else {
// General case: scale other's distinct values by the fraction of other that overlaps this.
double overlapPercentOfRight = other.overlapPercentWith(this);
overlapDistinctValuesRight = overlapPercentOfRight * other.distinctValues;
}

// Mirror of the chain above with the roles of this and other exchanged.
if (other.isBothInfinite() && this.isOneSideInfinite()) {
overlapDistinctValuesLeft = distinctValues * INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR;
} else if (Math.abs(this.low - this.high) < 1e-6) {
// this range is treated as a constant: keep its own distinct-value count as is
overlapDistinctValuesLeft = distinctValues;
} else {
double overlapPercentOfLeft = this.overlapPercentWith(other);
overlapDistinctValuesLeft = overlapPercentOfLeft * distinctValues;
}

return minExcludeNaN(overlapDistinctValuesLeft, overlapDistinctValuesRight);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,12 @@ public Statistics withSel(double sel) {
return withSel(sel, 0);
}

/**
 * Returns statistics whose row count is {@code rowCount * selectivity + numNull}.
 *
 * <p>The selectivity is first passed through {@code StatsMathUtil.minNonNaN(x, 1)}. When the
 * current {@code rowCount} is NaN this object is returned unchanged; otherwise a new
 * {@code Statistics} is created with the new count, the same {@code widthInJoinCluster}, and a
 * fresh {@code HashMap} copy of {@code expressionToColumnStats}.
 *
 * <p>Diff-view note: the lines using the name {@code sel} appear to be the pre-change text and
 * the lines using {@code notNullSel} their replacement; only the parameter name differs.
 *
 * @param numNull number of rows added on top of the selected rows
 */
public Statistics withSel(double sel, double numNull) {
sel = StatsMathUtil.minNonNaN(sel, 1);
public Statistics withSel(double notNullSel, double numNull) {
notNullSel = StatsMathUtil.minNonNaN(notNullSel, 1);
if (Double.isNaN(rowCount)) {
// Unknown row count: nothing to scale, so hand back the same instance.
return this;
}
double newCount = rowCount * sel + numNull;
double newCount = rowCount * notNullSel + numNull;
return new Statistics(newCount, widthInJoinCluster, new HashMap<>(expressionToColumnStats));
}

Expand Down Expand Up @@ -236,8 +236,8 @@ public static Statistics zero(Statistics statistics) {
return zero;
}

/**
 * Clamps a selectivity into the closed interval [0, 1]: values below 0 become 0, values above 1
 * become 1, and everything else is returned as is. A NaN argument is returned unchanged because
 * both comparisons evaluate to false for NaN.
 *
 * <p>Diff-view note: the lines using the name {@code nullSel} appear to be the pre-change text
 * and the lines using {@code selectivity} their replacement; only the parameter name differs.
 */
public static double getValidSelectivity(double nullSel) {
return nullSel < 0 ? 0 : (nullSel > 1 ? 1 : nullSel);
public static double getValidSelectivity(double selectivity) {
return selectivity < 0 ? 0 : (selectivity > 1 ? 1 : selectivity);
}

/**
Expand Down

0 comments on commit 2a258f5

Please sign in to comment.