Skip to content

Commit

Permalink
1. refactor statistics functions withSel/updateRowCountOnly/withRowCou…
Browse files Browse the repository at this point in the history
…nt, 2. do not use Double.MAX_VALUE in estimation
  • Loading branch information
englefly committed Sep 28, 2023
1 parent fd47209 commit c4fe501
Show file tree
Hide file tree
Showing 8 changed files with 44 additions and 112 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ public void execute() {
// child group's row count unchanged when the parent group expression is a project operation.
double parentRowCount = groupExpression.getOwnerGroup().getStatistics().getRowCount();
groupExpression.children().forEach(g -> g.setStatistics(
g.getStatistics().updateRowCountAndColStats(parentRowCount))
g.getStatistics().withRowCountAndEnforceValid(parentRowCount))
);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ public ColumnStatistic visitIf(If function, Statistics context) {
return new ColumnStatisticBuilder()
.setNdv(2)
.setMinValue(0)
.setMaxValue(Double.MAX_VALUE)
.setMaxValue(Double.POSITIVE_INFINITY)
.setAvgSizeByte(8)
.setNumNulls(0)
.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public Statistics estimate(Expression expression, Statistics statistics) {

/**
 * Generic visitor entry point: derives the result from the context statistics by
 * passing DEFAULT_INEQUALITY_COEFFICIENT to withSel.
 *
 * NOTE(review): this listing is a rendered diff whose +/- markers were lost. The two
 * return statements below appear to be the removed line followed by its replacement,
 * not two statements of one method body.
 */
@Override
public Statistics visit(Expression expr, EstimationContext context) {
// Pre-change form: two-argument withSel.
return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT, false);
// Post-change form: single-argument withSel.
return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT);
}

@Override
Expand All @@ -104,7 +104,7 @@ public Statistics visitCompoundPredicate(CompoundPredicate predicate, Estimation
} else if (predicate instanceof Or) {
Statistics rightStats = rightExpr.accept(this, context);
double rowCount = leftStats.getRowCount() + rightStats.getRowCount() - andStats.getRowCount();
Statistics orStats = context.statistics.setRowCount(rowCount);
Statistics orStats = context.statistics.withRowCount(rowCount);
Set<Slot> leftInputSlots = leftExpr.getInputSlots();
Set<Slot> rightInputSlots = rightExpr.getInputSlots();
for (Slot slot : context.keyColumns) {
Expand Down Expand Up @@ -161,7 +161,7 @@ public Statistics visitComparisonPredicate(ComparisonPredicate cp, EstimationCon
double rowCount = context.statistics.getRowCount();
double newRowCount = Math.max(rowCount * DEFAULT_HAVING_COEFFICIENT,
Math.max(statsForLeft.ndv, statsForRight.ndv));
return context.statistics.setRowCount(newRowCount);
return context.statistics.withRowCount(newRowCount);
}
}
if (!left.isConstant() && !right.isConstant()) {
Expand Down Expand Up @@ -204,7 +204,7 @@ private Statistics updateGreaterThanLiteral(Expression leftExpr, ColumnStatistic
private Statistics calculateWhenLiteralRight(ComparisonPredicate cp,
ColumnStatistic statsForLeft, ColumnStatistic statsForRight, EstimationContext context) {
if (statsForLeft.isUnKnown) {
return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT, false);
return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT);
}

if (cp instanceof EqualTo || cp instanceof NullSafeEqual) {
Expand Down Expand Up @@ -238,7 +238,7 @@ private Statistics estimateEqualTo(ComparisonPredicate cp, ColumnStatistic stats
return estimateEqualToWithHistogram(cp.left(), statsForLeft, val, context);
}

Statistics equalStats = context.statistics.withSel(selectivity, false);
Statistics equalStats = context.statistics.withSel(selectivity);
Expression left = cp.left();
equalStats.addColumnStats(left, statsForRight);
context.addKeyIfSlot(left);
Expand Down Expand Up @@ -269,7 +269,7 @@ public Statistics visitInPredicate(InPredicate inPredicate, EstimationContext co
Expression compareExpr = inPredicate.getCompareExpr();
ColumnStatistic compareExprStats = ExpressionEstimation.estimate(compareExpr, context.statistics);
if (compareExprStats.isUnKnown || compareExpr instanceof Function) {
return context.statistics.withSel(DEFAULT_IN_COEFFICIENT, false);
return context.statistics.withSel(DEFAULT_IN_COEFFICIENT);
}
List<Expression> options = inPredicate.getOptions();
// init minOption and maxOption by compareExpr.max and compareExpr.min respectively,
Expand Down Expand Up @@ -345,7 +345,7 @@ A not in (1, 2, 3, 100):
}
}
Statistics estimated = new Statistics(context.statistics);
estimated = estimated.withSel(selectivity, false);
estimated = estimated.withSel(selectivity);
estimated.addColumnStats(compareExpr,
compareExprStatsBuilder.build());
context.addKeyIfSlot(compareExpr);
Expand Down Expand Up @@ -465,7 +465,7 @@ private Statistics estimateBinaryComparisonFilter(Expression leftExpr, ColumnSta
ColumnStatisticBuilder leftColumnStatisticBuilder;
Statistics updatedStatistics;
if (intersectRange.isEmpty()) {
updatedStatistics = context.statistics.setRowCount(0);
updatedStatistics = context.statistics.withRowCount(0);
leftColumnStatisticBuilder = new ColumnStatisticBuilder(leftStats)
.setMinValue(Double.NEGATIVE_INFINITY)
.setMinExpr(null)
Expand All @@ -481,7 +481,7 @@ private Statistics estimateBinaryComparisonFilter(Expression leftExpr, ColumnSta
.setMaxExpr(intersectRange.getHighExpr())
.setNdv(intersectRange.getDistinctValues());
double sel = leftRange.overlapPercentWith(rightRange);
updatedStatistics = context.statistics.withSel(sel, false);
updatedStatistics = context.statistics.withSel(sel);
leftColumnStatisticBuilder.setCount(updatedStatistics.getRowCount());
}
updatedStatistics.addColumnStats(leftExpr, leftColumnStatisticBuilder.build());
Expand All @@ -501,7 +501,7 @@ private Statistics estimateColumnEqualToColumn(Expression leftExpr, ColumnStatis
intersectBuilder.setMinValue(intersect.getLow());
intersectBuilder.setMaxValue(intersect.getHigh());
double sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv));
Statistics updatedStatistics = context.statistics.withSel(sel, false);
Statistics updatedStatistics = context.statistics.withSel(sel);
updatedStatistics.addColumnStats(leftExpr, intersectBuilder.build());
updatedStatistics.addColumnStats(rightExpr, intersectBuilder.build());
context.addKeyIfSlot(leftExpr);
Expand All @@ -517,7 +517,7 @@ private Statistics estimateColumnLessThanColumn(Expression leftExpr, ColumnStati
// Left always less than Right
if (leftRange.getHigh() < rightRange.getLow()) {
statistics =
context.statistics.setRowCount(Math.min(context.statistics.getRowCount() - leftStats.numNulls,
context.statistics.withRowCount(Math.min(context.statistics.getRowCount() - leftStats.numNulls,
context.statistics.getRowCount() - rightStats.numNulls));
statistics.addColumnStats(leftExpr, new ColumnStatisticBuilder(leftStats).setNumNulls(0.0).build());
statistics.addColumnStats(rightExpr, new ColumnStatisticBuilder(rightStats).setNumNulls(0.0).build());
Expand All @@ -528,7 +528,7 @@ private Statistics estimateColumnLessThanColumn(Expression leftExpr, ColumnStati
double leftOverlapPercent = leftRange.overlapPercentWith(rightRange);
// Left always greater than right
if (leftOverlapPercent == 0) {
return context.statistics.setRowCount(0.0);
return context.statistics.withRowCount(0.0);
}
StatisticRange leftAlwaysLessThanRightRange = new StatisticRange(leftStats.minValue, leftStats.minExpr,
rightStats.minValue, rightStats.minExpr, Double.NaN, leftExpr.getDataType());
Expand Down Expand Up @@ -561,7 +561,7 @@ private Statistics estimateColumnLessThanColumn(Expression leftExpr, ColumnStati
+ leftOverlapPercent * rightAlwaysGreaterRangeFraction;
context.addKeyIfSlot(leftExpr);
context.addKeyIfSlot(rightExpr);
return context.statistics.withSel(sel, false)
return context.statistics.withSel(sel)
.addColumnStats(leftExpr, leftColumnStatistic)
.addColumnStats(rightExpr, rightColumnStatistic);
}
Expand Down Expand Up @@ -595,10 +595,10 @@ private Statistics estimateLessThanLiteralWithHistogram(Expression leftExpr, Col
.setHistogram(new HistogramBuilder(leftHist).setBuckets(updatedBucketList).build())
.build();
context.addKeyIfSlot(leftExpr);
return context.statistics.withSel(sel, false).addColumnStats(leftExpr, columnStatistic);
return context.statistics.withSel(sel).addColumnStats(leftExpr, columnStatistic);
}
}
return context.statistics.withSel(0, false);
return context.statistics.withSel(0);
}

private Statistics estimateGreaterThanLiteralWithHistogram(Expression leftExpr, ColumnStatistic leftStats,
Expand Down Expand Up @@ -632,10 +632,10 @@ private Statistics estimateGreaterThanLiteralWithHistogram(Expression leftExpr,
.setHistogram(new HistogramBuilder(leftHist).setBuckets(updatedBucketList).build())
.build();
context.addKeyIfSlot(leftExpr);
return context.statistics.withSel(sel, false).addColumnStats(leftExpr, columnStatistic);
return context.statistics.withSel(sel).addColumnStats(leftExpr, columnStatistic);
}
}
return context.statistics.withSel(0, false);
return context.statistics.withSel(0);
}

private Statistics estimateEqualToWithHistogram(Expression leftExpr, ColumnStatistic leftStats,
Expand All @@ -660,7 +660,7 @@ private Statistics estimateEqualToWithHistogram(Expression leftExpr, ColumnStati
.setMinValue(numVal)
.build();
context.addKeyIfSlot(leftExpr);
return context.statistics.withSel(sel, false).addColumnStats(leftExpr, columnStatistic);
return context.statistics.withSel(sel).addColumnStats(leftExpr, columnStatistic);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ private static Statistics estimateHashJoin(Statistics leftStats, Statistics righ
outputRowCount = Math.max(1, outputRowCount * ratio.get());
}
}
innerJoinStats = crossJoinStats.updateRowCountAndColStats(outputRowCount);
innerJoinStats = crossJoinStats.withRowCountAndEnforceValid(outputRowCount);
return innerJoinStats;
}

Expand Down Expand Up @@ -257,7 +257,7 @@ private static Statistics estimateSemiOrAnti(Statistics leftStats, Statistics ri
double baseRowCount =
join.getJoinType().isLeftSemiOrAntiJoin() ? leftStats.getRowCount() : rightStats.getRowCount();
rowCount = Math.min(innerJoinStats.getRowCount(), baseRowCount);
return innerJoinStats.withRowCount(rowCount);
return innerJoinStats.withRowCountAndEnforceValid(rowCount);
} else {
StatisticsBuilder builder;
double originalRowCount = leftStats.getRowCount();
Expand All @@ -271,7 +271,7 @@ private static Statistics estimateSemiOrAnti(Statistics leftStats, Statistics ri
originalRowCount = rightStats.getRowCount();
}
Statistics outputStats = builder.build();
outputStats.fix(rowCount, originalRowCount);
outputStats.enforceValid();
return outputStats;
}
}
Expand All @@ -291,15 +291,15 @@ public static Statistics estimate(Statistics leftStats, Statistics rightStats, J
Statistics innerJoinStats = estimateInnerJoin(leftStats, rightStats, join);
double rowCount = Math.max(leftStats.getRowCount(), innerJoinStats.getRowCount());
rowCount = Math.max(leftStats.getRowCount(), rowCount);
return innerJoinStats.withRowCount(rowCount);
return innerJoinStats.withRowCountAndEnforceValid(rowCount);
} else if (joinType == JoinType.RIGHT_OUTER_JOIN) {
Statistics innerJoinStats = estimateInnerJoin(leftStats, rightStats, join);
double rowCount = Math.max(rightStats.getRowCount(), innerJoinStats.getRowCount());
rowCount = Math.max(rowCount, rightStats.getRowCount());
return innerJoinStats.withRowCount(rowCount);
return innerJoinStats.withRowCountAndEnforceValid(rowCount);
} else if (joinType == JoinType.FULL_OUTER_JOIN) {
Statistics innerJoinStats = estimateInnerJoin(leftStats, rightStats, join);
return innerJoinStats.withRowCount(leftStats.getRowCount()
return innerJoinStats.withRowCountAndEnforceValid(leftStats.getRowCount()
+ rightStats.getRowCount() + innerJoinStats.getRowCount());
} else if (joinType == JoinType.CROSS_JOIN) {
return new StatisticsBuilder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,7 @@ public Statistics visitPhysicalGenerate(PhysicalGenerate<? extends Plan> generat

/**
 * Reads the statistics of child 0 of the current group expression, computes
 * min(1, child row count), and returns the child statistics object.
 *
 * NOTE(review): desiredNumOfRows is never read in this body.
 * NOTE(review): the value returned by withRowCount / withRowCountAndEnforceValid is
 * discarded here, whereas computeTopN and computeLimit return it. If these methods
 * return a new Statistics rather than mutating the receiver, the capped row count is
 * lost and the child statistics are returned unchanged. Confirm against Statistics.
 * NOTE(review): this listing is a rendered diff whose +/- markers were lost. The two
 * consecutive with... calls appear to be the removed line followed by its replacement.
 */
private Statistics computeAssertNumRows(long desiredNumOfRows) {
Statistics statistics = groupExpression.childStatistics(0);
// Pre-change form.
statistics.withRowCount(Math.min(1, statistics.getRowCount()));
// Post-change form.
statistics.withRowCountAndEnforceValid(Math.min(1, statistics.getRowCount()));
return statistics;
}

Expand Down Expand Up @@ -657,7 +657,7 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {

/**
 * Derives TopN statistics from child 0 of the current group expression, using
 * min(child row count, topN.getLimit()) as the new row count.
 *
 * NOTE(review): this listing is a rendered diff whose +/- markers were lost. The two
 * return statements appear to be the removed line followed by its replacement.
 */
private Statistics computeTopN(TopN topN) {
Statistics stats = groupExpression.childStatistics(0);
// Pre-change form.
return stats.withRowCount(Math.min(stats.getRowCount(), topN.getLimit()));
// Post-change form.
return stats.withRowCountAndEnforceValid(Math.min(stats.getRowCount(), topN.getLimit()));
}

private Statistics computePartitionTopN(PartitionTopN partitionTopN) {
Expand Down Expand Up @@ -690,12 +690,12 @@ private Statistics computePartitionTopN(PartitionTopN partitionTopN) {
// TODO: for the filter push down window situation, we will prune the row count twice
// because we keep the pushed down filter. And it will be calculated twice, one of them in 'PartitionTopN'
// and the other is in 'Filter'. It's hard to dismiss.
return childStats.updateRowCountAndColStats(rowCount);
return childStats.withRowCountAndEnforceValid(rowCount);
}

/**
 * Derives Limit statistics from child 0 of the current group expression, using
 * min(child row count, limit.getLimit()) as the new row count.
 *
 * NOTE(review): this listing is a rendered diff whose +/- markers were lost. The two
 * return statements appear to be the removed line followed by its replacement.
 */
private Statistics computeLimit(Limit limit) {
Statistics stats = groupExpression.childStatistics(0);
// Pre-change form.
return stats.withRowCount(Math.min(stats.getRowCount(), limit.getLimit()));
// Post-change form.
return stats.withRowCountAndEnforceValid(Math.min(stats.getRowCount(), limit.getLimit()));
}

private double estimateGroupByRowCount(List<Expression> groupByExpressions, Statistics childStats) {
Expand Down Expand Up @@ -878,7 +878,7 @@ private Statistics computeIntersect(SetOperation setOperation) {
for (int i = 1; i < setOperation.getArity(); ++i) {
rowCount = Math.min(rowCount, groupExpression.childStatistics(i).getRowCount());
}
double minProd = Double.MAX_VALUE;
double minProd = Double.POSITIVE_INFINITY;
for (Group group : groupExpression.children()) {
Statistics statistics = group.getStatistics();
double prod = 1.0;
Expand All @@ -896,7 +896,7 @@ private Statistics computeIntersect(SetOperation setOperation) {
leftChildStats.addColumnStats(outputs.get(i),
leftChildStats.findColumnStatistics(leftChildOutputs.get(i)));
}
return leftChildStats.withRowCount(rowCount);
return leftChildStats.withRowCountAndEnforceValid(rowCount);
}

private Statistics computeGenerate(Generate generate) {
Expand All @@ -910,8 +910,8 @@ private Statistics computeGenerate(Generate generate) {
for (Slot output : generate.getGeneratorOutput()) {
ColumnStatistic columnStatistic = new ColumnStatisticBuilder()
.setCount(count)
.setMinValue(Double.MAX_VALUE)
.setMaxValue(Double.MIN_VALUE)
.setMinValue(Double.NEGATIVE_INFINITY)
.setMaxValue(Double.POSITIVE_INFINITY)
.setNdv(count)
.setNumNulls(0)
.setAvgSizeByte(output.getDataType().width())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,21 +177,21 @@ public static ColumnStatistic fromResultRow(ResultRow row) {
columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min));
} catch (AnalysisException e) {
LOG.warn("Failed to deserialize column {} min value {}.", col, min, e);
columnStatisticBuilder.setMinValue(Double.MIN_VALUE);
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
}
} else {
columnStatisticBuilder.setMinValue(Double.MIN_VALUE);
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
}
if (max != null && !max.equalsIgnoreCase("NULL")) {
try {
columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max));
columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max));
} catch (AnalysisException e) {
LOG.warn("Failed to deserialize column {} max value {}.", col, max, e);
columnStatisticBuilder.setMaxValue(Double.MAX_VALUE);
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
}
} else {
columnStatisticBuilder.setMaxValue(Double.MAX_VALUE);
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
}
columnStatisticBuilder.setUpdatedTime(row.get(13));
return columnStatisticBuilder.build();
Expand Down
Loading

0 comments on commit c4fe501

Please sign in to comment.