Skip to content

Commit

Permalink
[opt](nereids) refine operator estimation
Browse files Browse the repository at this point in the history
  • Loading branch information
zhongjian.xzj authored and zhongjian.xzj committed Sep 12, 2024
1 parent fcab5cf commit 3a61c2b
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,10 @@ public Statistics visitCompoundPredicate(CompoundPredicate predicate, Estimation
Expression leftExpr = predicate.child(0);
Expression rightExpr = predicate.child(1);
Statistics leftStats = leftExpr.accept(this, context);
leftStats = leftStats.normalizeByRatio(context.statistics.getRowCount());
Statistics andStats = rightExpr.accept(this,
new EstimationContext(leftStats));
leftStats = leftStats.normalizeColumnStatistics(context.statistics.getRowCount());
Statistics andStats = rightExpr.accept(this, new EstimationContext(leftStats));
if (predicate instanceof And) {
return andStats;
return andStats.normalizeColumnStatistics(andStats.getRowCount());
} else if (predicate instanceof Or) {
Statistics rightStats = rightExpr.accept(this, context);
double rowCount = leftStats.getRowCount() + rightStats.getRowCount() - andStats.getRowCount();
Expand Down Expand Up @@ -540,15 +539,17 @@ public Statistics visitIsNull(IsNull isNull, EstimationContext context) {
double row = context.statistics.getRowCount() * DEFAULT_ISNULL_SELECTIVITY;
return new StatisticsBuilder(context.statistics).setRowCount(row).build();
}
double outputRowCount = childColStats.numNulls;
double childOutputRowCount = context.statistics.getRowCount();
double outputRowCount = Math.min(childColStats.numNulls, childOutputRowCount);
if (!isOnBaseTable) {
// For IS NULL on a base table, rely on numNulls. Otherwise, nulls may have been
// generated upstream (e.g. by an outer join), so apply a lower bound as protection.
Expression child = isNull.child();
Statistics childStats = child.accept(this, context);
outputRowCount = Math.max(childStats.getRowCount() * DEFAULT_ISNULL_SELECTIVITY, outputRowCount);
outputRowCount = Math.max(outputRowCount, 1);
}
outputRowCount = Math.max(outputRowCount, 1);

ColumnStatisticBuilder colBuilder = new ColumnStatisticBuilder(childColStats);
colBuilder.setCount(outputRowCount).setNumNulls(outputRowCount)
.setMaxValue(Double.POSITIVE_INFINITY)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,17 +272,18 @@ public int getWidthInJoinCluster() {
return widthInJoinCluster;
}

public Statistics normalizeByRatio(double originRowCount) {
if (rowCount >= originRowCount || rowCount <= 0) {
// TODO: this column-statistics normalization should be performed in each visitor
public Statistics normalizeColumnStatistics(double originRowCount) {
if (rowCount >= originRowCount || rowCount <= 0
|| expressionToColumnStats.values().stream().anyMatch(ColumnStatistic::isUnKnown)) {
return this;
}
StatisticsBuilder builder = new StatisticsBuilder(this);
double ratio = rowCount / originRowCount;
for (Entry<Expression, ColumnStatistic> entry : expressionToColumnStats.entrySet()) {
ColumnStatistic colStats = entry.getValue();
if (colStats.numNulls != 0 || colStats.ndv > rowCount) {
ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(colStats);
colStatsBuilder.setNumNulls(colStats.numNulls * ratio);
colStatsBuilder.setNumNulls(colStats.numNulls * rowCount / originRowCount);
colStatsBuilder.setNdv(Math.min(rowCount - colStatsBuilder.getNumNulls(), colStats.ndv));
builder.putColumnStatistics(entry.getKey(), colStatsBuilder.build());
}
Expand Down

0 comments on commit 3a61c2b

Please sign in to comment.