Skip to content

Commit

Permalink
[opt](nereids) refine operator estimation
Browse files Browse the repository at this point in the history
  • Loading branch information
zhongjian.xzj authored and zhongjian.xzj committed Sep 14, 2024
1 parent a5f5f76 commit b8a92af
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ public ColumnStatistic visitToDate(ToDate toDate, Statistics context) {
ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(childColumnStats)
.setAvgSizeByte(toDate.getDataType().width())
.setDataSize(toDate.getDataType().width() * context.getRowCount());
if (childColumnStats.minOrMaxIsInf()) {
if (childColumnStats.isMinMaxInvalid()) {
return columnStatisticBuilder.build();
}
double minValue;
Expand Down Expand Up @@ -611,7 +611,7 @@ public ColumnStatistic visitToDays(ToDays toDays, Statistics context) {
ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(childColumnStats)
.setAvgSizeByte(toDays.getDataType().width())
.setDataSize(toDays.getDataType().width() * context.getRowCount());
if (childColumnStats.minOrMaxIsInf()) {
if (childColumnStats.isMinMaxInvalid()) {
return columnStatisticBuilder.build();
}
double minValue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,20 +360,14 @@ private Statistics estimateColumnToColumn(ComparisonPredicate cp, EstimationCont
}
}

@Override
public Statistics visitInPredicate(InPredicate inPredicate, EstimationContext context) {
Expression compareExpr = inPredicate.getCompareExpr();
ColumnStatistic compareExprStats = ExpressionEstimation.estimate(compareExpr, context.statistics);
if (compareExprStats.isUnKnown || compareExpr instanceof Function) {
return context.statistics.withSel(DEFAULT_IN_COEFFICIENT);
}
private ColumnStatistic updateInPredicateColumnStatistics(InPredicate inPredicate, EstimationContext context,
ColumnStatistic compareExprStats) {
List<Expression> options = inPredicate.getOptions();
Expression compareExpr = inPredicate.getCompareExpr();
ColumnStatisticBuilder compareExprStatsBuilder = new ColumnStatisticBuilder(compareExprStats);
double selectivity = 1.0;

// for string type, not use min/max to calc selectivity; other type can use them.
DataType compareColumnType = compareExpr.getDataType();
if (compareColumnType.isMinMaxSensitiveType()) {

if (compareColumnType instanceof RangeScalable && !compareExprStats.isMinMaxInvalid()) {
// init minOption and maxOption by compareExpr.max and compareExpr.min respectively,
// and then adjust min/max by options
double minOptionValue = compareExprStats.maxValue;
Expand Down Expand Up @@ -428,9 +422,6 @@ A not in (1, 2, 3, 100):
int newNdv = nonLiteralOptionCount + validInOptCount;
if (newNdv < compareExprStats.ndv) {
compareExprStatsBuilder.setNdv(newNdv);
selectivity = StatsMathUtil.divide(newNdv, compareExprStats.ndv);
} else {
selectivity = 1.0;
}
} else {
maxOptionValue = Math.min(maxOptionValue, compareExprStats.maxValue);
Expand All @@ -441,25 +432,42 @@ A not in (1, 2, 3, 100):
compareExprStatsBuilder.setMinExpr(minOptionLiteral);
if (validInOptCount < compareExprStats.ndv) {
compareExprStatsBuilder.setNdv(validInOptCount);
selectivity = StatsMathUtil.divide(validInOptCount, compareExprStats.ndv);
} else {
selectivity = 1.0;
}
}
} else {
// other types, such as string type, using option's size to estimate
// min/max will not be updated
selectivity = Math.min(options.size() / compareExprStats.getOriginalNdv(), 1);
selectivity = Statistics.getValidSelectivity(selectivity);
compareExprStatsBuilder.setNdv(Math.min(options.size(), compareExprStats.getOriginalNdv()));
}

compareExprStatsBuilder.setNumNulls(0);
return compareExprStatsBuilder.build();
}

/**
 * Estimates the statistics produced by filtering with an IN predicate.
 *
 * <p>The compare expression's column statistics are obtained from
 * {@code ExpressionEstimation.estimate}. When those statistics are unknown, or the
 * compare expression is a {@code Function}, the input statistics are scaled by
 * {@code DEFAULT_IN_COEFFICIENT} and returned. Otherwise the column statistics are
 * refreshed through {@code updateInPredicateColumnStatistics}, a selectivity is
 * derived from them, and a new {@code Statistics} carrying that selectivity and the
 * refreshed column statistics is returned.
 *
 * @param inPredicate the IN predicate being estimated
 * @param context estimation context holding the input statistics
 * @return the estimated statistics after applying the predicate
 */
@Override
public Statistics visitInPredicate(InPredicate inPredicate, EstimationContext context) {
Expression compareExpr = inPredicate.getCompareExpr();
ColumnStatistic compareExprStats = ExpressionEstimation.estimate(compareExpr, context.statistics);
// No usable column statistics (or a function on the compare side): fall back to the default coefficient.
if (compareExprStats.isUnKnown || compareExpr instanceof Function) {
return context.statistics.withSel(DEFAULT_IN_COEFFICIENT);
}

DataType compareColumnType = compareExpr.getDataType();
List<Expression> options = inPredicate.getOptions();
double selectivity;
ColumnStatistic newCompareExprStats = updateInPredicateColumnStatistics(inPredicate, context, compareExprStats);
// TODO: need to calc nonNullSelectivity
if (compareColumnType instanceof RangeScalable && !newCompareExprStats.isMinMaxInvalid()) {
// Range-scalable type with finite min/max: selectivity is the ratio of the
// refreshed ndv to the original ndv, capped at 1.
selectivity = Statistics.getValidSelectivity(
Math.min(StatsMathUtil.divide(newCompareExprStats.ndv, compareExprStats.ndv), 1));
} else {
// Otherwise: selectivity is the option count over the original ndv, capped at 1.
selectivity = Statistics.getValidSelectivity(
Math.min(options.size() / compareExprStats.getOriginalNdv(), 1));
}

Statistics estimated = new StatisticsBuilder(context.statistics).build();
// NOTE(review): compareExprStatsBuilder is not declared in this method; this line and the
// two uses of `stats` below look like the pre-change side of the diff - confirm and drop.
ColumnStatistic stats = compareExprStatsBuilder.build();
selectivity = getNotNullSelectivity(stats, selectivity);
selectivity = getNotNullSelectivity(newCompareExprStats, selectivity);
estimated = estimated.withSel(selectivity);
estimated.addColumnStats(compareExpr, stats);
estimated.addColumnStats(compareExpr, newCompareExprStats);
context.addKeyIfSlot(compareExpr);
return estimated;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -632,10 +632,6 @@ public boolean isObjectType() {
return isHllType() || isBitmapType() || isQuantileStateType();
}

/**
 * Tells whether this type is numeric, date-like, or time-like.
 *
 * @return {@code true} if any of {@code isNumericType()}, {@code isDateLikeType()}
 *         or {@code isTimeLikeType()} holds; {@code false} otherwise
 */
public boolean isMinMaxSensitiveType() {
    if (isNumericType()) {
        return true;
    }
    if (isDateLikeType()) {
        return true;
    }
    return isTimeLikeType();
}

/**
 * Returns this data type itself; this implementation performs no conversion.
 *
 * @return {@code this}
 */
public DataType conversion() {
return this;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ public static ColumnStatistic fromJson(String statJson) {
);
}

public boolean minOrMaxIsInf() {
/**
 * Reports whether either bound of this column statistic is infinite.
 *
 * <p>Only infinity is checked (via {@link Double#isInfinite(double)}); a NaN bound
 * is not reported by this method.
 *
 * @return {@code true} if {@code maxValue} or {@code minValue} is infinite
 */
public boolean isMinMaxInvalid() {
    if (Double.isInfinite(maxValue)) {
        return true;
    }
    return Double.isInfinite(minValue);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,7 @@ public StatisticRange union(StatisticRange other) {
private double overlappingDistinctValues(StatisticRange other) {
double overlapDistinctValuesLeft;
double overlapDistinctValuesRight;
// FIXME: what does it mean?
if (this.isBothInfinite() && other.isOneSideInfinite()) {
if (this.isInfinite() || other.isInfinite()) {
overlapDistinctValuesRight = distinctValues * INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR;
} else if (Math.abs(other.low - other.high) < 1e-6) {
// other is constant
Expand All @@ -207,7 +206,7 @@ private double overlappingDistinctValues(StatisticRange other) {
overlapDistinctValuesRight = overlapPercentOfRight * other.distinctValues;
}

if (other.isBothInfinite() && this.isOneSideInfinite()) {
if (other.isInfinite() || this.isInfinite()) {
overlapDistinctValuesLeft = distinctValues * INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR;
} else if (Math.abs(this.low - this.high) < 1e-6) {
overlapDistinctValuesLeft = distinctValues;
Expand Down

0 comments on commit b8a92af

Please sign in to comment.