Skip to content

Commit

Permalink
[opt](nereids)stats derive for min()/max() agg function (apache#40126)
Browse files Browse the repository at this point in the history
## Proposed changes
The column stats for the min(A) agg function (and likewise max(A)) are now estimated as the column stats of A.
In the current version, min(A).ndv is fixed at 1, which is error-prone.

Issue Number: close #xxx

<!--Describe your changes.-->
  • Loading branch information
englefly authored Sep 6, 2024
1 parent 7f744ec commit 744a1ec
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -342,14 +342,9 @@ public ColumnStatistic visitMin(Min min, Statistics context) {
if (columnStat.isUnKnown) {
return ColumnStatistic.UNKNOWN;
}
/*
we keep columnStat.min and columnStat.max, but set ndv=1.
if there is group-by keys, we will update count when visiting group clause
*/
double width = min.child().getDataType().width();
return new ColumnStatisticBuilder().setCount(1).setNdv(1).setAvgSizeByte(width)
.setMinValue(columnStat.minValue).setMinExpr(columnStat.minExpr)
.setMaxValue(columnStat.maxValue).setMaxExpr(columnStat.maxExpr).build();
// if this is scalar agg, we will update count and ndv to 1 when visiting group clause
return new ColumnStatisticBuilder(columnStat)
.build();
}

@Override
Expand All @@ -359,14 +354,8 @@ public ColumnStatistic visitMax(Max max, Statistics context) {
if (columnStat.isUnKnown) {
return ColumnStatistic.UNKNOWN;
}
/*
we keep columnStat.min and columnStat.max, but set ndv=1.
if there is group-by keys, we will update count when visiting group clause
*/
int width = max.child().getDataType().width();
return new ColumnStatisticBuilder().setCount(1D).setNdv(1D).setAvgSizeByte(width)
.setMinValue(columnStat.minValue).setMinExpr(columnStat.minExpr)
.setMaxValue(columnStat.maxValue).setMaxExpr(columnStat.maxExpr)
// if this is scalar agg, we will update count and ndv to 1 when visiting group clause
return new ColumnStatisticBuilder(columnStat)
.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ public void test1() {
ColumnStatistic estimated = ExpressionEstimation.estimate(max, stat);
Assertions.assertEquals(0, estimated.minValue);
Assertions.assertEquals(500, estimated.maxValue);
Assertions.assertEquals(1, estimated.ndv);
Assertions.assertEquals(500, estimated.ndv);
}

// MIN(a)
Expand All @@ -95,7 +95,7 @@ public void test2() {
ColumnStatistic estimated = ExpressionEstimation.estimate(max, stat);
Assertions.assertEquals(0, estimated.minValue);
Assertions.assertEquals(1000, estimated.maxValue);
Assertions.assertEquals(1, estimated.ndv);
Assertions.assertEquals(500, estimated.ndv);
}

// a + b
Expand Down

0 comments on commit 744a1ec

Please sign in to comment.