Skip to content

Commit

Permalink
[opt](nereids) refine stats deriving
Browse files Browse the repository at this point in the history
  • Loading branch information
zhongjian.xzj authored and zhongjian.xzj committed Oct 17, 2024
1 parent 01126ea commit 97379b4
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -128,49 +128,60 @@ public static ColumnStatistic estimate(Expression expression, Statistics stats)

/**
 * Fallback estimation for expressions that have no dedicated visitor method.
 *
 * <p>Statistics already recorded in {@code context} for {@code expr} are returned as-is.
 * Otherwise the estimate of the first child is used, and an expression without children
 * yields {@link ColumnStatistic#UNKNOWN}.
 *
 * @param expr the expression to estimate
 * @param context the statistics searched for an existing entry for {@code expr}
 * @return the recorded statistic, the first child's estimate, or {@code UNKNOWN}
 */
@Override
public ColumnStatistic visit(Expression expr, Statistics context) {
    ColumnStatistic known = context.findColumnStatistics(expr);
    if (known != null) {
        return known;
    }
    List<Expression> operands = expr.children();
    if (!CollectionUtils.isEmpty(operands)) {
        // Only the first child is consulted; the remaining children are ignored.
        return expr.child(0).accept(this, context);
    }
    return ColumnStatistic.UNKNOWN;
}

// TODO: case-when needs to be re-implemented
/**
 * Estimates the column statistic of a CASE WHEN expression.
 *
 * <p>The ndv starts at the number of branches (one per WHEN clause, plus one when a default
 * value is present) and is raised to the largest ndv estimated for any branch result.
 * The average size is the largest {@code getDataType().width()} among the branch results,
 * with a floor of 1. Min/max are left unbounded and the null count is set to 0.
 *
 * @param caseWhen the CASE WHEN expression to estimate
 * @param context the input statistics used to estimate each branch result
 * @return the estimated statistic for the whole expression
 */
@Override
public ColumnStatistic visitCaseWhen(CaseWhen caseWhen, Statistics context) {
    double ndv = caseWhen.getWhenClauses().size();
    double width = 1;
    if (caseWhen.getDefaultValue().isPresent()) {
        // The default branch counts as one more possible outcome.
        ndv += 1;
    }
    for (WhenClause clause : caseWhen.getWhenClauses()) {
        ColumnStatistic colStats = ExpressionEstimation.estimate(clause.getResult(), context);
        ndv = Math.max(ndv, colStats.ndv);
        width = Math.max(width, clause.getResult().getDataType().width());
    }
    if (caseWhen.getDefaultValue().isPresent()) {
        ColumnStatistic colStats = ExpressionEstimation.estimate(caseWhen.getDefaultValue().get(), context);
        ndv = Math.max(ndv, colStats.ndv);
        width = Math.max(width, caseWhen.getDefaultValue().get().getDataType().width());
    }
    // The average size is set once, from the computed width; the former hard-coded
    // setAvgSizeByte(8) call was immediately followed by this one and has been dropped.
    return new ColumnStatisticBuilder()
            .setNdv(ndv)
            .setMinValue(Double.NEGATIVE_INFINITY)
            .setMaxValue(Double.POSITIVE_INFINITY)
            .setAvgSizeByte(width)
            .setNumNulls(0)
            .build();
}

/**
 * Estimates the column statistic of an IF expression.
 *
 * <p>The ndv is at least 2 and is raised to the larger ndv of the two result branches
 * (children 1 and 2). The average size is the larger {@code getDataType().width()} of
 * those two branches, with a floor of 1. Min/max are left unbounded and the null count
 * is set to 0.
 *
 * @param ifClause the IF expression to estimate
 * @param context the input statistics used to estimate both branches
 * @return the estimated statistic for the whole expression
 */
@Override
public ColumnStatistic visitIf(If ifClause, Statistics context) {
    double ndv = 2;
    double width = 1;
    ColumnStatistic colStatsThen = ExpressionEstimation.estimate(ifClause.child(1), context);
    ndv = Math.max(ndv, colStatsThen.ndv);
    width = Math.max(width, ifClause.child(1).getDataType().width());

    ColumnStatistic colStatsElse = ExpressionEstimation.estimate(ifClause.child(2), context);
    ndv = Math.max(ndv, colStatsElse.ndv);
    width = Math.max(width, ifClause.child(2).getDataType().width());

    // The average size is set once, from the computed width; the former hard-coded
    // setAvgSizeByte(8) call was immediately followed by this one and has been dropped.
    return new ColumnStatisticBuilder()
            .setNdv(ndv)
            .setMinValue(Double.NEGATIVE_INFINITY)
            .setMaxValue(Double.POSITIVE_INFINITY)
            .setAvgSizeByte(width)
            .setNumNulls(0)
            .build();
}
Expand Down Expand Up @@ -242,9 +253,9 @@ public ColumnStatistic visitLiteral(Literal literal, Statistics context) {
return new ColumnStatisticBuilder()
.setMaxValue(literalVal)
.setMinValue(literalVal)
.setNdv(1)
.setNdv(literal.isNullLiteral() ? 0 : 1)
.setNumNulls(literal.isNullLiteral() ? 1 : 0)
.setAvgSizeByte(1)
.setAvgSizeByte(literal.getDataType().width())
.setMinExpr(literal.toLegacyLiteral())
.setMaxExpr(literal.toLegacyLiteral())
.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,6 @@ private StringType() {
super(-1);
}

// Reports the value of the len field as this type's width.
// NOTE(review): the constructor shown just above calls super(-1); if that argument
// populates len, this method returns -1 rather than a usable byte width — confirm.
@Override
public int width() {
return len;
}

@Override
public Type toCatalogDataType() {
return Type.STRING;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,14 @@
import org.apache.doris.nereids.trees.expressions.functions.agg.Max;
import org.apache.doris.nereids.trees.expressions.functions.agg.Min;
import org.apache.doris.nereids.trees.expressions.functions.scalar.If;
import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral;
import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral;
import org.apache.doris.nereids.trees.expressions.literal.DateTimeLiteral;
import org.apache.doris.nereids.trees.expressions.literal.DateV2Literal;
import org.apache.doris.nereids.trees.expressions.literal.DecimalLiteral;
import org.apache.doris.nereids.trees.expressions.literal.DoubleLiteral;
import org.apache.doris.nereids.trees.expressions.literal.NullLiteral;
import org.apache.doris.nereids.trees.expressions.literal.VarcharLiteral;
import org.apache.doris.nereids.types.DateType;
import org.apache.doris.nereids.types.DoubleType;
import org.apache.doris.nereids.types.IntegerType;
Expand All @@ -44,6 +51,7 @@
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -357,6 +365,7 @@ public void testCaseWhen() {
CaseWhen caseWhen = new CaseWhen(whens);
ColumnStatistic est = ExpressionEstimation.estimate(caseWhen, stats);
Assertions.assertEquals(est.ndv, 100);
Assertions.assertEquals(est.avgSizeByte, 16);
}

@Test
Expand All @@ -383,5 +392,59 @@ public void testIf() {
If ifClause = new If(BooleanLiteral.TRUE, a, b);
ColumnStatistic est = ExpressionEstimation.estimate(ifClause, stats);
Assertions.assertEquals(est.ndv, 100);
Assertions.assertEquals(est.avgSizeByte, 16);
}

/**
 * Checks the statistics estimated for literals of several types: each non-null literal
 * must report ndv 1 and no nulls, with the average size expected for its data type,
 * while a NULL literal must report ndv 0 and one null.
 *
 * <p>Arguments follow JUnit's {@code assertEquals(expected, actual)} order so that a
 * failure message reports the expected and actual values correctly.
 */
@Test
public void testLiteral() {
    Statistics stats = new Statistics(1000, new HashMap<>());

    BigIntLiteral l1 = new BigIntLiteral(1000000);
    ColumnStatistic est = ExpressionEstimation.estimate(l1, stats);
    Assertions.assertEquals(1, est.ndv);
    Assertions.assertEquals(8, est.avgSizeByte);
    Assertions.assertEquals(0, est.numNulls);

    // Ten-character string, expected average size 10.
    VarcharLiteral l2 = new VarcharLiteral("abcdefghij");
    est = ExpressionEstimation.estimate(l2, stats);
    Assertions.assertEquals(1, est.ndv);
    Assertions.assertEquals(10, est.avgSizeByte);
    Assertions.assertEquals(0, est.numNulls);

    DoubleLiteral l3 = new DoubleLiteral(0.01);
    est = ExpressionEstimation.estimate(l3, stats);
    Assertions.assertEquals(1, est.ndv);
    Assertions.assertEquals(8, est.avgSizeByte);
    Assertions.assertEquals(0, est.numNulls);

    DateV2Literal l4 = new DateV2Literal("2024-09-10");
    est = ExpressionEstimation.estimate(l4, stats);
    Assertions.assertEquals(1, est.ndv);
    Assertions.assertEquals(4, est.avgSizeByte);
    Assertions.assertEquals(0, est.numNulls);

    DateTimeLiteral l5 = new DateTimeLiteral("2024-09-10 00:00:00");
    est = ExpressionEstimation.estimate(l5, stats);
    Assertions.assertEquals(1, est.ndv);
    Assertions.assertEquals(16, est.avgSizeByte);
    Assertions.assertEquals(0, est.numNulls);

    BooleanLiteral l6 = BooleanLiteral.TRUE;
    est = ExpressionEstimation.estimate(l6, stats);
    Assertions.assertEquals(1, est.ndv);
    Assertions.assertEquals(1, est.avgSizeByte);
    Assertions.assertEquals(0, est.numNulls);

    DecimalLiteral l7 = new DecimalLiteral(BigDecimal.valueOf(2024.0928));
    est = ExpressionEstimation.estimate(l7, stats);
    Assertions.assertEquals(1, est.ndv);
    Assertions.assertEquals(16, est.avgSizeByte);
    Assertions.assertEquals(0, est.numNulls);

    // A NULL literal contributes no distinct value and exactly one null.
    NullLiteral l8 = new NullLiteral();
    est = ExpressionEstimation.estimate(l8, stats);
    Assertions.assertEquals(0, est.ndv);
    Assertions.assertEquals(1, est.avgSizeByte);
    Assertions.assertEquals(1, est.numNulls);
}
}

0 comments on commit 97379b4

Please sign in to comment.