Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[opt](nereids) fix non-null selectivity computing #42286

Merged
merged 2 commits into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ public void normalizeColumnStatistics(double inputRowCount, boolean isNumNullsDe
// the following columnStatistic.isUnKnown() judgment is loop inside since current doris
// supports partial stats deriving, i.e, allowing part of tables have stats and other parts don't,
// or part of columns have stats but other parts don't, especially join and filter estimation.
if (!checkColumnStatsValid(columnStatistic, rowCount) && !columnStatistic.isUnKnown()) {
if (!columnStatistic.isUnKnown() && (!checkColumnStatsValid(columnStatistic, rowCount)
|| isNumNullsDecreaseByProportion && columnStatistic.numNulls != 0)) {
ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(columnStatistic);
double ndv = Math.min(columnStatistic.ndv, rowCount);
double numNulls = Math.min(columnStatistic.numNulls * factor, rowCount - ndv);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@
import org.apache.doris.nereids.trees.expressions.functions.scalar.Left;
import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral;
import org.apache.doris.nereids.trees.expressions.literal.DateLiteral;
import org.apache.doris.nereids.trees.expressions.literal.DateTimeLiteral;
import org.apache.doris.nereids.trees.expressions.literal.DoubleLiteral;
import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
import org.apache.doris.nereids.trees.expressions.literal.VarcharLiteral;
import org.apache.doris.nereids.types.DateTimeType;
import org.apache.doris.nereids.types.DateType;
import org.apache.doris.nereids.types.DoubleType;
import org.apache.doris.nereids.types.IntegerType;
Expand Down Expand Up @@ -1144,7 +1146,75 @@ public void testNumNullsAndTwoCol() {
Statistics result = filterEstimation.estimate(and, stats);
// result 1.0->2.0 bc happens because the calculation from normalization of
// "Math.min(columnStatistic.numNulls * factor, rowCount - ndv);"
Assertions.assertEquals(result.getRowCount(), 2.0, 0.01);
Assertions.assertEquals(result.getRowCount(), 3.5, 0.01);
}

/**
* dt BETWEEN "2020-05-25 00:00:00" and "2020-05-25 23:59:59"
* and day BETWEEN "2020-05-24" and "2020-05-26"
* and game="mus" and plat = "37wan";
*/
@Test
void testMultiAndWithNull() {
SlotReference dt = new SlotReference("dt", DateTimeType.INSTANCE);
ColumnStatisticBuilder dtBuilder = new ColumnStatisticBuilder(1000000)
.setNdv(783813.0)
.setNumNulls(50833.0)
.setMaxValue(new DateTimeLiteral("2020-05-31 07:59:59").getDouble())
.setMinValue(new DateTimeLiteral("2020-05-01 08:00:04").getDouble());
DateLiteral dtMin = new DateTimeLiteral("2020-05-25 00:00:00");
DateLiteral dtMax = new DateTimeLiteral("2020-05-25 23:59:59");
GreaterThanEqual dtGreater = new GreaterThanEqual(dt, dtMin);
LessThan dtLess = new LessThan(dt, dtMax);
And dtAnd = new And(dtLess, dtGreater);

SlotReference day = new SlotReference("day", DateType.INSTANCE);
ColumnStatisticBuilder dayBuilder = new ColumnStatisticBuilder(1000000)
.setNdv(31.0)
.setNumNulls(49699.0)
.setMaxValue(new DateLiteral("2020-05-31").getDouble())
.setMinValue(new DateLiteral("2020-05-01").getDouble());
DateLiteral dayMin = new DateLiteral("2020-05-24");
DateLiteral dayMax = new DateLiteral("2020-05-26");
GreaterThanEqual dayGreater = new GreaterThanEqual(day, dayMin);
LessThan dayLess = new LessThan(day, dayMax);
And dayAnd = new And(dayLess, dayGreater);

SlotReference game = new SlotReference("game", new VarcharType(500));
ColumnStatisticBuilder gameBuilder = new ColumnStatisticBuilder(1000000)
.setNdv(1.0)
.setNumNulls(49813.0)
.setMaxExpr(new StringLiteral("mus"))
.setMaxValue(new VarcharLiteral("mus").getDouble())
.setMinExpr(new StringLiteral("mus"))
.setMinValue(new VarcharLiteral("mus").getDouble());
VarcharLiteral mus = new VarcharLiteral("mus");
EqualTo gameEqualTo = new EqualTo(game, mus);

SlotReference plat = new SlotReference("plat", new VarcharType(500));
ColumnStatisticBuilder platBuilder = new ColumnStatisticBuilder(1000000)
.setNdv(1.0)
.setNumNulls(49691.0)
.setMaxExpr(new StringLiteral("37wan"))
.setMaxValue(new VarcharLiteral("37wan").getDouble())
.setMinExpr(new StringLiteral("37wan"))
.setMinValue(new VarcharLiteral("37wan").getDouble());
VarcharLiteral wan = new VarcharLiteral("37wan");
EqualTo wanEqualTo = new EqualTo(plat, wan);
And equalAnd = new And(gameEqualTo, wanEqualTo);

And partialAnd = new And(dtAnd, dayAnd);
And allAnd = new And(partialAnd, equalAnd);

Statistics stats = new Statistics(1000000, new HashMap<>());
stats.addColumnStats(dt, dtBuilder.build());
stats.addColumnStats(day, dayBuilder.build());
stats.addColumnStats(game, gameBuilder.build());
stats.addColumnStats(plat, platBuilder.build());

FilterEstimation filterEstimation = new FilterEstimation();
Statistics result = filterEstimation.estimate(allAnd, stats);
Assertions.assertEquals(result.getRowCount(), 2109.16, 0.01);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,13 +145,13 @@ public void testFilter() {
GroupExpression groupExpression = new GroupExpression(logicalFilter, ImmutableList.of(childGroup));
Group ownerGroup = new Group(null, groupExpression, null);
StatsCalculator.estimate(groupExpression, null);
Assertions.assertEquals(49.45, ownerGroup.getStatistics().getRowCount(), 0.001);
Assertions.assertEquals(49.945, ownerGroup.getStatistics().getRowCount(), 0.001);

LogicalFilter<GroupPlan> logicalFilterOr = new LogicalFilter<>(or, groupPlan);
GroupExpression groupExpressionOr = new GroupExpression(logicalFilterOr, ImmutableList.of(childGroup));
Group ownerGroupOr = new Group(null, groupExpressionOr, null);
StatsCalculator.estimate(groupExpressionOr, null);
Assertions.assertEquals(1449.05,
Assertions.assertEquals(1448.555,
ownerGroupOr.getStatistics().getRowCount(), 0.001);
}

Expand Down
Loading