From 379984130552ac0a1088f6eb0aec67de7f4b0024 Mon Sep 17 00:00:00 2001 From: "zhongjian.xzj" Date: Tue, 24 Sep 2024 18:06:31 +0800 Subject: [PATCH] [opt](nereids) refine operator estimation --- .../org/apache/doris/nereids/stats/FilterEstimation.java | 5 +++-- .../java/org/apache/doris/nereids/stats/JoinEstimation.java | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index 5595bba926c73f5..7f2690ec43bc5bd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -509,6 +509,7 @@ public Statistics visitNot(Not not, EstimationContext context) { || child instanceof Match, "Not-predicate meet unexpected child: %s", child.toSql()); if (child instanceof Like) { + rowCount = context.statistics.getRowCount() - childStats.getRowCount(); colBuilder.setNdv(Math.max(1.0, originColStats.ndv - childColStats.ndv)); } else if (child instanceof InPredicate) { colBuilder.setNdv(Math.max(1.0, originColStats.ndv - childColStats.ndv)); @@ -529,6 +530,7 @@ public Statistics visitNot(Not not, EstimationContext context) { .setMaxValue(originColStats.maxValue) .setMaxExpr(originColStats.maxExpr); } else if (child instanceof Match) { + rowCount = context.statistics.getRowCount() - childStats.getRowCount(); colBuilder.setNdv(Math.max(1.0, originColStats.ndv - childColStats.ndv)); } if (not.child().getInputSlots().size() == 1 && !(child instanceof IsNull)) { @@ -550,8 +552,7 @@ public Statistics visitIsNull(IsNull isNull, EstimationContext context) { double row = context.statistics.getRowCount() * DEFAULT_ISNULL_SELECTIVITY; return new StatisticsBuilder(context.statistics).setRowCount(row).build(); } - double childOutputRowCount = context.statistics.getRowCount(); - double outputRowCount = Math.min(childColStats.numNulls, childOutputRowCount); + double outputRowCount = Math.min(childColStats.numNulls, context.statistics.getRowCount()); if (!isOnBaseTable) { // for is null on base table, use the numNulls, otherwise // nulls will be generated such as outer join and then we do a protection diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java index 99080566e848ca2..a7430a410559710 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java @@ -307,6 +307,7 @@ private static Statistics estimateSemiOrAnti(Statistics leftStats, Statistics ri rowCount = Math.min(innerJoinStats.getRowCount(), baseRowCount); return innerJoinStats.withRowCountAndEnforceValid(rowCount); } else { + // TODO: tuning the new semi/anti estimation method /*double crossRowCount = Math.max(1, leftStats.getRowCount()) * Math.max(1, rightStats.getRowCount()); double selectivity = innerJoinStats.getRowCount() / crossRowCount; selectivity = Statistics.getValidSelectivity(selectivity);