From e4d4e91b45763963980ab3f972758024f9830dbf Mon Sep 17 00:00:00 2001 From: "zhongjian.xzj" Date: Tue, 24 Sep 2024 12:02:38 +0800 Subject: [PATCH] [opt](nereids) refine operator estimation --- .../org/apache/doris/nereids/stats/JoinEstimation.java | 7 +++---- .../main/java/org/apache/doris/statistics/Statistics.java | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java index e6e43700b656da3..99080566e848ca2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java @@ -289,7 +289,6 @@ private static Statistics estimateSemiOrAnti(Statistics leftStats, Statistics ri .putColumnStatistics(rightStats.columnStatistics()) .build(); } - updateJoinConditionColumnStatistics(result, join); result.normalizeColumnStatistics(); return result; } @@ -306,7 +305,6 @@ private static Statistics estimateSemiOrAnti(Statistics leftStats, Statistics ri double baseRowCount = join.getJoinType().isLeftSemiOrAntiJoin() ? leftStats.getRowCount() : rightStats.getRowCount(); rowCount = Math.min(innerJoinStats.getRowCount(), baseRowCount); - updateJoinConditionColumnStatistics(innerJoinStats, join); return innerJoinStats.withRowCountAndEnforceValid(rowCount); } else { /*double crossRowCount = Math.max(1, leftStats.getRowCount()) * Math.max(1, rightStats.getRowCount()); @@ -336,7 +334,6 @@ private static Statistics estimateSemiOrAnti(Statistics leftStats, Statistics ri } builder.setRowCount(outputRowCount); Statistics outputStats = builder.build(); - updateJoinConditionColumnStatistics(outputStats, join); outputStats.normalizeColumnStatistics(); return outputStats;*/ StatisticsBuilder builder; @@ -366,7 +363,9 @@ public static Statistics estimate(Statistics leftStats, Statistics rightStats, J .build(); Statistics innerJoinStats = estimateInnerJoin(leftStats, rightStats, join); if (joinType.isSemiOrAntiJoin()) { - return estimateSemiOrAnti(leftStats, rightStats, innerJoinStats, join); + Statistics outputStats = estimateSemiOrAnti(leftStats, rightStats, innerJoinStats, join); + updateJoinConditionColumnStatistics(outputStats, join); + return outputStats; } else if (joinType == JoinType.INNER_JOIN) { updateJoinConditionColumnStatistics(innerJoinStats, join); return innerJoinStats; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java index 23b4a8e935295d3..4236993977aaa10 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java @@ -119,10 +119,10 @@ public void normalizeColumnStatistics(double inputRowCount, boolean isNumNullsDe // the following columnStatistic.isUnKnown() judgment is loop inside since current doris // supports partial stats deriving, i.e, allowing part of tables have stats and other parts don't, // or part of columns have stats but other parts don't, especially join and filter estimation. - if (!checkColumnStatsValid(columnStatistic, inputRowCount) && !columnStatistic.isUnKnown()) { + if (!checkColumnStatsValid(columnStatistic, rowCount) && !columnStatistic.isUnKnown()) { ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(columnStatistic); - double ndv = Math.min(columnStatistic.ndv, inputRowCount); - double numNulls = Math.min(columnStatistic.numNulls * factor, inputRowCount - ndv); + double ndv = Math.min(columnStatistic.ndv, rowCount); + double numNulls = Math.min(columnStatistic.numNulls * factor, rowCount - ndv); columnStatisticBuilder.setNumNulls(numNulls); columnStatisticBuilder.setNdv(Math.min(ndv, rowCount - numNulls)); columnStatistic = columnStatisticBuilder.build();