From 478f979a35aec65a3d0a39fcc68f09686af3020b Mon Sep 17 00:00:00 2001 From: minghong Date: Mon, 4 Nov 2024 19:22:32 +0800 Subject: [PATCH] [feat](nereids)disable join reorder if any table row count is not available (#43000) In the previous PR #41790, if the row count of any OLAP table is not available, the planner sets disable_join_reorder to true. This PR extends that rule to external tables. Issue Number: close #xxx --- .../apache/doris/nereids/NereidsPlanner.java | 6 +-- .../doris/nereids/stats/StatsCalculator.java | 41 ++++++++++++------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java index 9ceecde9950dd9..69d4d2aa1067d2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java @@ -54,11 +54,11 @@ import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.plans.ComputeResultSet; import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.algebra.CatalogRelation; import org.apache.doris.nereids.trees.plans.commands.ExplainCommand.ExplainLevel; import org.apache.doris.nereids.trees.plans.distribute.DistributePlanner; import org.apache.doris.nereids.trees.plans.distribute.DistributedPlan; import org.apache.doris.nereids.trees.plans.distribute.FragmentIdMapping; -import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalSqlCache; import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan; @@ -272,8 +272,8 @@ private Plan planWithoutLock( // 2. ut test. 
In ut test, FeConstants.enableInternalSchemaDb is false or FeConstants.runningUnitTest is true if (FeConstants.enableInternalSchemaDb && !FeConstants.runningUnitTest && !cascadesContext.isLeadingDisableJoinReorder()) { - List scans = cascadesContext.getRewritePlan() - .collectToList(LogicalOlapScan.class::isInstance); + List scans = cascadesContext.getRewritePlan() + .collectToList(CatalogRelation.class::isInstance); Optional disableJoinReorderReason = StatsCalculator .disableJoinReorderIfStatsInvalid(scans, cascadesContext); disableJoinReorderReason.ifPresent(statementContext::setDisableJoinReorderReason); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index ad7d91fa33be25..9f01503ef16c38 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -217,32 +217,34 @@ public Map getTotalColumnStatisticMap() { * 2. col stats ndv=0 but minExpr or maxExpr is not null * 3. 
ndv > 10 * rowCount */ - public static Optional disableJoinReorderIfStatsInvalid(List scans, + public static Optional disableJoinReorderIfStatsInvalid(List scans, CascadesContext context) { StatsCalculator calculator = new StatsCalculator(context); if (ConnectContext.get() == null) { // ut case return Optional.empty(); } - for (LogicalOlapScan scan : scans) { - double rowCount = calculator.getOlapTableRowCount(scan); + for (CatalogRelation scan : scans) { + double rowCount = calculator.getTableRowCount(scan); // row count not available if (rowCount == -1) { LOG.info("disable join reorder since row count not available: " + scan.getTable().getNameWithFullQualifiers()); return Optional.of("table[" + scan.getTable().getName() + "] row count is invalid"); } - // ndv abnormal - Optional reason = calculator.checkNdvValidation(scan, rowCount); - if (reason.isPresent()) { - try { - ConnectContext.get().getSessionVariable().disableNereidsJoinReorderOnce(); - LOG.info("disable join reorder since col stats invalid: " - + reason.get()); - } catch (Exception e) { - LOG.info("disableNereidsJoinReorderOnce failed"); + if (scan instanceof OlapScan) { + // ndv abnormal + Optional reason = calculator.checkNdvValidation((OlapScan) scan, rowCount); + if (reason.isPresent()) { + try { + ConnectContext.get().getSessionVariable().disableNereidsJoinReorderOnce(); + LOG.info("disable join reorder since col stats invalid: " + + reason.get()); + } catch (Exception e) { + LOG.info("disableNereidsJoinReorderOnce failed"); + } + return reason; } - return reason; } } return Optional.empty(); @@ -398,6 +400,14 @@ private void checkIfUnknownStatsUsedAsKey(StatisticsBuilder builder) { } } + private double getTableRowCount(CatalogRelation scan) { + if (scan instanceof OlapScan) { + return getOlapTableRowCount((OlapScan) scan); + } else { + return scan.getTable().getRowCount(); + } + } + /** * if the table is not analyzed and BE does not report row count, return -1 */ @@ -1082,11 +1092,12 @@ private 
ColumnStatistic getColumnStatistic(TableIf table, String colName, long i private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { StatisticsBuilder builder = new StatisticsBuilder(); + double tableRowCount = catalogRelation.getTable().getRowCount(); // for FeUt, use ColumnStatistic.UNKNOWN if (!FeConstants.enableInternalSchemaDb || ConnectContext.get() == null || ConnectContext.get().getSessionVariable().internalSession) { - builder.setRowCount(catalogRelation.getTable().getRowCountForNereids()); + builder.setRowCount(Math.max(1, tableRowCount)); for (Slot slot : catalogRelation.getOutput()) { builder.putColumnStatistics(slot, ColumnStatistic.UNKNOWN); } @@ -1102,8 +1113,8 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { } } Set slotSet = slotSetBuilder.build(); - double tableRowCount = catalogRelation.getTable().getRowCount(); if (tableRowCount <= 0) { + tableRowCount = 1; // try to get row count from col stats for (SlotReference slot : slotSet) { ColumnStatistic cache = getColumnStatsFromTableCache(catalogRelation, slot);