From 4b3661cd3256aee78fe50faf5ad9461dd4d1d253 Mon Sep 17 00:00:00 2001 From: minghong Date: Tue, 3 Sep 2024 19:34:57 +0800 Subject: [PATCH] table-row --- .../apache/doris/nereids/NereidsPlanner.java | 16 ++++ .../doris/nereids/stats/StatsCalculator.java | 79 +++++++++++-------- 2 files changed, 62 insertions(+), 33 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java index 9d32af433b5849b..376cc70914167f4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java @@ -47,6 +47,7 @@ import org.apache.doris.nereids.processor.pre.PlanPreprocessors; import org.apache.doris.nereids.properties.PhysicalProperties; import org.apache.doris.nereids.rules.exploration.mv.MaterializationContext; +import org.apache.doris.nereids.stats.StatsCalculator; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.plans.ComputeResultSet; @@ -55,6 +56,7 @@ import org.apache.doris.nereids.trees.plans.distribute.DistributePlanner; import org.apache.doris.nereids.trees.plans.distribute.DistributedPlan; import org.apache.doris.nereids.trees.plans.distribute.FragmentIdMapping; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalSqlCache; import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan; @@ -250,6 +252,8 @@ private Plan planWithoutLock( return rewrittenPlan; } } + List scans = getAllOlapScans(cascadesContext.getRewritePlan()); + StatsCalculator.disableJoinReorderIfTableRowCountNotAvailable(scans, cascadesContext); optimize(); if (statementContext.getConnectContext().getExecutor() != null) { @@ -283,6 +287,18 @@ private Plan planWithoutLock( return physicalPlan; } + private List getAllOlapScans(Plan plan) { + List scans = Lists.newArrayList(); + if (plan instanceof LogicalOlapScan) { + scans.add((LogicalOlapScan) plan); + } else { + for (Plan child : plan.children()) { + scans.addAll(getAllOlapScans(child)); + } + } + return scans; + } + private LogicalPlan preprocess(LogicalPlan logicalPlan) { return new PlanPreprocessors(statementContext).process(logicalPlan); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index fec744b86b10f2d..a8a64e207a28c34 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -182,6 +182,11 @@ public class StatsCalculator extends DefaultPlanVisitor { private CascadesContext cascadesContext; + private StatsCalculator(CascadesContext context) { + this.groupExpression = null; + this.cascadesContext = context; + } + private StatsCalculator(GroupExpression groupExpression, boolean forbidUnknownColStats, Map columnStatisticMap, boolean isPlayNereidsDump, Map cteIdToStats, CascadesContext context) { @@ -205,6 +210,22 @@ public Map getTotalColumnStatisticMap() { return totalColumnStatisticMap; } + /** + * disable join reorder if any table row count is not available. + */ + public static void disableJoinReorderIfTableRowCountNotAvailable( + List scans, CascadesContext context) { + StatsCalculator calculator = new StatsCalculator(context); + for (LogicalOlapScan scan : scans) { + double rowCount = calculator.getOlapTableRowCount(scan); + if (rowCount == -1 && ConnectContext.get() != null) { + LOG.info("disable join reorder since row count not available: " + + scan.getTable().getNameWithFullQualifiers()); + ConnectContext.get().getSessionVariable().setDisableJoinReorder(true); + } + } + } + /** * estimate stats */ @@ -217,15 +238,6 @@ public static StatsCalculator estimate(GroupExpression groupExpression, boolean return statsCalculator; } - public static StatsCalculator estimate(GroupExpression groupExpression, boolean forbidUnknownColStats, - Map columnStatisticMap, boolean isPlayNereidsDump, CascadesContext context) { - return StatsCalculator.estimate(groupExpression, - forbidUnknownColStats, - columnStatisticMap, - isPlayNereidsDump, - new HashMap<>(), context); - } - // For unit test only public static void estimate(GroupExpression groupExpression, CascadesContext context) { StatsCalculator statsCalculator = new StatsCalculator(groupExpression, false, @@ -364,19 +376,28 @@ private void checkIfUnknownStatsUsedAsKey(StatisticsBuilder builder) { } } - private Statistics computeOlapScan(OlapScan olapScan) { + private double getOlapTableRowCount(OlapScan olapScan) { OlapTable olapTable = olapScan.getTable(); - double tableRowCount = olapTable.getRowCountForIndex(olapScan.getSelectedIndexId()); - if (tableRowCount <= 0) { - AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); - TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(olapScan.getTable().getId()); - if (tableMeta != null) { - // create-view after analyzing, we may get -1 for this view row count - tableRowCount = Math.max(1, tableMeta.getRowCount(olapScan.getSelectedIndexId())); - } else { - tableRowCount = 1; + AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); + TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(olapScan.getTable().getId()); + double rowCount = -1; + if (tableMeta != null && tableMeta.userInjected) { + rowCount = tableMeta.getRowCount(olapScan.getSelectedIndexId()); + } else { + rowCount = olapTable.getRowCountForIndex(olapScan.getSelectedIndexId()); + if (rowCount == -1) { + if (tableMeta != null) { + rowCount = tableMeta.getRowCount(olapScan.getSelectedIndexId()); + } } } + return rowCount; + } + + private Statistics computeOlapScan(OlapScan olapScan) { + OlapTable olapTable = olapScan.getTable(); + double tableRowCount = getOlapTableRowCount(olapScan); + tableRowCount = Math.max(1, tableRowCount); if (olapScan.getSelectedIndexId() != olapScan.getTable().getBaseIndexId() || olapTable instanceof MTMV) { // mv is selected, return its estimated stats @@ -431,6 +452,7 @@ private Statistics computeOlapScan(OlapScan olapScan) { // build Stats for olapScan double deltaRowCount = computeDeltaRowCount(olapScan); builder.setDeltaRowCount(deltaRowCount); + // if slot is invisible, use UNKNOWN List visibleOutputSlots = new ArrayList<>(); for (Slot slot : ((Plan) olapScan).getOutput()) { @@ -441,10 +463,12 @@ private Statistics computeOlapScan(OlapScan olapScan) { } } + boolean useTableLevelStats = true; if (olapScan.getSelectedPartitionIds().size() < olapScan.getTable().getPartitionNum()) { // partition pruned double selectedPartitionsRowCount = getSelectedPartitionRowCount(olapScan); - if (selectedPartitionsRowCount > 0) { + if (selectedPartitionsRowCount >= 0) { + useTableLevelStats = false; List selectedPartitionNames = new ArrayList<>(olapScan.getSelectedPartitionIds().size()); olapScan.getSelectedPartitionIds().forEach(id -> { selectedPartitionNames.add(olapScan.getTable().getPartition(id).getName()); @@ -458,20 +482,9 @@ private Statistics computeOlapScan(OlapScan olapScan) { } checkIfUnknownStatsUsedAsKey(builder); builder.setRowCount(selectedPartitionsRowCount + deltaRowCount); - } else { - // if partition row count is invalid (-1), fallback to table stats - for (SlotReference slot : visibleOutputSlots) { - ColumnStatistic cache = getColumnStatsFromTableCache((CatalogRelation) olapScan, slot); - ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache); - colStatsBuilder.setCount(tableRowCount); - colStatsBuilder.normalizeAvgSizeByte(slot); - builder.putColumnStatistics(slot, colStatsBuilder.build()); - } - checkIfUnknownStatsUsedAsKey(builder); - builder.setRowCount(tableRowCount + deltaRowCount); } - } else { - // get table level stats + } + if (useTableLevelStats) { for (SlotReference slot : visibleOutputSlots) { ColumnStatistic cache = getColumnStatsFromTableCache((CatalogRelation) olapScan, slot); ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache);