diff --git a/fe/.idea/vcs.xml b/fe/.idea/vcs.xml
index 7b2cdb1cbbd39ab..8c0f59e92e6c5bd 100644
--- a/fe/.idea/vcs.xml
+++ b/fe/.idea/vcs.xml
@@ -1,20 +1,4 @@
-
-
+
+
+
-
+
\ No newline at end of file
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
index 36ca1fa09926b81..4cc464b58e94d54 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
@@ -22,6 +22,7 @@
 import org.apache.doris.analysis.StatementBase;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.TableIf;
+import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.FormatOptions;
 import org.apache.doris.common.NereidsException;
 import org.apache.doris.common.Pair;
@@ -46,11 +47,13 @@
 import org.apache.doris.nereids.processor.pre.PlanPreprocessors;
 import org.apache.doris.nereids.properties.PhysicalProperties;
 import org.apache.doris.nereids.rules.exploration.mv.MaterializationContext;
+import org.apache.doris.nereids.stats.StatsCalculator;
 import org.apache.doris.nereids.trees.expressions.NamedExpression;
 import org.apache.doris.nereids.trees.expressions.SlotReference;
 import org.apache.doris.nereids.trees.plans.ComputeResultSet;
 import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.commands.ExplainCommand.ExplainLevel;
+import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
 import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
 import org.apache.doris.nereids.trees.plans.logical.LogicalSqlCache;
 import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan;
@@ -242,7 +245,16 @@ private Plan planWithoutLock(
                 return rewrittenPlan;
             }
         }
-
+        // if we cannot get table row count, skip join reorder
+        // except:
+        // 1. user set leading hint
+        // 2. ut test. In ut test, FeConstants.enableInternalSchemaDb is false or FeConstants.runningUnitTest is true
+        if (FeConstants.enableInternalSchemaDb && !FeConstants.runningUnitTest
+                && !cascadesContext.isLeadingDisableJoinReorder()) {
+            List<LogicalOlapScan> scans = cascadesContext.getRewritePlan()
+                    .collectToList(LogicalOlapScan.class::isInstance);
+            StatsCalculator.disableJoinReorderIfTableRowCountNotAvailable(scans, cascadesContext);
+        }
         optimize();
         if (statementContext.getConnectContext().getExecutor() != null) {
             statementContext.getConnectContext().getExecutor().getSummaryProfile().setNereidsOptimizeTime();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 1a983532a94dacc..c0d552f05165a65 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -19,6 +19,7 @@
 
 import org.apache.doris.analysis.IntLiteral;
 import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.OlapTable;
 import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.Pair;
@@ -187,6 +188,11 @@ private StatsCalculator(GroupExpression groupExpression, boolean forbidUnknownCo
         this.cascadesContext = context;
     }
 
+    private StatsCalculator(CascadesContext context) {
+        this.groupExpression = null;
+        this.cascadesContext = context;
+    }
+
     public Map getTotalHistogramMap() {
         return totalHistogramMap;
     }
@@ -256,6 +262,33 @@ private void estimate() {
         groupExpression.setStatDerived(true);
     }
+    /**
+     * Disable join reorder (via SessionVariable#disableNereidsJoinReorderOnce) as soon as one scanned
+     * table has no available row count, i.e. getOlapTableRowCount returns -1.
+     *
+     * @param scans olap scans whose row counts are checked
+     * @param context cascades context used to build the calculator
+     */
+    public static void disableJoinReorderIfTableRowCountNotAvailable(
+            List<LogicalOlapScan> scans, CascadesContext context) {
+        StatsCalculator calculator = new StatsCalculator(context);
+        for (LogicalOlapScan scan : scans) {
+            double rowCount = calculator.getOlapTableRowCount(scan);
+            // -1 means the table is not analyzed and BE does not report a row count.
+            // One such table is enough to disable join reorder, so stop at the first hit.
+            if (rowCount == -1 && ConnectContext.get() != null) {
+                try {
+                    ConnectContext.get().getSessionVariable().disableNereidsJoinReorderOnce();
+                    LOG.info("disable join reorder since row count not available: "
+                            + scan.getTable().getNameWithFullQualifiers());
+                } catch (Exception e) {
+                    LOG.warn("disableNereidsJoinReorderOnce failed", e);
+                }
+                return;
+            }
+        }
+    }
+
     @Override
     public Statistics visitLogicalSink(LogicalSink logicalSink, Void context) {
         return groupExpression.childStatistics(0);
     }
@@ -762,6 +795,53 @@ private ColumnStatistic getColumnStatistic(TableIf table, String colName, long i
         }
     }
 
+    /**
+     * Rows updated since the last analyze, read from the table's stats meta; 0 when no stats meta exists.
+     */
+    private long computeDeltaRowCount(CatalogRelation scan) {
+        TableIf table = scan.getTable();
+        AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
+        TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(table.getId());
+        return tableMeta == null ? 0 : tableMeta.updatedRows.get();
+    }
+
+    /**
+     * Row count of the selected index of an olap scan, resolved in priority order:
+     * 1. user-injected stats (tableMeta.userInjected),
+     * 2. the row count returned by OlapTable#getRowCountForIndex,
+     * 3. the stats-meta row count plus the rows updated since the last analyze.
+     * If the table is not analyzed and BE does not report row count, return -1.
+     */
+    private double getOlapTableRowCount(OlapScan olapScan) {
+        OlapTable olapTable = olapScan.getTable();
+        AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
+        TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(olapTable.getId());
+        double rowCount = -1;
+        if (tableMeta != null && tableMeta.userInjected) {
+            rowCount = tableMeta.getRowCount(olapScan.getSelectedIndexId());
+        } else {
+            rowCount = olapTable.getRowCountForIndex(olapScan.getSelectedIndexId(), true);
+            if (rowCount == -1) {
+                if (tableMeta != null) {
+                    rowCount = tableMeta.getRowCount(olapScan.getSelectedIndexId())
+                            + computeDeltaRowCount((CatalogRelation) olapScan);
+                }
+            }
+        }
+        return rowCount;
+    }
+
+    /**
+     * Row count of a catalog relation: olap scans use getOlapTableRowCount, others use getRowCountForNereids.
+     */
+    private double getTableRowCount(CatalogRelation relation) {
+        if (relation instanceof OlapScan) {
+            return getOlapTableRowCount((OlapScan) relation);
+        } else {
+            return relation.getTable().getRowCountForNereids();
+        }
+    }
+
     // TODO: 1. Subtract the pruned partition
     // 2. Consider the influence of runtime filter
     // 3. Get NDV and column data size from StatisticManger, StatisticManager doesn't support it now.
@@ -791,11 +871,9 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
         Set slotSet = slotSetBuilder.build();
         Map columnStatisticBuilderMap = new HashMap<>();
         TableIf table = catalogRelation.getTable();
-        AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
-        TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(table.getId());
         // rows newly updated after last analyze
-        long deltaRowCount = tableMeta == null ? 0 : tableMeta.updatedRows.get();
-        double rowCount = catalogRelation.getTable().getRowCountForNereids();
+        long deltaRowCount = computeDeltaRowCount(catalogRelation);
+        double rowCount = getTableRowCount(catalogRelation);
         boolean hasUnknownCol = false;
         long idxId = -1;
         if (catalogRelation instanceof OlapScan) {
@@ -829,8 +907,6 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
                     colStatsBuilder.setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize());
                 }
-                if (!cache.isUnKnown) {
-                    rowCount = Math.max(rowCount, cache.count + deltaRowCount);
-                } else {
+                if (cache.isUnKnown) {
                     hasUnknownCol = true;
                 }
                 if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableStats) {
diff --git a/regression-test/suites/nereids_p0/delta_row/delta_row.groovy b/regression-test/suites/nereids_p0/delta_row/delta_row.groovy
index c6f40f5363f453a..7c38de69f98f292 100644
--- a/regression-test/suites/nereids_p0/delta_row/delta_row.groovy
+++ b/regression-test/suites/nereids_p0/delta_row/delta_row.groovy
@@ -18,9 +18,11 @@
 suite("delta_row") {
     String database = context.config.getDbNameByFile(context.file)
     sql """
+        set global enable_auto_analyze=false;
         drop database if exists ${database};
         create database ${database};
         use ${database};
+        drop table if exists t;
         CREATE TABLE IF NOT EXISTS t (
             k int(11) null comment "",
             v string replace null comment "",
@@ -52,4 +54,11 @@ suite("delta_row") {
         // +--PhysicalFilter[72]@1 ( stats=0.5, predicates=(k#0 > 6) )
         // +--PhysicalOlapScan[t]@0 ( stats=5(1) )
     }
+    sql "alter table t modify column v set stats ('row_count'='200000', 'ndv'='197960', 'num_nulls'='0', 'min_value'=' 2MrUy', 'max_value'='zzzqXhTdKxT0RAR8yxbc', 'data_size'='2998285')"
+    // verify that the injected rowCount has the highest priority
+    explain{
+        sql "physical plan select * from t"
+        contains("PhysicalOlapScan[t]@0 ( stats=200,000")
+    }
+    sql "set global enable_auto_analyze=true;"
 }
\ No newline at end of file