From 8bc977d4ccb087c765b455232a913d48aef63829 Mon Sep 17 00:00:00 2001 From: minghong Date: Thu, 26 Sep 2024 15:42:21 +0800 Subject: [PATCH] [opt](nereids) table row count priority: user injected > BE report > analyzed #40529 --- .../doris/nereids/stats/StatsCalculator.java | 46 ++++++++++++------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 1b67b80a62d5d5f..d0a5a20d03a5f96 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -18,6 +18,7 @@ package org.apache.doris.nereids.stats; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.FeConstants; import org.apache.doris.common.Pair; @@ -620,6 +621,27 @@ private ColumnStatistic getColumnStatistic(TableIf table, String colName, long i } } + /** + * if the table is not analyzed and BE does not report row count, return -1 + */ + private double getOlapTableRowCount(OlapScan olapScan) { + OlapTable olapTable = olapScan.getTable(); + AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); + TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(olapScan.getTable().getId()); + double rowCount = -1; + if (tableMeta != null && tableMeta.userInjected) { + rowCount = tableMeta.getRowCount(olapScan.getSelectedIndexId()); + } else { + rowCount = olapTable.getRowCountForIndex(olapScan.getSelectedIndexId(), true); + if (rowCount == -1) { + if (tableMeta != null) { + rowCount = tableMeta.getRowCount(olapScan.getSelectedIndexId()); + } + } + } + return rowCount; + } + // TODO: 1. Subtract the pruned partition // 2. Consider the influence of runtime filter // 3. 
Get NDV and column data size from StatisticManger, StatisticManager doesn't support it now. @@ -638,7 +660,12 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(table.getId()); // rows newly updated after last analyze long deltaRowCount = tableMeta == null ? 0 : tableMeta.updatedRows.get(); - double rowCount = catalogRelation.getTable().getRowCountForNereids(); + double rowCount; + if (catalogRelation instanceof OlapScan) { + rowCount = getOlapTableRowCount((OlapScan) catalogRelation); + } else { + rowCount = catalogRelation.getTable().getRowCountForNereids(); + } boolean hasUnknownCol = false; long idxId = -1; if (catalogRelation instanceof OlapScan) { @@ -647,10 +674,7 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { idxId = olapScan.getSelectedIndexId(); } } - // if (deltaRowCount > 0 && LOG.isDebugEnabled()) { - // LOG.debug("{} is partially analyzed, clear min/max values in column stats", - // catalogRelation.getTable().getName()); - // } + for (SlotReference slotReference : slotSet) { String colName = slotReference.getColumn().isPresent() ? slotReference.getColumn().get().getName() @@ -678,20 +702,10 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { colStatsBuilder.setMinExpr(null); colStatsBuilder.setMaxExpr(null); } - if (!cache.isUnKnown) { - rowCount = Math.max(rowCount, cache.count + deltaRowCount); - } else { + if (cache.isUnKnown) { hasUnknownCol = true; } if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableStats) { - // if (deltaRowCount > 0) { - // // clear min-max to avoid error estimation - // // for example, after yesterday data loaded, user send query about yesterday immediately. 
- // // since yesterday data are not analyzed, the max date is before yesterday, and hence optimizer - // // estimates the filter result is zero - // colStatsBuilder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY) - // .setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY); - // } columnStatisticBuilderMap.put(slotReference, colStatsBuilder); } else { columnStatisticBuilderMap.put(slotReference, new ColumnStatisticBuilder(ColumnStatistic.UNKNOWN));