Skip to content

Commit

Permalink
[opt](nereids) table row count priority: user injected > BE report > a…
Browse files Browse the repository at this point in the history
…nalyzed apache#40529'
  • Loading branch information
englefly committed Sep 26, 2024
1 parent 0784a0d commit 8bc977d
Showing 1 changed file with 30 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.doris.nereids.stats;

import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Pair;
Expand Down Expand Up @@ -620,6 +621,27 @@ private ColumnStatistic getColumnStatistic(TableIf table, String colName, long i
}
}

/**
 * Chooses the row count for the index selected by an OLAP scan.
 *
 * <p>Sources are consulted in priority order:
 * <ol>
 *   <li>the table stats meta, when it exists and is flagged as user injected;</li>
 *   <li>the count returned by {@code OlapTable#getRowCountForIndex} (the BE-reported count,
 *       per the original note on this method);</li>
 *   <li>the table stats meta again, as a fallback when step 2 yields -1 and a meta exists.</li>
 * </ol>
 *
 * @param olapScan scan whose table and selected index id are looked up
 * @return the chosen row count; -1 when no stats meta exists and step 2 yields -1
 */
private double getOlapTableRowCount(OlapScan olapScan) {
    OlapTable scannedTable = olapScan.getTable();
    AnalysisManager statsManager = Env.getCurrentEnv().getAnalysisManager();
    TableStatsMeta statsMeta = statsManager.findTableStatsStatus(olapScan.getTable().getId());
    boolean hasMeta = statsMeta != null;

    // Highest priority: a value explicitly injected by the user.
    if (hasMeta && statsMeta.userInjected) {
        return statsMeta.getRowCount(olapScan.getSelectedIndexId());
    }

    // Next: the count kept on the table itself; -1 is the "nothing available" sentinel.
    double reported = scannedTable.getRowCountForIndex(olapScan.getSelectedIndexId(), true);
    if (reported != -1 || !hasMeta) {
        return reported;
    }

    // Last resort: whatever the stats meta recorded for this index.
    return statsMeta.getRowCount(olapScan.getSelectedIndexId());
}

// TODO: 1. Subtract the pruned partition
// 2. Consider the influence of runtime filter
// 3. Get NDV and column data size from StatisticManager, StatisticManager doesn't support it now.
Expand All @@ -638,7 +660,12 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(table.getId());
// rows newly updated after last analyze
long deltaRowCount = tableMeta == null ? 0 : tableMeta.updatedRows.get();
double rowCount = catalogRelation.getTable().getRowCountForNereids();
double rowCount;
if (catalogRelation instanceof OlapScan) {
rowCount = getOlapTableRowCount((OlapScan) catalogRelation);
} else {
rowCount = catalogRelation.getTable().getRowCountForNereids();
}
boolean hasUnknownCol = false;
long idxId = -1;
if (catalogRelation instanceof OlapScan) {
Expand All @@ -647,10 +674,7 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
idxId = olapScan.getSelectedIndexId();
}
}
// if (deltaRowCount > 0 && LOG.isDebugEnabled()) {
// LOG.debug("{} is partially analyzed, clear min/max values in column stats",
// catalogRelation.getTable().getName());
// }

for (SlotReference slotReference : slotSet) {
String colName = slotReference.getColumn().isPresent()
? slotReference.getColumn().get().getName()
Expand Down Expand Up @@ -678,20 +702,10 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
colStatsBuilder.setMinExpr(null);
colStatsBuilder.setMaxExpr(null);
}
if (!cache.isUnKnown) {
rowCount = Math.max(rowCount, cache.count + deltaRowCount);
} else {
if (cache.isUnKnown) {
hasUnknownCol = true;
}
if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableStats) {
// if (deltaRowCount > 0) {
// // clear min-max to avoid error estimation
// // for example, after yesterday data loaded, user send query about yesterday immediately.
// // since yesterday data are not analyzed, the max date is before yesterday, and hence optimizer
// // estimates the filter result is zero
// colStatsBuilder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY)
// .setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY);
// }
columnStatisticBuilderMap.put(slotReference, colStatsBuilder);
} else {
columnStatisticBuilderMap.put(slotReference, new ColumnStatisticBuilder(ColumnStatistic.UNKNOWN));
Expand Down

0 comments on commit 8bc977d

Please sign in to comment.