Skip to content

Commit

Permalink
table-row
Browse files Browse the repository at this point in the history
  • Loading branch information
englefly committed Sep 4, 2024
1 parent 3615a36 commit 4b3661c
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import org.apache.doris.nereids.processor.pre.PlanPreprocessors;
import org.apache.doris.nereids.properties.PhysicalProperties;
import org.apache.doris.nereids.rules.exploration.mv.MaterializationContext;
import org.apache.doris.nereids.stats.StatsCalculator;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.plans.ComputeResultSet;
Expand All @@ -55,6 +56,7 @@
import org.apache.doris.nereids.trees.plans.distribute.DistributePlanner;
import org.apache.doris.nereids.trees.plans.distribute.DistributedPlan;
import org.apache.doris.nereids.trees.plans.distribute.FragmentIdMapping;
import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import org.apache.doris.nereids.trees.plans.logical.LogicalSqlCache;
import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan;
Expand Down Expand Up @@ -250,6 +252,8 @@ private Plan planWithoutLock(
return rewrittenPlan;
}
}
List<LogicalOlapScan> scans = getAllOlapScans(cascadesContext.getRewritePlan());
StatsCalculator.disableJoinReorderIfTableRowCountNotAvailable(scans, cascadesContext);

optimize();
if (statementContext.getConnectContext().getExecutor() != null) {
Expand Down Expand Up @@ -283,6 +287,18 @@ private Plan planWithoutLock(
return physicalPlan;
}

/**
 * Collects every {@link LogicalOlapScan} reachable from the given plan node.
 * A node that is itself an olap scan is returned alone, without visiting its children;
 * any other node contributes the scans of its children, in the order of {@code plan.children()}.
 *
 * @param plan root of the (sub)tree to search
 * @return a new list holding the olap scans found under {@code plan}
 */
private List<LogicalOlapScan> getAllOlapScans(Plan plan) {
    List<LogicalOlapScan> found = Lists.newArrayList();
    if (!(plan instanceof LogicalOlapScan)) {
        // not a scan itself: gather the scans of each subtree, left to right
        for (Plan subPlan : plan.children()) {
            List<LogicalOlapScan> fromSubtree = getAllOlapScans(subPlan);
            found.addAll(fromSubtree);
        }
        return found;
    }
    found.add((LogicalOlapScan) plan);
    return found;
}

/**
 * Runs the plan preprocessors, built from this planner's statement context, over the given plan.
 *
 * @param logicalPlan the plan to preprocess
 * @return the plan produced by {@code PlanPreprocessors.process}
 */
private LogicalPlan preprocess(LogicalPlan logicalPlan) {
    PlanPreprocessors preprocessors = new PlanPreprocessors(statementContext);
    return preprocessors.process(logicalPlan);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,11 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {

private CascadesContext cascadesContext;

// Minimal constructor: binds only the cascades context and leaves the group expression null.
// No other field is assigned here.
private StatsCalculator(CascadesContext context) {
    this.cascadesContext = context;
    this.groupExpression = null;
}

private StatsCalculator(GroupExpression groupExpression, boolean forbidUnknownColStats,
Map<String, ColumnStatistic> columnStatisticMap, boolean isPlayNereidsDump,
Map<CTEId, Statistics> cteIdToStats, CascadesContext context) {
Expand All @@ -205,6 +210,22 @@ public Map<String, ColumnStatistic> getTotalColumnStatisticMap() {
return totalColumnStatisticMap;
}

/**
 * Disable join reorder if any table row count is not available.
 *
 * <p>For each scan, the row count is looked up through {@code getOlapTableRowCount}. A value of
 * {@code -1} is treated as "not available": when a {@link ConnectContext} is bound to the current
 * thread, the table is logged and the session variable {@code disableJoinReorder} is set to true.
 * All scans are inspected, so every table with a missing row count is logged.
 *
 * @param scans olap scans to check
 * @param context cascades context used to build the temporary calculator
 */
public static void disableJoinReorderIfTableRowCountNotAvailable(
        List<LogicalOlapScan> scans, CascadesContext context) {
    StatsCalculator rowCountReader = new StatsCalculator(context);
    for (LogicalOlapScan olapScan : scans) {
        double count = rowCountReader.getOlapTableRowCount(olapScan);
        if (count != -1) {
            // row count is known for this table
            continue;
        }
        ConnectContext connectContext = ConnectContext.get();
        if (connectContext == null) {
            // no session to update
            continue;
        }
        LOG.info("disable join reorder since row count not available: "
                + olapScan.getTable().getNameWithFullQualifiers());
        connectContext.getSessionVariable().setDisableJoinReorder(true);
    }
}

/**
* estimate stats
*/
Expand All @@ -217,15 +238,6 @@ public static StatsCalculator estimate(GroupExpression groupExpression, boolean
return statsCalculator;
}

/**
 * estimate stats; convenience overload that forwards to the six-argument {@code estimate},
 * passing a fresh empty {@link HashMap} for the extra map argument
 * (presumably the CTE-id-to-statistics map; confirm against the six-argument overload).
 */
public static StatsCalculator estimate(GroupExpression groupExpression, boolean forbidUnknownColStats,
        Map<String, ColumnStatistic> columnStatisticMap, boolean isPlayNereidsDump, CascadesContext context) {
    StatsCalculator calculator = estimate(groupExpression, forbidUnknownColStats, columnStatisticMap,
            isPlayNereidsDump, new HashMap<>(), context);
    return calculator;
}

// For unit test only
public static void estimate(GroupExpression groupExpression, CascadesContext context) {
StatsCalculator statsCalculator = new StatsCalculator(groupExpression, false,
Expand Down Expand Up @@ -364,19 +376,28 @@ private void checkIfUnknownStatsUsedAsKey(StatisticsBuilder builder) {
}
}

private Statistics computeOlapScan(OlapScan olapScan) {
private double getOlapTableRowCount(OlapScan olapScan) {
OlapTable olapTable = olapScan.getTable();
double tableRowCount = olapTable.getRowCountForIndex(olapScan.getSelectedIndexId());
if (tableRowCount <= 0) {
AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(olapScan.getTable().getId());
if (tableMeta != null) {
// create-view after analyzing, we may get -1 for this view row count
tableRowCount = Math.max(1, tableMeta.getRowCount(olapScan.getSelectedIndexId()));
} else {
tableRowCount = 1;
AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(olapScan.getTable().getId());
double rowCount = -1;
if (tableMeta != null && tableMeta.userInjected) {
rowCount = tableMeta.getRowCount(olapScan.getSelectedIndexId());
} else {
rowCount = olapTable.getRowCountForIndex(olapScan.getSelectedIndexId());
if (rowCount == -1) {
if (tableMeta != null) {
rowCount = tableMeta.getRowCount(olapScan.getSelectedIndexId());
}
}
}
return rowCount;
}

private Statistics computeOlapScan(OlapScan olapScan) {
OlapTable olapTable = olapScan.getTable();
double tableRowCount = getOlapTableRowCount(olapScan);
tableRowCount = Math.max(1, tableRowCount);

if (olapScan.getSelectedIndexId() != olapScan.getTable().getBaseIndexId() || olapTable instanceof MTMV) {
// mv is selected, return its estimated stats
Expand Down Expand Up @@ -431,6 +452,7 @@ private Statistics computeOlapScan(OlapScan olapScan) {
// build Stats for olapScan
double deltaRowCount = computeDeltaRowCount(olapScan);
builder.setDeltaRowCount(deltaRowCount);

// if slot is invisible, use UNKNOWN
List<SlotReference> visibleOutputSlots = new ArrayList<>();
for (Slot slot : ((Plan) olapScan).getOutput()) {
Expand All @@ -441,10 +463,12 @@ private Statistics computeOlapScan(OlapScan olapScan) {
}
}

boolean useTableLevelStats = true;
if (olapScan.getSelectedPartitionIds().size() < olapScan.getTable().getPartitionNum()) {
// partition pruned
double selectedPartitionsRowCount = getSelectedPartitionRowCount(olapScan);
if (selectedPartitionsRowCount > 0) {
if (selectedPartitionsRowCount >= 0) {
useTableLevelStats = false;
List<String> selectedPartitionNames = new ArrayList<>(olapScan.getSelectedPartitionIds().size());
olapScan.getSelectedPartitionIds().forEach(id -> {
selectedPartitionNames.add(olapScan.getTable().getPartition(id).getName());
Expand All @@ -458,20 +482,9 @@ private Statistics computeOlapScan(OlapScan olapScan) {
}
checkIfUnknownStatsUsedAsKey(builder);
builder.setRowCount(selectedPartitionsRowCount + deltaRowCount);
} else {
// if partition row count is invalid (-1), fallback to table stats
for (SlotReference slot : visibleOutputSlots) {
ColumnStatistic cache = getColumnStatsFromTableCache((CatalogRelation) olapScan, slot);
ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache);
colStatsBuilder.setCount(tableRowCount);
colStatsBuilder.normalizeAvgSizeByte(slot);
builder.putColumnStatistics(slot, colStatsBuilder.build());
}
checkIfUnknownStatsUsedAsKey(builder);
builder.setRowCount(tableRowCount + deltaRowCount);
}
} else {
// get table level stats
}
if (useTableLevelStats) {
for (SlotReference slot : visibleOutputSlots) {
ColumnStatistic cache = getColumnStatsFromTableCache((CatalogRelation) olapScan, slot);
ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache);
Expand Down

0 comments on commit 4b3661c

Please sign in to comment.