Skip to content

Commit

Permalink
estimate selected partitions row count and adjust partition key min/max
Browse files Browse the repository at this point in the history
  • Loading branch information
englefly committed Nov 6, 2024
1 parent 7f3ea42 commit 3fe07f9
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,10 @@ private ColumnStatistic getColumnStatsFromTableCache(CatalogRelation catalogRela
return getColumnStatistic(catalogRelation.getTable(), slot.getName(), idxId);
}

private ColumnStatistic getColumnStatsFromPartitionCache(OlapScan catalogRelation, SlotReference slot,
/**
* if get partition col stats failed, then return table level col stats
*/
private ColumnStatistic getColumnStatsFromPartitionCacheOrTableCache(OlapScan catalogRelation, SlotReference slot,
List<String> partitionNames) {
long idxId = catalogRelation.getSelectedIndexId();

Expand All @@ -384,8 +387,9 @@ private double getSelectedPartitionRowCount(OlapScan olapScan, double tableRowCo
.getRowCountForPartitionIndex(id, olapScan.getSelectedIndexId(), true);
if (partRowCount == -1) {
unknownPartitionCount++;
} else {
partRowCountSum += partRowCount;
}
partRowCountSum += partRowCount;
}
// estimate row count for unknownPartitionCount
if (unknownPartitionCount > 0) {
Expand Down Expand Up @@ -525,18 +529,17 @@ private Statistics computeOlapScan(OlapScan olapScan) {
}
}

boolean useTableLevelStats = true;
if (olapScan.getSelectedPartitionIds().size() < olapScan.getTable().getPartitionNum()) {
// partition pruned
// try to use selected partition stats, if failed, fall back to table stats
double selectedPartitionsRowCount = getSelectedPartitionRowCount(olapScan, tableRowCount);
useTableLevelStats = false;
List<String> selectedPartitionNames = new ArrayList<>(olapScan.getSelectedPartitionIds().size());
olapScan.getSelectedPartitionIds().forEach(id -> {
selectedPartitionNames.add(olapScan.getTable().getPartition(id).getName());
});
for (SlotReference slot : visibleOutputSlots) {
ColumnStatistic cache = getColumnStatsFromPartitionCache(olapScan, slot, selectedPartitionNames);
ColumnStatistic cache = getColumnStatsFromPartitionCacheOrTableCache(
olapScan, slot, selectedPartitionNames);
if (slot.getColumn().isPresent()) {
cache = updateMinMaxForPartitionKey(olapTable, selectedPartitionNames, slot, cache);
}
Expand All @@ -547,10 +550,7 @@ private Statistics computeOlapScan(OlapScan olapScan) {
}
checkIfUnknownStatsUsedAsKey(builder);
builder.setRowCount(selectedPartitionsRowCount);
}
// 1. no partition is pruned, or
// 2. fall back to table stats
if (useTableLevelStats) {
} else {
// get table level stats
for (SlotReference slot : visibleOutputSlots) {
ColumnStatistic cache = getColumnStatsFromTableCache((CatalogRelation) olapScan, slot);
Expand All @@ -564,6 +564,9 @@ private Statistics computeOlapScan(OlapScan olapScan) {
return builder.build();
}

/**
* Determine whether it is a partition key inside the function.
*/
private ColumnStatistic updateMinMaxForPartitionKey(OlapTable olapTable,
List<String> selectedPartitionNames,
SlotReference slot, ColumnStatistic cache) {
Expand Down Expand Up @@ -614,12 +617,7 @@ private ColumnStatistic updateMinMaxForListPartitionKey(OlapTable olapTable,
}
}
if (minExpr != null) {
cache = new ColumnStatisticBuilder(cache)
.setMinExpr(minExpr)
.setMinValue(minValue)
.setMaxExpr(maxExpr)
.setMaxValue(maxValue)
.build();
cache = updateMinMax(cache, minValue, minExpr, maxValue, maxExpr);
}
} catch (AnalysisException e) {
LOG.debug(e.getMessage());
Expand Down Expand Up @@ -669,12 +667,7 @@ private ColumnStatistic updateMinMaxForTheFirstRangePartitionKey(OlapTable olapT
}
}
if (minExpr != null) {
cache = new ColumnStatisticBuilder(cache)
.setMinExpr(minExpr)
.setMinValue(minValue)
.setMaxExpr(maxExpr)
.setMaxValue(maxValue)
.build();
cache = updateMinMax(cache, minValue, minExpr, maxValue, maxExpr);
}
} catch (AnalysisException e) {
LOG.debug(e.getMessage());
Expand All @@ -683,6 +676,43 @@ private ColumnStatistic updateMinMaxForTheFirstRangePartitionKey(OlapTable olapT
return cache;
}

private ColumnStatistic updateMinMax(ColumnStatistic cache, double minValue, LiteralExpr minExpr,
double maxValue, LiteralExpr maxExpr) {
boolean shouldUpdateCache = false;
if (!cache.isUnKnown) {
// merge the min/max with cache.
// example: min/max range in cache is [10-20]
// range from partition def is [15-30]
// the final range is [15-20]
if (cache.minValue > minValue) {
minValue = cache.minValue;
minExpr = cache.minExpr;
} else {
shouldUpdateCache = true;
}
if (cache.maxValue < maxValue) {
maxValue = cache.maxValue;
maxExpr = cache.maxExpr;
} else {
shouldUpdateCache = true;
}
// if min/max is invalid, do not update cache
if (minValue > maxValue) {
shouldUpdateCache = false;
}
}

if (shouldUpdateCache) {
cache = new ColumnStatisticBuilder(cache)
.setMinExpr(minExpr)
.setMinValue(minValue)
.setMaxExpr(maxExpr)
.setMaxValue(maxValue)
.build();
}
return cache;
}

@Override
public Statistics visitLogicalOlapScan(LogicalOlapScan olapScan, Void context) {
return computeOlapScan(olapScan);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ suite("partition_key_minmax") {
sql """memo plan
select * from rangetable where a < 250;
"""
containsAny("a#0 -> ndv=3.0000, min=1.000000(1), max=30.000000(30), count=3.0000")
containsAny("a#0 -> ndv=2.6667, min=5.000000(5), max=333.000000(333), count=2.6667")
containsAny("a#0 -> ndv=2.6667, min=5.000000(5), max=30.000000(30), count=2.6667")
containsAny("a#0 -> ndv=3, min=5.000000(5), max=30.000000(30), count=2.6667")
}

sql """
Expand Down

0 comments on commit 3fe07f9

Please sign in to comment.