From 887866457c9d98c42b64e02e223e65466bbdad73 Mon Sep 17 00:00:00 2001
From: englefly
Date: Tue, 12 Sep 2023 11:51:36 +0800
Subject: [PATCH] forbid unknown stats for branch 2.0

---
Review notes (not part of the commit message):
- visitPhysicalOlapScan now null-checks ConnectContext.get(), matching the
  other two forbidUnknownColStats checks added by this patch.
- The "tables with unknown column stats" message prints the scan set itself,
  so the scan nodes are delimited instead of being concatenated back to back.
- addUnknownStatsColumn uses Map#computeIfAbsent instead of get/put/get.
- Generic type arguments are restored on the added lines only; context lines
  and hunk headings are kept as received.
- Hunk headers and the diffstat are recounted; the blob ids on the index
  lines are those of the original patch.

 .../translator/PhysicalPlanTranslator.java    | 37 ++++++++++++++
 .../translator/PlanTranslatorContext.java     | 34 +++++++++++++
 .../doris/nereids/stats/StatsCalculator.java  | 49 +++++--------------
 3 files changed, 82 insertions(+), 38 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index 406dda54859714..359373ddbf0ce7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -126,6 +126,11 @@
 import org.apache.doris.nereids.trees.plans.physical.PhysicalWindow;
 import org.apache.doris.nereids.trees.plans.physical.RuntimeFilter;
 import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor;
+import org.apache.doris.nereids.types.ArrayType;
+import org.apache.doris.nereids.types.DataType;
+import org.apache.doris.nereids.types.JsonType;
+import org.apache.doris.nereids.types.MapType;
+import org.apache.doris.nereids.types.StructType;
 import org.apache.doris.nereids.util.ExpressionUtils;
 import org.apache.doris.nereids.util.JoinUtils;
 import org.apache.doris.nereids.util.Utils;
@@ -235,6 +240,13 @@ public PlanFragment translatePlan(PhysicalPlan physicalPlan) {
         Collections.reverse(context.getPlanFragments());
         // TODO: maybe we need to trans nullable directly? and then we could remove call computeMemLayout
         context.getDescTable().computeMemLayout();
+        if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
+            Set<ScanNode> scans = context.getScanNodeWithUnknownColumnStats();
+            if (!scans.isEmpty()) {
+                // Set#toString delimits the scan nodes, which keeps the message readable.
+                throw new AnalysisException("tables with unknown column stats: " + scans);
+            }
+        }
         return rootFragment;
     }
 
@@ -530,6 +542,15 @@ public PlanFragment visitPhysicalOlapScan(PhysicalOlapScan olapScan, PlanTransla
         // TODO: move all node set cardinality into one place
         if (olapScan.getStats() != null) {
             olapScanNode.setCardinality((long) olapScan.getStats().getRowCount());
+            if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
+                for (int i = 0; i < slots.size(); i++) {
+                    Slot slot = slots.get(i);
+                    if (olapScan.getStats().findColumnStatistics(slot).isUnKnown()
+                            && !isComplexDataType(slot.getDataType())) {
+                        context.addUnknownStatsColumn(olapScanNode, tupleDescriptor.getSlots().get(i).getId());
+                    }
+                }
+            }
         }
         // TODO: Do we really need tableName here?
         TableName tableName = new TableName(null, "", "");
@@ -1978,6 +1999,14 @@ private void updateScanSlotsMaterialization(ScanNode scanNode,
             scanNode.getTupleDesc().getSlots().add(smallest);
         }
         try {
+            if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
+                for (SlotId slotId : requiredByProjectSlotIdSet) {
+                    if (context.isColumnStatsUnknown(scanNode, slotId)) {
+                        throw new AnalysisException("meet unknown column stats on table " + scanNode);
+                    }
+                }
+                context.removeScanFromStatsUnknownColumnsMap(scanNode);
+            }
             scanNode.updateRequiredSlots(context, requiredByProjectSlotIdSet);
         } catch (UserException e) {
             Util.logAndThrowRuntimeException(LOG,
@@ -2240,4 +2269,12 @@ private List translateToLegacyConjuncts(Set conjuncts) {
         }
         return outputExprs;
     }
+
+    /**
+     * Array, map, json and struct columns are exempt from the unknown-column-stats check.
+     */
+    private boolean isComplexDataType(DataType dataType) {
+        return dataType instanceof ArrayType || dataType instanceof MapType || dataType instanceof JsonType
+                || dataType instanceof StructType;
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
index 256b37d70572e4..e69b5ee8ef3a13 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
@@ -45,11 +45,13 @@
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
 
 import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.Set;
 import java.util.stream.Collectors;
 import javax.annotation.Nullable;
 
@@ -90,6 +92,7 @@ public class PlanTranslatorContext {
     private final Map cteProducerMap = Maps.newHashMap();
 
     private final Map tablePushAggOp = Maps.newHashMap();
+    private final Map<ScanNode, Set<SlotId>> statsUnknownColumnsMap = Maps.newHashMap();
 
     public PlanTranslatorContext(CascadesContext ctx) {
         this.translator = new RuntimeFilterTranslator(ctx.getRuntimeFilterContext());
@@ -100,6 +103,37 @@ public PlanTranslatorContext() {
         translator = null;
     }
 
+    /**
+     * Records that the given slot of the scan has unknown column statistics.
+     * Used by the forbid_unknown_col_stats check.
+     */
+    public void addUnknownStatsColumn(ScanNode scan, SlotId slotId) {
+        statsUnknownColumnsMap.computeIfAbsent(scan, k -> Sets.newHashSet()).add(slotId);
+    }
+
+    /**
+     * Returns true if the slot was recorded for the scan by addUnknownStatsColumn.
+     */
+    public boolean isColumnStatsUnknown(ScanNode scan, SlotId slotId) {
+        Set<SlotId> unknownSlots = statsUnknownColumnsMap.get(scan);
+        return unknownSlots != null && unknownSlots.contains(slotId);
+    }
+
+    /**
+     * Drops every unknown-stats slot recorded for the scan.
+     */
+    public void removeScanFromStatsUnknownColumnsMap(ScanNode scan) {
+        statsUnknownColumnsMap.remove(scan);
+    }
+
+    /**
+     * Returns the scans that still have recorded unknown-stats slots.
+     * The returned set is a live view of the internal map's key set.
+     */
+    public Set<ScanNode> getScanNodeWithUnknownColumnStats() {
+        return statsUnknownColumnsMap.keySet();
+    }
+
     public List getPlanFragments() {
         return planFragments;
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 45aeae54fde483..24ec929e820b00 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -26,7 +26,6 @@
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.Pair;
 import org.apache.doris.nereids.CascadesContext;
-import org.apache.doris.nereids.exceptions.AnalysisException;
 import org.apache.doris.nereids.memo.Group;
 import org.apache.doris.nereids.memo.GroupExpression;
 import org.apache.doris.nereids.trees.expressions.Alias;
@@ -123,7 +122,6 @@
 import org.apache.doris.statistics.StatisticRange;
 import org.apache.doris.statistics.Statistics;
 import org.apache.doris.statistics.StatisticsBuilder;
-import org.apache.doris.statistics.util.StatisticsUtil;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Maps;
@@ -623,46 +621,21 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
                         .setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize())
                         .build();
             }
-            if (cache.isUnKnown) {
-                if (forbidUnknownColStats && !shouldIgnoreThisCol) {
-                    if (StatisticsUtil.statsTblAvailable()) {
-                        throw new AnalysisException(String.format("Found unknown stats for column:%s.%s.\n"
-                                + "It may caused by:\n"
-                                + "\n"
-                                + "1. This column never got analyzed\n"
-                                + "2. This table is empty\n"
-                                + "3. Stats load failed caused by unstable of backends,"
-                                + "and FE cached the unknown stats by default in this scenario\n"
-                                + "4. There is a bug, please report it to Doris community\n"
-                                + "\n"
-                                + "If an unknown stats for this column is tolerable,"
-                                + "you could set session variable `forbid_unknown_col_stats` to false to make planner"
-                                + " ignore this error and keep planning.", table.getName(), colName));
-                    } else {
-                        throw new AnalysisException("BE is not available!");
+            if (!cache.isUnKnown) {
+                rowCount = Math.max(rowCount, cache.count);
+                Histogram histogram = getColumnHistogram(table, colName);
+                if (histogram != null) {
+                    ColumnStatisticBuilder columnStatisticBuilder =
+                            new ColumnStatisticBuilder(cache).setHistogram(histogram);
+                    cache = columnStatisticBuilder.build();
+                    if (ConnectContext.get().getSessionVariable().isEnableMinidump()
+                            && !ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
+                        totalColumnStatisticMap.put(table.getName() + ":" + colName, cache);
+                        totalHistogramMap.put(table.getName() + colName, histogram);
                     }
                 }
-                columnStatisticMap.put(slotReference, cache);
-                continue;
-            }
-            rowCount = Math.max(rowCount, cache.count);
-            Histogram histogram = getColumnHistogram(table, colName);
-            if (histogram != null) {
-                ColumnStatisticBuilder columnStatisticBuilder =
-                        new ColumnStatisticBuilder(cache).setHistogram(histogram);
-                columnStatisticMap.put(slotReference, columnStatisticBuilder.build());
-                cache = columnStatisticBuilder.build();
-                if (ConnectContext.get().getSessionVariable().isEnableMinidump()
-                        && !ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
-                    totalHistogramMap.put(table.getName() + ":" + colName, histogram);
-                }
             }
             columnStatisticMap.put(slotReference, cache);
-            if (ConnectContext.get().getSessionVariable().isEnableMinidump()
-                    && !ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
-                totalColumnStatisticMap.put(table.getName() + ":" + colName, cache);
-                totalHistogramMap.put(table.getName() + colName, histogram);
-            }
         }
         return new Statistics(rowCount, columnStatisticMap);
     }