Skip to content

Commit

Permalink
Revert "[refactor](nereids)forbid unknown stats for branch2.0 apache#…
Browse files Browse the repository at this point in the history
…24061 (apache#24243)" (apache#24294)

This reverts commit 108e91f.
  • Loading branch information
xiaokang authored Sep 13, 2023
1 parent 108e91f commit 687a918
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 148 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,6 @@
import org.apache.doris.nereids.trees.plans.physical.PhysicalWindow;
import org.apache.doris.nereids.trees.plans.physical.RuntimeFilter;
import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor;
import org.apache.doris.nereids.types.ArrayType;
import org.apache.doris.nereids.types.DataType;
import org.apache.doris.nereids.types.JsonType;
import org.apache.doris.nereids.types.MapType;
import org.apache.doris.nereids.types.StructType;
import org.apache.doris.nereids.util.ExpressionUtils;
import org.apache.doris.nereids.util.JoinUtils;
import org.apache.doris.nereids.util.Utils;
Expand Down Expand Up @@ -240,14 +235,6 @@ public PlanFragment translatePlan(PhysicalPlan physicalPlan) {
Collections.reverse(context.getPlanFragments());
// TODO: maybe we need to trans nullable directly? and then we could remove call computeMemLayout
context.getDescTable().computeMemLayout();
if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
Set<ScanNode> scans = context.getScanNodeWithUnknownColumnStats();
if (!scans.isEmpty()) {
StringBuilder builder = new StringBuilder();
scans.forEach(scanNode -> builder.append(scanNode));
throw new AnalysisException("tables with unknown column stats: " + builder);
}
}
return rootFragment;
}

Expand Down Expand Up @@ -543,15 +530,6 @@ public PlanFragment visitPhysicalOlapScan(PhysicalOlapScan olapScan, PlanTransla
// TODO: move all node set cardinality into one place
if (olapScan.getStats() != null) {
olapScanNode.setCardinality((long) olapScan.getStats().getRowCount());
if (ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
for (int i = 0; i < slots.size(); i++) {
Slot slot = slots.get(i);
if (olapScan.getStats().findColumnStatistics(slot).isUnKnown()
&& !isComplexDataType(slot.getDataType())) {
context.addUnknownStatsColumn(olapScanNode, tupleDescriptor.getSlots().get(i).getId());
}
}
}
}
// TODO: Do we really need tableName here?
TableName tableName = new TableName(null, "", "");
Expand Down Expand Up @@ -2000,14 +1978,6 @@ private void updateScanSlotsMaterialization(ScanNode scanNode,
scanNode.getTupleDesc().getSlots().add(smallest);
}
try {
if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
for (SlotId slotId : requiredByProjectSlotIdSet) {
if (context.isColumnStatsUnknown(scanNode, slotId)) {
throw new AnalysisException("meet unknown column stats on table " + scanNode);
}
}
context.removeScanFromStatsUnknownColumnsMap(scanNode);
}
scanNode.updateRequiredSlots(context, requiredByProjectSlotIdSet);
} catch (UserException e) {
Util.logAndThrowRuntimeException(LOG,
Expand Down Expand Up @@ -2270,9 +2240,4 @@ private List<Expr> translateToLegacyConjuncts(Set<Expression> conjuncts) {
}
return outputExprs;
}

private boolean isComplexDataType(DataType dataType) {
return dataType instanceof ArrayType || dataType instanceof MapType || dataType instanceof JsonType
|| dataType instanceof StructType;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,11 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import javax.annotation.Nullable;

Expand Down Expand Up @@ -92,7 +90,6 @@ public class PlanTranslatorContext {
private final Map<CTEId, PhysicalCTEProducer> cteProducerMap = Maps.newHashMap();

private final Map<RelationId, TPushAggOp> tablePushAggOp = Maps.newHashMap();
private final Map<ScanNode, Set<SlotId>> statsUnknownColumnsMap = Maps.newHashMap();

public PlanTranslatorContext(CascadesContext ctx) {
this.translator = new RuntimeFilterTranslator(ctx.getRuntimeFilterContext());
Expand All @@ -103,34 +100,6 @@ public PlanTranslatorContext() {
translator = null;
}

/**
* remember the unknown-stats column and its scan, used for forbid_unknown_col_stats check
*/
public void addUnknownStatsColumn(ScanNode scan, SlotId slotId) {
Set<SlotId> slots = statsUnknownColumnsMap.get(scan);
if (slots == null) {
statsUnknownColumnsMap.put(scan, Sets.newHashSet(slotId));
} else {
statsUnknownColumnsMap.get(scan).add(slotId);
}
}

public boolean isColumnStatsUnknown(ScanNode scan, SlotId slotId) {
Set<SlotId> unknownSlots = statsUnknownColumnsMap.get(scan);
if (unknownSlots == null) {
return false;
}
return unknownSlots.contains(slotId);
}

public void removeScanFromStatsUnknownColumnsMap(ScanNode scan) {
statsUnknownColumnsMap.remove(scan);
}

public Set<ScanNode> getScanNodeWithUnknownColumnStats() {
return statsUnknownColumnsMap.keySet();
}

public List<PlanFragment> getPlanFragments() {
return planFragments;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,12 @@ public Set<Expression> infer(Set<Expression> predicates) {
}

/**
* Use the left or right child of `equalExpr` to replace the left or right child of `expression`
* Use the left or right child of `leftSlotEqualToRightSlot` to replace the left or right child of `expression`
* Now only support infer `ComparisonPredicate`.
* TODO: We should determine whether `expression` satisfies the condition for replacement
* eg: Satisfy `expression` is non-deterministic
*/
private Expression doInfer(Expression equalExpr, Expression expression) {
private Expression doInfer(Expression leftSlotEqualToRightSlot, Expression expression) {
return expression.accept(new DefaultExpressionRewriter<Void>() {

@Override
Expand All @@ -76,43 +76,36 @@ public Expression visit(Expression expr, Void context) {
public Expression visitComparisonPredicate(ComparisonPredicate cp, Void context) {
// we need to get expression covered by cast, because we want to infer different datatype
if (ExpressionUtils.isExpressionSlotCoveredByCast(cp.left()) && (cp.right().isConstant())) {
return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.left()), equalExpr);
return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.left()));
} else if (ExpressionUtils.isExpressionSlotCoveredByCast(cp.right()) && cp.left().isConstant()) {
return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.right()), equalExpr);
return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.right()));
}
return super.visit(cp, context);
}

private boolean isDataTypeValid(DataType originDataType, Expression expr) {
if ((expr.child(0).getDataType() instanceof IntegralType)
&& (expr.child(1).getDataType() instanceof IntegralType)
if ((leftSlotEqualToRightSlot.child(0).getDataType() instanceof IntegralType)
&& (leftSlotEqualToRightSlot.child(1).getDataType() instanceof IntegralType)
&& (originDataType instanceof IntegralType)) {
// infer filter can not be lower than original datatype, or dataset would be wrong
if (!((IntegralType) originDataType).widerThan(
(IntegralType) expr.child(0).getDataType())
(IntegralType) leftSlotEqualToRightSlot.child(0).getDataType())
&& !((IntegralType) originDataType).widerThan(
(IntegralType) expr.child(1).getDataType())) {
(IntegralType) leftSlotEqualToRightSlot.child(1).getDataType())) {
return true;
}
} else if (expr.child(0).getDataType().equals(expr.child(1).getDataType())) {
return true;
}
return false;
}

private Expression replaceSlot(Expression sourcePredicate, DataType originDataType, Expression equal) {
if (!isDataTypeValid(originDataType, equal)) {
return sourcePredicate;
}
return sourcePredicate.rewriteUp(e -> {
// we can not replace Cast expression to slot because when rewrite up, we have replace child of cast
if (e instanceof Cast) {
return e;
}
if (ExpressionUtils.isTwoExpressionEqualWithCast(e, equal.child(0))) {
return equal.child(1);
} else if (ExpressionUtils.isTwoExpressionEqualWithCast(e, equal.child(1))) {
return equal.child(0);
private Expression replaceSlot(Expression expr, DataType originDataType) {
return expr.rewriteUp(e -> {
if (isDataTypeValid(originDataType, leftSlotEqualToRightSlot)) {
if (ExpressionUtils.isTwoExpressionEqualWithCast(e, leftSlotEqualToRightSlot.child(0))) {
return leftSlotEqualToRightSlot.child(1);
} else if (ExpressionUtils.isTwoExpressionEqualWithCast(e, leftSlotEqualToRightSlot.child(1))) {
return leftSlotEqualToRightSlot.child(0);
}
}
return e;
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Pair;
import org.apache.doris.nereids.CascadesContext;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.memo.Group;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.trees.expressions.Alias;
Expand Down Expand Up @@ -122,6 +123,7 @@
import org.apache.doris.statistics.StatisticRange;
import org.apache.doris.statistics.Statistics;
import org.apache.doris.statistics.StatisticsBuilder;
import org.apache.doris.statistics.util.StatisticsUtil;

import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
Expand Down Expand Up @@ -621,21 +623,46 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
.setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize())
.build();
}
if (!cache.isUnKnown) {
rowCount = Math.max(rowCount, cache.count);
Histogram histogram = getColumnHistogram(table, colName);
if (histogram != null) {
ColumnStatisticBuilder columnStatisticBuilder =
new ColumnStatisticBuilder(cache).setHistogram(histogram);
cache = columnStatisticBuilder.build();
if (ConnectContext.get().getSessionVariable().isEnableMinidump()
&& !ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
totalColumnStatisticMap.put(table.getName() + ":" + colName, cache);
totalHistogramMap.put(table.getName() + colName, histogram);
if (cache.isUnKnown) {
if (forbidUnknownColStats && !shouldIgnoreThisCol) {
if (StatisticsUtil.statsTblAvailable()) {
throw new AnalysisException(String.format("Found unknown stats for column:%s.%s.\n"
+ "It may caused by:\n"
+ "\n"
+ "1. This column never got analyzed\n"
+ "2. This table is empty\n"
+ "3. Stats load failed caused by unstable of backends,"
+ "and FE cached the unknown stats by default in this scenario\n"
+ "4. There is a bug, please report it to Doris community\n"
+ "\n"
+ "If an unknown stats for this column is tolerable,"
+ "you could set session variable `forbid_unknown_col_stats` to false to make planner"
+ " ignore this error and keep planning.", table.getName(), colName));
} else {
throw new AnalysisException("BE is not available!");
}
}
columnStatisticMap.put(slotReference, cache);
continue;
}
rowCount = Math.max(rowCount, cache.count);
Histogram histogram = getColumnHistogram(table, colName);
if (histogram != null) {
ColumnStatisticBuilder columnStatisticBuilder =
new ColumnStatisticBuilder(cache).setHistogram(histogram);
columnStatisticMap.put(slotReference, columnStatisticBuilder.build());
cache = columnStatisticBuilder.build();
if (ConnectContext.get().getSessionVariable().isEnableMinidump()
&& !ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
totalHistogramMap.put(table.getName() + ":" + colName, histogram);
}
}
columnStatisticMap.put(slotReference, cache);
if (ConnectContext.get().getSessionVariable().isEnableMinidump()
&& !ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
totalColumnStatisticMap.put(table.getName() + ":" + colName, cache);
totalHistogramMap.put(table.getName() + colName, histogram);
}
}
return new Statistics(rowCount, columnStatisticMap);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,33 +17,15 @@

package org.apache.doris.nereids.rules.rewrite;

import org.apache.doris.nereids.trees.expressions.Cast;
import org.apache.doris.nereids.trees.expressions.EqualTo;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.plans.JoinType;
import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
import org.apache.doris.nereids.types.BigIntType;
import org.apache.doris.nereids.util.MemoPatternMatchSupported;
import org.apache.doris.nereids.util.PlanChecker;
import org.apache.doris.nereids.util.PlanConstructor;
import org.apache.doris.utframe.TestWithFeService;

import com.google.common.collect.Sets;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

import java.util.Optional;
import java.util.Set;

public class InferPredicatesTest extends TestWithFeService implements MemoPatternMatchSupported {

private final LogicalOlapScan scan1 = PlanConstructor.newLogicalOlapScan(0, "t1", 0);

private final LogicalOlapScan scan2 = PlanConstructor.newLogicalOlapScan(1, "t2", 0);

private final PredicatePropagation propagation = new PredicatePropagation();

@Override
protected void runBeforeAll() throws Exception {
createDatabase("test");
Expand Down Expand Up @@ -646,16 +628,4 @@ public void innerJoinShouldNotInferUnderLeftJoinOnClausePredicates() {
).when(join -> join.getJoinType() == JoinType.LEFT_OUTER_JOIN)
);
}

@Test
void testInfer() {
EqualTo equalTo = new EqualTo(new Cast(scan1.getOutput().get(0), BigIntType.INSTANCE), Literal.of(1));
EqualTo equalTo2 = new EqualTo(scan2.getOutput().get(0), scan1.getOutput().get(0));
Set<Expression> predicates = Sets.newHashSet();
predicates.add(equalTo2);
predicates.add(equalTo);
Set<Expression> newPredicates = propagation.infer(predicates);
Optional<Expression> newPredicate = newPredicates.stream().findFirst();
Assertions.assertTrue(newPredicate.get().equals(new EqualTo(new Cast(scan2.getOutput().get(0), BigIntType.INSTANCE), Literal.of(1))));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,13 @@ suite("test_infer_predicate") {
sql 'drop table if exists infer_tb1;'
sql 'drop table if exists infer_tb2;'
sql 'drop table if exists infer_tb3;'
sql 'drop table if exists infer_tb4;'
sql 'drop table if exists infer_tb5;'

sql '''create table infer_tb1 (k1 int, k2 int) distributed by hash(k1) buckets 3 properties('replication_num' = '1');'''

sql '''create table infer_tb2 (k1 tinyint, k2 smallint, k3 int, k4 bigint, k5 largeint, k6 date, k7 datetime, k8 float, k9 double) distributed by hash(k1) buckets 3 properties('replication_num' = '1');'''

sql '''create table infer_tb3 (k1 varchar(100), k2 int) distributed by hash(k1) buckets 3 properties('replication_num' = '1');'''

sql '''create table infer_tb4 (k1 varchar(100), k2 date) distributed by hash(k1) buckets 3 properties('replication_num' = '1');'''

sql '''create table infer_tb5 (k1 varchar(100), k3 date) distributed by hash(k1) buckets 3 properties('replication_num' = '1');'''

explain {
sql "select * from infer_tb1 inner join infer_tb2 where infer_tb2.k1 = infer_tb1.k2 and infer_tb2.k1 = 1;"
contains "PREDICATES: k2"
Expand All @@ -61,16 +55,4 @@ suite("test_infer_predicate") {
contains "PREDICATES: k3"
contains "PREDICATES: k2"
}

explain {
sql "select * from infer_tb4 left join infer_tb5 on infer_tb4.k2 = infer_tb5.k3 where infer_tb4.k2 = '20230901';"
contains "PREDICATES: k3"
contains "PREDICATES: k2"
}

sql 'drop table if exists infer_tb1;'
sql 'drop table if exists infer_tb2;'
sql 'drop table if exists infer_tb3;'
sql 'drop table if exists infer_tb4;'
sql 'drop table if exists infer_tb5;'
}

0 comments on commit 687a918

Please sign in to comment.