diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
index 150780d95e1ad8..24550f4f7246e7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
@@ -268,7 +268,8 @@ private Plan planWithoutLock(
         if (FeConstants.enableInternalSchemaDb && !FeConstants.runningUnitTest && cascadesContext.isLeadingJoin()) {
             List<LogicalOlapScan> scans = cascadesContext.getRewritePlan()
                     .collectToList(LogicalOlapScan.class::isInstance);
-            StatsCalculator.disableJoinReorderIfTableRowCountNotAvailable(scans, cascadesContext);
+            Optional<String> reason = StatsCalculator.disableJoinReorderIfStatsInvalid(scans, cascadesContext);
+            reason.ifPresent(LOG::info);
         }
 
         optimize();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 9458407295d903..1fc3708545e690 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -17,6 +17,7 @@
 
 package org.apache.doris.nereids.stats;
 
+import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.OlapTable;
 import org.apache.doris.catalog.TableIf;
@@ -141,6 +142,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
 
@@ -1159,25 +1161,84 @@ private double getOlapTableRowCount(OlapScan olapScan) {
         return rowCount;
     }
 
+    // true only for a SlotReference that is bound to a visible column.
+    private boolean isVisibleSlotReference(Slot slot) {
+        if (slot instanceof SlotReference) {
+            Optional<Column> colOpt = ((SlotReference) slot).getColumn();
+            if (colOpt.isPresent()) {
+                return colOpt.get().isVisible();
+            }
+        }
+        return false;
+    }
+
+    // look up column stats for the slot; index id stays -1 unless the relation is an OlapScan.
+    private ColumnStatistic getColumnStatsFromTableCache(CatalogRelation catalogRelation, SlotReference slot) {
+        long idxId = -1;
+        if (catalogRelation instanceof OlapScan) {
+            idxId = ((OlapScan) catalogRelation).getSelectedIndexId();
+        }
+        return getColumnStatistic(catalogRelation.getTable(), slot.getName(), idxId);
+    }
+
+    // check validation of ndv: known stats are invalid if ndv is 0 while min or max exists,
+    // or if ndv > 10 * rowCount. returns the reason for the first invalid slot, if any.
+    private Optional<String> checkNdvValidation(OlapScan olapScan, double rowCount) {
+        for (Slot slot : ((Plan) olapScan).getOutput()) {
+            if (isVisibleSlotReference(slot)) {
+                ColumnStatistic cache = getColumnStatsFromTableCache((CatalogRelation) olapScan, (SlotReference) slot);
+                if (!cache.isUnKnown) {
+                    if ((cache.ndv == 0 && (cache.minExpr != null || cache.maxExpr != null))
+                            || cache.ndv > rowCount * 10) {
+                        return Optional.of("slot " + slot.getName() + " has invalid column stats: " + cache);
+                    }
+                }
+            }
+        }
+        return Optional.empty();
+    }
+
     /**
-     * disable join reorder if any table row count is not available.
+     * disable join reorder if
+     * 1. any table rowCount is not available, or
+     * 2. col stats ndv=0 but minExpr or maxExpr is not null, or
+     * 3. ndv > 10 * rowCount
+     *
+     * @return the reason stats are considered invalid; empty if none found or no ConnectContext exists
      */
-    public static void disableJoinReorderIfTableRowCountNotAvailable(
-            List<LogicalOlapScan> scans,
+    public static Optional<String> disableJoinReorderIfStatsInvalid(List<LogicalOlapScan> scans,
             CascadesContext context) {
         StatsCalculator calculator = new StatsCalculator(context);
+        if (ConnectContext.get() == null) {
+            // ut case
+            return Optional.empty();
+        }
         for (LogicalOlapScan scan : scans) {
             double rowCount = calculator.getOlapTableRowCount(scan);
-            if (rowCount == -1 && ConnectContext.get() != null) {
+            // row count not available
+            if (rowCount == -1) {
                 try {
                     ConnectContext.get().getSessionVariable().disableNereidsJoinReorderOnce();
                     LOG.info("disable join reorder since row count not available: "
                             + scan.getTable().getNameWithFullQualifiers());
                 } catch (Exception e) {
                     LOG.info("disableNereidsJoinReorderOnce failed");
                 }
-                return;
+                return Optional.of("table[" + scan.getTable().getName() + "] row count is invalid");
+            }
+            // ndv abnormal
+            Optional<String> reason = calculator.checkNdvValidation(scan, rowCount);
+            if (reason.isPresent()) {
+                try {
+                    ConnectContext.get().getSessionVariable().disableNereidsJoinReorderOnce();
+                    LOG.info("disable join reorder since col stats invalid: "
+                            + reason.get());
+                } catch (Exception e) {
+                    LOG.info("disableNereidsJoinReorderOnce failed");
+                }
+                return reason;
             }
         }
+        return Optional.empty();
     }
 }
diff --git a/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy b/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy
index 169c192a1e4b1e..c57e7297838d8c 100644
--- a/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy
+++ b/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy
@@ -1268,10 +1268,6 @@ sql """
 alter table web_page modify column wp_max_ad_count set stats ('row_count'='2040', 'ndv'='5', 'min_value'='0', 'max_value'='4', 'avg_size'='8160', 'max_size'='8160' )
 """
 
-sql """
-alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'min_value'='2415022', 'max_value'='2488070', 'avg_size'='120', 'max_size'='120' )
-"""
-
 sql """
 alter table web_returns modify column wr_return_ship_cost set stats ('row_count'='7197670', 'ndv'='10429', 'min_value'='0.00', 'max_value'='13602.60', 'avg_size'='28790680', 'max_size'='28790680' )
 """
@@ -1989,7 +1985,7 @@ alter table ship_mode modify column sm_contract set stats ('row_count'='20', 'nd
 """
 
 sql """
-alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'min_value'='0', 'max_value'='0', 'avg_size'='120', 'max_size'='120' )
+alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'num_nulls'='30', 'avg_size'='120', 'max_size'='120' )
 """
 
 sql """