Skip to content

Commit

Permalink
pick
Browse files Browse the repository at this point in the history
  • Loading branch information
englefly committed Oct 30, 2024
1 parent 6d50c28 commit 4b7f5d0
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,8 @@ private Plan planWithoutLock(
if (FeConstants.enableInternalSchemaDb && !FeConstants.runningUnitTest && cascadesContext.isLeadingJoin()) {
List<LogicalOlapScan> scans = cascadesContext.getRewritePlan()
.collectToList(LogicalOlapScan.class::isInstance);
StatsCalculator.disableJoinReorderIfTableRowCountNotAvailable(scans, cascadesContext);
Optional<String> reason = StatsCalculator.disableJoinReorderIfStatsInvalid(scans, cascadesContext);
reason.ifPresent(LOG::info);
}

optimize();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.doris.nereids.stats;

import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
Expand Down Expand Up @@ -141,6 +142,7 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

Expand Down Expand Up @@ -1159,25 +1161,74 @@ private double getOlapTableRowCount(OlapScan olapScan) {
return rowCount;
}

/**
 * Tells whether the given slot is a {@link SlotReference} bound to a visible column.
 *
 * @param slot the slot to inspect
 * @return the column's visibility when the slot is a SlotReference that carries a column;
 *         false for any other slot, or when no column is attached
 */
private boolean isVisibleSlotReference(Slot slot) {
    // Anything that is not a SlotReference has no column to ask.
    if (!(slot instanceof SlotReference)) {
        return false;
    }
    SlotReference reference = (SlotReference) slot;
    // A missing column counts as "not visible".
    return reference.getColumn().map(Column::isVisible).orElse(false);
}

/**
 * Looks up the statistics of the column behind {@code slot} via {@code getColumnStatistic},
 * keyed by the relation's table, the slot name and an index id.
 *
 * @param catalogRelation relation that owns the slot; when it is an {@link OlapScan} its
 *                        selected index id is used, otherwise -1 is passed as the index id
 * @param slot slot whose name identifies the column
 * @return whatever {@code getColumnStatistic} returns for that table / column / index id
 */
private ColumnStatistic getColumnStatsFromTableCache(CatalogRelation catalogRelation, SlotReference slot) {
    // -1 is the index id handed over for relations that are not olap scans.
    long selectedIndexId = catalogRelation instanceof OlapScan
            ? ((OlapScan) catalogRelation).getSelectedIndexId()
            : -1;
    return getColumnStatistic(catalogRelation.getTable(), slot.getName(), selectedIndexId);
}

/**
 * Checks the ndv of every visible output column of the scan for plausibility.
 * A column is reported as invalid when its stats are known and either
 * ndv is 0 while a min or max expression is present, or ndv exceeds 10 * rowCount.
 *
 * @param olapScan scan whose output slots are examined
 * @param rowCount row count of the scanned table, used as the upper-bound reference for ndv
 * @return a message naming the first offending slot, or an empty Optional when none is found
 */
private Optional<String> checkNdvValidation(OlapScan olapScan, double rowCount) {
    for (Slot slot : ((Plan) olapScan).getOutput()) {
        if (!isVisibleSlotReference(slot)) {
            continue;
        }
        ColumnStatistic stats = getColumnStatsFromTableCache((CatalogRelation) olapScan, (SlotReference) slot);
        // Unknown stats carry no information to validate.
        if (stats.isUnKnown) {
            continue;
        }
        boolean zeroNdvWithBounds = stats.ndv == 0 && (stats.minExpr != null || stats.maxExpr != null);
        boolean ndvFarAboveRowCount = stats.ndv > rowCount * 10;
        if (zeroNdvWithBounds || ndvFarAboveRowCount) {
            return Optional.of("slot " + slot.getName() + " has invalid column stats: " + stats);
        }
    }
    return Optional.empty();
}

/**
* disable join reorder if any table row count is not available.
* disable join reorder if
* 1. any table rowCount is not available, or
* 2. col stats ndv=0 but minExpr or maxExpr is not null
* 3. ndv > 10 * rowCount
*
* NOTE(review): this span appears to be rendered from a commit diff whose +/- markers were
* stripped, so what look like removed and added lines sit side by side (two method
* signatures, two row-count checks, two return forms). Confirm which lines are current
* before relying on this text as source.
*
* @param scans olap scans whose row count and column stats are checked one by one
* @param context cascades context handed to the StatsCalculator constructor
* @return (Optional-returning signature) a reason string for the first scan that fails a
*         check, or Optional.empty() when ConnectContext.get() is null or no scan fails
*/
public static void disableJoinReorderIfTableRowCountNotAvailable(
List<LogicalOlapScan> scans,
public static Optional<String> disableJoinReorderIfStatsInvalid(List<LogicalOlapScan> scans,
CascadesContext context) {
StatsCalculator calculator = new StatsCalculator(context);
if (ConnectContext.get() == null) {
// ut case
return Optional.empty();
}
for (LogicalOlapScan scan : scans) {
double rowCount = calculator.getOlapTableRowCount(scan);
if (rowCount == -1 && ConnectContext.get() != null) {
// row count not available
// NOTE(review): the lines visible on this path only log and return a reason; no call to
// disableNereidsJoinReorderOnce() is visible here, unlike the ndv path below.
// Confirm whether join reorder is actually meant to be disabled on this path.
if (rowCount == -1) {
LOG.info("disable join reorder since row count not available: "
+ scan.getTable().getNameWithFullQualifiers());
return Optional.of("table[" + scan.getTable().getName() + "] row count is invalid");
}
// ndv abnormal
Optional<String> reason = calculator.checkNdvValidation(scan, rowCount);
if (reason.isPresent()) {
// Best effort: a failure while setting the session variable is only logged.
try {
ConnectContext.get().getSessionVariable().disableNereidsJoinReorderOnce();
LOG.info("disable join reorder since row count not available: "
+ scan.getTable().getNameWithFullQualifiers());
LOG.info("disable join reorder since col stats invalid: "
+ reason.get());
} catch (Exception e) {
LOG.info("disableNereidsJoinReorderOnce failed");
}
return;
return reason;
}
}
// No scan failed a check.
return Optional.empty();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1268,10 +1268,6 @@ sql """
alter table web_page modify column wp_max_ad_count set stats ('row_count'='2040', 'ndv'='5', 'min_value'='0', 'max_value'='4', 'avg_size'='8160', 'max_size'='8160' )
"""

sql """
alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'min_value'='2415022', 'max_value'='2488070', 'avg_size'='120', 'max_size'='120' )
"""

sql """
alter table web_returns modify column wr_return_ship_cost set stats ('row_count'='7197670', 'ndv'='10429', 'min_value'='0.00', 'max_value'='13602.60', 'avg_size'='28790680', 'max_size'='28790680' )
"""
Expand Down Expand Up @@ -1989,7 +1985,7 @@ alter table ship_mode modify column sm_contract set stats ('row_count'='20', 'nd
"""

sql """
alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'min_value'='0', 'max_value'='0', 'avg_size'='120', 'max_size'='120' )
alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'num_nulls'='30', 'avg_size'='120', 'max_size'='120' )
"""

sql """
Expand Down

0 comments on commit 4b7f5d0

Please sign in to comment.