Skip to content

Commit

Permalink
pick
Browse files Browse the repository at this point in the history
  • Loading branch information
englefly committed Oct 30, 2024
1 parent 2a6d6c1 commit c032ecd
Show file tree
Hide file tree
Showing 7 changed files with 65 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,8 @@ private Plan planWithoutLock(
&& !cascadesContext.isLeadingDisableJoinReorder()) {
List<LogicalOlapScan> scans = cascadesContext.getRewritePlan()
.collectToList(LogicalOlapScan.class::isInstance);
StatsCalculator.disableJoinReorderIfTableRowCountNotAvailable(scans, cascadesContext);
Optional<String> reason = StatsCalculator.disableJoinReorderIfStatsInvalid(scans, cascadesContext);
reason.ifPresent(LOG::info);
}
optimize();
if (statementContext.getConnectContext().getExecutor() != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.doris.nereids.stats;

import org.apache.doris.analysis.IntLiteral;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
Expand Down Expand Up @@ -262,27 +263,75 @@ private void estimate() {
groupExpression.setStatDerived(true);
}

private boolean isVisibleSlotReference(Slot slot) {
if (slot instanceof SlotReference) {
Optional<Column> colOpt = ((SlotReference) slot).getColumn();
if (colOpt.isPresent()) {
return colOpt.get().isVisible();
}
}
return false;
}

private ColumnStatistic getColumnStatsFromTableCache(CatalogRelation catalogRelation, SlotReference slot) {
long idxId = -1;
if (catalogRelation instanceof OlapScan) {
idxId = ((OlapScan) catalogRelation).getSelectedIndexId();
}
return getColumnStatistic(catalogRelation.getTable(), slot.getName(), idxId);
}

// check validation of ndv.
private Optional<String> checkNdvValidation(OlapScan olapScan, double rowCount) {
for (Slot slot : ((Plan) olapScan).getOutput()) {
if (isVisibleSlotReference(slot)) {
ColumnStatistic cache = getColumnStatsFromTableCache((CatalogRelation) olapScan, (SlotReference) slot);
if (!cache.isUnKnown) {
if ((cache.ndv == 0 && (cache.minExpr != null || cache.maxExpr != null))
|| cache.ndv > rowCount * 10) {
return Optional.of("slot " + slot.getName() + " has invalid column stats: " + cache);
}
}
}
}
return Optional.empty();
}

/**
* disable join reorder if any table row count is not available.
* disable join reorder if
* 1. any table rowCount is not available, or
* 2. col stats ndv=0 but minExpr or maxExpr is not null
* 3. ndv > 10 * rowCount
*/
public static void disableJoinReorderIfTableRowCountNotAvailable(
List<LogicalOlapScan> scans, CascadesContext context) {
public static Optional<String> disableJoinReorderIfStatsInvalid(List<LogicalOlapScan> scans,
CascadesContext context) {
StatsCalculator calculator = new StatsCalculator(context);
if (ConnectContext.get() == null) {
// ut case
return Optional.empty();
}
for (LogicalOlapScan scan : scans) {
double rowCount = calculator.getOlapTableRowCount(scan);
// analyzed rowCount may be zero, but BE-reported rowCount could be positive.
// check ndv validation when reported rowCount > 0
if (rowCount == -1 && ConnectContext.get() != null) {
// row count not available
if (rowCount == -1) {
LOG.info("disable join reorder since row count not available: "
+ scan.getTable().getNameWithFullQualifiers());
return Optional.of("table[" + scan.getTable().getName() + "] row count is invalid");
}
// ndv abnormal
Optional<String> reason = calculator.checkNdvValidation(scan, rowCount);
if (reason.isPresent()) {
try {
ConnectContext.get().getSessionVariable().disableNereidsJoinReorderOnce();
LOG.info("disable join reorder since row count not available: "
+ scan.getTable().getNameWithFullQualifiers());
LOG.info("disable join reorder since col stats invalid: "
+ reason.get());
} catch (Exception e) {
LOG.info("disableNereidsJoinReorderOnce failed");
}
return;
return reason;
}
}
return Optional.empty();
}

@Override
Expand Down
2 changes: 1 addition & 1 deletion regression-test/suites/nereids_hint_tpcds_p0/load.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -2336,7 +2336,7 @@ suite("load") {
"""

sql """
alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'min_value'='0', 'max_value'='179769313', 'data_size'='168')
alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168')
"""

sql """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2336,7 +2336,7 @@ suite("load") {
"""

sql """
alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'min_value'='0', 'max_value'='179769313', 'data_size'='168')
alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168')
"""

sql """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2340,7 +2340,7 @@ suite("load") {
"""

sql """
alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'min_value'='0', 'max_value'='179769313', 'data_size'='168')
alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168')
"""

sql """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1299,7 +1299,7 @@ alter table web_page modify column wp_max_ad_count set stats ('row_count'='2040'
"""

sql """
alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'min_value'='2415022', 'max_value'='2488070', 'avg_size'='120', 'max_size'='120' )
alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'num_nulls'='30', 'avg_size'='120', 'max_size'='120' )
"""

sql """
Expand Down Expand Up @@ -2018,10 +2018,6 @@ sql """
alter table ship_mode modify column sm_contract set stats ('row_count'='20', 'ndv'='20', 'min_value'='2mM8l', 'max_value'='yVfotg7Tio3MVhBg6Bkn', 'avg_size'='252', 'max_size'='252' )
"""

sql """
alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'min_value'='0', 'max_value'='0', 'avg_size'='120', 'max_size'='120' )
"""

sql """
alter table customer_address modify column ca_zip set stats ('row_count'='1000000', 'ndv'='7733', 'min_value'='', 'max_value'='99981', 'avg_size'='4848150', 'max_size'='4848150' )
"""
Expand Down
2 changes: 1 addition & 1 deletion regression-test/suites/statistics/analyze_stats.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -2754,7 +2754,7 @@ PARTITION `p599` VALUES IN (599)
sql """drop stats alter_test"""
alter_result = sql """show table stats alter_test"""
assertEquals("", alter_result[0][7])
sql """alter table alter_test modify column id set stats ('row_count'='100', 'ndv'='0', 'num_nulls'='0.0', 'data_size'='2.69975443E8', 'min_value'='1', 'max_value'='2');"""
sql """alter table alter_test modify column id set stats ('row_count'='100', 'ndv'='0', 'num_nulls'='0.0', 'max_value'='2');"""
alter_result = sql """show column stats alter_test(id)"""
logger.info("show column alter_test(id) stats: " + alter_result)
assertEquals(1, alter_result.size())
Expand Down

0 comments on commit c032ecd

Please sign in to comment.