Skip to content

Commit

Permalink
pick 40529_41464_40349
Browse files Browse the repository at this point in the history
  • Loading branch information
englefly committed Oct 20, 2024
1 parent 29f29f4 commit a5bf992
Show file tree
Hide file tree
Showing 47 changed files with 944 additions and 857 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.doris.analysis.StatementBase;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.FormatOptions;
import org.apache.doris.common.NereidsException;
import org.apache.doris.common.Pair;
Expand All @@ -46,11 +47,13 @@
import org.apache.doris.nereids.processor.pre.PlanPreprocessors;
import org.apache.doris.nereids.properties.PhysicalProperties;
import org.apache.doris.nereids.rules.exploration.mv.MaterializationContext;
import org.apache.doris.nereids.stats.StatsCalculator;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.plans.ComputeResultSet;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.commands.ExplainCommand.ExplainLevel;
import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import org.apache.doris.nereids.trees.plans.logical.LogicalSqlCache;
import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan;
Expand Down Expand Up @@ -242,7 +245,16 @@ private Plan planWithoutLock(
return rewrittenPlan;
}
}

// If the row count of any table cannot be obtained, skip join reorder, except when:
// 1. the user has set a leading hint, or
// 2. running unit tests (in unit tests, FeConstants.enableInternalSchemaDb is false
//    or FeConstants.runningUnitTest is true).
if (FeConstants.enableInternalSchemaDb && !FeConstants.runningUnitTest
&& !cascadesContext.isLeadingDisableJoinReorder()) {
List<LogicalOlapScan> scans = cascadesContext.getRewritePlan()
.collectToList(LogicalOlapScan.class::isInstance);
StatsCalculator.disableJoinReorderIfTableRowCountNotAvailable(scans, cascadesContext);
}
optimize();
if (statementContext.getConnectContext().getExecutor() != null) {
statementContext.getConnectContext().getExecutor().getSummaryProfile().setNereidsOptimizeTime();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import org.apache.doris.analysis.IntLiteral;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Pair;
Expand Down Expand Up @@ -187,6 +188,11 @@ private StatsCalculator(GroupExpression groupExpression, boolean forbidUnknownCo
this.cascadesContext = context;
}

/**
 * Creates a calculator that is bound only to a cascades context.
 * The group expression is left as {@code null}.
 *
 * @param context the cascades context to keep
 */
private StatsCalculator(CascadesContext context) {
    this.cascadesContext = context;
    this.groupExpression = null;
}

/**
 * Returns the {@code totalHistogramMap} field of this calculator.
 *
 * @return the map of histograms keyed by string, as held by this instance (not copied)
 */
public Map<String, Histogram> getTotalHistogramMap() {
return totalHistogramMap;
}
Expand Down Expand Up @@ -256,6 +262,29 @@ private void estimate() {
groupExpression.setStatDerived(true);
}

/**
 * Disables join reorder once for the current session if the row count of any
 * of the given scans is not available.
 *
 * <p>Scans are checked in order. A row count of {@code -1} from
 * {@code getOlapTableRowCount} marks "not available". For the first such scan
 * found while a {@link ConnectContext} is present, join reorder is disabled on
 * that context's session variable and the method returns without checking the
 * remaining scans. If no {@link ConnectContext} is present, nothing is changed.
 *
 * @param scans   the olap scans whose row counts are checked
 * @param context the cascades context used to build the temporary calculator
 */
public static void disableJoinReorderIfTableRowCountNotAvailable(
        List<LogicalOlapScan> scans, CascadesContext context) {
    StatsCalculator calculator = new StatsCalculator(context);
    for (LogicalOlapScan scan : scans) {
        double rowCount = calculator.getOlapTableRowCount(scan);
        if (rowCount != -1) {
            continue;
        }
        // Read the thread's connect context once so the null check and the use agree.
        ConnectContext connectContext = ConnectContext.get();
        if (connectContext == null) {
            continue;
        }
        try {
            connectContext.getSessionVariable().disableNereidsJoinReorderOnce();
            LOG.info("disable join reorder since row count not available: "
                    + scan.getTable().getNameWithFullQualifiers());
        } catch (Exception e) {
            // Best effort: planning continues, but keep the cause in the log
            // instead of dropping it.
            LOG.warn("disableNereidsJoinReorderOnce failed", e);
        }
        // One table without a row count is enough; no need to check the rest.
        return;
    }
}

@Override
public Statistics visitLogicalSink(LogicalSink<? extends Plan> logicalSink, Void context) {
return groupExpression.childStatistics(0);
Expand Down Expand Up @@ -762,6 +791,43 @@ private ColumnStatistic getColumnStatistic(TableIf table, String colName, long i
}
}

/**
 * Returns the {@code updatedRows} counter from the stats meta of the scanned table,
 * i.e. rows newly updated after the last analyze.
 *
 * @param scan the relation whose table is looked up in the analysis manager
 * @return the updated-row counter, or 0 when the table has no stats meta
 */
private long computeDeltaRowCount(CatalogRelation scan) {
    TableIf relationTable = scan.getTable();
    TableStatsMeta statsMeta = Env.getCurrentEnv().getAnalysisManager()
            .findTableStatsStatus(relationTable.getId());
    if (statsMeta == null) {
        return 0;
    }
    return statsMeta.updatedRows.get();
}

/**
 * Resolves the row count of the index selected by an olap scan.
 *
 * <p>Resolution order:
 * <ol>
 *   <li>if the table has stats meta marked {@code userInjected}, use the row count from that meta;</li>
 *   <li>otherwise use {@code OlapTable.getRowCountForIndex} for the selected index;</li>
 *   <li>if that is -1 and stats meta exists, use the meta row count plus the delta row count.</li>
 * </ol>
 *
 * @return the row count, or -1 if the table is not analyzed and BE does not report a row count
 */
private double getOlapTableRowCount(OlapScan olapScan) {
    OlapTable scannedTable = olapScan.getTable();
    TableStatsMeta statsMeta = Env.getCurrentEnv().getAnalysisManager()
            .findTableStatsStatus(olapScan.getTable().getId());

    // User-injected statistics take priority over every other source.
    if (statsMeta != null && statsMeta.userInjected) {
        return statsMeta.getRowCount(olapScan.getSelectedIndexId());
    }

    double reportedRowCount = scannedTable.getRowCountForIndex(olapScan.getSelectedIndexId(), true);
    if (reportedRowCount != -1 || statsMeta == null) {
        // Either a usable count was obtained, or there is no stats meta to fall back on.
        return reportedRowCount;
    }

    // Fall back to the row count in stats meta plus rows updated since then.
    return statsMeta.getRowCount(olapScan.getSelectedIndexId())
            + computeDeltaRowCount((CatalogRelation) olapScan);
}

/**
 * Returns the row count for a catalog relation.
 *
 * <p>Olap scans go through {@code getOlapTableRowCount}; every other relation
 * uses its table's {@code getRowCountForNereids()}.
 */
private double getTableRowCount(CatalogRelation relation) {
    if (!(relation instanceof OlapScan)) {
        return relation.getTable().getRowCountForNereids();
    }
    return getOlapTableRowCount((OlapScan) relation);
}

// TODO: 1. Subtract the pruned partition
// 2. Consider the influence of runtime filter
// 3. Get NDV and column data size from StatisticManger, StatisticManager doesn't support it now.
Expand Down Expand Up @@ -791,11 +857,9 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
Set<SlotReference> slotSet = slotSetBuilder.build();
Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap = new HashMap<>();
TableIf table = catalogRelation.getTable();
AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(table.getId());
// rows newly updated after last analyze
long deltaRowCount = tableMeta == null ? 0 : tableMeta.updatedRows.get();
double rowCount = catalogRelation.getTable().getRowCountForNereids();
long deltaRowCount = computeDeltaRowCount(catalogRelation);
double rowCount = getTableRowCount(catalogRelation);
boolean hasUnknownCol = false;
long idxId = -1;
if (catalogRelation instanceof OlapScan) {
Expand Down Expand Up @@ -827,8 +891,6 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache);
colStatsBuilder.normalizeAvgSizeByte(slotReference);
if (!cache.isUnKnown) {
rowCount = Math.max(rowCount, cache.count + deltaRowCount);
} else {
hasUnknownCol = true;
}
if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableStats) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ PhysicalResultSink
----PhysicalDistribute[DistributionSpecGather]
------hashAgg[LOCAL]
--------PhysicalProject
----------PhysicalStorageLayerAggregate[hits]
----------PhysicalOlapScan[hits]

10 changes: 6 additions & 4 deletions regression-test/data/nereids_clickbench_shape_p0/query10.out
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[GLOBAL]
--------hashAgg[DISTINCT_GLOBAL]
----------PhysicalDistribute[DistributionSpecHash]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------PhysicalOlapScan[hits]
------------hashAgg[DISTINCT_LOCAL]
--------------hashAgg[GLOBAL]
----------------hashAgg[LOCAL]
------------------PhysicalProject
--------------------PhysicalOlapScan[hits]

12 changes: 7 additions & 5 deletions regression-test/data/nereids_clickbench_shape_p0/query11.out
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[GLOBAL]
--------hashAgg[DISTINCT_GLOBAL]
----------PhysicalDistribute[DistributionSpecHash]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------filter(( not (MobilePhoneModel = '')))
------------------PhysicalOlapScan[hits]
------------hashAgg[DISTINCT_LOCAL]
--------------hashAgg[GLOBAL]
----------------hashAgg[LOCAL]
------------------PhysicalProject
--------------------filter(( not (MobilePhoneModel = '')))
----------------------PhysicalOlapScan[hits]

12 changes: 7 additions & 5 deletions regression-test/data/nereids_clickbench_shape_p0/query12.out
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[GLOBAL]
--------hashAgg[DISTINCT_GLOBAL]
----------PhysicalDistribute[DistributionSpecHash]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------filter(( not (MobilePhoneModel = '')))
------------------PhysicalOlapScan[hits]
------------hashAgg[DISTINCT_LOCAL]
--------------hashAgg[GLOBAL]
----------------hashAgg[LOCAL]
------------------PhysicalProject
--------------------filter(( not (MobilePhoneModel = '')))
----------------------PhysicalOlapScan[hits]

12 changes: 7 additions & 5 deletions regression-test/data/nereids_clickbench_shape_p0/query14.out
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[GLOBAL]
--------hashAgg[DISTINCT_GLOBAL]
----------PhysicalDistribute[DistributionSpecHash]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------filter(( not (SearchPhrase = '')))
------------------PhysicalOlapScan[hits]
------------hashAgg[DISTINCT_LOCAL]
--------------hashAgg[GLOBAL]
----------------hashAgg[LOCAL]
------------------PhysicalProject
--------------------filter(( not (SearchPhrase = '')))
----------------------PhysicalOlapScan[hits]

7 changes: 4 additions & 3 deletions regression-test/data/nereids_clickbench_shape_p0/query16.out
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[LOCAL]
----------PhysicalProject
------------PhysicalOlapScan[hits]
--------hashAgg[GLOBAL]
----------hashAgg[LOCAL]
------------PhysicalProject
--------------PhysicalOlapScan[hits]

7 changes: 4 additions & 3 deletions regression-test/data/nereids_clickbench_shape_p0/query17.out
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[LOCAL]
----------PhysicalProject
------------PhysicalOlapScan[hits]
--------hashAgg[GLOBAL]
----------hashAgg[LOCAL]
------------PhysicalProject
--------------PhysicalOlapScan[hits]

7 changes: 4 additions & 3 deletions regression-test/data/nereids_clickbench_shape_p0/query18.out
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ PhysicalResultSink
--PhysicalLimit[GLOBAL]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalLimit[LOCAL]
--------hashAgg[LOCAL]
----------PhysicalProject
------------PhysicalOlapScan[hits]
--------hashAgg[GLOBAL]
----------hashAgg[LOCAL]
------------PhysicalProject
--------------PhysicalOlapScan[hits]

7 changes: 4 additions & 3 deletions regression-test/data/nereids_clickbench_shape_p0/query19.out
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[LOCAL]
----------PhysicalProject
------------PhysicalOlapScan[hits]
--------hashAgg[GLOBAL]
----------hashAgg[LOCAL]
------------PhysicalProject
--------------PhysicalOlapScan[hits]

12 changes: 7 additions & 5 deletions regression-test/data/nereids_clickbench_shape_p0/query23.out
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[GLOBAL]
--------hashAgg[DISTINCT_GLOBAL]
----------PhysicalDistribute[DistributionSpecHash]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------filter(( not (SearchPhrase = '')) and ( not (URL like '%.google.%')) and (Title like '%Google%'))
------------------PhysicalOlapScan[hits]
------------hashAgg[DISTINCT_LOCAL]
--------------hashAgg[GLOBAL]
----------------hashAgg[LOCAL]
------------------PhysicalProject
--------------------filter(( not (SearchPhrase = '')) and ( not (URL like '%.google.%')) and (Title like '%Google%'))
----------------------PhysicalOlapScan[hits]

Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ PhysicalResultSink
----PhysicalDistribute[DistributionSpecGather]
------hashAgg[LOCAL]
--------PhysicalProject
----------PhysicalStorageLayerAggregate[hits]
----------PhysicalOlapScan[hits]

15 changes: 7 additions & 8 deletions regression-test/data/nereids_clickbench_shape_p0/query8.out
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !ckbench_shape_8 --
PhysicalResultSink
--PhysicalQuickSort[MERGE_SORT]
--PhysicalQuickSort[GATHER_SORT]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalQuickSort[LOCAL_SORT]
--------hashAgg[GLOBAL]
----------PhysicalDistribute[DistributionSpecHash]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------filter(( not (AdvEngineID = 0)))
------------------PhysicalOlapScan[hits]
------hashAgg[GLOBAL]
--------PhysicalDistribute[DistributionSpecHash]
----------hashAgg[LOCAL]
------------PhysicalProject
--------------filter(( not (AdvEngineID = 0)))
----------------PhysicalOlapScan[hits]

10 changes: 6 additions & 4 deletions regression-test/data/nereids_clickbench_shape_p0/query9.out
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[GLOBAL]
--------hashAgg[DISTINCT_GLOBAL]
----------PhysicalDistribute[DistributionSpecHash]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------PhysicalOlapScan[hits]
------------hashAgg[DISTINCT_LOCAL]
--------------hashAgg[GLOBAL]
----------------hashAgg[LOCAL]
------------------PhysicalProject
--------------------PhysicalOlapScan[hits]

40 changes: 20 additions & 20 deletions regression-test/data/nereids_hint_tpcds_p0/shape/query16.out
Original file line number Diff line number Diff line change
Expand Up @@ -8,33 +8,33 @@ PhysicalResultSink
----------hashAgg[GLOBAL]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk]
----------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4 cs_order_number->[cs_order_number]
------------------PhysicalDistribute[DistributionSpecHash]
--------------------PhysicalProject
----------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4
------------------PhysicalProject
--------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF2 cs_order_number->[cs_order_number]
--------------------hashJoin[RIGHT_ANTI_JOIN] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() build RFs:RF3 cs_order_number->[cr_order_number]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------PhysicalProject
--------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2
----------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_ship_date_sk]
------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk]
--------------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=()
----------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF3
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk]
--------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_ship_date_sk]
----------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk]
------------------------------PhysicalProject
--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF3
----------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2
------------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------------PhysicalProject
----------------------------------filter((customer_address.ca_state = 'PA'))
------------------------------------PhysicalOlapScan[customer_address]
----------------------------PhysicalDistribute[DistributionSpecReplicated]
------------------------------PhysicalProject
--------------------------------PhysicalOlapScan[catalog_returns]
--------------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01'))
----------------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalDistribute[DistributionSpecReplicated]
----------------------------PhysicalProject
------------------------------filter((customer_address.ca_state = 'PA'))
--------------------------------PhysicalOlapScan[customer_address]
------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------PhysicalProject
----------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01'))
------------------------------PhysicalOlapScan[date_dim]
------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------PhysicalProject
----------------------filter((call_center.cc_county = 'Williamson County'))
------------------------PhysicalOlapScan[call_center]
------------------------------filter((call_center.cc_county = 'Williamson County'))
--------------------------------PhysicalOlapScan[call_center]

Hint log:
Used: leading(catalog_sales { cs1 customer_address date_dim call_center } )
Expand Down
Loading

0 comments on commit a5bf992

Please sign in to comment.