diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 2dfbe1dd0fa6757..55d99e6b50fc7d6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -276,7 +276,11 @@ public PlanFragment translatePlan(PhysicalPlan physicalPlan) { } } for (ScanNode scanNode : context.getScanNodes()) { - Utils.execWithUncheckedException(scanNode::finalizeForNereids); + try { + scanNode.finalizeForNereids(); + } catch (Exception e) { + throw new RuntimeException(e.getMessage(), e); + } } return rootFragment; } @@ -834,6 +838,9 @@ public PlanFragment visitPhysicalOlapScan(PhysicalOlapScan olapScan, PlanTransla .map(context::findSlotRef).collect(Collectors.toList()); dataPartition = new DataPartition(TPartitionType.HASH_PARTITIONED, partitionExprs); } + if (olapScan.getStats() != null) { + olapScanNode.setCardinality((long) olapScan.getStats().getRowCount()); + } // TODO: maybe we could have a better way to create fragment PlanFragment planFragment = createPlanFragment(olapScanNode, dataPartition, olapScan); context.addPlanFragment(planFragment); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 652623ef8920002..a51b621eaf7ee26 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -53,6 +53,7 @@ import org.apache.doris.nereids.trees.plans.algebra.OlapScan; import org.apache.doris.nereids.trees.plans.algebra.PartitionTopN; import org.apache.doris.nereids.trees.plans.algebra.Project; +import org.apache.doris.nereids.trees.plans.algebra.Relation; import org.apache.doris.nereids.trees.plans.algebra.Repeat; import org.apache.doris.nereids.trees.plans.algebra.SetOperation; import org.apache.doris.nereids.trees.plans.algebra.TopN; @@ -115,6 +116,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalPartitionTopN; import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; import org.apache.doris.nereids.trees.plans.physical.PhysicalQuickSort; +import org.apache.doris.nereids.trees.plans.physical.PhysicalRelation; import org.apache.doris.nereids.trees.plans.physical.PhysicalRepeat; import org.apache.doris.nereids.trees.plans.physical.PhysicalSchemaScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalSink; @@ -328,24 +330,22 @@ private long computeDeltaRowCount(OlapScan olapScan, SlotReference slot) { return deltaRowCount; } - private void adjustColStats(CatalogRelation catalogRelation, SlotReference slot, + private void adjustColStats(OlapScan olapScan, SlotReference slot, ColumnStatisticBuilder builder) { if (builder.getAvgSizeByte() <= 0) { builder.setAvgSizeByte(slot.getDataType().toCatalogDataType().getSlotSize()); } - if (catalogRelation instanceof OlapScan) { - OlapScan olapScan = (OlapScan) catalogRelation; - long delta = computeDeltaRowCount(olapScan, slot); - if (delta > 0) { - builder.setCount(builder.getCount() + delta); - // clear min-max to avoid error estimation - // for example, after yesterday data loaded, user send query about yesterday immediately. - // since yesterday data are not analyzed, the max date is before yesterday, and hence optimizer - // estimates the filter result is zero - builder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY) - .setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY); - } + long delta = computeDeltaRowCount(olapScan, slot); + if (delta > 0) { + builder.setCount(builder.getCount() + delta); + // clear min-max to avoid error estimation + // for example, after yesterday data loaded, user send query about yesterday immediately. + // since yesterday data are not analyzed, the max date is before yesterday, and hence optimizer + // estimates the filter result is zero + builder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY) + .setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY); } + } private ColumnStatistic getColumnStatsFromTableCache(CatalogRelation catalogRelation, SlotReference slot) { @@ -356,12 +356,10 @@ private ColumnStatistic getColumnStatsFromTableCache(CatalogRelation catalogRela return getColumnStatistic(catalogRelation.getTable(), slot.getName(), idxId); } - private ColumnStatistic getColumnStatsFromPartitionCache(CatalogRelation catalogRelation, SlotReference slot, + private ColumnStatistic getColumnStatsFromPartitionCache(OlapScan catalogRelation, SlotReference slot, List partitionNames) { - long idxId = -1; - if (catalogRelation instanceof OlapScan) { - idxId = ((OlapScan) catalogRelation).getSelectedIndexId(); - } + long idxId = catalogRelation.getSelectedIndexId(); + return getColumnStatistic(catalogRelation.getTable(), slot.getName(), idxId, partitionNames); } @@ -399,7 +397,7 @@ private void checkIfUnknownStatsUsedAsKey(StatisticsBuilder builder) { } } - private Statistics computeOlapScan(LogicalOlapScan olapScan) { + private Statistics computeOlapScan(OlapScan olapScan) { OlapTable olapTable = olapScan.getTable(); double tableRowCount = olapTable.getRowCountForIndex(olapScan.getSelectedIndexId()); if (tableRowCount <= 0) { @@ -416,12 +414,14 @@ private Statistics computeOlapScan(LogicalOlapScan olapScan) { if (olapScan.getSelectedIndexId() != olapScan.getTable().getBaseIndexId() || olapTable instanceof MTMV) { // mv is selected, return its estimated stats Optional optStats = cascadesContext.getStatementContext() - .getStatistics(olapScan.getRelationId()); + .getStatistics(((Relation) olapScan).getRelationId()); if (optStats.isPresent()) { - double selectedRowCount = tableRowCount * olapScan.getSelectedPartitionIds().size() - / Math.max(1, olapTable.getPartitions().size()); + double selectedPartitionsRowCount = getSelectedPartitionRowCount(olapScan); + if (selectedPartitionsRowCount == -1) { + selectedPartitionsRowCount = tableRowCount; + } // if estimated mv rowCount is more than actual row count, fall back to base table stats - if (selectedRowCount > optStats.get().getRowCount()) { + if (selectedPartitionsRowCount > optStats.get().getRowCount()) { return optStats.get(); } } @@ -433,7 +433,7 @@ private Statistics computeOlapScan(LogicalOlapScan olapScan) { if (StatisticConstants.isSystemTable(olapTable) || !FeConstants.enableInternalSchemaDb || ConnectContext.get() == null || ConnectContext.get().getSessionVariable().internalSession) { - for (Slot slot : olapScan.getOutput()) { + for (Slot slot : ((Plan) olapScan).getOutput()) { builder.putColumnStatistics(slot, ColumnStatistic.UNKNOWN); } setHasUnknownColStatsInStatementContext(); @@ -444,7 +444,7 @@ private Statistics computeOlapScan(LogicalOlapScan olapScan) { // for regression shape test if (ConnectContext.get() == null || !ConnectContext.get().getSessionVariable().enableStats) { // get row count from any visible slotReference's colStats - for (Slot slot : olapScan.getOutput()) { + for (Slot slot : ((Plan) olapScan).getOutput()) { builder.putColumnStatistics(slot, new ColumnStatisticBuilder(ColumnStatistic.UNKNOWN).setCount(tableRowCount).build()); } @@ -455,7 +455,7 @@ private Statistics computeOlapScan(LogicalOlapScan olapScan) { // build Stats for olapScan // if slot is invisible, use UNKNOWN List visibleOutputSlots = new ArrayList<>(); - for (Slot slot : olapScan.getOutput()) { + for (Slot slot : ((Plan) olapScan).getOutput()) { if (isVisibleSlotReference(slot)) { visibleOutputSlots.add((SlotReference) slot); } else { @@ -482,23 +482,20 @@ private Statistics computeOlapScan(LogicalOlapScan olapScan) { builder.setRowCount(selectedPartitionsRowCount); } else { // if partition row count is invalid (-1), fallback to table stats - // suppose all partitions have the same row count - double estimatedSelectedPartitionsRowCount = tableRowCount * olapScan.getSelectedPartitionIds().size() - / Math.max(1, olapTable.getPartitions().size()); for (SlotReference slot : visibleOutputSlots) { - ColumnStatistic cache = getColumnStatsFromTableCache(olapScan, slot); + ColumnStatistic cache = getColumnStatsFromTableCache((CatalogRelation) olapScan, slot); ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache); - colStatsBuilder.setCount(estimatedSelectedPartitionsRowCount); + colStatsBuilder.setCount(tableRowCount); adjustColStats(olapScan, slot, colStatsBuilder); builder.putColumnStatistics(slot, colStatsBuilder.build()); } checkIfUnknownStatsUsedAsKey(builder); - builder.setRowCount(estimatedSelectedPartitionsRowCount); + builder.setRowCount(tableRowCount); } } else { // get table level stats for (SlotReference slot : visibleOutputSlots) { - ColumnStatistic cache = getColumnStatsFromTableCache(olapScan, slot); + ColumnStatistic cache = getColumnStatsFromTableCache((CatalogRelation) olapScan, slot); ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache); colStatsBuilder.setCount(tableRowCount); adjustColStats(olapScan, slot, colStatsBuilder); @@ -676,7 +673,7 @@ public Statistics visitPhysicalOneRowRelation(PhysicalOneRowRelation oneRowRelat @Override public Statistics visitPhysicalOlapScan(PhysicalOlapScan olapScan, Void context) { - return computeCatalogRelation(olapScan); + return computeOlapScan(olapScan); } @Override @@ -698,7 +695,10 @@ public Statistics visitPhysicalFileScan(PhysicalFileScan fileScan, Void context) @Override public Statistics visitPhysicalStorageLayerAggregate( PhysicalStorageLayerAggregate storageLayerAggregate, Void context) { - return storageLayerAggregate.getRelation().accept(this, context); + PhysicalRelation relation = storageLayerAggregate.getRelation(); + Preconditions.checkArgument(relation instanceof PhysicalOlapScan, "aaaa"); + return relation.accept(this, context); + } @Override @@ -1086,7 +1086,6 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { } ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache); colStatsBuilder.setCount(tableRowCount); - adjustColStats(catalogRelation, slot, colStatsBuilder); builder.putColumnStatistics(slot, colStatsBuilder.build()); } checkIfUnknownStatsUsedAsKey(builder); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index ddd6c0f719e7788..dffbba37cfe03bb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -1729,8 +1729,6 @@ public String getSelectedIndexName() { public void finalizeForNereids() { computeNumNodes(); computeStatsForNereids(); - // NOTICE: must call here to get selected tablet row count to let block rules work well. - mockRowCountInStatistic(); } private void computeStatsForNereids() { @@ -1738,7 +1736,7 @@ private void computeStatsForNereids() { avgRowSize = totalBytes / (float) cardinality * COMPRESSION_RATIO; capCardinalityAtLimit(); } - // when node scan has no data, cardinality should be 0 instead of a invalid + // when node scan has no data, cardinality should be 0 instead of an invalid // value after computeStats() cardinality = cardinality == -1 ? 0 : cardinality; } diff --git a/regression-test/suites/inverted_index_p0/test_count_on_index.groovy b/regression-test/suites/inverted_index_p0/test_count_on_index.groovy index c1d5626715f8016..77adae92c93b79a 100644 --- a/regression-test/suites/inverted_index_p0/test_count_on_index.groovy +++ b/regression-test/suites/inverted_index_p0/test_count_on_index.groovy @@ -144,6 +144,8 @@ suite("test_count_on_index_httplogs", "p0") { sql """set experimental_enable_nereids_planner=true;""" sql """set enable_fallback_to_original_planner=false;""" sql """analyze table ${testTable_dup} with sync"""; + // wait BE report every partition's row count + sleep(10000) // case1: test duplicate table explain { sql("select COUNT() from ${testTable_dup} where request match 'GET'") diff --git a/regression-test/suites/nereids_rules_p0/mv/partition_mv_rewrite.groovy b/regression-test/suites/nereids_rules_p0/mv/partition_mv_rewrite.groovy index afd9d81e5292f9c..7106dfbb08f2a84 100644 --- a/regression-test/suites/nereids_rules_p0/mv/partition_mv_rewrite.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/partition_mv_rewrite.groovy @@ -133,11 +133,10 @@ suite("partition_mv_rewrite") { """ - def mv_name = "mv_10086" - sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name}""" - sql """DROP TABLE IF EXISTS ${mv_name}""" + sql """DROP MATERIALIZED VIEW IF EXISTS mv_10086""" + sql """DROP TABLE IF EXISTS mv_10086""" sql""" - CREATE MATERIALIZED VIEW ${mv_name} + CREATE MATERIALIZED VIEW mv_10086 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by(l_shipdate) DISTRIBUTED BY RANDOM BUCKETS 2 @@ -146,31 +145,40 @@ suite("partition_mv_rewrite") { ${mv_def_sql} """ - waitingMTMVTaskFinished(getJobName(db, mv_name)) + waitingMTMVTaskFinished(getJobName(db, "mv_10086")) + multi_sql """ + analyze table lineitem with sync; + analyze table orders with sync; + analyze table mv_10086 with sync; + """ + sleep(10000) explain { sql("${all_partition_sql}") - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } explain { sql("${partition_sql}") - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } // base table partition data change sql """ insert into lineitem values (1, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy'); """ - waitingPartitionIsExpected("${mv_name}", "p_20231017_20231018", false) + waitingPartitionIsExpected("mv_10086", "p_20231017_20231018", false) // enable union rewrite sql "SET enable_materialized_view_rewrite=false" order_qt_query_3_0_before "${all_partition_sql}" sql "SET enable_materialized_view_rewrite=true" + sql "analyze table mv_10086 with sync" + def memo = sql "explain memo plan ${all_partition_sql}" + print(memo) explain { sql("${all_partition_sql}") // should rewrite successful when union rewrite enalbe if sub partition is invalid - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_3_0_after "${all_partition_sql}" @@ -180,19 +188,19 @@ suite("partition_mv_rewrite") { explain { sql("${partition_sql}") // should rewrite successfully when union rewrite enable if doesn't query invalid partition - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_4_0_after "${partition_sql}" // base table add partition - sql "REFRESH MATERIALIZED VIEW ${mv_name} AUTO" - waitingMTMVTaskFinished(getJobName(db, mv_name)) + sql "REFRESH MATERIALIZED VIEW mv_10086 AUTO" + waitingMTMVTaskFinished(getJobName(db, "mv_10086")) sql """ insert into lineitem values (1, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-21', '2023-10-21', '2023-10-21', 'a', 'b', 'yyyyyyyyy'); """ - waitingPartitionIsExpected("${mv_name}", "p_20231021_20231022", false) + waitingPartitionIsExpected("mv_10086", "p_20231021_20231022", false) // enable union rewrite sql "SET enable_materialized_view_rewrite=false" @@ -201,7 +209,7 @@ suite("partition_mv_rewrite") { explain { sql("${all_partition_sql}") // should rewrite successful when union rewrite enalbe if base table add new partition - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_7_0_after "${all_partition_sql}" @@ -211,17 +219,17 @@ suite("partition_mv_rewrite") { explain { sql("${partition_sql}") // should rewrite successfully when union rewrite enable if doesn't query new partition - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_8_0_after "${partition_sql}" // base table delete partition test - sql "REFRESH MATERIALIZED VIEW ${mv_name} AUTO" - waitingMTMVTaskFinished(getJobName(db, mv_name)) + sql "REFRESH MATERIALIZED VIEW mv_10086 AUTO" + waitingMTMVTaskFinished(getJobName(db, "mv_10086")) sql """ ALTER TABLE lineitem DROP PARTITION IF EXISTS p_20231017 FORCE; """ // show partitions will cause error, tmp comment -// waitingPartitionIsExpected("${mv_name}", "p_20231017_20231018", false) + waitingPartitionIsExpected("mv_10086", "p_20231017_20231018", false) // enable union rewrite sql "SET enable_materialized_view_rewrite=false" @@ -230,7 +238,7 @@ suite("partition_mv_rewrite") { explain { sql("${all_partition_sql}") // should rewrite successful when union rewrite enalbe if base table delete partition - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_11_0_after "${all_partition_sql}" @@ -240,7 +248,7 @@ suite("partition_mv_rewrite") { explain { sql("${partition_sql}") // should rewrite successfully when union rewrite enable if doesn't query deleted partition - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_12_0_after "${partition_sql}" sql """ DROP MATERIALIZED VIEW IF EXISTS mv_10086""" @@ -415,10 +423,10 @@ suite("partition_mv_rewrite") { l_suppkey; """ - sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name}""" - sql """DROP TABLE IF EXISTS ${mv_name}""" + sql """DROP MATERIALIZED VIEW IF EXISTS mv_10086""" + sql """DROP TABLE IF EXISTS mv_10086""" sql""" - CREATE MATERIALIZED VIEW ${mv_name} + CREATE MATERIALIZED VIEW mv_10086 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by (date_trunc(`col1`, 'month')) DISTRIBUTED BY RANDOM BUCKETS 2 @@ -426,7 +434,7 @@ suite("partition_mv_rewrite") { AS ${roll_up_mv_def_sql} """ - waitingMTMVTaskFinished(getJobName(db, mv_name)) + waitingMTMVTaskFinished(getJobName(db, "mv_10086")) multi_sql """ @@ -437,11 +445,11 @@ suite("partition_mv_rewrite") { explain { sql("${roll_up_all_partition_sql}") - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } explain { sql("${roll_up_partition_sql}") - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } // base table add partition sql """ @@ -462,7 +470,7 @@ suite("partition_mv_rewrite") { explain { sql("${roll_up_all_partition_sql}") // should rewrite successful when union rewrite enalbe if base table add new partition - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_17_0_after "${roll_up_all_partition_sql}" @@ -472,7 +480,7 @@ suite("partition_mv_rewrite") { explain { sql("${roll_up_partition_sql}") // should rewrite successfully when union rewrite enable if doesn't query new partition - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_18_0_after "${roll_up_partition_sql}" @@ -489,8 +497,8 @@ suite("partition_mv_rewrite") { // base table partition add data - sql "REFRESH MATERIALIZED VIEW ${mv_name} AUTO" - waitingMTMVTaskFinished(getJobName(db, mv_name)) + sql "REFRESH MATERIALIZED VIEW mv_10086 AUTO" + waitingMTMVTaskFinished(getJobName(db, "mv_10086")) sql """ insert into lineitem values @@ -512,7 +520,7 @@ suite("partition_mv_rewrite") { explain { sql("${roll_up_all_partition_sql}") // should rewrite successful when union rewrite enalbe if base table add new partition - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_19_0_after "${roll_up_all_partition_sql}" @@ -522,14 +530,14 @@ suite("partition_mv_rewrite") { explain { sql("${roll_up_partition_sql}") // should rewrite successfully when union rewrite enable if doesn't query new partition - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_20_0_after "${roll_up_partition_sql}" // base table delete partition - sql "REFRESH MATERIALIZED VIEW ${mv_name} AUTO" - waitingMTMVTaskFinished(getJobName(db, mv_name)) + sql "REFRESH MATERIALIZED VIEW mv_10086 AUTO" + waitingMTMVTaskFinished(getJobName(db, "mv_10086")) sql """ ALTER TABLE lineitem DROP PARTITION IF EXISTS p_20231121 FORCE; """ @@ -541,7 +549,7 @@ suite("partition_mv_rewrite") { // explain { // sql("${roll_up_all_partition_sql}") // // should rewrite successful when union rewrite enalbe if base table add new partition -// contains("${mv_name}(${mv_name})") +// contains("mv_10086(mv_10086)") // } // order_qt_query_21_0_after "${roll_up_all_partition_sql}" // @@ -551,7 +559,7 @@ suite("partition_mv_rewrite") { // explain { // sql("${roll_up_partition_sql}") // // should rewrite successfully when union rewrite enable if doesn't query new partition -// contains("${mv_name}(${mv_name})") +// contains("mv_10086(mv_10086)") // } // order_qt_query_22_0_after "${roll_up_partition_sql}" }