Skip to content

Commit

Permalink
adjust min/max for partition key
Browse files Browse the repository at this point in the history
  • Loading branch information
englefly committed Oct 11, 2024
1 parent bc9b87d commit 39a0147
Show file tree
Hide file tree
Showing 3 changed files with 215 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,18 @@
package org.apache.doris.nereids.stats;

import org.apache.doris.analysis.IntLiteral;
import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.ListPartitionItem;
import org.apache.doris.catalog.MTMV;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionItem;
import org.apache.doris.catalog.PartitionKey;
import org.apache.doris.catalog.PartitionType;
import org.apache.doris.catalog.RangePartitionItem;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Pair;
import org.apache.doris.nereids.CascadesContext;
Expand Down Expand Up @@ -141,11 +148,13 @@
import org.apache.doris.statistics.Statistics;
import org.apache.doris.statistics.StatisticsBuilder;
import org.apache.doris.statistics.TableStatsMeta;
import org.apache.doris.statistics.util.StatisticsUtil;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Range;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

Expand Down Expand Up @@ -483,6 +492,9 @@ private Statistics computeOlapScan(OlapScan olapScan) {
});
for (SlotReference slot : visibleOutputSlots) {
ColumnStatistic cache = getColumnStatsFromPartitionCache(olapScan, slot, selectedPartitionNames);
if (slot.getColumn().isPresent()) {
cache = updateMinMaxForPartitionKey(olapTable, selectedPartitionNames, slot, cache);
}
ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache,
selectedPartitionsRowCount);
colStatsBuilder.normalizeAvgSizeByte(slot);
Expand All @@ -508,6 +520,125 @@ private Statistics computeOlapScan(OlapScan olapScan) {
return builder.build();
}

private ColumnStatistic updateMinMaxForPartitionKey(OlapTable olapTable,
List<String> selectedPartitionNames,
SlotReference slot, ColumnStatistic cache) {
if (olapTable.getPartitionType() == PartitionType.LIST) {
cache = updateMinMaxForListPartitionKey(olapTable, selectedPartitionNames, slot, cache);
} else if (olapTable.getPartitionType() == PartitionType.RANGE) {
cache = updateMinMaxForTheFirstRangePartitionKey(olapTable, selectedPartitionNames, slot, cache);
}
return cache;
}

private double convertLegacyLiteralToDouble(LiteralExpr literal) throws AnalysisException {
return StatisticsUtil.convertToDouble(literal.getType(), literal.getStringValue());
}

private ColumnStatistic updateMinMaxForListPartitionKey(OlapTable olapTable,
List<String> selectedPartitionNames,
SlotReference slot, ColumnStatistic cache) {
int partitionColumnIdx = olapTable.getPartitionColumns().indexOf(slot.getColumn().get());
if (partitionColumnIdx != -1) {
try {
LiteralExpr minExpr = null;
LiteralExpr maxExpr = null;
double minValue = 0;
double maxValue = 0;
for (String selectedPartitionName : selectedPartitionNames) {
PartitionItem item = olapTable.getPartitionItemOrAnalysisException(
selectedPartitionName);
if (item instanceof ListPartitionItem) {
ListPartitionItem lp = (ListPartitionItem) item;
for (PartitionKey key : lp.getItems()) {
if (minExpr == null) {
minExpr = key.getKeys().get(partitionColumnIdx);
minValue = convertLegacyLiteralToDouble(minExpr);
maxExpr = key.getKeys().get(partitionColumnIdx);
maxValue = convertLegacyLiteralToDouble(maxExpr);
} else {
double current = convertLegacyLiteralToDouble(key.getKeys().get(partitionColumnIdx));
if (current > maxValue) {
maxValue = current;
maxExpr = key.getKeys().get(partitionColumnIdx);
} else if (current < minValue) {
minValue = current;
minExpr = key.getKeys().get(partitionColumnIdx);
}
}
}
}
}
if (minExpr != null) {
cache = new ColumnStatisticBuilder(cache)
.setMinExpr(minExpr)
.setMinValue(minValue)
.setMaxExpr(maxExpr)
.setMaxValue(maxValue)
.build();
}
} catch (AnalysisException e) {
LOG.debug(e.getMessage());
}
}
return cache;
}

private ColumnStatistic updateMinMaxForTheFirstRangePartitionKey(OlapTable olapTable,
List<String> selectedPartitionNames,
SlotReference slot, ColumnStatistic cache) {
int partitionColumnIdx = olapTable.getPartitionColumns().indexOf(slot.getColumn().get());
// for multi partition keys, only the first partition key need to adjust min/max
if (partitionColumnIdx == 0) {
// update partition column min/max by partition info
try {
LiteralExpr minExpr = null;
LiteralExpr maxExpr = null;
double minValue = 0;
double maxValue = 0;
for (String selectedPartitionName : selectedPartitionNames) {
PartitionItem item = olapTable.getPartitionItemOrAnalysisException(
selectedPartitionName);
if (item instanceof RangePartitionItem) {
RangePartitionItem ri = (RangePartitionItem) item;
Range<PartitionKey> range = ri.getItems();
PartitionKey upper = range.upperEndpoint();
PartitionKey lower = range.lowerEndpoint();
if (maxExpr == null) {
maxExpr = upper.getKeys().get(partitionColumnIdx);
maxValue = convertLegacyLiteralToDouble(maxExpr);
minExpr = lower.getKeys().get(partitionColumnIdx);
minValue = convertLegacyLiteralToDouble(minExpr);
} else {
double currentValue = convertLegacyLiteralToDouble(upper.getKeys()
.get(partitionColumnIdx));
if (currentValue > maxValue) {
maxValue = currentValue;
maxExpr = upper.getKeys().get(partitionColumnIdx);
}
currentValue = convertLegacyLiteralToDouble(lower.getKeys().get(partitionColumnIdx));
if (currentValue < minValue) {
minValue = currentValue;
minExpr = lower.getKeys().get(partitionColumnIdx);
}
}
}
}
if (minExpr != null) {
cache = new ColumnStatisticBuilder(cache)
.setMinExpr(minExpr)
.setMinValue(minValue)
.setMaxExpr(maxExpr)
.setMaxValue(maxValue)
.build();
}
} catch (AnalysisException e) {
LOG.debug(e.getMessage());
}
}
return cache;
}

@Override
public Statistics visitLogicalOlapScan(LogicalOlapScan olapScan, Void context) {
return computeOlapScan(olapScan);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class ExplainAction implements SuiteAction {
private boolean verbose = false
private SuiteContext context
private Set<String> containsStrings = new LinkedHashSet<>()
private Set<String> containsAnyStrings = new LinkedHashSet<>()
private Set<String> notContainsStrings = new LinkedHashSet<>()
private Map<String, Integer> multiContainsStrings = new HashMap<>()
private String coonType
Expand All @@ -57,6 +58,10 @@ class ExplainAction implements SuiteAction {
containsStrings.add(subString)
}

void containsAny(String subString) {
containsAnyStrings.add(subString)
}

void multiContains(String subString, int n) {
multiContainsStrings.put(subString, n);
}
Expand Down Expand Up @@ -124,6 +129,18 @@ class ExplainAction implements SuiteAction {
throw t
}
}
boolean any = false;
for (String string : containsAnyStrings) {
if (explainString.contains(string)) {
any = true;
}
}
if (!containsAnyStrings.isEmpty() && !any) {
String msg = ("Explain and check failed, expect contains any '${containsAnyStrings}',"
+ " but actual explain string is:\n${explainString}").toString()
def t = new IllegalStateException(msg)
throw t
}
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

suite("partition_key_minmax") {
sql """
drop table if exists rangetable;
create table rangetable (a int,
b int,
c int)
partition by range (a, b)
(partition p1 values [("1", "2"), ("10", "20")),
partition p2 values [("20", "100"), ("30", "200")),
partition p3 values [("300", "-1"), ("400", "1000"))
)
distributed by hash(a) properties("replication_num"="1");
insert into rangetable values (5, 3, 0), (22, 150, 1), (333, 1, 2),(6, 1, 3);
analyze table rangetable with sync;
"""
explain {
sql """memo plan
select * from rangetable where a < 250;
"""
containsAny("a#0 -> ndv=3.0000, min=1.000000(1), max=30.000000(30), count=3.0000")
containsAny("a#0 -> ndv=4.0000, min=5.000000(5), max=333.000000(333), count=4.0000")
}

sql """
drop table if exists listtable;
create table listtable(id int, city varchar(20), value int)
PARTITION BY LIST(id, city)
(
PARTITION p1_city VALUES IN (("1", "Beijing"), ("1", "Shanghai")),
PARTITION p2_city VALUES IN (("2", "Beijing"), ("2", "Shanghai")),
PARTITION p3_city VALUES IN (("3", "Beijing"), ("3", "Shanghai"))
)
distributed by hash(id) properties("replication_num"="1");
insert into listtable values (1, "Beijing", 0), (2, "Beijing", 0), (3, "Beijing", 0);
analyze table listtable with sync;
"""

explain {
sql """
memo plan select * from listtable where id >=3;
"""
containsAny("id#0 -> ndv=1.0000, min=3.000000(3), max=3.000000(3), count=1.0000,")
containsAny("id#0 -> ndv=3.0000, min=1.000000(1), max=3.000000(3), count=3.0000,")
}
}

0 comments on commit 39a0147

Please sign in to comment.