Skip to content

Commit

Permalink
add range check
Browse files Browse the repository at this point in the history
  • Loading branch information
englefly committed Dec 11, 2023
1 parent 0914d6e commit 32b9400
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 61 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1827,4 +1827,8 @@ private static int getTimeZoneSplitPos(String arg) {
}
return split + 1;
}

/**
 * Packs the date and time fields of this literal into a single number whose decimal digits read
 * {@code yyyyMMddHHmmss}; only year, month, day, hour, minute and second contribute.
 *
 * @return the packed value, widened to {@code double}
 */
public double getDouble() {
    // yyyyMMdd
    long datePart = year * 10000 + month * 100 + day;
    // HHmmss
    long timePart = hour * 10000 + minute * 100 + second;
    // shift the date six decimal digits to the left to make room for the time
    return datePart * 1000000L + timePart;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@

package org.apache.doris.nereids.rules.expression.rules;

import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.catalog.ListPartitionItem;
import org.apache.doris.catalog.PartitionInfo;
import org.apache.doris.catalog.PartitionItem;
import org.apache.doris.catalog.PartitionKey;
import org.apache.doris.catalog.RangePartitionItem;
import org.apache.doris.nereids.CascadesContext;
import org.apache.doris.nereids.trees.expressions.Cast;
Expand All @@ -39,6 +41,7 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;

/**
* PartitionPruner
Expand Down Expand Up @@ -115,6 +118,11 @@ public static List<Long> prune(List<Slot> partitionSlots, Expression partitionPr
PartitionTableType partitionTableType) {
partitionPredicate = TryEliminateUninterestedPredicates.rewrite(
partitionPredicate, ImmutableSet.copyOf(partitionSlots), cascadesContext);
if (canRewriteToMinMax(idToPartitions)) {
double rangeLength = totalRangeLength(idToPartitions);
partitionPredicate = rewritePartitionPredicateForRangePartition(
partitionPredicate, rangeLength, cascadesContext);
}

List<OnePartitionEvaluator> evaluators = idToPartitions.entrySet()
.stream()
Expand All @@ -127,6 +135,52 @@ public static List<Long> prune(List<Slot> partitionSlots, Expression partitionPr
return partitionPruner.prune();
}

/**
 * Checks whether the given partitions are range partitions.
 * Only the first value returned by the map's iterator is inspected.
 *
 * @param idToPartitions partition id to partition item
 * @return {@code true} if the map is non-empty and its first value is a {@link RangePartitionItem}
 */
private static boolean isRangePartition(Map<Long, PartitionItem> idToPartitions) {
    return !idToPartitions.isEmpty()
            && idToPartitions.values().iterator().next() instanceof RangePartitionItem;
}

/**
 * Decides whether IN-lists on the partition column may be widened to a min/max range.
 * This is the case when the partitions are range partitions whose (sampled) upper bound
 * consists of exactly one key and that key is a date literal.
 *
 * @param idToPartitions partition id to partition item
 * @return {@code true} if the min/max rewrite is applicable
 */
private static boolean canRewriteToMinMax(Map<Long, PartitionItem> idToPartitions) {
    if (!isRangePartition(idToPartitions)) {
        return false;
    }
    // isRangePartition() has just verified via instanceof that this value is a non-null
    // RangePartitionItem, so no further null check is needed.
    RangePartitionItem sample = (RangePartitionItem) idToPartitions.values().iterator().next();
    PartitionKey upperBound = sample.getItems().upperEndpoint();
    // anything but a single key means multi keys or hive keys, which are not handled
    return upperBound.getKeys().size() == 1
            && upperBound.getKeys().get(0) instanceof org.apache.doris.analysis.DateLiteral;
}

/**
 * Computes the length of the span between the lowest and the highest range partition: from the
 * upper bound (end) of the lowest partition to the lower bound (beginning) of the highest one.
 *
 * <p>The two partitions are located by comparing bound values. With non-overlapping ranges the
 * lowest partition owns the smallest upper bound and the highest partition owns the largest
 * lower bound. Partition ids are deliberately not used for ordering, since nothing visible here
 * guarantees that ids follow the order of the partition ranges.
 *
 * <p>Bounds that are not date literals are ignored instead of being cast blindly.
 *
 * @param idToPartitions partition id to partition item; every value must be a
 *         {@link RangePartitionItem} whose bounds carry at least one key (the caller checks
 *         {@code canRewriteToMinMax} first)
 * @return largest date lower bound minus smallest date upper bound, in the encoding of
 *         {@code DateLiteral.getDouble()}; zero or negative when the partitions leave no gap
 *         (e.g. a single partition); {@code NaN} when no date-typed lower or upper bound exists,
 *         which makes any {@code ratio < threshold} comparison on it evaluate to false
 */
private static double totalRangeLength(Map<Long, PartitionItem> idToPartitions) {
    double smallestUpper = Double.POSITIVE_INFINITY;
    double largestLower = Double.NEGATIVE_INFINITY;
    for (PartitionItem partition : idToPartitions.values()) {
        RangePartitionItem rangeItem = (RangePartitionItem) partition;
        // comparisons against NaN are false, so non-date bounds are skipped automatically
        double upper = dateBoundAsDouble(rangeItem.getItems().upperEndpoint());
        if (upper < smallestUpper) {
            smallestUpper = upper;
        }
        double lower = dateBoundAsDouble(rangeItem.getItems().lowerEndpoint());
        if (lower > largestLower) {
            largestLower = lower;
        }
    }
    if (Double.isInfinite(smallestUpper) || Double.isInfinite(largestLower)) {
        // no usable date bound on one side: the length is undefined
        return Double.NaN;
    }
    return largestLower - smallestUpper;
}

/**
 * Returns the {@code getDouble()} encoding of the first key of a partition bound, or {@code NaN}
 * when that key is not a date literal (presumably the case for an open-ended bound such as
 * MAXVALUE; verify against PartitionKey).
 */
private static double dateBoundAsDouble(PartitionKey bound) {
    if (bound.getKeys().get(0) instanceof org.apache.doris.analysis.DateLiteral) {
        return ((org.apache.doris.analysis.DateLiteral) bound.getKeys().get(0)).getDouble();
    }
    return Double.NaN;
}

/**
 * Prepares a partition predicate for pruning range partitions: first folds the predicate with
 * {@code OrToIn}, then lets {@link RewriteRangePartitionPredicate} process the result.
 *
 * @param partitionPredicate predicate on the partition column
 * @param range total range length handed to {@link RewriteRangePartitionPredicate}
 * @param ctx context passed to the rewriter
 * @return the rewritten predicate
 */
private static Expression rewritePartitionPredicateForRangePartition(Expression partitionPredicate, double range,
        CascadesContext ctx) {
    Expression withInLists = OrToIn.INSTANCE.rewrite(partitionPredicate, null);
    return RewriteRangePartitionPredicate.rewrite(withInLists, range, ctx);
}

/**
* convert partition item to partition evaluator
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.rules.expression.rules;

import org.apache.doris.nereids.CascadesContext;
import org.apache.doris.nereids.trees.expressions.And;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.GreaterThanEqual;
import org.apache.doris.nereids.trees.expressions.InPredicate;
import org.apache.doris.nereids.trees.expressions.LessThanEqual;
import org.apache.doris.nereids.trees.expressions.Not;
import org.apache.doris.nereids.trees.expressions.literal.DateLiteral;
import org.apache.doris.nereids.trees.expressions.literal.DateTimeLiteral;
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter;

/**
 * Widens a long IN-list of date literals into a closed range, for partition pruning only:
 * <pre>
 * F IN ('2022-01-01 00:01:01', 'date2', ...)
 * =>
 * F &gt;= min_date AND F &lt;= max_date
 * </pre>
 * The rewrite is applied when the IN-list has more than {@value #MIN_OPTION_COUNT} options, the
 * first option is a date or datetime literal, all options are literals, and the distance between
 * the smallest and the largest option, divided by {@code rangeLength}, is below the session
 * variable {@code inToMinmaxParitionRewriteThreshold}.
 *
 * <p>The produced range accepts more values than the IN-list, so it is only a valid replacement
 * where the predicate is used positively. IN-lists below a NOT are therefore never rewritten.
 */
public class RewriteRangePartitionPredicate extends
        DefaultExpressionRewriter<CascadesContext> {
    /** An IN-list is only considered for rewriting when it has more options than this. */
    private static final int MIN_OPTION_COUNT = 10;

    /** Length the spread of the IN-list options is compared against. */
    private final double rangeLength;

    /**
     * @param rangeLength total range length of the partitions, in the same encoding as
     *         {@code Literal.getDouble()} of the IN-list options
     */
    public RewriteRangePartitionPredicate(double rangeLength) {
        this.rangeLength = rangeLength;
    }

    /**
     * Rewrites {@code expression} with a fresh rewriter.
     *
     * @param expression predicate to rewrite
     * @param rangeLength total range length of the partitions
     * @param ctx context used to read the session variable holding the threshold
     * @return the rewritten expression
     */
    public static Expression rewrite(Expression expression, double rangeLength, CascadesContext ctx) {
        return expression.accept(new RewriteRangePartitionPredicate(rangeLength), ctx);
    }

    /**
     * Leaves negated sub-expressions untouched. Widening {@code F IN (...)} to a range under a
     * NOT would turn {@code NOT (F IN (...))} into {@code NOT (min <= F AND F <= max)}, which
     * rejects values between min and max that are not in the list and could therefore prune
     * partitions that still contain matching rows.
     */
    @Override
    public Expression visitNot(Not not, CascadesContext ctx) {
        // NOTE(review): other parents that use the IN result non-positively are not covered here.
        return not;
    }

    @Override
    public Expression visitInPredicate(InPredicate inPredicate, CascadesContext ctx) {
        if (inPredicate.getOptions().size() <= MIN_OPTION_COUNT) {
            return inPredicate;
        }
        Expression first = inPredicate.getOptions().get(0);
        if (!(first instanceof DateLiteral || first instanceof DateTimeLiteral)) {
            return inPredicate;
        }

        // track the smallest and largest option together with their numeric encoding
        Literal minOpt = (Literal) first;
        Literal maxOpt = minOpt;
        double minVal = minOpt.getDouble();
        double maxVal = minVal;
        for (int i = 1; i < inPredicate.getOptions().size(); i++) {
            Expression opt = inPredicate.getOptions().get(i);
            if (!(opt instanceof Literal)) {
                // a non-literal option has no known value; keep the IN-list as it is
                return inPredicate;
            }
            double optValue = ((Literal) opt).getDouble();
            if (optValue < minVal) {
                minVal = optValue;
                minOpt = (Literal) opt;
            } else if (optValue > maxVal) {
                maxVal = optValue;
                maxOpt = (Literal) opt;
            }
        }

        double inToMinMaxThreshold = ctx.getConnectContext()
                .getSessionVariable().inToMinmaxParitionRewriteThreshold;
        // NOTE(review): rangeLength is used as given; a zero length yields Infinity/NaN (no
        // rewrite), a negative length yields a negative ratio (always rewrite).
        if ((maxVal - minVal) / rangeLength < inToMinMaxThreshold) {
            return new And(
                    new GreaterThanEqual(inPredicate.getCompareExpr(), minOpt),
                    new LessThanEqual(inPredicate.getCompareExpr(), maxOpt));
        }
        return inPredicate;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,9 @@
import org.apache.doris.nereids.CascadesContext;
import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext;
import org.apache.doris.nereids.rules.expression.rules.TryEliminateUninterestedPredicates.Context;
import org.apache.doris.nereids.trees.expressions.And;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.GreaterThanEqual;
import org.apache.doris.nereids.trees.expressions.InPredicate;
import org.apache.doris.nereids.trees.expressions.LessThanEqual;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral;
import org.apache.doris.nereids.trees.expressions.literal.DateLiteral;
import org.apache.doris.nereids.trees.expressions.literal.DateTimeLiteral;
import org.apache.doris.nereids.trees.expressions.literal.DoubleLiteral;
import org.apache.doris.nereids.trees.expressions.literal.FloatLiteral;
import org.apache.doris.nereids.trees.expressions.literal.IntegerLikeLiteral;
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter;

import java.util.Set;
Expand Down Expand Up @@ -69,55 +59,7 @@ public static Expression rewrite(Expression expression, Set<Slot> interestedSlot
expression = expression.accept(new SimplifyNotExprRule(), null);
TryEliminateUninterestedPredicates rewriter = new TryEliminateUninterestedPredicates(
interestedSlots, cascadesContext);
Expression eliminated = expression.accept(rewriter, new Context());
if (cascadesContext.getConnectContext().getSessionVariable().enablePartitionPredicateRewrite) {
Expression toIn = OrToIn.INSTANCE.rewrite(eliminated, null);
return InToMinMax.rewrite(toIn, null);
} else {
return eliminated;
}
}

static class InToMinMax extends DefaultExpressionRewriter<Context> {
public static Expression rewrite(Expression expression, Context ctx) {
return expression.accept(new InToMinMax(), new Context());
}

@Override
public Expression visitInPredicate(InPredicate inPredicate, Context ctx) {
if (inPredicate.getOptions().size() > 10) {
Expression opt0 = inPredicate.getOptions().get(0);
if (opt0 instanceof IntegerLikeLiteral
|| opt0 instanceof DoubleLiteral
|| opt0 instanceof FloatLiteral
|| opt0 instanceof DateLiteral
|| opt0 instanceof DateTimeLiteral) {
Literal minOpt = (Literal) inPredicate.getOptions().get(0);
Double minVal = minOpt.getDouble();
Literal maxOpt = (Literal) inPredicate.getOptions().get(0);
Double maxVal = maxOpt.getDouble();
for (int i = 1; i < inPredicate.getOptions().size(); i++) {
Expression opt = inPredicate.getOptions().get(i);
if (!(opt instanceof Literal)) {
return inPredicate;
}
double optValue = ((Literal) opt).getDouble();
if (optValue < minVal) {
minVal = optValue;
minOpt = (Literal) opt;
} else if (optValue > maxVal) {
maxVal = optValue;
maxOpt = (Literal) opt;
}
}
return new And(
new GreaterThanEqual(inPredicate.getCompareExpr(), minOpt),
new LessThanEqual(inPredicate.getCompareExpr(), maxOpt));
}
}
return inPredicate;
}

return expression.accept(rewriter, new Context());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1127,8 +1127,8 @@ public void setMaxJoinNumberOfReorder(int maxJoinNumberOfReorder) {
@VariableMgr.VarAttr(name = ENABLE_UNIQUE_KEY_PARTIAL_UPDATE, needForward = true)
public boolean enableUniqueKeyPartialUpdate = false;

@VariableMgr.VarAttr(name = "enable_partition_predicate_rewrite", needForward = true)
public boolean enablePartitionPredicateRewrite = false;
@VariableMgr.VarAttr(name = "in_to_minmax_parition_rewrite_threshold", needForward = true)
public double inToMinmaxParitionRewriteThreshold = 0.2;

// If this fe is in fuzzy mode, then will use initFuzzyModeVariables to generate some variables,
// not the default value set in the code.
Expand Down

0 comments on commit 32b9400

Please sign in to comment.