Skip to content

Commit

Permalink
support encode/decode
Browse files Browse the repository at this point in the history
  • Loading branch information
englefly committed Dec 16, 2024
1 parent a9a1a40 commit 1eba32e
Show file tree
Hide file tree
Showing 9 changed files with 231 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ private static List<RewriteJob> buildAnalyzerJobs(Optional<CustomTableResolver>
topDown(new EliminateGroupByConstant()),

topDown(new SimplifyAggGroupBy()),
topDown(new CompressedMaterialize()),
bottomUp(new CompressedMaterialize()),
topDown(new NormalizeAggregate()),
topDown(new HavingToFilter()),
topDown(new QualifyToFilter()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@
import org.apache.doris.nereids.rules.rewrite.ReduceAggregateChildOutputRows;
import org.apache.doris.nereids.rules.rewrite.ReorderJoin;
import org.apache.doris.nereids.rules.rewrite.RewriteCteChildren;
import org.apache.doris.nereids.rules.rewrite.SimplifyEncodeDecode;
import org.apache.doris.nereids.rules.rewrite.SimplifyWindowExpression;
import org.apache.doris.nereids.rules.rewrite.SplitLimit;
import org.apache.doris.nereids.rules.rewrite.SumLiteralRewrite;
Expand Down Expand Up @@ -371,6 +372,7 @@ public class Rewriter extends AbstractBatchJobExecutor {
// generate one PhysicalLimit if current distribution is gather or two
// PhysicalLimits with gather exchange
topDown(new LimitSortToTopN()),
topDown(new SimplifyEncodeDecode()),
topDown(new LimitAggToTopNAgg()),
topDown(new MergeTopNs()),
topDown(new SplitLimit()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ public enum RuleType {
// rewrite rules
COMPRESSED_MATERIALIZE_AGG(RuleTypeClass.REWRITE),
COMPRESSED_MATERIALIZE_SORT(RuleTypeClass.REWRITE),
SIMPLIFY_ENCODE_DECODE(RuleTypeClass.REWRITE),
NORMALIZE_AGGREGATE(RuleTypeClass.REWRITE),
NORMALIZE_SORT(RuleTypeClass.REWRITE),
NORMALIZE_REPEAT(RuleTypeClass.REWRITE),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,20 +78,26 @@ public List<Rule> buildRules() {

private LogicalSort<Plan> compressMaterializeSort(LogicalSort<Plan> sort) {
List<OrderKey> newOrderKeys = Lists.newArrayList();
boolean changed = false;
List<Expression> orderKeysToEncode = Lists.newArrayList();
for (OrderKey orderKey : sort.getOrderKeys()) {
Expression expr = orderKey.getExpr();
Optional<Expression> encode = getEncodeExpression(expr);
if (encode.isPresent()) {
newOrderKeys.add(new OrderKey(encode.get(),
orderKey.isAsc(),
orderKey.isNullFirst()));
changed = true;
orderKeysToEncode.add(expr);
} else {
newOrderKeys.add(orderKey);
}
}
return changed ? sort.withOrderKeys(newOrderKeys) : sort;
if (orderKeysToEncode.isEmpty()) {
return sort;
} else {
sort = sort.withOrderKeys(newOrderKeys);
return sort;
}

}

private Optional<Expression> getEncodeExpression(Expression expression) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@
import org.apache.doris.nereids.properties.OrderKey;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
Expand All @@ -32,7 +35,9 @@
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

Expand Down Expand Up @@ -68,7 +73,6 @@ public List<Rule> buildRules() {
&& ConnectContext.get().getSessionVariable().pushTopnToAgg
&& ConnectContext.get().getSessionVariable().topnOptLimitThreshold
>= limit.getLimit() + limit.getOffset())
.when(limit -> limit.child().isAllSlots())
.when(limit -> {
LogicalAggregate<? extends Plan> agg = limit.child().child();
return !agg.getGroupByExpressions().isEmpty();
Expand Down Expand Up @@ -109,18 +113,49 @@ public List<Rule> buildRules() {
&& ConnectContext.get().getSessionVariable().pushTopnToAgg
&& ConnectContext.get().getSessionVariable().topnOptLimitThreshold
>= topn.getLimit() + topn.getOffset())
.when(topn -> topn.child().isAllSlots())
.when(topn -> {
LogicalAggregate<? extends Plan> agg = topn.child().child();
return !agg.getGroupByExpressions().isEmpty();
})
.then(topn -> {
LogicalTopN originTopn = topn;
LogicalProject<? extends Plan> project = topn.child();
LogicalAggregate<? extends Plan> agg = (LogicalAggregate) project.child();
if (!project.isAllSlots()) {
/*
topn(orderKey=[a])
+-->project(b as a)
+--> agg(groupKey[b]
=>
topn(orderKey=[b])
+-->project(b as a)
+-->agg(groupKey[b])
and then exchange topn and project
*/
Map<SlotReference, SlotReference> keyAsKey = new HashMap<>();
for (NamedExpression e : project.getProjects()) {
if (e instanceof Alias && e.child(0) instanceof SlotReference) {
keyAsKey.put((SlotReference) e.toSlot(), (SlotReference) e.child(0));
}
}
List<OrderKey> projectOrderKeys = Lists.newArrayList();
boolean hasNew = false;
for (OrderKey orderKey : topn.getOrderKeys()) {
if (keyAsKey.containsKey(orderKey.getExpr())) {
projectOrderKeys.add(orderKey.withExpression(keyAsKey.get(orderKey.getExpr())));
hasNew = true;
} else {
projectOrderKeys.add(orderKey);
}
}
if (hasNew) {
topn = (LogicalTopN) topn.withOrderKeys(projectOrderKeys);
}
}
Pair<List<OrderKey>, List<Expression>> pair =
supplementOrderKeyByGroupKeyIfCompatible(topn, agg);
if (pair == null) {
return topn;
return originTopn;
} else {
agg = agg.withGroupBy(pair.second);
topn = (LogicalTopN) topn.withChildren(agg);
Expand All @@ -138,6 +173,14 @@ private List<OrderKey> generateOrderKeyByGroupKey(LogicalAggregate<? extends Pla
.collect(Collectors.toList());
}

/**
* compatible: if order key is subset of group by keys
* example:
* 1. orderKey[a, b], groupKeys[b, a, c]
* compatible, return Pair(orderKey[a, b, c], groupKey[a, b, c])
* 2. orderKey[a, b+1], groupKeys[a, b]
* not compatible, return null
*/
private Pair<List<OrderKey>, List<Expression>> supplementOrderKeyByGroupKeyIfCompatible(
LogicalTopN<? extends Plan> topn, LogicalAggregate<? extends Plan> agg) {
Set<Expression> groupKeySet = Sets.newHashSet(agg.getGroupByExpressions());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.rules.rewrite;

import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.functions.scalar.DecodeAsVarchar;
import org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeStrToInteger;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;

import java.util.List;

/**
* project (..., encode(decode(A)) as B, ...)
* =>
* project (..., A as B,...)
*/
public class SimplifyEncodeDecode implements RewriteRuleFactory {

@Override
public List<Rule> buildRules() {
return ImmutableList.of(
RuleType.SIMPLIFY_ENCODE_DECODE.build(
logicalProject()
.then(project -> {
List<NamedExpression> newProjections =
Lists.newArrayListWithCapacity(project.getProjects().size());
boolean changed = false;
for (NamedExpression namedExpression : project.getProjects()) {
if (namedExpression instanceof Alias
&& namedExpression.child(0) instanceof EncodeStrToInteger
&& namedExpression.child(0).child(0)
instanceof DecodeAsVarchar) {
Alias alias = (Alias) namedExpression;
Expression body = namedExpression.child(0)
.child(0).child(0);
newProjections.add((Alias) alias.withChildren(body));
changed = true;
} else {
newProjections.add(namedExpression);
}
}
return changed ? project.withProjects(newProjections) : project;
})
)
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.doris.nereids.trees.plans.algebra;

import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.ExprId;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
Expand Down Expand Up @@ -108,6 +109,18 @@ default boolean isAllSlots() {
return true;
}

/**
* project(A as B) is eventually slot project, where A is a slot
*/
default boolean isEventuallyAllSlots() {
for (NamedExpression project : getProjects()) {
if (!project.isSlot() && !(project instanceof Alias && project.child(0) instanceof Slot)) {
return false;
}
}
return true;
}

/** containsNoneMovableFunction */
default boolean containsNoneMovableFunction() {
for (NamedExpression expression : getProjects()) {
Expand Down
26 changes: 13 additions & 13 deletions regression-test/data/query_p0/limit/test_group_by_limit.out
Original file line number Diff line number Diff line change
@@ -1,65 +1,65 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select --
-- !select1 --
253967024 8491 AIR
259556658 8641 FOB
260402265 8669 MAIL

-- !select --
-- !select2 --
449872500 15000 1
386605746 12900 2
320758616 10717 3

-- !select --
-- !select3 --
198674527 6588 0.0
198679731 6563 0.01
198501055 6622 0.02

-- !select --
-- !select4 --
27137 1 1992-02-02
45697 1 1992-02-04
114452 5 1992-02-05

-- !select --
-- !select5 --
27137 1 1992-02-02T00:00
45697 1 1992-02-04T00:00
114452 5 1992-02-05T00:00

-- !select --
-- !select6 --
139015016 4632 1
130287219 4313 2
162309750 5334 3

-- !select --
-- !select7 --
64774969 2166 AIR 1
54166166 1804 AIR 2
45538267 1532 AIR 3

-- !select --
-- !select8 --
6882631 228 AIR 1 0.0
6756423 228 AIR 1 0.01
7920028 254 AIR 1 0.02

-- !select --
-- !select9 --
7618 1 AIR 1 0.0 1992-02-06
2210 1 AIR 1 0.0 1992-03-24
16807 1 AIR 1 0.0 1992-03-29

-- !select --
-- !select10 --
6882631 228 AIR 1 0.0
6756423 228 AIR 1 0.01
7920028 254 AIR 1 0.02

-- !select --
-- !select11 --
6882631 228 AIR 1 0.0
6756423 228 AIR 1 0.01
7920028 254 AIR 1 0.02

-- !select --
-- !select12 --
7707018 238 TRUCK 1 0.0
7467045 233 TRUCK 1 0.01
6927206 245 TRUCK 1 0.02

-- !select --
-- !select13 --
7661562 249 TRUCK 1 0.08
6673139 228 TRUCK 1 0.07
8333862 265 TRUCK 1 0.06
Expand Down
Loading

0 comments on commit 1eba32e

Please sign in to comment.