From 916bd8dff1369c1f701ca6b96d649cecb098d636 Mon Sep 17 00:00:00 2001 From: minghong Date: Tue, 5 Nov 2024 16:18:21 +0800 Subject: [PATCH] compress_materialize for aggregate and sort --- .../doris/nereids/jobs/executor/Analyzer.java | 2 + .../apache/doris/nereids/rules/RuleType.java | 2 + .../rules/analysis/CompressedMaterialize.java | 160 ++++++++++++++ .../rewrite/PushDownFilterThroughProject.java | 35 +++- .../functions/scalar/EncodeAsBigInt.java | 2 +- .../functions/scalar/EncodeAsInt.java | 2 +- .../functions/scalar/EncodeAsLargeInt.java | 2 +- .../functions/scalar/EncodeAsSmallInt.java | 2 +- .../functions/scalar/EncodeStrToInteger.java | 24 +++ .../plans/physical/PhysicalHashAggregate.java | 7 + .../org/apache/doris/qe/SessionVariable.java | 9 + .../rules/analysis/AnalyzeCTETest.java | 3 +- .../compress_materialize.out | 55 +++++ .../compress_materialize.groovy | 198 ++++++++++++++++++ 14 files changed, 496 insertions(+), 7 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CompressedMaterialize.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeStrToInteger.java create mode 100644 regression-test/data/nereids_p0/compress_materialize/compress_materialize.out create mode 100644 regression-test/suites/nereids_p0/compress_materialize/compress_materialize.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java index 894d42642015338..8985dadc0bd2e99 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java @@ -31,6 +31,7 @@ import org.apache.doris.nereids.rules.analysis.CheckPolicy; import org.apache.doris.nereids.rules.analysis.CollectJoinConstraint; import org.apache.doris.nereids.rules.analysis.CollectSubQueryAlias; +import org.apache.doris.nereids.rules.analysis.CompressedMaterialize; import org.apache.doris.nereids.rules.analysis.EliminateDistinctConstant; import org.apache.doris.nereids.rules.analysis.EliminateGroupByConstant; import org.apache.doris.nereids.rules.analysis.EliminateLogicalSelectHint; @@ -166,6 +167,7 @@ private static List buildAnalyzerJobs(Optional topDown(new EliminateGroupByConstant()), topDown(new SimplifyAggGroupBy()), + topDown(new CompressedMaterialize()), topDown(new NormalizeAggregate()), topDown(new HavingToFilter()), topDown(new QualifyToFilter()), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index dbf96ef2f1f2fbc..beb8bd436557434 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -106,6 +106,8 @@ public enum RuleType { CHECK_DATA_TYPES(RuleTypeClass.CHECK), // rewrite rules + COMPRESSED_MATERIALIZE_AGG(RuleTypeClass.REWRITE), + COMPRESSED_MATERIALIZE_SORT(RuleTypeClass.REWRITE), NORMALIZE_AGGREGATE(RuleTypeClass.REWRITE), NORMALIZE_SORT(RuleTypeClass.REWRITE), NORMALIZE_REPEAT(RuleTypeClass.REWRITE), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CompressedMaterialize.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CompressedMaterialize.java new file mode 100644 index 000000000000000..d0e08f34a2cfa4d --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CompressedMaterialize.java @@ -0,0 +1,160 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.analysis; + +import org.apache.doris.nereids.properties.OrderKey; +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.functions.scalar.DecodeAsVarchar; +import org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeAsBigInt; +import org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeAsInt; +import org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeAsLargeInt; +import org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeAsSmallInt; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalSort; +import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.types.coercion.CharacterType; +import org.apache.doris.nereids.util.ExpressionUtils; +import org.apache.doris.qe.ConnectContext; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * select A from T group by A + * => + * select any_value(A) from T group by encode_as_int(A) + */ +public class CompressedMaterialize implements AnalysisRuleFactory { + @Override + public List buildRules() { + return ImmutableList.of( + RuleType.COMPRESSED_MATERIALIZE_AGG.build( + logicalAggregate().when(a -> ConnectContext.get() != null + && ConnectContext.get().getSessionVariable().enableCompressMaterialize) + .then(this::compressedMaterializeAggregate)), + RuleType.COMPRESSED_MATERIALIZE_SORT.build( + logicalSort().when(a -> ConnectContext.get() != null + && ConnectContext.get().getSessionVariable().enableCompressMaterialize) + .then(this::compressMaterializeSort) + ) + ); + } + + private LogicalSort compressMaterializeSort(LogicalSort sort) { + // List orderExpressions = sort.getOrderKeys().stream() + // .map(OrderKey::getExpr).collect(Collectors.toList()); + List newOrderKeys = Lists.newArrayList(); + boolean changed = false; + for (OrderKey orderKey : sort.getOrderKeys()) { + Expression expr = orderKey.getExpr(); + Optional encode = getEncodeExpression(expr); + if (encode.isPresent()) { + newOrderKeys.add(new OrderKey(encode.get(), + orderKey.isAsc(), + orderKey.isNullFirst())); + changed = true; + } else { + newOrderKeys.add(orderKey); + } + } + return changed ? sort.withOrderKeys(newOrderKeys) : sort; + } + + private Optional getEncodeExpression(Expression expression) { + DataType type = expression.getDataType(); + Expression encodeExpr = null; + if (type instanceof CharacterType) { + CharacterType ct = (CharacterType) type; + if (ct.getLen() > 0) { + // skip column from variant, like 'L.var["L_SHIPMODE"] AS TEXT' + if (ct.getLen() < 2) { + encodeExpr = new EncodeAsSmallInt(expression); + } else if (ct.getLen() < 4) { + encodeExpr = new EncodeAsInt(expression); + } else if (ct.getLen() < 7) { + encodeExpr = new EncodeAsBigInt(expression); + } else if (ct.getLen() < 15) { + encodeExpr = new EncodeAsLargeInt(expression); + } + } + } + return Optional.ofNullable(encodeExpr); + } + + /* + example: + [support] select sum(v) from t group by substring(k, 1,2) + [not support] select substring(k, 1,2), sum(v) from t group by substring(k, 1,2) + [support] select k, sum(v) from t group by k + [not support] select substring(k, 1,2), sum(v) from t group by k + [support] select A as B from T group by A + */ + private Map getEncodeGroupByExpressions(LogicalAggregate aggregate) { + Map encodeGroupbyExpressions = Maps.newHashMap(); + for (Expression gb : aggregate.getGroupByExpressions()) { + Optional encodeExpr = getEncodeExpression(gb); + encodeExpr.ifPresent(expression -> encodeGroupbyExpressions.put(gb, expression)); + } + return encodeGroupbyExpressions; + } + + private LogicalAggregate compressedMaterializeAggregate(LogicalAggregate aggregate) { + Map encodeGroupByExpressions = getEncodeGroupByExpressions(aggregate); + if (!encodeGroupByExpressions.isEmpty()) { + List newGroupByExpressions = Lists.newArrayList(); + for (Expression gp : aggregate.getGroupByExpressions()) { + newGroupByExpressions.add(encodeGroupByExpressions.getOrDefault(gp, gp)); + } + List newOutputs = Lists.newArrayList(); + Map decodeMap = new HashMap<>(); + for (Expression gp : encodeGroupByExpressions.keySet()) { + decodeMap.put(gp, new DecodeAsVarchar(encodeGroupByExpressions.get(gp))); + } + for (NamedExpression out : aggregate.getOutputExpressions()) { + Expression replaced = ExpressionUtils.replace(out, decodeMap); + if (out != replaced) { + if (out instanceof SlotReference) { + newOutputs.add(new Alias(out.getExprId(), replaced, out.getName())); + } else if (out instanceof Alias) { + newOutputs.add(((Alias) out).withChildren(replaced.children())); + } else { + // should not reach here + Preconditions.checkArgument(false, "output abnormal: " + aggregate); + } + } else { + newOutputs.add(out); + } + } + aggregate = aggregate.withGroupByAndOutput(newGroupByExpressions, newOutputs); + } + return aggregate; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java index f6f7c2d1100d0be..d40e485ca03f282 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java @@ -22,6 +22,8 @@ import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.functions.scalar.DecodeAsVarchar; +import org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeStrToInteger; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; @@ -30,10 +32,12 @@ import org.apache.doris.nereids.util.PlanUtils; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; import com.google.common.collect.Sets; import java.util.List; import java.util.Set; +import java.util.stream.Collectors; /** * Push down filter through project. @@ -81,7 +85,7 @@ private static Plan pushDownFilterThroughProject(LogicalFilter) project.withChildren(new LogicalFilter<>( - ExpressionUtils.replace(splitConjuncts.second, project.getAliasToProducer()), + ExpressionUtils.replace(eliminateDecodeAndEncode(splitConjuncts.second), project.getAliasToProducer()), project.child())); return PlanUtils.filterOrSelf(splitConjuncts.first, project); } @@ -99,7 +103,7 @@ private static Plan pushDownFilterThroughLimitProject( } project = project.withProjectsAndChild(project.getProjects(), new LogicalFilter<>( - ExpressionUtils.replace(splitConjuncts.second, + ExpressionUtils.replace(eliminateDecodeAndEncode(splitConjuncts.second), project.getAliasToProducer()), limit.withChildren(project.child()))); return PlanUtils.filterOrSelf(splitConjuncts.first, project); @@ -119,4 +123,31 @@ private static Pair, Set> splitConjunctsByChildOutpu } return Pair.of(remainPredicates, pushDownPredicates); } + + private static Set eliminateDecodeAndEncode(Set expressions) { + return expressions.stream() + .map(PushDownFilterThroughProject::eliminateDecodeAndEncode) + .collect(Collectors.toSet()); + } + + private static Expression eliminateDecodeAndEncode(Expression expression) { + if (expression instanceof DecodeAsVarchar && expression.child(0) instanceof EncodeStrToInteger) { + return expression.child(0).child(0); + } + boolean hasNewChild = false; + List newChildren = Lists.newArrayList(); + for (Expression child : expression.children()) { + Expression replace = eliminateDecodeAndEncode(child); + if (replace != child) { + hasNewChild = true; + newChildren.add(replace); + } else { + newChildren.add(child); + } + } + if (hasNewChild) { + return expression.withChildren(newChildren); + } + return expression; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsBigInt.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsBigInt.java index 59a31b4da49753c..7d798ecf3e8cab9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsBigInt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsBigInt.java @@ -34,7 +34,7 @@ * ScalarFunction 'EncodeAsBigInt'. */ public class EncodeAsBigInt extends ScalarFunction - implements ExplicitlyCastableSignature, PropagateNullable { + implements ExplicitlyCastableSignature, PropagateNullable, EncodeStrToInteger { public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(BigIntType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsInt.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsInt.java index 30729354379c1ab..5c6382d6ea144da 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsInt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsInt.java @@ -34,7 +34,7 @@ * ScalarFunction 'EncodeAsInt'. */ public class EncodeAsInt extends ScalarFunction - implements ExplicitlyCastableSignature, PropagateNullable { + implements ExplicitlyCastableSignature, PropagateNullable, EncodeStrToInteger { public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(IntegerType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsLargeInt.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsLargeInt.java index 7cfce24625771f3..bb30a9a8e8aef53 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsLargeInt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsLargeInt.java @@ -34,7 +34,7 @@ * ScalarFunction 'EncodeAsLargeInt'. */ public class EncodeAsLargeInt extends ScalarFunction - implements ExplicitlyCastableSignature, PropagateNullable { + implements ExplicitlyCastableSignature, PropagateNullable, EncodeStrToInteger { public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(LargeIntType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsSmallInt.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsSmallInt.java index 0809c935a575e75..355a740197c33ec 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsSmallInt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsSmallInt.java @@ -34,7 +34,7 @@ * ScalarFunction 'CompressAsSmallInt'. */ public class EncodeAsSmallInt extends ScalarFunction - implements ExplicitlyCastableSignature, PropagateNullable { + implements ExplicitlyCastableSignature, PropagateNullable, EncodeStrToInteger { public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(SmallIntType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeStrToInteger.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeStrToInteger.java new file mode 100644 index 000000000000000..87a9c43687d6a3f --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeStrToInteger.java @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.scalar; + +/** + * Encode_as_XXXInt + */ +public interface EncodeStrToInteger { +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java index 404c30fe379d4a4..2a78b063a973f35 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java @@ -236,6 +236,13 @@ public int hashCode() { aggregateParam, maybeUsingStream, requireProperties); } + public PhysicalHashAggregate withGroupByExpressions(List newGroupByExpressions) { + return new PhysicalHashAggregate<>(newGroupByExpressions, outputExpressions, partitionExpressions, + aggregateParam, maybeUsingStream, groupExpression, getLogicalProperties(), + requireProperties, physicalProperties, statistics, + child()); + } + @Override public PhysicalHashAggregate withChildren(List children) { Preconditions.checkArgument(children.size() == 1); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 5f031843025274b..ed18d29950b6590 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -2118,6 +2118,15 @@ public void setIgnoreShapePlanNodes(String ignoreShapePlanNodes) { needForward = true, fuzzy = true) public boolean enableSortSpill = false; + @VariableMgr.VarAttr( + name = "ENABLE_COMPRESS_MATERIALIZE", + description = {"控制是否启用compress materialize。", + "enable compress-materialize. "}, + needForward = false, fuzzy = false, + varType = VariableAnnotation.EXPERIMENTAL + ) + public boolean enableCompressMaterialize = false; + @VariableMgr.VarAttr( name = ENABLE_AGG_SPILL, description = {"控制是否启用聚合算子落盘。默认为 false。", diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTETest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTETest.java index a91c0dd47126fcf..e69468e9a282b1a 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTETest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTETest.java @@ -154,7 +154,8 @@ public void testCTEInHavingAndSubquery() { logicalFilter( logicalProject( logicalJoin( - logicalAggregate(), + logicalProject( + logicalAggregate()), logicalProject() ) ) diff --git a/regression-test/data/nereids_p0/compress_materialize/compress_materialize.out b/regression-test/data/nereids_p0/compress_materialize/compress_materialize.out new file mode 100644 index 000000000000000..eee04795628144d --- /dev/null +++ b/regression-test/data/nereids_p0/compress_materialize/compress_materialize.out @@ -0,0 +1,55 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !agg_exec -- +aaaaa +bbbbb + +-- !output_contains_gpk -- +aaaaa aaaaa +bbbbb bbbbb + +-- !expr -- +aaa +bbb + +-- !encodeexpr -- +12 +3 + +-- !sort -- +\N 6 + 7 +a 1 +aa 2 +b 4 +b 5 +bb 3 +中 8 +国 9 + +-- !sort -- +国 9 +中 8 +bb 3 +b 4 +b 5 +aa 2 +a 1 + 7 +\N 6 + +-- !sort -- +国 9 +中 8 +bb 3 +b 5 +b 4 +aa 2 +a 1 + 7 +\N 6 + +-- !sort -- +国 9 +中 8 +bb 3 + diff --git a/regression-test/suites/nereids_p0/compress_materialize/compress_materialize.groovy b/regression-test/suites/nereids_p0/compress_materialize/compress_materialize.groovy new file mode 100644 index 000000000000000..8489de2aa2a1fd1 --- /dev/null +++ b/regression-test/suites/nereids_p0/compress_materialize/compress_materialize.groovy @@ -0,0 +1,198 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("compress_materialize") { + sql """ + drop table if exists compress; + CREATE TABLE `compress` ( + `k` varchar(5) NOT NULL, + `v` int NOT NULL + ) ENGINE=OLAP + duplicate KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS AUTO + PROPERTIES ( + "replication_num" = "1" + ); + + + insert into compress values ("aaaaaa", 1), ("aaaaaa", 2), ("bbbbb", 3), ("bbbbb", 4), ("bbbbb", 5); + + + drop table if exists cmt2; + CREATE TABLE `cmt2` ( + `k2` varchar(5) NOT NULL, + `v2` int NOT NULL + ) ENGINE=OLAP + duplicate KEY(`k2`) + DISTRIBUTED BY random + PROPERTIES ( + "replication_num" = "1" + ); + + insert into cmt2 values ("aaaa", 1), ("b", 3); + insert into cmt2 values("123456", 123456); + + set ENABLE_COMPRESS_MATERIALIZE = true; + """ + + explain{ + sql (""" + select k from compress group by k; + """) + contains("encode_as_bigint") + } + order_qt_agg_exec "select k from compress group by k;" + + explain{ + sql (""" + select k, substring(k, 1), sum(v) from compress group by k; + """) + contains("encode_as_bigint(k)") + } + order_qt_output_contains_gpk "select k, substring(k, 1) from compress group by k;" + + order_qt_expr """ select substring(k,1,3) from compress group by substring(k,1,3);""" + explain{ + sql "select substring(k,1,3) from compress group by substring(k,1,3);" + contains("encode_as_int(substring(k, 1, 3))") + } + + explain { + sql("select sum(v) from compress group by substring(k, 1, 3);") + contains("group by: encode_as_int(substring(k, 1, 3))") + } + + explain { + sql("select sum(v) from compress group by substring(k, 1, 4);") + contains("group by: encode_as_bigint(substring(k, 1, 4))") + } + + order_qt_encodeexpr "select sum(v) from compress group by substring(k, 1, 3);" + + // TODO: RF targets on compressed_materialze column is broken + // // verify that compressed materialization do not block runtime filter generation + // sql """ + // set disable_join_reorder=true; + // set runtime_filter_mode = GLOBAL; + // set runtime_filter_type=2; + // set enable_runtime_filter_prune=false; + // """ + + // qt_join """ + // explain shape plan + // select * + // from ( + // select k from compress group by k + // ) T join[broadcast] cmt2 on T.k = cmt2.k2; + // """ + + + sql """ + drop table if exists compressInt; + CREATE TABLE `compressInt` ( + `k` varchar(3) NOT NULL, + `v` int NOT NULL + ) ENGINE=OLAP + duplicate KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS AUTO + PROPERTIES ( + "replication_num" = "1" + ); + + + insert into compressInt values ("a", 1), ("aa", 2), ("bb", 3), ("b", 4), ("b", 5); + """ + explain{ + sql "select k from compressInt group by k" + contains("encode_as_int") + } + + sql """ + drop table if exists compressLargeInt; + CREATE TABLE `compressLargeInt` ( + `k` varchar(10) NOT NULL, + `v` int NOT NULL + ) ENGINE=OLAP + duplicate KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS AUTO + PROPERTIES ( + "replication_num" = "1" + ); + + + insert into compressLargeInt values ("a", 1), ("aa", 2), ("bb", 3), ("b", 4), ("b", 5); + """ + explain{ + sql "select k from compressLargeInt group by k" + contains("group by: encode_as_largeint(k)") + } + + + sql """ + drop table if exists notcompress; + CREATE TABLE `notcompress` ( + `k` varchar(16) NOT NULL, + `v` int NOT NULL + ) ENGINE=OLAP + duplicate KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS AUTO + PROPERTIES ( + "replication_num" = "1" + ); + + + insert into notcompress values ("a", 1), ("aa", 2), ("bb", 3), ("b", 4), ("b", 5); + """ + explain{ + sql "select k from notcompress group by k" + notContains("encode_as_") + } + + sql """ + drop table if exists compressSort; + CREATE TABLE `compressSort` ( + `k` varchar(3) NULL, + `v` int NOT NULL + ) ENGINE=OLAP + duplicate KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS AUTO + PROPERTIES ( + "replication_num" = "1" + ); + + + insert into compressSort values ("a", 1), ("aa", 2), ("bb", 3), ("b", 4), ("b", 5); + insert into compressSort(v) values (6); + insert into compressSort values ("",7), ("中", 8), ("国", 9); + """ + explain { + sql "select v from compressSort order by k" + contains("order by: encode_as_int(k)") +// expect plan fragment: +// 1:VSORT(140) | +// order by: encode_as_int(k)[#5] ASC | +// algorithm: full sort | +// offset: 0 | +// distribute expr lists: + } + qt_sort "select * from compressSort order by k asc, v"; + qt_sort "select * from compressSort order by k desc, v"; + qt_sort "select * from compressSort order by k desc nulls last"; + qt_sort "select * from compressSort order by k desc nulls last, v limit 3"; + +} +