Skip to content

Commit

Permalink
[CALCITE-6214] Remove DISTINCT in COUNT if field is unique
Browse files Browse the repository at this point in the history
  • Loading branch information
JiajunBernoulli committed Jan 21, 2024
1 parent e2c84a6 commit 725ce51
Show file tree
Hide file tree
Showing 4 changed files with 248 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.rel.rules;

import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.RelRule;
import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.core.AggregateCall;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.util.ImmutableBitSet;

import org.immutables.value.Value;

import java.util.ArrayList;
import java.util.List;

/**
 * Planner rule that removes the {@code DISTINCT} qualifier from a
 * {@code COUNT} call (such as {@code COUNT(DISTINCT x)}) of an
 * {@link Aggregate} whose input is another {@link Aggregate} that already
 * makes the counted column distinct.
 *
 * <p>The rule only rewrites when the input aggregate is simple, groups by
 * exactly one column, and the {@code COUNT} call has a single argument that
 * refers to field 0 of the input, which is the input's grouping key.
 *
 * @see CoreRules#AGGREGATE_REMOVE_DISTINCT
 */
@Value.Enclosing
public class AggregateRemoveDistinctRule
    extends RelRule<AggregateRemoveDistinctRule.Config>
    implements SubstitutionRule {

  /** Creates an AggregateRemoveDistinctRule. */
  protected AggregateRemoveDistinctRule(Config config) {
    super(config);
  }

  @Override public void onMatch(RelOptRuleCall call) {
    final Aggregate outer = call.rel(0);
    final Aggregate inner = call.rel(1);

    // The input aggregate must produce distinct values of a single column.
    final ImmutableBitSet innerKeys = inner.getGroupSet();
    final int keyCount = innerKeys.cardinality();
    if (keyCount != 1) {
      return;
    }

    // Copy the outer calls, dropping DISTINCT wherever it is redundant.
    boolean anyRewritten = false;
    final List<AggregateCall> rewrittenCalls = new ArrayList<>();
    for (AggregateCall candidate : outer.getAggCallList()) {
      final boolean redundant = isRedundantDistinctCount(candidate, keyCount);
      rewrittenCalls.add(redundant ? candidate.withDistinct(false) : candidate);
      anyRewritten |= redundant;
    }

    // Nothing changed: leave the plan alone.
    if (!anyRewritten) {
      return;
    }
    call.transformTo(
        outer.copy(outer.getTraitSet(), inner, outer.getGroupSet(),
            outer.getGroupSets(), rewrittenCalls));
  }

  /**
   * Returns whether {@code aggCall} is a distinct, single-argument
   * {@code COUNT} whose argument index is below {@code keyCount}, that is,
   * whose argument is one of the leading key fields of the input.
   */
  private static boolean isRedundantDistinctCount(AggregateCall aggCall,
      int keyCount) {
    if (!aggCall.isDistinct()) {
      return false;
    }
    if (aggCall.getAggregation().getKind() != SqlKind.COUNT) {
      return false;
    }
    final List<Integer> args = aggCall.getArgList();
    return args.size() == 1 && args.get(0) < keyCount;
  }

  /** Rule configuration. */
  @Value.Immutable
  public interface Config extends RelRule.Config {
    // Matches an Aggregate whose single input is a simple Aggregate
    // (any inputs below that).
    AggregateRemoveDistinctRule.Config DEFAULT =
        ImmutableAggregateRemoveDistinctRule.Config.of()
            .withOperandSupplier(top ->
                top.operand(Aggregate.class).oneInput(bottom ->
                    bottom.operand(Aggregate.class)
                        .predicate(Aggregate::isSimple)
                        .anyInputs()))
            .as(AggregateRemoveDistinctRule.Config.class);

    @Override default AggregateRemoveDistinctRule toRule() {
      return new AggregateRemoveDistinctRule(this);
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,13 @@ private CoreRules() {}
public static final AggregateRemoveRule AGGREGATE_REMOVE =
AggregateRemoveRule.Config.DEFAULT.toRule();

/** Rule that removes the {@code DISTINCT} qualifier from a {@code COUNT}
* call (such as {@code COUNT(DISTINCT x)}) of an {@link Aggregate}
* if the input relational expression is already distinct on the counted
* column.
*/
public static final AggregateRemoveDistinctRule AGGREGATE_REMOVE_DISTINCT =
AggregateRemoveDistinctRule.Config.DEFAULT.toRule();

/** Rule that expands distinct aggregates
* (such as {@code COUNT(DISTINCT x)}) from a
* {@link Aggregate}.
Expand Down
49 changes: 49 additions & 0 deletions core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -6562,6 +6562,55 @@ private HepProgram getTransitiveProgram() {
.check();
}

/** Applies {@link CoreRules#AGGREGATE_REMOVE_DISTINCT} to
 * {@code COUNT(DISTINCT x)} over a {@code SELECT DISTINCT} sub-query that
 * produces the single column {@code x}. */
@Test void testAggregateDistinctRemove1() {
  final String query =
      "select count(distinct x) cnt\n"
          + "from(\n"
          + " select distinct sal x from emp\n"
          + ") t ";
  sql(query)
      .withRule(CoreRules.AGGREGATE_REMOVE_DISTINCT)
      .check();
}

/** Applies {@link CoreRules#AGGREGATE_REMOVE_DISTINCT} to
 * {@code COUNT(DISTINCT x)} over a sub-query that groups by the single
 * expression {@code sal * 12}, aliased as {@code x}. */
@Test void testAggregateDistinctRemove2() {
  final String query =
      "select count(distinct x) cnt\n"
          + "from(\n"
          + " select sal * 12 as x from emp"
          + " group by sal * 12\n"
          + ") t ";
  sql(query)
      .withRule(CoreRules.AGGREGATE_REMOVE_DISTINCT)
      .check();
}

/** Applies {@link CoreRules#AGGREGATE_REMOVE_DISTINCT}, after
 * {@link CoreRules#AGGREGATE_PROJECT_MERGE}, to a query that counts distinct
 * values of both the grouping key {@code x} and the aggregated column
 * {@code y} of its sub-query. */
@Test void testAggregateDistinctRemove3() {
  final String query =
      "select count(distinct x) cnt_x, min(y) as min_y, count(distinct y) as cnt_y\n"
          + "from(\n"
          + " select sal * 12 as x, min(empno) y from emp"
          + " group by sal * 12\n"
          + ") t ";
  sql(query)
      .withPreRule(CoreRules.AGGREGATE_PROJECT_MERGE)
      .withRule(CoreRules.AGGREGATE_REMOVE_DISTINCT)
      .check();
}

/** Checks that {@link CoreRules#AGGREGATE_REMOVE_DISTINCT} leaves the plan
 * unchanged when the distinct counts are over aggregated columns
 * ({@code max(sal)}, {@code min(sal)}) of the sub-query rather than over its
 * grouping key {@code deptno}. */
@Test void testAggregateDistinctRemove4() {
  final String query =
      "select count(distinct x) cnt_x, min(y) as min_y, count(distinct y) as cnt_y\n"
          + "from(\n"
          + " select max(sal) as x, min(sal) y from emp"
          + " group by deptno\n"
          + ") t ";
  sql(query)
      .withPreRule(CoreRules.AGGREGATE_PROJECT_MERGE)
      .withRule(CoreRules.AGGREGATE_REMOVE_DISTINCT)
      .checkUnchanged();
}

/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-2712">[CALCITE-2712]
* Should remove the left join since the aggregate has no call and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,93 @@ LogicalProject(JOB=[$1])
LogicalProject(MGR=[$3])
LogicalFilter(condition=[AND(IS NULL($3), =($2, 'Clerk'))])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testAggregateDistinctRemove1">
<Resource name="sql">
<![CDATA[select count(distinct x) cnt
from(
select distinct sal x from emp
) t ]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalAggregate(group=[{}], CNT=[COUNT(DISTINCT $0)])
LogicalAggregate(group=[{0}])
LogicalProject(X=[$5])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalAggregate(group=[{}], CNT=[COUNT($0)])
LogicalAggregate(group=[{0}])
LogicalProject(X=[$5])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testAggregateDistinctRemove2">
<Resource name="sql">
<![CDATA[select count(distinct x) cnt
from(
select sal * 12 as x from emp group by sal * 12
) t ]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalAggregate(group=[{}], CNT=[COUNT(DISTINCT $0)])
LogicalAggregate(group=[{0}])
LogicalProject(X=[*($5, 12)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalAggregate(group=[{}], CNT=[COUNT($0)])
LogicalAggregate(group=[{0}])
LogicalProject(X=[*($5, 12)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testAggregateDistinctRemove3">
<Resource name="sql">
<![CDATA[select count(distinct x) cnt_x, min(y) as min_y, count(distinct y) as cnt_y
from(
select sal * 12 as x, min(empno) y from emp group by sal * 12
) t ]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalAggregate(group=[{}], CNT_X=[COUNT(DISTINCT $0)], MIN_Y=[MIN($1)], CNT_Y=[COUNT(DISTINCT $1)])
LogicalAggregate(group=[{0}], Y=[MIN($1)])
LogicalProject(X=[*($5, 12)], EMPNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalAggregate(group=[{}], CNT_X=[COUNT($0)], MIN_Y=[MIN($1)], CNT_Y=[COUNT(DISTINCT $1)])
LogicalAggregate(group=[{0}], Y=[MIN($1)])
LogicalProject(X=[*($5, 12)], EMPNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testAggregateDistinctRemove4">
<Resource name="sql">
<![CDATA[select count(distinct x) cnt_x, min(y) as min_y, count(distinct y) as cnt_y
from(
select max(sal) as x, min(sal) y from emp group by deptno
) t ]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalAggregate(group=[{}], CNT_X=[COUNT(DISTINCT $1)], MIN_Y=[MIN($2)], CNT_Y=[COUNT(DISTINCT $2)])
LogicalAggregate(group=[{7}], X=[MAX($5)], Y=[MIN($5)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
Expand Down

0 comments on commit 725ce51

Please sign in to comment.