Skip to content

Commit

Permalink
[fix](nereids)the column name should be case insensitive in tablet pr…
Browse files Browse the repository at this point in the history
…une (apache#44064)

```
CREATE TABLE `t_customers_wide_index` (   `CUSTOMER_ID` int NULL,   `ADDRESS` varchar(1500) NULL)DUPLICATE KEY(`CUSTOMER_ID`) DISTRIBUTED BY HASH(`CUSTOMER_ID`) BUCKETS 32;
insert into t_customers_wide_index values (1, "111");

explain SELECT * from t_customers_wide_index WHERE customer_id = 1817422; 
```
Before this PR, tablet pruning did not work because the column name was matched case-sensitively:
`tablets=32/32, tabletList=1451717,1451719,1451721 ...`
After this PR, the unused tablets are pruned, as shown below:
`tablets=1/32, tabletList=1451767 `
  • Loading branch information
starocean999 authored Nov 18, 2024
1 parent 03bc051 commit f74610c
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableList.Builder;
import com.google.common.collect.Maps;
import org.apache.commons.collections.map.CaseInsensitiveMap;

import java.util.Collection;
import java.util.HashSet;
Expand Down Expand Up @@ -80,7 +80,7 @@ private Collection<Long> getSelectedTabletIds(Set<Expression> expressions,
return index.getTabletIdsInOrder();
}
HashDistributionInfo hashInfo = (HashDistributionInfo) info;
Map<String, PartitionColumnFilter> filterMap = Maps.newHashMap();
Map<String, PartitionColumnFilter> filterMap = new CaseInsensitiveMap();
expressions.stream().map(ExpressionUtils::checkAndMaybeCommute).filter(Optional::isPresent)
.forEach(expr -> new ExpressionColumnFilterConverter(filterMap).convert(expr.get()));
return new HashDistributionPruner(index.getTabletIdsInOrder(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
import com.google.common.collect.RangeSet;
import com.google.common.collect.Sets;
import com.google.common.collect.TreeRangeSet;
import org.apache.commons.collections.map.CaseInsensitiveMap;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

Expand All @@ -89,7 +90,7 @@ public abstract class ScanNode extends PlanNode implements SplitGenerator {
protected static final int NUM_SPLITTERS_ON_FLIGHT = Config.max_external_cache_loader_thread_pool_size;
protected final TupleDescriptor desc;
// for distribution pruner
protected Map<String, PartitionColumnFilter> columnFilters = Maps.newHashMap();
protected Map<String, PartitionColumnFilter> columnFilters = new CaseInsensitiveMap();
// Use this if partition_prune_algorithm_version is 2.
protected Map<String, ColumnRange> columnNameToRange = Maps.newHashMap();
protected String sortColumn = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
import org.apache.doris.catalog.PrimitiveType;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.commons.collections.map.CaseInsensitiveMap;
import org.junit.Assert;
import org.junit.Test;

Expand Down Expand Up @@ -84,29 +84,29 @@ public void test() {
inList4.add(new StringLiteral("2"));
shopTypeFilter.setInPredicate(new InPredicate(new SlotRef(null, "shop_type"), inList4, false));

Map<String, PartitionColumnFilter> filters = Maps.newHashMap();
filters.put("dealDate", dealDatefilter);
filters.put("main_brand_id", mainBrandFilter);
filters.put("item_third_cate_id", itemThirdFilter);
filters.put("channel", channelFilter);
filters.put("shop_type", shopTypeFilter);
Map<String, PartitionColumnFilter> filters = new CaseInsensitiveMap();
filters.put("DEALDATE", dealDatefilter);
filters.put("MAIN_BRAND_ID", mainBrandFilter);
filters.put("ITEM_THIRD_CATE_ID", itemThirdFilter);
filters.put("CHANNEL", channelFilter);
filters.put("SHOP_TYPE", shopTypeFilter);

HashDistributionPruner pruner = new HashDistributionPruner(tabletIds, columns, filters, tabletIds.size(), true);

Collection<Long> results = pruner.prune();
// 20 = 1 * 5 * 2 * 2 * 1 (element num of each filter)
Assert.assertEquals(20, results.size());

filters.get("shop_type").getInPredicate().addChild(new StringLiteral("4"));
filters.get("SHOP_TYPE").getInPredicate().addChild(new StringLiteral("4"));
results = pruner.prune();
// 40 = 1 * 5 * 2 * 2 * 2 (element num of each filter)
// 39 (not 40) because there is a hash conflict
Assert.assertEquals(39, results.size());

filters.get("shop_type").getInPredicate().addChild(new StringLiteral("5"));
filters.get("shop_type").getInPredicate().addChild(new StringLiteral("6"));
filters.get("shop_type").getInPredicate().addChild(new StringLiteral("7"));
filters.get("shop_type").getInPredicate().addChild(new StringLiteral("8"));
filters.get("SHOP_TYPE").getInPredicate().addChild(new StringLiteral("5"));
filters.get("SHOP_TYPE").getInPredicate().addChild(new StringLiteral("6"));
filters.get("SHOP_TYPE").getInPredicate().addChild(new StringLiteral("7"));
filters.get("SHOP_TYPE").getInPredicate().addChild(new StringLiteral("8"));
results = pruner.prune();
// 120 = 1 * 5 * 2 * 2 * 6 (element num of each filter) > 100
Assert.assertEquals(300, results.size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.collections.map.CaseInsensitiveMap;
import org.junit.Assert;
import org.junit.Test;

Expand Down Expand Up @@ -62,8 +63,8 @@ public void testHashDistributionOneUser() throws AnalysisException {

PartitionColumnFilter columnFilter = new PartitionColumnFilter();
columnFilter.setInPredicate(inPredicate);
Map<String, PartitionColumnFilter> filterMap = Maps.newHashMap();
filterMap.put("columnA", columnFilter);
Map<String, PartitionColumnFilter> filterMap = new CaseInsensitiveMap();
filterMap.put("COLUMNA", columnFilter);

DistributionPruner partitionPruner = new HashDistributionPruner(
partitions,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Regression suite for tablet pruning when the query spells the distribution
// column in a different case than the table definition: the table declares
// `CUSTOMER_ID` (upper case) and the query filters on `customer_id` (lower case).
suite("test_tablet_prune") {
// Session settings: enable the Nereids planner and turn off fallback to the
// original planner (presumably so the plan checked below can only come from
// Nereids -- confirm against the session variable documentation).
sql "SET enable_nereids_planner=true"
sql "SET enable_fallback_to_original_planner=false"

// Drop any existing copy of the table before creating it.
sql "drop table if exists t_customers_wide_index"
// Create a UNIQUE KEY table hash-distributed on `CUSTOMER_ID` into 32 buckets.
sql """
CREATE TABLE `t_customers_wide_index` ( `CUSTOMER_ID` int NULL, `ADDRESS` varchar(1500) NULL) ENGINE=OLAP UNIQUE KEY(`CUSTOMER_ID`) DISTRIBUTED BY HASH(`CUSTOMER_ID`) BUCKETS 32 PROPERTIES ( "file_cache_ttl_seconds" = "0", "is_being_synced" = "false", "storage_medium" = "hdd", "storage_format" = "V2", "inverted_index_storage_format" = "V2", "enable_unique_key_merge_on_write" = "true", "light_schema_change" = "true", "store_row_column" = "true", "row_store_page_size" = "16384", "disable_auto_compaction" = "false", "enable_single_replica_compaction" = "false", "group_commit_interval_ms" = "10000", "group_commit_data_bytes" = "134217728", "enable_mow_light_delete" = "false" ); """
// Insert a single row into the table.
sql """
insert into t_customers_wide_index values (1, "111");
"""
// Explain a point query whose predicate uses the lower-case column name and
// require the plan text to contain "tablets=1/32" (presumably meaning that
// only 1 of the 32 tablets is selected for the scan).
explain {
sql("SELECT * from t_customers_wide_index WHERE customer_id = 1817422;")
contains "tablets=1/32"
}
}

0 comments on commit f74610c

Please sign in to comment.