From dae577a3ba52bd7b339f386752c58c43715157eb Mon Sep 17 00:00:00 2001
From: "wenchao.wu"
Date: Tue, 19 Mar 2024 21:01:35 +0800
Subject: [PATCH] [PAIMON-3038] Add initial map size to speed up index loading.

---
 .../org/apache/paimon/utils/Int2ShortHashMap.java |  5 +++++
 .../org/apache/paimon/index/PartitionIndex.java   | 24 +++++++++++++++++++++++-
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java b/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java
index a271bee6636f..453f04974ab5 100644
--- a/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java
+++ b/paimon-common/src/main/java/org/apache/paimon/utils/Int2ShortHashMap.java
@@ -29,6 +29,11 @@ public Int2ShortHashMap() {
         this.map = new Int2ShortOpenHashMap();
     }
 
+    /** Creates a map backed by {@code new Int2ShortOpenHashMap(capacity)}. */
+    public Int2ShortHashMap(int capacity) {
+        this.map = new Int2ShortOpenHashMap(capacity);
+    }
+
     public void put(int key, short value) {
         map.put(key, value);
     }
diff --git a/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java b/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java
index 28428aa2d22f..e9667cfe380e 100644
--- a/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java
+++ b/paimon-core/src/main/java/org/apache/paimon/index/PartitionIndex.java
@@ -22,6 +22,7 @@
 import org.apache.paimon.manifest.IndexManifestEntry;
 import org.apache.paimon.utils.Int2ShortHashMap;
 import org.apache.paimon.utils.IntIterator;
+import org.apache.paimon.utils.MathUtils;
 
 import java.io.EOFException;
 import java.io.IOException;
@@ -111,8 +112,8 @@ public static PartitionIndex loadIndex(
             long targetBucketRowNumber,
             IntPredicate loadFilter,
             IntPredicate bucketFilter) {
-        Int2ShortHashMap map = new Int2ShortHashMap();
         List files = indexFileHandler.scan(HASH_INDEX, partition);
+        Int2ShortHashMap map = new Int2ShortHashMap(calculateInitialMapSize(files));
         Map buckets = new HashMap<>();
         for (IndexManifestEntry file : files) {
             try (IntIterator iterator = indexFileHandler.readHashIndex(file.indexFile())) {
@@ -137,4 +138,25 @@ public static PartitionIndex loadIndex(
         }
         return new PartitionIndex(map, buckets, targetBucketRowNumber);
     }
+
+    /**
+     * Derives the initial map size from the largest row count among the given index files, with
+     * a floor of 16. A size that is not already a power of two is replaced by twice its
+     * {@link MathUtils#roundDownToPowerOf2} value.
+     *
+     * <p>The row count is capped at 2^29 first, so neither the cast to {@code int} nor the
+     * doubling can overflow for very large index files.
+     */
+    private static int calculateInitialMapSize(List<IndexManifestEntry> files) {
+        long size = 16;
+        for (IndexManifestEntry file : files) {
+            size = Math.max(size, file.indexFile().rowCount());
+        }
+        // Only a pre-sizing hint, so cap it before narrowing to int. 2^29 rather than 2^30
+        // leaves the backing open hash map headroom for its load factor.
+        size = Math.min(size, 1L << 29);
+        return MathUtils.isPowerOf2(size)
+                ? (int) size
+                : MathUtils.roundDownToPowerOf2((int) size) * 2;
+    }
 }