diff --git a/paimon-common/src/main/java/org/apache/paimon/utils/FileBasedBloomFilter.java b/paimon-common/src/main/java/org/apache/paimon/utils/FileBasedBloomFilter.java index 541185e7bae6..06edd34e9002 100644 --- a/paimon-common/src/main/java/org/apache/paimon/utils/FileBasedBloomFilter.java +++ b/paimon-common/src/main/java/org/apache/paimon/utils/FileBasedBloomFilter.java @@ -31,12 +31,16 @@ /** Util to apply a built bloom filter . */ public class FileBasedBloomFilter { + private static final int FORCE_REFRESH_CACHE = 30; + private final RandomAccessFile file; private final CacheManager cacheManager; private final BloomFilter filter; private final long readOffset; private final int readLength; + private int refreshCount; + public FileBasedBloomFilter( RandomAccessFile file, CacheManager cacheManager, @@ -49,10 +53,14 @@ public FileBasedBloomFilter( this.filter = new BloomFilter(numRecords, readLength); this.readOffset = readOffset; this.readLength = readLength; + this.refreshCount = 0; } public boolean testHash(int hash) { - if (filter.getMemorySegment() == null) { + refreshCount++; + // Fetching the page refreshes it in the LRU cache, but doing that on every call is costly, + // so re-fetch only every FORCE_REFRESH_CACHE-th call or when no memory segment is set. + if (refreshCount == FORCE_REFRESH_CACHE || filter.getMemorySegment() == null) { MemorySegment segment = cacheManager.getPage( file, @@ -60,6 +68,7 @@ public boolean testHash(int hash) { readLength, (position, length) -> filter.unsetMemorySegment()); filter.setMemorySegment(segment, 0); + refreshCount = 0; } return filter.testHash(hash); }