diff --git a/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndex.java b/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndex.java index 46fcde0f1f41..c0486d535895 100644 --- a/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndex.java +++ b/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndex.java @@ -26,9 +26,18 @@ import org.apache.paimon.types.DataType; import org.apache.paimon.utils.BloomFilter64; +import org.apache.hadoop.util.bloom.HashFunction; + import java.util.BitSet; -/** Bloom filter for file index. */ +/** + * Bloom filter for file index. + * + *

Note: This class uses {@link BloomFilter64} as its base filter. It stores only the number of hash + * functions (one integer) and the bit set bytes. It uses {@link HashFunction} to hash objects: byte + * types (like varchar, binary, etc.) are hashed with xxHash, and numeric types with a number hash (see + * http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm). + */ public class BloomFilterFileIndex implements FileIndexer { public static final String BLOOM_FILTER = "bloom"; diff --git a/paimon-common/src/main/java/org/apache/paimon/utils/BloomFilter64.java b/paimon-common/src/main/java/org/apache/paimon/utils/BloomFilter64.java index a620a73aace1..75b8661a2eba 100644 --- a/paimon-common/src/main/java/org/apache/paimon/utils/BloomFilter64.java +++ b/paimon-common/src/main/java/org/apache/paimon/utils/BloomFilter64.java @@ -21,7 +21,7 @@ import java.util.BitSet; /** Bloom filter 64 handle 64 bits hash. */ -public class BloomFilter64 { +public final class BloomFilter64 { private final BitSet bitSet; private final int numBits;