From 92c463862c21d83e947da443da58182ad62ef8e0 Mon Sep 17 00:00:00 2001
From: yejunhao
Date: Wed, 3 Apr 2024 11:39:44 +0800
Subject: [PATCH] fix comment

---
Review notes. Text between the three-dash marker above and the diffstat is
commentary only; it is not part of the commit message.

* Line structure and indentation were rebuilt from a whitespace-collapsed
  copy of this patch. Every hunk was re-counted against its @@ header and
  against the diffstat (73 insertions, 39 deletions).
* Generic type arguments had been stripped by the extraction, e.g.
  "DataTypeVisitor> {". <HashConverter32> follows from the new return
  types. <Object, Integer> on the removed Function lines is inferred from
  the lambdas (Object in, int hash out) -- TODO confirm against the
  pre-image, because removed lines must match it exactly.
* The sender address after "From: yejunhao" was lost the same way and
  cannot be recovered from this copy.
* Message fixes on added lines in FastHash.java: "mutiset" is now
  "multiset", and "Doesn't support type boolean" is now "Does not support
  type boolean" to match the other unsupported-type messages. The
  post-image blob id on the FastHash index line was not recomputed.
* NOTE(review): in both timestamp visitors the term
  getNanoOfMillisecond() / 1_000_000 is integer division. If that value is
  always below 1_000_000, as its name suggests, the term is always 0 and
  the hash depends on getMillisecond() only -- confirm this is intended.
* NOTE(review): HashConverter32 is used as a lambda target; consider
  annotating it with @FunctionalInterface in a follow-up.

 .../fileindex/bloomfilter/BloomFilter.java    |  7 +-
 .../fileindex/bloomfilter/FastHash.java       | 80 +++++++++++--------
 .../bloomfilter/HashConverter32.java          | 25 ++++++
 3 files changed, 73 insertions(+), 39 deletions(-)
 create mode 100644 paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/HashConverter32.java

diff --git a/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilter.java b/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilter.java
index 1b618ff805528..bbc0180c80de8 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilter.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilter.java
@@ -30,7 +30,6 @@
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
-import java.util.function.Function;
 
 /** Bloom filter for file index. */
 public class BloomFilter implements FileIndexer {
@@ -39,7 +38,7 @@ public class BloomFilter implements FileIndexer {
 
     private final org.apache.paimon.utils.BloomFilter filter;
 
-    private final Function<Object, Integer> hashFunction;
+    private final HashConverter32 hashFunction;
 
     public BloomFilter(DataType type, Options options) {
         int items = options.getInteger("items", 1_000_000);
@@ -68,7 +67,7 @@ private Writer() {}
 
         @Override
         public void write(Object key) {
-            filter.addHash(hashFunction.apply(key));
+            filter.addHash(hashFunction.hash(key));
         }
 
         @Override
@@ -103,7 +102,7 @@ public Reader recoverFrom(byte[] serializedBytes) {
 
         @Override
         public Boolean visitEqual(FieldRef fieldRef, Object key) {
-            return filter.testHash(hashFunction.apply(key));
+            return filter.testHash(hashFunction.hash(key));
         }
     }
 }
diff --git a/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/FastHash.java b/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/FastHash.java
index c730713b0a61e..67d4cb1b0d36d 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/FastHash.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/FastHash.java
@@ -44,119 +44,129 @@
 import org.apache.paimon.types.VarCharType;
 import org.apache.paimon.utils.MurmurHashUtils;
 
-import java.util.function.Function;
-
 /** Fast hash for object differs to DataType. */
-public class FastHash implements DataTypeVisitor<Function<Object, Integer>> {
+public class FastHash implements DataTypeVisitor<HashConverter32> {
 
     public static final FastHash INSTANCE = new FastHash();
 
     public static final byte[] NULL_BYTES = new byte[0];
 
     @Override
-    public Function<Object, Integer> visit(CharType charType) {
+    public HashConverter32 visit(CharType charType) {
         return o ->
                 MurmurHashUtils.hashBytes(o == null ? NULL_BYTES : ((BinaryString) o).toBytes());
     }
 
     @Override
-    public Function<Object, Integer> visit(VarCharType varCharType) {
+    public HashConverter32 visit(VarCharType varCharType) {
         return o ->
                 MurmurHashUtils.hashBytes(o == null ? NULL_BYTES : ((BinaryString) o).toBytes());
     }
 
     @Override
-    public Function<Object, Integer> visit(BooleanType booleanType) {
-        return o ->
-                MurmurHashUtils.hashBytes(
-                        o == null
-                                ? NULL_BYTES
-                                : ((Boolean) o) ? new byte[] {0x01} : new byte[] {0x00});
+    public HashConverter32 visit(BooleanType booleanType) {
+        throw new UnsupportedOperationException("Does not support type boolean");
     }
 
     @Override
-    public Function<Object, Integer> visit(BinaryType binaryType) {
+    public HashConverter32 visit(BinaryType binaryType) {
         return o -> MurmurHashUtils.hashBytes(o == null ? NULL_BYTES : (byte[]) o);
     }
 
     @Override
-    public Function<Object, Integer> visit(VarBinaryType varBinaryType) {
+    public HashConverter32 visit(VarBinaryType varBinaryType) {
         return o -> MurmurHashUtils.hashBytes(o == null ? NULL_BYTES : (byte[]) o);
     }
 
     @Override
-    public Function<Object, Integer> visit(DecimalType decimalType) {
-        return o -> o == null ? 0 : getLongHash(((Decimal) o).toUnscaledLong());
+    public HashConverter32 visit(DecimalType decimalType) {
+        return o ->
+                o == null
+                        ? 0
+                        : getLongHash(
+                                Double.doubleToLongBits(
+                                        ((Decimal) o).toBigDecimal().doubleValue()));
     }
 
     @Override
-    public Function<Object, Integer> visit(TinyIntType tinyIntType) {
+    public HashConverter32 visit(TinyIntType tinyIntType) {
         return o -> o == null ? 0 : getLongHash((byte) o);
     }
 
     @Override
-    public Function<Object, Integer> visit(SmallIntType smallIntType) {
+    public HashConverter32 visit(SmallIntType smallIntType) {
         return o -> o == null ? 0 : getLongHash((short) o);
     }
 
     @Override
-    public Function<Object, Integer> visit(IntType intType) {
+    public HashConverter32 visit(IntType intType) {
         return o -> o == null ? 0 : getLongHash((int) o);
     }
 
     @Override
-    public Function<Object, Integer> visit(BigIntType bigIntType) {
+    public HashConverter32 visit(BigIntType bigIntType) {
         return o -> o == null ? 0 : getLongHash((long) o);
     }
 
     @Override
-    public Function<Object, Integer> visit(FloatType floatType) {
+    public HashConverter32 visit(FloatType floatType) {
         return o -> o == null ? 0 : getLongHash(Float.floatToIntBits((float) o));
     }
 
     @Override
-    public Function<Object, Integer> visit(DoubleType doubleType) {
+    public HashConverter32 visit(DoubleType doubleType) {
         return o -> o == null ? 0 : getLongHash(Double.doubleToLongBits((double) o));
     }
 
     @Override
-    public Function<Object, Integer> visit(DateType dateType) {
+    public HashConverter32 visit(DateType dateType) {
         return o -> o == null ? 0 : getLongHash((int) o);
     }
 
     @Override
-    public Function<Object, Integer> visit(TimeType timeType) {
+    public HashConverter32 visit(TimeType timeType) {
         return o -> o == null ? 0 : getLongHash((int) o);
     }
 
     @Override
-    public Function<Object, Integer> visit(TimestampType timestampType) {
-        return o -> o == null ? 0 : getLongHash(((Timestamp) o).getMillisecond());
+    public HashConverter32 visit(TimestampType timestampType) {
+        // same as orc
+        return o ->
+                o == null
+                        ? 0
+                        : getLongHash(
+                                ((Timestamp) o).getMillisecond()
+                                        + ((Timestamp) o).getNanoOfMillisecond() / 1_000_000);
     }
 
     @Override
-    public Function<Object, Integer> visit(LocalZonedTimestampType localZonedTimestampType) {
-        return o -> o == null ? 0 : getLongHash(((Timestamp) o).getMillisecond());
+    public HashConverter32 visit(LocalZonedTimestampType localZonedTimestampType) {
+        return o ->
+                o == null
+                        ? 0
+                        : getLongHash(
+                                ((Timestamp) o).getMillisecond()
+                                        + ((Timestamp) o).getNanoOfMillisecond() / 1_000_000);
     }
 
     @Override
-    public Function<Object, Integer> visit(ArrayType arrayType) {
-        throw new UnsupportedOperationException();
+    public HashConverter32 visit(ArrayType arrayType) {
+        throw new UnsupportedOperationException("Does not support type array");
     }
 
     @Override
-    public Function<Object, Integer> visit(MultisetType multisetType) {
-        throw new UnsupportedOperationException();
+    public HashConverter32 visit(MultisetType multisetType) {
+        throw new UnsupportedOperationException("Does not support type multiset");
     }
 
     @Override
-    public Function<Object, Integer> visit(MapType mapType) {
-        throw new UnsupportedOperationException();
+    public HashConverter32 visit(MapType mapType) {
+        throw new UnsupportedOperationException("Does not support type map");
     }
 
     @Override
-    public Function<Object, Integer> visit(RowType rowType) {
-        throw new UnsupportedOperationException();
+    public HashConverter32 visit(RowType rowType) {
+        throw new UnsupportedOperationException("Does not support type row");
     }
 
     // Thomas Wang's integer hash function
diff --git a/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/HashConverter32.java b/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/HashConverter32.java
new file mode 100644
index 0000000000000..645bd115c0d27
--- /dev/null
+++ b/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/HashConverter32.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.fileindex.bloomfilter;
+
+/** Hash object to 32 bit hash code. */
+public interface HashConverter32 {
+
+    int hash(Object o);
+}