Skip to content

Commit

Permalink
fix comment
Browse files Browse the repository at this point in the history
  • Loading branch information
leaves12138 committed Apr 3, 2024
1 parent 90beffa commit 92c4638
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.function.Function;

/** Bloom filter for file index. */
public class BloomFilter implements FileIndexer {
Expand All @@ -39,7 +38,7 @@ public class BloomFilter implements FileIndexer {

private final org.apache.paimon.utils.BloomFilter filter;

private final Function<Object, Integer> hashFunction;
private final HashConverter32 hashFunction;

public BloomFilter(DataType type, Options options) {
int items = options.getInteger("items", 1_000_000);
Expand Down Expand Up @@ -68,7 +67,7 @@ private Writer() {}

@Override
public void write(Object key) {
filter.addHash(hashFunction.apply(key));
filter.addHash(hashFunction.hash(key));
}

@Override
Expand Down Expand Up @@ -103,7 +102,7 @@ public Reader recoverFrom(byte[] serializedBytes) {

@Override
public Boolean visitEqual(FieldRef fieldRef, Object key) {
return filter.testHash(hashFunction.apply(key));
return filter.testHash(hashFunction.hash(key));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,119 +44,129 @@
import org.apache.paimon.types.VarCharType;
import org.apache.paimon.utils.MurmurHashUtils;

import java.util.function.Function;

/** Fast hash for objects; the hashing function differs according to the DataType. */
public class FastHash implements DataTypeVisitor<Function<Object, Integer>> {
public class FastHash implements DataTypeVisitor<HashConverter32> {

public static final FastHash INSTANCE = new FastHash();

public static final byte[] NULL_BYTES = new byte[0];

@Override
public Function<Object, Integer> visit(CharType charType) {
public HashConverter32 visit(CharType charType) {
return o ->
MurmurHashUtils.hashBytes(o == null ? NULL_BYTES : ((BinaryString) o).toBytes());
}

@Override
public Function<Object, Integer> visit(VarCharType varCharType) {
public HashConverter32 visit(VarCharType varCharType) {
return o ->
MurmurHashUtils.hashBytes(o == null ? NULL_BYTES : ((BinaryString) o).toBytes());
}

@Override
public Function<Object, Integer> visit(BooleanType booleanType) {
return o ->
MurmurHashUtils.hashBytes(
o == null
? NULL_BYTES
: ((Boolean) o) ? new byte[] {0x01} : new byte[] {0x00});
public HashConverter32 visit(BooleanType booleanType) {
throw new UnsupportedOperationException("Doesn't support type boolean");
}

@Override
public Function<Object, Integer> visit(BinaryType binaryType) {
public HashConverter32 visit(BinaryType binaryType) {
return o -> MurmurHashUtils.hashBytes(o == null ? NULL_BYTES : (byte[]) o);
}

@Override
public Function<Object, Integer> visit(VarBinaryType varBinaryType) {
public HashConverter32 visit(VarBinaryType varBinaryType) {
return o -> MurmurHashUtils.hashBytes(o == null ? NULL_BYTES : (byte[]) o);
}

@Override
public Function<Object, Integer> visit(DecimalType decimalType) {
return o -> o == null ? 0 : getLongHash(((Decimal) o).toUnscaledLong());
public HashConverter32 visit(DecimalType decimalType) {
return o ->
o == null
? 0
: getLongHash(
Double.doubleToLongBits(
((Decimal) o).toBigDecimal().doubleValue()));
}

@Override
public Function<Object, Integer> visit(TinyIntType tinyIntType) {
public HashConverter32 visit(TinyIntType tinyIntType) {
return o -> o == null ? 0 : getLongHash((byte) o);
}

@Override
public Function<Object, Integer> visit(SmallIntType smallIntType) {
public HashConverter32 visit(SmallIntType smallIntType) {
return o -> o == null ? 0 : getLongHash((short) o);
}

@Override
public Function<Object, Integer> visit(IntType intType) {
public HashConverter32 visit(IntType intType) {
return o -> o == null ? 0 : getLongHash((int) o);
}

@Override
public Function<Object, Integer> visit(BigIntType bigIntType) {
public HashConverter32 visit(BigIntType bigIntType) {
return o -> o == null ? 0 : getLongHash((long) o);
}

@Override
public Function<Object, Integer> visit(FloatType floatType) {
public HashConverter32 visit(FloatType floatType) {
return o -> o == null ? 0 : getLongHash(Float.floatToIntBits((float) o));
}

@Override
public Function<Object, Integer> visit(DoubleType doubleType) {
public HashConverter32 visit(DoubleType doubleType) {
return o -> o == null ? 0 : getLongHash(Double.doubleToLongBits((double) o));
}

@Override
public Function<Object, Integer> visit(DateType dateType) {
public HashConverter32 visit(DateType dateType) {
return o -> o == null ? 0 : getLongHash((int) o);
}

@Override
public Function<Object, Integer> visit(TimeType timeType) {
public HashConverter32 visit(TimeType timeType) {
return o -> o == null ? 0 : getLongHash((int) o);
}

@Override
public Function<Object, Integer> visit(TimestampType timestampType) {
return o -> o == null ? 0 : getLongHash(((Timestamp) o).getMillisecond());
public HashConverter32 visit(TimestampType timestampType) {
// Returns a converter that hashes a Timestamp value; a null input hashes to 0.
// same as orc
// NOTE(review): the second term uses integer division. If getNanoOfMillisecond() is always
// below 1_000_000 (presumably nanoseconds within the current millisecond; confirm against
// the Timestamp class), it truncates to 0 and the hash depends on getMillisecond() only.
return o ->
o == null
? 0
: getLongHash(
((Timestamp) o).getMillisecond()
+ ((Timestamp) o).getNanoOfMillisecond() / 1_000_000);
}

@Override
public Function<Object, Integer> visit(LocalZonedTimestampType localZonedTimestampType) {
return o -> o == null ? 0 : getLongHash(((Timestamp) o).getMillisecond());
public HashConverter32 visit(LocalZonedTimestampType localZonedTimestampType) {
// Returns a converter that hashes a Timestamp value; a null input hashes to 0.
// NOTE(review): the second term uses integer division. If getNanoOfMillisecond() is always
// below 1_000_000 (presumably nanoseconds within the current millisecond; confirm against
// the Timestamp class), it truncates to 0 and the hash depends on getMillisecond() only.
return o ->
o == null
? 0
: getLongHash(
((Timestamp) o).getMillisecond()
+ ((Timestamp) o).getNanoOfMillisecond() / 1_000_000);
}

@Override
public Function<Object, Integer> visit(ArrayType arrayType) {
throw new UnsupportedOperationException();
public HashConverter32 visit(ArrayType arrayType) {
throw new UnsupportedOperationException("Does not support type array");
}

@Override
public Function<Object, Integer> visit(MultisetType multisetType) {
throw new UnsupportedOperationException();
public HashConverter32 visit(MultisetType multisetType) {
    // No hash converter is provided for multiset values; always fails with a descriptive
    // message ("multiset" was previously misspelled as "mutiset").
    throw new UnsupportedOperationException("Does not support type multiset");
}

@Override
public Function<Object, Integer> visit(MapType mapType) {
throw new UnsupportedOperationException();
public HashConverter32 visit(MapType mapType) {
throw new UnsupportedOperationException("Does not support type map");
}

@Override
public Function<Object, Integer> visit(RowType rowType) {
throw new UnsupportedOperationException();
public HashConverter32 visit(RowType rowType) {
throw new UnsupportedOperationException("Does not support type row");
}

// Thomas Wang's integer hash function
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.fileindex.bloomfilter;

/**
 * Hash object to 32 bit hash code.
 *
 * <p>This interface has a single abstract method, so it can be implemented with a lambda
 * expression or a method reference.
 */
@FunctionalInterface
public interface HashConverter32 {

    /**
     * Computes a 32 bit hash code for the given object.
     *
     * @param o the object to hash; whether {@code null} is accepted is up to the implementation
     * @return the 32 bit hash code
     */
    int hash(Object o);
}

0 comments on commit 92c4638

Please sign in to comment.