From 8157be98e3e43e7fa95b3f52c3645823b7b3a569 Mon Sep 17 00:00:00 2001 From: Tan-JiaLiang Date: Sat, 28 Dec 2024 17:38:25 +0800 Subject: [PATCH] [common] Using a faster deserialization method in RoaringBitmap32 (#4765) --- .../bitmap/RoaringBitmapBenchmark.java | 111 ++++++++++++++++++ .../apache/paimon/utils/RoaringBitmap32.java | 24 ++-- .../deletionvectors/BitmapDeletionVector.java | 10 +- .../deletionvectors/DeletionVector.java | 27 ++--- .../aggregate/FieldRoaringBitmap32Agg.java | 5 +- 5 files changed, 144 insertions(+), 33 deletions(-) create mode 100644 paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/bitmap/RoaringBitmapBenchmark.java diff --git a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/bitmap/RoaringBitmapBenchmark.java b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/bitmap/RoaringBitmapBenchmark.java new file mode 100644 index 000000000000..4b989e96e5ee --- /dev/null +++ b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/bitmap/RoaringBitmapBenchmark.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.benchmark.bitmap; + +import org.apache.paimon.benchmark.Benchmark; +import org.apache.paimon.fs.local.LocalFileIO; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.roaringbitmap.RoaringBitmap; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.file.Path; +import java.util.Random; + +import static org.assertj.core.api.Assertions.assertThat; + +/** Benchmark for {@link RoaringBitmap}. */ +public class RoaringBitmapBenchmark { + + public static final int ROW_COUNT = 10000000; + + @TempDir Path tempDir; + + @Test + public void testDeserialize() throws Exception { + Random random = new Random(); + RoaringBitmap bitmap = new RoaringBitmap(); + for (int i = 0; i < ROW_COUNT; i++) { + if (random.nextBoolean()) { + bitmap.add(i); + } + } + + File file = new File(tempDir.toFile(), "bitmap32-deserialize-benchmark"); + assertThat(file.createNewFile()).isTrue(); + try (FileOutputStream output = new FileOutputStream(file); + DataOutputStream dos = new DataOutputStream(output)) { + bitmap.serialize(dos); + } + + Benchmark benchmark = + new Benchmark("bitmap32-deserialize-benchmark", 100) + .setNumWarmupIters(1) + .setOutputPerIteration(true); + + benchmark.addCase( + "deserialize(DataInput)", + 10, + () -> { + try (LocalFileIO.LocalSeekableInputStream seekableStream = + new LocalFileIO.LocalSeekableInputStream(file); + DataInputStream input = new DataInputStream(seekableStream)) { + new RoaringBitmap().deserialize(input); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + benchmark.addCase( + "deserialize(DataInput, byte[])", + 10, + () -> { + try (LocalFileIO.LocalSeekableInputStream seekableStream = + new LocalFileIO.LocalSeekableInputStream(file); + DataInputStream input = new DataInputStream(seekableStream)) { + new RoaringBitmap().deserialize(input, null); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + benchmark.addCase( + "deserialize(ByteBuffer)", + 10, + () -> { + try (LocalFileIO.LocalSeekableInputStream seekableStream = + new LocalFileIO.LocalSeekableInputStream(file); + DataInputStream input = new DataInputStream(seekableStream)) { + byte[] bytes = new byte[(int) file.length()]; + input.readFully(bytes); + new RoaringBitmap().deserialize(ByteBuffer.wrap(bytes)); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + benchmark.run(); + } +} diff --git a/paimon-common/src/main/java/org/apache/paimon/utils/RoaringBitmap32.java b/paimon-common/src/main/java/org/apache/paimon/utils/RoaringBitmap32.java index 5f352f61cd3c..6496b7003ef4 100644 --- a/paimon-common/src/main/java/org/apache/paimon/utils/RoaringBitmap32.java +++ b/paimon-common/src/main/java/org/apache/paimon/utils/RoaringBitmap32.java @@ -85,8 +85,19 @@ public void serialize(DataOutput out) throws IOException { roaringBitmap.serialize(out); } + public byte[] serialize() { + roaringBitmap.runOptimize(); + ByteBuffer buffer = ByteBuffer.allocate(roaringBitmap.serializedSizeInBytes()); + roaringBitmap.serialize(buffer); + return buffer.array(); + } + public void deserialize(DataInput in) throws IOException { - roaringBitmap.deserialize(in); + roaringBitmap.deserialize(in, null); + } + + public void deserialize(ByteBuffer buffer) throws IOException { + roaringBitmap.deserialize(buffer); } @Override @@ -105,17 +116,6 @@ public void clear() { roaringBitmap.clear(); } - public byte[] serialize() { - roaringBitmap.runOptimize(); - ByteBuffer buffer = ByteBuffer.allocate(roaringBitmap.serializedSizeInBytes()); - roaringBitmap.serialize(buffer); - return buffer.array(); - } - - public void deserialize(byte[] rbmBytes) throws IOException { - roaringBitmap.deserialize(ByteBuffer.wrap(rbmBytes)); - } - public void flip(final long rangeStart, final long rangeEnd) { roaringBitmap.flip(rangeStart, rangeEnd); } diff --git a/paimon-core/src/main/java/org/apache/paimon/deletionvectors/BitmapDeletionVector.java b/paimon-core/src/main/java/org/apache/paimon/deletionvectors/BitmapDeletionVector.java index 51ae729c2193..55e0a975e303 100644 --- a/paimon-core/src/main/java/org/apache/paimon/deletionvectors/BitmapDeletionVector.java +++ b/paimon-core/src/main/java/org/apache/paimon/deletionvectors/BitmapDeletionVector.java @@ -21,9 +21,9 @@ import org.apache.paimon.utils.RoaringBitmap32; import java.io.ByteArrayOutputStream; -import java.io.DataInput; import java.io.DataOutputStream; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.Objects; /** @@ -93,10 +93,10 @@ public byte[] serializeToBytes() { } } - public static DeletionVector deserializeFromDataInput(DataInput bis) throws IOException { - RoaringBitmap32 roaringBitmap = new RoaringBitmap32(); - roaringBitmap.deserialize(bis); - return new BitmapDeletionVector(roaringBitmap); + public static DeletionVector deserializeFromByteBuffer(ByteBuffer buffer) throws IOException { + RoaringBitmap32 bitmap = new RoaringBitmap32(); + bitmap.deserialize(buffer); + return new BitmapDeletionVector(bitmap); } private void checkPosition(long position) { diff --git a/paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java b/paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java index 8feeac63f721..967e80b0bacc 100644 --- a/paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java +++ b/paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java @@ -26,9 +26,9 @@ import javax.annotation.Nullable; -import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.List; import java.util.Optional; @@ -99,11 +99,11 @@ default boolean checkedDelete(long position) { * @return A DeletionVector instance that represents the deserialized data. */ static DeletionVector deserializeFromBytes(byte[] bytes) { - try (ByteArrayInputStream bis = new ByteArrayInputStream(bytes); - DataInputStream dis = new DataInputStream(bis)) { - int magicNum = dis.readInt(); + try { + ByteBuffer buffer = ByteBuffer.wrap(bytes); + int magicNum = buffer.getInt(); if (magicNum == BitmapDeletionVector.MAGIC_NUMBER) { - return BitmapDeletionVector.deserializeFromDataInput(dis); + return BitmapDeletionVector.deserializeFromByteBuffer(buffer); } else { throw new RuntimeException("Invalid magic number: " + magicNum); } @@ -117,22 +117,21 @@ static DeletionVector read(FileIO fileIO, DeletionFile deletionFile) throws IOEx try (SeekableInputStream input = fileIO.newInputStream(path)) { input.seek(deletionFile.offset()); DataInputStream dis = new DataInputStream(input); - int actualLength = dis.readInt(); - if (actualLength != deletionFile.length()) { + int actualSize = dis.readInt(); + if (actualSize != deletionFile.length()) { throw new RuntimeException( "Size not match, actual size: " - + actualLength + + actualSize + ", expert size: " + deletionFile.length() + ", file path: " + path); } - int magicNum = dis.readInt(); - if (magicNum == BitmapDeletionVector.MAGIC_NUMBER) { - return BitmapDeletionVector.deserializeFromDataInput(dis); - } else { - throw new RuntimeException("Invalid magic number: " + magicNum); - } + + // read DeletionVector bytes + byte[] bytes = new byte[actualSize]; + dis.readFully(bytes); + return deserializeFromBytes(bytes); } } diff --git a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldRoaringBitmap32Agg.java b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldRoaringBitmap32Agg.java index ef7ac20e839a..436a88a3ccd8 100644 --- a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldRoaringBitmap32Agg.java +++ b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldRoaringBitmap32Agg.java @@ -22,6 +22,7 @@ import org.apache.paimon.utils.RoaringBitmap32; import java.io.IOException; +import java.nio.ByteBuffer; /** roaring bitmap aggregate a field of a row. */ public class FieldRoaringBitmap32Agg extends FieldAggregator { @@ -43,8 +44,8 @@ public Object agg(Object accumulator, Object inputField) { } try { - roaringBitmapAcc.deserialize((byte[]) accumulator); - roaringBitmapInput.deserialize((byte[]) inputField); + roaringBitmapAcc.deserialize(ByteBuffer.wrap((byte[]) accumulator)); + roaringBitmapInput.deserialize(ByteBuffer.wrap((byte[]) inputField)); roaringBitmapAcc.or(roaringBitmapInput); return roaringBitmapAcc.serialize(); } catch (IOException e) {