Skip to content

Commit

Permalink
[common] Using a faster deserialization method in RoaringBitmap32 (ap…
Browse files Browse the repository at this point in the history
…ache#4765)

(cherry picked from commit 8157be9)
  • Loading branch information
Tan-JiaLiang authored and Zouxxyy committed Jan 3, 2025
1 parent 1f163b1 commit 8c0c033
Show file tree
Hide file tree
Showing 5 changed files with 144 additions and 33 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.benchmark.bitmap;

import org.apache.paimon.benchmark.Benchmark;
import org.apache.paimon.fs.local.LocalFileIO;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.roaringbitmap.RoaringBitmap;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.file.Path;
import java.util.Random;

import static org.assertj.core.api.Assertions.assertThat;

/** Benchmark for {@link RoaringBitmap}. */
public class RoaringBitmapBenchmark {

public static final int ROW_COUNT = 10000000;

@TempDir Path tempDir;

@Test
public void testDeserialize() throws Exception {
Random random = new Random();
RoaringBitmap bitmap = new RoaringBitmap();
for (int i = 0; i < ROW_COUNT; i++) {
if (random.nextBoolean()) {
bitmap.add(i);
}
}

File file = new File(tempDir.toFile(), "bitmap32-deserialize-benchmark");
assertThat(file.createNewFile()).isTrue();
try (FileOutputStream output = new FileOutputStream(file);
DataOutputStream dos = new DataOutputStream(output)) {
bitmap.serialize(dos);
}

Benchmark benchmark =
new Benchmark("bitmap32-deserialize-benchmark", 100)
.setNumWarmupIters(1)
.setOutputPerIteration(true);

benchmark.addCase(
"deserialize(DataInput)",
10,
() -> {
try (LocalFileIO.LocalSeekableInputStream seekableStream =
new LocalFileIO.LocalSeekableInputStream(file);
DataInputStream input = new DataInputStream(seekableStream)) {
new RoaringBitmap().deserialize(input);
} catch (IOException e) {
throw new RuntimeException(e);
}
});

benchmark.addCase(
"deserialize(DataInput, byte[])",
10,
() -> {
try (LocalFileIO.LocalSeekableInputStream seekableStream =
new LocalFileIO.LocalSeekableInputStream(file);
DataInputStream input = new DataInputStream(seekableStream)) {
new RoaringBitmap().deserialize(input, null);
} catch (IOException e) {
throw new RuntimeException(e);
}
});

benchmark.addCase(
"deserialize(ByteBuffer)",
10,
() -> {
try (LocalFileIO.LocalSeekableInputStream seekableStream =
new LocalFileIO.LocalSeekableInputStream(file);
DataInputStream input = new DataInputStream(seekableStream)) {
byte[] bytes = new byte[(int) file.length()];
input.readFully(bytes);
new RoaringBitmap().deserialize(ByteBuffer.wrap(bytes));
} catch (IOException e) {
throw new RuntimeException(e);
}
});

benchmark.run();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,19 @@ public void serialize(DataOutput out) throws IOException {
roaringBitmap.serialize(out);
}

public byte[] serialize() {
roaringBitmap.runOptimize();
ByteBuffer buffer = ByteBuffer.allocate(roaringBitmap.serializedSizeInBytes());
roaringBitmap.serialize(buffer);
return buffer.array();
}

public void deserialize(DataInput in) throws IOException {
roaringBitmap.deserialize(in);
roaringBitmap.deserialize(in, null);
}

public void deserialize(ByteBuffer buffer) throws IOException {
roaringBitmap.deserialize(buffer);
}

@Override
Expand All @@ -105,17 +116,6 @@ public void clear() {
roaringBitmap.clear();
}

public byte[] serialize() {
roaringBitmap.runOptimize();
ByteBuffer buffer = ByteBuffer.allocate(roaringBitmap.serializedSizeInBytes());
roaringBitmap.serialize(buffer);
return buffer.array();
}

public void deserialize(byte[] rbmBytes) throws IOException {
roaringBitmap.deserialize(ByteBuffer.wrap(rbmBytes));
}

public void flip(final long rangeStart, final long rangeEnd) {
roaringBitmap.flip(rangeStart, rangeEnd);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
import org.apache.paimon.utils.RoaringBitmap32;

import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Objects;

/**
Expand Down Expand Up @@ -93,10 +93,10 @@ public byte[] serializeToBytes() {
}
}

public static DeletionVector deserializeFromDataInput(DataInput bis) throws IOException {
RoaringBitmap32 roaringBitmap = new RoaringBitmap32();
roaringBitmap.deserialize(bis);
return new BitmapDeletionVector(roaringBitmap);
public static DeletionVector deserializeFromByteBuffer(ByteBuffer buffer) throws IOException {
RoaringBitmap32 bitmap = new RoaringBitmap32();
bitmap.deserialize(buffer);
return new BitmapDeletionVector(bitmap);
}

private void checkPosition(long position) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@

import javax.annotation.Nullable;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Optional;

Expand Down Expand Up @@ -99,11 +99,11 @@ default boolean checkedDelete(long position) {
* @return A DeletionVector instance that represents the deserialized data.
*/
static DeletionVector deserializeFromBytes(byte[] bytes) {
try (ByteArrayInputStream bis = new ByteArrayInputStream(bytes);
DataInputStream dis = new DataInputStream(bis)) {
int magicNum = dis.readInt();
try {
ByteBuffer buffer = ByteBuffer.wrap(bytes);
int magicNum = buffer.getInt();
if (magicNum == BitmapDeletionVector.MAGIC_NUMBER) {
return BitmapDeletionVector.deserializeFromDataInput(dis);
return BitmapDeletionVector.deserializeFromByteBuffer(buffer);
} else {
throw new RuntimeException("Invalid magic number: " + magicNum);
}
Expand All @@ -117,22 +117,21 @@ static DeletionVector read(FileIO fileIO, DeletionFile deletionFile) throws IOEx
try (SeekableInputStream input = fileIO.newInputStream(path)) {
input.seek(deletionFile.offset());
DataInputStream dis = new DataInputStream(input);
int actualLength = dis.readInt();
if (actualLength != deletionFile.length()) {
int actualSize = dis.readInt();
if (actualSize != deletionFile.length()) {
throw new RuntimeException(
"Size not match, actual size: "
+ actualLength
+ actualSize
+ ", expert size: "
+ deletionFile.length()
+ ", file path: "
+ path);
}
int magicNum = dis.readInt();
if (magicNum == BitmapDeletionVector.MAGIC_NUMBER) {
return BitmapDeletionVector.deserializeFromDataInput(dis);
} else {
throw new RuntimeException("Invalid magic number: " + magicNum);
}

// read DeletionVector bytes
byte[] bytes = new byte[actualSize];
dis.readFully(bytes);
return deserializeFromBytes(bytes);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.paimon.utils.RoaringBitmap32;

import java.io.IOException;
import java.nio.ByteBuffer;

/** roaring bitmap aggregate a field of a row. */
public class FieldRoaringBitmap32Agg extends FieldAggregator {
Expand All @@ -43,8 +44,8 @@ public Object agg(Object accumulator, Object inputField) {
}

try {
roaringBitmapAcc.deserialize((byte[]) accumulator);
roaringBitmapInput.deserialize((byte[]) inputField);
roaringBitmapAcc.deserialize(ByteBuffer.wrap((byte[]) accumulator));
roaringBitmapInput.deserialize(ByteBuffer.wrap((byte[]) inputField));
roaringBitmapAcc.or(roaringBitmapInput);
return roaringBitmapAcc.serialize();
} catch (IOException e) {
Expand Down

0 comments on commit 8c0c033

Please sign in to comment.