From 8f69480934e59da4e7b4163c1594d7d49d0a525e Mon Sep 17 00:00:00 2001
From: Fang Yong
Date: Mon, 29 Jul 2024 11:42:04 +0800
Subject: [PATCH] [core][lookup] Add writer benchmark for sort and hash file store (#3828)

---
 .../lookup/AbstractLookupBenchmark.java       |  55 +++++++++
 .../LookupBloomFilterBenchmark.java           |  23 +---
 .../lookup/LookupWriterBenchmark.java         | 116 ++++++++++++++++++
 3 files changed, 175 insertions(+), 19 deletions(-)
 create mode 100644 paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/AbstractLookupBenchmark.java
 rename paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/{ => lookup}/LookupBloomFilterBenchmark.java (87%)
 create mode 100644 paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupWriterBenchmark.java

diff --git a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/AbstractLookupBenchmark.java b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/AbstractLookupBenchmark.java
new file mode 100644
index 000000000000..652142941f70
--- /dev/null
+++ b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/AbstractLookupBenchmark.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.benchmark.lookup;
+
+/** Abstract benchmark class for lookup. */
+abstract class AbstractLookupBenchmark {
+    /** Lengths, in bytes, of the values the lookup benchmarks are run with. */
+    protected static final int[] VALUE_LENGTHS = {0, 500, 1000, 2000, 4000};
+
+    /**
+     * Generates the 4-byte big-endian encodings of the consecutive integers in [start, end).
+     *
+     * @param start first integer to encode, inclusive
+     * @param end end of the range, exclusive
+     * @return an array of {@code end - start} keys, in ascending integer order
+     */
+    protected byte[][] generateSequenceInputs(int start, int end) {
+        int count = end - start;
+        // Only the outer array is allocated here; intToByteArray supplies each row.
+        byte[][] result = new byte[count][];
+        for (int i = 0; i < count; i++) {
+            // Offset by start so the keys cover [start, end) rather than always [0, count).
+            result[i] = intToByteArray(start + i);
+        }
+        return result;
+    }
+
+    /**
+     * Encodes an int as four bytes, most significant byte first.
+     *
+     * @param value the integer to encode
+     * @return a new 4-byte array holding {@code value} in big-endian order
+     */
+    protected byte[] intToByteArray(int value) {
+        return new byte[] {
+            (byte) (value >>> 24), (byte) (value >>> 16), (byte) (value >>> 8), (byte) value
+        };
+    }
+}
diff --git a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/LookupBloomFilterBenchmark.java b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupBloomFilterBenchmark.java
similarity index 87%
rename from paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/LookupBloomFilterBenchmark.java
rename to paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupBloomFilterBenchmark.java
index a65f1364f416..68f9b38ff771 100644
--- a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/LookupBloomFilterBenchmark.java
+++ b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupBloomFilterBenchmark.java
@@ -16,8 +16,9 @@
  * limitations under the License.
*/ -package org.apache.paimon.benchmark; +package org.apache.paimon.benchmark.lookup; +import org.apache.paimon.benchmark.Benchmark; import org.apache.paimon.io.cache.CacheManager; import org.apache.paimon.lookup.hash.HashLookupStoreFactory; import org.apache.paimon.lookup.hash.HashLookupStoreReader; @@ -36,7 +37,7 @@ import java.util.concurrent.ThreadLocalRandom; /** Benchmark for measure the bloom filter for lookup. */ -public class LookupBloomFilterBenchmark { +public class LookupBloomFilterBenchmark extends AbstractLookupBenchmark { @TempDir Path tempDir; ThreadLocalRandom rnd = ThreadLocalRandom.current(); @@ -57,15 +58,6 @@ public void testLowMatch() throws Exception { "lookup", generateSequenceInputs(0, 100000), generateRandomInputs(100000, 200000)); } - private byte[][] generateSequenceInputs(int start, int end) { - int count = end - start; - byte[][] result = new byte[count][4]; - for (int i = 0; i < count; i++) { - result[i] = intToByteArray(i); - } - return result; - } - private byte[][] generateRandomInputs(int start, int end) { int count = end - start; byte[][] result = new byte[count][4]; @@ -75,18 +67,11 @@ private byte[][] generateRandomInputs(int start, int end) { return result; } - public byte[] intToByteArray(int value) { - return new byte[] { - (byte) (value >>> 24), (byte) (value >>> 16), (byte) (value >>> 8), (byte) value - }; - } - public void innerTest(String name, byte[][] inputs, byte[][] probe) throws Exception { Benchmark benchmark = new Benchmark(name, probe.length).setNumWarmupIters(1).setOutputPerIteration(true); - int[] valueLengths = {0, 500, 1000, 2000}; - for (int valueLength : valueLengths) { + for (int valueLength : VALUE_LENGTHS) { HashLookupStoreReader reader = writeData(null, inputs, valueLength); benchmark.addCase( diff --git a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupWriterBenchmark.java 
b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupWriterBenchmark.java
new file mode 100644
index 000000000000..84a1d6cd48f8
--- /dev/null
+++ b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupWriterBenchmark.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.benchmark.lookup;
+
+import org.apache.paimon.CoreOptions;
+import org.apache.paimon.benchmark.Benchmark;
+import org.apache.paimon.data.serializer.RowCompactedSerializer;
+import org.apache.paimon.io.cache.CacheManager;
+import org.apache.paimon.lookup.LookupStoreFactory;
+import org.apache.paimon.lookup.LookupStoreWriter;
+import org.apache.paimon.options.MemorySize;
+import org.apache.paimon.testutils.junit.parameterized.ParameterizedTestExtension;
+import org.apache.paimon.testutils.junit.parameterized.Parameters;
+import org.apache.paimon.types.IntType;
+import org.apache.paimon.types.RowType;
+
+import org.junit.jupiter.api.TestTemplate;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.UUID;
+
+import static org.apache.paimon.CoreOptions.LOOKUP_LOCAL_FILE_TYPE;
+
+/** Benchmark for measuring the throughput of writing for lookup. */
+@ExtendWith(ParameterizedTestExtension.class)
+public class LookupWriterBenchmark extends AbstractLookupBenchmark {
+
+    /** Number of sequential keys each benchmark case writes. */
+    private final int recordCount;
+
+    /** Directory under which every written lookup file is created. */
+    @TempDir Path tempDir;
+
+    public LookupWriterBenchmark(int recordCount) {
+        this.recordCount = recordCount;
+    }
+
+    /** Returns the record counts the benchmark is parameterized with. */
+    @Parameters(name = "record-count-{0}")
+    public static List<Integer> getVarSeg() {
+        return Arrays.asList(1000000, 5000000, 10000000, 15000000, 20000000);
+    }
+
+    @TestTemplate
+    void testLookupWriter() {
+        writeLookupDataBenchmark(generateSequenceInputs(0, recordCount));
+    }
+
+    /**
+     * Adds one write case per value length and lookup local file type, then runs them all.
+     *
+     * @param inputs keys written by every case
+     */
+    public void writeLookupDataBenchmark(byte[][] inputs) {
+        Benchmark benchmark =
+                new Benchmark("writer-" + inputs.length, inputs.length)
+                        .setNumWarmupIters(1)
+                        .setOutputPerIteration(true);
+        for (int valueLength : VALUE_LENGTHS) {
+            for (CoreOptions.LookupLocalFileType fileType :
+                    CoreOptions.LookupLocalFileType.values()) {
+                CoreOptions options =
+                        CoreOptions.fromMap(
+                                Collections.singletonMap(
+                                        LOOKUP_LOCAL_FILE_TYPE.key(), fileType.name()));
+                benchmark.addCase(
+                        String.format(
+                                "%s-write-%dB-value-%d-num",
+                                fileType.name(), valueLength, inputs.length),
+                        5,
+                        () -> {
+                            try {
+                                writeData(options, inputs, valueLength);
+                            } catch (IOException e) {
+                                // Rethrow unchecked, keeping the original failure as the cause.
+                                throw new RuntimeException(e);
+                            }
+                        });
+            }
+        }
+
+        benchmark.run();
+    }
+
+    /**
+     * Writes every input key, each paired with the same value, into a new lookup file.
+     *
+     * @param options options the lookup store factory is created from
+     * @param inputs keys to write, in array order
+     * @param valueLength length in bytes of the value stored for each key
+     * @throws IOException if writing the lookup file fails
+     */
+    private void writeData(CoreOptions options, byte[][] inputs, int valueLength)
+            throws IOException {
+        byte[] value = new byte[valueLength];
+        Arrays.fill(value, (byte) 1);
+        LookupStoreFactory factory =
+                LookupStoreFactory.create(
+                        options,
+                        new CacheManager(MemorySize.ofMebiBytes(10)),
+                        new RowCompactedSerializer(RowType.of(new IntType()))
+                                .createSliceComparator());
+
+        // A random name gives every invocation its own file inside tempDir.
+        File file = new File(tempDir.toFile(), UUID.randomUUID().toString());
+        LookupStoreWriter writer = factory.createWriter(file, null);
+        for (byte[] input : inputs) {
+            writer.put(input, value);
+        }
+        writer.close();
+    }
+}