Skip to content

Commit

Permalink
[benchmark] Add table read benchmark (apache#2834)
Browse files Browse the repository at this point in the history
  • Loading branch information
Aitozi authored Feb 2, 2024
1 parent 0ca8f8b commit 331d874
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public void run() {
System.out.println(getProcessorName());
System.out.printf(
"%-100s %16s %16s %16s %10s%n",
name + ":", "Best/Avg Time(ms)", "Row Rate(M/s)", "Per Row(ns)", "Relative");
name + ":", "Best/Avg Time(ms)", "Row Rate(K/s)", "Per Row(ns)", "Relative");
System.out.println(
"----------------------------------------------------"
+ "-----------------------------------------------------------------------"
Expand All @@ -84,7 +84,7 @@ public void run() {
"OPERATORTEST_" + name + "_" + c.name,
String.format("%5.0f / %4.0f", r.bestNs / 1000_000.0, r.avgNs / 1000_000.0),
String.format("%10.1f", r.bestRate),
String.format("%6.1f", 1000 / r.bestRate),
String.format("%6.1f", 1000000 / r.bestRate),
String.format("%3.1fX", (firstBest / r.bestNs)));
}
System.out.println("\n\n\n");
Expand Down Expand Up @@ -118,7 +118,8 @@ private Result measure(final Case c) {
}
System.out.println(
" Stopped after " + c.numIters + " iterations, " + totalTime / 1000000 + " ms");
return new Result(1.0 * totalTime / c.numIters, valuesPerIteration / (best / 1000.0), best);
return new Result(
1.0 * totalTime / c.numIters, valuesPerIteration / (best / 1000000.0), best);
}

public Benchmark setOutputPerIteration(boolean outputPerIteration) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public class TableBenchmark {

private final RandomDataGenerator random = new RandomDataGenerator();

protected Table createTable(Options tableOptions) throws Exception {
protected Table createTable(Options tableOptions, String tableName) throws Exception {
Options catalogOptions = new Options();
catalogOptions.set(CatalogOptions.WAREHOUSE, tempFile.toUri().toString());
Catalog catalog = CatalogFactory.createCatalog(CatalogContext.create(catalogOptions));
Expand All @@ -72,7 +72,7 @@ protected Table createTable(Options tableOptions) throws Exception {
singletonList("k"),
tableOptions.toMap(),
"");
Identifier identifier = Identifier.create(database, "T");
Identifier identifier = Identifier.create(database, tableName);
catalog.createTable(identifier, schema, false);
return catalog.getTable(identifier);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package org.apache.paimon.benchmark;

import org.apache.paimon.CoreOptions;
import org.apache.paimon.data.InternalRow;
import org.apache.paimon.options.Options;
import org.apache.paimon.reader.RecordReader;
import org.apache.paimon.table.Table;
import org.apache.paimon.table.sink.CommitMessage;
import org.apache.paimon.table.sink.StreamTableCommit;
import org.apache.paimon.table.sink.StreamTableWrite;
import org.apache.paimon.table.sink.StreamWriteBuilder;
import org.apache.paimon.table.source.Split;

import org.junit.jupiter.api.Test;

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

/**
 * Benchmark for table read.
 *
 * <p>For each file format (ORC, Parquet, Avro) a table is created and filled with {@code rowCount}
 * random rows; the benchmark then repeatedly plans a scan and reads every split of each table.
 */
public class TableReadBenchmark extends TableBenchmark {

    /** Number of rows written into each table before it is read back. */
    private final int rowCount = 1000000;

    /**
     * Prepares one table per file format and runs the read benchmark over all of them.
     *
     * @throws Exception if creating or populating any of the tables fails
     */
    @Test
    public void testRead() throws Exception {
        // LinkedHashMap keeps the cases in insertion order, so the first one ("orc") is the
        // baseline of the "Relative" column in the report.
        Map<String, Table> tables = new LinkedHashMap<>();
        tables.put("orc", prepareData(orc(), "orc"));
        tables.put("parquet", prepareData(parquet(), "parquet"));
        tables.put("avro", prepareData(avro(), "avro"));

        innerTest(tables);
        /*
         * OpenJDK 64-Bit Server VM 1.8.0_292-b10 on Mac OS X 10.16
         * Apple M1 Pro
         * read:                            Best/Avg Time(ms)    Row Rate(K/s)      Per Row(ns)   Relative
         * ------------------------------------------------------------------------------------------------
         * OPERATORTEST_read_read-orc            1046 / 1295           2867.3            348.8       1.0X
         * OPERATORTEST_read_read-parquet        3076 / 5295            975.4           1025.2       0.3X
         * OPERATORTEST_read_read-avro           4156 / 4362            721.8           1385.5       0.3X
         */
    }

    /** Returns table options selecting the ORC file format. */
    private Options orc() {
        return formatOptions(CoreOptions.FileFormatType.ORC);
    }

    /** Returns table options selecting the Parquet file format. */
    private Options parquet() {
        return formatOptions(CoreOptions.FileFormatType.PARQUET);
    }

    /** Returns table options selecting the Avro file format. */
    private Options avro() {
        return formatOptions(CoreOptions.FileFormatType.AVRO);
    }

    /** Builds a fresh {@link Options} whose only entry is the given file format. */
    private Options formatOptions(CoreOptions.FileFormatType format) {
        Options options = new Options();
        options.set(CoreOptions.FILE_FORMAT, format);
        return options;
    }

    /**
     * Registers one benchmark case per table and runs them.
     *
     * <p>Each iteration of a case reads the whole table {@code readTime} times, which is why the
     * benchmark is configured with {@code readTime * rowCount} values per iteration.
     *
     * @param tables tables to read, keyed by the name used in the case label
     */
    private void innerTest(Map<String, Table> tables) {
        int readTime = 3;
        Benchmark benchmark =
                new Benchmark("read", readTime * rowCount)
                        .setNumWarmupIters(1)
                        .setOutputPerIteration(true);

        for (Map.Entry<String, Table> entry : tables.entrySet()) {
            Table table = entry.getValue();
            benchmark.addCase(
                    "read-" + entry.getKey(),
                    5,
                    () -> {
                        for (int i = 0; i < readTime; i++) {
                            List<Split> splits = table.newReadBuilder().newScan().plan().splits();
                            // A lambda can only capture effectively final locals, hence the
                            // mutable counter object.
                            AtomicLong readCount = new AtomicLong(0);
                            try {
                                for (Split split : splits) {
                                    RecordReader<InternalRow> reader =
                                            table.newReadBuilder().newRead().createReader(split);
                                    reader.forEachRemaining(row -> readCount.incrementAndGet());
                                }
                                System.out.printf("Finish read %d rows.\n", readCount.get());
                            } catch (Exception e) {
                                throw new RuntimeException(e);
                            }
                        }
                    });
        }
        benchmark.run();
    }

    /**
     * Creates a table with the given options and commits {@code rowCount} random rows to it.
     *
     * @param options table options, here used to select the file format
     * @param tableName name of the table to create
     * @return the populated table
     * @throws Exception if table creation, writing or committing fails
     */
    private Table prepareData(Options options, String tableName) throws Exception {
        Table table = createTable(options, tableName);
        StreamWriteBuilder writeBuilder = table.newStreamWriteBuilder();
        // Resources are closed in reverse declaration order: the writer first, then the commit.
        // Both are released even when a write or the commit throws.
        try (StreamTableCommit commit = writeBuilder.newCommit();
                StreamTableWrite write = writeBuilder.newWrite()) {
            // NOTE(review): writeCount is never read; consider removing it together with the
            // AtomicInteger import.
            AtomicInteger writeCount = new AtomicInteger(0);
            for (int i = 0; i < rowCount; i++) {
                write.write(newRandomRow());
                writeCount.incrementAndGet();
            }
            List<CommitMessage> commitMessages = write.prepareCommit(true, 1);
            commit.commit(1, commitMessages);
        }
        return table;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ public void testOrc() throws Exception {
}

public void innerTest(String name, Options options) throws Exception {
StreamWriteBuilder writeBuilder = createTable(options).newStreamWriteBuilder();
StreamWriteBuilder writeBuilder = createTable(options, "T").newStreamWriteBuilder();
StreamTableWrite write = writeBuilder.newWrite();
StreamTableCommit commit = writeBuilder.newCommit();
long valuesPerIteration = 300_000;
Expand Down

0 comments on commit 331d874

Please sign in to comment.