Skip to content

Commit

Permalink
[core] Introduce VectoredReadable to SeekableInputStream (#3369)
Browse files Browse the repository at this point in the history
  • Loading branch information
JingsongLi authored May 29, 2024
1 parent 8791c5d commit 245cbad
Show file tree
Hide file tree
Showing 13 changed files with 1,562 additions and 61 deletions.
3 changes: 3 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,8 @@ Apache Software Foundation License 2.0
--------------------------------------

paimon-common/src/main/java/org/apache/paimon/fs/Path.java
paimon-common/src/main/java/org/apache/paimon/fs/FileRange.java
paimon-common/src/main/java/org/apache/paimon/fs/VectoredReadUtils.java
from http://hadoop.apache.org/ version 2.10.2

paimon-common/src/main/java/org/apache/paimon/lookup/hash/HashLookupStoreWriter.java
Expand Down Expand Up @@ -258,6 +260,7 @@ from https://hive.apache.org/ version 3.1.0
paimon-format/src/main/java/org/apache/orc/impl/PhysicalFsWriter.java
paimon-format/src/main/java/org/apache/orc/impl/WriterImpl.java
paimon-format/src/main/java/org/apache/orc/impl/ZstdCodec.java
paimon-format/src/main/java/org/apache/orc/impl/RecordReaderUtils.java
paimon-format/src/main/java/org/apache/orc/CompressionKind.java
paimon-format/src/main/java/org/apache/orc/OrcConf.java
paimon-format/src/main/java/org/apache/orc/OrcFile.java
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@

import org.junit.jupiter.api.Test;

import javax.annotation.Nullable;

import java.util.Collections;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -78,6 +80,20 @@ public void testAvroRead() throws Exception {
*/
}

/**
 * Runs the read benchmark on a table prepared with the ORC options, passing the projection
 * indices {@code 0, 5, 10, 14} to the reader.
 */
@Test
public void testOrcReadProjection() throws Exception {
    String format = "orc";
    int[] projectedFields = {0, 5, 10, 14};
    innerTestProjection(
            Collections.singletonMap(format, prepareData(orc(), format)), projectedFields);
    /*
     * OpenJDK 64-Bit Server VM 1.8.0_292-b10 on Mac OS X 10.16
     * Apple M1 Pro
     * read: Best/Avg Time(ms) Row Rate(K/s) Per Row(ns) Relative
     * ------------------------------------------------------------------------------------------------
     * OPERATORTEST_read_read-orc 716 / 728 4187.4 238.8 1.0X
     */
}

private Options orc() {
Options options = new Options();
options.set(CoreOptions.FILE_FORMAT, CoreOptions.FILE_FORMAT_ORC);
Expand All @@ -97,6 +113,10 @@ private Options avro() {
}

/**
 * Runs the read benchmark for the given tables without a projection, by delegating to
 * {@code innerTestProjection} with a {@code null} projection array.
 */
private void innerTest(Map<String, Table> tables) {
    final int[] noProjection = null;
    innerTestProjection(tables, noProjection);
}

private void innerTestProjection(Map<String, Table> tables, @Nullable int[] projection) {
int readTime = 3;
Benchmark benchmark =
new Benchmark("read", readTime * rowCount)
Expand All @@ -115,7 +135,10 @@ private void innerTest(Map<String, Table> tables) {
try {
for (Split split : splits) {
RecordReader<InternalRow> reader =
table.newReadBuilder().newRead().createReader(split);
table.newReadBuilder()
.withProjection(projection)
.newRead()
.createReader(split);
reader.forEachRemaining(row -> readCount.incrementAndGet());
}
System.out.printf("Finish read %d rows.\n", readCount.get());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,17 @@
import org.apache.paimon.catalog.CatalogContext;

import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import static org.apache.paimon.utils.ThreadUtils.newDaemonThreadFactory;

/** Utils for {@link FileIO}. */
public class FileIOUtils {

/**
 * Shared executor created once when this class is initialized. It is a cached thread pool
 * ({@link Executors#newCachedThreadPool(java.util.concurrent.ThreadFactory)}) whose threads are
 * produced by {@code ThreadUtils.newDaemonThreadFactory("IO-THREAD-POOL")}.
 *
 * <p>NOTE(review): nothing visible here shuts this pool down; presumably it lives for the whole
 * JVM lifetime and relies on its threads being daemon threads - confirm against ThreadUtils.
 */
public static final ExecutorService IO_THREAD_POOL =
Executors.newCachedThreadPool(newDaemonThreadFactory("IO-THREAD-POOL"));

public static FileIOLoader checkAccess(FileIOLoader fileIO, Path path, CatalogContext config)
throws IOException {
if (fileIO == null) {
Expand Down
83 changes: 83 additions & 0 deletions paimon-common/src/main/java/org/apache/paimon/fs/FileRange.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.fs;

import java.util.concurrent.CompletableFuture;

/* This file is based on source code from the Hadoop Project (http://hadoop.apache.org/), licensed by the Apache
* Software Foundation (ASF) under the Apache License, Version 2.0. See the NOTICE file distributed with this work for
* additional information regarding copyright ownership. */

/**
 * A contiguous span of bytes in a file, identified by a start offset and a length, paired with a
 * {@link CompletableFuture} through which the bytes of that span are exposed.
 */
public interface FileRange {

    /**
     * Builds a {@link FileRange} backed by {@link FileRangeImpl}.
     *
     * @param offset position in the file where the range begins.
     * @param length number of bytes covered by the range.
     * @return a freshly created {@link FileRangeImpl} whose data future is not yet completed.
     */
    static FileRange createFileRange(long offset, int length) {
        return new FileRangeImpl(offset, length);
    }

    /** Returns the position in the file where this range begins. */
    long getOffset();

    /** Returns the number of bytes covered by this range. */
    int getLength();

    /** Returns the future that carries the bytes of this range. */
    CompletableFuture<byte[]> getData();

    /**
     * Plain value-holder implementation of {@link FileRange}. The data future is created
     * uncompleted; this class never completes it itself, it only hands it out.
     */
    class FileRangeImpl implements FileRange {

        private final long offset;
        private final int length;
        // Allocated once per instance, so every getData() call returns the same future.
        private final CompletableFuture<byte[]> data = new CompletableFuture<>();

        public FileRangeImpl(long offset, int length) {
            this.offset = offset;
            this.length = length;
        }

        @Override
        public long getOffset() {
            return offset;
        }

        @Override
        public int getLength() {
            return length;
        }

        @Override
        public CompletableFuture<byte[]> getData() {
            return data;
        }

        /** Renders the range as {@code range[start,end)}, where end is offset plus length. */
        @Override
        public String toString() {
            // The sum is computed in long arithmetic because offset is a long.
            long end = offset + length;
            return new StringBuilder("range[")
                    .append(offset)
                    .append(',')
                    .append(end)
                    .append(')')
                    .toString();
        }
    }
}
Loading

0 comments on commit 245cbad

Please sign in to comment.