Skip to content

Commit

Permalink
[parquet] Parquet use vector IO (#3455)
Browse files Browse the repository at this point in the history
  • Loading branch information
JingsongLi authored Jun 3, 2024
1 parent ee1d541 commit 938c893
Show file tree
Hide file tree
Showing 7 changed files with 2,044 additions and 3 deletions.
4 changes: 4 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,10 @@ paimon-format/src/main/java/org/apache/orc/OrcConf.java
paimon-format/src/main/java/org/apache/orc/OrcFile.java
from https://orc.apache.org/ version 2.0

paimon-format/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
paimon-format/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
from https://parquet.apache.org/ version 1.14.0

MIT License
-----------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,33 @@ public void testOrcReadProjection1() throws Exception {
*/
}

/**
 * Benchmarks a projected read (projection {@code {0, 5, 10, 14}}) against data written in the
 * Parquet file format.
 */
@Test
public void testParquetReadProjection() throws Exception {
    // Write the data as Parquet. Reusing orc() here would configure FILE_FORMAT_ORC and the
    // benchmark would measure ORC files under the "parquet" label.
    Options parquetOptions = new Options();
    parquetOptions.set(CoreOptions.FILE_FORMAT, CoreOptions.FILE_FORMAT_PARQUET);
    innerTestProjection(
            Collections.singletonMap("parquet", prepareData(parquetOptions, "parquet")),
            new int[] {0, 5, 10, 14});
    /*
     * NOTE(review): the figures below are labelled "read-orc" and were recorded for the ORC
     * benchmark; re-run this test and replace them with the Parquet measurement.
     *
     * OpenJDK 64-Bit Server VM 1.8.0_292-b10 on Mac OS X 10.16
     * Apple M1 Pro
     * read: Best/Avg Time(ms) Row Rate(K/s) Per Row(ns) Relative
     * ------------------------------------------------------------------------------------------------
     * OPERATORTEST_read_read-orc 716 / 728 4187.4 238.8 1.0X
     */
}

/**
 * Benchmarks a single-column projected read (projection {@code {10}}) against data written in
 * the Parquet file format.
 */
@Test
public void testParquetReadProjection1() throws Exception {
    // Write the data as Parquet. Reusing orc() here would configure FILE_FORMAT_ORC and the
    // benchmark would measure ORC files under the "parquet" label.
    Options parquetOptions = new Options();
    parquetOptions.set(CoreOptions.FILE_FORMAT, CoreOptions.FILE_FORMAT_PARQUET);
    innerTestProjection(
            Collections.singletonMap("parquet", prepareData(parquetOptions, "parquet")),
            new int[] {10});
    /*
     * NOTE(review): the figures below are labelled "read-orc" and were recorded for the ORC
     * benchmark; re-run this test and replace them with the Parquet measurement.
     *
     * OpenJDK 64-Bit Server VM 1.8.0_292-b10 on Mac OS X 10.16
     * Apple M1 Pro
     * read: Best/Avg Time(ms) Row Rate(K/s) Per Row(ns) Relative
     * ------------------------------------------------------------------------------------------------
     * OPERATORTEST_read_read-orc 716 / 728 4187.4 238.8 1.0X
     */
}

private Options orc() {
Options options = new Options();
options.set(CoreOptions.FILE_FORMAT, CoreOptions.FILE_FORMAT_ORC);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import org.apache.paimon.fs.Path;

import org.apache.parquet.io.InputFile;
import org.apache.parquet.io.SeekableInputStream;

import java.io.IOException;

Expand Down Expand Up @@ -52,7 +51,7 @@ public long getLength() {
}

@Override
public SeekableInputStream newStream() throws IOException {
public ParquetInputStream newStream() throws IOException {
return new ParquetInputStream(fileIO.newInputStream(stat.getPath()));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ public ParquetInputStream(SeekableInputStream in) {
this.in = in;
}

/** Returns the stream this {@code ParquetInputStream} was constructed with. */
public SeekableInputStream in() {
    return this.in;
}

@Override
public long getPos() throws IOException {
return in.getPos();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ public class ParquetUtil {
* @return parquet reader, used for reading footer, status, etc.
*/
public static ParquetFileReader getParquetReader(FileIO fileIO, Path path) throws IOException {
return ParquetFileReader.open(
return new ParquetFileReader(
ParquetInputFile.fromPath(fileIO, path), ParquetReadOptions.builder().build());
}

Expand Down
Loading

0 comments on commit 938c893

Please sign in to comment.