From e18f6ed6fde0f3b10170540aebefa0d437bb19aa Mon Sep 17 00:00:00 2001 From: Jingsong Date: Sun, 8 Dec 2024 22:46:44 +0800 Subject: [PATCH] [parquet] Fix minor format codes in parquet readers --- .../format/parquet/position/CollectionPosition.java | 6 +++--- .../paimon/format/parquet/position/LevelDelegation.java | 1 + .../format/parquet/reader/AbstractColumnReader.java | 5 +---- .../format/parquet/reader/BooleanColumnReader.java | 5 ----- .../parquet/reader/NestedPrimitiveColumnReader.java | 4 +--- .../paimon/format/parquet/reader/ParquetReadState.java | 9 +++------ .../format/parquet/reader/TimestampColumnReader.java | 5 +++-- .../apache/paimon/format/parquet/type/ParquetField.java | 1 + 8 files changed, 13 insertions(+), 23 deletions(-) diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/position/CollectionPosition.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/position/CollectionPosition.java index e72a4280f4aa..beb5de7a92e5 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/position/CollectionPosition.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/position/CollectionPosition.java @@ -22,14 +22,14 @@ /** To represent collection's position in repeated type. */ public class CollectionPosition { + @Nullable private final boolean[] isNull; private final long[] offsets; - private final long[] length; - private final int valueCount; - public CollectionPosition(boolean[] isNull, long[] offsets, long[] length, int valueCount) { + public CollectionPosition( + @Nullable boolean[] isNull, long[] offsets, long[] length, int valueCount) { this.isNull = isNull; this.offsets = offsets; this.length = length; diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/position/LevelDelegation.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/position/LevelDelegation.java index 25bbedc861d1..8e30d90ba2c7 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/position/LevelDelegation.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/position/LevelDelegation.java @@ -20,6 +20,7 @@ /** To delegate repetition level and definition level. */ public class LevelDelegation { + private final int[] repetitionLevel; private final int[] definitionLevel; diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/AbstractColumnReader.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/AbstractColumnReader.java index 5e3f4a7e6a33..d4a0ab039b53 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/AbstractColumnReader.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/AbstractColumnReader.java @@ -69,9 +69,6 @@ public abstract class AbstractColumnReader /** If true, the current page is dictionary encoded. */ private boolean isCurrentPageDictionaryEncoded; - /** Total values in the current page. */ - // private int pageValueCount; - /** * Helper struct to track intermediate states while reading Parquet pages in the column chunk. */ @@ -90,7 +87,7 @@ public abstract class AbstractColumnReader */ /** Run length decoder for data and dictionary. */ - protected RunLengthDecoder runLenDecoder; + RunLengthDecoder runLenDecoder; /** Data input stream. */ ByteBufferInputStream dataInputStream; diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/BooleanColumnReader.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/BooleanColumnReader.java index 83d3c5a07d4b..4355392bf552 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/BooleanColumnReader.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/BooleanColumnReader.java @@ -42,11 +42,6 @@ public BooleanColumnReader(ColumnDescriptor descriptor, PageReadStore pageReadSt checkTypeName(PrimitiveType.PrimitiveTypeName.BOOLEAN); } - @Override - protected boolean supportLazyDecode() { - return true; - } - @Override protected void afterReadPage() { bitOffset = 0; diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/NestedPrimitiveColumnReader.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/NestedPrimitiveColumnReader.java index 7db7aedbf6ae..f0a82a6d711e 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/NestedPrimitiveColumnReader.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/NestedPrimitiveColumnReader.java @@ -65,6 +65,7 @@ /** Reader to read nested primitive column. */ public class NestedPrimitiveColumnReader implements ColumnReader { + private static final Logger LOG = LoggerFactory.getLogger(NestedPrimitiveColumnReader.class); private final IntArrayList repetitionLevelList = new IntArrayList(0); @@ -95,9 +96,6 @@ public class NestedPrimitiveColumnReader implements ColumnReaderParquet + * Timestamp TIMESTAMP_MILLIS and TIMESTAMP_MICROS are the deprecated ConvertedType. */ public class TimestampColumnReader extends AbstractColumnReader { diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/type/ParquetField.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/type/ParquetField.java index 94fe6b91d9d3..291e9ebbceb3 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/type/ParquetField.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/type/ParquetField.java @@ -22,6 +22,7 @@ /** Field that represent parquet's field type. */ public abstract class ParquetField { + private final DataType type; private final int repetitionLevel; private final int definitionLevel;