diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSchemaConverter.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSchemaConverter.java index 61e0e1d6b37c..345c9944c9ec 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSchemaConverter.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSchemaConverter.java @@ -32,7 +32,6 @@ import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.Type; import org.apache.parquet.schema.Types; @@ -70,7 +69,7 @@ private static Type convertToParquetType( case CHAR: case VARCHAR: return Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, repetition) - .as(OriginalType.UTF8) + .as(LogicalTypeAnnotation.stringType()) .named(name); case BOOLEAN: return Types.primitive(PrimitiveType.PrimitiveTypeName.BOOLEAN, repetition) @@ -97,9 +96,13 @@ private static Type convertToParquetType( .named(name); } case TINYINT: - return Types.primitive(INT32, repetition).as(OriginalType.INT_8).named(name); + return Types.primitive(INT32, repetition) + .as(LogicalTypeAnnotation.intType(8, true)) + .named(name); case SMALLINT: - return Types.primitive(INT32, repetition).as(OriginalType.INT_16).named(name); + return Types.primitive(INT32, repetition) + .as(LogicalTypeAnnotation.intType(16, true)) + .named(name); case INTEGER: return Types.primitive(INT32, repetition).named(name); case BIGINT: @@ -111,9 +114,15 @@ private static Type convertToParquetType( return Types.primitive(PrimitiveType.PrimitiveTypeName.DOUBLE, repetition) .named(name); case DATE: - return Types.primitive(INT32, repetition).as(OriginalType.DATE).named(name); + return Types.primitive(INT32, repetition) + .as(LogicalTypeAnnotation.dateType()) + .named(name); case TIME_WITHOUT_TIME_ZONE: - return Types.primitive(INT32, repetition).as(OriginalType.TIME_MILLIS).named(name); + return Types.primitive(INT32, repetition) + .as( + LogicalTypeAnnotation.timeType( + true, LogicalTypeAnnotation.TimeUnit.MILLIS)) + .named(name); case TIMESTAMP_WITHOUT_TIME_ZONE: TimestampType timestampType = (TimestampType) type; return createTimestampWithLogicalType( diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ParquetSplitReaderUtil.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ParquetSplitReaderUtil.java index 8a362961d0d1..90abaa992c17 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ParquetSplitReaderUtil.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ParquetSplitReaderUtil.java @@ -58,7 +58,7 @@ import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.InvalidSchemaException; import org.apache.parquet.schema.LogicalTypeAnnotation; -import org.apache.parquet.schema.OriginalType; +import org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.Type; @@ -233,7 +233,8 @@ public static WritableColumnVector createWritableColumnVector( checkArgument( (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY || typeName == PrimitiveType.PrimitiveTypeName.INT32) - && primitiveType.getOriginalType() == OriginalType.DECIMAL, + && primitiveType.getLogicalTypeAnnotation() + instanceof DecimalLogicalTypeAnnotation, "Unexpected type: %s", typeName); return new HeapIntVector(batchSize); @@ -241,7 +242,8 @@ public static WritableColumnVector createWritableColumnVector( checkArgument( (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY || typeName == PrimitiveType.PrimitiveTypeName.INT64) - && primitiveType.getOriginalType() == OriginalType.DECIMAL, + && primitiveType.getLogicalTypeAnnotation() + instanceof DecimalLogicalTypeAnnotation, "Unexpected type: %s", typeName); return new HeapLongVector(batchSize); @@ -249,7 +251,8 @@ public static WritableColumnVector createWritableColumnVector( checkArgument( (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY || typeName == PrimitiveType.PrimitiveTypeName.BINARY) - && primitiveType.getOriginalType() == OriginalType.DECIMAL, + && primitiveType.getLogicalTypeAnnotation() + instanceof DecimalLogicalTypeAnnotation, "Unexpected type: %s", typeName); return new HeapBytesVector(batchSize); diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/writer/PositionOutputStreamAdapter.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/writer/PositionOutputStreamAdapter.java index 72fda99e443e..39e69e255957 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/writer/PositionOutputStreamAdapter.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/writer/PositionOutputStreamAdapter.java @@ -21,8 +21,7 @@ import org.apache.parquet.io.PositionOutputStream; import java.io.IOException; - -import static org.apache.parquet.Preconditions.checkNotNull; +import java.util.Objects; /** An adapter to turn Paimon's {@link PositionOutputStream} into a {@link PositionOutputStream}. */ class PositionOutputStreamAdapter extends PositionOutputStream { @@ -36,7 +35,7 @@ class PositionOutputStreamAdapter extends PositionOutputStream { * @param out The Paimon stream written to. */ PositionOutputStreamAdapter(org.apache.paimon.fs.PositionOutputStream out) { - this.out = checkNotNull(out, "out"); + this.out = Objects.requireNonNull(out, "out should not be null"); } @Override