From c6e182afa121070d5c4d6c4dc353f39450ed2cf3 Mon Sep 17 00:00:00 2001 From: leoyy0316 <105328124+leoyy0316@users.noreply.github.com> Date: Sat, 20 Jan 2024 23:14:25 +0800 Subject: [PATCH] [format] Fix orc and parquet writer about timestamp not contains [local timezone] and [is_adjust_to_utc] (#2739) --- .../format/orc/reader/OrcSplitReaderUtil.java | 3 ++- .../format/parquet/ParquetSchemaConverter.java | 18 +++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcSplitReaderUtil.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcSplitReaderUtil.java index ae00821f3a2a..882f1c753991 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcSplitReaderUtil.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcSplitReaderUtil.java @@ -74,8 +74,9 @@ public static TypeDescription toOrcType(DataType type) { case DATE: return TypeDescription.createDate(); case TIMESTAMP_WITHOUT_TIME_ZONE: - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: return TypeDescription.createTimestamp(); + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + return TypeDescription.createTimestampInstant(); case ARRAY: ArrayType arrayType = (ArrayType) type; return TypeDescription.createList(toOrcType(arrayType.getElementType())); diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSchemaConverter.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSchemaConverter.java index 24160df56915..0e445f0b5286 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSchemaConverter.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSchemaConverter.java @@ -115,11 +115,11 @@ private static Type convertToParquetType( case TIMESTAMP_WITHOUT_TIME_ZONE: TimestampType timestampType = (TimestampType) type; return createTimestampWithLogicalType( - name, timestampType.getPrecision(), repetition); + name, timestampType.getPrecision(), repetition, false); case TIMESTAMP_WITH_LOCAL_TIME_ZONE: LocalZonedTimestampType localZonedTimestampType = (LocalZonedTimestampType) type; return createTimestampWithLogicalType( - name, localZonedTimestampType.getPrecision(), repetition); + name, localZonedTimestampType.getPrecision(), repetition, true); case ARRAY: ArrayType arrayType = (ArrayType) type; return ConversionPatterns.listOfElements( @@ -151,13 +151,21 @@ private static Type convertToParquetType( } private static Type createTimestampWithLogicalType( - String name, int precision, Type.Repetition repetition) { + String name, int precision, Type.Repetition repetition, boolean isAdjustToUTC) { if (precision <= 3) { - return Types.primitive(INT64, repetition).as(OriginalType.TIMESTAMP_MILLIS).named(name); + return Types.primitive(INT64, repetition) + .as( + LogicalTypeAnnotation.timestampType( + isAdjustToUTC, LogicalTypeAnnotation.TimeUnit.MILLIS)) + .named(name); } else if (precision > 6) { return Types.primitive(PrimitiveType.PrimitiveTypeName.INT96, repetition).named(name); } else { - return Types.primitive(INT64, repetition).as(OriginalType.TIMESTAMP_MICROS).named(name); + return Types.primitive(INT64, repetition) + .as( + LogicalTypeAnnotation.timestampType( + isAdjustToUTC, LogicalTypeAnnotation.TimeUnit.MICROS)) + .named(name); } }