From d1696aab4c78d7bd54e25bfba6d13be4ae031914 Mon Sep 17 00:00:00 2001 From: yuzelin <33053040+yuzelin@users.noreply.github.com> Date: Fri, 12 Jan 2024 10:53:32 +0800 Subject: [PATCH] [hive] Fix Hive 3 timestamp precision conversion error (#2674) --- .../PaimonTimestampObjectInspector.java | 13 +++-- .../apache/paimon/hive/HiveWriteITCase.java | 37 +++++++++++---- .../PaimonTimestampObjectInspector.java | 2 +- .../hive/PaimonStorageHandlerITCase.java | 47 ++++++++++++++----- 4 files changed, 72 insertions(+), 27 deletions(-) diff --git a/paimon-hive/paimon-hive-connector-3.1/src/main/java/org/apache/paimon/hive/objectinspector/PaimonTimestampObjectInspector.java b/paimon-hive/paimon-hive-connector-3.1/src/main/java/org/apache/paimon/hive/objectinspector/PaimonTimestampObjectInspector.java index 4b96dbfb2fee..a70064e239d9 100644 --- a/paimon-hive/paimon-hive-connector-3.1/src/main/java/org/apache/paimon/hive/objectinspector/PaimonTimestampObjectInspector.java +++ b/paimon-hive/paimon-hive-connector-3.1/src/main/java/org/apache/paimon/hive/objectinspector/PaimonTimestampObjectInspector.java @@ -24,7 +24,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -/** {@link AbstractPrimitiveJavaObjectInspector} for TIMESTAMP type. */ +/** {@link AbstractPrimitiveJavaObjectInspector} for TIMESTAMP type. The precision is maintained. */ public class PaimonTimestampObjectInspector extends AbstractPrimitiveJavaObjectInspector implements TimestampObjectInspector, WriteableObjectInspector { @@ -34,9 +34,14 @@ public PaimonTimestampObjectInspector() { @Override public Timestamp getPrimitiveJavaObject(Object o) { - return o == null - ? null - : Timestamp.ofEpochMilli(((org.apache.paimon.data.Timestamp) o).getMillisecond()); + if (o == null) { + return null; + } + + org.apache.paimon.data.Timestamp timestamp = (org.apache.paimon.data.Timestamp) o; + long millis = timestamp.getMillisecond(); + int nanos = (int) (millis % 1000 * 1_000_000) + timestamp.getNanoOfMillisecond(); + return Timestamp.ofEpochMilli(millis, nanos); } @Override diff --git a/paimon-hive/paimon-hive-connector-3.1/src/test/java/org/apache/paimon/hive/HiveWriteITCase.java b/paimon-hive/paimon-hive-connector-3.1/src/test/java/org/apache/paimon/hive/HiveWriteITCase.java index babbbe30fbd8..1fd39dd94e2f 100644 --- a/paimon-hive/paimon-hive-connector-3.1/src/test/java/org/apache/paimon/hive/HiveWriteITCase.java +++ b/paimon-hive/paimon-hive-connector-3.1/src/test/java/org/apache/paimon/hive/HiveWriteITCase.java @@ -44,6 +44,8 @@ import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; +import javax.annotation.Nullable; + import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -91,7 +93,11 @@ public void after() { } private String createAppendOnlyExternalTable( - RowType rowType, List partitionKeys, List data, String tableName) + RowType rowType, + List partitionKeys, + List data, + String tableName, + @Nullable CoreOptions.FileFormatType fileFormatType) throws Exception { String path = folder.newFolder().toURI().toString(); String tableNameNotNull = @@ -100,7 +106,9 @@ private String createAppendOnlyExternalTable( Options conf = new Options(); conf.set(CatalogOptions.WAREHOUSE, path); conf.set(CoreOptions.BUCKET, 2); - conf.set(CoreOptions.FILE_FORMAT, CoreOptions.FileFormatType.AVRO); + conf.set( + CoreOptions.FILE_FORMAT, + fileFormatType == null ? CoreOptions.FileFormatType.AVRO : fileFormatType); Identifier identifier = Identifier.create(DATABASE_NAME, tableNameNotNull); Table table = FileStoreTestUtils.createFileStoreTable( @@ -152,7 +160,8 @@ public void testInsert() throws Exception { new String[] {"pt", "a", "b", "c"}), Collections.singletonList("pt"), emptyData, - "hive_test_table_output"); + "hive_test_table_output", + null); hiveShell.execute( "insert into " + outputTableName + " values (1,2,3,'Hello'),(4,5,6,'Fine')"); @@ -164,22 +173,30 @@ public void testInsert() throws Exception { public void testInsertTimestampAndDate() throws Exception { List emptyData = Collections.emptyList(); + // test different precisions + int precision = ThreadLocalRandom.current().nextInt(10); + String fraction = precision == 0 ? "" : "." + "123456789".substring(0, precision); + String outputTableName = createAppendOnlyExternalTable( RowType.of( new DataType[] { - DataTypes.INT(), DataTypes.TIMESTAMP(), DataTypes.DATE() + DataTypes.INT(), + DataTypes.TIMESTAMP(precision), + DataTypes.DATE(), }, new String[] {"pt", "a", "b"}), Collections.singletonList("pt"), emptyData, - "hive_test_table_output"); + "hive_test_table_output", + CoreOptions.FileFormatType.ORC); hiveShell.execute( - "insert into " - + outputTableName - + " values(1,'2023-01-13 20:00:01.123','2023-12-23')"); - List select = hiveShell.executeQuery("select * from " + outputTableName); + String.format( + "INSERT INTO %s VALUES (1, '2023-01-13 20:00:01%s', '2023-12-23')", + outputTableName, fraction)); + + List select = hiveShell.executeQuery("SELECT * FROM " + outputTableName); assertThat(select) - .isEqualTo(Collections.singletonList("1\t2023-01-13 20:00:01.123\t2023-12-23")); + .containsExactly(String.format("1\t2023-01-13 20:00:01%s\t2023-12-23", fraction)); } } diff --git a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/objectinspector/PaimonTimestampObjectInspector.java b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/objectinspector/PaimonTimestampObjectInspector.java index 7f57b242bde8..c18eb7759f9b 100644 --- a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/objectinspector/PaimonTimestampObjectInspector.java +++ b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/objectinspector/PaimonTimestampObjectInspector.java @@ -27,7 +27,7 @@ import java.time.LocalDateTime; -/** {@link AbstractPrimitiveJavaObjectInspector} for TIMESTAMP type. */ +/** {@link AbstractPrimitiveJavaObjectInspector} for TIMESTAMP type. The precision is maintained. */ public class PaimonTimestampObjectInspector extends AbstractPrimitiveJavaObjectInspector implements TimestampObjectInspector, WriteableObjectInspector { diff --git a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/PaimonStorageHandlerITCase.java b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/PaimonStorageHandlerITCase.java index 16f6e191a329..8ba1fe357ba0 100644 --- a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/PaimonStorageHandlerITCase.java +++ b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/PaimonStorageHandlerITCase.java @@ -740,18 +740,20 @@ public void testPredicatePushDown() throws Exception { public void testDateAndTimestamp() throws Exception { ThreadLocalRandom random = ThreadLocalRandom.current(); Options conf = getBasicConf(); - conf.set( - CoreOptions.FILE_FORMAT, + + CoreOptions.FileFormatType fileFormatType = random.nextBoolean() ? CoreOptions.FileFormatType.ORC - : CoreOptions.FileFormatType.PARQUET); + : CoreOptions.FileFormatType.PARQUET; + conf.set(CoreOptions.FILE_FORMAT, fileFormatType); + + int precision = random.nextInt(10); + Table table = FileStoreTestUtils.createFileStoreTable( conf, RowType.of( - new DataType[] { - DataTypes.DATE(), DataTypes.TIMESTAMP(random.nextInt(10)) - }, + new DataType[] {DataTypes.DATE(), DataTypes.TIMESTAMP(precision)}, new String[] {"dt", "ts"}), Collections.emptyList(), Collections.emptyList()); @@ -772,32 +774,53 @@ public void testDateAndTimestamp() throws Exception { GenericRow.of( null, Timestamp.fromLocalDateTime( - LocalDateTime.of(2022, 6, 18, 8, 30, 0, 100_000_000)))); + // to test different precisions + LocalDateTime.of(2022, 6, 18, 8, 30, 0, 123_456_789)))); commit.commit(2, write.prepareCommit(true, 2)); write.close(); commit.close(); createExternalTable(); + assertThat( hiveShell.executeQuery( "SELECT * FROM `" + externalTable + "` WHERE dt = '1971-01-11'")) .containsExactly("1971-01-11\t2022-05-17 17:29:20.1"); assertThat( hiveShell.executeQuery( - "SELECT * FROM `" - + externalTable - + "` WHERE ts = '2022-05-17 17:29:20.1'")) + String.format( + // do not test '.123456789' because the filter pushdown will + // cause wrong result + "SELECT * FROM `%s` WHERE ts = '2022-05-17 17:29:20.1'", + externalTable))) .containsExactly("1971-01-11\t2022-05-17 17:29:20.1"); + assertThat( hiveShell.executeQuery( "SELECT * FROM `" + externalTable + "` WHERE dt = '1971-01-12'")) .containsExactly("1971-01-12\tNULL"); + + // validate '2022-06-18 08:30:00.123456789' + // the original precision is maintained, but the file format will affect the result + // parquet stores timestamp with three forms + String fraction; + if (fileFormatType == CoreOptions.FileFormatType.ORC) { + fraction = ".123456789"; + } else { + if (precision <= 3) { + fraction = ".123"; + } else if (precision <= 6) { + fraction = ".123456"; + } else { + fraction = ".123456789"; + } + } assertThat( hiveShell.executeQuery( "SELECT * FROM `" + externalTable - + "` WHERE ts = '2022-06-18 08:30:00.1'")) - .containsExactly("NULL\t2022-06-18 08:30:00.1"); + + "` WHERE dt IS NULL and ts IS NOT NULL")) + .containsExactly("NULL\t2022-06-18 08:30:00" + fraction); } @Test