Skip to content

Commit

Permalink
[hive] Fix Hive 3 timestamp precision conversion error (apache#2674)
Browse files Browse the repository at this point in the history
  • Loading branch information
yuzelin authored Jan 12, 2024
1 parent a9b31af commit d1696aa
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

/** {@link AbstractPrimitiveJavaObjectInspector} for TIMESTAMP type. */
/** {@link AbstractPrimitiveJavaObjectInspector} for TIMESTAMP type. The precision is maintained. */
public class PaimonTimestampObjectInspector extends AbstractPrimitiveJavaObjectInspector
implements TimestampObjectInspector, WriteableObjectInspector {

Expand All @@ -34,9 +34,14 @@ public PaimonTimestampObjectInspector() {

/**
 * Converts a Paimon timestamp into a Hive {@link Timestamp}, carrying over the sub-millisecond
 * digits instead of truncating the value to milliseconds.
 *
 * @param o an {@code org.apache.paimon.data.Timestamp}, or {@code null}
 * @return the corresponding Hive timestamp, or {@code null} when {@code o} is {@code null}
 */
@Override
public Timestamp getPrimitiveJavaObject(Object o) {
    if (o == null) {
        return null;
    }

    org.apache.paimon.data.Timestamp timestamp = (org.apache.paimon.data.Timestamp) o;
    long millis = timestamp.getMillisecond();
    // Use floorMod rather than %: for instants before the epoch `millis % 1000` is negative,
    // which would produce a negative (invalid) nanosecond-of-second value.
    int milliOfSecond = (int) Math.floorMod(millis, 1000L);
    // Nanosecond-of-second = whole milliseconds within the second + nanos within that milli.
    int nanos = milliOfSecond * 1_000_000 + timestamp.getNanoOfMillisecond();
    return Timestamp.ofEpochMilli(millis, nanos);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;

import javax.annotation.Nullable;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
Expand Down Expand Up @@ -91,7 +93,11 @@ public void after() {
}

private String createAppendOnlyExternalTable(
RowType rowType, List<String> partitionKeys, List<InternalRow> data, String tableName)
RowType rowType,
List<String> partitionKeys,
List<InternalRow> data,
String tableName,
@Nullable CoreOptions.FileFormatType fileFormatType)
throws Exception {
String path = folder.newFolder().toURI().toString();
String tableNameNotNull =
Expand All @@ -100,7 +106,9 @@ private String createAppendOnlyExternalTable(
Options conf = new Options();
conf.set(CatalogOptions.WAREHOUSE, path);
conf.set(CoreOptions.BUCKET, 2);
conf.set(CoreOptions.FILE_FORMAT, CoreOptions.FileFormatType.AVRO);
conf.set(
CoreOptions.FILE_FORMAT,
fileFormatType == null ? CoreOptions.FileFormatType.AVRO : fileFormatType);
Identifier identifier = Identifier.create(DATABASE_NAME, tableNameNotNull);
Table table =
FileStoreTestUtils.createFileStoreTable(
Expand Down Expand Up @@ -152,7 +160,8 @@ public void testInsert() throws Exception {
new String[] {"pt", "a", "b", "c"}),
Collections.singletonList("pt"),
emptyData,
"hive_test_table_output");
"hive_test_table_output",
null);

hiveShell.execute(
"insert into " + outputTableName + " values (1,2,3,'Hello'),(4,5,6,'Fine')");
Expand All @@ -164,22 +173,30 @@ public void testInsert() throws Exception {
public void testInsertTimestampAndDate() throws Exception {
List<InternalRow> emptyData = Collections.emptyList();

// test different precisions
int precision = ThreadLocalRandom.current().nextInt(10);
String fraction = precision == 0 ? "" : "." + "123456789".substring(0, precision);

String outputTableName =
createAppendOnlyExternalTable(
RowType.of(
new DataType[] {
DataTypes.INT(), DataTypes.TIMESTAMP(), DataTypes.DATE()
DataTypes.INT(),
DataTypes.TIMESTAMP(precision),
DataTypes.DATE(),
},
new String[] {"pt", "a", "b"}),
Collections.singletonList("pt"),
emptyData,
"hive_test_table_output");
"hive_test_table_output",
CoreOptions.FileFormatType.ORC);
hiveShell.execute(
"insert into "
+ outputTableName
+ " values(1,'2023-01-13 20:00:01.123','2023-12-23')");
List<String> select = hiveShell.executeQuery("select * from " + outputTableName);
String.format(
"INSERT INTO %s VALUES (1, '2023-01-13 20:00:01%s', '2023-12-23')",
outputTableName, fraction));

List<String> select = hiveShell.executeQuery("SELECT * FROM " + outputTableName);
assertThat(select)
.isEqualTo(Collections.singletonList("1\t2023-01-13 20:00:01.123\t2023-12-23"));
.containsExactly(String.format("1\t2023-01-13 20:00:01%s\t2023-12-23", fraction));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

import java.time.LocalDateTime;

/** {@link AbstractPrimitiveJavaObjectInspector} for TIMESTAMP type. */
/** {@link AbstractPrimitiveJavaObjectInspector} for TIMESTAMP type. The precision is maintained. */
public class PaimonTimestampObjectInspector extends AbstractPrimitiveJavaObjectInspector
implements TimestampObjectInspector, WriteableObjectInspector {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -740,18 +740,20 @@ public void testPredicatePushDown() throws Exception {
public void testDateAndTimestamp() throws Exception {
ThreadLocalRandom random = ThreadLocalRandom.current();
Options conf = getBasicConf();
conf.set(
CoreOptions.FILE_FORMAT,

CoreOptions.FileFormatType fileFormatType =
random.nextBoolean()
? CoreOptions.FileFormatType.ORC
: CoreOptions.FileFormatType.PARQUET);
: CoreOptions.FileFormatType.PARQUET;
conf.set(CoreOptions.FILE_FORMAT, fileFormatType);

int precision = random.nextInt(10);

Table table =
FileStoreTestUtils.createFileStoreTable(
conf,
RowType.of(
new DataType[] {
DataTypes.DATE(), DataTypes.TIMESTAMP(random.nextInt(10))
},
new DataType[] {DataTypes.DATE(), DataTypes.TIMESTAMP(precision)},
new String[] {"dt", "ts"}),
Collections.emptyList(),
Collections.emptyList());
Expand All @@ -772,32 +774,53 @@ public void testDateAndTimestamp() throws Exception {
GenericRow.of(
null,
Timestamp.fromLocalDateTime(
LocalDateTime.of(2022, 6, 18, 8, 30, 0, 100_000_000))));
// to test different precisions
LocalDateTime.of(2022, 6, 18, 8, 30, 0, 123_456_789))));
commit.commit(2, write.prepareCommit(true, 2));
write.close();
commit.close();

createExternalTable();

assertThat(
hiveShell.executeQuery(
"SELECT * FROM `" + externalTable + "` WHERE dt = '1971-01-11'"))
.containsExactly("1971-01-11\t2022-05-17 17:29:20.1");
assertThat(
hiveShell.executeQuery(
"SELECT * FROM `"
+ externalTable
+ "` WHERE ts = '2022-05-17 17:29:20.1'"))
String.format(
// do not filter on '.123456789' here, because the filter pushdown
// would cause a wrong result
"SELECT * FROM `%s` WHERE ts = '2022-05-17 17:29:20.1'",
externalTable)))
.containsExactly("1971-01-11\t2022-05-17 17:29:20.1");

assertThat(
hiveShell.executeQuery(
"SELECT * FROM `" + externalTable + "` WHERE dt = '1971-01-12'"))
.containsExactly("1971-01-12\tNULL");

// validate '2022-06-18 08:30:00.123456789'
// the original precision is maintained, but the file format affects the result:
// parquet stores timestamps in one of three forms, selected by precision (<= 3, <= 6, > 6)
String fraction;
if (fileFormatType == CoreOptions.FileFormatType.ORC) {
fraction = ".123456789";
} else {
if (precision <= 3) {
fraction = ".123";
} else if (precision <= 6) {
fraction = ".123456";
} else {
fraction = ".123456789";
}
}
assertThat(
hiveShell.executeQuery(
"SELECT * FROM `"
+ externalTable
+ "` WHERE ts = '2022-06-18 08:30:00.1'"))
.containsExactly("NULL\t2022-06-18 08:30:00.1");
+ "` WHERE dt IS NULL and ts IS NOT NULL"))
.containsExactly("NULL\t2022-06-18 08:30:00" + fraction);
}

@Test
Expand Down

0 comments on commit d1696aa

Please sign in to comment.