Skip to content

Commit

Permalink
[fix](OrcReader) fix the issue that orc_reader can not read DECIMAL(0…
Browse files Browse the repository at this point in the history
…,0) type of orc file #41795 (#42298)

cherry pick from #41795

Co-authored-by: Tiewei Fang <[email protected]>
  • Loading branch information
morningman and BePPPower authored Oct 23, 2024
1 parent 9aab16d commit b399ed3
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 6 deletions.
9 changes: 9 additions & 0 deletions be/src/vec/exec/format/orc/vorc_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,11 @@ namespace doris::vectorized {
// TODO: we need to determine it by test.
static constexpr uint32_t MAX_DICT_CODE_PREDICATE_TO_REWRITE = std::numeric_limits<uint32_t>::max();
static constexpr char EMPTY_STRING_FOR_OVERFLOW[ColumnString::MAX_STRINGS_OVERFLOW_SIZE] = "";
// Hive 0.11 and 0.12 do not support precision and scale for the decimal type,
// so decimal columns in ORC files produced by those versions are recorded as
// DECIMAL(0,0). The reader substitutes the default precision and scale below
// whenever an ORC decimal type reports a precision of 0.
// Default precision: the maximum precision of a 128-bit decimal.
static constexpr int decimal_precision_for_hive11 = BeConsts::MAX_DECIMAL128_PRECISION;
// Default scale: 10 digits after the decimal point.
static constexpr int decimal_scale_for_hive11 = 10;

#define FOR_FLAT_ORC_COLUMNS(M) \
M(TypeIndex::Int8, Int8, orc::LongVectorBatch) \
Expand Down Expand Up @@ -1050,6 +1055,10 @@ TypeDescriptor OrcReader::convert_to_doris_type(const orc::Type* orc_type) {
case orc::TypeKind::TIMESTAMP:
return TypeDescriptor(PrimitiveType::TYPE_DATETIMEV2);
case orc::TypeKind::DECIMAL:
if (orc_type->getPrecision() == 0) {
return TypeDescriptor::create_decimalv3_type(decimal_precision_for_hive11,
decimal_scale_for_hive11);
}
return TypeDescriptor::create_decimalv3_type(orc_type->getPrecision(),
orc_type->getScale());
case orc::TypeKind::DATE:
Expand Down
1 change: 0 additions & 1 deletion be/src/vec/exec/format/orc/vorc_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -587,7 +587,6 @@ class OrcReader : public GenericReader {
std::unique_ptr<orc::Reader> _reader;
std::unique_ptr<orc::RowReader> _row_reader;
std::unique_ptr<ORCFilterImpl> _orc_filter;
orc::ReaderOptions _reader_options;
orc::RowReaderOptions _row_reader_options;

std::shared_ptr<io::FileSystem> _file_system;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@
2014-02-11
8200-02-11

-- !test_2 --
12345678.6547450000
12345678.6547450000
12345678.6547450000
12345678.6547450000
12345678.6547450000

-- !test_3 --
2 foo 0.8 1 1969-12-31T16:00
5 eat 0.8 6 1969-12-31T16:00:20
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,15 @@ row 000009
Alyssa \N [3, 9, 15, 20]
Ben red []

-- !test_4 --
2 foo 0.8 1.2000000000 1969-12-31T16:00
5 eat 0.8 5.5000000000 1969-12-31T16:00:20
13 bar 80.0 2.2000000000 1969-12-31T16:00:05
29 cat 8.0 3.3000000000 1969-12-31T16:00:10
70 dog 1.8 4.4000000000 1969-12-31T16:00:15
100 zebra 8.0 0E-10 1969-12-31T16:04:10
100 zebra 8.0 0E-10 1969-12-31T16:04:10
100 zebra 8.0 0E-10 1969-12-31T16:04:10
100 zebra 8.0 0E-10 1969-12-31T16:04:10
100 zebra 8.0 0E-10 1969-12-31T16:04:10

Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ suite("test_hdfs_orc_group1_orc_files","external,hive,tvf,external_docker") {

// Doris cannot read this ORC file because of a NOT_IMPLEMENT error.

// uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group1/orc-file-11-format.orc"
// order_qt_test_2 """ select * from HDFS(
// "uri" = "${uri}",
// "hadoop.username" = "${hdfsUserName}",
// "format" = "orc"); """
uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group1/orc-file-11-format.orc"
order_qt_test_2 """ select decimal1 from HDFS(
"uri" = "${uri}",
"hadoop.username" = "${hdfsUserName}",
"format" = "orc") limit 5; """


uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group1/orc_split_elim.orc"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ suite("test_hdfs_orc_group2_orc_files","external,hive,tvf,external_docker") {
"uri" = "${uri}",
"hadoop.username" = "${hdfsUserName}",
"format" = "orc"); """

uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group2/orc_split_elim.orc"
qt_test_4 """ select * from HDFS(
"uri" = "${uri}",
"hadoop.username" = "${hdfsUserName}",
"format" = "orc") order by userid limit 10; """
} finally {
}
}
Expand Down

0 comments on commit b399ed3

Please sign in to comment.