Skip to content

Commit

Permalink
[fix](hive)fix select count(*) hive full acid tb opt error. (#46732) (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
hubgeter authored Jan 12, 2025
1 parent 40f3e9b commit 94856b1
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 2 deletions.
1 change: 1 addition & 0 deletions be/src/vec/exec/format/table/transactional_hive_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ Status TransactionalHiveReader::init_row_filters(const TFileRangeDesc& range,
++num_delete_files;
}
if (num_delete_rows > 0) {
orc_reader->set_push_down_agg_type(TPushAggOp::NONE);
orc_reader->set_delete_rows(&_delete_rows);
COUNTER_UPDATE(_transactional_orc_profile.num_delete_files, num_delete_files);
COUNTER_UPDATE(_transactional_orc_profile.num_delete_rows, num_delete_rows);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,12 +292,12 @@ private void getFileSplitByPartitions(HiveMetaStoreCache cache, List<HivePartiti
* we don't need to split the file because for parquet/orc format, only metadata is read.
* If we split the file, we will read metadata of a file multiple times, which is not efficient.
*
* - Hive Transactional Table may need merge on read, so do not apply this optimization.
* - Hive Full Acid Transactional Table may need merge on read, so do not apply this optimization.
* - If the file format is not parquet/orc, e.g. text, we need to split the file to increase the parallelism.
*/
boolean needSplit = true;
if (getPushDownAggNoGroupingOp() == TPushAggOp.COUNT
&& hiveTransaction != null) {
&& !(hmsTable.isHiveTransactionalTable() && hmsTable.isFullAcidTable())) {
int totalFileNum = 0;
for (FileCacheValue fileCacheValue : fileCaches) {
if (fileCacheValue.getFiles() != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,3 +122,18 @@ F
-- !16 --
4 DD

-- !count_1 --
3

-- !count_2 --
6

-- !count_3 --
4

-- !count_4 --
3

-- !count_5 --
3

Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,13 @@
3 C
4 D
5 E

-- !count_1 --
4

-- !count_2 --
5

-- !count_3 --
5

Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,14 @@ suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock

}
}

// Regression queries for `select count(*)` against transactional (ACID) Hive tables.
// Each qt_count_N result is presumably compared by the test framework with the
// matching `-- !count_N --` entry in the suite's expected-output file (TODO confirm).
// The trailing comment on each query is the expected row count.
def test_acid_count = {
qt_count_1 """ select count(*) from orc_full_acid; """ // 3
qt_count_2 """ select count(*) from orc_full_acid_par; """ // 6
qt_count_3 """ select count(*) from orc_to_acid_compacted_tb; """ // 4
qt_count_4 """ select count(*) from orc_acid_minor; """ // 3
qt_count_5 """ select count(*) from orc_acid_major; """ // 3
}


String enabled = context.config.otherConfigs.get("enableHiveTest")
Expand Down Expand Up @@ -149,6 +157,10 @@ suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock
test_acid()
test_acid_write()


test_acid_count()


sql """drop catalog if exists ${catalog_name}"""
} finally {
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ suite("test_hive_translation_insert_only", "p2,external,hive,external_remote,ext
qt_2 """ select * from parquet_insert_only_major order by id """
qt_3 """ select * from orc_insert_only_minor order by id """

qt_count_1 """ select count(*) from text_insert_only """ //4
qt_count_2 """ select count(*) from parquet_insert_only_major """ //5
qt_count_3 """ select count(*) from orc_insert_only_minor """ //5


sql """drop catalog ${hms_catalog_name};"""
}

Expand Down

0 comments on commit 94856b1

Please sign in to comment.