[fix](hive) Fix select count(*) optimization error on Hive full ACID tables. #46732

Merged: 1 commit, Jan 10, 2025
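The diff suggests the following failure mode: on a Hive full ACID (transactional ORC) table that has delete deltas, select count(*) through a Doris HMS catalog could be answered by the count(*) pushdown from file metadata without applying the delete deltas. Below is a minimal reproduction sketch, assuming a Hive-side full ACID table from which rows have been deleted; the table name orc_full_acid and the expected value 3 are taken from the regression test added in this PR, while the pre-fix behavior is inferred from the fix, not stated in the diff.

-- In Doris, through an HMS catalog pointing at the Hive warehouse:
select count(*) from orc_full_acid;
-- expected: 3 (rows remaining after the Hive-side deletes)
-- before this fix: the pushed-down count could be computed without merging the delete deltas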
1 change: 1 addition & 0 deletions be/src/vec/exec/format/table/transactional_hive_reader.cpp
@@ -205,6 +205,7 @@ Status TransactionalHiveReader::init_row_filters(const TFileRangeDesc& range,
         ++num_delete_files;
     }
     if (num_delete_rows > 0) {
+        orc_reader->set_push_down_agg_type(TPushAggOp::NONE);
         orc_reader->set_delete_rows(&_delete_rows);
         COUNTER_UPDATE(_transactional_orc_profile.num_delete_files, num_delete_files);
         COUNTER_UPDATE(_transactional_orc_profile.num_delete_rows, num_delete_rows);
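This BE-side change disables the COUNT pushdown on the ORC reader whenever delete rows were collected for the split, so count(*) is computed from rows that survive the delete filter instead of from file metadata. A sketch of the intended effect, using tables from the regression tests below; the expected values come from the new .out expectations, and whether a query keeps the metadata-based count path is an inference from the num_delete_rows > 0 guard, not something the diff states explicitly.

-- Full ACID table with delete deltas: the reader now sets TPushAggOp::NONE and
-- counts only rows that survive the delete filter.
select count(*) from orc_full_acid;          -- 3
-- Insert-only transactional table with no delete deltas: num_delete_rows is 0,
-- so the COUNT pushdown should remain in effect.
select count(*) from orc_insert_only_minor;  -- 5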
@@ -289,12 +289,12 @@ private void getFileSplitByPartitions(HiveMetaStoreCache cache, List<HivePartiti
      * we don't need to split the file because for parquet/orc format, only metadata is read.
      * If we split the file, we will read metadata of a file multiple times, which is not efficient.
      *
-     * - Hive Transactional Table may need merge on read, so do not apply this optimization.
+     * - Hive Full Acid Transactional Table may need merge on read, so do not apply this optimization.
      * - If the file format is not parquet/orc, eg, text, we need to split the file to increase the parallelism.
      */
     boolean needSplit = true;
     if (getPushDownAggNoGroupingOp() == TPushAggOp.COUNT
-            && hiveTransaction != null) {
+            && !(hmsTable.isHiveTransactionalTable() && hmsTable.isFullAcidTable())) {
         int totalFileNum = 0;
         for (FileCacheValue fileCacheValue : fileCaches) {
             if (fileCacheValue.getFiles() != null) {
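On the FE side, the "no split needed for a pushed-down count" optimization is now skipped exactly for full ACID transactional tables, which may need merge-on-read, instead of keying off whether a hiveTransaction object exists. Illustrative query shapes for this condition follow; the table names come from the regression tests in this PR, the id column in the grouped example is assumed, and which path each query takes is an inference from the getPushDownAggNoGroupingOp() == TPushAggOp.COUNT and isFullAcidTable() checks.

-- Plain count on an insert-only (not full ACID) table: eligible for the
-- no-split optimization, since only file metadata needs to be read.
select count(*) from parquet_insert_only_major;        -- 5
-- Plain count on a full ACID table: no longer eligible, because delete deltas
-- may have to be merged on read.
select count(*) from orc_full_acid;                    -- 3
-- A grouped count is not a "no grouping" pushdown, so this branch should not be
-- taken at all (assumption; column name id is hypothetical).
select id, count(*) from orc_full_acid group by id;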
@@ -122,3 +122,18 @@ F
-- !16 --
4 DD

-- !count_1 --
3

-- !count_2 --
6

-- !count_3 --
4

-- !count_4 --
3

-- !count_5 --
3

@@ -19,3 +19,12 @@
4 D
5 E

-- !count_1 --
4

-- !count_2 --
5

-- !count_3 --
5

@@ -114,6 +114,14 @@ suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock

}
}

def test_acid_count = {
qt_count_1 """ select count(*) from orc_full_acid; """ // 3
qt_count_2 """ select count(*) from orc_full_acid_par; """ // 6
qt_count_3 """ select count(*) from orc_to_acid_compacted_tb; """ //4
qt_count_4 """ select count(*) from orc_acid_minor; """ //3
qt_count_5 """ select count(*) from orc_acid_major; """ //3
}


String enabled = context.config.otherConfigs.get("enableHiveTest")
@@ -148,6 +156,10 @@ suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock
test_acid()
test_acid_write()


test_acid_count()


sql """drop catalog if exists ${catalog_name}"""
} finally {
}
@@ -45,6 +45,11 @@ suite("test_hive_translation_insert_only", "p2,external,hive,external_remote,ext
qt_2 """ select * from parquet_insert_only_major order by id """
qt_3 """ select * from orc_insert_only_minor order by id """

qt_count_1 """ select count(*) from text_insert_only """ //4
qt_count_2 """ select count(*) from parquet_insert_only_major """ //5
qt_count_3 """ select count(*) from orc_insert_only_minor """ //5


sql """drop catalog ${hms_catalog_name};"""
}
