[fix](hive)fix select count(*) hive full acid tb opt error. (#46732) (#46804)

bp #46732
apache · Jan 12, 2025 · 94856b1 · 94856b1
1 parent 40f3e9b
commit 94856b1
Show file tree

Hide file tree

Showing 6 changed files with 45 additions and 2 deletions.
diff --git a/be/src/vec/exec/format/table/transactional_hive_reader.cpp b/be/src/vec/exec/format/table/transactional_hive_reader.cpp
@@ -180,6 +180,7 @@ Status TransactionalHiveReader::init_row_filters(const TFileRangeDesc& range,
         ++num_delete_files;
     }
     if (num_delete_rows > 0) {
+        orc_reader->set_push_down_agg_type(TPushAggOp::NONE);
         orc_reader->set_delete_rows(&_delete_rows);
         COUNTER_UPDATE(_transactional_orc_profile.num_delete_files, num_delete_files);
         COUNTER_UPDATE(_transactional_orc_profile.num_delete_rows, num_delete_rows);

diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -292,12 +292,12 @@ private void getFileSplitByPartitions(HiveMetaStoreCache cache, List<HivePartiti
          * we don't need to split the file because for parquet/orc format, only metadata is read.
          * If we split the file, we will read metadata of a file multiple times, which is not efficient.
          *
-         * - Hive Transactional Table may need merge on read, so do not apply this optimization.
+         * - Hive Full Acid Transactional Table may need merge on read, so do not apply this optimization.
          * - If the file format is not parquet/orc, eg, text, we need to split the file to increase the parallelism.
          */
         boolean needSplit = true;
         if (getPushDownAggNoGroupingOp() == TPushAggOp.COUNT
-                && hiveTransaction != null) {
+                && !(hmsTable.isHiveTransactionalTable() && hmsTable.isFullAcidTable())) {
             int totalFileNum = 0;
             for (FileCacheValue fileCacheValue : fileCaches) {
                 if (fileCacheValue.getFiles() != null) {

diff --git a/regression-test/data/external_table_p0/hive/test_transactional_hive.out b/regression-test/data/external_table_p0/hive/test_transactional_hive.out
@@ -122,3 +122,18 @@ F
 -- !16 --
 4	DD
 
+-- !count_1 --
+3
+
+-- !count_2 --
+6
+
+-- !count_3 --
+4
+
+-- !count_4 --
+3
+
+-- !count_5 --
+3
+
diff --git a/regression-test/data/external_table_p2/hive/test_hive_translation_insert_only.out b/regression-test/data/external_table_p2/hive/test_hive_translation_insert_only.out
@@ -18,3 +18,13 @@
 3	C
 4	D
 5	E
+
+-- !count_1 --
+4
+
+-- !count_2 --
+5
+
+-- !count_3 --
+5
+
diff --git a/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy b/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy
@@ -115,6 +115,14 @@ suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock
 
         }
     }
+
+    def test_acid_count = {
+        qt_count_1 """ select count(*) from orc_full_acid; """ // 3 
+        qt_count_2 """ select count(*) from orc_full_acid_par; """  // 6
+        qt_count_3 """ select count(*) from orc_to_acid_compacted_tb; """ //4
+        qt_count_4 """ select count(*) from orc_acid_minor; """ //3
+        qt_count_5 """ select count(*) from orc_acid_major; """ //3
+    }
 
 
     String enabled = context.config.otherConfigs.get("enableHiveTest")
@@ -149,6 +157,10 @@ suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock
             test_acid()
             test_acid_write()
 
+
+            test_acid_count()
+
+
             sql """drop catalog if exists ${catalog_name}"""
         } finally {
         }

diff --git a/regression-test/suites/external_table_p2/hive/test_hive_translation_insert_only.groovy b/regression-test/suites/external_table_p2/hive/test_hive_translation_insert_only.groovy
@@ -45,6 +45,11 @@ suite("test_hive_translation_insert_only", "p2,external,hive,external_remote,ext
     qt_2 """ select * from parquet_insert_only_major order by id """ 
     qt_3 """ select * from orc_insert_only_minor order by id """ 
 
+    qt_count_1 """ select count(*) from text_insert_only """ //4 
+    qt_count_2 """ select count(*) from parquet_insert_only_major """ //5 
+    qt_count_3 """ select count(*) from orc_insert_only_minor """ //5
+
+
     sql """drop catalog ${hms_catalog_name};"""
 }