From 5c639e3a72244c8095dcf7472cdb37fee0949f3e Mon Sep 17 00:00:00 2001 From: Socrates Date: Fri, 22 Nov 2024 14:38:08 +0800 Subject: [PATCH] [fix](meta-cache) fix refreshOnlyCatalogCache when use_meta_cache = false (#44363) ### What problem does this PR solve? Bug: When a Hive catalog is configured with use_meta_cache=false, refreshing the catalog does not update the database list. Fix: Set initialized = false in `refreshOnlyCatalogCache()`. --- .../doris/datasource/ExternalCatalog.java | 1 + .../hive/test_hive_use_meta_cache.out | 106 ++++++++++- .../hive/test_hive_use_meta_cache.groovy | 171 +++++++++++------- 3 files changed, 207 insertions(+), 71 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java index 50edcbbf31f037..5a05baf33364df 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java @@ -412,6 +412,7 @@ private void refreshOnlyCatalogCache(boolean invalidCache) { if (useMetaCache.get() && metaCache != null) { metaCache.invalidateAll(); } else if (!useMetaCache.get()) { + this.initialized = false; for (ExternalDatabase db : idToDb.values()) { db.setUnInitialized(invalidCache); } diff --git a/regression-test/data/external_table_p0/hive/test_hive_use_meta_cache.out b/regression-test/data/external_table_p0/hive/test_hive_use_meta_cache.out index a5fc7ede82b8be..4d54c601c876a2 100644 --- a/regression-test/data/external_table_p0/hive/test_hive_use_meta_cache.out +++ b/regression-test/data/external_table_p0/hive/test_hive_use_meta_cache.out @@ -28,15 +28,20 @@ test_use_meta_cache_db_hive test_use_meta_cache_tbl_hive -- !sql08 -- -test_use_meta_cache_tbl_hive -- !sql09 -- +p1=part1 +p1=part2 -- !sql10 -- -test_use_meta_cache_db_hive -- !sql11 -- +-- !sql12 -- +test_use_meta_cache_db_hive + +-- !sql13 -- + -- !sql01 -- -- !sql02 -- @@ -61,17 +66,114 @@ 
test_use_meta_cache_db_hive -- !sql05 -- -- !sql06 -- +test_use_meta_cache_tbl_hive -- !sql07 -- test_use_meta_cache_tbl_hive -- !sql08 -- + +-- !sql09 -- +p1=part1 +p1=part2 + +-- !sql10 -- +test_use_meta_cache_partitioned_tbl_hive +test_use_meta_cache_tbl_hive + +-- !sql11 -- + +-- !sql12 -- +test_use_meta_cache_db_hive + +-- !sql13 -- + +-- !sql01 -- + +-- !sql02 -- +test_use_meta_cache_db + +-- !sql03 -- +test_use_meta_cache_tbl + +-- !sql04 -- + +-- !sql05 -- + +-- !sql01 -- + +-- !sql02 -- + +-- !sql03 -- +test_use_meta_cache_db_hive + +-- !sql04 -- + +-- !sql05 -- + +-- !sql06 -- + +-- !sql07 -- test_use_meta_cache_tbl_hive +-- !sql08 -- + -- !sql09 -- +p1=part1 +p1=part2 -- !sql10 -- + +-- !sql11 -- + +-- !sql12 -- +test_use_meta_cache_db_hive + +-- !sql13 -- + +-- !sql01 -- + +-- !sql02 -- +test_use_meta_cache_db + +-- !sql03 -- +test_use_meta_cache_tbl + +-- !sql04 -- + +-- !sql05 -- + +-- !sql01 -- + +-- !sql02 -- + +-- !sql03 -- test_use_meta_cache_db_hive +-- !sql04 -- + +-- !sql05 -- + +-- !sql06 -- +test_use_meta_cache_tbl_hive + +-- !sql07 -- +test_use_meta_cache_tbl_hive + +-- !sql08 -- + +-- !sql09 -- +p1=part1 +p1=part2 + +-- !sql10 -- +test_use_meta_cache_partitioned_tbl_hive +test_use_meta_cache_tbl_hive + -- !sql11 -- +-- !sql12 -- +test_use_meta_cache_db_hive + +-- !sql13 -- + diff --git a/regression-test/suites/external_table_p0/hive/test_hive_use_meta_cache.groovy b/regression-test/suites/external_table_p0/hive/test_hive_use_meta_cache.groovy index 3562ce3126725d..df12fc74898f06 100644 --- a/regression-test/suites/external_table_p0/hive/test_hive_use_meta_cache.groovy +++ b/regression-test/suites/external_table_p0/hive/test_hive_use_meta_cache.groovy @@ -23,80 +23,113 @@ suite("test_hive_use_meta_cache", "p0,external,hive,external_docker,external_doc return; } - for (String hivePrefix : ["hive3", "hive3"]) { + for (String hivePrefix : ["hive2", "hive3"]) { setHivePrefix(hivePrefix) try { - String hms_port = 
context.config.otherConfigs.get(hivePrefix + "HmsPort") - String hdfs_port = context.config.otherConfigs.get(hivePrefix + "HdfsPort") - String catalog = "test_${hivePrefix}_use_meta_cache" - String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + def test_use_meta_cache = { Boolean use_meta_cache -> + String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String hdfs_port = context.config.otherConfigs.get(hivePrefix + "HdfsPort") + String use_meta_cache_string = use_meta_cache ? "true" : "false" + String catalog = "test_${hivePrefix}_use_meta_cache_${use_meta_cache}" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") - sql """drop catalog if exists ${catalog}""" - sql """create catalog if not exists ${catalog} properties ( - 'type'='hms', - 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}', - 'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}', - 'use_meta_cache' = 'true' - );""" + sql """drop catalog if exists ${catalog}""" + sql """create catalog if not exists ${catalog} properties ( + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}', + 'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}', + 'use_meta_cache' = '${use_meta_cache_string}' + );""" + + // create from Doris, the cache will be filled immediately + String database= "test_use_meta_cache_db" + String table = "test_use_meta_cache_tbl" + String database_hive = "test_use_meta_cache_db_hive" + String table_hive = "test_use_meta_cache_tbl_hive" + String partitioned_table_hive = "test_use_meta_cache_partitioned_tbl_hive" + + sql "switch ${catalog}" + sql "drop database if exists ${database}" + sql "drop database if exists ${database_hive}" + order_qt_sql01 "show databases like '%${database}%'"; + sql "drop database if exists ${database}" + sql "create database ${database}" + order_qt_sql02 "show databases like '%${database}%'"; + sql "use ${database}" + sql "create table ${table} (k1 int)" + 
order_qt_sql03 "show tables" + sql "drop table ${table}" + order_qt_sql04 "show tables" + sql "drop database ${database}" + order_qt_sql05 "show databases like '%${database}%'"; - // create from Doris, the cache will be filled immediately - String database= "test_use_meta_cache_db" - String table = "test_use_meta_cache_tbl" - String database_hive = "test_use_meta_cache_db_hive" - String table_hive = "test_use_meta_cache_tbl_hive" - sql "switch ${catalog}" - sql "drop database if exists ${database}" - sql "drop database if exists ${database_hive}" - order_qt_sql01 "show databases like '%${database}%'"; - sql "drop database if exists ${database}" - sql "create database ${database}" - order_qt_sql02 "show databases like '%${database}%'"; - sql "use ${database}" - sql "create table ${table} (k1 int)" - order_qt_sql03 "show tables" - sql "drop table ${table}" - order_qt_sql04 "show tables" - sql "drop database ${database}" - order_qt_sql05 "show databases like '%${database}%'"; - - // create from Hive, the cache has different behavior - order_qt_sql01 "show databases like '%${database_hive}%'"; - hive_docker "drop database if exists ${database_hive}" - hive_docker "create database ${database_hive}" - // not see - order_qt_sql02 "show databases like '%${database_hive}%'"; - // but can use - sql "use ${database_hive}" - sql "refresh catalog ${catalog}" - // can see - order_qt_sql03 "show databases like '%${database_hive}%'"; - // show tables first to fill cache - order_qt_sql04 "show tables" - hive_docker "create table ${database_hive}.${table_hive} (k1 int)" - // not see - order_qt_sql05 "show tables" - // but can select - sql "select * from ${table_hive}" - // still not see - order_qt_sql06 "show tables" - sql "refresh database ${database_hive}" - // can see - order_qt_sql07 "show tables" - hive_docker "drop table ${database_hive}.${table_hive}" - // still can see - order_qt_sql08 "show tables" - sql "refresh database ${database_hive}" - // can not see - order_qt_sql09 
"show tables" - hive_docker "drop database ${database_hive}" - // still can see - order_qt_sql10 "show databases like '%${database_hive}%'"; - sql "refresh catalog ${catalog}" - // can not see - order_qt_sql11 "show databases like '%${database_hive}%'"; + // create from Hive, the cache has different behavior + order_qt_sql01 "show databases like '%${database_hive}%'"; + hive_docker "drop database if exists ${database_hive}" + hive_docker "create database ${database_hive}" + // not see + order_qt_sql02 "show databases like '%${database_hive}%'"; + if (use_meta_cache) { + // if use meta cache, can use + sql "use ${database_hive}" + sql "refresh catalog ${catalog}" + } else { + // if not use meta cache, can not use + sql "refresh catalog ${catalog}" + sql "use ${database_hive}" + } + + // can see + order_qt_sql03 "show databases like '%${database_hive}%'"; + // show tables first to fill cache + order_qt_sql04 "show tables" + hive_docker "create table ${database_hive}.${table_hive} (k1 int)" + // not see + order_qt_sql05 "show tables" + if (use_meta_cache) { + // but can select + sql "select * from ${table_hive}" + // still not see + order_qt_sql06 "show tables" + sql "refresh database ${database_hive}" + } else { + // if not use meta cache, can not select + sql "refresh database ${database_hive}" + sql "select * from ${table_hive}" + order_qt_sql06 "show tables" + } + // can see + order_qt_sql07 "show tables" + + // test Hive Metastore table partition file listing + hive_docker "create table ${database_hive}.${partitioned_table_hive} (k1 int) partitioned by (p1 string)" + sql "refresh catalog ${catalog}" + order_qt_sql08 "show partitions from ${partitioned_table_hive}" + hive_docker "alter table ${database_hive}.${partitioned_table_hive} add partition (p1='part1')" + hive_docker "alter table ${database_hive}.${partitioned_table_hive} add partition (p1='part2')" + // can see because partition file listing is not cached + order_qt_sql09 "show partitions from 
${partitioned_table_hive}" + + // drop tables + hive_docker "drop table ${database_hive}.${partitioned_table_hive}" + hive_docker "drop table ${database_hive}.${table_hive}" + // still can see + order_qt_sql10 "show tables" + sql "refresh database ${database_hive}" + // can not see + order_qt_sql11 "show tables" + + // drop database + hive_docker "drop database ${database_hive}" + // still can see + order_qt_sql12 "show databases like '%${database_hive}%'"; + sql "refresh catalog ${catalog}" + // can not see + order_qt_sql13 "show databases like '%${database_hive}%'"; + } + test_use_meta_cache(true) + test_use_meta_cache(false) } finally { } } } - -