From 6be0114be0562d79dc9a8b73ee1809a658975fb9 Mon Sep 17 00:00:00 2001
From: kakachen
Date: Thu, 14 Nov 2024 01:25:27 +0800
Subject: [PATCH] [Opt](multi-catalog) Improve performance by introducing a cache
of directory file listings when getting splits for each query.
---
.../java/org/apache/doris/common/Config.java | 4 +
.../org/apache/doris/common/CacheFactory.java | 54 +-
.../org/apache/doris/common/EmptyCache.java | 247 ++++++
.../apache/doris/common/EvictableCache.java | 466 ++++++++++++
.../doris/common/EvictableCacheBuilder.java | 286 +++++++
.../doris/datasource/ExternalSchemaCache.java | 2 +-
.../datasource/hive/HMSExternalTable.java | 4 +-
.../datasource/hive/HiveMetaStoreCache.java | 70 +-
.../datasource/hive/source/HiveScanNode.java | 14 +-
.../datasource/hudi/source/HudiScanNode.java | 6 +-
.../iceberg/IcebergMetadataCache.java | 4 +-
.../org/apache/doris/fs/DirectoryLister.java | 29 +
.../org/apache/doris/fs/FileSystemCache.java | 2 +-
.../doris/fs/FileSystemDirectoryLister.java | 37 +
.../doris/fs/FileSystemIOException.java | 65 ++
.../doris/fs/RemoteFileRemoteIterator.java | 47 ++
.../org/apache/doris/fs/RemoteIterator.java | 27 +
.../apache/doris/fs/SimpleRemoteIterator.java | 45 ++
.../TransactionDirectoryListingCacheKey.java | 64 ++
...ransactionScopeCachingDirectoryLister.java | 216 ++++++
...ionScopeCachingDirectoryListerFactory.java | 59 ++
.../translator/PhysicalPlanTranslator.java | 20 +-
.../doris/planner/SingleNodePlanner.java | 16 +-
.../doris/common/TestEvictableCache.java | 708 ++++++++++++++++++
...actionScopeCachingDirectoryListerTest.java | 174 +++++
25 files changed, 2622 insertions(+), 44 deletions(-)
create mode 100644 fe/fe-core/src/main/java/org/apache/doris/common/EmptyCache.java
create mode 100644 fe/fe-core/src/main/java/org/apache/doris/common/EvictableCache.java
create mode 100644 fe/fe-core/src/main/java/org/apache/doris/common/EvictableCacheBuilder.java
create mode 100644 fe/fe-core/src/main/java/org/apache/doris/fs/DirectoryLister.java
create mode 100644 fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemDirectoryLister.java
create mode 100644 fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemIOException.java
create mode 100644 fe/fe-core/src/main/java/org/apache/doris/fs/RemoteFileRemoteIterator.java
create mode 100644 fe/fe-core/src/main/java/org/apache/doris/fs/RemoteIterator.java
create mode 100644 fe/fe-core/src/main/java/org/apache/doris/fs/SimpleRemoteIterator.java
create mode 100644 fe/fe-core/src/main/java/org/apache/doris/fs/TransactionDirectoryListingCacheKey.java
create mode 100644 fe/fe-core/src/main/java/org/apache/doris/fs/TransactionScopeCachingDirectoryLister.java
create mode 100644 fe/fe-core/src/main/java/org/apache/doris/fs/TransactionScopeCachingDirectoryListerFactory.java
create mode 100644 fe/fe-core/src/test/java/org/apache/doris/common/TestEvictableCache.java
create mode 100644 fe/fe-core/src/test/java/org/apache/doris/fs/TransactionScopeCachingDirectoryListerTest.java
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 66dbff5abed8e64..13a8501f99cc09f 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -2153,6 +2153,10 @@ public class Config extends ConfigBase {
"Max cache number of external table row count"})
public static long max_external_table_row_count_cache_num = 100000;
+ @ConfField(description = {"每个查询的外表文件元数据缓存的最大文件数量。",
+ "Max cache file number of external table split file meta cache at query level."})
+ public static long max_external_table_split_file_meta_cache_num = 100000;
+
/**
* Max cache loader thread-pool size.
* Max thread pool size for loading external meta cache
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/CacheFactory.java b/fe/fe-core/src/main/java/org/apache/doris/common/CacheFactory.java
index 50f46647975e235..4b2b8a2a6cd0b3b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/CacheFactory.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/CacheFactory.java
@@ -19,11 +19,13 @@
import com.github.benmanes.caffeine.cache.AsyncCacheLoader;
import com.github.benmanes.caffeine.cache.AsyncLoadingCache;
+import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.CacheLoader;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.LoadingCache;
import com.github.benmanes.caffeine.cache.RemovalListener;
import com.github.benmanes.caffeine.cache.Ticker;
+import com.github.benmanes.caffeine.cache.Weigher;
import org.jetbrains.annotations.NotNull;
import java.time.Duration;
@@ -44,28 +46,57 @@
* The cache can be created with the above parameters using the buildCache and buildAsyncCache methods.
*
*/
-public class CacheFactory {
+public class CacheFactory {
private OptionalLong expireAfterWriteSec;
private OptionalLong refreshAfterWriteSec;
- private long maxSize;
+ private OptionalLong maxSize;
private boolean enableStats;
// Ticker is used to provide a time source for the cache.
// Only used for test, to provide a fake time source.
// If not provided, the system time is used.
private Ticker ticker;
+ private OptionalLong maxWeight;
+
+ private Weigher weigher;
+
public CacheFactory(
OptionalLong expireAfterWriteSec,
OptionalLong refreshAfterWriteSec,
long maxSize,
boolean enableStats,
Ticker ticker) {
+ this(expireAfterWriteSec, refreshAfterWriteSec, OptionalLong.of(maxSize), enableStats, ticker,
+ OptionalLong.empty(), null);
+ }
+
+ public CacheFactory(
+ OptionalLong expireAfterWriteSec,
+ OptionalLong refreshAfterWriteSec,
+ boolean enableStats,
+ Ticker ticker,
+ long maxWeight,
+ Weigher weigher) {
+ this(expireAfterWriteSec, refreshAfterWriteSec, OptionalLong.empty(), enableStats, ticker,
+ OptionalLong.of(maxWeight), weigher);
+ }
+
+ private CacheFactory(
+ OptionalLong expireAfterWriteSec,
+ OptionalLong refreshAfterWriteSec,
+ OptionalLong maxSize,
+ boolean enableStats,
+ Ticker ticker,
+ OptionalLong maxWeight,
+ Weigher weigher) {
this.expireAfterWriteSec = expireAfterWriteSec;
this.refreshAfterWriteSec = refreshAfterWriteSec;
this.maxSize = maxSize;
this.enableStats = enableStats;
this.ticker = ticker;
+ this.maxWeight = maxWeight;
+ this.weigher = weigher;
}
// Build a loading cache, without executor, it will use fork-join pool for refresh
@@ -85,6 +116,11 @@ public LoadingCache buildCache(CacheLoader cacheLoader,
return builder.build(cacheLoader);
}
+ public Cache buildCache() {
+ Caffeine