From ebcab7644a016b398322fddc24532658f823cb44 Mon Sep 17 00:00:00 2001 From: Aitozi Date: Mon, 4 Nov 2024 15:22:39 +0800 Subject: [PATCH] fix comments --- .../generated/core_configuration.html | 20 +++++------ .../flink_connector_configuration.html | 12 ------- .../cache/CacheManagerBenchmark.java | 3 +- .../lookup/LookupReaderBenchmark.java | 2 +- .../java/org/apache/paimon/CoreOptions.java | 24 ++++++------- .../apache/paimon/io/cache/CacheManager.java | 35 +++++++++++++++---- .../paimon/io/cache/CacheManagerTest.java | 3 +- .../cache/FileBasedRandomInputViewTest.java | 4 +-- .../utils/FileBasedBloomFilterTest.java | 3 +- .../operation/MemoryFileStoreWrite.java | 2 +- .../paimon/table/query/LocalTableQuery.java | 2 +- 11 files changed, 57 insertions(+), 53 deletions(-) diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html index 3e022e909fbec..b83c47e8de91b 100644 --- a/docs/layouts/shortcodes/generated/core_configuration.html +++ b/docs/layouts/shortcodes/generated/core_configuration.html @@ -399,6 +399,12 @@ MemorySize Max disk size for lookup cache, you can use this option to limit the use of local disks. + +
lookup.cache-max-memory-size
+ 256 mb + MemorySize + Max memory size for lookup data cache. +
lookup.cache-spill-compression
"zstd" @@ -418,10 +424,10 @@ Define the default false positive probability for lookup cache bloom filters. -
lookup.data-cache-max-memory-size
- 256 mb - MemorySize - Max memory size for lookup data cache. +
lookup.cache.high-prio-pool-ratio
+ 0.25 + Double + The fraction of cache memory that is reserved for high-priority data like index, filter.
lookup.hash-load-factor
@@ -429,12 +435,6 @@ Float The index load factor for lookup. - -
lookup.index-cache-max-memory-size
- 64 mb - MemorySize - Max memory size for lookup index cache. -
lookup.local-file-type
hash diff --git a/docs/layouts/shortcodes/generated/flink_connector_configuration.html b/docs/layouts/shortcodes/generated/flink_connector_configuration.html index 2e6fb1f38741b..ae89b4163c0c4 100644 --- a/docs/layouts/shortcodes/generated/flink_connector_configuration.html +++ b/docs/layouts/shortcodes/generated/flink_connector_configuration.html @@ -242,18 +242,6 @@ Boolean If true, flink sink will use managed memory for merge tree; otherwise, it will create an independent memory allocator. - -
sink.operator-uid.suffix
- (none) - String - Set the uid suffix for the writer, dynamic bucket assigner and committer operators. The uid format is ${UID_PREFIX}_${TABLE_NAME}_${USER_UID_SUFFIX}. If the uid suffix is not set, flink will automatically generate the operator uid, which may be incompatible when the topology changes. - - -
source.operator-uid.suffix
- (none) - String - Set the uid suffix for the source operators. After setting, the uid format is ${UID_PREFIX}_${TABLE_NAME}_${USER_UID_SUFFIX}. If the uid suffix is not set, flink will automatically generate the operator uid, which may be incompatible when the topology changes. -
source.checkpoint-align.enabled
false diff --git a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/cache/CacheManagerBenchmark.java b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/cache/CacheManagerBenchmark.java index 5f28964f19c10..9a64322e0bded 100644 --- a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/cache/CacheManagerBenchmark.java +++ b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/cache/CacheManagerBenchmark.java @@ -52,8 +52,7 @@ public void testCache() throws Exception { CacheKey key2 = CacheKey.forPageIndex(new RandomAccessFile(file2, "r"), 0, 0); for (Cache.CacheType cacheType : Cache.CacheType.values()) { - CacheManager cacheManager = - new CacheManager(cacheType, MemorySize.ofBytes(10), MemorySize.ofBytes(10)); + CacheManager cacheManager = new CacheManager(cacheType, MemorySize.ofBytes(10), 0.1); benchmark.addCase( String.format("cache-%s", cacheType.toString()), 5, diff --git a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupReaderBenchmark.java b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupReaderBenchmark.java index 9947b54a70f0c..bb14d8fab8a2f 100644 --- a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupReaderBenchmark.java +++ b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/lookup/LookupReaderBenchmark.java @@ -47,7 +47,7 @@ /** Benchmark for measuring the throughput of writing for lookup. 
*/ @ExtendWith(ParameterizedTestExtension.class) public class LookupReaderBenchmark extends AbstractLookupBenchmark { - private static final int QUERY_KEY_COUNT = 10000; + private static final int QUERY_KEY_COUNT = 100000; private final int recordCount; private final boolean bloomFilterEnabled; @TempDir Path tempDir; diff --git a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java index 9905ba01d0077..0e1096295751b 100644 --- a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java @@ -909,18 +909,18 @@ public class CoreOptions implements Serializable { .withDescription( "Spill compression for lookup cache, currently zstd, none, lz4 and lzo are supported."); - public static final ConfigOption LOOKUP_DATA_CACHE_MAX_MEMORY_SIZE = - key("lookup.data-cache-max-memory-size") + public static final ConfigOption LOOKUP_CACHE_MAX_MEMORY_SIZE = + key("lookup.cache-max-memory-size") .memoryType() .defaultValue(MemorySize.parse("256 mb")) - .withFallbackKeys("lookup.cache-max-memory-size") .withDescription("Max memory size for lookup data cache."); - public static final ConfigOption LOOKUP_INDEX_CACHE_MAX_MEMORY_SIZE = - key("lookup.index-cache-max-memory-size") - .memoryType() - .defaultValue(MemorySize.parse("64 mb")) - .withDescription("Max memory size for lookup index cache."); + public static final ConfigOption LOOKUP_CACHE_HIGH_PRIO_POOL_RATIO = + key("lookup.cache.high-prio-pool-ratio") + .doubleType() + .defaultValue(0.25) + .withDescription( + "The fraction of cache memory that is reserved for high-priority data like index, filter."); public static final ConfigOption LOOKUP_CACHE_BLOOM_FILTER_ENABLED = key("lookup.cache.bloom.filter.enabled") @@ -1835,12 +1835,12 @@ public LookupLocalFileType lookupLocalFileType() { return options.get(LOOKUP_LOCAL_FILE_TYPE); } - public MemorySize lookupDataCacheMaxMemory() { - return 
options.get(LOOKUP_DATA_CACHE_MAX_MEMORY_SIZE); + public MemorySize lookupCacheMaxMemory() { + return options.get(LOOKUP_CACHE_MAX_MEMORY_SIZE); } - public MemorySize lookupIndexCacheMaxMemory() { - return options.get(LOOKUP_INDEX_CACHE_MAX_MEMORY_SIZE); + public double lookupCacheHighPrioPoolRatio() { + return options.get(LOOKUP_CACHE_HIGH_PRIO_POOL_RATIO); } public long targetFileSize(boolean hasPrimaryKey) { diff --git a/paimon-common/src/main/java/org/apache/paimon/io/cache/CacheManager.java b/paimon-common/src/main/java/org/apache/paimon/io/cache/CacheManager.java index 259d5711a8bfe..1d666eee82a33 100644 --- a/paimon-common/src/main/java/org/apache/paimon/io/cache/CacheManager.java +++ b/paimon-common/src/main/java/org/apache/paimon/io/cache/CacheManager.java @@ -21,6 +21,10 @@ import org.apache.paimon.annotation.VisibleForTesting; import org.apache.paimon.memory.MemorySegment; import org.apache.paimon.options.MemorySize; +import org.apache.paimon.utils.Preconditions; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; @@ -29,6 +33,8 @@ /** Cache manager to cache bytes to paged {@link MemorySegment}s. */ public class CacheManager { + private static final Logger LOG = LoggerFactory.getLogger(CacheManager.class); + /** * Refreshing the cache comes with some costs, so not every time we visit the CacheManager, but * every 10 visits, refresh the LRU strategy. 
@@ -42,19 +48,34 @@ public class CacheManager { @VisibleForTesting public CacheManager(MemorySize maxMemorySize) { - this(Cache.CacheType.GUAVA, maxMemorySize, maxMemorySize); + this(Cache.CacheType.GUAVA, maxMemorySize, 0); } - public CacheManager(MemorySize dataMaxMemorySize, MemorySize indexMaxMemorySize) { - this(Cache.CacheType.GUAVA, dataMaxMemorySize, indexMaxMemorySize); + public CacheManager(MemorySize dataMaxMemorySize, double highPrioPoolRatio) { + this(Cache.CacheType.GUAVA, dataMaxMemorySize, highPrioPoolRatio); } public CacheManager( - Cache.CacheType cacheType, MemorySize maxMemorySize, MemorySize indexMaxMemorySize) { - this.dataCache = CacheBuilder.newBuilder(cacheType).maximumWeight(maxMemorySize).build(); - this.indexCache = - CacheBuilder.newBuilder(cacheType).maximumWeight(indexMaxMemorySize).build(); + Cache.CacheType cacheType, MemorySize maxMemorySize, double highPrioPoolRatio) { + Preconditions.checkArgument( + highPrioPoolRatio >= 0 && highPrioPoolRatio < 1, + "The high priority pool ratio should be in the range [0, 1)."); + MemorySize indexCacheSize = + MemorySize.ofBytes((long) (maxMemorySize.getBytes() * highPrioPoolRatio)); + MemorySize dataCacheSize = + MemorySize.ofBytes(maxMemorySize.getBytes() - indexCacheSize.getBytes()); + this.dataCache = CacheBuilder.newBuilder(cacheType).maximumWeight(dataCacheSize).build(); + if (highPrioPoolRatio == 0) { + this.indexCache = dataCache; + } else { + this.indexCache = + CacheBuilder.newBuilder(cacheType).maximumWeight(indexCacheSize).build(); + } this.fileReadCount = 0; + LOG.info( + "Initialize cache manager with data cache of {} and index cache of {}.", + dataCacheSize, + indexCacheSize); } @VisibleForTesting diff --git a/paimon-common/src/test/java/org/apache/paimon/io/cache/CacheManagerTest.java b/paimon-common/src/test/java/org/apache/paimon/io/cache/CacheManagerTest.java index 781765b36a4da..cf8076ac8b80c 100644 ---
a/paimon-common/src/test/java/org/apache/paimon/io/cache/CacheManagerTest.java +++ b/paimon-common/src/test/java/org/apache/paimon/io/cache/CacheManagerTest.java @@ -48,8 +48,7 @@ void testCaffeineCache() throws Exception { CacheKey key2 = CacheKey.forPageIndex(new RandomAccessFile(file2, "r"), 0, 0); for (Cache.CacheType cacheType : Cache.CacheType.values()) { - CacheManager cacheManager = - new CacheManager(cacheType, MemorySize.ofBytes(10), MemorySize.ofBytes(10)); + CacheManager cacheManager = new CacheManager(cacheType, MemorySize.ofBytes(10), 0.1); byte[] value = new byte[6]; Arrays.fill(value, (byte) 1); for (int i = 0; i < 10; i++) { diff --git a/paimon-common/src/test/java/org/apache/paimon/io/cache/FileBasedRandomInputViewTest.java b/paimon-common/src/test/java/org/apache/paimon/io/cache/FileBasedRandomInputViewTest.java index 7a5c9a1099523..eee17f3b85444 100644 --- a/paimon-common/src/test/java/org/apache/paimon/io/cache/FileBasedRandomInputViewTest.java +++ b/paimon-common/src/test/java/org/apache/paimon/io/cache/FileBasedRandomInputViewTest.java @@ -82,9 +82,7 @@ private void innerTest(int len, int maxFileReadCount) throws IOException { } File file = writeFile(bytes); - CacheManager cacheManager = - new CacheManager( - cacheType, MemorySize.ofKibiBytes(128), MemorySize.ofKibiBytes(128)); + CacheManager cacheManager = new CacheManager(cacheType, MemorySize.ofKibiBytes(128), 0.25); FileBasedRandomInputView view = new FileBasedRandomInputView( PageFileInput.create(file, 1024, null, 0, null), cacheManager); diff --git a/paimon-common/src/test/java/org/apache/paimon/utils/FileBasedBloomFilterTest.java b/paimon-common/src/test/java/org/apache/paimon/utils/FileBasedBloomFilterTest.java index 90072d639ea15..d1471fd74afbe 100644 --- a/paimon-common/src/test/java/org/apache/paimon/utils/FileBasedBloomFilterTest.java +++ b/paimon-common/src/test/java/org/apache/paimon/utils/FileBasedBloomFilterTest.java @@ -64,8 +64,7 @@ public void testProbe() throws IOException 
{ Arrays.stream(inputs).forEach(i -> builder.addHash(Integer.hashCode(i))); File file = writeFile(segment.getArray()); - CacheManager cacheManager = - new CacheManager(cacheType, MemorySize.ofMebiBytes(1), MemorySize.ofMebiBytes(1)); + CacheManager cacheManager = new CacheManager(cacheType, MemorySize.ofMebiBytes(1), 0.1); FileBasedBloomFilter filter = new FileBasedBloomFilter( PageFileInput.create(file, 1024, null, 0, null), diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/MemoryFileStoreWrite.java b/paimon-core/src/main/java/org/apache/paimon/operation/MemoryFileStoreWrite.java index a6abf813e23ed..ff99f06510c93 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/MemoryFileStoreWrite.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/MemoryFileStoreWrite.java @@ -80,7 +80,7 @@ public MemoryFileStoreWrite( this.options = options; this.cacheManager = new CacheManager( - options.lookupDataCacheMaxMemory(), options.lookupIndexCacheMaxMemory()); + options.lookupCacheMaxMemory(), options.lookupCacheHighPrioPoolRatio()); } @Override diff --git a/paimon-core/src/main/java/org/apache/paimon/table/query/LocalTableQuery.java b/paimon-core/src/main/java/org/apache/paimon/table/query/LocalTableQuery.java index d7bddba5b8c53..7416f589878a1 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/query/LocalTableQuery.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/query/LocalTableQuery.java @@ -102,7 +102,7 @@ public LocalTableQuery(FileStoreTable table) { LookupStoreFactory.create( options, new CacheManager( - options.lookupDataCacheMaxMemory(), - options.lookupIndexCacheMaxMemory()), + options.lookupCacheMaxMemory(), + options.lookupCacheHighPrioPoolRatio()), new RowCompactedSerializer(keyType).createSliceComparator());