From 63d7a689cad715fb06f51921003cfd169a2dcb4a Mon Sep 17 00:00:00 2001 From: Jingsong Date: Thu, 11 Jul 2024 15:35:01 +0800 Subject: [PATCH] [core] Adjust default value of target-file-size --- .../generated/core_configuration.html | 2 +- .../java/org/apache/paimon/CoreOptions.java | 57 ++++++------------- .../org/apache/paimon/options/MemorySize.java | 4 ++ .../AppendOnlyTableCompactionCoordinator.java | 5 +- .../operation/AppendOnlyFileStoreWrite.java | 2 +- .../operation/KeyValueFileStoreWrite.java | 4 +- ...endOnlyTableCompactionCoordinatorTest.java | 15 +---- .../ChangelogMergeTreeRewriterTest.java | 3 +- .../paimon/mergetree/ContainsLevelsTest.java | 3 +- .../paimon/mergetree/LookupLevelsTest.java | 3 +- .../paimon/mergetree/MergeTreeTestBase.java | 6 +- 11 files changed, 37 insertions(+), 67 deletions(-) diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html index 37813ba5636b..be308eeb20f0 100644 --- a/docs/layouts/shortcodes/generated/core_configuration.html +++ b/docs/layouts/shortcodes/generated/core_configuration.html @@ -784,7 +784,7 @@
target-file-size
(none) MemorySize - Target size of a file. + Target size of a file. primary key table: the default value is 128 MB. append table: the default value is 256 MB.

write-buffer-for-append
diff --git a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java index b8d932e5bd65..22b697c6f474 100644 --- a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java @@ -57,6 +57,8 @@ import java.util.stream.Collectors; import static org.apache.paimon.options.ConfigOptions.key; +import static org.apache.paimon.options.MemorySize.VALUE_128_MB; +import static org.apache.paimon.options.MemorySize.VALUE_256_MB; import static org.apache.paimon.options.description.TextElement.text; import static org.apache.paimon.utils.Preconditions.checkArgument; @@ -439,7 +441,14 @@ public class CoreOptions implements Serializable { key("target-file-size") .memoryType() .noDefaultValue() - .withDescription("Target size of a file."); + .withDescription( + Description.builder() + .text("Target size of a file.") + .linebreak() + .list(text("primary key table: the default value is 128 MB.")) + .linebreak() + .list(text("append table: the default value is 256 MB.")) + .build()); public static final ConfigOption NUM_SORTED_RUNS_COMPACTION_TRIGGER = key("num-sorted-run.compaction-trigger") @@ -1567,19 +1576,17 @@ public MemorySize lookupCacheMaxMemory() { return options.get(LOOKUP_CACHE_MAX_MEMORY_SIZE); } - public long targetFileSize(TableType tableType) { - MemorySize memorySize = options.get(TARGET_FILE_SIZE); - if (memorySize == null) { - memorySize = tableType.getDefaultMemorySize(); - } - return memorySize.getBytes(); + public long targetFileSize(boolean hasPrimaryKey) { + return options.getOptional(TARGET_FILE_SIZE) + .orElse(hasPrimaryKey ? 
VALUE_128_MB : VALUE_256_MB) + .getBytes(); } - public long compactionFileSize(TableType tableType) { + public long compactionFileSize(boolean hasPrimaryKey) { // file size to join the compaction, we don't process on middle file size to avoid // compact a same file twice (the compression is not calculate so accurately. the output // file maybe be less than target file generated by rolling file write). - return targetFileSize(tableType) / 10 * 7; + return targetFileSize(hasPrimaryKey) / 10 * 7; } public int numSortedRunCompactionTrigger() { @@ -2553,36 +2560,4 @@ public InlineElement getDescription() { return text(description); } } - - /** Specifies the table type. */ - public enum TableType implements DescribedEnum { - PRIMARY_KEY_TABLE( - "primaryKeyTable", MemorySize.ofMebiBytes(128), "The table of primaryKey."), - APPEND_ONLY_TABLE( - "appendOnlyTable", MemorySize.ofMebiBytes(256), "The table of appendOnly."); - - private final String name; - private final MemorySize defaultMemorySize; - private final String description; - - TableType(String name, MemorySize defaultMemorySize, String description) { - this.name = name; - this.defaultMemorySize = defaultMemorySize; - this.description = description; - } - - @Override - public String toString() { - return name; - } - - @Override - public InlineElement getDescription() { - return text(description); - } - - public MemorySize getDefaultMemorySize() { - return defaultMemorySize; - } - } } diff --git a/paimon-common/src/main/java/org/apache/paimon/options/MemorySize.java b/paimon-common/src/main/java/org/apache/paimon/options/MemorySize.java index d40450e0ce79..9b5d7d7ae47e 100644 --- a/paimon-common/src/main/java/org/apache/paimon/options/MemorySize.java +++ b/paimon-common/src/main/java/org/apache/paimon/options/MemorySize.java @@ -56,6 +56,10 @@ public class MemorySize implements java.io.Serializable, Comparable public static final MemorySize MAX_VALUE = new MemorySize(Long.MAX_VALUE); + public static final MemorySize 
VALUE_128_MB = MemorySize.ofMebiBytes(128); + + public static final MemorySize VALUE_256_MB = MemorySize.ofMebiBytes(256); + private static final List ORDERED_UNITS = Arrays.asList(BYTES, KILO_BYTES, MEGA_BYTES, GIGA_BYTES, TERA_BYTES); diff --git a/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java b/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java index a21b2e9654fb..07ed3c9dd930 100644 --- a/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java +++ b/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java @@ -92,9 +92,8 @@ public AppendOnlyTableCompactionCoordinator( } this.streamingMode = isStreaming; CoreOptions coreOptions = table.coreOptions(); - this.targetFileSize = coreOptions.targetFileSize(CoreOptions.TableType.APPEND_ONLY_TABLE); - this.compactionFileSize = - coreOptions.compactionFileSize(CoreOptions.TableType.APPEND_ONLY_TABLE); + this.targetFileSize = coreOptions.targetFileSize(false); + this.compactionFileSize = coreOptions.compactionFileSize(false); this.minFileNum = coreOptions.compactionMinFileNum(); this.maxFileNum = coreOptions.compactionMaxFileNum(); } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java b/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java index 25620833878a..12757df9b272 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java @@ -98,7 +98,7 @@ public AppendOnlyFileStoreWrite( this.rowType = rowType; this.fileFormat = options.fileFormat(); this.pathFactory = pathFactory; - this.targetFileSize = options.targetFileSize(CoreOptions.TableType.APPEND_ONLY_TABLE); + this.targetFileSize = options.targetFileSize(false); this.compactionMinFileNum = 
options.compactionMinFileNum(); this.compactionMaxFileNum = options.compactionMaxFileNum(); this.commitForceCompact = options.commitForceCompact(); diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/KeyValueFileStoreWrite.java b/paimon-core/src/main/java/org/apache/paimon/operation/KeyValueFileStoreWrite.java index e82ecaf1d819..133cfe540b65 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/KeyValueFileStoreWrite.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/KeyValueFileStoreWrite.java @@ -154,7 +154,7 @@ public KeyValueFileStoreWrite( valueType, options.fileFormat(), format2PathFactory, - options.targetFileSize(CoreOptions.TableType.PRIMARY_KEY_TABLE)); + options.targetFileSize(true)); this.keyComparatorSupplier = keyComparatorSupplier; this.valueEqualiserSupplier = valueEqualiserSupplier; this.mfFactory = mfFactory; @@ -243,7 +243,7 @@ private CompactManager createCompactManager( levels, compactStrategy, keyComparator, - options.compactionFileSize(CoreOptions.TableType.PRIMARY_KEY_TABLE), + options.compactionFileSize(true), options.numSortedRunStopTrigger(), rewriter, compactionMetrics == null diff --git a/paimon-core/src/test/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinatorTest.java b/paimon-core/src/test/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinatorTest.java index 60446068c083..6542d65dd629 100644 --- a/paimon-core/src/test/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinatorTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinatorTest.java @@ -69,13 +69,7 @@ public void testNoCompactTask() { public void testMinSizeCompactTask() { List files = generateNewFiles( - 100, - appendOnlyFileStoreTable - .coreOptions() - .targetFileSize( - CoreOptions.TableType.APPEND_ONLY_TABLE) - / 3 - + 1); + 100, appendOnlyFileStoreTable.coreOptions().targetFileSize(false) / 3 + 1); assertTasks(files, 100 / 3); } @@ -83,12 +77,7 @@ 
public void testMinSizeCompactTask() { public void testFilterMiddleFile() { List files = generateNewFiles( - 100, - appendOnlyFileStoreTable - .coreOptions() - .targetFileSize(CoreOptions.TableType.APPEND_ONLY_TABLE) - / 10 - * 8); + 100, appendOnlyFileStoreTable.coreOptions().targetFileSize(false) / 10 * 8); assertTasks(files, 0); } diff --git a/paimon-core/src/test/java/org/apache/paimon/mergetree/ChangelogMergeTreeRewriterTest.java b/paimon-core/src/test/java/org/apache/paimon/mergetree/ChangelogMergeTreeRewriterTest.java index 0b591b36f82e..c72fac83c7c8 100644 --- a/paimon-core/src/test/java/org/apache/paimon/mergetree/ChangelogMergeTreeRewriterTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/mergetree/ChangelogMergeTreeRewriterTest.java @@ -69,6 +69,7 @@ import java.util.UUID; import java.util.stream.Collectors; +import static org.apache.paimon.options.MemorySize.VALUE_128_MB; import static org.apache.paimon.utils.FileStorePathFactoryTest.createNonPartFactory; import static org.junit.jupiter.api.Assertions.fail; @@ -200,7 +201,7 @@ private KeyValueFileWriterFactory createWriterFactory( valueType, new FlushingFileFormat(formatIdentifier), Collections.singletonMap(formatIdentifier, createNonPartFactory(path)), - CoreOptions.TableType.PRIMARY_KEY_TABLE.getDefaultMemorySize().getBytes()) + VALUE_128_MB.getBytes()) .build(BinaryRow.EMPTY_ROW, 0, new CoreOptions(new Options())); } diff --git a/paimon-core/src/test/java/org/apache/paimon/mergetree/ContainsLevelsTest.java b/paimon-core/src/test/java/org/apache/paimon/mergetree/ContainsLevelsTest.java index 1c80889a4957..781bab0dfb1a 100644 --- a/paimon-core/src/test/java/org/apache/paimon/mergetree/ContainsLevelsTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/mergetree/ContainsLevelsTest.java @@ -63,6 +63,7 @@ import java.util.UUID; import static org.apache.paimon.io.DataFileTestUtils.row; +import static org.apache.paimon.options.MemorySize.VALUE_128_MB; import static 
org.apache.paimon.utils.FileStorePathFactoryTest.createNonPartFactory; import static org.assertj.core.api.Assertions.assertThat; @@ -226,7 +227,7 @@ private KeyValueFileWriterFactory createWriterFactory() { rowType, new FlushingFileFormat(identifier), pathFactoryMap, - CoreOptions.TableType.PRIMARY_KEY_TABLE.getDefaultMemorySize().getBytes()) + VALUE_128_MB.getBytes()) .build(BinaryRow.EMPTY_ROW, 0, new CoreOptions(new Options())); } diff --git a/paimon-core/src/test/java/org/apache/paimon/mergetree/LookupLevelsTest.java b/paimon-core/src/test/java/org/apache/paimon/mergetree/LookupLevelsTest.java index 08b5eee176b2..7b89f409dc8c 100644 --- a/paimon-core/src/test/java/org/apache/paimon/mergetree/LookupLevelsTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/mergetree/LookupLevelsTest.java @@ -64,6 +64,7 @@ import static org.apache.paimon.KeyValue.UNKNOWN_SEQUENCE; import static org.apache.paimon.io.DataFileTestUtils.row; +import static org.apache.paimon.options.MemorySize.VALUE_128_MB; import static org.apache.paimon.utils.FileStorePathFactoryTest.createNonPartFactory; import static org.assertj.core.api.Assertions.assertThat; @@ -306,7 +307,7 @@ private KeyValueFileWriterFactory createWriterFactory() { rowType, new FlushingFileFormat(identifier), pathFactoryMap, - CoreOptions.TableType.PRIMARY_KEY_TABLE.getDefaultMemorySize().getBytes()) + VALUE_128_MB.getBytes()) .build(BinaryRow.EMPTY_ROW, 0, new CoreOptions(new Options())); } diff --git a/paimon-core/src/test/java/org/apache/paimon/mergetree/MergeTreeTestBase.java b/paimon-core/src/test/java/org/apache/paimon/mergetree/MergeTreeTestBase.java index 3b4eb017fa77..6a6848ce1d73 100644 --- a/paimon-core/src/test/java/org/apache/paimon/mergetree/MergeTreeTestBase.java +++ b/paimon-core/src/test/java/org/apache/paimon/mergetree/MergeTreeTestBase.java @@ -192,7 +192,7 @@ public List valueFields(TableSchema schema) { valueType, flushingAvro, pathFactoryMap, - 
this.options.targetFileSize(CoreOptions.TableType.PRIMARY_KEY_TABLE)); + this.options.targetFileSize(true)); writerFactory = writerFactoryBuilder.build(BinaryRow.EMPTY_ROW, 0, this.options); compactWriterFactory = writerFactoryBuilder.build(BinaryRow.EMPTY_ROW, 0, this.options); writer = createMergeTreeWriter(Collections.emptyList()); @@ -289,7 +289,7 @@ public void testPrepareCommitRecycleReference() throws Exception { options.sortedRunSizeRatio(), options.numSortedRunCompactionTrigger()), comparator, - options.targetFileSize(CoreOptions.TableType.PRIMARY_KEY_TABLE), + options.targetFileSize(true), options.numSortedRunStopTrigger(), new TestRewriter()); writer = createMergeTreeWriter(dataFileMetas, mockFailResultCompactionManager); @@ -542,7 +542,7 @@ private MergeTreeCompactManager createCompactManager( new Levels(comparator, files, options.numLevels()), strategy, comparator, - options.compactionFileSize(CoreOptions.TableType.PRIMARY_KEY_TABLE), + options.compactionFileSize(true), options.numSortedRunStopTrigger(), new TestRewriter(), null,