From 698927db69e167365d8daf639814e2031d65550d Mon Sep 17 00:00:00 2001 From: Jingsong Date: Tue, 6 Aug 2024 10:24:06 +0800 Subject: [PATCH 1/3] [core] Adjust compaction.max.file-num of bucketed append table to 5 --- docs/content/append-table/streaming.md | 2 +- .../generated/core_configuration.html | 4 ++-- .../java/org/apache/paimon/CoreOptions.java | 18 ++++++++++++------ .../AppendOnlyTableCompactionCoordinator.java | 3 ++- .../operation/AppendOnlyFileStoreWrite.java | 2 +- 5 files changed, 18 insertions(+), 11 deletions(-) diff --git a/docs/content/append-table/streaming.md b/docs/content/append-table/streaming.md index c3d64a6500f0..3758c7f569db 100644 --- a/docs/content/append-table/streaming.md +++ b/docs/content/append-table/streaming.md @@ -115,7 +115,7 @@ control the strategy of compaction:
compaction.max.file-num
- 50 + 5 Integer For file set [f_0,...,f_N], the maximum file number to trigger a compaction for append table, even if sum(size(f_i)) < targetFileSize. This value avoids pending too much small files, which slows down the performance. diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html index bc6f8b5c4efc..c2eba113a5d0 100644 --- a/docs/layouts/shortcodes/generated/core_configuration.html +++ b/docs/layouts/shortcodes/generated/core_configuration.html @@ -124,9 +124,9 @@
compaction.max.file-num
- 50 + (none) Integer - For file set [f_0,...,f_N], the maximum file number to trigger a compaction for append-only table, even if sum(size(f_i)) < targetFileSize. This value avoids pending too much small files, which slows down the performance. + For file set [f_0,...,f_N], the maximum file number to trigger a compaction for append-only table, even if sum(size(f_i)) < targetFileSize. This value avoids pending too much small files.
compaction.min.file-num
diff --git a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java index 156915e2365d..bda7b2ce3297 100644 --- a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java @@ -521,12 +521,18 @@ public class CoreOptions implements Serializable { public static final ConfigOption COMPACTION_MAX_FILE_NUM = key("compaction.max.file-num") .intType() - .defaultValue(50) + .noDefaultValue() .withFallbackKeys("compaction.early-max.file-num") .withDescription( - "For file set [f_0,...,f_N], the maximum file number to trigger a compaction " - + "for append-only table, even if sum(size(f_i)) < targetFileSize. This value " - + "avoids pending too much small files, which slows down the performance."); + Description.builder() + .text( + "For file set [f_0,...,f_N], the maximum file number to trigger a compaction " + + "for append-only table, even if sum(size(f_i)) < targetFileSize. This value " + + "avoids pending too much small files.") + .list( + text("Default value of Append Table is '50'."), + text("Default value of Bucketed Append Table is '5'.")) + .build()); public static final ConfigOption CHANGELOG_PRODUCER = key("changelog-producer") @@ -1671,8 +1677,8 @@ public int compactionMinFileNum() { return options.get(COMPACTION_MIN_FILE_NUM); } - public int compactionMaxFileNum() { - return options.get(COMPACTION_MAX_FILE_NUM); + public Optional compactionMaxFileNum() { + return options.getOptional(COMPACTION_MAX_FILE_NUM); } public long dynamicBucketTargetRowNum() { diff --git a/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java b/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java index 07ed3c9dd930..d54221403d66 100644 --- a/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java +++ b/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java @@ -95,7 +95,8 @@ public AppendOnlyTableCompactionCoordinator( this.targetFileSize = coreOptions.targetFileSize(false); this.compactionFileSize = coreOptions.compactionFileSize(false); this.minFileNum = coreOptions.compactionMinFileNum(); - this.maxFileNum = coreOptions.compactionMaxFileNum(); + // this is global compaction, avoid too many compaction tasks + this.maxFileNum = coreOptions.compactionMaxFileNum().orElse(50); } public List run() { diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java b/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java index 94744887bbc7..322c303be7bd 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java @@ -105,7 +105,7 @@ public AppendOnlyFileStoreWrite( this.pathFactory = pathFactory; this.targetFileSize = options.targetFileSize(false); this.compactionMinFileNum = options.compactionMinFileNum(); - this.compactionMaxFileNum = options.compactionMaxFileNum(); + this.compactionMaxFileNum = options.compactionMaxFileNum().orElse(5); this.commitForceCompact = options.commitForceCompact(); this.skipCompaction = options.writeOnly(); this.fileCompression = options.fileCompression(); From b53f589fb35c7723ea0303f611b9623a4c2a960a Mon Sep 17 00:00:00 2001 From: Jingsong Date: Tue, 6 Aug 2024 10:34:08 +0800 Subject: [PATCH 2/3] fix --- .../org/apache/paimon/flink/sink/StoreMultiCommitterTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/sink/StoreMultiCommitterTest.java b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/sink/StoreMultiCommitterTest.java index 832da65f7c13..10e432f3c8c2 100644 --- a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/sink/StoreMultiCommitterTest.java +++ b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/sink/StoreMultiCommitterTest.java @@ -70,6 +70,7 @@ import java.util.Objects; import java.util.UUID; +import static org.apache.paimon.CoreOptions.COMPACTION_MAX_FILE_NUM; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.fail; @@ -131,6 +132,7 @@ public void beforeEach() throws Exception { Options secondOptions = new Options(); secondOptions.setString("bucket", "1"); secondOptions.setString("bucket-key", "a"); + secondOptions.set(COMPACTION_MAX_FILE_NUM, 50); Schema secondTableSchema = new Schema( rowType2.getFields(), From da1bbb3735a85b7e9cd5fca6c477d8fe1f6d78fc Mon Sep 17 00:00:00 2001 From: Jingsong Date: Tue, 6 Aug 2024 11:47:06 +0800 Subject: [PATCH 3/3] fix test --- .../apache/paimon/spark/sql/DeletionVectorTest.scala | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala b/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala index 68aeffe55b98..26d07ce06dad 100644 --- a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala +++ b/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala @@ -45,11 +45,13 @@ class DeletionVectorTest extends PaimonSparkTestBase { } else { "" } - spark.sql( - s""" - |CREATE TABLE T (id INT, name STRING) - |TBLPROPERTIES ('deletion-vectors.enabled' = 'true', 'bucket' = '$bucket' $bucketKey) - |""".stripMargin) + spark.sql(s""" + |CREATE TABLE T (id INT, name STRING) + |TBLPROPERTIES ( + | 'deletion-vectors.enabled' = 'true', + | 'compaction.max.file-num' = '50', + | 'bucket' = '$bucket' $bucketKey) + |""".stripMargin) val table = loadTable("T") val dvMaintainerFactory =