diff --git a/docs/content/append-table/streaming.md b/docs/content/append-table/streaming.md
index c3d64a6500f0..3758c7f569db 100644
--- a/docs/content/append-table/streaming.md
+++ b/docs/content/append-table/streaming.md
@@ -115,7 +115,7 @@ control the strategy of compaction:
         <td><h5>compaction.max.file-num</h5></td>
-        <td style="word-wrap: break-word;">50</td>
+        <td style="word-wrap: break-word;">5</td>
         <td>Integer</td>
         <td>For file set [f_0,...,f_N], the maximum file number to trigger a compaction for append table, even if sum(size(f_i)) < targetFileSize. This value avoids pending too much small files, which slows down the performance.</td>
     </tr>
diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html
index 1f5fb0dc1e82..784719d76b30 100644
--- a/docs/layouts/shortcodes/generated/core_configuration.html
+++ b/docs/layouts/shortcodes/generated/core_configuration.html
@@ -130,9 +130,9 @@
         <tr>
             <td><h5>compaction.max.file-num</h5></td>
-            <td style="word-wrap: break-word;">50</td>
+            <td style="word-wrap: break-word;">(none)</td>
             <td>Integer</td>
-            <td>For file set [f_0,...,f_N], the maximum file number to trigger a compaction for append-only table, even if sum(size(f_i)) < targetFileSize. This value avoids pending too much small files, which slows down the performance.</td>
+            <td>For file set [f_0,...,f_N], the maximum file number to trigger a compaction for append-only table, even if sum(size(f_i)) < targetFileSize. This value avoids pending too much small files.</td>
         </tr>
         <tr>
             <td><h5>compaction.min.file-num</h5></td>
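Since `compaction.max.file-num` no longer carries a single default, tables that relied on the old behavior can pin the value explicitly. A minimal sketch, using only `Options` and `CoreOptions` calls that already appear in this patch (the surrounding option values are illustrative):

```java
import static org.apache.paimon.CoreOptions.COMPACTION_MAX_FILE_NUM;

import org.apache.paimon.options.Options;

public class PinMaxFileNum {
    public static void main(String[] args) {
        // Table options for a bucketed append table; keep the pre-patch
        // behavior by setting compaction.max.file-num back to 50 explicitly.
        Options options = new Options();
        options.setString("bucket", "1");
        options.setString("bucket-key", "a");
        options.set(COMPACTION_MAX_FILE_NUM, 50);

        // Without the explicit set(...) above, the option would now be empty
        // instead of defaulting to 50.
        System.out.println(options.getOptional(COMPACTION_MAX_FILE_NUM)); // Optional[50]
    }
}
```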
diff --git a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
index 660580284f64..bd10ff000895 100644
--- a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
+++ b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
@@ -522,12 +522,18 @@ public class CoreOptions implements Serializable {
     public static final ConfigOption<Integer> COMPACTION_MAX_FILE_NUM =
             key("compaction.max.file-num")
                     .intType()
-                    .defaultValue(50)
+                    .noDefaultValue()
                     .withFallbackKeys("compaction.early-max.file-num")
                     .withDescription(
-                            "For file set [f_0,...,f_N], the maximum file number to trigger a compaction "
-                                    + "for append-only table, even if sum(size(f_i)) < targetFileSize. This value "
-                                    + "avoids pending too much small files, which slows down the performance.");
+                            Description.builder()
+                                    .text(
+                                            "For file set [f_0,...,f_N], the maximum file number to trigger a compaction "
+                                                    + "for append-only table, even if sum(size(f_i)) < targetFileSize. This value "
+                                                    + "avoids pending too much small files.")
+                                    .list(
+                                            text("Default value of Append Table is '50'."),
+                                            text("Default value of Bucketed Append Table is '5'."))
+                                    .build());

     public static final ConfigOption<ChangelogProducer> CHANGELOG_PRODUCER =
             key("changelog-producer")
@@ -1679,8 +1685,8 @@ public int compactionMinFileNum() {
         return options.get(COMPACTION_MIN_FILE_NUM);
     }

-    public int compactionMaxFileNum() {
-        return options.get(COMPACTION_MAX_FILE_NUM);
+    public Optional<Integer> compactionMaxFileNum() {
+        return options.getOptional(COMPACTION_MAX_FILE_NUM);
     }

     public long dynamicBucketTargetRowNum() {
diff --git a/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java b/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java
index 07ed3c9dd930..d54221403d66 100644
--- a/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java
+++ b/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java
@@ -95,7 +95,8 @@ public AppendOnlyTableCompactionCoordinator(
         this.targetFileSize = coreOptions.targetFileSize(false);
         this.compactionFileSize = coreOptions.compactionFileSize(false);
         this.minFileNum = coreOptions.compactionMinFileNum();
-        this.maxFileNum = coreOptions.compactionMaxFileNum();
+        // this is global compaction, avoid too many compaction tasks
+        this.maxFileNum = coreOptions.compactionMaxFileNum().orElse(50);
     }

     public List<AppendOnlyCompactionTask> run() {
diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java b/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java
index e914f695090d..744a130caf24 100644
--- a/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java
+++ b/paimon-core/src/main/java/org/apache/paimon/operation/AppendOnlyFileStoreWrite.java
@@ -105,7 +105,7 @@ public AppendOnlyFileStoreWrite(
         this.pathFactory = pathFactory;
         this.targetFileSize = options.targetFileSize(false);
         this.compactionMinFileNum = options.compactionMinFileNum();
-        this.compactionMaxFileNum = options.compactionMaxFileNum();
+        this.compactionMaxFileNum = options.compactionMaxFileNum().orElse(5);
         this.commitForceCompact = options.commitForceCompact();
         this.skipCompaction = options.writeOnly();
         this.fileCompression = options.fileCompression();
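The per-path fallbacks above are the heart of the change: the coordinator (global, unaware-bucket compaction) keeps the old default of 50, while the bucketed append write path now compacts earlier at 5. A standalone sketch of that resolution logic, written with plain `java.util.Optional` rather than the Paimon classes themselves:

```java
import java.util.Optional;

class MaxFileNumResolution {
    // Mirrors CoreOptions#compactionMaxFileNum() after this patch: the option
    // has no default, so each caller supplies its own fallback.
    static int forGlobalCoordinator(Optional<Integer> configured) {
        // unaware-bucket (global) compaction: keep the old default of 50
        return configured.orElse(50);
    }

    static int forBucketedAppendWrite(Optional<Integer> configured) {
        // bucketed append table write path: trigger compaction earlier, default 5
        return configured.orElse(5);
    }

    public static void main(String[] args) {
        System.out.println(forGlobalCoordinator(Optional.empty()));    // 50
        System.out.println(forBucketedAppendWrite(Optional.empty()));  // 5
        System.out.println(forBucketedAppendWrite(Optional.of(50)));   // 50 when set explicitly
    }
}
```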
diff --git a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/sink/StoreMultiCommitterTest.java b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/sink/StoreMultiCommitterTest.java
index 832da65f7c13..10e432f3c8c2 100644
--- a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/sink/StoreMultiCommitterTest.java
+++ b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/sink/StoreMultiCommitterTest.java
@@ -70,6 +70,7 @@
 import java.util.Objects;
 import java.util.UUID;

+import static org.apache.paimon.CoreOptions.COMPACTION_MAX_FILE_NUM;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.fail;

@@ -131,6 +132,7 @@ public void beforeEach() throws Exception {
         Options secondOptions = new Options();
         secondOptions.setString("bucket", "1");
         secondOptions.setString("bucket-key", "a");
+        secondOptions.set(COMPACTION_MAX_FILE_NUM, 50);
         Schema secondTableSchema =
                 new Schema(
                         rowType2.getFields(),
diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala b/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala
index 68aeffe55b98..26d07ce06dad 100644
--- a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala
+++ b/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala
@@ -45,11 +45,13 @@ class DeletionVectorTest extends PaimonSparkTestBase {
       } else {
         ""
       }
-      spark.sql(
-        s"""
-           |CREATE TABLE T (id INT, name STRING)
-           |TBLPROPERTIES ('deletion-vectors.enabled' = 'true', 'bucket' = '$bucket' $bucketKey)
-           |""".stripMargin)
+      spark.sql(s"""
+                   |CREATE TABLE T (id INT, name STRING)
+                   |TBLPROPERTIES (
+                   |  'deletion-vectors.enabled' = 'true',
+                   |  'compaction.max.file-num' = '50',
+                   |  'bucket' = '$bucket' $bucketKey)
+                   |""".stripMargin)

       val table = loadTable("T")
       val dvMaintainerFactory =
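Both test updates pin `'compaction.max.file-num' = '50'` so the existing assertions keep seeing the old compaction cadence. For reference, a hypothetical sketch of the same pinning when defining a table programmatically, assuming Paimon's `org.apache.paimon.schema.Schema` builder (`newBuilder`, `column`, `option`, `build`) is available; the column and bucket-key names are illustrative, not taken from this patch:

```java
import org.apache.paimon.schema.Schema;
import org.apache.paimon.types.DataTypes;

public class CreateTableWithPinnedCompaction {
    public static void main(String[] args) {
        // Equivalent of the TBLPROPERTIES used in the Spark test above:
        // deletion vectors on, compaction.max.file-num pinned to 50.
        Schema schema =
                Schema.newBuilder()
                        .column("id", DataTypes.INT())
                        .column("name", DataTypes.STRING())
                        .option("deletion-vectors.enabled", "true")
                        .option("compaction.max.file-num", "50")
                        .option("bucket", "1")
                        .option("bucket-key", "id")
                        .build();

        // The pinned value travels with the table definition.
        System.out.println(schema.options().get("compaction.max.file-num")); // 50
    }
}
```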