Skip to content

Commit

Permalink
[core] Adjust compaction.max.file-num of bucketed append table to 5 (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
JingsongLi authored Aug 7, 2024
1 parent 3210a3e commit 975f644
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 16 deletions.
2 changes: 1 addition & 1 deletion docs/content/append-table/streaming.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ control the strategy of compaction:
</tr>
<tr>
<td><h5>compaction.max.file-num</h5></td>
<td style="word-wrap: break-word;">50</td>
<td style="word-wrap: break-word;">5</td>
<td>Integer</td>
<td>For file set [f_0,...,f_N], the maximum file number to trigger a compaction for append table, even if sum(size(f_i)) &lt; targetFileSize. This value avoids keeping too many pending small files, which slows down performance.</td>
</tr>
Expand Down
4 changes: 2 additions & 2 deletions docs/layouts/shortcodes/generated/core_configuration.html
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,9 @@
</tr>
<tr>
<td><h5>compaction.max.file-num</h5></td>
<td style="word-wrap: break-word;">50</td>
<td style="word-wrap: break-word;">(none)</td>
<td>Integer</td>
<td>For file set [f_0,...,f_N], the maximum file number to trigger a compaction for append-only table, even if sum(size(f_i)) &lt; targetFileSize. This value avoids pending too much small files, which slows down the performance.</td>
<td>For file set [f_0,...,f_N], the maximum file number to trigger a compaction for append-only table, even if sum(size(f_i)) &lt; targetFileSize. This value avoids keeping too many pending small files.<ul><li>Default value of Append Table is '50'.</li><li>Default value of Bucketed Append Table is '5'.</li></ul></td>
</tr>
<tr>
<td><h5>compaction.min.file-num</h5></td>
Expand Down
18 changes: 12 additions & 6 deletions paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -522,12 +522,18 @@ public class CoreOptions implements Serializable {
public static final ConfigOption<Integer> COMPACTION_MAX_FILE_NUM =
key("compaction.max.file-num")
.intType()
.defaultValue(50)
.noDefaultValue()
.withFallbackKeys("compaction.early-max.file-num")
.withDescription(
"For file set [f_0,...,f_N], the maximum file number to trigger a compaction "
+ "for append-only table, even if sum(size(f_i)) < targetFileSize. This value "
+ "avoids pending too much small files, which slows down the performance.");
Description.builder()
.text(
"For file set [f_0,...,f_N], the maximum file number to trigger a compaction "
+ "for append-only table, even if sum(size(f_i)) < targetFileSize. This value "
+ "avoids pending too much small files.")
.list(
text("Default value of Append Table is '50'."),
text("Default value of Bucketed Append Table is '5'."))
.build());

public static final ConfigOption<ChangelogProducer> CHANGELOG_PRODUCER =
key("changelog-producer")
Expand Down Expand Up @@ -1679,8 +1685,8 @@ public int compactionMinFileNum() {
return options.get(COMPACTION_MIN_FILE_NUM);
}

public int compactionMaxFileNum() {
return options.get(COMPACTION_MAX_FILE_NUM);
public Optional<Integer> compactionMaxFileNum() {
return options.getOptional(COMPACTION_MAX_FILE_NUM);
}

public long dynamicBucketTargetRowNum() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ public AppendOnlyTableCompactionCoordinator(
this.targetFileSize = coreOptions.targetFileSize(false);
this.compactionFileSize = coreOptions.compactionFileSize(false);
this.minFileNum = coreOptions.compactionMinFileNum();
this.maxFileNum = coreOptions.compactionMaxFileNum();
// this is global compaction, avoid too many compaction tasks
this.maxFileNum = coreOptions.compactionMaxFileNum().orElse(50);
}

public List<AppendOnlyCompactionTask> run() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ public AppendOnlyFileStoreWrite(
this.pathFactory = pathFactory;
this.targetFileSize = options.targetFileSize(false);
this.compactionMinFileNum = options.compactionMinFileNum();
this.compactionMaxFileNum = options.compactionMaxFileNum();
this.compactionMaxFileNum = options.compactionMaxFileNum().orElse(5);
this.commitForceCompact = options.commitForceCompact();
this.skipCompaction = options.writeOnly();
this.fileCompression = options.fileCompression();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
import java.util.Objects;
import java.util.UUID;

import static org.apache.paimon.CoreOptions.COMPACTION_MAX_FILE_NUM;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.fail;

Expand Down Expand Up @@ -131,6 +132,7 @@ public void beforeEach() throws Exception {
Options secondOptions = new Options();
secondOptions.setString("bucket", "1");
secondOptions.setString("bucket-key", "a");
secondOptions.set(COMPACTION_MAX_FILE_NUM, 50);
Schema secondTableSchema =
new Schema(
rowType2.getFields(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,13 @@ class DeletionVectorTest extends PaimonSparkTestBase {
} else {
""
}
spark.sql(
s"""
|CREATE TABLE T (id INT, name STRING)
|TBLPROPERTIES ('deletion-vectors.enabled' = 'true', 'bucket' = '$bucket' $bucketKey)
|""".stripMargin)
spark.sql(s"""
|CREATE TABLE T (id INT, name STRING)
|TBLPROPERTIES (
| 'deletion-vectors.enabled' = 'true',
| 'compaction.max.file-num' = '50',
| 'bucket' = '$bucket' $bucketKey)
|""".stripMargin)

val table = loadTable("T")
val dvMaintainerFactory =
Expand Down

0 comments on commit 975f644

Please sign in to comment.