Skip to content

Commit

Permalink
[doc] first-row merge engine adds changelog producer usage documentat…
Browse files Browse the repository at this point in the history
…ion (#4298)
  • Loading branch information
zhuangchong authored Oct 9, 2024
1 parent 9559ca6 commit 79dae8e
Show file tree
Hide file tree
Showing 7 changed files with 14 additions and 11 deletions.
2 changes: 1 addition & 1 deletion docs/content/primary-key-table/merge-engine/aggregation.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ title: "Aggregation"
weight: 3
type: docs
aliases:
- /cdc-ingestion/merge-engin/aggregation.html
- /primary-key-table/merge-engin/aggregation.html
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
Expand Down
9 changes: 7 additions & 2 deletions docs/content/primary-key-table/merge-engine/first-row.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ title: "First Row"
weight: 4
type: docs
aliases:
- /cdc-ingestion/merge-engin/first-row.html
- /primary-key-table/merge-engin/first-row.html
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
Expand All @@ -29,11 +29,16 @@ under the License.
By specifying `'merge-engine' = 'first-row'`, users can keep the first row of the same primary key. It differs from the
`deduplicate` merge engine in that the `first-row` merge engine generates an insert-only changelog.

{{< hint info >}}
The `first-row` merge engine only supports the `none` and `lookup` changelog producers.
Streaming queries must use the `lookup` [changelog producer]({{< ref "primary-key-table/changelog-producer" >}}).
{{< /hint >}}

{{< hint info >}}
1. You cannot specify [sequence.field]({{< ref "primary-key-table/sequence-rowkind#sequence-field" >}}).
2. `DELETE` and `UPDATE_BEFORE` messages are not accepted. You can configure `ignore-delete` to ignore these two kinds of records.
3. Visibility guarantee: For tables with the First Row engine, level 0 files only become visible after compaction.
   So by default, compaction is synchronous, and if asynchronous compaction is turned on, there may be delays in the data.
{{< /hint >}}
{{< /hint >}}

This is of great help in replacing log deduplication in streaming computation.
2 changes: 1 addition & 1 deletion docs/content/primary-key-table/merge-engine/overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ title: "Overview"
weight: 1
type: docs
aliases:
- /cdc-ingestion/merge-engin/overview.html
- /primary-key-table/merge-engin/overview.html
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ title: "Partial Update"
weight: 2
type: docs
aliases:
- /cdc-ingestion/merge-engin/partial-update.html
- /primary-key-table/merge-engin/partial-update.html
---

<!--
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ public static void validateTableSchema(TableSchema schema) {
if (options.changelogProducer() != ChangelogProducer.LOOKUP
&& options.changelogProducer() != ChangelogProducer.NONE) {
throw new IllegalArgumentException(
"Only support 'none' and 'lookup' changelog-producer on FIRST_MERGE merge engine");
"Only support 'none' and 'lookup' changelog-producer on FIRST_ROW merge engine");
}
}

Expand Down Expand Up @@ -540,7 +540,7 @@ private static void validateSequenceField(TableSchema schema, CoreOptions option

if (options.mergeEngine() == MergeEngine.FIRST_ROW) {
throw new IllegalArgumentException(
"Do not support use sequence field on FIRST_MERGE merge engine.");
"Do not support use sequence field on FIRST_ROW merge engine.");
}

if (schema.crossPartitionUpdate()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
import org.apache.paimon.mergetree.compact.MergeFunctionFactory;
import org.apache.paimon.operation.FileStoreScan;
import org.apache.paimon.operation.KeyValueFileStoreScan;
import org.apache.paimon.options.Options;
import org.apache.paimon.predicate.Predicate;
import org.apache.paimon.schema.KeyValueFieldsExtractor;
import org.apache.paimon.schema.TableSchema;
Expand Down Expand Up @@ -72,8 +71,7 @@ class PrimaryKeyFileStoreTable extends AbstractFileStoreTable {
public KeyValueFileStore store() {
if (lazyStore == null) {
RowType rowType = tableSchema.logicalRowType();
Options conf = Options.fromMap(tableSchema.options());
CoreOptions options = new CoreOptions(conf);
CoreOptions options = CoreOptions.fromMap(tableSchema.options());
KeyValueFieldsExtractor extractor =
PrimaryKeyTableUtils.PrimaryKeyFieldsExtractor.EXTRACTOR;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ public void testSequenceField() {
options.put(MERGE_ENGINE.key(), CoreOptions.MergeEngine.FIRST_ROW.toString());
assertThatThrownBy(() -> validateTableSchema(schema))
.hasMessageContaining(
"Do not support use sequence field on FIRST_MERGE merge engine.");
"Do not support use sequence field on FIRST_ROW merge engine.");

options.put(FIELDS_PREFIX + ".f3." + AGG_FUNCTION, "max");
assertThatThrownBy(() -> validateTableSchema(schema))
Expand Down

0 comments on commit 79dae8e

Please sign in to comment.