Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[flink] Check parallelism of committer operator while runtime. #3237

Merged
merged 4 commits into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ public DataStreamSink<?> sinkFrom(
typeInfo,
new CommitterOperator<>(
true,
false,
commitUser,
createCommitterFactory(),
createCommittableStateManager()))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

package org.apache.paimon.flink.sink;

import org.apache.paimon.utils.Preconditions;

import org.apache.flink.runtime.state.StateInitializationContext;
import org.apache.flink.runtime.state.StateSnapshotContext;
import org.apache.flink.streaming.api.operators.AbstractStreamOperator;
Expand Down Expand Up @@ -55,6 +57,9 @@ public class CommitterOperator<CommitT, GlobalCommitT> extends AbstractStreamOpe
*/
private final boolean streamingCheckpointEnabled;

/** Whether to check the parallelism while runtime. */
private final boolean forceSingleParallelism;

/**
* This commitUser is valid only for new jobs. After the job starts, this commitUser will be
* recorded into the states of write and commit operators. When the job restarts, commitUser
Expand Down Expand Up @@ -83,10 +88,12 @@ public class CommitterOperator<CommitT, GlobalCommitT> extends AbstractStreamOpe

public CommitterOperator(
boolean streamingCheckpointEnabled,
boolean forceSingleParallelism,
String initialCommitUser,
Committer.Factory<CommitT, GlobalCommitT> committerFactory,
CommittableStateManager<GlobalCommitT> committableStateManager) {
this.streamingCheckpointEnabled = streamingCheckpointEnabled;
this.forceSingleParallelism = forceSingleParallelism;
this.initialCommitUser = initialCommitUser;
this.committablesPerCheckpoint = new TreeMap<>();
this.committerFactory = checkNotNull(committerFactory);
Expand All @@ -98,6 +105,10 @@ public CommitterOperator(
public void initializeState(StateInitializationContext context) throws Exception {
super.initializeState(context);

Preconditions.checkArgument(
!forceSingleParallelism || getRuntimeContext().getNumberOfParallelSubtasks() == 1,
"Committer Operator parallelism in paimon MUST be one.");

this.currentWatermark = Long.MIN_VALUE;
this.endInput = false;
// each job can only have one user name and this name must be consistent across restarts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ protected DataStreamSink<?> doCommit(DataStream<Committable> written, String com
OneInputStreamOperator<Committable, Committable> committerOperator =
new CommitterOperator<>(
streamingCheckpointEnabled,
true,
commitUser,
createCommitterFactory(streamingCheckpointEnabled),
createCommittableStateManager());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ protected DataStreamSink<?> doCommit(
new MultiTableCommittableTypeInfo(),
new CommitterOperator<>(
streamingCheckpointEnabled,
false,
commitUser,
createCommitterFactory(),
createCommittableStateManager()))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import org.apache.flink.metrics.groups.OperatorMetricGroup;
import org.apache.flink.metrics.groups.UnregisteredMetricsGroup;
import org.apache.flink.runtime.checkpoint.OperatorSubtaskState;
import org.apache.flink.runtime.jobgraph.OperatorID;
import org.apache.flink.runtime.state.StateInitializationContext;
import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
import org.apache.flink.streaming.api.watermark.Watermark;
Expand Down Expand Up @@ -651,6 +652,19 @@ public void testCommitMetrics() throws Exception {
write.close();
}

@Test
public void testParallelism() throws Exception {
FileStoreTable table = createFileStoreTable();
String commitUser = UUID.randomUUID().toString();
OneInputStreamOperator<Committable, Committable> operator =
createCommitterOperator(table, commitUser, new NoopCommittableStateManager());
try (OneInputStreamOperatorTestHarness<Committable, Committable> testHarness =
createTestHarness(operator, 10, 10, 3)) {
Assertions.assertThatCode(testHarness::open)
.hasMessage("Committer Operator parallelism in paimon MUST be one.");
}
}

// ------------------------------------------------------------------------
// Test utils
// ------------------------------------------------------------------------
Expand Down Expand Up @@ -682,10 +696,25 @@ private OneInputStreamOperatorTestHarness<Committable, Committable> createLossyT

private OneInputStreamOperatorTestHarness<Committable, Committable> createTestHarness(
OneInputStreamOperator<Committable, Committable> operator) throws Exception {
return createTestHarness(operator, 1, 1, 0);
}

private OneInputStreamOperatorTestHarness<Committable, Committable> createTestHarness(
OneInputStreamOperator<Committable, Committable> operator,
int maxParallelism,
int parallelism,
int subTaskIndex)
throws Exception {
TypeSerializer<Committable> serializer =
new CommittableTypeInfo().createSerializer(new ExecutionConfig());
OneInputStreamOperatorTestHarness<Committable, Committable> harness =
new OneInputStreamOperatorTestHarness<>(operator, serializer);
new OneInputStreamOperatorTestHarness<>(
operator,
maxParallelism,
parallelism,
subTaskIndex,
serializer,
new OperatorID());
harness.setup(serializer);
return harness;
}
Expand All @@ -695,6 +724,7 @@ protected OneInputStreamOperator<Committable, Committable> createCommitterOperat
String commitUser,
CommittableStateManager<ManifestCommittable> committableStateManager) {
return new CommitterOperator<>(
true,
true,
commitUser == null ? initialCommitUser : commitUser,
(user, metricGroup) ->
Expand All @@ -710,6 +740,7 @@ protected OneInputStreamOperator<Committable, Committable> createCommitterOperat
CommittableStateManager<ManifestCommittable> committableStateManager,
ThrowingConsumer<StateInitializationContext, Exception> initializeFunction) {
return new CommitterOperator<Committable, ManifestCommittable>(
true,
true,
commitUser == null ? initialCommitUser : commitUser,
(user, metricGroup) ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,7 @@ public void testCommitMetrics() throws Exception {
CommitterOperator<MultiTableCommittable, WrappedManifestCommittable> operator =
new CommitterOperator<>(
true,
false,
initialCommitUser,
(user, metricGroup) ->
new StoreMultiCommitter(
Expand All @@ -652,6 +653,7 @@ public void testCommitMetrics() throws Exception {
CommitterOperator<MultiTableCommittable, WrappedManifestCommittable> operator =
new CommitterOperator<>(
true,
false,
initialCommitUser,
(user, metricGroup) ->
new StoreMultiCommitter(
Expand Down
Loading