Commit 7e49fb7
update
wxplovecc committed Aug 5, 2024
1 parent b1fac38
Showing 9 changed files with 32 additions and 27 deletions.
6 changes: 0 additions & 6 deletions docs/layouts/shortcodes/generated/core_configuration.html
@@ -605,12 +605,6 @@
<td>Integer</td>
<td>Max split size should be cached for one task while scanning. If splits size cached in enumerator are greater than tasks size multiply by this value, scanner will pause scanning.</td>
</tr>
<tr>
<td><h5>scan.shuffle-by-partition</h5></td>
<td style="word-wrap: break-word;">false</td>
<td>Boolean</td>
<td>Whether shuffle by partition and bucket.</td>
</tr>
<tr>
<td><h5>scan.mode</h5></td>
<td style="word-wrap: break-word;">default</td>
@@ -128,6 +128,12 @@
<td>Boolean</td>
<td>Whether to force the removal of the normalize node when streaming read. Note: This is dangerous and is likely to cause data errors if downstream is used to calculate aggregation and the input is not complete changelog.</td>
</tr>
<tr>
<td><h5>streaming-read.shuffle-by-partition</h5></td>
<td style="word-wrap: break-word;">true</td>
<td>Boolean</td>
<td>Whether shuffle by partition and bucket when streaming read. </td>
</tr>
<tr>
<td><h5>scan.split-enumerator.batch-size</h5></td>
<td style="word-wrap: break-word;">10</td>
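For readers wondering how the renamed option is meant to be used, the sketch below shows one way to override it for a single streaming read via a Flink SQL dynamic-option hint. The table name t and the environment setup are illustrative assumptions rather than part of this commit, and some Flink versions require table.dynamic-table-options.enabled to be set before hints are honored.

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
// Hypothetical Paimon table t in the current catalog; the hint overrides
// streaming-read.shuffle-by-partition for this query only.
tEnv.executeSql(
        "SELECT * FROM t /*+ OPTIONS('streaming-read.shuffle-by-partition' = 'false') */");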
10 changes: 0 additions & 10 deletions paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
@@ -310,12 +310,6 @@ public class CoreOptions implements Serializable {
"Max split size should be cached for one task while scanning. "
+ "If splits size cached in enumerator are greater than tasks size multiply by this value, scanner will pause scanning.");

public static final ConfigOption<Boolean> SCAN_SHUFFLE_BY_PARTITION =
key("scan.shuffle-by-partition")
.booleanType()
.defaultValue(false)
.withDescription("Whether shuffle by partition and bucket.");

@Immutable
public static final ConfigOption<MergeEngine> MERGE_ENGINE =
key("merge-engine")
@@ -1601,10 +1595,6 @@ public int scanSplitMaxPerTask() {
return options.get(SCAN_MAX_SPLITS_PER_TASK);
}

public boolean scanShuffleByPartition() {
return options.get(SCAN_SHUFFLE_BY_PARTITION);
}

public int localSortMaxNumFileHandles() {
return options.get(LOCAL_SORT_MAX_NUM_FILE_HANDLES);
}
@@ -217,6 +217,13 @@ public class FlinkConnectorOptions {
+ " Note: This is dangerous and is likely to cause data errors if downstream"
+ " is used to calculate aggregation and the input is not complete changelog.");

public static final ConfigOption<Boolean> STREAMING_READ_SHUFFLE_BY_PARTITION =
key("streaming-read.shuffle-by-partition")
.booleanType()
.defaultValue(true)
.withDescription(
"Whether shuffle by partition and bucket when streaming read.");

/**
* Weight of writer buffer in managed memory, Flink will compute the memory size for writer
* according to the weight, the actual memory used depends on the running environment.
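Note that the new connector-side option defaults to true, while the removed core option scan.shuffle-by-partition defaulted to false, so partition-and-bucket shuffling becomes the out-of-the-box behavior for streaming reads unless explicitly disabled. A minimal sketch of how that default resolves through Paimon's Options API (an empty Options instance is used purely for illustration):

Options conf = new Options();
// No user-supplied value, so get(...) falls back to the option's declared default: true.
boolean shuffleByPartition = conf.get(FlinkConnectorOptions.STREAMING_READ_SHUFFLE_BY_PARTITION);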
@@ -76,14 +76,14 @@ public class ContinuousFileSplitEnumerator

private final int splitMaxNum;

private final boolean shuffleByPartition;

@Nullable protected Long nextSnapshotId;

protected boolean finished = false;

private boolean stopTriggerScan = false;

private boolean shuffleByPartition = false;

public ContinuousFileSplitEnumerator(
SplitEnumeratorContext<FileStoreSourceSplit> context,
Collection<FileStoreSourceSplit> remainSplits,
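This hunk only turns the flag into a final field populated by the constructor (see the builder changes below) instead of a mutable field hard-coded to false; the assignment logic that consumes it lies outside the hunk. As a hedged sketch of how such a flag is typically applied when the enumerator suggests an owner subtask for a split, mirroring the partitioner change at the end of this commit (assignSuggestedTask and its signature are assumptions, not code from this diff):

private int assignSuggestedTask(FileStoreSourceSplit split, int parallelism) {
    DataSplit dataSplit = (DataSplit) split.split();
    if (shuffleByPartition) {
        // Hash partition and bucket together: one partition/bucket pair always lands on one subtask.
        return ChannelComputer.select(dataSplit.partition(), dataSplit.bucket(), parallelism);
    }
    // Hash the bucket alone: the same bucket id from every partition is read by the same subtask.
    return ChannelComputer.select(dataSplit.bucket(), parallelism);
}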
@@ -19,7 +19,9 @@
package org.apache.paimon.flink.source;

import org.apache.paimon.CoreOptions;
import org.apache.paimon.flink.FlinkConnectorOptions;
import org.apache.paimon.flink.metrics.FlinkMetricRegistry;
import org.apache.paimon.options.Options;
import org.apache.paimon.table.BucketMode;
import org.apache.paimon.table.source.ReadBuilder;
import org.apache.paimon.table.source.StreamDataTableScan;
@@ -99,15 +101,15 @@ protected SplitEnumerator<FileStoreSourceSplit, PendingSplitsCheckpoint> buildEn
Collection<FileStoreSourceSplit> splits,
@Nullable Long nextSnapshotId,
StreamTableScan scan) {
CoreOptions coreOptions = CoreOptions.fromMap(options);
Options options = Options.fromMap(this.options);
return new ContinuousFileSplitEnumerator(
context,
splits,
nextSnapshotId,
coreOptions.continuousDiscoveryInterval().toMillis(),
options.get(CoreOptions.CONTINUOUS_DISCOVERY_INTERVAL).toMillis(),
scan,
bucketMode,
coreOptions.scanSplitMaxPerTask(),
coreOptions.scanShuffleByPartition());
options.get(CoreOptions.SCAN_MAX_SPLITS_PER_TASK),
options.get(FlinkConnectorOptions.STREAMING_READ_SHUFFLE_BY_PARTITION));
}
}
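The builder now parses the raw map with Options.fromMap instead of CoreOptions.fromMap, presumably because the new flag lives in FlinkConnectorOptions and the removed scanShuffleByPartition() getter no longer exists on CoreOptions; a plain Options instance can serve keys from both option classes. A small hedged sketch of that mixed lookup (the map values are illustrative only):

Map<String, String> tableOptions = new HashMap<>();
tableOptions.put("continuous.discovery-interval", "5 s");
tableOptions.put("streaming-read.shuffle-by-partition", "false");

Options options = Options.fromMap(tableOptions);
long discoveryMillis =
        options.get(CoreOptions.CONTINUOUS_DISCOVERY_INTERVAL).toMillis(); // core option
boolean shuffleByPartition =
        options.get(FlinkConnectorOptions.STREAMING_READ_SHUFFLE_BY_PARTITION); // connector option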
@@ -305,7 +305,8 @@ private DataStream<RowData> buildContinuousStreamOperator() {
produceTypeInfo(),
createReadBuilder(),
conf.get(CoreOptions.CONTINUOUS_DISCOVERY_INTERVAL).toMillis(),
watermarkStrategy == null);
watermarkStrategy == null,
conf.get(FlinkConnectorOptions.STREAMING_READ_SHUFFLE_BY_PARTITION));
if (parallelism != null) {
dataStream.getTransformation().setParallelism(parallelism);
}
@@ -92,6 +92,6 @@ protected SplitEnumerator<FileStoreSourceSplit, PendingSplitsCheckpoint> buildEn
bucketMode,
options.get(FlinkConnectorOptions.SOURCE_CHECKPOINT_ALIGN_TIMEOUT).toMillis(),
options.get(CoreOptions.SCAN_MAX_SPLITS_PER_TASK),
options.get(CoreOptions.SCAN_SHUFFLE_BY_PARTITION));
options.get(FlinkConnectorOptions.STREAMING_READ_SHUFFLE_BY_PARTITION));
}
}
@@ -229,15 +229,20 @@ public static DataStream<RowData> buildSource(
TypeInformation<RowData> typeInfo,
ReadBuilder readBuilder,
long monitorInterval,
boolean emitSnapshotWatermark) {
boolean emitSnapshotWatermark,
boolean shuffleByPartition) {
return env.addSource(
new MonitorFunction(readBuilder, monitorInterval, emitSnapshotWatermark),
name + "-Monitor",
new JavaTypeInfo<>(Split.class))
.forceNonParallel()
.partitionCustom(
(key, numPartitions) ->
ChannelComputer.select(key.f0, key.f1, numPartitions),
(key, numPartitions) -> {
if (shuffleByPartition) {
return ChannelComputer.select(key.f0, key.f1, numPartitions);
}
return ChannelComputer.select(key.f1, numPartitions);
},
split -> {
DataSplit dataSplit = (DataSplit) split;
return Tuple2.of(dataSplit.partition(), dataSplit.bucket());
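With this change the custom partitioner honors the flag: when shuffleByPartition is true it keeps the previous behavior of keying on partition and bucket together, and when false it keys on the bucket alone, so every partition's copy of a given bucket is read by the same subtask. A small hedged illustration of the two channel computations (the empty partition row, bucket 3, and parallelism 4 are made-up inputs; the bucket-only overload is assumed to behave like a simple modulo):

int parallelism = 4;
int bucket = 3;
BinaryRow partition = BinaryRow.EMPTY_ROW; // stand-in for a real partition value

// shuffleByPartition = false: only the bucket picks the channel, regardless of partition.
int bucketOnly = ChannelComputer.select(bucket, parallelism);
// shuffleByPartition = true: the partition hash also contributes, spreading one bucket id across subtasks.
int partitionAware = ChannelComputer.select(partition, bucket, parallelism);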
