From 275da6e72b76c4ab0fe557f24657af5909317772 Mon Sep 17 00:00:00 2001 From: zhourui999 <1459939299@qq.com> Date: Sun, 7 Apr 2024 10:22:46 +0800 Subject: [PATCH] fix: Fix duplicate primary keys in query results --- .../source/snapshot/SnapshotReaderImpl.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java index 132c3184faddc..cb3c9fc7e1366 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java @@ -271,6 +271,12 @@ private List generateSplits( for (Map.Entry> bucketEntry : buckets.entrySet()) { int bucket = bucketEntry.getKey(); List bucketFiles = bucketEntry.getValue(); + DataSplit.Builder builder = + DataSplit.builder() + .withSnapshot(snapshotId) + .withPartition(partition) + .withBucket(bucket) + .isStreaming(isStreaming); List splitGroups = isStreaming ? splitGenerator.splitForStreaming(bucketFiles) @@ -283,17 +289,12 @@ private List generateSplits( .orElse(null) : null; for (SplitGenerator.SplitGroup splitGroup : splitGroups) { - DataSplit.Builder builder = - DataSplit.builder() - .withSnapshot(snapshotId) - .withPartition(partition) - .withBucket(bucket) - .isStreaming(isStreaming); List dataFiles = splitGroup.files; builder.withDataFiles(dataFiles); - if (splitGroup.rawConvertible) { - builder.rawFiles(convertToRawFiles(partition, bucket, dataFiles)); - } + builder.rawFiles( + splitGroup.rawConvertible + ? convertToRawFiles(partition, bucket, dataFiles) + : Collections.emptyList()); if (deletionVectors) { builder.withDataDeletionFiles( getDeletionFiles(dataFiles, deletionIndexFile));