From eb253ec44ae3bf8f27a134cb0be59130beadf463 Mon Sep 17 00:00:00 2001 From: chenzhuoyu Date: Tue, 27 Aug 2024 18:21:21 +0800 Subject: [PATCH] [core] Introduce level0FileCount for partitions table --- .../paimon/manifest/PartitionEntry.java | 23 ++++++++++++++++++- .../paimon/table/system/PartitionsTable.java | 4 +++- .../table/system/PartitionsTableTest.java | 13 +++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/manifest/PartitionEntry.java b/paimon-core/src/main/java/org/apache/paimon/manifest/PartitionEntry.java index 22b7fc2feed38..74e48fcce2fa4 100644 --- a/paimon-core/src/main/java/org/apache/paimon/manifest/PartitionEntry.java +++ b/paimon-core/src/main/java/org/apache/paimon/manifest/PartitionEntry.java @@ -36,6 +36,7 @@ public class PartitionEntry { private final long recordCount; private final long fileSizeInBytes; private final long fileCount; + private final long level0FileCount; private final long lastFileCreationTime; public PartitionEntry( @@ -43,11 +44,13 @@ public PartitionEntry( long recordCount, long fileSizeInBytes, long fileCount, + long level0FileCount, long lastFileCreationTime) { this.partition = partition; this.recordCount = recordCount; this.fileSizeInBytes = fileSizeInBytes; this.fileCount = fileCount; + this.level0FileCount = level0FileCount; this.lastFileCreationTime = lastFileCreationTime; } @@ -71,12 +74,17 @@ public long lastFileCreationTime() { return lastFileCreationTime; } + public long level0FileCount() { + return level0FileCount; + } + public PartitionEntry merge(PartitionEntry entry) { return new PartitionEntry( partition, recordCount + entry.recordCount, fileSizeInBytes + entry.fileSizeInBytes, fileCount + entry.fileCount, + level0FileCount + entry.level0FileCount, Math.max(lastFileCreationTime, entry.lastFileCreationTime)); } @@ -84,16 +92,23 @@ public static PartitionEntry fromManifestEntry(ManifestEntry entry) { long recordCount = entry.file().rowCount(); long fileSizeInBytes = entry.file().fileSize(); long fileCount = 1; + long level0FileCount = 0; + + if (entry.level() == 0) { + level0FileCount = 1; + } if (entry.kind() == DELETE) { recordCount = -recordCount; fileSizeInBytes = -fileSizeInBytes; fileCount = -fileCount; + level0FileCount = -level0FileCount; } return new PartitionEntry( entry.partition(), recordCount, fileSizeInBytes, fileCount, + level0FileCount, entry.file().creationTimeEpochMillis()); } @@ -126,6 +141,7 @@ public boolean equals(Object o) { return recordCount == that.recordCount && fileSizeInBytes == that.fileSizeInBytes && fileCount == that.fileCount + && level0FileCount == that.level0FileCount && lastFileCreationTime == that.lastFileCreationTime && Objects.equals(partition, that.partition); } @@ -133,6 +149,11 @@ public boolean equals(Object o) { @Override public int hashCode() { return Objects.hash( - partition, recordCount, fileSizeInBytes, fileCount, lastFileCreationTime); + partition, + recordCount, + fileSizeInBytes, + fileCount, + level0FileCount, + lastFileCreationTime); } } diff --git a/paimon-core/src/main/java/org/apache/paimon/table/system/PartitionsTable.java b/paimon-core/src/main/java/org/apache/paimon/table/system/PartitionsTable.java index 76c0768eeeac4..0b10c0fd43b35 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/system/PartitionsTable.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/system/PartitionsTable.java @@ -73,7 +73,8 @@ public class PartitionsTable implements ReadonlyTable { new DataField(1, "record_count", new BigIntType(false)), new DataField(2, "file_size_in_bytes", new BigIntType(false)), new DataField(3, "file_count", new BigIntType(false)), - new DataField(4, "last_update_time", DataTypes.TIMESTAMP_MILLIS()))); + new DataField(4, "level0_file_count", new BigIntType(false)), + new DataField(5, "last_update_time", DataTypes.TIMESTAMP_MILLIS()))); private final FileStoreTable storeTable; @@ -206,6 +207,7 @@ private GenericRow toRow( entry.recordCount(), entry.fileSizeInBytes(), entry.fileCount(), + entry.level0FileCount(), Timestamp.fromLocalDateTime( LocalDateTime.ofInstant( Instant.ofEpochMilli(entry.lastFileCreationTime()), diff --git a/paimon-core/src/test/java/org/apache/paimon/table/system/PartitionsTableTest.java b/paimon-core/src/test/java/org/apache/paimon/table/system/PartitionsTableTest.java index a17dc75466a68..9b4c630c898d8 100644 --- a/paimon-core/src/test/java/org/apache/paimon/table/system/PartitionsTableTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/table/system/PartitionsTableTest.java @@ -42,6 +42,7 @@ import java.util.ArrayList; import java.util.List; +import static org.apache.paimon.io.DataFileTestUtils.row; import static org.assertj.core.api.Assertions.assertThat; /** Unit tests for {@link PartitionsTable}. */ @@ -102,4 +103,16 @@ public void testPartitionValue() throws Exception { List result = read(partitionsTable, new int[][] {{0}, {1}, {3}}); assertThat(result).containsExactlyInAnyOrderElementsOf(expectedRow); } + + @Test + public void testLevel0FileCountValue() throws Exception { + compact(table, row(1), 0); + write(table, GenericRow.of(2, 1, 3), GenericRow.of(3, 1, 4)); + List expectedRow = new ArrayList<>(); + expectedRow.add(GenericRow.of(BinaryString.fromString("[1]"), 2L, 1L)); + expectedRow.add(GenericRow.of(BinaryString.fromString("[2]"), 2L, 2L)); + + List result = read(partitionsTable, new int[][] {{0}, {3}, {4}}); + assertThat(result).containsExactlyInAnyOrderElementsOf(expectedRow); + } }