From ff81bcf4e43c83030fb5b77923cad4d4d90488b9 Mon Sep 17 00:00:00 2001 From: zouxxyy Date: Thu, 1 Feb 2024 20:07:25 +0800 Subject: [PATCH] 1 --- docs/content/how-to/system-tables.md | 12 +++++------ .../paimon/table/system/SnapshotsTable.java | 20 +++++++++++++++---- .../table/system/SnapshotsTableTest.java | 3 ++- .../spark/sql/AnalyzeTableTestBase.scala | 5 +++++ 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/docs/content/how-to/system-tables.md b/docs/content/how-to/system-tables.md index 8005ab27b165..e8de9ce67a34 100644 --- a/docs/content/how-to/system-tables.md +++ b/docs/content/how-to/system-tables.md @@ -45,12 +45,12 @@ You can query the snapshot history information of the table through snapshots ta SELECT * FROM MyTable$snapshots; /* -+--------------+------------+-----------------+-------------------+--------------+-------------------------+--------------------------------+------------------------------- +--------------------------------+---------------------+---------------------+-------------------------+-------------------+--------------------+----------------+ -| snapshot_id | schema_id | commit_user | commit_identifier | commit_kind | commit_time | base_manifest_list | delta_manifest_list | changelog_manifest_list | total_record_count | delta_record_count | changelog_record_count | added_file_count | delete_file_count | watermark | -+--------------+------------+-----------------+-------------------+--------------+-------------------------+--------------------------------+------------------------------- +--------------------------------+---------------------+---------------------+-------------------------+-------------------+--------------------+----------------+ -| 2 | 0 | 7ca4cd28-98e... | 2 | APPEND | 2022-10-26 11:44:15.600 | manifest-list-31323d5f-76e6... | manifest-list-31323d5f-76e6... | manifest-list-31323d5f-76e6... | 2 | 2 | 0 | 2 | 0 | 1666755855600 | -| 1 | 0 | 870062aa-3e9... | 1 | APPEND | 2022-10-26 11:44:15.148 | manifest-list-31593d5f-76e6... | manifest-list-31593d5f-76e6... | manifest-list-31593d5f-76e6... | 1 | 1 | 0 | 1 | 0 | 1666755855148 | -+--------------+------------+-----------------+-------------------+--------------+-------------------------+--------------------------------+------------------------------- +--------------------------------+---------------------+---------------------+-------------------------+-------------------+--------------------+----------------+ ++--------------+------------+-----------------+-------------------+--------------+-------------------------+--------------------------------+------------------------------- +--------------------------------+---------------------+---------------------+-------------------------+-------------------+--------------------+----------------+--------------------+ +| snapshot_id | schema_id | commit_user | commit_identifier | commit_kind | commit_time | base_manifest_list | delta_manifest_list | changelog_manifest_list | total_record_count | delta_record_count | changelog_record_count | added_file_count | delete_file_count | watermark | statistics | ++--------------+------------+-----------------+-------------------+--------------+-------------------------+--------------------------------+------------------------------- +--------------------------------+---------------------+---------------------+-------------------------+-------------------+--------------------+----------------+--------------------+ +| 2 | 0 | 7ca4cd28-98e... | 2 | APPEND | 2022-10-26 11:44:15.600 | manifest-list-31323d5f-76e6... | manifest-list-31323d5f-76e6... | manifest-list-31323d5f-76e6... | 2 | 2 | 0 | 2 | 0 | 1666755855600 |{\n "snapshotId"...| +| 1 | 0 | 870062aa-3e9... | 1 | APPEND | 2022-10-26 11:44:15.148 | manifest-list-31593d5f-76e6... | manifest-list-31593d5f-76e6... | manifest-list-31593d5f-76e6... | 1 | 1 | 0 | 1 | 0 | 1666755855148 || NULL| ++--------------+------------+-----------------+-------------------+--------------+-------------------------+--------------------------------+------------------------------- +--------------------------------+---------------------+---------------------+-------------------------+-------------------+--------------------+----------------+--------------------+ 2 rows in set */ ``` diff --git a/paimon-core/src/main/java/org/apache/paimon/table/system/SnapshotsTable.java b/paimon-core/src/main/java/org/apache/paimon/table/system/SnapshotsTable.java index 944c8e605dbd..c33a74e709b2 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/system/SnapshotsTable.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/system/SnapshotsTable.java @@ -30,6 +30,8 @@ import org.apache.paimon.operation.FileStoreScan; import org.apache.paimon.predicate.Predicate; import org.apache.paimon.reader.RecordReader; +import org.apache.paimon.stats.Statistics; +import org.apache.paimon.stats.StatsFileHandler; import org.apache.paimon.table.FileStoreTable; import org.apache.paimon.table.ReadonlyTable; import org.apache.paimon.table.Table; @@ -98,7 +100,9 @@ public class SnapshotsTable implements ReadonlyTable { new DataField(11, "changelog_record_count", new BigIntType(true)), new DataField(12, "added_file_count", new IntType(true)), new DataField(13, "delete_file_count", new IntType(true)), - new DataField(14, "watermark", new BigIntType(true)))); + new DataField(14, "watermark", new BigIntType(true)), + new DataField( + 15, "statistics", SerializationUtils.newStringType(true)))); private final FileIO fileIO; private final Path location; @@ -230,8 +234,10 @@ public RecordReader createReader(Split split) throws IOException { } Path location = ((SnapshotsSplit) split).location; Iterator snapshots = new SnapshotManager(fileIO, location).snapshots(); + StatsFileHandler statsFileHandler = dataTable.store().newStatsFileHandler(); Iterator rows = - Iterators.transform(snapshots, snapshot -> toRow(snapshot, dataTable)); + Iterators.transform( + snapshots, snapshot -> toRow(snapshot, dataTable, statsFileHandler)); if (projection != null) { rows = Iterators.transform( @@ -240,7 +246,8 @@ public RecordReader createReader(Split split) throws IOException { return new IteratorRecordReader<>(rows); } - private InternalRow toRow(Snapshot snapshot, FileStoreTable dataTable) { + private InternalRow toRow( + Snapshot snapshot, FileStoreTable dataTable, StatsFileHandler statsFileHandler) { FileStoreScan.Plan plan = dataTable.store().newScan().withSnapshot(snapshot).plan(); return GenericRow.of( snapshot.id(), @@ -260,7 +267,12 @@ private InternalRow toRow(Snapshot snapshot, FileStoreTable dataTable) { snapshot.changelogRecordCount(), plan.files(FileKind.ADD).size(), plan.files(FileKind.DELETE).size(), - snapshot.watermark()); + snapshot.watermark(), + statsFileHandler + .readStats(snapshot) + .map(Statistics::toString) + .map(BinaryString::fromString) + .orElse(null)); } } } diff --git a/paimon-core/src/test/java/org/apache/paimon/table/system/SnapshotsTableTest.java b/paimon-core/src/test/java/org/apache/paimon/table/system/SnapshotsTableTest.java index 4bb5f462e1c8..9d049866b7ac 100644 --- a/paimon-core/src/test/java/org/apache/paimon/table/system/SnapshotsTableTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/table/system/SnapshotsTableTest.java @@ -123,7 +123,8 @@ private List getExceptedResult(long[] snapshotIds) { snapshot.changelogRecordCount(), plan.files(FileKind.ADD).size(), plan.files(FileKind.DELETE).size(), - snapshot.watermark())); + snapshot.watermark(), + null)); } return expectedRow; diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/AnalyzeTableTestBase.scala b/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/AnalyzeTableTestBase.scala index 5656fa60e64e..97ca0ca72dca 100644 --- a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/AnalyzeTableTestBase.scala +++ b/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/AnalyzeTableTestBase.scala @@ -47,6 +47,11 @@ abstract class AnalyzeTableTestBase extends PaimonSparkTestBase { Assertions.assertEquals(2L, stats.mergedRecordCount().getAsLong) Assertions.assertTrue(stats.mergedRecordSize().isPresent) Assertions.assertTrue(stats.colStats().isEmpty) + + checkAnswer( + spark.sql( + "SELECT commit_kind, substring(statistics, 1, 13) FROM `T$snapshots` WHERE snapshot_id = '3'"), + Row("ANALYZE", "{\n \"snapshot") :: Nil) } test("Paimon analyze: analyze no scan") {