Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[core] Support statistic with time travel #4251

Merged
merged 17 commits into from
Oct 8, 2024
16 changes: 16 additions & 0 deletions docs/content/maintenance/system-tables.md
Original file line number Diff line number Diff line change
Expand Up @@ -369,3 +369,19 @@ SELECT * FROM sys.catalog_options;
*/
```

### Statistic Table
You can query the statistic information through statistic table.

```sql
SELECT * FROM T$statistics;

/*
+--------------+------------+-----------------------+------------------+----------+
| snapshot_id | schema_id | mergedRecordCount | mergedRecordSize | colstat |
+--------------+------------+-----------------------+------------------+----------+
| 2 | 0 | 2 | 2 | {} |
+--------------+------------+-----------------------+------------------+----------+
1 rows in set
*/
```

Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,18 @@ public Identifier identifier() {

@Override
public Optional<Statistics> statistics() {
// todo: support time travel
Snapshot latestSnapshot = snapshotManager().latestSnapshot();
Snapshot latestSnapshot;
Long snapshotId = coreOptions().scanSnapshotId();
if (snapshotId == null) {
snapshotId = snapshotManager().latestSnapshotId();
}

if (snapshotId != null && snapshotManager().snapshotExists(snapshotId)) {
latestSnapshot = snapshotManager().snapshot(snapshotId);
} else {
latestSnapshot = snapshotManager().latestSnapshot();
}

if (latestSnapshot != null) {
return store().newStatsFileHandler().readStats(latestSnapshot);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public class StatisticTable implements ReadonlyTable {

public static final String STATISTICS = "statistics";

public static final RowType TABLE_TYPE =
private static final RowType TABLE_TYPE =
new RowType(
Arrays.asList(
new DataField(0, "snapshot_id", new BigIntType(false)),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SNAPSHOT_ID

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,50 @@ abstract class AnalyzeTableTestBase extends PaimonSparkTestBase {
Row(2, 0, 2, "{ }"))
}

test("Paimon analyze: test statistic system table with snapshot") {
spark.sql(s"""
|CREATE TABLE T (id STRING, name STRING, i INT, l LONG)
|USING PAIMON
|TBLPROPERTIES ('primary-key'='id')
|""".stripMargin)

spark.sql(s"INSERT INTO T VALUES ('1', 'a', 1, 1)")
spark.sql(s"INSERT INTO T VALUES ('2', 'aaa', 1, 2)")
Assertions.assertEquals(0, spark.sql("select * from `T$statistics`").count())

spark.sql(s"ANALYZE TABLE T COMPUTE STATISTICS")

spark.sql(s"INSERT INTO T VALUES ('3', 'b', 2, 1)")
spark.sql(s"INSERT INTO T VALUES ('4', 'bbb', 3, 2)")

spark.sql(s"ANALYZE TABLE T COMPUTE STATISTICS")

withSQLConf("spark.paimon.scan.snapshot-id" -> "3") {
checkAnswer(
sql("SELECT snapshot_id, schema_id, mergedRecordCount, colstat FROM `T$statistics`"),
Row(2, 0, 2, "{ }"))
}

withSQLConf("spark.paimon.scan.snapshot-id" -> "4") {
checkAnswer(
sql("SELECT snapshot_id, schema_id, mergedRecordCount, colstat FROM `T$statistics`"),
Row(2, 0, 2, "{ }"))
}

withSQLConf("spark.paimon.scan.snapshot-id" -> "6") {
checkAnswer(
sql("SELECT snapshot_id, schema_id, mergedRecordCount, colstat FROM `T$statistics`"),
Row(5, 0, 4, "{ }"))
}

withSQLConf("spark.paimon.scan.snapshot-id" -> "100") {
checkAnswer(
sql("SELECT snapshot_id, schema_id, mergedRecordCount, colstat FROM `T$statistics`"),
Row(5, 0, 4, "{ }"))
}

}

test("Paimon analyze: analyze table without snapshot") {
spark.sql(s"CREATE TABLE T (id STRING, name STRING)")
spark.sql(s"ANALYZE TABLE T COMPUTE STATISTICS")
Expand Down
Loading