From 4a89dd9bffdba1e31f05c86b7a174f7160871c34 Mon Sep 17 00:00:00 2001 From: Jingsong Date: Fri, 5 Jul 2024 22:02:06 +0800 Subject: [PATCH] [doc] Document remove_orphan_files whole database --- docs/content/flink/procedures.md | 5 +++-- docs/content/maintenance/manage-snapshots.md | 16 ++++++++++------ docs/content/spark/procedures.md | 3 ++- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/docs/content/flink/procedures.md b/docs/content/flink/procedures.md index 3adccb7abef1..fc9d48d670ec 100644 --- a/docs/content/flink/procedures.md +++ b/docs/content/flink/procedures.md @@ -179,14 +179,15 @@ All available procedures are listed below. To remove the orphan data files and metadata files. Arguments: -
  • identifier: the target table identifier. Cannot be empty.
  • +
  • identifier: the target table identifier. Cannot be empty. Use database_name.* to clean the whole database.
  • olderThan: to avoid deleting newly written files, this procedure only deletes orphan files older than 1 day by default. This argument can modify the interval.
  • dryRun: when true, view only orphan files, don't actually remove files. Default is false.
  • CALL remove_orphan_files('default.T', '2023-10-31 12:00:00')

    - CALL remove_orphan_files('default.T', '2023-10-31 12:00:00', true) + CALL remove_orphan_files('default.*', '2023-10-31 12:00:00')

    + CALL remove_orphan_files('default.T', '2023-10-31 12:00:00', true) diff --git a/docs/content/maintenance/manage-snapshots.md b/docs/content/maintenance/manage-snapshots.md index a974b0da99a7..f7fb94d01c61 100644 --- a/docs/content/maintenance/manage-snapshots.md +++ b/docs/content/maintenance/manage-snapshots.md @@ -296,7 +296,15 @@ submit a `remove_orphan_files` job to clean them: {{< tabs "remove_orphan_files" >}} -{{< tab "Flink" >}} +{{< tab "Spark SQL/Flink SQL" >}} +```sql +CALL sys.remove_orphan_files(table => "my_db.my_table", [older_than => "2023-10-31 12:00:00"]) + +CALL sys.remove_orphan_files(table => "my_db.*", [older_than => "2023-10-31 12:00:00"]) +``` +{{< /tab >}} + +{{< tab "Flink Action" >}} ```bash /bin/flink run \ @@ -322,12 +330,8 @@ To avoid deleting files that are newly added by other writing jobs, this action --older_than '2023-10-31 12:00:00' ``` -{{< /tab >}} +The table can be `*` to clean all tables in the database. -{{< tab "Spark" >}} -```sql -CALL sys.remove_orphan_files(table => "tableId", [older_than => "2023-10-31 12:00:00"]) -``` {{< /tab >}} {{< /tabs >}} \ No newline at end of file diff --git a/docs/content/spark/procedures.md b/docs/content/spark/procedures.md index fdd41e077420..56a8abb61902 100644 --- a/docs/content/spark/procedures.md +++ b/docs/content/spark/procedures.md @@ -129,12 +129,13 @@ This section introduce all available spark procedures about paimon. remove_orphan_files To remove the orphan data files and metadata files. Arguments: -
  • table: the target table identifier. Cannot be empty.
  • +
  • table: the target table identifier. Cannot be empty. Use database_name.* to clean the whole database.
  • older_than: to avoid deleting newly written files, this procedure only deletes orphan files older than 1 day by default. This argument can modify the interval.
  • dry_run: when true, view only orphan files, don't actually remove files. Default is false.
  • CALL sys.remove_orphan_files(table => 'default.T', older_than => '2023-10-31 12:00:00')

    + CALL sys.remove_orphan_files(table => 'default.*', older_than => '2023-10-31 12:00:00')

    CALL sys.remove_orphan_files(table => 'default.T', older_than => '2023-10-31 12:00:00', dry_run => true)