From acbd071375ac4cc2053435346737a3b1a64cce2e Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Wed, 27 Nov 2024 19:17:06 +0100 Subject: [PATCH] Write `null` when there is no parent-snapshot-id (#1383) --- pyiceberg/manifest.py | 8 ++++++-- tests/utils/test_manifest.py | 22 +++++++++++++++++----- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py index 649840fc66..6774499f2e 100644 --- a/pyiceberg/manifest.py +++ b/pyiceberg/manifest.py @@ -957,7 +957,11 @@ def __init__(self, output_file: OutputFile, snapshot_id: int, parent_snapshot_id super().__init__( format_version=1, output_file=output_file, - meta={"snapshot-id": str(snapshot_id), "parent-snapshot-id": str(parent_snapshot_id), "format-version": "1"}, + meta={ + "snapshot-id": str(snapshot_id), + "parent-snapshot-id": str(parent_snapshot_id) if parent_snapshot_id is not None else "null", + "format-version": "1", + }, ) def prepare_manifest(self, manifest_file: ManifestFile) -> ManifestFile: @@ -976,7 +980,7 @@ def __init__(self, output_file: OutputFile, snapshot_id: int, parent_snapshot_id output_file=output_file, meta={ "snapshot-id": str(snapshot_id), - "parent-snapshot-id": str(parent_snapshot_id), + "parent-snapshot-id": str(parent_snapshot_id) if parent_snapshot_id is not None else "null", "sequence-number": str(sequence_number), "format-version": "2", }, diff --git a/tests/utils/test_manifest.py b/tests/utils/test_manifest.py index bb60ac0a21..97c88a99ee 100644 --- a/tests/utils/test_manifest.py +++ b/tests/utils/test_manifest.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=redefined-outer-name,arguments-renamed,fixme from tempfile import TemporaryDirectory -from typing import Dict +from typing import Dict, Optional from unittest.mock import patch import fastavro @@ -526,14 +526,18 @@ def test_write_manifest( @pytest.mark.parametrize("format_version", [1, 2]) +@pytest.mark.parametrize("parent_snapshot_id", [19, None]) def test_write_manifest_list( - generated_manifest_file_file_v1: str, generated_manifest_file_file_v2: str, format_version: TableVersion + generated_manifest_file_file_v1: str, + generated_manifest_file_file_v2: str, + format_version: TableVersion, + parent_snapshot_id: Optional[int], ) -> None: io = load_file_io() snapshot = Snapshot( snapshot_id=25, - parent_snapshot_id=19, + parent_snapshot_id=parent_snapshot_id, timestamp_ms=1602638573590, manifest_list=generated_manifest_file_file_v1 if format_version == 1 else generated_manifest_file_file_v2, summary=Summary(Operation.APPEND), @@ -545,12 +549,20 @@ def test_write_manifest_list( path = tmp_dir + "/manifest-list.avro" output = io.new_output(path) with write_manifest_list( - format_version=format_version, output_file=output, snapshot_id=25, parent_snapshot_id=19, sequence_number=0 + format_version=format_version, + output_file=output, + snapshot_id=25, + parent_snapshot_id=parent_snapshot_id, + sequence_number=0, ) as writer: writer.add_manifests(demo_manifest_list) new_manifest_list = list(read_manifest_list(io.new_input(path))) - expected_metadata = {"snapshot-id": "25", "parent-snapshot-id": "19", "format-version": str(format_version)} + if parent_snapshot_id: + expected_metadata = {"snapshot-id": "25", "parent-snapshot-id": "19", "format-version": str(format_version)} + else: + expected_metadata = {"snapshot-id": "25", "parent-snapshot-id": "null", "format-version": str(format_version)} + if format_version == 2: expected_metadata["sequence-number"] = "0" _verify_metadata_with_fastavro(path, expected_metadata)