From 96d9a60dc4af2549bd4860de8651dc9ef30308e3 Mon Sep 17 00:00:00 2001 From: John Wilkie <124276291+JBWilkie@users.noreply.github.com> Date: Tue, 26 Nov 2024 11:53:03 +0000 Subject: [PATCH] [DAR-4813][External] Allow upload of volumetric DICOM series from folders of DICOM slices (#974) * Ignore files * Allow upload of DICOM slices as volumetric series * Update E2E test --- .gitignore | 1 + darwin/dataset/upload_manager.py | 46 ++++++++++++++++----- e2e_tests/cli/test_push.py | 16 ++----- tests/darwin/dataset/remote_dataset_test.py | 24 ++++------- 4 files changed, 49 insertions(+), 38 deletions(-) diff --git a/.gitignore b/.gitignore index 54f37a022..65ee2ae61 100644 --- a/.gitignore +++ b/.gitignore @@ -184,3 +184,4 @@ scripts/ *test_results.xml version.txt !e2e_tests/data +.cursorrules \ No newline at end of file diff --git a/darwin/dataset/upload_manager.py b/darwin/dataset/upload_manager.py index f401a4d12..cddc42658 100644 --- a/darwin/dataset/upload_manager.py +++ b/darwin/dataset/upload_manager.py @@ -5,6 +5,8 @@ from dataclasses import dataclass from enum import Enum from pathlib import Path, PurePosixPath +import zipfile +import tempfile from typing import ( TYPE_CHECKING, Any, @@ -211,11 +213,18 @@ def __init__( self.name = directory.name self.files = [LocalFile(file, fps=fps) for file in files] self.merge_mode = merge_mode - self._create_layout() + self._prepare_local_files_and_create_layout() - def _create_layout(self): + def _prepare_local_files_and_create_layout(self): """ - Sets the layout as a LayoutV3 object to be used when uploading the files as a dataset item. + This function: + - Ensures that the files to be uploaded are valid for the given merge mode + - Creates a LayoutV3 object for `ItemMergeMode.SLOTS` & `ItemMergeMode.CHANNELS` items + + For `ItemMergeMode.SERIES` items: + - Every slice is zipped into a single source file. This is necessary to upload + individual DICOM slices as volumetric series + - Layout is set to `None` because the files are zipped into a single source file Raises ------ @@ -238,11 +247,9 @@ def _create_layout(self): ] if not self.files: raise ValueError("No `.dcm` files found in 1st level of directory") - self.slot_names = [self.name] * len(self.files) - self.layout = { - "version": 3, - "slots_grid": [[[self.name]]], - } + self._create_series_zip() + self.layout = None + self.slot_names = ["0"] elif self.merge_mode == ItemMergeMode.CHANNELS: # Currently, only image files are supported in multi-channel items. This is planned to change in the future self.files = [ @@ -277,7 +284,27 @@ def serialize_darwin_json_v2(self): slot[optional_property] = local_file.data.get(optional_property) slots.append(slot) - return {"slots": slots, "layout": self.layout, "name": self.name, "path": "/"} + return { + "slots": slots, + "layout": self.layout, + "name": self.name, + "path": "/", + } + + def _create_series_zip(self): + """ + For a given series `MultiFileItem`: + - Zip all `.dcm` files into a temporary zip file + - Replace all `.dcm` files with the zip file + + This is necessary to upload individual DICOM slices as volumetric series + """ + self._temp_zip_dir = tempfile.TemporaryDirectory() + zip_path = Path(self._temp_zip_dir.name) / f"{self.name}.dcm" + with zipfile.ZipFile(zip_path, "w") as zip_file: + for local_file in self.files: + zip_file.write(local_file.local_path, local_file.local_path.name) + self.files = [LocalFile(zip_path)] @property def full_path(self) -> str: @@ -599,7 +626,6 @@ def _request_upload( file.serialize_darwin_json_v2() for file in file_chunk ], "options": { - "ignore_dicom_layout": True, "handle_as_slices": handle_as_slices, }, } diff --git a/e2e_tests/cli/test_push.py b/e2e_tests/cli/test_push.py index 84bbaf846..7d909077c 100644 --- a/e2e_tests/cli/test_push.py +++ b/e2e_tests/cli/test_push.py @@ -206,18 +206,8 @@ def test_push_dicom_series( """ expected_push_dir = "flat_directory_of_2_dicom_files" expected_name = "flat_directory_of_2_dicom_files" - expected_slot_types = ["dicom", "dicom"] - expected_layout = { - "slots_grid": [ - [ - [ - "flat_directory_of_2_dicom_files", - ] - ] - ], - "version": 3, - } - expected_file_names = ["flat_directory_of_2_dicom_files"] + expected_slot_types = ["dicom"] + expected_layout = {"slots": ["0"], "type": "simple", "version": 1} push_dir = Path(__file__).parents[1] / "data" / "push" / f"{expected_push_dir}.zip" items = extract_and_push( push_dir, @@ -232,4 +222,4 @@ def test_push_dicom_series( assert dicom_series_item["slot_types"] == expected_slot_types assert dicom_series_item["layout"] == expected_layout for num, slot in enumerate(dicom_series_item["slots"]): - assert slot["slot_name"] == expected_file_names[num] + assert slot["slot_name"] == "0" diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py index 6145241c7..07881d7e8 100644 --- a/tests/darwin/dataset/remote_dataset_test.py +++ b/tests/darwin/dataset/remote_dataset_test.py @@ -984,15 +984,13 @@ def test_dicoms(self, setup_zip): [directory], [], 0, item_merge_mode="series" ) assert len(multi_file_items) == 1 - assert len(local_files) == 5 + assert len(local_files) == 1 assert multi_file_items[0].merge_mode == ItemMergeMode.SERIES assert multi_file_items[0].files == local_files assert multi_file_items[0].directory == directory assert multi_file_items[0].name == directory.name - assert multi_file_items[0].layout == { - "version": 3, - "slots_grid": [[["dicoms"]]], - } + assert multi_file_items[0].slot_names == ["0"] + assert multi_file_items[0].layout is None def test_dicoms_and_other_files(self, setup_zip): directory = ( @@ -1002,15 +1000,13 @@ def test_dicoms_and_other_files(self, setup_zip): [directory], [], 0, item_merge_mode="series" ) assert len(multi_file_items) == 1 - assert len(local_files) == 5 + assert len(local_files) == 1 assert multi_file_items[0].merge_mode == ItemMergeMode.SERIES assert multi_file_items[0].files == local_files assert multi_file_items[0].directory == directory assert multi_file_items[0].name == directory.name - assert multi_file_items[0].layout == { - "version": 3, - "slots_grid": [[["dicoms_and_other_files"]]], - } + assert multi_file_items[0].layout is None + assert multi_file_items[0].slot_names == ["0"] def test_multiple_file_types(self, setup_zip): directory = setup_zip / "push_test_dir" / "multiple_file_types" @@ -1018,15 +1014,13 @@ def test_multiple_file_types(self, setup_zip): [directory], [], 0, item_merge_mode="series" ) assert len(multi_file_items) == 1 - assert len(local_files) == 3 + assert len(local_files) == 1 assert multi_file_items[0].merge_mode == ItemMergeMode.SERIES assert multi_file_items[0].files == local_files assert multi_file_items[0].directory == directory assert multi_file_items[0].name == directory.name - assert multi_file_items[0].layout == { - "version": 3, - "slots_grid": [[["multiple_file_types"]]], - } + assert multi_file_items[0].layout is None + assert multi_file_items[0].slot_names == ["0"] @pytest.mark.usefixtures("setup_zip")