Skip to content

Commit

Permalink
[DAR-4813][External] Allow upload of volumetric DICOM series from fol…
Browse files Browse the repository at this point in the history
…ders of DICOM slices (#974)

* Ignore  files

* Allow upload of DICOM slices as volumetric series

* Update E2E test
  • Loading branch information
JBWilkie authored Nov 26, 2024
1 parent ac0d661 commit 96d9a60
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 38 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,4 @@ scripts/
*test_results.xml
version.txt
!e2e_tests/data
.cursorrules
46 changes: 36 additions & 10 deletions darwin/dataset/upload_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from dataclasses import dataclass
from enum import Enum
from pathlib import Path, PurePosixPath
import zipfile
import tempfile
from typing import (
TYPE_CHECKING,
Any,
Expand Down Expand Up @@ -211,11 +213,18 @@ def __init__(
self.name = directory.name
self.files = [LocalFile(file, fps=fps) for file in files]
self.merge_mode = merge_mode
self._create_layout()
self._prepare_local_files_and_create_layout()

def _create_layout(self):
def _prepare_local_files_and_create_layout(self):
"""
Sets the layout as a LayoutV3 object to be used when uploading the files as a dataset item.
This function:
- Ensures that the files to be uploaded are valid for the given merge mode
- Creates a LayoutV3 object for `ItemMergeMode.SLOTS` & `ItemMergeMode.CHANNELS` items
For `ItemMergeMode.SERIES` items:
- Every slice is zipped into a single source file. This is necessary to upload
individual DICOM slices as volumetric series
- Layout is set to `None` because the files are zipped into a single source file
Raises
------
Expand All @@ -238,11 +247,9 @@ def _create_layout(self):
]
if not self.files:
raise ValueError("No `.dcm` files found in 1st level of directory")
self.slot_names = [self.name] * len(self.files)
self.layout = {
"version": 3,
"slots_grid": [[[self.name]]],
}
self._create_series_zip()
self.layout = None
self.slot_names = ["0"]
elif self.merge_mode == ItemMergeMode.CHANNELS:
# Currently, only image files are supported in multi-channel items. This is planned to change in the future
self.files = [
Expand Down Expand Up @@ -277,7 +284,27 @@ def serialize_darwin_json_v2(self):
slot[optional_property] = local_file.data.get(optional_property)
slots.append(slot)

return {"slots": slots, "layout": self.layout, "name": self.name, "path": "/"}
return {
"slots": slots,
"layout": self.layout,
"name": self.name,
"path": "/",
}

def _create_series_zip(self):
"""
For a given series `MultiFileItem`:
- Zip all `.dcm` files into a temporary zip file
- Replace all `.dcm` files with the zip file
This is necessary to upload individual DICOM slices as volumetric series
"""
self._temp_zip_dir = tempfile.TemporaryDirectory()
zip_path = Path(self._temp_zip_dir.name) / f"{self.name}.dcm"
with zipfile.ZipFile(zip_path, "w") as zip_file:
for local_file in self.files:
zip_file.write(local_file.local_path, local_file.local_path.name)
self.files = [LocalFile(zip_path)]

@property
def full_path(self) -> str:
Expand Down Expand Up @@ -599,7 +626,6 @@ def _request_upload(
file.serialize_darwin_json_v2() for file in file_chunk
],
"options": {
"ignore_dicom_layout": True,
"handle_as_slices": handle_as_slices,
},
}
Expand Down
16 changes: 3 additions & 13 deletions e2e_tests/cli/test_push.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,18 +206,8 @@ def test_push_dicom_series(
"""
expected_push_dir = "flat_directory_of_2_dicom_files"
expected_name = "flat_directory_of_2_dicom_files"
expected_slot_types = ["dicom", "dicom"]
expected_layout = {
"slots_grid": [
[
[
"flat_directory_of_2_dicom_files",
]
]
],
"version": 3,
}
expected_file_names = ["flat_directory_of_2_dicom_files"]
expected_slot_types = ["dicom"]
expected_layout = {"slots": ["0"], "type": "simple", "version": 1}
push_dir = Path(__file__).parents[1] / "data" / "push" / f"{expected_push_dir}.zip"
items = extract_and_push(
push_dir,
Expand All @@ -232,4 +222,4 @@ def test_push_dicom_series(
assert dicom_series_item["slot_types"] == expected_slot_types
assert dicom_series_item["layout"] == expected_layout
for num, slot in enumerate(dicom_series_item["slots"]):
assert slot["slot_name"] == expected_file_names[num]
assert slot["slot_name"] == "0"
24 changes: 9 additions & 15 deletions tests/darwin/dataset/remote_dataset_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -984,15 +984,13 @@ def test_dicoms(self, setup_zip):
[directory], [], 0, item_merge_mode="series"
)
assert len(multi_file_items) == 1
assert len(local_files) == 5
assert len(local_files) == 1
assert multi_file_items[0].merge_mode == ItemMergeMode.SERIES
assert multi_file_items[0].files == local_files
assert multi_file_items[0].directory == directory
assert multi_file_items[0].name == directory.name
assert multi_file_items[0].layout == {
"version": 3,
"slots_grid": [[["dicoms"]]],
}
assert multi_file_items[0].slot_names == ["0"]
assert multi_file_items[0].layout is None

def test_dicoms_and_other_files(self, setup_zip):
directory = (
Expand All @@ -1002,31 +1000,27 @@ def test_dicoms_and_other_files(self, setup_zip):
[directory], [], 0, item_merge_mode="series"
)
assert len(multi_file_items) == 1
assert len(local_files) == 5
assert len(local_files) == 1
assert multi_file_items[0].merge_mode == ItemMergeMode.SERIES
assert multi_file_items[0].files == local_files
assert multi_file_items[0].directory == directory
assert multi_file_items[0].name == directory.name
assert multi_file_items[0].layout == {
"version": 3,
"slots_grid": [[["dicoms_and_other_files"]]],
}
assert multi_file_items[0].layout is None
assert multi_file_items[0].slot_names == ["0"]

def test_multiple_file_types(self, setup_zip):
directory = setup_zip / "push_test_dir" / "multiple_file_types"
local_files, multi_file_items = _find_files_to_upload_as_multi_file_items(
[directory], [], 0, item_merge_mode="series"
)
assert len(multi_file_items) == 1
assert len(local_files) == 3
assert len(local_files) == 1
assert multi_file_items[0].merge_mode == ItemMergeMode.SERIES
assert multi_file_items[0].files == local_files
assert multi_file_items[0].directory == directory
assert multi_file_items[0].name == directory.name
assert multi_file_items[0].layout == {
"version": 3,
"slots_grid": [[["multiple_file_types"]]],
}
assert multi_file_items[0].layout is None
assert multi_file_items[0].slot_names == ["0"]


@pytest.mark.usefixtures("setup_zip")
Expand Down

0 comments on commit 96d9a60

Please sign in to comment.