From 3b9204d89ae6cbb8d531fd777e05e701972b0041 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Sun, 14 May 2023 18:52:13 -0700 Subject: [PATCH 01/96] Make checks more efficient, avoid appending to pydicom.Sequence --- src/highdicom/seg/sop.py | 228 ++++++++++++++++++++++----------------- tests/test_seg.py | 22 ++-- 2 files changed, 135 insertions(+), 115 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index bf16c5c2..373be413 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -913,7 +913,7 @@ def __init__( ``plane_positions`` parameter is provided, the frame in ``pixel_array[i, ...]`` should correspond to either ``source_images[i]`` (if ``source_images`` is a list of single - frame instances) or source_images[0].pixel_array[i, ...] if + frame instances) or ``source_images[0].pixel_array[i, ...]`` if ``source_images`` is a single multiframe instance. Similarly, if ``pixel_array`` is a 3D array representing the @@ -1188,13 +1188,17 @@ def __init__( self._coordinate_system = None # General Reference - self.SourceImageSequence: List[Dataset] = [] + + # Note that appending directly to the SourceImageSequence is typically + # slow so it's more efficient to build as a Python list then convert + # later + source_image_seq: List[Dataset] = [] referenced_series: Dict[str, List[Dataset]] = defaultdict(list) for s_img in source_images: ref = Dataset() ref.ReferencedSOPClassUID = s_img.SOPClassUID ref.ReferencedSOPInstanceUID = s_img.SOPInstanceUID - self.SourceImageSequence.append(ref) + source_image_seq.append(ref) referenced_series[s_img.SeriesInstanceUID].append(ref) # Common Instance Reference @@ -1358,9 +1362,11 @@ def __init__( sffg_item.PlaneOrientationSequence = plane_orientation self.SharedFunctionalGroupsSequence = [sffg_item] - # Information about individual frames will be updated below - self.NumberOfFrames = 0 - self.PerFrameFunctionalGroupsSequence: List[Dataset] = [] + # Information about individual frames. 
Note that a *very* significant + # efficiency gain is observed when building this as a Python list + # rather than a pydicom sequence, and then converting to a pydicom + # sequence at the end + pffg_sequence: List[Dataset] = [] # Check segment numbers described_segment_numbers = np.array([ @@ -1372,17 +1378,10 @@ def __init__( # Checks on pixels and overlap pixel_array, segments_overlap = self._check_and_cast_pixel_array( pixel_array, - described_segment_numbers, - segmentation_type + len(described_segment_numbers), + segmentation_type, ) self.SegmentsOverlap = segments_overlap.value - if omit_empty_frames and pixel_array.sum() == 0: - omit_empty_frames = False - logger.warning( - 'Encoding an empty segmentation with "omit_empty_frames" ' - 'set to True. Reverting to encoding all frames since omitting ' - 'all frames is not possible.' - ) if has_ref_frame_uid: if plane_positions is None: @@ -1504,8 +1503,11 @@ def __init__( # Remove empty slices if omit_empty_frames: - pixel_array, plane_positions, source_image_indices = \ + plane_positions, source_image_indices, is_empty = \ self._omit_empty_frames(pixel_array, plane_positions) + if is_empty: + # Cannot omit empty frames when all frames are empty + omit_empty_frames = False else: source_image_indices = list(range(pixel_array.shape[0])) @@ -1531,19 +1533,19 @@ def __init__( dimension_position_values = [None] is_encaps = self.file_meta.TransferSyntaxUID.is_encapsulated - if is_encaps: - # In the case of encapsulated transfer syntaxes, we will accumulate - # a list of encoded frames to encapsulate at the end - full_frames_list = [] - else: - # In the case of non-encapsulated (uncompressed) transfer syntaxes - # we will accumulate a 1D array of pixels from all frames for - # bitpacking at the end - full_pixel_array = np.array([], np.bool_) + + # In the case of encapsulated transfer syntaxes, we will accumulate + # a list of encoded frames to encapsulate at the end + # In the case of non-encapsulated (uncompressed) 
transfer syntaxes + # we will accumulate a list of flattened pixels from all frames for + # bitpacking at the end + full_frames_list: Union[List[bytes], List[np.ndarray]] = [] for i, segment_number in enumerate(described_segment_numbers): # Pixel array for just this segment if pixel_array.dtype in (np.float_, np.float32, np.float64): + # Based on the previous checks and casting, if we get here + # the output is a FRACTIONAL segmentation # Floating-point numbers must be mapped to 8-bit integers in # the range [0, max_fractional_value]. if pixel_array.ndim == 4: @@ -1554,29 +1556,36 @@ def __init__( segment_array * float(self.MaximumFractionalValue) ) planes = planes.astype(np.uint8) - elif pixel_array.dtype in (np.uint8, np.uint16): - # Note that integer arrays with segments stacked down the last - # dimension will already have been converted to bool, leaving - # only "label maps" here, which must be converted to binary - # masks. - planes = np.zeros(pixel_array.shape, dtype=np.uint8) - planes[pixel_array == segment_number] = 1 - elif pixel_array.dtype == np.bool_: - if pixel_array.ndim == 4: - planes = pixel_array[:, :, :, segment_number - 1] + else: + if pixel_array.ndim == 3: + # "Label maps" that must be converted to binary masks. + if len(described_segment_numbers) == 1: + # We wish to avoid unnecessary comparison or casting + # operations here, for efficiency reasons + if pixel_array.dtype != np.uint8: + planes = pixel_array.astype(np.uint8) + else: + planes = pixel_array + else: + planes = ( + pixel_array == segment_number + ).astype(np.uint8) else: - planes = pixel_array - planes = planes.astype(np.uint8) - # It may happen that a boolean array is passed that should be - # interpreted as fractional segmentation type. In this case, we - # also need to stretch pixel valeus to 8-bit unsigned integer - # range by multiplying with the maximum fractional value. 
+ planes = pixel_array[:, :, :, segment_number - 1] + if planes.dtype != np.uint8: + planes = planes.astype(np.uint8) + + # It may happen that a binary valued array is passed that + # should be stored as a fractional segmentation. In + # this case, we also need to stretch pixel values to 8-bit + # unsigned integer range by multiplying with the maximum + # fractional value. if segmentation_type == SegmentationTypeValues.FRACTIONAL: - planes *= int(self.MaximumFractionalValue) - else: - raise TypeError('Pixel array has an invalid data type.') + # Avoid an unnecessary multiplication operation if max + # fractional value is 1 + if int(self.MaximumFractionalValue) != 1: + planes *= int(self.MaximumFractionalValue) - contained_plane_index = [] for j in plane_sort_index: # Index of this frame in the original list of source indices source_image_index = source_image_indices[j] @@ -1584,15 +1593,14 @@ def __init__( # Even though completely empty slices were removed earlier, # there may still be slices in which this specific segment is # absent. 
Such frames should be removed - if omit_empty_frames and np.sum(planes[j]) == 0: - logger.info( + if omit_empty_frames and not np.any(planes[source_image_index]): + logger.debug( 'skip empty plane {} of segment #{}'.format( j, segment_number ) ) continue - contained_plane_index.append(j) - logger.info( + logger.debug( 'add plane #{} for segment #{}'.format( j, segment_number ) @@ -1666,20 +1674,18 @@ def __init__( derivation_src_img_item = Dataset() if hasattr(source_images[0], 'NumberOfFrames'): # A single multi-frame source image - src_img_item = self.SourceImageSequence[0] + src_img_item = source_images[0] # Frame numbers are one-based derivation_src_img_item.ReferencedFrameNumber = ( source_image_index + 1 ) else: # Multiple single-frame source images - src_img_item = self.SourceImageSequence[ - source_image_index - ] + src_img_item = source_images[source_image_index] derivation_src_img_item.ReferencedSOPClassUID = \ - src_img_item.ReferencedSOPClassUID + src_img_item.SOPClassUID derivation_src_img_item.ReferencedSOPInstanceUID = \ - src_img_item.ReferencedSOPInstanceUID + src_img_item.SOPInstanceUID purpose_code = \ codes.cid7202.SourceImageForImageProcessingOperation derivation_src_img_item.PurposeOfReferenceCodeSequence = [ @@ -1693,30 +1699,33 @@ def __init__( derivation_image_item ) else: - logger.warning('spatial locations not preserved') + logger.debug('spatial locations not preserved') identification = Dataset() identification.ReferencedSegmentNumber = segment_number pffp_item.SegmentIdentificationSequence = [ identification, ] - self.PerFrameFunctionalGroupsSequence.append(pffp_item) - self.NumberOfFrames += 1 - - if is_encaps: - # Encode this frame and add to the list for encapsulation - # at the end - for f in contained_plane_index: - full_frames_list.append(self._encode_pixels(planes[f])) - else: - # Concatenate the 1D array for re-encoding at the end - full_pixel_array = np.concatenate([ - full_pixel_array, - planes[contained_plane_index].flatten() 
- ]) + pffg_sequence.append(pffp_item) + + if is_encaps: + # Encode this frame and add to the list for encapsulation + # at the end + full_frames_list.append( + self._encode_pixels(planes[source_image_index]) + ) + else: + # Concatenate the 1D array for re-encoding at the end + full_frames_list.append( + planes[source_image_index].flatten() + ) self.SegmentSequence.append(segment_descriptions[i]) + self.PerFrameFunctionalGroupsSequence = pffg_sequence + self.SourceImageSequence = source_image_seq + self.NumberOfFrames = len(pffg_sequence) + if is_encaps: # Encapsulate all pre-compressed frames self.PixelData = encapsulate(full_frames_list) @@ -1724,7 +1733,9 @@ def __init__( # Encode the whole pixel array at once # This allows for correct bit-packing in cases where # number of pixels per frame is not a multiple of 8 - self.PixelData = self._encode_pixels(full_pixel_array) + self.PixelData = self._encode_pixels( + np.concatenate(full_frames_list) + ) # Add a null trailing byte if required if len(self.PixelData) % 2 == 1: @@ -1792,9 +1803,9 @@ def _check_segment_numbers(described_segment_numbers: np.ndarray): @staticmethod def _check_and_cast_pixel_array( pixel_array: np.ndarray, - described_segment_numbers: np.ndarray, + number_of_segments: int, segmentation_type: SegmentationTypeValues - ) -> Tuple[np.ndarray, SegmentsOverlapValues]: + ) -> Tuple[np.ndarray, SegmentsOverlapValues, bool]: """Checks on the shape and data type of the pixel array. Also checks for overlapping segments and returns the result. @@ -1803,7 +1814,7 @@ def _check_and_cast_pixel_array( ---------- pixel_array: numpy.ndarray The segmentation pixel array. - described_segment_numbers: numpy.ndarray + number_of_segments: int, The segment numbers from the segment descriptions, in the order they were passed. 1D array of integers. 
segmentation_type: highdicom.seg.SegmentationTypeValues @@ -1820,26 +1831,24 @@ def _check_and_cast_pixel_array( """ if pixel_array.ndim == 4: # Check that the number of segments in the array matches - if pixel_array.shape[-1] != len(described_segment_numbers): + if pixel_array.shape[-1] != number_of_segments: raise ValueError( 'The number of segments in last dimension of the pixel ' f'array ({pixel_array.shape[-1]}) does not match the ' 'number of described segments ' - f'({len(described_segment_numbers)}).' + f'({number_of_segments}).' ) if pixel_array.dtype in (np.bool_, np.uint8, np.uint16): + max_pixel = pixel_array.max() + if pixel_array.ndim == 3: # A label-map style array where pixel values represent # segment associations - segments_present = np.unique(pixel_array).astype(np.uint16) - segments_present = segments_present[segments_present > 0] # The pixel values in the pixel array must all belong to # a described segment - if not np.all( - np.in1d(segments_present, described_segment_numbers) - ): + if max_pixel > number_of_segments: raise ValueError( 'Pixel array contains segments that lack ' 'descriptions.' @@ -1852,24 +1861,30 @@ def _check_and_cast_pixel_array( # Pixel array is 4D where each segment is stacked down # the last dimension # In this case, each segment of the pixel array should be binary - if pixel_array.max() > 1: + if max_pixel > 1: raise ValueError( 'When passing a 4D stack of segments with an integer ' 'pixel type, the pixel array must be binary.' 
) - pixel_array = pixel_array.astype(np.bool_) # Need to check whether or not segments overlap - if pixel_array.shape[-1] == 1: + if max_pixel == 0: + # Empty segments can't overlap (this skips an unnecessary + # further test) + segments_overlap = SegmentsOverlapValues.NO + elif pixel_array.shape[-1] == 1: # A single segment does not overlap segments_overlap = SegmentsOverlapValues.NO - elif pixel_array.sum(axis=-1).max() > 1: - segments_overlap = SegmentsOverlapValues.YES else: - segments_overlap = SegmentsOverlapValues.NO + sum_over_segments = pixel_array.sum(axis=-1) + if np.any(sum_over_segments > 1): + segments_overlap = SegmentsOverlapValues.YES + else: + segments_overlap = SegmentsOverlapValues.NO elif (pixel_array.dtype in (np.float_, np.float32, np.float64)): unique_values = np.unique(pixel_array) + if np.min(unique_values) < 0.0 or np.max(unique_values) > 1.0: raise ValueError( 'Floating point pixel array values must be in the ' @@ -1885,10 +1900,13 @@ def _check_and_cast_pixel_array( 'Floating point pixel array values must be either ' '0.0 or 1.0 in case of BINARY segmentation type.' ) - pixel_array = pixel_array.astype(np.bool_) + pixel_array = pixel_array.astype(np.uint8) # Need to check whether or not segments overlap - if pixel_array.shape[-1] == 1: + if len(unique_values) == 1 and unique_values[0] == 0.0: + # All pixels are zero: there can be no overlap + segments_overlap = SegmentsOverlapValues.NO + elif pixel_array.shape[-1] == 1: # A single segment does not overlap segments_overlap = SegmentsOverlapValues.NO elif pixel_array.sum(axis=-1).max() > 1: @@ -1912,12 +1930,12 @@ def _check_and_cast_pixel_array( def _omit_empty_frames( pixel_array: np.ndarray, plane_positions: Sequence[Optional[PlanePositionSequence]] - ) -> Tuple[np.ndarray, List[Optional[PlanePositionSequence]], List[int]]: - """Remove empty frames from the pixel array. 
+ ) -> Tuple[List[Optional[PlanePositionSequence]], List[int], bool]: + """Remove empty frames from the plane positions. Empty frames (without any positive pixels) do not need to be included - in the segmentation image. This method removes the relevant frames - and updates the plane positions accordingly. + in the segmentation image. This method update the plane positions such + that the empty frames are omitted. Parameters ---------- @@ -1928,29 +1946,37 @@ def _omit_empty_frames( Returns ------- - pixel_array: numpy.ndarray - Pixel array with empty frames removed plane_positions: List[Optional[highdicom.PlanePositionSequence]] Plane positions with entries corresponding to empty frames removed. source_image_indices: List[int] List giving for each frame in the output pixel array the index of the corresponding frame in the original pixel array + is_empty: bool + Whether the entire image is empty. If so, empty frames should not + be omitted. """ - non_empty_frames = [] + # non_empty_frames = [] non_empty_plane_positions = [] # This list tracks which source image each non-empty frame came from source_image_indices = [] for i, (frm, pos) in enumerate(zip(pixel_array, plane_positions)): - if frm.sum() > 0: - non_empty_frames.append(frm) + if np.any(frm): + # non_empty_frames.append(frm) non_empty_plane_positions.append(pos) source_image_indices.append(i) - pixel_array = np.stack(non_empty_frames) - plane_positions = non_empty_plane_positions + # pixel_array = np.stack(non_empty_frames) + + if len(non_empty_plane_positions) == 0: + logger.warning( + 'Encoding an empty segmentation with "omit_empty_frames" ' + 'set to True. Reverting to encoding all frames since omitting ' + 'all frames is not possible.' 
+ ) + return (plane_positions, list(range(len(plane_positions))), True) - return (pixel_array, plane_positions, source_image_indices) + return (non_empty_plane_positions, source_image_indices, False) def _encode_pixels(self, planes: np.ndarray) -> bytes: """Encodes pixel planes. diff --git a/tests/test_seg.py b/tests/test_seg.py index 85b363d2..2aca3732 100644 --- a/tests/test_seg.py +++ b/tests/test_seg.py @@ -1490,16 +1490,10 @@ def test_pixel_types_fractional( ) # Ensure the recovered pixel array matches what is expected - if pix_type in (np.bool_, np.float_): - assert np.array_equal( - self.get_array_after_writing(instance), - expected_encoding * max_fractional_value - ), f'{sources[0].Modality} {fractional_transfer_syntax_uid}' - else: - assert np.array_equal( - self.get_array_after_writing(instance), - expected_encoding - ), f'{sources[0].Modality} {fractional_transfer_syntax_uid}' + assert np.array_equal( + self.get_array_after_writing(instance), + expected_encoding * max_fractional_value + ), f'{sources[0].Modality} {fractional_transfer_syntax_uid}' self.check_dimension_index_vals(instance) # Multi-segment (exclusive) @@ -1516,7 +1510,7 @@ def test_pixel_types_fractional( self._manufacturer_model_name, self._software_versions, self._device_serial_number, - max_fractional_value=1, + max_fractional_value=max_fractional_value, transfer_syntax_uid=fractional_transfer_syntax_uid ) if pix_type == np.float_: @@ -1532,7 +1526,7 @@ def test_pixel_types_fractional( assert np.array_equal( self.get_array_after_writing(instance), - expected_enc_exc + expected_enc_exc * max_fractional_value ), f'{sources[0].Modality} {fractional_transfer_syntax_uid}' self.check_dimension_index_vals(instance) @@ -1550,7 +1544,7 @@ def test_pixel_types_fractional( self._manufacturer_model_name, self._software_versions, self._device_serial_number, - max_fractional_value=1, + max_fractional_value=max_fractional_value, transfer_syntax_uid=fractional_transfer_syntax_uid ) if pix_type == 
np.float_: @@ -1566,7 +1560,7 @@ def test_pixel_types_fractional( assert np.array_equal( self.get_array_after_writing(instance), - expected_enc_overlap + expected_enc_overlap * max_fractional_value ), f'{sources[0].Modality} {fractional_transfer_syntax_uid}' self.check_dimension_index_vals(instance) From 20cdadb3190df37e2b8157803a833a6392c53828 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Sun, 14 May 2023 19:28:54 -0700 Subject: [PATCH 02/96] Add check for non-unique plane positions --- src/highdicom/seg/content.py | 8 ++++++++ tests/test_seg.py | 39 +++++++++++++++++++++++++++++------- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/src/highdicom/seg/content.py b/src/highdicom/seg/content.py index 72208aa2..c5f4d078 100644 --- a/src/highdicom/seg/content.py +++ b/src/highdicom/seg/content.py @@ -659,6 +659,14 @@ def get_index_values( return_index=True ) + if len(plane_sort_indices) != len(plane_positions): + raise ValueError( + "Input image/frame positions are not unique according to the " + "Dimension Index Pointers. The generated segmentation would be " + "ambiguous. Ensure that source images/frames have distinct " + "locations." 
+ ) + return (plane_position_values, plane_sort_indices) def get_index_keywords(self) -> List[str]: diff --git a/tests/test_seg.py b/tests/test_seg.py index 2aca3732..c24c04b4 100644 --- a/tests/test_seg.py +++ b/tests/test_seg.py @@ -1,4 +1,5 @@ from collections import defaultdict +from copy import deepcopy import unittest from pathlib import Path @@ -1865,7 +1866,7 @@ def test_construction_empty_source_image(self): ) def test_construction_empty_source_seg_sparse(self): - # Can encoding an empty segmentation with omit_empty_frames=True issues + # Encoding an empty segmentation with omit_empty_frames=True issues # a warning and encodes the full segmentation empty_pixel_array = np.zeros_like(self._ct_pixel_array) seg = Segmentation( @@ -1915,9 +1916,7 @@ def test_construction_invalid_content_label(self): source_images=[self._ct_image], pixel_array=self._ct_pixel_array, segmentation_type=SegmentationTypeValues.FRACTIONAL.value, - segment_descriptions=( - self._segment_descriptions - ), + segment_descriptions=self._segment_descriptions, series_instance_uid=self._series_instance_uid, series_number=self._series_number, sop_instance_uid=self._sop_instance_uid, @@ -1935,9 +1934,35 @@ def test_construction_mixed_source_series(self): source_images=self._ct_series + [self._ct_image], pixel_array=self._ct_pixel_array, segmentation_type=SegmentationTypeValues.FRACTIONAL.value, - segment_descriptions=( - self._additional_segment_descriptions # seg num 2 - ), + segment_descriptions=self._segment_descriptions, + series_instance_uid=self._series_instance_uid, + series_number=self._series_number, + sop_instance_uid=self._sop_instance_uid, + instance_number=self._instance_number, + manufacturer=self._manufacturer, + manufacturer_model_name=self._manufacturer_model_name, + software_versions=self._software_versions, + device_serial_number=self._device_serial_number + ) + + def test_construction_nonunqiue_plane_positions(self): + # It should not be possible to construct a segmentation 
with input + # images with the same plane location, even if they are otherwise + # distinct + ct_image_2 = deepcopy(self._ct_image) + ct_image_2.SOPInstanceUID = UID() + ct_image_2.InstanceNumber = 2 + pixel_array = np.zeros( + (2, *self._ct_image.pixel_array.shape), + dtype=bool + ) + pixel_array[0, 1:5, 10:15] = True + with pytest.raises(ValueError): + Segmentation( + source_images=[self._ct_image, ct_image_2], + pixel_array=pixel_array, + segmentation_type=SegmentationTypeValues.BINARY, + segment_descriptions=self._segment_descriptions, series_instance_uid=self._series_instance_uid, series_number=self._series_number, sop_instance_uid=self._sop_instance_uid, From fc74493082b7f77ce018f5ed3771b9b6f19a97f2 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Mon, 15 May 2023 01:33:57 -0400 Subject: [PATCH 03/96] Tidy up --- src/highdicom/seg/sop.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 373be413..77ac66e8 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1805,7 +1805,7 @@ def _check_and_cast_pixel_array( pixel_array: np.ndarray, number_of_segments: int, segmentation_type: SegmentationTypeValues - ) -> Tuple[np.ndarray, SegmentsOverlapValues, bool]: + ) -> Tuple[np.ndarray, SegmentsOverlapValues]: """Checks on the shape and data type of the pixel array. Also checks for overlapping segments and returns the result. @@ -1882,9 +1882,8 @@ def _check_and_cast_pixel_array( else: segments_overlap = SegmentsOverlapValues.NO - elif (pixel_array.dtype in (np.float_, np.float32, np.float64)): + elif pixel_array.dtype in (np.float_, np.float32, np.float64): unique_values = np.unique(pixel_array) - if np.min(unique_values) < 0.0 or np.max(unique_values) > 1.0: raise ValueError( 'Floating point pixel array values must be in the ' @@ -1934,7 +1933,7 @@ def _omit_empty_frames( """Remove empty frames from the plane positions. 
Empty frames (without any positive pixels) do not need to be included - in the segmentation image. This method update the plane positions such + in the segmentation image. This method updates the plane positions such that the empty frames are omitted. Parameters @@ -1956,17 +1955,14 @@ def _omit_empty_frames( be omitted. """ - # non_empty_frames = [] non_empty_plane_positions = [] # This list tracks which source image each non-empty frame came from source_image_indices = [] for i, (frm, pos) in enumerate(zip(pixel_array, plane_positions)): if np.any(frm): - # non_empty_frames.append(frm) non_empty_plane_positions.append(pos) source_image_indices.append(i) - # pixel_array = np.stack(non_empty_frames) if len(non_empty_plane_positions) == 0: logger.warning( From 30f9dcc48f7ec123296d1b7513715baedada8d0f Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Mon, 15 May 2023 21:46:22 -0400 Subject: [PATCH 04/96] Move compression out of the loop, in preparation for multiprocessing --- src/highdicom/seg/sop.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 77ac66e8..e3ba8338 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1539,7 +1539,7 @@ def __init__( # In the case of non-encapsulated (uncompressed) transfer syntaxes # we will accumulate a list of flattened pixels from all frames for # bitpacking at the end - full_frames_list: Union[List[bytes], List[np.ndarray]] = [] + full_frames_list: List[np.ndarray] = [] for i, segment_number in enumerate(described_segment_numbers): # Pixel array for just this segment @@ -1711,9 +1711,7 @@ def __init__( if is_encaps: # Encode this frame and add to the list for encapsulation # at the end - full_frames_list.append( - self._encode_pixels(planes[source_image_index]) - ) + full_frames_list.append(planes[source_image_index]) else: # Concatenate the 1D array for re-encoding at the end full_frames_list.append( @@ -1728,7 +1726,10 @@ 
def __init__( if is_encaps: # Encapsulate all pre-compressed frames - self.PixelData = encapsulate(full_frames_list) + compressed_frames = [ + self._encode_pixels(frm) for frm in full_frames_list + ] + self.PixelData = encapsulate(compressed_frames) else: # Encode the whole pixel array at once # This allows for correct bit-packing in cases where From 8b86a12c9bc00f7def60a27fe2c7e9e6996f8697 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Tue, 16 May 2023 07:21:28 -0400 Subject: [PATCH 05/96] Simplify SegmentSequence logic --- src/highdicom/seg/sop.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index e3ba8338..031a637b 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1362,23 +1362,19 @@ def __init__( sffg_item.PlaneOrientationSequence = plane_orientation self.SharedFunctionalGroupsSequence = [sffg_item] - # Information about individual frames. Note that a *very* significant - # efficiency gain is observed when building this as a Python list - # rather than a pydicom sequence, and then converting to a pydicom - # sequence at the end - pffg_sequence: List[Dataset] = [] - # Check segment numbers described_segment_numbers = np.array([ int(item.SegmentNumber) for item in segment_descriptions ]) self._check_segment_numbers(described_segment_numbers) + number_of_segments = len(described_segment_numbers) + self.SegmentSequence = segment_descriptions # Checks on pixels and overlap pixel_array, segments_overlap = self._check_and_cast_pixel_array( pixel_array, - len(described_segment_numbers), + number_of_segments, segmentation_type, ) self.SegmentsOverlap = segments_overlap.value @@ -1541,7 +1537,14 @@ def __init__( # bitpacking at the end full_frames_list: List[np.ndarray] = [] - for i, segment_number in enumerate(described_segment_numbers): + # Information about individual frames is placed into the + # PerFrameFunctionalGroupsSequence. 
Note that a *very* significant + # efficiency gain is observed when building this as a Python list + # rather than a pydicom sequence, and then converting to a pydicom + # sequence at the end + pffg_sequence: List[Dataset] = [] + + for segment_number in described_segment_numbers: # Pixel array for just this segment if pixel_array.dtype in (np.float_, np.float32, np.float64): # Based on the previous checks and casting, if we get here @@ -1559,9 +1562,11 @@ def __init__( else: if pixel_array.ndim == 3: # "Label maps" that must be converted to binary masks. - if len(described_segment_numbers) == 1: + if number_of_segments == 1: # We wish to avoid unnecessary comparison or casting - # operations here, for efficiency reasons + # operations here, for efficiency reasons. If there is + # only a single segment, the label map pixel array is + # already correct if pixel_array.dtype != np.uint8: planes = pixel_array.astype(np.uint8) else: @@ -1587,7 +1592,7 @@ def __init__( planes *= int(self.MaximumFractionalValue) for j in plane_sort_index: - # Index of this frame in the original list of source indices + # Index of this frame in the original list of source frames source_image_index = source_image_indices[j] # Even though completely empty slices were removed earlier, @@ -1718,8 +1723,6 @@ def __init__( planes[source_image_index].flatten() ) - self.SegmentSequence.append(segment_descriptions[i]) - self.PerFrameFunctionalGroupsSequence = pffg_sequence self.SourceImageSequence = source_image_seq self.NumberOfFrames = len(pffg_sequence) From 1fa59f71a0c33419730896dfc0842824ede02c2a Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Tue, 16 May 2023 10:01:21 -0400 Subject: [PATCH 06/96] Factored out several parts of seg constructor into methods for readibility --- src/highdicom/seg/sop.py | 742 +++++++++++++++++++++++++-------------- 1 file changed, 473 insertions(+), 269 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 031a637b..215e98c2 100644 
--- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1191,7 +1191,7 @@ def __init__( # Note that appending directly to the SourceImageSequence is typically # slow so it's more efficient to build as a Python list then convert - # later + # later. We save conversion for after the main loop so that source_image_seq: List[Dataset] = [] referenced_series: Dict[str, List[Dataset]] = defaultdict(list) for s_img in source_images: @@ -1200,14 +1200,16 @@ def __init__( ref.ReferencedSOPInstanceUID = s_img.SOPInstanceUID source_image_seq.append(ref) referenced_series[s_img.SeriesInstanceUID].append(ref) + self.SourceImageSequence = source_image_seq # Common Instance Reference - self.ReferencedSeriesSequence: List[Dataset] = [] + ref_image_seq: List[Dataset] = [] for series_instance_uid, referenced_images in referenced_series.items(): ref = Dataset() ref.SeriesInstanceUID = series_instance_uid ref.ReferencedInstanceSequence = referenced_images - self.ReferencedSeriesSequence.append(ref) + ref_image_seq.append(ref) + self.ReferencedSeriesSequence = ref_image_seq # Image Pixel self.Rows = pixel_array.shape[1] @@ -1280,40 +1282,14 @@ def __init__( self.LossyImageCompressionMethod = \ src_img.LossyImageCompressionMethod - self.SegmentSequence: List[SegmentDescription] = [] - # Multi-Frame Functional Groups and Multi-Frame Dimensions sffg_item = Dataset() if pixel_measures is None: - if is_multiframe: - src_shared_fg = src_img.SharedFunctionalGroupsSequence[0] - pixel_measures = src_shared_fg.PixelMeasuresSequence - else: - if has_ref_frame_uid: - pixel_measures = PixelMeasuresSequence( - pixel_spacing=src_img.PixelSpacing, - slice_thickness=src_img.SliceThickness, - spacing_between_slices=src_img.get( - 'SpacingBetweenSlices', - None - ) - ) - else: - pixel_spacing = getattr(src_img, 'PixelSpacing', None) - if pixel_spacing is not None: - pixel_measures = PixelMeasuresSequence( - pixel_spacing=pixel_spacing, - slice_thickness=src_img.get( - 'SliceThickness', - None - 
), - spacing_between_slices=src_img.get( - 'SpacingBetweenSlices', - None - ) - ) - else: - pixel_measures = None + pixel_measures = self._get_pixel_measures( + source_image=src_img, + has_ref_frame_uid=has_ref_frame_uid, + is_multiframe=is_multiframe, + ) if has_ref_frame_uid: if self._coordinate_system == CoordinateSystemNames.SLIDE: @@ -1413,89 +1389,18 @@ def __init__( plane_sort_index = np.array([0]) are_spatial_locations_preserved = True - plane_position_names = self.DimensionIndexSequence.get_index_keywords() - if ( has_ref_frame_uid and self._coordinate_system == CoordinateSystemNames.SLIDE ): - self.ImageOrientationSlide = deepcopy( - plane_orientation[0].ImageOrientationSlide + self._add_slide_coordinate_metadata( + source_image=src_img, + plane_orientation=plane_orientation, + plane_position_values=plane_position_values, + pixel_measures=pixel_measures, + are_spatial_locations_preserved=are_spatial_locations_preserved, + is_tiled=is_tiled, ) - if are_spatial_locations_preserved and is_tiled: - self.TotalPixelMatrixOriginSequence = deepcopy( - src_img.TotalPixelMatrixOriginSequence - ) - self.TotalPixelMatrixRows = src_img.TotalPixelMatrixRows - self.TotalPixelMatrixColumns = src_img.TotalPixelMatrixColumns - elif are_spatial_locations_preserved and not is_tiled: - self.ImageCenterPointCoordinatesSequence = deepcopy( - src_img.ImageCenterPointCoordinatesSequence - ) - else: - row_index = plane_position_names.index( - 'RowPositionInTotalImagePixelMatrix' - ) - row_offsets = plane_position_values[:, row_index] - col_index = plane_position_names.index( - 'ColumnPositionInTotalImagePixelMatrix' - ) - col_offsets = plane_position_values[:, col_index] - frame_indices = np.lexsort([row_offsets, col_offsets]) - first_frame_index = frame_indices[0] - last_frame_index = frame_indices[-1] - x_index = plane_position_names.index( - 'XOffsetInSlideCoordinateSystem' - ) - x_origin = plane_position_values[first_frame_index, x_index] - y_index = 
plane_position_names.index( - 'YOffsetInSlideCoordinateSystem' - ) - y_origin = plane_position_values[first_frame_index, y_index] - z_index = plane_position_names.index( - 'ZOffsetInSlideCoordinateSystem' - ) - z_origin = plane_position_values[first_frame_index, z_index] - - if is_tiled: - origin_item = Dataset() - origin_item.XOffsetInSlideCoordinateSystem = \ - format_number_as_ds(x_origin) - origin_item.YOffsetInSlideCoordinateSystem = \ - format_number_as_ds(y_origin) - self.TotalPixelMatrixOriginSequence = [origin_item] - self.TotalPixelMatrixRows = int( - plane_position_values[last_frame_index, row_index] + - self.Rows - ) - self.TotalPixelMatrixColumns = int( - plane_position_values[last_frame_index, col_index] + - self.Columns - ) - else: - transform = ImageToReferenceTransformer( - image_position=(x_origin, y_origin, z_origin), - image_orientation=plane_orientation, - pixel_spacing=pixel_measures[0].PixelSpacing - ) - center_image_coordinates = np.array( - [[self.Columns / 2, self.Rows / 2]], - dtype=float - ) - center_reference_coordinates = transform( - center_image_coordinates - ) - x_center = center_reference_coordinates[0, 0] - y_center = center_reference_coordinates[0, 1] - z_center = center_reference_coordinates[0, 2] - center_item = Dataset() - center_item.XOffsetInSlideCoordinateSystem = \ - format_number_as_ds(x_center) - center_item.YOffsetInSlideCoordinateSystem = \ - format_number_as_ds(y_center) - center_item.ZOffsetInSlideCoordinateSystem = \ - format_number_as_ds(z_center) - self.ImageCenterPointCoordinatesSequence = [center_item] # Remove empty slices if omit_empty_frames: @@ -1546,185 +1451,67 @@ def __init__( for segment_number in described_segment_numbers: # Pixel array for just this segment - if pixel_array.dtype in (np.float_, np.float32, np.float64): - # Based on the previous checks and casting, if we get here - # the output is a FRACTIONAL segmentation - # Floating-point numbers must be mapped to 8-bit integers in - # the range [0, 
max_fractional_value]. - if pixel_array.ndim == 4: - segment_array = pixel_array[:, :, :, segment_number - 1] - else: - segment_array = pixel_array - planes = np.around( - segment_array * float(self.MaximumFractionalValue) - ) - planes = planes.astype(np.uint8) - else: - if pixel_array.ndim == 3: - # "Label maps" that must be converted to binary masks. - if number_of_segments == 1: - # We wish to avoid unnecessary comparison or casting - # operations here, for efficiency reasons. If there is - # only a single segment, the label map pixel array is - # already correct - if pixel_array.dtype != np.uint8: - planes = pixel_array.astype(np.uint8) - else: - planes = pixel_array - else: - planes = ( - pixel_array == segment_number - ).astype(np.uint8) - else: - planes = pixel_array[:, :, :, segment_number - 1] - if planes.dtype != np.uint8: - planes = planes.astype(np.uint8) - - # It may happen that a binary valued array is passed that - # should be stored as a fractional segmentation. In - # this case, we also need to stretch pixel values to 8-bit - # unsigned integer range by multiplying with the maximum - # fractional value. - if segmentation_type == SegmentationTypeValues.FRACTIONAL: - # Avoid an unnecessary multiplication operation if max - # fractional value is 1 - if int(self.MaximumFractionalValue) != 1: - planes *= int(self.MaximumFractionalValue) - - for j in plane_sort_index: + segment_array = self._get_segment_array( + pixel_array, + segment_number=segment_number, + number_of_segments=number_of_segments, + segmentation_type=segmentation_type, + max_fractional_value=max_fractional_value, + ) + + for plane_index in plane_sort_index: # Index of this frame in the original list of source frames - source_image_index = source_image_indices[j] + source_image_index = source_image_indices[plane_index] # Even though completely empty slices were removed earlier, # there may still be slices in which this specific segment is # absent. 
Such frames should be removed - if omit_empty_frames and not np.any(planes[source_image_index]): + if ( + omit_empty_frames and not + np.any(segment_array[source_image_index]) + ): logger.debug( - 'skip empty plane {} of segment #{}'.format( - j, segment_number - ) + f'skip empty plane {plane_index} of segment ' + f'#{segment_number}' ) continue logger.debug( - 'add plane #{} for segment #{}'.format( - j, segment_number - ) + f'add plane #{plane_index} for segment #{segment_number}' ) - pffp_item = Dataset() - frame_content_item = Dataset() - - if not has_ref_frame_uid: - index_values = [] - else: - # Look up the position of the plane relative to the indexed - # dimension. - try: - if ( - self._coordinate_system == - CoordinateSystemNames.SLIDE - ): - index_values = [ - np.where( - (dimension_position_values[idx] == pos) - )[0][0] + 1 - for idx, pos in enumerate( - plane_position_values[j] - ) - ] - else: - # In case of the patient coordinate system, the - # value of the attribute the Dimension Index - # Sequence points to (Image Position Patient) has a - # value multiplicity greater than one. 
- index_values = [ - np.where( - (dimension_position_values[idx] == pos).all( - axis=1 - ) - )[0][0] + 1 - for idx, pos in enumerate( - plane_position_values[j] - ) - ] - except IndexError as error: - raise IndexError( - 'Could not determine position of plane #{} in ' - 'three dimensional coordinate system based on ' - 'dimension index values: {}'.format(j, error) - ) - frame_content_item.DimensionIndexValues = ( - [segment_number] + index_values + # Get the item of the PerFrameFunctionalGroupsSequence for this + # segmentation frame + index_values = self._get_dimension_index_values( + plane_index=plane_index, + dimension_position_values=dimension_position_values, + plane_position_values=plane_position_values, + has_ref_frame_uid=has_ref_frame_uid, + coordinate_system=self._coordinate_system, ) - pffp_item.FrameContentSequence = [frame_content_item] - if has_ref_frame_uid: - pos = plane_positions[j] - if self._coordinate_system == CoordinateSystemNames.SLIDE: - pffp_item.PlanePositionSlideSequence = pos - else: - pffp_item.PlanePositionSequence = pos - - # Determining the source images that map to the frame is not - # always trivial. Since DerivationImageSequence is a type 2 - # attribute, we leave its value empty. 
- pffp_item.DerivationImageSequence = [] - - if are_spatial_locations_preserved: - derivation_image_item = Dataset() - derivation_code = codes.cid7203.Segmentation - derivation_image_item.DerivationCodeSequence = [ - CodedConcept.from_code(derivation_code) - ] - - derivation_src_img_item = Dataset() - if hasattr(source_images[0], 'NumberOfFrames'): - # A single multi-frame source image - src_img_item = source_images[0] - # Frame numbers are one-based - derivation_src_img_item.ReferencedFrameNumber = ( - source_image_index + 1 - ) - else: - # Multiple single-frame source images - src_img_item = source_images[source_image_index] - derivation_src_img_item.ReferencedSOPClassUID = \ - src_img_item.SOPClassUID - derivation_src_img_item.ReferencedSOPInstanceUID = \ - src_img_item.SOPInstanceUID - purpose_code = \ - codes.cid7202.SourceImageForImageProcessingOperation - derivation_src_img_item.PurposeOfReferenceCodeSequence = [ - CodedConcept.from_code(purpose_code) - ] - derivation_src_img_item.SpatialLocationsPreserved = 'YES' - derivation_image_item.SourceImageSequence = [ - derivation_src_img_item, - ] - pffp_item.DerivationImageSequence.append( - derivation_image_item - ) - else: - logger.debug('spatial locations not preserved') - - identification = Dataset() - identification.ReferencedSegmentNumber = segment_number - pffp_item.SegmentIdentificationSequence = [ - identification, - ] - pffg_sequence.append(pffp_item) + pffg_item = self._get_pffg_item( + segment_number=segment_number, + index_values=index_values, + plane_position=plane_positions[plane_index], + source_images=source_images, + source_image_index=source_image_index, + are_spatial_locations_preserved=are_spatial_locations_preserved, # noqa: E501 + has_ref_frame_uid=has_ref_frame_uid, + coordinate_system=self._coordinate_system, + ) + pffg_sequence.append(pffg_item) + # Add the segmentation pixel array for this frame to the list if is_encaps: # Encode this frame and add to the list for encapsulation # at the 
end - full_frames_list.append(planes[source_image_index]) + full_frames_list.append(segment_array[source_image_index]) else: # Concatenate the 1D array for re-encoding at the end full_frames_list.append( - planes[source_image_index].flatten() + segment_array[source_image_index].flatten() ) self.PerFrameFunctionalGroupsSequence = pffg_sequence - self.SourceImageSequence = source_image_seq self.NumberOfFrames = len(pffg_sequence) if is_encaps: @@ -1804,6 +1591,174 @@ def _check_segment_numbers(described_segment_numbers: np.ndarray): f'from 1. Found {described_segment_numbers[0]}. ' ) + @staticmethod + def _get_pixel_measures( + source_image: Dataset, + has_ref_frame_uid: bool, + is_multiframe: bool, + ) -> Optional[PixelMeasuresSequence]: + """Get a pixel measures sequences from the source image. + + This is a helper method used in the constructor. + + Parameters + ---------- + source_image: pydicom.Dataset + The first source image. + has_ref_frame_uid: bool + Whether the source image has a frame of reference uid. + is_multiframe: bool + Whether the source image is multiframe. + + Returns + ------- + Optional[highdicom.PixelMeasuresSequence] + A PixelMeasuresSequence derived from the source image, if this is + possible. Otherwise None. 
+ + """ + if is_multiframe: + src_shared_fg = source_image.SharedFunctionalGroupsSequence[0] + pixel_measures = src_shared_fg.PixelMeasuresSequence + else: + if has_ref_frame_uid: + pixel_measures = PixelMeasuresSequence( + pixel_spacing=source_image.PixelSpacing, + slice_thickness=source_image.SliceThickness, + spacing_between_slices=source_image.get( + 'SpacingBetweenSlices', + None + ) + ) + else: + pixel_spacing = getattr(source_image, 'PixelSpacing', None) + if pixel_spacing is not None: + pixel_measures = PixelMeasuresSequence( + pixel_spacing=pixel_spacing, + slice_thickness=source_image.get( + 'SliceThickness', + None + ), + spacing_between_slices=source_image.get( + 'SpacingBetweenSlices', + None + ) + ) + else: + pixel_measures = None + + return pixel_measures + + def _add_slide_coordinate_metadata( + self, + source_image: Dataset, + plane_orientation: PlaneOrientationSequence, + plane_position_values: np.ndarray, + pixel_measures: PixelMeasuresSequence, + are_spatial_locations_preserved: bool, + is_tiled: bool, + ) -> None: + """Add metadata related to the slide coordinate system. + + This is a helper method used in the constructor. + + Parameters + ---------- + source_image: pydicom.Dataset + The source image (assumed to be a single source image). + plane_orientation: highdicom.PlaneOrientationSequence + Plane orientation sequence for the segmentation. + plane_position_values: numpy.ndarray + Plane positions of each plane. + pixel_measures: highdicom.PixelMeasuresSequence + PixelMeasuresSequence for the segmentation. + are_spatial_locations_preserved: bool + Whether spatial locations are preserved between the source image + and the segmentation. + is_tiled: bool + Whether the souce image is a tiled image. 
+ + """ + plane_position_names = self.DimensionIndexSequence.get_index_keywords() + + self.ImageOrientationSlide = deepcopy( + plane_orientation[0].ImageOrientationSlide + ) + if are_spatial_locations_preserved and is_tiled: + self.TotalPixelMatrixOriginSequence = deepcopy( + source_image.TotalPixelMatrixOriginSequence + ) + self.TotalPixelMatrixRows = source_image.TotalPixelMatrixRows + self.TotalPixelMatrixColumns = source_image.TotalPixelMatrixColumns + elif are_spatial_locations_preserved and not is_tiled: + self.ImageCenterPointCoordinatesSequence = deepcopy( + source_image.ImageCenterPointCoordinatesSequence + ) + else: + row_index = plane_position_names.index( + 'RowPositionInTotalImagePixelMatrix' + ) + row_offsets = plane_position_values[:, row_index] + col_index = plane_position_names.index( + 'ColumnPositionInTotalImagePixelMatrix' + ) + col_offsets = plane_position_values[:, col_index] + frame_indices = np.lexsort([row_offsets, col_offsets]) + first_frame_index = frame_indices[0] + last_frame_index = frame_indices[-1] + x_index = plane_position_names.index( + 'XOffsetInSlideCoordinateSystem' + ) + x_origin = plane_position_values[first_frame_index, x_index] + y_index = plane_position_names.index( + 'YOffsetInSlideCoordinateSystem' + ) + y_origin = plane_position_values[first_frame_index, y_index] + z_index = plane_position_names.index( + 'ZOffsetInSlideCoordinateSystem' + ) + z_origin = plane_position_values[first_frame_index, z_index] + + if is_tiled: + origin_item = Dataset() + origin_item.XOffsetInSlideCoordinateSystem = \ + format_number_as_ds(x_origin) + origin_item.YOffsetInSlideCoordinateSystem = \ + format_number_as_ds(y_origin) + self.TotalPixelMatrixOriginSequence = [origin_item] + self.TotalPixelMatrixRows = int( + plane_position_values[last_frame_index, row_index] + + self.Rows + ) + self.TotalPixelMatrixColumns = int( + plane_position_values[last_frame_index, col_index] + + self.Columns + ) + else: + transform = ImageToReferenceTransformer( 
+ image_position=(x_origin, y_origin, z_origin), + image_orientation=plane_orientation, + pixel_spacing=pixel_measures[0].PixelSpacing + ) + center_image_coordinates = np.array( + [[self.Columns / 2, self.Rows / 2]], + dtype=float + ) + center_reference_coordinates = transform( + center_image_coordinates + ) + x_center = center_reference_coordinates[0, 0] + y_center = center_reference_coordinates[0, 1] + z_center = center_reference_coordinates[0, 2] + center_item = Dataset() + center_item.XOffsetInSlideCoordinateSystem = \ + format_number_as_ds(x_center) + center_item.YOffsetInSlideCoordinateSystem = \ + format_number_as_ds(y_center) + center_item.ZOffsetInSlideCoordinateSystem = \ + format_number_as_ds(z_center) + self.ImageCenterPointCoordinatesSequence = [center_item] + @staticmethod def _check_and_cast_pixel_array( pixel_array: np.ndarray, @@ -1978,6 +1933,255 @@ def _omit_empty_frames( return (non_empty_plane_positions, source_image_indices, False) + @staticmethod + def _get_segment_array( + pixel_array: np.ndarray, + segment_number: int, + number_of_segments: int, + segmentation_type: SegmentationTypeValues, + max_fractional_value: int + ) -> np.ndarray: + """Get segmentation array for a specific segment. + + This is a helper method used during the constructor. + + Parameters + ---------- + pixel_array: numpy.ndarray + Full segmentation array containing all segments. + segment_number: int + The segment of interest. + number_of_segments: int + Number of segments in the the segmentation. + segmentation_type: highdicom.seg.SegmentationTypeValues + Desired output segmentation type. + max_fractional_value: int + Value for scaling FRACTIONAL segmentations. + + """ + if pixel_array.dtype in (np.float_, np.float32, np.float64): + # Based on the previous checks and casting, if we get here the + # output is a FRACTIONAL segmentation Floating-point numbers must + # be mapped to 8-bit integers in the range [0, + # max_fractional_value]. 
+ if pixel_array.ndim == 4: + segment_array = pixel_array[:, :, :, segment_number - 1] + else: + segment_array = pixel_array + segment_array = np.around( + segment_array * float(max_fractional_value) + ) + segment_array = segment_array.astype(np.uint8) + else: + if pixel_array.ndim == 3: + # "Label maps" that must be converted to binary masks. + if number_of_segments == 1: + # We wish to avoid unnecessary comparison or casting + # operations here, for efficiency reasons. If there is only + # a single segment, the label map pixel array is already + # correct + if pixel_array.dtype != np.uint8: + segment_array = pixel_array.astype(np.uint8) + else: + segment_array = pixel_array + else: + segment_array = ( + pixel_array == segment_number + ).astype(np.uint8) + else: + segment_array = pixel_array[:, :, :, segment_number - 1] + if segment_array.dtype != np.uint8: + segment_array = segment_array.astype(np.uint8) + + # It may happen that a binary valued array is passed that should be + # stored as a fractional segmentation. In this case, we also need + # to stretch pixel values to 8-bit unsigned integer range by + # multiplying with the maximum fractional value. + if segmentation_type == SegmentationTypeValues.FRACTIONAL: + # Avoid an unnecessary multiplication operation if max + # fractional value is 1 + if int(max_fractional_value) != 1: + segment_array *= int(max_fractional_value) + + return segment_array + + @staticmethod + def _get_dimension_index_values( + plane_index: int, + dimension_position_values: List[np.ndarray], + plane_position_values: np.ndarray, + has_ref_frame_uid: bool, + coordinate_system: Optional[CoordinateSystemNames], + ) -> List[int]: + """Get dimension index values for a frame. + + Parameters + ---------- + plane_index: int + Index of the plane in the sorted planes list. + dimension_position_values: List[numpy.ndarray] + Relative locations of each plane. + plane_position_values: numpy.ndarray + Plane positions of each plane. 
+ has_ref_frame_uid: bool + Whether the source images have a frame of reference. + coordinate_system: Optional[highdicom.CoordinateSystemNames] + The type of coordinate system used (if any). + + Returns + ------- + index_values: List[int] + The dimension index values (except the segment number) for the + given plane. + + """ + if not has_ref_frame_uid: + index_values = [] + else: + # Look up the position of the plane relative to the indexed + # dimension. + try: + if ( + coordinate_system == + CoordinateSystemNames.SLIDE + ): + index_values = [ + np.where( + (dimension_position_values[idx] == pos) + )[0][0] + 1 + for idx, pos in enumerate( + plane_position_values[plane_index] + ) + ] + else: + # In case of the patient coordinate system, the + # value of the attribute the Dimension Index + # Sequence points to (Image Position Patient) has a + # value multiplicity greater than one. + index_values = [ + np.where( + (dimension_position_values[idx] == pos).all( + axis=1 + ) + )[0][0] + 1 + for idx, pos in enumerate( + plane_position_values[plane_index] + ) + ] + except IndexError as error: + raise IndexError( + 'Could not determine position of plane #{} in ' + 'three dimensional coordinate system based on ' + 'dimension index values: {}'.format(plane_index, error) + ) + + return index_values + + @staticmethod + def _get_pffg_item( + segment_number: int, + index_values: List[int], + plane_position: PlanePositionSequence, + source_images: List[Dataset], + source_image_index: int, + are_spatial_locations_preserved: bool, + has_ref_frame_uid: bool, + coordinate_system: Optional[CoordinateSystemNames], + ) -> Dataset: + """Get a single item of the PerFrameFunctionalGroupsSequence. + + This is a helper method used in the constructor. + + Parameters + ---------- + segment_number: int + Segment number of this segmentation frame. + index_values: List[int] + Dimension index values (except segment number) for this frame. 
+ plane_position: highdicom.seg.PlanePositionSequence + Plane position of this frame. + source_images: List[Dataset] + Full list of source images. + source_image_index: int + Index of this frame in the original list of source images. + are_spatial_locations_preserved: bool + Whether spatial locations are preserved between the segmentation + and the source images. + has_ref_frame_uid: bool + Whether the sources images have a frame of reference UID. + coordinate_system: Optional[highdicom.CoordinateSystemNames] + Coordinate system used, if any. + + Returns + ------- + pydicom.Dataset + Dataset representing the item of the + PerFrameFunctionalGroupsSequence for this segmentation frame. + + """ + pffg_item = Dataset() + frame_content_item = Dataset() + + frame_content_item.DimensionIndexValues = ( + [segment_number] + index_values + ) + pffg_item.FrameContentSequence = [frame_content_item] + if has_ref_frame_uid: + if coordinate_system == CoordinateSystemNames.SLIDE: + pffg_item.PlanePositionSlideSequence = plane_position + else: + pffg_item.PlanePositionSequence = plane_position + + # Determining the source images that map to the frame is not + # always trivial. Since DerivationImageSequence is a type 2 + # attribute, we leave its value empty. 
+ pffg_item.DerivationImageSequence = [] + + if are_spatial_locations_preserved: + derivation_image_item = Dataset() + derivation_code = codes.cid7203.Segmentation + derivation_image_item.DerivationCodeSequence = [ + CodedConcept.from_code(derivation_code) + ] + + derivation_src_img_item = Dataset() + if hasattr(source_images[0], 'NumberOfFrames'): + # A single multi-frame source image + src_img_item = source_images[0] + # Frame numbers are one-based + derivation_src_img_item.ReferencedFrameNumber = ( + source_image_index + 1 + ) + else: + # Multiple single-frame source images + src_img_item = source_images[source_image_index] + derivation_src_img_item.ReferencedSOPClassUID = \ + src_img_item.SOPClassUID + derivation_src_img_item.ReferencedSOPInstanceUID = \ + src_img_item.SOPInstanceUID + purpose_code = \ + codes.cid7202.SourceImageForImageProcessingOperation + derivation_src_img_item.PurposeOfReferenceCodeSequence = [ + CodedConcept.from_code(purpose_code) + ] + derivation_src_img_item.SpatialLocationsPreserved = 'YES' + derivation_image_item.SourceImageSequence = [ + derivation_src_img_item, + ] + pffg_item.DerivationImageSequence.append( + derivation_image_item + ) + else: + logger.debug('spatial locations not preserved') + + identification = Dataset() + identification.ReferencedSegmentNumber = segment_number + pffg_item.SegmentIdentificationSequence = [ + identification, + ] + + return pffg_item + def _encode_pixels(self, planes: np.ndarray) -> bytes: """Encodes pixel planes. 
From 76be8bfae97b06935a9105cd29767673520a47d2 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Fri, 9 Jun 2023 17:24:40 -0400 Subject: [PATCH 07/96] Add annotation_coordinate_type property and check for 0 in get_coordinate --- src/highdicom/ann/content.py | 5 +++++ src/highdicom/ann/sop.py | 9 +++++++++ tests/test_ann.py | 8 +++++++- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/highdicom/ann/content.py b/src/highdicom/ann/content.py index c7234ca4..bc7c50e6 100644 --- a/src/highdicom/ann/content.py +++ b/src/highdicom/ann/content.py @@ -620,6 +620,11 @@ def get_coordinates( 2D or 3D spatial coordinates of a graphical annotation """ # noqa: E501 + if annotation_number < 1: + raise ValueError( + 'Parameter "annotation_number" must be an integer greater ' + ' than 1.' + ) graphic_data = self.get_graphic_data(coordinate_type) annotation_index = annotation_number - 1 return graphic_data[annotation_index] diff --git a/src/highdicom/ann/sop.py b/src/highdicom/ann/sop.py index 7ae86e40..9833ebbf 100644 --- a/src/highdicom/ann/sop.py +++ b/src/highdicom/ann/sop.py @@ -394,6 +394,15 @@ def get_annotation_groups( return groups + @property + def annotation_coordinate_type( + self + ) -> AnnotationCoordinateTypeValues: + """highdicom.ann.AnnotationCoordinateTypeValues: Annotation coordinate type.""" # noqa: E501 + return AnnotationCoordinateTypeValues( + self.AnnotationCoordinateType + ) + @classmethod def from_dataset( cls, diff --git a/tests/test_ann.py b/tests/test_ann.py index 39b39c1c..ec102e82 100644 --- a/tests/test_ann.py +++ b/tests/test_ann.py @@ -394,7 +394,7 @@ def test_construction(self): version='1.0' ) - annotation_coordinate_type = '3D' + annotation_coordinate_type = AnnotationCoordinateTypeValues.SCOORD3D first_property_type = Code('4421005', 'SCT', 'Cell') first_label = 'cells' first_uid = UID() @@ -469,6 +469,12 @@ def test_construction(self): annotations = MicroscopyBulkSimpleAnnotations.from_dataset(dataset) + assert isinstance( + 
annotations.annotation_coordinate_type, + AnnotationCoordinateTypeValues + ) + assert annotations.annotation_coordinate_type == annotation_coordinate_type + retrieved_groups = annotations.get_annotation_groups() assert len(retrieved_groups) == 2 From e7c64815966a8dea39bba78136ddcd83f1e03c1b Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Fri, 9 Jun 2023 17:46:08 -0400 Subject: [PATCH 08/96] Add annread function --- src/highdicom/ann/__init__.py | 3 +- src/highdicom/ann/content.py | 57 ++++++++++++++++++++++++++--------- src/highdicom/ann/sop.py | 51 ++++++++++++++++++++++++++++--- tests/test_ann.py | 7 ++++- 4 files changed, 98 insertions(+), 20 deletions(-) diff --git a/src/highdicom/ann/__init__.py b/src/highdicom/ann/__init__.py index f027d0b4..18b5a6e1 100644 --- a/src/highdicom/ann/__init__.py +++ b/src/highdicom/ann/__init__.py @@ -6,7 +6,7 @@ GraphicTypeValues, PixelOriginInterpretationValues, ) -from highdicom.ann.sop import MicroscopyBulkSimpleAnnotations +from highdicom.ann.sop import MicroscopyBulkSimpleAnnotations, annread SOP_CLASS_UIDS = { '1.2.840.10008.5.1.4.1.1.91.1', # Microscopy Bulk Simple Annotations @@ -20,4 +20,5 @@ 'Measurements', 'MicroscopyBulkSimpleAnnotations', 'PixelOriginInterpretationValues', + 'annread', ] diff --git a/src/highdicom/ann/content.py b/src/highdicom/ann/content.py index bc7c50e6..a12b2576 100644 --- a/src/highdicom/ann/content.py +++ b/src/highdicom/ann/content.py @@ -115,13 +115,21 @@ def get_values(self, number_of_annotations: int) -> np.ndarray: return values @classmethod - def from_dataset(cls, dataset: Dataset) -> 'Measurements': + def from_dataset( + cls, + dataset: Dataset, + copy: bool = True + ) -> 'Measurements': """Construct instance from an existing dataset. Parameters ---------- dataset: pydicom.dataset.Dataset Dataset representing an item of the Measurements Sequence. + copy: bool + If True, the underlying dataset is deep-copied such that the + original dataset remains intact. 
If False, this operation will + alter the original dataset in place. Returns ------- @@ -138,17 +146,22 @@ def from_dataset(cls, dataset: Dataset) -> 'Measurements': module='microscopy-bulk-simple-annotations', base_path=['AnnotationGroupSequence', 'MeasurementsSequence'], ) - measurements = deepcopy(dataset) + if copy: + measurements = deepcopy(dataset) + else: + measurements = dataset measurements.__class__ = cls measurements.ConceptNameCodeSequence = [ CodedConcept.from_dataset( - measurements.ConceptNameCodeSequence[0] + measurements.ConceptNameCodeSequence[0], + copy=copy, ) ] measurements.MeasurementUnitsCodeSequence = [ CodedConcept.from_dataset( - measurements.MeasurementUnitsCodeSequence[0] + measurements.MeasurementUnitsCodeSequence[0], + copy=copy, ) ] @@ -246,8 +259,10 @@ def __init__( 'Argument "algorithm_identification" must be provided if ' f'argument "algorithm_type" is "{algorithm_type.value}".' ) - if not isinstance(algorithm_identification, - AlgorithmIdentificationSequence): + if not isinstance( + algorithm_identification, + AlgorithmIdentificationSequence + ): raise TypeError( 'Argument "algorithm_identification" must have type ' 'AlgorithmIdentificationSequence.' @@ -750,13 +765,21 @@ def _get_coordinate_index( return coordinate_index @classmethod - def from_dataset(cls, dataset: Dataset) -> 'AnnotationGroup': + def from_dataset( + cls, + dataset: Dataset, + copy: bool = True, + ) -> 'AnnotationGroup': """Construct instance from an existing dataset. Parameters ---------- dataset: pydicom.dataset.Dataset Dataset representing an item of the Annotation Group Sequence. + copy: bool + If True, the underlying dataset is deep-copied such that the + original dataset remains intact. If False, this operation will + alter the original dataset in place. 
Returns ------- @@ -773,38 +796,44 @@ def from_dataset(cls, dataset: Dataset) -> 'AnnotationGroup': module='microscopy-bulk-simple-annotations', base_path=['AnnotationGroupSequence'], ) - group = deepcopy(dataset) + if copy: + group = deepcopy(dataset) + else: + group = dataset group.__class__ = cls group._graphic_data = {} # will be handled by get_graphic_data() group.AnnotationPropertyCategoryCodeSequence = [ CodedConcept.from_dataset( - group.AnnotationPropertyCategoryCodeSequence[0] + group.AnnotationPropertyCategoryCodeSequence[0], + copy=copy, ) ] group.AnnotationPropertyTypeCodeSequence = [ CodedConcept.from_dataset( - group.AnnotationPropertyTypeCodeSequence[0] + group.AnnotationPropertyTypeCodeSequence[0], + copy=copy, ) ] if hasattr(group, 'AnnotationGroupAlgorithmIdentificationSequence'): group.AnnotationGroupAlgorithmIdentificationSequence = \ AlgorithmIdentificationSequence.from_sequence( - group.AnnotationGroupAlgorithmIdentificationSequence + group.AnnotationGroupAlgorithmIdentificationSequence, + copy=copy, ) if hasattr(group, 'MeasurementsSequence'): group.MeasurementsSequence = [ - Measurements.from_dataset(ds) + Measurements.from_dataset(ds, copy=copy) for ds in group.MeasurementsSequence ] if hasattr(group, 'AnatomicRegionSequence'): group.AnatomicRegionSequence = [ - CodedConcept.from_dataset(ds) + CodedConcept.from_dataset(ds, copy=copy) for ds in group.AnatomicRegionSequence ] if hasattr(group, 'PrimaryAnatomicStructureSequence'): group.PrimaryAnatomicStructureSequence = [ - CodedConcept.from_dataset(ds) + CodedConcept.from_dataset(ds, copy=copy) for ds in group.PrimaryAnatomicStructureSequence ] diff --git a/src/highdicom/ann/sop.py b/src/highdicom/ann/sop.py index 9833ebbf..e8f7ffb9 100644 --- a/src/highdicom/ann/sop.py +++ b/src/highdicom/ann/sop.py @@ -2,9 +2,21 @@ from collections import defaultdict from copy import deepcopy from operator import eq -from typing import Any, cast, Dict, List, Optional, Sequence, Tuple, Union +from os 
import PathLike +from typing import ( + Any, + BinaryIO, + cast, + Dict, + List, + Optional, + Sequence, + Tuple, + Union, +) import numpy as np +from pydicom import dcmread from pydicom.dataset import Dataset from pydicom.sr.coding import Code from pydicom.uid import ( @@ -406,7 +418,8 @@ def annotation_coordinate_type( @classmethod def from_dataset( cls, - dataset: Dataset + dataset: Dataset, + copy: bool = True, ) -> 'MicroscopyBulkSimpleAnnotations': """Construct instance from an existing dataset. @@ -414,6 +427,10 @@ def from_dataset( ---------- dataset: pydicom.dataset.Dataset Dataset representing a Microscopy Bulk Simple Annotations instance. + copy: bool + If True, the underlying dataset is deep-copied such that the + original dataset remains intact. If False, this operation will + alter the original dataset in place. Returns ------- @@ -431,12 +448,38 @@ def from_dataset( 'instance.' ) _check_little_endian(dataset) - ann = deepcopy(dataset) + if copy: + ann = deepcopy(dataset) + else: + ann = dataset ann.__class__ = MicroscopyBulkSimpleAnnotations ann.AnnotationGroupSequence = [ - AnnotationGroup.from_dataset(item) + AnnotationGroup.from_dataset(item, copy=copy) for item in ann.AnnotationGroupSequence ] return cast(MicroscopyBulkSimpleAnnotations, ann) + + +def annread( + fp: Union[str, bytes, PathLike, BinaryIO], +) -> MicroscopyBulkSimpleAnnotations: + """Read a bulk annotations object stored in DICOM File Format. + + Parameters + ---------- + fp: Union[str, bytes, os.PathLike] + Any file-like object representing a DICOM file containing a + MicroscopyBulkSimpleAnnotations object. + + Returns + ------- + highdicom.ann.MicroscopyBulkSimpleAnnotations + Bulk annotations object read from the file. 
+ + """ + return MicroscopyBulkSimpleAnnotations.from_dataset( + dcmread(fp), + copy=False + ) diff --git a/tests/test_ann.py b/tests/test_ann.py index ec102e82..e0933ee1 100644 --- a/tests/test_ann.py +++ b/tests/test_ann.py @@ -15,7 +15,7 @@ AnnotationGroupGenerationTypeValues, GraphicTypeValues, ) -from highdicom.ann.sop import MicroscopyBulkSimpleAnnotations +from highdicom.ann.sop import MicroscopyBulkSimpleAnnotations, annread from highdicom.content import AlgorithmIdentificationSequence from highdicom.sr.coding import CodedConcept from highdicom.uid import UID @@ -469,6 +469,11 @@ def test_construction(self): annotations = MicroscopyBulkSimpleAnnotations.from_dataset(dataset) + with BytesIO() as fp: + annotations.save_as(fp) + fp.seek(0) + annotations = annread(fp) + assert isinstance( annotations.annotation_coordinate_type, AnnotationCoordinateTypeValues From 3de6e1809544c471f5dc0cb2775ddfc2c07c5d17 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Tue, 13 Jun 2023 15:12:21 -0400 Subject: [PATCH 09/96] Fix for single channel floating point pixel array --- src/highdicom/seg/sop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 215e98c2..bb642da1 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1864,7 +1864,7 @@ def _check_and_cast_pixel_array( if len(unique_values) == 1 and unique_values[0] == 0.0: # All pixels are zero: there can be no overlap segments_overlap = SegmentsOverlapValues.NO - elif pixel_array.shape[-1] == 1: + elif pixel_array.ndim == 3 or pixel_array.shape[-1] == 1: # A single segment does not overlap segments_overlap = SegmentsOverlapValues.NO elif pixel_array.sum(axis=-1).max() > 1: From 1cbd5cd63a46c936c6cde25f66087ce0f502eac3 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 21 Jun 2023 06:54:40 -0400 Subject: [PATCH 10/96] Update user guide --- docs/ann.rst | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) 
diff --git a/docs/ann.rst b/docs/ann.rst index d98f8199..47bd32f8 100644 --- a/docs/ann.rst +++ b/docs/ann.rst @@ -184,8 +184,20 @@ transmitted over network, etc. Reading Existing Bulk Annotation Objects ---------------------------------------- -You can read an existing bulk annotation object using `pydicom` and then convert -to the `highdicom` object like this: +You can read an existing bulk annotation object from file using the +:func:`highdicom.ann.annread()` function: + +.. code-block:: python + + from pydicom import dcmread + import highdicom as hd + + ann = hd.ann.annread('data/test_files/sm_annotations.dcm') + + assert isinstance(ann, hd.ann.MicroscopyBulkSimpleAnnotations) + +Alternatively you can convert an existing ``pydicom.Dataset`` representing a +bulk annotation object to the `highdicom` object like this: .. code-block:: python @@ -198,7 +210,7 @@ to the `highdicom` object like this: assert isinstance(ann, hd.ann.MicroscopyBulkSimpleAnnotations) -Note that this example (and the following examples) uses an example file that +Note that these examples (and the following examples) use an example file that you can access from the test data in the `highdicom` repository. It was created using exactly the code in the construction example above. @@ -298,7 +310,7 @@ passed the data in to create the annotation with `highdicom`.
import numpy as np graphic_data = group.get_graphic_data( - coordinate_type=ann.AnnotationCoordinateType, + coordinate_type=ann.annotation_coordinate_type, ) assert len(graphic_data) == 2 and isinstance(graphic_data[0], np.ndarray) From 552ae63f390e96b6118f793cb160216c250c8988 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 21 Jun 2023 07:59:20 -0400 Subject: [PATCH 11/96] Codespell typo --- src/highdicom/seg/sop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index d3bc789d..d2c87000 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1677,7 +1677,7 @@ def _add_slide_coordinate_metadata( Whether spatial locations are preserved between the source image and the segmentation. is_tiled: bool - Whether the souce image is a tiled image. + Whether the source image is a tiled image. """ plane_position_names = self.DimensionIndexSequence.get_index_keywords() From 4440f6419ec8e57d6066d52d7939a8d707620981 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 22 Jun 2023 14:28:17 -0400 Subject: [PATCH 12/96] Apply suggestions from code review Co-authored-by: Markus D. Herrmann --- src/highdicom/seg/sop.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index d2c87000..39eb0c00 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1598,7 +1598,7 @@ def _get_pixel_measures( has_ref_frame_uid: bool, is_multiframe: bool, ) -> Optional[PixelMeasuresSequence]: - """Get a pixel measures sequences from the source image. + """Get a Pixel Measures Sequence from the source image. This is a helper method used in the constructor. @@ -1942,7 +1942,7 @@ def _get_segment_array( segmentation_type: SegmentationTypeValues, max_fractional_value: int ) -> np.ndarray: - """Get segmentation array for a specific segment. + """Get pixel data array for a specific segment. 
This is a helper method used during the constructor. @@ -2014,7 +2014,7 @@ def _get_dimension_index_values( has_ref_frame_uid: bool, coordinate_system: Optional[CoordinateSystemNames], ) -> List[int]: - """Get dimension index values for a frame. + """Get Dimension Index Values for a frame. Parameters ---------- @@ -2093,7 +2093,7 @@ def _get_pffg_item( has_ref_frame_uid: bool, coordinate_system: Optional[CoordinateSystemNames], ) -> Dataset: - """Get a single item of the PerFrameFunctionalGroupsSequence. + """Get a single item of the Per Frame Functional Groups Sequence. This is a helper method used in the constructor. @@ -2121,7 +2121,7 @@ def _get_pffg_item( ------- pydicom.Dataset Dataset representing the item of the - PerFrameFunctionalGroupsSequence for this segmentation frame. + Per Frame Functional Groups Sequence for this segmentation frame. """ pffg_item = Dataset() From 5f66eb352afc05de0b69e3f7dec2ed65a7ff8b9e Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 5 Jul 2023 18:44:52 -0400 Subject: [PATCH 13/96] Simplification of _get_pixel_measures --- src/highdicom/seg/sop.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 39eb0c00..e0dc2abc 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1288,7 +1288,6 @@ def __init__( if pixel_measures is None: pixel_measures = self._get_pixel_measures( source_image=src_img, - has_ref_frame_uid=has_ref_frame_uid, is_multiframe=is_multiframe, ) @@ -1595,7 +1594,6 @@ def _check_segment_numbers(described_segment_numbers: np.ndarray): @staticmethod def _get_pixel_measures( source_image: Dataset, - has_ref_frame_uid: bool, is_multiframe: bool, ) -> Optional[PixelMeasuresSequence]: """Get a Pixel Measures Sequence from the source image. @@ -1606,8 +1604,6 @@ def _get_pixel_measures( ---------- source_image: pydicom.Dataset The first source image. 
- has_ref_frame_uid: bool - Whether the source image has a frame of reference uid. is_multiframe: bool Whether the source image is multiframe. @@ -1622,7 +1618,7 @@ def _get_pixel_measures( src_shared_fg = source_image.SharedFunctionalGroupsSequence[0] pixel_measures = src_shared_fg.PixelMeasuresSequence else: - if has_ref_frame_uid: + if hasattr(source_image, 'FrameOfReferenceUID'): pixel_measures = PixelMeasuresSequence( pixel_spacing=source_image.PixelSpacing, slice_thickness=source_image.SliceThickness, From ce85cbd38dabeb5f3ac6dd5c8d8b7d234178cf9a Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 5 Jul 2023 18:49:22 -0400 Subject: [PATCH 14/96] Add missing Returns section to docstring --- src/highdicom/seg/sop.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index e0dc2abc..04f953bb 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1955,6 +1955,13 @@ def _get_segment_array( max_fractional_value: int Value for scaling FRACTIONAL segmentations. + Returns + ------- + numpy.ndarray: + Pixel data array consisting of pixel data for a single segment for + all planes. Output array has dtype np.uint8 and binary values (0 or + 1). 
+ """ if pixel_array.dtype in (np.float_, np.float32, np.float64): # Based on the previous checks and casting, if we get here the From 53a23922b36687d70086a669de3e18702c21c0c3 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 5 Jul 2023 19:06:26 -0400 Subject: [PATCH 15/96] Remove has_ref_frame_uid from _get_dimension_index_values --- src/highdicom/seg/sop.py | 102 +++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 53 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 04f953bb..32d4c62b 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1481,13 +1481,15 @@ def __init__( # Get the item of the PerFrameFunctionalGroupsSequence for this # segmentation frame - index_values = self._get_dimension_index_values( - plane_index=plane_index, - dimension_position_values=dimension_position_values, - plane_position_values=plane_position_values, - has_ref_frame_uid=has_ref_frame_uid, - coordinate_system=self._coordinate_system, - ) + if has_ref_frame_uid: + index_values = self._get_dimension_index_values( + plane_index=plane_index, + dimension_position_values=dimension_position_values, + plane_position_values=plane_position_values, + coordinate_system=self._coordinate_system, + ) + else: + index_values = [] pffg_item = self._get_pffg_item( segment_number=segment_number, index_values=index_values, @@ -2014,7 +2016,6 @@ def _get_dimension_index_values( plane_index: int, dimension_position_values: List[np.ndarray], plane_position_values: np.ndarray, - has_ref_frame_uid: bool, coordinate_system: Optional[CoordinateSystemNames], ) -> List[int]: """Get Dimension Index Values for a frame. @@ -2027,8 +2028,6 @@ def _get_dimension_index_values( Relative locations of each plane. plane_position_values: numpy.ndarray Plane positions of each plane. - has_ref_frame_uid: bool - Whether the source images have a frame of reference. 
coordinate_system: Optional[highdicom.CoordinateSystemNames] The type of coordinate system used (if any). @@ -2039,49 +2038,46 @@ def _get_dimension_index_values( given plane. """ - if not has_ref_frame_uid: - index_values = [] - else: - # Look up the position of the plane relative to the indexed - # dimension. - try: - if ( - coordinate_system == - CoordinateSystemNames.SLIDE - ): - index_values = [ - int( - np.where( - (dimension_position_values[idx] == pos) - )[0][0] + 1 - ) - for idx, pos in enumerate( - plane_position_values[plane_index] - ) - ] - else: - # In case of the patient coordinate system, the - # value of the attribute the Dimension Index - # Sequence points to (Image Position Patient) has a - # value multiplicity greater than one. - index_values = [ - int( - np.where( - (dimension_position_values[idx] == pos).all( - axis=1 - ) - )[0][0] + 1 - ) - for idx, pos in enumerate( - plane_position_values[plane_index] - ) - ] - except IndexError as error: - raise IndexError( - 'Could not determine position of plane #{} in ' - 'three dimensional coordinate system based on ' - 'dimension index values: {}'.format(plane_index, error) - ) + # Look up the position of the plane relative to the indexed + # dimension. + try: + if ( + coordinate_system == + CoordinateSystemNames.SLIDE + ): + index_values = [ + int( + np.where( + (dimension_position_values[idx] == pos) + )[0][0] + 1 + ) + for idx, pos in enumerate( + plane_position_values[plane_index] + ) + ] + else: + # In case of the patient coordinate system, the + # value of the attribute the Dimension Index + # Sequence points to (Image Position Patient) has a + # value multiplicity greater than one. 
+ index_values = [ + int( + np.where( + (dimension_position_values[idx] == pos).all( + axis=1 + ) + )[0][0] + 1 + ) + for idx, pos in enumerate( + plane_position_values[plane_index] + ) + ] + except IndexError as error: + raise IndexError( + 'Could not determine position of plane #{} in ' + 'three dimensional coordinate system based on ' + 'dimension index values: {}'.format(plane_index, error) + ) return index_values From 10340a8c8fa3c1d61ed6429110d9a8368d59e2b7 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 5 Jul 2023 19:12:41 -0400 Subject: [PATCH 16/96] rename index_values -> dimension_index_values --- src/highdicom/seg/sop.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 32d4c62b..f565b4d7 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1482,17 +1482,17 @@ def __init__( # Get the item of the PerFrameFunctionalGroupsSequence for this # segmentation frame if has_ref_frame_uid: - index_values = self._get_dimension_index_values( + dimension_index_values = self._get_dimension_index_values( plane_index=plane_index, dimension_position_values=dimension_position_values, plane_position_values=plane_position_values, coordinate_system=self._coordinate_system, ) else: - index_values = [] + dimension_index_values = [] pffg_item = self._get_pffg_item( segment_number=segment_number, - index_values=index_values, + dimension_index_values=dimension_index_values, plane_position=plane_positions[plane_index], source_images=source_images, source_image_index=source_image_index, @@ -2033,7 +2033,7 @@ def _get_dimension_index_values( Returns ------- - index_values: List[int] + dimension_index_values: List[int] The dimension index values (except the segment number) for the given plane. 
@@ -2084,7 +2084,7 @@ def _get_dimension_index_values( @staticmethod def _get_pffg_item( segment_number: int, - index_values: List[int], + dimension_index_values: List[int], plane_position: PlanePositionSequence, source_images: List[Dataset], source_image_index: int, @@ -2100,7 +2100,7 @@ def _get_pffg_item( ---------- segment_number: int Segment number of this segmentation frame. - index_values: List[int] + dimension_index_values: List[int] Dimension index values (except segment number) for this frame. plane_position: highdicom.seg.PlanePositionSequence Plane position of this frame. From 0881c322bdb315e53cd49d90e3f08d55193ed499 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 5 Jul 2023 19:15:35 -0400 Subject: [PATCH 17/96] Fix missing variable name change --- src/highdicom/seg/sop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index f565b4d7..6e52e616 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -2127,7 +2127,7 @@ def _get_pffg_item( frame_content_item = Dataset() frame_content_item.DimensionIndexValues = ( - [int(segment_number)] + index_values + [int(segment_number)] + dimension_index_values ) pffg_item.FrameContentSequence = [frame_content_item] if has_ref_frame_uid: From 17741993f4727d39e8989c37e148915efff5ea79 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 6 Jul 2023 06:43:11 -0400 Subject: [PATCH 18/96] Add further explanation to _get_dimension_index_values docstring --- src/highdicom/seg/sop.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 6e52e616..f66b29de 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -2020,6 +2020,11 @@ def _get_dimension_index_values( ) -> List[int]: """Get Dimension Index Values for a frame. 
+ The Dimension Index Values are a list of integer indices that describe + the position of a frame as indices along each of the dimensions of + the Dimension Index Sequence. See + :class:`highdicom.seg.DimensionIndexSequence`. + Parameters ---------- plane_index: int From 5cbd9bf5a9b9b7d8e849da57c509a178d2cfffe9 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 6 Jul 2023 09:16:20 -0400 Subject: [PATCH 19/96] Simplify _omit_empty_frames to find indices only --- src/highdicom/seg/sop.py | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index f66b29de..d9b55649 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1404,8 +1404,8 @@ def __init__( # Remove empty slices if omit_empty_frames: - plane_positions, source_image_indices, is_empty = \ - self._omit_empty_frames(pixel_array, plane_positions) + source_image_indices, is_empty = \ + self._get_nonempty_frame_indices(pixel_array) if is_empty: # Cannot omit empty frames when all frames are empty omit_empty_frames = False @@ -1493,7 +1493,7 @@ def __init__( pffg_item = self._get_pffg_item( segment_number=segment_number, dimension_index_values=dimension_index_values, - plane_position=plane_positions[plane_index], + plane_position=plane_positions[source_image_index], source_images=source_images, source_image_index=source_image_index, are_spatial_locations_preserved=are_spatial_locations_preserved, # noqa: E501 @@ -1884,27 +1884,23 @@ def _check_and_cast_pixel_array( return pixel_array, segments_overlap @staticmethod - def _omit_empty_frames( - pixel_array: np.ndarray, - plane_positions: Sequence[Optional[PlanePositionSequence]] - ) -> Tuple[List[Optional[PlanePositionSequence]], List[int], bool]: - """Remove empty frames from the plane positions. 
+ def _get_nonempty_frame_indices( + pixel_array: np.ndarray + ) -> Tuple[List[int], bool]: + """Get a list of all indices of original frames that are non-empty. Empty frames (without any positive pixels) do not need to be included - in the segmentation image. This method updates the plane positions such - that the empty frames are omitted. + in the segmentation image. This method finds a list of indices of + the input frames that are non-empty, and therefore should be included + in the segmentation image. Parameters ---------- pixel_array: numpy.ndarray Segmentation pixel array - plane_positions: Sequence[Optional[highdicom.PlanePositionSequence]] - Plane positions for each of the frames Returns ------- - plane_positions: List[Optional[highdicom.PlanePositionSequence]] - Plane positions with entries corresponding to empty frames removed. source_image_indices: List[int] List giving for each frame in the output pixel array the index of the corresponding frame in the original pixel array @@ -1913,24 +1909,21 @@ def _omit_empty_frames( be omitted. """ - non_empty_plane_positions = [] - # This list tracks which source image each non-empty frame came from source_image_indices = [] - for i, (frm, pos) in enumerate(zip(pixel_array, plane_positions)): + for i, frm in enumerate(pixel_array): if np.any(frm): - non_empty_plane_positions.append(pos) source_image_indices.append(i) - if len(non_empty_plane_positions) == 0: + if len(source_image_indices) == 0: logger.warning( 'Encoding an empty segmentation with "omit_empty_frames" ' 'set to True. Reverting to encoding all frames since omitting ' 'all frames is not possible.' 
) - return (plane_positions, list(range(len(plane_positions))), True) + return (list(range(pixel_array.shape[0])), True) - return (non_empty_plane_positions, source_image_indices, False) + return (source_image_indices, False) @staticmethod def _get_segment_array( From 37ad160d0b0accbc174d4d35f69def2b21c6c39e Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 6 Jul 2023 17:42:16 -0400 Subject: [PATCH 20/96] Rewrite as list comprehension --- src/highdicom/seg/sop.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index d9b55649..98cb13a4 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1910,10 +1910,10 @@ def _get_nonempty_frame_indices( """ # This list tracks which source image each non-empty frame came from - source_image_indices = [] - for i, frm in enumerate(pixel_array): - if np.any(frm): - source_image_indices.append(i) + source_image_indices = [ + i for i, frm in enumerate(pixel_array) + if np.any(frm) + ] if len(source_image_indices) == 0: logger.warning( From 27eac18cf2fac720adc90caeb17bbe7bc32e1fb0 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Fri, 7 Jul 2023 08:10:54 -0400 Subject: [PATCH 21/96] remove unnecessary flattens to save memory --- src/highdicom/seg/sop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index d9b55649..aef93f2c 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -2228,9 +2228,9 @@ def _encode_pixels(self, planes: np.ndarray) -> bytes: else: # The array may represent more than one frame item. 
if self.SegmentationType == SegmentationTypeValues.BINARY.value: - return pack_bits(planes.flatten()) + return pack_bits(planes) else: - return planes.flatten().tobytes() + return planes.tobytes() @classmethod def from_dataset( From 287047e961523db67c2f8aef7d09748eb1f86230 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Sat, 8 Jul 2023 11:01:04 -0400 Subject: [PATCH 22/96] Tidy up of dimension indexing code --- src/highdicom/seg/content.py | 6 +- src/highdicom/seg/sop.py | 211 +++++++++++++++++++---------------- 2 files changed, 119 insertions(+), 98 deletions(-) diff --git a/src/highdicom/seg/content.py b/src/highdicom/seg/content.py index c5f4d078..2611d145 100644 --- a/src/highdicom/seg/content.py +++ b/src/highdicom/seg/content.py @@ -613,7 +613,11 @@ def get_index_values( Returns ------- dimension_index_values: numpy.ndarray - 2D array of dimension index values + Array of dimension index values. The first dimension corresponds + to the items in the input plane_positions sequence. The second + dimension corresponds to the dimensions of the dimension index. + The third dimension (if any) corresponds to the multiplicity + of the values, and is omitted if this is 1 for all dimensions. plane_indices: numpy.ndarray 1D array of planes indices for sorting frames according to their spatial position specified by the dimension index diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index db6dcea8..5f355106 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1380,6 +1380,14 @@ def __init__( plane_orientation == source_plane_orientation ) + # plane_position_values is an array giving, for each plane of the + # input array, the raw values of all attributes that describe its + # position. 
The first dimension is sorted the same way as the input + # pixel array and the second is sorted the same way as the + # dimension index sequence (without segment number) + # plane_sort_index is a list of indices into the input planes + # giving the order in which they should be arranged to correctly + # sort them for inclusion into the segmentation plane_position_values, plane_sort_index = \ self.DimensionIndexSequence.get_index_values(plane_positions) else: @@ -1389,49 +1397,54 @@ def __init__( plane_sort_index = np.array([0]) are_spatial_locations_preserved = True - if ( - has_ref_frame_uid and - self._coordinate_system == CoordinateSystemNames.SLIDE - ): - self._add_slide_coordinate_metadata( - source_image=src_img, - plane_orientation=plane_orientation, - plane_position_values=plane_position_values, - pixel_measures=pixel_measures, - are_spatial_locations_preserved=are_spatial_locations_preserved, - is_tiled=is_tiled, - ) - - # Remove empty slices + # Find indices such that empty planes are removed if omit_empty_frames: - source_image_indices, is_empty = \ - self._get_nonempty_frame_indices(pixel_array) + included_plane_indices, is_empty = \ + self._get_nonempty_plane_indices(pixel_array) if is_empty: # Cannot omit empty frames when all frames are empty omit_empty_frames = False + included_plane_indices = list(range(pixel_array.shape[0])) + else: + # Remove all empty plane positions from the list of sorted + # plane position indices + included_plane_indices_set = set(included_plane_indices) + plane_sort_index = [ + ind for ind in plane_sort_index + if ind in included_plane_indices_set + ] else: - source_image_indices = list(range(pixel_array.shape[0])) + included_plane_indices = list(range(pixel_array.shape[0])) if has_ref_frame_uid: - plane_position_values = plane_position_values[source_image_indices] - _, plane_sort_index = np.unique( - plane_position_values, - axis=0, - return_index=True - ) - # Get unique values of attributes in the Plane Position 
Sequence or # Plane Position Slide Sequence, which define the position of the # plane with respect to the three dimensional patient or slide # coordinate system, respectively. These can subsequently be used # to look up the relative position of a plane relative to the # indexed dimension. - dimension_position_values = [ - np.unique(plane_position_values[:, index], axis=0) + unique_dimension_values = [ + np.unique( + plane_position_values[included_plane_indices, index], + axis=0 + ) for index in range(plane_position_values.shape[1]) ] else: - dimension_position_values = [None] + unique_dimension_values = [None] + + if ( + has_ref_frame_uid and + self._coordinate_system == CoordinateSystemNames.SLIDE + ): + self._add_slide_coordinate_metadata( + source_image=src_img, + plane_orientation=plane_orientation, + plane_position_values=plane_position_values, + pixel_measures=pixel_measures, + are_spatial_locations_preserved=are_spatial_locations_preserved, + is_tiled=is_tiled, + ) is_encaps = self.file_meta.TransferSyntaxUID.is_encapsulated @@ -1460,15 +1473,12 @@ def __init__( ) for plane_index in plane_sort_index: - # Index of this frame in the original list of source frames - source_image_index = source_image_indices[plane_index] - # Even though completely empty slices were removed earlier, # there may still be slices in which this specific segment is # absent. 
Such frames should be removed if ( omit_empty_frames and not - np.any(segment_array[source_image_index]) + np.any(segment_array[plane_index]) ): logger.debug( f'skip empty plane {plane_index} of segment ' @@ -1482,20 +1492,29 @@ def __init__( # Get the item of the PerFrameFunctionalGroupsSequence for this # segmentation frame if has_ref_frame_uid: - dimension_index_values = self._get_dimension_index_values( - plane_index=plane_index, - dimension_position_values=dimension_position_values, - plane_position_values=plane_position_values, - coordinate_system=self._coordinate_system, - ) + plane_pos_val = plane_position_values[plane_index] + try: + dimension_index_values = ( + self._get_dimension_index_values( + unique_dimension_values=unique_dimension_values, + plane_position_value=plane_pos_val, + coordinate_system=self._coordinate_system, + ) + ) + except IndexError as error: + raise IndexError( + 'Could not determine position of plane ' + f'#{plane_index} in three dimensional coordinate ' + f'system based on dimension index values: {error}' + ) else: dimension_index_values = [] pffg_item = self._get_pffg_item( segment_number=segment_number, dimension_index_values=dimension_index_values, - plane_position=plane_positions[source_image_index], + plane_position=plane_positions[plane_index], source_images=source_images, - source_image_index=source_image_index, + source_image_index=plane_index, are_spatial_locations_preserved=are_spatial_locations_preserved, # noqa: E501 has_ref_frame_uid=has_ref_frame_uid, coordinate_system=self._coordinate_system, @@ -1506,11 +1525,11 @@ def __init__( if is_encaps: # Encode this frame and add to the list for encapsulation # at the end - full_frames_list.append(segment_array[source_image_index]) + full_frames_list.append(segment_array[plane_index]) else: # Concatenate the 1D array for re-encoding at the end full_frames_list.append( - segment_array[source_image_index].flatten() + segment_array[plane_index].flatten() ) 
self.PerFrameFunctionalGroupsSequence = pffg_sequence @@ -1884,15 +1903,15 @@ def _check_and_cast_pixel_array( return pixel_array, segments_overlap @staticmethod - def _get_nonempty_frame_indices( + def _get_nonempty_plane_indices( pixel_array: np.ndarray ) -> Tuple[List[int], bool]: - """Get a list of all indices of original frames that are non-empty. + """Get a list of all indices of original planes that are non-empty. - Empty frames (without any positive pixels) do not need to be included - in the segmentation image. This method finds a list of indices of - the input frames that are non-empty, and therefore should be included - in the segmentation image. + Empty planes (without any positive pixels in any of the segments) do + not need to be included in the segmentation image. This method finds a + list of indices of the input frames that are non-empty, and therefore + should be included in the segmentation image. Parameters ---------- @@ -1901,9 +1920,10 @@ def _get_nonempty_frame_indices( Returns ------- - source_image_indices: List[int] - List giving for each frame in the output pixel array the index of - the corresponding frame in the original pixel array + included_plane_indices : List[int] + List giving for each plane position in the resulting segmentation + image the index of the corresponding frame in the original pixel + array. is_empty: bool Whether the entire image is empty. If so, empty frames should not be omitted. @@ -2006,9 +2026,8 @@ def _get_segment_array( @staticmethod def _get_dimension_index_values( - plane_index: int, - dimension_position_values: List[np.ndarray], - plane_position_values: np.ndarray, + unique_dimension_values: List[np.ndarray], + plane_position_value: np.ndarray, coordinate_system: Optional[CoordinateSystemNames], ) -> List[int]: """Get Dimension Index Values for a frame. @@ -2020,12 +2039,21 @@ def _get_dimension_index_values( Parameters ---------- - plane_index: int - Index of the plane in the sorted planes list. 
- dimension_position_values: List[numpy.ndarray] - Relative locations of each plane. - plane_position_values: numpy.ndarray - Plane positions of each plane. + unique_dimension_values: List[numpy.ndarray] + List of arrays containing, for each dimension in the dimension + index sequence (except ReferencedSegment), the sorted unique + values of all planes along that dimension. Each array in the list + corresponds to one dimension, and has shape (N x m) where N is the + number of unique values for that dimension and m is the + multiplicity of values for that dimension. + plane_position_value: numpy.ndarray + Plane position of the plane. This is a 1D or 2D array containing + each of the raw values for this plane of the attributes listed as + dimension index pointers (except ReferencedSegment). For dimension + indices where the value multiplicity of all attributes is 1, the + array will be 1D. If the value multiplicity of attributes is + greater than 1, these values are stacked along the second + dimension. coordinate_system: Optional[highdicom.CoordinateSystemNames] The type of coordinate system used (if any). @@ -2038,44 +2066,33 @@ def _get_dimension_index_values( """ # Look up the position of the plane relative to the indexed # dimension. - try: - if ( - coordinate_system == - CoordinateSystemNames.SLIDE - ): - index_values = [ - int( - np.where( - (dimension_position_values[idx] == pos) - )[0][0] + 1 - ) - for idx, pos in enumerate( - plane_position_values[plane_index] - ) - ] - else: - # In case of the patient coordinate system, the - # value of the attribute the Dimension Index - # Sequence points to (Image Position Patient) has a - # value multiplicity greater than one. 
- index_values = [ - int( - np.where( - (dimension_position_values[idx] == pos).all( - axis=1 - ) - )[0][0] + 1 - ) - for idx, pos in enumerate( - plane_position_values[plane_index] - ) - ] - except IndexError as error: - raise IndexError( - 'Could not determine position of plane #{} in ' - 'three dimensional coordinate system based on ' - 'dimension index values: {}'.format(plane_index, error) - ) + if ( + coordinate_system == + CoordinateSystemNames.SLIDE + ): + index_values = [ + int( + np.where( + (unique_dimension_values[idx] == pos) + )[0][0] + 1 + ) + for idx, pos in enumerate(plane_position_value) + ] + else: + # In case of the patient coordinate system, the + # value of the attribute the Dimension Index + # Sequence points to (Image Position Patient) has a + # value multiplicity greater than one. + index_values = [ + int( + np.where( + (unique_dimension_values[idx] == pos).all( + axis=1 + ) + )[0][0] + 1 + ) + for idx, pos in enumerate(plane_position_value) + ] return index_values From c0d9f72b0855052e6e9c7463ef9eeb86def8f868 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Sat, 8 Jul 2023 11:27:33 -0400 Subject: [PATCH 23/96] Add further docstring comment for _get_segment_array --- src/highdicom/seg/sop.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 5f355106..0894a2e7 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1955,7 +1955,9 @@ def _get_segment_array( ) -> np.ndarray: """Get pixel data array for a specific segment. - This is a helper method used during the constructor. + This is a helper method used during the constructor. Note that the + pixel array is expected to have been processed using the + _check_and_cast_pixel_array method before being passed to this method. 
Parameters ---------- From 30bb5f79436d3be729c6fa3f69b186b3ee5bb8c7 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 26 Jul 2023 20:48:33 -0400 Subject: [PATCH 24/96] Update src/highdicom/seg/sop.py Co-authored-by: Markus D. Herrmann --- src/highdicom/seg/sop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 0894a2e7..daa7c608 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1630,7 +1630,7 @@ def _get_pixel_measures( Returns ------- - Optional[highdicom.PixelMeasuresSequence] + Union[highdicom.PixelMeasuresSequence, None] A PixelMeasuresSequence derived from the source image, if this is possible. Otherwise None. From f7878d527f017297c74a96f23641d8ccbada3ee4 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 26 Jul 2023 20:50:37 -0400 Subject: [PATCH 25/96] Apply suggestions from code review Co-authored-by: Markus D. Herrmann --- src/highdicom/seg/sop.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index daa7c608..38ce579a 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1791,7 +1791,7 @@ def _check_and_cast_pixel_array( ---------- pixel_array: numpy.ndarray The segmentation pixel array. - number_of_segments: int, + number_of_segments: int The segment numbers from the segment descriptions, in the order they were passed. 1D array of integers. segmentation_type: highdicom.seg.SegmentationTypeValues @@ -1957,12 +1957,12 @@ def _get_segment_array( This is a helper method used during the constructor. Note that the pixel array is expected to have been processed using the - _check_and_cast_pixel_array method before being passed to this method. + ``_check_and_cast_pixel_array`` method before being passed to this method. Parameters ---------- pixel_array: numpy.ndarray - Full segmentation array containing all segments. 
+ Full segmentation pixel array containing all segments. segment_number: int The segment of interest. number_of_segments: int From 9092a000b3c1518b3c04c98b07d3b6682fb9a889 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 26 Jul 2023 20:55:14 -0400 Subject: [PATCH 26/96] Rename methods, fix lints --- src/highdicom/seg/sop.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 38ce579a..bd6c8801 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1286,7 +1286,7 @@ def __init__( # Multi-Frame Functional Groups and Multi-Frame Dimensions sffg_item = Dataset() if pixel_measures is None: - pixel_measures = self._get_pixel_measures( + pixel_measures = self._get_pixel_measures_sequence( source_image=src_img, is_multiframe=is_multiframe, ) @@ -1464,7 +1464,7 @@ def __init__( for segment_number in described_segment_numbers: # Pixel array for just this segment - segment_array = self._get_segment_array( + segment_array = self._get_segment_pixel_array( pixel_array, segment_number=segment_number, number_of_segments=number_of_segments, @@ -1613,7 +1613,7 @@ def _check_segment_numbers(described_segment_numbers: np.ndarray): ) @staticmethod - def _get_pixel_measures( + def _get_pixel_measures_sequence( source_image: Dataset, is_multiframe: bool, ) -> Optional[PixelMeasuresSequence]: @@ -1946,7 +1946,7 @@ def _get_nonempty_plane_indices( return (source_image_indices, False) @staticmethod - def _get_segment_array( + def _get_segment_pixel_array( pixel_array: np.ndarray, segment_number: int, number_of_segments: int, @@ -1957,7 +1957,8 @@ def _get_segment_array( This is a helper method used during the constructor. Note that the pixel array is expected to have been processed using the - ``_check_and_cast_pixel_array`` method before being passed to this method. + ``_check_and_cast_pixel_array`` method before being passed to this + method. 
Parameters ---------- From 378c7406979516ca8d5b344935d9f5700d4f47c9 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 5 Jul 2023 16:15:47 +0100 Subject: [PATCH 27/96] Add multithreading option to segmentation creation --- src/highdicom/seg/sop.py | 131 ++++++++++++++++++++++++--------------- 1 file changed, 81 insertions(+), 50 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index bd6c8801..839994bc 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1,6 +1,7 @@ """Module for SOP classes of the SEG modality.""" import logging from collections import defaultdict +from concurrent.futures import Future, ProcessPoolExecutor from contextlib import contextmanager from copy import deepcopy from os import PathLike @@ -20,6 +21,7 @@ Union, cast, ) +import warnings import numpy as np from pydicom.dataset import Dataset @@ -886,6 +888,7 @@ def __init__( content_creator_identification: Optional[ ContentCreatorIdentificationCodeSequence ] = None, + workers: int = 0, **kwargs: Any ) -> None: """ @@ -1038,6 +1041,11 @@ def __init__( content_creator_identification: Union[highdicom.ContentCreatorIdentificationCodeSequence, None], optional Identifying information for the person who created the content of this segmentation. + workers: int, optional + Number of worker processes to use for frame compression. If 0, no + workers are used and compression is performed in the main process + (this is the default behavior). If negative, as many processes are + created as the machine has processors. 
**kwargs: Any, optional Additional keyword arguments that will be passed to the constructor of `highdicom.base.SOPClass` @@ -1447,13 +1455,45 @@ def __init__( ) is_encaps = self.file_meta.TransferSyntaxUID.is_encapsulated + process_pool: Optional[ProcessPoolExecutor] = None - # In the case of encapsulated transfer syntaxes, we will accumulate - # a list of encoded frames to encapsulate at the end - # In the case of non-encapsulated (uncompressed) transfer syntaxes - # we will accumulate a list of flattened pixels from all frames for - # bitpacking at the end - full_frames_list: List[np.ndarray] = [] + if is_encaps: + if workers == 0: + # In the case of encapsulated transfer syntaxes with no + # workers, we will accumulate a list of encoded frames to + # encapsulate at the end + compressed_frames_list: List[bytes] = [] + else: + # In the case of encapsulated transfer syntaxes with multiple + # workers, we will create a process pool and accumulate a list + # of encoded frames to encapsulate at the end + frame_futures_list: List[Future] = [] + + # If workers is negative, pass None to use all processors + process_pool = ProcessPoolExecutor( + workers if workers > 0 else None + ) + + # Parameters to use when calling the encode_frame function in + # either of the above two cases + encode_frame_kwargs = dict( + transfer_syntax_uid=self.file_meta.TransferSyntaxUID, + bits_allocated=self.BitsAllocated, + bits_stored=self.BitsStored, + photometric_interpretation=self.PhotometricInterpretation, + pixel_representation=self.PixelRepresentation + ) + else: + # In the case of non-encapsulated (uncompressed) transfer syntaxes + # we will accumulate a list of flattened pixels from all frames for + # bitpacking at the end + full_frames_list: List[np.ndarray] = [] + if workers != 0: + warnings.warn( + "Setting workers != 0 when using a non-encapsulated " + "transfer syntax has no effect.", + UserWarning + ) # Information about individual frames is placed into the # 
PerFrameFunctionalGroupsSequence. Note that a *very* significant @@ -1523,11 +1563,26 @@ def __init__( # Add the segmentation pixel array for this frame to the list if is_encaps: - # Encode this frame and add to the list for encapsulation - # at the end - full_frames_list.append(segment_array[plane_index]) + if process_pool is None: + # Encode this frame and add resulting bytes to the list + # for encapsulation at the end + compressed_frames_list.append( + encode_frame( + segment_array[plane_index], + **encode_frame_kwargs, + ) + ) + else: + # Submit this frame for encoding this frame and add the + # future to the list for encapsulation at the end + future = process_pool.submit( + encode_frame, + array=segment_array[plane_index].copy(), + **encode_frame_kwargs, + ) + frame_futures_list.append(future) else: - # Concatenate the 1D array for re-encoding at the end + # Concatenate the 1D array for encoding at the end full_frames_list.append( segment_array[plane_index].flatten() ) @@ -1536,16 +1591,19 @@ def __init__( self.NumberOfFrames = len(pffg_sequence) if is_encaps: + if process_pool is not None: + compressed_frames_list = [ + fut.result() for fut in frame_futures_list + ] + process_pool.shutdown() + # Encapsulate all pre-compressed frames - compressed_frames = [ - self._encode_pixels(frm) for frm in full_frames_list - ] - self.PixelData = encapsulate(compressed_frames) + self.PixelData = encapsulate(compressed_frames_list) else: # Encode the whole pixel array at once # This allows for correct bit-packing in cases where # number of pixels per frame is not a multiple of 8 - self.PixelData = self._encode_pixels( + self.PixelData = self._encode_pixels_native( np.concatenate(full_frames_list) ) @@ -2204,53 +2262,26 @@ def _get_pffg_item( return pffg_item - def _encode_pixels(self, planes: np.ndarray) -> bytes: - """Encodes pixel planes. + def _encode_pixels_native(self, planes: np.ndarray) -> bytes: + """Encode pixel planes using a native transfer syntax. 
Parameters ---------- planes: numpy.ndarray - Array representing one or more segmentation image planes. - For encapsulated transfer syntaxes, only a single frame may be - processed. For other transfer syntaxes, multiple planes in a 3D - array may be processed. + Array representing one or more segmentation image planes. If + multiple image planes, planes stacked down the first dimension + (index 0). Returns ------- bytes Encoded pixels - Raises - ------ - ValueError - If multiple frames are passed when using an encapsulated - transfer syntax. - """ - if self.file_meta.TransferSyntaxUID.is_encapsulated: - # Check that only a single plane was passed - if planes.ndim == 3: - if planes.shape[0] == 1: - planes = planes[0, ...] - else: - raise ValueError( - 'Only single frame can be encoded at at time ' - 'in case of encapsulated format encoding.' - ) - return encode_frame( - planes, - transfer_syntax_uid=self.file_meta.TransferSyntaxUID, - bits_allocated=self.BitsAllocated, - bits_stored=self.BitsStored, - photometric_interpretation=self.PhotometricInterpretation, - pixel_representation=self.PixelRepresentation - ) + if self.SegmentationType == SegmentationTypeValues.BINARY.value: + return pack_bits(planes) else: - # The array may represent more than one frame item. 
- if self.SegmentationType == SegmentationTypeValues.BINARY.value: - return pack_bits(planes) - else: - return planes.tobytes() + return planes.tobytes() @classmethod def from_dataset( From be6963bc2035327320df25f54cfdbc82e72da9ef Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 5 Jul 2023 16:23:50 +0100 Subject: [PATCH 28/96] Remove array copy, seems to be unnecessary --- src/highdicom/seg/sop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 839994bc..79d5d884 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1577,7 +1577,7 @@ def __init__( # future to the list for encapsulation at the end future = process_pool.submit( encode_frame, - array=segment_array[plane_index].copy(), + array=segment_array[plane_index], **encode_frame_kwargs, ) frame_futures_list.append(future) From 808af48384ded81ebefb489f4964d52242e5f6c2 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 5 Jul 2023 16:34:25 +0100 Subject: [PATCH 29/96] Add test --- tests/test_seg.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test_seg.py b/tests/test_seg.py index c24c04b4..64774f1b 100644 --- a/tests/test_seg.py +++ b/tests/test_seg.py @@ -1407,6 +1407,25 @@ def test_construction_7(self): assert SegmentsOverlapValues[instance.SegmentsOverlap] == \ SegmentsOverlapValues.NO + def test_construction_workers(self): + instance = Segmentation( + [self._ct_image], + self._ct_pixel_array, + SegmentationTypeValues.FRACTIONAL.value, + self._segment_descriptions, + self._series_instance_uid, + self._series_number, + self._sop_instance_uid, + self._instance_number, + self._manufacturer, + self._manufacturer_model_name, + self._software_versions, + self._device_serial_number, + content_label=self._content_label, + transfer_syntax_uid=RLELossless, + workers=2, + ) + def test_pixel_types_fractional( self, fractional_transfer_syntax_uid, From 
9934170cd7373e44cc20fd37dceec99694fc0d76 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 5 Jul 2023 16:56:20 +0100 Subject: [PATCH 30/96] Add section to docs on multiprocessing --- docs/seg.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/seg.rst b/docs/seg.rst index ebe73871..63e786e0 100644 --- a/docs/seg.rst +++ b/docs/seg.rst @@ -606,6 +606,21 @@ and 1): - The clear frame boundaries make retrieving individual frames from ``"FRACTIONAL"`` image files possible. +Multiprocessing +--------------- + +When creating large, multiframe ``"FRACTIONAL"`` segmentations using a +compressed transfer syntax, the time taken to compress the frames can become +large and dominate the time taken to create the segmentation. By default, +frames are compressed in series using the main process; however, the ``workers`` +parameter allows you to specify a number of additional worker processes that +will be used to compress frames in parallel. Setting ``workers`` to a negative +number uses all available processors on your machine. Note that while this is +likely to result in significantly lower creation times for segmentations with +a very large number of frames, for segmentations with only a few frames the +additional overhead of spawning processes may in fact slow the entire +segmentation creation process down. 
+ Geometry of SEG Images ---------------------- From 08a82ba960bfe95ba6a48ad864b766ed7dcfe101 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 5 Jul 2023 17:25:26 +0100 Subject: [PATCH 31/96] Retrieve one segment array at a time: simpler and slightly faster --- src/highdicom/seg/sop.py | 45 ++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 79d5d884..73732e8e 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1503,22 +1503,23 @@ def __init__( pffg_sequence: List[Dataset] = [] for segment_number in described_segment_numbers: - # Pixel array for just this segment - segment_array = self._get_segment_pixel_array( - pixel_array, - segment_number=segment_number, - number_of_segments=number_of_segments, - segmentation_type=segmentation_type, - max_fractional_value=max_fractional_value, - ) - for plane_index in plane_sort_index: - # Even though completely empty slices were removed earlier, - # there may still be slices in which this specific segment is + + # Pixel array for just this segment and this position + segment_array = self._get_segment_pixel_array( + pixel_array[plane_index], + segment_number=segment_number, + number_of_segments=number_of_segments, + segmentation_type=segmentation_type, + max_fractional_value=max_fractional_value, + ) + + # Even though completely empty planes were removed earlier, + # there may still be planes in which this specific segment is # absent. 
Such frames should be removed if ( omit_empty_frames and not - np.any(segment_array[plane_index]) + np.any(segment_array) ): logger.debug( f'skip empty plane {plane_index} of segment ' @@ -1568,7 +1569,7 @@ def __init__( # for encapsulation at the end compressed_frames_list.append( encode_frame( - segment_array[plane_index], + segment_array, **encode_frame_kwargs, ) ) @@ -1577,15 +1578,13 @@ def __init__( # future to the list for encapsulation at the end future = process_pool.submit( encode_frame, - array=segment_array[plane_index], + array=segment_array, **encode_frame_kwargs, ) frame_futures_list.append(future) else: # Concatenate the 1D array for encoding at the end - full_frames_list.append( - segment_array[plane_index].flatten() - ) + full_frames_list.append(segment_array.flatten()) self.PerFrameFunctionalGroupsSequence = pffg_sequence self.NumberOfFrames = len(pffg_sequence) @@ -2021,7 +2020,9 @@ def _get_segment_pixel_array( Parameters ---------- pixel_array: numpy.ndarray - Full segmentation pixel array containing all segments. + Full segmentation pixel array containing all segments for a single + plane. Array is therefore either (Rows x Columns x Segments) or + (Rows x Columns) in case of a "label map" style array. segment_number: int The segment of interest. number_of_segments: int @@ -2044,8 +2045,8 @@ def _get_segment_pixel_array( # output is a FRACTIONAL segmentation Floating-point numbers must # be mapped to 8-bit integers in the range [0, # max_fractional_value]. - if pixel_array.ndim == 4: - segment_array = pixel_array[:, :, :, segment_number - 1] + if pixel_array.ndim == 3: + segment_array = pixel_array[:, :, segment_number - 1] else: segment_array = pixel_array segment_array = np.around( @@ -2053,7 +2054,7 @@ def _get_segment_pixel_array( ) segment_array = segment_array.astype(np.uint8) else: - if pixel_array.ndim == 3: + if pixel_array.ndim == 2: # "Label maps" that must be converted to binary masks. 
if number_of_segments == 1: # We wish to avoid unnecessary comparison or casting @@ -2069,7 +2070,7 @@ def _get_segment_pixel_array( pixel_array == segment_number ).astype(np.uint8) else: - segment_array = pixel_array[:, :, :, segment_number - 1] + segment_array = pixel_array[:, :, segment_number - 1] if segment_array.dtype != np.uint8: segment_array = segment_array.astype(np.uint8) From 47286dd113bfca1d047708574b79b1579765b000 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 6 Jul 2023 09:31:08 -0400 Subject: [PATCH 32/96] Fix docstring for _get_segment_array --- src/highdicom/seg/sop.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 73732e8e..c3d51f91 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -2010,7 +2010,7 @@ def _get_segment_pixel_array( segmentation_type: SegmentationTypeValues, max_fractional_value: int ) -> np.ndarray: - """Get pixel data array for a specific segment. + """Get pixel data array for a specific segment and plane. This is a helper method used during the constructor. Note that the pixel array is expected to have been processed using the @@ -2020,9 +2020,9 @@ def _get_segment_pixel_array( Parameters ---------- pixel_array: numpy.ndarray - Full segmentation pixel array containing all segments for a single - plane. Array is therefore either (Rows x Columns x Segments) or - (Rows x Columns) in case of a "label map" style array. + Segmentation pixel array containing all segments for a single plane. + Array is therefore either (Rows x Columns x Segments) or (Rows x + Columns) in case of a "label map" style array. segment_number: int The segment of interest. number_of_segments: int @@ -2036,8 +2036,8 @@ def _get_segment_pixel_array( ------- numpy.ndarray: Pixel data array consisting of pixel data for a single segment for - all planes. Output array has dtype np.uint8 and binary values (0 or - 1). + a single plane. 
Output array has dtype np.uint8 and binary values + (0 or 1). """ if pixel_array.dtype in (np.float_, np.float32, np.float64): From 22e9024da2385c10e3977f88762a57c4139bd30c Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Sat, 8 Jul 2023 12:42:26 -0400 Subject: [PATCH 33/96] add ability for user to pass their own process pool --- src/highdicom/seg/sop.py | 67 +++++++++++++++++++++++++++++----------- tests/test_seg.py | 27 ++++++++++++++-- 2 files changed, 74 insertions(+), 20 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index c3d51f91..58262cf9 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1,7 +1,7 @@ """Module for SOP classes of the SEG modality.""" import logging from collections import defaultdict -from concurrent.futures import Future, ProcessPoolExecutor +from concurrent.futures import Executor, Future, ProcessPoolExecutor from contextlib import contextmanager from copy import deepcopy from os import PathLike @@ -888,7 +888,7 @@ def __init__( content_creator_identification: Optional[ ContentCreatorIdentificationCodeSequence ] = None, - workers: int = 0, + workers: Union[int, Executor] = 0, **kwargs: Any ) -> None: """ @@ -1041,11 +1041,23 @@ def __init__( content_creator_identification: Union[highdicom.ContentCreatorIdentificationCodeSequence, None], optional Identifying information for the person who created the content of this segmentation. - workers: int, optional + workers: Union[int, concurrent.futures.Executor], optional Number of worker processes to use for frame compression. If 0, no workers are used and compression is performed in the main process (this is the default behavior). If negative, as many processes are created as the machine has processors. + + Alternatively, you may directly pass an instance of a class derived + from ``concurrent.futures.Executor`` (most likely an instance of + ``concurrent.futures.ProcessPoolExecutor``) for highdicom to use. 
+ You may wish to do this either to have greater control over the + setup of the executor, or to avoid the setup cost of spawning new + processes each time this ``__init__`` method is called if your + application creates a large number of Segmentations. + + Note that if you use worker processes, you must ensure that your + main process uses the ``if __name__ == "__main__"`` idiom to guard + against spawned child processes creating further workers. **kwargs: Any, optional Additional keyword arguments that will be passed to the constructor of `highdicom.base.SOPClass` @@ -1455,24 +1467,37 @@ def __init__( ) is_encaps = self.file_meta.TransferSyntaxUID.is_encapsulated - process_pool: Optional[ProcessPoolExecutor] = None + process_pool: Optional[Executor] = None + + if not isinstance(workers, (int, Executor)): + raise TypeError( + 'Argument "workers" must be of type int or ' + 'concurrent.futures.Executor (or a derived class).' + ) + using_multiprocessing = ( + isinstance(workers, Executor) or workers != 0 + ) if is_encaps: - if workers == 0: - # In the case of encapsulated transfer syntaxes with no + if using_multiprocessing: + # In the case of encapsulated transfer syntaxes with multiple # workers, we will accumulate a list of encoded frames to # encapsulate at the end - compressed_frames_list: List[bytes] = [] - else: - # In the case of encapsulated transfer syntaxes with multiple - # workers, we will create a process pool and accumulate a list - # of encoded frames to encapsulate at the end frame_futures_list: List[Future] = [] - # If workers is negative, pass None to use all processors - process_pool = ProcessPoolExecutor( - workers if workers > 0 else None - ) + # Use the existing executor or create one + if isinstance(workers, Executor): + process_pool = workers + else: + # If workers is negative, pass None to use all processors + process_pool = ProcessPoolExecutor( + workers if workers > 0 else None + ) + else: + # In the case of encapsulated transfer syntaxes 
with no + # workers, we will accumulate a list of encoded frames to + # encapsulate at the end + compressed_frames_list: List[bytes] = [] # Parameters to use when calling the encode_frame function in # either of the above two cases @@ -1488,12 +1513,14 @@ def __init__( # we will accumulate a list of flattened pixels from all frames for # bitpacking at the end full_frames_list: List[np.ndarray] = [] - if workers != 0: + if using_multiprocessing: warnings.warn( - "Setting workers != 0 when using a non-encapsulated " + "Setting workers != 0 or passing an instance of " + "concurrent.futures.Executor when using a non-encapsulated " "transfer syntax has no effect.", UserWarning ) + using_multiprocessing = False # Information about individual frames is placed into the # PerFrameFunctionalGroupsSequence. Note that a *very* significant @@ -1594,7 +1621,11 @@ def __init__( compressed_frames_list = [ fut.result() for fut in frame_futures_list ] - process_pool.shutdown() + + # Shutdown the pool if we created it, otherwise it is the + # caller's responsibility + if process_pool is not workers: + process_pool.shutdown() # Encapsulate all pre-compressed frames self.PixelData = encapsulate(compressed_frames_list) diff --git a/tests/test_seg.py b/tests/test_seg.py index 64774f1b..a705ef8e 100644 --- a/tests/test_seg.py +++ b/tests/test_seg.py @@ -1,4 +1,5 @@ from collections import defaultdict +from concurrent.futures import ProcessPoolExecutor from copy import deepcopy import unittest from pathlib import Path @@ -1408,9 +1409,10 @@ def test_construction_7(self): SegmentsOverlapValues.NO def test_construction_workers(self): + # Create a segmentation with multiple workers instance = Segmentation( - [self._ct_image], - self._ct_pixel_array, + self._ct_series, + self._ct_series_mask_array, SegmentationTypeValues.FRACTIONAL.value, self._segment_descriptions, self._series_instance_uid, @@ -1426,6 +1428,27 @@ def test_construction_workers(self): workers=2, ) + def 
test_construction_workers_manual(self): + # Create a segmentation with multiple workers created manually + with ProcessPoolExecutor(2) as pool: + instance = Segmentation( + self._ct_series, + self._ct_series_mask_array, + SegmentationTypeValues.FRACTIONAL.value, + self._segment_descriptions, + self._series_instance_uid, + self._series_number, + self._sop_instance_uid, + self._instance_number, + self._manufacturer, + self._manufacturer_model_name, + self._software_versions, + self._device_serial_number, + content_label=self._content_label, + transfer_syntax_uid=RLELossless, + workers=pool, + ) + def test_pixel_types_fractional( self, fractional_transfer_syntax_uid, From 2aee2636b2035d244b272944438073e6a5fc97bf Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 26 Jul 2023 23:19:47 -0400 Subject: [PATCH 34/96] Add basic tiled_full option --- src/highdicom/seg/sop.py | 137 +++++++++++++++++++++++++++++++++++---- 1 file changed, 123 insertions(+), 14 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 58262cf9..acb99d00 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -51,7 +51,10 @@ PlanePositionSequence, PixelMeasuresSequence ) -from highdicom.enum import CoordinateSystemNames +from highdicom.enum import ( + CoordinateSystemNames, + DimensionOrganizationTypeValues, +) from highdicom.frame import encode_frame from highdicom.seg.content import ( DimensionIndexSequence, @@ -889,6 +892,11 @@ def __init__( ContentCreatorIdentificationCodeSequence ] = None, workers: Union[int, Executor] = 0, + dimension_organization_type: Union[ + DimensionOrganizationTypeValues, + str, + None, + ] = None, **kwargs: Any ) -> None: """ @@ -1058,6 +1066,8 @@ def __init__( Note that if you use worker processes, you must ensure that your main process uses the ``if __name__ == "__main__"`` idiom to guard against spawned child processes creating further workers. 
+ dimension_organization_type: Union[highdicom.enum.DimensionOrganizationTypeValues, str, None], optional + Dimension organization type to use for the output image. **kwargs: Any, optional Additional keyword arguments that will be passed to the constructor of `highdicom.base.SOPClass` @@ -1417,6 +1427,15 @@ def __init__( plane_sort_index = np.array([0]) are_spatial_locations_preserved = True + # Dimension Organization Type + dimension_organization_type = self._check_dimension_organization_type( + dimension_organization_type=dimension_organization_type, + is_tiled=is_tiled, + are_spatial_locations_preserved=are_spatial_locations_preserved, + ) + if dimension_organization_type is not None: + self.DimensionOrganizationType = dimension_organization_type.value + # Find indices such that empty planes are removed if omit_empty_frames: included_plane_indices, is_empty = \ @@ -1577,17 +1596,23 @@ def __init__( ) else: dimension_index_values = [] - pffg_item = self._get_pffg_item( - segment_number=segment_number, - dimension_index_values=dimension_index_values, - plane_position=plane_positions[plane_index], - source_images=source_images, - source_image_index=plane_index, - are_spatial_locations_preserved=are_spatial_locations_preserved, # noqa: E501 - has_ref_frame_uid=has_ref_frame_uid, - coordinate_system=self._coordinate_system, - ) - pffg_sequence.append(pffg_item) + + if ( + dimension_organization_type != + DimensionOrganizationTypeValues.TILED_FULL + ): + # No per-frame functional group for TILED FULL + pffg_item = self._get_pffg_item( + segment_number=segment_number, + dimension_index_values=dimension_index_values, + plane_position=plane_positions[plane_index], + source_images=source_images, + source_image_index=plane_index, + are_spatial_locations_preserved=are_spatial_locations_preserved, # noqa: E501 + has_ref_frame_uid=has_ref_frame_uid, + coordinate_system=self._coordinate_system, + ) + pffg_sequence.append(pffg_item) # Add the segmentation pixel array for this 
frame to the list if is_encaps: @@ -1613,8 +1638,11 @@ def __init__( # Concatenate the 1D array for encoding at the end full_frames_list.append(segment_array.flatten()) - self.PerFrameFunctionalGroupsSequence = pffg_sequence - self.NumberOfFrames = len(pffg_sequence) + if ( + dimension_organization_type != + DimensionOrganizationTypeValues.TILED_FULL + ): + self.PerFrameFunctionalGroupsSequence = pffg_sequence if is_encaps: if process_pool is not None: @@ -1628,11 +1656,13 @@ def __init__( process_pool.shutdown() # Encapsulate all pre-compressed frames + self.NumberOfFrames = len(compressed_frames_list) self.PixelData = encapsulate(compressed_frames_list) else: # Encode the whole pixel array at once # This allows for correct bit-packing in cases where # number of pixels per frame is not a multiple of 8 + self.NumberOfFrames = len(full_frames_list) self.PixelData = self._encode_pixels_native( np.concatenate(full_frames_list) ) @@ -1865,6 +1895,85 @@ def _add_slide_coordinate_metadata( format_number_as_ds(z_center) self.ImageCenterPointCoordinatesSequence = [center_item] + @staticmethod + def _check_dimension_organization_type( + dimension_organization_type: Union[ + DimensionOrganizationTypeValues, + str, + None, + ], + is_tiled: bool, + are_spatial_locations_preserved: bool, + ) -> Optional[DimensionOrganizationTypeValues]: + """Checks that the specified Dimension Organization Type is valid. + + Parameters + ---------- + dimension_organization_type: Union[highdicom.enum.DimensionOrganizationTypeValues, str, None] + The specified DimensionOrganizationType for the output Segmentation. + is_tiled: bool + Whether the source image is a tiled image. + are_spatial_locations_preserved: bool + Whether spatial locations are preserved between the source image + and the segmentation pixel array. + + Returns + ------- + Optional[highdicom.enum.DimensionOrganizationTypeValues]: + DimensionOrganizationType to use for the output Segmentation. 
+ + """ # noqa: E501 + if is_tiled and dimension_organization_type is None: + dimension_organization_type = \ + DimensionOrganizationTypeValues.TILED_SPARSE + + if dimension_organization_type is not None: + dimension_organization_type = DimensionOrganizationTypeValues( + dimension_organization_type + ) + tiled_dimension_organization_types = [ + DimensionOrganizationTypeValues.TILED_SPARSE, + DimensionOrganizationTypeValues.TILED_FULL + ] + + if ( + dimension_organization_type in + tiled_dimension_organization_types + ): + if not is_tiled: + raise ValueError( + f"A value of {dimension_organization_type.value} " + 'for parameter "dimension_organization_type" is ' + 'only valid if the source images are tiled.' + ) + + if ( + dimension_organization_type == + DimensionOrganizationTypeValues.TILED_FULL + ): + # Only allow TILED_FULL if the source image is TILED_FULL + # and spatial locations are preserved. This could be + # relaxed in the future by checking the plane positions. + if ( + not hasattr(src_img, 'DimensionOrganizationType') or + src_img.DimensionOrganizationType != 'TILED_FULL' + ): + raise ValueError( + 'A value of "TILED_FULL" for parameter ' + '"dimension_organization_type" is not permitted unless ' + 'the source images also have ' + 'DimensionOrganizationType of "TILED_FULL".' + ) + if not are_spatial_locations_preserved: + raise ValueError( + 'A value of "TILED_FULL" for parameter ' + '"dimension_organization_type" is not permitted if ' + 'the "plane_positions" of the segmentation do not ' + 'match the plane positions of the source image.' 
+ ) + + return dimension_organization_type + @staticmethod def _check_and_cast_pixel_array( pixel_array: np.ndarray, From 29e45bfd6334265b6640711a70024993f208927e Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 27 Jul 2023 08:55:00 -0400 Subject: [PATCH 35/96] Fixes --- src/highdicom/seg/sop.py | 23 ++++++++++++++++++++--- tests/test_seg.py | 19 +++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index acb99d00..7ec9217a 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1432,6 +1432,8 @@ def __init__( dimension_organization_type=dimension_organization_type, is_tiled=is_tiled, are_spatial_locations_preserved=are_spatial_locations_preserved, + omit_empty_frames=omit_empty_frames, + source_image=src_img, ) if dimension_organization_type is not None: self.DimensionOrganizationType = dimension_organization_type.value @@ -1675,7 +1677,11 @@ def __init__( self.copy_patient_and_study_information(src_img) # Build lookup tables for efficient decoding - self._build_luts() + if ( + dimension_organization_type is not None and + dimension_organization_type.value != "TILED_FULL" + ): + self._build_luts() def add_segments( self, @@ -1904,6 +1910,8 @@ def _check_dimension_organization_type( ], is_tiled: bool, are_spatial_locations_preserved: bool, + omit_empty_frames: bool, + source_image: Dataset, ) -> Optional[DimensionOrganizationTypeValues]: """Checks that the specified Dimension Organization Type is valid. @@ -1916,6 +1924,10 @@ def _check_dimension_organization_type( are_spatial_locations_preserved: bool Whether spatial locations are preserved between the source image and the segmentation pixel array. + omit_empty_frames: bool + Whether it was specified to omit empty frames. + source_image: pydicom.Dataset + Representative dataset of the source images. 
Returns ------- @@ -1955,8 +1967,8 @@ def _check_dimension_organization_type( # and spatial locations are preserved. This could be # relaxed in the future by checking the plane positions. if ( - not hasattr(src_img, 'DimensionOrganizationType') or - src_img.DimensionOrganizationType != 'TILED_FULL' + not hasattr(source_image, 'DimensionOrganizationType') or + source_image.DimensionOrganizationType != 'TILED_FULL' ): raise ValueError( 'A value of "TILED_FULL" for parameter ' @@ -1971,6 +1983,11 @@ def _check_dimension_organization_type( 'the "plane_positions" of the segmentation do not ' 'match the plane positions of the source image.' ) + if omit_empty_frames: + raise ValueError( + 'Parameter "omit_empty_frames" should be False if ' + 'using "dimension_organization_type" of "TILED_FULL".' + ) return dimension_organization_type diff --git a/tests/test_seg.py b/tests/test_seg.py index a705ef8e..74bc8eee 100644 --- a/tests/test_seg.py +++ b/tests/test_seg.py @@ -1449,6 +1449,25 @@ def test_construction_workers_manual(self): workers=pool, ) + def test_construction_tiled_full(self): + instance = Segmentation( + [self._sm_image], + pixel_array=self._sm_pixel_array, + segmentation_type=SegmentationTypeValues.FRACTIONAL.value, + segment_descriptions=self._segment_descriptions, + series_instance_uid=self._series_instance_uid, + series_number=self._series_number, + sop_instance_uid=self._sop_instance_uid, + instance_number=self._instance_number, + manufacturer=self._manufacturer, + manufacturer_model_name=self._manufacturer_model_name, + software_versions=self._software_versions, + device_serial_number=self._device_serial_number, + dimension_organization_type="TILED_FULL", + omit_empty_frames=False, + ) + assert instance.DimensionOrganizationType == "TILED_FULL" + def test_pixel_types_fractional( self, fractional_transfer_syntax_uid, From c1dfdc6d64964a636ac60613e073bfec713eab5a Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Fri, 23 Jun 2023 08:11:17 +0200 Subject: 
[PATCH 36/96] Test for optional parameters --- tests/test_content.py | 143 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 137 insertions(+), 6 deletions(-) diff --git a/tests/test_content.py b/tests/test_content.py index f28568f9..bc95a38c 100644 --- a/tests/test_content.py +++ b/tests/test_content.py @@ -1,3 +1,4 @@ +import datetime from unittest import TestCase import pytest @@ -10,6 +11,7 @@ from highdicom.sr import CodedConcept from highdicom import ( + IssuerOfIdentifier, PaletteColorLUT, ContentCreatorIdentificationCodeSequence, ModalityLUT, @@ -31,7 +33,11 @@ VOILUTTransformation, VOILUTFunctionValues, ) -from highdicom.sr.value_types import CodeContentItem, TextContentItem +from highdicom.sr.value_types import ( + CodeContentItem, + TextContentItem, + DateTimeContentItem +) from .utils import write_and_read_dataset @@ -1068,7 +1074,7 @@ def test_construction_processing(self): description = codes.SCT.SpecimenFreezing instance = SpecimenPreparationStep( specimen_id=specimen_id, - processing_procedure=SpecimenProcessing(description=description) + processing_procedure=SpecimenProcessing(description=description), ) seq = instance.SpecimenPreparationStepContentItemSequence assert len(seq) == 3 @@ -1077,6 +1083,8 @@ def test_construction_processing(self): assert instance.specimen_id == specimen_id assert instance.processing_type == processing_type + assert instance.processing_datetime is None + assert instance.issuer_of_specimen_id is None assert instance.fixative is None assert instance.embedding_medium is None @@ -1090,10 +1098,74 @@ def test_construction_processing(self): assert processing_type_item.value == processing_type assert processing_type_item.relationship_type is None - staining_item = seq[2] - assert staining_item.name == codes.DCM.ProcessingStepDescription - assert staining_item.value == description - assert staining_item.relationship_type is None + processing_step_description_item = seq[2] + assert processing_step_description_item.name 
== codes.DCM.ProcessingStepDescription + assert processing_step_description_item.value == description + assert processing_step_description_item.relationship_type is None + + def test_construction_processing_optionals(self): + specimen_id = 'specimen id' + processing_type = codes.SCT.SpecimenCollection + procedure = codes.SCT.Excision + processing_procedure = SpecimenCollection(procedure=procedure) + processing_datetime = datetime.datetime(2023, 6, 17, 21, 38, 14) + processing_description = 'processing description' + issuer_of_specimen_id = IssuerOfIdentifier("issuer id") + fixative = CodedConcept("fixative", "test", "test fixative") + embedding_medium = CodedConcept("embedding", "test", "test embedding") + + instance = SpecimenPreparationStep( + specimen_id=specimen_id, + processing_procedure=processing_procedure, + processing_description=processing_description, + processing_datetime=processing_datetime, + issuer_of_specimen_id=issuer_of_specimen_id, + fixative=fixative, + embedding_medium=embedding_medium + ) + + seq = instance.SpecimenPreparationStepContentItemSequence + assert len(seq) == 8 + + specimen_id_item = seq[0] + assert specimen_id_item.name == codes.DCM.SpecimenIdentifier + assert specimen_id_item.value == specimen_id + assert specimen_id_item.relationship_type is None + + issuer_of_specimen_id_item = seq[1] + assert issuer_of_specimen_id_item.name == codes.DCM.IssuerOfSpecimenIdentifier + assert issuer_of_specimen_id_item.value == issuer_of_specimen_id.LocalNamespaceEntityID + assert issuer_of_specimen_id_item.relationship_type is None + + processing_type_item = seq[2] + assert processing_type_item.name == codes.DCM.ProcessingType + assert processing_type_item.value == processing_type + assert processing_type_item.relationship_type is None + + processing_datetime_item = seq[3] + assert processing_datetime_item.name == codes.DCM.DatetimeOfProcessing + assert processing_datetime_item.value == processing_datetime + assert 
processing_datetime_item.relationship_type is None + + processing_description_item = seq[4] + assert processing_description_item.name == codes.DCM.ProcessingStepDescription + assert processing_description_item.value == processing_description + assert processing_description_item.relationship_type is None + + collection_step_item = seq[5] + assert collection_step_item.name == codes.SCT.SpecimenCollection + assert collection_step_item.value == procedure + assert collection_step_item.relationship_type is None + + fixative_item = seq[6] + assert fixative_item.name == codes.SCT.TissueFixative + assert fixative_item.value == fixative + assert fixative_item.relationship_type is None + + embedding_item = seq[7] + assert embedding_item.name == codes.SCT.TissueEmbeddingMedium + assert embedding_item.value == embedding_medium + assert embedding_item.relationship_type is None def test_construction_processing_from_dataset(self): specimen_id = 'specimen id' @@ -1125,6 +1197,65 @@ def test_construction_processing_from_dataset(self): assert isinstance(processing_procedure, SpecimenProcessing) assert processing_procedure.description == description + def test_construction_processing_from_dataset_optionals(self): + specimen_id = 'specimen id' + processing_type = codes.SCT.SpecimenCollection + procedure = codes.SCT.Excision + processing_procedure = SpecimenCollection(procedure=procedure) + processing_description = "processing description" + processing_datetime = datetime.datetime(2023, 6, 17, 21, 38, 14) + issuer_of_specimen_id = IssuerOfIdentifier("issuer id") + fixative = CodedConcept("fixative", "test", "test fixative") + embedding_medium = CodedConcept("embedding", "test", "test embedding") + dataset = Dataset() + dataset.SpecimenPreparationStepContentItemSequence = [ + TextContentItem( + name=codes.DCM.SpecimenIdentifier, + value=specimen_id + ), + TextContentItem( + name=codes.DCM.IssuerOfSpecimenIdentifier, + value=issuer_of_specimen_id.LocalNamespaceEntityID + ), + 
DateTimeContentItem( + name=codes.DCM.DatetimeOfProcessing, + value=processing_datetime + ), + CodeContentItem( + name=codes.DCM.ProcessingType, + value=processing_type + ), + TextContentItem( + name=codes.DCM.ProcessingStepDescription, + value=processing_description + ), + CodeContentItem( + name=codes.SCT.SpecimenCollection, + value=procedure + ), + CodeContentItem( + name=codes.SCT.TissueFixative, + value=fixative + ), + CodeContentItem( + name=codes.SCT.TissueEmbeddingMedium, + value=embedding_medium + ) + + ] + dataset_reread = write_and_read_dataset(dataset) + instance = SpecimenPreparationStep.from_dataset(dataset_reread) + assert isinstance(instance, SpecimenPreparationStep) + assert instance.specimen_id == specimen_id + assert instance.processing_type == processing_type + assert instance.fixative == fixative + assert instance.embedding_medium == embedding_medium + assert instance.processing_description == processing_description + processing_procedure = instance.processing_procedure + assert isinstance(processing_procedure, SpecimenCollection) + assert processing_procedure.procedure == procedure + assert instance.processing_datetime == processing_datetime + assert instance.issuer_of_specimen_id == issuer_of_specimen_id.LocalNamespaceEntityID class TestSpecimenDescription(TestCase): From b6e68c0ba7c88a2d3323e4b61392080f54573b6a Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Fri, 23 Jun 2023 08:14:33 +0200 Subject: [PATCH 37/96] Raise ValueError if duplicating "Processing Step Description" --- src/highdicom/content.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index ddbed2a4..a1e482f7 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -981,6 +981,14 @@ def __init__( """ # noqa: E501 super().__init__() + if ( + isinstance(processing_procedure, SpecimenProcessing) and + processing_description is not None + ): + raise ValueError( + 'Processing 
description must be None if procedure is of type ' + '"SpecimenProcessing".' + ) sequence = ContentSequence(is_root=False, is_sr=False) specimen_identifier_item = TextContentItem( name=codes.DCM.SpecimenIdentifier, @@ -1184,7 +1192,7 @@ def from_dataset( raise ValueError( 'Specimen Preparation Step Content Item Sequence must ' 'contain exactly one content item "Processing Step ' - 'Description" when processing type is "Specimen .' + 'Description" when processing type is "Specimen ' 'Processing".' ) instance._processing_procedure = SpecimenProcessing( From 97be91ce96b736fd975a6507043f689e9c46036f Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Fri, 23 Jun 2023 08:15:41 +0200 Subject: [PATCH 38/96] Fix concept code name for DateTimeContentItem --- src/highdicom/content.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index a1e482f7..d83ece94 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -1030,7 +1030,7 @@ def __init__( if processing_datetime is not None: processing_datetime_item = DateTimeContentItem( - name=codes.DCM.DateTimeOfProcessing, + name=codes.DCM.DatetimeOfProcessing, value=processing_datetime ) sequence.append(processing_datetime_item) From afb3d7f96af390dbb09a244fd02f33fc21e98c26 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Fri, 23 Jun 2023 08:17:34 +0200 Subject: [PATCH 39/96] Add additional properties --- src/highdicom/content.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index d83ece94..cc02d58f 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -1126,6 +1126,40 @@ def embedding_medium(self) -> Union[CodedConcept, None]: return None return items[0].value + @property + def processing_description(self) -> Union[str, CodedConcept, None]: + """Union[str, highdicom.sr.CodedConcept]: Processing description""" + if 
isinstance(self._processing_procedure, SpecimenProcessing): + return None + items = self.SpecimenPreparationStepContentItemSequence.find( + codes.DCM.ProcessingStepDescription + ) + if len(items) == 0: + return None + return items[0].value + + @property + def processing_datetime(self) -> Union[datetime.datetime, None]: + """datetime.datetime: Processing datetime""" + + items = self.SpecimenPreparationStepContentItemSequence.find( + codes.DCM.DatetimeOfProcessing + ) + if len(items) == 0: + return None + return items[0].value + + @property + def issuer_of_specimen_id(self) -> Union[IssuerOfIdentifier, None]: + """highdicom.content.IssuerOfIdentifier: Issuer of specimen id""" + + items = self.SpecimenPreparationStepContentItemSequence.find( + codes.DCM.IssuerOfSpecimenIdentifier + ) + if len(items) == 0: + return None + return items[0].value + @classmethod def from_dataset( cls, From 5407c9f598a793c7d2f95a36ae9fa5a6820a16f9 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Fri, 23 Jun 2023 12:20:09 +0200 Subject: [PATCH 40/96] Tests for datetime-based content items --- tests/test_valuetypes.py | 142 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 tests/test_valuetypes.py diff --git a/tests/test_valuetypes.py b/tests/test_valuetypes.py new file mode 100644 index 00000000..af821acf --- /dev/null +++ b/tests/test_valuetypes.py @@ -0,0 +1,142 @@ +import datetime + +import pytest +from pydicom import Dataset +from pydicom.sr.codedict import codes +from pydicom.sr.coding import Code +from pydicom.valuerep import DT, DA, TM + +from highdicom.sr.coding import CodedConcept +from highdicom.sr.enum import ValueTypeValues +from highdicom.sr.value_types import DateContentItem, DateTimeContentItem, TimeContentItem +from tests.utils import write_and_read_dataset + + +class TestDateTimeContentItem: + test_datetime_values = [ + DT("2023"), + DT("202306"), + DT("20230623"), + DT("2023062311"), + DT("202306231112"), + 
DT("20230623111247"), + DT("20230623111247.123456"), + ] + + @pytest.mark.parametrize("datetime_value", test_datetime_values) + def test_construct_from_datetime(self, datetime_value: DT): + name = codes.DCM.DatetimeOfProcessing + assert isinstance(name, Code) + value_type = ValueTypeValues.DATETIME + item = DateTimeContentItem( + name=name, + value=datetime_value + ) + + assert item.name == name + assert item.value == datetime_value + assert item.value_type == value_type + assert isinstance(item.value, datetime.datetime) + assert item.value.isoformat() == datetime_value.isoformat() + + + @pytest.mark.parametrize("datetime_value", test_datetime_values) + def test_from_dataset(self, datetime_value: DT): + name = codes.DCM.DatetimeOfProcessing + assert isinstance(name, Code) + value_type = ValueTypeValues.DATETIME + dataset = Dataset() + dataset.ValueType = value_type.value + dataset.ConceptNameCodeSequence = [CodedConcept.from_code(name)] + dataset.DateTime = datetime_value + + dataset_reread = write_and_read_dataset(dataset) + item = DateTimeContentItem.from_dataset(dataset_reread) + + assert item.name == name + assert item.value == datetime_value + assert item.value_type == value_type + assert isinstance(item.value, datetime.datetime) + assert item.value.isoformat() == datetime_value.isoformat() + + +class TestDateContentItem: + def test_construct_from_date(self): + date_value = DA("20230623") + name = codes.DCM.AcquisitionDate + assert isinstance(name, Code) + value_type = ValueTypeValues.DATE + item = DateContentItem( + name=name, + value=date_value + ) + + assert item.name == name + assert item.value == date_value + assert item.value_type == value_type + assert isinstance(item.value, datetime.date) + assert item.value.isoformat() == date_value.isoformat() + + + def test_from_dataset(self): + date_value = DA("20230623") + name = codes.DCM.AcquisitionDate + assert isinstance(name, Code) + value_type = ValueTypeValues.DATE + dataset = Dataset() + dataset.ValueType 
= value_type.value + dataset.ConceptNameCodeSequence = [CodedConcept.from_code(name)] + dataset.Date = date_value + + dataset_reread = write_and_read_dataset(dataset) + item = DateContentItem.from_dataset(dataset_reread) + + assert item.name == name + assert item.value == date_value + assert item.value_type == value_type + assert isinstance(item.value, datetime.date) + assert item.value.isoformat() == date_value.isoformat() + +class TestTimeContentItem: + test_time_values = [ + TM("11"), + TM("1112"), + TM("111247"), + TM("111247.123456"), + ] + + @pytest.mark.parametrize("time_value", test_time_values) + def test_construct_from_time(self, time_value: TM): + name = codes.DCM.AcquisitionTime + assert isinstance(name, Code) + value_type = ValueTypeValues.TIME + item = TimeContentItem( + name=name, + value=time_value + ) + + assert item.name == name + assert item.value == time_value + assert item.value_type == value_type + assert isinstance(item.value, datetime.time) + assert item.value.isoformat() == time_value.isoformat() + + + @pytest.mark.parametrize("time_value", test_time_values) + def test_from_dataset(self, time_value: TM): + name = codes.DCM.AcquisitionDate + assert isinstance(name, Code) + value_type = ValueTypeValues.TIME + dataset = Dataset() + dataset.ValueType = value_type.value + dataset.ConceptNameCodeSequence = [CodedConcept.from_code(name)] + dataset.Time = time_value + + dataset_reread = write_and_read_dataset(dataset) + item = TimeContentItem.from_dataset(dataset_reread) + + assert item.name == name + assert item.value == time_value + assert item.value_type == value_type + assert isinstance(item.value, datetime.time) + assert item.value.isoformat() == time_value.isoformat() From 852414a0a1feaad329e6614d1107c025fe65b9d8 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Fri, 23 Jun 2023 12:20:34 +0200 Subject: [PATCH 41/96] Fix for parsing of datetime-type content --- src/highdicom/sr/value_types.py | 60 ++++++++++++++------------------- 1 file 
changed, 25 insertions(+), 35 deletions(-) diff --git a/src/highdicom/sr/value_types.py b/src/highdicom/sr/value_types.py index 16a1958c..c44719af 100644 --- a/src/highdicom/sr/value_types.py +++ b/src/highdicom/sr/value_types.py @@ -877,19 +877,15 @@ def __init__( @property def value(self) -> datetime.time: """datetime.time: time""" - allowed_formats = [ - '%H:%M:%S.%f', - '%H:%M:%S', - '%H:%M', - '%H', - ] - for fmt in allowed_formats: - try: - dt = datetime.datetime.strptime(self.Time.isoformat(), fmt) - return dt.time() - except ValueError: - continue - raise ValueError(f'Could not decode time value "{self.Time}"') + if isinstance(self.Time, TM): + return self.Time + try: + return TM(self.Time) + except ValueError as exception: + raise ValueError( + f'Could not decode time value "{self.Time}"' + ) from exception + @classmethod def from_dataset( @@ -952,8 +948,14 @@ def __init__( @property def value(self) -> datetime.date: """datetime.date: date""" - fmt = '%Y-%m-%d' - return datetime.datetime.strptime(self.Date.isoformat(), fmt).date() + if isinstance(self.Date, DA): + return self.Date + try: + return DA(self.Date) + except ValueError as exception: + raise ValueError( + f'Could not decode date value "{self.Date}"' + ) from exception @classmethod def from_dataset( @@ -1016,26 +1018,14 @@ def __init__( @property def value(self) -> datetime.datetime: """datetime.datetime: datetime""" - allowed_formats = [ - '%Y-%m-%dT%H:%M:%S.%f%z', - '%Y-%m-%dT%H:%M:%S.%f', - '%Y-%m-%dT%H:%M:%S', - '%Y-%m-%dT%H:%M:%S%z', - '%Y-%m-%dT%H:%M', - '%Y-%m-%dT%H:%M%z', - '%Y-%m-%dT%H', - '%Y-%m-%dT%H%z', - '%Y-%m-%d', - '%Y-%m', - '%Y', - ] - for fmt in allowed_formats: - try: - dt = datetime.datetime.strptime(self.DateTime.isoformat(), fmt) - return dt - except ValueError: - continue - raise ValueError(f'Could not decode datetime value "{self.DateTime}"') + if isinstance(self.DateTime, DT): + return self.DateTime + try: + return DT(self.DateTime) + except ValueError as exception: + 
raise ValueError( + f'Could not decode datetime value "{self.DateTime}"' + ) from exception @classmethod def from_dataset( From 4d734fda16981162f8525b594e85e45a5e36e279 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Fri, 23 Jun 2023 13:49:54 +0200 Subject: [PATCH 42/96] Add specimen container and specimen type --- src/highdicom/content.py | 40 +++++++++++++++++++++++++++++++++++++++- tests/test_content.py | 32 +++++++++++++++++++++++++++++--- 2 files changed, 68 insertions(+), 4 deletions(-) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index cc02d58f..e4325697 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -960,7 +960,9 @@ def __init__( processing_datetime: Optional[datetime.datetime] = None, issuer_of_specimen_id: Optional[IssuerOfIdentifier] = None, fixative: Optional[Union[Code, CodedConcept]] = None, - embedding_medium: Optional[Union[Code, CodedConcept]] = None + embedding_medium: Optional[Union[Code, CodedConcept]] = None, + specimen_container: Optional[Union[Code, CodedConcept]] = None, + specimen_type: Optional[Union[Code, CodedConcept]] = None, ): """ Parameters @@ -978,6 +980,10 @@ def __init__( Fixative used during processing embedding_medium: Union[pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional Embedding medium used during processing + specimen_container: Union[pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional + Container the specimen resides in. + specimen_type: Union[pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional + The anatomic pathology specimen type of the specimen. 
""" # noqa: E501 super().__init__() @@ -1067,6 +1073,20 @@ def __init__( sequence.append(embedding_medium_item) self.SpecimenPreparationStepContentItemSequence = sequence + if specimen_container is not None: + specimen_container_item = CodeContentItem( + name=codes.SCT.SpecimenContainer, + value=specimen_container + ) + sequence.append(specimen_container_item) + + if specimen_type is not None: + specimen_type_item = CodeContentItem( + name=codes.SCT.SpecimenType, + value=specimen_type + ) + sequence.append(specimen_type_item) + @property def specimen_id(self) -> str: """str: Specimen identifier""" @@ -1160,6 +1180,24 @@ def issuer_of_specimen_id(self) -> Union[IssuerOfIdentifier, None]: return None return items[0].value + @property + def specimen_container(self) -> Union[CodedConcept, None]: + items = self.SpecimenPreparationStepContentItemSequence.find( + codes.SCT.SpecimenContainer + ) + if len(items) == 0: + return None + return items[0].value + + @property + def specimen_type(self) -> Union[CodedConcept, None]: + items = self.SpecimenPreparationStepContentItemSequence.find( + codes.SCT.SpecimenType + ) + if len(items) == 0: + return None + return items[0].value + @classmethod def from_dataset( cls, diff --git a/tests/test_content.py b/tests/test_content.py index bc95a38c..0edf0e12 100644 --- a/tests/test_content.py +++ b/tests/test_content.py @@ -1038,6 +1038,7 @@ def test_construction_different_first_values(self): blue_lut=b_lut, ) +class TestSpecimenPreparationStep: def test_construction_staining_from_dataset(self): specimen_id = 'specimen id' processing_type = codes.SCT.Staining @@ -1113,6 +1114,8 @@ def test_construction_processing_optionals(self): issuer_of_specimen_id = IssuerOfIdentifier("issuer id") fixative = CodedConcept("fixative", "test", "test fixative") embedding_medium = CodedConcept("embedding", "test", "test embedding") + specimen_container = CodedConcept("specimen container", "test", "test specimen container") + specimen_type = 
CodedConcept("specimen type", "test", "test specimen type") instance = SpecimenPreparationStep( specimen_id=specimen_id, @@ -1121,11 +1124,13 @@ def test_construction_processing_optionals(self): processing_datetime=processing_datetime, issuer_of_specimen_id=issuer_of_specimen_id, fixative=fixative, - embedding_medium=embedding_medium + embedding_medium=embedding_medium, + specimen_container=specimen_container, + specimen_type=specimen_type ) seq = instance.SpecimenPreparationStepContentItemSequence - assert len(seq) == 8 + assert len(seq) == 10 specimen_id_item = seq[0] assert specimen_id_item.name == codes.DCM.SpecimenIdentifier @@ -1167,6 +1172,16 @@ def test_construction_processing_optionals(self): assert embedding_item.value == embedding_medium assert embedding_item.relationship_type is None + specimen_container_item = seq[8] + assert specimen_container_item.name == codes.SCT.SpecimenContainer + assert specimen_container_item.value == specimen_container + assert specimen_container_item.relationship_type is None + + specimen_type_item = seq[9] + assert specimen_type_item.name == codes.SCT.SpecimenType + assert specimen_type_item.value == specimen_type + assert specimen_type_item.relationship_type is None + def test_construction_processing_from_dataset(self): specimen_id = 'specimen id' processing_type = codes.SCT.SpecimenProcessing @@ -1207,6 +1222,8 @@ def test_construction_processing_from_dataset_optionals(self): issuer_of_specimen_id = IssuerOfIdentifier("issuer id") fixative = CodedConcept("fixative", "test", "test fixative") embedding_medium = CodedConcept("embedding", "test", "test embedding") + specimen_container = CodedConcept("specimen container", "test", "test specimen container") + specimen_type = CodedConcept("specimen type", "test", "test specimen type") dataset = Dataset() dataset.SpecimenPreparationStepContentItemSequence = [ TextContentItem( @@ -1240,8 +1257,15 @@ def test_construction_processing_from_dataset_optionals(self): CodeContentItem( 
name=codes.SCT.TissueEmbeddingMedium, value=embedding_medium + ), + CodeContentItem( + name=codes.SCT.SpecimenContainer, + value=specimen_container + ), + CodeContentItem( + name=codes.SCT.SpecimenType, + value=specimen_type ) - ] dataset_reread = write_and_read_dataset(dataset) instance = SpecimenPreparationStep.from_dataset(dataset_reread) @@ -1256,6 +1280,8 @@ def test_construction_processing_from_dataset_optionals(self): assert processing_procedure.procedure == procedure assert instance.processing_datetime == processing_datetime assert instance.issuer_of_specimen_id == issuer_of_specimen_id.LocalNamespaceEntityID + assert instance.specimen_container == specimen_container + assert instance.specimen_type == specimen_type class TestSpecimenDescription(TestCase): From 71088869c4c048023ee7dae81d2753103f671859 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Sat, 24 Jun 2023 08:28:46 +0200 Subject: [PATCH 43/96] Fix type annotation --- src/highdicom/content.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index e4325697..59c8929d 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -1170,7 +1170,7 @@ def processing_datetime(self) -> Union[datetime.datetime, None]: return items[0].value @property - def issuer_of_specimen_id(self) -> Union[IssuerOfIdentifier, None]: + def issuer_of_specimen_id(self) -> Union[str, None]: """highdicom.content.IssuerOfIdentifier: Issuer of specimen id""" items = self.SpecimenPreparationStepContentItemSequence.find( From 0ca5b8fde14a5785087a6e4af0fe90f31ebe0e1a Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Sat, 24 Jun 2023 08:31:58 +0200 Subject: [PATCH 44/96] Return simple datetime --- src/highdicom/sr/value_types.py | 48 ++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/src/highdicom/sr/value_types.py b/src/highdicom/sr/value_types.py index c44719af..954cfcd8 100644 --- 
a/src/highdicom/sr/value_types.py +++ b/src/highdicom/sr/value_types.py @@ -878,13 +878,15 @@ def __init__( def value(self) -> datetime.time: """datetime.time: time""" if isinstance(self.Time, TM): - return self.Time - try: - return TM(self.Time) - except ValueError as exception: - raise ValueError( - f'Could not decode time value "{self.Time}"' - ) from exception + value = self.Time + else: + try: + value = TM(self.Time) + except ValueError as exception: + raise ValueError( + f'Could not decode time value "{self.Time}"' + ) from exception + return value.replace() @classmethod @@ -949,13 +951,15 @@ def __init__( def value(self) -> datetime.date: """datetime.date: date""" if isinstance(self.Date, DA): - return self.Date - try: - return DA(self.Date) - except ValueError as exception: - raise ValueError( - f'Could not decode date value "{self.Date}"' - ) from exception + value = self.Date + else: + try: + value = DA(self.Date) + except ValueError as exception: + raise ValueError( + f'Could not decode date value "{self.Date}"' + ) from exception + return value.replace() @classmethod def from_dataset( @@ -1019,13 +1023,15 @@ def __init__( def value(self) -> datetime.datetime: """datetime.datetime: datetime""" if isinstance(self.DateTime, DT): - return self.DateTime - try: - return DT(self.DateTime) - except ValueError as exception: - raise ValueError( - f'Could not decode datetime value "{self.DateTime}"' - ) from exception + value = self.DateTime + else: + try: + value = DT(self.DateTime) + except ValueError as exception: + raise ValueError( + f'Could not decode datetime value "{self.DateTime}"' + ) from exception + return value.replace() @classmethod def from_dataset( From 39341a3c7b9b39ac87f8902ba64154178cb2c175 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Thu, 3 Aug 2023 16:15:34 +0200 Subject: [PATCH 45/96] Add addtitional parameters to SpecimenDescription --- src/highdicom/content.py | 62 ++++++++++++++++++++++++++++- tests/test_content.py | 85 
++++++++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 1 deletion(-) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index 59c8929d..154026f2 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -19,6 +19,7 @@ UniversalEntityIDTypeValues, VOILUTFunctionValues, ) +from highdicom.sr.enum import ValueTypeValues from highdicom.sr.coding import CodedConcept from highdicom.sr.value_types import ( CodeContentItem, @@ -1348,6 +1349,9 @@ def __init__( specimen_preparation_steps: Optional[ Sequence[SpecimenPreparationStep] ] = None, + specimen_type: Optional[Union[Code, CodedConcept]] = None, + specimen_short_description: Optional[str] = None, + specimen_detailed_description: Optional[str] = None, issuer_of_specimen_id: Optional[IssuerOfIdentifier] = None, primary_anatomic_structures: Optional[ Sequence[Union[Code, CodedConcept]] @@ -1369,6 +1373,12 @@ def __init__( specimen_preparation_steps: Sequence[highdicom.SpecimenPreparationStep], optional Steps that were applied during the preparation of the examined specimen in the laboratory prior to image acquisition + specimen_type: Union[pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional + The type of the examined specimen. + specimen_short_description: str, optional + Short description of the examined specimen. + specimen_detailed_description: str, optional + Detailed description of the examined specimen. 
issuer_of_specimen_id: highdicom.IssuerOfIdentifier, optional Description of the issuer of the specimen identifier primary_anatomic_structures: Sequence[Union[pydicom.sr.Code, highdicom.sr.CodedConcept]] @@ -1388,7 +1398,6 @@ def __init__( ) self.SpecimenPreparationSequence.append(step_item) if specimen_location is not None: - loc_item: Union[TextContentItem, NumContentItem] loc_seq: List[Union[TextContentItem, NumContentItem]] = [] if isinstance(specimen_location, str): loc_item = TextContentItem( @@ -1416,6 +1425,12 @@ def __init__( loc_seq.append(loc_item) self.SpecimenLocalizationContentItemSequence = loc_seq + if specimen_type is not None: + self.SpecimenTypeCodeSequence = [specimen_type] + if specimen_short_description is not None: + self.SpecimenShortDescription = specimen_short_description + if specimen_detailed_description is not None: + self.SpecimenDetailedDescription = specimen_detailed_description self.IssuerOfTheSpecimenIdentifierSequence: List[Dataset] = [] if issuer_of_specimen_id is not None: self.IssuerOfTheSpecimenIdentifierSequence.append( @@ -1457,11 +1472,46 @@ def specimen_uid(self) -> UID: """highdicom.UID: Unique specimen identifier""" return UID(self.SpecimenUID) + @property + def specimen_location(self) -> Union[str, Tuple[float, float, float], None]: + sequence = self.get("SpecimenLocalizationContentItemSequence") + if sequence is None: + return None + if isinstance(sequence[0], TextContentItem): + return sequence[0].value + return tuple(item.value for item in sequence) + @property def specimen_preparation_steps(self) -> List[SpecimenPreparationStep]: """highdicom.SpecimenPreparationStep: Specimen preparation steps""" return list(self.SpecimenPreparationSequence) + @property + def specimen_type(self) -> Union[CodedConcept, None]: + sequence = self.get("SpecimenTypeCodeSequence") + if sequence is None: + return None + return sequence[0] + + @property + def specimen_short_description(self) -> Union[str, None]: + return 
self.get("SpecimenShortDescription") + + @property + def specimen_detailed_description(self) -> Union[str, None]: + return self.get("SpecimenDetailedDescription") + + @property + def issuer_of_specimen_id(self) -> Union[IssuerOfIdentifier, None]: + sequence = self.get("IssuerOfTheSpecimenIdentifierSequence") + if len(sequence) == 0: + return None + return sequence[0] + + @property + def primary_anatomic_structures(self) -> Union[List[CodedConcept], None]: + return self.get("PrimaryAnatomicStructureSequence") + @classmethod def from_dataset(cls, dataset: Dataset) -> 'SpecimenDescription': """Construct object from an existing dataset. @@ -1505,6 +1555,16 @@ def from_dataset(cls, dataset: Dataset) -> 'SpecimenDescription': CodedConcept.from_dataset(ds) for ds in desc.SpecimenTypeCodeSequence ] + if hasattr(desc, 'SpecimenLocalizationContentItemSequence'): + if desc.SpecimenLocalizationContentItemSequence[0].ValueType == ValueTypeValues.TEXT.value: + content_item_type = TextContentItem + else: + content_item_type = NumContentItem + desc.SpecimenLocalizationContentItemSequence = [ + content_item_type.from_dataset(ds) + for ds in desc.SpecimenLocalizationContentItemSequence + ] + return desc diff --git a/tests/test_content.py b/tests/test_content.py index 0edf0e12..33610ffe 100644 --- a/tests/test_content.py +++ b/tests/test_content.py @@ -1294,7 +1294,45 @@ def test_construction(self): ) assert instance.specimen_id == specimen_id assert instance.specimen_uid == specimen_uid + assert instance.specimen_location is None assert len(instance.specimen_preparation_steps) == 0 + assert instance.specimen_type is None + assert instance.specimen_short_description is None + assert instance.specimen_detailed_description is None + assert instance.issuer_of_specimen_id is None + assert instance.primary_anatomic_structures is None + + def test_construction_optionals(self): + specimen_id = 'specimen 1' + specimen_uid = UID() + specimen_location = "specimen location" + specimen_type = 
CodedConcept("specimen type", "test", "test specimen type") + specimen_short_description = "specimen short description" + specimen_detailed_description = "specimen detailed description" + issuer_of_specimen_id = IssuerOfIdentifier("issuer id") + primary_anatomic_structures = [ + CodedConcept( + "anatomic strucutre", + "test", + "test anatomic structure" + ) + ] + instance = SpecimenDescription( + specimen_id=specimen_id, + specimen_uid=specimen_uid, + specimen_location=specimen_location, + specimen_type=specimen_type, + specimen_short_description=specimen_short_description, + specimen_detailed_description=specimen_detailed_description, + issuer_of_specimen_id=issuer_of_specimen_id, + primary_anatomic_structures=primary_anatomic_structures + ) + assert instance.specimen_location == specimen_location + assert instance.specimen_type == specimen_type + assert instance.specimen_short_description == specimen_short_description + assert instance.specimen_detailed_description == specimen_detailed_description + assert instance.issuer_of_specimen_id == issuer_of_specimen_id + assert instance.primary_anatomic_structures == primary_anatomic_structures def test_construction_with_preparation_steps(self): parent_specimen_id = 'surgical specimen' @@ -1347,3 +1385,50 @@ def test_construction_from_dataset(self): assert instance.specimen_id == specimen_id assert instance.specimen_uid == specimen_uid assert len(instance.specimen_preparation_steps) == 0 + + def test_construction_from_dataset_with_optionals(self): + specimen_id = 'specimen 1' + specimen_uid = UID() + specimen_location = "specimen location" + specimen_preparation_steps = [ + SpecimenPreparationStep( + specimen_id, + SpecimenCollection(procedure=codes.SCT.Biopsy) + ) + ] + specimen_type = CodedConcept("specimen type", "test", "test specimen type") + specimen_short_description = "specimen short description" + specimen_detailed_description = "specimen detailed description" + issuer_of_specimen_id = IssuerOfIdentifier("issuer 
id") + primary_anatomic_structures = [ + CodedConcept( + "anatomic strucutre", + "test", + "test anatomic structure" + ) + ] + dataset = Dataset() + dataset.SpecimenIdentifier = specimen_id + dataset.SpecimenUID = str(specimen_uid) + dataset.SpecimenLocalizationContentItemSequence = [ + TextContentItem( + name=codes.DCM.LocationOfSpecimen, + value=specimen_location + ) + ] + dataset.SpecimenTypeCodeSequence = [specimen_type] + dataset.SpecimenPreparationSequence = specimen_preparation_steps + dataset.SpecimenShortDescription = specimen_short_description + dataset.SpecimenDetailedDescription = specimen_detailed_description + dataset.IssuerOfTheSpecimenIdentifierSequence = [issuer_of_specimen_id] + dataset.PrimaryAnatomicStructureSequence = primary_anatomic_structures + dataset_reread = write_and_read_dataset(dataset) + instance = SpecimenDescription.from_dataset(dataset_reread) + assert instance.specimen_location == specimen_location + assert instance.specimen_preparation_steps == specimen_preparation_steps + assert instance.specimen_type == specimen_type + assert instance.specimen_short_description == specimen_short_description + assert instance.specimen_detailed_description == specimen_detailed_description + assert instance.issuer_of_specimen_id == issuer_of_specimen_id + assert instance.primary_anatomic_structures == primary_anatomic_structures + From bf5e05c902fbdd8fbe382f27926e1117b9189bf3 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Thu, 3 Aug 2023 16:37:36 +0200 Subject: [PATCH 46/96] Fix spelling and duplicate test classes --- tests/test_content.py | 494 +++++++++++++++++++++--------------------- 1 file changed, 246 insertions(+), 248 deletions(-) diff --git a/tests/test_content.py b/tests/test_content.py index 33610ffe..59b69bba 100644 --- a/tests/test_content.py +++ b/tests/test_content.py @@ -419,7 +419,6 @@ def test_construction_missing_substances(self): class TestSpecimenPreparationStep(TestCase): - def test_construction_collection(self): 
specimen_id = 'specimen id' processing_type = codes.SCT.SpecimenCollection @@ -631,6 +630,250 @@ def test_construction_staining(self): assert staining_item.value == substance assert staining_item.relationship_type is None + def test_construction_staining_from_dataset(self): + specimen_id = 'specimen id' + processing_type = codes.SCT.Staining + substance = codes.SCT.HematoxylinStain + dataset = Dataset() + dataset.SpecimenPreparationStepContentItemSequence = [ + TextContentItem( + name=codes.DCM.SpecimenIdentifier, + value=specimen_id + ), + CodeContentItem( + name=codes.DCM.ProcessingType, + value=processing_type + ), + CodeContentItem( + name=codes.SCT.UsingSubstance, + value=substance + ), + ] + dataset_reread = write_and_read_dataset(dataset) + instance = SpecimenPreparationStep.from_dataset(dataset_reread) + assert isinstance(instance, SpecimenPreparationStep) + assert instance.specimen_id == specimen_id + assert instance.processing_type == processing_type + assert instance.fixative is None + assert instance.embedding_medium is None + processing_procedure = instance.processing_procedure + assert isinstance(processing_procedure, SpecimenStaining) + assert processing_procedure.substances == [substance] + + def test_construction_processing(self): + specimen_id = 'specimen id' + processing_type = codes.SCT.SpecimenProcessing + description = codes.SCT.SpecimenFreezing + instance = SpecimenPreparationStep( + specimen_id=specimen_id, + processing_procedure=SpecimenProcessing(description=description), + ) + seq = instance.SpecimenPreparationStepContentItemSequence + assert len(seq) == 3 + assert not seq.is_root + assert not seq.is_sr + + assert instance.specimen_id == specimen_id + assert instance.processing_type == processing_type + assert instance.processing_datetime is None + assert instance.issuer_of_specimen_id is None + assert instance.fixative is None + assert instance.embedding_medium is None + + specimen_id_item = seq[0] + assert specimen_id_item.name == 
codes.DCM.SpecimenIdentifier + assert specimen_id_item.value == specimen_id + assert specimen_id_item.relationship_type is None + + processing_type_item = seq[1] + assert processing_type_item.name == codes.DCM.ProcessingType + assert processing_type_item.value == processing_type + assert processing_type_item.relationship_type is None + + processing_step_description_item = seq[2] + assert processing_step_description_item.name == codes.DCM.ProcessingStepDescription + assert processing_step_description_item.value == description + assert processing_step_description_item.relationship_type is None + + def test_construction_processing_optionals(self): + specimen_id = 'specimen id' + processing_type = codes.SCT.SpecimenCollection + procedure = codes.SCT.Excision + processing_procedure = SpecimenCollection(procedure=procedure) + processing_datetime = datetime.datetime(2023, 6, 17, 21, 38, 14) + processing_description = 'processing description' + issuer_of_specimen_id = IssuerOfIdentifier("issuer id") + fixative = CodedConcept("fixative", "test", "test fixative") + embedding_medium = CodedConcept("embedding", "test", "test embedding") + specimen_container = CodedConcept("specimen container", "test", "test specimen container") + specimen_type = CodedConcept("specimen type", "test", "test specimen type") + + instance = SpecimenPreparationStep( + specimen_id=specimen_id, + processing_procedure=processing_procedure, + processing_description=processing_description, + processing_datetime=processing_datetime, + issuer_of_specimen_id=issuer_of_specimen_id, + fixative=fixative, + embedding_medium=embedding_medium, + specimen_container=specimen_container, + specimen_type=specimen_type + ) + + seq = instance.SpecimenPreparationStepContentItemSequence + assert len(seq) == 10 + + specimen_id_item = seq[0] + assert specimen_id_item.name == codes.DCM.SpecimenIdentifier + assert specimen_id_item.value == specimen_id + assert specimen_id_item.relationship_type is None + + 
issuer_of_specimen_id_item = seq[1] + assert issuer_of_specimen_id_item.name == codes.DCM.IssuerOfSpecimenIdentifier + assert issuer_of_specimen_id_item.value == issuer_of_specimen_id.LocalNamespaceEntityID + assert issuer_of_specimen_id_item.relationship_type is None + + processing_type_item = seq[2] + assert processing_type_item.name == codes.DCM.ProcessingType + assert processing_type_item.value == processing_type + assert processing_type_item.relationship_type is None + + processing_datetime_item = seq[3] + assert processing_datetime_item.name == codes.DCM.DatetimeOfProcessing + assert processing_datetime_item.value == processing_datetime + assert processing_datetime_item.relationship_type is None + + processing_description_item = seq[4] + assert processing_description_item.name == codes.DCM.ProcessingStepDescription + assert processing_description_item.value == processing_description + assert processing_description_item.relationship_type is None + + collection_step_item = seq[5] + assert collection_step_item.name == codes.SCT.SpecimenCollection + assert collection_step_item.value == procedure + assert collection_step_item.relationship_type is None + + fixative_item = seq[6] + assert fixative_item.name == codes.SCT.TissueFixative + assert fixative_item.value == fixative + assert fixative_item.relationship_type is None + + embedding_item = seq[7] + assert embedding_item.name == codes.SCT.TissueEmbeddingMedium + assert embedding_item.value == embedding_medium + assert embedding_item.relationship_type is None + + specimen_container_item = seq[8] + assert specimen_container_item.name == codes.SCT.SpecimenContainer + assert specimen_container_item.value == specimen_container + assert specimen_container_item.relationship_type is None + + specimen_type_item = seq[9] + assert specimen_type_item.name == codes.SCT.SpecimenType + assert specimen_type_item.value == specimen_type + assert specimen_type_item.relationship_type is None + + def 
test_construction_processing_from_dataset(self): + specimen_id = 'specimen id' + processing_type = codes.SCT.SpecimenProcessing + description = codes.SCT.SpecimenFreezing + dataset = Dataset() + dataset.SpecimenPreparationStepContentItemSequence = [ + TextContentItem( + name=codes.DCM.SpecimenIdentifier, + value=specimen_id + ), + CodeContentItem( + name=codes.DCM.ProcessingType, + value=processing_type + ), + CodeContentItem( + name=codes.DCM.ProcessingStepDescription, + value=description + ), + ] + dataset_reread = write_and_read_dataset(dataset) + instance = SpecimenPreparationStep.from_dataset(dataset_reread) + assert isinstance(instance, SpecimenPreparationStep) + assert instance.specimen_id == specimen_id + assert instance.processing_type == processing_type + assert instance.fixative is None + assert instance.embedding_medium is None + processing_procedure = instance.processing_procedure + assert isinstance(processing_procedure, SpecimenProcessing) + assert processing_procedure.description == description + + def test_construction_processing_from_dataset_optionals(self): + specimen_id = 'specimen id' + processing_type = codes.SCT.SpecimenCollection + procedure = codes.SCT.Excision + processing_procedure = SpecimenCollection(procedure=procedure) + processing_description = "processing description" + processing_datetime = datetime.datetime(2023, 6, 17, 21, 38, 14) + issuer_of_specimen_id = IssuerOfIdentifier("issuer id") + fixative = CodedConcept("fixative", "test", "test fixative") + embedding_medium = CodedConcept("embedding", "test", "test embedding") + specimen_container = CodedConcept("specimen container", "test", "test specimen container") + specimen_type = CodedConcept("specimen type", "test", "test specimen type") + dataset = Dataset() + dataset.SpecimenPreparationStepContentItemSequence = [ + TextContentItem( + name=codes.DCM.SpecimenIdentifier, + value=specimen_id + ), + TextContentItem( + name=codes.DCM.IssuerOfSpecimenIdentifier, + 
value=issuer_of_specimen_id.LocalNamespaceEntityID + ), + DateTimeContentItem( + name=codes.DCM.DatetimeOfProcessing, + value=processing_datetime + ), + CodeContentItem( + name=codes.DCM.ProcessingType, + value=processing_type + ), + TextContentItem( + name=codes.DCM.ProcessingStepDescription, + value=processing_description + ), + CodeContentItem( + name=codes.SCT.SpecimenCollection, + value=procedure + ), + CodeContentItem( + name=codes.SCT.TissueFixative, + value=fixative + ), + CodeContentItem( + name=codes.SCT.TissueEmbeddingMedium, + value=embedding_medium + ), + CodeContentItem( + name=codes.SCT.SpecimenContainer, + value=specimen_container + ), + CodeContentItem( + name=codes.SCT.SpecimenType, + value=specimen_type + ) + ] + dataset_reread = write_and_read_dataset(dataset) + instance = SpecimenPreparationStep.from_dataset(dataset_reread) + assert isinstance(instance, SpecimenPreparationStep) + assert instance.specimen_id == specimen_id + assert instance.processing_type == processing_type + assert instance.fixative == fixative + assert instance.embedding_medium == embedding_medium + assert instance.processing_description == processing_description + processing_procedure = instance.processing_procedure + assert isinstance(processing_procedure, SpecimenCollection) + assert processing_procedure.procedure == procedure + assert instance.processing_datetime == processing_datetime + assert instance.issuer_of_specimen_id == issuer_of_specimen_id.LocalNamespaceEntityID + assert instance.specimen_container == specimen_container + assert instance.specimen_type == specimen_type + class TestVOILUTTransformation(TestCase): @@ -1038,251 +1281,6 @@ def test_construction_different_first_values(self): blue_lut=b_lut, ) -class TestSpecimenPreparationStep: - def test_construction_staining_from_dataset(self): - specimen_id = 'specimen id' - processing_type = codes.SCT.Staining - substance = codes.SCT.HematoxylinStain - dataset = Dataset() - 
dataset.SpecimenPreparationStepContentItemSequence = [ - TextContentItem( - name=codes.DCM.SpecimenIdentifier, - value=specimen_id - ), - CodeContentItem( - name=codes.DCM.ProcessingType, - value=processing_type - ), - CodeContentItem( - name=codes.SCT.UsingSubstance, - value=substance - ), - ] - dataset_reread = write_and_read_dataset(dataset) - instance = SpecimenPreparationStep.from_dataset(dataset_reread) - assert isinstance(instance, SpecimenPreparationStep) - assert instance.specimen_id == specimen_id - assert instance.processing_type == processing_type - assert instance.fixative is None - assert instance.embedding_medium is None - processing_procedure = instance.processing_procedure - assert isinstance(processing_procedure, SpecimenStaining) - assert processing_procedure.substances == [substance] - - def test_construction_processing(self): - specimen_id = 'specimen id' - processing_type = codes.SCT.SpecimenProcessing - description = codes.SCT.SpecimenFreezing - instance = SpecimenPreparationStep( - specimen_id=specimen_id, - processing_procedure=SpecimenProcessing(description=description), - ) - seq = instance.SpecimenPreparationStepContentItemSequence - assert len(seq) == 3 - assert not seq.is_root - assert not seq.is_sr - - assert instance.specimen_id == specimen_id - assert instance.processing_type == processing_type - assert instance.processing_datetime is None - assert instance.issuer_of_specimen_id is None - assert instance.fixative is None - assert instance.embedding_medium is None - - specimen_id_item = seq[0] - assert specimen_id_item.name == codes.DCM.SpecimenIdentifier - assert specimen_id_item.value == specimen_id - assert specimen_id_item.relationship_type is None - - processing_type_item = seq[1] - assert processing_type_item.name == codes.DCM.ProcessingType - assert processing_type_item.value == processing_type - assert processing_type_item.relationship_type is None - - processing_step_description_item = seq[2] - assert 
processing_step_description_item.name == codes.DCM.ProcessingStepDescription - assert processing_step_description_item.value == description - assert processing_step_description_item.relationship_type is None - - def test_construction_processing_optionals(self): - specimen_id = 'specimen id' - processing_type = codes.SCT.SpecimenCollection - procedure = codes.SCT.Excision - processing_procedure = SpecimenCollection(procedure=procedure) - processing_datetime = datetime.datetime(2023, 6, 17, 21, 38, 14) - processing_description = 'processing description' - issuer_of_specimen_id = IssuerOfIdentifier("issuer id") - fixative = CodedConcept("fixative", "test", "test fixative") - embedding_medium = CodedConcept("embedding", "test", "test embedding") - specimen_container = CodedConcept("specimen container", "test", "test specimen container") - specimen_type = CodedConcept("specimen type", "test", "test specimen type") - - instance = SpecimenPreparationStep( - specimen_id=specimen_id, - processing_procedure=processing_procedure, - processing_description=processing_description, - processing_datetime=processing_datetime, - issuer_of_specimen_id=issuer_of_specimen_id, - fixative=fixative, - embedding_medium=embedding_medium, - specimen_container=specimen_container, - specimen_type=specimen_type - ) - - seq = instance.SpecimenPreparationStepContentItemSequence - assert len(seq) == 10 - - specimen_id_item = seq[0] - assert specimen_id_item.name == codes.DCM.SpecimenIdentifier - assert specimen_id_item.value == specimen_id - assert specimen_id_item.relationship_type is None - - issuer_of_specimen_id_item = seq[1] - assert issuer_of_specimen_id_item.name == codes.DCM.IssuerOfSpecimenIdentifier - assert issuer_of_specimen_id_item.value == issuer_of_specimen_id.LocalNamespaceEntityID - assert issuer_of_specimen_id_item.relationship_type is None - - processing_type_item = seq[2] - assert processing_type_item.name == codes.DCM.ProcessingType - assert processing_type_item.value == 
processing_type - assert processing_type_item.relationship_type is None - - processing_datetime_item = seq[3] - assert processing_datetime_item.name == codes.DCM.DatetimeOfProcessing - assert processing_datetime_item.value == processing_datetime - assert processing_datetime_item.relationship_type is None - - processing_description_item = seq[4] - assert processing_description_item.name == codes.DCM.ProcessingStepDescription - assert processing_description_item.value == processing_description - assert processing_description_item.relationship_type is None - - collection_step_item = seq[5] - assert collection_step_item.name == codes.SCT.SpecimenCollection - assert collection_step_item.value == procedure - assert collection_step_item.relationship_type is None - - fixative_item = seq[6] - assert fixative_item.name == codes.SCT.TissueFixative - assert fixative_item.value == fixative - assert fixative_item.relationship_type is None - - embedding_item = seq[7] - assert embedding_item.name == codes.SCT.TissueEmbeddingMedium - assert embedding_item.value == embedding_medium - assert embedding_item.relationship_type is None - - specimen_container_item = seq[8] - assert specimen_container_item.name == codes.SCT.SpecimenContainer - assert specimen_container_item.value == specimen_container - assert specimen_container_item.relationship_type is None - - specimen_type_item = seq[9] - assert specimen_type_item.name == codes.SCT.SpecimenType - assert specimen_type_item.value == specimen_type - assert specimen_type_item.relationship_type is None - - def test_construction_processing_from_dataset(self): - specimen_id = 'specimen id' - processing_type = codes.SCT.SpecimenProcessing - description = codes.SCT.SpecimenFreezing - dataset = Dataset() - dataset.SpecimenPreparationStepContentItemSequence = [ - TextContentItem( - name=codes.DCM.SpecimenIdentifier, - value=specimen_id - ), - CodeContentItem( - name=codes.DCM.ProcessingType, - value=processing_type - ), - CodeContentItem( - 
name=codes.DCM.ProcessingStepDescription, - value=description - ), - ] - dataset_reread = write_and_read_dataset(dataset) - instance = SpecimenPreparationStep.from_dataset(dataset_reread) - assert isinstance(instance, SpecimenPreparationStep) - assert instance.specimen_id == specimen_id - assert instance.processing_type == processing_type - assert instance.fixative is None - assert instance.embedding_medium is None - processing_procedure = instance.processing_procedure - assert isinstance(processing_procedure, SpecimenProcessing) - assert processing_procedure.description == description - - def test_construction_processing_from_dataset_optionals(self): - specimen_id = 'specimen id' - processing_type = codes.SCT.SpecimenCollection - procedure = codes.SCT.Excision - processing_procedure = SpecimenCollection(procedure=procedure) - processing_description = "processing description" - processing_datetime = datetime.datetime(2023, 6, 17, 21, 38, 14) - issuer_of_specimen_id = IssuerOfIdentifier("issuer id") - fixative = CodedConcept("fixative", "test", "test fixative") - embedding_medium = CodedConcept("embedding", "test", "test embedding") - specimen_container = CodedConcept("specimen container", "test", "test specimen container") - specimen_type = CodedConcept("specimen type", "test", "test specimen type") - dataset = Dataset() - dataset.SpecimenPreparationStepContentItemSequence = [ - TextContentItem( - name=codes.DCM.SpecimenIdentifier, - value=specimen_id - ), - TextContentItem( - name=codes.DCM.IssuerOfSpecimenIdentifier, - value=issuer_of_specimen_id.LocalNamespaceEntityID - ), - DateTimeContentItem( - name=codes.DCM.DatetimeOfProcessing, - value=processing_datetime - ), - CodeContentItem( - name=codes.DCM.ProcessingType, - value=processing_type - ), - TextContentItem( - name=codes.DCM.ProcessingStepDescription, - value=processing_description - ), - CodeContentItem( - name=codes.SCT.SpecimenCollection, - value=procedure - ), - CodeContentItem( - 
name=codes.SCT.TissueFixative, - value=fixative - ), - CodeContentItem( - name=codes.SCT.TissueEmbeddingMedium, - value=embedding_medium - ), - CodeContentItem( - name=codes.SCT.SpecimenContainer, - value=specimen_container - ), - CodeContentItem( - name=codes.SCT.SpecimenType, - value=specimen_type - ) - ] - dataset_reread = write_and_read_dataset(dataset) - instance = SpecimenPreparationStep.from_dataset(dataset_reread) - assert isinstance(instance, SpecimenPreparationStep) - assert instance.specimen_id == specimen_id - assert instance.processing_type == processing_type - assert instance.fixative == fixative - assert instance.embedding_medium == embedding_medium - assert instance.processing_description == processing_description - processing_procedure = instance.processing_procedure - assert isinstance(processing_procedure, SpecimenCollection) - assert processing_procedure.procedure == procedure - assert instance.processing_datetime == processing_datetime - assert instance.issuer_of_specimen_id == issuer_of_specimen_id.LocalNamespaceEntityID - assert instance.specimen_container == specimen_container - assert instance.specimen_type == specimen_type - class TestSpecimenDescription(TestCase): def test_construction(self): @@ -1312,7 +1310,7 @@ def test_construction_optionals(self): issuer_of_specimen_id = IssuerOfIdentifier("issuer id") primary_anatomic_structures = [ CodedConcept( - "anatomic strucutre", + "anatomic structure", "test", "test anatomic structure" ) @@ -1402,7 +1400,7 @@ def test_construction_from_dataset_with_optionals(self): issuer_of_specimen_id = IssuerOfIdentifier("issuer id") primary_anatomic_structures = [ CodedConcept( - "anatomic strucutre", + "anatomic structure", "test", "test anatomic structure" ) From 87fc76053679df11f8e3a3b967834e6f5bfe6049 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Thu, 3 Aug 2023 16:59:50 +0200 Subject: [PATCH 47/96] Flake8 fixes --- src/highdicom/content.py | 6 +- src/highdicom/sr/value_types.py | 1 - 
tests/test_content.py | 98 ++++++++++++++++++++++++++------- tests/test_valuetypes.py | 10 ++-- 4 files changed, 87 insertions(+), 28 deletions(-) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index 154026f2..48c647db 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -1556,7 +1556,10 @@ def from_dataset(cls, dataset: Dataset) -> 'SpecimenDescription': for ds in desc.SpecimenTypeCodeSequence ] if hasattr(desc, 'SpecimenLocalizationContentItemSequence'): - if desc.SpecimenLocalizationContentItemSequence[0].ValueType == ValueTypeValues.TEXT.value: + if ( + desc.SpecimenLocalizationContentItemSequence[0].ValueType == + ValueTypeValues.TEXT.value + ): content_item_type = TextContentItem else: content_item_type = NumContentItem @@ -1565,7 +1568,6 @@ def from_dataset(cls, dataset: Dataset) -> 'SpecimenDescription': for ds in desc.SpecimenLocalizationContentItemSequence ] - return desc diff --git a/src/highdicom/sr/value_types.py b/src/highdicom/sr/value_types.py index 954cfcd8..5969ffe4 100644 --- a/src/highdicom/sr/value_types.py +++ b/src/highdicom/sr/value_types.py @@ -888,7 +888,6 @@ def value(self) -> datetime.time: ) from exception return value.replace() - @classmethod def from_dataset( cls, diff --git a/tests/test_content.py b/tests/test_content.py index 59b69bba..c3a4df5e 100644 --- a/tests/test_content.py +++ b/tests/test_content.py @@ -691,7 +691,10 @@ def test_construction_processing(self): assert processing_type_item.relationship_type is None processing_step_description_item = seq[2] - assert processing_step_description_item.name == codes.DCM.ProcessingStepDescription + assert ( + processing_step_description_item.name == + codes.DCM.ProcessingStepDescription + ) assert processing_step_description_item.value == description assert processing_step_description_item.relationship_type is None @@ -705,8 +708,16 @@ def test_construction_processing_optionals(self): issuer_of_specimen_id = IssuerOfIdentifier("issuer id") fixative 
= CodedConcept("fixative", "test", "test fixative") embedding_medium = CodedConcept("embedding", "test", "test embedding") - specimen_container = CodedConcept("specimen container", "test", "test specimen container") - specimen_type = CodedConcept("specimen type", "test", "test specimen type") + specimen_container = CodedConcept( + "specimen container", + "test", + "test specimen container" + ) + specimen_type = CodedConcept( + "specimen type", + "test", + "test specimen type" + ) instance = SpecimenPreparationStep( specimen_id=specimen_id, @@ -729,27 +740,42 @@ def test_construction_processing_optionals(self): assert specimen_id_item.relationship_type is None issuer_of_specimen_id_item = seq[1] - assert issuer_of_specimen_id_item.name == codes.DCM.IssuerOfSpecimenIdentifier - assert issuer_of_specimen_id_item.value == issuer_of_specimen_id.LocalNamespaceEntityID + assert ( + issuer_of_specimen_id_item.name == + codes.DCM.IssuerOfSpecimenIdentifier + ) + assert ( + issuer_of_specimen_id_item.value == + issuer_of_specimen_id.LocalNamespaceEntityID + ) assert issuer_of_specimen_id_item.relationship_type is None processing_type_item = seq[2] - assert processing_type_item.name == codes.DCM.ProcessingType + assert ( + processing_type_item.name == codes.DCM.ProcessingType + ) assert processing_type_item.value == processing_type assert processing_type_item.relationship_type is None processing_datetime_item = seq[3] - assert processing_datetime_item.name == codes.DCM.DatetimeOfProcessing + assert ( + processing_datetime_item.name == codes.DCM.DatetimeOfProcessing + ) assert processing_datetime_item.value == processing_datetime assert processing_datetime_item.relationship_type is None processing_description_item = seq[4] - assert processing_description_item.name == codes.DCM.ProcessingStepDescription + assert ( + processing_description_item.name == + codes.DCM.ProcessingStepDescription + ) assert processing_description_item.value == processing_description assert 
processing_description_item.relationship_type is None collection_step_item = seq[5] - assert collection_step_item.name == codes.SCT.SpecimenCollection + assert ( + collection_step_item.name == codes.SCT.SpecimenCollection + ) assert collection_step_item.value == procedure assert collection_step_item.relationship_type is None @@ -813,8 +839,16 @@ def test_construction_processing_from_dataset_optionals(self): issuer_of_specimen_id = IssuerOfIdentifier("issuer id") fixative = CodedConcept("fixative", "test", "test fixative") embedding_medium = CodedConcept("embedding", "test", "test embedding") - specimen_container = CodedConcept("specimen container", "test", "test specimen container") - specimen_type = CodedConcept("specimen type", "test", "test specimen type") + specimen_container = CodedConcept( + "specimen container", + "test", + "test specimen container" + ) + specimen_type = CodedConcept( + "specimen type", + "test", + "test specimen type" + ) dataset = Dataset() dataset.SpecimenPreparationStepContentItemSequence = [ TextContentItem( @@ -870,7 +904,10 @@ def test_construction_processing_from_dataset_optionals(self): assert isinstance(processing_procedure, SpecimenCollection) assert processing_procedure.procedure == procedure assert instance.processing_datetime == processing_datetime - assert instance.issuer_of_specimen_id == issuer_of_specimen_id.LocalNamespaceEntityID + assert ( + instance.issuer_of_specimen_id == + issuer_of_specimen_id.LocalNamespaceEntityID + ) assert instance.specimen_container == specimen_container assert instance.specimen_type == specimen_type @@ -1281,8 +1318,8 @@ def test_construction_different_first_values(self): blue_lut=b_lut, ) -class TestSpecimenDescription(TestCase): +class TestSpecimenDescription(TestCase): def test_construction(self): specimen_id = 'specimen 1' specimen_uid = UID() @@ -1304,7 +1341,11 @@ def test_construction_optionals(self): specimen_id = 'specimen 1' specimen_uid = UID() specimen_location = "specimen location" 
- specimen_type = CodedConcept("specimen type", "test", "test specimen type") + specimen_type = CodedConcept( + "specimen type", + "test", + "test specimen type" + ) specimen_short_description = "specimen short description" specimen_detailed_description = "specimen detailed description" issuer_of_specimen_id = IssuerOfIdentifier("issuer id") @@ -1328,9 +1369,16 @@ def test_construction_optionals(self): assert instance.specimen_location == specimen_location assert instance.specimen_type == specimen_type assert instance.specimen_short_description == specimen_short_description - assert instance.specimen_detailed_description == specimen_detailed_description - assert instance.issuer_of_specimen_id == issuer_of_specimen_id - assert instance.primary_anatomic_structures == primary_anatomic_structures + assert ( + instance.specimen_detailed_description == + specimen_detailed_description + ) + assert ( + instance.issuer_of_specimen_id == issuer_of_specimen_id + ) + assert ( + instance.primary_anatomic_structures == primary_anatomic_structures + ) def test_construction_with_preparation_steps(self): parent_specimen_id = 'surgical specimen' @@ -1394,7 +1442,11 @@ def test_construction_from_dataset_with_optionals(self): SpecimenCollection(procedure=codes.SCT.Biopsy) ) ] - specimen_type = CodedConcept("specimen type", "test", "test specimen type") + specimen_type = CodedConcept( + "specimen type", + "test", + "test specimen type" + ) specimen_short_description = "specimen short description" specimen_detailed_description = "specimen detailed description" issuer_of_specimen_id = IssuerOfIdentifier("issuer id") @@ -1426,7 +1478,11 @@ def test_construction_from_dataset_with_optionals(self): assert instance.specimen_preparation_steps == specimen_preparation_steps assert instance.specimen_type == specimen_type assert instance.specimen_short_description == specimen_short_description - assert instance.specimen_detailed_description == specimen_detailed_description + assert ( + 
instance.specimen_detailed_description == + specimen_detailed_description + ) assert instance.issuer_of_specimen_id == issuer_of_specimen_id - assert instance.primary_anatomic_structures == primary_anatomic_structures - + assert ( + instance.primary_anatomic_structures == primary_anatomic_structures + ) diff --git a/tests/test_valuetypes.py b/tests/test_valuetypes.py index af821acf..43193949 100644 --- a/tests/test_valuetypes.py +++ b/tests/test_valuetypes.py @@ -8,7 +8,11 @@ from highdicom.sr.coding import CodedConcept from highdicom.sr.enum import ValueTypeValues -from highdicom.sr.value_types import DateContentItem, DateTimeContentItem, TimeContentItem +from highdicom.sr.value_types import ( + DateContentItem, + DateTimeContentItem, + TimeContentItem +) from tests.utils import write_and_read_dataset @@ -39,7 +43,6 @@ def test_construct_from_datetime(self, datetime_value: DT): assert isinstance(item.value, datetime.datetime) assert item.value.isoformat() == datetime_value.isoformat() - @pytest.mark.parametrize("datetime_value", test_datetime_values) def test_from_dataset(self, datetime_value: DT): name = codes.DCM.DatetimeOfProcessing @@ -77,7 +80,6 @@ def test_construct_from_date(self): assert isinstance(item.value, datetime.date) assert item.value.isoformat() == date_value.isoformat() - def test_from_dataset(self): date_value = DA("20230623") name = codes.DCM.AcquisitionDate @@ -97,6 +99,7 @@ def test_from_dataset(self): assert isinstance(item.value, datetime.date) assert item.value.isoformat() == date_value.isoformat() + class TestTimeContentItem: test_time_values = [ TM("11"), @@ -121,7 +124,6 @@ def test_construct_from_time(self, time_value: TM): assert isinstance(item.value, datetime.time) assert item.value.isoformat() == time_value.isoformat() - @pytest.mark.parametrize("time_value", test_time_values) def test_from_dataset(self, time_value: TM): name = codes.DCM.AcquisitionDate From 5c5cbb660a518dbb394099e35cddc1c82b4e2d98 Mon Sep 17 00:00:00 2001 From: 
Chris Bridge Date: Mon, 7 Aug 2023 19:49:10 -0400 Subject: [PATCH 48/96] Merge variables used for compressed and uncompressed frames --- src/highdicom/seg/sop.py | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 58262cf9..0ca3d0a5 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1478,12 +1478,19 @@ def __init__( isinstance(workers, Executor) or workers != 0 ) + # List of frames. In the case of native transfer syntaxes, we will + # collect a list of frames as flattened NumPy arrays for bitpacking at + # the end. In the case of encapsulated transfer syntaxes with no + # workers, we will accumulate a list of encoded frames to encapsulate + # at the end + frames: Union[List[bytes], List[np.ndarray]] = [] + if is_encaps: if using_multiprocessing: # In the case of encapsulated transfer syntaxes with multiple # workers, we will accumulate a list of encoded frames to # encapsulate at the end - frame_futures_list: List[Future] = [] + frame_futures: List[Future] = [] # Use the existing executor or create one if isinstance(workers, Executor): @@ -1493,11 +1500,6 @@ def __init__( process_pool = ProcessPoolExecutor( workers if workers > 0 else None ) - else: - # In the case of encapsulated transfer syntaxes with no - # workers, we will accumulate a list of encoded frames to - # encapsulate at the end - compressed_frames_list: List[bytes] = [] # Parameters to use when calling the encode_frame function in # either of the above two cases @@ -1509,10 +1511,6 @@ def __init__( pixel_representation=self.PixelRepresentation ) else: - # In the case of non-encapsulated (uncompressed) transfer syntaxes - # we will accumulate a list of flattened pixels from all frames for - # bitpacking at the end - full_frames_list: List[np.ndarray] = [] if using_multiprocessing: warnings.warn( "Setting workers != 0 or passing an instance of " @@ -1594,7 +1592,7 @@ def 
__init__( if process_pool is None: # Encode this frame and add resulting bytes to the list # for encapsulation at the end - compressed_frames_list.append( + frames.append( encode_frame( segment_array, **encode_frame_kwargs, @@ -1608,18 +1606,18 @@ def __init__( array=segment_array, **encode_frame_kwargs, ) - frame_futures_list.append(future) + frame_futures.append(future) else: # Concatenate the 1D array for encoding at the end - full_frames_list.append(segment_array.flatten()) + frames.append(segment_array.flatten()) self.PerFrameFunctionalGroupsSequence = pffg_sequence self.NumberOfFrames = len(pffg_sequence) if is_encaps: if process_pool is not None: - compressed_frames_list = [ - fut.result() for fut in frame_futures_list + frames = [ + fut.result() for fut in frame_futures ] # Shutdown the pool if we created it, otherwise it is the @@ -1628,13 +1626,13 @@ def __init__( process_pool.shutdown() # Encapsulate all pre-compressed frames - self.PixelData = encapsulate(compressed_frames_list) + self.PixelData = encapsulate(frames) else: # Encode the whole pixel array at once # This allows for correct bit-packing in cases where # number of pixels per frame is not a multiple of 8 self.PixelData = self._encode_pixels_native( - np.concatenate(full_frames_list) + np.concatenate(frames) ) # Add a null trailing byte if required From 74d5e834742e7a2e55f7037444133e2f2bbb81f1 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 9 Aug 2023 21:34:23 -0400 Subject: [PATCH 49/96] Switch ordering of tiles to actually match TILED_FULL --- src/highdicom/seg/content.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/highdicom/seg/content.py b/src/highdicom/seg/content.py index 2611d145..5e65a1df 100644 --- a/src/highdicom/seg/content.py +++ b/src/highdicom/seg/content.py @@ -384,32 +384,32 @@ def __init__( z_axis_index.DimensionDescriptionLabel = \ 'Z Offset in Slide Coordinate System' - row_dimension_index = Dataset() - 
row_dimension_index.DimensionIndexPointer = tag_for_keyword( + column_dimension_index = Dataset() + column_dimension_index.DimensionIndexPointer = tag_for_keyword( 'ColumnPositionInTotalImagePixelMatrix' ) - row_dimension_index.FunctionalGroupPointer = tag_for_keyword( + column_dimension_index.FunctionalGroupPointer = tag_for_keyword( 'PlanePositionSlideSequence' ) - row_dimension_index.DimensionOrganizationUID = dim_uid - row_dimension_index.DimensionDescriptionLabel = \ + column_dimension_index.DimensionOrganizationUID = dim_uid + column_dimension_index.DimensionDescriptionLabel = \ 'Column Position In Total Image Pixel Matrix' - column_dimension_index = Dataset() - column_dimension_index.DimensionIndexPointer = tag_for_keyword( + row_dimension_index = Dataset() + row_dimension_index.DimensionIndexPointer = tag_for_keyword( 'RowPositionInTotalImagePixelMatrix' ) - column_dimension_index.FunctionalGroupPointer = tag_for_keyword( + row_dimension_index.FunctionalGroupPointer = tag_for_keyword( 'PlanePositionSlideSequence' ) - column_dimension_index.DimensionOrganizationUID = dim_uid - column_dimension_index.DimensionDescriptionLabel = \ + row_dimension_index.DimensionOrganizationUID = dim_uid + row_dimension_index.DimensionDescriptionLabel = \ 'Row Position In Total Image Pixel Matrix' - # Organize frames for each segment similar to TILED_FULL, first - # along the row dimension (column indices from left to right) and - # then along the column dimension (row indices from top to bottom) - # of the Total Pixel Matrix. 
+ # Organize frames for each segment similar to TILED_FULL, with + # segment position changing least frequently, followed by position + # of the row (from top to bottom) and then position of the column + # (from left to right) changing most frequently self.extend([ segment_number_index, row_dimension_index, From cde407df7226acc245aca33440be890247bd6fb7 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 17 Aug 2023 22:48:25 -0400 Subject: [PATCH 50/96] Refactor utils code into iter_tiled_full_frame_data --- src/highdicom/seg/sop.py | 158 ++++++++++++++------------ src/highdicom/utils.py | 233 ++++++++++++++++++++++----------------- 2 files changed, 220 insertions(+), 171 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 7ec9217a..e8485a30 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1677,11 +1677,7 @@ def __init__( self.copy_patient_and_study_information(src_img) # Build lookup tables for efficient decoding - if ( - dimension_organization_type is not None and - dimension_organization_type.value != "TILED_FULL" - ): - self._build_luts() + self._build_luts() def add_segments( self, @@ -2610,6 +2606,11 @@ def _build_luts(self) -> None: referenced_uids = self._get_ref_instance_uids() all_referenced_sops = {uids[2] for uids in referenced_uids} + is_tiled_full = ( + hasattr(self, 'DimensionOrganizationType') + and self.DimensionOrganizationType == 'TILED_FULL' + ) + segment_numbers = [] referenced_instances: Optional[List[str]] = [] referenced_frames: Optional[List[int]] = [] @@ -2636,80 +2637,95 @@ def _build_luts(self) -> None: locations_list_type = List[Optional[SpatialLocationsPreservedValues]] locations_preserved: locations_list_type = [] self._single_source_frame_per_seg_frame = True - for frame_item in self.PerFrameFunctionalGroupsSequence: - # Get segment number for this frame - seg_id_seg = frame_item.SegmentIdentificationSequence[0] - seg_num = seg_id_seg.ReferencedSegmentNumber - 
segment_numbers.append(int(seg_num)) - - # Get dimension indices for this frame - indices = frame_item.FrameContentSequence[0].DimensionIndexValues - if not isinstance(indices, (MultiValue, list)): - # In case there is a single dimension index - indices = [indices] - if len(indices) != len(self._dim_ind_pointers) + 1: - # (+1 because referenced segment number is ignored) + + if is_tiled_full: + tiled_full_dim_indices = { + tag_for_keyword('RowPositionInTotalImagePixelMatrix'), + tag_for_keyword('ColumnPositionInTotalImagePixelMatrix'), + } + if set(dim_indices.keys()) != tiled_full_dim_indices: raise RuntimeError( - 'Unexpected mismatch between dimension index values in ' - 'per-frames functional groups sequence and items in the ' - 'dimension index sequence.' + 'Expected segmentation images with ' + '"DimensionOrganizationType" of "TILED_FULL" are expected ' + 'to have the following dimension index pointers: ' + 'SegmentNumber, RowPositionInTotalImagePixelMatrix, ' + 'ColumnPositionInTotalImagePixelMatrix.' 
) - for ptr in self._dim_ind_pointers: - dim_indices[ptr].append(indices[dim_ind_positions[ptr]]) - - frame_source_instances = [] - frame_source_frames = [] - for der_im in frame_item.DerivationImageSequence: - for src_im in der_im.SourceImageSequence: - frame_source_instances.append( - src_im.ReferencedSOPInstanceUID + else: + for frame_item in self.PerFrameFunctionalGroupsSequence: + # Get segment number for this frame + seg_id_seg = frame_item.SegmentIdentificationSequence[0] + seg_num = seg_id_seg.ReferencedSegmentNumber + segment_numbers.append(int(seg_num)) + + # Get dimension indices for this frame + indices = frame_item.FrameContentSequence[0].DimensionIndexValues + if not isinstance(indices, (MultiValue, list)): + # In case there is a single dimension index + indices = [indices] + if len(indices) != len(self._dim_ind_pointers) + 1: + # (+1 because referenced segment number is ignored) + raise RuntimeError( + 'Unexpected mismatch between dimension index values in ' + 'per-frames functional groups sequence and items in the ' + 'dimension index sequence.' 
) - if hasattr(src_im, 'SpatialLocationsPreserved'): - locations_preserved.append( - SpatialLocationsPreservedValues( - src_im.SpatialLocationsPreserved - ) + for ptr in self._dim_ind_pointers: + dim_indices[ptr].append(indices[dim_ind_positions[ptr]]) + + frame_source_instances = [] + frame_source_frames = [] + for der_im in frame_item.DerivationImageSequence: + for src_im in der_im.SourceImageSequence: + frame_source_instances.append( + src_im.ReferencedSOPInstanceUID ) - else: - locations_preserved.append( - None - ) - - if hasattr(src_im, 'ReferencedFrameNumber'): - if isinstance( - src_im.ReferencedFrameNumber, - MultiValue - ): - frame_source_frames.extend( - [ - int(f) - for f in src_im.ReferencedFrameNumber - ] + if hasattr(src_im, 'SpatialLocationsPreserved'): + locations_preserved.append( + SpatialLocationsPreservedValues( + src_im.SpatialLocationsPreserved + ) ) else: - frame_source_frames.append( - int(src_im.ReferencedFrameNumber) + locations_preserved.append( + None ) - else: - frame_source_frames.append(_NO_FRAME_REF_VALUE) - if ( - len(set(frame_source_instances)) != 1 or - len(set(frame_source_frames)) != 1 - ): - self._single_source_frame_per_seg_frame = False - else: - ref_instance_uid = frame_source_instances[0] - if ref_instance_uid not in all_referenced_sops: - raise AttributeError( - f'SOP instance {ref_instance_uid} referenced in the ' - 'source image sequence is not included in the ' - 'Referenced Series Sequence or Studies Containing ' - 'Other Referenced Instances Sequence. This is an ' - 'error with the integrity of the Segmentation object.' 
- ) - referenced_instances.append(ref_instance_uid) - referenced_frames.append(frame_source_frames[0]) + if hasattr(src_im, 'ReferencedFrameNumber'): + if isinstance( + src_im.ReferencedFrameNumber, + MultiValue + ): + frame_source_frames.extend( + [ + int(f) + for f in src_im.ReferencedFrameNumber + ] + ) + else: + frame_source_frames.append( + int(src_im.ReferencedFrameNumber) + ) + else: + frame_source_frames.append(_NO_FRAME_REF_VALUE) + + if ( + len(set(frame_source_instances)) != 1 or + len(set(frame_source_frames)) != 1 + ): + self._single_source_frame_per_seg_frame = False + else: + ref_instance_uid = frame_source_instances[0] + if ref_instance_uid not in all_referenced_sops: + raise AttributeError( + f'SOP instance {ref_instance_uid} referenced in the ' + 'source image sequence is not included in the ' + 'Referenced Series Sequence or Studies Containing ' + 'Other Referenced Instances Sequence. This is an ' + 'error with the integrity of the Segmentation object.' + ) + referenced_instances.append(ref_instance_uid) + referenced_frames.append(frame_source_frames[0]) # Summarise if any( diff --git a/src/highdicom/utils.py b/src/highdicom/utils.py index 07c9a15f..ae70a20b 100644 --- a/src/highdicom/utils.py +++ b/src/highdicom/utils.py @@ -1,5 +1,5 @@ import itertools -from typing import Iterator, List, Optional, Sequence, Tuple +from typing import Iterator, Generator, List, Optional, Sequence, Tuple import numpy as np from pydicom.dataset import Dataset @@ -149,30 +149,64 @@ def compute_plane_position_tiled_full( ) -def compute_plane_position_slide_per_frame( - dataset: Dataset -) -> List[PlanePositionSequence]: - """Computes the plane position for each frame in given dataset with - respect to the slide coordinate system. +def iter_tiled_full_frame_data( + dataset: Dataset, +) -> Generator[Tuple[int, int, int, int, float, float, float], None, None]: + """Get data on the position of each tile in a TILED_FULL image. 
+ + This works only with images with Dimension Organization Type of + "TILED_FULL". + + Unlike :func:`highdicom.utils.compute_plane_position_slide_per_frame`, + this function returns the data in their basic Python types rather than + wrapping as :class:`highdicom.PlanePositionSequence`. Parameters ---------- dataset: pydicom.dataset.Dataset - VL Whole Slide Microscopy Image + VL Whole Slide Microscopy Image or Segmentation Image using the + "TILED_FULL" DimensionOrganizationType. Returns ------- - List[highdicom.PlanePositionSequence] - Plane Position Sequence per frame - - Raises - ------ - ValueError - When `dataset` does not represent a VL Whole Slide Microscopy Image + channel: int + 1-based integer index of the "channel". The meaning of "channel" + depends on the image type. For segmentation images, the channel is the + segment number. For other images, it is the optical path number. + focal_plane_index: int + 1-based integer index of the focal plane. + column_position: int + 1-based column position of the tile (measured right from the left side + of the total pixel matrix). + row_position: int + 1-based row position of the tile (measured down from the top of the + total pixel matrix). + x: float + X coordinate in the frame-of-reference coordinate system in millimeter + units. + y: float + Y coordinate in the frame-of-reference coordinate system in millimeter + units. + z: float + Z coordinate in the frame-of-reference coordinate system in millimeter + units. """ - if not dataset.SOPClassUID == '1.2.840.10008.5.1.4.1.1.77.1.6': - raise ValueError('Expected a VL Whole Slide Microscopy Image') + allowed_sop_class_uids = { + '1.2.840.10008.5.1.4.1.1.77.1.6', # VL Whole Slide Microscopy Image + '1.2.840.10008.5.1.4.1.1.66.4', # Segmentation Image + } + if dataset.SOPClassUID not in allowed_sop_class_uids: + raise ValueError( + 'Expected a VL Whole Slide Microscopy Image or Segmentation Image.'
+ ) + if ( + not hasattr(dataset, "DimensionOrganizationType") or + dataset.DimensionOrganizationType != "TILED_FULL" + ): + raise ValueError( + 'Expected an image with "TILED_FULL" dimension organization type.' + ) image_origin = dataset.TotalPixelMatrixOriginSequence[0] image_orientation = ( @@ -194,11 +228,19 @@ def compute_plane_position_slide_per_frame( 'TotalPixelMatrixFocalPlanes', 1 ) - num_optical_paths = getattr( - dataset, - 'NumberOfOpticalPaths', - len(dataset.OpticalPathSequence) - ) + + is_segmentation = dataset.SOPClassUID == '1.2.840.10008.5.1.4.1.1.66.4' + + # The "channels" output is either segment for segmentations, or optical + # path for other images + if is_segmentation: + num_channels = len(dataset.SegmentSequence) + else: + num_channels = getattr( + dataset, + 'NumberOfOpticalPaths', + len(dataset.OpticalPathSequence) + ) shared_fg = dataset.SharedFunctionalGroupsSequence[0] pixel_measures = shared_fg.PixelMeasuresSequence[0] @@ -216,91 +258,82 @@ def compute_plane_position_slide_per_frame( x_offset = image_origin.XOffsetInSlideCoordinateSystem y_offset = image_origin.YOffsetInSlideCoordinateSystem - transformer_lut = {} - for slice_index in range(1, num_focal_planes + 1): - # These checks are needed for mypy to determine the correct type - z_offset = float(slice_index - 1) * spacing_between_slices - transformer_lut[slice_index] = PixelToReferenceTransformer( - image_position=(x_offset, y_offset, z_offset), - image_orientation=image_orientation, - pixel_spacing=pixel_spacing - ) + # Array of tile indices (col_index, row_index) + tile_indices = np.array( + [ + (c, r) for (r, c) in + itertools.product( + range(1, tiles_per_column + 1), + range(1, tiles_per_row + 1) + ) + ] + ) - def _compute_plane_position_tiled_full_efficiently( - row_index: int, - column_index: int, - rows: int, - columns: int, - transformer: PixelToReferenceTransformer - ) -> PlanePositionSequence: - """More efficient implementation of `compute_plane_position_tiled_full`. 
- - Function re-uses an existing `transformer` instance instead of creating - one for every function call. This can hurt performance if the number - of frames in an image is large. - - Parameters - ---------- - row_index: int - One-based Row index value for a given frame (tile) along the column - direction of the tiled Total Pixel Matrix, which is defined by - the second triplet in `image_orientation` (values should be in the - range [1, *n*], where *n* is the number of tiles per column) - column_index: int - One-based Column index value for a given frame (tile) along the row - direction of the tiled Total Pixel Matrix, which is defined by - the first triplet in `image_orientation` (values should be in the - range [1, *n*], where *n* is the number of tiles per row) - rows: int - Number of rows per Frame (tile) - columns: int - Number of columns per Frame (tile) - transformer: highdicom.spatial.PixelToReferenceTransformer - Callable transformer instance to map pixel indices into reference - slide coordinates - - Returns - ------- - highdicom.PlanePositionSequence - Position, of the plane in the slide coordinate system - - """ - row_offset_frame = ((row_index - 1) * rows) - column_offset_frame = ((column_index - 1) * columns) - - # We should only be dealing with planar rotations. 
- transformed_coordinates = transformer( - np.array([(column_offset_frame, row_offset_frame)], dtype=int) - ) - x = transformed_coordinates[0, 0] - y = transformed_coordinates[0, 1] - z = transformed_coordinates[0, 2] + # Pixel offsets of each in the total pixel matrix + frame_pixel_offsets = ( + (tile_indices - 1) * np.array([dataset.Columns, dataset.Rows]) + ) - return PlanePositionSequence( - coordinate_system=CoordinateSystemNames.SLIDE, - image_position=(x, y, z), - # Position of plane (tile) in Total Pixel Matrix: - # First tile has position (1, 1) - pixel_matrix_position=( - column_offset_frame + 1, - row_offset_frame + 1, + for channel in range(1, num_channels + 1): + for slice_index in range(1, num_focal_planes + 1): + # These checks are needed for mypy to determine the correct type + z_offset = float(slice_index - 1) * spacing_between_slices + transformer = PixelToReferenceTransformer( + image_position=(x_offset, y_offset, z_offset), + image_orientation=image_orientation, + pixel_spacing=pixel_spacing ) - ) + reference_coordinates = transformer(frame_pixel_offsets) + + for offsets, coords in zip( + frame_pixel_offsets, + reference_coordinates + ): + yield ( + channel, + slice_index, + offsets[0] + 1, + offsets[1] + 1, + coords[0], + coords[1], + coords[2], + ) + + +def compute_plane_position_slide_per_frame( + dataset: Dataset +) -> List[PlanePositionSequence]: + """Computes the plane position for each frame in given dataset with + respect to the slide coordinate system for an image using the TILED_FULL + DimensionOrganizationType. + + Parameters + ---------- + dataset: pydicom.dataset.Dataset + VL Whole Slide Microscopy Image or Segmentation Image using the + "TILED_FULL" DimensionOrganizationType. 
+ + Returns + ------- + List[highdicom.PlanePositionSequence] + Plane Position Sequence per frame + + Raises + ------ + ValueError + When `dataset` does not represent a VL Whole Slide Microscopy Image or + Segmentation Image or the image does not use the "TILED_FULL" dimension + organization type. + + """ return [ - _compute_plane_position_tiled_full_efficiently( - row_index=r, - column_index=c, - rows=dataset.Rows, - columns=dataset.Columns, - transformer=transformer_lut[s], - ) - for _, s, r, c in itertools.product( - range(num_optical_paths), - range(1, num_focal_planes + 1), - range(1, tiles_per_column + 1), # column direction, top to bottom - range(1, tiles_per_row + 1), # row direction, left to right + PlanePositionSequence( + coordinate_system=CoordinateSystemNames.SLIDE, + image_position=(x, y, z), + pixel_matrix_position=(c, r), ) + for _, _, c, r, x, y, z in iter_tiled_full_frame_data(dataset) ] From 8f71961b5ca3210a77f47d9443aabb46233b5437 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 17 Aug 2023 23:15:09 -0400 Subject: [PATCH 51/96] Fix types --- src/highdicom/utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/highdicom/utils.py b/src/highdicom/utils.py index ae70a20b..106e077f 100644 --- a/src/highdicom/utils.py +++ b/src/highdicom/utils.py @@ -293,11 +293,11 @@ def iter_tiled_full_frame_data( yield ( channel, slice_index, - offsets[0] + 1, - offsets[1] + 1, - coords[0], - coords[1], - coords[2], + int(offsets[0] + 1), + int(offsets[1] + 1), + float(coords[0]), + float(coords[1]), + float(coords[2]), ) From 7bbdc26527e07712445c28e4532d20b765f9f5e3 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 17 Aug 2023 23:50:41 -0400 Subject: [PATCH 52/96] Fix tests relating to order of dimension indices --- tests/test_seg.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_seg.py b/tests/test_seg.py index 74bc8eee..ed7eac28 100644 --- a/tests/test_seg.py +++ 
b/tests/test_seg.py @@ -544,9 +544,9 @@ def test_construction_2(self): assert len(seq) == 6 assert seq[0].DimensionIndexPointer == 0x0062000B assert seq[0].FunctionalGroupPointer == 0x0062000A - assert seq[1].DimensionIndexPointer == 0x0048021E + assert seq[1].DimensionIndexPointer == 0x0048021F assert seq[1].FunctionalGroupPointer == 0x0048021A - assert seq[2].DimensionIndexPointer == 0x0048021F + assert seq[2].DimensionIndexPointer == 0x0048021E assert seq[2].FunctionalGroupPointer == 0x0048021A assert seq[3].DimensionIndexPointer == 0x0040072A assert seq[3].FunctionalGroupPointer == 0x0048021A @@ -788,8 +788,8 @@ def check_dimension_index_vals(seg): else: # Build up the mapping from index to value for dim_kw, dim_ind in zip([ + 'RowPositionInTotalImagePixelMatrix', 'ColumnPositionInTotalImagePixelMatrix', - 'RowPositionInTotalImagePixelMatrix' ], [1, 2]): index_mapping = defaultdict(list) for f in seg.PerFrameFunctionalGroupsSequence: From b44ff78a46d41ca2602f296c8fc5c4b56fa74ec3 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 24 Aug 2023 09:54:12 -0400 Subject: [PATCH 53/96] Basic implementation of autotiling. Need to add omitting frame logic --- src/highdicom/content.py | 34 +++++++ src/highdicom/seg/sop.py | 202 +++++++++++++++++++++++++++++++-------- src/highdicom/utils.py | 85 +++++++++++++++- 3 files changed, 281 insertions(+), 40 deletions(-) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index ddbed2a4..1493a98b 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -389,6 +389,40 @@ def from_sequence( pixel_measures.__class__ = PixelMeasuresSequence return cast(PixelMeasuresSequence, pixel_measures) + def __eq__(self, other: DataElementSequence) -> bool: + """Determine whether two sets of pixel measures are the same. + + Parameters + ---------- + other: pydicom.Sequence + A second pixel measures sequence, to be compared to self. + + Returns + ------- + bool: + True if all items match exactly. False otherwise. 
+ + """ + if not isinstance(other, DataElementSequence): + raise TypeError('Second item must be of type pydicom.Sequence.') + if len(other) != 1: + raise ValueError('Second item must have length 1.') + + if other[0].SliceThickness != self[0].SliceThickness: + return False + if other[0].PixelSpacing != self[0].PixelSpacing: + return False + if ( + hasattr(other[0], 'SpacingBetweenSlices') != + hasattr(self[0], 'SpacingBetweenSlices') + ): + return False + if hasattr(self[0], 'SpacingBetweenSlices'): + if other[0].SpacingBetweenSlices != self[0].SpacingBetweenSlices: + return False + + return True + class PlanePositionSequence(DataElementSequence): diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index e8485a30..d03363c9 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -56,6 +56,12 @@ DimensionOrganizationTypeValues, ) from highdicom.frame import encode_frame +from highdicom.utils import ( + compute_plane_position_tiled_full, + get_tile_array, + iter_tiled_full_frame_data, + tile_pixel_matrix, +) from highdicom.seg.content import ( DimensionIndexSequence, SegmentDescription, @@ -897,6 +903,8 @@ def __init__( str, None, ] = None, + tile_pixel_array: bool = False, + tile_size: Union[Sequence[int], None] = None, **kwargs: Any ) -> None: """ @@ -1068,6 +1076,36 @@ def __init__( against spawned child processes creating further workers. dimension_organization_type: Union[highdicom.enum.DimensionOrganizationTypeValues, str, None], optional Dimension organization type to use for the output image. + tile_pixel_array: bool + If True, `highdicom` will automatically convert an input total + pixel matrix into a sequence of frames representing tiles of the + segmentation. This is valid only when the source image supports + tiling (e.g. VL Whole Slide Microscopy images). + + If True, the input pixel array must consist of a single "frame", + i.e.
must be either a 2D numpy array, a 3D numpy array with a size + of 1 down the first dimension (axis zero), or a 4D numpy array also + with a size of 1 down the first dimension. The input pixel array is + treated as the total pixel matrix of the segmentation, and this is + tiled along the row and column dimension to create an output image + with multiple, smaller frames. + + If no ``pixel_measures``, ``plane_positions``, + ``plane_orientation`` are supplied, the total pixel matrix of the + segmentation is assumed to correspond to the total pixel matrix of + the (single) source image. If ``plane_positions`` is supplied, the + sequence should contain a single item representing the plane + position of the entire total pixel matrix. Plane positions of + the newly created tiles will be derived automatically from this. + + If False, the pixel array is already considered to consist of one + or more existing frames, as described above. + tile_size: Union[Sequence[int], None] = None + Tile size to use when tiling the input pixel array. If ``None`` + (the default), the tile size is copied from the source image. + Otherwise the tile size is specified explicitly as (number of rows, + number of columns). This value is ignored if ``tile_pixel_array`` + is False. **kwargs: Any, optional Additional keyword arguments that will be passed to the constructor of `highdicom.base.SOPClass` @@ -1120,7 +1158,6 @@ def __init__( 'Only one source image should be provided in case images ' 'are multi-frame images.' ) - is_tiled = hasattr(src_img, 'TotalPixelMatrixRows') supported_transfer_syntaxes = { ImplicitVRLittleEndian, ExplicitVRLittleEndian, @@ -1138,6 +1175,19 @@ def __init__( if pixel_array.ndim not in [3, 4]: raise ValueError('Pixel array must be a 2D, 3D, or 4D array.') + is_tiled = hasattr(src_img, 'TotalPixelMatrixRows') + if tile_pixel_array and not is_tiled: + raise ValueError( + 'When argument "tile_pixel_array" is True, the source image ' + 'must be a tiled image.'
+ ) + if tile_pixel_array and pixel_array.shape[0] != 1: + raise ValueError( + 'When argument "tile_pixel_array" is True, the input pixel ' + 'array must contain only one frame representing the total ' + 'pixel matrix.' + ) + super().__init__( study_instance_uid=src_img.StudyInstanceUID, series_instance_uid=series_instance_uid, @@ -1222,7 +1272,7 @@ def __init__( # Note that appending directly to the SourceImageSequence is typically # slow so it's more efficient to build as a Python list then convert - # later. We save conversion for after the main loop so that + # later. We save conversion for after the main loop source_image_seq: List[Dataset] = [] referenced_series: Dict[str, List[Dataset]] = defaultdict(list) for s_img in source_images: @@ -1243,8 +1293,14 @@ def __init__( self.ReferencedSeriesSequence = ref_image_seq # Image Pixel - self.Rows = pixel_array.shape[1] - self.Columns = pixel_array.shape[2] + if tile_pixel_array: + # By default use the same tile size as the source image (even if + # they are not spatially aligned) + tile_size = tile_size or (src_img.Rows, src_img.Columns) + self.Rows, self.Columns = (tile_size) + else: + self.Rows = pixel_array.shape[1] + self.Columns = pixel_array.shape[2] # Segmentation Image self.ImageType = ['DERIVED', 'PRIMARY'] @@ -1315,11 +1371,12 @@ def __init__( # Multi-Frame Functional Groups and Multi-Frame Dimensions sffg_item = Dataset() + source_pixel_measures = self._get_pixel_measures_sequence( + source_image=src_img, + is_multiframe=is_multiframe, + ) if pixel_measures is None: - pixel_measures = self._get_pixel_measures_sequence( - source_image=src_img, - is_multiframe=is_multiframe, - ) + pixel_measures = source_pixel_measures if has_ref_frame_uid: if self._coordinate_system == CoordinateSystemNames.SLIDE: @@ -1386,40 +1443,77 @@ def __init__( self.SegmentsOverlap = segments_overlap.value if has_ref_frame_uid: - if plane_positions is None: - if pixel_array.shape[0] != len(source_plane_positions): - raise 
ValueError( - 'Number of plane positions in source image(s) does not ' - 'match size of first dimension of "pixel_array" ' - 'argument.' + if tile_pixel_array: + + if plane_positions is None: + # Use the origin of the source image + origin_seq = src_img.TotalPixelMatrixOriginSequence[0] + x_offset = origin_seq.XOffsetInSlideCoordinateSystem + y_offset = origin_seq.YOffsetInSlideCoordinateSystem + else: + # Use the provided image origin + x_offset = plane_positions[0].XOffsetInSlideCoordinateSystem + y_offset = plane_positions[0].YOffsetInSlideCoordinateSystem + orientation = plane_orientation[0].ImageOrientationSlide + + plane_positions = [ + compute_plane_position_tiled_full( + row_index=r, + column_index=c, + x_offset=x_offset, + y_offset=y_offset, + rows=self.Rows, + columns=self.Columns, + image_orientation=orientation, + pixel_spacing=pixel_measures[0].PixelSpacing, ) - plane_positions = source_plane_positions - else: - if pixel_array.shape[0] != len(plane_positions): - raise ValueError( - 'Number of PlanePositionSequence items provided via ' - '"plane_positions" argument does not match size of ' - 'first dimension of "pixel_array" argument.' + for c, r in tile_pixel_matrix( + total_pixel_matrix_rows=pixel_array.shape[1], + total_pixel_matrix_columns=pixel_array.shape[2], + rows=self.Rows, + columns=self.Columns, ) + ] + + else: + if plane_positions is None: + if pixel_array.shape[0] != len(source_plane_positions): + raise ValueError( + 'Number of plane positions in source image(s) does ' + 'not match size of first dimension of ' + '"pixel_array" argument.' + ) + plane_positions = source_plane_positions + else: + if pixel_array.shape[0] != len(plane_positions): + raise ValueError( + 'Number of PlanePositionSequence items provided ' + 'via "plane_positions" argument does not match ' + 'size of first dimension of "pixel_array" argument.' 
+ ) + + # plane_position_values is an array giving, for each plane of + # the input array, the raw values of all attributes that + # describe its position. The first dimension is sorted the same + # way as the input pixel array and the second is sorted the + # same way as the dimension index sequence (without segment + # number) plane_sort_index is a list of indices into the input + # planes giving the order in which they should be arranged to + # correctly sort them for inclusion into the segmentation + plane_position_values, plane_sort_index = \ + self.DimensionIndexSequence.get_index_values( + plane_positions + ) are_spatial_locations_preserved = ( all( plane_positions[i] == source_plane_positions[i] for i in range(len(plane_positions)) ) and - plane_orientation == source_plane_orientation + plane_orientation == source_plane_orientation and + pixel_measures == source_pixel_measures ) - # plane_position_values is an array giving, for each plane of the - # input array, the raw values of all attributes that describe its - # position. 
The first dimension is sorted the same way as the input - # pixel array and the the second is sorted the same way as the - # dimension index sequence (without segment number) - # plane_sort_index is a list of indices into the input planes - # giving the order in which they should be arranged to correctly - # sort them for inclusion into the segmentation - plane_position_values, plane_sort_index = \ - self.DimensionIndexSequence.get_index_values(plane_positions) else: # Only one spatial location supported plane_positions = [None] @@ -1455,7 +1549,10 @@ def __init__( if ind in included_plane_indices_set ] else: - included_plane_indices = list(range(pixel_array.shape[0])) + if tile_pixel_array: + included_plane_indices = list(range(len(plane_positions))) + else: + included_plane_indices = list(range(pixel_array.shape[0])) if has_ref_frame_uid: # Get unique values of attributes in the Plane Position Sequence or @@ -1553,9 +1650,22 @@ def __init__( for segment_number in described_segment_numbers: for plane_index in plane_sort_index: + if tile_pixel_array: + pos = plane_positions[plane_index][0] + plane_array = get_tile_array( + pixel_array[0], + row_offset=pos.RowPositionInTotalImagePixelMatrix, + column_offset=pos.ColumnPositionInTotalImagePixelMatrix, + tile_rows=self.Rows, + tile_columns=self.Columns, + ) + else: + # Select the relevant existing frame + plane_array = pixel_array[plane_index] + # Pixel array for just this segment and this position segment_array = self._get_segment_pixel_array( - pixel_array[plane_index], + plane_array, segment_number=segment_number, number_of_segments=number_of_segments, segmentation_type=segmentation_type, @@ -2639,11 +2749,15 @@ def _build_luts(self) -> None: self._single_source_frame_per_seg_frame = True if is_tiled_full: - tiled_full_dim_indices = { - tag_for_keyword('RowPositionInTotalImagePixelMatrix'), - tag_for_keyword('ColumnPositionInTotalImagePixelMatrix'), - } - if set(dim_indices.keys()) != tiled_full_dim_indices: + # 
With TILED_FULL, there is no PerFrameFunctionalGroupsSequence, + # so we have to deduce the per-frame information + row_tag = tag_for_keyword('RowPositionInTotalImagePixelMatrix') + col_tag = tag_for_keyword('ColumnPositionInTotalImagePixelMatrix') + x_tag = tag_for_keyword('XOffsetInSlideCoordinateSystem') + y_tag = tag_for_keyword('YOffsetInSlideCoordinateSystem') + z_tag = tag_for_keyword('ZOffsetInSlideCoordinateSystem') + tiled_full_dim_indices = {row_tag, col_tag, x_tag, y_tag, z_tag} + if len(set(dim_indices.keys()) - tiled_full_dim_indices) > 0: raise RuntimeError( 'Expected segmentation images with ' '"DimensionOrganizationType" of "TILED_FULL" are expected ' @@ -2651,6 +2765,16 @@ def _build_luts(self) -> None: 'SegmentNumber, RowPositionInTotalImagePixelMatrix, ' 'ColumnPositionInTotalImagePixelMatrix.' ) + self._single_source_frame_per_seg_frame = False + ( + segment_numbers, + _, + dim_indices[row_tag], + dim_indices[col_tag], + dim_indices[x_tag], + dim_indices[y_tag], + dim_indices[z_tag], + ) = zip(*iter_tiled_full_frame_data(self)) else: for frame_item in self.PerFrameFunctionalGroupsSequence: # Get segment number for this frame diff --git a/src/highdicom/utils.py b/src/highdicom/utils.py index 106e077f..cded0316 100644 --- a/src/highdicom/utils.py +++ b/src/highdicom/utils.py @@ -20,6 +20,9 @@ def tile_pixel_matrix( ) -> Iterator[Tuple[int, int]]: """Tiles an image into smaller frames (rectangular regions). + Follows the convention used in image with Dimension Organization Type + "TILED_FULL" images. 
+ Parameters ---------- total_pixel_matrix_rows: int @@ -41,7 +44,85 @@ def tile_pixel_matrix( tiles_per_row = int(np.ceil(total_pixel_matrix_columns / columns)) tile_row_indices = iter(range(1, tiles_per_col + 1)) tile_col_indices = iter(range(1, tiles_per_row + 1)) - return itertools.product(tile_col_indices, tile_row_indices) + return ( + (c, r) for (r, c) in itertools.product( + tile_row_indices, + tile_col_indices + ) + ) + + +def get_tile_array( + pixel_array: np.ndarray, + row_offset: int, + column_offset: int, + tile_rows: int, + tile_columns: int, + pad: bool = True, +) -> np.ndarray: + """Extract a tile from a total pixel matrix array. + + Parameters + ---------- + pixel_array: np.ndarray + Array representing a total pixel matrix. The first two dimensions + are treated as the rows and columns, respectively, of the total pixel + matrix. Any subsequent dimensions are not used but are retained in the + output array. + row_offset: int + Offset of the first row of the requested tile from the top of the total + pixel matrix (1-based index). + column_offset: int + Offset of the first column of the requested tile from the left of the + total pixel matrix (1-based index). + tile_rows: int + Number of rows per tile. + tile_columns: + Number of columns per tile. + pad: bool + Whether to pad the returned array with zeros at the right and/or bottom + to ensure that it matches the correct tile size. Otherwise, the returned + array is not padded and may be smaller than the full tile size. + + Returns + ------- + np.ndarray: + Returned pixel array for the requested tile. + + """ + if row_offset < 1 or row_offset > pixel_array.shape[0]: + raise ValueError( + "Row offset must be between 1 and the size of dimension 0 of the " + "pixel array." + ) + if column_offset < 1 or column_offset > pixel_array.shape[1]: + raise ValueError( + "Column offset must be between 1 and the size of dimension 1 of " + "the pixel array." 
+ ) + # Move to pythonic 1-based indexing + row_offset -= 1 + column_offset -= 1 + row_end = row_offset + tile_rows + if row_end > pixel_array.shape[0]: + pad_rows = row_end - pixel_array.shape[0] + row_end = pixel_array.shape[0] + else: + pad_rows = 0 + column_end = column_offset + tile_columns + if column_end > pixel_array.shape[1]: + pad_columns = column_end - pixel_array.shape[1] + column_end = pixel_array.shape[1] + else: + pad_columns = 0 + # Account for 1-based to 0-based index conversion + tile_array = pixel_array[row_offset:row_end, column_offset:column_end] + if pad_rows > 0 or pad_columns > 0: + extra_dims = pixel_array.ndim - 2 + padding = [(0, pad_rows), (0, pad_columns)] + [(0, 0)] * extra_dims + tile_array = np.pad(tile_array, padding) + + return tile_array def compute_plane_position_tiled_full( @@ -114,6 +195,8 @@ def compute_plane_position_tiled_full( When only one of `slice_index` and `spacing_between_slices` is provided """ + if row_index < 1 or column_index < 1: + raise ValueError("Row and column indices must be positive intergers.") row_offset_frame = ((row_index - 1) * rows) column_offset_frame = ((column_index - 1) * columns) From 33f635e72bda051a4e4bc2ab279fa339dae61633 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 31 Aug 2023 18:19:07 -0400 Subject: [PATCH 54/96] Add frame omission logic and tests --- src/highdicom/content.py | 5 + src/highdicom/seg/sop.py | 244 +++++++++++++++++++++++++++++---------- src/highdicom/utils.py | 64 ++++++++++ tests/test_ann.py | 5 +- tests/test_seg.py | 133 ++++++++++++++++++++- tests/test_utils.py | 66 ++++++++++- 6 files changed, 449 insertions(+), 68 deletions(-) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index 1493a98b..9a9d78b8 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -473,6 +473,11 @@ def __init__( item.XOffsetInSlideCoordinateSystem = DS(x, auto_format=True) item.YOffsetInSlideCoordinateSystem = DS(y, auto_format=True) 
item.ZOffsetInSlideCoordinateSystem = DS(z, auto_format=True) + if row_position < 0 or col_position < 0: + raise ValueError( + 'Both items in "pixel_matrix_position" must be positive ' + 'integers.' + ) item.RowPositionInTotalImagePixelMatrix = row_position item.ColumnPositionInTotalImagePixelMatrix = col_position elif coordinate_system == CoordinateSystemNames.PATIENT: diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index d03363c9..1f5fd5cf 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -57,6 +57,7 @@ ) from highdicom.frame import encode_frame from highdicom.utils import ( + are_plane_positions_tiled_full, compute_plane_position_tiled_full, get_tile_array, iter_tiled_full_frame_data, @@ -1184,8 +1185,8 @@ def __init__( if tile_pixel_array and pixel_array.shape[0] != 1: raise ValueError( 'When argument "tile_pixel_array" is True, the input pixel ' - 'array must contain only one frame representing the total ' - 'pixel matrix.' + 'array must contain only one "frame" representing the entire ' + 'entire pixel matrix.' ) super().__init__( @@ -1452,8 +1453,20 @@ def __init__( y_offset = origin_seq.YOffsetInSlideCoordinateSystem else: # Use the provided image origin - x_offset = plane_positions[0].XOffsetInSlideCoordinateSystem - y_offset = plane_positions[0].YOffsetInSlideCoordinateSystem + pp = plane_positions[0][0] + rp = pp.RowPositionInTotalImagePixelMatrix + cp = pp.ColumnPositionInTotalImagePixelMatrix + if rp != 1 or cp != 1: + raise ValueError( + "When specifying a single plane position when " + 'the "tile_pixel_array" argument is True, the ' + "plane position must be at the top left corner " + "of the total pixel matrix. I.e. it must have " + "RowPositionInTotalImagePixelMatrix and " + "ColumnPositionInTotalImagePixelMatrix equal to 1." 
+ ) + x_offset = pp.XOffsetInSlideCoordinateSystem + y_offset = pp.YOffsetInSlideCoordinateSystem orientation = plane_orientation[0].ImageOrientationSlide plane_positions = [ @@ -1521,6 +1534,26 @@ def __init__( plane_sort_index = np.array([0]) are_spatial_locations_preserved = True + if are_spatial_locations_preserved: + if tile_pixel_array: + if ( + pixel_array.shape[1:3] != + ( + src_img.TotalPixelMatrixRows, + src_img.TotalPixelMatrixColumns + ) + ): + raise ValueError( + "Shape of input pixel_array does not match shape of " + "the total pixel matrix of the source image." + ) + else: + if pixel_array.shape[1:3] != (src_img.Rows, src_img.Columns): + raise ValueError( + "Shape of input pixel_array does not match shape of " + "the source image." + ) + # Dimension Organization Type dimension_organization_type = self._check_dimension_organization_type( dimension_organization_type=dimension_organization_type, @@ -1528,18 +1561,30 @@ def __init__( are_spatial_locations_preserved=are_spatial_locations_preserved, omit_empty_frames=omit_empty_frames, source_image=src_img, + plane_positions=plane_positions, + rows=self.Rows, + columns=self.Columns, ) if dimension_organization_type is not None: self.DimensionOrganizationType = dimension_organization_type.value # Find indices such that empty planes are removed if omit_empty_frames: - included_plane_indices, is_empty = \ - self._get_nonempty_plane_indices(pixel_array) + if tile_pixel_array: + included_plane_indices, is_empty = \ + self._get_nonempty_tile_indices( + pixel_array, + plane_positions=plane_positions, + rows=self.Rows, + columns=self.Columns, + ) + else: + included_plane_indices, is_empty = \ + self._get_nonempty_plane_indices(pixel_array) if is_empty: # Cannot omit empty frames when all frames are empty omit_empty_frames = False - included_plane_indices = list(range(pixel_array.shape[0])) + included_plane_indices = list(range(len(plane_positions))) else: # Remove all empty plane positions from the list of sorted 
# plane position indices @@ -1549,10 +1594,7 @@ def __init__( if ind in included_plane_indices_set ] else: - if tile_pixel_array: - included_plane_indices = list(range(len(plane_positions))) - else: - included_plane_indices = list(range(pixel_array.shape[0])) + included_plane_indices = list(range(len(plane_positions))) if has_ref_frame_uid: # Get unique values of attributes in the Plane Position Sequence or @@ -2018,6 +2060,9 @@ def _check_dimension_organization_type( are_spatial_locations_preserved: bool, omit_empty_frames: bool, source_image: Dataset, + plane_positions: Sequence[PlanePositionSequence], + rows: int, + columns: int, ) -> Optional[DimensionOrganizationTypeValues]: """Checks that the specified Dimension Organization Type is valid. @@ -2031,9 +2076,15 @@ def _check_dimension_organization_type( Whether spatial locations are preserved between the source image and the segmentation pixel array. omit_empty_frames: bool - Whether it was specified to omit empty frames. + Whether it was specified to omit empty frames. source_image: pydicom.Dataset - Representative dataset of the source images. + Representative dataset of the source images. + plane_positions: Sequence[highdicom.PlanePositionSequence] + Plane positions of all frames. + rows: int + Number of rows in each frame of the segmentation image. + columns: int + Number of columns in each frame of the segmentation image. Returns ------- @@ -2069,25 +2120,18 @@ def _check_dimension_organization_type( dimension_organization_type == DimensionOrganizationTypeValues.TILED_FULL ): - # Only allow TILED_FULL if the source image is TILED_FULL - # and spatial locations are preserved. This could be - # relaxed in the future by checking the plane positions. 
- if ( - not hasattr(source_image, 'DimensionOrganizationType') or - source_image.DimensionOrganizationType != 'TILED_FULL' + if not are_plane_positions_tiled_full( + plane_positions, + rows, + columns, ): raise ValueError( 'A value of "TILED_FULL" for parameter ' '"dimension_organization_type" is not permitted unless ' - 'the source images also have ' - 'DimensionOrganizationType of "TILED_FULL".' - ) - if not are_spatial_locations_preserved: - raise ValueError( - 'A value of "TILED_FULL" for parameter ' - '"dimension_organization_type" is not permitted if ' 'the "plane_positions" of the segmentation do not ' - 'match the plane positions of the source image.' + 'do not follow the relevant requirements. See ' + 'https://dicom.nema.org/medical/dicom/current/output/' + 'chtml/part03/sect_C.7.6.17.3.html#sect_C.7.6.17.3.' ) if omit_empty_frames: raise ValueError( @@ -2265,6 +2309,68 @@ def _get_nonempty_plane_indices( return (source_image_indices, False) + @staticmethod + def _get_nonempty_tile_indices( + pixel_array: np.ndarray, + plane_positions: Sequence[PlanePositionSequence], + rows: int, + columns: int, + ) -> Tuple[List[int], bool]: + """Get a list of all indices of tile locations that are non-empty. + + This is similar to _get_nonempty_plane_indices, but works on a total + pixel matrix rather than a set of frames. Empty planes (without any + positive pixels in any of the segments) do not need to be included in + the segmentation image. This method finds a list of indices of the + input frames that are non-empty, and therefore should be included in + the segmentation image. + + Parameters + ---------- + pixel_array: numpy.ndarray + Segmentation pixel array + plane_positions: Sequence[highdicom.PlanePositionSequence] + Plane positions of each tile. + rows: int + Number of rows in each tile. + columns: int + Number of columns in each tile. 
+ + Returns + ------- + included_plane_indices : List[int] + List giving for each plane position in the resulting segmentation + image the index of the corresponding frame in the original pixel + array. + is_empty: bool + Whether the entire image is empty. If so, empty frames should not + be omitted. + + """ + # This list tracks which source image each non-empty frame came from + source_image_indices = [ + i for i, pos in enumerate(plane_positions) + if np.any( + get_tile_array( + pixel_array[0], + row_offset=pos[0].RowPositionInTotalImagePixelMatrix, + column_offset=pos[0].ColumnPositionInTotalImagePixelMatrix, + tile_rows=rows, + tile_columns=columns, + ) + ) + ] + + if len(source_image_indices) == 0: + logger.warning( + 'Encoding an empty segmentation with "omit_empty_frames" ' + 'set to True. Reverting to encoding all frames since omitting ' + 'all frames is not possible.' + ) + return (list(range(len(plane_positions))), True) + + return (source_image_indices, False) + @staticmethod def _get_segment_pixel_array( pixel_array: np.ndarray, @@ -2717,8 +2823,8 @@ def _build_luts(self) -> None: all_referenced_sops = {uids[2] for uids in referenced_uids} is_tiled_full = ( - hasattr(self, 'DimensionOrganizationType') - and self.DimensionOrganizationType == 'TILED_FULL' + hasattr(self, 'DimensionOrganizationType') and + self.DimensionOrganizationType == 'TILED_FULL' ) segment_numbers = [] @@ -2742,10 +2848,6 @@ def _build_luts(self) -> None: ptr: [] for ptr in self._dim_ind_pointers } - # Create a list of source images and check for spatial locations - # preserved and that there is a single source frame per seg frame - locations_list_type = List[Optional[SpatialLocationsPreservedValues]] - locations_preserved: locations_list_type = [] self._single_source_frame_per_seg_frame = True if is_tiled_full: @@ -2775,7 +2877,18 @@ def _build_luts(self) -> None: dim_indices[y_tag], dim_indices[z_tag], ) = zip(*iter_tiled_full_frame_data(self)) + + # There is no way to deduce 
whether the spatial locations are + # preserved in the tiled full case + self._locations_preserved = None else: + # Create a list of source images and check for spatial locations + # preserved + locations_list_type = List[ + Optional[SpatialLocationsPreservedValues] + ] + locations_preserved: locations_list_type = [] + for frame_item in self.PerFrameFunctionalGroupsSequence: # Get segment number for this frame seg_id_seg = frame_item.SegmentIdentificationSequence[0] @@ -2783,7 +2896,8 @@ def _build_luts(self) -> None: segment_numbers.append(int(seg_num)) # Get dimension indices for this frame - indices = frame_item.FrameContentSequence[0].DimensionIndexValues + content_seq = frame_item.FrameContentSequence[0] + indices = content_seq.DimensionIndexValues if not isinstance(indices, (MultiValue, list)): # In case there is a single dimension index indices = [indices] @@ -2791,8 +2905,8 @@ def _build_luts(self) -> None: # (+1 because referenced segment number is ignored) raise RuntimeError( 'Unexpected mismatch between dimension index values in ' - 'per-frames functional groups sequence and items in the ' - 'dimension index sequence.' + 'per-frames functional groups sequence and items in ' + 'the dimension index sequence.' ) for ptr in self._dim_ind_pointers: dim_indices[ptr].append(indices[dim_ind_positions[ptr]]) @@ -2842,35 +2956,37 @@ def _build_luts(self) -> None: ref_instance_uid = frame_source_instances[0] if ref_instance_uid not in all_referenced_sops: raise AttributeError( - f'SOP instance {ref_instance_uid} referenced in the ' - 'source image sequence is not included in the ' + f'SOP instance {ref_instance_uid} referenced in ' + 'the source image sequence is not included in the ' 'Referenced Series Sequence or Studies Containing ' 'Other Referenced Instances Sequence. This is an ' - 'error with the integrity of the Segmentation object.' + 'error with the integrity of the Segmentation ' + 'object.' 
) referenced_instances.append(ref_instance_uid) referenced_frames.append(frame_source_frames[0]) - # Summarise - if any( - isinstance(v, SpatialLocationsPreservedValues) and - v == SpatialLocationsPreservedValues.NO - for v in locations_preserved - ): - Type = Optional[SpatialLocationsPreservedValues] - self._locations_preserved: Type = SpatialLocationsPreservedValues.NO - elif all( - isinstance(v, SpatialLocationsPreservedValues) and - v == SpatialLocationsPreservedValues.YES - for v in locations_preserved - ): - self._locations_preserved = SpatialLocationsPreservedValues.YES - else: - self._locations_preserved = None + # Summarise + if any( + isinstance(v, SpatialLocationsPreservedValues) and + v == SpatialLocationsPreservedValues.NO + for v in locations_preserved + ): + Type = Optional[SpatialLocationsPreservedValues] + self._locations_preserved: Type = \ + SpatialLocationsPreservedValues.NO + elif all( + isinstance(v, SpatialLocationsPreservedValues) and + v == SpatialLocationsPreservedValues.YES + for v in locations_preserved + ): + self._locations_preserved = SpatialLocationsPreservedValues.YES + else: + self._locations_preserved = None - if not self._single_source_frame_per_seg_frame: - referenced_instances = None - referenced_frames = None + if not self._single_source_frame_per_seg_frame: + referenced_instances = None + referenced_frames = None self._db_man = _SegDBManager( referenced_uids=referenced_uids, @@ -3492,7 +3608,17 @@ def _check_indexing_with_source_frames( """ # Checks that it is possible to index using source frames in this # dataset - if self._locations_preserved is None: + is_tiled_full = ( + hasattr(self, 'DimensionOrganizationType') and + self.DimensionOrganizationType == 'TILED_FULL' + ) + if is_tiled_full: + raise RuntimeError( + 'Indexing via source frames is not possible when a ' + 'segmentation is stored using the DimensionOrganizationType ' + '"TILED_FULL".' 
+ ) + elif self._locations_preserved is None: if not ignore_spatial_locations: raise RuntimeError( 'Indexing via source frames is not permissible since this ' diff --git a/src/highdicom/utils.py b/src/highdicom/utils.py index cded0316..7470c7b4 100644 --- a/src/highdicom/utils.py +++ b/src/highdicom/utils.py @@ -436,3 +436,67 @@ def is_tiled_image(dataset: Dataset) -> bool: ): return True return False + + +def are_plane_positions_tiled_full( + plane_positions: Sequence[PlanePositionSequence], + rows: int, + columns: int, +) -> bool: + """Determine whether a list of plane positions matches "TILED_FULL". + + This takes a list of plane positions for each frame and determines whether + the plane positions satisfy the requirements of "TILED_FULL". Plane + positions match the TILED_FULL dimension organization type if they are + non-overlapping, and cover the entire image plane in the order specified in + the standard. + + The test implemented in this function is necessary and sufficient for the + use of TILED_FULL in a newly created tiled image (thus allowing the plane + positions to be omitted from the image and defined implicitly). + + Parameters + ---------- + plane_positions: Sequence[PlanePositionSequence] + Plane positions of each frame. + rows: int + Number of rows in each frame. + columns: int + Number of columns in each frame. + + Returns + ------- + bool: + True if the supplied plane positions satisfy the requirements for + TILED_FULL. False otherwise. 
+ + """ + max_r = -1 + max_c = -1 + for plane_position in plane_positions: + r = plane_position[0].RowPositionInTotalImagePixelMatrix + c = plane_position[0].ColumnPositionInTotalImagePixelMatrix + if r > max_r: + max_r = r + if c > max_c: + max_c = c + + expected_positions = [ + (r, c) for (r, c) in itertools.product( + range(1, max_r + 1, rows), + range(1, max_c + 1, columns), + ) + ] + if len(expected_positions) != len(plane_positions): + return False + + for (r_exp, c_exp), plane_position in zip( + expected_positions, + plane_positions + ): + r = plane_position[0].RowPositionInTotalImagePixelMatrix + c = plane_position[0].ColumnPositionInTotalImagePixelMatrix + if r != r_exp or c != c_exp: + return False + + return True diff --git a/tests/test_ann.py b/tests/test_ann.py index e0933ee1..43d3ae8e 100644 --- a/tests/test_ann.py +++ b/tests/test_ann.py @@ -478,7 +478,10 @@ def test_construction(self): annotations.annotation_coordinate_type, AnnotationCoordinateTypeValues ) - assert annotations.annotation_coordinate_type == annotation_coordinate_type + assert ( + annotations.annotation_coordinate_type == + annotation_coordinate_type + ) retrieved_groups = annotations.get_annotation_groups() assert len(retrieved_groups) == 2 diff --git a/tests/test_seg.py b/tests/test_seg.py index ed7eac28..6b8b3432 100644 --- a/tests/test_seg.py +++ b/tests/test_seg.py @@ -3,6 +3,7 @@ from copy import deepcopy import unittest from pathlib import Path +import warnings import numpy as np import pytest @@ -25,7 +26,10 @@ PixelMeasuresSequence, PlaneOrientationSequence, ) -from highdicom.enum import CoordinateSystemNames +from highdicom.enum import ( + CoordinateSystemNames, + DimensionOrganizationTypeValues, +) from highdicom.seg import ( segread, DimensionIndexSequence, @@ -658,6 +662,16 @@ def setUp(self): ) self._sm_pixel_array[2:3, 1:5, 7:9] = True + # Total pixel matrix segmentation array for tests + self._sm_total_pixel_array = np.zeros( + ( + self._sm_image.TotalPixelMatrixRows, 
+ self._sm_image.TotalPixelMatrixColumns + ), + dtype=bool + ) + self._sm_total_pixel_array[45:60, 5:70] = True + # A series of single frame CT images ct_series = [ dcmread(f) @@ -1410,7 +1424,7 @@ def test_construction_7(self): def test_construction_workers(self): # Create a segmentation with multiple workers - instance = Segmentation( + Segmentation( self._ct_series, self._ct_series_mask_array, SegmentationTypeValues.FRACTIONAL.value, @@ -1431,9 +1445,9 @@ def test_construction_workers(self): def test_construction_workers_manual(self): # Create a segmentation with multiple workers created manually with ProcessPoolExecutor(2) as pool: - instance = Segmentation( - self._ct_series, - self._ct_series_mask_array, + Segmentation( + self._ct_series, + self._ct_series_mask_array, SegmentationTypeValues.FRACTIONAL.value, self._segment_descriptions, self._series_instance_uid, @@ -1467,6 +1481,115 @@ def test_construction_tiled_full(self): omit_empty_frames=False, ) assert instance.DimensionOrganizationType == "TILED_FULL" + assert not hasattr(instance, "PerFrameFunctionalGroupsSequence") + + @staticmethod + @pytest.fixture( + params=[ + DimensionOrganizationTypeValues.TILED_FULL, + DimensionOrganizationTypeValues.TILED_SPARSE, + ]) + def dimension_organization_type(request): + return request.param + + @staticmethod + @pytest.fixture( + params=[ + SegmentationTypeValues.FRACTIONAL, + SegmentationTypeValues.BINARY, + ]) + def segmentation_type(request): + return request.param + + @staticmethod + @pytest.fixture( + params=[ + None, + (10, 10), + (10, 25), + (25, 25), + (30, 30), + ]) + def tile_size(request): + return request.param + + @staticmethod + @pytest.fixture(params=[False, True]) + def locations_preserved(request): + return request.param + + # with and without omitting frames + # with tiled full and tiled sparse + # single and multiple segments + # with and without spatial locations preserved + def test_construction_autotile( + self, + tile_size, + 
dimension_organization_type, + segmentation_type, + locations_preserved, + ): + + if locations_preserved: + pixel_measures = None + plane_orientation = None + plane_positions = None + else: + pixel_measures = PixelMeasuresSequence( + pixel_spacing=(0.0001, 0.0001), + slice_thickness=0.001, + ) + plane_orientation = PlaneOrientationSequence( + coordinate_system="SLIDE", + image_orientation=[0.0, -1.0, 0.0, 1.0, 0.0, 0.0] + ) + plane_positions = [ + PlanePositionSequence( + coordinate_system="SLIDE", + image_position=[1.1234, -5.4323214, 0.0], + pixel_matrix_position=(1, 1), + ) + ] + + if dimension_organization_type.value == "TILED_FULL": + # Cannot omit empty frames with TILED_FULL + omit_empty_frames_values = [False] + else: + omit_empty_frames_values = [False, True] + + for omit_empty_frames in omit_empty_frames_values: + instance = Segmentation( + [self._sm_image], + pixel_array=self._sm_total_pixel_array, + segmentation_type=segmentation_type, + segment_descriptions=self._segment_descriptions, + series_instance_uid=self._series_instance_uid, + series_number=self._series_number, + sop_instance_uid=self._sop_instance_uid, + instance_number=self._instance_number, + manufacturer=self._manufacturer, + manufacturer_model_name=self._manufacturer_model_name, + software_versions=self._software_versions, + device_serial_number=self._device_serial_number, + dimension_organization_type=dimension_organization_type, + omit_empty_frames=omit_empty_frames, + plane_orientation=plane_orientation, + plane_positions=plane_positions, + pixel_measures=pixel_measures, + tile_pixel_array=True, + tile_size=tile_size, + ) + assert ( + instance.DimensionOrganizationType == + dimension_organization_type.value + ) + if tile_size is not None: + assert instance.Rows == tile_size[0] + assert instance.Columns == tile_size[1] + + with warnings.catch_warnings(record=True) as w: + self.get_array_after_writing(instance) + assert len(w) == 0 def test_pixel_types_fractional( self, diff --git 
a/tests/test_utils.py b/tests/test_utils.py index 054f5ef3..ebea82ac 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,12 +4,19 @@ from pydicom import dcmread from pydicom.dataset import Dataset +from pydicom.uid import VLWholeSlideMicroscopyImageStorage import pytest from highdicom import PlanePositionSequence +from highdicom.sr import CodedConcept from highdicom.enum import CoordinateSystemNames -from highdicom.utils import compute_plane_position_tiled_full, is_tiled_image +from highdicom.utils import ( + compute_plane_position_tiled_full, + compute_plane_position_slide_per_frame, + is_tiled_image, + are_plane_positions_tiled_full, +) params_plane_positions = [ @@ -167,10 +174,11 @@ def test_is_tiled_image(filepath, expected_output): assert is_tiled_image(dcm) == expected_output -def compute_plane_position_slide_per_frame(): +def test_compute_plane_position_slide_per_frame(): iterator = itertools.product(range(1, 4), range(1, 3)) for num_optical_paths, num_focal_planes in iterator: image = Dataset() + image.SOPClassUID = VLWholeSlideMicroscopyImageStorage image.Rows = 4 image.Columns = 4 image.TotalPixelMatrixRows = 16 @@ -185,8 +193,23 @@ def compute_plane_position_slide_per_frame(): pixel_measures_item.SpacingBetweenSlices = 1.0 shared_fg_item.PixelMeasuresSequence = [pixel_measures_item] image.SharedFunctionalGroupsSequence = [shared_fg_item] + origin_item = Dataset() + origin_item.XOffsetInSlideCoordinateSystem = 0.0 + origin_item.YOffsetInSlideCoordinateSystem = 0.0 + image.TotalPixelMatrixOriginSequence = [origin_item] + image.DimensionOrganizationType = "TILED_FULL" + optical_path_item = Dataset() + optical_path_item.OpticalPathIdentifier = '1' + optical_path_item.IlluminationTypeCodeSequence = [ + CodedConcept( + value="111744", + meaning="Brightfield illumination", + scheme_designator="DCM", + ) + ] + image.OpticalPathSequence = [optical_path_item] - plane_positions = compute_plane_position_tiled_full(image) + plane_positions = 
compute_plane_position_slide_per_frame(image) tiles_per_column = math.ceil(image.TotalPixelMatrixRows / image.Rows) tiles_per_row = math.ceil(image.TotalPixelMatrixColumns / image.Columns) @@ -196,3 +219,40 @@ def compute_plane_position_slide_per_frame(): tiles_per_row, tiles_per_column ]) + + +def test_are_plane_positions_tiled_full(): + + sm_path = Path(__file__).parents[1].joinpath( + 'data/test_files/sm_image.dcm' + ) + sm_image = dcmread(sm_path) + + # The plane positions from a TILED_FULL image should satsify the + # requirements + plane_positions = compute_plane_position_slide_per_frame(sm_image) + assert are_plane_positions_tiled_full( + plane_positions, + sm_image.Rows, + sm_image.Columns, + ) + + # If a plane is missing, it should not satisfy the requirements + plane_positions_missing = plane_positions[:5] + plane_positions[6:] + assert not are_plane_positions_tiled_full( + plane_positions_missing, + sm_image.Rows, + sm_image.Columns, + ) + + # If a plane is misordered, it should not satisfy the requirements + plane_positions_misordered = [ + plane_positions[1], + plane_positions[0], + *plane_positions[2:] + ] + assert not are_plane_positions_tiled_full( + plane_positions_misordered, + sm_image.Rows, + sm_image.Columns, + ) From f15aa1aef25a4de118b5356c3f4277daac149fcc Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Sat, 2 Sep 2023 10:30:48 -0400 Subject: [PATCH 55/96] Work in progress implementation of constructing tiled outputs --- src/highdicom/seg/sop.py | 116 +++++++++++++++++++++++++++++---------- 1 file changed, 88 insertions(+), 28 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 1f5fd5cf..f53a994e 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -3295,9 +3295,10 @@ def segmented_property_types(self) -> List[CodedConcept]: def _get_pixels_by_seg_frame( self, - num_output_frames: int, + output_shape: Union[int, Tuple[int, int]], indices_iterator: Iterable[Tuple[int, int, int]], 
segment_numbers: np.ndarray, + tiled_output: bool = False, combine_segments: bool = False, relabel: bool = False, rescale_fractional: bool = True, @@ -3312,17 +3313,29 @@ def _get_pixels_by_seg_frame( Parameters ---------- - num_output_frames: int - Number of frames in the output array. - indices_iterator: Iterable[Tuple[int, int, int]], - An iterable object that yields tuples of (out_frame_index, - seg_frame_index, output_segment_number) that describes how to - construct the desired output pixel array from the segmentation - image's pixel array. out_frame_index is the (0-based) index of a - frame of the output array. 'seg_frame_index' is the (0-based) - frame index of a frame of the segmentation image that should be - placed into that output frame with as segment number + output_shape: Union[int, Tuple[int, int]] + Shape of the output array. If tiled_output is False, this is the + number of frames in the output array. If tiled_output is True, it + is a tuple containing the number of (rows, columns) in the output + array. + indices_iterator: Union[Iterable[Tuple[int, int, int]], Iterable[Tuple[Tuple[int, int], int, int]]] + An iterable object that yields tuples of ((output_row_position, + output_column_position), seg_frame_index, output_segment_number) (if + tiled_output is True) or (out_frame_index, seg_frame_index, + output_segment_number) (if tiled_output is False) that describes + how to construct the desired output pixel array from the + segmentation image's pixel array. 'out_frame_index' is the + (0-based) index of a frame of the output array. + 'output_row_position' and 'output_column_position' give the + (0-based) position in the full output array of the top left pixel + of the segmentation frame. 'seg_frame_index' is the (0-based) frame + index of a frame of the segmentation image that should be placed + into that output frame (or tile position) with as segment number 'output_segment_number'. 
+ tiled_output: bool, optional + Whether the output array is (a region of) a total pixel matrix + formed by tiling frames in two dimensions. If False, the output + array is a 3D stack of frames. segment_numbers: np.ndarray One dimensional numpy array containing segment numbers corresponding to the columns of the seg frames matrix. @@ -3415,6 +3428,23 @@ def _get_pixels_by_seg_frame( else: _, h, w = self.pixel_array.shape + def _get_tiled_indices(ro: int, co: int) -> Tuple[int, int, int, int]: + if ro + h <= output_shape[0]: + r_end = ro + h + r_in = h + else: + r_end = output_shape[0] + r_in = output_shape[0] - (ro + h) + + if co + w <= output_shape[1]: + c_end = co + w + c_in = w + else: + c_end = output_shape[1] + c_in = output_shape[1] - (co + w) + + return r_end, r_in, c_end, c_in + if combine_segments: # Check whether segmentation is binary, or fractional with only # binary values @@ -3448,47 +3478,77 @@ def _get_pixels_by_seg_frame( pixel_array = pixel_array[None, :, :] # Initialize empty pixel array + full_output_shape = ( + output_shape if tiled_output else (output_shape, h, w) + ) out_array = np.zeros( - (num_output_frames, h, w), + full_output_shape, dtype=intermediate_dtype ) # Loop over the supplied iterable - for fo, fi, seg_n in indices_iterator: + for output_location, fi, seg_n in indices_iterator: pix_value = intermediate_dtype.type(seg_n) + + if tiled_output: + ro, co = output_location + r_end, r_in, c_end, c_in = _get_tiled_indices(ro, co) + + input_indexer = (fi, slice(None, r_in), slice(None, c_in)) + output_indexer = (slice(ro, r_end), slice(co, c_end)) + else: + fo = output_location + input_indexer = (fi, slice(None), slice(None)) + output_indexer = (fo, slice(None), slice(None)) + if not skip_overlap_checks: if np.any( np.logical_and( - pixel_array[fi, :, :] > 0, - out_array[fo, :, :] > 0 + pixel_array[input_indexer] > 0, + out_array[output_indexer] > 0 ) ): raise RuntimeError( "Cannot combine segments because segments " "overlap." 
) - out_array[fo, :, :] = np.maximum( - pixel_array[fi, :, :] * pix_value, - out_array[fo, :, :] + out_array[output_indexer] = np.maximum( + pixel_array[input_indexer] * pix_value, + out_array[output_indexer] ) else: # Initialize empty pixel array + full_output_shape = ( + (*output_shape, num_segments) if tiled_output else + (output_shape, h, w, num_segments) + ) out_array = np.zeros( - (num_output_frames, h, w, num_segments), - intermediate_dtype + full_output_shape, + dtype=intermediate_dtype ) - # Loop through output frames - for fo, fi, seg_n in indices_iterator: + # loop through output frames + for output_location, fi, seg_n in indices_iterator: + if tiled_output: + ro, co = output_location + r_end, r_in, c_end, c_in = _get_tiled_indices(ro, co) + + input_indexer = (fi, slice(None, r_in), slice(None, c_in)) + output_indexer = (slice(ro, r_end), slice(co, c_end), seg_n) + else: + fo = output_location + input_indexer = (fi, slice(None), slice(None)) + output_indexer = (fo, slice(None), slice(None), seg_n) + # Copy data to to output array if self.pixel_array.ndim == 2: # Special case with a single segmentation frame - out_array[fo, :, :, seg_n] = \ + out_array[output_indexer] = \ self.pixel_array.copy() else: - out_array[fo, :, :, seg_n] = \ - self.pixel_array[fi, :, :].copy() + out_array[output_indexer] = \ + self.pixel_array[input_indexer].copy() if rescale_fractional: if self.segmentation_type == SegmentationTypeValues.FRACTIONAL: @@ -3848,7 +3908,7 @@ def get_pixels_by_source_instance( ) as indices: return self._get_pixels_by_seg_frame( - num_output_frames=len(source_sop_instance_uids), + output_shape=len(source_sop_instance_uids), indices_iterator=indices, segment_numbers=np.array(segment_numbers), combine_segments=combine_segments, @@ -4102,7 +4162,7 @@ def get_pixels_by_source_frame( ) as indices: return self._get_pixels_by_seg_frame( - num_output_frames=len(source_frame_numbers), + output_shape=len(source_frame_numbers), indices_iterator=indices, 
segment_numbers=np.array(segment_numbers), combine_segments=combine_segments, @@ -4351,7 +4411,7 @@ def get_pixels_by_dimension_index_values( ) as indices: return self._get_pixels_by_seg_frame( - num_output_frames=len(dimension_index_values), + output_shape=len(dimension_index_values), indices_iterator=indices, segment_numbers=np.array(segment_numbers), combine_segments=combine_segments, From 6109eed35588bff4bc6566f09e070caa83b273ca Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Mon, 4 Sep 2023 13:53:58 -0400 Subject: [PATCH 56/96] Working implementation of tiled outputs, need tests --- data/test_files/seg_image_sm_control.dcm | Bin 20730 -> 20720 bytes src/highdicom/seg/sop.py | 723 +++++++++++++++++++---- 2 files changed, 601 insertions(+), 122 deletions(-) diff --git a/data/test_files/seg_image_sm_control.dcm b/data/test_files/seg_image_sm_control.dcm index 0bcc183c4a73d16ffc1d3c7f648db4f8364a233b..bf695236c632163863a707d74b5eb252f8f8c2d2 100644 GIT binary patch delta 295 zcmeyhknzJp#tki+%xP*2lNF5@Y*yFuXJmXaSX}&@8kn0JK)~b+z6z7~SjlWo(w)Hx)LG~>*8&2XezK%*u2MT3(%^~g)ZfcNcMbm#i2#P%?awTzi!A5+wYA-1IS?lUOZrL J`FX72000jkS&#q# delta 322 zcmeycknz_-#tki+%mr!;lM{6pY*yFuXJmXeIZ%J*WC5LfoDmER4FCQ!JP4RP-&ayh z2*@)s)ibj+G%z=@gaV7n4K^Z^Z~5{}ws4c#oS-{{6RdyMjtYrE}^Q`-iAh0Pis GCpZ97rDa$E diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index f53a994e..5f1bf700 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -25,7 +25,7 @@ import numpy as np from pydicom.dataset import Dataset -from pydicom.datadict import keyword_for_tag, tag_for_keyword +from pydicom.datadict import get_entry, keyword_for_tag, tag_for_keyword from pydicom.encaps import encapsulate from pydicom.multival import MultiValue from pydicom.pixel_data_handlers.numpy_handler import pack_bits @@ -59,6 +59,7 @@ from highdicom.utils import ( are_plane_positions_tiled_full, compute_plane_position_tiled_full, + is_tiled_image, get_tile_array, iter_tiled_full_frame_data, 
tile_pixel_matrix, @@ -149,11 +150,34 @@ class _SegDBManager: """Database manager for data associated with a segmentation image.""" + # Dictionary mapping DCM VRs to appropriate SQLite types + _DCM_SQL_TYPE_MAP = { + 'CS': 'VARCHAR', + 'DS': 'REAL', + 'FD': 'REAL', + 'FL': 'REAL', + 'IS': 'INTEGER', + 'LO': 'TEXT', + 'LT': 'TEXT', + 'PN': 'TEXT', + 'SH': 'TEXT', + 'SL': 'INTEGER', + 'SS': 'INTEGER', + 'ST': 'TEXT', + 'UI': 'TEXT', + 'UL': 'INTEGER', + 'UR': 'TEXT', + 'US or SS': 'INTEGER', + 'US': 'INTEGER', + 'UT': 'TEXT', + } + def __init__( self, referenced_uids: List[Tuple[str, str, str]], segment_numbers: List[int], dim_indices: Dict[int, List[int]], + dim_values: Dict[int, List[Any]], referenced_instances: Optional[List[str]], referenced_frames: Optional[List[int]], ): @@ -170,7 +194,11 @@ def __init__( dim_indices: Dict[int, List[int]] Dictionary mapping the integer tag value of each dimension index pointer (excluding SegmentNumber) to a list of dimension indices - for each frames in the segmentation image. + for each frame in the segmentation image. + dim_values: Dict[int, List[Values]] + Dictionary mapping the integer tag value of each dimension index + pointer (excluding SegmentNumber) to a list of dimension values + for each frame in the segmentation image. referenced_instances: Optional[List[str]] SOP Instance UID of each referenced image instance for each frame in the segmentation image. Should be omitted if there is not a @@ -188,14 +216,6 @@ def __init__( self._number_of_frames = len(segment_numbers) - self._dim_ind_col_names = {} - for i, t in enumerate(dim_indices.keys()): - kw = keyword_for_tag(t) - if kw == '': - kw = f'UnknownDimensionIndex{i}' - col_name = kw + '_DimensionIndexValues' - self._dim_ind_col_names[t] = col_name - # Construct the columns and values to put into a frame look-up table # table within sqlite. 
There will be one row per frame in the # segmentation instance @@ -210,12 +230,40 @@ def __init__( col_defs.append('SegmentNumber INTEGER NOT NULL') col_data.append(segment_numbers) - # Columns for other dimension index values - col_defs += [ - f'{col_name} INTEGER NOT NULL' - for col_name in self._dim_ind_col_names.values() - ] - col_data.extend(list(dim_indices.values())) + self._dim_ind_col_names = {} + for i, t in enumerate(dim_indices.keys()): + vr, vm_str, _, _, kw = get_entry(t) + if kw == '': + kw = f'UnknownDimensionIndex{i}' + ind_col_name = kw + '_DimensionIndexValues' + self._dim_ind_col_names[t] = ind_col_name + + # Add column for dimension index + col_defs.append(f'{ind_col_name} INTEGER NOT NULL') + col_data.append(dim_indices[t]) + + # Add column for dimension value + # For this to be possible, must have a fixed VM + # and a VR that we can map to a sqlite type + # Otherwise, we just omit the data from the db + try: + vm = int(vm_str) + except ValueError: + continue + try: + sql_type = self._DCM_SQL_TYPE_MAP[vr] + except KeyError: + continue + + if vm > 1: + for d in range(vm): + data = [el[d] for el in dim_values[t]] + col_defs.append(f'{kw}_{d} {sql_type} NOT NULL') + col_data.append(data) + else: + # Single column + col_defs.append(f'{kw} {sql_type} NOT NULL') + col_data.append(dim_values[t]) # Columns related to source frames, if they are usable for indexing if (referenced_frames is None) != (referenced_instances is None): @@ -441,6 +489,23 @@ def get_unique_dim_index_values( ) } + def is_indexable_as_total_pixel_matrix(self) -> bool: + """Whether the segmentation can be indexed as a total pixel matrix. + + Returns + ------- + bool: + True if the segmentation may be indexed using row and column + positions in the total pixel matrix. False otherwise. 
+ + """ + row_pos_kw = tag_for_keyword('RowPositionInTotalImagePixelMatrix') + col_pos_kw = tag_for_keyword('ColumnPositionInTotalImagePixelMatrix') + return ( + row_pos_kw in self._dim_ind_col_names and + col_pos_kw in self._dim_ind_col_names + ) + @contextmanager def _generate_temp_table( self, @@ -568,7 +633,17 @@ def iterate_indices_by_source_instance( segment_numbers: Sequence[int], combine_segments: bool = False, relabel: bool = False, - ) -> Generator[Iterator[Tuple[int, int, int]], None, None]: + ) -> Generator[ + Iterator[ + Tuple[ + Tuple[Union[slice, int], ...], + Tuple[Union[slice, int], ...], + int + ] + ], + None, + None, + ]: """Iterate over segmentation frame indices for given source image instances. @@ -607,15 +682,17 @@ def iterate_indices_by_source_instance( Yields ------ - Iterator[Tuple[int, int, int]]: + Iterator[Tuple[Tuple[Union[slice, int], ...], Tuple[Union[slice, int], ...], int]]: Indices required to construct the requested mask. Each - triplet denotes the (output frame index, segmentation frame index, + triplet denotes the (output indexer, segmentation indexer, output segment number) representing a list of "instructions" to create the requested output array by copying frames from the segmentation dataset and inserting them into the output array with - a given segment value. + a given segment value. Output indexer and segmentation indexer are + tuples that can be used to index the output and segmentations + numpy arrays directly. - """ + """ # noqa: E501 # Run query to create the iterable of indices needed to construct the # desired pixel array. 
The approach here is to create two temporary # tables in the SQLite database, one for the desired source UIDs, and @@ -657,7 +734,14 @@ def iterate_indices_by_source_instance( combine_segments=combine_segments, relabel=relabel ): - yield self._db_con.execute(query) + yield ( + ( + (fo, slice(None), slice(None)), + (fi, slice(None), slice(None)), + seg_no + ) + for (fo, fi, seg_no) in self._db_con.execute(query) + ) @contextmanager def iterate_indices_by_source_frame( @@ -667,7 +751,17 @@ def iterate_indices_by_source_frame( segment_numbers: Sequence[int], combine_segments: bool = False, relabel: bool = False, - ) -> Generator[Iterator[Tuple[int, int, int]], None, None]: + ) -> Generator[ + Iterator[ + Tuple[ + Tuple[Union[slice, int], ...], + Tuple[Union[slice, int], ...], + int + ] + ], + None, + None, + ]: """Iterate over frame indices for given source image frames. This is intended for the case of a segmentation image that references a @@ -708,15 +802,17 @@ def iterate_indices_by_source_frame( Yields ------ - Iterator[Tuple[int, int, int]]: + Iterator[Tuple[Tuple[Union[slice, int], ...], Tuple[Union[slice, int], ...], int]]: Indices required to construct the requested mask. Each - triplet denotes the (output frame index, segmentation frame index, + triplet denotes the (output indexer, segmentation indexer, output segment number) representing a list of "instructions" to create the requested output array by copying frames from the segmentation dataset and inserting them into the output array with - a given segment value. + a given segment value. Output indexer and segmentation indexer are + tuples that can be used to index the output and segmentations + numpy arrays directly. - """ + """ # noqa: E501 # Run query to create the iterable of indices needed to construct the # desired pixel array. 
The approach here is to create two temporary # tables in the SQLite database, one for the desired frame numbers, and @@ -758,7 +854,14 @@ def iterate_indices_by_source_frame( combine_segments=combine_segments, relabel=relabel ): - yield self._db_con.execute(query) + yield ( + ( + (fo, slice(None), slice(None)), + (fi, slice(None), slice(None)), + seg_no + ) + for (fo, fi, seg_no) in self._db_con.execute(query) + ) @contextmanager def iterate_indices_by_dimension_index_values( @@ -768,7 +871,17 @@ def iterate_indices_by_dimension_index_values( segment_numbers: Sequence[int], combine_segments: bool = False, relabel: bool = False, - ) -> Generator[Iterator[Tuple[int, int, int]], None, None]: + ) -> Generator[ + Iterator[ + Tuple[ + Tuple[Union[slice, int], ...], + Tuple[Union[slice, int], ...], + int + ] + ], + None, + None, + ]: """Iterate over frame indices for given dimension index values. This is intended to be the most flexible and lowest-level (and there @@ -811,15 +924,17 @@ def iterate_indices_by_dimension_index_values( Yields ------ - Iterator[Tuple[int, int, int]]: + Iterator[Tuple[Tuple[Union[slice, int], ...], Tuple[Union[slice, int], ...], int]]: Indices required to construct the requested mask. Each - triplet denotes the (output frame index, segmentation frame index, + triplet denotes the (output indexer, segmentation indexer, output segment number) representing a list of "instructions" to create the requested output array by copying frames from the segmentation dataset and inserting them into the output array with - a given segment value. + a given segment value. Output indexer and segmentation indexer are + tuples that can be used to index the output and segmentations + numpy arrays directly. 
- """ + """ # noqa: E501 # Create temporary table of desired dimension indices table_name = 'TemporaryDimensionIndexValues' @@ -862,7 +977,160 @@ def iterate_indices_by_dimension_index_values( combine_segments=combine_segments, relabel=relabel ): - yield self._db_con.execute(query) + yield ( + ( + (fo, slice(None), slice(None)), + (fi, slice(None), slice(None)), + seg_no + ) + for (fo, fi, seg_no) in self._db_con.execute(query) + ) + + @contextmanager + def iterate_indices_for_tiled_region( + self, + row_start: int, + row_end: int, + column_start: int, + column_end: int, + tile_shape: Tuple[int, int], + segment_numbers: Sequence[int], + combine_segments: bool = False, + relabel: bool = False, + ) -> Generator[ + Iterator[ + Tuple[ + Tuple[Union[slice, int], ...], + Tuple[Union[slice, int], ...], + int + ] + ], + None, + None, + ]: + """Iterate over segmentation frame indices for a given region of the + segmentation's total pixel matrix. + + This is intended for the case of a segmentation image that is stored as + a tiled representation of total pixel matrix. + + This yields an iterator to the underlying database result that iterates + over information on the steps required to construct the requested + segmentation mask from the stored frames of the segmentation image. + + This method is intended to be used as a context manager that yields the + requested iterator. The iterator is only valid while the context + manager is active. + + Parameters + ---------- + row_start: int + Row index (1-based) in the total pixel matrix of the first row of + the output array. May be negative (last row is -1). + row_end: int + Row index (1-based) in the total pixel matrix one beyond the last + row of the output array. May be negative (last row is -1). + column_start: int + Column index (1-based) in the total pixel matrix of the first + column of the output array. May be negative (last column is -1). 
+ column_end: int + Column index (1-based) in the total pixel matrix one beyond the last + column of the output array. May be negative (last column is -1). + tile_shape: Tuple[int, int] + Shape of each tile (rows, columns). + segment_numbers: Sequence[int] + Numbers of segments to include. + combine_segments: bool, optional + If True, produce indices to combine the different segments into a + single label map in which the value of a pixel represents its + segment. If False (the default), segments are binary and stacked + down the last dimension of the output array. + relabel: bool, optional + If True and ``combine_segments`` is ``True``, the output segment + numbers are relabelled into the range ``0`` to + ``len(segment_numbers)`` (inclusive) according to the position of + the original segment numbers in ``segment_numbers`` parameter. If + ``combine_segments`` is ``False``, this has no effect. + + Yields + ------ + Iterator[Tuple[Tuple[Union[slice, int], ...], Tuple[Union[slice, int], ...], int]]: + Indices required to construct the requested mask. Each + triplet denotes the (output indexer, segmentation indexer, + output segment number) representing a list of "instructions" to + create the requested output array by copying frames from the + segmentation dataset and inserting them into the output array with + a given segment value. Output indexer and segmentation indexer are + tuples that can be used to index the output and segmentations + numpy arrays directly. 
+ + """ # noqa: E501 + th, tw = tile_shape + + oh = row_end - row_start + ow = column_end - column_start + + row_offset_start = row_start - th + 1 + column_offset_start = column_start - tw + 1 + + # Construct the query The ORDER BY is not logically necessary + # but seems to improve performance of the downstream numpy + # operations, presumably as it is more cache efficient + query = ( + 'SELECT ' + ' L.RowPositionInTotalImagePixelMatrix,' + ' L.ColumnPositionInTotalImagePixelMatrix,' + ' L.FrameNumber - 1,' + ' S.OutputSegmentNumber ' + 'FROM FrameLUT L ' + 'INNER JOIN TemporarySegmentNumbers S' + ' ON L.SegmentNumber = S.SegmentNumber ' + 'WHERE (' + ' L.RowPositionInTotalImagePixelMatrix >= ' + f' {row_offset_start}' + f' AND L.RowPositionInTotalImagePixelMatrix < {row_end}' + ' AND L.ColumnPositionInTotalImagePixelMatrix >= ' + f' {column_offset_start}' + f' AND L.ColumnPositionInTotalImagePixelMatrix < {column_end}' + ')' + 'ORDER BY ' + ' L.RowPositionInTotalImagePixelMatrix,' + ' L.ColumnPositionInTotalImagePixelMatrix,' + ' S.OutputSegmentNumber' + ) + + with self._generate_temp_segment_table( + segment_numbers=segment_numbers, + combine_segments=combine_segments, + relabel=relabel + ): + yield ( + ( + ( + slice( + max(rp - row_start, 0), + min(rp + th - row_start, oh) + ), + slice( + max(cp - column_start, 0), + min(cp + tw - column_start, ow) + ), + ), + ( + fi, + slice( + max(row_start - rp, 0), + min(row_end - rp, th) + ), + slice( + max(column_start - cp, 0), + min(column_end - cp, tw) + ), + ), + seg_no + ) + for (rp, cp, fi, seg_no) in self._db_con.execute(query) + ) class Segmentation(SOPClass): @@ -2839,6 +3107,14 @@ def _build_luts(self) -> None: for dim_ind in self.DimensionIndexSequence if dim_ind.DimensionIndexPointer != seg_num_tag ] + + func_grp_pointers = {} + for dim_ind in self.DimensionIndexSequence: + ptr = dim_ind.DimensionIndexPointer + if ptr in self._dim_ind_pointers: + grp_ptr = getattr(dim_ind, "FunctionalGroupPointer", None) + 
func_grp_pointers[ptr] = grp_ptr + dim_ind_positions = { dim_ind.DimensionIndexPointer: i for i, dim_ind in enumerate(self.DimensionIndexSequence) @@ -2847,6 +3123,9 @@ def _build_luts(self) -> None: dim_indices: Dict[int, List[int]] = { ptr: [] for ptr in self._dim_ind_pointers } + dim_values: Dict[int, List[Any]] = { + ptr: [] for ptr in self._dim_ind_pointers + } self._single_source_frame_per_seg_frame = True @@ -2871,13 +3150,18 @@ def _build_luts(self) -> None: ( segment_numbers, _, - dim_indices[row_tag], - dim_indices[col_tag], - dim_indices[x_tag], - dim_indices[y_tag], - dim_indices[z_tag], + dim_values[row_tag], + dim_values[col_tag], + dim_values[x_tag], + dim_values[y_tag], + dim_values[z_tag], ) = zip(*iter_tiled_full_frame_data(self)) + # Create indices for each of the dimensions + for ptr, vals in dim_values.items(): + _, indices = np.unique(vals, return_inverse=True) + dim_indices[ptr] = (indices + 1).tolist() + # There is no way to deduce whether the spatial locations are # preserved in the tiled full case self._locations_preserved = None @@ -2910,6 +3194,12 @@ def _build_luts(self) -> None: ) for ptr in self._dim_ind_pointers: dim_indices[ptr].append(indices[dim_ind_positions[ptr]]) + grp_ptr = func_grp_pointers[ptr] + if grp_ptr is not None: + dim_val = frame_item[grp_ptr][0][ptr].value + else: + dim_val = frame_item[ptr].value + dim_values[ptr].append(dim_val) frame_source_instances = [] frame_source_frames = [] @@ -2992,6 +3282,7 @@ def _build_luts(self) -> None: referenced_uids=referenced_uids, segment_numbers=segment_numbers, dim_indices=dim_indices, + dim_values=dim_values, referenced_instances=referenced_instances, referenced_frames=referenced_frames, ) @@ -3296,9 +3587,14 @@ def segmented_property_types(self) -> List[CodedConcept]: def _get_pixels_by_seg_frame( self, output_shape: Union[int, Tuple[int, int]], - indices_iterator: Iterable[Tuple[int, int, int]], + indices_iterator: Iterator[ + Tuple[ + Tuple[Union[slice, int], ...], + 
Tuple[Union[slice, int], ...], + int + ] + ], segment_numbers: np.ndarray, - tiled_output: bool = False, combine_segments: bool = False, relabel: bool = False, rescale_fractional: bool = True, @@ -3314,28 +3610,25 @@ def _get_pixels_by_seg_frame( Parameters ---------- output_shape: Union[int, Tuple[int, int]] - Shape of the output array. If tiled_output is False, this is the - number of frames in the output array. If tiled_output is True, it - is a tuple containing the number of (rows, columns) in the output - array. - indices_iterator: Union[Iterable[Tuple[int, int, int]], Iterable[Tuple[Tuple[int, int], int, int]]] - An iterable object that yields tuples of ((output_row_position, - output_column_position), seg_frame_index, output_segment_number) (if - tiled_output is True) or (out_frame_index, seg_frame_index, - output_segment_number) (if tiled_output is False) that describes - how to construct the desired output pixel array from the - segmentation image's pixel array. 'out_frame_index' is the - (0-based) index of a frame of the output array. - 'output_row_position' and 'output_column_position' give the - (0-based) position in the full output array of the top left pixel - of the segmentation frame. 'seg_frame_index' is the (0-based) frame - index of a frame of the segmentation image that should be placed - into that output frame (or tile position) with as segment number - 'output_segment_number'. - tiled_output: bool, optional - Whether the output array is (a region of) a total pixel matrix - formed by tiling frames in two dimensions. If False, the output - array is a 3D stack of frames. + Shape of the output array. If an integer is False, this is the + number of frames in the output array and the number of rows and + columns are taken to match those of each segmentation frame. If a + tuple of integers, it contains the number of (rows, columns) in the + output array and there is no frame dimension (this is the tiled + case). 
Note in either case, the segments dimension (if relevant) is + omitted. + indices_iterator: Iterator[Tuple[Tuple[Union[slice, int], ...], Tuple[Union[slice, int], ...], int ]] + An iterable object that yields tuples of (output_indexer, + segmentation_indexer, output_segment_number) that describes how to + construct the desired output pixel array from the segmentation + image's pixel array. 'output_indexer' is a tuple that may be used + directly to index the output array to place a single frame's pixels + into the output array. Similarly 'segmentation_indexer' is a tuple + that may be used directly to index the segmentation pixel array + to retrieve the pixels to place into the output array. + with as segment number 'output_segment_number'. Note that in both + cases the indexers access the frame, row and column dimensions of + the relevant array, but not the segment dimension (if relevant). segment_numbers: np.ndarray One dimensional numpy array containing segment numbers corresponding to the columns of the seg frames matrix. 
@@ -3376,7 +3669,7 @@ def _get_pixels_by_seg_frame( pixel_array: np.ndarray Segmentation pixel array - """ + """ # noqa: E501 if ( segment_numbers.min() < 1 or segment_numbers.max() > self.number_of_segments @@ -3428,23 +3721,6 @@ def _get_pixels_by_seg_frame( else: _, h, w = self.pixel_array.shape - def _get_tiled_indices(ro: int, co: int) -> Tuple[int, int, int, int]: - if ro + h <= output_shape[0]: - r_end = ro + h - r_in = h - else: - r_end = output_shape[0] - r_in = output_shape[0] - (ro + h) - - if co + w <= output_shape[1]: - c_end = co + w - c_in = w - else: - c_end = output_shape[1] - c_in = output_shape[1] - (co + w) - - return r_end, r_in, c_end, c_in - if combine_segments: # Check whether segmentation is binary, or fractional with only # binary values @@ -3479,7 +3755,9 @@ def _get_tiled_indices(ro: int, co: int) -> Tuple[int, int, int, int]: # Initialize empty pixel array full_output_shape = ( - output_shape if tiled_output else (output_shape, h, w) + output_shape + if isinstance(output_shape, tuple) + else (output_shape, h, w) ) out_array = np.zeros( full_output_shape, @@ -3487,24 +3765,13 @@ def _get_tiled_indices(ro: int, co: int) -> Tuple[int, int, int, int]: ) # Loop over the supplied iterable - for output_location, fi, seg_n in indices_iterator: + for (output_indexer, seg_indexer, seg_n) in indices_iterator: pix_value = intermediate_dtype.type(seg_n) - if tiled_output: - ro, co = output_location - r_end, r_in, c_end, c_in = _get_tiled_indices(ro, co) - - input_indexer = (fi, slice(None, r_in), slice(None, c_in)) - output_indexer = (slice(ro, r_end), slice(co, c_end)) - else: - fo = output_location - input_indexer = (fi, slice(None), slice(None)) - output_indexer = (fo, slice(None), slice(None)) - if not skip_overlap_checks: if np.any( np.logical_and( - pixel_array[input_indexer] > 0, + pixel_array[seg_indexer] > 0, out_array[output_indexer] > 0 ) ): @@ -3513,33 +3780,28 @@ def _get_tiled_indices(ro: int, co: int) -> Tuple[int, int, int, int]: 
"overlap." ) out_array[output_indexer] = np.maximum( - pixel_array[input_indexer] * pix_value, + pixel_array[seg_indexer] * pix_value, out_array[output_indexer] ) else: # Initialize empty pixel array full_output_shape = ( - (*output_shape, num_segments) if tiled_output else - (output_shape, h, w, num_segments) + (*output_shape, num_segments) + if isinstance(output_shape, tuple) + else (output_shape, h, w, num_segments) ) + print(full_output_shape) out_array = np.zeros( full_output_shape, dtype=intermediate_dtype ) # loop through output frames - for output_location, fi, seg_n in indices_iterator: - if tiled_output: - ro, co = output_location - r_end, r_in, c_end, c_in = _get_tiled_indices(ro, co) + for (output_indexer, seg_indexer, seg_n) in indices_iterator: - input_indexer = (fi, slice(None, r_in), slice(None, c_in)) - output_indexer = (slice(ro, r_end), slice(co, c_end), seg_n) - else: - fo = output_location - input_indexer = (fi, slice(None), slice(None)) - output_indexer = (fo, slice(None), slice(None), seg_n) + # Output indexer needs segment index + output_indexer = (*output_indexer, seg_n) # Copy data to to output array if self.pixel_array.ndim == 2: @@ -3548,7 +3810,7 @@ def _get_tiled_indices(ro: int, co: int) -> Tuple[int, int, int, int]: self.pixel_array.copy() else: out_array[output_indexer] = \ - self.pixel_array[input_indexer].copy() + self.pixel_array[seg_indexer].copy() if rescale_fractional: if self.segmentation_type == SegmentationTypeValues.FRACTIONAL: @@ -3999,19 +4261,19 @@ def get_pixels_by_source_frame( the original segment numbers in ``segment_numbers`` parameter. If ``combine_segments`` is ``False``, this has no effect. ignore_spatial_locations: bool, optional - Ignore whether or not spatial locations were preserved in the - derivation of the segmentation frames from the source frames. 
In - some segmentation images, the pixel locations in the segmentation - frames may not correspond to pixel locations in the frames of the - source image from which they were derived. The segmentation image - may or may not specify whether or not spatial locations are - preserved in this way through use of the optional (0028,135A) - SpatialLocationsPreserved attribute. If this attribute specifies - that spatial locations are not preserved, or is absent from the - segmentation image, highdicom's default behavior is to disallow - indexing by source frames. To override this behavior and retrieve - segmentation pixels regardless of the presence or value of the - spatial locations preserved attribute, set this parameter to True. + Ignore whether or not spatial locations were preserved in the + derivation of the segmentation frames from the source frames. In + some segmentation images, the pixel locations in the segmentation + frames may not correspond to pixel locations in the frames of the + source image from which they were derived. The segmentation image + may or may not specify whether or not spatial locations are + preserved in this way through use of the optional (0028,135A) + SpatialLocationsPreserved attribute. If this attribute specifies + that spatial locations are not preserved, or is absent from the + segmentation image, highdicom's default behavior is to disallow + indexing by source frames. To override this behavior and retrieve + segmentation pixels regardless of the presence or value of the + spatial locations preserved attribute, set this parameter to True. assert_missing_frames_are_empty: bool, optional Assert that requested source frame numbers that are not referenced by the segmentation image contain no segments. 
If a source frame @@ -4421,6 +4683,223 @@ def get_pixels_by_dimension_index_values( dtype=dtype, ) + def get_total_pixel_matrix( + self, + row_start: int = 1, + row_end: Optional[int] = None, + column_start: int = 1, + column_end: Optional[int] = None, + segment_numbers: Optional[Sequence[int]] = None, + combine_segments: bool = False, + relabel: bool = False, + rescale_fractional: bool = True, + skip_overlap_checks: bool = False, + dtype: Union[type, str, np.dtype, None] = None, + ): + """Get the pixel array as a (region of) the total pixel matrix. + + This is intended for retrieving segmentation masks derived from + multi-frame (enhanced) source images that are tiled. The method + returns (a region of) the 2D total pixel matrix implied by the + frames within the segmentation. + + The output array will have 3 dimensions under the default behavior, and + 2 dimensions if ``combine_segments`` is set to ``True``. The first two + dimensions are the rows and columns of the total pixel matrix, + respectively. By default, the full total pixel matrix is returned, + however a smaller region may be requested using the ``row_start``, + ``row_end``, ``column_start`` and ``column_end`` parameters as 1-based + indices into the total pixel matrix. + + When ``combine_segments`` is ``False`` (the default behavior), the + segments are stacked down the final (3rd) dimension of the pixel array. + If ``segment_numbers`` was specified, then ``pixel_array[:, :, i]`` + represents the data for segment ``segment_numbers[i]``. If + ``segment_numbers`` was unspecified, then ``pixel_array[:, :, i]`` + represents the data for segment ``parser.segment_numbers[i]``. Note + that in neither case does ``pixel_array[:, :, i]`` represent + the segmentation data for the segment with segment number ``i``, since + segment numbers begin at 1 in DICOM. 
+ + When ``combine_segments`` is ``True``, then the segmentation data from + all specified segments is combined into a multi-class array in which + pixel value is used to denote the segment to which a pixel belongs. + This is only possible if the segments do not overlap and either the + type of the segmentation is ``BINARY`` or the type of the segmentation + is ``FRACTIONAL`` but all values are exactly 0.0 or 1.0. + If the segments do overlap, a ``RuntimeError`` will be + raised. After combining, the value of a pixel depends upon the + ``relabel`` parameter. In both cases, pixels that appear in no segments + will have a value of ``0``. If ``relabel`` is ``False``, a pixel that + appears in the segment with segment number ``i`` (according to the + original segment numbering of the segmentation object) will have a + value of ``i``. If ``relabel`` is ``True``, the value of a pixel in + segment ``i`` is related not to the original segment number, but to the + index of that segment number in the ``segment_numbers`` parameter of + this method. Specifically, pixels belonging to the segment with segment + number ``segment_numbers[i]`` are given the value ``i + 1`` in the + output pixel array (since 0 is reserved for pixels that belong to no + segments). In this case, the values in the output pixel array will + always lie in the range ``0`` to ``len(segment_numbers)`` inclusive. + + Parameters + ---------- + row_start: int, optional + 1-based row index in the total pixel matrix of the first row to + include in the output array. May be negative, in which case the + last row is considered index -1. + row_end: Union[int, None], optional + 1-based row index in the total pixel matrix of the first row beyond + the last row to include in the output array. A ``row_end`` value of + ``n`` will include rows ``n - 1`` and below, similar to standard + Python indexing. If ``None``, rows up until the final row of the + total pixel matrix are included.
May be negative, in which case the + last row is considered index -1. + column_start: int, optional + 1-based column index in the total pixel matrix of the first column + to include in the output array. May be negative, in which case the + last column is considered index -1. + column_end: Union[int, None], optional + 1-based column index in the total pixel matrix of the first column + beyond the last column to include in the output array. A + ``column_end`` value of ``n`` will include columns ``n - 1`` and + below, similar to standard Python indexing. If ``None``, columns up + until the final column of the total pixel matrix are included. May + be negative, in which case the last column is considered index -1. + segment_numbers: Optional[Sequence[int]], optional + Sequence containing segment numbers to include. If unspecified, + all segments are included. + combine_segments: bool, optional + If True, combine the different segments into a single label + map in which the value of a pixel represents its segment. + If False (the default), segments are binary and stacked down the + last dimension of the output array. + relabel: bool, optional + If True and ``combine_segments`` is ``True``, the pixel values in + the output array are relabelled into the range ``0`` to + ``len(segment_numbers)`` (inclusive) according to the position of + the original segment numbers in ``segment_numbers`` parameter. If + ``combine_segments`` is ``False``, this has no effect. + rescale_fractional: bool + If this is a FRACTIONAL segmentation and ``rescale_fractional`` is + True, the raw integer-valued array stored in the segmentation image + output will be rescaled by the MaximumFractionalValue such that + each pixel lies in the range 0.0 to 1.0. If False, the raw integer + values are returned. If the segmentation has BINARY type, this + parameter has no effect. + skip_overlap_checks: bool + If True, skip checks for overlap between different segments. 
By + default, checks are performed to ensure that the segments do not + overlap. However, this reduces performance. If checks are skipped + and multiple segments do overlap, the segment with the highest + segment number (after relabelling, if applicable) will be placed + into the output array. + dtype: Union[type, str, numpy.dtype, None] + Data type of the returned array. If None, an appropriate type will + be chosen automatically. If the returned values are rescaled + fractional values, this will be numpy.float32. Otherwise, the + smallest unsigned integer type that accommodates all of the output + values will be chosen. + + Returns + ------- + pixel_array: np.ndarray + Pixel array representing the segmentation's total pixel matrix. + + Note + ---- + This method uses 1-based indexing of rows and columns in order to match + the conventions used in the DICOM standard. The first row of the total + pixel matrix is row 1, and the last is ``self.TotalPixelMatrixRows``. + This is unlike standard Python and NumPy indexing which is 0-based. + For negative indices, the two are equivalent with the final row/column + having index -1. + + """ + # Check whether this segmentation is appropriate for tile-based indexing + if not is_tiled_image(self): + raise RuntimeError("Segmentation is not a tiled image.") + if not self._db_man.is_indexable_as_total_pixel_matrix(): + raise RuntimeError( + "Segmentation does not have appropriate dimension indices " + "to be indexed as a total pixel matrix." + ) + + # Checks on validity of the inputs + if segment_numbers is None: + segment_numbers = list(self.segment_numbers) + if len(segment_numbers) == 0: + raise ValueError( + 'Segment numbers may not be empty.'
+ ) + + if row_start is None: + row_start = 1 + if row_end is None: + row_end = self.TotalPixelMatrixRows + 1 + if column_start is None: + column_start = 1 + if column_end is None: + column_end = self.TotalPixelMatrixColumns + 1 + + if column_start == 0 or row_start == 0: + raise ValueError( + 'Arguments "row_start" and "column_start" may not be 0.' + ) + + if row_start > self.TotalPixelMatrixRows + 1: + raise ValueError( + 'Invalid value for "row_start".' + ) + elif row_start < 0: + row_start = self.TotalPixelMatrixRows + row_start + 1 + if row_end > self.TotalPixelMatrixRows + 1: + raise ValueError( + 'Invalid value for "row_end".' + ) + elif row_end < 0: + row_end = self.TotalPixelMatrixRows + row_end + 1 + + if column_start > self.TotalPixelMatrixColumns + 1: + raise ValueError( + 'Invalid value for "column_start".' + ) + elif column_start < 0: + column_start = self.TotalPixelMatrixColumns + column_start + 1 + if column_end > self.TotalPixelMatrixColumns + 1: + raise ValueError( + 'Invalid value for "column_end".' + ) + elif column_end < 0: + column_end = self.TotalPixelMatrixColumns + column_end + 1 + + output_shape = ( + row_end - row_start, + column_end - column_start, + ) + + with self._db_man.iterate_indices_for_tiled_region( + row_start=row_start, + row_end=row_end, + column_start=column_start, + column_end=column_end, + tile_shape=(self.Rows, self.Columns), + segment_numbers=segment_numbers, + combine_segments=combine_segments, + relabel=relabel, + ) as indices: + + return self._get_pixels_by_seg_frame( + output_shape=output_shape, + indices_iterator=indices, + segment_numbers=np.array(segment_numbers), + combine_segments=combine_segments, + relabel=relabel, + rescale_fractional=rescale_fractional, + skip_overlap_checks=skip_overlap_checks, + dtype=dtype, + ) + def segread(fp: Union[str, bytes, PathLike, BinaryIO]) -> Segmentation: """Read a segmentation image stored in DICOM File Format. 
From c50614ff40361241f0e2ae4815c5a61a8be877a4 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Sun, 10 Sep 2023 18:30:25 -0400 Subject: [PATCH 57/96] Add tests, fix bug with padded tiles --- src/highdicom/seg/sop.py | 42 +++++++++++----- tests/test_seg.py | 105 +++++++++++++++++++++++++++++++++++---- 2 files changed, 124 insertions(+), 23 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 5f1bf700..3e3f8356 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1885,6 +1885,9 @@ def __init__( has_ref_frame_uid and self._coordinate_system == CoordinateSystemNames.SLIDE ): + total_pixel_matrix_size = ( + pixel_array.shape[1:3] if tile_pixel_array else None + ) self._add_slide_coordinate_metadata( source_image=src_img, plane_orientation=plane_orientation, @@ -1892,6 +1895,7 @@ def __init__( pixel_measures=pixel_measures, are_spatial_locations_preserved=are_spatial_locations_preserved, is_tiled=is_tiled, + total_pixel_matrix_size=total_pixel_matrix_size, ) is_encaps = self.file_meta.TransferSyntaxUID.is_encapsulated @@ -2215,6 +2219,7 @@ def _add_slide_coordinate_metadata( pixel_measures: PixelMeasuresSequence, are_spatial_locations_preserved: bool, is_tiled: bool, + total_pixel_matrix_size: Optional[Tuple[int, int]] = None, ) -> None: """Add metadata related to the slide coordinate system. @@ -2235,6 +2240,12 @@ def _add_slide_coordinate_metadata( and the segmentation. is_tiled: bool Whether the source image is a tiled image. + total_pixel_matrix_size: Optional[Tuple[int, int]] + Size (rows, columns) of the total pixel matrix, if known. If None, + this will be deduced from the specified plane position values. + Explicitly providing the total pixel matrix size is required if the + total pixel matrix is smaller than the total area covered by the + provided tiles (i.e. the provided plane positions are padded). 
""" plane_position_names = self.DimensionIndexSequence.get_index_keywords() @@ -2284,14 +2295,18 @@ def _add_slide_coordinate_metadata( origin_item.YOffsetInSlideCoordinateSystem = \ format_number_as_ds(y_origin) self.TotalPixelMatrixOriginSequence = [origin_item] - self.TotalPixelMatrixRows = int( - plane_position_values[last_frame_index, row_index] + - self.Rows - ) - self.TotalPixelMatrixColumns = int( - plane_position_values[last_frame_index, col_index] + - self.Columns - ) + if total_pixel_matrix_size is None: + self.TotalPixelMatrixRows = int( + plane_position_values[last_frame_index, row_index] + + self.Rows - 1 + ) + self.TotalPixelMatrixColumns = int( + plane_position_values[last_frame_index, col_index] + + self.Columns - 1 + ) + else: + self.TotalPixelMatrixRows = total_pixel_matrix_size[0] + self.TotalPixelMatrixColumns = total_pixel_matrix_size[1] else: transform = ImageToReferenceTransformer( image_position=(x_origin, y_origin, z_origin), @@ -3096,8 +3111,6 @@ def _build_luts(self) -> None: ) segment_numbers = [] - referenced_instances: Optional[List[str]] = [] - referenced_frames: Optional[List[int]] = [] # Get list of all dimension index pointers, excluding the segment # number, since this is treated differently @@ -3150,8 +3163,8 @@ def _build_luts(self) -> None: ( segment_numbers, _, - dim_values[row_tag], dim_values[col_tag], + dim_values[row_tag], dim_values[x_tag], dim_values[y_tag], dim_values[z_tag], @@ -3165,7 +3178,13 @@ def _build_luts(self) -> None: # There is no way to deduce whether the spatial locations are # preserved in the tiled full case self._locations_preserved = None + + referenced_instances = None + referenced_frames = None else: + referenced_instances: Optional[List[str]] = [] + referenced_frames: Optional[List[int]] = [] + # Create a list of source images and check for spatial locations # preserved locations_list_type = List[ @@ -3791,7 +3810,6 @@ def _get_pixels_by_seg_frame( if isinstance(output_shape, tuple) else 
(output_shape, h, w, num_segments) ) - print(full_output_shape) out_array = np.zeros( full_output_shape, dtype=intermediate_dtype diff --git a/tests/test_seg.py b/tests/test_seg.py index 6b8b3432..40d2c6a7 100644 --- a/tests/test_seg.py +++ b/tests/test_seg.py @@ -1,6 +1,7 @@ from collections import defaultdict from concurrent.futures import ProcessPoolExecutor from copy import deepcopy +import itertools import unittest from pathlib import Path import warnings @@ -670,7 +671,18 @@ def setUp(self): ), dtype=bool ) - self._sm_total_pixel_array[45:60, 5:70] = True + self._sm_total_pixel_array[38:43, 5:41] = True + self._sm_total_pixel_array[4:24, 25:29] = True + + self._sm_total_pixel_array_multiclass = np.zeros( + ( + self._sm_image.TotalPixelMatrixRows, + self._sm_image.TotalPixelMatrixColumns + ), + dtype=np.uint8, + ) + self._sm_total_pixel_array_multiclass[38:43, 5:41] = 1 + self._sm_total_pixel_array_multiclass[4:24, 25:29] = 2 # A series of single frame CT images ct_series = [ @@ -1518,17 +1530,25 @@ def tile_size(request): def locations_preserved(request): return request.param - # with and without omitting frames - # with tiled full and tiled sparse - # single and multiple segments - # with and without spatial locations preserved + @staticmethod + @pytest.fixture(params=[1, 2]) + def num_segments(request): + return request.param + def test_construction_autotile( self, tile_size, dimension_organization_type, segmentation_type, locations_preserved, + num_segments, ): + if num_segments == 1: + pixel_array = self._sm_total_pixel_array + segment_descriptions = self._segment_descriptions + else: + pixel_array = self._sm_total_pixel_array_multiclass + segment_descriptions = self._both_segment_descriptions if locations_preserved: pixel_measures = None @@ -1540,29 +1560,39 @@ def test_construction_autotile( slice_thickness=0.001, ) plane_orientation = PlaneOrientationSequence( - coordinate_system="SLIDE", + coordinate_system='SLIDE', image_orientation=[0.0, -1.0, 0.0, 
1.0, 0.0, 0.0] ) plane_positions = [ PlanePositionSequence( - coordinate_system="SLIDE", + coordinate_system='SLIDE', image_position=[1.1234, -5.4323214, 0.0], pixel_matrix_position=(1, 1), ) ] - if dimension_organization_type.value == "TILED_FULL": + if dimension_organization_type.value == 'TILED_FULL': # Cannot omit empty frames with TILED_FULL omit_empty_frames_values = [False] else: omit_empty_frames_values = [False, True] - for omit_empty_frames in omit_empty_frames_values: + transfer_syntax_uids = [ExplicitVRLittleEndian] + if segmentation_type.value == 'FRACTIONAL': + transfer_syntax_uids += [ + JPEG2000Lossless, + JPEGLSLossless, + ] + + for omit_empty_frames, transfer_syntax_uid in itertools.product( + omit_empty_frames_values, + transfer_syntax_uids, + ): instance = Segmentation( [self._sm_image], - pixel_array=self._sm_total_pixel_array, + pixel_array=pixel_array, segmentation_type=segmentation_type, - segment_descriptions=self._segment_descriptions, + segment_descriptions=segment_descriptions, series_instance_uid=self._series_instance_uid, series_number=self._series_number, sop_instance_uid=self._sop_instance_uid, @@ -1578,6 +1608,8 @@ def test_construction_autotile( pixel_measures=pixel_measures, tile_pixel_array=True, tile_size=tile_size, + max_fractional_value=1, + transfer_syntax_uid=transfer_syntax_uid, ) assert ( instance.DimensionOrganizationType == @@ -1587,10 +1619,61 @@ def test_construction_autotile( assert instance.Rows == tile_size[0] assert instance.Columns == tile_size[1] + # pydicom raises warnings if it has to pad or truncate pixel data with warnings.catch_warnings(record=True) as w: self.get_array_after_writing(instance) assert len(w) == 0 + # Check that full reconstructed array matches the input + reconstructed_array = instance.get_total_pixel_matrix( + combine_segments=True, + ) + assert reconstructed_array.shape == ( + self._sm_image.TotalPixelMatrixRows, + self._sm_image.TotalPixelMatrixColumns + ) + assert np.array_equal( + 
reconstructed_array, + pixel_array, + ) + + def to_numpy(c): + # Move from our 1-based convention to numpy zero based + if c is None: + # None is handled the same + return None + elif c > 0: + # Positive indices are 1-based + return c - 1 + else: + # Negative indices are the same + return c + + # Check that subregions defined in different ways match the input + for rs, re, cs, ce in [ + (34, 48, 3, None), + (-13, None, -34, -23), + ]: + reconstructed_array = instance.get_total_pixel_matrix( + combine_segments=True, + row_start=rs, + row_end=re, + column_start=cs, + column_end=ce, + ) + + rs_np = to_numpy(rs) + re_np = to_numpy(re) + cs_np = to_numpy(cs) + ce_np = to_numpy(ce) + expected_array = pixel_array[ + (slice(rs_np, re_np), slice(cs_np, ce_np)) + ] + assert np.array_equal( + reconstructed_array, + expected_array, + ) + def test_pixel_types_fractional( self, fractional_transfer_syntax_uid, From 402371a0e531bdaa45c88bb07e47e0bc73ebb6a3 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Mon, 11 Sep 2023 17:35:41 -0400 Subject: [PATCH 58/96] Add doc sections on total pixel matrices for seg creation and parsing --- docs/seg.rst | 216 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 216 insertions(+) diff --git a/docs/seg.rst b/docs/seg.rst index 63e786e0..50a1b557 100644 --- a/docs/seg.rst +++ b/docs/seg.rst @@ -486,6 +486,140 @@ segments. device_serial_number='1234567890', ) +Constructing SEG Images from a Total Pixel Matrix +------------------------------------------------- + +Some digital pathology images are represented as "tiled" images, +in which the full image (known as the "total pixel matrix") is divided up +into smaller rectangular regions in the row and column dimensions and each +region ("tile") is stored as a frame in a multiframe DICOM image. + +Segmentations of such images are stored as a tiled image in the same manner. +There are two options in `highdicom` for doing this.
You can either pass each +tile/frame individually stacked as a 1D list down the first dimension of the +``pixel_array`` as we have already seen (with the location of each frame either +matching that of the corresponding frame in the source image or explicitly +specified in the ``plane_positions`` argument), or you can pass the 2D total +pixel matrix of the segmentation and have `highdicom` automatically create the +tiles for you. + +To enable this latter option, pass the ``pixel_array`` as a single frame (i.e. +a 2D labelmap array, a 3D labelmap array with a single frame stacked down the +first axis, or a 4D array with a single frame stacked down the first dimension +and any number of segments stacked down the last dimension) and set the +``tile_pixel_array`` argument to ``True``. You can optionally choose the size +(in pixels) of each tile using the ``tile_size`` argument, or, by default, the +tile size of the source image will be used (regardless of whether the +segmentation is represented at the same resolution as the source image). + +If you need to specify the plane positions of the image explicitly, you should +pass a single item to the ``plane_positions`` argument giving the location of +the top left corner of the full total pixel matrix. Otherwise, all the usual +options are available to you. + +.. 
code-block:: python + + # Use an example slide microscopy image from the highdicom test data + # directory + sm_image = dcmread('data/test_files/sm_image.dcm') + + # The source image has multiple frames/tiles, but here we create a mask + # corresponding to the entire total pixel matrix + mask = np.zeros( + ( + sm_image.TotalPixelMatrixRows, + sm_image.TotalPixelMatrixColumns + ), + dtype=np.uint8, + ) + mask[38:43, 5:41] = 1 + + property_category = hd.sr.CodedConcept("91723000", "SCT", "Anatomical Structure") + property_type = hd.sr.CodedConcept("84640000", "SCT", "Nucleus") + segment_descriptions = [ + hd.seg.SegmentDescription( + segment_number=1, + segment_label='Segment #1', + segmented_property_category=property_category, + segmented_property_type=property_type, + algorithm_type=hd.seg.SegmentAlgorithmTypeValues.MANUAL, + ), + ] + + seg = hd.seg.Segmentation( + source_images=[sm_image], + pixel_array=mask, + segmentation_type=hd.seg.SegmentationTypeValues.BINARY, + segment_descriptions=segment_descriptions, + series_instance_uid=hd.UID(), + series_number=1, + sop_instance_uid=hd.UID(), + instance_number=1, + manufacturer='Foo Corp.', + manufacturer_model_name='Slide Segmentation Algorithm', + software_versions='0.0.1', + device_serial_number='1234567890', + tile_pixel_array=True, + ) + + # The result stores the mask as a set of 10 tiles of the non-empty region of + # the total pixel matrix, each of size (10, 10), matching the tile size of + # the source image + assert seg.NumberOfFrames == 10 + assert seg.pixel_array.shape == (10, 10, 10) + +``"TILED_FULL"`` and ``"TILED_SPARSE"`` +--------------------------------------- + +When the segmentation is stored as a tiled image, there are two ways in which +the locations of each frame/tile may be specified in the resulting object.
+These are defined by the value of the *DimensionOrganizationType* attribute: + +- ``"TILED_SPARSE"``: The position of each tile is explicitly defined in the + *PerFrameFunctionalGroupsSequence* of the object. This requires a potentially + very long sequence to store all the per-frame metadata, but does allow for + the omission of empty frames from the segmentation and other irregular tiling + strategies. +- ``"TILED_FULL"``: The position of each tile is implicitly defined using a + predetermined order of the frames. This saves the need to store the per-frame + metadata but does not allow for the omission of empty frames of the + segmentation and is generally less flexible. It may also be simpler for a + receiving application to process, since the tiles are guaranteed to be + regularly and consistently ordered. + +You can control this behavior by specifying the +``dimension_organization_type`` parameter and passing a value of the +:class:`highdicom.DimensionOrganizationTypeValues` enum. The default value is +``"TILED_SPARSE"``. Generally, the ``"TILED_FULL"`` option will be used in +combination with the ``tile_pixel_array`` argument. + + +..
code-block:: python + + # Using the same example as above, this time as TILED_FULL + seg = hd.seg.Segmentation( + source_images=[sm_image], + pixel_array=mask, + segmentation_type=hd.seg.SegmentationTypeValues.BINARY, + segment_descriptions=segment_descriptions, + series_instance_uid=hd.UID(), + series_number=1, + sop_instance_uid=hd.UID(), + instance_number=1, + manufacturer='Foo Corp.', + manufacturer_model_name='Slide Segmentation Algorithm', + software_versions='0.0.1', + device_serial_number='1234567890', + tile_pixel_array=True, + omit_empty_frames=False, + dimension_organization_type=hd.DimensionOrganizationTypeValues.TILED_FULL, + ) + + # The result stores the mask as a set of 25 tiles of the entire region of + # the total pixel matrix, each of size (10, 10), matching the tile size of + # the source image + assert seg.NumberOfFrames == 25 + assert seg.pixel_array.shape == (25, 10, 10) Representation of Fractional SEGs --------------------------------- @@ -1116,6 +1250,88 @@ as stored in the SEG will be returned. # [0. 0.2509804 0.5019608] +Reconstructing Total Pixel Matrices from Tiled Segmentations +------------------------------------------------------------ + +For segmentations of digital pathology images that are stored as tiled images, +the :meth:`highdicom.seg.Segmentation.get_pixels_by_source_frame()` method will +return the segmentation mask as a set of frames stacked down the first +dimension of the array. However, for such images, you typically want to work +with the large 2D total pixel matrix that is formed by correctly arranging the +tiles into a 2D array. `highdicom` provides the +:meth:`highdicom.seg.Segmentation.get_total_pixel_matrix()` method for this +purpose. + +Called without any parameters, it returns a 3D array containing the full total +pixel matrix. The first two dimensions are the spatial dimensions, and the +third is the segments dimension.
Behind the scenes highdicom has stitched +together the required frames stored in the original file for you. Like with the +other methods described above, setting ``combine_segments`` to ``True`` +combines all the segments into, in this case, a 2D array. + +.. code-block:: python + + import highdicom as hd + + # Read in the segmentation using highdicom + seg = hd.seg.segread('data/test_files/seg_image_sm_control.dcm') + + # Get the full total pixel matrix + mask = seg.get_total_pixel_matrix() + + expected_shape = ( + seg.TotalPixelMatrixRows, + seg.TotalPixelMatrixColumns, + seg.number_of_segments, + ) + assert mask.shape == expected_shape + + # Combine the segments into a single array + mask = seg.get_total_pixel_matrix(combine_segments=True) + + assert mask.shape == (seg.TotalPixelMatrixRows, seg.TotalPixelMatrixColumns) + +Furthermore, you can request a sub-region of the full total pixel matrix by +specifying the start and/or stop indices for the rows and/or columns within the +total pixel matrix. Note that this method follows DICOM 1-based convention for +indexing rows and columns, i.e. the first row and column of the total pixel +matrix are indexed by the number 1 (not 0 as is common within Python). Negative +indices are also supported to index relative to the last row or column, with -1 +being the index of the last row or column. Like for standard Python indexing, +the stop indices are specified as one beyond the final row/column in the +returned array. Note that the requested region does not have to start or stop +at the edges of the underlying frames: `highdicom` stitches together only the +relevant parts of the frames to create the requested image for you. + +.. 
code-block:: python + + import highdicom as hd + + # Read in the segmentation using highdicom + seg = hd.seg.segread('data/test_files/seg_image_sm_control.dcm') + + # Get a region of the total pixel matrix + mask = seg.get_total_pixel_matrix( + combine_segments=True, + row_start=20, + row_end=40, + column_start=10, + column_end=20, + ) + + assert mask.shape == (20, 10) + + # A further example using negative indices. Since row_end is not provided, + # the default behavior is to include the last row in the total pixel matrix. + mask = seg.get_total_pixel_matrix( + combine_segments=True, + row_start=21, + column_start=-30, + column_end=-25, + ) + + assert mask.shape == (30, 5) + Viewing DICOM SEG Images ------------------------ From f1a284bcd4912a81802bf3f1b1741a7c5fd7fa09 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Mon, 11 Sep 2023 18:12:11 -0400 Subject: [PATCH 59/96] Add tiled full test image, links in docs --- .../test_files/seg_image_sm_dots_tiled_full.dcm | Bin 0 -> 43966 bytes docs/seg.rst | 16 ++++++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) create mode 100644 data/test_files/seg_image_sm_dots_tiled_full.dcm diff --git a/data/test_files/seg_image_sm_dots_tiled_full.dcm b/data/test_files/seg_image_sm_dots_tiled_full.dcm new file mode 100644 index 0000000000000000000000000000000000000000..8421dddc6fa5ae5c5fc8ae3b4d5a2f92c3e4b601 GIT binary patch literal 43966 zcmeI5TXP)OamNqAi(s0jK!{ctMxlok<4`iSoO79*Dse3dKmt?%L195ha=Ap}vLH8w zTLBB2d8J`-P8KMh!r%$N=;dMwIQ?s|^m7ilnyxyI9N9l^G#Llm1-}mE6 zP|+2~e_aW~iW^eL`AoI-8536cRKo!qD}m;1#YIWtqBz#hkHW~>IMIo3Y-B=f z109$s@rTq1*E!$Qmb9$t#VCKf)VGbK5;;p#6+NWhQnkqu^|0~a;c{bXW6iG^PJ6>@ zSdG<2)cC~Q)Xyfye{pASYUbwL=bV50z`Gk(7Yez%Vdbguo5RZZ+<54_VfAkNy|I3x z^|12Q{LCq(eUb67vb;O1bgccruf%~LIY)o*aDQi1O(!4ddpGatdvb4|s@kVY{S>(V z_y5!%Ltd-OsJrt@UE=s*v$^@PdV228xh}SS##j`|KpR$f`BeW=)*b`g4P>k)E(l{C zxxfyq(aUmo-`$;zpf7iBpK0T<&}`J#n}>Dd$N=1pIZsH^G7aBW#%=e093&U4&5 zGS;@%xA%^R5k{npSRKU)gD@E3+hesMHS1ka3d~Qd@%p32Qr-LP^PgW(9%qKsBkidl 
z94z11B$4k#qJCmA2go`xk#AjSO%U@}7d#KAu2l)0Z+&gSC=5dFLW5o5mySZ~N0HME z0S_tl>Wrt(sVVLx^gJwv9BGc`==XCi42H;4RqhiFoSqt=xHmRCcY9*aQg;>NOak0GPUJ!yYA-AAi;5>CDz4rR?mUhc%EAHC$FK^DD zP>bsOT=PNu_%z3te>**UR@LecuoF%1_RiX3eM@Z1Z;qh#N%fK zOYhFccB9$YSofyZz4?vi!m2m5w(y|t-Dy0ouX-~J&8@~`>DObM5-#&f*o%ZRIzJ2<6jy_={{$;FoTFp;QPmJID=gpIeXM&llpZNQcf5Nva#^gb?^o66 z$<)^DneD6Ut&_#f_EmLZ-<5{(_-d1<74DP^L*KXf0YjWQp=x9E9G^HgJv*Xi8cSOn z+e;gpb#Ge+SykOnuk`rMmW|rvby!`yBK&{oyrzI*JaOeS5pH{=jQKVaQh1!>>h}pR})X zt9zC9xX0NOPrG+)zN2ajYn!Wb9qzE%Mztek>Ca=X)avgre$)KK{M1ah&((Hoakn+F zLz2Gfw6u%e+iDzpJ&%(vEp2xDUsc}MhbnTR`am3HyCx@Tk`-2N4=m~VXl=bpap z#cOXQmyt-cUn1u_66vT=I>i8t??K4bjauGQFBY+{M)%iiJ> zZ+mC!L1PK;D;TDm#ezCtx~ z1*by>e7`yAqPkt*X>QTOTRST&^)0&n(Z<%=0@|AP^pibS_NQ$tyUMlujf}q1Ka`a{ zO$*&fJ?+P@pV8+2`)yf=ft`H?>}=r2ualkKe8w0n+RkE6I-~oQx3vGUMZIHD#9+?NUe9(Zf@<;Koh$G-0OpiV1` zH)4f)?9!ogi|X%)3%0y%7^ub7x>tuC>-2Y0ok}w}5))0TH})KZJ<-nfc1u@z z74~QxX2Xz`aY-Cmb(>Vm zq8duuxnFDD(eIBbc*W!Cxqcjg(Da1NeeUv9J7QjhV`=dvi|Qnt_Nbcs_|`QWUiaX4 z`4!mr=))Un3+wH>Tj6y@-AK>fYM&F0YsprkWvm>zmEPF(g1WgSAu{<4D8}_>eRZ|* zpe`e7Md_4Je}oKHRG1!1DrJ#t3OjW3?)>b`&H1S@`trR!e|+2FRJ*symhDF#|iP%$Lvdzc7s~6Q}xa$77khlV-0`Xm>ObkdS;g{2;MamqE6yrgogON*w&w~1Q5owE<6k(Di zzKgB4nD%0kI-W(!2~v^#ic5=>Js7DlAbV|`A>o+;Cbp)7AcvcjJfXVI4mtvVGgAH zM9Ov?lLQ3;M#t7tw-Uu@R4DZjiMEkViq)w%GDx}9qd>6;R7;CgbTCpbq|yqng2D(= zVmN}tP)uDIzXWAMt#I-Ef{_}|AQhxj6dncYU0S5#gOQ@VDA70#R4IkH>mi9XoCp(9 ztYq;6nZu}samDJ?_cBO@;!$XRQ9M&xq>_V?3b1>mO%f8$%oEgrk#i1Y-tx)!o500U zXrdIUe=jWZJ)J=+N|7S+#dRqYQYq)xjeLc7I4Jfd+?6nwdMc^|@E#~-K*55Dm^bnR zdfpMvriY!(x2Uhsyl$!eqX3zmx5IC5>AchDO9?((777-yAYXRTv!3q{t>Rc8n;gp;xl`bt(Q68jx6NsH-6%<4| z09oh@f?>j`2+y4?Rm`+iEK;Lcq%=&0<$sBhigO?p^^FD+@gWS!1ZtFUSiP8shko!*Xa9p_U-^RbdR^+ij%IwxVBL zKh!ErN{dq-mLj6aJfx(;P(;NlN}002hxOA$zL-N60pk@G?D_p!k4B+Q0md(kON&$< zmI5Y;9RVm?#K*(SgHX3p024D?v1XfX0VF4q+P>m6>Ow}NViNQyRf%!R!%&1IqI3`v zPfC}siZOtpE3fC5i=kkz3MmDZjV16M;FP2? 
z$>HvWIDt5QvQVqwdFn5Ev?`sWQmD%6qvv5LNbXXNi6;RM3$IOV6~-h#^IBq7rZ|T|8&7|?_c&H zB`FVTlqh^Dt4`%%C}hhQ((w>~#Jm)_#KNOTFcnY~pk@<-VqLIKUFt!KNh-zy22oj& zvUwN^VajEX1=5K&rpAu~7|9`$0%mt4>S{x1!It$a&ZqcM8>x;@A!JetRjF!}GHjB~ z!%^6TQWx-wTMUu}zPRobE?Jfo0bPVv1qJ&QURJ9dwl5*33S4QmDi2FR)fmM|(Bd=eCkX^ND+ECpdP2;N0la6Q>y^S0I+r&*BVu)3DT%83n3%gX%H`hhMFXQ5fPZY>$AH&oP{vzK8avlb@+4)A&aXBu86P* zuVlG4;1%r$zTbnD#8_e|I<15$&bRm=ia4=RByWmFfoC#J_@uI0xr@Vp}uXO$I8fJrpU^R|4pF(Z*Bm4Ynd zk?j$R)hpA3Rk|2NY+!k@3i9w4tRQ8zB2+s|w4pCpW{M(F%t>y9!p?-9=A`s?k!Kdk z6GCp!ZmOs09-OtAdpxvjQ~e`5P%56>^-nw(ifu3F-b=o=ozk><3zD?~eIn zUN7+EqH&%z!LFYAnmqHVHY&#`c#_DzpXq`3$A1_}p9^>TZ~4mLXCQ&YNZ{0k!+dM- zoxT#d|CEJB<@tGf>{NMfc5tTy2@E7Kkig58z!`bC)8OYNN?;)ImniJRH&s%YO6Fl6 zetdMkS<-RRx&6|-4sd>PsK4RyFy;IC<`B31y2xq3?i>Q#7y3%cCGHy v%8`dKh4%J!yx8kM!5<$G&-n-5>koJSdwnH7IT)QU3d7MlDSdiZ_@wjy^%l|D literal 0 HcmV?d00001 diff --git a/docs/seg.rst b/docs/seg.rst index 50a1b557..e47a5b02 100644 --- a/docs/seg.rst +++ b/docs/seg.rst @@ -573,13 +573,17 @@ options are available to you. When the segmentation is stored as a tiled image, there are two ways in which the locations of each frame/tile may be specified in the resulting object. -These are defined by the value of the *DimensionOrganizationType* attribute: +These are defined by the value of the +`"DimensionOrganizationType" +`_ +attribute: - ``"TILED_SPARSE"``: The position of each tile is explicitly defined in the - *PerFrameFunctionalGroupsSequence* of the object. This requires a potentially - very long sequence to store all the per-frame metadata, but does allow for - the omission of empty frames from the segmentation and other irregular tiling - strategies. + `"PerFrameFunctionalGroupsSequence" + `_ + of the object. This requires a potentially very long sequence to store all + the per-frame metadata, but does allow for the omission of empty frames from + the segmentation and other irregular tiling strategies. 
- ``"TILED_FULL"``: The position of each tile is implicitly defined using a predetermined order of the frames. This saves the need to store the pre-frame metadata but does not allow for the omission of empty frames of the @@ -589,7 +593,7 @@ These are defined by the value of the *DimensionOrganizationType* attribute: You can control tihs behavior by specifying the ``dimension_organization_type`` parameter and passing a value of the -:class:`highdicom.DimensionOrganizationType` enum. The default value is +:class:`highdicom.DimensionOrganizationTypeValues` enum. The default value is ``"TILED_SPARSE"``. Generally, the ``"TILED_FULL"`` option will be used in combination with ``tile_pixel_array`` argument. From 2d2110e570a88f66d4ec42fd4ef4d68fcbaf616a Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Mon, 11 Sep 2023 21:19:28 -0400 Subject: [PATCH 60/96] Minor typos in docstrings --- src/highdicom/seg/sop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 3e3f8356..bad2e253 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1345,7 +1345,7 @@ def __init__( against spawned child processes creating further workers. dimension_organization_type: Union[highdicom.enum.DimensionOrganizationTypeValues, str, None], optional Dimension organization type to use for the output image. - tile_pixel_array: bool + tile_pixel_array: bool, optional If True, `highdicom` will automatically convert an input total pixel matrix into a sequence of frames representing tiles of the segmentation. This is valid only when the source image supports @@ -1369,7 +1369,7 @@ def __init__( If False, the pixel array is already considered to consist of one or more existing frames, as described above. - tile_size: Union[Sequence[int], None] = None + tile_size: Union[Sequence[int], None], optional Tile size to use when tiling the input pixel array. 
If ``None`` (the default), the tile size is copied from the source image. Otherwise the tile size is specified explicitly as (number of rows, From f118207268b00dce03bcbfcde41a06d56f43b1a9 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 14 Sep 2023 09:26:15 -0400 Subject: [PATCH 61/96] Add pyramid utility method --- src/highdicom/seg/pyramid.py | 156 +++++++++++++++++++++++++++++++++++ src/highdicom/seg/sop.py | 43 +++++++++- 2 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 src/highdicom/seg/pyramid.py diff --git a/src/highdicom/seg/pyramid.py b/src/highdicom/seg/pyramid.py new file mode 100644 index 00000000..d1a63cee --- /dev/null +++ b/src/highdicom/seg/pyramid.py @@ -0,0 +1,156 @@ +"""Tools for constructing multi-resolution segmentation pyramids.""" +from typing import Any, List, Optional, Sequence, Tuple, Union + +import numpy as np +from PIL import Image +from pydicom import Dataset + +from highdicom.content import PixelMeasuresSequence +from highdicom.seg.sop import Segmentation +from highdicom.seg.enum import ( + SegmentationTypeValues, +) +from highdicom.seg.content import ( + SegmentDescription, +) +from highdicom.uid import UID + + +def create_segmentation_pyramid( + source_images: Sequence[Dataset], + pixel_arrays: Sequence[np.ndarray], + segmentation_type: Union[str, SegmentationTypeValues], + segment_descriptions: Sequence[SegmentDescription], + series_instance_uid: str, + series_number: int, + manufacturer: str, + manufacturer_model_name: str, + software_versions: Union[str, Tuple[str]], + device_serial_number: str, + downsample_factors: Optional[Sequence[float]] = None, + sop_instance_uids: Optional[List[str]] = None, + pyramid_uid: Optional[str] = None, + pyramid_label: Optional[str] = None, + **kwargs: Any +) -> List[Segmentation]: + """Construct a multi-resolution segmentation pyramid series. 
+ + """ + if pyramid_uid is None: + pyramid_uid = UID() + + n_sources = len(source_images) + n_pix_arrays = len(pixel_arrays) + + if n_sources == 1 and n_pix_arrays == 1: + if downsample_factors is None: + raise TypeError( + 'Argument "downsample_factors" must be provided when providing ' + 'only a single source image and pixel array.' + ) + if len(downsample_factors) < 1: + raise ValueError('Argument "downsample_factors" may not be empty.') + if any(f <= 1.0 for f in downsample_factors): + raise ValueError( + 'All items in "downsample_factors" must be greater than 1.' + ) + n_outputs = len(downsample_factors) + 1 # including original + elif downsample_factors is not None: + raise TypeError( + 'Argument "downsample_factors" must not be provided when multiple ' + 'source images or pixel arrays are provided.' + ) + if n_sources > 1 and n_pix_arrays > 1: + if n_sources != n_pix_arrays: + raise ValueError( + "If providing multiple source images and multiple pixel " + "arrays, the number of items in the two lists must match." 
+ ) + n_outputs = n_sources + + if n_pix_arrays == 1: + # Create a pillow image for use later with resizing + mask_image = Image.fromarray(pixel_arrays[0]) + + all_segs = [] + + # Work "up" pyramid from high to low resolution + for output_level in range(n_outputs): + if n_sources > 1: + source_image = source_images[output_level] + else: + source_image = source_images[0] + + if n_pix_arrays > 1: + pixel_array = pixel_arrays[output_level] + else: + need_resize = True + if n_sources > 1: + output_size = ( + source_image.TotalPixelMatrixColumns, + source_image.TotalPixelMatrixRows + ) + else: + if output_level == 0: + pixel_array = pixel_arrays[0] + need_resize = False + else: + f = downsample_factors[output_level - 1] + output_size = ( + int(source_images[0].TotalPixelMatrixColumns / f), + int(source_images[0].TotalPixelMatrixRows / f) + ) + + if need_resize: + pixel_array = np.array( + mask_image.resize(output_size, Image.Resampling.NEAREST) + ) + + if n_sources == 1: + row_spacing = ( + source_image.PixelSpacing[0] * + (pixel_arrays[0].shape[0] / pixel_array.shape[0]) + ) + column_spacing = ( + source_image.PixelSpacing[1] * + (pixel_arrays[0].shape[1] / pixel_array.shape[1]) + ) + pixel_measures = PixelMeasuresSequence( + pixel_spacing=(row_spacing, column_spacing), + slice_thickness=source_image.SliceThickness, + ) + else: + # This will be copied from the source image + pixel_measures = None + + if sop_instance_uids is None: + sop_instance_uid = UID() + else: + sop_instance_uid = sop_instance_uids[output_level] + + # Create the output segmentation + seg = Segmentation( + source_images=[source_image], + pixel_array=pixel_array, + segmentation_type=segmentation_type, + segment_descriptions=segment_descriptions, + series_instance_uid=series_instance_uid, + series_number=series_number, + sop_instance_uid=sop_instance_uid, + instance_number=output_level + 1, + manufacturer=manufacturer, + manufacturer_model_name=manufacturer_model_name, + 
software_versions=software_versions, + device_serial_number=device_serial_number, + pyramid_uid=pyramid_uid, + pyramid_label=pyramid_label, + tile_pixel_array=True, + plane_orientation=None, + plane_positions=None, + pixel_measures=pixel_measures, + **kwargs, + ) + + all_segs.append(seg) + + return all_segs diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index bad2e253..e6e7c374 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -78,7 +78,11 @@ from highdicom.seg.utils import iter_segments from highdicom.spatial import ImageToReferenceTransformer from highdicom.sr.coding import CodedConcept -from highdicom.valuerep import check_person_name, _check_code_string +from highdicom.valuerep import ( + check_person_name, + _check_code_string, + _check_long_string, +) from highdicom.uid import UID as hd_UID @@ -1174,6 +1178,8 @@ def __init__( ] = None, tile_pixel_array: bool = False, tile_size: Union[Sequence[int], None] = None, + pyramid_uid: Optional[str] = None, + pyramid_label: Optional[str] = None, **kwargs: Any ) -> None: """ @@ -1375,6 +1381,14 @@ def __init__( Otherwise the tile size is specified explicitly as (number of rows, number of columns). This value is ignored if ``tile_pixel_array`` is False. + pyramid_uid: Optional[str], optional + Unique identifier for the pyramid containing this segmentation. + Should only be used if this segmentation is part of a + multi-resolution pyramid. + pyramid_label: Optional[str], optional + Human readable label for the pyramid containing this segmentation. + Should only be used if this segmentation is part of a + multi-resolution pyramid. 
**kwargs: Any, optional Additional keyword arguments that will be passed to the constructor of `highdicom.base.SOPClass` @@ -1638,6 +1652,33 @@ def __init__( self.LossyImageCompressionMethod = \ src_img.LossyImageCompressionMethod + # Multi-Resolution Pyramid + if pyramid_uid is not None: + if not is_tiled: + raise TypeError( + 'Argument "pyramid_uid" should only be specified ' + 'for tiled images.' + ) + if ( + self._coordinate_system is None or + self._coordinate_system != CoordinateSystemNames.SLIDE + ): + raise TypeError( + 'Argument "pyramid_uid" should only be specified for ' + 'segmentations in the SLIDE coordinate system.' + ) + self.PyramidUID = pyramid_uid + + if pyramid_label is not None: + _check_long_string(pyramid_label) + self.PyramidLabel = pyramid_label + + elif pyramid_label is not None: + raise TypeError( + 'Argument "pyramid_label" should not be specified if ' + '"pyramid_uid" is not specified.' + ) + # Multi-Frame Functional Groups and Multi-Frame Dimensions sffg_item = Dataset() source_pixel_measures = self._get_pixel_measures_sequence( From 187539ce04a995ad645d3403b6d6578a34de9e76 Mon Sep 17 00:00:00 2001 From: CPBridge Date: Thu, 14 Sep 2023 14:59:46 +0000 Subject: [PATCH 62/96] Add export of create_segmentation_pyramid --- src/highdicom/seg/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/highdicom/seg/__init__.py b/src/highdicom/seg/__init__.py index 94e8f720..73d75d88 100644 --- a/src/highdicom/seg/__init__.py +++ b/src/highdicom/seg/__init__.py @@ -12,6 +12,7 @@ DimensionIndexSequence, ) from highdicom.seg import utils +from highdicom.seg.pyramid import create_segmentation_pyramid SOP_CLASS_UIDS = { '1.2.840.10008.5.1.4.1.1.66.4', # Segmentation @@ -27,5 +28,6 @@ 'SegmentDescription', 'SegmentsOverlapValues', 'SpatialLocationsPreservedValues', + 'create_segmentation_pyramid', 'utils', ] From 08588005e51db0b3ab78d95df726717e5f110e25 Mon Sep 17 00:00:00 2001 From: CPBridge Date: Thu, 14 Sep 2023 16:05:50 +0000 Subject: 
[PATCH 63/96] Fixes to pyramid --- src/highdicom/seg/pyramid.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/src/highdicom/seg/pyramid.py b/src/highdicom/seg/pyramid.py index d1a63cee..1f4afa12 100644 --- a/src/highdicom/seg/pyramid.py +++ b/src/highdicom/seg/pyramid.py @@ -42,6 +42,15 @@ def create_segmentation_pyramid( n_sources = len(source_images) n_pix_arrays = len(pixel_arrays) + if n_sources == 0: + raise ValueError( + 'Argument "source_images" must not be empty.' + ) + if n_pix_arrays == 0: + raise ValueError( + 'Argument "pixel_arrays" must not be empty.' + ) + if n_sources == 1 and n_pix_arrays == 1: if downsample_factors is None: raise TypeError( @@ -55,18 +64,22 @@ def create_segmentation_pyramid( 'All items in "downsample_factors" must be greater than 1.' ) n_outputs = len(downsample_factors) + 1 # including original - elif downsample_factors is not None: - raise TypeError( - 'Argument "downsample_factors" must not be provided when multiple ' - 'source images or pixel arrays are provided.' - ) - if n_sources > 1 and n_pix_arrays > 1: - if n_sources != n_pix_arrays: - raise ValueError( - "If providing multiple source images and multiple pixel " - "arrays, the number of items in the two lists must match." + else: + if downsample_factors is not None: + raise TypeError( + 'Argument "downsample_factors" must not be provided when ' + 'multiple source images or pixel arrays are provided.' ) - n_outputs = n_sources + if n_sources > 1 and n_pix_arrays > 1: + if n_sources != n_pix_arrays: + raise ValueError( + "If providing multiple source images and multiple pixel " + "arrays, the number of items in the two lists must match." 
+ ) + n_outputs = n_sources + else: + # Either n_sources > 1 or n_pix_arrays > 1 but not both + n_outputs = max(n_sources, n_pix_arrays) if n_pix_arrays == 1: # Create a pillow image for use later with resizing From e8eb0f53ef65688e4823fa580d65d84fb9392e37 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 14 Sep 2023 16:50:12 -0400 Subject: [PATCH 64/96] Fix error reading TILED_FULL segmentations from file --- src/highdicom/seg/sop.py | 39 ++++++++++++++++++++------------------- tests/test_seg.py | 2 ++ 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index bad2e253..ed05e195 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -3028,25 +3028,26 @@ def from_dataset( sf_groups.PixelMeasuresSequence = pixel_measures # Per-frame functional group items - for pffg_item in seg.PerFrameFunctionalGroupsSequence: - if hasattr(pffg_item, 'PlanePositionSequence'): - plane_pos = PlanePositionSequence.from_sequence( - pffg_item.PlanePositionSequence, - copy=False - ) - pffg_item.PlanePositionSequence = plane_pos - if hasattr(pffg_item, 'PlaneOrientationSequence'): - plane_ori = PlaneOrientationSequence.from_sequence( - pffg_item.PlaneOrientationSequence, - copy=False, - ) - pffg_item.PlaneOrientationSequence = plane_ori - if hasattr(pffg_item, 'PixelMeasuresSequence'): - pixel_measures = PixelMeasuresSequence.from_sequence( - pffg_item.PixelMeasuresSequence, - copy=False, - ) - pffg_item.PixelMeasuresSequence = pixel_measures + if hasattr(seg, 'PerFrameFunctionalGroupsSequence'): + for pffg_item in seg.PerFrameFunctionalGroupsSequence: + if hasattr(pffg_item, 'PlanePositionSequence'): + plane_pos = PlanePositionSequence.from_sequence( + pffg_item.PlanePositionSequence, + copy=False + ) + pffg_item.PlanePositionSequence = plane_pos + if hasattr(pffg_item, 'PlaneOrientationSequence'): + plane_ori = PlaneOrientationSequence.from_sequence( + pffg_item.PlaneOrientationSequence, + copy=False, + 
) + pffg_item.PlaneOrientationSequence = plane_ori + if hasattr(pffg_item, 'PixelMeasuresSequence'): + pixel_measures = PixelMeasuresSequence.from_sequence( + pffg_item.PixelMeasuresSequence, + copy=False, + ) + pffg_item.PixelMeasuresSequence = pixel_measures seg._build_luts() diff --git a/tests/test_seg.py b/tests/test_seg.py index 40d2c6a7..016d4100 100644 --- a/tests/test_seg.py +++ b/tests/test_seg.py @@ -2898,6 +2898,8 @@ def test_segread(self): assert isinstance(seg, Segmentation) seg = segread('data/test_files/seg_image_sm_numbers.dcm') assert isinstance(seg, Segmentation) + seg = segread('data/test_files/seg_image_sm_dots_tiled_full.dcm') + assert isinstance(seg, Segmentation) def test_properties(self): # SM segs From 811bb597778100c34ca9bdf96daacf823598c6db Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 14 Sep 2023 17:56:46 -0400 Subject: [PATCH 65/96] Add full docs --- src/highdicom/seg/pyramid.py | 105 +++++++++++++++++++++++++++++++++++ src/highdicom/seg/sop.py | 2 +- 2 files changed, 106 insertions(+), 1 deletion(-) diff --git a/src/highdicom/seg/pyramid.py b/src/highdicom/seg/pyramid.py index 1f4afa12..10356475 100644 --- a/src/highdicom/seg/pyramid.py +++ b/src/highdicom/seg/pyramid.py @@ -35,9 +35,114 @@ def create_segmentation_pyramid( ) -> List[Segmentation]: """Construct a multi-resolution segmentation pyramid series. + A multi-resolution pyramid represents the same segmentation array at + multiple resolutions + + This function handles multiple related scenarios: + + * Constructing a segmentation of a source image pyramid given a + segmentation pixel array of the highest resolution source image, with + highdicom performing the downsampling automatically to match the + resolution of the other source images (pass multiple ``source_images`` + and a single item in ``pixel_arrays``). 
+ * Constructing a segmentation of a source image pyramid given user-provided + segmentation pixel arrays for each level in the source pyramid (pass + multiple ``source_images`` and a matching number of ``pixel_arrays``). + * Constructing a segmentation of a single source image given multiple + user-provided downsampled segmentation pixel arrays (pass a single item + in ``source_images``, and multiple items in ``pixel_arrays``). + * Constructing a segmentation of a single source image and a single + segmentation pixel array by downsampling by a given list of + ``downsample_factors`` (pass a single item in ``source_images``, a single + item in ``pixel_arrays``, and a list of one or more desired + ``downsample_factors``). + + In all cases, the items in both ``source_images`` and ``pixel_arrays`` + should be sorted in pyramid order from highest resolution (smallest + spacing) to lowest resolution (largest spacing), and the pixel array + in ``pixel_arrays[0]`` must be the segmentation of the source image in + ``source_images[0]`` with spatial locations preserved (a one-to-one + correspondence between pixels in the source image's total pixel matrix and + the provided segmentation pixel array). + + In all cases, the provided pixel arrays should be total pixel matrices. + Tiling is performed automatically. + + Parameters + ---------- + source_images: Sequence[pydicom.Dataset] + List of source images. If there are multiple source images, they should + be from the same series and pyramid. + pixel_arrays: Sequence[numpy.ndarray] + List of segmentation pixel arrays. Each should be a total pixel matrix. + segmentation_type: Union[str, highdicom.seg.SegmentationTypeValues] + Type of segmentation, either ``"BINARY"`` or ``"FRACTIONAL"`` + segment_descriptions: Sequence[highdicom.seg.SegmentDescription] + Description of each segment encoded in `pixel_array`.
In the case of + pixel arrays with multiple integer values, the segment description + with the corresponding segment number is used to describe each segment. + series_number: int + Number of the output segmentation series. + manufacturer: str + Name of the manufacturer of the device (developer of the software) + that creates the instance + manufacturer_model_name: str + Name of the device model (name of the software library or + application) that creates the instance + software_versions: Union[str, Tuple[str]] + Version(s) of the software that creates the instance. + device_serial_number: str + Manufacturer's serial number of the device + downsample_factors: Optional[Sequence[float]], optional + Factors by which to downsample the pixel array to create each of the + output segmentation objects. This should be provided if and only if a + single source image and single pixel array are provided. Note that the + original array is always used to create the first segmentation output, + so the number of created segmentation instances is one greater than + the number of items in this list. Items must be numbers greater than + 1 and sorted in ascending order. A downsampling factor of *n* implies + that the output array is *1/n* times the size of the input pixel array. For + example a list ``[2, 4, 8]`` would produce 4 output segmentation + instances. The first is the same size as the original pixel array, the + next is half the size, the next is a quarter of the size of the + original, and the last is one eighth the size of the original. + Output sizes are rounded to the nearest integer. + series_instance_uid: Optional[str], optional + UID of the output segmentation series. If not specified, UIDs are + generated automatically using highdicom's prefix. + sop_instance_uids: Optional[List[str]], optional + SOP instance UIDs of the output instances. If not specified, UIDs are + generated automatically using highdicom's prefix.
+ pyramid_uid: Optional[str], optional + UID for the output imaging pyramid. If not specified, a UID is generated + using highdicom's prefix. + pyramid_label: Optional[str], optional + A human readable label for the output pyramid. + **kwargs: Any + Any further parameters are passed directly to the constructor of the + :class:`highdicom.seg.Segmentation` object. However the following + parameters are disallowed: ``instance_number``, ``sop_instance_uid``, + ``plane_orientation``, ``plane_positions``, ``pixel_measures``, + ``pixel_array``, ``tile_pixel_array``. + + Note + ---- + Downsampling is performed via simple nearest neighbor interpolation. If + more control is needed over the downsampling process (for example + anti-aliasing), explicitly pass the downsampled arrays. + """ + # TODO check dimensions of input arrays + # TODO check total pixel matrix sizes correspond + # TODO check ordering of items in list + # TODO check source images are same series and pyramid + # TODO add support for single source image with predefined downsampled + # arrays + # TODO disallow duplicate items in kwargs if pyramid_uid is None: pyramid_uid = UID() + if series_instance_uid is None: + series_instance_uid = UID() n_sources = len(source_images) n_pix_arrays = len(pixel_arrays) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index b69e5466..4b313c20 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1273,7 +1273,7 @@ def __init__( series_instance_uid: str UID of the series series_number: int - Number of the series within the study + Number of the output segmentation series.
sop_instance_uid: str UID that should be assigned to the instance instance_number: int From ac7735263de022d97619202dbb9a252f6eb398a0 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 20 Sep 2023 19:16:43 -0400 Subject: [PATCH 66/96] Fix variable name change introduced by merge --- src/highdicom/seg/sop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 4d7d75a6..753ca422 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1665,7 +1665,7 @@ def __init__( # Encode the whole pixel array at once # This allows for correct bit-packing in cases where # number of pixels per frame is not a multiple of 8 - self.NumberOfFrames = len(full_frames_list) + self.NumberOfFrames = len(frames) self.PixelData = self._encode_pixels_native( np.concatenate(frames) ) From ee598f7ae11445f348aba264f10150e751ff2296 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Sun, 24 Sep 2023 13:57:43 +0200 Subject: [PATCH 67/96] Add docstrings to new properties --- src/highdicom/content.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index 48c647db..1a1442a4 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -1183,6 +1183,8 @@ def issuer_of_specimen_id(self) -> Union[str, None]: @property def specimen_container(self) -> Union[CodedConcept, None]: + """highdicom.sr.CodedConcept: Specimen container""" + items = self.SpecimenPreparationStepContentItemSequence.find( codes.SCT.SpecimenContainer ) @@ -1192,6 +1194,8 @@ def specimen_container(self) -> Union[CodedConcept, None]: @property def specimen_type(self) -> Union[CodedConcept, None]: + """highdicom.sr.CodedConcept: Specimen type""" + items = self.SpecimenPreparationStepContentItemSequence.find( codes.SCT.SpecimenType ) From 979ff9f5fb8aa8f50ac318b3bb278c1ee07a623f Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Sun, 24 Sep 2023 14:08:32 +0200 Subject: [PATCH 
68/96] Append steps before adding preparation step sequence to dataset --- src/highdicom/content.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index 1a1442a4..603d3a1e 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -1072,21 +1072,21 @@ def __init__( value=embedding_medium ) sequence.append(embedding_medium_item) - self.SpecimenPreparationStepContentItemSequence = sequence - if specimen_container is not None: specimen_container_item = CodeContentItem( name=codes.SCT.SpecimenContainer, value=specimen_container ) sequence.append(specimen_container_item) - if specimen_type is not None: specimen_type_item = CodeContentItem( name=codes.SCT.SpecimenType, value=specimen_type ) sequence.append(specimen_type_item) + self.SpecimenPreparationStepContentItemSequence = sequence + + @property def specimen_id(self) -> str: From 98856834c75e737801c3ee7dd47754890e43f2b3 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Sun, 24 Sep 2023 14:10:25 +0200 Subject: [PATCH 69/96] Move new optional arguments to end of init --- src/highdicom/content.py | 8 ++++---- tests/test_content.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index 603d3a1e..49691090 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -1353,13 +1353,13 @@ def __init__( specimen_preparation_steps: Optional[ Sequence[SpecimenPreparationStep] ] = None, - specimen_type: Optional[Union[Code, CodedConcept]] = None, - specimen_short_description: Optional[str] = None, - specimen_detailed_description: Optional[str] = None, issuer_of_specimen_id: Optional[IssuerOfIdentifier] = None, primary_anatomic_structures: Optional[ Sequence[Union[Code, CodedConcept]] - ] = None + ] = None, + specimen_type: Optional[Union[Code, CodedConcept]] = None, + specimen_short_description: Optional[str] = None, + specimen_detailed_description: 
Optional[str] = None, ): """ Parameters diff --git a/tests/test_content.py b/tests/test_content.py index c3a4df5e..0e95a51e 100644 --- a/tests/test_content.py +++ b/tests/test_content.py @@ -1360,11 +1360,11 @@ def test_construction_optionals(self): specimen_id=specimen_id, specimen_uid=specimen_uid, specimen_location=specimen_location, + issuer_of_specimen_id=issuer_of_specimen_id, + primary_anatomic_structures=primary_anatomic_structures, specimen_type=specimen_type, specimen_short_description=specimen_short_description, specimen_detailed_description=specimen_detailed_description, - issuer_of_specimen_id=issuer_of_specimen_id, - primary_anatomic_structures=primary_anatomic_structures ) assert instance.specimen_location == specimen_location assert instance.specimen_type == specimen_type From 1f7fb58ee63e28a69c177301dab9b6d45b7ebb69 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson <83275777+erikogabrielsson@users.noreply.github.com> Date: Sun, 24 Sep 2023 14:12:57 +0200 Subject: [PATCH 70/96] Add reference to valid container type Co-authored-by: Chris Bridge --- src/highdicom/content.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index 1a1442a4..13c29ba4 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -982,7 +982,9 @@ def __init__( embedding_medium: Union[pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional Embedding medium used during processing specimen_container: Union[pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional - Container the specimen resides in. + Container the specimen resides in (see + :dcm:`CID 8101 ` + "Container Type" for options). specimen_type: Union[pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional The anatomic pathology specimen type of the specimen. 
From 149aa6fe4688214e54f7d450e67742630ed9b8b3 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Sun, 24 Sep 2023 14:19:19 +0200 Subject: [PATCH 71/96] Add references to CIDs --- src/highdicom/content.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index fc718b27..c275d403 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -978,15 +978,17 @@ def __init__( Description of processing issuer_of_specimen_id: highdicom.IssuerOfIdentifier, optional fixative: Union[pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional - Fixative used during processing + Fixative used during processing (see :dcm:`CID 8114 ` + "Specimen Fixative" for options). embedding_medium: Union[pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional - Embedding medium used during processing + Embedding medium used during processing see :dcm:`CID 8115 ` + "Specimen Embedding Media" for options). specimen_container: Union[pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional - Container the specimen resides in (see - :dcm:`CID 8101 ` + Container the specimen resides in (see :dcm:`CID 8101 ` "Container Type" for options). specimen_type: Union[pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional - The anatomic pathology specimen type of the specimen. + The anatomic pathology specimen type of the specimen (see :dcm:`CID 8103 ` + "Anatomic Pathology Specimen Type" for options). 
""" # noqa: E501 super().__init__() @@ -1088,8 +1090,6 @@ def __init__( sequence.append(specimen_type_item) self.SpecimenPreparationStepContentItemSequence = sequence - - @property def specimen_id(self) -> str: """str: Specimen identifier""" From 6d0339cc564897ef5ce3551dbc47fa545346a5ae Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Sun, 24 Sep 2023 14:22:14 +0200 Subject: [PATCH 72/96] Check for type of issuer of specimen id on constructed preparation step --- tests/test_content.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_content.py b/tests/test_content.py index 0e95a51e..29d3b387 100644 --- a/tests/test_content.py +++ b/tests/test_content.py @@ -904,6 +904,7 @@ def test_construction_processing_from_dataset_optionals(self): assert isinstance(processing_procedure, SpecimenCollection) assert processing_procedure.procedure == procedure assert instance.processing_datetime == processing_datetime + assert isinstance(instance.issuer_of_specimen_id, str) assert ( instance.issuer_of_specimen_id == issuer_of_specimen_id.LocalNamespaceEntityID From 6b8d2aa4905fb01105d5f66383d2a7d37decda28 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Sun, 24 Sep 2023 14:30:28 +0200 Subject: [PATCH 73/96] Test date, time, and datetime conversion from dataset with pydicom config.datetime_conversion setting --- tests/test_valuetypes.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tests/test_valuetypes.py b/tests/test_valuetypes.py index 43193949..2a268b36 100644 --- a/tests/test_valuetypes.py +++ b/tests/test_valuetypes.py @@ -5,6 +5,7 @@ from pydicom.sr.codedict import codes from pydicom.sr.coding import Code from pydicom.valuerep import DT, DA, TM +from pydicom import config from highdicom.sr.coding import CodedConcept from highdicom.sr.enum import ValueTypeValues @@ -44,7 +45,13 @@ def test_construct_from_datetime(self, datetime_value: DT): assert item.value.isoformat() == datetime_value.isoformat() 
@pytest.mark.parametrize("datetime_value", test_datetime_values) - def test_from_dataset(self, datetime_value: DT): + @pytest.mark.parametrize("datetime_conversion", [True, False]) + def test_from_dataset( + self, + datetime_value: DT, + datetime_conversion: bool + ): + config.datetime_conversion = datetime_conversion name = codes.DCM.DatetimeOfProcessing assert isinstance(name, Code) value_type = ValueTypeValues.DATETIME @@ -80,7 +87,9 @@ def test_construct_from_date(self): assert isinstance(item.value, datetime.date) assert item.value.isoformat() == date_value.isoformat() - def test_from_dataset(self): + @pytest.mark.parametrize("datetime_conversion", [True, False]) + def test_from_dataset(self, datetime_conversion: bool): + config.datetime_conversion = datetime_conversion date_value = DA("20230623") name = codes.DCM.AcquisitionDate assert isinstance(name, Code) @@ -125,7 +134,13 @@ def test_construct_from_time(self, time_value: TM): assert item.value.isoformat() == time_value.isoformat() @pytest.mark.parametrize("time_value", test_time_values) - def test_from_dataset(self, time_value: TM): + @pytest.mark.parametrize("datetime_conversion", [True, False]) + def test_from_dataset( + self, + time_value: TM, + datetime_conversion: bool + ): + config.datetime_conversion = datetime_conversion name = codes.DCM.AcquisitionDate assert isinstance(name, Code) value_type = ValueTypeValues.TIME From 921c14bcb0525c348dfee7302d2ff59fbae4d17d Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Sun, 24 Sep 2023 14:38:56 +0200 Subject: [PATCH 74/96] Add docstrings for missed properties --- src/highdicom/content.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index c275d403..c367cc13 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -1470,16 +1470,17 @@ def __init__( @property def specimen_id(self) -> str: - """str: Specimen identifier""" + """str: Specimen 
identifier.""" return str(self.SpecimenIdentifier) @property def specimen_uid(self) -> UID: - """highdicom.UID: Unique specimen identifier""" + """highdicom.UID: Unique specimen identifier.""" return UID(self.SpecimenUID) @property def specimen_location(self) -> Union[str, Tuple[float, float, float], None]: + """Tuple[float, float, float]: Specimen location in container.""" sequence = self.get("SpecimenLocalizationContentItemSequence") if sequence is None: return None @@ -1489,11 +1490,12 @@ def specimen_location(self) -> Union[str, Tuple[float, float, float], None]: @property def specimen_preparation_steps(self) -> List[SpecimenPreparationStep]: - """highdicom.SpecimenPreparationStep: Specimen preparation steps""" + """highdicom.SpecimenPreparationStep: Specimen preparation steps.""" return list(self.SpecimenPreparationSequence) @property def specimen_type(self) -> Union[CodedConcept, None]: + """highdicom.sr.CodedConcept: Specimen type.""" sequence = self.get("SpecimenTypeCodeSequence") if sequence is None: return None @@ -1501,14 +1503,17 @@ def specimen_type(self) -> Union[CodedConcept, None]: @property def specimen_short_description(self) -> Union[str, None]: + """str: Short description of specimen.""" return self.get("SpecimenShortDescription") @property def specimen_detailed_description(self) -> Union[str, None]: + """str: Detailed description of specimen.""" return self.get("SpecimenDetailedDescription") @property def issuer_of_specimen_id(self) -> Union[IssuerOfIdentifier, None]: + """IssuerOfIdentifier: Issuer of identifier for the specimen.""" sequence = self.get("IssuerOfTheSpecimenIdentifierSequence") if len(sequence) == 0: return None @@ -1516,6 +1521,8 @@ def issuer_of_specimen_id(self) -> Union[IssuerOfIdentifier, None]: @property def primary_anatomic_structures(self) -> Union[List[CodedConcept], None]: + """List[highdicom.sr.CodedConcept]: List of anatomic structures of the + specimen.""" return self.get("PrimaryAnatomicStructureSequence") 
@classmethod From 9ee5879b28230784c80fd824b05ee9009cc7e317 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Sun, 24 Sep 2023 16:42:58 +0200 Subject: [PATCH 75/96] Fix typo in docstring --- src/highdicom/ann/content.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/highdicom/ann/content.py b/src/highdicom/ann/content.py index c7234ca4..25f165f9 100644 --- a/src/highdicom/ann/content.py +++ b/src/highdicom/ann/content.py @@ -485,7 +485,7 @@ def anatomic_regions(self) -> List[CodedConcept]: @property def primary_anatomic_structures(self) -> List[CodedConcept]: """List[highdicom.sr.CodedConcept]: - List of anatomic anatomic structures the annotations represent. + List of anatomic structures the annotations represent. May be empty. """ From f9c40fa04ccbdd9b31eb9fc932a298cf3ab7add1 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Sun, 24 Sep 2023 17:46:32 +0200 Subject: [PATCH 76/96] Add from_dataset for IssuerOfIdentifier --- src/highdicom/content.py | 65 +++++++++++++++++++++++++++++++++++++++- tests/test_content.py | 42 ++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 1 deletion(-) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index c367cc13..ed9a6542 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -712,14 +712,72 @@ def __init__( """ # noqa: E501 super().__init__() + self._issuer_of_identifier = issuer_of_identifier if issuer_of_identifier_type is None: self.LocalNamespaceEntityID = issuer_of_identifier + self._issuer_of_identifier_type = None else: self.UniversalEntityID = issuer_of_identifier issuer_of_identifier_type = UniversalEntityIDTypeValues( issuer_of_identifier_type ) self.UniversalEntityIDType = issuer_of_identifier_type.value + self._issuer_of_identifier_type = issuer_of_identifier_type + + @property + def issuer_of_identifier(self) -> str: + """str: Identifier of the issuer.""" + return self._issuer_of_identifier + + @property + def 
issuer_of_identifier_type(self) -> Union[ + UniversalEntityIDTypeValues, None + ]: + """highdicom.UniversalEntityIDTypeValues: Type of the issuer.""" + return self._issuer_of_identifier_type + + @classmethod + def from_dataset( + cls, + dataset: Dataset, + ) -> 'IssuerOfIdentifier': + """Construct object from an existing dataset. + + Parameters + ---------- + dataset: pydicom.dataset.Dataset + Dataset + + Returns + ------- + highdicom.IssuerOfIdentifier + Issuer of identifier + + """ + issuer_of_identifier = deepcopy(dataset) + issuer_of_identifier.__class__ = cls + if hasattr(issuer_of_identifier, "LocalNamespaceEntityID"): + issuer_id = issuer_of_identifier.LocalNamespaceEntityID + issuer_type = None + elif hasattr(issuer_of_identifier, 'UniversalEntityID'): + if not hasattr(issuer_of_identifier, 'UniversalEntityIDType'): + raise ValueError( + 'Dataset with UniversalEntityID must have ', + '"UniversalEntityIDType set".' + ) + issuer_id = issuer_of_identifier.UniversalEntityID + issuer_type = UniversalEntityIDTypeValues( + issuer_of_identifier.UniversalEntityIDType + ) + else: + raise ValueError( + 'Dataset must have "LocalNamespaceEntityID" or ', + '"UniversalEntityID" and "UniversalEntityIDType" set.' 
+ ) + issuer_of_identifier._issuer_of_identifier = issuer_id + issuer_of_identifier._issuer_of_identifier_type = issuer_type + + return cast(IssuerOfIdentifier, issuer_of_identifier) class SpecimenCollection(ContentSequence): @@ -1174,7 +1232,7 @@ def processing_datetime(self) -> Union[datetime.datetime, None]: @property def issuer_of_specimen_id(self) -> Union[str, None]: - """highdicom.content.IssuerOfIdentifier: Issuer of specimen id""" + """str: Issuer of specimen id""" items = self.SpecimenPreparationStepContentItemSequence.find( codes.DCM.IssuerOfSpecimenIdentifier @@ -1580,6 +1638,11 @@ def from_dataset(cls, dataset: Dataset) -> 'SpecimenDescription': content_item_type.from_dataset(ds) for ds in desc.SpecimenLocalizationContentItemSequence ] + if hasattr(desc, 'IssuerOfTheSpecimenIdentifierSequence'): + desc.IssuerOfTheSpecimenIdentifierSequence = [ + IssuerOfIdentifier.from_dataset(ds) + for ds in desc.IssuerOfTheSpecimenIdentifierSequence + ] return desc diff --git a/tests/test_content.py b/tests/test_content.py index 29d3b387..4a9b81f9 100644 --- a/tests/test_content.py +++ b/tests/test_content.py @@ -8,6 +8,7 @@ from pydicom.data import get_testdata_file, get_testdata_files import numpy as np +from highdicom.enum import UniversalEntityIDTypeValues from highdicom.sr import CodedConcept from highdicom import ( @@ -1483,7 +1484,48 @@ def test_construction_from_dataset_with_optionals(self): instance.specimen_detailed_description == specimen_detailed_description ) + assert isinstance(instance.issuer_of_specimen_id, IssuerOfIdentifier) assert instance.issuer_of_specimen_id == issuer_of_specimen_id assert ( instance.primary_anatomic_structures == primary_anatomic_structures ) + + +class TestIssuerOfIdentifier(TestCase): + def test_construction(self): + issuer_of_identifier = "issuer of identifier" + instance = IssuerOfIdentifier(issuer_of_identifier) + assert instance.issuer_of_identifier == issuer_of_identifier + assert instance.issuer_of_identifier_type is 
None + + def test_construction_with_optionals(self): + issuer_of_identifier = "issuer of identifier id" + issuer_of_identifier_type = UniversalEntityIDTypeValues.DNS + instance = IssuerOfIdentifier( + issuer_of_identifier, + issuer_of_identifier_type + ) + assert instance.issuer_of_identifier == issuer_of_identifier + assert instance.issuer_of_identifier_type == issuer_of_identifier_type + + def test_construction_from_dataset(self): + issuer_of_identifier = "issuer of identifier" + dataset = Dataset() + dataset.LocalNamespaceEntityID = issuer_of_identifier + dataset_reread = write_and_read_dataset(dataset) + instance = IssuerOfIdentifier.from_dataset(dataset_reread) + assert isinstance(instance, IssuerOfIdentifier) + assert instance.issuer_of_identifier == issuer_of_identifier + assert instance.issuer_of_identifier_type is None + + def test_construction_from_dataset_with_optionals(self): + issuer_of_identifier = "issuer of identifier" + issuer_of_identifier_type = UniversalEntityIDTypeValues.DNS + dataset = Dataset() + dataset.UniversalEntityID = issuer_of_identifier + dataset.UniversalEntityIDType = issuer_of_identifier_type.value + dataset_reread = write_and_read_dataset(dataset) + instance = IssuerOfIdentifier.from_dataset(dataset_reread) + assert isinstance(instance, IssuerOfIdentifier) + assert instance.issuer_of_identifier == issuer_of_identifier + assert instance.issuer_of_identifier_type == issuer_of_identifier_type From 345a5122ed2a34b8aa7d2c21577f0dd911a32d3f Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Sun, 24 Sep 2023 20:03:19 +0200 Subject: [PATCH 77/96] Add description of issuer_of_specimen_id argument --- src/highdicom/content.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index ed9a6542..ac9529d2 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -1035,6 +1035,7 @@ def __init__( processing_description: Union[str, pydicom.sr.coding.Code, 
highdicom.sr.CodedConcept], optional Description of processing issuer_of_specimen_id: highdicom.IssuerOfIdentifier, optional + The issuer of the identifier of the processed specimen. fixative: Union[pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional Fixative used during processing (see :dcm:`CID 8114 ` "Specimen Fixative" for options). From 2b269bb15636b2a994e9842183b0a4a66f163310 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson <83275777+erikogabrielsson@users.noreply.github.com> Date: Mon, 25 Sep 2023 09:03:33 +0200 Subject: [PATCH 78/96] Add 'copy' parameter to from_dataset()-method Co-authored-by: Chris Bridge --- src/highdicom/content.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index ac9529d2..b60d5e13 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -740,6 +740,7 @@ def issuer_of_identifier_type(self) -> Union[ def from_dataset( cls, dataset: Dataset, + copy: bool = True, ) -> 'IssuerOfIdentifier': """Construct object from an existing dataset. @@ -747,6 +748,10 @@ def from_dataset( ---------- dataset: pydicom.dataset.Dataset Dataset + copy: bool + If True, the underlying dataset is deep-copied such that the + original dataset remains intact. If False, this operation will + alter the original dataset in place. 
Returns ------- @@ -754,7 +759,10 @@ def from_dataset( Issuer of identifier """ - issuer_of_identifier = deepcopy(dataset) + if copy: + issuer_of_identifier = deepcopy(dataset) + else: + issuer_of_identifier = dataset issuer_of_identifier.__class__ = cls if hasattr(issuer_of_identifier, "LocalNamespaceEntityID"): issuer_id = issuer_of_identifier.LocalNamespaceEntityID From 187f668bde66a333e62928f9e28884c0e0c7d217 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson <83275777+erikogabrielsson@users.noreply.github.com> Date: Mon, 25 Sep 2023 09:07:53 +0200 Subject: [PATCH 79/96] Check that specimen_short_description is valid Co-authored-by: Chris Bridge --- src/highdicom/content.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index b60d5e13..68912a61 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -1501,6 +1501,7 @@ def __init__( if specimen_type is not None: self.SpecimenTypeCodeSequence = [specimen_type] if specimen_short_description is not None: + _check_long_string(specimen_short_description) self.SpecimenShortDescription = specimen_short_description if specimen_detailed_description is not None: self.SpecimenDetailedDescription = specimen_detailed_description From e424f430b6b934158502d7607076d11469648312 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Mon, 25 Sep 2023 09:38:33 +0200 Subject: [PATCH 80/96] Tests for specimen_short_description --- tests/test_content.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/test_content.py b/tests/test_content.py index 4a9b81f9..1d315ea4 100644 --- a/tests/test_content.py +++ b/tests/test_content.py @@ -1382,6 +1382,28 @@ def test_construction_optionals(self): instance.primary_anatomic_structures == primary_anatomic_structures ) + def test_construction_with_to_long_short_description(self): + specimen_id = 'specimen 1' + specimen_uid = UID() + specimen_short_description = "x" * 65 + with 
pytest.raises(ValueError): + SpecimenDescription( + specimen_id=specimen_id, + specimen_uid=specimen_uid, + specimen_short_description=specimen_short_description + ) + + def test_construction_with_backslash_in_short_description(self): + specimen_id = 'specimen 1' + specimen_uid = UID() + specimen_short_description = 'short_description_with_backslash\\' + with pytest.raises(ValueError): + SpecimenDescription( + specimen_id=specimen_id, + specimen_uid=specimen_uid, + specimen_short_description=specimen_short_description + ) + def test_construction_with_preparation_steps(self): parent_specimen_id = 'surgical specimen' specimen_id = 'section specimen' From 9d9fb4e60cff080137fe6b4ea7e6000c8ae1d4b2 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Mon, 25 Sep 2023 09:56:40 +0200 Subject: [PATCH 81/96] Add reference to CID for specimen type argument --- src/highdicom/content.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index 68912a61..604ec05d 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -1447,7 +1447,8 @@ def __init__( Steps that were applied during the preparation of the examined specimen in the laboratory prior to image acquisition specimen_type: Union[pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional - The type of the examined specimen. + The anatomic pathology specimen type of the specimen (see :dcm:`CID 8103 ` + "Anatomic Pathology Specimen Type" for options). specimen_short_description: str, optional Short description of the examined specimen. 
specimen_detailed_description: str, optional From 2984da4a81e260018c37e1fb8ce3c5c9146b27c3 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Mon, 25 Sep 2023 10:38:02 +0200 Subject: [PATCH 82/96] Convert Code specimen_type to CodedConcept --- src/highdicom/content.py | 3 +++ tests/test_content.py | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index 604ec05d..ff66941b 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -1500,7 +1500,10 @@ def __init__( self.SpecimenLocalizationContentItemSequence = loc_seq if specimen_type is not None: + if isinstance(specimen_type, Code): + specimen_type = CodedConcept.from_code(specimen_type) self.SpecimenTypeCodeSequence = [specimen_type] + if specimen_short_description is not None: _check_long_string(specimen_short_description) self.SpecimenShortDescription = specimen_short_description diff --git a/tests/test_content.py b/tests/test_content.py index 1d315ea4..059cc625 100644 --- a/tests/test_content.py +++ b/tests/test_content.py @@ -5,6 +5,7 @@ from pydicom import dcmread from pydicom.dataset import Dataset from pydicom.sr.codedict import codes +from pydicom.sr.coding import Code from pydicom.data import get_testdata_file, get_testdata_files import numpy as np @@ -1404,6 +1405,23 @@ def test_construction_with_backslash_in_short_description(self): specimen_short_description=specimen_short_description ) + def test_construction_with_code_specimen_type(self): + specimen_id = 'specimen 1' + specimen_uid = UID() + specimen_type = Code( + "specimen type", + "test", + "test specimen type" + ) + instance = SpecimenDescription( + specimen_id=specimen_id, + specimen_uid=specimen_uid, + specimen_type=specimen_type, + ) + assert isinstance(instance.specimen_type, CodedConcept) + assert instance.specimen_type == specimen_type + + def test_construction_with_preparation_steps(self): parent_specimen_id = 'surgical specimen' specimen_id = 
'section specimen' From 35bcb4287c82f2f35a457368f5efb8062967b228 Mon Sep 17 00:00:00 2001 From: Erik O Gabrielsson Date: Mon, 25 Sep 2023 10:39:48 +0200 Subject: [PATCH 83/96] Remove extra line --- tests/test_content.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_content.py b/tests/test_content.py index 059cc625..5511ac42 100644 --- a/tests/test_content.py +++ b/tests/test_content.py @@ -1421,7 +1421,6 @@ def test_construction_with_code_specimen_type(self): assert isinstance(instance.specimen_type, CodedConcept) assert instance.specimen_type == specimen_type - def test_construction_with_preparation_steps(self): parent_specimen_id = 'surgical specimen' specimen_id = 'section specimen' From 795acdf6af2c2d8da56179f364fa025ef933e9bc Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Mon, 2 Oct 2023 18:17:38 -0400 Subject: [PATCH 84/96] Flake 8 fixes --- tests/test_ann.py | 4 +++- tests/test_seg.py | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/test_ann.py b/tests/test_ann.py index 93fd3ef8..e7c491fe 100644 --- a/tests/test_ann.py +++ b/tests/test_ann.py @@ -582,7 +582,9 @@ def test_construction(self): annotations.annotation_coordinate_type, AnnotationCoordinateTypeValues ) - assert annotations.annotation_coordinate_type == annotation_coordinate_type + assert ( + annotations.annotation_coordinate_type == annotation_coordinate_type + ) retrieved_groups = annotations.get_annotation_groups() assert len(retrieved_groups) == 2 diff --git a/tests/test_seg.py b/tests/test_seg.py index 8ca3d5ec..1fbc2f93 100644 --- a/tests/test_seg.py +++ b/tests/test_seg.py @@ -1410,7 +1410,7 @@ def test_construction_7(self): def test_construction_workers(self): # Create a segmentation with multiple workers - instance = Segmentation( + Segmentation( self._ct_series, self._ct_series_mask_array, SegmentationTypeValues.FRACTIONAL.value, @@ -1431,9 +1431,9 @@ def test_construction_workers(self): def test_construction_workers_manual(self): # Create a 
segmentation with multiple workers created manually with ProcessPoolExecutor(2) as pool: - instance = Segmentation( - self._ct_series, - self._ct_series_mask_array, + Segmentation( + self._ct_series, + self._ct_series_mask_array, SegmentationTypeValues.FRACTIONAL.value, self._segment_descriptions, self._series_instance_uid, From 4cf43cc0f4dda644aa273ba89011cc7909b53539 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Tue, 3 Oct 2023 12:51:52 -0400 Subject: [PATCH 85/96] Add TotalPixelMatrixFocalPlanes --- src/highdicom/seg/sop.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index a2b657ac..cb659a66 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -2260,6 +2260,7 @@ def _add_slide_coordinate_metadata( ) self.TotalPixelMatrixRows = source_image.TotalPixelMatrixRows self.TotalPixelMatrixColumns = source_image.TotalPixelMatrixColumns + self.TotalPixelMatrixFocalPlanes = 1 elif are_spatial_locations_preserved and not is_tiled: self.ImageCenterPointCoordinatesSequence = deepcopy( source_image.ImageCenterPointCoordinatesSequence @@ -2296,6 +2297,7 @@ def _add_slide_coordinate_metadata( origin_item.YOffsetInSlideCoordinateSystem = \ format_number_as_ds(y_origin) self.TotalPixelMatrixOriginSequence = [origin_item] + self.TotalPixelMatrixFocalPlanes = 1 if total_pixel_matrix_size is None: self.TotalPixelMatrixRows = int( plane_position_values[last_frame_index, row_index] + From 28e0ca6e87338498dcd88af878dfb1f7dccde44b Mon Sep 17 00:00:00 2001 From: CPBridge Date: Thu, 19 Oct 2023 15:12:06 +0000 Subject: [PATCH 86/96] WIP adding input checks for pyramid segs --- src/highdicom/seg/pyramid.py | 117 +++++++++++++++++++++++++++++------ src/highdicom/seg/sop.py | 2 +- 2 files changed, 100 insertions(+), 19 deletions(-) diff --git a/src/highdicom/seg/pyramid.py b/src/highdicom/seg/pyramid.py index 10356475..d3d0f1ac 100644 --- a/src/highdicom/seg/pyramid.py +++ b/src/highdicom/seg/pyramid.py @@ 
-36,26 +36,28 @@ def create_segmentation_pyramid( """Construct a multi-resolution segmentation pyramid series. A multi-resolution pyramid represents the same segmentation array at - multiple resolutions + multiple resolutions. This function handles multiple related scenarios: * Constructing a segmentation of a source image pyramid given a - segmentation pixel array of the highest resolution source image, with - highdicom performing the downsampling automatically to match the - resolution of the other source images (pass multiple ``source_images`` - and a single item in ``pixel_arrays``). + segmentation pixel array of the highest resolution source image. + Highdicom performs the downsampling automatically to match the + resolution of the other source images. For this case, pass multiple + ``source_images`` and a single item in ``pixel_arrays``. * Constructing a segmentation of a source image pyramid given user-provided - segmentation pixel arrays for each level in the source pyramid (pass - multiple ``source_images`` and a matching number of ``pixel_arrays``). + segmentation pixel arrays for each level in the source pyramid. For this + case, pass multiple ``source_images`` and a matching number of + ``pixel_arrays``. * Constructing a segmentation of a single source image given multiple - user-provided downsampled segmentation pixel arrays (pass a single item - in ``source_images``, and multiple items in ``pixel_arrays``). + user-provided downsampled segmentation pixel arrays. For this case, pass + a single item in ``source_images``, and multiple items in + ``pixel_arrays``). * Constructing a segmentation of a single source image and a single segmentation pixel array by downsampling by a given list of - ``downsample_factors`` (pass a single item in ``source_images``, a single - item in ``pixel_arrays``, and a list of one or more desired - ``downsample_factors``). + ``downsample_factors``. 
For this case, pass a single item in + ``source_images``, a single item in ``pixel_arrays``, and a list of one + or more desired ``downsample_factors``. In all cases, the items in both ``source_images`` and ``pixel_arrays`` should be sorted in pyramid order from highest resolution (smallest @@ -72,7 +74,7 @@ def create_segmentation_pyramid( ---------- source_images: Sequence[pydicom.Dataset] List of source images. If there are multiple source images, they should - represent be from the same series and pyramid. + be from the same series and pyramid. pixel_arrays: Sequence[numpy.ndarray] List of segmentation pixel arrays. Each should be a total pixel matrix. segmentation_type: Union[str, highdicom.seg.SegmentationTypeValues] @@ -132,9 +134,6 @@ def create_segmentation_pyramid( anti-aliasing), explicitly pass the downsampled arrays. """ - # TODO check dimensions of input arrays - # TODO check total pixel matrix sizes correspond - # TODO check ordering of items in list # TODO check source images are same series and pyramid # TODO add support for single source image with predefined downsampled # arrays @@ -168,6 +167,17 @@ def create_segmentation_pyramid( raise ValueError( 'All items in "downsample_factors" must be greater than 1.' ) + if len(downsample_factors) > 1: + if any( + z1 > z2 for z1, z2 in zip( + downsample_factors[:-1], + downsample_factors[1:] + ) + ): + raise ValueError( + 'Items in argument "downsample_factors" must be sorted in ' + 'ascending order.' + ) n_outputs = len(downsample_factors) + 1 # including original else: if downsample_factors is not None: @@ -178,14 +188,85 @@ def create_segmentation_pyramid( if n_sources > 1 and n_pix_arrays > 1: if n_sources != n_pix_arrays: raise ValueError( - "If providing multiple source images and multiple pixel " - "arrays, the number of items in the two lists must match." + 'If providing multiple source images and multiple pixel ' + 'arrays, the number of items in the two lists must match.' 
) n_outputs = n_sources else: # Either n_sources > 1 or n_pix_arrays > 1 but not both n_outputs = max(n_sources, n_pix_arrays) + # Check the source images are appropriately ordered + for index in range(1, len(source_images)): + r0 = source_images[index - 1].TotalPixelMatrixRows + c0 = source_images[index - 1].TotalPixelMatrixColumns + r1 = source_images[index].TotalPixelMatrixRows + c1 = source_images[index].TotalPixelMatrixColumns + + if r0 >= r1 or c0 >= c1: + raise ValueError( + 'Items in argument "source_images" must be strictly ordered in ' + 'decreasing resolution.' + ) + + # Check that pixel arrays have an appropriate shape + for pixel_array in pixel_arrays: + if pixel_array.ndim not in (2, 3, 4): + raise ValueError( + 'Each item of argument "pixel_arrays" must be a NumPy array ' + 'with 2, 3, or 4 dimensions.' + ) + if pixel_array.ndim > 2 and pixel_array.shape[0] != 1: + raise ValueError( + 'Each item of argument "pixel_arrays" must contain a single ' + 'frame, with a size of 1 along dimension 0.' + ) + + # Check the pixel arrays are appropriately ordered + for index in range(1, len(pixel_arrays)): + arr0 = pixel_arrays[index - 1] + arr1 = pixel_arrays[index] + + if arr0.ndim == 2: + r0 = arr0.shape[:2] + c0 = arr0.shape[:2] + else: + r0 = arr0.shape[1:3] + c0 = arr0.shape[1:3] + + if arr_1.ndim == 2: + r1 = arr_1.shape[:2] + c1 = arr_1.shape[:2] + else: + r1 = arr_1.shape[1:3] + c1 = arr_1.shape[1:3] + + if r0 >= r1 or c0 >= c1: + raise ValueError( + 'Items in argument "pixel_arrays" must be strictly ordered in ' + 'decreasing resolution.' 
+ ) + + # Check that input dimensions match + for index, (source_image, pixel_array) in enumerate( + zip(source_images, pixel_arrays) + ): + src_shape = ( + source_image.TotalPixelMatrixRows, + source_image.TotalPixelMatrixColumns + ) + pix_shape = ( + pixel_array.shape[1:3] if pixel_array.ndim > 2 + else pixel_array.shape + ) + if pix_shape != src_shape: + raise ValueError( + "The shape of each provided pixel array must match the shape " + "of the total pixel matrix of the corresponding source image. " + f"Got pixel array of shape {pix_shape} for a source image of " + f"shape {src_shape} at index {index}." + ) + if n_pix_arrays == 1: # Create a pillow image for use later with resizing mask_image = Image.fromarray(pixel_arrays[0]) diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 6a70bc43..404fddc6 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1355,7 +1355,7 @@ def __init__( If True, `highdicom` will automatically convert an input total pixel matrix into a sequence of frames representing tiles of the segmentation. This is valid only when the source image supports - tiling (e.g. VL While Slide Microscopy images). + tiling (e.g. VL Whole Slide Microscopy images). If True, the input pixel array must consist of a single "frame", i.e. must be either a 2D numpy array, a 3D numpy array with a size From 477c0988dbcbe401793a531dd7d0e6e5cbbc1d5a Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Sat, 28 Oct 2023 10:03:52 -0400 Subject: [PATCH 87/96] WIP pyramid finalization --- src/highdicom/seg/pyramid.py | 50 +++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/src/highdicom/seg/pyramid.py b/src/highdicom/seg/pyramid.py index d3d0f1ac..620dd291 100644 --- a/src/highdicom/seg/pyramid.py +++ b/src/highdicom/seg/pyramid.py @@ -134,10 +134,26 @@ def create_segmentation_pyramid( anti-aliasing), explicitly pass the downsampled arrays. 
""" - # TODO check source images are same series and pyramid # TODO add support for single source image with predefined downsampled # arrays - # TODO disallow duplicate items in kwargs + + # Disallow duplicate items in kwargs + kwarg_keys = set(kwargs.keys()) + disallowed_keys = { + 'instance_number', + 'sop_instance_uid', + 'plane_orientation', + 'plane_positions', + 'pixel_array', + 'tile_pixel_array', + } + error_keys = kwarg_keys & disallowed_keys + if len(error_keys) > 0: + raise TypeError( + f'kwargs supplied to the create_segmentation_pyramid function ' + f'should not contain a value for parameter {error_keys[0]}.' + ) + if pyramid_uid is None: pyramid_uid = UID() if series_instance_uid is None: @@ -209,6 +225,26 @@ def create_segmentation_pyramid( 'decreasing resolution.' ) + # Check that the source images are from the same series and pyramid + if len(source_images) > 1: + series_uid = source_images[0].SeriesInstanceUID + if not all( + dcm.SeriesInstanceUID == series_uid + for dcm in source_images[1:] + ): + raise ValueError( + 'All source images should belong to the same series.' + ) + pyramid_uid = source_images[0].PyramidUID + if not all( + dcm.PyramidUID == pyramid_uid + for dcm in source_images[1:] + ): + raise ValueError( + 'All source images should belong to the same pyramid ' + '(share a Pyramid UID).' 
+ ) + # Check that pixel arrays have an appropriate shape for pixel_array in pixel_arrays: if pixel_array.ndim not in (2, 3, 4): @@ -234,12 +270,12 @@ def create_segmentation_pyramid( r0 = arr0.shape[1:3] c0 = arr0.shape[1:3] - if arr_1.ndim == 2: - r1 = arr_1.shape[:2] - c1 = arr_1.shape[:2] + if arr1.ndim == 2: + r1 = arr1.shape[:2] + c1 = arr1.shape[:2] else: - r1 = arr_1.shape[1:3] - c1 = arr_1.shape[1:3] + r1 = arr1.shape[1:3] + c1 = arr1.shape[1:3] if r0 >= r1 or c0 >= c1: raise ValueError( From 3c657f76047696131134ef4448eb31b8e9d86177 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 8 Nov 2023 22:43:38 -0500 Subject: [PATCH 88/96] Add tests for all four pyramid input combinations --- src/highdicom/seg/pyramid.py | 40 +++++-- tests/test_seg.py | 209 +++++++++++++++++++++++++++++++++++ 2 files changed, 241 insertions(+), 8 deletions(-) diff --git a/src/highdicom/seg/pyramid.py b/src/highdicom/seg/pyramid.py index 620dd291..af7f7ba0 100644 --- a/src/highdicom/seg/pyramid.py +++ b/src/highdicom/seg/pyramid.py @@ -4,6 +4,7 @@ import numpy as np from PIL import Image from pydicom import Dataset +from pydicom.uid import VLWholeSlideMicroscopyImageStorage from highdicom.content import PixelMeasuresSequence from highdicom.seg.sop import Segmentation @@ -134,9 +135,6 @@ def create_segmentation_pyramid( anti-aliasing), explicitly pass the downsampled arrays. """ - # TODO add support for single source image with predefined downsampled - # arrays - # Disallow duplicate items in kwargs kwarg_keys = set(kwargs.keys()) disallowed_keys = { @@ -212,6 +210,13 @@ def create_segmentation_pyramid( # Either n_sources > 1 or n_pix_arrays > 1 but not both n_outputs = max(n_sources, n_pix_arrays) + if sop_instance_uids is not None: + if len(sop_instance_uids) != n_outputs: + raise ValueError( + 'Number of specified SOP Instance UIDs does not match number ' + 'of output images.' 
+ ) + # Check the source images are appropriately ordered for index in range(1, len(source_images)): r0 = source_images[index - 1].TotalPixelMatrixRows @@ -219,12 +224,19 @@ def create_segmentation_pyramid( r1 = source_images[index].TotalPixelMatrixRows c1 = source_images[index].TotalPixelMatrixColumns - if r0 >= r1 or c0 >= c1: + if r0 <= r1 or c0 <= c1: raise ValueError( 'Items in argument "source_images" must be strictly ordered in ' 'decreasing resolution.' ) + # Check that source images are WSI + for im in source_images: + if im.SOPClassUID != VLWholeSlideMicroscopyImageStorage: + raise ValueError( + 'Source images must have IOD VLWholeSlideMicroscopyImageStorage' + ) + # Check that the source images are from the same series and pyramid if len(source_images) > 1: series_uid = source_images[0].SeriesInstanceUID @@ -235,6 +247,11 @@ def create_segmentation_pyramid( raise ValueError( 'All source images should belong to the same series.' ) + if not all(hasattr(dcm, 'PyramidUID') for dcm in source_images): + raise ValueError( + 'All source images should belong to the same pyramid ' + '(share a Pyramid UID).' + ) pyramid_uid = source_images[0].PyramidUID if not all( dcm.PyramidUID == pyramid_uid @@ -277,7 +294,7 @@ def create_segmentation_pyramid( r1 = arr1.shape[1:3] c1 = arr1.shape[1:3] - if r0 >= r1 or c0 >= c1: + if r0 <= r1 or c0 <= c1: raise ValueError( 'Items in argument "pixel_arrays" must be strictly ordered in ' 'decreasing resolution.' 
@@ -342,17 +359,24 @@ def create_segmentation_pyramid( ) if n_sources == 1: + source_pixel_measures = ( + source_image + .SharedFunctionalGroupsSequence[0] + .PixelMeasuresSequence[0] + ) + src_pixel_spacing = source_pixel_measures.PixelSpacing + src_slice_thickness = source_pixel_measures.SliceThickness row_spacing = ( - source_image.PixelSpacing[0] * + src_pixel_spacing[0] * (pixel_arrays[0].shape[0] / pixel_array.shape[0]) ) column_spacing = ( - source_image.PixelSpacing[1] * + src_pixel_spacing[1] * (pixel_arrays[0].shape[1] / pixel_array.shape[1]) ) pixel_measures = PixelMeasuresSequence( pixel_spacing=(row_spacing, column_spacing), - slice_thickness=source_image.SliceThickness, + slice_thickness=src_slice_thickness ) else: # This will be copied from the source image diff --git a/tests/test_seg.py b/tests/test_seg.py index 3266371f..7bae0b48 100644 --- a/tests/test_seg.py +++ b/tests/test_seg.py @@ -8,6 +8,7 @@ import numpy as np import pytest +from PIL import Image from pydicom.data import get_testdata_file, get_testdata_files from pydicom.datadict import tag_for_keyword @@ -32,6 +33,7 @@ DimensionOrganizationTypeValues, ) from highdicom.seg import ( + create_segmentation_pyramid, segread, DimensionIndexSequence, SegmentationTypeValues, @@ -3734,3 +3736,210 @@ def test_iter_segments_ct_single_frame_2_segments(self): seg_id_item_2 = item_segment_2[1][0].SegmentIdentificationSequence[0] assert seg_id_item_2.ReferencedSegmentNumber == 2 assert item_segment_2[2].SegmentNumber == 2 + + +class TestPyramid(unittest.TestCase): + + def setUp(self): + file_path = Path(__file__) + data_dir = file_path.parent.parent.joinpath('data') + self._sm_image = dcmread( + str(data_dir.joinpath('test_files', 'sm_image.dcm')) + ) + tpm_size = ( + self._sm_image.TotalPixelMatrixRows, + self._sm_image.TotalPixelMatrixColumns + ) + self._seg_pix = np.zeros( + tpm_size, + dtype=np.uint8, + ) + self._seg_pix[5:15, 3:8] = 1 + + self._n_downsamples = 3 + self._downsampled_pix_arrays = 
[self._seg_pix] + seg_pil = Image.fromarray(self._seg_pix) + pyramid_uid = UID() + self._source_pyramid = [deepcopy(self._sm_image)] + self._source_pyramid[0].PyramidUID = pyramid_uid + for i in range(1, self._n_downsamples): + f = 2 ** i + out_size = ( + self._sm_image.TotalPixelMatrixRows // f, + self._sm_image.TotalPixelMatrixColumns // f + ) + + # Resize the segmentation arrays + resized = np.array( + seg_pil.resize(out_size, Image.Resampling.NEAREST) + ) + self._downsampled_pix_arrays.append(resized) + + # Mock lower-resolution source images. No need to have their pixel + # data correctly set as it isn't used. Just update the relevant + # metadata + src_pixel_spacing = ( + self._sm_image + .SharedFunctionalGroupsSequence[0] + .PixelMeasuresSequence[0] + .PixelSpacing + ) + pixel_spacing = [src_pixel_spacing[0] * f, src_pixel_spacing[1] * f] + downsampled_source_im = deepcopy(self._sm_image) + delattr(downsampled_source_im, 'PixelData') + downsampled_source_im.TotalPixelMatrixRows = out_size[0] + downsampled_source_im.TotalPixelMatrixColumns = out_size[1] + ( + downsampled_source_im + .SharedFunctionalGroupsSequence[0] + .PixelMeasuresSequence[0] + .PixelSpacing + ) = pixel_spacing + downsampled_source_im.PyramidUID = pyramid_uid + self._source_pyramid.append(downsampled_source_im) + + self._segmented_property_category = \ + codes.SCT.MorphologicallyAbnormalStructure + self._segmented_property_type = codes.SCT.Neoplasm + self._segment_descriptions = [ + SegmentDescription( + segment_number=1, + segment_label='Segment #1', + segmented_property_category=self._segmented_property_category, + segmented_property_type=self._segmented_property_type, + algorithm_type=SegmentAlgorithmTypeValues.AUTOMATIC.value, + algorithm_identification=AlgorithmIdentificationSequence( + name='bla', + family=codes.DCM.ArtificialIntelligence, + version='v1' + ) + ), + ] + + def test_pyramid_factors(self): + downsample_factors = [2.0, 5.0] + segs = create_segmentation_pyramid( + 
source_images=[self._sm_image], + pixel_arrays=[self._seg_pix], + segmentation_type=SegmentationTypeValues.BINARY, + segment_descriptions=self._segment_descriptions, + series_instance_uid=UID(), + series_number=1, + manufacturer='Foo', + manufacturer_model_name='Bar', + software_versions='1', + device_serial_number='123', + downsample_factors=downsample_factors, + ) + + assert len(segs) == len(downsample_factors) + 1 + tol = 0.01 + for f, seg in zip([1.0, *downsample_factors], segs): + assert hasattr(seg, 'PyramidUID') + assert abs( + seg.TotalPixelMatrixRows - int(self._seg_pix.shape[0] / f) + ) < tol + assert abs( + seg.TotalPixelMatrixColumns - int(self._seg_pix.shape[1] / f) + ) < tol + + def test_pyramid_downsample_factors(self): + # Test construction when given a single source image, single + # segmentation mask, and specified downsample factors + downsample_factors = [2.0, 5.0] + segs = create_segmentation_pyramid( + source_images=[self._sm_image], + pixel_arrays=[self._seg_pix], + segmentation_type=SegmentationTypeValues.BINARY, + segment_descriptions=self._segment_descriptions, + series_instance_uid=UID(), + series_number=1, + manufacturer='Foo', + manufacturer_model_name='Bar', + software_versions='1', + device_serial_number='123', + downsample_factors=downsample_factors, + ) + + assert len(segs) == len(downsample_factors) + 1 + tol = 0.01 + for f, seg in zip([1.0, *downsample_factors], segs): + assert hasattr(seg, 'PyramidUID') + assert abs( + seg.TotalPixelMatrixRows - int(self._seg_pix.shape[0] / f) + ) < tol + assert abs( + seg.TotalPixelMatrixColumns - int(self._seg_pix.shape[1] / f) + ) < tol + + def test_single_source_multiple_pixel_arrays(self): + # Test construction when given a single source image and multiple + # segmentation images + segs = create_segmentation_pyramid( + source_images=[self._sm_image], + pixel_arrays=self._downsampled_pix_arrays, + segmentation_type=SegmentationTypeValues.BINARY, + 
segment_descriptions=self._segment_descriptions, + series_instance_uid=UID(), + series_number=1, + manufacturer='Foo', + manufacturer_model_name='Bar', + software_versions='1', + device_serial_number='123', + ) + + assert len(segs) == len(self._downsampled_pix_arrays) + for pix, seg in zip(self._downsampled_pix_arrays, segs): + assert hasattr(seg, 'PyramidUID') + assert np.array_equal( + seg.get_total_pixel_matrix(combine_segments=True), + pix + ) + + def test_multiple_source_single_pixel_array(self): + # Test construction when given multiple source images and a single + # segmentation image + segs = create_segmentation_pyramid( + source_images=self._source_pyramid, + pixel_arrays=[self._seg_pix], + segmentation_type=SegmentationTypeValues.BINARY, + segment_descriptions=self._segment_descriptions, + series_instance_uid=UID(), + series_number=1, + manufacturer='Foo', + manufacturer_model_name='Bar', + software_versions='1', + device_serial_number='123', + ) + + assert len(segs) == len(self._source_pyramid) + for pix, seg in zip(self._downsampled_pix_arrays, segs): + assert hasattr(seg, 'PyramidUID') + assert np.array_equal( + seg.get_total_pixel_matrix(combine_segments=True), + pix + ) + + def test_multiple_source_multiple_pixel_arrays(self): + # Test construction when given multiple source images and multiple + # segmentation images + segs = create_segmentation_pyramid( + source_images=self._source_pyramid, + pixel_arrays=self._downsampled_pix_arrays, + segmentation_type=SegmentationTypeValues.BINARY, + segment_descriptions=self._segment_descriptions, + series_instance_uid=UID(), + series_number=1, + manufacturer='Foo', + manufacturer_model_name='Bar', + software_versions='1', + device_serial_number='123', + ) + + assert len(segs) == len(self._source_pyramid) + for pix, seg in zip(self._downsampled_pix_arrays, segs): + assert hasattr(seg, 'PyramidUID') + assert np.array_equal( + seg.get_total_pixel_matrix(combine_segments=True), + pix + ) From 
6b7401df362ae0e42ee5ae630a21033b5aa7eb16 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 8 Nov 2023 23:10:05 -0500 Subject: [PATCH 89/96] Add pyramid doc section --- docs/seg.rst | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/docs/seg.rst b/docs/seg.rst index e47a5b02..a85bf7c9 100644 --- a/docs/seg.rst +++ b/docs/seg.rst @@ -625,6 +625,102 @@ combination with ``tile_pixel_array`` argument. assert seg.NumberOfFrames == 25 assert seg.pixel_array.shape == (25, 10, 10) +Multi-resolution Pyramids +------------------------- + +Whole slide digital pathology images can often be very large and as such it +is common to represent them as *multi-resolution pyramids* of images, i.e. +to store multiple versions of the same image at different resolutions. This +helps viewers render the image at different zoom levels. + +Within DICOM, this can also extend to segmentations derived from whole slide +images. Multiple different SEG images may be stored, each representing the +same segmentation at a different resolution, as different instances within a +DICOM series. + +*highdicom* provides the :func:`highdicom.seg.create_segmentation_pyramid` +function to assist with this process. This function handles multiple related +scenarios: + +* Constructing a segmentation of a source image pyramid given a + segmentation pixel array of the highest resolution source image. + Highdicom performs the downsampling automatically to match the + resolution of the other source images. For this case, pass multiple + ``source_images`` and a single item in ``pixel_arrays``. +* Constructing a segmentation of a source image pyramid given user-provided + segmentation pixel arrays for each level in the source pyramid. For this + case, pass multiple ``source_images`` and a matching number of + ``pixel_arrays``. +* Constructing a segmentation of a single source image given multiple + user-provided downsampled segmentation pixel arrays. 
For this case, pass + a single item in ``source_images``, and multiple items in + ``pixel_arrays``. +* Constructing a segmentation of a single source image and a single + segmentation pixel array by downsampling by a given list of + ``downsample_factors``. For this case, pass a single item in + ``source_images``, a single item in ``pixel_arrays``, and a list of one + or more desired ``downsample_factors``. + +Here is a simple example of specifying a single source image and segmentation +array, and having *highdicom* create a multi-resolution pyramid segmentation +series at user-specified downsample factors. + +.. code-block:: python + + import highdicom as hd + from pydicom import dcmread + import numpy as np + + + # Use an example slide microscopy image from the highdicom test data + # directory + sm_image = dcmread('data/test_files/sm_image.dcm') + + # The source image has multiple frames/tiles, but here we create a mask + # corresponding to the entire total pixel matrix + mask = np.zeros( + ( + sm_image.TotalPixelMatrixRows, + sm_image.TotalPixelMatrixColumns + ), + dtype=np.uint8, + ) + mask[38:43, 5:41] = 1 + + property_category = hd.sr.CodedConcept("91723000", "SCT", "Anatomical Stucture") + property_type = hd.sr.CodedConcept("84640000", "SCT", "Nucleus") + segment_descriptions = [ + hd.seg.SegmentDescription( + segment_number=1, + segment_label='Segment #1', + segmented_property_category=property_category, + segmented_property_type=property_type, + algorithm_type=hd.seg.SegmentAlgorithmTypeValues.MANUAL, + ), + ] + + # This will create a segmentation series of three images: one at the + # original source image resolution (implicit), one at half the size, and + # another at a quarter of the original size. 
+ seg_pyramid = hd.seg.create_segmentation_pyramid( + source_images=[sm_image], + pixel_arrays=[mask], + segmentation_type=hd.seg.SegmentationTypeValues.BINARY, + segment_descriptions=segment_descriptions, + series_instance_uid=hd.UID(), + series_number=1, + manufacturer='Foo Corp.', + manufacturer_model_name='Slide Segmentation Algorithm', + software_versions='0.0.1', + device_serial_number='1234567890', + downsample_factors=[2.0, 4.0] + ) + +Note that the :func:`highdicom.seg.create_segmentation_pyramid` function always +behaves as if the ``tile_pixel_array`` input is ``True`` within the segmentation +constructor, i.e. it assumes that the input segmentation masks represent total +pixel matrices. + Representation of Fractional SEGs --------------------------------- From e15aac1405d85c8e5ed4d81e8bcc977d02527e59 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Wed, 8 Nov 2023 23:27:28 -0500 Subject: [PATCH 90/96] Version bump for release --- src/highdicom/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/highdicom/version.py b/src/highdicom/version.py index 8c306aa6..81edede8 100644 --- a/src/highdicom/version.py +++ b/src/highdicom/version.py @@ -1 +1 @@ -__version__ = '0.21.1' +__version__ = '0.22.0' From a14c3d8828e9cf155e7adcbee2661a7f62c69fd8 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 9 Nov 2023 08:49:46 -0500 Subject: [PATCH 91/96] Fix codespell errors --- docs/seg.rst | 6 +++--- src/highdicom/seg/sop.py | 4 ++-- src/highdicom/utils.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/seg.rst b/docs/seg.rst index a85bf7c9..0adc53c6 100644 --- a/docs/seg.rst +++ b/docs/seg.rst @@ -534,7 +534,7 @@ options are available to you. 
) mask[38:43, 5:41] = 1 - property_category = hd.sr.CodedConcept("91723000", "SCT", "Anatomical Stucture") + property_category = hd.sr.CodedConcept("91723000", "SCT", "Anatomical Structure") property_type = hd.sr.CodedConcept("84640000", "SCT", "Nucleus") segment_descriptions = [ hd.seg.SegmentDescription( @@ -591,7 +591,7 @@ attribute: receiving application to process, since the tiles are guaranteed to be regularly and consistently ordered. -You can control tihs behavior by specifying the +You can control this behavior by specifying the ``dimension_organization_type`` parameter and passing a value of the :class:`highdicom.DimensionOrganizationTypeValues` enum. The default value is ``"TILED_SPARSE"``. Generally, the ``"TILED_FULL"`` option will be used in @@ -687,7 +687,7 @@ series at user-specified downsample factors. ) mask[38:43, 5:41] = 1 - property_category = hd.sr.CodedConcept("91723000", "SCT", "Anatomical Stucture") + property_category = hd.sr.CodedConcept("91723000", "SCT", "Anatomical Structure") property_type = hd.sr.CodedConcept("84640000", "SCT", "Nucleus") segment_descriptions = [ hd.seg.SegmentDescription( diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 5173d94a..be933a3d 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1369,7 +1369,7 @@ def __init__( ``plane_orientation`` are supplied, the total pixel matrix of the segmentation is assumed to correspond to the total pixel matrix of the (single) source image. If ``plane_positions`` is supplied, the - sequence should contain a singe item representing the plane + sequence should contain a single item representing the plane position of the entire total pixel matrix. Plane positions of the newly created tiles will derived automatically from this. @@ -4892,7 +4892,7 @@ def get_total_pixel_matrix( the conventions used in the DICOM standard. The first row of the total pixel matrix is row 1, and the last is ``self.TotalPixelMatrixRows``. 
This is is unlike standard Python and NumPy indexing which is 0-based. - For negative indices, the two are equavalent with the final row/column + For negative indices, the two are equivalent with the final row/column having index -1. """ diff --git a/src/highdicom/utils.py b/src/highdicom/utils.py index 45845878..7ef56207 100644 --- a/src/highdicom/utils.py +++ b/src/highdicom/utils.py @@ -196,7 +196,7 @@ def compute_plane_position_tiled_full( """ if row_index < 1 or column_index < 1: - raise ValueError("Row and column indices must be positive intergers.") + raise ValueError("Row and column indices must be positive integers.") row_offset_frame = ((row_index - 1) * rows) column_offset_frame = ((column_index - 1) * columns) From d244e7390077eb11c3c540e5afc91fd06969cdaa Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 9 Nov 2023 10:39:55 -0500 Subject: [PATCH 92/96] Fix wrong transfer syntax in test --- tests/test_frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_frame.py b/tests/test_frame.py index 2e4d424d..164aea86 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -260,7 +260,7 @@ def test_jpegls_monochrome(self): assert compressed_frame.endswith(b'\xFF\xD9') decoded_frame = decode_frame( value=compressed_frame, - transfer_syntax_uid=JPEG2000Lossless, + transfer_syntax_uid=JPEGLSLossless, rows=frame.shape[0], columns=frame.shape[1], samples_per_pixel=1, From 393c186781bffd1a6e54b707d4911dc77f0ce7c1 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 9 Nov 2023 10:54:40 -0500 Subject: [PATCH 93/96] doctest fix --- src/highdicom/seg/content.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/highdicom/seg/content.py b/src/highdicom/seg/content.py index 9238cdba..495f89d6 100644 --- a/src/highdicom/seg/content.py +++ b/src/highdicom/seg/content.py @@ -701,8 +701,8 @@ def get_index_keywords(self) -> List[str]: >>> names = dimension_index.get_index_keywords() >>> for name in names: ... 
print(name) - ColumnPositionInTotalImagePixelMatrix RowPositionInTotalImagePixelMatrix + ColumnPositionInTotalImagePixelMatrix XOffsetInSlideCoordinateSystem YOffsetInSlideCoordinateSystem ZOffsetInSlideCoordinateSystem From b61aa588c6d03afb2ee5566467f334687c4db64e Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 9 Nov 2023 11:12:19 -0500 Subject: [PATCH 94/96] Fix for new seg tests when libjpeg not installed --- tests/test_seg.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/test_seg.py b/tests/test_seg.py index 7bae0b48..4a572eaf 100644 --- a/tests/test_seg.py +++ b/tests/test_seg.py @@ -1581,10 +1581,15 @@ def test_construction_autotile( transfer_syntax_uids = [ExplicitVRLittleEndian] if segmentation_type.value == 'FRACTIONAL': - transfer_syntax_uids += [ - JPEG2000Lossless, - JPEGLSLossless, - ] + try: + import libjpeg # noqa: F401 + except ModuleNotFoundError: + pass + else: + transfer_syntax_uids += [ + JPEG2000Lossless, + JPEGLSLossless, + ] for omit_empty_frames, transfer_syntax_uid in itertools.product( omit_empty_frames_values, From 50c97e565170463c48b07e1712ba10f3424886ed Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 9 Nov 2023 11:20:58 -0500 Subject: [PATCH 95/96] Workaround use of math.prod in python 3.7 --- tests/test_utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index ebea82ac..e9bfdbae 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -213,12 +213,12 @@ def test_compute_plane_position_slide_per_frame(): tiles_per_column = math.ceil(image.TotalPixelMatrixRows / image.Rows) tiles_per_row = math.ceil(image.TotalPixelMatrixColumns / image.Columns) - assert len(plane_positions) == math.prod([ - num_optical_paths, - num_focal_planes, - tiles_per_row, - tiles_per_column - ]) + assert len(plane_positions) == ( + num_optical_paths + * num_focal_planes + * tiles_per_row + * tiles_per_column + ) def 
test_are_plane_positions_tiled_full(): From edf619834b9dede7ae3d092f27dc38373d03fc49 Mon Sep 17 00:00:00 2001 From: Chris Bridge Date: Thu, 9 Nov 2023 11:27:25 -0500 Subject: [PATCH 96/96] Flake8 fix --- tests/test_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index e9bfdbae..377f6027 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -214,10 +214,10 @@ def test_compute_plane_position_slide_per_frame(): tiles_per_column = math.ceil(image.TotalPixelMatrixRows / image.Rows) tiles_per_row = math.ceil(image.TotalPixelMatrixColumns / image.Columns) assert len(plane_positions) == ( - num_optical_paths - * num_focal_planes - * tiles_per_row - * tiles_per_column + num_optical_paths * + num_focal_planes * + tiles_per_row * + tiles_per_column )