Added static typing to pydicer modules #164

Merged: 3 commits, Feb 2, 2024
Changes from all commits
16 changes: 11 additions & 5 deletions pydicer/config.py
@@ -15,7 +15,13 @@
"available in the .pydicer directory.",
"type": int,
"default": 0,
"choices": [logging.NOTSET, logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR],
"choices": [
logging.NOTSET,
logging.DEBUG,
logging.INFO,
logging.WARNING,
logging.ERROR,
],
},
"for_fallback_linkage": {
"module": "general",
@@ -80,7 +86,6 @@
class PyDicerConfig:
class __PyDicerConfig: # pylint: disable=invalid-name
def __init__(self, working_dir=None):

if working_dir is None:
raise ValueError("working_dir must be set on config init")
self.working_dir = Path(working_dir)
@@ -128,7 +133,7 @@ def get_working_dir(self):
"""
return self.instance.working_dir

def get_config(self, name):
def get_config(self, name: str) -> object:
"""Get the value of the config item with the specified name

Args:
@@ -146,7 +151,7 @@ def get_config(self, name):

return self.instance.pydicer_config[name]

def set_config(self, name, value):
def set_config(self, name: str, value: object):
"""Set the value for the config with the given name

Args:
@@ -163,7 +168,8 @@ def set_config(self, name, value):

if not isinstance(value, PYDICER_CONFIG[name]["type"]) and value is not None:
raise ValueError(
f"Config {name} must be of type " f"{type(self.instance.pydicer_config[name])}"
f"Config {name} must be of type "
f"{type(self.instance.pydicer_config[name])}"
)

self.instance.pydicer_config[name] = value
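As a quick illustration of the newly annotated accessors, a minimal sketch; the config item name and working directory are hypothetical, and it assumes the singleton wrapper forwards working_dir to the inner instance:

```python
from pydicer.config import PyDicerConfig

# Hypothetical working directory; config init raises ValueError if it is None.
config = PyDicerConfig(working_dir="./testdata")

# get_config(name: str) -> object returns the stored value of a config item.
level = config.get_config("verbosity")  # "verbosity" is a hypothetical item name

# set_config(name: str, value: object) validates the value against the item's
# declared type and raises ValueError on a mismatch (None always passes).
config.set_config("verbosity", 20)
```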
16 changes: 11 additions & 5 deletions pydicer/convert/data.py
@@ -3,6 +3,8 @@
import copy
import shutil
from pathlib import Path
from typing import Union

import pandas as pd
import numpy as np
import SimpleITK as sitk
@@ -51,7 +53,7 @@
]


def get_object_type(sop_class_uid):
def get_object_type(sop_class_uid: str) -> str:
"""Get the type of the object (used for the output path)

Args:
@@ -69,7 +71,9 @@ def get_object_type(sop_class_uid):
return object_type


def handle_missing_slice(files, ignore_duplicates=False):
def handle_missing_slice(
files: Union[pd.DataFrame, list], ignore_duplicates: bool = False
) -> list:
"""function to interpolate missing slices in an image

Example usage:
@@ -98,6 +102,8 @@ def handle_missing_slice(files, ignore_duplicates=False):
Args:
files (pd.DataFrame|list): the DataFrame which was produced by PreprocessData
or list of filepaths to dicom slices
ignore_duplicates (bool, optional): specifies whether the function should ignore
duplicate slices when handling missing ones

Returns:
file_paths (list): a list of the interpolated file paths
@@ -231,7 +237,7 @@ def handle_missing_slice(files, ignore_duplicates=False):
return df_files.file_path.tolist()


def link_via_frame_of_reference(for_uid, df_preprocess):
def link_via_frame_of_reference(for_uid: str, df_preprocess: pd.DataFrame) -> pd.DataFrame:
"""Find the image series linked to this FOR

Args:
@@ -271,7 +277,7 @@ def __init__(self, working_directory="."):
self.pydicer_directory = working_directory.joinpath(PYDICER_DIR_NAME)
self.output_directory = working_directory.joinpath(CONVERTED_DIR_NAME)

def add_entry(self, entry):
def add_entry(self, entry: dict):
"""Add an entry of a converted data object to the patient's converted dataframe.

Args:
@@ -308,7 +314,7 @@ def add_entry(self, entry):
df_pat_data = df_pat_data.reset_index(drop=True)
df_pat_data.to_csv(converted_df_path)

def convert(self, patient=None, force=True):
def convert(self, patient: Union[str, list] = None, force: bool = True):
"""Converts the DICOM which was preprocessed into the pydicer output directory.

Args:
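For context, a minimal sketch of how the newly typed handle_missing_slice might be called with a plain list of slice paths; the file paths below are hypothetical:

```python
from pydicer.convert.data import handle_missing_slice

# Hypothetical DICOM slice paths with a gap between slices 2 and 4.
slice_paths = [
    "ct/slice_001.dcm",
    "ct/slice_002.dcm",
    "ct/slice_004.dcm",
]

# Accepts a pd.DataFrame produced by PreprocessData or a list of file paths,
# and returns the list of file paths including any interpolated slices.
all_paths = handle_missing_slice(slice_paths, ignore_duplicates=True)
```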
7 changes: 6 additions & 1 deletion pydicer/convert/headers.py
@@ -1,11 +1,16 @@
import logging
import json
from typing import Union
from pathlib import Path

import pydicom

logger = logging.getLogger(__name__)


def convert_dicom_headers(dcm_file, binary_path, json_file):
def convert_dicom_headers(
dcm_file: Union[str, Path], binary_path: str, json_file: Union[str, Path]
):
"""Save the DICOM Headers as a JSON file

Args:
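A sketch of the updated signature in use; the paths are hypothetical, and the exact role of binary_path (a plain string recorded in the JSON in place of bulk pixel data) is an assumption:

```python
from pathlib import Path
from pydicer.convert.headers import convert_dicom_headers

# Union[str, Path] means either form is now valid for dcm_file and json_file.
convert_dicom_headers(
    dcm_file=Path("input/slice_001.dcm"),   # hypothetical path
    binary_path="images/CT.nii.gz",         # assumed plain-str reference
    json_file=Path("output/headers.json"),  # hypothetical path
)
```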
44 changes: 32 additions & 12 deletions pydicer/dataset/functions.py
@@ -7,7 +7,7 @@
logger = logging.getLogger(__name__)


def rt_latest_struct(df, **kwargs):
def rt_latest_struct(df: pd.DataFrame, **kwargs) -> pd.DataFrame:
"""Select the latest Structure set and the image which it is linked to. You can specify keyword
arguments to for a match on any top level DICOM attributes. You may also supply lists of values
to these, one of which should match to select that series.
@@ -91,18 +91,24 @@ def rt_latest_struct(df, **kwargs):
keep_rows.append(struct_row.name) # Track index of row to keep

# Find the linked image
df_linked_img = df[df["sop_instance_uid"] == struct_row.referenced_sop_instance_uid]
df_linked_img = df[
df["sop_instance_uid"] == struct_row.referenced_sop_instance_uid
]

if len(df_linked_img) == 0:
logger.warning("No linked images found for structure: %s", struct_row.hashed_uid)
logger.warning(
"No linked images found for structure: %s", struct_row.hashed_uid
)
continue

keep_rows.append(df_linked_img.iloc[0].name) # Keep the index of the row of the image too
keep_rows.append(
df_linked_img.iloc[0].name
) # Keep the index of the row of the image too

return df.loc[keep_rows]


def rt_latest_dose(df, **kwargs):
def rt_latest_dose(df: pd.DataFrame, **kwargs) -> pd.DataFrame:
"""Select the latest RTDOSE and the image, structure and plan which it is linked to. You can
specify keyword arguments to for a match on any top level DICOM attributes. You may also supply
lists of values to these, one of which should match to select that series.
@@ -191,16 +197,22 @@ def rt_latest_dose(df, **kwargs):
keep_rows.append(dose_row.name) # Track index of row of dose to keep

# Find the linked plan
df_linked_plan = df[df["sop_instance_uid"] == dose_row.referenced_sop_instance_uid]
df_linked_plan = df[
df["sop_instance_uid"] == dose_row.referenced_sop_instance_uid
]

if len(df_linked_plan) == 0:
logger.warning("No linked plans found for dose: %s", dose_row.sop_instance_uid)
logger.warning(
"No linked plans found for dose: %s", dose_row.sop_instance_uid
)
continue

# Find the linked structure set
plan_row = df_linked_plan.iloc[0]
keep_rows.append(plan_row.name) # Keep the index of the row of the plan
df_linked_struct = df[df["sop_instance_uid"] == plan_row.referenced_sop_instance_uid]
df_linked_struct = df[
df["sop_instance_uid"] == plan_row.referenced_sop_instance_uid
]

if len(df_linked_struct) == 0:
# Try to link via Frame of Reference instead
@@ -209,18 +221,26 @@
]

if len(df_linked_struct) == 0:
logger.warning("No structures found for plan: %s", plan_row.sop_instance_uid)
logger.warning(
"No structures found for plan: %s", plan_row.sop_instance_uid
)
continue

# Find the linked image
struct_row = df_linked_struct.iloc[0]
keep_rows.append(struct_row.name) # Keep the index of the row of the structure
df_linked_img = df[df["sop_instance_uid"] == struct_row.referenced_sop_instance_uid]
df_linked_img = df[
df["sop_instance_uid"] == struct_row.referenced_sop_instance_uid
]

if len(df_linked_img) == 0:
logger.warning("No linked images found for structure: %s", struct_row.hashed_uid)
logger.warning(
"No linked images found for structure: %s", struct_row.hashed_uid
)
continue

keep_rows.append(df_linked_img.iloc[0].name) # Keep the index of the row of the image too
keep_rows.append(
df_linked_img.iloc[0].name
) # Keep the index of the row of the image too

return df.loc[keep_rows]
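To show the annotated selection functions in context, a hedged sketch; df is assumed to be the converted-data DataFrame pydicer builds, and the keyword filter is hypothetical:

```python
import pandas as pd
from pydicer.dataset.functions import rt_latest_struct, rt_latest_dose

df: pd.DataFrame = ...  # the converted-data DataFrame produced by pydicer

# Keep only the latest structure set (plus its linked image); the keyword
# argument filters on a top-level DICOM attribute and is hypothetical here.
df_structs = rt_latest_struct(df, StudyDescription="Planning")

# The same pattern for the latest RTDOSE and its linked plan/structure/image.
df_doses = rt_latest_dose(df)
```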
33 changes: 24 additions & 9 deletions pydicer/dataset/nnunet.py
@@ -45,7 +45,7 @@ def __init__(
nnunet_description: str = "",
dataset_name: str = CONVERTED_DIR_NAME,
image_modality: str = "CT",
mapping_id=DEFAULT_MAPPING_ID,
mapping_id: str = DEFAULT_MAPPING_ID,
):
"""Prepare a dataset to train models using nnUNet.

@@ -219,12 +219,16 @@ def check_duplicates_train_test(self):
"""

if len(self.training_cases) == 0:
raise SystemError("training_cases are empty, run split_dataset function first.")
raise SystemError(
"training_cases are empty, run split_dataset function first."
)

img_stats = []

df = read_converted_data(self.working_directory, dataset_name=self.dataset_name)
df_images = df[(df.modality == "CT") | (df.modality == "MR") | (df.modality == "PT")]
df_images = df[
(df.modality == "CT") | (df.modality == "MR") | (df.modality == "PT")
]

for case in self.training_cases + self.testing_cases:
df_pat = df_images[df_images.patient_id == case]
@@ -252,7 +256,9 @@ def check_duplicates_train_test(self):

# Check to see if we have any duplicate image spacing and sizes, if so inspect these
# further
duplicated_rows = df_img_stats.duplicated(subset=["spacing", "size"], keep=False)
duplicated_rows = df_img_stats.duplicated(
subset=["spacing", "size"], keep=False
)
df_img_stats["voxel_sum"] = df_img_stats.apply(
lambda row: sitk.GetArrayFromImage(sitk.ReadImage(row.img_path)).sum()
if row.name in duplicated_rows.index
@@ -342,7 +348,9 @@ def check_structure_names(self) -> pd.DataFrame:
print(f"Structure {s} is missing for patients: {missing_pats}")

incomplete_structures.append(s)
incomplete_patients += [p for p in missing_pats if not p in incomplete_patients]
incomplete_patients += [
p for p in missing_pats if p not in incomplete_patients
]

if incomplete_structures:
print(
@@ -383,7 +391,8 @@ def check_overlapping_structures(self):
structure_name_j = structure_names[sj]

structure_sum = (
structure_set[structure_name_i] + structure_set[structure_name_j]
structure_set[structure_name_i]
+ structure_set[structure_name_j]
)
arr = sitk.GetArrayFromImage(structure_sum)
if arr.max() > 1:
@@ -444,7 +453,9 @@ def prepare_dataset(self) -> Path:
"""

if len(self.training_cases) == 0:
raise SystemError("training_cases are empty, run split_dataset function first.")
raise SystemError(
"training_cases are empty, run split_dataset function first."
)

# First check that all cases (in training set) have the structures which are to be learnt
df_structures = self.check_structure_names()
@@ -571,7 +582,9 @@ def generate_training_scripts(
raise FileNotFoundError(
"Ensure that the folder in which to generate the script exists."
)
script_path = script_directory.joinpath(f"train_{self.nnunet_id}_{self.nnunet_name}.sh")
script_path = script_directory.joinpath(
f"train_{self.nnunet_id}_{self.nnunet_name}.sh"
)

if isinstance(folds, str):
folds = [folds]
@@ -637,7 +650,9 @@ def train(self, script_directory: Union[str, Path] = ".", in_screen: bool = True
"""
# Make sure the script folder exists
script_directory = Path(script_directory)
script_path = script_directory.joinpath(f"train_{self.nnunet_id}_{self.nnunet_name}.sh")
script_path = script_directory.joinpath(
f"train_{self.nnunet_id}_{self.nnunet_name}.sh"
)

if not script_path.exists():
raise FileNotFoundError(
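A rough usage sketch of this class; the class name NNUNetDataset and the positional working-directory argument are assumptions, while the keyword parameters and method names appear in the diff above:

```python
from pydicer.dataset.nnunet import NNUNetDataset  # class name is an assumption

nnunet = NNUNetDataset(
    "./working",  # assumed positional working directory
    nnunet_description="Example segmentation task",
    image_modality="CT",
)

nnunet.split_dataset()  # must run first; the checks below raise
                        # SystemError while training_cases is empty
nnunet.check_duplicates_train_test()
nnunet.check_structure_names()
nnunet.check_overlapping_structures()
nnunet.prepare_dataset()
```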
4 changes: 2 additions & 2 deletions pydicer/dataset/preparation.py
@@ -1,7 +1,7 @@
import logging
import os
from pathlib import Path
from typing import Callable
from typing import Callable, Union

import pandas as pd

@@ -22,7 +22,7 @@ class PrepareDataset:
Defaults to ".".
"""

def __init__(self, working_directory="."):
def __init__(self, working_directory: Union[str, Path] = "."):
self.working_directory = Path(working_directory)

def add_object_to_dataset(self, dataset_name: str, data_object_row: pd.Series):
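The widened annotation makes both of these equivalent calls explicit; a minimal sketch with a hypothetical directory:

```python
from pathlib import Path
from pydicer.dataset.preparation import PrepareDataset

# working_directory: Union[str, Path] — both forms are normalised to a Path.
prep_from_str = PrepareDataset(working_directory="./working")
prep_from_path = PrepareDataset(working_directory=Path("./working"))
```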