From 3592fc0169a9a6e8cd18a7cb3e4bbcc2bde56e0f Mon Sep 17 00:00:00 2001 From: Daniel Al Mouiee Date: Fri, 2 Feb 2024 16:16:29 +1100 Subject: [PATCH 1/3] Added static typing to `pydicer` modules --- pydicer/config.py | 16 +++++--- pydicer/convert/data.py | 16 +++++--- pydicer/convert/headers.py | 7 +++- pydicer/dataset/functions.py | 44 +++++++++++++++------ pydicer/dataset/nnunet.py | 33 +++++++++++----- pydicer/dataset/preparation.py | 4 +- pydicer/dataset/structureset.py | 25 ++++++++---- pydicer/generate/object.py | 66 +++++++++++++++++--------------- pydicer/generate/segmentation.py | 2 +- pydicer/input/base.py | 3 +- pydicer/input/filesystem.py | 5 ++- pydicer/input/orthanc.py | 48 ++++++++++++++++------- pydicer/input/pacs.py | 21 ++++++++-- pydicer/input/tcia.py | 12 +++++- pydicer/input/test.py | 4 +- pydicer/input/web.py | 4 +- pydicer/logger.py | 11 ++++-- pydicer/preprocess/data.py | 7 +++- pydicer/quarantine.py | 6 +-- pydicer/tool.py | 9 +++-- pydicer/utils.py | 39 +++++++++++-------- pydicer/visualise/data.py | 8 +++- 22 files changed, 264 insertions(+), 126 deletions(-) diff --git a/pydicer/config.py b/pydicer/config.py index e140483..0cef9f9 100644 --- a/pydicer/config.py +++ b/pydicer/config.py @@ -15,7 +15,13 @@ "available in the .pydicer directory.", "type": int, "default": 0, - "choices": [logging.NOTSET, logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR], + "choices": [ + logging.NOTSET, + logging.DEBUG, + logging.INFO, + logging.WARNING, + logging.ERROR, + ], }, "for_fallback_linkage": { "module": "general", @@ -80,7 +86,6 @@ class PyDicerConfig: class __PyDicerConfig: # pylint: disable=invalid-name def __init__(self, working_dir=None): - if working_dir is None: raise ValueError("working_dir must be set on config init") self.working_dir = Path(working_dir) @@ -128,7 +133,7 @@ def get_working_dir(self): """ return self.instance.working_dir - def get_config(self, name): + def get_config(self, name: str) -> object: """Get the value of the config item with the specified name Args: @@ -146,7 +151,7 @@ def get_config(self, name): return self.instance.pydicer_config[name] - def set_config(self, name, value): + def set_config(self, name: str, value: object): """Set the value for the config with the given name Args: @@ -163,7 +168,8 @@ def set_config(self, name, value): if not isinstance(value, PYDICER_CONFIG[name]["type"]) and not value is None: raise ValueError( - f"Config {name} must be of type " f"{type(self.instance.pydicer_config[name])}" + f"Config {name} must be of type " + f"{type(self.instance.pydicer_config[name])}" ) self.instance.pydicer_config[name] = value diff --git a/pydicer/convert/data.py b/pydicer/convert/data.py index 4fb507d..85d130c 100644 --- a/pydicer/convert/data.py +++ b/pydicer/convert/data.py @@ -3,6 +3,8 @@ import copy import shutil from pathlib import Path +from typing import Union + import pandas as pd import numpy as np import SimpleITK as sitk @@ -51,7 +53,7 @@ ] -def get_object_type(sop_class_uid): +def get_object_type(sop_class_uid: str) -> str: """Get the type of the object (used for the output path) Args: @@ -69,7 +71,9 @@ def get_object_type(sop_class_uid): return object_type -def handle_missing_slice(files, ignore_duplicates=False): +def handle_missing_slice( + files: Union[pd.DataFrame, list], ignore_duplicates: bool = False +) -> list: """function to interpolate missing slices in an image Example usage: @@ -98,6 +102,8 @@ def handle_missing_slice(files, ignore_duplicates=False): Args: df_files (pd.DataFrame|list): the 
DataFrame which was produced by PreprocessData or list of filepaths to dicom slices + ignore_duplicates (bool, optional): specifies whether the function is to ignore + duplicate slices when handling missing ones Returns: file_paths(list): a list of the interpolated file paths @@ -231,7 +237,7 @@ def handle_missing_slice(files, ignore_duplicates=False): return df_files.file_path.tolist() -def link_via_frame_of_reference(for_uid, df_preprocess): +def link_via_frame_of_reference(for_uid: str, df_preprocess: pd.DataFrame) -> pd.DataFrame: """Find the image series linked to this FOR Args: @@ -271,7 +277,7 @@ def __init__(self, working_directory="."): self.pydicer_directory = working_directory.joinpath(PYDICER_DIR_NAME) self.output_directory = working_directory.joinpath(CONVERTED_DIR_NAME) - def add_entry(self, entry): + def add_entry(self, entry: dict): """Add an entry of a converted data object to the patient's converted dataframe. Args: @@ -308,7 +314,7 @@ def add_entry(self, entry): df_pat_data = df_pat_data.reset_index(drop=True) df_pat_data.to_csv(converted_df_path) - def convert(self, patient=None, force=True): + def convert(self, patient: Union[str, list] = None, force: bool = True): """Converts the DICOM which was preprocessed into the pydicer output directory. Args: diff --git a/pydicer/convert/headers.py b/pydicer/convert/headers.py index 499643b..bf3144b 100644 --- a/pydicer/convert/headers.py +++ b/pydicer/convert/headers.py @@ -1,11 +1,16 @@ import logging import json +from typing import Union +from pathlib import Path + import pydicom logger = logging.getLogger(__name__) -def convert_dicom_headers(dcm_file, binary_path, json_file): +def convert_dicom_headers( + dcm_file: Union[str, Path], binary_path: str, json_file: Union[str, Path] +): """Save the DICOM Headers as a JSON file Args: diff --git a/pydicer/dataset/functions.py b/pydicer/dataset/functions.py index 7abc957..ec19cb8 100644 --- a/pydicer/dataset/functions.py +++ b/pydicer/dataset/functions.py @@ -7,7 +7,7 @@ logger = logging.getLogger(__name__) -def rt_latest_struct(df, **kwargs): +def rt_latest_struct(df: pd.DataFrame, **kwargs) -> pd.DataFrame: """Select the latest Structure set and the image which it is linked to. You can specify keyword arguments to for a match on any top level DICOM attributes. You may also supply lists of values to these, one of which should match to select that series. @@ -91,18 +91,24 @@ def rt_latest_struct(df, **kwargs): keep_rows.append(struct_row.name) # Track index of row to keep # Find the linked image - df_linked_img = df[df["sop_instance_uid"] == struct_row.referenced_sop_instance_uid] + df_linked_img = df[ + df["sop_instance_uid"] == struct_row.referenced_sop_instance_uid + ] if len(df_linked_img) == 0: - logger.warning("No linked images found for structure: %s", struct_row.hashed_uid) + logger.warning( + "No linked images found for structure: %s", struct_row.hashed_uid + ) continue - keep_rows.append(df_linked_img.iloc[0].name) # Keep the index of the row of the image too + keep_rows.append( + df_linked_img.iloc[0].name + ) # Keep the index of the row of the image too return df.loc[keep_rows] -def rt_latest_dose(df, **kwargs): +def rt_latest_dose(df: pd.DataFrame, **kwargs) -> pd.DataFrame: """Select the latest RTDOSE and the image, structure and plan which it is linked to. You can specify keyword arguments to for a match on any top level DICOM attributes. You may also supply lists of values to these, one of which should match to select that series. 
@@ -191,16 +197,22 @@ def rt_latest_dose(df, **kwargs): keep_rows.append(dose_row.name) # Track index of row of dose to keep # Find the linked plan - df_linked_plan = df[df["sop_instance_uid"] == dose_row.referenced_sop_instance_uid] + df_linked_plan = df[ + df["sop_instance_uid"] == dose_row.referenced_sop_instance_uid + ] if len(df_linked_plan) == 0: - logger.warning("No linked plans found for dose: %s", dose_row.sop_instance_uid) + logger.warning( + "No linked plans found for dose: %s", dose_row.sop_instance_uid + ) continue # Find the linked structure set plan_row = df_linked_plan.iloc[0] keep_rows.append(plan_row.name) # Keep the index of the row of the plan - df_linked_struct = df[df["sop_instance_uid"] == plan_row.referenced_sop_instance_uid] + df_linked_struct = df[ + df["sop_instance_uid"] == plan_row.referenced_sop_instance_uid + ] if len(df_linked_struct) == 0: # Try to link via Frame of Reference instead @@ -209,18 +221,26 @@ def rt_latest_dose(df, **kwargs): ] if len(df_linked_struct) == 0: - logger.warning("No structures found for plan: %s", plan_row.sop_instance_uid) + logger.warning( + "No structures found for plan: %s", plan_row.sop_instance_uid + ) continue # Find the linked image struct_row = df_linked_struct.iloc[0] keep_rows.append(struct_row.name) # Keep the index of the row of the structure - df_linked_img = df[df["sop_instance_uid"] == struct_row.referenced_sop_instance_uid] + df_linked_img = df[ + df["sop_instance_uid"] == struct_row.referenced_sop_instance_uid + ] if len(df_linked_img) == 0: - logger.warning("No linked images found for structure: %s", struct_row.hashed_uid) + logger.warning( + "No linked images found for structure: %s", struct_row.hashed_uid + ) continue - keep_rows.append(df_linked_img.iloc[0].name) # Keep the index of the row of the image too + keep_rows.append( + df_linked_img.iloc[0].name + ) # Keep the index of the row of the image too return df.loc[keep_rows] diff --git a/pydicer/dataset/nnunet.py b/pydicer/dataset/nnunet.py index 2befca5..67606e5 100644 --- a/pydicer/dataset/nnunet.py +++ b/pydicer/dataset/nnunet.py @@ -45,7 +45,7 @@ def __init__( nnunet_description: str = "", dataset_name: str = CONVERTED_DIR_NAME, image_modality: str = "CT", - mapping_id=DEFAULT_MAPPING_ID, + mapping_id: str = DEFAULT_MAPPING_ID, ): """Prepare a dataset to train models using nnUNet. @@ -219,12 +219,16 @@ def check_duplicates_train_test(self): """ if len(self.training_cases) == 0: - raise SystemError("training_cases are empty, run split_dataset function first.") + raise SystemError( + "training_cases are empty, run split_dataset function first." 
+ ) img_stats = [] df = read_converted_data(self.working_directory, dataset_name=self.dataset_name) - df_images = df[(df.modality == "CT") | (df.modality == "MR") | (df.modality == "PT")] + df_images = df[ + (df.modality == "CT") | (df.modality == "MR") | (df.modality == "PT") + ] for case in self.training_cases + self.testing_cases: df_pat = df_images[df_images.patient_id == case] @@ -252,7 +256,9 @@ def check_duplicates_train_test(self): # Check to see if we have any duplicate image spacing and sizes, if so inspect these # further - duplicated_rows = df_img_stats.duplicated(subset=["spacing", "size"], keep=False) + duplicated_rows = df_img_stats.duplicated( + subset=["spacing", "size"], keep=False + ) df_img_stats["voxel_sum"] = df_img_stats.apply( lambda row: sitk.GetArrayFromImage(sitk.ReadImage(row.img_path)).sum() if row.name in duplicated_rows.index @@ -342,7 +348,9 @@ def check_structure_names(self) -> pd.DataFrame: print(f"Structure {s} is missing for patients: {missing_pats}") incomplete_structures.append(s) - incomplete_patients += [p for p in missing_pats if not p in incomplete_patients] + incomplete_patients += [ + p for p in missing_pats if p not in incomplete_patients + ] if incomplete_structures: print( @@ -383,7 +391,8 @@ def check_overlapping_structures(self): structure_name_j = structure_names[sj] structure_sum = ( - structure_set[structure_name_i] + structure_set[structure_name_j] + structure_set[structure_name_i] + + structure_set[structure_name_j] ) arr = sitk.GetArrayFromImage(structure_sum) if arr.max() > 1: @@ -444,7 +453,9 @@ def prepare_dataset(self) -> Path: """ if len(self.training_cases) == 0: - raise SystemError("training_cases are empty, run split_dataset function first.") + raise SystemError( + "training_cases are empty, run split_dataset function first." + ) # First check that all cases (in training set) have the structures which are to be learnt df_structures = self.check_structure_names() @@ -571,7 +582,9 @@ def generate_training_scripts( raise FileNotFoundError( "Ensure that the folder in which to generate the script exists." ) - script_path = script_directory.joinpath(f"train_{self.nnunet_id}_{self.nnunet_name}.sh") + script_path = script_directory.joinpath( + f"train_{self.nnunet_id}_{self.nnunet_name}.sh" + ) if isinstance(folds, str): folds = [folds] @@ -637,7 +650,9 @@ def train(self, script_directory: Union[str, Path] = ".", in_screen: bool = True """ # Make sure the script folder exists script_directory = Path(script_directory) - script_path = script_directory.joinpath(f"train_{self.nnunet_id}_{self.nnunet_name}.sh") + script_path = script_directory.joinpath( + f"train_{self.nnunet_id}_{self.nnunet_name}.sh" + ) if not script_path.exists(): raise FileNotFoundError( diff --git a/pydicer/dataset/preparation.py b/pydicer/dataset/preparation.py index 23d15bc..2d73a50 100644 --- a/pydicer/dataset/preparation.py +++ b/pydicer/dataset/preparation.py @@ -1,7 +1,7 @@ import logging import os from pathlib import Path -from typing import Callable +from typing import Callable, Union import pandas as pd @@ -22,7 +22,7 @@ class PrepareDataset: Defaults to ".". 
""" - def __init__(self, working_directory="."): + def __init__(self, working_directory: Union[str, Path] = "."): self.working_directory = Path(working_directory) def add_object_to_dataset(self, dataset_name: str, data_object_row: pd.Series): diff --git a/pydicer/dataset/structureset.py b/pydicer/dataset/structureset.py index 35038a3..f65fac5 100644 --- a/pydicer/dataset/structureset.py +++ b/pydicer/dataset/structureset.py @@ -10,7 +10,9 @@ logger = logging.getLogger(__name__) -def get_mapping_for_structure_set(structure_set_row: pd.Series, mapping_id: str): +def get_mapping_for_structure_set( + structure_set_row: pd.Series, mapping_id: str +) -> dict: """Searches the folder hierarchy to find a structure name mapping file with the given ID. Args: @@ -52,7 +54,8 @@ def __init__(self, structure_set_row, mapping_id=DEFAULT_MAPPING_ID): self.structure_set_id = structure_set_row.hashed_uid self.structure_names = [ - s.name.replace(".nii.gz", "") for s in self.structure_set_path.glob("*.nii.gz") + s.name.replace(".nii.gz", "") + for s in self.structure_set_path.glob("*.nii.gz") ] self.unmapped_structure_names = self.structure_names @@ -61,7 +64,9 @@ def __init__(self, structure_set_row, mapping_id=DEFAULT_MAPPING_ID): # Check if we can find a mapping for this structure set, if not we'll just used the # unmapped structure names if mapping_id is not None: - self.structure_mapping = get_mapping_for_structure_set(structure_set_row, mapping_id) + self.structure_mapping = get_mapping_for_structure_set( + structure_set_row, mapping_id + ) if self.structure_mapping is None: logger.warning("No mapping file found with id %s", mapping_id) @@ -71,7 +76,7 @@ def __init__(self, structure_set_row, mapping_id=DEFAULT_MAPPING_ID): self.cache = {} - def get_mapped_structure_name(self, item): + def get_mapped_structure_name(self, item: str) -> str: """Get the structure set specific name for a structure that may have been mapped. Args: @@ -86,7 +91,9 @@ def get_mapped_structure_name(self, item): if self.structure_mapping is not None: if item in self.structure_mapping: for variation in self.structure_mapping[item]: - variation_path = self.structure_set_path.joinpath(f"{variation}.nii.gz") + variation_path = self.structure_set_path.joinpath( + f"{variation}.nii.gz" + ) if variation_path.exists(): # Found variation, let's use that file... # TODO an issue would occur if there were multiple files that would match @@ -96,7 +103,7 @@ def get_mapped_structure_name(self, item): return structure_name - def get_standardised_structure_name(self, item): + def get_standardised_structure_name(self, item: str) -> str: """Get the standardised name for a structure that is present in this structure set. Args: @@ -150,7 +157,7 @@ def values(self): def items(self): return [(s, self[s]) for s in self.structure_names] - def get_unmapped_structures(self): + def get_unmapped_structures(self) -> list: """Get a list of structures for which no structure was found based on the mapping. If no mapping is being used this will always be empty. 
@@ -160,7 +167,9 @@ def get_unmapped_structures(self): missing_mappings = [] for k in self.keys(): structure_name = self.get_mapped_structure_name(k) - structure_path = self.structure_set_path.joinpath(f"{structure_name}.nii.gz") + structure_path = self.structure_set_path.joinpath( + f"{structure_name}.nii.gz" + ) if not structure_path.exists(): missing_mappings.append(k) diff --git a/pydicer/generate/object.py b/pydicer/generate/object.py index a22f3c0..58cd9fc 100644 --- a/pydicer/generate/object.py +++ b/pydicer/generate/object.py @@ -1,4 +1,6 @@ import logging +from typing import Union +from pathlib import Path import pandas as pd import SimpleITK as sitk @@ -10,14 +12,14 @@ def add_object( - working_directory, - object_id, - patient_id, - object_type, - modality, - for_uid=None, - referenced_sop_instance_uid=None, - datasets=None, + working_directory: Path, + object_id: str, + patient_id: str, + object_type: str, + modality: str, + for_uid: str = None, + referenced_sop_instance_uid: str = None, + datasets: Union[str, list] = None, ): """Add a generated object to the project. @@ -155,7 +157,9 @@ def add_object( df_converted.to_csv(patient_directory.joinpath("converted.csv")) -def get_linked_for_and_ref_uid(working_directory, patient_id, linked_obj): +def get_linked_for_and_ref_uid( + working_directory: Path, patient_id: str, linked_obj: Union[str, pd.Series] = None +) -> tuple: """Determine the linked frame of reference UID and SOP instance UID Args: @@ -194,14 +198,14 @@ def get_linked_for_and_ref_uid(working_directory, patient_id, linked_obj): def add_image_object( - working_directory, - image, - image_id, - modality, - patient_id, - linked_image=None, - for_uid=None, - datasets=None, + working_directory: Path, + image: sitk.Image, + image_id: str, + modality: str, + patient_id: str, + linked_image: pd.Series = None, + for_uid: str = None, + datasets: Union[list, str] = None, ): """Add a generated image object to the project. @@ -259,13 +263,13 @@ def add_image_object( def add_structure_object( - working_directory, - structures, - structure_id, - patient_id, - linked_image=None, - for_uid=None, - datasets=None, + working_directory: Path, + structures: dict, + structure_id: str, + patient_id: str, + linked_image: Union[str, pd.Series] = None, + for_uid: str = None, + datasets: Union[list, str] = None, ): """Add a generated structure object to the project. @@ -325,13 +329,13 @@ def add_structure_object( def add_dose_object( - working_directory, - dose, - dose_id, - patient_id, - linked_plan=None, - for_uid=None, - datasets=None, + working_directory: Path, + dose: sitk.Image, + dose_id: str, + patient_id: str, + linked_plan: Union[str, pd.Series] = None, + for_uid: str = None, + datasets: Union[list, str] = None, ): """Add a generated dose object to the project. diff --git a/pydicer/generate/segmentation.py b/pydicer/generate/segmentation.py index 49f8cb7..1618ee2 100644 --- a/pydicer/generate/segmentation.py +++ b/pydicer/generate/segmentation.py @@ -117,7 +117,7 @@ def read_all_segmentation_logs( dataset_name: str = CONVERTED_DIR_NAME, segment_id: str = None, modality: str = None, -): +) -> pd.DataFrame: """Read all auto-segmentation logs in a dataset. Args: dataset_name (str): The name of the dataset to read for. 
diff --git a/pydicer/input/base.py b/pydicer/input/base.py index 677b9a8..2735b88 100644 --- a/pydicer/input/base.py +++ b/pydicer/input/base.py @@ -1,5 +1,6 @@ import tempfile import logging +from typing import Union import abc from pathlib import Path @@ -9,7 +10,7 @@ class InputBase(abc.ABC): - def __init__(self, working_directory=None): + def __init__(self, working_directory: Union[str, Path] = None): """ Base class for input modules. diff --git a/pydicer/input/filesystem.py b/pydicer/input/filesystem.py index 00bdc19..8cbef44 100644 --- a/pydicer/input/filesystem.py +++ b/pydicer/input/filesystem.py @@ -1,8 +1,11 @@ +from pathlib import Path +from typing import Union + from pydicer.input.base import InputBase class FileSystemInput(InputBase): - def __init__(self, directory): + def __init__(self, directory: Union[str, Path]): """ Class for inputing files from the file system diff --git a/pydicer/input/orthanc.py b/pydicer/input/orthanc.py index 0ac1926..f0e8646 100644 --- a/pydicer/input/orthanc.py +++ b/pydicer/input/orthanc.py @@ -1,5 +1,7 @@ -import logging from io import BytesIO +import logging +from typing import Union +from Pathlib import Path import pydicom from pyorthanc.deprecated.client import Orthanc @@ -10,7 +12,7 @@ logger = logging.getLogger(__name__) -def adapt_dataset_from_bytes(blob): +def adapt_dataset_from_bytes(blob: bytes) -> pydicom.Dataset: """Convert bytes coming from Orthanc to DICOM dataset Args: @@ -24,7 +26,14 @@ class OrthancInput(InputBase): - def __init__(self, host, port, username=None, password=None, working_directory=None): + def __init__( + self, + host: str, + port: int, + username: str = None, + password: str = None, + working_directory: Union[str, Path] = None, + ): """Class for fetching files from Orthanc. Args: @@ -54,7 +63,9 @@ def __init__(self, host, port, username=None, password=None, working_directory=N # connection error if we can't connect to the Orthanc self.orthanc.c_find({"Level": "Patient", "Query": {"PatientID": "XXX"}}) - def fetch_data(self, patients, modalities=None): + def fetch_data( + self, patients: Union[list, str], modalities: Union[list, str] = None + ): """Download the DICOM data from Orthanc Args: @@ -74,7 +85,6 @@ def fetch_data(self, patients, modalities=None): modalities = [modalities] for patient in get_iterator(patients, unit="patients", name="Orthanc Fetch"): - # Find the Orthanc ID for this patient orthanc_patient_ids = self.orthanc.c_find( {"Level": "Patient", "Query": {"PatientID": patient}} ) @@ -86,34 +96,44 @@ def fetch_data(self, patients, modalities=None): if len(orthanc_patient_ids) > 1: logger.warning( - "Patient returned multple Orthanc IDs: %s. Selecting first only", patient + "Patient returned multiple Orthanc IDs: %s. 
Selecting first only", + patient, ) orthanc_patient_id = orthanc_patient_ids[0] - patient_information = self.orthanc.get_patient_information(orthanc_patient_id) + patient_information = self.orthanc.get_patient_information( + orthanc_patient_id + ) patient_id = patient_information["MainDicomTags"]["PatientID"] # Loop over each study for this patient study_identifiers = patient_information["Studies"] for study_identifier in study_identifiers: - # Loop over each series in this study study_information = self.orthanc.get_study_information(study_identifier) series_identifiers = study_information["Series"] for series_identifier in series_identifiers: - series_information = self.orthanc.get_series_information(series_identifier) + series_information = self.orthanc.get_series_information( + series_identifier + ) # Skip if this isn't one of the modalities we want modality = series_information["MainDicomTags"]["Modality"] if modalities is not None and not modality in modalities: continue - series_information = self.orthanc.get_series_information(series_identifier) - series_instance_uid = series_information["MainDicomTags"]["SeriesInstanceUID"] + series_information = self.orthanc.get_series_information( + series_identifier + ) + series_instance_uid = series_information["MainDicomTags"][ + "SeriesInstanceUID" + ] # Create the output directory for this series - series_path = self.working_directory.joinpath(patient_id, series_instance_uid) + series_path = self.working_directory.joinpath( + patient_id, series_instance_uid + ) series_path.mkdir(exist_ok=True, parents=True) # Loop over each instance in this series @@ -127,7 +147,9 @@ def fetch_data(self, patients, modalities=None): f = self.orthanc.get_instance_file(instance_identifier) ds = adapt_dataset_from_bytes(f) - sop_instance_uid = instance_information["MainDicomTags"]["SOPInstanceUID"] + sop_instance_uid = instance_information["MainDicomTags"][ + "SOPInstanceUID" + ] ds_file_name = f"{modality}.{sop_instance_uid}.dcm" ds_path = series_path.joinpath(ds_file_name) diff --git a/pydicer/input/pacs.py b/pydicer/input/pacs.py index a36d191..58402f5 100644 --- a/pydicer/input/pacs.py +++ b/pydicer/input/pacs.py @@ -1,4 +1,7 @@ import os +from pathlib import Path +from typing import Union + import pydicom from platipy.dicom.communication.connector import DicomConnector @@ -7,7 +10,13 @@ class DICOMPACSInput(InputBase): - def __init__(self, host, port, ae_title=None, working_directory=None): + def __init__( + self, + host: str, + port: str, + ae_title: str = None, + working_directory: Union[str, Path] = None, + ): """Class for fetching files from DICOM PACS. Currently only supports C-GET commands to fetch the data. @@ -26,13 +35,18 @@ def __init__(self, host, port, ae_title=None, working_directory=None): super().__init__(working_directory) self.dicom_connector = DicomConnector( - host=host, port=port, ae_title=ae_title, output_directory=self.working_directory + host=host, + port=port, + ae_title=ae_title, + output_directory=self.working_directory, ) if not self.dicom_connector.verify(): raise ConnectionError("Unable to connect to DICOM PACS.") - def fetch_data(self, patients, modalities=None): + def fetch_data( + self, patients: Union[list, str], modalities: Union[list, str] = None + ): """Download the DICOM data from the PACS. 
Args: @@ -52,7 +66,6 @@ def fetch_data(self, patients, modalities=None): modalities = [modalities] for patient in patients: - dataset = pydicom.Dataset() dataset.PatientID = patient dataset.PatientName = "" diff --git a/pydicer/input/tcia.py b/pydicer/input/tcia.py index ecafe0c..3c7bc8b 100644 --- a/pydicer/input/tcia.py +++ b/pydicer/input/tcia.py @@ -1,10 +1,19 @@ +from pathlib import Path +from typing import Union + from platipy.dicom.download import tcia from pydicer.input.base import InputBase class TCIAInput(InputBase): - def __init__(self, collection, patient_ids, modalities=None, working_directory=None): + def __init__( + self, + collection: str, + patient_ids: list, + modalities: list = None, + working_directory: Union[str, Path] = None, + ): """ Input class that interfaces with the TCIA API @@ -14,7 +23,7 @@ def __init__(self, collection, patient_ids, modalities=None, working_directory=N fetched modalities (list, optional): A list of strings defining the modalites to fetch. Will fetch all modalities available if not specified. - working_directory (str): (str|pathlib.Path, optional): The working directory in which + working_directory (str|pathlib.Path, optional): The working directory in which to store the data fetched. Defaults to a temp directory. """ super().__init__(working_directory) diff --git a/pydicer/input/test.py b/pydicer/input/test.py index aa1fb71..50be0cd 100644 --- a/pydicer/input/test.py +++ b/pydicer/input/test.py @@ -1,10 +1,13 @@ +from pathlib import Path +from typing import Union + from pydicer.input.web import WebInput class TestInput(WebInput): __test__ = False # pytest will try to use this as a test class without this - def __init__(self, working_directory=None): + def __init__(self, working_directory: Union[str, Path] = None): """ A test input class to download example data from zenodo diff --git a/pydicer/input/web.py b/pydicer/input/web.py index d10c74c..08449fa 100644 --- a/pydicer/input/web.py +++ b/pydicer/input/web.py @@ -1,4 +1,6 @@ import logging +from pathlib import Path +from typing import Union from pydicer.input.base import InputBase from pydicer.utils import download_and_extract_zip_file @@ -7,7 +9,7 @@ class WebInput(InputBase): - def __init__(self, data_url, working_directory=None): + def __init__(self, data_url: str, working_directory: Union[str, Path] = None): """ Class for downloading and saving input data off the internet diff --git a/pydicer/logger.py b/pydicer/logger.py index 2220e5c..d0a5580 100644 --- a/pydicer/logger.py +++ b/pydicer/logger.py @@ -23,12 +23,13 @@ def __init__(self, pat_id, data_directory, force=True): df_pat_log = pd.DataFrame(columns=SUMMARY_CSV_COLS) df_pat_log.to_csv(self.summary_csv_path, index=False) - def log_module_error(self, module, hashed_uid, error_log): + def log_module_error(self, module: str, hashed_uid: str, error_log: str): """Function to log errors for a specific pydicer module Args: module (str): pydicer module to log error for in CSV - error (str): error to log in CSV + hashed_uid (str): hashed UID of the patient being logged to the error CSV + error_log (str): error to log in CSV """ end_time = dt.now() df_error = pd.DataFrame( @@ -37,11 +38,12 @@ def log_module_error(self, module, hashed_uid, error_log): ) df_error.to_csv(self.summary_csv_path, header=False, mode="a", index=False) - def eval_module_process(self, module, hashed_uid): + def eval_module_process(self, module: str, hashed_uid: str): """Function to log if any patient had issues for a specific pydicer module Args: module (str): pydicer module to check if no errors were generated for all 
patients + hashed_uid (str): hashed UID of the patient being logged to the error CSV """ end_time = dt.now() @@ -53,7 +55,8 @@ def eval_module_process(self, module, hashed_uid): ] if len(df_summary_mod) == 0: df_final_summary = pd.DataFrame( - [[module, hashed_uid, 0, "", self.start_time, end_time]], columns=SUMMARY_CSV_COLS + [[module, hashed_uid, 0, "", self.start_time, end_time]], + columns=SUMMARY_CSV_COLS, ) df_final_summary.to_csv( self.summary_csv_path, diff --git a/pydicer/preprocess/data.py b/pydicer/preprocess/data.py index c7a18fe..5a75e27 100644 --- a/pydicer/preprocess/data.py +++ b/pydicer/preprocess/data.py @@ -1,4 +1,5 @@ import logging +from typing import Union from pathlib import Path import pandas as pd @@ -36,7 +37,7 @@ def __init__(self, working_directory): self.pydicer_directory = working_directory.joinpath(PYDICER_DIR_NAME) self.pydicer_directory.mkdir(exist_ok=True) - def scan_file(self, file): + def scan_file(self, file: Union[str, Path]) -> dict: """Scan a DICOM file. Args: @@ -147,7 +148,9 @@ def scan_file(self, file): return None - def preprocess(self, input_directory, force=True): + def preprocess( + self, input_directory: Union(Path, list), force: bool = True + ) -> pd.DataFrame: """ Function to preprocess information regarding the data located in an Input working directory diff --git a/pydicer/quarantine.py b/pydicer/quarantine.py index 1123a84..37ae8ec 100644 --- a/pydicer/quarantine.py +++ b/pydicer/quarantine.py @@ -9,11 +9,11 @@ QUARATINE_DICOM_KEYS = ["PatientID", "Modality", "SOPInstanceUID", "SeriesDescription"] -def copy_file_to_quarantine(file, working_directory, error_msg): +def copy_file_to_quarantine(file: Path, working_directory: Path, error_msg: str): """Move a DICOM file that couldn't be processed into the quarantine directory Args: - file (Path): DICOM path to be moved into quarantine + file (pathlib.Path): DICOM path to be moved into quarantine working_directory (pathlib.Path): Main working directory for pydicer error_msg (str): error message associated with the quarantined file """ @@ -61,7 +61,7 @@ def copy_file_to_quarantine(file, working_directory, error_msg): df_summary.to_csv(summary_file) -def read_quarantined_data(working_directory: Path): +def read_quarantined_data(working_directory: Path) -> pd.DataFrame: """A function to read the data from the quarantine summary. Args: diff --git a/pydicer/tool.py b/pydicer/tool.py index e2a0e37..ac7b0bc 100644 --- a/pydicer/tool.py +++ b/pydicer/tool.py @@ -2,6 +2,7 @@ import logging from logging.handlers import RotatingFileHandler from pathlib import Path +from typing import Union import pandas as pd @@ -84,7 +85,7 @@ def __init__(self, working_directory="."): self.dataset = PrepareDataset(self.working_directory) self.analyse = AnalyseData(self.working_directory) - def set_verbosity(self, verbosity): + def set_verbosity(self, verbosity: int): """Set's the verbosity of the tool to the std out (console). When 0 (not set) the tool will display a progress bar. Other values indicate Python's build in logging levels: - DEBUG: 10 @@ -137,7 +138,7 @@ def update_logging(self): console_handler.setLevel(verbosity) logger.addHandler(console_handler) - def add_input(self, input_obj): + def add_input(self, input_obj: Union[str, Path, InputBase]): """Add an input location containing DICOM data. 
Must a str, pathlib.Path or InputBase object, such as: - FileSystemInput @@ -163,7 +164,7 @@ def add_input(self, input_obj): "input_obj must be of type str, pathlib.Path or inherit InputBase" ) - def preprocess(self, force=True): + def preprocess(self, force: bool = True): """Preprocess the DICOM data in preparation for conversion Args: @@ -181,7 +182,7 @@ def preprocess(self, force=True): self.preprocessed_data = read_preprocessed_data(self.working_directory) - def run_pipeline(self, patient=None, force=True): + def run_pipeline(self, patient: Union[str, list] = None, force: bool = True): """Runs the entire conversion pipeline, including computation of DVHs and first-order radiomics. diff --git a/pydicer/utils.py b/pydicer/utils.py index 23c3b0d..f3dce30 100644 --- a/pydicer/utils.py +++ b/pydicer/utils.py @@ -8,6 +8,7 @@ import shutil from datetime import datetime from pathlib import Path +from typing import Union import pandas as pd import SimpleITK as sitk @@ -20,7 +21,7 @@ logger = logging.getLogger(__name__) -def hash_uid(uid, truncate=6): +def hash_uid(uid: str, truncate: int = 6) -> str: """Hash a UID and truncate it Args: @@ -36,7 +37,7 @@ def hash_uid(uid, truncate=6): return hash_sha.hexdigest()[:truncate] -def determine_dcm_datetime(ds, require_time=False): +def determine_dcm_datetime(ds: pydicom.Dataset, require_time: bool = False) -> datetime: """Get a date/time value from a DICOM dataset. Will attempt to pull from SeriesDate/SeriesTime field first. Will fallback to StudyDate/StudyTime or InstanceCreationDate/InstanceCreationTime if not available. @@ -70,7 +71,11 @@ def determine_dcm_datetime(ds, require_time=False): return None -def load_object_metadata(row: pd.Series, keep_tags=None, remove_tags=None): +def load_object_metadata( + row: pd.Series, + keep_tags: Union[str, list] = None, + remove_tags: Union[str, list] = None, +) -> pydicom.Dataset: """Loads the object's metadata Args: @@ -147,7 +152,7 @@ def load_object_metadata(row: pd.Series, keep_tags=None, remove_tags=None): return pydicom.Dataset.from_json(ds_dict, bulk_data_uri_handler=lambda _: None) -def load_dvh(row, struct_hash=None): +def load_dvh(row: pd.Series, struct_hash: Union[list, str] = None) -> pd.DataFrame: """Loads an object's Dose Volume Histogram (DVH) Args: @@ -201,7 +206,7 @@ def load_dvh(row, struct_hash=None): return df_result -def read_preprocessed_data(working_directory: Path): +def read_preprocessed_data(working_directory: Path) -> pd.DataFrame: """Reads the pydicer preprocessed data Args: @@ -230,10 +235,10 @@ def read_preprocessed_data(working_directory: Path): def read_converted_data( working_directory: Path, - dataset_name=CONVERTED_DIR_NAME, - patients=None, - join_working_directory=True, -): + dataset_name: str = CONVERTED_DIR_NAME, + patients: list = None, + join_working_directory: bool = True, +) -> pd.DataFrame: """Read the converted data frame from the supplied data directory. Args: @@ -287,7 +292,7 @@ def read_converted_data( return df.reset_index(drop=True) -def parse_patient_kwarg(patient): +def parse_patient_kwarg(patient: Union[list, str]) -> list: """Helper function to prepare patient list from kwarg used in functions throughout pydicer. Args: @@ -317,7 +322,7 @@ def parse_patient_kwarg(patient): return patient -def read_simple_itk_image(row): +def read_simple_itk_image(row: pd.Series) -> sitk.Image: """Reads the SimpleITK Image object given a converted dataframe row. 
Args: @@ -338,7 +343,9 @@ def read_simple_itk_image(row): return sitk.ReadImage(str(nifti_path)) -def get_iterator(iterable, length=None, unit="it", name=None): +def get_iterator( + iterable, length: int = None, unit: str = "it", name: str = None +): """Get the appropriate iterator based on the level of verbosity configured. Args: @@ -369,7 +376,7 @@ def get_iterator(iterable, length=None, unit="it", name=None): return iterator -def map_structure_name(struct_name, struct_map_dict): +def map_structure_name(struct_name: str, struct_map_dict: dict) -> str: """Function to map a structure's name according to a mapping dictionary Args: @@ -534,7 +541,7 @@ def add_structure_name_mapping( json.dump(mapping_dict, structures_map_file, ensure_ascii=False, indent=4) -def download_and_extract_zip_file(zip_url, output_directory): +def download_and_extract_zip_file(zip_url: str, output_directory: Union[str, Path]): """Downloads a zip file from the URL specified and extracts the contents to the output directory. @@ -555,7 +562,9 @@ def download_and_extract_zip_file(zip_url, output_directory): zip_ref.extractall(output_directory) -def fetch_converted_test_data(working_directory=None, dataset="HNSCC"): +def fetch_converted_test_data( + working_directory: Union[str, Path] = None, dataset: str = "HNSCC" +) -> Path: """Fetch some public data which has already been converted using PyDicer. Useful for unit testing as well as examples. diff --git a/pydicer/visualise/data.py b/pydicer/visualise/data.py index 28f1ee3..664d6c9 100644 --- a/pydicer/visualise/data.py +++ b/pydicer/visualise/data.py @@ -1,4 +1,5 @@ import logging +from typing import Union from pathlib import Path import SimpleITK as sitk import matplotlib @@ -31,7 +32,12 @@ def __init__(self, working_directory="."): self.working_directory = Path(working_directory) self.output_directory = self.working_directory.joinpath(CONVERTED_DIR_NAME) - def visualise(self, dataset_name=CONVERTED_DIR_NAME, patient=None, force=True): + def visualise( + self, + dataset_name: str = CONVERTED_DIR_NAME, + patient: Union[list, str] = None, + force: bool = True, + ): """Visualise the data in the working directory. PNG files are generates providing a snapshot of the various data objects. 
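
Note for reviewers: the annotations in this patch are additive and do not change runtime behaviour. As a quick illustration of the intent, a parameter annotated `Union[str, Path]` (as on `WebInput.working_directory` above) accepts either form at the call site — a minimal sketch, with a placeholder URL and path rather than real project data:

    from pathlib import Path

    from pydicer.input.web import WebInput

    # working_directory is annotated Union[str, Path], so both calls type-check
    WebInput("https://example.com/dicom.zip", "./testdata")
    WebInput("https://example.com/dicom.zip", Path("./testdata"))
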
From 2ad01d6fac0a40551e3a44a22d019f69e3ca6ce1 Mon Sep 17 00:00:00 2001 From: Daniel Al Mouiee Date: Fri, 2 Feb 2024 16:22:40 +1100 Subject: [PATCH 2/3] Fixed `pathlib` import --- pydicer/input/orthanc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydicer/input/orthanc.py b/pydicer/input/orthanc.py index f0e8646..096a715 100644 --- a/pydicer/input/orthanc.py +++ b/pydicer/input/orthanc.py @@ -1,7 +1,7 @@ from io import BytesIO import logging from typing import Union -from Pathlib import Path +from pathlib import Path import pydicom from pyorthanc.deprecated.client import Orthanc From 27209d538765bf4a887341cdc27382da29d1e73d Mon Sep 17 00:00:00 2001 From: Daniel Al Mouiee Date: Fri, 2 Feb 2024 16:31:45 +1100 Subject: [PATCH 3/3] Fixed `Union` usage --- pydicer/preprocess/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydicer/preprocess/data.py b/pydicer/preprocess/data.py index 5a75e27..1a13748 100644 --- a/pydicer/preprocess/data.py +++ b/pydicer/preprocess/data.py @@ -149,7 +149,7 @@ def scan_file(self, file: Union[str, Path]) -> dict: return None def preprocess( - self, input_directory: Union(Path, list), force: bool = True + self, input_directory: Union[Path, list], force: bool = True ) -> pd.DataFrame: """ Function to preprocess information regarding the data located in an Input working directory
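
Note on the two follow-up fixes: Python module names are case-sensitive, so `from Pathlib import Path` raises ModuleNotFoundError, and `typing.Union` is a special form that must be subscripted with square brackets, not called like a function. A minimal standalone sketch of the corrected idioms (illustrative only, not project code):

    from pathlib import Path  # lowercase "pathlib"; "Pathlib" would not be found
    from typing import Union

    def preprocess(input_directory: Union[Path, list]) -> None:
        # Union[...] uses subscription; Union(Path, list) raises
        # TypeError: Cannot instantiate typing.Union
        ...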