Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: Adds filter sub-command #1071

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ Aside from the comments in YAML file itself the fields are described below.
| `cores` | | integer | `2` | Number of cores to run parallel processes on. |
| `file_ext` | | string | `.spm` | File extensions to search for. |
| `loading` | `channel` | string | `Height` | The channel of data to be processed, what this is will depend on the file-format you are processing and the channel you wish to process. |
| | `extract` | string | `raw` | The array to extract when loading from `.topostats` images. |
| `filter` | `run` | boolean | `true` | Whether to run the filtering stage, without this other stages won't run so leave as `true`. |
| | `threshold_method` | str | `std_dev` | Threshold method for filtering, options are `otsu`, `std_dev` or `absolute`. |
| | `otsu_threshold_multiplier` | float | `1.0` | Factor by which the derived Otsu Threshold should be scaled. |
Expand Down
5 changes: 3 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ def load_scan_dummy() -> LoadScans:
@pytest.fixture()
def load_scan_topostats_test_file(tmp_path: Path, loading_config: dict) -> LoadScans:
    """Instantiate a LoadScans object for a temporarily saved test .topostats file."""
    # Load the full contents of the .topostats file rather than a specific subset.
    loading_config["extract"] = "all"
    test_file = tmp_path / "topostats_file_test.topostats"
    return LoadScans([test_file], **loading_config)


Expand All @@ -359,9 +360,9 @@ def load_scan(loading_config: dict) -> LoadScans:


@pytest.fixture()
def load_scan_data() -> LoadScans:
def load_scan_data(loading_config: dict) -> LoadScans:
"""Instance of a LoadScans object after applying the get_data func."""
scan_data = LoadScans([RESOURCES / "test_image" / "minicircle_small.topostats"], channel="Height")
scan_data = LoadScans([RESOURCES / "test_image" / "minicircle_small.topostats"], **loading_config)
scan_data.get_data()
return scan_data

Expand Down
Binary file modified tests/resources/test_image/minicircle_small.topostats
Binary file not shown.
27 changes: 23 additions & 4 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
}

# pylint: disable=protected-access
# pylint: disable=too-many-arguments
# pylint: disable=too-many-lines
# pylint: disable=too-many-positional-arguments

Expand Down Expand Up @@ -1118,11 +1119,21 @@ def test_hdf5_to_dict_nested_dict_group_path(tmp_path: Path) -> None:


@pytest.mark.parametrize(
("image", "pixel_to_nm_scaling", "grain_mask_above", "grain_mask_below", "grain_trace_data"),
(
"image",
"pixel_to_nm_scaling",
"filename",
"img_path",
"grain_mask_above",
"grain_mask_below",
"grain_trace_data",
),
[
pytest.param(
np.arange(0, 100).reshape(10, 10),
3.14159265,
"below_grain_mask_with_grain_trace_data",
"./below_grain_mask_with_grain_trace_data.topostats",
None,
np.zeros((10, 10)),
{
Expand Down Expand Up @@ -1224,6 +1235,8 @@ def test_hdf5_to_dict_nested_dict_group_path(tmp_path: Path) -> None:
pytest.param(
np.arange(0, 100).reshape(10, 10),
3.14159265,
"above_grain_mask_without_grain_trace_data",
"./above_grain_mask_without_grain_trace_data.topostats",
np.zeros((10, 10)),
None,
None,
Expand All @@ -1232,6 +1245,8 @@ def test_hdf5_to_dict_nested_dict_group_path(tmp_path: Path) -> None:
pytest.param(
np.arange(0, 100).reshape(10, 10),
3.14159265,
"above_and_below_grain_masks_without_grain_trace_data",
"./above_and_below_grain_masks_without_grain_trace_data.topostats",
np.zeros((10, 10)),
np.zeros((10, 10)),
None,
Expand All @@ -1244,14 +1259,19 @@ def test_save_and_load_topostats_file(
tmp_path: Path,
image: np.ndarray,
pixel_to_nm_scaling: float,
filename: str,
img_path: str,
grain_mask_above: np.ndarray,
grain_mask_below: np.ndarray,
grain_trace_data: dict,
) -> None:
"""Test saving a .topostats file."""
topostats_object = {
"image_flattened": image,
"filename": filename,
"img_path": img_path,
"pixel_to_nm_scaling": pixel_to_nm_scaling,
"image_original": image,
"image_flattened": image,
"grain_masks": {"above": grain_mask_above, "below": grain_mask_below},
"grain_trace_data": grain_trace_data,
}
Expand All @@ -1273,8 +1293,7 @@ def test_save_and_load_topostats_file(
"grain_masks",
"grain_trace_data",
}

np.testing.assert_array_equal(image, loadscans.img_dict["topostats_file_test"]["image_original"])
np.testing.assert_array_equal(image, loadscans.img_dict["image_original"])
assert pixel_to_nm_scaling == loadscans.img_dict["pixel_to_nm_scaling"]
if grain_mask_above is not None:
np.testing.assert_array_equal(grain_mask_above, loadscans.img_dict["grain_masks"]["above"])
Expand Down
24 changes: 24 additions & 0 deletions tests/test_run_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ def test_run_topostats_process_all(caplog) -> None:
"./tests/resources/test_image/",
"--file-ext",
".topostats",
"--extract",
"all",
"process",
]
)
Expand Down Expand Up @@ -149,3 +151,25 @@ def test_run_topostats_process_debug(caplog) -> None:
assert "File extension : .topostats" in caplog.text
assert "Images processed : 1" in caplog.text
assert "~~~~~~~~~~~~~~~~~~~~ COMPLETE ~~~~~~~~~~~~~~~~~~~~" in caplog.text


def test_filters(caplog) -> None:
    """Test running the filters module.

    We use the command line entry point to test that _just_ filters runs.
    """
    caplog.set_level(logging.INFO)
    cli_args = [
        "--config",
        f"{BASE_DIR / 'topostats' / 'default_config.yaml'}",
        "--base-dir",
        "./tests/resources/test_image/",
        "--file-ext",
        ".topostats",
        "filter",  # This is the sub-command we wish to test, it will call run_modules.filters()
    ]
    entry_point(manually_provided_args=cli_args)
    # Each of these log messages must appear for the run to count as successful.
    expected_messages = (
        "Looking for images with extension : .topostats",
        "Extracting image from tests/resources/test_image/minicircle_small.topostats",
        "[minicircle_small] Filtering completed.",
    )
    for message in expected_messages:
        assert message in caplog.text
1 change: 1 addition & 0 deletions topostats/default_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ cores: 2 # Number of CPU cores to utilise for processing multiple files simultan
file_ext: .spm # File extension of the data files.
loading:
channel: Height # Channel to pull data from in the data files.
extract: raw # Array to extract when loading .topostats files.
filter:
run: true # Options : true, false
row_alignment_quantile: 0.5 # lower values may improve flattening of larger features
Expand Down
7 changes: 5 additions & 2 deletions topostats/entry_point.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ def create_parser() -> arg.ArgumentParser:
required=False,
help="Channel to extract.",
)
parser.add_argument(
"--extract", dest="extract", type=str, required=False, help="Array to extract when loading '.topostats' files."
)
parser.add_argument(
"--image-set",
dest="image_set",
Expand Down Expand Up @@ -593,8 +596,8 @@ def create_parser() -> arg.ArgumentParser:
# Filter
filter_parser = subparsers.add_parser(
"filter",
description="WIP DO NOT USE - Load and filter images, saving as .topostats files for subsequent processing.",
help="WIP DO NOT USE - Load and filter images, saving as .topostats files for subsequent processing.",
description="Load and filter images, saving as .topostats files for subsequent processing.",
help="Load and filter images, saving as .topostats files for subsequent processing.",
)
filter_parser.add_argument(
"--row-alignment-quantile",
Expand Down
1 change: 1 addition & 0 deletions topostats/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
# pylint: disable=broad-except
# pylint: disable=too-many-instance-attributes
# pylint: disable=too-many-arguments
# pylint: disable=too-many-positional-arguments
# pylint: disable=too-many-branches
# pylint: disable=dangerous-default-value

Expand Down
18 changes: 15 additions & 3 deletions topostats/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,12 +596,17 @@ class LoadScans:
Path to a valid AFM scan to load.
channel : str
Image channel to extract from the scan.
extract : str
What to extract from ''.topostats'' files, default is ''all'' which loads everything but if using in
''run_topostats'' functions then specific subsets of data are required and this allows just those to be
loaded. Options include ''raw'' and ''filter'' at present.
"""

def __init__(
self,
img_paths: list[str | Path],
channel: str,
extract: str = "all",
):
"""
Initialise the class.
Expand All @@ -612,12 +617,18 @@ def __init__(
Path to a valid AFM scan to load.
channel : str
Image channel to extract from the scan.
extract : str
What to extract from ''.topostats'' files, default is ''all'' which loads everything but if using in
''run_topostats'' functions then specific subsets of data are required and this allows just those to be
loaded. Options include ''raw'' and ''filter'' at present.
"""
self.img_paths = img_paths
self.img_path = None
self.channel = channel
self.channel_data = None
self.extract = extract
self.filename = None
self.suffix = None
self.image = None
self.pixel_to_nm_scaling = None
self.grain_masks = {}
Expand Down Expand Up @@ -756,7 +767,6 @@ def get_data(self) -> None:
".topostats": self.load_topostats,
".asd": self.load_asd,
}

for img_path in self.img_paths:
self.img_path = img_path
self.filename = img_path.stem
Expand All @@ -767,8 +777,10 @@ def get_data(self) -> None:
# Check that the file extension is supported
if suffix in suffix_to_loader:
try:
if suffix == ".topostats":
self.image, self.pixel_to_nm_scaling, self.img_dict = suffix_to_loader[suffix]()
if suffix == ".topostats" and self.extract in (None, "all"):
self.image, self.pixel_to_nm_scaling, self.img_dict = self.load_topostats()
elif suffix == ".topostats" and self.extract not in (None, "all"):
self.image, self.pixel_to_nm_scaling, _ = self.load_topostats(self.extract)
else:
self.image, self.pixel_to_nm_scaling = suffix_to_loader[suffix]()
except Exception as e:
Expand Down
70 changes: 70 additions & 0 deletions topostats/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1261,6 +1261,76 @@ def process_scan(
)


def process_filters(
    topostats_object: dict,
    base_dir: str | Path,
    filter_config: dict,
    plotting_config: dict,
    output_dir: str | Path = "output",
) -> tuple[str, bool]:
    """
    Filter an image, returning the flattened image and saving it to ''.topostats''.

    Runs just the first key step of flattening images to remove noise, tilt and optionally scars, saving to
    ''.topostats'' for subsequent processing and analyses.

    Parameters
    ----------
    topostats_object : dict[str, Union[npt.NDArray, Path, float]]
        A dictionary with keys 'image', 'img_path' and 'pixel_to_nm_scaling' containing a file or frames' image, its
        path and its pixel to nanometre scaling value.
    base_dir : str | Path
        Directory to recursively search for files, if not specified the current directory is scanned.
    filter_config : dict
        Dictionary of configuration options for running the Filter stage.
    plotting_config : dict
        Dictionary of configuration options for plotting figures.
    output_dir : str | Path
        Directory to save output to, it will be created if it does not exist. If it already exists then it is possible
        that output will be over-written.

    Returns
    -------
    tuple[str, bool]
        A tuple of the image filename and a boolean indicating if the image was successfully processed.
    """
    core_out_path, filter_out_path, _, _ = get_out_paths(
        image_path=topostats_object["img_path"],
        base_dir=base_dir,
        output_dir=output_dir,
        filename=topostats_object["filename"],
        plotting_config=plotting_config,
    )

    plotting_config = add_pixel_to_nm_to_plotting_config(plotting_config, topostats_object["pixel_to_nm_scaling"])

    # Flatten Image
    try:
        image_flattened = run_filters(
            unprocessed_image=topostats_object["image_original"],
            pixel_to_nm_scaling=topostats_object["pixel_to_nm_scaling"],
            filename=topostats_object["filename"],
            filter_out_path=filter_out_path,
            core_out_path=core_out_path,
            filter_config=filter_config,
            plotting_config=plotting_config,
        )
        # Use flattened image if one is returned, else use original image
        topostats_object["image_flattened"] = (
            image_flattened if image_flattened is not None else topostats_object["image_original"]
        )

        # Save the topostats dictionary object to .topostats file.
        save_topostats_file(
            output_dir=core_out_path, filename=str(topostats_object["filename"]), topostats_object=topostats_object
        )
        return (topostats_object["filename"], True)
    except Exception:  # pylint: disable=broad-except
        # Catch Exception rather than a bare except so KeyboardInterrupt/SystemExit still propagate, and
        # log the traceback so failures are diagnosable instead of being silently reported as False.
        LOGGER.exception("Filtering failed for image : %s", topostats_object["filename"])
        return (topostats_object["filename"], False)


def check_run_steps( # noqa: C901
filter_run: bool,
grains_run: bool,
Expand Down
Loading
Loading