Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: Adds filter sub-command #1071

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ Aside from the comments in YAML file itself the fields are described below.
| `cores` | | integer | `2` | Number of cores to run parallel processes on. |
| `file_ext` | | string | `.spm` | File extensions to search for. |
| `loading` | `channel` | string | `Height` | The channel of data to be processed, what this is will depend on the file-format you are processing and the channel you wish to process. |
| | `extract` | string | `raw` | The array to extract when loading from `.topostats` images. |
| `filter` | `run` | boolean | `true` | Whether to run the filtering stage, without this other stages won't run so leave as `true`. |
| | `threshold_method` | str | `std_dev` | Threshold method for filtering, options are `otsu`, `std_dev` or `absolute`. |
| | `otsu_threshold_multiplier` | float | `1.0` | Factor by which the derived Otsu Threshold should be scaled. |
Expand Down
5 changes: 3 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ def load_scan_dummy() -> LoadScans:
@pytest.fixture()
def load_scan_topostats_test_file(tmp_path: Path, loading_config: dict) -> LoadScans:
    """Instantiate a LoadScans object for a temporarily saved test .topostats file."""
    # Load the full contents of the .topostats file rather than a specific subset.
    loading_config["extract"] = "all"
    test_file = tmp_path / "topostats_file_test.topostats"
    return LoadScans([test_file], **loading_config)


Expand All @@ -359,9 +360,9 @@ def load_scan(loading_config: dict) -> LoadScans:


@pytest.fixture()
def load_scan_data() -> LoadScans:
def load_scan_data(loading_config: dict) -> LoadScans:
"""Instance of a LoadScans object after applying the get_data func."""
scan_data = LoadScans([RESOURCES / "test_image" / "minicircle_small.topostats"], channel="Height")
scan_data = LoadScans([RESOURCES / "test_image" / "minicircle_small.topostats"], **loading_config)
scan_data.get_data()
return scan_data

Expand Down
Binary file modified tests/resources/test_image/minicircle_small.topostats
Binary file not shown.
27 changes: 23 additions & 4 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
}

# pylint: disable=protected-access
# pylint: disable=too-many-arguments
# pylint: disable=too-many-lines
# pylint: disable=too-many-positional-arguments

Expand Down Expand Up @@ -1118,11 +1119,21 @@ def test_hdf5_to_dict_nested_dict_group_path(tmp_path: Path) -> None:


@pytest.mark.parametrize(
("image", "pixel_to_nm_scaling", "grain_mask_above", "grain_mask_below", "grain_trace_data"),
(
"image",
"pixel_to_nm_scaling",
"filename",
"img_path",
"grain_mask_above",
"grain_mask_below",
"grain_trace_data",
),
[
pytest.param(
np.arange(0, 100).reshape(10, 10),
3.14159265,
"below_grain_mask_with_grain_trace_data",
"./below_grain_mask_with_grain_trace_data.topostats",
None,
np.zeros((10, 10)),
{
Expand Down Expand Up @@ -1224,6 +1235,8 @@ def test_hdf5_to_dict_nested_dict_group_path(tmp_path: Path) -> None:
pytest.param(
np.arange(0, 100).reshape(10, 10),
3.14159265,
"above_grain_mask_without_grain_trace_data",
"./above_grain_mask_without_grain_trace_data.topostats",
np.zeros((10, 10)),
None,
None,
Expand All @@ -1232,6 +1245,8 @@ def test_hdf5_to_dict_nested_dict_group_path(tmp_path: Path) -> None:
pytest.param(
np.arange(0, 100).reshape(10, 10),
3.14159265,
"above_and_below_grain_masks_without_grain_trace_data",
"./above_and_below_grain_masks_without_grain_trace_data.topostats",
np.zeros((10, 10)),
np.zeros((10, 10)),
None,
Expand All @@ -1244,14 +1259,19 @@ def test_save_and_load_topostats_file(
tmp_path: Path,
image: np.ndarray,
pixel_to_nm_scaling: float,
filename: str,
img_path: str,
grain_mask_above: np.ndarray,
grain_mask_below: np.ndarray,
grain_trace_data: dict,
) -> None:
"""Test saving a .topostats file."""
topostats_object = {
"image_flattened": image,
"filename": filename,
"img_path": img_path,
"pixel_to_nm_scaling": pixel_to_nm_scaling,
"image_original": image,
"image_flattened": image,
"grain_masks": {"above": grain_mask_above, "below": grain_mask_below},
"grain_trace_data": grain_trace_data,
}
Expand All @@ -1273,8 +1293,7 @@ def test_save_and_load_topostats_file(
"grain_masks",
"grain_trace_data",
}

np.testing.assert_array_equal(image, loadscans.img_dict["topostats_file_test"]["image_original"])
np.testing.assert_array_equal(image, loadscans.img_dict["image_original"])
assert pixel_to_nm_scaling == loadscans.img_dict["pixel_to_nm_scaling"]
if grain_mask_above is not None:
np.testing.assert_array_equal(grain_mask_above, loadscans.img_dict["grain_masks"]["above"])
Expand Down
24 changes: 24 additions & 0 deletions tests/test_run_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ def test_run_topostats_process_all(caplog) -> None:
"./tests/resources/test_image/",
"--file-ext",
".topostats",
"--extract",
"all",
"process",
]
)
Expand Down Expand Up @@ -149,3 +151,25 @@ def test_run_topostats_process_debug(caplog) -> None:
assert "File extension : .topostats" in caplog.text
assert "Images processed : 1" in caplog.text
assert "~~~~~~~~~~~~~~~~~~~~ COMPLETE ~~~~~~~~~~~~~~~~~~~~" in caplog.text


def test_filters(caplog) -> None:
    """Test running the filters module.

    We use the command line entry point to test that _just_ filters runs.
    """
    caplog.set_level(logging.INFO)
    cli_args = [
        "--config",
        f"{BASE_DIR / 'topostats' / 'default_config.yaml'}",
        "--base-dir",
        "./tests/resources/test_image/",
        "--file-ext",
        ".topostats",
        "filter",  # This is the sub-command we wish to test, it will call run_modules.filters()
    ]
    entry_point(manually_provided_args=cli_args)
    # Each of these log messages must appear for the run to count as successful.
    expected_messages = (
        "Looking for images with extension : .topostats",
        "Extracting image from tests/resources/test_image/minicircle_small.topostats",
        "[minicircle_small] Filtering completed.",
    )
    for message in expected_messages:
        assert message in caplog.text
1 change: 1 addition & 0 deletions topostats/default_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ cores: 2 # Number of CPU cores to utilise for processing multiple files simultan
file_ext: .spm # File extension of the data files.
loading:
channel: Height # Channel to pull data from in the data files.
extract: raw # Array to extract when loading .topostats files.
filter:
run: true # Options : true, false
row_alignment_quantile: 0.5 # lower values may improve flattening of larger features
Expand Down
7 changes: 5 additions & 2 deletions topostats/entry_point.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ def create_parser() -> arg.ArgumentParser:
required=False,
help="Channel to extract.",
)
parser.add_argument(
"--extract", dest="extract", type=str, required=False, help="Array to extract when loading '.topostats' files."
)
parser.add_argument(
"--image-set",
dest="image_set",
Expand Down Expand Up @@ -593,8 +596,8 @@ def create_parser() -> arg.ArgumentParser:
# Filter
filter_parser = subparsers.add_parser(
"filter",
description="WIP DO NOT USE - Load and filter images, saving as .topostats files for subsequent processing.",
help="WIP DO NOT USE - Load and filter images, saving as .topostats files for subsequent processing.",
description="Load and filter images, saving as .topostats files for subsequent processing.",
help="Load and filter images, saving as .topostats files for subsequent processing.",
)
filter_parser.add_argument(
"--row-alignment-quantile",
Expand Down
1 change: 1 addition & 0 deletions topostats/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
# pylint: disable=broad-except
# pylint: disable=too-many-instance-attributes
# pylint: disable=too-many-arguments
# pylint: disable=too-many-positional-arguments
# pylint: disable=too-many-branches
# pylint: disable=dangerous-default-value

Expand Down
18 changes: 15 additions & 3 deletions topostats/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,12 +596,17 @@ class LoadScans:
Path to a valid AFM scan to load.
channel : str
Image channel to extract from the scan.
extract : str
What to extract from ''.topostats'' files, default is ''all'' which loads everything but if using in
''run_topostats'' functions then specific subsets of data are required and this allows just those to be
loaded. Options include ''raw'' and ''filter'' at present.
"""

def __init__(
self,
img_paths: list[str | Path],
channel: str,
extract: str = "all",
):
"""
Initialise the class.
Expand All @@ -612,12 +617,18 @@ def __init__(
Path to a valid AFM scan to load.
channel : str
Image channel to extract from the scan.
extract : str
What to extract from ''.topostats'' files, default is ''all'' which loads everything but if using in
''run_topostats'' functions then specific subsets of data are required and this allows just those to be
loaded. Options include ''raw'' and ''filter'' at present.
"""
self.img_paths = img_paths
self.img_path = None
self.channel = channel
self.channel_data = None
self.extract = extract
self.filename = None
self.suffix = None
self.image = None
self.pixel_to_nm_scaling = None
self.grain_masks = {}
Expand Down Expand Up @@ -756,7 +767,6 @@ def get_data(self) -> None:
".topostats": self.load_topostats,
".asd": self.load_asd,
}

for img_path in self.img_paths:
self.img_path = img_path
self.filename = img_path.stem
Expand All @@ -767,8 +777,10 @@ def get_data(self) -> None:
# Check that the file extension is supported
if suffix in suffix_to_loader:
try:
if suffix == ".topostats":
self.image, self.pixel_to_nm_scaling, self.img_dict = suffix_to_loader[suffix]()
if suffix == ".topostats" and self.extract in (None, "all"):
self.image, self.pixel_to_nm_scaling, self.img_dict = self.load_topostats()
elif suffix == ".topostats" and self.extract not in (None, "all"):
self.image, self.pixel_to_nm_scaling, _ = self.load_topostats(self.extract)
else:
self.image, self.pixel_to_nm_scaling = suffix_to_loader[suffix]()
except Exception as e:
Expand Down
70 changes: 70 additions & 0 deletions topostats/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1261,6 +1261,76 @@ def process_scan(
)


def process_filters(
    topostats_object: dict,
    base_dir: str | Path,
    filter_config: dict,
    plotting_config: dict,
    output_dir: str | Path = "output",
) -> tuple[str, bool]:
    """
    Filter an image, returning the flattened image and saving it to ''.topostats''.

    Runs just the first key step of flattening images to remove noise, tilt and optionally scars, saving to
    ''.topostats'' for subsequent processing and analyses.

    Parameters
    ----------
    topostats_object : dict[str, Union[npt.NDArray, Path, float]]
        A dictionary with keys 'image', 'img_path' and 'pixel_to_nm_scaling' containing a file or frames' image, its
        path and its pixel to nanometre scaling value.
    base_dir : str | Path
        Directory to recursively search for files, if not specified the current directory is scanned.
    filter_config : dict
        Dictionary of configuration options for running the Filter stage.
    plotting_config : dict
        Dictionary of configuration options for plotting figures.
    output_dir : str | Path
        Directory to save output to, it will be created if it does not exist. If it already exists then it is possible
        that output will be over-written.

    Returns
    -------
    tuple[str, bool]
        A tuple of the image filename and a boolean indicating if the image was successfully processed.
    """
    core_out_path, filter_out_path, _, _ = get_out_paths(
        image_path=topostats_object["img_path"],
        base_dir=base_dir,
        output_dir=output_dir,
        filename=topostats_object["filename"],
        plotting_config=plotting_config,
    )

    plotting_config = add_pixel_to_nm_to_plotting_config(plotting_config, topostats_object["pixel_to_nm_scaling"])

    # Flatten Image
    try:
        image_flattened = run_filters(
            unprocessed_image=topostats_object["image_original"],
            pixel_to_nm_scaling=topostats_object["pixel_to_nm_scaling"],
            filename=topostats_object["filename"],
            filter_out_path=filter_out_path,
            core_out_path=core_out_path,
            filter_config=filter_config,
            plotting_config=plotting_config,
        )
        # Use flattened image if one is returned, else use original image
        topostats_object["image_flattened"] = (
            image_flattened if image_flattened is not None else topostats_object["image_original"]
        )

        # Save the topostats dictionary object to .topostats file.
        save_topostats_file(
            output_dir=core_out_path, filename=str(topostats_object["filename"]), topostats_object=topostats_object
        )
        return (topostats_object["filename"], True)
    except Exception:  # pylint: disable=broad-except
        # Catch Exception rather than a bare except so KeyboardInterrupt/SystemExit still propagate, and
        # log the traceback so failures are diagnosable instead of being silently reported as False.
        LOGGER.exception("Filtering failed for image : %s", topostats_object["filename"])
        return (topostats_object["filename"], False)


def check_run_steps( # noqa: C901
filter_run: bool,
grains_run: bool,
Expand Down
Loading
Loading