From 26f0b748a8d4042cf53c94fcdc106729f8a2ee92 Mon Sep 17 00:00:00 2001 From: kaczmarj Date: Thu, 22 Feb 2024 11:40:07 -0500 Subject: [PATCH 01/15] rm unused args, fix #185 --- wsinfer/modellib/data.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/wsinfer/modellib/data.py b/wsinfer/modellib/data.py index c5393bd..4c0c7d5 100644 --- a/wsinfer/modellib/data.py +++ b/wsinfer/modellib/data.py @@ -60,10 +60,6 @@ class WholeSlideImagePatches(torch.utils.data.Dataset): Path to whole slide image file. patch_path : str, Path Path to npy file with coordinates of input image. - um_px : float - Scale of the resulting patches. For example, 0.5 for ~20x magnification. - patch_size : int - The size of patches in pixels. transform : callable, optional A callable to modify a retrieved patch. The callable must accept a PIL.Image.Image instance and return a torch.Tensor. @@ -73,14 +69,10 @@ def __init__( self, wsi_path: str | Path, patch_path: str | Path, - um_px: float, - patch_size: int, transform: Callable[[Image.Image], torch.Tensor] | None = None, ): self.wsi_path = wsi_path self.patch_path = patch_path - self.um_px = float(um_px) - self.patch_size = int(patch_size) self.transform = transform assert Path(wsi_path).exists(), "wsi path not found" From 51029b29ae0f549feb9c04d2eaf08ea9f0c766b7 Mon Sep 17 00:00:00 2001 From: kaczmarj Date: Thu, 22 Feb 2024 11:58:49 -0500 Subject: [PATCH 02/15] account for empty hierarchy or patches fix #195 --- wsinfer/patchlib/__init__.py | 28 +++++++++++++++++----------- wsinfer/patchlib/patch.py | 6 ++++-- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/wsinfer/patchlib/__init__.py b/wsinfer/patchlib/__init__.py index 4aa0c25..ba73018 100644 --- a/wsinfer/patchlib/__init__.py +++ b/wsinfer/patchlib/__init__.py @@ -158,9 +158,11 @@ def segment_and_patch_one_slide( slide.dimensions[0] / thumb.size[0], slide.dimensions[1] / thumb.size[1], ) - polygon, contours, hierarchy = get_multipolygon_from_binary_arr( - arr.astype("uint8") * 255, scale=scale - ) + _res = get_multipolygon_from_binary_arr(arr.astype("uint8") * 255, scale=scale) + if _res is None: + logger.warning(f"No tissue was found in slide {slide_path}") + return None + polygon, contours, hierarchy = _res # Get the coordinates of patches inside the tissue polygon. slide_width, slide_height = slide.dimensions @@ -176,15 +178,19 @@ def segment_and_patch_one_slide( ) logger.info(f"Found {len(coords)} patches within tissue") - # Save coordinates to HDF5. + # Save coordinates to HDF5, if at least one patch was found. patch_path.parent.mkdir(exist_ok=True, parents=True) - save_hdf5( - path=patch_path, - coords=coords, - patch_size=patch_size, - patch_spacing_um_px=patch_spacing_um_px, - compression="gzip", - ) + if coords.size > 0: + logger.info(f"Writing patches to {patch_path}") + save_hdf5( + path=patch_path, + coords=coords, + patch_size=patch_size, + patch_spacing_um_px=patch_spacing_um_px, + compression="gzip", + ) + else: + logger.warning(f"No patches found for slide {slide_path}") # Save thumbnail with drawn contours. logger.info(f"Writing tissue thumbnail with contours to disk: {mask_path}") diff --git a/wsinfer/patchlib/patch.py b/wsinfer/patchlib/patch.py index dcacee2..4cfe038 100644 --- a/wsinfer/patchlib/patch.py +++ b/wsinfer/patchlib/patch.py @@ -32,7 +32,7 @@ def temporary_recursion_limit(limit: int) -> Iterator[None]: def get_multipolygon_from_binary_arr( arr: npt.NDArray[np.int_], scale: tuple[float, float] | None = None -) -> tuple[MultiPolygon, Sequence[npt.NDArray[np.int_]], npt.NDArray[np.int_]]: +) -> tuple[MultiPolygon, Sequence[npt.NDArray[np.int_]], npt.NDArray[np.int_]] | None: """Create a Shapely Polygon from a binary array. Parameters @@ -54,8 +54,10 @@ def get_multipolygon_from_binary_arr( """ # Find contours and hierarchy contours: Sequence[npt.NDArray] - hierarchy: npt.NDArray + hierarchy: npt.NDArray | None contours, hierarchy = cv.findContours(arr, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE) + if hierarchy is None: + return None hierarchy = hierarchy.squeeze(0) logger.info(f"Detected {len(contours)} contours") From 4dc0117cd13928ac27882159c34203269ef28434 Mon Sep 17 00:00:00 2001 From: kaczmarj Date: Thu, 22 Feb 2024 12:04:17 -0500 Subject: [PATCH 03/15] please mypy --- wsinfer/patchlib/patch.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/wsinfer/patchlib/patch.py b/wsinfer/patchlib/patch.py index 4cfe038..5d2687d 100644 --- a/wsinfer/patchlib/patch.py +++ b/wsinfer/patchlib/patch.py @@ -100,6 +100,8 @@ def merge_polygons(polygon: MultiPolygon, idx: int, add: bool) -> MultiPolygon: polygon = polygon.difference(new_poly) # Check if current polygon has a child + if hierarchy is None: + raise NotImplementedError() child_idx = hierarchy[idx][2] if child_idx >= 0: # Call this function recursively, negate `add` parameter From 54f40d91185e8a5425926161521329eae473de9e Mon Sep 17 00:00:00 2001 From: kaczmarj Date: Thu, 22 Feb 2024 12:04:28 -0500 Subject: [PATCH 04/15] make error message more helpful if patch dir does not exist --- wsinfer/modellib/run_inference.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/wsinfer/modellib/run_inference.py b/wsinfer/modellib/run_inference.py index 4c0aec4..1046a04 100644 --- a/wsinfer/modellib/run_inference.py +++ b/wsinfer/modellib/run_inference.py @@ -83,7 +83,12 @@ def run_inference( # Check patches directory. patch_dir = results_dir / "patches" if not patch_dir.exists(): - raise errors.PatchDirectoryNotFound("Results dir must include 'patches' dir") + raise errors.PatchDirectoryNotFound( + "The 'patches' directory was not found in results directory. This can" + " happen for a few reasons: 1) no tissue was detected in the slides," + " 2) the physical spacing (MPP) could not be read from any of the slides" + ", or 3) something else... Please read the logs above for potential errors." + ) # Create the patch paths based on the whole slide image paths. In effect, only # create patch paths if the whole slide image patch exists. patch_paths = [patch_dir / p.with_suffix(".h5").name for p in wsi_paths] From 78086a80cc10f898fa50f78d4ff0bec5adafe59f Mon Sep 17 00:00:00 2001 From: kaczmarj Date: Thu, 22 Feb 2024 12:14:11 -0500 Subject: [PATCH 05/15] rm unused args in wsi patches dataset fixes #185 --- wsinfer/modellib/run_inference.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/wsinfer/modellib/run_inference.py b/wsinfer/modellib/run_inference.py index 1046a04..7ba8aa5 100644 --- a/wsinfer/modellib/run_inference.py +++ b/wsinfer/modellib/run_inference.py @@ -150,8 +150,6 @@ def run_inference( dset = WholeSlideImagePatches( wsi_path=wsi_path, patch_path=patch_path, - um_px=model_info.config.spacing_um_px, - patch_size=model_info.config.patch_size_pixels, transform=transform, ) except Exception: From db8c4c717517e0f21814064a56fc015cdd452e86 Mon Sep 17 00:00:00 2001 From: kaczmarj Date: Thu, 22 Feb 2024 12:14:36 -0500 Subject: [PATCH 06/15] check that there are patches in the loaded file fixes #195 --- wsinfer/modellib/data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wsinfer/modellib/data.py b/wsinfer/modellib/data.py index 4c0c7d5..b0d9ed7 100644 --- a/wsinfer/modellib/data.py +++ b/wsinfer/modellib/data.py @@ -79,7 +79,8 @@ def __init__( assert Path(patch_path).exists(), "patch path not found" self.patches = _read_patch_coords(self.patch_path) - + if self.patches.size == 0: + raise ValueError(f"No patches were found in {self.patch_path}") assert self.patches.ndim == 2, "expected 2D array of patch coordinates" # x, y, width, height assert self.patches.shape[1] == 4, "expected second dimension to have len 4" From 6e81da708c76d5bbe9158309ff50dd9796a8b6fa Mon Sep 17 00:00:00 2001 From: kaczmarj Date: Thu, 22 Feb 2024 12:16:12 -0500 Subject: [PATCH 07/15] ensure thumbnail is RGB fixes #216 --- wsinfer/patchlib/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wsinfer/patchlib/__init__.py b/wsinfer/patchlib/__init__.py index ba73018..38776fd 100644 --- a/wsinfer/patchlib/__init__.py +++ b/wsinfer/patchlib/__init__.py @@ -121,7 +121,9 @@ def segment_and_patch_one_slide( if len(thumbsize) != 2: raise ValueError(f"Length of 'thumbsize' must be 2 but got {len(thumbsize)}") thumb: Image.Image = slide.get_thumbnail(thumbsize) - # TODO: allow the min hole size and min object size to be set in physical units. + if thumb.mode != "RGB": + logger.warning(f"Converting mode of thumbnail from {thumb.mode} to RGB") + thumb = thumb.convert("RGB") # thumb has ~12 MPP. thumb_mpp = (mpp * (np.array(slide.dimensions) / thumb.size)).mean() From 064ba854265a667af7ec36c2b803e227541a3b5b Mon Sep 17 00:00:00 2001 From: kaczmarj Date: Thu, 22 Feb 2024 13:15:43 -0500 Subject: [PATCH 08/15] appease mypy --- tests/test_all.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/test_all.py b/tests/test_all.py index 551b9b2..f79eb21 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -6,6 +6,7 @@ import sys import time from pathlib import Path +from unittest.mock import patch as mock_patch, MagicMock import geojson as geojsonlib import h5py @@ -24,6 +25,7 @@ from wsinfer.wsi import HAS_OPENSLIDE from wsinfer.wsi import HAS_TIFFSLIDE + @pytest.fixture def tiff_image(tmp_path: Path) -> Path: x = np.empty((4096, 4096, 3), dtype="uint8") @@ -82,7 +84,7 @@ def test_cli_run_with_registered_models( backend: str, tiff_image: Path, tmp_path: Path, -): +) -> None: """A regression test of the command 'wsinfer run'.""" reference_csv = Path(__file__).parent / "reference" / model / "purple.csv" @@ -151,7 +153,7 @@ def test_cli_run_with_registered_models( for geojson_row in d["features"]: assert geojson_row["type"] == "Feature" - isinstance(geojson_row["id"] , str) + isinstance(geojson_row["id"], str) assert geojson_row["geometry"]["type"] == "Polygon" res = [] for i, prob_col in enumerate(prob_cols): @@ -178,7 +180,7 @@ def test_cli_run_with_registered_models( assert [df_coords] == geojson_row["geometry"]["coordinates"] -def test_cli_run_with_local_model(tmp_path: Path, tiff_image: Path): +def test_cli_run_with_local_model(tmp_path: Path, tiff_image: Path) -> None: model = "breast-tumor-resnet34.tcga-brca" reference_csv = Path(__file__).parent / "reference" / model / "purple.csv" if not reference_csv.exists(): @@ -246,7 +248,7 @@ def test_cli_run_with_local_model(tmp_path: Path, tiff_image: Path): ), f"Column {prob_col} not allclose at atol=1e-07" -def test_cli_run_no_model_or_config(tmp_path: Path): +def test_cli_run_no_model_or_config(tmp_path: Path) -> None: """Test that --model or (--config and --model-path) is required.""" wsi_dir = tmp_path / "slides" wsi_dir.mkdir() @@ -265,7 +267,7 @@ def test_cli_run_no_model_or_config(tmp_path: Path): assert "one of --model or (--config and --model-path) is required" in result.output -def test_cli_run_model_and_config(tmp_path: Path): +def test_cli_run_model_and_config(tmp_path: Path) -> None: """Test that (model and weights) or config is required.""" wsi_dir = tmp_path / "slides" wsi_dir.mkdir() @@ -298,7 +300,7 @@ def test_cli_run_model_and_config(tmp_path: Path): @pytest.mark.xfail -def test_convert_to_sbu(): +def test_convert_to_sbu() -> None: # TODO: create a synthetic output and then convert it. Check that it is valid. assert False @@ -330,7 +332,7 @@ def test_patch_cli( backend: str, tmp_path: Path, tiff_image: Path, -): +) -> None: """Test of 'wsinfer patch'.""" orig_slide_size = 4096 orig_slide_spacing = 0.25 @@ -380,7 +382,7 @@ def test_patch_cli( # FIXME: parametrize this test across our models. -def test_jit_compile(): +def test_jit_compile() -> None: w = get_registered_model("breast-tumor-resnet34.tcga-brca") model = get_pretrained_torch_module(w) @@ -411,7 +413,7 @@ def test_jit_compile(): ) -def test_issue_89(): +def test_issue_89() -> None: """Do not fail if 'git' is not installed.""" model_obj = get_registered_model("breast-tumor-resnet34.tcga-brca") d = _get_info_for_save(model_obj) @@ -433,7 +435,7 @@ def test_issue_89(): os.environ["PATH"] = orig_path # reset path -def test_issue_94(tmp_path: Path, tiff_image: Path): +def test_issue_94(tmp_path: Path, tiff_image: Path) -> None: """Gracefully handle unreadable slides.""" # We have a valid tiff in 'tiff_image.parent'. We put in an unreadable file too. @@ -461,7 +463,7 @@ def test_issue_94(tmp_path: Path, tiff_image: Path): assert not results_dir.joinpath("model-outputs-csv").joinpath("bad.csv").exists() -def test_issue_97(tmp_path: Path, tiff_image: Path): +def test_issue_97(tmp_path: Path, tiff_image: Path) -> None: """Write a run_metadata file per run.""" runner = CliRunner() @@ -502,7 +504,7 @@ def test_issue_97(tmp_path: Path, tiff_image: Path): assert len(metas) == 2 -def test_issue_125(tmp_path: Path): +def test_issue_125(tmp_path: Path) -> None: """Test that path in model config can be saved when a pathlib.Path object.""" w = get_registered_model("breast-tumor-resnet34.tcga-brca") From 06accf76591eae264b191351b43b9e27adaac514 Mon Sep 17 00:00:00 2001 From: kaczmarj Date: Thu, 22 Feb 2024 13:16:10 -0500 Subject: [PATCH 09/15] test that openslide and tiffslide pad regions fixes #203 --- tests/test_all.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/test_all.py b/tests/test_all.py index f79eb21..fd93b24 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -512,3 +512,24 @@ def test_issue_125(tmp_path: Path) -> None: info = _get_info_for_save(w) with open(tmp_path / "foo.json", "w") as f: json.dump(info, f) + + +def test_issue_203(tiff_image: Path) -> None: + """Test that openslide and tiffslide pad an image if an out-of-bounds region + is requested. + """ + import openslide + import tiffslide + + with tiffslide.TiffSlide(tiff_image) as tslide: + w, h = tslide.dimensions + img = tslide.read_region((w, h), level=0, size=(256, 256)) + assert img.size == (256, 256) + assert np.allclose(np.array(img), 0) + del tslide, img + + with openslide.OpenSlide(tiff_image) as oslide: + w, h = oslide.dimensions + img = oslide.read_region((w, h), level=0, size=(256, 256)) + assert img.size == (256, 256) + assert np.allclose(np.array(img), 0) From 73dd4eaaa7a78c34575544576c7e21fdbed6433a Mon Sep 17 00:00:00 2001 From: kaczmarj Date: Thu, 22 Feb 2024 13:16:40 -0500 Subject: [PATCH 10/15] rm unused import --- tests/test_all.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_all.py b/tests/test_all.py index fd93b24..8d175be 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -6,7 +6,6 @@ import sys import time from pathlib import Path -from unittest.mock import patch as mock_patch, MagicMock import geojson as geojsonlib import h5py From 95e3b0609ea42d5b078150d23bc71624dac9e1bd Mon Sep 17 00:00:00 2001 From: kaczmarj Date: Thu, 22 Feb 2024 13:25:09 -0500 Subject: [PATCH 11/15] handle symlinked slides dirs fixes #214 --- tests/test_all.py | 25 +++++++++++++++++ wsinfer/cli/convert_csv_to_sbubmi.py | 10 +++---- wsinfer/cli/infer.py | 11 +++----- wsinfer/cli/patch.py | 40 ++++++++++++++-------------- wsinfer/patchlib/__init__.py | 4 +-- 5 files changed, 54 insertions(+), 36 deletions(-) diff --git a/tests/test_all.py b/tests/test_all.py index 8d175be..a6c47d0 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -532,3 +532,28 @@ def test_issue_203(tiff_image: Path) -> None: img = oslide.read_region((w, h), level=0, size=(256, 256)) assert img.size == (256, 256) assert np.allclose(np.array(img), 0) + + +def test_issue_214(tmp_path: Path, tiff_image: Path) -> None: + """Test that symlinked slides don't mess things up.""" + link = tmp_path / "forlinks" / "arbitrary-link-name.tiff" + link.parent.mkdir(parents=True) + link.symlink_to(tiff_image) + + runner = CliRunner() + results_dir = tmp_path / "inference" + result = runner.invoke( + cli, + [ + "run", + "--wsi-dir", + str(link.parent), + "--results-dir", + str(results_dir), + "--model", + "breast-tumor-resnet34.tcga-brca", + ], + ) + assert result.exit_code == 0 + assert (results_dir / "patches" / link.with_suffix(".h5").name).exists() + assert (results_dir / "model-outputs-csv" / link.with_suffix(".csv").name).exists() diff --git a/wsinfer/cli/convert_csv_to_sbubmi.py b/wsinfer/cli/convert_csv_to_sbubmi.py index d55df94..091aaef 100644 --- a/wsinfer/cli/convert_csv_to_sbubmi.py +++ b/wsinfer/cli/convert_csv_to_sbubmi.py @@ -249,21 +249,17 @@ def get_color(row: pd.Series) -> tuple[float, float, float]: @click.command() @click.argument( "results_dir", - type=click.Path( - exists=True, file_okay=False, dir_okay=True, path_type=Path, resolve_path=True - ), + type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path), ) @click.argument( "output", - type=click.Path(exists=False, path_type=Path, resolve_path=True), + type=click.Path(exists=False, path_type=Path), ) @click.option( "--wsi-dir", required=True, help="Directory with whole slide images.", - type=click.Path( - exists=True, file_okay=False, dir_okay=True, path_type=Path, resolve_path=True - ), + type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path), ) @click.option("--execution-id", required=True, help="Unique execution ID for this run.") @click.option("--study-id", required=True, help="Study ID, like TCGA-BRCA.") diff --git a/wsinfer/cli/infer.py b/wsinfer/cli/infer.py index 639452b..5c03c2e 100644 --- a/wsinfer/cli/infer.py +++ b/wsinfer/cli/infer.py @@ -188,7 +188,7 @@ def get_stdout(args: list[str]) -> str: @click.option( "-i", "--wsi-dir", - type=click.Path(exists=True, file_okay=False, path_type=Path, resolve_path=True), + type=click.Path(exists=True, file_okay=False, path_type=Path), required=True, help="Directory containing whole slide images. This directory can *only* contain" " whole slide images.", @@ -196,7 +196,7 @@ def get_stdout(args: list[str]) -> str: @click.option( "-o", "--results-dir", - type=click.Path(file_okay=False, path_type=Path, resolve_path=True), + type=click.Path(file_okay=False, path_type=Path), required=True, help="Directory to store results. If directory exists, will skip" " whole slides for which outputs exist.", @@ -212,7 +212,7 @@ def get_stdout(args: list[str]) -> str: @click.option( "-c", "--config", - type=click.Path(exists=True, dir_okay=False, path_type=Path, resolve_path=True), + type=click.Path(exists=True, dir_okay=False, path_type=Path), help=( "Path to configuration for the trained model. Use this option if the" " model weights are not registered in wsinfer. Mutually exclusive with" @@ -222,7 +222,7 @@ def get_stdout(args: list[str]) -> str: @click.option( "-p", "--model-path", - type=click.Path(exists=True, dir_okay=False, path_type=Path, resolve_path=True), + type=click.Path(exists=True, dir_okay=False, path_type=Path), help=( "Path to the pretrained model. Use only when --config is passed. Mutually " "exclusive with --model." @@ -349,9 +349,6 @@ def run( "--config and --model-path must both be set if one is set." ) - wsi_dir = wsi_dir.resolve() - results_dir = results_dir.resolve() - if not wsi_dir.exists(): raise FileNotFoundError(f"Whole slide image directory not found: {wsi_dir}") diff --git a/wsinfer/cli/patch.py b/wsinfer/cli/patch.py index 56d9983..d914bf3 100644 --- a/wsinfer/cli/patch.py +++ b/wsinfer/cli/patch.py @@ -11,7 +11,7 @@ @click.option( "-i", "--wsi-dir", - type=click.Path(exists=True, file_okay=False, path_type=Path, resolve_path=True), + type=click.Path(exists=True, file_okay=False, path_type=Path), required=True, help="Directory containing whole slide images. This directory can *only* contain" " whole slide images.", @@ -19,7 +19,7 @@ @click.option( "-o", "--results-dir", - type=click.Path(file_okay=False, path_type=Path, resolve_path=True), + type=click.Path(file_okay=False, path_type=Path), required=True, help="Directory to store patch results. If directory exists, will skip" " whole slides for which outputs exist.", @@ -32,7 +32,7 @@ help="Physical spacing of the patch in micrometers per pixel.", ) @click.option( - "--thumbsize", + "--seg-thumbsize", default=(2048, 2048), type=(int, int), help="The size of the slide thumbnail (in pixels) used for tissue segmentation." @@ -40,25 +40,25 @@ " max(thumbsize).", ) @click.option( - "--median-filter-size", + "--seg-median-filter-size", default=7, type=click.IntRange(min=3), help="The kernel size for median filtering. Must be greater than 1 and odd.", ) @click.option( - "--binary-threshold", + "--seg-binary-threshold", default=7, type=click.IntRange(min=1), help="The threshold for image binarization.", ) @click.option( - "--closing-kernel-size", + "--seg-closing-kernel-size", default=6, type=click.IntRange(min=1), help="The kernel size for binary closing (morphological operation).", ) @click.option( - "--min-object-size-um2", + "--seg-min-object-size-um2", default=200**2, type=click.FloatRange(min=0), help="The minimum size of an object to keep during tissue detection. If a" @@ -66,7 +66,7 @@ " The default is 200um x 200um. The units of this argument are microns squared.", ) @click.option( - "--min-hole-size-um2", + "--seg-min-hole-size-um2", default=190**2, type=click.FloatRange(min=0), help="The minimum size of a hole to keep as a hole. If a hole is smaller than this" @@ -78,12 +78,12 @@ def patch( results_dir: str, patch_size_px: int, patch_spacing_um_px: float, - thumbsize: tuple[int, int], - median_filter_size: int, - binary_threshold: int, - closing_kernel_size: int, - min_object_size_um2: float, - min_hole_size_um2: float, + seg_thumbsize: tuple[int, int], + seg_median_filter_size: int, + seg_binary_threshold: int, + seg_closing_kernel_size: int, + seg_min_object_size_um2: float, + seg_min_hole_size_um2: float, ) -> None: """Patch a directory of whole slide iamges.""" segment_and_patch_directory_of_slides( @@ -91,10 +91,10 @@ def patch( save_dir=results_dir, patch_size_px=patch_size_px, patch_spacing_um_px=patch_spacing_um_px, - thumbsize=thumbsize, - median_filter_size=median_filter_size, - binary_threshold=binary_threshold, - closing_kernel_size=closing_kernel_size, - min_object_size_um2=min_object_size_um2, - min_hole_size_um2=min_hole_size_um2, + thumbsize=seg_thumbsize, + median_filter_size=seg_median_filter_size, + binary_threshold=seg_binary_threshold, + closing_kernel_size=seg_closing_kernel_size, + min_object_size_um2=seg_min_object_size_um2, + min_hole_size_um2=seg_min_hole_size_um2, ) diff --git a/wsinfer/patchlib/__init__.py b/wsinfer/patchlib/__init__.py index 38776fd..10b7989 100644 --- a/wsinfer/patchlib/__init__.py +++ b/wsinfer/patchlib/__init__.py @@ -85,8 +85,8 @@ def segment_and_patch_one_slide( None """ - save_dir = Path(save_dir).resolve() - slide_path = Path(slide_path).resolve() + save_dir = Path(save_dir) + slide_path = Path(slide_path) slide_prefix = slide_path.stem logger.info(f"Segmenting and patching slide {slide_path}") From a371d2732d8621da7d46055d3d158c609aa2c4cd Mon Sep 17 00:00:00 2001 From: kaczmarj Date: Thu, 22 Feb 2024 13:50:57 -0500 Subject: [PATCH 12/15] add strided patches fixes #202 This adds a command line option '--patch-overlap-ratio' that controls the level of overlap between adjacent patches. Negative values create space between patches, and values closer to 1 makes patches overlap more. --- wsinfer/cli/infer.py | 12 ++++++++++++ wsinfer/patchlib/__init__.py | 8 ++++++-- wsinfer/patchlib/patch.py | 20 +++++++++++++++++--- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/wsinfer/cli/infer.py b/wsinfer/cli/infer.py index 5c03c2e..705ce0c 100644 --- a/wsinfer/cli/infer.py +++ b/wsinfer/cli/infer.py @@ -303,6 +303,16 @@ def get_stdout(args: list[str]) -> str: " area, it is filled with foreground. The default is 190um x 190um. The units of" " this argument are microns squared.", ) +@click.option( + "--patch-overlap-ratio", + default=0.0, + type=click.FloatRange(min=None, max=1, max_open=True), + help="The ratio of overlap among patches. The default value of 0 produces" + " non-overlapping patches. A value in (0, 1) will produce overlapping patches." + " Negative values will add space between patches. A value of -1 would skip" + " every other patch. A value of 0.5 will provide 50%% of overlap between patches." + " Values must be in (-inf, 1).", +) def run( ctx: click.Context, *, @@ -321,6 +331,7 @@ def run( seg_closing_kernel_size: int, seg_min_object_size_um2: float, seg_min_hole_size_um2: float, + patch_overlap_ratio: float = 0.0, ) -> None: """Run model inference on a directory of whole slide images. @@ -398,6 +409,7 @@ def run( closing_kernel_size=seg_closing_kernel_size, min_object_size_um2=seg_min_object_size_um2, min_hole_size_um2=seg_min_hole_size_um2, + overlap=patch_overlap_ratio, ) if not results_dir.joinpath("patches").exists(): diff --git a/wsinfer/patchlib/__init__.py b/wsinfer/patchlib/__init__.py index 10b7989..ee15598 100644 --- a/wsinfer/patchlib/__init__.py +++ b/wsinfer/patchlib/__init__.py @@ -14,7 +14,7 @@ from ..wsi import _validate_wsi_directory from ..wsi import get_avg_mpp from .patch import get_multipolygon_from_binary_arr -from .patch import get_nonoverlapping_patch_coordinates_within_polygon +from .patch import get_patch_coordinates_within_polygon from .segment import segment_tissue logger = logging.getLogger(__name__) @@ -34,6 +34,7 @@ def segment_and_patch_one_slide( closing_kernel_size: int = 6, min_object_size_um2: float = 200**2, min_hole_size_um2: float = 190**2, + overlap: float = 0.0, ) -> None: """Get non-overlapping patch coordinates in tissue regions of a whole slide image. @@ -171,12 +172,13 @@ def segment_and_patch_one_slide( half_patch_size = round(patch_size / 2) # Nx4 --> N x (minx, miny, width, height) - coords = get_nonoverlapping_patch_coordinates_within_polygon( + coords = get_patch_coordinates_within_polygon( slide_width=slide_width, slide_height=slide_height, patch_size=patch_size, half_patch_size=half_patch_size, polygon=polygon, + overlap=overlap, ) logger.info(f"Found {len(coords)} patches within tissue") @@ -299,6 +301,7 @@ def segment_and_patch_directory_of_slides( closing_kernel_size: int = 6, min_object_size_um2: float = 200**2, min_hole_size_um2: float = 190**2, + overlap: float = 0.0, ) -> None: """Get non-overlapping patch coordinates in tissue regions for a directory of whole slide images. @@ -373,6 +376,7 @@ def segment_and_patch_directory_of_slides( closing_kernel_size=closing_kernel_size, min_object_size_um2=min_object_size_um2, min_hole_size_um2=min_hole_size_um2, + overlap=overlap, ) except Exception as e: logger.error(f"Failed to segment and patch slide\n{slide_path}", exc_info=e) diff --git a/wsinfer/patchlib/patch.py b/wsinfer/patchlib/patch.py index 5d2687d..ee8d030 100644 --- a/wsinfer/patchlib/patch.py +++ b/wsinfer/patchlib/patch.py @@ -128,12 +128,13 @@ def merge_polygons(polygon: MultiPolygon, idx: int, add: bool) -> MultiPolygon: return polygon, contours_unscaled, hierarchy[np.newaxis] -def get_nonoverlapping_patch_coordinates_within_polygon( +def get_patch_coordinates_within_polygon( slide_width: int, slide_height: int, patch_size: int, half_patch_size: int, polygon: Polygon, + overlap: float = 0.0, ) -> npt.NDArray[np.int_]: """Get coordinates of patches within a polygon. @@ -149,6 +150,12 @@ def get_nonoverlapping_patch_coordinates_within_polygon( Half of the length of a patch in pixels. polygon : Polygon A shapely Polygon representing the presence of tissue. + overlap : float + The proportion of the patch_size to overlap. A value of 0.5 + would have an overlap of 50%. A value of 0.2 would have an + overlap of 20%. Negative values will add space between patches. + A value of -1 would skip every other patch. Value must be in (-inf, 1). + The default value of 0.0 produces non-overlapping patches. Returns ------- @@ -157,12 +164,19 @@ def get_nonoverlapping_patch_coordinates_within_polygon( contains the coordinates of the top-left of a tile: (minx, miny). """ + if overlap >= 1: + raise ValueError(f"overlap must be in (-inf, 1) but got {overlap}") + + # Handle potentially overlapping slides. + step_size = round((1 - overlap) * patch_size) + logger.info(f"Patches are {patch_size} px, with step size of {step_size} px.") + # Make an array of Nx2, where each row is (x, y) centroid of the patch. tile_centroids_arr: npt.NDArray[np.int_] = np.array( list( itertools.product( - range(0 + half_patch_size, slide_width, patch_size), - range(0 + half_patch_size, slide_height, patch_size), + range(0 + half_patch_size, slide_width, step_size), + range(0 + half_patch_size, slide_height, step_size), ) ) ) From c974d1b563683e836bdb9dd15729622886cf0c33 Mon Sep 17 00:00:00 2001 From: kaczmarj Date: Thu, 22 Feb 2024 13:57:00 -0500 Subject: [PATCH 13/15] save jsons with geojson ext --- wsinfer/write_geojson.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wsinfer/write_geojson.py b/wsinfer/write_geojson.py index ecd52bc..1ab072a 100644 --- a/wsinfer/write_geojson.py +++ b/wsinfer/write_geojson.py @@ -71,7 +71,7 @@ def make_geojson(csv: Path, results_dir: Path) -> None: if not prob_cols: raise KeyError("Did not find any columns with prob_ prefix.") geojson = _dataframe_to_geojson(df, prob_cols) - with open(results_dir / "model-outputs-geojson" / f"{filename}.json", "w") as f: + with open(results_dir / "model-outputs-geojson" / f"{filename}.geojson", "w") as f: json.dump(geojson, f) @@ -95,7 +95,7 @@ def write_geojsons(csvs: list[Path], results_dir: Path, num_workers: int) -> Non "that contains model-outputs, masks, and patches." ) if output.exists(): - geojsons = list((results_dir / "model-outputs-geojson").glob("*.json")) + geojsons = list((results_dir / "model-outputs-geojson").glob("*.geojson")) # Makes a list of filenames for both geojsons and csvs geojson_filenames = [filename.stem for filename in geojsons] From da971951d2ef804ba10907ab7430db6c2909c489 Mon Sep 17 00:00:00 2001 From: kaczmarj Date: Thu, 22 Feb 2024 13:57:11 -0500 Subject: [PATCH 14/15] print some logs --- wsinfer/cli/infer.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/wsinfer/cli/infer.py b/wsinfer/cli/infer.py index 705ce0c..9b937cd 100644 --- a/wsinfer/cli/infer.py +++ b/wsinfer/cli/infer.py @@ -447,9 +447,12 @@ def run( with open(run_metadata_outpath, "w") as f: json.dump(run_metadata, f, indent=2) - click.secho("Finished.", fg="green") - + click.echo("Writing inference results to GeoJSON files") csvs = list((results_dir / "model-outputs-csv").glob("*.csv")) write_geojsons(csvs, results_dir, num_workers) + if qupath: + click.echo("Creating QuPath project with results") make_qupath_project(wsi_dir, results_dir) + + click.secho("Finished.", fg="green") From 39d8e9d79768010898b9caf4348d098d7d4d16ff Mon Sep 17 00:00:00 2001 From: kaczmarj Date: Thu, 22 Feb 2024 14:17:08 -0500 Subject: [PATCH 15/15] use geojson ext for purple.json --- tests/test_all.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_all.py b/tests/test_all.py index a6c47d0..b4d2b70 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -145,7 +145,7 @@ def test_cli_run_with_registered_models( geojson_dir = results_dir / "model-outputs-geojson" # result = runner.invoke(cli, ["togeojson", str(results_dir), str(geojson_dir)]) assert result.exit_code == 0 - with open(geojson_dir / "purple.json") as f: + with open(geojson_dir / "purple.geojson") as f: d: geojsonlib.GeoJSON = geojsonlib.load(f) assert d.is_valid, "geojson not valid!" assert len(d["features"]) == len(df_ref)