diff --git a/README.md b/README.md index 6c2a7c61..ca659150 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,6 @@ Simple way to leverage the class-specific activation of convolutional layers in * [Prerequisites](#prerequisites) * [Installation](#installation) * [Usage](#usage) -* [Technical Roadmap](#technical-roadmap) * [Documentation](#documentation) * [Contributing](#contributing) * [Credits](#credits) @@ -58,20 +57,6 @@ python scripts/cam_example.py --model resnet50 --class-idx 232 - - -## Technical roadmap - -The project is currently under development, here are the objectives for the next releases: - -- [x] Parallel CAMs: enable batch processing. -- [x] Benchmark: compare class activation map computations for different architectures. -- [ ] Signature improvement: retrieve automatically the specific required layer names. -- [ ] Refined RPN: create a region proposal network using CAM. -- [ ] Task transfer: turn a well-trained classifier into an object detector. - - - ## Documentation The full package documentation is available [here](https://frgfm.github.io/torch-cam/) for detailed specifications. The documentation was built with [Sphinx](sphinx-doc.org) using a [theme](github.com/readthedocs/sphinx_rtd_theme) provided by [Read the Docs](readthedocs.org). 
diff --git a/scripts/cam_example.py b/scripts/cam_example.py index 4a28e51e..a1ce5a1e 100644 --- a/scripts/cam_example.py +++ b/scripts/cam_example.py @@ -1,10 +1,10 @@ #!usr/bin/python -# -*- coding: utf-8 -*- """ CAM visualization """ +import math import argparse from io import BytesIO @@ -18,18 +18,18 @@ from torchcam.cams import CAM, GradCAM, GradCAMpp, SmoothGradCAMpp, ScoreCAM, SSCAM, ISCAM from torchcam.utils import overlay_mask -VGG_CONFIG = {_vgg: dict(input_layer='features', conv_layer='features') +VGG_CONFIG = {_vgg: dict(conv_layer='features') for _vgg in models.vgg.__dict__.keys()} -RESNET_CONFIG = {_resnet: dict(input_layer='conv1', conv_layer='layer4', fc_layer='fc') +RESNET_CONFIG = {_resnet: dict(conv_layer='layer4', fc_layer='fc') for _resnet in models.resnet.__dict__.keys()} -DENSENET_CONFIG = {_densenet: dict(input_layer='features', conv_layer='features', fc_layer='classifier') +DENSENET_CONFIG = {_densenet: dict(conv_layer='features', fc_layer='classifier') for _densenet in models.densenet.__dict__.keys()} MODEL_CONFIG = { **VGG_CONFIG, **RESNET_CONFIG, **DENSENET_CONFIG, - 'mobilenet_v2': dict(input_layer='features', conv_layer='features') + 'mobilenet_v2': dict(conv_layer='features') } @@ -43,7 +43,6 @@ def main(args): # Pretrained imagenet model model = models.__dict__[args.model](pretrained=True).eval().to(device=device) conv_layer = MODEL_CONFIG[args.model]['conv_layer'] - input_layer = MODEL_CONFIG[args.model]['input_layer'] fc_layer = MODEL_CONFIG[args.model]['fc_layer'] # Image @@ -57,15 +56,17 @@ def main(args): # Hook the corresponding layer in the model cam_extractors = [CAM(model, conv_layer, fc_layer), GradCAM(model, conv_layer), - GradCAMpp(model, conv_layer), SmoothGradCAMpp(model, conv_layer, input_layer), - ScoreCAM(model, conv_layer, input_layer), SSCAM(model, conv_layer, input_layer), - ISCAM(model, conv_layer, input_layer)] + GradCAMpp(model, conv_layer), SmoothGradCAMpp(model, conv_layer), + ScoreCAM(model, conv_layer), 
SSCAM(model, conv_layer), + ISCAM(model, conv_layer)] # Don't trigger all hooks for extractor in cam_extractors: extractor._hooks_enabled = False - fig, axes = plt.subplots(1, len(cam_extractors), figsize=(7, 2)) + num_rows = 2 + num_cols = math.ceil(len(cam_extractors) / num_rows) + _, axes = plt.subplots(num_rows, num_cols, figsize=(6, 4)) for idx, extractor in enumerate(cam_extractors): extractor._hooks_enabled = True model.zero_grad() @@ -76,6 +77,7 @@ def main(args): # Use the hooked data to compute activation map activation_map = extractor(class_idx, scores).cpu() + # Clean data extractor.clear_hooks() extractor._hooks_enabled = False @@ -85,9 +87,13 @@ def main(args): # Plot the result result = overlay_mask(pil_img, heatmap) - axes[idx].imshow(result) - axes[idx].axis('off') - axes[idx].set_title(extractor.__class__.__name__, size=8) + axes[idx // num_cols][idx % num_cols].imshow(result) + axes[idx // num_cols][idx % num_cols].set_title(extractor.__class__.__name__, size=8) + + # Clear axes + for row in axes: + for ax in row: + ax.axis('off') plt.tight_layout() if args.savefig: diff --git a/test/test_cams.py b/test/test_cams.py index 7f7359b9..6655ade7 100644 --- a/test/test_cams.py +++ b/test/test_cams.py @@ -4,6 +4,7 @@ import requests import torch from PIL import Image +from torch import nn from torchvision.models import mobilenet_v2, resnet18 from torchvision.transforms.functional import normalize, resize, to_tensor @@ -20,7 +21,7 @@ def _forward(model, input_tensor): return scores -class Tester(unittest.TestCase): +class CAMCoreTester(unittest.TestCase): def _verify_cam(self, cam): # Simple verifications self.assertIsInstance(cam, torch.Tensor) @@ -67,76 +68,91 @@ def _test_extractor(self, extractor, model): def _test_cam(self, name): # Get a pretrained model - model = resnet18(pretrained=False).eval() - conv_layer = 'layer4' - input_layer = 'conv1' - fc_layer = 'fc' - - # Hook the corresponding layer in the model - extractor = 
cams.__dict__[name](model, conv_layer, fc_layer if name == 'CAM' else input_layer) - - self._test_extractor(extractor, model) - - def _test_cam_arbitrary_layer(self, name): - model = resnet18(pretrained=False).eval() conv_layer = 'layer4.1.relu' - input_layer = 'conv1' - fc_layer = 'fc' # Hook the corresponding layer in the model - extractor = cams.__dict__[name](model, conv_layer, fc_layer if name == 'CAM' else input_layer) + extractor = cams.__dict__[name](model, conv_layer) - self._test_extractor(extractor, model) + with torch.no_grad(): + self._test_extractor(extractor, model) def _test_gradcam(self, name): # Get a pretrained model model = mobilenet_v2(pretrained=False) - conv_layer = 'features' + conv_layer = 'features.17.conv.3' # Hook the corresponding layer in the model extractor = cams.__dict__[name](model, conv_layer) self._test_extractor(extractor, model) - def _test_gradcam_arbitrary_layer(self, name): + def test_smooth_gradcampp(self): - model = mobilenet_v2(pretrained=False) - conv_layer = 'features.17.conv.3' + # Get a pretrained model + model = mobilenet_v2(pretrained=False).eval() # Hook the corresponding layer in the model - extractor = cams.__dict__[name](model, conv_layer) + extractor = cams.SmoothGradCAMpp(model) self._test_extractor(extractor, model) - def test_smooth_gradcampp(self): - # Get a pretrained model - model = mobilenet_v2(pretrained=False) - conv_layer = 'features' - input_layer = 'features' +class CAMUtilsTester(unittest.TestCase): - # Hook the corresponding layer in the model - extractor = cams.SmoothGradCAMpp(model, conv_layer, input_layer) + @staticmethod + def _get_custom_module(): - self._test_extractor(extractor, model) + mod = nn.Sequential( + nn.Sequential( + nn.Conv2d(3, 8, 3, 1), + nn.ReLU(), + nn.Conv2d(8, 16, 3, 1), + nn.ReLU(), + nn.AdaptiveAvgPool2d((1, 1)) + ), + nn.Flatten(1), + nn.Linear(16, 1) + ) + return mod + + def test_locate_candidate_layer(self): + + # ResNet-18 + mod = resnet18().eval() + 
self.assertEqual(cams.utils.locate_candidate_layer(mod), 'layer4') + + # Custom model + mod = self._get_custom_module() + + self.assertEqual(cams.utils.locate_candidate_layer(mod), '0.3') + # Check that the model is switched back to its origin mode afterwards + self.assertTrue(mod.training) + + def test_locate_linear_layer(self): + + # ResNet-18 + mod = resnet18().eval() + self.assertEqual(cams.utils.locate_linear_layer(mod), 'fc') + + # Custom model + mod = self._get_custom_module() + self.assertEqual(cams.utils.locate_linear_layer(mod), '2') for cam_extractor in ['CAM', 'ScoreCAM', 'SSCAM', 'ISCAM']: def do_test(self, cam_extractor=cam_extractor): self._test_cam(cam_extractor) - self._test_cam_arbitrary_layer(cam_extractor) - setattr(Tester, "test_" + cam_extractor.lower(), do_test) + setattr(CAMCoreTester, "test_" + cam_extractor.lower(), do_test) for cam_extractor in ['GradCAM', 'GradCAMpp']: def do_test(self, cam_extractor=cam_extractor): self._test_gradcam(cam_extractor) - self._test_gradcam_arbitrary_layer(cam_extractor) - setattr(Tester, "test_" + cam_extractor.lower(), do_test) + setattr(CAMCoreTester, "test_" + cam_extractor.lower(), do_test) if __name__ == '__main__': diff --git a/test/test_utils.py b/test/test_utils.py index c7be1633..1d715c80 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -6,7 +6,7 @@ from torchcam import utils -class Tester(unittest.TestCase): +class UtilsTester(unittest.TestCase): def test_overlay_mask(self): img = Image.fromarray(np.zeros((4, 4, 3)).astype(np.uint8)) diff --git a/torchcam/cams/__init__.py b/torchcam/cams/__init__.py index 41225277..ad26e96e 100644 --- a/torchcam/cams/__init__.py +++ b/torchcam/cams/__init__.py @@ -1,2 +1,3 @@ from .cam import * from .gradcam import * +from .utils import * diff --git a/torchcam/cams/cam.py b/torchcam/cams/cam.py index fe5ae0a8..cffc51aa 100644 --- a/torchcam/cams/cam.py +++ b/torchcam/cams/cam.py @@ -1,9 +1,12 @@ import math +import logging import torch from torch import 
Tensor from torch import nn import torch.nn.functional as F -from typing import Optional, List +from typing import Optional, List, Tuple + +from .utils import locate_candidate_layer, locate_linear_layer __all__ = ['CAM', 'ScoreCAM', 'SSCAM', 'ISCAM'] @@ -13,26 +16,38 @@ class _CAM: Args: model: input model - conv_layer: name of the last convolutional layer + target_layer: name of the target layer + input_shape: shape of the expected input tensor excluding the batch dimension """ def __init__( self, model: nn.Module, - conv_layer: str + target_layer: Optional[str] = None, + input_shape: Tuple[int, ...] = (3, 224, 224), ) -> None: # Obtain a mapping from module name to module instance for each layer in the model self.submodule_dict = dict(model.named_modules()) - if conv_layer not in self.submodule_dict.keys(): - raise ValueError(f"Unable to find submodule {conv_layer} in the model") + # If the layer is not specified, try automatic resolution + if target_layer is None: + target_layer = locate_candidate_layer(model, input_shape) + # Warn the user of the choice + if isinstance(target_layer, str): + logging.warning(f"no value was provided for `target_layer`, thus set to '{target_layer}'.") + else: + raise ValueError("unable to resolve `target_layer` automatically, please specify its value.") + + if target_layer not in self.submodule_dict.keys(): + raise ValueError(f"Unable to find submodule {target_layer} in the model") + self.target_layer = target_layer self.model = model # Init hooks self.hook_a: Optional[Tensor] = None self.hook_handles: List[torch.utils.hooks.RemovableHandle] = [] # Forward hook - self.hook_handles.append(self.submodule_dict[conv_layer].register_forward_hook(self._hook_a)) + self.hook_handles.append(self.submodule_dict[target_layer].register_forward_hook(self._hook_a)) # Enable hooks self._hooks_enabled = True # Should ReLU be used before normalization @@ -129,7 +144,7 @@ class CAM(_CAM): .. 
math:: L^{(c)}_{CAM}(x, y) = ReLU\\Big(\\sum\\limits_k w_k^{(c)} A_k(x, y)\\Big) - where :math:`A_k(x, y)` is the activation of node :math:`k` in the last convolutional layer of the model at + where :math:`A_k(x, y)` is the activation of node :math:`k` in the target layer of the model at position :math:`(x, y)`, and :math:`w_k^{(c)}` is the weight corresponding to class :math:`c` for unit :math:`k` in the fully connected layer.. @@ -144,18 +159,29 @@ class CAM(_CAM): Args: model: input model - conv_layer: name of the last convolutional layer + target_layer: name of the target layer fc_layer: name of the fully convolutional layer + input_shape: shape of the expected input tensor excluding the batch dimension """ def __init__( self, model: nn.Module, - conv_layer: str, - fc_layer: str + target_layer: Optional[str] = None, + fc_layer: Optional[str] = None, + input_shape: Tuple[int, ...] = (3, 224, 224), ) -> None: - super().__init__(model, conv_layer) + super().__init__(model, target_layer, input_shape) + + # If the layer is not specified, try automatic resolution + if fc_layer is None: + fc_layer = locate_linear_layer(model) + # Warn the user of the choice + if isinstance(fc_layer, str): + logging.warning(f"no value was provided for `fc_layer`, thus set to '{fc_layer}'.") + else: + raise ValueError("unable to resolve `fc_layer` automatically, please specify its value.") # Softmax weight self._fc_weights = self.submodule_dict[fc_layer].weight.data @@ -180,7 +206,7 @@ class ScoreCAM(_CAM): .. 
math:: w_k^{(c)} = softmax(Y^{(c)}(M_k) - Y^{(c)}(X_b)) - where :math:`A_k(x, y)` is the activation of node :math:`k` in the last convolutional layer of the model at + where :math:`A_k(x, y)` is the activation of node :math:`k` in the target layer of the model at position :math:`(x, y)`, :math:`Y^{(c)}(X)` is the model output score for class :math:`c` before softmax for input :math:`X`, :math:`X_b` is a baseline image, and :math:`M_k` is defined as follows: @@ -195,29 +221,29 @@ class ScoreCAM(_CAM): >>> from torchvision.models import resnet18 >>> from torchcam.cams import ScoreCAM >>> model = resnet18(pretrained=True).eval() - >>> cam = ScoreCAM(model, 'layer4', 'conv1') + >>> cam = ScoreCAM(model, 'layer4') >>> with torch.no_grad(): out = model(input_tensor) >>> cam(class_idx=100) Args: model: input model - conv_layer: name of the last convolutional layer - input_layer: name of the first layer + target_layer: name of the target layer batch_size: batch size used to forward masked inputs + input_shape: shape of the expected input tensor excluding the batch dimension """ def __init__( self, model: nn.Module, - conv_layer: str, - input_layer: str, - batch_size: int = 32 + target_layer: Optional[str] = None, + batch_size: int = 32, + input_shape: Tuple[int, ...] 
= (3, 224, 224), ) -> None: - super().__init__(model, conv_layer) + super().__init__(model, target_layer, input_shape) # Input hook - self.hook_handles.append(self.submodule_dict[input_layer].register_forward_pre_hook(self._store_input)) + self.hook_handles.append(model.register_forward_pre_hook(self._store_input)) self.bs = batch_size # Ensure ReLU is applied to CAM before normalization self._relu = True @@ -248,6 +274,9 @@ def _get_weights(self, class_idx: int, scores: Optional[Tensor] = None) -> Tenso # Disable hook updates self._hooks_enabled = False + # Switch to eval + origin_mode = self.model.training + self.model.eval() # Process by chunk (GPU RAM limitation) for idx in range(math.ceil(weights.shape[0] / self.bs)): @@ -258,6 +287,8 @@ def _get_weights(self, class_idx: int, scores: Optional[Tensor] = None) -> Tenso # Reenable hook updates self._hooks_enabled = True + # Put back the model in the correct mode + self.model.training = origin_mode return weights @@ -280,7 +311,7 @@ class SSCAM(ScoreCAM): w_k^{(c)} = \\frac{1}{N} \\sum\\limits_1^N softmax(Y^{(c)}(M_k) - Y^{(c)}(X_b)) where :math:`N` is the number of samples used to smooth the weights, - :math:`A_k(x, y)` is the activation of node :math:`k` in the last convolutional layer of the model at + :math:`A_k(x, y)` is the activation of node :math:`k` in the target layer of the model at position :math:`(x, y)`, :math:`Y^{(c)}(X)` is the model output score for class :math:`c` before softmax for input :math:`X`, :math:`X_b` is a baseline image, and :math:`M_k` is defined as follows: @@ -297,30 +328,30 @@ class SSCAM(ScoreCAM): >>> from torchvision.models import resnet18 >>> from torchcam.cams import SSCAM >>> model = resnet18(pretrained=True).eval() - >>> cam = SSCAM(model, 'layer4', 'conv1') + >>> cam = SSCAM(model, 'layer4') >>> with torch.no_grad(): out = model(input_tensor) >>> cam(class_idx=100) Args: model: input model - conv_layer: name of the last convolutional layer - input_layer: name of the first 
layer + target_layer: name of the target layer batch_size: batch size used to forward masked inputs num_samples: number of noisy samples used for weight computation std: standard deviation of the noise added to the normalized activation + input_shape: shape of the expected input tensor excluding the batch dimension """ def __init__( self, model: nn.Module, - conv_layer: str, - input_layer: str, + target_layer: Optional[str] = None, batch_size: int = 32, num_samples: int = 35, - std: float = 2.0 + std: float = 2.0, + input_shape: Tuple[int, ...] = (3, 224, 224), ) -> None: - super().__init__(model, conv_layer, input_layer, batch_size) + super().__init__(model, target_layer, batch_size, input_shape) self.num_samples = num_samples self.std = std @@ -346,6 +377,9 @@ def _get_weights(self, class_idx: int, scores: Optional[Tensor] = None) -> Tenso # Disable hook updates self._hooks_enabled = False + # Switch to eval + origin_mode = self.model.training + self.model.eval() for _idx in range(self.num_samples): noisy_m = self._input * (upsampled_a + @@ -363,6 +397,8 @@ def _get_weights(self, class_idx: int, scores: Optional[Tensor] = None) -> Tenso # Reenable hook updates self._hooks_enabled = True + # Put back the model in the correct mode + self.model.training = origin_mode return weights @@ -385,7 +421,7 @@ class ISCAM(ScoreCAM): w_k^{(c)} = \\sum\\limits_{i=1}^N \\frac{i}{N} softmax(Y^{(c)}(M_k) - Y^{(c)}(X_b)) where :math:`N` is the number of samples used to smooth the weights, - :math:`A_k(x, y)` is the activation of node :math:`k` in the last convolutional layer of the model at + :math:`A_k(x, y)` is the activation of node :math:`k` in the target layer of the model at position :math:`(x, y)`, :math:`Y^{(c)}(X)` is the model output score for class :math:`c` before softmax for input :math:`X`, :math:`X_b` is a baseline image, and :math:`M_k` is defined as follows: @@ -402,28 +438,28 @@ class ISCAM(ScoreCAM): >>> from torchvision.models import resnet18 >>> from 
torchcam.cams import ISSCAM >>> model = resnet18(pretrained=True).eval() - >>> cam = ISCAM(model, 'layer4', 'conv1') + >>> cam = ISCAM(model, 'layer4') >>> with torch.no_grad(): out = model(input_tensor) >>> cam(class_idx=100) Args: model: input model - conv_layer: name of the last convolutional layer - input_layer: name of the first layer + target_layer: name of the target layer batch_size: batch size used to forward masked inputs num_samples: number of noisy samples used for weight computation + input_shape: shape of the expected input tensor excluding the batch dimension """ def __init__( self, model: nn.Module, - conv_layer: str, - input_layer: str, + target_layer: Optional[str] = None, batch_size: int = 32, - num_samples: int = 10 + num_samples: int = 10, + input_shape: Tuple[int, ...] = (3, 224, 224), ) -> None: - super().__init__(model, conv_layer, input_layer, batch_size) + super().__init__(model, target_layer, batch_size, input_shape) self.num_samples = num_samples @@ -449,6 +485,9 @@ def _get_weights(self, class_idx: int, scores: Optional[Tensor] = None) -> Tenso self._hooks_enabled = False fmap = torch.zeros((upsampled_a.shape[0], *self._input.shape[1:]), dtype=upsampled_a.dtype, device=upsampled_a.device) + # Switch to eval + origin_mode = self.model.training + self.model.eval() for _idx in range(self.num_samples): fmap += (_idx + 1) / self.num_samples * self._input * upsampled_a @@ -463,5 +502,7 @@ def _get_weights(self, class_idx: int, scores: Optional[Tensor] = None) -> Tenso # Reenable hook updates self._hooks_enabled = True + # Put back the model in the correct mode + self.model.training = origin_mode return weights diff --git a/torchcam/cams/gradcam.py b/torchcam/cams/gradcam.py index f43c06f6..7bf44178 100644 --- a/torchcam/cams/gradcam.py +++ b/torchcam/cams/gradcam.py @@ -1,6 +1,6 @@ import torch from torch import Tensor -from typing import Optional +from typing import Optional, Tuple from .cam import _CAM @@ -12,16 +12,18 @@ class 
_GradCAM(_CAM): Args: model: input model - conv_layer: name of the last convolutional layer + target_layer: name of the target layer + input_shape: shape of the expected input tensor excluding the batch dimension """ def __init__( self, model: torch.nn.Module, - conv_layer: str + target_layer: Optional[str] = None, + input_shape: Tuple[int, ...] = (3, 224, 224), ) -> None: - super().__init__(model, conv_layer) + super().__init__(model, target_layer, input_shape) # Init hook self.hook_g: Optional[Tensor] = None # Ensure ReLU is applied before normalization @@ -29,7 +31,7 @@ def __init__( # Model output is used by the extractor self._score_used = True # Backward hook - self.hook_handles.append(self.submodule_dict[conv_layer].register_backward_hook(self._hook_g)) + self.hook_handles.append(self.submodule_dict[self.target_layer].register_backward_hook(self._hook_g)) def _hook_g(self, module: torch.nn.Module, input: Tensor, output: Tensor) -> None: """Gradient hook""" @@ -67,7 +69,7 @@ class GradCAM(_GradCAM): w_k^{(c)} = \\frac{1}{H \\cdot W} \\sum\\limits_{i=1}^H \\sum\\limits_{j=1}^W \\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)} - where :math:`A_k(x, y)` is the activation of node :math:`k` in the last convolutional layer of the model at + where :math:`A_k(x, y)` is the activation of node :math:`k` in the target layer of the model at position :math:`(x, y)`, and :math:`Y^{(c)}` is the model output score for class :math:`c` before softmax. 
@@ -81,7 +83,8 @@ class GradCAM(_GradCAM): Args: model: input model - conv_layer: name of the last convolutional layer + target_layer: name of the target layer + input_shape: shape of the expected input tensor excluding the batch dimension """ def _get_weights(self, class_idx: int, scores: Tensor) -> Tensor: # type: ignore[override] @@ -109,7 +112,7 @@ class GradCAMpp(_GradCAM): w_k^{(c)} = \\sum\\limits_{i=1}^H \\sum\\limits_{j=1}^W \\alpha_k^{(c)}(i, j) \\cdot ReLU\\Big(\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)}\\Big) - where :math:`A_k(x, y)` is the activation of node :math:`k` in the last convolutional layer of the model at + where :math:`A_k(x, y)` is the activation of node :math:`k` in the target layer of the model at position :math:`(x, y)`, :math:`Y^{(c)}` is the model output score for class :math:`c` before softmax, and :math:`\\alpha_k^{(c)}(i, j)` being defined as: @@ -132,7 +135,8 @@ class GradCAMpp(_GradCAM): Args: model: input model - conv_layer: name of the last convolutional layer + target_layer: name of the target layer + input_shape: shape of the expected input tensor excluding the batch dimension """ def _get_weights(self, class_idx: int, scores: Tensor) -> Tensor: # type: ignore[override] @@ -166,7 +170,7 @@ class SmoothGradCAMpp(_GradCAM): w_k^{(c)} = \\sum\\limits_{i=1}^H \\sum\\limits_{j=1}^W \\alpha_k^{(c)}(i, j) \\cdot ReLU\\Big(\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)}\\Big) - where :math:`A_k(x, y)` is the activation of node :math:`k` in the last convolutional layer of the model at + where :math:`A_k(x, y)` is the activation of node :math:`k` in the target layer of the model at position :math:`(x, y)`, :math:`Y^{(c)}` is the model output score for class :math:`c` before softmax, and :math:`\\alpha_k^{(c)}(i, j)` being defined as: @@ -197,24 +201,27 @@ class SmoothGradCAMpp(_GradCAM): Args: model: input model - conv_layer: name of the last convolutional layer + target_layer: name of the target layer + num_samples: number of 
samples to use for smoothing + std: standard deviation of the noise + input_shape: shape of the expected input tensor excluding the batch dimension """ def __init__( self, model: torch.nn.Module, - conv_layer: str, - first_layer: str, + target_layer: Optional[str] = None, num_samples: int = 4, - std: float = 0.3 + std: float = 0.3, + input_shape: Tuple[int, ...] = (3, 224, 224), ) -> None: - super().__init__(model, conv_layer) + super().__init__(model, target_layer, input_shape) # Model scores is not used by the extractor self._score_used = False # Input hook - self.hook_handles.append(self.submodule_dict[first_layer].register_forward_pre_hook(self._store_input)) + self.hook_handles.append(model.register_forward_pre_hook(self._store_input)) # Noise distribution self.num_samples = num_samples self.std = std diff --git a/torchcam/cams/utils.py b/torchcam/cams/utils.py new file mode 100644 index 00000000..83b9ceb6 --- /dev/null +++ b/torchcam/cams/utils.py @@ -0,0 +1,75 @@ +import torch +from torch import Tensor +from torch import nn +from typing import List, Optional, Tuple +from functools import partial + +__all__ = ['locate_candidate_layer', 'locate_linear_layer'] + + +def locate_candidate_layer(mod: nn.Module, input_shape: Tuple[int, ...] 
= (3, 224, 224)) -> Optional[str]: + """Attempts to find a candidate layer to use for CAM extraction + + Args: + mod: the module to inspect + input_shape: the expected shape of input tensor excluding the batch dimension + + Returns: + str: the candidate layer for CAM + """ + + # Set module in eval mode + module_mode = mod.training + mod.eval() + + output_shapes: List[Tuple[Optional[str], Tuple[int, ...]]] = [] + + def _record_output_shape(module: nn.Module, input: Tensor, output: Tensor, name: Optional[str] = None) -> None: + """Activation hook""" + output_shapes.append((name, output.shape)) + + hook_handles: List[torch.utils.hooks.RemovableHandle] = [] + # forward hook on all layers + for n, m in mod.named_modules(): + hook_handles.append(m.register_forward_hook(partial(_record_output_shape, name=n))) + + # forward empty + with torch.no_grad(): + _ = mod(torch.rand(1, *input_shape)) + + # Remove all temporary hooks + for handle in hook_handles: + handle.remove() + + # Put back the model in the corresponding mode + mod.training = module_mode + + # Check output shapes + candidate_layer = None + for layer_name, output_shape in output_shapes: + # Stop before flattening or global pooling + if len(output_shape) != (len(input_shape) + 1) or all(v == 1 for v in output_shape[2:]): + break + else: + candidate_layer = layer_name + + return candidate_layer + + +def locate_linear_layer(mod: nn.Module) -> Optional[str]: + """Attempts to find a fully connected layer to use for CAM extraction + + Args: + mod: the module to inspect + + Returns: + str: the candidate layer + """ + + candidate_layer = None + for layer_name, m in mod.named_modules(): + if isinstance(m, nn.Linear): + candidate_layer = layer_name + break + + return candidate_layer