diff --git a/docs/source/main/model_zoo/MODEL_ZOO.md b/docs/source/main/model_zoo/MODEL_ZOO.md
index 7500e21..6c91376 100644
--- a/docs/source/main/model_zoo/MODEL_ZOO.md
+++ b/docs/source/main/model_zoo/MODEL_ZOO.md
@@ -21,6 +21,13 @@ The [Model ZOO](https://chmura.put.poznan.pl/s/2pJk4izRurzQwu3) is a collection
| | | | | |
| | | | | |
+## Recognition models
+
+| Model | Input size | CM/PX | Description | Example image |
+|---------|---|---|---|---|
+| NAIP Place recognition | 224 | 100 | ConvNeXt nano trained using SimSiam on NAIP imagery | |
+| | | | | |
+
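+A recognition model outputs an embedding (feature) vector for a 224 x 224 tile instead of a per-pixel map; Deepness compares the query image embedding with each tile embedding using cosine similarity. A minimal sketch of querying such a model outside QGIS (assuming `onnxruntime` and `opencv-python` are installed; `naip_place_recognition.onnx`, `query.png` and `tile.png` are illustrative file names):
+
+```python
+import cv2
+import numpy as np
+import onnxruntime as ort
+
+# ImageNet normalization statistics, scaled to the 0-255 pixel range
+MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32) * 255.0
+STD = np.array([0.229, 0.224, 0.225], dtype=np.float32) * 255.0
+
+
+def embed(session: ort.InferenceSession, image_path: str) -> np.ndarray:
+    img = cv2.imread(image_path)  # BGR, uint8
+    img = cv2.resize(img, (224, 224))
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
+    img = (img - MEAN) / STD
+    batch = np.expand_dims(img.transpose(2, 0, 1), axis=0)  # (1, 3, 224, 224)
+    return session.run(None, {session.get_inputs()[0].name: batch})[0][0]
+
+
+session = ort.InferenceSession("naip_place_recognition.onnx")
+query_emb = embed(session, "query.png")
+tile_emb = embed(session, "tile.png")
+similarity = np.dot(query_emb, tile_emb) / (np.linalg.norm(query_emb) * np.linalg.norm(tile_emb))
+print(f"cosine similarity: {similarity:.3f}")
+```
+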
## Object detection models
| Model | Input size | CM/PX | Description | Example image |
diff --git a/src/deepness/common/processing_parameters/map_processing_parameters.py b/src/deepness/common/processing_parameters/map_processing_parameters.py
index 8f2cf23..d461082 100644
--- a/src/deepness/common/processing_parameters/map_processing_parameters.py
+++ b/src/deepness/common/processing_parameters/map_processing_parameters.py
@@ -20,6 +20,7 @@ class ModelOutputFormat(enum.Enum):
ALL_CLASSES_AS_SEPARATE_LAYERS = 'All classes as separate layers'
CLASSES_AS_SEPARATE_LAYERS_WITHOUT_ZERO_CLASS = 'Classes as separate layers (without 0 class)'
ONLY_SINGLE_CLASS_AS_LAYER = 'Single class as a vector layer'
+    RECOGNITION_RESULT = 'Cosine similarity between the query image and the map'
@classmethod
def get_all_names(cls):
diff --git a/src/deepness/common/processing_parameters/recognition_parameters.py b/src/deepness/common/processing_parameters/recognition_parameters.py
new file mode 100644
index 0000000..e350b68
--- /dev/null
+++ b/src/deepness/common/processing_parameters/recognition_parameters.py
@@ -0,0 +1,17 @@
+from dataclasses import dataclass
+
+from deepness.common.processing_parameters.map_processing_parameters import \
+ MapProcessingParameters
+from deepness.processing.models.model_base import ModelBase
+
+
+@dataclass
+class RecognitionParameters(MapProcessingParameters):
+ """
+    Parameters for the inference of a recognition model (including pre/post-processing), obtained from the UI.
+ """
+
+ query_image_path: str # path to query image
+ model: ModelBase # wrapper of the loaded model
diff --git a/src/deepness/deepness_dockwidget.py b/src/deepness/deepness_dockwidget.py
index 3044d9c..88f409a 100644
--- a/src/deepness/deepness_dockwidget.py
+++ b/src/deepness/deepness_dockwidget.py
@@ -18,6 +18,7 @@
from deepness.common.processing_parameters.detection_parameters import DetectionParameters, DetectorType
from deepness.common.processing_parameters.map_processing_parameters import (MapProcessingParameters, ModelOutputFormat,
ProcessedAreaType)
+from deepness.common.processing_parameters.recognition_parameters import RecognitionParameters
from deepness.common.processing_parameters.regression_parameters import RegressionParameters
from deepness.common.processing_parameters.segmentation_parameters import SegmentationParameters
from deepness.common.processing_parameters.superresolution_parameters import SuperresolutionParameters
@@ -196,6 +197,7 @@ def get_selected_processed_area_type(self) -> ProcessedAreaType:
def _create_connections(self):
self.pushButton_runInference.clicked.connect(self._run_inference)
self.pushButton_runTrainingDataExport.clicked.connect(self._run_training_data_export)
+ self.pushButton_browseQueryImagePath.clicked.connect(self._browse_query_image_path)
self.pushButton_browseModelPath.clicked.connect(self._browse_model_path)
self.comboBox_processedAreaSelection.currentIndexChanged.connect(self._set_processed_area_mask_options)
self.comboBox_modelType.currentIndexChanged.connect(self._model_type_changed)
@@ -216,6 +218,7 @@ def _model_type_changed(self):
detection_enabled = False
regression_enabled = False
superresolution_enabled = False
+ recognition_enabled = False
if model_type == ModelType.SEGMENTATION:
segmentation_enabled = True
@@ -225,6 +228,8 @@ def _model_type_changed(self):
regression_enabled = True
elif model_type == ModelType.SUPERRESOLUTION:
superresolution_enabled = True
+ elif model_type == ModelType.RECOGNITION:
+ recognition_enabled = True
else:
raise Exception(f"Unsupported model type ({model_type})!")
@@ -232,8 +237,10 @@ def _model_type_changed(self):
self.mGroupBox_detectionParameters.setVisible(detection_enabled)
self.mGroupBox_regressionParameters.setVisible(regression_enabled)
self.mGroupBox_superresolutionParameters.setVisible(superresolution_enabled)
- # Disable output format options for super-resolution models.
- self.mGroupBox_6.setEnabled(not superresolution_enabled)
+        self.mGroupBox_recognitionParameters.setVisible(recognition_enabled)
+        # Disable output format options for super-resolution and recognition models
+        self.mGroupBox_6.setEnabled(not (superresolution_enabled or recognition_enabled))
def _detector_type_changed(self):
detector_type = DetectorType(self.comboBox_detectorType.currentText())
@@ -268,6 +275,16 @@ def _browse_model_path(self):
self.lineEdit_modelPath.setText(file_path)
self._load_model_and_display_info()
+ def _browse_query_image_path(self):
+ file_path, _ = QFileDialog.getOpenFileName(
+ self,
+ "Select image file...",
+ os.path.expanduser("~"),
+ "All files (*.*)",
+ )
+ if file_path:
+ self.lineEdit_recognitionPath.setText(file_path)
+
def _load_default_model_parameters(self):
"""
Load the default parameters from model metadata
@@ -478,6 +495,8 @@ def get_inference_parameters(self) -> MapProcessingParameters:
params = self.get_regression_parameters(map_processing_parameters)
elif model_type == ModelType.SUPERRESOLUTION:
params = self.get_superresolution_parameters(map_processing_parameters)
+ elif model_type == ModelType.RECOGNITION:
+ params = self.get_recognition_parameters(map_processing_parameters)
elif model_type == ModelType.DETECTION:
params = self.get_detection_parameters(map_processing_parameters)
@@ -515,6 +534,14 @@ def get_superresolution_parameters(self, map_processing_parameters: MapProcessin
)
return params
+ def get_recognition_parameters(self, map_processing_parameters: MapProcessingParameters) -> RecognitionParameters:
+ params = RecognitionParameters(
+ **map_processing_parameters.__dict__,
+ model=self._model,
+ query_image_path=self.lineEdit_recognitionPath.text(),
+ )
+ return params
+
def get_detection_parameters(self, map_processing_parameters: MapProcessingParameters) -> DetectionParameters:
params = DetectionParameters(
diff --git a/src/deepness/deepness_dockwidget.ui b/src/deepness/deepness_dockwidget.ui
index 0e4aaa1..0b797d5 100644
--- a/src/deepness/deepness_dockwidget.ui
+++ b/src/deepness/deepness_dockwidget.ui
@@ -24,9 +24,9 @@
        <x>0</x>
-       <y>0</y>
+       <y>-325</y>
        <width>452</width>
-       <height>1621</height>
+       <height>1742</height>
@@ -626,6 +626,51 @@
+       <item>
+        <widget class="QGroupBox" name="mGroupBox_recognitionParameters">
+         <property name="sizePolicy">
+          <sizepolicy hsizetype="Preferred" vsizetype="Preferred">
+           <horstretch>0</horstretch>
+           <verstretch>0</verstretch>
+          </sizepolicy>
+         </property>
+         <property name="title">
+          <string>Recognition parameters</string>
+         </property>
+         <layout class="QVBoxLayout">
+          <item>
+           <widget class="QLabel">
+            <property name="font">
+             <font>
+              <weight>75</weight>
+              <bold>true</bold>
+             </font>
+            </property>
+            <property name="text">
+             <string>NOTE: Applicable only if a recognition model is used</string>
+            </property>
+           </widget>
+          </item>
+          <item>
+           <widget class="QLabel">
+            <property name="text">
+             <string>Image to localize path:</string>
+            </property>
+           </widget>
+          </item>
+          <item>
+           <widget class="QLineEdit" name="lineEdit_recognitionPath"/>
+          </item>
+          <item>
+           <widget class="QPushButton" name="pushButton_browseQueryImagePath">
+            <property name="text">
+             <string>Browse</string>
+            </property>
+           </widget>
+          </item>
+         </layout>
+        </widget>
+       </item>
       <item>
diff --git a/src/deepness/processing/map_processor/map_processor_recognition.py b/src/deepness/processing/map_processor/map_processor_recognition.py
new file mode 100644
index 0000000..08f6dd7
--- /dev/null
+++ b/src/deepness/processing/map_processor/map_processor_recognition.py
@@ -0,0 +1,219 @@
+""" This file implements map processing for Recognition model """
+
+import os
+import uuid
+
+import numpy as np
+from deepness.common.lazy_package_loader import LazyPackageLoader
+from deepness.common.misc import TMP_DIR_PATH
+from deepness.common.processing_parameters.recognition_parameters import \
+ RecognitionParameters
+from deepness.processing.map_processor.map_processing_result import (
+ MapProcessingResult, MapProcessingResultCanceled,
+ MapProcessingResultSuccess)
+from deepness.processing.map_processor.map_processor_with_model import \
+ MapProcessorWithModel
+from numpy.linalg import norm
+from osgeo import gdal, osr
+from qgis.core import QgsProject, QgsRasterLayer
+
+cv2 = LazyPackageLoader('cv2')
+
+
+class MapProcessorRecognition(MapProcessorWithModel):
+ """
+ MapProcessor specialized for Recognition model
+ """
+
+ def __init__(self, params: RecognitionParameters, **kwargs):
+ super().__init__(params=params, model=params.model, **kwargs)
+ self.recognition_parameters = params
+ self.model = params.model
+ self._result_imgs = None
+
+ def get_result_imgs(self):
+ return self._result_imgs
+
+ def _run(self) -> MapProcessingResult:
+        query_img = cv2.imread(self.recognition_parameters.query_image_path)
+        if query_img is None:
+            # cv2.imread returns None instead of raising when the file cannot be read
+            raise RuntimeError(
+                f"Unable to open the query image: {self.recognition_parameters.query_image_path}")
+
+ query_img_emb = self.model.process(query_img)[0]
+
+ final_shape_px = (
+ self.img_size_y_pixels,
+ self.img_size_x_pixels,
+ )
+
+ stride = self.stride_px
+ full_result_img = np.zeros(final_shape_px, np.float32)
+ mask = np.zeros_like(full_result_img, dtype=np.int16)
+        highest = -np.inf
+        x_high, y_high = 0, 0
+ for tile_img, tile_params in self.tiles_generator():
+ if self.isCanceled():
+ return MapProcessingResultCanceled()
+
+ tile_result = self._process_tile(tile_img)[0]
+
+ # cosine similarity
+            cossim = np.dot(query_img_emb[0], tile_result[0]) / (norm(query_img_emb[0]) * norm(tile_result[0]))
+
+ x_bin = tile_params.x_bin_number
+ y_bin = tile_params.y_bin_number
+ size = self.params.tile_size_px
+ if cossim > highest:
+ highest = cossim
+ x_high = x_bin
+ y_high = y_bin
+            full_result_img[y_bin * stride:y_bin * stride + size, x_bin * stride:x_bin * stride + size] += cossim
+            mask[y_bin * stride:y_bin * stride + size, x_bin * stride:x_bin * stride + size] += 1
+
+        full_result_img = full_result_img / np.maximum(mask, 1)  # average overlapping tiles, avoid division by zero
+        self._result_imgs = full_result_img
+
+        self._create_rlayers_from_images_for_base_extent(self._result_imgs, x_high, y_high, size, stride)
+        result_message = self._create_result_message(self._result_imgs, x_high * stride, y_high * stride)
+ return MapProcessingResultSuccess(result_message)
+
+    def _create_result_message(self, result_img: np.ndarray, x_high, y_high) -> str:
+        txt = f"Recognition finished. Best match at pixel ({x_high}, {y_high}); result image shape: {result_img.shape}"
+        return txt
+
+ def limit_extended_extent_image_to_base_extent_with_mask(self, full_img):
+ """
+ Limit an image which is for extended_extent to the base_extent image.
+ If a limiting polygon was used for processing, it will be also applied.
+ :param full_img:
+ :return:
+ """
+ # TODO look for some inplace operation to save memory
+ # cv2.copyTo(src=full_img, mask=area_mask_img, dst=full_img) # this doesn't work due to implementation details
+ # full_img = cv2.copyTo(src=full_img, mask=self.area_mask_img)
+
+ b = self.base_extent_bbox_in_full_image
+ result_img = full_img[
+ int(b.y_min * self.recognition_parameters.scale_factor) : int(
+ b.y_max * self.recognition_parameters.scale_factor
+ ),
+ int(b.x_min * self.recognition_parameters.scale_factor) : int(
+ b.x_max * self.recognition_parameters.scale_factor
+ ),
+ :,
+ ]
+ return result_img
+
+ def load_rlayer_from_file(self, file_path):
+ """
+ Create raster layer from tif file
+ """
+ file_name = os.path.basename(file_path)
+ base_file_name = file_name.split("___")[
+ 0
+ ] # we remove the random_id string we created a moment ago
+ rlayer = QgsRasterLayer(file_path, base_file_name)
+ if rlayer.width() == 0:
+ raise Exception(
+ "0 width - rlayer not loaded properly. Probably invalid file path?"
+ )
+ rlayer.setCrs(self.rlayer.crs())
+ return rlayer
+
+ def _create_rlayers_from_images_for_base_extent(
+ self, result_img: np.ndarray,
+ x_high,
+ y_high,
+ size,
+ stride
+ ):
+ group = (
+ QgsProject.instance()
+ .layerTreeRoot()
+ .insertGroup(0, "Cosine similarity score")
+ )
+
+        y = y_high * stride
+        x = x_high * stride
+
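+        # outline the best-matching tile with a one-pixel border of value 1.0 (the maximum cosine similarity)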
+ result_img[y, x:x+size-1] = 1
+ result_img[y+size-1, x:x+size-1] = 1
+ result_img[y:y+size-1, x] = 1
+ result_img[y:y+size-1, x+size-1] = 1
+
+        # TODO: We are creating a new file for each layer.
+        # Maybe we can pass ownership of this file to QGIS?
+        # Or maybe even create the raster layer directly from the array, without a file?
+
+ random_id = str(uuid.uuid4()).replace("-", "")
+ file_path = os.path.join(TMP_DIR_PATH, f"{random_id}.tif")
+ self.save_result_img_as_tif(file_path=file_path, img=np.expand_dims(result_img, axis=2))
+
+ rlayer = self.load_rlayer_from_file(file_path)
+ OUTPUT_RLAYER_OPACITY = 0.5
+ rlayer.renderer().setOpacity(OUTPUT_RLAYER_OPACITY)
+
+ QgsProject.instance().addMapLayer(rlayer, False)
+ group.addLayer(rlayer)
+
+ def save_result_img_as_tif(self, file_path: str, img: np.ndarray):
+ """
+        As we cannot easily pass a numpy array to be displayed as a raster layer, we create temporary GeoTIFF files,
+        which will be loaded as layers later on
+
+ Partially based on example from:
+ https://gis.stackexchange.com/questions/82031/gdal-python-set-projection-of-a-raster-not-working
+ """
+ os.makedirs(os.path.dirname(file_path), exist_ok=True)
+
+ extent = self.base_extent
+ crs = self.rlayer.crs()
+
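+        # GDAL geotransform: (top-left x, pixel width, row rotation, top-left y, column rotation, pixel height);
+        # pixel height is negative because row indices grow downwards in a north-up raster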
+ geo_transform = [
+ extent.xMinimum(),
+ self.rlayer_units_per_pixel, # / self.recognition_parameters.scale_factor,
+ 0,
+ extent.yMaximum(),
+ 0,
+ -self.rlayer_units_per_pixel, # / self.recognition_parameters.scale_factor,
+ ]
+
+        driver = gdal.GetDriverByName("GTiff")
+        n_lines = img.shape[0]
+        n_cols = img.shape[1]
+        n_channels = img.shape[2]
+        data_type = gdal.GDT_Float32
+        grid_data = driver.Create("grid_data", n_cols, n_lines, n_channels, data_type)
+
+        # loop over channels
+        for i in range(1, img.shape[2] + 1):
+            grid_data.GetRasterBand(i).WriteArray(img[:, :, i - 1])
+
+ # crs().srsid() - maybe we can use the ID directly - but how?
+ # srs.ImportFromEPSG()
+ srs = osr.SpatialReference()
+ srs.SetFromUserInput(crs.authid())
+
+ grid_data.SetProjection(srs.ExportToWkt())
+ grid_data.SetGeoTransform(geo_transform)
+ driver.CreateCopy(file_path, grid_data, 0)
+
+    def _process_tile(self, tile_img: np.ndarray) -> np.ndarray:
+        result = self.model.process(tile_img)
+        # NOTE - the result is stored as float32, so some accuracy is lost
+        return result
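
The tile loop in `_run` above averages overlapping cosine-similarity scores and keeps track of the best-matching tile. A minimal, QGIS-free sketch of that accumulation (random vectors stand in for the embeddings returned by `model.process`; the map, tile and stride sizes are illustrative):

```python
import numpy as np
from numpy.linalg import norm


def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    return float(np.dot(a, b) / (norm(a) * norm(b)))


map_h = map_w = 1024         # result image size in pixels
size, stride = 224, 112      # tile size and stride in pixels

rng = np.random.default_rng(0)
query_emb = rng.random(512, dtype=np.float32)      # stand-in for model.process(query_img)[0][0]

result = np.zeros((map_h, map_w), np.float32)
mask = np.zeros_like(result, dtype=np.int16)
best, best_bin = -np.inf, (0, 0)

for y_bin in range((map_h - size) // stride + 1):
    for x_bin in range((map_w - size) // stride + 1):
        tile_emb = rng.random(512, dtype=np.float32)  # stand-in for model.process(tile_img)[0][0]
        cossim = cosine_similarity(query_emb, tile_emb)
        if cossim > best:
            best, best_bin = cossim, (x_bin, y_bin)
        ys, xs = y_bin * stride, x_bin * stride
        result[ys:ys + size, xs:xs + size] += cossim
        mask[ys:ys + size, xs:xs + size] += 1

result /= np.maximum(mask, 1)  # average overlapping tiles, avoid division by zero
print(f"best tile bin (x, y) = {best_bin}, similarity = {best:.3f}")
```
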
diff --git a/src/deepness/processing/map_processor/map_processor_with_model.py b/src/deepness/processing/map_processor/map_processor_with_model.py
index 7f20b82..b5a8c5e 100644
--- a/src/deepness/processing/map_processor/map_processor_with_model.py
+++ b/src/deepness/processing/map_processor/map_processor_with_model.py
@@ -37,6 +37,8 @@ def _get_indexes_of_model_output_channels_to_create(self) -> List[int]:
output_channels = list(range(0, self.model.get_number_of_output_channels()))
elif self.params.model_output_format == ModelOutputFormat.CLASSES_AS_SEPARATE_LAYERS_WITHOUT_ZERO_CLASS:
output_channels = list(range(1, self.model.get_number_of_output_channels()))
+ elif self.params.model_output_format == ModelOutputFormat.RECOGNITION_RESULT:
+            output_channels = [0]  # the recognition result is a single cosine-similarity map
else:
raise Exception(f"Unhandled model output format {self.params.model_output_format}")
diff --git a/src/deepness/processing/models/model_types.py b/src/deepness/processing/models/model_types.py
index e7f2839..463a7d6 100644
--- a/src/deepness/processing/models/model_types.py
+++ b/src/deepness/processing/models/model_types.py
@@ -3,14 +3,17 @@
from deepness.common.processing_parameters.detection_parameters import DetectionParameters
from deepness.common.processing_parameters.map_processing_parameters import MapProcessingParameters
+from deepness.common.processing_parameters.recognition_parameters import RecognitionParameters
from deepness.common.processing_parameters.regression_parameters import RegressionParameters
from deepness.common.processing_parameters.segmentation_parameters import SegmentationParameters
from deepness.common.processing_parameters.superresolution_parameters import SuperresolutionParameters
from deepness.processing.map_processor.map_processor_detection import MapProcessorDetection
+from deepness.processing.map_processor.map_processor_recognition import MapProcessorRecognition
from deepness.processing.map_processor.map_processor_regression import MapProcessorRegression
from deepness.processing.map_processor.map_processor_segmentation import MapProcessorSegmentation
from deepness.processing.map_processor.map_processor_superresolution import MapProcessorSuperresolution
from deepness.processing.models.detector import Detector
+from deepness.processing.models.recognition import Recognition
from deepness.processing.models.regressor import Regressor
from deepness.processing.models.segmentor import Segmentor
from deepness.processing.models.superresolution import Superresolution
@@ -21,6 +24,7 @@ class ModelType(enum.Enum):
REGRESSION = Regressor.get_class_display_name()
DETECTION = Detector.get_class_display_name()
SUPERRESOLUTION = Superresolution.get_class_display_name()
+ RECOGNITION = Recognition.get_class_display_name()
@dataclass
@@ -56,6 +60,12 @@ def get_model_definitions(cls):
model_class=Superresolution,
parameters_class=SuperresolutionParameters,
map_processor_class=MapProcessorSuperresolution,
+            ),
+            # recognition
+ cls(
+ model_type=ModelType.RECOGNITION,
+ model_class=Recognition,
+ parameters_class=RecognitionParameters,
+ map_processor_class=MapProcessorRecognition,
)
]
diff --git a/src/deepness/processing/models/recognition.py b/src/deepness/processing/models/recognition.py
new file mode 100644
index 0000000..efe1fe0
--- /dev/null
+++ b/src/deepness/processing/models/recognition.py
@@ -0,0 +1,169 @@
+""" Module including the class for the recognition of the images
+"""
+import logging
+from typing import List
+
+import numpy as np
+from deepness.common.lazy_package_loader import LazyPackageLoader
+from deepness.processing.models.model_base import ModelBase
+
+cv2 = LazyPackageLoader('cv2')
+
+IMG_SIZE = 224
+mean = (0.485, 0.456, 0.406)  # ImageNet channel means
+std = (0.229, 0.224, 0.225)  # ImageNet channel standard deviations
+
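+# Channel-wise normalization: (img - mean * max_pixel_value) / (std * max_pixel_value),
+# i.e. the standard ImageNet preprocessing applied to 0-255 pixel values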
+def normalize(img, mean, std, max_pixel_value=255.0):
+ mean = np.array(mean, dtype=np.float32)
+ mean *= max_pixel_value
+
+ std = np.array(std, dtype=np.float32)
+ std *= max_pixel_value
+
+ denominator = np.reciprocal(std, dtype=np.float32)
+
+ img = img.astype(np.float32)
+ img -= mean
+ img *= denominator
+
+ return img
+
+class Recognition(ModelBase):
+    """Class implements the recognition model
+
+    The recognition model computes an embedding (feature vector) for an image tile.
+    The map processor compares tile embeddings with the query image embedding using cosine similarity.
+ """
+
+ def __init__(self, model_file_path: str):
+ """
+
+ Parameters
+ ----------
+ model_file_path : str
+ Path to the model file
+ """
+ super(Recognition, self).__init__(model_file_path)
+
+
+ def process(self, img):
+ """Process a single tile image
+
+ Parameters
+ ----------
+ img : np.ndarray
+ Image to process ([TILE_SIZE x TILE_SIZE x channels], type uint8, values 0 to 255)
+
+ Returns
+ -------
+ np.ndarray
+ embeddings
+ """
+ input_batch = self.preprocessing(img)
+ model_output = self.sess.run(
+ output_names=None, input_feed={self.input_name: input_batch}
+ )
+ res = self.postprocessing(model_output)
+ return res
+
+
+ def preprocessing(self, image: np.ndarray):
+ """Preprocess image before inference
+
+ Parameters
+ ----------
+ image : np.ndarray
+            Image to preprocess, in BGR channel order (as returned by cv2.imread); it is converted to RGB internally
+
+ Returns
+ -------
+ np.ndarray
+ Preprocessed image
+ """
+ img = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
+ img = img[:, :, : self.input_shape[-3]]
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+
+ input_batch = img.astype("float32")
+ input_batch = normalize(input_batch, mean, std, max_pixel_value=255.0)
+ input_batch = input_batch.transpose(2, 0, 1)
+ input_batch = np.expand_dims(input_batch, axis=0)
+
+ return input_batch
+
+ def postprocessing(self, model_output: List) -> np.ndarray:
+        """Postprocess the model output.
+
+        Returns the embeddings unchanged, as a numpy array.
+
+        Parameters
+        ----------
+        model_output : List
+            Output embeddings from the (Recognition) model
+
+        Returns
+        -------
+        np.ndarray
+            Embeddings as a numpy array
+        """
+        # Cosine similarity is computed in the map processor, not here,
+        # because the same model is also used to embed the query image.
+        return np.array(model_output)
+
+ def get_number_of_output_channels(self):
+        """Returns the length of the embedding vector produced by the model
+
+        Returns
+        -------
+        int
+            Number of channels (embedding dimensions) in the output layer
+        """
+ if len(self.outputs_layers) == 1:
+ return self.outputs_layers[0].shape[1]
+ else:
+ raise NotImplementedError(
+ "Model with multiple output layers is not supported! Use only one output layer."
+ )
+
+ @classmethod
+ def get_class_display_name(cls):
+ """Returns the name of the class to be displayed in the GUI
+
+ Returns
+ -------
+ str
+ Name of the class
+ """
+ return cls.__name__
+
+ def check_loaded_model_outputs(self):
+ """Checks if the model outputs are valid
+
+ Valid means that:
+ - the model has only one output
+ - the output is 2D (N,C)
+ - the batch size is 1
+
+ """
+ if len(self.outputs_layers) == 1:
+ shape = self.outputs_layers[0].shape
+
+ if len(shape) != 2:
+ raise Exception(
+                    f"Recognition model output should have 2 dimensions: (B,C). Has {shape}"
+ )
+
+ if shape[0] != 1:
+ raise Exception(
+ f"Recognition model can handle only 1-Batch outputs. Has {shape}"
+ )
+
+ else:
+ raise NotImplementedError(
+ "Model with multiple output layers is not supported! Use only one output layer."
+ )
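
`check_loaded_model_outputs` above enforces a single 2D output of shape (1, C). A minimal sketch of verifying a candidate ONNX file against the same constraints before loading it into QGIS (assuming `onnxruntime` is installed; `recognition.onnx` is a placeholder file name):

```python
import onnxruntime as ort

session = ort.InferenceSession("recognition.onnx")
outputs = session.get_outputs()

# mirror the checks performed by Recognition.check_loaded_model_outputs
assert len(outputs) == 1, "use a model with exactly one output layer"
shape = outputs[0].shape
assert len(shape) == 2, f"expected a (B, C) embedding output, got {shape}"
assert shape[0] == 1, f"only batch size 1 is supported, got {shape}"

print(f"embedding length: {shape[1]}")
```

Note that models exported with a dynamic batch dimension report a symbolic name instead of 1 in `shape[0]`; the plugin's own check behaves the same way in that case.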