diff --git a/docs/source/main/model_zoo/MODEL_ZOO.md b/docs/source/main/model_zoo/MODEL_ZOO.md
index 7500e21..6c91376 100644
--- a/docs/source/main/model_zoo/MODEL_ZOO.md
+++ b/docs/source/main/model_zoo/MODEL_ZOO.md
@@ -21,6 +21,13 @@ The [Model ZOO](https://chmura.put.poznan.pl/s/2pJk4izRurzQwu3) is a collection
 | | | | | |
 | | | | | |
 
+## Recognition models
+
+| Model | Input size | CM/PX | Description | Example image |
+|---------|---|---|---|---|
+| NAIP Place recognition | 224 | 100 | ConvNeXt nano trained using SimSiam on NAIP imagery | |
+| | | | | |
+
 ## Object detection models
 
 | Model | Input size | CM/PX | Description | Example image |
diff --git a/src/deepness/common/processing_parameters/map_processing_parameters.py b/src/deepness/common/processing_parameters/map_processing_parameters.py
index 8f2cf23..d461082 100644
--- a/src/deepness/common/processing_parameters/map_processing_parameters.py
+++ b/src/deepness/common/processing_parameters/map_processing_parameters.py
@@ -20,6 +20,7 @@ class ModelOutputFormat(enum.Enum):
     ALL_CLASSES_AS_SEPARATE_LAYERS = 'All classes as separate layers'
     CLASSES_AS_SEPARATE_LAYERS_WITHOUT_ZERO_CLASS = 'Classes as separate layers (without 0 class)'
     ONLY_SINGLE_CLASS_AS_LAYER = 'Single class as a vector layer'
+    RECOGNITION_RESULT = 'Cosine distance between query image and map'
 
     @classmethod
     def get_all_names(cls):
diff --git a/src/deepness/common/processing_parameters/recognition_parameters.py b/src/deepness/common/processing_parameters/recognition_parameters.py
new file mode 100644
index 0000000..e350b68
--- /dev/null
+++ b/src/deepness/common/processing_parameters/recognition_parameters.py
@@ -0,0 +1,17 @@
+import enum
+from dataclasses import dataclass
+from typing import Optional
+
+from deepness.common.processing_parameters.map_processing_parameters import \
+    MapProcessingParameters
+from deepness.processing.models.model_base import ModelBase
+
+
+@dataclass
+class RecognitionParameters(MapProcessingParameters):
+    """
+    Parameters for inference of a Recognition model (including pre/post-processing), obtained from the UI.
+ """ + + query_image_path: str # path to query image + model: ModelBase # wrapper of the loaded model diff --git a/src/deepness/deepness_dockwidget.py b/src/deepness/deepness_dockwidget.py index 3044d9c..88f409a 100644 --- a/src/deepness/deepness_dockwidget.py +++ b/src/deepness/deepness_dockwidget.py @@ -18,6 +18,7 @@ from deepness.common.processing_parameters.detection_parameters import DetectionParameters, DetectorType from deepness.common.processing_parameters.map_processing_parameters import (MapProcessingParameters, ModelOutputFormat, ProcessedAreaType) +from deepness.common.processing_parameters.recognition_parameters import RecognitionParameters from deepness.common.processing_parameters.regression_parameters import RegressionParameters from deepness.common.processing_parameters.segmentation_parameters import SegmentationParameters from deepness.common.processing_parameters.superresolution_parameters import SuperresolutionParameters @@ -196,6 +197,7 @@ def get_selected_processed_area_type(self) -> ProcessedAreaType: def _create_connections(self): self.pushButton_runInference.clicked.connect(self._run_inference) self.pushButton_runTrainingDataExport.clicked.connect(self._run_training_data_export) + self.pushButton_browseQueryImagePath.clicked.connect(self._browse_query_image_path) self.pushButton_browseModelPath.clicked.connect(self._browse_model_path) self.comboBox_processedAreaSelection.currentIndexChanged.connect(self._set_processed_area_mask_options) self.comboBox_modelType.currentIndexChanged.connect(self._model_type_changed) @@ -216,6 +218,7 @@ def _model_type_changed(self): detection_enabled = False regression_enabled = False superresolution_enabled = False + recognition_enabled = False if model_type == ModelType.SEGMENTATION: segmentation_enabled = True @@ -225,6 +228,8 @@ def _model_type_changed(self): regression_enabled = True elif model_type == ModelType.SUPERRESOLUTION: superresolution_enabled = True + elif model_type == ModelType.RECOGNITION: + recognition_enabled = True else: raise Exception(f"Unsupported model type ({model_type})!") @@ -232,8 +237,10 @@ def _model_type_changed(self): self.mGroupBox_detectionParameters.setVisible(detection_enabled) self.mGroupBox_regressionParameters.setVisible(regression_enabled) self.mGroupBox_superresolutionParameters.setVisible(superresolution_enabled) - # Disable output format options for super-resolution models. - self.mGroupBox_6.setEnabled(not superresolution_enabled) + self.mGroupBox_recognitionParameters.setVisible(recognition_enabled) + # Disable output format options for super-resolution or recognition models. 
+        self.mGroupBox_6.setEnabled(not (recognition_enabled or superresolution_enabled))
 
     def _detector_type_changed(self):
         detector_type = DetectorType(self.comboBox_detectorType.currentText())
@@ -268,6 +275,16 @@ def _browse_model_path(self):
             self.lineEdit_modelPath.setText(file_path)
             self._load_model_and_display_info()
 
+    def _browse_query_image_path(self):
+        file_path, _ = QFileDialog.getOpenFileName(
+            self,
+            "Select image file...",
+            os.path.expanduser("~"),
+            "All files (*.*)",
+        )
+        if file_path:
+            self.lineEdit_recognitionPath.setText(file_path)
+
     def _load_default_model_parameters(self):
         """ Load the default parameters from model metadata
@@ -478,6 +495,8 @@ def get_inference_parameters(self) -> MapProcessingParameters:
             params = self.get_regression_parameters(map_processing_parameters)
         elif model_type == ModelType.SUPERRESOLUTION:
             params = self.get_superresolution_parameters(map_processing_parameters)
+        elif model_type == ModelType.RECOGNITION:
+            params = self.get_recognition_parameters(map_processing_parameters)
         elif model_type == ModelType.DETECTION:
             params = self.get_detection_parameters(map_processing_parameters)
 
@@ -515,6 +534,14 @@ def get_superresolution_parameters(self, map_processing_parameters: MapProcessin
         )
         return params
 
+    def get_recognition_parameters(self, map_processing_parameters: MapProcessingParameters) -> RecognitionParameters:
+        params = RecognitionParameters(
+            **map_processing_parameters.__dict__,
+            model=self._model,
+            query_image_path=self.lineEdit_recognitionPath.text(),
+        )
+        return params
+
     def get_detection_parameters(self, map_processing_parameters: MapProcessingParameters) -> DetectionParameters:
         params = DetectionParameters(
diff --git a/src/deepness/deepness_dockwidget.ui b/src/deepness/deepness_dockwidget.ui
index 0e4aaa1..0b797d5 100644
--- a/src/deepness/deepness_dockwidget.ui
+++ b/src/deepness/deepness_dockwidget.ui
@@ -24,9 +24,9 @@
     <x>0</x>
-    <y>0</y>
+    <y>-325</y>
     <width>452</width>
-    <height>1621</height>
+    <height>1742</height>
@@ -626,6 +626,51 @@
+    [45 added lines: a "Recognition parameters" QGroupBox (mGroupBox_recognitionParameters) containing a bold
+     "NOTE: Applicable only if a recognition model is used" label, an "Image to localize path:" label,
+     the lineEdit_recognitionPath field, and a "Browse" button (pushButton_browseQueryImagePath);
+     the XML markup itself is not recoverable from this copy of the patch]
diff --git a/src/deepness/processing/map_processor/map_processor_recognition.py b/src/deepness/processing/map_processor/map_processor_recognition.py
new file mode 100644
index 0000000..08f6dd7
--- /dev/null
+++ b/src/deepness/processing/map_processor/map_processor_recognition.py
@@ -0,0 +1,219 @@
+""" This file implements map processing for the Recognition model """
+
+import os
+import uuid
+
+import numpy as np
+from deepness.common.lazy_package_loader import LazyPackageLoader
+from deepness.common.misc import TMP_DIR_PATH
+from deepness.common.processing_parameters.recognition_parameters import \
+    RecognitionParameters
+from deepness.processing.map_processor.map_processing_result import (
+    MapProcessingResult, MapProcessingResultCanceled,
+    MapProcessingResultSuccess)
+from deepness.processing.map_processor.map_processor_with_model import \
+    MapProcessorWithModel
+from numpy.linalg import norm
+from osgeo import gdal, osr
+from qgis.core import QgsProject, QgsRasterLayer
+
+cv2 = LazyPackageLoader('cv2')
+
+
+class MapProcessorRecognition(MapProcessorWithModel):
+    """
+    MapProcessor specialized for the Recognition model
+    """
+
+    def __init__(self, params: RecognitionParameters, **kwargs):
+        super().__init__(params=params, model=params.model, **kwargs)
+        self.recognition_parameters = params
+        self.model = params.model
+        self._result_img = None
+
+    def get_result_imgs(self):
+        return self._result_img
+
+    def _run(self) -> MapProcessingResult:
+        query_img = cv2.imread(self.recognition_parameters.query_image_path)
+        if query_img is None:
+            # cv2.imread returns None instead of raising when the file cannot be read
+            raise RuntimeError(f"Unable to open query image '{self.recognition_parameters.query_image_path}'")
+
+        query_img_emb = self.model.process(query_img)[0]
+
+        final_shape_px = (
+            self.img_size_y_pixels,
+            self.img_size_x_pixels,
+        )
+
+        stride = self.stride_px
+        size = self.params.tile_size_px
+        full_result_img = np.zeros(final_shape_px, np.float32)
+        mask = np.zeros_like(full_result_img, dtype=np.int16)
+        highest = -1.0  # cosine similarity is in [-1, 1]
+        x_high, y_high = 0, 0
+        for tile_img, tile_params in self.tiles_generator():
+            if self.isCanceled():
+                return MapProcessingResultCanceled()
+
+            tile_result = self._process_tile(tile_img)[0]
+
+            # cosine similarity between the query embedding and the tile embedding
+            cossim = np.dot(query_img_emb[0], tile_result[0]) / (norm(query_img_emb[0]) * norm(tile_result[0]))
+
+            x_bin = tile_params.x_bin_number
+            y_bin = tile_params.y_bin_number
+            if cossim > highest:
+                highest = cossim
+                x_high = x_bin
+                y_high = y_bin
+            full_result_img[y_bin*stride:y_bin*stride+size, x_bin*stride:x_bin*stride+size] += cossim
+            mask[y_bin*stride:y_bin*stride+size, x_bin*stride:x_bin*stride+size] += 1
+
+        # average overlapping tiles; avoid division by zero for pixels not covered by any tile
+        full_result_img = full_result_img / np.maximum(mask, 1)
+        self._result_img = full_result_img
+
+        self._create_rlayers_from_images_for_base_extent(self._result_img, x_high, y_high, size, stride)
+        result_message = self._create_result_message(self._result_img, x_high * stride, y_high * stride)
+        return MapProcessingResultSuccess(result_message)
+
+    def _create_result_message(self, result_img: np.ndarray, x_high, y_high) -> str:
+        txt = f"Recognition ended, best match found at pixel ({x_high}, {y_high}), result shape {result_img.shape}"
+        return txt
+
+    def limit_extended_extent_image_to_base_extent_with_mask(self, full_img):
+        """
+        Limit an image which is for extended_extent to the base_extent image.
+        If a limiting polygon was used for processing, it will be also applied.
+        :param full_img:
+        :return:
+        """
+        # TODO look for some inplace operation to save memory
+        # cv2.copyTo(src=full_img, mask=area_mask_img, dst=full_img)  # this doesn't work due to implementation details
+        # full_img = cv2.copyTo(src=full_img, mask=self.area_mask_img)
+
+        b = self.base_extent_bbox_in_full_image
+        result_img = full_img[
+            int(b.y_min * self.recognition_parameters.scale_factor) : int(
+                b.y_max * self.recognition_parameters.scale_factor
+            ),
+            int(b.x_min * self.recognition_parameters.scale_factor) : int(
+                b.x_max * self.recognition_parameters.scale_factor
+            ),
+            :,
+        ]
+        return result_img
+
+    def load_rlayer_from_file(self, file_path):
+        """
+        Create raster layer from tif file
+        """
+        file_name = os.path.basename(file_path)
+        base_file_name = file_name.split("___")[0]  # we remove the random_id string we created a moment ago
+        rlayer = QgsRasterLayer(file_path, base_file_name)
+        if rlayer.width() == 0:
+            raise Exception(
+                "0 width - rlayer not loaded properly. Probably invalid file path?"
+            )
+        rlayer.setCrs(self.rlayer.crs())
+        return rlayer
+
+    def _create_rlayers_from_images_for_base_extent(
+        self, result_img: np.ndarray,
+        x_high,
+        y_high,
+        size,
+        stride
+    ):
+        group = (
+            QgsProject.instance()
+            .layerTreeRoot()
+            .insertGroup(0, "Cosine similarity score")
+        )
+
+        # draw a 1-pixel border around the best matching tile
+        y = y_high * stride
+        x = x_high * stride
+        result_img[y, x:x+size-1] = 1
+        result_img[y+size-1, x:x+size-1] = 1
+        result_img[y:y+size-1, x] = 1
+        result_img[y:y+size-1, x+size-1] = 1
+
+        # TODO: We are creating a new file for each layer.
+        # Maybe we can pass ownership of this file to QGis?
+        # Or maybe even create vlayer directly from array, without a file?
+        random_id = str(uuid.uuid4()).replace("-", "")
+        file_path = os.path.join(TMP_DIR_PATH, f"{random_id}.tif")
+        self.save_result_img_as_tif(file_path=file_path, img=np.expand_dims(result_img, axis=2))
+
+        rlayer = self.load_rlayer_from_file(file_path)
+        OUTPUT_RLAYER_OPACITY = 0.5
+        rlayer.renderer().setOpacity(OUTPUT_RLAYER_OPACITY)
+
+        QgsProject.instance().addMapLayer(rlayer, False)
+        group.addLayer(rlayer)
+
+    def save_result_img_as_tif(self, file_path: str, img: np.ndarray):
+        """
+        As we cannot easily pass a numpy array to be displayed as a raster layer, we create a temporary geotif file,
+        which will be loaded as a layer later on.
+
+        Partially based on example from:
+        https://gis.stackexchange.com/questions/82031/gdal-python-set-projection-of-a-raster-not-working
+        """
+        os.makedirs(os.path.dirname(file_path), exist_ok=True)
+
+        extent = self.base_extent
+        crs = self.rlayer.crs()
+
+        geo_transform = [
+            extent.xMinimum(),
+            self.rlayer_units_per_pixel,  # / self.recognition_parameters.scale_factor,
+            0,
+            extent.yMaximum(),
+            0,
+            -self.rlayer_units_per_pixel,  # / self.recognition_parameters.scale_factor,
+        ]
+
+        driver = gdal.GetDriverByName("GTiff")
+        n_lines = img.shape[0]
+        n_cols = img.shape[1]
+        n_channels = img.shape[2]
+        # data_type = gdal.GDT_Byte
+        data_type = gdal.GDT_Float32
+        grid_data = driver.Create(
+            "grid_data", n_cols, n_lines, n_channels, data_type
+        )  # , options)
+        # loop over channels
+        for i in range(1, img.shape[2] + 1):
+            grid_data.GetRasterBand(i).WriteArray(img[:, :, i - 1])
+
+        # crs().srsid() - maybe we can use the ID directly - but how?
+        # srs.ImportFromEPSG()
+        srs = osr.SpatialReference()
+        srs.SetFromUserInput(crs.authid())
+
+        grid_data.SetProjection(srs.ExportToWkt())
+        grid_data.SetGeoTransform(geo_transform)
+        driver.CreateCopy(file_path, grid_data, 0)
+
+    def _process_tile(self, tile_img: np.ndarray) -> np.ndarray:
+        result = self.model.process(tile_img)
+        # result[np.isnan(result)] = 0
+
+        # NOTE - currently we are saving the result as float32, so we are losing some accuracy.
+        # result = np.clip(result, 0, 255)  # old version with uint8_t - not used anymore
+        # result = result.astype(np.float32)
+
+        return result
diff --git a/src/deepness/processing/map_processor/map_processor_with_model.py b/src/deepness/processing/map_processor/map_processor_with_model.py
index 7f20b82..b5a8c5e 100644
--- a/src/deepness/processing/map_processor/map_processor_with_model.py
+++ b/src/deepness/processing/map_processor/map_processor_with_model.py
@@ -37,6 +37,8 @@ def _get_indexes_of_model_output_channels_to_create(self) -> List[int]:
             output_channels = list(range(0, self.model.get_number_of_output_channels()))
         elif self.params.model_output_format == ModelOutputFormat.CLASSES_AS_SEPARATE_LAYERS_WITHOUT_ZERO_CLASS:
             output_channels = list(range(1, self.model.get_number_of_output_channels()))
+        elif self.params.model_output_format == ModelOutputFormat.RECOGNITION_RESULT:
+            output_channels = list(range(1))  # recognition produces a single similarity layer
         else:
             raise Exception(f"Unhandled model output format {self.params.model_output_format}")
diff --git a/src/deepness/processing/models/model_types.py b/src/deepness/processing/models/model_types.py
index e7f2839..463a7d6 100644
--- a/src/deepness/processing/models/model_types.py
+++ b/src/deepness/processing/models/model_types.py
@@ -3,14 +3,17 @@
 from deepness.common.processing_parameters.detection_parameters import DetectionParameters
 from deepness.common.processing_parameters.map_processing_parameters import MapProcessingParameters
+from deepness.common.processing_parameters.recognition_parameters import RecognitionParameters
 from deepness.common.processing_parameters.regression_parameters import RegressionParameters
 from deepness.common.processing_parameters.segmentation_parameters import SegmentationParameters
 from deepness.common.processing_parameters.superresolution_parameters import SuperresolutionParameters
 from deepness.processing.map_processor.map_processor_detection import MapProcessorDetection
+from deepness.processing.map_processor.map_processor_recognition import MapProcessorRecognition
 from deepness.processing.map_processor.map_processor_regression import MapProcessorRegression
 from deepness.processing.map_processor.map_processor_segmentation import MapProcessorSegmentation
 from deepness.processing.map_processor.map_processor_superresolution import MapProcessorSuperresolution
 from deepness.processing.models.detector import Detector
+from deepness.processing.models.recognition import Recognition
 from deepness.processing.models.regressor import Regressor
 from deepness.processing.models.segmentor import Segmentor
 from deepness.processing.models.superresolution import Superresolution
@@ -21,6 +24,7 @@ class ModelType(enum.Enum):
     REGRESSION = Regressor.get_class_display_name()
     DETECTION = Detector.get_class_display_name()
     SUPERRESOLUTION = Superresolution.get_class_display_name()
+    RECOGNITION = Recognition.get_class_display_name()
 
 
 @dataclass
@@ -56,6 +60,12 @@ def get_model_definitions(cls):
                 model_class=Superresolution,
                 parameters_class=SuperresolutionParameters,
                 map_processor_class=MapProcessorSuperresolution,
+            ),
+            cls(  # recognition
+                model_type=ModelType.RECOGNITION,
+                model_class=Recognition,
+                parameters_class=RecognitionParameters,
+                map_processor_class=MapProcessorRecognition,
             )
         ]
diff --git a/src/deepness/processing/models/recognition.py b/src/deepness/processing/models/recognition.py
new file mode 100644
index 0000000..efe1fe0
--- /dev/null
+++ b/src/deepness/processing/models/recognition.py
@@ -0,0 +1,169 @@
+""" Module including the class for the recognition of images
+""" +import logging +from typing import List + +import numpy as np +from deepness.common.lazy_package_loader import LazyPackageLoader +from deepness.processing.models.model_base import ModelBase + +cv2 = LazyPackageLoader('cv2') + +IMG_SIZE = 224 +mean=(0.485, 0.456, 0.406) +std=(0.229, 0.224, 0.225) + +def normalize(img, mean, std, max_pixel_value=255.0): + mean = np.array(mean, dtype=np.float32) + mean *= max_pixel_value + + std = np.array(std, dtype=np.float32) + std *= max_pixel_value + + denominator = np.reciprocal(std, dtype=np.float32) + + img = img.astype(np.float32) + img -= mean + img *= denominator + + return img + +class Recognition(ModelBase): + """Class implements recognition model + + Recognition model is used to predict class confidence per pixel of the image. + """ + + def __init__(self, model_file_path: str): + """ + + Parameters + ---------- + model_file_path : str + Path to the model file + """ + super(Recognition, self).__init__(model_file_path) + + + def process(self, img): + """Process a single tile image + + Parameters + ---------- + img : np.ndarray + Image to process ([TILE_SIZE x TILE_SIZE x channels], type uint8, values 0 to 255) + + Returns + ------- + np.ndarray + embeddings + """ + input_batch = self.preprocessing(img) + model_output = self.sess.run( + output_names=None, input_feed={self.input_name: input_batch} + ) + res = self.postprocessing(model_output) + return res + + + def preprocessing(self, image: np.ndarray): + """Preprocess image before inference + + Parameters + ---------- + image : np.ndarray + Image to preprocess in RGB format + + Returns + ------- + np.ndarray + Preprocessed image + """ + img = cv2.resize(image, (IMG_SIZE, IMG_SIZE)) + img = img[:, :, : self.input_shape[-3]] + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + input_batch = img.astype("float32") + input_batch = normalize(input_batch, mean, std, max_pixel_value=255.0) + input_batch = input_batch.transpose(2, 0, 1) + input_batch = np.expand_dims(input_batch, axis=0) + + return input_batch + + def postprocessing(self, model_output: List) -> np.ndarray: + """Postprocess the model output. + Function returns the array of embeddings + + Anything to do here? + + Parameters + ---------- + model_output : List + Output embeddings from the (Recognition) model + + Returns + ------- + np.ndarray + Same as input + """ + # TODO - compute cosine similarity to self.query_img_emb + #cannot, won't work for query image + + return np.array(model_output) + + def get_number_of_output_channels(self): + """Returns model's number of class + + Returns + ------- + int + Number of channels in the output layer + """ + logging.warning(f"outputs_layers: {self.outputs_layers}") + logging.info(f"outputs_layers: {self.outputs_layers}") + print(f"outputs_layers: {self.outputs_layers}") + + if len(self.outputs_layers) == 1: + return self.outputs_layers[0].shape[1] + else: + raise NotImplementedError( + "Model with multiple output layers is not supported! Use only one output layer." 
+            )
+
+    @classmethod
+    def get_class_display_name(cls):
+        """Returns the name of the class to be displayed in the GUI
+
+        Returns
+        -------
+        str
+            Name of the class
+        """
+        return cls.__name__
+
+    def check_loaded_model_outputs(self):
+        """Checks if the model outputs are valid
+
+        Valid means that:
+        - the model has only one output
+        - the output is 2D: (B, C)
+        - the batch size is 1
+        """
+        if len(self.outputs_layers) == 1:
+            shape = self.outputs_layers[0].shape
+
+            if len(shape) != 2:
+                raise Exception(
+                    f"Recognition model output should have 2 dimensions: (B, C). Has {shape}"
+                )
+
+            if shape[0] != 1:
+                raise Exception(
+                    f"Recognition model can handle only 1-Batch outputs. Has {shape}"
+                )
+
+        else:
+            raise NotImplementedError(
+                "Model with multiple output layers is not supported! Use only one output layer."
+            )
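
For reference, `check_loaded_model_outputs` above expects an ONNX model with a single output of shape `(1, C)` (one embedding per image). Below is a minimal sketch of how such a model could be exported, assuming the backbone is a timm ConvNeXt nano as in the Model ZOO entry; the model name, output path, and opset version are illustrative and not part of this patch.

```python
# Sketch: exporting an embedding model that satisfies Recognition.check_loaded_model_outputs,
# i.e. a single output of shape (1, C). Assumes torch and timm are installed; names are illustrative.
import timm
import torch

model = timm.create_model("convnext_nano", pretrained=True, num_classes=0)  # num_classes=0 -> pooled embedding
model.eval()

dummy_input = torch.randn(1, 3, 224, 224)  # matches IMG_SIZE used in Recognition.preprocessing
torch.onnx.export(
    model,
    dummy_input,
    "recognition_convnext_nano.onnx",
    input_names=["input"],
    output_names=["embedding"],
    opset_version=17,
)
```

The normalization constants in `recognition.py` are the standard ImageNet mean/std, so an exported backbone trained with that preprocessing should match the plugin's tile preprocessing.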
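
`MapProcessorRecognition._run` averages the per-tile cosine similarities over overlapping tiles by accumulating scores and a coverage count, then dividing. A standalone sketch of that accumulation, with made-up scores and sizes:

```python
# Sketch: how overlapping tile scores are averaged into the final similarity map,
# mirroring the accumulation in MapProcessorRecognition._run (values are illustrative only).
import numpy as np

size, stride = 4, 2                                # tile size and stride in pixels
scores = {(0, 0): 0.2, (0, 1): 0.9, (1, 0): 0.5}   # (y_bin, x_bin) -> cosine similarity
h = w = stride * 2 + size                          # canvas large enough to hold all tiles

acc = np.zeros((h, w), np.float32)
cover = np.zeros((h, w), np.int16)
for (y_bin, x_bin), s in scores.items():
    ys, xs = y_bin * stride, x_bin * stride
    acc[ys:ys + size, xs:xs + size] += s           # accumulate the tile score over its footprint
    cover[ys:ys + size, xs:xs + size] += 1         # count how many tiles cover each pixel

similarity_map = acc / np.maximum(cover, 1)        # pixels covered by several tiles get the mean score
print(similarity_map)
```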