Commit

Merge pull request #127 from PUTvision/feat/multi_batch_processing
Feat/multi batch processing
bartoszptak authored Jan 9, 2024
2 parents 0f1a2fe + 6b63808 commit b023c6a
Showing 38 changed files with 800 additions and 423 deletions.
10 changes: 9 additions & 1 deletion .vscode/settings.json
@@ -8,7 +8,7 @@
"python.analysis.autoImportUserSymbols": true,
"[python]": {
"editor.codeActionsOnSave": {
"source.organizeImports": true
"source.organizeImports": "explicit"
}
},
"isort.args": [
@@ -41,4 +41,12 @@
"python.testing.pytestArgs": [
"."
],
"autopep8.args": [
"--max-line-length",
"9999",
"--experimental",
],
"flake8.args": [
"--max-line-length=9999"
],
}
4 changes: 2 additions & 2 deletions docs/source/creators/creators_description_classes.rst
@@ -14,7 +14,7 @@ Once the processing of ortophoto is finished, a report with model-specific infor
Common rules for models and processing:
* Model needs to be in ONNX format, which contains both the network architecture and weights.
* All model classes process the data in chunks called 'tiles', that is, small parts of the entire orthophoto - tile size and overlap are configurable.
* Every model should have one input of size :code:`[BATCH_SIZE, CHANNELS, SIZE_PX, SIZE_PX]`. :code:`BATCH_SIZE` can be 1.
* Every model should have one input of size :code:`[BATCH_SIZE, CHANNELS, SIZE_PX, SIZE_PX]`. :code:`BATCH_SIZE` can be 1 or dynamic (see the sketch after this list).
* Size of processed tiles (in pixels) is model-defined, but needs to be equal in the x and y axes, so that tiles are square.
* If the processed tile needs to be padded (e.g. on orthophoto borders), it will be padded with 0 values.
* Input image data - only uint8_t values for each pixel channel are supported
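These input requirements can be checked before loading a model into the plugin. A minimal sketch using the :code:`onnx` package (the package and the model path are assumptions for illustration, not part of the plugin):

.. code-block::

    import onnx

    model = onnx.load('model.onnx')  # hypothetical path
    dims = model.graph.input[0].type.tensor_type.shape.dim
    # dim_param holds a symbolic name (e.g. 'batch_size') for dynamic axes,
    # dim_value holds the integer size for static axes
    batch = dims[0].dim_param or dims[0].dim_value
    channels, height, width = (d.dim_value for d in dims[1:])
    assert height == width, 'tiles must be square'
    print(f'batch={batch}, channels={channels}, tile={height}x{width} px')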
@@ -43,7 +43,7 @@ Detection models allow to solve problem of objects detection, that is finding an
An example application is the detection of oil and water tanks in satellite images.

The detection model output is a list of bounding boxes, each with an assigned class and confidence value. This information is not standardized between different model architectures.
Currently plugin supports :code:`YOLOv5`, :code:`YOLOv7` and :code:`ULTRALYTICS` output types.
Currently the plugin supports :code:`YOLOv5`, :code:`YOLOv7` and :code:`ULTRALYTICS` output types. The detection model also supports the instance segmentation output type from :code:`ULTRALYTICS`.

For each object class, a separate vector layer can be created, with information saved as rectangular polygons (so the output can easily be exported to text), as sketched below.
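A minimal sketch of the conversion this implies, assuming an axis-aligned box given as (x_min, y_min, x_max, y_max); the function name is hypothetical:

.. code-block::

    def box_to_polygon_ring(x_min, y_min, x_max, y_max):
        """Return the four corners of an axis-aligned box as a closed polygon ring."""
        return [(x_min, y_min), (x_max, y_min), (x_max, y_max),
                (x_min, y_max), (x_min, y_min)]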

22 changes: 21 additions & 1 deletion docs/source/creators/creators_example_onnx_model.rst
@@ -31,7 +31,7 @@ Steps based on `EXPORTING A MODEL FROM PYTORCH TO ONNX AND RUNNING IT USING ONNX
    x = torch.rand(1, INP_CHANNEL, INP_HEIGHT, INP_WIDTH)  # e.g. torch.rand([1, 3, 256, 256])
    _ = model(x)
* Step 3. Call export function
* Step 3a. Call export function with static batch_size=1:

.. code-block::
@@ -44,6 +44,20 @@ Steps based on `EXPORTING A MODEL FROM PYTORCH TO ONNX AND RUNNING IT USING ONNX
                      output_names=['output'],
                      do_constant_folding=False)
* Step 3b. Call export function with dynamic batch_size:

.. code-block::

    torch.onnx.export(model,
                      x,  # model input
                      'model.onnx',  # where to save the model
                      export_params=True,
                      opset_version=15,
                      input_names=['input'],
                      output_names=['output'],
                      dynamic_axes={'input': {0: 'batch_size'},  # variable length axes
                                    'output': {0: 'batch_size'}})
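To sanity-check that the exported model really accepts varying batch sizes, it can be run with :code:`onnxruntime` - a minimal sketch, assuming a 3-channel float32 input of 256x256 px as in the example above:

.. code-block::

    import numpy as np
    import onnxruntime as ort

    session = ort.InferenceSession('model.onnx', providers=['CPUExecutionProvider'])
    for batch in (1, 4):
        x = np.random.rand(batch, 3, 256, 256).astype(np.float32)
        outputs = session.run(None, {'input': x})
        print(batch, outputs[0].shape)  # leading dimension should follow the batch size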
================
Tensorflow/Keras
================
@@ -63,3 +77,9 @@ Steps based on the `tensorflow-onnx <https://github.com/onnx/tensorflow-onnx>`_
.. code-block::

    python -m tf2onnx.convert --saved-model YOUR_MODEL_CHECKPOINT_PATH --output model.onnx --opset 15
===============================================
Update ONNX model to support dynamic batch size
===============================================

To convert a model to support dynamic batch size, you need to update the :code:`model.onnx` file. You can do it manually using `this <https://github.com/onnx/onnx/issues/2182#issuecomment-881752539>`_ script. Please note that the script is not perfect and may not work for all models.
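The linked script essentially renames the first (batch) dimension of the network's inputs and outputs to a symbolic one. A minimal sketch of that idea using the :code:`onnx` package (simplified; as noted above, it may not be sufficient for models with hard-coded internal shapes):

.. code-block::

    import onnx

    model = onnx.load('model.onnx')
    # give the first dimension a symbolic name instead of a fixed size
    for tensor in list(model.graph.input) + list(model.graph.output):
        tensor.type.tensor_type.shape.dim[0].dim_param = 'batch_size'
    onnx.save(model, 'model_dynamic.onnx')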
Binary file modified docs/source/images/ui_processing_params.webp
6 changes: 5 additions & 1 deletion docs/source/main/main_ui_explanation.rst
@@ -56,11 +56,15 @@ Processing parameters

.. image:: ../images/ui_processing_params.webp

These options may be fixed to a single value for some models.

**Resolution** - Defines the processing resolution (in cm/px) of the input layer, i.e. the resolution of the images fed into the model, allowing the input images to be scaled. Should be similar to the resolution used to train the model.

**Tile size** - Size of the images (tiles) passed to the model, in pixels. Usually needs to be the same as the size used during training.

**Tiles overlap** - Defines how much tiles should overlap with their neighbors during processing. Especially required for a model which introduces distortions on the edges of images, so that they can be removed in postprocessing.
**Batch size** - Number of images (tiles) passed to the model at once.

**Tiles overlap** - Defines how much the tiles should overlap with their neighbors during processing. Especially required for models which introduce distortions on the edges of images, so that these can be removed in postprocessing. Can be defined as a percentage of the tile size or in pixels.
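As an illustration of how the overlap parameter affects processing (a sketch only; the names are assumptions, not the plugin's internals), the stride between consecutive tiles is the tile size minus the shared overlap region:

.. code-block::

    def tile_stride_px(tile_size_px: int, overlap_percent: float) -> int:
        """Stride between consecutive tiles, given the overlap as a percentage of tile size."""
        overlap_px = int(tile_size_px * overlap_percent / 100)
        return tile_size_px - overlap_px

    print(tile_stride_px(512, 15))  # 512 px tiles, 15% overlap -> stride 436 px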

.. image:: ../images/ui_segment_params.webp

2 changes: 2 additions & 0 deletions src/deepness/common/config_entry_key.py
@@ -20,6 +20,8 @@ class ConfigEntryKey(enum.Enum):
    PROCESSED_AREA_TYPE = enum.auto(), ''  # string of ProcessedAreaType, e.g. "ProcessedAreaType.VISIBLE_PART.value"
    MODEL_TYPE = enum.auto(), ''  # string of ModelType enum, e.g. "ModelType.SEGMENTATION.value"
    PREPROCESSING_RESOLUTION = enum.auto(), 3.0
    MODEL_BATCH_SIZE = enum.auto(), 1
    PROCESS_LOCAL_CACHE = enum.auto(), False
    PREPROCESSING_TILES_OVERLAP = enum.auto(), 15

    SEGMENTATION_PROBABILITY_THRESHOLD_ENABLED = enum.auto(), True
@@ -37,6 +37,8 @@ class MapProcessingParameters:
    resolution_cm_per_px: float  # image resolution to use during processing
    processed_area_type: ProcessedAreaType  # whether to perform operation on the entire field or part
    tile_size_px: int  # tile size for processing (model input size)
    batch_size: int  # batch size for processing
    local_cache: bool  # whether to use local cache for tiles (on disk, /tmp directory)

    input_layer_id: str  # raster layer to process
    mask_layer_id: Optional[str]  # processing of masked layer - if processed_area_type is FROM_POLYGONS
19 changes: 19 additions & 0 deletions src/deepness/common/temp_files_handler.py
@@ -0,0 +1,19 @@
import os.path as path
import shutil
from tempfile import mkdtemp


class TempFilesHandler:
    """Owns a temporary directory for intermediate processing files; the directory is removed when the handler is deleted."""

    def __init__(self) -> None:
        self._temp_dir = mkdtemp()

        print(f'Created temp dir: {self._temp_dir} for processing')

    def get_results_img_path(self):
        return path.join(self._temp_dir, 'results.dat')

    def get_area_mask_img_path(self):
        return path.join(self._temp_dir, 'area_mask.dat')

    def __del__(self):
        shutil.rmtree(self._temp_dir)
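A hypothetical usage sketch for this handler (illustrative only - the plugin wires it into its processing pipeline elsewhere; the array shape and dtype are assumptions):

import numpy as np

handler = TempFilesHandler()
# back a large results array with a file in the temp dir instead of RAM
results = np.memmap(handler.get_results_img_path(), dtype=np.uint8, mode='w+', shape=(10_000, 10_000))
results[:100, :100] = 1
results.flush()
del results, handler  # deleting the handler removes the temp directory and its files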
23 changes: 22 additions & 1 deletion src/deepness/deepness_dockwidget.py
@@ -92,8 +92,9 @@ def _load_ui_from_config(self):

# needs to be loaded after the model is set up
self.comboBox_outputFormatClassNumber.setCurrentIndex(ConfigEntryKey.MODEL_OUTPUT_FORMAT_CLASS_NUMBER.get())

self.doubleSpinBox_resolution_cm_px.setValue(ConfigEntryKey.PREPROCESSING_RESOLUTION.get())
self.spinBox_batchSize.setValue(ConfigEntryKey.MODEL_BATCH_SIZE.get())
self.checkBox_local_cache.setChecked(ConfigEntryKey.PROCESS_LOCAL_CACHE.get())
self.spinBox_processingTileOverlapPercentage.setValue(ConfigEntryKey.PREPROCESSING_TILES_OVERLAP.get())

self.doubleSpinBox_probabilityThreshold.setValue(
@@ -129,6 +130,8 @@ def _save_ui_to_config(self):
        ConfigEntryKey.MODEL_OUTPUT_FORMAT_CLASS_NUMBER.set(self.comboBox_outputFormatClassNumber.currentIndex())

        ConfigEntryKey.PREPROCESSING_RESOLUTION.set(self.doubleSpinBox_resolution_cm_px.value())
        ConfigEntryKey.MODEL_BATCH_SIZE.set(self.spinBox_batchSize.value())
        ConfigEntryKey.PROCESS_LOCAL_CACHE.set(self.checkBox_local_cache.isChecked())
        ConfigEntryKey.PREPROCESSING_TILES_OVERLAP.set(self.spinBox_processingTileOverlapPercentage.value())

        ConfigEntryKey.SEGMENTATION_PROBABILITY_THRESHOLD_ENABLED.set(
@@ -272,6 +275,13 @@ def _load_default_model_parameters(self):
        value = self._model.get_metadata_resolution()
        if value is not None:
            self.doubleSpinBox_resolution_cm_px.setValue(value)

        value = self._model.get_model_batch_size()
        if value is not None:
            self.spinBox_batchSize.setValue(value)
            self.spinBox_batchSize.setEnabled(False)
        else:
            self.spinBox_batchSize.setEnabled(True)

        value = self._model.get_metadata_tile_size()
        if value is not None:
@@ -355,10 +365,18 @@ def _load_model_and_display_info(self, abort_if_no_file_path: bool = False):
            input_0_shape = self._model.get_input_shape()
            txt += f'Input shape: {input_0_shape} = [BATCH_SIZE * CHANNELS * SIZE * SIZE]'
            input_size_px = input_0_shape[-1]
            batch_size = self._model.get_model_batch_size()

            # TODO idk how variable input will be handled
            self.spinBox_tileSize_px.setValue(input_size_px)
            self.spinBox_tileSize_px.setEnabled(False)

            if batch_size is not None:
                self.spinBox_batchSize.setValue(batch_size)
                self.spinBox_batchSize.setEnabled(False)
            else:
                self.spinBox_batchSize.setEnabled(True)

            self._input_channels_mapping_widget.set_model(self._model)

            # super resolution
@@ -375,6 +393,7 @@ def _load_model_and_display_info(self, abort_if_no_file_path: bool = False):
"Model may be not usable."
logging.exception(txt)
self.spinBox_tileSize_px.setEnabled(True)
self.spinBox_batchSize.setEnabled(True)
length_limit = 300
exception_msg = (str(e)[:length_limit] + '..') if len(str(e)) > length_limit else str(e)
msg = txt + f'\n\nException: {exception_msg}'
@@ -517,6 +536,8 @@ def _get_map_processing_parameters(self) -> MapProcessingParameters:
        params = MapProcessingParameters(
            resolution_cm_per_px=self.doubleSpinBox_resolution_cm_px.value(),
            tile_size_px=self.spinBox_tileSize_px.value(),
            batch_size=self.spinBox_batchSize.value(),
            local_cache=self.checkBox_local_cache.isChecked(),
            processed_area_type=processed_area_type,
            mask_layer_id=self.get_mask_layer_id(),
            input_layer_id=self._get_input_layer_id(),