From 6a4364837c2f057d3170ac7834bfaed0622ab9eb Mon Sep 17 00:00:00 2001
From: Valerio Marsocci <49810041+VMarsocci@users.noreply.github.com>
Date: Wed, 18 Sep 2024 12:28:06 +0200
Subject: [PATCH] Fbps (#46)

* ignore index in testing

* fbps and readme
---
 .github/CONTRIBUTING.md                 | 32 ++++++++++
 configs/datasets/fivebillionpixels.yaml | 57 +++++++++---------
 datasets/fivebillionpixels.py           | 77 ++++---------------------
 3 files changed, 71 insertions(+), 95 deletions(-)

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 8167c410..21f4d46d 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -6,6 +6,7 @@ We welcome all forms of contributions, including but not limited to the followin
 - Incorporate downstream datasets
 - Add new decoder heads
 - Fix typo or bugs
+- Add new decoders
 
 ### Workflow
 
@@ -16,6 +17,37 @@ We welcome all forms of contributions, including but not limited to the followin
 
 Note: For significant modifications or any bugs spotting, please consider opening an issue for discussion beforehand.
 
+## Code structure 
+
+### engines
+In engines, basic modules in the training pipeline are defined including data_preprocessor, trainer and evaluator.
+1. data_preprocessor selects the bands needed by an encoder, pads unavailable bands with zeros, and applies different augmentations.
+2. trainer supports mixed precision/distributed training and prints training stats and metrics in real time.
+3. evaluator can be called independently; it evaluates a model (also in a distributed way) and computes per-class metrics.
+
+### datasets
+1. The implementations are simplified and standardized.
+2. Dataset metas are read from configs, including newly added classes (name), ignore_index, and so on.
+3. To add (register) a new dataset implementation, use the decorator ```@DATASET_REGISTRY.register()```.
+
+### foundation_models
+1. Support multi-stage output that may be needed by segmentors, specified by output layers in encoder config.
+2. All the encoders should work properly.
+3. To add (register) a new encoder implementation, use the decorator ```@ENCODER_REGISTRY.register()```.
+
+### segmentors
+1. The UPerNet implementation is based on [mmsegmentation](https://github.com/open-mmlab/mmsegmentation/tree/main).
+2. To add (register) a new segmentor implementation, use the decorator ```@SEGMENTOR_REGISTRY.register()```.
+3. So far, we have UPerNet for unitemporal semantic segmentation, UPerNetCD for change detection and MTUPerNet for multitemporal semantic segmentation
+4. For multi-temporal inputs, L-TAE and linear projection are supported.
+
+### augmentations
+1. All the available augmentations are in ```data_preprocessor.py```.
+2. To add (register) a new augmentation implementation, use the decorator ```@AUGMENTER_REGISTRY.register()```.
+
+All the parameters can also be set in the run config file.
+
+## Adding new features
 
 ### Adding a new geospatial foundation model
 1. Inside the `foundation_models` folder:
diff --git a/configs/datasets/fivebillionpixels.yaml b/configs/datasets/fivebillionpixels.yaml
index 315915d8..26336011 100644
--- a/configs/datasets/fivebillionpixels.yaml
+++ b/configs/datasets/fivebillionpixels.yaml
@@ -2,6 +2,7 @@ dataset_name: FiveBillionPixels
 root_path: ./data/FiveBillionPixels/cropped
 download_url: False 
 auto_download: False
+use_cmyk: False
 
 img_size: 520
 multi_temporal: False
@@ -37,50 +38,50 @@ classes:
 - railway station
 - airport
 distribution:
-  - 0.88889
-  - 0.11111
-  - 0.88889
-  - 0.11111
-  - 0.88889
-  - 0.11111
-  - 0.88889
-  - 0.11111
-  - 0.88889
-  - 0.11111
-  - 0.88889
-  - 0.11111
-  - 0.88889
-  - 0.11111
-  - 0.88889
-  - 0.11111
-  - 0.11111
-  - 0.88889
-  - 0.11111
-  - 0.88889
-  - 0.11111
-  - 0.88889
-  - 0.11111
-  - 0.88889
-  - 0.11111
+  - 0.
+  - 0.0368
+  - 0.0253
+  - 0.3567
+  - 0.0752
+  - 0.0095
+  - 0.0694
+  - 0.0096
+  - 0.0004
+  - 0.0055
+  - 0.0025
+  - 0.0568
+  - 0.0548
+  - 0.1396
+  - 0.0102
+  - 0.0129
+  - 0.0004
+  - 0.0456
+  - 0.0447
+  - 0.0003
+  - 0.0002
+  - 0.0383
+  - 0.0025
+  - 0.0007
+  - 0.0011
 
 bands:
   optical:
+    - B8
     - B4
     - B3
     - B2
-    - B8
 data_mean:
   optical:
+    - 92.6
     - 124.3
     - 94.2
     - 98.
-    - 92.6
 data_std:
   optical:
+    - 44.5
     - 51.
     - 50.
     - 47.1
-    - 44.5
 data_min:
   optical: [0.0000, 0.0000, 0.0000, 0.0000]
 data_max:
diff --git a/datasets/fivebillionpixels.py b/datasets/fivebillionpixels.py
index 5b4bb0a7..dedd6ef1 100644
--- a/datasets/fivebillionpixels.py
+++ b/datasets/fivebillionpixels.py
@@ -34,90 +34,33 @@ def __init__(self, cfg, split, is_train = True):
         """
         super().__init__()
         self._base_dir = cfg['root_path']
-        # print(os.path.join(self._base_dir, split, 'imgs', '*.tif'))
-        # print(os.path.join(self._base_dir, split, 'labels', '*.tif'))
-        # print(self._image_dir)
-        # print(self._label_dir)
-        # _splits_dir = os.path.join(self._base_dir, 'list')
-        # self.split = [split]
-
-        # self.args = args
-
-        # self.im_ids = []
-        # self.images = []
-        # self.labels = []
-
-        # for splt in self.split:
-        #     with open(os.path.join(os.path.join(_splits_dir, splt + '.txt')), "r") as f:
-        #         lines = f.read().splitlines()
-
-        #     if splt == 'train':
-        #         lines = random.sample(lines, len(os.listdir(os.path.join(args.target_dir, args.target))))
-        #     elif split == 'val':
-        #         lines = random.sample(lines, 500)
-        # self.root_path = cfg['root_path']
         self.data_mean = cfg['data_mean']
         self.data_std = cfg['data_std']
         self.classes = cfg['classes']
+        self.use_cmyk = cfg['use_cmyk']
         self.class_num = len(self.classes)
         self.split = split
         self.is_train = is_train
 
         self._image_dir = sorted(glob(os.path.join(self._base_dir, self.split, 'imgs', '*.tif')))
         self._label_dir = sorted(glob(os.path.join(self._base_dir, self.split, 'labels', '*.tif')))
-        # print(split)
-        # print(os.path.join(self._base_dir, self.split, 'imgs', '*.tif'))
-        # print(os.path.join(self._base_dir, self.split, 'labels', '*.png'))
-        # print(self._image_dir)
-        # print((self._label_dir))
-        # print(len(self._image_dir))
-        # print(len(self._label_dir))
-
-        # self.split_mapping = {'train': 'training', 'val': 'validation', 'test': 'validation'}
-
-        # self.image_list = sorted(glob(os.path.join(self.root_path, self.split_mapping[self.split], '*merged.tif')))
-        # self.target_list = sorted(glob(os.path.join(self.root_path, self.split_mapping[self.split], '*mask.tif')))
-
-
-        # for ii, line in enumerate(lines):
-        #     _image = os.path.join(self._image_dir, line + ".tif")
-        #     _label = os.path.join(self._label_dir, line + ".png")
-        #     assert os.path.isfile(_image)
-        #     assert os.path.isfile(_label)
-        #     self.im_ids.append(line)
-        #     self.images.append(_image)
-        #     self.labels.append(_label)
-
-        # assert (len(self.images) == len(self.labels))
-
-        # Display stats
-        # print('Number of images in {}: {:d}'.format(split, len(self.images)))
 
     def __len__(self):
         return len(self._image_dir)
 
     def __getitem__(self, index):
-        # _img, _target = self._make_img_gt_point_pair(index)
-        # print(index)
-        # image = Image.open(self._image_dir[index]).convert('CMYK') #check it also on the normalization
-        # target = Image.open(self._label_dir[index])
-
-        image = tiff.imread(self._image_dir[index])#.convert('CMYK') #check it also on the normalization
-        target = tiff.imread(self._label_dir[index]) #, cv2.IMREAD_UNCHANGED)
 
-        # image = TF.pil_to_tensor(image)
-        # target = TF.pil_to_tensor(target).squeeze(0).to(torch.int64)
-
-        image = image.astype(np.float32)  # Convert to float32
+        if self.use_cmyk:
+            image = Image.open(self._image_dir[index]).convert('CMYK')
+            image = TF.pil_to_tensor(image)
+        else:
+            image = tiff.imread(self._image_dir[index])#.convert('CMYK') #check it also on the normalization
+            image = image.astype(np.float32)  # Convert to float32
+            image = torch.from_numpy(image).permute(2, 0, 1)
+        
+        target = tiff.imread(self._label_dir[index])
         target = target.astype(np.int64)  # Convert to int64 (since it's a mask)
-
-        # Tile the image and target to the fixed size specified in the config
-        # image, target = self.tile_image_and_mask(image, target, self.img_size)
-
-        image = torch.from_numpy(image).permute(2, 0, 1)
         target = torch.from_numpy(target).long()
-        # print(image.shape)
-        # print(target.shape)
 
         output = {
             'image': {