VMarsocci · KerekesDavid · Sep 17, 2024 · Sep 17, 2024
diff --git a/configs/augmentations/regression_default.yaml b/configs/augmentations/regression_default.yaml
@@ -1,13 +1,11 @@
 train:
   RegPreprocessor: ~
-  NormalizeMinMax: ~
-#   NormalizeMeanStd: ~
+  NormalizeMeanStd: ~
   RandomCropToEncoder: ~
-#   RandomFlip:
-#     ud_probability: 0.3
-#     lr_probability: 0.3
+  RandomFlip:
+    ud_probability: 0.3
+    lr_probability: 0.3
 test:
   RegPreprocessor: ~
-  NormalizeMinMax: ~
-#   NormalizeMeanStd: ~
-  Tile: ~
+  NormalizeMeanStd: ~
+  Tile: ~
diff --git a/configs/datasets/biomassters.yaml b/configs/datasets/biomassters.yaml
@@ -1,14 +1,14 @@
 dataset_name: BioMassters
-root_path: /geoinfo_vol1/home/r/i/ru/
-# download_url: #https://huggingface.co/datasets/ibm-nasa-geospatial/hls_burn_scars/resolve/main/hls_burn_scars.tar.gz?download=true
+root_path: /geomatics/gpuserver-0/vmarsocci/biomassters
+download_url: #https://huggingface.co/datasets/ibm-nasa-geospatial/hls_burn_scars/resolve/main/hls_burn_scars.tar.gz?download=true
 auto_download: False
 img_size: 256
-temporal: 12 #6 (summer month if multi_temp false), 12
-multi_temporal: True
+multi_temporal: 12
 multi_modal: True
 
+
 # classes
-ignore_index: -1
+# ignore_index: -1
 num_classes: 1
 classes:
   - regression
@@ -25,39 +25,29 @@ bands:
     - B2
     - B3
     - B4
-    - B5
-    - B6
-    - B7
-    - B8
-    - B8A
-    - B11
-    - B12
-    - CLP
+    # - B5
+    # - B6
+    # - B7
+    # - B8
+    # - B8a
+    # - B11
+    # - B12
   sar:
     - ASC_VV
     - ASC_VH
     - DSC_VV
     - DSC_VH
 
-# TODO: fix the normalization 
 data_mean:
   optical:
-#     - 66.7703
-#     - 88.4452
-#     - 85.1047
-  sar:
+    - 66.7703
+    - 88.4452
+    - 85.1047
+  # sar:
 
 data_std:
   optical:
-#     - 48.3066
-#     - 51.9129
-#     - 62.7612
-  sar:
-
-data_min:
-  optical: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
-  sar: [-25, -62, -25, -60]
-
-data_max:
-  optical: [19616., 18400., 17536., 17097., 16928., 16768., 16593., 16492., 15401., 15226.,   255.]
-  sar: [29,  28,  30,  22]
+    - 48.3066
+    - 51.9129
+    - 62.7612
+  # sar:
diff --git a/configs/segmentors/reg_upernet.yaml b/configs/segmentors/reg_upernet.yaml
diff --git a/configs/segmentors/reg_upernet_mt.yaml b/configs/segmentors/reg_upernet_mt.yaml
@@ -1,7 +1,8 @@
-segmentor_name: MTUPerNet_regress
+segmentor_name: MTUPerNet
 task_name: regression
+binary: False
 multi_temporal_strategy: linear
-time_frames: 12
+# time_frames: 2
 #task_model_args:
   #num_frames: 1
   #mt_strategy: "ltae" #activated only when if num_frames > 1

diff --git a/datasets/biomassters.py b/datasets/biomassters.py
@@ -2,51 +2,38 @@
 import torch
 import pandas as pd
 import pathlib
-import rasterio
-from skimage import io
-from os.path import join as opj
 from .utils import read_tif
 from utils.registry import DATASET_REGISTRY
 
 s1_min = np.array([-25 , -62 , -25, -60], dtype="float32")
 s1_max = np.array([ 29 ,  28,  30,  22 ], dtype="float32")
 s1_mm = s1_max - s1_min
+
 s2_max = np.array(
     [19616., 18400., 17536., 17097., 16928., 16768., 16593., 16492., 15401., 15226.,   255.],
     dtype="float32",
 )
+
 IMG_SIZE = (256, 256)
 
-def read_imgs(multi_temporal, temp , fname, data_dir):
-    imgs_s1, imgs_s2, mask = [], [], []
-    if multi_temporal:
-        month_list = list(range(12))
-    else:
-        month_list = [temp]
-
-    for month in month_list:
-
-        s1_fname = '%s_%s_%02d.tif' % (str.split(fname, '_')[0], 'S1', month)
-        s2_fname = '%s_%s_%02d.tif' % (str.split(fname, '_')[0], 'S2', month)
-
-        s1_filepath = data_dir.joinpath(s1_fname)
-        if s1_filepath.exists():      
-            img_s1 = io.imread(s1_filepath)
-            m = img_s1 == -9999
-            img_s1 = img_s1.astype("float32")
-            img_s1 = (img_s1 - s1_min) / s1_mm
-            img_s1 = np.where(m, 0, img_s1)
-        else:            
-            img_s1 = np.zeros(IMG_SIZE + (4,), dtype="float32")
-
-        s2_filepath = data_dir.joinpath(s2_fname)
-        if s2_filepath.exists():            
-            img_s2 = io.imread(s2_filepath)
+
+def read_imgs(chip_id: str, data_dir: pathlib.Path):
+    imgs, imgs_s1, imgs_s2, mask = [], [], [], []
+    for month in range(12):
+        img_s1 = read_tif(data_dir.joinpath(f"{chip_id}_S1_{month:0>2}.tif"))
+        m = img_s1 == -9999
+        img_s1 = img_s1.astype("float32")
+        img_s1 = (img_s1 - s1_min) / s1_mm
+        img_s1 = np.where(m, 0, img_s1)
+        filepath = data_dir.joinpath(f"{chip_id}_S2_{month:0>2}.tif")
+        if filepath.exists():
+            img_s2 = read_tif(filepath)
             img_s2 = img_s2.astype("float32")
             img_s2 = img_s2 / s2_max
-        else:            
+        else:
             img_s2 = np.zeros(IMG_SIZE + (11,), dtype="float32")
-
+
+        # img = np.concatenate([img_s1, img_s2], axis=2)
         img_s1 = np.transpose(img_s1, (2, 0, 1))
         img_s2 = np.transpose(img_s2, (2, 0, 1))
         imgs_s1.append(img_s1)
@@ -55,43 +42,39 @@ def read_imgs(multi_temporal, temp , fname, data_dir):
 
     mask = np.array(mask)
 
-    imgs_s1 = np.stack(imgs_s1, axis=1)  # [c, t, h, w] prithvi
+    imgs_s1 = np.stack(imgs_s1, axis=1)  # [c, t, h, w]
     imgs_s2 = np.stack(imgs_s2, axis=1)  # [c, t, h, w]
 
     return imgs_s1, imgs_s2, mask
 
 @DATASET_REGISTRY.register()
 class BioMassters(torch.utils.data.Dataset):
     def __init__(self, cfg, split): #, augs=False):
-
-        self.root_path = cfg['root_path']
-        self.data_min = cfg['data_min']
-        self.data_max = cfg['data_max']
-        self.multi_temporal = cfg['multi_temporal']
-        self.temp = cfg['temporal']
+        df_path = pathlib.Path(cfg["root_path"]).joinpath("The_BioMassters_-_features_metadata.csv.csv")
+        df: pd.DataFrame = pd.read_csv(str(df_path))
+        self.df = df[df.split == split].copy()
+        self.dir_features = pathlib.Path(cfg["root_path"]).joinpath(f"{split}_features")
+        self.dir_labels = pathlib.Path(cfg["root_path"]).joinpath( f"{split}_agbm")
         self.split = split
         # self.augs = augs
-
-        self.data_path = pathlib.Path(self.root_path).joinpath(f"{split}_Data_list.csv")
-        self.id_list = pd.read_csv(self.data_path)['chip_id']
-        self.dir_features = pathlib.Path(self.root_path).joinpath("TRAIN/train_features")        
-        self.dir_labels = pathlib.Path(self.root_path).joinpath( "TRAIN/train_agbm")        
 
     def __len__(self):
-        return len(self.id_list)
+        return len(self.df)
 
     def __getitem__(self, index):
+        item = self.df.iloc[index]
+
+        # print(item.chip_id)
+        # print(self.dir_features)
 
-        chip_id = self.id_list.iloc[index]
-        fname = str(chip_id)+'_agbm.tif'
+        imgs_s1, imgs_s2, mask = read_imgs(item.chip_id, self.dir_features)
+        if self.dir_labels is not None:
+            target = read_tif(self.dir_labels.joinpath(f'{item.chip_id}_agbm.tif'))
+        else:
+            target = item.chip_id
 
-        imgs_s1, imgs_s2, mask = read_imgs(self.multi_temporal, self.temp, fname, self.dir_features)
-        with rasterio.open(self.dir_labels.joinpath(fname)) as lbl:
-            target = lbl.read(1)
-        target = np.nan_to_num(target)
-#         print(imgs_s1.shape, imgs_s2.shape, len(mask), target.shape)#(4, 1, 256, 256) (11, 1, 256, 256) 1 (256, 256)
 
-        # format (B/C, T, H, W)
+        # Convert to float tensors; read_imgs already returns the images as (C, T, H, W)
         imgs_s1 = torch.from_numpy(imgs_s1).float()
         imgs_s2 = torch.from_numpy(imgs_s2).float()
         target = torch.from_numpy(target).float()
@@ -107,12 +90,11 @@ def __getitem__(self, index):
 
     @staticmethod
     def get_splits(dataset_config):
-        dataset_train = BioMassters(cfg=dataset_config, split="train")
-        dataset_val = BioMassters(cfg=dataset_config, split="val")
+        dataset_train = BioMassters(cfg=dataset_config, split="train")
+        dataset_val = BioMassters(cfg=dataset_config, split="test")
         dataset_test = BioMassters(cfg=dataset_config, split="test")
-#         print('loaded sample points',len(dataset_train), len(dataset_val), len(dataset_test))
         return dataset_train, dataset_val, dataset_test
 
     @staticmethod
     def download(dataset_config:dict, silent=False):
-        pass
+        pass
diff --git a/engine/evaluator.py b/engine/evaluator.py
@@ -170,11 +170,14 @@ def __init__(self, args, val_loader, exp_dir, device):
 
     @torch.no_grad()
     def evaluate(self, model, model_name='model'):
+        # TODO: Rework this to allow evaluation-only runs.
+        # Move common parts to the parent class, and get the loss function from the registry.
         t = time.time()
 
         model.eval()
 
         tag = f'Evaluating {model_name} on {self.split} set'
+        # confusion_matrix = torch.zeros((self.num_classes, self.num_classes), device=self.device)
 
         for batch_idx, data in enumerate(tqdm(self.val_loader, desc=tag)):
             image, target = data['image'], data['target']
@@ -183,8 +186,14 @@ def evaluate(self, model, model_name='model'):
 
             logits = model(image, output_shape=target.shape[-2:]).squeeze(dim=1)
             mse = F.mse_loss(logits, target)
-
-        metrics = {"MSE" : mse.item(), "RMSE" : torch.sqrt(mse).item()}
+            # pred = torch.argmax(logits, dim=1)
+            # valid_mask = target != -1
+            # pred, target = pred[valid_mask], target[valid_mask]
+            # count = torch.bincount((pred * self.num_classes + target), minlength=self.num_classes ** 2)
+            # confusion_matrix += count.view(self.num_classes, self.num_classes)
+
+        # torch.distributed.all_reduce(confusion_matrix, op=torch.distributed.ReduceOp.SUM)
+        metrics = {"MSE" : mse.item(), "RMSE" : torch.sqrt(mse).item()}
         self.log_metrics(metrics)
 
         used_time = time.time() - t
@@ -195,11 +204,20 @@ def evaluate(self, model, model_name='model'):
     def __call__(self, model, model_name='model'):
         return self.evaluate(model, model_name)
 
+
+    # def compute_metrics(self, confusion_matrix):
+    #     iou = torch.diag(confusion_matrix) / (confusion_matrix.sum(dim=1) + confusion_matrix.sum(dim=0) - torch.diag(confusion_matrix)) * 100
+    #     iou = iou.cpu()
+    #     metrics = {'IoU': [iou[i].item() for i in range(self.num_classes)], 'mIoU': iou.mean().item()}
+
+    #     return metrics
+
     def log_metrics(self, metrics):
         header = "------- MSE and RMSE --------\n"
+        # iou = '\n'.join(c.ljust(self.max_name_len, ' ') + '\t{:>7}'.format('%.3f' % num) for c, num in zip(self.classes, metrics['MSE'])) + '\n'
         mse = "-------------------\n" + 'MSE \t{:>7}'.format('%.3f' % metrics['MSE'])+'\n'
         rmse = "-------------------\n" + 'RMSE \t{:>7}'.format('%.3f' % metrics['RMSE'])
         self.logger.info(header+mse+rmse)
 
         if self.args.use_wandb and self.args.rank == 0:
-            self.wandb.log({"val_MSE": metrics["MSE"], "val_RMSE": metrics["RMSE"]})
+            self.wandb.log({"val_MSE": metrics["MSE"], "val_RMSE": metrics["RMSE"]})
diff --git a/environment.yaml b/environment.yaml
@@ -11,7 +11,6 @@ dependencies:
   - pillow
   - pytorch>=2.1
   - rasterio
-  - scikit-image
   - scikit-learn
   - tensorboard
   - torchaudio
@@ -29,4 +28,4 @@ dependencies:
   - google-cloud-storage
   - omegaconf
   - pydataverse
-  - pytest
+  - pytest