diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 8167c410..21f4d46d 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -6,6 +6,7 @@ We welcome all forms of contributions, including but not limited to the followin - Incorporate downstream datasets - Add new decoder heads - Fix typo or bugs +- Add new decoders ### Workflow @@ -16,6 +17,37 @@ We welcome all forms of contributions, including but not limited to the followin Note: For significant modifications or any bugs spotting, please consider opening an issue for discussion beforehand. +## Code structure + +### engines +In engines, basic modules in the training pipeline are defined, including data_preprocessor, trainer and evaluator. +1. data_preprocessor selects the bands needed by an encoder, pads unavailable bands with zeros, and applies different augmentations. +2. trainer supports mixed precision/distributed training and prints training stats and metrics in real time. +3. evaluator can be called independently; it can also evaluate a model in a distributed way and computes per-class metrics. + +### datasets +1. The implementations are simplified and standardized. +2. Dataset metas are read from configs, including newly added classes (name), ignore_index, and so on. +3. To add (register) a new dataset implementation, use the decorator ```@DATASET_REGISTRY.register()```. + +### foundation_models +1. Support multi-stage output that may be needed by segmentors, specified by output layers in encoder config. +2. All the encoders should work properly. +3. To add (register) a new encoder implementation, use the decorator ```@ENCODER_REGISTRY.register()```. + +### segmentors +1. The UPerNet implementation is based on [mmsegmentation](https://github.com/open-mmlab/mmsegmentation/tree/main) +2. To add (register) a new segmentor implementation, use the decorator ```@SEGMENTOR_REGISTRY.register()```. +3. 
So far, we have UPerNet for unitemporal semantic segmentation, UPerNetCD for change detection and MTUPerNet for multitemporal semantic segmentation. +4. For multi-temporal inputs, L-TAE and linear projection are supported. + +### augmentations +1. All the available augmentations are in ```data_preprocessor.py``` +2. To add (register) a new augmentation implementation, use the decorator ```@AUGMENTER_REGISTRY.register()```. + +All the parameters can also be set in the run config file. + +## Adding new features ### Adding a new geospatial foundation model 1. Inside the `foundation_models` folder: diff --git a/configs/datasets/fivebillionpixels.yaml b/configs/datasets/fivebillionpixels.yaml index 315915d8..26336011 100644 --- a/configs/datasets/fivebillionpixels.yaml +++ b/configs/datasets/fivebillionpixels.yaml @@ -2,6 +2,7 @@ dataset_name: FiveBillionPixels root_path: ./data/FiveBillionPixels/cropped download_url: False auto_download: False +use_cmyk: False img_size: 520 multi_temporal: False @@ -37,50 +38,50 @@ classes: - railway station - airport distribution: - - 0.88889 - - 0.11111 - - 0.88889 - - 0.11111 - - 0.88889 - - 0.11111 - - 0.88889 - - 0.11111 - - 0.88889 - - 0.11111 - - 0.88889 - - 0.11111 - - 0.88889 - - 0.11111 - - 0.88889 - - 0.11111 - - 0.11111 - - 0.88889 - - 0.11111 - - 0.88889 - - 0.11111 - - 0.88889 - - 0.11111 - - 0.88889 - - 0.11111 + - 0. + - 0.0368 + - 0.0253 + - 0.3567 + - 0.0752 + - 0.0095 + - 0.0694 + - 0.0096 + - 0.0004 + - 0.0055 + - 0.0025 + - 0.0568 + - 0.0548 + - 0.1396 + - 0.0102 + - 0.0129 + - 0.0004 + - 0.0456 + - 0.0447 + - 0.0003 + - 0.0002 + - 0.0383 + - 0.0025 + - 0.0007 + - 0.0011 bands: optical: + - B8 - B4 - B3 - B2 - - B8 data_mean: optical: + - 92.6 - 124.3 - 94.2 - 98. - - 92.6 data_std: optical: + - 44.5 - 51. - 50. 
- 47.1 - - 44.5 data_min: optical: [0.0000, 0.0000, 0.0000, 0.0000] data_max: diff --git a/datasets/fivebillionpixels.py b/datasets/fivebillionpixels.py index 5b4bb0a7..dedd6ef1 100644 --- a/datasets/fivebillionpixels.py +++ b/datasets/fivebillionpixels.py @@ -34,90 +34,33 @@ def __init__(self, cfg, split, is_train = True): """ super().__init__() self._base_dir = cfg['root_path'] - # print(os.path.join(self._base_dir, split, 'imgs', '*.tif')) - # print(os.path.join(self._base_dir, split, 'labels', '*.tif')) - # print(self._image_dir) - # print(self._label_dir) - # _splits_dir = os.path.join(self._base_dir, 'list') - # self.split = [split] - - # self.args = args - - # self.im_ids = [] - # self.images = [] - # self.labels = [] - - # for splt in self.split: - # with open(os.path.join(os.path.join(_splits_dir, splt + '.txt')), "r") as f: - # lines = f.read().splitlines() - - # if splt == 'train': - # lines = random.sample(lines, len(os.listdir(os.path.join(args.target_dir, args.target)))) - # elif split == 'val': - # lines = random.sample(lines, 500) - # self.root_path = cfg['root_path'] self.data_mean = cfg['data_mean'] self.data_std = cfg['data_std'] self.classes = cfg['classes'] + self.use_cmyk = cfg['use_cmyk'] self.class_num = len(self.classes) self.split = split self.is_train = is_train self._image_dir = sorted(glob(os.path.join(self._base_dir, self.split, 'imgs', '*.tif'))) self._label_dir = sorted(glob(os.path.join(self._base_dir, self.split, 'labels', '*.tif'))) - # print(split) - # print(os.path.join(self._base_dir, self.split, 'imgs', '*.tif')) - # print(os.path.join(self._base_dir, self.split, 'labels', '*.png')) - # print(self._image_dir) - # print((self._label_dir)) - # print(len(self._image_dir)) - # print(len(self._label_dir)) - - # self.split_mapping = {'train': 'training', 'val': 'validation', 'test': 'validation'} - - # self.image_list = sorted(glob(os.path.join(self.root_path, self.split_mapping[self.split], '*merged.tif'))) - # self.target_list = 
sorted(glob(os.path.join(self.root_path, self.split_mapping[self.split], '*mask.tif'))) - - - # for ii, line in enumerate(lines): - # _image = os.path.join(self._image_dir, line + ".tif") - # _label = os.path.join(self._label_dir, line + ".png") - # assert os.path.isfile(_image) - # assert os.path.isfile(_label) - # self.im_ids.append(line) - # self.images.append(_image) - # self.labels.append(_label) - - # assert (len(self.images) == len(self.labels)) - - # Display stats - # print('Number of images in {}: {:d}'.format(split, len(self.images))) def __len__(self): return len(self._image_dir) def __getitem__(self, index): - # _img, _target = self._make_img_gt_point_pair(index) - # print(index) - # image = Image.open(self._image_dir[index]).convert('CMYK') #check it also on the normalization - # target = Image.open(self._label_dir[index]) - - image = tiff.imread(self._image_dir[index])#.convert('CMYK') #check it also on the normalization - target = tiff.imread(self._label_dir[index]) #, cv2.IMREAD_UNCHANGED) - # image = TF.pil_to_tensor(image) - # target = TF.pil_to_tensor(target).squeeze(0).to(torch.int64) - - image = image.astype(np.float32) # Convert to float32 + if self.use_cmyk: + image = Image.open(self._image_dir[index]).convert('CMYK') + image = TF.pil_to_tensor(image) + else: + image = tiff.imread(self._image_dir[index])#.convert('CMYK') #check it also on the normalization + image = image.astype(np.float32) # Convert to float32 + image = torch.from_numpy(image).permute(2, 0, 1) + + target = tiff.imread(self._label_dir[index]) target = target.astype(np.int64) # Convert to int64 (since it's a mask) - - # Tile the image and target to the fixed size specified in the config - # image, target = self.tile_image_and_mask(image, target, self.img_size) - - image = torch.from_numpy(image).permute(2, 0, 1) target = torch.from_numpy(target).long() - # print(image.shape) - # print(target.shape) output = { 'image': {