diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..e5b22c33
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.idea/
+.vscode/
+__pycache__
diff --git a/.travis.yml b/.travis.yml
index 527d74e7..12c0a39a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,11 +6,13 @@ install:
   - bash miniconda.sh -b -p $HOME/miniconda
   - export PATH="$HOME/miniconda/bin:$PATH"
   - hash -r
-  - conda config --set always_yes yes --set changeps1 no
+  - conda config --set always_yes yes
+  - conda config --set changeps1 no
+  - conda config --prepend channels conda-forge
+  - conda config --prepend channels pytorch
   - conda update -q conda
   - conda info -a
-
-  - conda create -q -n ci_env python=3.6 pytorch-cpu torchvision-cpu torchvision ruamel_yaml h5py scikit-image scikit-learn fiona rasterio tqdm -c pytorch
+  - conda create -q -n ci_env python=3.6 pytorch-cpu torchvision-cpu ruamel_yaml 'h5py>=2.10' scikit-image scikit-learn fiona rasterio tqdm
   - source activate ci_env
before_script:
  - unzip ./data/massachusetts_buildings.zip -d ./data
diff --git a/conf/config.canvecaux.yaml b/conf/config.canvecaux.yaml
new file mode 100644
index 00000000..d9abb918
--- /dev/null
+++ b/conf/config.canvecaux.yaml
@@ -0,0 +1,69 @@
+# Deep learning configuration file ------------------------------------------------
+# Five sections :
+#   1) Global parameters; those are re-used amongst the next three operations (sampling, training and inference)
+#   2) Sampling parameters
+#   3) Training parameters
+#   4) Inference parameters
+#   5) Model parameters
+
+# Global parameters
+
+global:
+  samples_size: 256
+  num_classes: 5
+  data_path: ./data/kingston_wv2_40cm/images
+  number_of_bands: 4
+  model_name: unet  # One of unet, unetsmall, checkpointed_unet or ternausnet
+  bucket_name:  # name of the S3 bucket where data is stored. Leave blank if using local files
+  task: segmentation  # Task to perform. Either segmentation or classification
+  num_gpus: 1
+  aux_vector_file: ./data/canvec_191031_127357_roads.gpkg  # https://drive.google.com/file/d/1PCxn2197NiOVKOxGgQIA__w69jAJmjXp
+  aux_vector_dist_maps: true
+  meta_map:
+  scale_data: [0,1]
+  debug_mode: True
+  coordconv_convert:
+  coordconv_scale:
+
+# Sample parameters; used in images_to_samples.py -------------------
+
+sample:
+  prep_csv_file: ./data/trn_val_tst_kingston.csv  # https://drive.google.com/file/d/1uNizOAToa-R_sik0DvBqDUVwjqYdOALJ
+  samples_dist: 200
+  min_annotated_percent: 10  # Min % of non background pixels in stored samples. Default: 0
+  mask_reference: False
+
+# Training parameters; used in train_model.py ----------------------
+
+training:
+  state_dict_path:
+  output_path: ./data/output
+  num_trn_samples:
+  num_val_samples:
+  num_tst_samples:
+  batch_size: 8
+  num_epochs: 100
+  loss_fn: Lovasz  # One of CrossEntropy, Lovasz, Focal, OhemCrossEntropy (*Lovasz for segmentation tasks only)
+  optimizer: adam  # One of adam, sgd or adabound
+  learning_rate: 0.0001
+  weight_decay: 0
+  step_size: 4
+  gamma: 0.9
+  class_weights:
+  batch_metrics:  # (int) Metrics computed every (int) batches. If left blank, will not perform metrics. If (int)=1, metrics computed on all batches.
+  ignore_index: 0  # Specifies a target value that is ignored and does not contribute to the input gradient.
Default: None + augmentation: + rotate_limit: 45 + rotate_prob: 0.5 + hflip_prob: 0.5 + dropout: + dropout_prob: + +# Inference parameters; used in inference.py -------- + +inference: + img_dir_or_csv_file: ./data/trn_val_tst_kingston.csv # https://drive.google.com/file/d/1uNizOAToa-R_sik0DvBqDUVwjqYdOALJ + working_folder: ./data/output + state_dict_path: ./data/output/checkpoint.pth.tar + chunk_size: 256 # (int) Size (height and width) of each prediction patch. Default: 512 + overlap: 10 # (int) Percentage of overlap between 2 chunks. Default: 10 diff --git a/conf/config.coordconv.yaml b/conf/config.coordconv.yaml new file mode 100644 index 00000000..105040fd --- /dev/null +++ b/conf/config.coordconv.yaml @@ -0,0 +1,69 @@ +# Deep learning configuration file ------------------------------------------------ +# Five sections : +# 1) Global parameters; those are re-used amongst the next three operations (sampling, training and inference) +# 2) Sampling parameters +# 3) Training parameters +# 4) Inference parameters +# 5) Model parameters + +# Global parameters + +global: + samples_size: 256 + num_classes: 5 + data_path: ./data/kingston_wv2_40cm/images + number_of_bands: 3 + model_name: unet # One of unet, unetsmall, checkpointed_unet or ternausnet + bucket_name: # name of the S3 bucket where data is stored. Leave blank if using local files + task: segmentation # Task to perform. Either segmentation or classification + num_gpus: 1 + aux_vector_file: + aux_vector_dist_maps: + meta_map: + scale_data: [0,1] + debug_mode: True + coordconv_convert: true + coordconv_scale: 0.4 + +# Sample parameters; used in images_to_samples.py ------------------- + +sample: + prep_csv_file: ./data/trn_val_tst_kingston.csv # https://drive.google.com/file/d/1uNizOAToa-R_sik0DvBqDUVwjqYdOALJ + samples_dist: 200 + min_annotated_percent: 10 # Min % of non background pixels in stored samples. Default: 0 + mask_reference: False + +# Training parameters; used in train_model.py ---------------------- + +training: + state_dict_path: + output_path: ./data/output + num_trn_samples: + num_val_samples: + num_tst_samples: + batch_size: 8 + num_epochs: 100 + loss_fn: Lovasz # One of CrossEntropy, Lovasz, Focal, OhemCrossEntropy (*Lovasz for segmentation tasks only) + optimizer: adam # One of adam, sgd or adabound + learning_rate: 0.0001 + weight_decay: 0 + step_size: 4 + gamma: 0.9 + class_weights: + batch_metrics: # (int) Metrics computed every (int) batches. If left blank, will not perform metrics. If (int)=1, metrics computed on all batches. + ignore_index: 0 # Specifies a target value that is ignored and does not contribute to the input gradient. Default: None + augmentation: + rotate_limit: 45 + rotate_prob: 0.5 + hflip_prob: 0.5 + dropout: + dropout_prob: + +# Inference parameters; used in inference.py -------- + +inference: + img_dir_or_csv_file: ./data/trn_val_tst_kingston.csv # https://drive.google.com/file/d/1uNizOAToa-R_sik0DvBqDUVwjqYdOALJ + working_folder: ./data/output + state_dict_path: ./data/output/checkpoint.pth.tar + chunk_size: 256 # (int) Size (height and width) of each prediction patch. Default: 512 + overlap: 10 # (int) Percentage of overlap between 2 chunks. 
Default: 10 diff --git a/conf/config.metasegm.yaml b/conf/config.metasegm.yaml new file mode 100644 index 00000000..7438ba2b --- /dev/null +++ b/conf/config.metasegm.yaml @@ -0,0 +1,70 @@ +# Deep learning configuration file ------------------------------------------------ +# Five sections : +# 1) Global parameters; those are re-used amongst the next three operations (sampling, training and inference) +# 2) Sampling parameters +# 3) Training parameters +# 4) Inference parameters +# 5) Model parameters + +# Global parameters + +global: + samples_size: 256 + num_classes: 5 + data_path: ./data/kingston_wv2_40cm/images + number_of_bands: 5 + model_name: unet # One of unet, unetsmall, checkpointed_unet or ternausnet + bucket_name: # name of the S3 bucket where data is stored. Leave blank if using local files + task: segmentation # Task to perform. Either segmentation or classification + num_gpus: 1 + aux_vector_file: + aux_vector_dist_maps: + meta_map: + "properties/eo:gsd": "scaled_channel" + scale_data: [0,1] + debug_mode: True + coordconv_convert: + coordconv_scale: + +# Sample parameters; used in images_to_samples.py ------------------- + +sample: + prep_csv_file: ./data/trn_val_tst_kingston.csv # https://drive.google.com/file/d/1uNizOAToa-R_sik0DvBqDUVwjqYdOALJ + samples_dist: 200 + min_annotated_percent: 10 # Min % of non background pixels in stored samples. Default: 0 + mask_reference: False + +# Training parameters; used in train_model.py ---------------------- + +training: + state_dict_path: + output_path: ./data/output + num_trn_samples: + num_val_samples: + num_tst_samples: + batch_size: 8 + num_epochs: 100 + loss_fn: Lovasz # One of CrossEntropy, Lovasz, Focal, OhemCrossEntropy (*Lovasz for segmentation tasks only) + optimizer: adam # One of adam, sgd or adabound + learning_rate: 0.0001 + weight_decay: 0 + step_size: 4 + gamma: 0.9 + class_weights: + batch_metrics: # (int) Metrics computed every (int) batches. If left blank, will not perform metrics. If (int)=1, metrics computed on all batches. + ignore_index: 0 # Specifies a target value that is ignored and does not contribute to the input gradient. Default: None + augmentation: + rotate_limit: 45 + rotate_prob: 0.5 + hflip_prob: 0.5 + dropout: + dropout_prob: + +# Inference parameters; used in inference.py -------- + +inference: + img_dir_or_csv_file: ./data/trn_val_tst_kingston.csv # https://drive.google.com/file/d/1uNizOAToa-R_sik0DvBqDUVwjqYdOALJ + working_folder: ./data/output + state_dict_path: ./data/output/checkpoint.pth.tar + chunk_size: 256 # (int) Size (height and width) of each prediction patch. Default: 512 + overlap: 10 # (int) Percentage of overlap between 2 chunks. Default: 10 diff --git a/conf/config_ci_segmentation_local.yaml b/conf/config_ci_segmentation_local.yaml index 9308162c..e675cc37 100644 --- a/conf/config_ci_segmentation_local.yaml +++ b/conf/config_ci_segmentation_local.yaml @@ -10,7 +10,7 @@ global: samples_size: 256 - num_classes: 2 + num_classes: 1 # will automatically create a 'background' class data_path: ./data number_of_bands: 3 model_name: checkpointed_unet # One of unet, unetsmall, checkpointed_unet, ternausnet, fcn_resnet101, deeplabv3_resnet101 @@ -47,7 +47,7 @@ training: dropout_prob: False # (float) Set dropout probability, e.g. 
0.5 class_weights: [1.0, 2.0] batch_metrics: 1 - ignore_index: 0 # Specifies a target value that is ignored and does not contribute to the input gradient + ignore_index: # Specifies a target value that is ignored and does not contribute to the input gradient augmentation: rotate_limit: 45 rotate_prob: 0.5 diff --git a/data/images_to_samples_ci_csv.csv b/data/images_to_samples_ci_csv.csv index 49db5fd3..41250ff4 100644 --- a/data/images_to_samples_ci_csv.csv +++ b/data/images_to_samples_ci_csv.csv @@ -1,4 +1,4 @@ -./data/22978945_15.tif,./data/massachusetts_buildings.gpkg,class,trn -./data/23429155_15.tif,./data/massachusetts_buildings.gpkg,class,val -./data/23429155_15.tif,./data/massachusetts_buildings.gpkg,class,val -./data/23429155_15.tif,./data/massachusetts_buildings.gpkg,class,tst +./data/22978945_15.tif,,./data/massachusetts_buildings.gpkg,properties/class,trn +./data/23429155_15.tif,,./data/massachusetts_buildings.gpkg,properties/class,val +./data/23429155_15.tif,,./data/massachusetts_buildings.gpkg,properties/class,val +./data/23429155_15.tif,,./data/massachusetts_buildings.gpkg,properties/class,tst diff --git a/data/inference_classif_ci_csv.csv b/data/inference_classif_ci_csv.csv index 164b8fe9..192f62c1 100644 --- a/data/inference_classif_ci_csv.csv +++ b/data/inference_classif_ci_csv.csv @@ -1,3 +1,3 @@ -./data/classification/135.tif -./data/classification/408.tif -./data/classification/2533.tif +./data/classification/135.tif, +./data/classification/408.tif, +./data/classification/2533.tif, diff --git a/data/inference_sem_seg_ci_csv.csv b/data/inference_sem_seg_ci_csv.csv index b7946af8..9b0e8316 100644 --- a/data/inference_sem_seg_ci_csv.csv +++ b/data/inference_sem_seg_ci_csv.csv @@ -1,2 +1,2 @@ -./data/22978945_15.tif -./data/23429155_15.tif +./data/22978945_15.tif, +./data/23429155_15.tif, diff --git a/images_to_samples.py b/images_to_samples.py index 88f10378..cbe006e8 100644 --- a/images_to_samples.py +++ b/images_to_samples.py @@ -1,25 +1,23 @@ import argparse import os from pathlib import Path - import numpy as np import warnings -import fiona import rasterio -from rasterio import features import time from tqdm import tqdm from collections import OrderedDict -from utils.CreateDataset import create_files_and_datasets -from utils.utils import read_parameters, assert_band_number, image_reader_as_array, \ - create_or_empty_folder, validate_num_classes, read_csv -from utils.preprocess import minmax_scale +from utils.CreateDataset import create_files_and_datasets, MetaSegmentationDataset +from utils.utils import ( + read_parameters, image_reader_as_array, vector_to_raster, + create_or_empty_folder, validate_num_classes, read_csv, get_key_def +) try: import boto3 except ModuleNotFoundError: - warnings.warn('The boto3 library counldn\'t be imported. Ignore if not using AWS s3 buckets', ImportWarning) + warnings.warn("The boto3 library couldn't be imported. Ignore if not using AWS s3 buckets", ImportWarning) pass @@ -51,18 +49,15 @@ def mask_image(arrayA, arrayB): return ma_array -def resize_datasets(hdf5_file): - """Function to add one entry to both the datasets""" - - n = hdf5_file['sat_img'].shape[0] - - new_size = n + 1 - hdf5_file['sat_img'].resize(new_size, axis=0) - hdf5_file['map_img'].resize(new_size, axis=0) +def append_to_dataset(dataset, sample): + old_size = dataset.shape[0] # this function always appends samples on the first axis + dataset.resize(old_size + 1, axis=0) + dataset[old_size, ...] 
= sample
+    return old_size  # the index to the newly added sample, or the previous size of the dataset


 def samples_preparation(in_img_array, label_array, sample_size, dist_samples, samples_count, num_classes, samples_file,
-                        dataset, min_annotated_percent=0):
+                        dataset, min_annotated_percent=0, image_metadata=None):
     """
     Extract and write samples from input image and reference image
     :param in_img_array: numpy array of the input image
@@ -74,6 +69,7 @@ def samples_preparation(in_img_array, label_array, sample_size, dist_samples, sa
     :param samples_file: (hdf5 dataset) hdfs file where samples will be written
     :param dataset: (str) Type of dataset where the samples will be written. Can be 'trn' or 'val' or 'tst'
     :param min_annotated_percent: (int) Minimum % of non background pixels in sample, in order to store it
+    :param image_metadata: (Ruamel) list of optional metadata specified in the associated metadata file
     :return: updated samples count and number of classes.
     """
@@ -90,6 +86,12 @@ def samples_preparation(in_img_array, label_array, sample_size, dist_samples, sa
     else:
         raise ValueError(f"Dataset value must be trn or val. Provided value is {dataset}")

+    metadata_idx = -1
+    if image_metadata:
+        # there should be one set of metadata per raster
+        # ...all samples created by tiling below will point to that metadata by index
+        metadata_idx = append_to_dataset(samples_file["metadata"], repr(image_metadata))
+
     # half tile padding
     half_tile = int(sample_size / 2)
     pad_in_img_array = np.pad(in_img_array, ((half_tile, half_tile), (half_tile, half_tile), (0, 0)),
@@ -101,16 +103,16 @@
             data = (pad_in_img_array[row:row + sample_size, column:column + sample_size, :])
             target = np.squeeze(pad_label_array[row:row + sample_size, column:column + sample_size, :], axis=2)

-            target_class_num = target.max()
             u, count = np.unique(target, return_counts=True)
-            target_background_percent = count[0] / np.sum(count) * 100
+            target_background_percent = count[0] / np.sum(count) * 100 if 0 in u else 0

-            if target_background_percent >= min_annotated_percent:
-                resize_datasets(samples_file)
-                samples_file["sat_img"][idx_samples, ...] = data
-                samples_file["map_img"][idx_samples, ...] = target
+            if target_background_percent < 100 - min_annotated_percent:
+                append_to_dataset(samples_file["sat_img"], data)
+                append_to_dataset(samples_file["map_img"], target)
+                append_to_dataset(samples_file["meta_idx"], metadata_idx)
                 idx_samples += 1

+            target_class_num = np.max(u)
             if num_classes < target_class_num:
                 num_classes = target_class_num
@@ -125,47 +127,19 @@
     return samples_count, num_classes


-def vector_to_raster(vector_file, input_image, attribute_name):
-    """
-    Function to rasterize vector data.
-    :param vector_file: (str) Path and name of reference GeoPackage
-    :param input_image: (str) Path and name of the input raster image
-    :param attribute_name: (str) Attribute containing the pixel value to write
-    :return: numpy array of the burned image
-    """
-
-    # Extract vector features to burn in the raster image
-    with fiona.open(vector_file, 'r') as src:
-        lst_vector = [vector for vector in src]
-
-    # Sort feature in order to priorize the burning in the raster image (ex: vegetation before roads...)
- lst_vector.sort(key=lambda vector: vector['properties'][attribute_name]) - lst_vector_tuple = [(vector['geometry'], int(vector['properties'][attribute_name])) for vector in lst_vector] - - # TODO: check a vector entity is empty (e.g. if a vector['type'] in lst_vector is None.) - # Open input raster image to have access to number of rows, column, crs... - with rasterio.open(input_image, 'r') as src: - burned_raster = rasterio.features.rasterize((vector_tuple for vector_tuple in lst_vector_tuple), - fill=0, - out_shape=src.shape, - transform=src.transform, - dtype=np.uint8) - - return burned_raster - - def main(params): """ Training and validation datasets preparation. :param params: (dict) Parameters found in the yaml config file. """ - gpkg_file = [] + bucket_file_cache = [] bucket_name = params['global']['bucket_name'] data_path = params['global']['data_path'] Path.mkdir(Path(data_path), exist_ok=True) csv_file = params['sample']['prep_csv_file'] + final_samples_folder = None if bucket_name: s3 = boto3.resource('s3') bucket = s3.Bucket(bucket_name) @@ -197,34 +171,39 @@ def main(params): if bucket_name: bucket.download_file(info['tif'], "Images/" + info['tif'].split('/')[-1]) info['tif'] = "Images/" + info['tif'].split('/')[-1] - if info['gpkg'] not in gpkg_file: - gpkg_file.append(info['gpkg']) + if info['gpkg'] not in bucket_file_cache: + bucket_file_cache.append(info['gpkg']) bucket.download_file(info['gpkg'], info['gpkg'].split('/')[-1]) info['gpkg'] = info['gpkg'].split('/')[-1] - - if os.path.isfile(info['tif']): - assert_band_number(info['tif'], params['global']['number_of_bands']) - else: - raise IOError(f'Could not locate "{info["tif"]}". Make sure file exists in this directory.') + if info['meta']: + if info['meta'] not in bucket_file_cache: + bucket_file_cache.append(info['meta']) + bucket.download_file(info['meta'], info['meta'].split('/')[-1]) + info['meta'] = info['meta'].split('/')[-1] _tqdm.set_postfix(OrderedDict(file=f'{info["tif"]}', sample_size=params['global']['samples_size'])) - # Read the input raster image - np_input_image = image_reader_as_array(info['tif']) - # Validate the number of class in the vector file validate_num_classes(info['gpkg'], params['global']['num_classes'], info['attribute_name']) - # Burn vector file in a raster file - np_label_raster = vector_to_raster(info['gpkg'], info['tif'], info['attribute_name']) - - # Guidelines for pre-processing: http://cs231n.github.io/neural-networks-2/#datapre - # Scale arrays to values [0,1]. Default: will scale. Useful if dealing with 8 bit *and* 16 bit images. 
-        if params['global']['scale_data']:
-            sc_min, sc_max = params['global']['scale_data']
-            np_input_image = minmax_scale(np_input_image,
-                                          orig_range=(np.min(np_input_image), np.max(np_input_image)),
-                                          scale_range=(sc_min,sc_max))
+            assert os.path.isfile(info['tif']), f"could not open raster file at {info['tif']}"
+            with rasterio.open(info['tif'], 'r') as raster:
+
+                # Burn vector file in a raster file
+                np_label_raster = vector_to_raster(vector_file=info['gpkg'],
+                                                   input_image=raster,
+                                                   attribute_name=info['attribute_name'],
+                                                   fill=get_key_def('ignore_index', get_key_def('training', params, {}), 0))
+
+                # Read the input raster image
+                np_input_image = image_reader_as_array(input_image=raster,
+                                                       scale=get_key_def('scale_data', params['global'], None),
+                                                       aux_vector_file=get_key_def('aux_vector_file', params['global'], None),
+                                                       aux_vector_attrib=get_key_def('aux_vector_attrib', params['global'], None),
+                                                       aux_vector_ids=get_key_def('aux_vector_ids', params['global'], None),
+                                                       aux_vector_dist_maps=get_key_def('aux_vector_dist_maps', params['global'], True),
+                                                       aux_vector_dist_log=get_key_def('aux_vector_dist_log', params['global'], True),
+                                                       aux_vector_scale=get_key_def('aux_vector_scale', params['global'], None))

             # Mask the zeros from input image into label raster.
             if params['sample']['mask_reference']:
@@ -239,6 +218,15 @@
             else:
                 raise ValueError(f"Dataset value must be trn or val or tst. Provided value is {info['dataset']}")

+            meta_map, metadata = get_key_def("meta_map", params["global"], {}), None
+            if info['meta'] is not None and isinstance(info['meta'], str) and os.path.isfile(info['meta']):
+                metadata = read_parameters(info['meta'])
+
+            input_band_count = np_input_image.shape[2] + MetaSegmentationDataset.get_meta_layer_count(meta_map)
+            assert input_band_count == params['global']['number_of_bands'], \
+                f"The number of bands in the input image ({input_band_count}) and the parameter " \
+                f"'number_of_bands' in the yaml file ({params['global']['number_of_bands']}) should be identical"
+
             np_label_raster = np.reshape(np_label_raster, (np_label_raster.shape[0], np_label_raster.shape[1], 1))
             number_samples, number_classes = samples_preparation(np_input_image,
                                                                  np_label_raster,
@@ -248,7 +236,8 @@
                                                                  number_classes,
                                                                  out_file,
                                                                  info['dataset'],
-                                                                 params['sample']['min_annotated_percent'])
+                                                                 params['sample']['min_annotated_percent'],
+                                                                 metadata)

             _tqdm.set_postfix(OrderedDict(number_samples=number_samples))
             out_file.flush()
@@ -259,7 +248,7 @@
     print("Number of samples created: ", number_samples)

-    if bucket_name:
+    if bucket_name and final_samples_folder:
         print('Transfering Samples to the bucket')
         bucket.upload_file(samples_folder + "/trn_samples.hdf5", final_samples_folder + '/trn_samples.hdf5')
         bucket.upload_file(samples_folder + "/val_samples.hdf5", final_samples_folder + '/val_samples.hdf5')
diff --git a/inference.py b/inference.py
index c3cb6370..910306ac 100644
--- a/inference.py
+++ b/inference.py
@@ -16,10 +16,9 @@
 from pathlib import Path

 from models.model_choice import net
-from utils.utils import read_parameters, assert_band_number, load_from_checkpoint, \
-    image_reader_as_array, read_csv, get_device_ids, gpu_stats
-from utils.preprocess import minmax_scale
-
+from utils.utils import read_parameters, load_from_checkpoint, image_reader_as_array, \
+    read_csv, get_device_ids, gpu_stats, get_key_def
+from utils.CreateDataset import MetaSegmentationDataset
 try:
     import boto3
 except ModuleNotFoundError:
@@ -49,7 +48,7 @@ def
create_new_raster_from_base(input_raster, output_raster, write_array): dst.write(write_array[:, :], 1) -def sem_seg_inference(model, nd_array, overlay, chunk_size, num_classes, device): +def sem_seg_inference(model, nd_array, overlay, chunk_size, num_classes, device, meta_map=None, metadata=None): """Inference on images using semantic segmentation Args: model: model to use for inference @@ -90,6 +89,8 @@ def sem_seg_inference(model, nd_array, overlay, chunk_size, num_classes, device) col_end = col_start + chunk_size chunk_input = padded_array[row_start:row_end, col_start:col_end, :] + if meta_map: + chunk_input = MetaSegmentationDataset.append_meta_layers(chunk_input, meta_map, metadata) inputs = torch.from_numpy(np.float32(np.transpose(chunk_input, (2, 0, 1)))) inputs.unsqueeze_(0) @@ -120,12 +121,13 @@ def sem_seg_inference(model, nd_array, overlay, chunk_size, num_classes, device) raise IOError(f"Error classifying image : Image shape of {len(nd_array.shape)} is not recognized") -def classifier(params, img_list, model): +def classifier(params, img_list, model, device): """ Classify images by class :param params: :param img_list: :param model: + :param device: :return: """ weights_file_name = params['inference']['state_dict_path'] @@ -163,8 +165,7 @@ def classifier(params, img_list, model): img = to_tensor(img) img = img.unsqueeze(0) with torch.no_grad(): - if torch.cuda.is_available(): - img = img.cuda() + img = img.to(device) outputs = model(img) _, predicted = torch.max(outputs, 1) @@ -219,6 +220,7 @@ def main(params): print(f'Inferences will be saved to: {working_folder}') bucket = None + bucket_file_cache = [] bucket_name = params['global']['bucket_name'] model, state_dict_path, model_name = net(params, inference=True) @@ -258,7 +260,7 @@ def main(params): assert len(list_img) >= 0, f'No .tif files found in {img_dir_or_csv}' if params['global']['task'] == 'classification': - classifier(params, list_img, model) + classifier(params, list_img, model, device) elif params['global']['task'] == 'segmentation': if bucket: @@ -269,6 +271,10 @@ def main(params): chunk_size, nbr_pix_overlap = calc_overlap(params) num_classes = params['global']['num_classes'] + if num_classes == 1: + # assume background is implicitly needed (makes no sense to predict with one class otherwise) + # this will trigger some warnings elsewhere, but should succeed nonetheless + num_classes = 2 with tqdm(list_img, desc='image list', position=0) as _tqdm: for img in _tqdm: img_name = os.path.basename(img['tif']) @@ -276,30 +282,46 @@ def main(params): local_img = f"Images/{img_name}" bucket.download_file(img['tif'], local_img) inference_image = f"Classified_Images/{img_name.split('.')[0]}_inference.tif" + if img['meta']: + if img['meta'] not in bucket_file_cache: + bucket_file_cache.append(img['meta']) + bucket.download_file(img['meta'], img['meta'].split('/')[-1]) + img['meta'] = img['meta'].split('/')[-1] else: local_img = img['tif'] inference_image = os.path.join(params['inference']['working_folder'], f"{img_name.split('.')[0]}_inference.tif") - assert_band_number(local_img, params['global']['number_of_bands']) + assert os.path.isfile(local_img), f"could not open raster file at {local_img}" + with rasterio.open(local_img, 'r') as raster: + + np_input_image = image_reader_as_array(input_image=raster, + scale=get_key_def('scale_data', params['global'], None), + aux_vector_file=get_key_def('aux_vector_file', params['global'], None), + aux_vector_attrib=get_key_def('aux_vector_attrib', params['global'], None), + 
aux_vector_ids=get_key_def('aux_vector_ids', params['global'], None),
+                                                           aux_vector_dist_maps=get_key_def('aux_vector_dist_maps', params['global'], True),
+                                                           aux_vector_dist_log=get_key_def('aux_vector_dist_log', params['global'], True),
+                                                           aux_vector_scale=get_key_def('aux_vector_scale', params['global'], None))

-            nd_array_tif = image_reader_as_array(local_img)
-            assert(len(np.unique(nd_array_tif))>1), (f'Image "{img_name}" only contains {np.unique(nd_array_tif)} value.')
+                meta_map, metadata = get_key_def("meta_map", params["global"], {}), None
+                if meta_map:
+                    assert img['meta'] is not None and isinstance(img['meta'], str) and os.path.isfile(img['meta']), \
+                        "global configuration requested metadata mapping onto loaded samples, but raster did not have available metadata"
+                    metadata = read_parameters(img['meta'])

-            # See: http://cs231n.github.io/neural-networks-2/#datapre. e.g. Scale arrays from [0,255] to [0,1]
-            scale = params['global']['scale_data']
-            if scale:
-                sc_min, sc_max = params['global']['scale_data']
-                nd_array_tif = minmax_scale(nd_array_tif,
-                                            orig_range=(np.min(nd_array_tif), np.max(nd_array_tif)),
-                                            scale_range=(sc_min,sc_max))
                 if debug:
-                    _tqdm.set_postfix(OrderedDict(image_name=img_name, image_shape=nd_array_tif.shape, scale=scale))
+                    _tqdm.set_postfix(OrderedDict(image_name=img_name, image_shape=np_input_image.shape))
+
+                input_band_count = np_input_image.shape[2] + MetaSegmentationDataset.get_meta_layer_count(meta_map)
+                assert input_band_count == params['global']['number_of_bands'], \
+                    f"The number of bands in the input image ({input_band_count}) and the parameter " \
+                    f"'number_of_bands' in the yaml file ({params['global']['number_of_bands']}) should be identical"
+
+                sem_seg_results = sem_seg_inference(model, np_input_image, nbr_pix_overlap, chunk_size, num_classes, device, meta_map, metadata)

-            sem_seg_results = sem_seg_inference(model, nd_array_tif, nbr_pix_overlap, chunk_size, num_classes, device)
             if debug and len(np.unique(sem_seg_results))==1:
-                print(f'Something is wrong. Inference contains only "{np.unique(sem_seg_results)} value. Make sure '
-                      f'"scale_data" parameter is coherent with parameters used for training model used in inference.')
+                print(f'Something is wrong. Inference contains only one value. Make sure data scale is coherent with training domain values.')
+            create_new_raster_from_base(local_img, inference_image, sem_seg_results)
             tqdm.write(f"Semantic segmentation of image {img_name} completed")
             if bucket:
diff --git a/models/coordconv.py b/models/coordconv.py
new file mode 100644
index 00000000..a3968ff8
--- /dev/null
+++ b/models/coordconv.py
@@ -0,0 +1,124 @@
+import collections
+
+import torch
+import torch.nn
+
+
+def get_coords_map(height, width, centered=True, normalized=True, noise=None, dtype=torch.float32):
+    """Returns a HxW intrinsic coordinates map tensor (shape=2xHxW)."""
+    x = torch.arange(width, dtype=dtype).unsqueeze(0)
+    y = torch.arange(height, dtype=dtype).unsqueeze(0)
+    if centered:
+        x -= (width - 1) // 2
+        y -= (height - 1) // 2
+    if normalized:
+        x /= width - 1
+        y /= height - 1
+    x = x.repeat(height, 1)
+    y = y.t().repeat(1, width)
+    if noise is not None:
+        assert isinstance(noise, float) and noise >= 0, "invalid noise stddev value"
+        x = torch.normal(mean=x, std=noise)
+        y = torch.normal(mean=y, std=noise)
+    return torch.stack([x, y])
+
+
+class AddCoords(torch.nn.Module):
+    """Creates a torch-compatible layer that adds intrinsic coordinate layers to input tensors."""
+    def __init__(self, centered=True, normalized=True, noise=None, radius_channel=False, scale=None):
+        super().__init__()
+        self.centered = centered
+        self.normalized = normalized
+        self.noise = noise
+        self.radius_channel = radius_channel
+        self.scale = scale  # keep the constructor argument; assigning None here would silently disable scaling
+
+    def forward(self, in_tensor):
+        batch_size, channels, height, width = in_tensor.shape
+        coords_map = get_coords_map(height, width, self.centered, self.normalized, self.noise)
+        if self.scale is not None:
+            coords_map *= self.scale
+        if self.radius_channel:
+            middle_slice = coords_map[:, (height - 1) // 2, (width - 1) // 2]
+            radius = torch.sqrt(torch.pow(coords_map[0, :, :] - middle_slice[0], 2) +
+                                torch.pow(coords_map[1, :, :] - middle_slice[1], 2))
+            coords_map = torch.cat([coords_map, radius.unsqueeze(0)], dim=0)
+        coords_map = coords_map.repeat(batch_size, 1, 1, 1)
+        dev = in_tensor.device
+        out = torch.cat([in_tensor, coords_map.to(dev)], dim=1)
+        return out
+
+
+class CoordConv2d(torch.nn.Module):
+    """CoordConv-equivalent of torch's default Conv2d model layer.
+
+    See Liu et al. (2018), "An intriguing failing of convolutional neural networks..."
+    for more information (https://arxiv.org/abs/1807.03247).
+    """
+
+    def __init__(self, in_channels, *args, centered=True, normalized=True,
+                 noise=None, radius_channel=False, scale=None, **kwargs):
+        super().__init__()
+        self.addcoord = AddCoords(centered=centered, normalized=normalized, noise=noise,
+                                  radius_channel=radius_channel, scale=scale)
+        extra_ch = 3 if radius_channel else 2
+        self.conv = torch.nn.Conv2d(in_channels + extra_ch, *args, **kwargs)
+
+    def forward(self, in_tensor):
+        out = self.addcoord(in_tensor)
+        out = self.conv(out)
+        return out
+
+
+class CoordConvTranspose2d(torch.nn.Module):
+    """CoordConv-equivalent of torch's default ConvTranspose2d model layer.
+
+    See Liu et al. (2018), "An intriguing failing of convolutional neural networks..."
+    for more information (https://arxiv.org/abs/1807.03247).
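+
+    Example (illustrative): with the two default coordinate channels appended, a 4-band
+    input drives a 6-channel transposed convolution internally:
+        >>> layer = CoordConvTranspose2d(4, 8, kernel_size=2, stride=2)
+        >>> out = layer(torch.rand(1, 4, 32, 32))  # out.shape == (1, 8, 64, 64)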
+ """ + + def __init__(self, in_channels, *args, centered=True, normalized=True, + noise=None, radius_channel=False, scale=None, **kwargs): + super().__init__() + self.addcoord = AddCoords(centered=centered, normalized=normalized, noise=noise, + radius_channel=radius_channel, scale=scale) + extra_ch = 3 if radius_channel else 2 + self.convT = torch.nn.ConvTranspose2d(in_channels + extra_ch, *args, **kwargs) + + def forward(self, in_tensor): + out = self.addcoord(in_tensor) + out = self.convT(out) + return out + + +def swap_coordconv_layers(module, centered=True, normalized=True, + noise=None, radius_channel=False, scale=None): + """Modifies the provided module by swapping Conv2d layers for CoordConv-equivalent layers.""" + # note: this is a pretty 'dumb' way to add coord maps to a model, as it will add them everywhere, even + # in a potential output (1x1) conv layer; manually designing the network with these would be more adequate! + coordconv_params = {"centered": centered, "normalized": normalized, "noise": noise, + "radius_channel": radius_channel, "scale": scale} + if isinstance(module, torch.nn.Conv2d): + return CoordConv2d(in_channels=module.in_channels, out_channels=module.out_channels, + kernel_size=module.kernel_size, stride=module.stride, padding=module.padding, + dilation=module.dilation, groups=module.groups, bias=module.bias is not None, + padding_mode=module.padding_mode, **coordconv_params) + elif isinstance(module, torch.nn.ConvTranspose2d): + return CoordConvTranspose2d(in_channels=module.in_channels, out_channels=module.out_channels, + kernel_size=module.kernel_size, stride=module.stride, + padding=module.padding, output_padding=module.output_padding, + groups=module.groups, bias=module.bias is not None, + dilation=module.dilation, padding_mode=module.padding_mode, + **coordconv_params) + elif isinstance(module, torch.nn.Sequential): + return torch.nn.Sequential(*[swap_coordconv_layers(m, **coordconv_params) for m in module]) + elif isinstance(module, collections.OrderedDict): + for mname, m in module.items(): + module[mname] = swap_coordconv_layers(m, **coordconv_params) + elif isinstance(module, torch.nn.Module): + for attrib, m in module.__dict__.items(): + if isinstance(m, (torch.nn.Conv2d, torch.nn.ConvTranspose2d)): + setattr(module, attrib, swap_coordconv_layers(m, **coordconv_params)) + elif isinstance(m, (torch.nn.Module, collections.OrderedDict)): + setattr(module, attrib, swap_coordconv_layers(m, **coordconv_params)) + return module diff --git a/models/model_choice.py b/models/model_choice.py index d5887af7..9738c27c 100644 --- a/models/model_choice.py +++ b/models/model_choice.py @@ -1,8 +1,9 @@ import os import torch +import warnings import torchvision.models as models -from models import TernausNet, unet, checkpointed_unet, inception -from utils.utils import chop_layer +from models import TernausNet, unet, checkpointed_unet, inception, coordconv +from utils.utils import chop_layer, get_key_def def load_checkpoint(filename): @@ -30,6 +31,10 @@ def net(net_params, inference=False): """Define the neural net""" model_name = net_params['global']['model_name'].lower() num_classes = net_params['global']['num_classes'] + if num_classes == 1: + warnings.warn("config specified that number of classes is 1, but model will be instantiated" + " with a minimum of two regardless (will assume that 'background' exists)") + num_classes = 2 msg = f'Number of bands specified incompatible with this model. Requires 3 band data.' 
state_dict_path = '' if model_name == 'unetsmall': @@ -76,6 +81,18 @@ def net(net_params, inference=False): model.load_state_dict(chopped_dict, strict=False) else: raise ValueError(f'The model name {model_name} in the config.yaml is not defined.') + + coordconv_convert = get_key_def('coordconv_convert', net_params['global'], False) + if coordconv_convert: + centered = get_key_def('coordconv_centered', net_params['global'], True) + normalized = get_key_def('coordconv_normalized', net_params['global'], True) + noise = get_key_def('coordconv_noise', net_params['global'], None) + radius_channel = get_key_def('coordconv_radius_channel', net_params['global'], False) + scale = get_key_def('coordconv_scale', net_params['global'], 1.0) + # note: this operation will not attempt to preserve already-loaded model parameters! + model = coordconv.swap_coordconv_layers(model, centered=centered, normalized=normalized, noise=noise, + radius_channel=radius_channel, scale=scale) + if net_params['training']['state_dict_path']: state_dict_path = net_params['training']['state_dict_path'] checkpoint = load_checkpoint(state_dict_path) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..6a8570a4 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +torch +numpy +ruamel_yaml +fiona +nvidia-ml-py3 +boto3 +rasterio +tqdm diff --git a/train_model.py b/train_model.py index 6cf7d21e..64cdf585 100644 --- a/train_model.py +++ b/train_model.py @@ -8,6 +8,7 @@ import h5py import datetime import warnings +import functools from tqdm import tqdm from collections import OrderedDict @@ -31,7 +32,7 @@ from utils.metrics import report_classification, create_metrics_dict from models.model_choice import net, load_checkpoint from losses import MultiClassCriterion -from utils.utils import read_parameters, load_from_checkpoint, list_s3_subfolders, get_device_ids, gpu_stats +from utils.utils import read_parameters, load_from_checkpoint, list_s3_subfolders, get_device_ids, gpu_stats, get_key_def try: import boto3 @@ -46,7 +47,9 @@ def verify_weights(num_classes, weights): num_classes: number of classes defined in the configuration file weights: weights defined in the configuration file """ - if num_classes != len(weights): + if num_classes == 1 and len(weights) == 2: + warnings.warn("got two class weights for single class defined in configuration file; will assume index 0 = background") + elif num_classes != len(weights): raise ValueError('The number of class weights in the configuration file is different than the number of classes') @@ -145,14 +148,14 @@ def download_s3_files(bucket_name, data_path, output_path, num_classes, task): return bucket, bucket_output_path, local_output_path, data_path -def create_dataloader(data_path, num_samples, batch_size, task, num_devices): +def create_dataloader(data_path, batch_size, task, num_devices, params): """ Function to create dataloader objects for training, validation and test datasets. :param data_path: (str) path to the samples folder - :param num_samples: (dict) number of samples for training, validation and test :param batch_size: (int) batch size :param task: (str) classification or segmentation :param num_devices: (int) number of GPUs used + :param params: (dict) Parameters found in the yaml config file. 
:return: trn_dataloader, val_dataloader, tst_dataloader """ if task == 'classification': @@ -171,12 +174,26 @@ def create_dataloader(data_path, num_samples, batch_size, task, num_devices): [transforms.Resize(299), transforms.ToTensor()]), loader=loader) elif task == 'segmentation': - trn_dataset = CreateDataset.SegmentationDataset(os.path.join(data_path, "samples"), num_samples['trn'], "trn", - transform=aug.compose_transforms(params, 'trn')) - val_dataset = CreateDataset.SegmentationDataset(os.path.join(data_path, "samples"), num_samples['val'], "val", - transform=aug.compose_transforms(params, 'tst')) - tst_dataset = CreateDataset.SegmentationDataset(os.path.join(data_path, "samples"), num_samples['tst'], "tst", - transform=aug.compose_transforms(params, 'tst')) + num_samples = get_num_samples(data_path=data_path, params=params) + print(f"Number of samples : {num_samples}") + meta_map = get_key_def("meta_map", params["global"], {}) + if not meta_map: + dataset_constr = CreateDataset.SegmentationDataset + else: + dataset_constr = functools.partial(CreateDataset.MetaSegmentationDataset, meta_map=meta_map) + dontcare = get_key_def("ignore_index", params["training"], None) + if dontcare == 0: + warnings.warn("The 'dontcare' value (or 'ignore_index') used in the loss function cannot be zero;" + " all valid class indices should be consecutive, and start at 0. The 'dontcare' value" + " will be remapped to -1 while loading the dataset, and inside the config from now on.") + params["training"]["ignore_index"] = -1 + datasets = [] + for subset in ["trn", "val", "tst"]: + datasets.append(dataset_constr(os.path.join(data_path, "samples"), subset, + max_sample_count=num_samples[subset], + dontcare=dontcare, + transform=aug.compose_transforms(params, subset))) + trn_dataset, val_dataset, tst_dataset = datasets else: raise ValueError(f"The task should be either classification or segmentation. The provided value is {task}") @@ -228,38 +245,25 @@ def set_hyperparameters(params, model, checkpoint): """ # set mandatory hyperparameters values with those in config file if they exist lr = params['training']['learning_rate'] + assert lr is not None and lr > 0, "missing mandatory learning rate parameter" weight_decay = params['training']['weight_decay'] + assert weight_decay is not None and weight_decay >= 0, "missing mandatory weight decay parameter" step_size = params['training']['step_size'] + assert step_size is not None and step_size > 0, "missing mandatory step size parameter" gamma = params['training']['gamma'] - num_devices = params['global']['num_gpus'] - msg = 'Missing mandatory hyperparameter in config file. Make sure learning_rate, weight_decay, step_size, gamma and num_devices are set.' - assert (lr and weight_decay and step_size and gamma and num_devices) is not None, msg + assert gamma is not None and gamma >= 0, "missing mandatory gamma parameter" # optional hyperparameters. 
Set to None if not in config file class_weights = torch.tensor(params['training']['class_weights']) if params['training']['class_weights'] else None if params['training']['class_weights']: verify_weights(params['global']['num_classes'], class_weights) - ignore_index = params['training']['ignore_index'] if params['training']['ignore_index'] else -100 + ignore_index = -100 + if params['training']['ignore_index'] is not None: + ignore_index = params['training']['ignore_index'] # Loss function criterion = MultiClassCriterion(loss_type=params['training']['loss_fn'], ignore_index=ignore_index, weight=class_weights) - # list of GPU devices that are available and unused. If no GPUs, returns empty list - lst_device_ids = get_device_ids(num_devices) if torch.cuda.is_available() else [] - num_devices = len(lst_device_ids) if lst_device_ids else 0 - device = torch.device(f'cuda:{lst_device_ids[0]}' if torch.cuda.is_available() and lst_device_ids else 'cpu') - - if num_devices == 1: - print(f"Using Cuda device {lst_device_ids[0]}") - elif num_devices > 1: - print(f"Using data parallel on devices {str(lst_device_ids)[1:-1]}") - model = nn.DataParallel(model, device_ids=lst_device_ids) # adds prefix 'module.' to state_dict keys - else: - warnings.warn(f"No Cuda device available. This process will only run on CPU") - - criterion = criterion.to(device) - model = model.to(device) - # Optimizer opt_fn = params['training']['optimizer'] optimizer = create_optimizer(params=model.parameters(), mode=opt_fn, base_lr=lr, weight_decay=weight_decay) @@ -268,7 +272,7 @@ def set_hyperparameters(params, model, checkpoint): if checkpoint: model, optimizer = load_from_checkpoint(checkpoint, model, optimizer=optimizer) - return model, criterion, optimizer, lr_scheduler, device, num_devices + return model, criterion, optimizer, lr_scheduler def main(params, config_path): @@ -295,6 +299,11 @@ def main(params, config_path): num_classes = params['global']['num_classes'] batch_size = params['training']['batch_size'] + if num_classes == 1: + # assume background is implicitly needed (makes no sense to train with one class otherwise) + # this will trigger some warnings elsewhere, but should succeed nonetheless + num_classes = 2 + if bucket_name: bucket, bucket_output_path, output_path, data_path = download_s3_files(bucket_name=bucket_name, data_path=data_path, @@ -316,15 +325,30 @@ def main(params, config_path): val_log = InformationLogger(output_path, 'val') tst_log = InformationLogger(output_path, 'tst') - model, criterion, optimizer, lr_scheduler, device, num_devices = set_hyperparameters(params, model, checkpoint) + num_devices = params['global']['num_gpus'] + assert num_devices is not None and num_devices >= 0, "missing mandatory num gpus parameter" + # list of GPU devices that are available and unused. If no GPUs, returns empty list + lst_device_ids = get_device_ids(num_devices) if torch.cuda.is_available() else [] + num_devices = len(lst_device_ids) if lst_device_ids else 0 + device = torch.device(f'cuda:{lst_device_ids[0]}' if torch.cuda.is_available() and lst_device_ids else 'cpu') + if num_devices == 1: + print(f"Using Cuda device {lst_device_ids[0]}") + elif num_devices > 1: + print(f"Using data parallel on devices {str(lst_device_ids)[1:-1]}") + model = nn.DataParallel(model, device_ids=lst_device_ids) # adds prefix 'module.' to state_dict keys + else: + warnings.warn(f"No Cuda device available. 
This process will only run on CPU") - num_samples = get_num_samples(data_path=data_path, params=params) - print(f"Number of samples : {num_samples}") trn_dataloader, val_dataloader, tst_dataloader = create_dataloader(data_path=data_path, - num_samples=num_samples, batch_size=batch_size, task=task, - num_devices=num_devices) + num_devices=num_devices, + params=params) + + model, criterion, optimizer, lr_scheduler = set_hyperparameters(params, model, checkpoint) + + criterion = criterion.to(device) + model = model.to(device) filename = os.path.join(output_path, 'checkpoint.pth.tar') @@ -530,7 +554,7 @@ def evaluation(eval_loader, model, criterion, num_classes, batch_size, task, ep_ _tqdm.set_postfix(OrderedDict(dataset=dataset, loss=f'{eval_metrics["loss"].avg:.4f}')) - if debug and torch.cuda.is_available(): + if debug and torch.cuda.is_available() and device.type != "cpu": res, mem = gpu_stats(device=device.index) _tqdm.set_postfix(OrderedDict(device=device, gpu_perc=f'{res.gpu} %', gpu_RAM=f'{mem.used/(1024**2):.0f}/{mem.total/(1024**2):.0f} MiB')) diff --git a/utils/CreateDataset.py b/utils/CreateDataset.py index 650e980a..fcb1eea9 100644 --- a/utils/CreateDataset.py +++ b/utils/CreateDataset.py @@ -1,8 +1,12 @@ +import collections import os import h5py from torch.utils.data import Dataset import numpy as np +import models.coordconv +from utils.utils import get_key_def, get_key_recursive + def create_files_and_datasets(params, samples_folder): """ @@ -13,51 +17,128 @@ def create_files_and_datasets(params, samples_folder): """ samples_size = params['global']['samples_size'] number_of_bands = params['global']['number_of_bands'] - - trn_hdf5 = h5py.File(os.path.join(samples_folder, "trn_samples.hdf5"), "w") - val_hdf5 = h5py.File(os.path.join(samples_folder, "val_samples.hdf5"), "w") - tst_hdf5 = h5py.File(os.path.join(samples_folder, "tst_samples.hdf5"), "w") - - trn_hdf5.create_dataset("sat_img", (0, samples_size, samples_size, number_of_bands), np.float32, - maxshape=(None, samples_size, samples_size, number_of_bands)) - trn_hdf5.create_dataset("map_img", (0, samples_size, samples_size), np.uint8, - maxshape=(None, samples_size, samples_size)) - - val_hdf5.create_dataset("sat_img", (0, samples_size, samples_size, number_of_bands), np.float32, - maxshape=(None, samples_size, samples_size, number_of_bands)) - val_hdf5.create_dataset("map_img", (0, samples_size, samples_size), np.uint8, - maxshape=(None, samples_size, samples_size)) - - tst_hdf5.create_dataset("sat_img", (0, samples_size, samples_size, number_of_bands), np.float32, - maxshape=(None, samples_size, samples_size, number_of_bands)) - tst_hdf5.create_dataset("map_img", (0, samples_size, samples_size), np.uint8, - maxshape=(None, samples_size, samples_size)) - return trn_hdf5, val_hdf5, tst_hdf5 + meta_map = get_key_def('meta_map', params['global'], {}) + real_num_bands = number_of_bands - MetaSegmentationDataset.get_meta_layer_count(meta_map) + assert real_num_bands > 0, "invalid number of bands when accounting for meta layers" + hdf5_files = [] + for subset in ["trn", "val", "tst"]: + hdf5_file = h5py.File(os.path.join(samples_folder, f"{subset}_samples.hdf5"), "w") + hdf5_file.create_dataset("sat_img", (0, samples_size, samples_size, real_num_bands), np.float32, + maxshape=(None, samples_size, samples_size, real_num_bands)) + hdf5_file.create_dataset("map_img", (0, samples_size, samples_size), np.int16, + maxshape=(None, samples_size, samples_size)) + hdf5_file.create_dataset("meta_idx", (0, 1), dtype=np.int16, maxshape=(None, 
1)) + hdf5_file.create_dataset("metadata", (0, 1), dtype=h5py.string_dtype(), maxshape=(None, 1)) + hdf5_files.append(hdf5_file) + return hdf5_files class SegmentationDataset(Dataset): - """Dataset for semantic segmentation""" - def __init__(self, work_folder, num_samples, dataset_type, transform=None): + """Semantic segmentation dataset based on HDF5 parsing.""" + + def __init__(self, work_folder, dataset_type, max_sample_count=None, dontcare=None, transform=None): + # note: if 'max_sample_count' is None, then it will be read from the dataset at runtime self.work_folder = work_folder - self.num_samples = num_samples + self.max_sample_count = max_sample_count self.dataset_type = dataset_type self.transform = transform + self.metadata = [] + self.dontcare = dontcare + self.hdf5_path = os.path.join(self.work_folder, self.dataset_type + "_samples.hdf5") + with h5py.File(self.hdf5_path, "r") as hdf5_file: + if "metadata" in hdf5_file: + for i in range(hdf5_file["metadata"].shape[0]): + metadata = hdf5_file["metadata"][i, ...] + if isinstance(metadata, np.ndarray) and len(metadata) == 1: + metadata = metadata[0] + if isinstance(metadata, str): + if "ordereddict" in metadata: + metadata = metadata.replace("ordereddict", "collections.OrderedDict") + if metadata.startswith("collections.OrderedDict"): + metadata = eval(metadata) + self.metadata.append(metadata) + if self.max_sample_count is None: + self.max_sample_count = hdf5_file["sat_img"].shape[0] def __len__(self): - return self.num_samples + return self.max_sample_count + + def _remap_labels(self, map_img): + # note: will do nothing if 'dontcare' is not set in constructor, or set to non-zero value + if self.dontcare is None or self.dontcare != 0: + return map_img + # for now, the current implementation only handles the original 'dontcare' value as zero + # to keep the impl simple, we just reduce all indices by one so that 'dontcare' becomes -1 + assert map_img.dtype == np.int8 or map_img.dtype == np.int16 or map_img.dtype == np.int32 + map_img -= 1 + return map_img def __getitem__(self, index): + with h5py.File(self.hdf5_path, "r") as hdf5_file: + sat_img = hdf5_file["sat_img"][index, ...] + map_img = self._remap_labels(hdf5_file["map_img"][index, ...]) + meta_idx = int(hdf5_file["meta_idx"][index]) if "meta_idx" in hdf5_file else -1 + metadata = None + if meta_idx != -1: + metadata = self.metadata[meta_idx] + sample = {"sat_img": sat_img, "map_img": map_img, "metadata": metadata} + if self.transform: + sample = self.transform(sample) + return sample - hdf5_file = h5py.File(os.path.join(self.work_folder, self.dataset_type + "_samples.hdf5"), "r") - - sat_img = hdf5_file["sat_img"][index, ...] - map_img = hdf5_file["map_img"][index, ...] 
-
-        hdf5_file.close()
-        sample = {'sat_img': sat_img, 'map_img': map_img}
+class MetaSegmentationDataset(SegmentationDataset):
+    """Semantic segmentation dataset interface that appends metadata under new tensor layers."""
+
+    metadata_handling_modes = ["const_channel", "scaled_channel"]
+
+    def __init__(self, work_folder, dataset_type, meta_map, max_sample_count=None, dontcare=None, transform=None):
+        assert meta_map is None or isinstance(meta_map, dict), "unexpected metadata mapping object type"
+        assert meta_map is None or all([isinstance(k, str) and v in self.metadata_handling_modes for k, v in meta_map.items()]), \
+            "unexpected metadata key type or value handling mode"
+        super().__init__(work_folder=work_folder, dataset_type=dataset_type, max_sample_count=max_sample_count,
+                         dontcare=dontcare, transform=transform)
+        assert all([isinstance(m, (dict, collections.OrderedDict)) for m in self.metadata]), \
+            "cannot use provided metadata object type with meta-mapping dataset interface"
+        self.meta_map = meta_map
+
+    @staticmethod
+    def append_meta_layers(tensor, meta_map, metadata):
+        if meta_map:
+            assert isinstance(metadata, (dict, collections.OrderedDict)), "unexpected metadata type"
+            for meta_key, mode in meta_map.items():
+                meta_val = get_key_recursive(meta_key, metadata)
+                if mode == "const_channel":
+                    assert np.isscalar(meta_val), "constant channel-wise assignment requires scalar value"
+                    layer = np.full(tensor.shape[0:2], meta_val, dtype=np.float32)
+                    tensor = np.insert(tensor, tensor.shape[2], layer, axis=2)
+                elif mode == "scaled_channel":
+                    assert np.isscalar(meta_val), "scaled channel-wise coords assignment requires scalar value"
+                    layers = models.coordconv.get_coords_map(tensor.shape[0], tensor.shape[1]) * meta_val
+                    tensor = np.insert(tensor, tensor.shape[2], layers, axis=2)
+                # else...
+        return tensor
+
+    @staticmethod
+    def get_meta_layer_count(meta_map):
+        meta_layers = 0
+        if meta_map:
+            for meta_key, mode in meta_map.items():
+                if mode == "const_channel":
+                    meta_layers += 1
+                elif mode == "scaled_channel":
+                    meta_layers += 2
+        return meta_layers
+
+    def __getitem__(self, index):
+        # TODO: move the metadata layer appending into a util function shared with the inference script?
+        with h5py.File(self.hdf5_path, "r") as hdf5_file:
+            sat_img = hdf5_file["sat_img"][index, ...]
+            map_img = self._remap_labels(hdf5_file["map_img"][index, ...])
+            meta_idx = int(hdf5_file["meta_idx"][index]) if "meta_idx" in hdf5_file else -1
+            assert meta_idx != -1, f"metadata unavailable in sample #{index}"
+            sat_img = self.append_meta_layers(sat_img, self.meta_map, self.metadata[meta_idx])
+            sample = {"sat_img": sat_img, "map_img": map_img, "metadata": self.metadata[meta_idx]}
         if self.transform:
             sample = self.transform(sample)
-
         return sample
diff --git a/utils/utils.py b/utils/utils.py
index 00c0e60b..4983b4dc 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -1,14 +1,22 @@
 import torch
-# import torch should be first. Unclear issue, mentionned here: https://github.com/pytorch/pytorch/issues/2083
+# import torch should be first.
Unclear issue, mentioned here: https://github.com/pytorch/pytorch/issues/2083 import os from torch import nn import numpy as np import rasterio +import rasterio.features import warnings -from ruamel_yaml import YAML +import collections import fiona import csv +from utils.preprocess import minmax_scale + +try: + from ruamel_yaml import YAML +except ImportError: + from ruamel.yaml import YAML + try: from pynvml import * except ModuleNotFoundError: @@ -74,21 +82,6 @@ def chop_layer(pretrained_dict, layer_names=["logits"]): #https://discuss.pyto return chopped_dict -def assert_band_number(in_image, band_count_yaml): - """Verify if provided image has the same number of bands as described in the .yaml - Args: - in_image: full file path of the image - band_count_yaml: band count listed in the .yaml - """ - try: - in_array = image_reader_as_array(in_image) - except Exception as e: - print(e) - - msg = "The number of bands in the input image and the parameter 'number_of_bands' in the yaml file must be the same" - assert in_array.shape[2] == band_count_yaml, msg - - def load_from_checkpoint(checkpoint, model, optimizer=None): """Load weights from a previous checkpoint Args: @@ -127,28 +120,131 @@ def load_from_checkpoint(checkpoint, model, optimizer=None): return model, optimizer -def image_reader_as_array(file_name): +def image_reader_as_array(input_image, scale=None, aux_vector_file=None, aux_vector_attrib=None, aux_vector_ids=None, + aux_vector_dist_maps=False, aux_vector_dist_log=True, aux_vector_scale=None): """Read an image from a file and return a 3d array (h,w,c) Args: - file_name: full file path of the image + input_image: Rasterio file handle holding the (already opened) input raster + scale: optional scaling factor for the raw data + aux_vector_file: optional vector file from which to extract auxiliary shapes + aux_vector_attrib: optional vector file attribute name to parse in order to fetch ids + aux_vector_ids: optional vector ids to target in the vector file above + aux_vector_dist_maps: flag indicating whether aux vector bands should be distance maps or binary maps + aux_vector_dist_log: flag indicating whether log distances should be used in distance maps or not + aux_vector_scale: optional floating point scale factor to multiply to rasterized vector maps Return: - numm_py_array of the image read + numpy array of the image (possibly concatenated with auxiliary vector channels) """ - with rasterio.open(file_name, 'r') as src: - np_array = np.empty([src.height, src.width, src.count], dtype=np.float32) - for i in range(src.count): - band = src.read(i+1) # Bands starts at 1 in rasterio not 0 - np_array[:, :, i] = band + np_array = np.empty([input_image.height, input_image.width, input_image.count], dtype=np.float32) + for i in range(input_image.count): + np_array[:, :, i] = input_image.read(i+1) # Bands starts at 1 in rasterio not 0 + + # Guidelines for pre-processing: http://cs231n.github.io/neural-networks-2/#datapre + # Scale arrays to values [0,1]. Default: will scale. Useful if dealing with 8 bit *and* 16 bit images. 
@@ -127,28 +120,131 @@ def load_from_checkpoint(checkpoint, model, optimizer=None):
     return model, optimizer
 
 
-def image_reader_as_array(file_name):
+def image_reader_as_array(input_image, scale=None, aux_vector_file=None, aux_vector_attrib=None, aux_vector_ids=None,
+                          aux_vector_dist_maps=False, aux_vector_dist_log=True, aux_vector_scale=None):
     """Read an image from a file and return a 3d array (h,w,c)
     Args:
-        file_name: full file path of the image
+        input_image: Rasterio file handle holding the (already opened) input raster
+        scale: optional scaling range for the raw data
+        aux_vector_file: optional vector file from which to extract auxiliary shapes
+        aux_vector_attrib: optional vector file attribute name to parse in order to fetch ids
+        aux_vector_ids: optional vector ids to target in the vector file above
+        aux_vector_dist_maps: flag indicating whether aux vector bands should be distance maps or binary maps
+        aux_vector_dist_log: flag indicating whether log distances should be used in distance maps or not
+        aux_vector_scale: optional floating point scale factor to apply to rasterized vector maps
 
     Return:
-        numm_py_array of the image read
+        numpy array of the image (possibly concatenated with auxiliary vector channels)
     """
-    with rasterio.open(file_name, 'r') as src:
-        np_array = np.empty([src.height, src.width, src.count], dtype=np.float32)
-        for i in range(src.count):
-            band = src.read(i+1)  # Bands starts at 1 in rasterio not 0
-            np_array[:, :, i] = band
+    np_array = np.empty([input_image.height, input_image.width, input_image.count], dtype=np.float32)
+    for i in range(input_image.count):
+        np_array[:, :, i] = input_image.read(i+1)  # Bands start at 1 in rasterio, not 0
+
+    # Guidelines for pre-processing: http://cs231n.github.io/neural-networks-2/#datapre
+    # Scale arrays to values [0,1]. Default: will scale. Useful if dealing with 8 bit *and* 16 bit images.
+    if scale:
+        sc_min, sc_max = scale
+        np_array = minmax_scale(img=np_array,
+                                orig_range=(np.min(np_array), np.max(np_array)),
+                                scale_range=(sc_min, sc_max))
+
+    # if requested, load vectors from external file, rasterize, and append distance maps to array
+    if aux_vector_file is not None:
+        vec_tensor = vector_to_raster(vector_file=aux_vector_file,
+                                      input_image=input_image,
+                                      attribute_name=aux_vector_attrib,
+                                      fill=0,
+                                      target_ids=aux_vector_ids,
+                                      merge_all=False)
+        if aux_vector_dist_maps:
+            import cv2 as cv  # opencv becomes a project dependency only if we need to compute distance maps here
+            vec_tensor = vec_tensor.astype(np.float32)
+            for vec_band_idx in range(vec_tensor.shape[2]):
+                mask = vec_tensor[:, :, vec_band_idx]
+                mask = cv.dilate(mask, np.ones((3, 3), dtype=np.uint8))  # make points and linestrings easier to work with
+                # positive log-distances outside the shapes, negative log-distances inside them
+                dmap = cv.distanceTransform(np.where(mask, np.uint8(0), np.uint8(255)), cv.DIST_L2, cv.DIST_MASK_PRECISE)
+                if aux_vector_dist_log:
+                    dmap = np.log(dmap + 1)
+                dmap_inv = cv.distanceTransform(np.where(mask, np.uint8(255), np.uint8(0)), cv.DIST_L2, cv.DIST_MASK_PRECISE)
+                if aux_vector_dist_log:
+                    dmap_inv = np.log(dmap_inv + 1)
+                vec_tensor[:, :, vec_band_idx] = np.where(mask, -dmap_inv, dmap)
+        if aux_vector_scale:
+            for vec_band_idx in range(vec_tensor.shape[2]):
+                vec_tensor[:, :, vec_band_idx] *= aux_vector_scale
+        np_array = np.concatenate([np_array, vec_tensor], axis=2)
     return np_array
 
 
-def validate_num_classes(vector_file, num_classes, value_field):  # used only in images_to_samples.py
+def vector_to_raster(vector_file, input_image, attribute_name, fill=0, target_ids=None, merge_all=True):
+    """Function to rasterize vector data.
+    Args:
+        vector_file: Path and name of reference GeoPackage
+        input_image: Rasterio file handle holding the (already opened) input raster
+        attribute_name: Attribute containing the identifier for a vector (may contain slashes if recursive)
+        fill: default background value to use when filling non-contiguous regions
+        target_ids: list of identifiers to burn from the vector file (None = use all)
+        merge_all: defines whether all vectors should be burned with their identifiers in a
+                   single layer or in individual layers (in the order provided by 'target_ids')
+
+    Return:
+        numpy array of the burned image
+    """
+    # Extract vector features to burn in the raster image
+    with fiona.open(vector_file, 'r') as src:
+        lst_vector = [vector for vector in src]
+
+    # Sort features to prioritize the burning order in the raster image (e.g. vegetation before roads)
+    if attribute_name is not None:
+        lst_vector.sort(key=lambda vector: get_key_recursive(attribute_name, vector))
+
+    lst_vector_tuple = {}
+
+    # TODO: check whether a vector entity is empty (e.g. if a vector['type'] in lst_vector is None)
+    for vector in lst_vector:
+        id = get_key_recursive(attribute_name, vector) if attribute_name is not None else None
+        if target_ids is None or id in target_ids:
+            if id not in lst_vector_tuple:
+                lst_vector_tuple[id] = []
+            if merge_all:
+                # here, we assume that the id can be cast to int!
+                lst_vector_tuple[id].append((vector['geometry'], int(id) if id is not None else 0))
+            else:
+                # if not merging layers, just use '1' as the burned value for each target
+                lst_vector_tuple[id].append((vector['geometry'], 1))
+
+    if merge_all:
+        return rasterio.features.rasterize([v for vecs in lst_vector_tuple.values() for v in vecs],
+                                           fill=fill,
+                                           out_shape=input_image.shape,
+                                           transform=input_image.transform,
+                                           dtype=np.int16)
+    else:
+        burned_rasters = [rasterio.features.rasterize(lst_vector_tuple[id],
+                                                      fill=fill,
+                                                      out_shape=input_image.shape,
+                                                      transform=input_image.transform,
+                                                      dtype=np.int16) for id in lst_vector_tuple]
+        return np.stack(burned_rasters, axis=-1)
+
+
+def validate_num_classes(vector_file, num_classes, attribute_name):  # used only in images_to_samples.py
     """Validate that the number of classes in the vector file corresponds to the expected number
     Args:
         vector_file: full file path of the vector image
         num_classes: number of classes set in config.yaml
-        value_field: name of the value field representing the required classes in the vector image file
+        attribute_name: name of the value field representing the required classes in the vector image file
 
     Return:
         None
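For reviewers unfamiliar with the new aux-vector path, a usage sketch follows; the file paths and attribute key are assumed for illustration, not taken from the repo:

import rasterio

# Sketch (assumed paths): read a raster scaled to [0,1] and append one signed
# log-distance channel per distinct id rasterized from a hypothetical GeoPackage.
with rasterio.open("./data/some_tile.tif", "r") as src:
    arr = image_reader_as_array(src,
                                scale=(0, 1),
                                aux_vector_file="./data/roads.gpkg",
                                aux_vector_attrib="properties/road_class",  # hypothetical attribute path
                                aux_vector_dist_maps=True)
    # arr.shape == (src.height, src.width, src.count + number of distinct ids rasterized)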
@@ -157,9 +253,9 @@ def validate_num_classes(vector_file, num_classes, value_field):  # used only
     distinct_att = set()
     with fiona.open(vector_file, 'r') as src:
         for feature in src:
-            distinct_att.add(feature['properties'][value_field])  # Use property of set to store unique values
+            distinct_att.add(get_key_recursive(attribute_name, feature))  # Use property of set to store unique values
 
-    if len(distinct_att)+1 != num_classes:
+    if len(distinct_att) != num_classes:
         raise ValueError('The number of classes in the yaml.config {} is different than the number of classes in '
                          'the file {} {}'.format(num_classes, vector_file, str(list(distinct_att))))
 
@@ -179,23 +275,27 @@ def read_csv(csv_file_name, inference=False):
     """Open csv file and parse it, returning a list of dict.
 
     If inference == True, the dict contains this info:
-        - tif full path
+        - tif full path
+        - metadata yml full path (may be empty string if unavailable)
     Else, the returned list contains a dict with this info:
-        - tif full path
-        - gpkg full path
-        - attribute_name
-        - dataset (trn or val)
+        - tif full path
+        - metadata yml full path (may be empty string if unavailable)
+        - gpkg full path
+        - attribute_name
+        - dataset (trn or val)
     """
-
     list_values = []
     with open(csv_file_name, 'r') as f:
         reader = csv.reader(f)
         for row in reader:
             if inference:
-                list_values.append({'tif': row[0]})
+                assert len(row) >= 2, 'unexpected number of columns in dataset CSV description file' \
+                                      ' (for inference, should have two columns, i.e. raster file path and metadata file path)'
+                list_values.append({'tif': row[0], 'meta': row[1]})
             else:
-                list_values.append({'tif': row[0], 'gpkg': row[1], 'attribute_name': row[2], 'dataset': row[3]})
-
+                assert len(row) >= 5, 'unexpected number of columns in dataset CSV description file' \
+                                      ' (should have five columns; see \'read_csv\' function for more details)'
+                list_values.append({'tif': row[0], 'meta': row[1], 'gpkg': row[2], 'attribute_name': row[3], 'dataset': row[4]})
     if inference:
         return list_values
     else:
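Since the column layout changed, here is a sketch of what the CSV rows now look like; the file names below are hypothetical, and the second column (the new metadata .yml path) may be left empty:

Training/validation CSV (five columns per row):
./data/tile_01.tif,./data/tile_01.yml,./data/labels.gpkg,properties/class,trn
./data/tile_02.tif,./data/tile_02.yml,./data/labels.gpkg,properties/class,val

Inference CSV (two columns per row):
./data/tile_03.tif,./data/tile_03.yml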
@@ -240,4 +340,41 @@ def gpu_stats(device=0):
     res = nvmlDeviceGetUtilizationRates(handle)
     mem = nvmlDeviceGetMemoryInfo(handle)
-    return res, mem
\ No newline at end of file
+    return res, mem
+
+
+def get_key_def(key, config, default=None, msg=None, delete=False):
+    """Returns a value given a dictionary key, or the default value if it cannot be found."""
+    if isinstance(key, list):
+        if len(key) <= 1:
+            if msg is not None:
+                raise AssertionError(msg)
+            else:
+                raise AssertionError("must provide at least two valid keys to test")
+        for k in key:
+            if k in config:
+                val = config[k]
+                if delete:
+                    del config[k]
+                return val
+        return default
+    else:
+        if key not in config:
+            return default
+        else:
+            val = config[key]
+            if delete:
+                del config[key]
+            return val
+
+
+def get_key_recursive(key, config):
+    """Returns a value recursively given a dictionary key that may contain multiple subkeys."""
+    if not isinstance(key, list):
+        key = key.split("/")  # subdict indexing is split using slashes
+    assert key[0] in config, f"missing key '{key[0]}' in metadata dictionary"
+    val = config[key[0]]
+    if isinstance(val, (dict, collections.OrderedDict)):
+        assert len(key) > 1, "missing keys to index metadata subdictionaries"
+        return get_key_recursive(key[1:], val)
+    return val
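To close, a short sketch of the two new lookup helpers in action; the dictionaries and values are assumed for illustration:

metadata = {"properties": {"sensor": {"gsd": 0.4}}}
get_key_recursive("properties/sensor/gsd", metadata)                          # -> 0.4 (slash-delimited keys walk nested dicts)
get_key_def(["lr", "learning_rate"], {"learning_rate": 1e-4}, default=1e-3)   # -> 0.0001 (first matching key wins)
get_key_def("missing_key", {}, default=None)                                  # -> None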