Skip to content

Commit

Permalink
Copy files from lisflood-preprocessing
Browse files Browse the repository at this point in the history
  • Loading branch information
casadoj committed Aug 27, 2024
1 parent d4c5a1b commit 109c6fc
Show file tree
Hide file tree
Showing 6 changed files with 559 additions and 0 deletions.
37 changes: 37 additions & 0 deletions src/lisfloodutilities/lfcoords/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import yaml
from pathlib import Path
from typing import Union, Dict


class Config:
def __init__(self, config_file):
"""
Reads the configuration from a YAML file and sets default values if not provided.
Parameters:
-----------
config_file: string or pathlib.Path
The path to the YAML configuration file.
"""

# read configuration file
with open(config_file, 'r', encoding='utf8') as ymlfile:
config = yaml.load(ymlfile, Loader=yaml.FullLoader)

# input
self.STATIONS = Path(config['input']['stations'])
self.LDD_FINE = Path(config['input']['ldd_fine'])
self.UPSTREAM_FINE = Path(config['input']['upstream_fine'])
self.LDD_COARSE = Path(config['input']['ldd_coarse'])
self.UPSTREAM_COARSE = Path(config['input']['upstream_coarse'])

# output
self.SHAPE_FOLDER = Path(config.get('output_folder', './shapefiles'))
self.SHAPE_FOLDER.mkdir(parents=True, exist_ok=True)

# conditions
self.MIN_AREA = config['conditions'].get('min_area', 10)
self.ABS_ERROR = config['conditions'].get('abs_error', 50)
self.PCT_ERROR = config['conditions'].get('pct_error', 1)


196 changes: 196 additions & 0 deletions src/lisfloodutilities/lfcoords/coarser_grid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
import os
os.environ['USE_PYGEOS'] = '0'
import numpy as np
import pandas as pd
import geopandas as gpd
import rioxarray
import pyflwdir
from pathlib import Path
from tqdm.auto import tqdm
from typing import Optional
import logging
import warnings
warnings.filterwarnings("ignore")

from lisfloodpreprocessing import Config
from lisfloodpreprocessing.utils import catchment_polygon

# set logger
logging.basicConfig(level=logging.INFO,
format='%(asctime)s | %(levelname)s | %(name)s | %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
logger = logging.getLogger(__name__)

def coordinates_coarse(
cfg: Config,
stations: pd.DataFrame,
save: bool = True
) -> Optional[pd.DataFrame]:
"""
Transforms station coordinates from a high-resolution grid to a corresponding location in a coarser grid, aiming to match the shape of the catchment area derived from the high-resolution map. It updates the station coordinates and exports the catchment areas as shapefiles in the coarser grid.
The function reads the upstream area map and local drainage direction (LDD) map in the coarse grid. It then finds the pixel in the coarse grid that best matches the catchment shape derived from the high-resolution map. The match is evaluated based on the intersection-over-union of catchment shapes and the ratio of upstream areas.
Parameters:
-----------
cfg: Config
Configuration object containing file paths and parameters specified in the configuration file.
stations: pandas.DataFrame
DataFrame containing station coordinates and upstream areas in the finer grid. It's the result of coarse_grid.coarse_grid()
save: bool
If True, the updated stations table is saved to a CSV file.
If False, the updated stations DataFrame is returned without saving.
Returns:
--------
staions: pandas.DataFrame
If save is False, returns a pandas DataFrame with updated station coordinates and upstream areas in the coarser grid. Otherwise, the function returns None, and the results are saved directly to a CSV file.
"""

### READ INPUTS

# read upstream area map of coarse grid
upstream_coarse = rioxarray.open_rasterio(cfg.UPSTREAM_COARSE).squeeze(dim='band')
logger.info(f'Map of upstream area corretly read: {cfg.UPSTREAM_COARSE}')

# read local drainage direction map
ldd_coarse = rioxarray.open_rasterio(cfg.LDD_COARSE).squeeze(dim='band')
logger.info(f'Map of local drainage directions correctly read: {cfg.LDD_COARSE}')


### PROCESSING

# create river network
fdir_coarse = pyflwdir.from_array(ldd_coarse.data,
ftype='ldd',
transform=ldd_coarse.rio.transform(),
check_ftype=False,
latlon=True)

# boundaries of the input maps
lon_min, lat_min, lon_max, lat_max = np.round(ldd_coarse.rio.bounds(), 6)

# resolution of the input maps
cellsize = np.round(np.mean(np.diff(ldd_coarse.x)), 6) # degrees
cellsize_arcmin = int(np.round(cellsize * 60, 0)) # arcmin
suffix_coarse = f'{cellsize_arcmin}min'
logger.info(f'Coarse resolution is {cellsize_arcmin} arcminutes')

# extract resolution of the finer grid from 'stations'
suffix_fine = sorted(set([col.split('_')[1] for col in stations.columns if '_' in col]))[0]
cols_fine = [f'{col}_{suffix_fine}' for col in ['area', 'lat', 'lon']]

# add new columns to 'stations'
cols_coarse = [f'{col}_{suffix_coarse}' for col in ['area', 'lat', 'lon']]
stations[cols_coarse] = np.nan

# output folders
SHAPE_FOLDER_FINE = cfg.SHAPE_FOLDER / suffix_fine
SHAPE_FOLDER_COARSE = cfg.SHAPE_FOLDER / suffix_coarse
SHAPE_FOLDER_COARSE.mkdir(parents=True, exist_ok=True)

# search range of 5x5 array -> this is where the best point can be found in the coarse grid
rangexy = np.linspace(-2, 2, 5) * cellsize # arcmin
for ID, attrs in tqdm(stations.iterrows(), total=stations.shape[0], desc='stations'):

# real upstream area
area_ref = attrs['area']

# coordinates and upstream area in the fine grid
lat_fine, lon_fine, area_fine = attrs[[f'{col}_{suffix_fine}' for col in ['lat', 'lon', 'area']]]

if (area_ref < cfg.MIN_AREA) or (area_fine < cfg.MIN_AREA):
logger.warning(f'The catchment area of station {ID} is smaller than the minimum of {cfg.MIN_AREA} km2')
continue

# import shapefile of catchment polygon
shapefile = SHAPE_FOLDER_FINE / f'{ID}.shp'
try:
basin_fine = gpd.read_file(shapefile)
logger.info(f'Catchment polygon correctly read: {shapefile}')
except OSError as e:
logger.error(f'Error reading {shapefile}: {e}')
continue
except Exception as e: # This will catch other exceptions that might occur.
logger.error(f'An unexpected error occurred while reading {shapefile}: {e}')
continue

# find ratio
logger.debug('Start search')
inter_vs_union, area_ratio, area_lisf = [], [], []
for Δlat in rangexy:
for Δlon in rangexy:
lon = lon_fine + Δlon
lat = lat_fine + Δlat
basin = catchment_polygon(fdir_coarse.basins(xy=(lon, lat)).astype(np.int32),
transform=ldd_coarse.rio.transform(),
crs=ldd_coarse.rio.crs)

# calculate union and intersection of shapes
intersection = gpd.overlay(basin_fine, basin, how='intersection')
union = gpd.overlay(basin_fine, basin, how='union')
inter_vs_union.append(intersection.area.sum() / union.area.sum())

# get upstream area (km2) of coarse grid (LISFLOOD)
area = upstream_coarse.sel(x=lon, y=lat, method='nearest').item() * 1e-6
area_lisf.append(area)

# ratio between reference and coarse area
if area_ref == 0 or area == 0:
ratio = 0
else:
ratio = area_ref / area if area_ref < area else area / area_ref
area_ratio.append(ratio)
logger.debug('End search')

# maximum of shape similarity and upstream area accordance
i_shape = np.argmax(inter_vs_union)
area_shape = area_lisf[i_shape]
i_centre = int(len(rangexy)**2 / 2) # middle point
area_centre = area_lisf[i_centre]
# use middle point if errors are small
abs_error = abs(area_shape - area_centre)
pct_error = 100 * abs(1 - area_centre / area_shape)
if (abs_error <= cfg.ABS_ERROR) and (pct_error <= cfg.PCT_ERROR):
i_shape = i_centre
area_shape = area_centre

#i_ratio = np.argmax(area_ratio)

# coordinates in the fine resolution
i = i_shape // len(rangexy)
j = i_shape % len(rangexy)
lat = lat_fine + rangexy[i]
lon = lon_fine + rangexy[j]

# coordinates and upstream area on coarse resolution
area = upstream_coarse.sel(x=lon, y=lat, method='nearest')
area_coarse = area.item() * 1e-6
lon_coarse = area.x.item()
lat_coarse = area.y.item()

# derive catchment polygon from the selected coordinates
basin_coarse = catchment_polygon(fdir_coarse.basins(xy=(lon_coarse, lat_coarse)).astype(np.int32),
transform=ldd_coarse.rio.transform(),
crs=ldd_coarse.rio.crs,
name='ID')
basin_coarse['ID'] = ID
basin_coarse.set_index('ID', inplace=True)
basin_coarse[cols_fine] = area_fine, lat_fine, lon_fine
basin_coarse[cols_coarse] = area_coarse, lat_coarse, lon_coarse

# export shapefile
output_shp = SHAPE_FOLDER_COARSE / f'{ID}.shp'
basin_coarse.to_file(output_shp)
logger.info(f'Catchment {ID} exported as shapefile: {output_shp}')

# update new columns in 'stations'
stations.loc[ID, cols_coarse] = [int(area_coarse), round(lat_coarse, 6), round(lon_coarse, 6)]

# return/save
stations.sort_index(axis=1, inplace=True)
if save:
output_csv = cfg.STATIONS.parent / f'{cfg.STATIONS.stem}_{suffix_coarse}.csv'
stations.to_csv(output_csv)
logger.info(f'The updated stations table in the coarser grid has been exported to: {output_csv}')
else:
return stations
13 changes: 13 additions & 0 deletions src/lisfloodutilities/lfcoords/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
input:
stations: # CSV file defining four station attributes: 'ID', 'lat', 'lon', 'area' in km2
ldd_fine: # TIFF or NetCDF file of the local direction drainage in the high resolution grid
upstream_fine: # TIFF or NetCDF file of the upstream area (km2) in the high resolution grid
ldd_coarse: # TIFF or NetCDF file of the local direction drainage in the low resolution grid
upstream_coarse: # TIFF or NetCDF file of the upstream area (km2) in the low resolution grid

output_folder: # folder where catchment shapefiles will be saved. By default, './shapefiles/'

conditions:
min_area: # minimum catchment area (km2) to consider a station. By default, 10 km2
abs_error: # maximum absolute error (km2) allowed between the fine and coarse resolution catchments. By default, 50 km2
pct_error: # maximum percentage error (%) allowed between the fine and coarse resolution catchments. By default, 1%
130 changes: 130 additions & 0 deletions src/lisfloodutilities/lfcoords/finer_grid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import numpy as np
import pandas as pd
import rioxarray
import pyflwdir
from tqdm.auto import tqdm
from pathlib import Path
from typing import Optional
import logging
import warnings
warnings.filterwarnings("ignore")

from lisfloodpreprocessing import Config
from lisfloodpreprocessing.utils import find_pixel, catchment_polygon

# set logger
logging.basicConfig(level=logging.INFO,
format='%(asctime)s | %(levelname)s | %(name)s | %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
logger = logging.getLogger(__name__)

def coordinates_fine(
cfg: Config,
save: bool = True
) -> Optional[pd.DataFrame]:
"""
Processes station coordinates to find the most accurate pixel in a high-resolution map,
based on a reference value of catchment area. It updates the station coordinates and
exports the catchment areas as shapefiles.
The function reads the upstream area map and local drainage direction (LDD) map in
fine resolution, as well as the station coordinates with their reference upstream areas.
It then iteratively searches for the best-matching pixel within an increasing search
range and applies penalties and factors to determine the closest match. The function
creates a boolean map of each station's catchment area and vectorizes it into a
GeoDataFrame for export as a shapefile.
Parameters:
-----------
cfg: Config
Configuration object containing file paths and parameters specified in the configuration file.
save: bool
If True, the updated stations table is saved to a CSV file. If False, the updated stations DataFrame is returned without saving.
Returns:
--------
stations: pandas.DataFrame
If save is False, returns a pandas DataFrame with updated station coordinates and upstream areas in the finer grid. Otherwise, the function returns None, and the results are saved directly to a CSV file.
"""

# READ INPUTS

# read upstream map with fine resolution
upstream_fine = rioxarray.open_rasterio(cfg.UPSTREAM_FINE).squeeze(dim='band')
logger.info(f'Map of upstream area corretly read: {cfg.UPSTREAM_FINE}')

# read local drainage direction map
ldd_fine = rioxarray.open_rasterio(cfg.LDD_FINE).squeeze(dim='band')
logger.info(f'Map of local drainage directions corretly read: {cfg.LDD_FINE}')

# read stations text file
stations = pd.read_csv(cfg.STATIONS, index_col='ID')
logger.info(f'Table of stations correctly read: {cfg.STATIONS}')


# PROCESSING

# resolution of the input map
cellsize = np.mean(np.diff(upstream_fine.x)) # degrees
cellsize_arcsec = int(np.round(cellsize * 3600, 0)) # arcsec
suffix_fine = f'{cellsize_arcsec}sec'
logger.info(f'The resolution of the finer grid is {cellsize_arcsec} arcseconds')

# add columns to the table of stations
new_cols = sorted([f'{col}_{suffix_fine}' for col in stations.columns])
stations[new_cols] = np.nan

# create river network
fdir_fine = pyflwdir.from_array(ldd_fine.data,
ftype='d8',
transform=ldd_fine.rio.transform(),
check_ftype=False,
latlon=True)

# output path
SHAPE_FOLDER_FINE = cfg.SHAPE_FOLDER / suffix_fine
SHAPE_FOLDER_FINE.mkdir(parents=True, exist_ok=True)

for ID, attrs in tqdm(stations.iterrows(), total=stations.shape[0], desc='stations'):

# reference coordinates and upstream area
lat_ref, lon_ref, area_ref = attrs[['lat', 'lon', 'area']]

# search new coordinates in an increasing range
ranges = [55, 101, 151]
penalties = [500, 500, 1000]
factors = [2, .5, .25]
acceptable_errors = [50, 80, np.nan]
for rangexy, penalty, factor, max_error in zip(ranges, penalties, factors, acceptable_errors):
logger.debug(f'Set range to {rangexy}')
lat, lon, error = find_pixel(upstream_fine, lat_ref, lon_ref, area_ref, rangexy=rangexy, penalty=penalty, factor=factor)
if error <= max_error:
break

# update new columns in 'stations'
stations.loc[ID, new_cols] = [int(upstream_fine.sel(y=lat, x=lon).item()), round(lat, 6), round(lon, 6)]

# boolean map of the catchment associated to the corrected coordinates
basin_arr = fdir_fine.basins(xy=(lon, lat)).astype(np.int32)

# vectorize the boolean map into geopandas
basin_gdf = catchment_polygon(basin_arr.astype(np.int32),
transform=ldd_fine.rio.transform(),
crs=ldd_fine.rio.crs,
name='ID')
basin_gdf['ID'] = ID
basin_gdf.set_index('ID', inplace=True)
basin_gdf[attrs.index] = attrs.values

# export shape file
output_file = SHAPE_FOLDER_FINE / f'{ID}.shp'
basin_gdf.to_file(output_file)
logger.info(f'Catchment {ID} exported as shapefile: {output_file}')

# return/save
stations.sort_index(axis=1, inplace=True)
if save:
output_csv = cfg.STATIONS.parent / f'{cfg.STATIONS.stem}_{suffix_fine}.csv'
stations.to_csv(output_csv)
logger.info(f'The updated stations table in the finer grid has been exported to: {output_csv}')
else:
return stations
Loading

0 comments on commit 109c6fc

Please sign in to comment.