From d9e13afc6377390fa6e918ed3f9f47f19ff39ba2 Mon Sep 17 00:00:00 2001 From: mo-gregmunday Date: Tue, 11 Jun 2024 18:30:48 +0100 Subject: [PATCH 1/9] Interactive grid selection + optimised search --- slr_pkg/__init__.py | 51 ++++++++- step1_extract_cmip.py | 238 ++++++++++++++++++++++++++++++++---------- 2 files changed, 229 insertions(+), 60 deletions(-) diff --git a/slr_pkg/__init__.py b/slr_pkg/__init__.py index b1a85e8..c9ddc25 100644 --- a/slr_pkg/__init__.py +++ b/slr_pkg/__init__.py @@ -11,7 +11,7 @@ import cartopy.crs as ccrs from config import settings -from slr_pkg import cmip, cubeplot, cubeutils, cubedata, process +from slr_pkg import cmip, cubeplot, cubeutils, cubedata, process, whichbox from directories import makefolder, read_dir @@ -244,6 +244,7 @@ def plot_ij(cube, model, location, idx, lat, lon, save_map=True, rad=5): if targetlon > 180: targetlon -= 360 + fig = plt.figure() ax = cubeplot.block(cube, land=False, region=region, cmin=-1, cmax=1, plotcbar=True, nlevels=25, cent_lon=targetlon, title='{} (1x1 grid) - SSH above geoid'.format(model)) @@ -265,7 +266,7 @@ def plot_ij(cube, model, location, idx, lat, lon, save_map=True, rad=5): SRC_CRS, orig_lons, orig_lats).T # Plot symbols showing the ocean point and the tide gauge - ax.plot(new_lons[0], new_lats[0], 'ok') + pred, = ax.plot(new_lons[0], new_lats[0], 'ok') ax.plot(new_lons[1], new_lats[1], 'xr') if save_map: @@ -282,7 +283,53 @@ def plot_ij(cube, model, location, idx, lat, lon, save_map=True, rad=5): plt.savefig(figfile) plt.close() else: + selected_lat = cube.coord('latitude').points[j] + selected_lon = cube.coord('longitude').points[i] + + def onclick(event): + nonlocal selected_lat, selected_lon, pred + selected_lon, selected_lat = event.xdata, event.ydata + + # Transform onto original projection + MAP_CRS = ccrs.PlateCarree(central_longitude=targetlon) + SRC_CRS = ccrs.PlateCarree() + + selected_lon, selected_lat = SRC_CRS.transform_point( + selected_lon, selected_lat, MAP_CRS) + + (i, j), = whichbox.find_gridbox_indicies(cube,[(selected_lon, selected_lat)]) + + selected_lon = cube.coord('longitude').points[i] + selected_lat = cube.coord('latitude').points[j] + + MAP_CRS = ccrs.PlateCarree(central_longitude=targetlon) + SRC_CRS = ccrs.PlateCarree() + + plot_lon, plot_lat = MAP_CRS.transform_point( + selected_lon, selected_lat, SRC_CRS) + + pred.remove() + pred, = ax.plot(plot_lon, plot_lat, 'ok') + fig.canvas.draw() + + cid = fig.canvas.mpl_connect('button_press_event', onclick) plt.show() + + + # Create the output file directory location + out_mapdir = read_dir()[1] + makefolder(out_mapdir) + + # Abbreviate the site location name suitable to use as a filename + loc_abbrev = abbreviate_location_name(location) + figfile = os.path.join(out_mapdir, + f'{loc_abbrev}_{model}_ij_figure.png') + + # Save the CMIP grid box selection map to file + fig.savefig(figfile) + plt.close() + + return i, j, selected_lon, selected_lat def read_ar5_component(datadir, rcp, var, value='mid'): diff --git a/step1_extract_cmip.py b/step1_extract_cmip.py index a2e32e4..18fd2c2 100644 --- a/step1_extract_cmip.py +++ b/step1_extract_cmip.py @@ -12,6 +12,7 @@ from slr_pkg import abbreviate_location_name, plot_ij # found in __init.py__ from slr_pkg import cmip, cubeutils, models, whichbox from tide_gauge_locations import extract_site_info +from scipy.spatial import cKDTree def accept_reject_cmip(cube, model, site_loc, cmip_i, cmip_j, site_lat, @@ -147,6 +148,68 @@ def extract_ssh_data(cmip_sea): return model_names, cubes +# def find_ocean_pt(zos_cube_in, model, site_loc, site_lat, site_lon): +# """ +# Searches for the nearest appropriate ocean point(s) in the CMIP model +# adjacent to the site location. Initially, finds the model grid box +# indices of the given location. Then, searches surrounding boxes until an +# appropriate ocean point is found - needs to be accepted by the user. +# **NOTES: +# - The GCM data have been interpolated to a common 1 x 1 degree grid +# :param zos_cube_in: cube containing zos field from CMIP models +# :param model: CMIP model name +# :param site_loc: name of the site location +# :param site_lat: latitude of the site location +# :param site_lon: longitude of the site location +# :return: model grid box indices +# """ +# # Find grid box indices of location +# (i, j), = whichbox.find_gridbox_indicies(zos_cube_in, +# [(site_lon, site_lat)]) +# grid_lons = zos_cube_in.coord('longitude').points +# grid_lats = zos_cube_in.coord('latitude').points + +# # Check to see if the cube has a scalar mask, and add mask where cmip +# zos_cube = check_cube_mask(zos_cube_in) + +# # If the CMIP grid box of the exact site location is an ocean point +# # Get the user to check if it's appropriate and if so return the indices +# if not zos_cube.data.mask[j, i]: +# print('Checking CMIP grid box at site location') +# i_out, j_out = accept_reject_cmip(zos_cube, model, site_loc, i, j, +# site_lat, site_lon) +# if i_out is not None: +# pt_lon = grid_lons[i_out] +# pt_lat = grid_lats[j_out] +# return i_out, j_out, pt_lon, pt_lat + +# # If no indices are returned then the CMIP grid box is not appropriate +# # for use. Check the CMIP grid boxes surrounding the site location +# # until an appropriate one is found. +# else: +# i_out, j_out, pt_lon, pt_lat = search_for_next_cmip(i, j, +# zos_cube, +# model, +# site_loc, +# site_lat, +# site_lon) + +# # If the CMIP grid box of the exact site location is masked, start by +# # checking the next set of cmip grid boxes +# else: +# i_out, j_out, pt_lon, pt_lat = search_for_next_cmip(i, j, zos_cube, +# model, site_loc, +# site_lat, site_lon) + +# return i_out, j_out, pt_lon, pt_lat + +# This function looks for the nearest ocean point to the site location, but in a clunky way. +# I want to optimise it so that the user doesn't have to manually check the ocean point. +# I want to automatically select the nearest ocean point to the site location that doesn't +# have a mask, or a value which is significantly different to the grid boxes around it, +# whilst ignoring any masked cubes. + + def find_ocean_pt(zos_cube_in, model, site_loc, site_lat, site_lon): """ Searches for the nearest appropriate ocean point(s) in the CMIP model @@ -163,42 +226,11 @@ def find_ocean_pt(zos_cube_in, model, site_loc, site_lat, site_lon): :return: model grid box indices """ # Find grid box indices of location - (i, j), = whichbox.find_gridbox_indicies(zos_cube_in, - [(site_lon, site_lat)]) - grid_lons = zos_cube_in.coord('longitude').points - grid_lats = zos_cube_in.coord('latitude').points # Check to see if the cube has a scalar mask, and add mask where cmip zos_cube = check_cube_mask(zos_cube_in) - # If the CMIP grid box of the exact site location is an ocean point - # Get the user to check if it's appropriate and if so return the indices - if not zos_cube.data.mask[j, i]: - print('Checking CMIP grid box at site location') - i_out, j_out = accept_reject_cmip(zos_cube, model, site_loc, i, j, - site_lat, site_lon) - if i_out is not None: - pt_lon = grid_lons[i_out] - pt_lat = grid_lats[j_out] - return i_out, j_out, pt_lon, pt_lat - - # If no indices are returned then the CMIP grid box is not appropriate - # for use. Check the CMIP grid boxes surrounding the site location - # until an appropriate one is found. - else: - i_out, j_out, pt_lon, pt_lat = search_for_next_cmip(i, j, - zos_cube, - model, - site_loc, - site_lat, - site_lon) - - # If the CMIP grid box of the exact site location is masked, start by - # checking the next set of cmip grid boxes - else: - i_out, j_out, pt_lon, pt_lat = search_for_next_cmip(i, j, zos_cube, - model, site_loc, - site_lat, site_lon) + i_out, j_out, pt_lon, pt_lat = find_best_gridcell(zos_cube, model, site_loc, site_lat, site_lon) return i_out, j_out, pt_lon, pt_lat @@ -232,7 +264,68 @@ def ocean_point_wrapper(df, model_names, cubes): write_i_j(site_loc, result, lat, lon_orig) -def search_for_next_cmip(cmip_i, cmip_j, cube, model, site_loc, site_lat, +def find_best_match(lat_grid, lon_grid, data_grid, target_lat, target_lon, max_distance=3, distance_weight=2, difference_weight=0.0005): + lat_mesh, lon_mesh = np.meshgrid(lat_grid, lon_grid, indexing='ij') + points = np.vstack([lat_mesh.ravel(), lon_mesh.ravel()]).T + masked_points = data_grid.mask.ravel() + + tree = cKDTree(points[~masked_points]) + dists, indices = tree.query([target_lat, target_lon], k=49) # Query multiple nearest points + + best_x = None + best_y = None + best_lon = None + best_lat = None + min_weighted_score = float('inf') + + def compute_weighted_score(lat_idx, lon_idx, dist): + value = data_grid[lat_idx, lon_idx] + surrounding_points = tree.query_ball_point([lat_mesh[lat_idx, lon_idx], lon_mesh[lat_idx, lon_idx]], 1) + surrounding_indices = np.where(~masked_points)[0][surrounding_points] + surrounding_lat_idx, surrounding_lon_idx = np.unravel_index(surrounding_indices, data_grid.shape) + surrounding_values = data_grid[surrounding_lat_idx, surrounding_lon_idx] + avg_surrounding_diffs = np.mean(np.abs(np.diff(surrounding_values))) + difference = np.mean(np.abs(surrounding_values - value)) + diff_param = abs(avg_surrounding_diffs - difference) + + # Compute weighted score + weighted_score = float((dist / distance_weight ) - (difference_weight / diff_param)) + return weighted_score + + def check_and_update_best(lat_idx, lon_idx, dist): + nonlocal best_x, best_y, best_lat, best_lon, compute_weighted_score, min_weighted_score + weighted_score = compute_weighted_score(lat_idx, lon_idx, dist) + + if weighted_score < min_weighted_score: + min_weighted_score = weighted_score + best_lat = lat_mesh[lat_idx, lon_idx] + best_lon = lon_mesh[lat_idx, lon_idx] + best_x = nearest_lon_idx + best_y = nearest_lat_idx + + if (-90 <= target_lat <= 90) and (-180 <= target_lon <= 180): + target_lat_idx = np.abs(lat_grid - target_lat).argmin() + target_lon_idx = np.abs(lon_grid - target_lon).argmin() + + if not data_grid.mask[target_lat_idx, target_lon_idx]: + check_and_update_best(target_lat_idx, target_lon_idx, 1) + + for dist, idx in zip(dists, indices): + if dist > max_distance: + continue + + flat_idx = np.where(~masked_points)[0][idx] + nearest_lat_idx, nearest_lon_idx = np.unravel_index(flat_idx, data_grid.shape) + + check_and_update_best(nearest_lat_idx, nearest_lon_idx, dist) + + if best_y is None or best_x is None: + raise ValueError("No suitable unmasked point found within the specified distance.") + + return best_x, best_y, best_lon, best_lat + + +def find_best_gridcell(cube, model, site_loc, site_lat, site_lon, unit_test=False): """ Iteratively check the CMIP grid boxes surrounding the site location @@ -249,31 +342,60 @@ def search_for_next_cmip(cmip_i, cmip_j, cube, model, site_loc, site_lat, """ grid_lons = cube.coord('longitude').points grid_lats = cube.coord('latitude').points - - # The radius limit of 7 is arbitrary but should be large enough. - for radius in range(1, 8): # grid boxes - print(f'Checking CMIP grid boxes {radius} box removed ' + - 'from site location') - x_radius_range, y_radius_range = calc_radius_range(radius) - for ix in x_radius_range: - for iy in y_radius_range: - # Search the nearest grid cells. If the new mask is False, - # that grid cell is an ocean point - limit_lo = radius * radius - dd = ix * ix + iy * iy - if dd >= limit_lo: - # modulus for when grid cell is close to 0deg. - i_try = (cmip_i + ix) % len(grid_lons) - j_try = cmip_j + iy - - if not cube.data.mask[j_try, i_try]: - i_out, j_out = accept_reject_cmip( - cube, model, site_loc, i_try, j_try, site_lat, - site_lon, unit_test) - if i_out is not None: - pt_lon = grid_lons[i_out] - pt_lat = grid_lats[j_out] - return i_out, j_out, pt_lon, pt_lat + + best_i, best_j, best_lat, best_lon = find_best_match(grid_lats, grid_lons, cube.data, site_lat, site_lon) + + if settings["auto_site_selection"]: + best_i, best_j, best_lon, best_lat = plot_ij(cube, model, site_loc, [best_i, best_j], site_lat, site_lon, save_map=True) + return best_i, best_j, best_lon, best_lat + + best_i, best_j, best_lon, best_lat = plot_ij(cube, model, site_loc, [best_i, best_j], site_lat, site_lon, save_map=False) + + return best_i, best_j, best_lon, best_lat + + +# def search_for_next_cmip(cmip_i, cmip_j, cube, model, site_loc, site_lat, +# site_lon, unit_test=False): +# """ +# Iteratively check the CMIP grid boxes surrounding the site location +# until a suitable option is found. +# :param cmip_i: CMIP coord of site location's latitude +# :param cmip_j: CMIP coord of site location's longitude +# :param cube: cube containing zos field from CMIP models +# :param model: CMIP model name +# :param site_loc: name of the site location +# :param site_lat: latitude of the site location +# :param site_lon: longitude of the site location +# :param unit_test: flag to disable plotting for unit testing purposes +# :return: Selected CMIP coords +# """ +# grid_lons = cube.coord('longitude').points +# grid_lats = cube.coord('latitude').points + +# # The radius limit of 7 is arbitrary but should be large enough. +# for radius in range(1, 8): # grid boxes +# print(f'Checking CMIP grid boxes {radius} box removed ' + +# 'from site location') +# x_radius_range, y_radius_range = calc_radius_range(radius) +# for ix in x_radius_range: +# for iy in y_radius_range: +# # Search the nearest grid cells. If the new mask is False, +# # that grid cell is an ocean point +# limit_lo = radius * radius +# dd = ix * ix + iy * iy +# if dd >= limit_lo: +# # modulus for when grid cell is close to 0deg. +# i_try = (cmip_i + ix) % len(grid_lons) +# j_try = cmip_j + iy + +# if not cube.data.mask[j_try, i_try]: +# i_out, j_out = accept_reject_cmip( +# cube, model, site_loc, i_try, j_try, site_lat, +# site_lon, unit_test) +# if i_out is not None: +# pt_lon = grid_lons[i_out] +# pt_lat = grid_lats[j_out] +# return i_out, j_out, pt_lon, pt_lat def write_i_j(site_loc, result, site_lat, lon_orig): From 85233c831366c8d2fd198b5a95dbe6f6dc75ee3a Mon Sep 17 00:00:00 2001 From: mo-gregmunday Date: Wed, 12 Jun 2024 14:10:47 +0100 Subject: [PATCH 2/9] Updated optimisation --- ProFSea-environment.yml | 3 +- step1_extract_cmip.py | 332 +++++++++++----------------------------- 2 files changed, 93 insertions(+), 242 deletions(-) diff --git a/ProFSea-environment.yml b/ProFSea-environment.yml index 2a3e238..e0d6d41 100755 --- a/ProFSea-environment.yml +++ b/ProFSea-environment.yml @@ -11,4 +11,5 @@ dependencies: - matplotlib - libwebp>=1.3.2 - cartopy - - iris \ No newline at end of file + - iris + - tqdm \ No newline at end of file diff --git a/step1_extract_cmip.py b/step1_extract_cmip.py index 18fd2c2..f8028a3 100644 --- a/step1_extract_cmip.py +++ b/step1_extract_cmip.py @@ -6,6 +6,8 @@ import os import numpy as np import pandas as pd +import warnings +from tqdm import tqdm from config import settings from directories import read_dir, makefolder @@ -14,61 +16,7 @@ from tide_gauge_locations import extract_site_info from scipy.spatial import cKDTree - -def accept_reject_cmip(cube, model, site_loc, cmip_i, cmip_j, site_lat, - site_lon, unit_test=False): - """ - Accept or reject selected CMIP grid box based on a user input. - If CMIP grid box is rejected, search neighbouring grid boxes until a - suitable one is found. - :param cube: cube containing zos field from CMIP models - :param model: CMIP model name - :param site_loc: name of the site location - :param cmip_i: CMIP coord of site location's latitude - :param cmip_j: CMIP coord of site location's longitude - :param site_lat: latitude of the site location - :param site_lon: longitude of the site location - :param unit_test: flag to disable plotting for unit testing purposes - :return: Selected CMIP coords or None if user doesn't confirm grid box - selection - """ - # Plot a map of the site location and selected CMIP grid box for user - # validation. At this stage don't save the figure to file. - if not unit_test: - plot_ij(cube, model, site_loc, [cmip_i, cmip_j], site_lat, site_lon, - save_map=False) - - # Ask user to accept cmip grid box or re-select from neighbouring cells - decision = input(f'Is selected CMIP grid box {cmip_i, cmip_j} ' - f'appropriate for {model}? Y or N: ') - if decision == 'Y' or decision == 'y': - # Save map to file and return CMIP grid box coords - if not unit_test: - plot_ij(cube, model, site_loc, [cmip_i, cmip_j], - site_lat, site_lon) - return cmip_i, cmip_j - elif decision == 'N' or decision == 'n': - print('Selecting another CMIP grid box') - return None, None - else: - raise TypeError('Response needs to be Y or N') - - -def calc_radius_range(radius): - """ - Calculate the maximum distance to search for ocean grid point. - :param radius: Maximum range to search for ocean point - :return: x_radius_range, y_radius_range - """ - # if radius > 1: - # rm1 = radius - 1 - # else: - # rm1 = radius - - x_radius_range = list(range(-radius, radius + 1)) - y_radius_range = list(range(-radius, radius + 1)) - - return x_radius_range, y_radius_range +warnings.filterwarnings("ignore") def check_cube_mask(cube): @@ -92,7 +40,6 @@ def check_cube_mask(cube): if apply_mask: new_mask = (cube.data == 0.0) - print(f'Scalar mask: Re-masking the cube to mask cells = 0') cube = cube.copy(data=np.ma.array(cube.data, mask=new_mask)) return cube @@ -148,68 +95,6 @@ def extract_ssh_data(cmip_sea): return model_names, cubes -# def find_ocean_pt(zos_cube_in, model, site_loc, site_lat, site_lon): -# """ -# Searches for the nearest appropriate ocean point(s) in the CMIP model -# adjacent to the site location. Initially, finds the model grid box -# indices of the given location. Then, searches surrounding boxes until an -# appropriate ocean point is found - needs to be accepted by the user. -# **NOTES: -# - The GCM data have been interpolated to a common 1 x 1 degree grid -# :param zos_cube_in: cube containing zos field from CMIP models -# :param model: CMIP model name -# :param site_loc: name of the site location -# :param site_lat: latitude of the site location -# :param site_lon: longitude of the site location -# :return: model grid box indices -# """ -# # Find grid box indices of location -# (i, j), = whichbox.find_gridbox_indicies(zos_cube_in, -# [(site_lon, site_lat)]) -# grid_lons = zos_cube_in.coord('longitude').points -# grid_lats = zos_cube_in.coord('latitude').points - -# # Check to see if the cube has a scalar mask, and add mask where cmip -# zos_cube = check_cube_mask(zos_cube_in) - -# # If the CMIP grid box of the exact site location is an ocean point -# # Get the user to check if it's appropriate and if so return the indices -# if not zos_cube.data.mask[j, i]: -# print('Checking CMIP grid box at site location') -# i_out, j_out = accept_reject_cmip(zos_cube, model, site_loc, i, j, -# site_lat, site_lon) -# if i_out is not None: -# pt_lon = grid_lons[i_out] -# pt_lat = grid_lats[j_out] -# return i_out, j_out, pt_lon, pt_lat - -# # If no indices are returned then the CMIP grid box is not appropriate -# # for use. Check the CMIP grid boxes surrounding the site location -# # until an appropriate one is found. -# else: -# i_out, j_out, pt_lon, pt_lat = search_for_next_cmip(i, j, -# zos_cube, -# model, -# site_loc, -# site_lat, -# site_lon) - -# # If the CMIP grid box of the exact site location is masked, start by -# # checking the next set of cmip grid boxes -# else: -# i_out, j_out, pt_lon, pt_lat = search_for_next_cmip(i, j, zos_cube, -# model, site_loc, -# site_lat, site_lon) - -# return i_out, j_out, pt_lon, pt_lat - -# This function looks for the nearest ocean point to the site location, but in a clunky way. -# I want to optimise it so that the user doesn't have to manually check the ocean point. -# I want to automatically select the nearest ocean point to the site location that doesn't -# have a mask, or a value which is significantly different to the grid boxes around it, -# whilst ignoring any masked cubes. - - def find_ocean_pt(zos_cube_in, model, site_loc, site_lat, site_lon): """ Searches for the nearest appropriate ocean point(s) in the CMIP model @@ -229,173 +114,138 @@ def find_ocean_pt(zos_cube_in, model, site_loc, site_lat, site_lon): # Check to see if the cube has a scalar mask, and add mask where cmip zos_cube = check_cube_mask(zos_cube_in) - - i_out, j_out, pt_lon, pt_lat = find_best_gridcell(zos_cube, model, site_loc, site_lat, site_lon) - - return i_out, j_out, pt_lon, pt_lat + + best_i, best_j, best_lat, best_lon = find_best_gridcell( + zos_cube, site_lat, site_lon) + + if settings["auto_site_selection"]: + plot_ij(zos_cube, model, site_loc, [best_i, best_j], + site_lat, site_lon, save_map=True) + return best_i, best_j, best_lon, best_lat + + best_i, best_j, best_lon, best_lat = plot_ij( + zos_cube, model, site_loc, [best_i, best_j], + site_lat, site_lon, save_map=False) + + return best_i, best_j, best_lon, best_lat -def ocean_point_wrapper(df, model_names, cubes): +def find_best_gridcell( + cube, target_lat, target_lon, + max_distance=2, distance_weight=2, + difference_weight=0.0005): """ - Wrapper script to extract relevant metadata for site location, needed to - select the nearest CMIP grid box. Collates the CMIP model name, i and j - coords selected and lat and lon value of the site. - Writes the results to a single .csv file assuming write=True. - :param df: DataFrame of site location's metadata - :param model_names: CMIP model names - :param cubes: cube containing zos field from CMIP models + Find the best grid cell in the CMIP model for the target latitude and + longitude. The best grid cell is the one that minimizes the weighted + score, which is computed as the distance to the target point minus a + grid cell difference parameter. + :param cube: iris.cube.Cube containing zos field from CMIP models + :param target_lat: Latitude of the target point + :param target_lon: Longitude of the target point + :param max_distance: Maximum distance to search for the best grid cell + :param distance_weight: Weight for the distance parameter + :param difference_weight: Weight for the difference parameter + :return: Best grid cell indices and coordinates """ - - # Get the metadata of either the site location or tide gauge location - for site_loc in df.index.values: - df_site = df.loc[site_loc] - lat, lon_orig, lon = extract_lat_lon(df_site) - - # Setup empty 2D list to store results for each model - # [name, i and j coords, lat and lon value] - result = [] - for n, zos_cube in enumerate(cubes): - model = model_names[n] - i, j, pt_lon, pt_lat = find_ocean_pt(zos_cube, model, site_loc, - lat, lon) - result.append([model, i, j, pt_lon, pt_lat]) - - # Write the data to a file - write_i_j(site_loc, result, lat, lon_orig) - - -def find_best_match(lat_grid, lon_grid, data_grid, target_lat, target_lon, max_distance=3, distance_weight=2, difference_weight=0.0005): + lon_grid = cube.coord('longitude').points + lat_grid = cube.coord('latitude').points + data_grid = cube.data + + # Get lat/lons onto grids, flatten and get mask lat_mesh, lon_mesh = np.meshgrid(lat_grid, lon_grid, indexing='ij') points = np.vstack([lat_mesh.ravel(), lon_mesh.ravel()]).T masked_points = data_grid.mask.ravel() tree = cKDTree(points[~masked_points]) - dists, indices = tree.query([target_lat, target_lon], k=49) # Query multiple nearest points + dists, indices = tree.query([target_lat, target_lon], k=49) # 7x7 grid - best_x = None - best_y = None + best_i = None + best_j = None best_lon = None best_lat = None min_weighted_score = float('inf') def compute_weighted_score(lat_idx, lon_idx, dist): value = data_grid[lat_idx, lon_idx] - surrounding_points = tree.query_ball_point([lat_mesh[lat_idx, lon_idx], lon_mesh[lat_idx, lon_idx]], 1) + + # Pull out surrounding grid cell indices and values + surrounding_points = tree.query_ball_point( + [lat_mesh[lat_idx, lon_idx], lon_mesh[lat_idx, lon_idx]], 1) surrounding_indices = np.where(~masked_points)[0][surrounding_points] - surrounding_lat_idx, surrounding_lon_idx = np.unravel_index(surrounding_indices, data_grid.shape) - surrounding_values = data_grid[surrounding_lat_idx, surrounding_lon_idx] + surrounding_lat_idx, surrounding_lon_idx = np.unravel_index( + surrounding_indices, data_grid.shape) + surrounding_values = data_grid[surrounding_lat_idx, + surrounding_lon_idx] + + # Compute difference parameter avg_surrounding_diffs = np.mean(np.abs(np.diff(surrounding_values))) difference = np.mean(np.abs(surrounding_values - value)) diff_param = abs(avg_surrounding_diffs - difference) # Compute weighted score - weighted_score = float((dist / distance_weight ) - (difference_weight / diff_param)) + weighted_score = float((dist / distance_weight ) - + (difference_weight / diff_param)) return weighted_score def check_and_update_best(lat_idx, lon_idx, dist): - nonlocal best_x, best_y, best_lat, best_lon, compute_weighted_score, min_weighted_score + nonlocal best_i, best_j, best_lat, best_lon + nonlocal compute_weighted_score, min_weighted_score + weighted_score = compute_weighted_score(lat_idx, lon_idx, dist) if weighted_score < min_weighted_score: min_weighted_score = weighted_score best_lat = lat_mesh[lat_idx, lon_idx] best_lon = lon_mesh[lat_idx, lon_idx] - best_x = nearest_lon_idx - best_y = nearest_lat_idx - - if (-90 <= target_lat <= 90) and (-180 <= target_lon <= 180): - target_lat_idx = np.abs(lat_grid - target_lat).argmin() - target_lon_idx = np.abs(lon_grid - target_lon).argmin() - - if not data_grid.mask[target_lat_idx, target_lon_idx]: - check_and_update_best(target_lat_idx, target_lon_idx, 1) - - for dist, idx in zip(dists, indices): + best_i = nearest_lon_idx + best_j = nearest_lat_idx + + candidate_points = sorted(zip(dists, indices), key=lambda x: x[0]) + for dist, idx in candidate_points: if dist > max_distance: - continue + break flat_idx = np.where(~masked_points)[0][idx] - nearest_lat_idx, nearest_lon_idx = np.unravel_index(flat_idx, data_grid.shape) + nearest_lat_idx, nearest_lon_idx = np.unravel_index( + flat_idx, data_grid.shape) check_and_update_best(nearest_lat_idx, nearest_lon_idx, dist) - if best_y is None or best_x is None: - raise ValueError("No suitable unmasked point found within the specified distance.") + if best_i is None or best_j is None: + raise ValueError("No suitable unmasked point" + "found within the specified distance.") - return best_x, best_y, best_lon, best_lat + return best_i, best_j, best_lon, best_lat -def find_best_gridcell(cube, model, site_loc, site_lat, - site_lon, unit_test=False): +def ocean_point_wrapper(df, model_names, cubes): """ - Iteratively check the CMIP grid boxes surrounding the site location - until a suitable option is found. - :param cmip_i: CMIP coord of site location's latitude - :param cmip_j: CMIP coord of site location's longitude - :param cube: cube containing zos field from CMIP models - :param model: CMIP model name - :param site_loc: name of the site location - :param site_lat: latitude of the site location - :param site_lon: longitude of the site location - :param unit_test: flag to disable plotting for unit testing purposes - :return: Selected CMIP coords + Wrapper script to extract relevant metadata for site location, needed to + select the nearest CMIP grid box. Collates the CMIP model name, i and j + coords selected and lat and lon value of the site. + Writes the results to a single .csv file assuming write=True. + :param df: DataFrame of site location's metadata + :param model_names: CMIP model names + :param cubes: cube containing zos field from CMIP models """ - grid_lons = cube.coord('longitude').points - grid_lats = cube.coord('latitude').points - - best_i, best_j, best_lat, best_lon = find_best_match(grid_lats, grid_lons, cube.data, site_lat, site_lon) - - if settings["auto_site_selection"]: - best_i, best_j, best_lon, best_lat = plot_ij(cube, model, site_loc, [best_i, best_j], site_lat, site_lon, save_map=True) - return best_i, best_j, best_lon, best_lat - - best_i, best_j, best_lon, best_lat = plot_ij(cube, model, site_loc, [best_i, best_j], site_lat, site_lon, save_map=False) - - return best_i, best_j, best_lon, best_lat + # Get the metadata of either the site location or tide gauge location + for site_loc in df.index.values: + df_site = df.loc[site_loc] + lat, lon_orig, lon = extract_lat_lon(df_site) + + # Setup empty 2D list to store results for each model + # [name, i and j coords, lat and lon value] + result = [] + print(f'\nExtracting grid cells for {site_loc}') + for i in tqdm(range(len(cubes))): + model = model_names[i] + i, j, pt_lon, pt_lat = find_ocean_pt(cubes[i], model, site_loc, + lat, lon) + result.append([model, i, j, pt_lon, pt_lat]) -# def search_for_next_cmip(cmip_i, cmip_j, cube, model, site_loc, site_lat, -# site_lon, unit_test=False): -# """ -# Iteratively check the CMIP grid boxes surrounding the site location -# until a suitable option is found. -# :param cmip_i: CMIP coord of site location's latitude -# :param cmip_j: CMIP coord of site location's longitude -# :param cube: cube containing zos field from CMIP models -# :param model: CMIP model name -# :param site_loc: name of the site location -# :param site_lat: latitude of the site location -# :param site_lon: longitude of the site location -# :param unit_test: flag to disable plotting for unit testing purposes -# :return: Selected CMIP coords -# """ -# grid_lons = cube.coord('longitude').points -# grid_lats = cube.coord('latitude').points - -# # The radius limit of 7 is arbitrary but should be large enough. -# for radius in range(1, 8): # grid boxes -# print(f'Checking CMIP grid boxes {radius} box removed ' + -# 'from site location') -# x_radius_range, y_radius_range = calc_radius_range(radius) -# for ix in x_radius_range: -# for iy in y_radius_range: -# # Search the nearest grid cells. If the new mask is False, -# # that grid cell is an ocean point -# limit_lo = radius * radius -# dd = ix * ix + iy * iy -# if dd >= limit_lo: -# # modulus for when grid cell is close to 0deg. -# i_try = (cmip_i + ix) % len(grid_lons) -# j_try = cmip_j + iy - -# if not cube.data.mask[j_try, i_try]: -# i_out, j_out = accept_reject_cmip( -# cube, model, site_loc, i_try, j_try, site_lat, -# site_lon, unit_test) -# if i_out is not None: -# pt_lon = grid_lons[i_out] -# pt_lat = grid_lats[j_out] -# return i_out, j_out, pt_lon, pt_lat + # Write the data to a file + write_i_j(site_loc, result, lat, lon_orig) def write_i_j(site_loc, result, site_lat, lon_orig): From 4a1fb2d1facc85f1e479542d332b2eceb1a2aa36 Mon Sep 17 00:00:00 2001 From: mo-gregmunday Date: Thu, 13 Jun 2024 09:44:48 +0100 Subject: [PATCH 3/9] Updated search weights --- step1_extract_cmip.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/step1_extract_cmip.py b/step1_extract_cmip.py index f8028a3..87b12c1 100644 --- a/step1_extract_cmip.py +++ b/step1_extract_cmip.py @@ -132,8 +132,8 @@ def find_ocean_pt(zos_cube_in, model, site_loc, site_lat, site_lon): def find_best_gridcell( cube, target_lat, target_lon, - max_distance=2, distance_weight=2, - difference_weight=0.0005): + max_distance=2, distance_weight=3, + difference_weight=0.003): """ Find the best grid cell in the CMIP model for the target latitude and longitude. The best grid cell is the one that minimizes the weighted From e7ed1beaf57aa0edd296b0c70115ab8f064ae1ce Mon Sep 17 00:00:00 2001 From: mo-gregmunday Date: Thu, 13 Jun 2024 10:36:35 +0100 Subject: [PATCH 4/9] Updated file saving --- step3_process_regional_sealevel_projections.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/step3_process_regional_sealevel_projections.py b/step3_process_regional_sealevel_projections.py index f1e0dcf..240d83b 100644 --- a/step3_process_regional_sealevel_projections.py +++ b/step3_process_regional_sealevel_projections.py @@ -88,7 +88,8 @@ def calc_future_sea_level_at_site(df, site_loc, scenario): # Create the output sea level projections file directory and filename sealev_ddir = read_dir()[4] loc_abbrev = abbreviate_location_name(site_loc) - file_header = '_'.join([loc_abbrev, scenario, "projection", "2100"]) + file_header = '_'.join([loc_abbrev, scenario, "projection", + f"{settings['projection_end_year']}"]) G_file = '_'.join([file_header, 'global']) + '.csv' R_file = '_'.join([file_header, 'regional']) + '.csv' From 4c435a9cecbf9f6c837e007e9987fd59cf2c490b Mon Sep 17 00:00:00 2001 From: mo-gregmunday Date: Fri, 14 Jun 2024 14:54:44 +0100 Subject: [PATCH 5/9] Fixed manual coordinates not updating --- slr_pkg/__init__.py | 1 + step1_extract_cmip.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/slr_pkg/__init__.py b/slr_pkg/__init__.py index c9ddc25..1df989e 100644 --- a/slr_pkg/__init__.py +++ b/slr_pkg/__init__.py @@ -288,6 +288,7 @@ def plot_ij(cube, model, location, idx, lat, lon, save_map=True, rad=5): def onclick(event): nonlocal selected_lat, selected_lon, pred + nonlocal i, j selected_lon, selected_lat = event.xdata, event.ydata # Transform onto original projection diff --git a/step1_extract_cmip.py b/step1_extract_cmip.py index 87b12c1..67f46d5 100644 --- a/step1_extract_cmip.py +++ b/step1_extract_cmip.py @@ -115,7 +115,7 @@ def find_ocean_pt(zos_cube_in, model, site_loc, site_lat, site_lon): # Check to see if the cube has a scalar mask, and add mask where cmip zos_cube = check_cube_mask(zos_cube_in) - best_i, best_j, best_lat, best_lon = find_best_gridcell( + best_i, best_j, best_lon, best_lat = find_best_gridcell( zos_cube, site_lat, site_lon) if settings["auto_site_selection"]: From b39b2a7fc350d64f1c109cd59d4fb1aa43222b38 Mon Sep 17 00:00:00 2001 From: mo-gregmunday Date: Thu, 27 Jun 2024 17:45:23 +0100 Subject: [PATCH 6/9] Raises error for tricky regions --- step1_extract_cmip.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/step1_extract_cmip.py b/step1_extract_cmip.py index 67f46d5..0530b4b 100644 --- a/step1_extract_cmip.py +++ b/step1_extract_cmip.py @@ -115,8 +115,12 @@ def find_ocean_pt(zos_cube_in, model, site_loc, site_lat, site_lon): # Check to see if the cube has a scalar mask, and add mask where cmip zos_cube = check_cube_mask(zos_cube_in) + if settings["auto_site_selection"]: + search_distance = 2 + else: + search_distance = 7 best_i, best_j, best_lon, best_lat = find_best_gridcell( - zos_cube, site_lat, site_lon) + zos_cube, site_lat, site_lon, max_distance=search_distance) if settings["auto_site_selection"]: plot_ij(zos_cube, model, site_loc, [best_i, best_j], @@ -212,8 +216,14 @@ def check_and_update_best(lat_idx, lon_idx, dist): check_and_update_best(nearest_lat_idx, nearest_lon_idx, dist) if best_i is None or best_j is None: - raise ValueError("No suitable unmasked point" - "found within the specified distance.") + if settings["auto_site_selection"]: + raise ValueError("Could not find a suitable grid cell due to the " + "model mask. This region might be complex - " + "please re-run with " + "\033[1mauto_site_selection: False \033[0m") + else: + raise ValueError("No valid points in the vicinity of the site. " + "Have you selected a lat/lon near the coast?") return best_i, best_j, best_lon, best_lat From dd788e5de44db33ad2f479a9ce30d0e58d8b2b1d Mon Sep 17 00:00:00 2001 From: mo-gregmunday Date: Fri, 28 Jun 2024 10:27:30 +0100 Subject: [PATCH 7/9] Tide gauge plotting --- step4_plot_regional_sealevel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/step4_plot_regional_sealevel.py b/step4_plot_regional_sealevel.py index 6de4fa1..dd3b687 100644 --- a/step4_plot_regional_sealevel.py +++ b/step4_plot_regional_sealevel.py @@ -260,7 +260,8 @@ def plot_figure_two(r_df_list, tg_name, nflag, flag, tg_years, non_missing, plot_zeroline(ax, xlim) # Plot the annual mean sea levels from the tide gauge data - plot_tg_data(ax, nflag, flag, tg_years, non_missing, tg_amsl, tg_name) + if settings['plot_tide_gauge_data']: + plot_tg_data(ax, nflag, flag, tg_years, non_missing, tg_amsl, tg_name) ax.set_xlim(xlim) ax.set_ylim(ylim) From 0fb599b62477c130110dcfdd9a5c739dc02996d7 Mon Sep 17 00:00:00 2001 From: mo-gregmunday Date: Fri, 28 Jun 2024 10:32:45 +0100 Subject: [PATCH 8/9] Add user option for plotting tide gauges --- step4_plot_regional_sealevel.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/step4_plot_regional_sealevel.py b/step4_plot_regional_sealevel.py index 800715e..71df94d 100644 --- a/step4_plot_regional_sealevel.py +++ b/step4_plot_regional_sealevel.py @@ -756,10 +756,10 @@ def read_G_R_sl_projections(site_name, scenarios): G_df_list = [] R_df_list = [] for sce in scenarios: - G_filename = '{}{}_{}_projection_2100_global.csv'.format( - in_slddir, loc_abbrev, sce) - R_filename = '{}{}_{}_projection_2100_regional.csv'.format( - in_slddir, loc_abbrev, sce) + G_filename = '{}{}_{}_projection_{}_global.csv'.format( + in_slddir, loc_abbrev, sce, settings["projection_end_year"]) + R_filename = '{}{}_{}_projection_{}_regional.csv'.format( + in_slddir, loc_abbrev, sce, settings["projection_end_year"]) try: G_df = pd.read_csv(G_filename, header=0, index_col=['year', 'percentile']) From 7b1b41d570d22d2898200bfe2a2d7bbcc8bfc539 Mon Sep 17 00:00:00 2001 From: mo-gregmunday Date: Fri, 19 Jul 2024 16:26:42 +0100 Subject: [PATCH 9/9] Update user-settings.yml --- profsea/user-settings.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/profsea/user-settings.yml b/profsea/user-settings.yml index 7fce703..bbdacb6 100644 --- a/profsea/user-settings.yml +++ b/profsea/user-settings.yml @@ -21,6 +21,12 @@ baseoutdir: '/home/user/sl_output/' # Set the end year of the projection(s) projection_end_year: 2300 # int between 2050 and 2300 +# Turn on/off automated grid-cell selection +auto_site_selection: False # bool + +# Turn on/off plotting of tide gauge data on certain figures +plot_tide_gauge_data: True # bool + # Science method # UKCP18 --> sciencemethod: 'UK' # Palmer et al (2020) --> sciencemethod: 'global