Skip to content

Commit

Permalink
Correct print stats and allow decumulating 24h at the beginning of the interval
Browse files Browse the repository at this point in the history
  • Loading branch information
gnrgomes committed Jun 19, 2024
1 parent 914df08 commit 714edd6
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 51 deletions.
43 changes: 27 additions & 16 deletions src/lisfloodutilities/gridding/decumulate_daily_grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def print_statistics(provider_ids: List[str], df_kiwis_24h: pd.DataFrame, df_kiw
print_msg(stats_string)
i += 1

def run(conf_24h: Config, conf_6h: Config, kiwis_24h_06am_path: Path, kiwis_6h_12pm_path: Path,
def run(conf_24h: Config, conf_6h: Config, beginning_of_interval_offset: int, kiwis_24h_06am_path: Path, kiwis_6h_12pm_path: Path,
kiwis_6h_18pm_path: Path, kiwis_6h_12am_path: Path, kiwis_6h_06am_path: Path, input_path_6h: Path, output_path: Path = None):
"""
While processing the 4 grids of 6hourly precipitation Day1 12:00, 18:00 and Day2 00:00, 06:00 we will use the daily precipitation
Expand All @@ -200,10 +200,10 @@ def run(conf_24h: Config, conf_6h: Config, kiwis_24h_06am_path: Path, kiwis_6h_1
kiwis_6h_12am_path,
kiwis_6h_06am_path])
# Check timestamps are correct
if not (kiwis_timestamps_24h[0] == kiwis_timestamps_6h[3] and
(kiwis_timestamps_24h[0] - timedelta(hours=6)) == kiwis_timestamps_6h[2] and
(kiwis_timestamps_24h[0] - timedelta(hours=12)) == kiwis_timestamps_6h[1] and
(kiwis_timestamps_24h[0] - timedelta(hours=18)) == kiwis_timestamps_6h[0]):
if not ((kiwis_timestamps_24h[0] + timedelta(hours=beginning_of_interval_offset)) == kiwis_timestamps_6h[3] and
(kiwis_timestamps_24h[0] + timedelta(hours=beginning_of_interval_offset) - timedelta(hours=6)) == kiwis_timestamps_6h[2] and
(kiwis_timestamps_24h[0] + timedelta(hours=beginning_of_interval_offset) - timedelta(hours=12)) == kiwis_timestamps_6h[1] and
(kiwis_timestamps_24h[0] + timedelta(hours=beginning_of_interval_offset) - timedelta(hours=18)) == kiwis_timestamps_6h[0]):
raise ArgumentTypeError("The input kiwis do not respect the expected timestamps.")

kiwis_dataframes = df_kiwis_array_24h
Expand Down Expand Up @@ -278,7 +278,8 @@ def main(argv):

# set defaults
parser.set_defaults(quiet=False,
output_folder=None)
output_folder=None,
use_beginning_of_interval=False)

parser.add_argument("-d", "--pr24h", dest="kiwis_24h_folder_path", required=True, type=FileUtils.folder_type,
help="Set the input kiwis file folder containing daily precipitation.",
Expand Down Expand Up @@ -307,6 +308,8 @@ def main(argv):
parser.add_argument("-e", "--end", dest="end_date",
help="Set the end date and time until which data is imported [default: %(default)s]",
metavar="YYYYMMDDHHMISS")
parser.add_argument("-b", "--boi", dest="use_beginning_of_interval", action="store_true",
help="Indicate that the daily timesteps are at the beginning of the interval [default: %(default)s]")
parser.add_argument("-q", "--quiet", dest="quiet", action="store_true", help="Set script output into quiet mode [default: %(default)s]")

# process options
Expand Down Expand Up @@ -357,28 +360,36 @@ def main(argv):
output_path = None
print_msg(f"Output Folder: 6hourly kiwis files will be overwritten")

print_msg(f"Timesteps are beginning of the interval: {args.use_beginning_of_interval}")

kiwis_24h_paths = get_24h_kiwis_paths(conf_24h, Path(args.kiwis_24h_folder_path))
kiwis_6h_folder_path = Path(args.kiwis_6h_folder_path)

# Defines the offset for the 6hourly data when the daily data is at
# the beginning of the interval while the 6hourly files should be at the end of the interval
beginning_of_interval_offset = 0
if args.use_beginning_of_interval:
beginning_of_interval_offset = 24

for filename_kiwis, kiwis_timestamp in kiwis_24h_paths:
kiwis_24h_06am_path = get_existing_file_path(parser, str(filename_kiwis))
print_msg(f"Daily PR kiwis file: {kiwis_24h_06am_path}")
kiwis_6h_06am_timestamp = kiwis_timestamp
kiwis_6h_12am_timestamp = kiwis_timestamp - timedelta(hours=6)
kiwis_6h_18pm_timestamp = kiwis_timestamp - timedelta(hours=12)
kiwis_6h_12pm_timestamp = kiwis_timestamp - timedelta(hours=18)
print_msg(f"Daily {args.variable_code_24h} kiwis file: {kiwis_24h_06am_path}")
kiwis_6h_06am_timestamp = kiwis_timestamp + timedelta(hours=beginning_of_interval_offset)
kiwis_6h_12am_timestamp = kiwis_timestamp + timedelta(hours=beginning_of_interval_offset) - timedelta(hours=6)
kiwis_6h_18pm_timestamp = kiwis_timestamp + timedelta(hours=beginning_of_interval_offset) - timedelta(hours=12)
kiwis_6h_12pm_timestamp = kiwis_timestamp + timedelta(hours=beginning_of_interval_offset) - timedelta(hours=18)

kiwis_6h_06am_path = get_6hourly_filepath(parser, conf_6h, kiwis_6h_folder_path, kiwis_6h_06am_timestamp)
kiwis_6h_12am_path = get_6hourly_filepath(parser, conf_6h, kiwis_6h_folder_path, kiwis_6h_12am_timestamp)
kiwis_6h_18pm_path = get_6hourly_filepath(parser, conf_6h, kiwis_6h_folder_path, kiwis_6h_18pm_timestamp)
kiwis_6h_12pm_path = get_6hourly_filepath(parser, conf_6h, kiwis_6h_folder_path, kiwis_6h_12pm_timestamp)

print_msg(f"6hourly PR kiwis file 06:00: {kiwis_6h_06am_path}")
print_msg(f"6hourly PR kiwis file 00:00: {kiwis_6h_12am_path}")
print_msg(f"6hourly PR kiwis file 18:00: {kiwis_6h_18pm_path}")
print_msg(f"6hourly PR kiwis file 12:00: {kiwis_6h_12pm_path}")
print_msg(f"6hourly {args.variable_code_6h} kiwis file 06:00: {kiwis_6h_06am_path}")
print_msg(f"6hourly {args.variable_code_6h} kiwis file 00:00: {kiwis_6h_12am_path}")
print_msg(f"6hourly {args.variable_code_6h} kiwis file 18:00: {kiwis_6h_18pm_path}")
print_msg(f"6hourly {args.variable_code_6h} kiwis file 12:00: {kiwis_6h_12pm_path}")

run(conf_24h, conf_6h, kiwis_24h_06am_path, kiwis_6h_12pm_path, kiwis_6h_18pm_path,
run(conf_24h, conf_6h, beginning_of_interval_offset, kiwis_24h_06am_path, kiwis_6h_12pm_path, kiwis_6h_18pm_path,
kiwis_6h_12am_path, kiwis_6h_06am_path, input_path_6h=kiwis_6h_folder_path, output_path=output_path)
return 0
except Exception as e:
Expand Down
23 changes: 1 addition & 22 deletions src/lisfloodutilities/gridding/generate_grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,21 +62,6 @@ def get_grid(grid_utils: GriddingUtils, filename: Path, tiff_filepath: Path=None
grid_data = grid_utils.generate_grid(filename)
return grid_data

def print_grid_statistics(var_code: str, grid_timestamp: datetime, grid: np.ndarray):
    """Log NaN-aware summary statistics of *grid* as a single '#APP_STATS' JSON line.

    The line carries the formatted timestamp, the variable code, and the
    min/max/mean plus the 10th and 90th percentiles of the grid, all ignoring NaNs.
    """
    ts = grid_timestamp.strftime(FileUtils.DATE_PATTERN_SEPARATED)
    lo = np.nanmin(grid)
    hi = np.nanmax(grid)
    avg = np.nanmean(grid)
    p10, p90 = np.nanpercentile(grid, [10, 90])
    # The '#APP_STATS:' prefix, key order and 2-decimal formatting reproduce the
    # original output string exactly.
    print_msg(
        f'#APP_STATS: {{"TIMESTAMP": "{ts}", "VAR_CODE": "{var_code}", '
        f'"MINIMUM_VALUE": {lo:.2f}, "MAXIMUM_VALUE": {hi:.2f}, '
        f'"MEAN_VALUE": {avg:.2f}, "PERCENTILE_10": {p10:.2f}, '
        f'"PERCENTILE_90": {p90:.2f}}}'
    )

def run(config_filename: str, infolder: str, output_file: str, processing_dates_file: str, file_utils: FileUtils,
output_tiff: bool, output_netcdf: bool, overwrite_output: bool, use_existing_file: bool, get_existing_tiff: bool,
start_date: datetime = None, end_date: datetime = None, interpolation_mode: str = 'adw',
Expand Down Expand Up @@ -112,14 +97,11 @@ def run(config_filename: str, infolder: str, output_file: str, processing_dates_

netcdf_offset_file_date = int(conf.get_config_field('VAR_TIME','OFFSET_FILE_DATE'))

cur_writer = None
if output_tiff:
output_writer_tiff = GDALWriter(conf, overwrite_output, quiet_mode)
cur_writer = output_writer_tiff
if output_netcdf:
output_writer_netcdf = NetCDFWriter(conf, overwrite_output, quiet_mode)
output_writer_netcdf.open(Path(output_file))
cur_writer = output_writer_netcdf
file_loader = KiwisLoader(conf, Path(infolder), dates_to_process, overwrite_output, use_existing_file, quiet_mode)
for filename, kiwis_timestamp_str in file_loader:
kiwis_timestamp = datetime.strptime(kiwis_timestamp_str, FileUtils.DATE_PATTERN_CONDENSED_SHORT)
Expand All @@ -129,13 +111,10 @@ def run(config_filename: str, infolder: str, output_file: str, processing_dates_
if output_tiff:
output_writer_tiff.open(tiff_filepath)
grid_data = get_grid(grid_utils, filename, tiff_filepath, get_existing_tiff)
if cur_writer is not None:
cur_grid = cur_writer.setNaN(copy.deepcopy(grid_data))
print_grid_statistics(conf.var_code, file_timestamp, cur_grid)
if output_netcdf:
output_writer_netcdf.write(grid_data, file_timestamp)
if output_tiff:
output_writer_tiff.write(grid_data, file_timestamp)
output_writer_tiff.write(grid_data, file_timestamp, print_stats=(not output_netcdf))
output_writer_tiff.close()
if output_netcdf:
output_writer_netcdf.close()
Expand Down
58 changes: 45 additions & 13 deletions src/lisfloodutilities/gridding/lib/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,24 @@ def open(self, out_filename: Path):
self.filepath = out_filename
self.time_created = timex.ctime(timex.time())

def write(self, grid: np.ndarray, timestamp: datetime = None):
def print_grid_statistics(self, grid: np.ndarray):
    """Log NaN-aware summary statistics of *grid* as a single '#APP_STATS' JSON line.

    Uses self.current_timestamp and self.conf.var_code to tag the record and
    emits it through self.print_msg.
    """
    lo = np.nanmin(grid)
    hi = np.nanmax(grid)
    avg = np.nanmean(grid)
    p10, p90 = np.nanpercentile(grid, [10, 90])
    # The '#APP_STATS:' prefix, key order and 2-decimal formatting reproduce the
    # original output string exactly.
    self.print_msg(
        f'#APP_STATS: {{"TIMESTAMP": "{self.current_timestamp}", "VAR_CODE": "{self.conf.var_code}", '
        f'"MINIMUM_VALUE": {lo:.2f}, "MAXIMUM_VALUE": {hi:.2f}, '
        f'"MEAN_VALUE": {avg:.2f}, "PERCENTILE_10": {p10:.2f}, '
        f'"PERCENTILE_90": {p90:.2f}}}'
    )

def setup_grid(self, grid: np.ndarray) -> np.ndarray:
    """Template hook: prepare *grid* for writing; concrete writer subclasses implement it.

    NOTE(review): the concrete implementations in this file declare
    setup_grid(self, grid, print_stats=True) — consider adding the
    print_stats parameter here so the base signature matches.
    """
    raise NotImplementedError

def write(self, grid: np.ndarray, timestamp: datetime = None, print_stats: bool = True):
    """Template hook: write *grid* for *timestamp*; concrete writer subclasses implement it.

    When print_stats is True the implementations also log grid statistics
    before writing (see setup_grid in the subclasses).
    """
    raise NotImplementedError

def write_timestep(self, grid: np.ndarray, timestep: int = -1):
Expand Down Expand Up @@ -117,28 +134,34 @@ def open(self, out_filename: Path):
else:
raise ArgumentTypeError(f'File {self.filepath} already exists. Use --force flag to append.')

def write(self, grid: np.ndarray, timestamp: datetime = None):
def setup_grid(self, grid: np.ndarray, print_stats: bool = True) -> np.ndarray:
    """Prepare *grid* for netCDF storage and optionally log its statistics.

    Works on a deep copy so the caller's grid is never mutated. Cells outside
    [value_min_packed, value_max_packed] are masked to NaN, the scale factor and
    offset are applied, statistics are (optionally) printed on the scaled values,
    and finally NaNs are replaced with the scaled/offset sentinel value.
    """
    # Deep-copy first: setNaN and the in-place masks below must not touch the input.
    values = self.setNaN(copy.deepcopy(grid))
    # Mask out-of-range cells before scaling.
    values[values < self.conf.value_min_packed] = np.nan
    values[values > self.conf.value_max_packed] = np.nan
    # Apply scale/offset to every cell that is not the NaN sentinel.
    # NOTE(review): NaN != VALUE_NAN evaluates True, so NaN cells are also
    # scaled — harmless since NaN stays NaN, but worth confirming it's intended.
    values[values != self.conf.VALUE_NAN] *= self.conf.scale_factor
    values[values != self.conf.VALUE_NAN] += self.conf.add_offset
    if print_stats:
        # Statistics are computed on the scaled values, before the sentinel is re-inserted.
        self.print_grid_statistics(values)
    # Re-insert the sentinel in its scaled/offset representation.
    values[np.isnan(values)] = self.conf.VALUE_NAN * self.conf.scale_factor + self.conf.add_offset
    return values

def write(self, grid: np.ndarray, timestamp: datetime = None, print_stats: bool = True):
    """Write *grid* at *timestamp* into the netCDF dataset.

    Converts the timestamp to the dataset's calendar units, prepares the grid
    via setup_grid (which also logs statistics when print_stats is True), and
    delegates to write_timestep.
    """
    timestep = -1
    if timestamp is not None:
        # NOTE(review): this assignment is immediately overwritten by the next
        # line — looks like old/new lines of a diff both kept; the first is dead.
        self.current_timestamp = timestamp
        self.current_timestamp = timestamp.strftime(FileUtils.DATE_PATTERN_SEPARATED)
        timestep = date2num(timestamp, self.calendar_time_unit, self.calendar_type)
    else:
        self.current_timestamp = None
    # NOTE(review): this dispatch with the raw grid duplicates the
    # setup_grid/write_timestep pair below — likely diff residue; confirm only
    # one write per call is intended.
    self.write_timestep(grid, timestep)
    cur_grid = self.setup_grid(grid, print_stats)
    self.write_timestep(cur_grid, timestep)

def write_timestep(self, grid: np.ndarray, timestep: int = -1):
    """Write one already-prepared grid slice at the given calendar *timestep*.

    Negative timesteps are silently ignored. Requires the dataset to have been
    opened; otherwise raises with a hint about the --force flag.
    """
    if timestep >= 0:
        if not self.opened():
            raise Exception("netCDF Dataset was not initialized. If file already exists, use --force flag to append.")
        # Position the write index for this timestep, then record the time value.
        self.__set_write_index(timestep)
        self.nf.variables[self.netcdf_var_time][self.write_idx] = timestep
        # NOTE(review): the packing block below recomputes what setup_grid already
        # does, and its result `values` is overwritten by the final `= grid`
        # assignment — old/new diff lines both present; the dead packing code
        # (and the first variable write) should be removed.
        values = self.setNaN(copy.deepcopy(grid))
        values[values < self.conf.value_min_packed] = np.nan
        values[values > self.conf.value_max_packed] = np.nan
        values[values != self.conf.VALUE_NAN] *= self.conf.scale_factor
        values[values != self.conf.VALUE_NAN] += self.conf.add_offset
        values[np.isnan(values)] = self.conf.VALUE_NAN * self.conf.scale_factor + self.conf.add_offset
        self.nf.variables[self.var_code][self.write_idx, :, :] = values
        self.nf.variables[self.var_code][self.write_idx, :, :] = grid

def __set_write_index(self, timestep: int):
if not self.is_new_file:
Expand Down Expand Up @@ -281,11 +304,20 @@ def setup_dataset_metadata(self, ds: gdal.Dataset) -> gdal.Dataset:
if self.current_timestamp is not None:
ds.SetMetadataItem('Timestamp', f'{self.current_timestamp}')
return ds

def setup_grid(self, grid: np.ndarray, print_stats: bool = True) -> np.ndarray:
    """Optionally log statistics for *grid*; the grid itself is returned unchanged.

    Statistics are computed on a NaN-masked deep copy so the caller's array is
    never mutated.
    """
    if not print_stats:
        return grid
    masked = self.setNaN(copy.deepcopy(grid))
    self.print_grid_statistics(masked)
    return grid

def write(self, grid: np.ndarray, timestamp: datetime = None):
def write(self, grid: np.ndarray, timestamp: datetime = None, print_stats: bool = True):
    """Write *grid* as a GeoTIFF timestep, tagging it with the formatted *timestamp*.

    setup_grid only logs statistics here (when print_stats is True); the grid
    data itself is written unchanged by write_timestep.
    """
    if timestamp is not None:
        self.current_timestamp = timestamp.strftime(FileUtils.DATE_PATTERN_SEPARATED)
        # NOTE(review): this early dispatch with the raw grid duplicates the
        # setup_grid/write_timestep pair below — likely a leftover old diff
        # line; confirm only one write per call is intended.
        self.write_timestep(grid)
    else:
        self.current_timestamp = None
    cur_grid = self.setup_grid(grid, print_stats)
    self.write_timestep(cur_grid)
    self.current_timestamp = None

def write_timestep(self, grid: np.ndarray, timestep: int = -1):
Expand Down

0 comments on commit 714edd6

Please sign in to comment.