diff --git a/src/lisfloodutilities/gridding/decumulate_daily_grids.py b/src/lisfloodutilities/gridding/decumulate_daily_grids.py
index 1b949af..1dbf513 100644
--- a/src/lisfloodutilities/gridding/decumulate_daily_grids.py
+++ b/src/lisfloodutilities/gridding/decumulate_daily_grids.py
@@ -174,7 +174,7 @@ def print_statistics(provider_ids: List[str], df_kiwis_24h: pd.DataFrame, df_kiw
         print_msg(stats_string)
         i += 1
 
-def run(conf_24h: Config, conf_6h: Config, kiwis_24h_06am_path: Path, kiwis_6h_12pm_path: Path,
+def run(conf_24h: Config, conf_6h: Config, beginning_of_interval_offset: int, kiwis_24h_06am_path: Path, kiwis_6h_12pm_path: Path,
         kiwis_6h_18pm_path: Path, kiwis_6h_12am_path: Path, kiwis_6h_06am_path: Path, input_path_6h: Path, output_path: Path = None):
     """
     While processing the 4 grids of 6hourly precipitation Day1 12:00, 18:00 and Day2 00:00, 06:00 we will use the daily precipitation
@@ -200,10 +200,10 @@ def run(conf_24h: Config, conf_6h: Config, kiwis_24h_06am_path: Path, kiwis_6h_1
                                                  kiwis_6h_12am_path, kiwis_6h_06am_path])
 
     # Check timestamps are correct
-    if not (kiwis_timestamps_24h[0] == kiwis_timestamps_6h[3] and
-            (kiwis_timestamps_24h[0] - timedelta(hours=6)) == kiwis_timestamps_6h[2] and
-            (kiwis_timestamps_24h[0] - timedelta(hours=12)) == kiwis_timestamps_6h[1] and
-            (kiwis_timestamps_24h[0] - timedelta(hours=18)) == kiwis_timestamps_6h[0]):
+    if not ((kiwis_timestamps_24h[0] + timedelta(hours=beginning_of_interval_offset)) == kiwis_timestamps_6h[3] and
+            (kiwis_timestamps_24h[0] + timedelta(hours=beginning_of_interval_offset) - timedelta(hours=6)) == kiwis_timestamps_6h[2] and
+            (kiwis_timestamps_24h[0] + timedelta(hours=beginning_of_interval_offset) - timedelta(hours=12)) == kiwis_timestamps_6h[1] and
+            (kiwis_timestamps_24h[0] + timedelta(hours=beginning_of_interval_offset) - timedelta(hours=18)) == kiwis_timestamps_6h[0]):
         raise ArgumentTypeError("The input kiwis do not respect the expected timestamps.")
 
     kiwis_dataframes = df_kiwis_array_24h
@@ -278,7 +278,8 @@ def main(argv):
         # set defaults
         parser.set_defaults(quiet=False,
-                            output_folder=None)
+                            output_folder=None,
+                            use_beginning_of_interval=False)
 
         parser.add_argument("-d", "--pr24h", dest="kiwis_24h_folder_path", required=True, type=FileUtils.folder_type,
                             help="Set the input kiwis file folder containing daily precipitation.",
@@ -307,6 +308,8 @@
         parser.add_argument("-e", "--end", dest="end_date",
                             help="Set the end date and time until which data is imported [default: %(default)s]",
                             metavar="YYYYMMDDHHMISS")
+        parser.add_argument("-b", "--boi", dest="use_beginning_of_interval", action="store_true",
+                            help="Indicate that the daily timesteps are at the beginning of the interval [default: %(default)s]")
         parser.add_argument("-q", "--quiet", dest="quiet", action="store_true",
                             help="Set script output into quiet mode [default: %(default)s]")
         # process options
@@ -357,28 +360,36 @@
             output_path = None
             print_msg(f"Output Folder: 6hourly kiwis files will be overwritten")
 
+        print_msg(f"Timesteps are beginning of the interval: {args.use_beginning_of_interval}")
+
         kiwis_24h_paths = get_24h_kiwis_paths(conf_24h, Path(args.kiwis_24h_folder_path))
         kiwis_6h_folder_path = Path(args.kiwis_6h_folder_path)
+
+        # Defines the offset for the 6hourly data when the daily data is at
+        # the beginning of the interval while the 6hourly files should be at the end of the interval
+        beginning_of_interval_offset = 0
+        if args.use_beginning_of_interval:
+            beginning_of_interval_offset = 24
 
         for filename_kiwis, kiwis_timestamp in kiwis_24h_paths:
             kiwis_24h_06am_path = get_existing_file_path(parser, str(filename_kiwis))
-            print_msg(f"Daily PR kiwis file: {kiwis_24h_06am_path}")
-            kiwis_6h_06am_timestamp = kiwis_timestamp
-            kiwis_6h_12am_timestamp = kiwis_timestamp - timedelta(hours=6)
-            kiwis_6h_18pm_timestamp = kiwis_timestamp - timedelta(hours=12)
-            kiwis_6h_12pm_timestamp = kiwis_timestamp - timedelta(hours=18)
+            print_msg(f"Daily {args.variable_code_24h} kiwis file: {kiwis_24h_06am_path}")
+            kiwis_6h_06am_timestamp = kiwis_timestamp + timedelta(hours=beginning_of_interval_offset)
+            kiwis_6h_12am_timestamp = kiwis_timestamp + timedelta(hours=beginning_of_interval_offset) - timedelta(hours=6)
+            kiwis_6h_18pm_timestamp = kiwis_timestamp + timedelta(hours=beginning_of_interval_offset) - timedelta(hours=12)
+            kiwis_6h_12pm_timestamp = kiwis_timestamp + timedelta(hours=beginning_of_interval_offset) - timedelta(hours=18)
             kiwis_6h_06am_path = get_6hourly_filepath(parser, conf_6h, kiwis_6h_folder_path, kiwis_6h_06am_timestamp)
             kiwis_6h_12am_path = get_6hourly_filepath(parser, conf_6h, kiwis_6h_folder_path, kiwis_6h_12am_timestamp)
             kiwis_6h_18pm_path = get_6hourly_filepath(parser, conf_6h, kiwis_6h_folder_path, kiwis_6h_18pm_timestamp)
             kiwis_6h_12pm_path = get_6hourly_filepath(parser, conf_6h, kiwis_6h_folder_path, kiwis_6h_12pm_timestamp)
-            print_msg(f"6hourly PR kiwis file 06:00: {kiwis_6h_06am_path}")
-            print_msg(f"6hourly PR kiwis file 00:00: {kiwis_6h_12am_path}")
-            print_msg(f"6hourly PR kiwis file 18:00: {kiwis_6h_18pm_path}")
-            print_msg(f"6hourly PR kiwis file 12:00: {kiwis_6h_12pm_path}")
+            print_msg(f"6hourly {args.variable_code_6h} kiwis file 06:00: {kiwis_6h_06am_path}")
+            print_msg(f"6hourly {args.variable_code_6h} kiwis file 00:00: {kiwis_6h_12am_path}")
+            print_msg(f"6hourly {args.variable_code_6h} kiwis file 18:00: {kiwis_6h_18pm_path}")
+            print_msg(f"6hourly {args.variable_code_6h} kiwis file 12:00: {kiwis_6h_12pm_path}")
 
-            run(conf_24h, conf_6h, kiwis_24h_06am_path, kiwis_6h_12pm_path, kiwis_6h_18pm_path,
+            run(conf_24h, conf_6h, beginning_of_interval_offset, kiwis_24h_06am_path, kiwis_6h_12pm_path, kiwis_6h_18pm_path,
                 kiwis_6h_12am_path, kiwis_6h_06am_path, input_path_6h=kiwis_6h_folder_path, output_path=output_path)
         return 0
     except Exception as e:
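A standalone sketch of the offset arithmetic introduced above may help: when the daily kiwis timestamp marks the beginning of its 24 h accumulation interval, the matching end-of-interval 6-hourly timestamps are obtained by first shifting the daily timestamp forward by beginning_of_interval_offset hours (24 with --boi, 0 otherwise). The dates below are invented and the snippet is illustrative only, not part of the patch.

# Illustration (not part of the patch): how the 24 h offset shifts the expected
# 6-hourly timestamps when the daily value is stamped at the beginning of its interval.
from datetime import datetime, timedelta

daily_timestamp = datetime(2024, 1, 1, 6, 0)   # invented Day1 06:00 daily kiwis timestamp
beginning_of_interval_offset = 24              # 0 when the daily data is end-of-interval

# End of the daily accumulation interval, which is the convention the 6-hourly files use
end_of_interval = daily_timestamp + timedelta(hours=beginning_of_interval_offset)

# The four 6-hourly end-of-interval timestamps covered by the daily value (newest first)
expected_6h = [end_of_interval - timedelta(hours=h) for h in (0, 6, 12, 18)]
print([t.strftime('%Y-%m-%d %H:%M') for t in expected_6h])
# ['2024-01-02 06:00', '2024-01-02 00:00', '2024-01-01 18:00', '2024-01-01 12:00']

The same arithmetic appears twice in the patch: in the timestamp check inside run() and in the four kiwis_6h_*_timestamp assignments inside main().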
diff --git a/src/lisfloodutilities/gridding/generate_grids.py b/src/lisfloodutilities/gridding/generate_grids.py
index 70a5e36..885a730 100755
--- a/src/lisfloodutilities/gridding/generate_grids.py
+++ b/src/lisfloodutilities/gridding/generate_grids.py
@@ -62,21 +62,6 @@ def get_grid(grid_utils: GriddingUtils, filename: Path, tiff_filepath: Path=None
         grid_data = grid_utils.generate_grid(filename)
     return grid_data
 
-def print_grid_statistics(var_code: str, grid_timestamp: datetime, grid: np.ndarray):
-    timestamp = grid_timestamp.strftime(FileUtils.DATE_PATTERN_SEPARATED)
-    grid_min = np.nanmin(grid)
-    grid_max = np.nanmax(grid)
-    grid_mean = np.nanmean(grid)
-    grid_percentile_10 = np.nanpercentile(grid, 10)
-    grid_percentile_90 = np.nanpercentile(grid, 90)
-    stats_string = (
-        f'#APP_STATS: {{"TIMESTAMP": "{timestamp}", "VAR_CODE": "{var_code}", '
-        f'"MINIMUM_VALUE": {grid_min:.2f}, "MAXIMUM_VALUE": {grid_max:.2f}, '
-        f'"MEAN_VALUE": {grid_mean:.2f}, "PERCENTILE_10": {grid_percentile_10:.2f}, '
-        f'"PERCENTILE_90": {grid_percentile_90:.2f}}}'
-    )
-    print_msg(stats_string)
-
 def run(config_filename: str, infolder: str, output_file: str, processing_dates_file: str,
         file_utils: FileUtils, output_tiff: bool, output_netcdf: bool,
         overwrite_output: bool, use_existing_file: bool, get_existing_tiff: bool,
         start_date: datetime = None, end_date: datetime = None, interpolation_mode: str = 'adw',
@@ -112,14 +97,11 @@ def run(config_filename: str, infolder: str, output_file: str, processing_dates_
     netcdf_offset_file_date = int(conf.get_config_field('VAR_TIME','OFFSET_FILE_DATE'))
 
-    cur_writer = None
     if output_tiff:
         output_writer_tiff = GDALWriter(conf, overwrite_output, quiet_mode)
-        cur_writer = output_writer_tiff
     if output_netcdf:
         output_writer_netcdf = NetCDFWriter(conf, overwrite_output, quiet_mode)
         output_writer_netcdf.open(Path(output_file))
-        cur_writer = output_writer_netcdf
 
     file_loader = KiwisLoader(conf, Path(infolder), dates_to_process, overwrite_output, use_existing_file, quiet_mode)
     for filename, kiwis_timestamp_str in file_loader:
         kiwis_timestamp = datetime.strptime(kiwis_timestamp_str, FileUtils.DATE_PATTERN_CONDENSED_SHORT)
@@ -129,13 +111,10 @@ def run(config_filename: str, infolder: str, output_file: str, processing_dates_
         if output_tiff:
             output_writer_tiff.open(tiff_filepath)
         grid_data = get_grid(grid_utils, filename, tiff_filepath, get_existing_tiff)
-        if cur_writer is not None:
-            cur_grid = cur_writer.setNaN(copy.deepcopy(grid_data))
-            print_grid_statistics(conf.var_code, file_timestamp, cur_grid)
         if output_netcdf:
             output_writer_netcdf.write(grid_data, file_timestamp)
         if output_tiff:
-            output_writer_tiff.write(grid_data, file_timestamp)
+            output_writer_tiff.write(grid_data, file_timestamp, print_stats=(not output_netcdf))
             output_writer_tiff.close()
     if output_netcdf:
         output_writer_netcdf.close()
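To make the intent of print_stats=(not output_netcdf) explicit: with the statistics code moved into the writers, each grid should still be logged exactly once, and the NetCDF writer takes precedence when both outputs are enabled. A tiny standalone truth table (not part of the patch):

# Sketch (not part of the patch): the GeoTIFF writer only reports statistics
# when it is the sole output, so each grid is logged at most once.
for output_netcdf in (False, True):
    for output_tiff in (False, True):
        netcdf_prints = output_netcdf                    # NetCDF writer always prints its stats
        tiff_prints = output_tiff and not output_netcdf  # mirrors print_stats=(not output_netcdf)
        print(f"netcdf={output_netcdf!s:5} tiff={output_tiff!s:5} "
              f"-> stats printed {int(netcdf_prints) + int(tiff_prints)} time(s)")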
diff --git a/src/lisfloodutilities/gridding/lib/writers.py b/src/lisfloodutilities/gridding/lib/writers.py
index 513c565..751cc77 100644
--- a/src/lisfloodutilities/gridding/lib/writers.py
+++ b/src/lisfloodutilities/gridding/lib/writers.py
@@ -72,7 +72,24 @@ def open(self, out_filename: Path):
         self.filepath = out_filename
         self.time_created = timex.ctime(timex.time())
 
-    def write(self, grid: np.ndarray, timestamp: datetime = None):
+    def print_grid_statistics(self, grid: np.ndarray):
+        grid_min = np.nanmin(grid)
+        grid_max = np.nanmax(grid)
+        grid_mean = np.nanmean(grid)
+        grid_percentile_10 = np.nanpercentile(grid, 10)
+        grid_percentile_90 = np.nanpercentile(grid, 90)
+        stats_string = (
+            f'#APP_STATS: {{"TIMESTAMP": "{self.current_timestamp}", "VAR_CODE": "{self.conf.var_code}", '
+            f'"MINIMUM_VALUE": {grid_min:.2f}, "MAXIMUM_VALUE": {grid_max:.2f}, '
+            f'"MEAN_VALUE": {grid_mean:.2f}, "PERCENTILE_10": {grid_percentile_10:.2f}, '
+            f'"PERCENTILE_90": {grid_percentile_90:.2f}}}'
+        )
+        self.print_msg(stats_string)
+
+    def setup_grid(self, grid: np.ndarray) -> np.ndarray:
+        raise NotImplementedError
+
+    def write(self, grid: np.ndarray, timestamp: datetime = None, print_stats: bool = True):
         raise NotImplementedError
 
     def write_timestep(self, grid: np.ndarray, timestep: int = -1):
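Below is a minimal, hypothetical sketch of the shape the writer hierarchy takes after this hunk: the base class provides print_grid_statistics() and declares setup_grid() and write(..., print_stats=True) as hooks, while each concrete writer's write() prepares the grid through its own setup_grid(), which also decides whether statistics are reported. Class names and the simplified statistics line are stand-ins, not the real API.

# Hypothetical sketch (not part of the patch); the real writers also carry conf,
# timestamps, file handles, etc.
import numpy as np

class BaseWriter:
    # Shared statistics helper (mirrors Writer.print_grid_statistics in the patch,
    # with a plain message instead of the #APP_STATS JSON line).
    def print_grid_statistics(self, grid: np.ndarray):
        print(f"min={np.nanmin(grid):.2f} max={np.nanmax(grid):.2f} mean={np.nanmean(grid):.2f}")

    def setup_grid(self, grid: np.ndarray, print_stats: bool = True) -> np.ndarray:
        raise NotImplementedError

    def write(self, grid: np.ndarray, print_stats: bool = True):
        raise NotImplementedError

class PassThroughWriter(BaseWriter):
    # GDAL-style writer: statistics only, grid returned unchanged.
    def setup_grid(self, grid: np.ndarray, print_stats: bool = True) -> np.ndarray:
        if print_stats:
            self.print_grid_statistics(grid)
        return grid

    def write(self, grid: np.ndarray, print_stats: bool = True):
        prepared = self.setup_grid(grid, print_stats)
        print("writing grid of shape", prepared.shape)

PassThroughWriter().write(np.array([[1.0, 2.0], [3.0, np.nan]]))
# min=1.00 max=3.00 mean=2.00
# writing grid of shape (2, 2)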
@@ -117,14 +134,26 @@ def open(self, out_filename: Path):
         else:
             raise ArgumentTypeError(f'File {self.filepath} already exists. Use --force flag to append.')
 
-    def write(self, grid: np.ndarray, timestamp: datetime = None):
+    def setup_grid(self, grid: np.ndarray, print_stats: bool = True) -> np.ndarray:
+        values = self.setNaN(copy.deepcopy(grid))
+        values[values < self.conf.value_min_packed] = np.nan
+        values[values > self.conf.value_max_packed] = np.nan
+        values[values != self.conf.VALUE_NAN] *= self.conf.scale_factor
+        values[values != self.conf.VALUE_NAN] += self.conf.add_offset
+        if print_stats:
+            self.print_grid_statistics(values)
+        values[np.isnan(values)] = self.conf.VALUE_NAN * self.conf.scale_factor + self.conf.add_offset
+        return values
+
+    def write(self, grid: np.ndarray, timestamp: datetime = None, print_stats: bool = True):
         timestep = -1
         if timestamp is not None:
-            self.current_timestamp = timestamp
+            self.current_timestamp = timestamp.strftime(FileUtils.DATE_PATTERN_SEPARATED)
             timestep = date2num(timestamp, self.calendar_time_unit, self.calendar_type)
         else:
             self.current_timestamp = None
-        self.write_timestep(grid, timestep)
+        cur_grid = self.setup_grid(grid, print_stats)
+        self.write_timestep(cur_grid, timestep)
 
     def write_timestep(self, grid: np.ndarray, timestep: int = -1):
         if timestep >= 0:
@@ -132,13 +161,7 @@ def write_timestep(self, grid: np.ndarray, timestep: int = -1):
             raise Exception("netCDF Dataset was not initialized. If file already exists, use --force flag to append.")
             self.__set_write_index(timestep)
             self.nf.variables[self.netcdf_var_time][self.write_idx] = timestep
-            values = self.setNaN(copy.deepcopy(grid))
-            values[values < self.conf.value_min_packed] = np.nan
-            values[values > self.conf.value_max_packed] = np.nan
-            values[values != self.conf.VALUE_NAN] *= self.conf.scale_factor
-            values[values != self.conf.VALUE_NAN] += self.conf.add_offset
-            values[np.isnan(values)] = self.conf.VALUE_NAN * self.conf.scale_factor + self.conf.add_offset
-            self.nf.variables[self.var_code][self.write_idx, :, :] = values
+            self.nf.variables[self.var_code][self.write_idx, :, :] = grid
 
     def __set_write_index(self, timestep: int):
         if not self.is_new_file:
@@ -281,11 +304,20 @@ def setup_dataset_metadata(self, ds: gdal.Dataset) -> gdal.Dataset:
         if self.current_timestamp is not None:
             ds.SetMetadataItem('Timestamp', f'{self.current_timestamp}')
         return ds
+
+    def setup_grid(self, grid: np.ndarray, print_stats: bool = True) -> np.ndarray:
+        if print_stats:
+            values = self.setNaN(copy.deepcopy(grid))
+            self.print_grid_statistics(values)
+        return grid
 
-    def write(self, grid: np.ndarray, timestamp: datetime = None):
+    def write(self, grid: np.ndarray, timestamp: datetime = None, print_stats: bool = True):
         if timestamp is not None:
             self.current_timestamp = timestamp.strftime(FileUtils.DATE_PATTERN_SEPARATED)
-        self.write_timestep(grid)
+        else:
+            self.current_timestamp = None
+        cur_grid = self.setup_grid(grid, print_stats)
+        self.write_timestep(cur_grid)
         self.current_timestamp = None
 
     def write_timestep(self, grid: np.ndarray, timestep: int = -1):
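The NetCDF setup_grid() above concentrates what write_timestep() used to do inline: mask values outside the packed valid range, apply scale_factor and add_offset, report statistics while the invalid cells are still NaN, and only then substitute the fill value before the array reaches write_timestep(). A standalone worked example of that arithmetic, with invented numbers and the in-place masking slightly simplified relative to the patch:

# Worked example (values invented, not the real configuration).
import numpy as np

scale_factor, add_offset = 0.5, 0.0
value_min_packed, value_max_packed = 0, 5000
fill_value = -9999.0                                   # stand-in for conf.VALUE_NAN

grid = np.array([25.0, 4999.0, 6000.0, fill_value])    # example grid values

values = grid.copy()
values[grid < value_min_packed] = np.nan               # also masks the fill value
values[grid > value_max_packed] = np.nan               # masks out-of-range cells
values = values * scale_factor + add_offset            # apply scale_factor and add_offset

print(values)                                          # 12.5, 2499.5, nan, nan
print(np.nanmax(values))                               # 2499.5 -- statistics ignore masked cells

values[np.isnan(values)] = fill_value * scale_factor + add_offset
print(values)                                          # 12.5, 2499.5, -4999.5, -4999.5

The GDAL variant at the end of the diff makes the contrast clear: its setup_grid() only computes statistics on a NaN-masked copy and returns the original grid unchanged, so the GeoTIFF on disk is unaffected by the statistics step.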