
Commit

Merge pull request #48 from ec-jrc/module_kiwis_interpreter
Module kiwis interpreter
gnrgomes authored Dec 11, 2023
2 parents 14d759c + c3a4a04 commit 44291b7
Showing 14 changed files with 214 additions and 21 deletions.
@@ -11,6 +11,7 @@ VALUE_OFFSET = 0.0
DATA_TYPE_PACKED = i2
STANDARD_NAME = precipitation_amount
LONG_NAME = Daily Accumulated Precipitation
KIWIS_FILTER_PLUGIN_CLASSES = {'ObservationsKiwisFilter': {'1303': 100.0}}

[VAR_TIME]

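The KIWIS_FILTER_PLUGIN_CLASSES entries added to these configuration files are Python dict literals: each key names a filter class from lisfloodutilities.gridding.lib.filters and each value is the argument dict handed to that class (here a provider id mapped to a number). A minimal sketch of turning such a value into a class-name/arguments mapping, assuming the setting is read back as a plain string and that ast.literal_eval is an acceptable way to parse it (the Config reader used by the commit is not shown in these hunks):

import ast

# Hypothetical raw value, exactly as it appears in the configuration file.
raw_setting = "{'ObservationsKiwisFilter': {'1303': 100.0}}"

# Parse the dict literal: keys are filter class names, values are the
# argument dicts passed to each filter's constructor.
plugin_classes = ast.literal_eval(raw_setting)

for class_name, class_args in plugin_classes.items():
    print(class_name, class_args)  # ObservationsKiwisFilter {'1303': 100.0}
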
@@ -11,6 +11,8 @@ VALUE_OFFSET = 0.0
DATA_TYPE_PACKED = i2
STANDARD_NAME = precipitation_amount
LONG_NAME = 6 Hourly Accumulated Precipitation
KIWIS_FILTER_PLUGIN_CLASSES = {'DowgradedObservationsKiwisFilter': {'1295': 1.0}, 'ObservationsKiwisFilter': {'1303': 100.0}}


[VAR_TIME]

@@ -11,6 +11,7 @@ VALUE_OFFSET = 0.0
DATA_TYPE_PACKED = i2
STANDARD_NAME = precipitation_amount
LONG_NAME = 6 Hourly Accumulated Precipitation per Day
KIWIS_FILTER_PLUGIN_CLASSES = {'DowgradedObservationsKiwisFilter': {'1295': 1.0}, 'ObservationsKiwisFilter': {'1303': 100.0}}

[VAR_TIME]

@@ -23,6 +23,7 @@ VALUE_OFFSET = 0.0
DATA_TYPE_PACKED = i2
STANDARD_NAME = DUMMY_STANDARD_NAME
LONG_NAME = DUMMY LONG NAME
KIWIS_FILTER_PLUGIN_CLASSES = {'KiwisFilter': {}}

[DIMENSION]

@@ -11,6 +11,7 @@ VALUE_OFFSET = 0.0
DATA_TYPE_PACKED = i2
STANDARD_NAME = precipitation_amount
LONG_NAME = Daily Accumulated Precipitation
KIWIS_FILTER_PLUGIN_CLASSES = {'ObservationsKiwisFilter': {'1303': 100.0}}

[VAR_TIME]

@@ -11,6 +11,7 @@ VALUE_OFFSET = 0.0
DATA_TYPE_PACKED = i2
STANDARD_NAME = precipitation_amount
LONG_NAME = 6 Hourly Accumulated Precipitation
KIWIS_FILTER_PLUGIN_CLASSES = {'DowgradedObservationsKiwisFilter': {'1295': 1.0}, 'ObservationsKiwisFilter': {'1303': 100.0}}

[VAR_TIME]

@@ -11,6 +11,7 @@ VALUE_OFFSET = 0.0
DATA_TYPE_PACKED = i2
STANDARD_NAME = precipitation_amount
LONG_NAME = 6 Hourly Accumulated Precipitation per Day
KIWIS_FILTER_PLUGIN_CLASSES = {'DowgradedObservationsKiwisFilter': {'1295': 1.0}, 'ObservationsKiwisFilter': {'1303': 100.0}}

[VAR_TIME]

@@ -24,6 +24,7 @@ VALUE_OFFSET = 0.0
DATA_TYPE_PACKED = i2
STANDARD_NAME = DUMMY_STANDARD_NAME
LONG_NAME = DUMMY LONG NAME
KIWIS_FILTER_PLUGIN_CLASSES = {'KiwisFilter': {}}

[DIMENSION]

9 changes: 8 additions & 1 deletion src/lisfloodutilities/gridding/generate_grids.py
@@ -21,7 +21,7 @@
from pathlib import Path
from argparse import ArgumentParser, ArgumentTypeError
from datetime import datetime, timedelta
from lisfloodutilities.gridding.lib.utils import Printable, Dem, Config, FileUtils, GriddingUtils
from lisfloodutilities.gridding.lib.utils import Printable, Dem, Config, FileUtils, GriddingUtils # , KiwisLoader
from lisfloodutilities.gridding.lib.writers import NetCDFWriter, GDALWriter


@@ -80,6 +80,8 @@ def run(config_filename: str, infolder: str, output_file: str, processing_dates_
output_writer_tiff = GDALWriter(conf, overwrite_output, quiet_mode)
output_writer_netcdf = NetCDFWriter(conf, overwrite_output, quiet_mode)
output_writer_netcdf.open(Path(outfile))
# file_loader = KiwisLoader(conf, Path(infolder), overwrite_output, quiet_mode)
# for filename in file_loader:
for filename in sorted(Path(infolder).rglob(inwildcard)):
file_timestamp = file_utils.get_timestamp_from_filename(filename) + timedelta(days=netcdf_offset_file_date)
if not file_utils.processable_file(file_timestamp, dates_to_process, conf.start_date, conf.end_date):
@@ -144,6 +146,9 @@ def main(argv):
parser.add_argument("-c", "--conf", dest="config_type", required=True,
help="Set the grid configuration type to use.",
metavar="{5x5km, 1arcmin,...}")
parser.add_argument("-p", "--pathconf", dest="config_base_path", required=False, type=FileUtils.folder_type,
help="Overrides the base path where the configurations are stored.",
metavar="/path/to/config")
parser.add_argument("-v", "--var", dest="variable_code", required=True,
help="Set the variable to be processed.",
metavar="{pr,pd,tn,tx,ws,rg,...}")
@@ -172,6 +177,8 @@
quiet_mode = args.quiet

configuration_base_folder = os.path.join(program_path, '../src/lisfloodutilities/gridding/configuration')
if args.config_base_path is not None and len(args.config_base_path) > 0:
configuration_base_folder = args.config_base_path

file_utils = FileUtils(args.variable_code, quiet_mode)

40 changes: 40 additions & 0 deletions src/lisfloodutilities/gridding/lib/filters.py
@@ -0,0 +1,40 @@
from pathlib import Path
import pandas as pd

class KiwisFilter():

    def __init__(self, filter_args: dict):
        self.args = filter_args

    def filter(self, kiwis_files: list) -> list:
        # Read each tab-separated kiwis file into a DataFrame and apply the filter to it.
        filtered_data_frames = []
        for file_path in kiwis_files:
            df_kiwis = pd.read_csv(file_path, sep="\t")
            df_kiwis = self._apply_filter(df_kiwis)
            filtered_data_frames.append(df_kiwis)
        return filtered_data_frames

    def _apply_filter(self, df: pd.DataFrame) -> pd.DataFrame:
        # Filtering code goes here: keep only the kiwis rows to be used for point file creation
        return df

# KIWIS_FILTER_PLUGIN_CLASSES = {'DowgradedObservationsKiwisFilter': {'1295': 1.0}, 'ObservationsKiwisFilter': {'1303': 100.0}}

class DowgradedObservationsKiwisFilter(KiwisFilter):

    def __init__(self, filter_args: dict):
        super().__init__(filter_args)

    def _apply_filter(self, df: pd.DataFrame) -> pd.DataFrame:
        df = super()._apply_filter(df)
        return df

class ObservationsKiwisFilter(KiwisFilter):

    def __init__(self, filter_args: dict):
        super().__init__(filter_args)

    def _apply_filter(self, df: pd.DataFrame) -> pd.DataFrame:
        df = super()._apply_filter(df)
        return df
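
The commented-out KiwisLoader in utils.py (further down in this commit) is meant to instantiate these classes dynamically from the KIWIS_FILTER_PLUGIN_CLASSES mapping and run them over groups of kiwis files. A short usage sketch under those assumptions, with a placeholder file name and the mapping already parsed into a dict:

import importlib
from pathlib import Path

plugins = {'ObservationsKiwisFilter': {'1303': 100.0}}  # parsed plugin mapping
kiwis_files = [Path('pr202312110600_all.kiwis')]        # placeholder input file

module = importlib.import_module('lisfloodutilities.gridding.lib.filters')

for class_name, class_args in plugins.items():
    # Look up the filter class by name and pass it the per-plugin arguments.
    filter_instance = getattr(module, class_name)(class_args)
    # filter() returns one filtered DataFrame per input kiwis file.
    filtered_data_frames = filter_instance.filter(kiwis_files)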

107 changes: 107 additions & 0 deletions src/lisfloodutilities/gridding/lib/utils.py
@@ -23,10 +23,14 @@
import pandas as pd
from decimal import *
from datetime import datetime, timedelta
from collections import OrderedDict
from scipy.spatial import cKDTree
from pyg2p import Loggable
from pyg2p.main.readers.netcdf import NetCDFReader
from pyg2p.main.interpolation.scipy_interpolation_lib import ScipyInterpolation
from numpy import delete
import importlib


__DECIMAL_CASES = 20
__DECIMAL_FORMAT = '{:.20f}'
@@ -428,3 +432,106 @@ def generate_grid(self, filename: Path) -> np.ndarray:
self.check_grid_nan(filename, result)
grid_data = self.prepare_grid(result, grid_x.shape)
return grid_data


# class KiwisLoader(Printable):
# DATE_PATTERN_CONDENSED = '%Y%m%d%H%M%S'
# DATE_PATTERN_SEPARATED = '%Y-%m-%d %H:%M:%S'
# CSV_DELIMITER = '\t'
# FILES_WILDCARD = '??????????00_all.kiwis'
#
# def __init__(self, conf: Config, infolder: Path, overwrite_file: bool = False, quiet_mode: bool = False):
# super().__init__(quiet_mode)
# self.conf = conf
# self.overwrite_file = overwrite_file
# self.var_code = self.conf.var_code
# self.var_size = len(self.var_code)
# self.inwildcard = self.var_code + self.FILES_WILDCARD
# self.infolder = infolder
# # Frequency between timesteps in hours
# self.time_frequency = int(self.conf.get_config_field('VAR_TIME','FREQUENCY'))
# self.is_daily_var = (self.time_frequency == 1)
# # Number of files to be read/processed simultaneously. For non-daily vars the time frequency is in hours
# self.read_files_step = 1 if self.is_daily_var else int(24 / self.time_frequency)
# self.files_list = OrderedDict()
# self.file_groups = []
# self.filter_classes = self.__get_filter_classes()
#
# def __iter__(self):
# self.__load_kiwis_paths()
# self.file_groups = iter(self.__get_file_groups())
# return self
#
# def __next__(self):
# if self.is_daily_var:
# for filter_class in self.filter_classes:
# df_kiwis_array = filter_class.filter(self.file_groups[self.file_group_read_idx])
# self.file_group_read_idx += 1
# raise StopIteration
#
# def __get_filter_classes(self) -> array:
# '''
# TODO: Implement the class.
# '''
# plugins_array = []
# # Load the class dynamically
# plugins = self.conf.get_config_field('PROPERTIES', 'KIWIS_FILTER_PLUGIN_CLASSES')
# module_name = 'lisfloodutilities.gridding.lib.filters'
# try:
# for plugin in plugins:
# class_name = plugin
# class_args = plugins[plugin]
# module = importlib.import_module(module_name)
# class_instance = getattr(module, class_name)(class_args)
# plugins_array.append(class_instance)
# except ImportError:
# print(f"Error: Could not import module '{module_name}'")
# except AttributeError:
# print(f"Error: Could not find class '{class_name}' in module '{module_name}'")
# return plugins_array
#
# def __filter_kiwis(self, filename_kiwis: Path) -> pd.DataFrame:
# return None
#
# def __get_points_filename(self, kiwis_timestamp: str, filename_kiwis: Path) -> Path:
# '''
# Returns the points file path.
# In overwrite mode it tries to return the first existing point file path it finds and, failing that, generates a new one.
# Otherwise it generates a new file path.
# '''
# if self.overwrite_file:
# for points_path in sorted(filename_kiwis.parent.rglob(f'{self.var_code}{kiwis_timestamp}_??????????????.txt')):
# if points_path.is_file():
# return points_path
# pointfile_timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
# return Path(filename_kiwis.parent, f'{self.var_code}{kiwis_timestamp}_{pointfile_timestamp}.txt')
#
# def __load_kiwis_paths(self):
# netcdf_offset_file_date = int(self.conf.get_config_field('VAR_TIME','OFFSET_FILE_DATE'))
# for filename_kiwis in sorted(self.infolder.rglob(self.inwildcard)):
# kiwis_timestamp = self.__get_timestamp_from_filename(filename_kiwis)
# file_timestamp = kiwis_timestamp + timedelta(days=netcdf_offset_file_date)
# if self.__processable_file(file_timestamp, self.conf.start_date, self.conf.end_date):
# kiwis_timestamp_str = kiwis_timestamp.strftime(self.DATE_PATTERN_CONDENSED)
# filename_points = self.__get_points_filename(kiwis_timestamp_str, filename_kiwis)
# self.files_list[kiwis_timestamp_str] = (filename_kiwis, filename_points)
# # print_msg(f'Processing file: {filename}')
#
# def __get_next_file_group(self):
# '''
# TODO
# '''
# if self.is_daily_var:
# for kiwis_timestamp_str in self.files_list:
# self.file_groups.append([kiwis_timestamp_str])
# else: # divide the files into groups of 4 and raise an exception if a group is not complete
# for kiwis_timestamp_str in self.files_list:
# self.file_groups.append([])
#
# def __get_timestamp_from_filename(self, filename: Path) -> datetime:
# file_timestamp = filename.name[self.var_size:12+self.var_size] + '00'
# return datetime.strptime(file_timestamp, FileUtils.DATE_PATTERN_CONDENSED)
#
# def __processable_file(self, file_timestamp: datetime, start_date: datetime = None, end_date: datetime = None) -> bool:
# return (start_date is not None and start_date <= file_timestamp and
# end_date is not None and file_timestamp <= end_date)
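
The __get_next_file_group stub above only records its intent: for sub-daily variables the kiwis files should be cut into fixed-size groups (e.g. four 6-hourly files per day), raising an error when a group is incomplete. A sketch of that grouping, assuming the timestamps are the keys of an OrderedDict as built in __load_kiwis_paths (group_timestamps is a hypothetical helper, not part of the commit):

from collections import OrderedDict

def group_timestamps(files_list: OrderedDict, files_per_group: int) -> list:
    # Walk the ordered timestamp keys and cut them into fixed-size groups,
    # e.g. 4 files per day for a 6-hourly variable.
    keys = list(files_list.keys())
    groups = [keys[i:i + files_per_group] for i in range(0, len(keys), files_per_group)]
    if groups and len(groups[-1]) != files_per_group:
        raise ValueError(f'Incomplete group of kiwis files: {groups[-1]}')
    return groups
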
3 changes: 2 additions & 1 deletion src/lisfloodutilities/gridding/lib/writers.py
@@ -14,6 +14,7 @@
import time as timex
import warnings
import numpy as np
import copy
from argparse import ArgumentTypeError
from pathlib import Path
from datetime import datetime, timedelta
@@ -123,7 +124,7 @@ def write_timestep(self, grid: np.ndarray, timestep: int = -1):
raise Exception("netCDF Dataset was not initialized. If file already exists, use --force flag to append.")
timestep_idx = int(timestep / self.time_frequency)
self.nf.variables[self.netcdf_var_time][timestep_idx] = timestep
values = self.setNaN(grid)
values = self.setNaN(copy.deepcopy(grid))
values[values < self.conf.value_min_packed] = np.nan
values[values > self.conf.value_max_packed] = np.nan
values[values != self.conf.VALUE_NAN] *= self.conf.scale_factor
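The write_timestep change above guards the caller's grid: the masked assignments write into the array returned by setNaN, and the added deepcopy ensures those writes cannot reach the original grid (which would happen if setNaN handed back the same array it received). A minimal illustration of that aliasing hazard with a plain NumPy array, not the project's grid objects:

import copy
import numpy as np

grid = np.array([1.0, -9999.0, 3.0])

# Masked assignment through a reference mutates the caller's array in place.
alias = grid
alias[alias < 0] = np.nan
# grid now carries the NaN as well.

# Operating on a copy leaves the original values intact.
grid = np.array([1.0, -9999.0, 3.0])
values = copy.deepcopy(grid)  # np.copy(grid) would also do for a plain ndarray
values[values < 0] = np.nan
# grid is unchanged; only values carries the NaN.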

0 comments on commit 44291b7
