Create new filter ProvidersKiwisFilter

ec-jrc · Sep 20, 2024 · 2d11061 · 2d11061
1 parent 61e90f4
commit 2d11061
Show file tree

Hide file tree

Showing 8 changed files with 396 additions and 7 deletions.
diff --git a/src/lisfloodutilities/gridding/configuration/1arcmin/config_pr6.txt b/src/lisfloodutilities/gridding/configuration/1arcmin/config_pr6.txt
@@ -19,7 +19,7 @@ LONG_NAME = 6 Hourly Accumulated Precipitation
 # 1304 - EURO4M-APGD
 # 1310 - HNMS
 # 1329 - ERA5-land
-KIWIS_FILTER_PLUGIN_CLASSES = {'DowgradedObservationsKiwisFilter': {'1304': 1.0, '1302': 1.0, '1295': 1.0}, 'ObservationsKiwisFilter': {'1303': 100.0, '1329': 100.0}}
+# KIWIS_FILTER_PLUGIN_CLASSES = {'DowgradedObservationsKiwisFilter': {'1304': 1.0, '1302': 1.0, '1295': 1.0}, 'ObservationsKiwisFilter': {'1303': 100.0, '1329': 100.0}}
 
 
 [VAR_TIME]

diff --git a/src/lisfloodutilities/gridding/configuration/1arcmin/config_ta6.txt b/src/lisfloodutilities/gridding/configuration/1arcmin/config_ta6.txt
@@ -13,6 +13,16 @@ DATA_TYPE_PACKED = i2
 STANDARD_NAME = air_temperature
 LONG_NAME = 6 Hourly Average Temperature
 
+# 1280 - IMGW
+# 1295 - MARS
+# 1302 - CarpatClim
+# 1303 - ERAinterim
+# 1304 - EURO4M-APGD
+# 1310 - HNMS
+# 1323 - ICON
+# 1329 - ERA5-land
+KIWIS_FILTER_PLUGIN_CLASSES = {'ObservationsKiwisFilter': {'1329': 700.0}, 'ProvidersKiwisFilter': {'1323': [('2022-01-01 06:00:00', '2023-12-31 06:00:00')]}}
+
 [VAR_TIME]
 
 UNIT_PATTERN = hours since %%Y-%%m-%%d %%H:%%M:%%S.%%f

diff --git a/src/lisfloodutilities/gridding/configuration/1arcmin/config_tn.txt b/src/lisfloodutilities/gridding/configuration/1arcmin/config_tn.txt
@@ -13,6 +13,16 @@ DATA_TYPE_PACKED = i2
 STANDARD_NAME = air_temperature
 LONG_NAME = Daily Minimum Temperature
 
+# 1280 - IMGW
+# 1295 - MARS
+# 1302 - CarpatClim
+# 1303 - ERAinterim
+# 1304 - EURO4M-APGD
+# 1310 - HNMS
+# 1323 - ICON
+# 1329 - ERA5-land
+KIWIS_FILTER_PLUGIN_CLASSES = {'ObservationsKiwisFilter': {'1329': 700.0}, 'ProvidersKiwisFilter': {'1323': [('2022-01-01 06:00:00', '2023-12-31 06:00:00')]}}
+
 [VAR_TIME]
 
 UNIT = days since 1990-01-01 06:00:00.0

diff --git a/src/lisfloodutilities/gridding/configuration/1arcmin/config_tx.txt b/src/lisfloodutilities/gridding/configuration/1arcmin/config_tx.txt
@@ -13,6 +13,16 @@ DATA_TYPE_PACKED = i2
 STANDARD_NAME = air_temperature
 LONG_NAME = Daily Maximum Temperature
 
+# 1280 - IMGW
+# 1295 - MARS
+# 1302 - CarpatClim
+# 1303 - ERAinterim
+# 1304 - EURO4M-APGD
+# 1310 - HNMS
+# 1323 - ICON
+# 1329 - ERA5-land
+KIWIS_FILTER_PLUGIN_CLASSES = {'ObservationsKiwisFilter': {'1329': 700.0}, 'ProvidersKiwisFilter': {'1323': [('2022-01-01 18:00:00', '2023-12-31 18:00:00')]}}
+
 [VAR_TIME]
 
 UNIT = days since 1990-01-01 18:00:00.0

diff --git a/src/lisfloodutilities/gridding/lib/filters.py b/src/lisfloodutilities/gridding/lib/filters.py
@@ -235,6 +235,43 @@ def has_neighbor_within_radius_from_other_providers(self, row: pd.Series, tree:
         return False
 
 
+class ProvidersKiwisFilter(KiwisFilter):
+    """
+    Class to remove from the Kiwis files metadata the stations of given providers, but only when the
+    timestamp of the file being filtered falls inside one of the time intervals defined for that provider.
+    Expects to have in filter_args a dictionary containing the provider ID whose stations we want to
+    filter (as key) and a list of (start, end) pairs of dates, formatted as 'YYYY-MM-DD HH:MM:SS',
+    defining the intervals to filter the stations from. Both ends of each interval are inclusive.
+    filter_args = {1121: [('1992-01-02 06:00:00', '1993-01-01 06:00:00'), ('1995-01-02 06:00:00', '1996-01-01 06:00:00')]}
+    NOTE(review): provider IDs are matched with isin() against the provider ID column, so the keys must
+    have the same type as the values of that column (the example above uses an int key) - confirm against the caller.
+    """
+
+    def __init__(self, filter_columns: dict = {}, filter_args: dict = {}, var_code: str = '', quiet_mode: bool = False):
+        super().__init__(filter_columns, filter_args, var_code, quiet_mode)
+        date_format = "%Y-%m-%d %H:%M:%S"
+        # Group the providers by time interval. {(start1, end1): [provider_id1, provider_id2]}
+        # NOTE(review): self.args is presumably set by the parent class from filter_args - confirm.
+        self.provider_intervals = {}
+        for provider_id, time_intervals in self.args.items():
+            for start, end in time_intervals:
+                cur_interval = (dt.strptime(start, date_format), dt.strptime(end, date_format))
+                self.provider_intervals.setdefault(cur_interval, []).append(provider_id)
+
+    def apply_filter(self, df: pd.DataFrame) -> pd.DataFrame:
+        df = super().apply_filter(df)
+        # Filter providers only if current file datetime belongs to any of the intervals
+        # NOTE(review): self.cur_timestamp is not set in this class, presumably the parent class sets it - confirm.
+        for (start, end), providers_to_remove in self.provider_intervals.items():
+            if start <= self.cur_timestamp <= end:
+                df = df[~df[self.COL_PROVIDER_ID].isin(providers_to_remove)]
+        self.print_statistics(df)
+        return df
+
+
 class DowgradedObservationsKiwisFilter(ObservationsKiwisFilter):
     """
     Class to filter Kiwis files metadata for stations whose daily data was down graded to 6hourly data

diff --git a/src/lisfloodutilities/gridding/tools/get_stats_from_kiwis_logs.py b/src/lisfloodutilities/gridding/tools/get_stats_from_kiwis_logs.py
@@ -0,0 +1,143 @@
+
+# Module metadata. __version__ and __updated__ are shown in the command line help text built in main().
+__author__="Goncalo Gomes"
+__date__="$Jun 06, 2024 10:45:00$"
+__version__="0.1"
+__updated__="$Jun 06, 2024 10:45:00$"
+
+"""
+Copyright 2019-2020 European Union
+Licensed under the EUPL, Version 1.2 or as soon they will be approved by the European Commission  subsequent versions of the EUPL (the "Licence");
+You may not use this work except in compliance with the Licence.
+You may obtain a copy of the Licence at:
+https://joinup.ec.europa.eu/sites/default/files/inline-files/EUPL%20v1_2%20EN(1).txt
+Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the Licence for the specific language governing permissions and limitations under the Licence.
+
+"""
+
+import sys
+import os
+from pathlib import Path
+from argparse import ArgumentParser, ArgumentTypeError
+import pandas as pd
+import json
+import csv
+from datetime import datetime, timedelta
+from lisfloodutilities.gridding.lib.utils import FileUtils
+
+
+# Names of the columns of the tab separated kiwis statistics file read by run().
+# COL_OUTPUT_VAR_CODE is declared but not referenced anywhere else in this file.
+COL_OUTPUT_QUALITY_CODE_WRONG = 'QUALITY_CODE_WRONG'
+COL_OUTPUT_TOTAL_OBSERVATIONS = 'TOTAL_OBSERVATIONS'
+COL_OUTPUT_TIMESTAMP = 'TIMESTAMP'
+COL_OUTPUT_VAR_CODE = 'VAR_CODE'
+COL_OUTPUT_PROVIDER_ID = 'PROVIDER_ID'
+
+
+def run(statfile: str, outfile: str):
+    """
+    Summarise a kiwis statistics file into four tab separated rows, with one column per provider,
+    and append them to the output file.
+
+    statfile: tab separated file read with pandas; the columns TIMESTAMP, PROVIDER_ID,
+              TOTAL_OBSERVATIONS and QUALITY_CODE_WRONG are used.
+    outfile: file the four rows are appended to; missing parent folders are created.
+    """
+    out_path = Path(outfile)
+    # Make sure the output folder exists before doing anything else
+    Path(out_path.parent).mkdir(parents=True, exist_ok=True)
+
+    stats_path = Path(statfile)
+    print(f'Reading statistics file: {stats_path}')
+    stats = pd.read_csv(stats_path, sep="\t")
+    provider_ids = sorted(stats[COL_OUTPUT_PROVIDER_ID].unique())
+
+    print('provider_ids:', provider_ids)
+
+    # The first four characters of the first timestamp label every output row
+    year = stats[COL_OUTPUT_TIMESTAMP].iloc[0][:4]
+
+    header_row = [year, 'DP', *provider_ids]
+    stations_row = [year, 'average stations with data']
+    avg_error_row = [year, 'average error']
+    max_error_row = [year, 'max number of errors in a day']
+
+    for provider_id in provider_ids:
+        provider_rows = stats[COL_OUTPUT_PROVIDER_ID] == provider_id
+        observations = stats.loc[provider_rows, COL_OUTPUT_TOTAL_OBSERVATIONS]
+        wrong_codes = stats.loc[provider_rows, COL_OUTPUT_QUALITY_CODE_WRONG]
+        stations_row.append(round(observations.mean(), 0))
+        avg_error_row.append(round(wrong_codes.mean()))
+        max_error_row.append(round(wrong_codes.max()))
+
+    # Append mode: rows already present in the output file are kept
+    with open(out_path, 'a', newline='') as file:
+        tsv_writer = csv.writer(file, delimiter='\t')
+        tsv_writer.writerows([header_row, stations_row, avg_error_row, max_error_row])
+
+    print(f'Wrote file: {out_path}')
+
+
+def main(argv):
+    '''
+    Parse the command line options and run the statistics extraction.
+
+    argv: list of command line arguments, without the program name.
+    Exceptions raised while running are not caught here.
+    '''
+    program_version = "v%s" % __version__
+    program_build_date = "%s" % __updated__
+
+    program_version_string = 'version %s (%s)\n' % (program_version, program_build_date)
+    program_longdesc = '''
+    This script extracts kiwis logged statistics into another tab separated file.
+    '''
+    program_license = """
+    Copyright 2019-2020 European Union
+    Licensed under the EUPL, Version 1.2 or as soon they will be approved by the European Commission  subsequent versions of the EUPL (the "Licence");
+    You may not use this work except in compliance with the Licence.
+    You may obtain a copy of the Licence at:
+    https://joinup.ec.europa.eu/sites/default/files/inline-files/EUPL%20v1_2%20EN(1).txt
+    Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the Licence for the specific language governing permissions and limitations under the Licence.
+    """
+
+    # setup option parser
+    parser = ArgumentParser(epilog=program_license, description=program_version_string+program_longdesc)
+
+    parser.add_argument("-s", "--stat", dest="statfile", required=True, type=FileUtils.file_type,
+                        help="Set input file containing kiwis statistics name (*.tsv).",
+                        metavar="/path/to/kiwis_stats_ws_2001.tsv")
+    # NOTE(review): FileUtils.file_type is also applied to the output path; confirm it accepts
+    # a file that does not exist yet, since run() is written to create it.
+    parser.add_argument("-o", "--out", dest="outfile", required=True, type=FileUtils.file_type,
+                        help="Set output file name (*.tsv).",
+                        metavar="/path/to/output_file.tsv")
+
+    # process options
+    args = parser.parse_args(argv)
+
+    print(f"Statistics File: {args.statfile}")
+    print(f"Output File: {args.outfile}")
+
+    run(args.statfile, args.outfile)
+    print("Finished.")
+
+
+def main_script():
+    """Run main() with the command line arguments and exit the interpreter with its return value."""
+    exit_status = main(sys.argv[1:])
+    sys.exit(exit_status)
+
+
+# Run the tool only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    main_script()