Skip to content

Commit

Permalink
Create new filter ProvidersKiwisFilter
Browse files Browse the repository at this point in the history
  • Loading branch information
gnrgomes committed Sep 20, 2024
1 parent 61e90f4 commit 2d11061
Show file tree
Hide file tree
Showing 8 changed files with 396 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ LONG_NAME = 6 Hourly Accumulated Precipitation
# 1304 - EURO4M-APGD
# 1310 - HNMS
# 1329 - ERA5-land
KIWIS_FILTER_PLUGIN_CLASSES = {'DowgradedObservationsKiwisFilter': {'1304': 1.0, '1302': 1.0, '1295': 1.0}, 'ObservationsKiwisFilter': {'1303': 100.0, '1329': 100.0}}
# KIWIS_FILTER_PLUGIN_CLASSES = {'DowgradedObservationsKiwisFilter': {'1304': 1.0, '1302': 1.0, '1295': 1.0}, 'ObservationsKiwisFilter': {'1303': 100.0, '1329': 100.0}}


[VAR_TIME]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@ DATA_TYPE_PACKED = i2
STANDARD_NAME = air_temperature
LONG_NAME = 6 Hourly Average Temperature

# 1280 - IMGW
# 1295 - MARS
# 1302 - CarpatClim
# 1303 - ERAinterim
# 1304 - EURO4M-APGD
# 1310 - HNMS
# 1323 - ICON
# 1329 - ERA5-land
KIWIS_FILTER_PLUGIN_CLASSES = {'ObservationsKiwisFilter': {'1329': 700.0}, 'ProvidersKiwisFilter': {'1323': [('2022-01-01 06:00:00', '2023-12-31 06:00:00')]}}

[VAR_TIME]

UNIT_PATTERN = hours since %%Y-%%m-%%d %%H:%%M:%%S.%%f
Expand Down
10 changes: 10 additions & 0 deletions src/lisfloodutilities/gridding/configuration/1arcmin/config_tn.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@ DATA_TYPE_PACKED = i2
STANDARD_NAME = air_temperature
LONG_NAME = Daily Minimum Temperature

# 1280 - IMGW
# 1295 - MARS
# 1302 - CarpatClim
# 1303 - ERAinterim
# 1304 - EURO4M-APGD
# 1310 - HNMS
# 1323 - ICON
# 1329 - ERA5-land
KIWIS_FILTER_PLUGIN_CLASSES = {'ObservationsKiwisFilter': {'1329': 700.0}, 'ProvidersKiwisFilter': {'1323': [('2022-01-01 06:00:00', '2023-12-31 06:00:00')]}}

[VAR_TIME]

UNIT = days since 1990-01-01 06:00:00.0
Expand Down
10 changes: 10 additions & 0 deletions src/lisfloodutilities/gridding/configuration/1arcmin/config_tx.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@ DATA_TYPE_PACKED = i2
STANDARD_NAME = air_temperature
LONG_NAME = Daily Maximum Temperature

# 1280 - IMGW
# 1295 - MARS
# 1302 - CarpatClim
# 1303 - ERAinterim
# 1304 - EURO4M-APGD
# 1310 - HNMS
# 1323 - ICON
# 1329 - ERA5-land
KIWIS_FILTER_PLUGIN_CLASSES = {'ObservationsKiwisFilter': {'1329': 700.0}, 'ProvidersKiwisFilter': {'1323': [('2022-01-01 18:00:00', '2023-12-31 18:00:00')]}}

[VAR_TIME]

UNIT = days since 1990-01-01 18:00:00.0
Expand Down
37 changes: 37 additions & 0 deletions src/lisfloodutilities/gridding/lib/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,43 @@ def has_neighbor_within_radius_from_other_providers(self, row: pd.Series, tree:
return False


class ProvidersKiwisFilter(KiwisFilter):
    """
    Class to filter Kiwis files metadata for stations that belong to a list of providers and inside a defined list of time intervals.
    Expects to have in filter_args a dictionary containing the provider ID whose stations we want to
    filter (as key) and an array of pairs of start and end dates defining the intervals to filter the station from.
    Dates are formatted as '%Y-%m-%d %H:%M:%S' and both ends of each interval are inclusive.
    filter_args = {1121: [('1992-01-02 06:00:00', '1993-01-01 06:00:00'), ('1995-01-02 06:00:00', '1996-01-01 06:00:00')]}
    """

    def __init__(self, filter_columns: dict = {}, filter_args: dict = {}, var_code: str = '', quiet_mode: bool = False):
        """
        Parse the configured intervals once so apply_filter only compares datetimes.

        The arguments are forwarded untouched to the base class; the default
        dictionaries are only read here, never mutated.

        Raises:
            ValueError: if a start or end date does not match '%Y-%m-%d %H:%M:%S',
                or an interval is not a (start, end) pair.
        """
        super().__init__(filter_columns, filter_args, var_code, quiet_mode)
        date_format = "%Y-%m-%d %H:%M:%S"
        # Diagnostic output honours quiet_mode instead of printing unconditionally.
        if not quiet_mode:
            print('args:', self.args)
        # Getting the intervals and providers. {(start1, end2): [provider_id1, provider_id2]}
        # self.args is presumably the filter_args stored by the base class - verify in KiwisFilter.
        self.provider_intervals = {}
        for provider_id, time_intervals in self.args.items():
            for start, end in time_intervals:
                cur_interval = (dt.strptime(start, date_format), dt.strptime(end, date_format))
                self.provider_intervals.setdefault(cur_interval, []).append(provider_id)
        if not quiet_mode:
            print('provider_intervals:', self.provider_intervals)

    def apply_filter(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Apply the base class filter, then drop the rows of every provider whose
        configured interval contains the timestamp of the file being processed.

        Returns the filtered dataframe after reporting it through print_statistics.
        """
        df = super().apply_filter(df)
        # Filter providers only if current file datetime belongs to any of the intervals
        # NOTE(review): self.cur_timestamp is not set in this class; presumably the base class
        # sets it to a datetime before filtering - confirm.
        for (start, end), providers_to_remove in self.provider_intervals.items():
            if start <= self.cur_timestamp <= end:
                # NOTE(review): provider IDs are used exactly as written in the configuration
                # (strings such as '1323' in the shipped config files); isin() only matches
                # if the provider column holds the same type - confirm.
                df = df[~df[self.COL_PROVIDER_ID].isin(providers_to_remove)]
        self.print_statistics(df)
        return df


class DowgradedObservationsKiwisFilter(ObservationsKiwisFilter):
"""
Class to filter Kiwis files metadata for stations whose daily data was down graded to 6hourly data
Expand Down
143 changes: 143 additions & 0 deletions src/lisfloodutilities/gridding/tools/get_stats_from_kiwis_logs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@

# Module metadata; __version__ and __updated__ feed the version string built in main().
__author__="Goncalo Gomes"
__date__="$Jun 06, 2024 10:45:00$"
__version__="0.1"
__updated__="$Jun 06, 2024 10:45:00$"

"""
Copyright 2019-2020 European Union
Licensed under the EUPL, Version 1.2 or as soon they will be approved by the European Commission subsequent versions of the EUPL (the "Licence");
You may not use this work except in compliance with the Licence.
You may obtain a copy of the Licence at:
https://joinup.ec.europa.eu/sites/default/files/inline-files/EUPL%20v1_2%20EN(1).txt
Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the Licence for the specific language governing permissions and limitations under the Licence.
"""

import sys
import os
from pathlib import Path
from argparse import ArgumentParser, ArgumentTypeError
import pandas as pd
import json
import csv
from datetime import datetime, timedelta
from lisfloodutilities.gridding.lib.utils import FileUtils


# Column names read from the tab separated kiwis statistics file.
COL_OUTPUT_QUALITY_CODE_WRONG = 'QUALITY_CODE_WRONG'
COL_OUTPUT_TOTAL_OBSERVATIONS = 'TOTAL_OBSERVATIONS'
COL_OUTPUT_TIMESTAMP = 'TIMESTAMP'
COL_OUTPUT_VAR_CODE = 'VAR_CODE'
COL_OUTPUT_PROVIDER_ID = 'PROVIDER_ID'


def run(statfile: str, outfile: str):
    """
    Summarise a kiwis statistics file and append the summary to outfile.

    Four tab separated rows are appended, each starting with the first four
    characters of the first TIMESTAMP cell (the year) followed by a label and
    one column per provider ID, in ascending order:
      1. 'DP': the provider IDs themselves
      2. 'average stations with data': mean of TOTAL_OBSERVATIONS
      3. 'average error': mean of QUALITY_CODE_WRONG
      4. 'max number of errors in a day': maximum of QUALITY_CODE_WRONG

    The output file is opened in append mode, so calling this function
    several times accumulates the summaries in a single file.

    Args:
        statfile: path of the tab separated statistics file to read.
        outfile: path of the tab separated file to append to; missing parent
            folders are created.

    Raises:
        ValueError: if the statistics file contains a header but no data rows.
    """
    outfilepath = Path(outfile)
    statfilepath = Path(statfile)

    print(f'Reading statistics file: {statfilepath}')
    df_stats = pd.read_csv(statfilepath, sep="\t")
    # Fail early with an explicit message instead of an IndexError from iloc[0] below.
    if df_stats.empty:
        raise ValueError(f'Statistics file contains no data rows: {statfilepath}')

    provider_ids = sorted(df_stats[COL_OUTPUT_PROVIDER_ID].unique())
    print('provider_ids:', provider_ids)

    # The year is taken from the first row; str() keeps the slice valid
    # even if pandas did not read the timestamp column as text.
    yyyy = str(df_stats[COL_OUTPUT_TIMESTAMP].iloc[0])[:4]

    average_stations = []
    average_errors = []
    max_errors = []
    for provider_id in provider_ids:
        # Select the rows of this provider once and reuse the selection for all statistics.
        provider_rows = df_stats[COL_OUTPUT_PROVIDER_ID] == provider_id
        observations = df_stats.loc[provider_rows, COL_OUTPUT_TOTAL_OBSERVATIONS]
        wrong_codes = df_stats.loc[provider_rows, COL_OUTPUT_QUALITY_CODE_WRONG]
        average_stations.append(round(observations.mean(), 0))
        average_errors.append(round(wrong_codes.mean()))
        max_errors.append(round(wrong_codes.max()))

    out_rows = [
        [yyyy, 'DP', *provider_ids],
        [yyyy, 'average stations with data', *average_stations],
        [yyyy, 'average error', *average_errors],
        [yyyy, 'max number of errors in a day', *max_errors],
    ]

    # Create the output parent folders only once there is something to write,
    # so a failed read does not leave empty folders behind.
    outfilepath.parent.mkdir(parents=True, exist_ok=True)
    with open(outfilepath, 'a', newline='') as file:
        writer = csv.writer(file, delimiter='\t')
        writer.writerows(out_rows)

    print(f'Wrote file: {outfilepath}')


def main(argv):
    """
    Parse the command line options and run the statistics extraction.

    Args:
        argv: list of command line arguments, without the program name.

    Returns None on success. Errors are not caught here: they propagate to
    the caller with their full traceback, and argparse exits on invalid options.
    """
    program_version = f"v{__version__}"
    program_build_date = f"{__updated__}"

    program_version_string = f'version {program_version} ({program_build_date})\n'
    program_longdesc = '''
This script extracts kiwis logged statistics into another tab separated file.
'''
    program_license = """
Copyright 2019-2020 European Union
Licensed under the EUPL, Version 1.2 or as soon they will be approved by the European Commission subsequent versions of the EUPL (the "Licence");
You may not use this work except in compliance with the Licence.
You may obtain a copy of the Licence at:
https://joinup.ec.europa.eu/sites/default/files/inline-files/EUPL%20v1_2%20EN(1).txt
Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the Licence for the specific language governing permissions and limitations under the Licence.
"""

    # setup option parser
    parser = ArgumentParser(epilog=program_license, description=program_version_string+program_longdesc)

    parser.add_argument("-s", "--stat", dest="statfile", required=True, type=FileUtils.file_type,
                        help="Set input file containing kiwis statistics name (*.tsv).",
                        metavar="/path/to/kiwis_stats_ws_2001.tsv")
    # NOTE(review): the output path goes through the same FileUtils.file_type converter as the
    # input; confirm that it accepts an output file that does not exist yet.
    parser.add_argument("-o", "--out", dest="outfile", required=True, type=FileUtils.file_type,
                        help="Set output file name (*.tsv).",
                        metavar="/path/to/output_file.tsv")

    # process options
    args = parser.parse_args(argv)

    print(f"Statistics File: {args.statfile}")
    print(f"Output File: {args.outfile}")

    run(args.statfile, args.outfile)
    print("Finished.")


def main_script():
    """Console entry point: run main() on the command line arguments and exit with its result."""
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)


# Only start the tool when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main_script()
Loading

0 comments on commit 2d11061

Please sign in to comment.