diff --git a/src/lisfloodutilities/gridding/decumulate_daily_grids.py b/src/lisfloodutilities/gridding/decumulate_daily_grids.py index ba9158e..e2244dc 100644 --- a/src/lisfloodutilities/gridding/decumulate_daily_grids.py +++ b/src/lisfloodutilities/gridding/decumulate_daily_grids.py @@ -175,7 +175,7 @@ def print_statistics(provider_ids: List[str], df_kiwis_24h: pd.DataFrame, df_kiw i += 1 def run(conf_24h: Config, conf_6h: Config, kiwis_24h_06am_path: Path, kiwis_6h_12pm_path: Path, - kiwis_6h_18pm_path: Path, kiwis_6h_12am_path: Path, kiwis_6h_06am_path: Path, output_path: Path = None): + kiwis_6h_18pm_path: Path, kiwis_6h_12am_path: Path, kiwis_6h_06am_path: Path, input_path_6h: Path, output_path: Path = None): """ Interpolate text files containing (x, y, value) using inverse distance interpolation. Produces as output, either a netCDF file containing all the grids or one TIFF file per grid. @@ -217,7 +217,10 @@ def run(conf_24h: Config, conf_6h: Config, kiwis_24h_06am_path: Path, kiwis_6h_1 for kiwis_filepath in kiwis_filepaths[1:]: i += 1 if output_path is not None: - filepath = Path.joinpath(output_path, kiwis_filepath.name) + outfile = str(kiwis_filepath).replace(str(input_path_6h), str(output_path)) + filepath = Path(outfile) + # Create the output parent folders if not exist yet + Path(filepath.parent).mkdir(parents=True, exist_ok=True) else: filepath = kiwis_filepath df_kiwis_array[i].to_csv(filepath, index=False, header=True, sep="\t") @@ -368,8 +371,8 @@ def main(argv): print_msg(f"6hourly PR kiwis file 00:00: {kiwis_6h_12am_path}") print_msg(f"6hourly PR kiwis file 06:00: {kiwis_6h_06am_path}") - run(conf_24h, conf_6h, kiwis_24h_06am_path, kiwis_6h_12pm_path, - kiwis_6h_18pm_path, kiwis_6h_12am_path, kiwis_6h_06am_path, output_path=output_path) + run(conf_24h, conf_6h, kiwis_24h_06am_path, kiwis_6h_12pm_path, kiwis_6h_18pm_path, + kiwis_6h_12am_path, kiwis_6h_06am_path, input_path_6h=kiwis_6h_folder_path, output_path=output_path) except Exception as e: 
# NOTE: the commented lines below are residue of the collapsed patch this text
# was extracted from (tail of the previous file's hunk and the header of this
# file's diff). They are preserved verbatim and are not part of the module.
#     indent = len(program_name) * " "
#     sys.stderr.write(program_name + ": " + repr(e) + "\n")
# diff --git a/src/lisfloodutilities/gridding/tools/analyse_incidents.py b/src/lisfloodutilities/gridding/tools/analyse_incidents.py
# new file mode 100644
# index 0000000..65d38fa
# --- /dev/null
# +++ b/src/lisfloodutilities/gridding/tools/analyse_incidents.py
# @@ -0,0 +1,151 @@
"""
Analyse CSV files containing KIWIS incidents and write, for each input file,
a tab separated report with the incident totals per provider.

Copyright 2019-2020 European Union
Licensed under the EUPL, Version 1.2 or as soon they will be approved by the European Commission subsequent versions of the EUPL (the "Licence");
You may not use this work except in compliance with the Licence.
You may obtain a copy of the Licence at:
https://joinup.ec.europa.eu/sites/default/files/inline-files/EUPL%20v1_2%20EN(1).txt
Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the Licence for the specific language governing permissions and limitations under the Licence.

"""

__author__ = "Goncalo Gomes"
__date__ = "$Jun 06, 2024 10:45:00$"
__version__ = "0.1"
__updated__ = "$Jun 06, 2024 10:45:00$"

import ast
import sys
import os
from pathlib import Path
from argparse import ArgumentParser, ArgumentTypeError
import pandas as pd
import json


COL_PROVIDER_ID = 'SITE'
COL_STATION_NUM = 'STATION'
COL_PARAMETER = 'PARAM'
COL_TIMESERIES = 'TS'
COL_STATION_NAME = 'NAME'
COL_STATUS = 'Status'
COL_LAT = 'latitude'
COL_LON = 'longitude'
COL_HEIGHT = 'elevation'
COL_RELATED_PATHS = 'RelatedPaths'
COL_RESULTS_TYPE = 'resultsType'
COL_MSG = 'message'
COL_TIME_INTERVAL_START = 'from'
COL_TIME_INTERVAL_END = 'until'
COL_NUM_INVALID = 'nInvalid'
COL_NUM_VALID = 'nValid'
COL_PERCENTAGE_INVALID = 'percInvalid'
COL_INCIDENTS = 'incidents'
COL_TOTAL_INCIDENTS = 'totalIncidents'


def _parse_incidents(text: str):
    """Parse the textual incidents value without executing it.

    The text is first read as a Python literal and, failing that, as JSON.

    Raises:
        ValueError: if the text is neither a Python literal nor valid JSON.
    """
    try:
        return ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as literal_error:
        # Not a Python literal; the value may still be a JSON document.
        try:
            return json.loads(text)
        except ValueError:
            raise ValueError(f'Unable to parse incidents: {text!r}') from literal_error


def get_total_incidents(row: pd.Series) -> int:
    """Return the sum of all incident counters stored in a row.

    The COL_INCIDENTS cell is expected to hold a mapping (or its textual
    representation) whose values are incident counts.

    Args:
        row: A row containing the COL_INCIDENTS column.

    Returns:
        The sum of the mapping values that can be converted to int. 0 is
        returned when the cell is missing, empty, 'nan', '{}', unparsable or
        not a mapping. Values that cannot be converted are reported on stdout
        and skipped.
    """
    incidents = row[COL_INCIDENTS]
    if incidents is None:
        return 0
    if isinstance(incidents, dict):
        incidents_dic = incidents
    else:
        # str() also covers a float NaN cell, which becomes 'nan'.
        incidents_text = str(incidents).strip()
        if incidents_text in ('', 'nan', '{}'):
            return 0
        # The text comes from a data file, so it is parsed as a literal
        # instead of being evaluated as code.
        try:
            incidents_dic = _parse_incidents(incidents_text)
        except ValueError:
            print(f'ERROR evaluating row: {row}')
            return 0
    if not isinstance(incidents_dic, dict):
        return 0
    total_incidents = 0
    for incident_count in incidents_dic.values():
        try:
            total_incidents += int(incident_count)
        except (TypeError, ValueError, OverflowError):
            print(f'ERROR evaluating row: {row}')
    return total_incidents


def run(infolder: str, outfolder: str) -> None:
    """Summarise every '*.csv' file found recursively under infolder.

    Each input file is read with ';' as delimiter, the incidents of every row
    are totalled and then aggregated (sum and count) per COL_PROVIDER_ID. The
    result is written tab separated to '<name>_out.tsv' under outfolder, in
    the same relative sub folder the input file has under infolder.

    Args:
        infolder: Folder searched recursively for the input CSV files.
        outfolder: Folder receiving the report files; missing sub folders are
            created.
    """
    inwildcard = '*.csv'
    inpath = Path(infolder)
    outpath = Path(outfolder)

    for filename in sorted(inpath.rglob(inwildcard)):
        print(f'Processing file: {filename}')
        # Mirror the input tree using path arithmetic: a textual replace of
        # the folder name breaks on trailing separators and repeated names.
        relative_path = filename.relative_to(inpath)
        outfilepath = outpath / relative_path.parent / f'{relative_path.name}_out.tsv'

        df = pd.read_csv(filename, delimiter=';')
        if df.empty:
            print(f'WARNING: No data was found in file {filename}')
            continue
        df = df.astype({COL_INCIDENTS: 'str'})
        df[COL_TOTAL_INCIDENTS] = df.apply(get_total_incidents, axis=1)

        df = df.groupby([COL_PROVIDER_ID])[COL_TOTAL_INCIDENTS].agg(['sum', 'count']).reset_index()

        if df.empty:
            print(f'WARNING: No data was found in file {filename}')
            continue
        # Create the output parent folders if they do not exist yet
        outfilepath.parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(outfilepath, index=False, header=True, sep='\t')
        print(f'Wrote file: {outfilepath}')


def main(argv):
    """Parse the command line options and run the incident analysis.

    Args:
        argv: Command line arguments without the program name.

    Returns:
        0 on success. Exceptions raised by the analysis are not caught here.
    """
    # Imported here, where it is used, so the analysis functions above remain
    # importable without the rest of the package.
    from lisfloodutilities.gridding.lib.utils import FileUtils

    program_version = "v%s" % __version__
    program_build_date = "%s" % __updated__

    program_version_string = 'version %s (%s)\n' % (program_version, program_build_date)
    program_longdesc = '''
    This script parses a list of CSV files containing KIWIS incidents and analyses them to produce a report into tab separated file for each.
    '''
    program_license = """
    Copyright 2019-2020 European Union
    Licensed under the EUPL, Version 1.2 or as soon they will be approved by the European Commission subsequent versions of the EUPL (the "Licence");
    You may not use this work except in compliance with the Licence.
    You may obtain a copy of the Licence at:
    https://joinup.ec.europa.eu/sites/default/files/inline-files/EUPL%20v1_2%20EN(1).txt
    Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the Licence for the specific language governing permissions and limitations under the Licence.
    """

    # setup option parser
    parser = ArgumentParser(epilog=program_license, description=program_version_string+program_longdesc)

    parser.add_argument("-i", "--in", dest="infolder", required=True, type=FileUtils.folder_type,
                        help="Set input folder path with log files (*.csv)",
                        metavar="/input/folder/logfiles/")
    parser.add_argument("-o", "--out", dest="outfolder", required=True, type=FileUtils.folder_type,
                        help="Set output folder where the analysis files will be stored (*_out.tsv).",
                        metavar="/output/folder/analysis/")

    # process options
    args = parser.parse_args(argv)

    print(f"Input Folder: {args.infolder}")
    print(f"Output Folder: {args.outfolder}")

    run(args.infolder, args.outfolder)
    print("Finished.")
    return 0


def main_script():
    """Console entry point: run main() with the process arguments and exit with its status."""
    sys.exit(main(sys.argv[1:]))


if __name__ == "__main__":
    main_script()