-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Allow output to maintain the same folder structure of input
- Loading branch information
Showing
2 changed files
with
158 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
151 changes: 151 additions & 0 deletions
151
src/lisfloodutilities/gridding/tools/analyse_incidents.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
from dask.dataframe.io.tests.test_json import df | ||
from pandas.tests.io.test_fsspec import df1 | ||
__author__="Goncalo Gomes" | ||
__date__="$Jun 06, 2024 10:45:00$" | ||
__version__="0.1" | ||
__updated__="$Jun 06, 2024 10:45:00$" | ||
|
||
""" | ||
Copyright 2019-2020 European Union | ||
Licensed under the EUPL, Version 1.2 or as soon they will be approved by the European Commission subsequent versions of the EUPL (the "Licence"); | ||
You may not use this work except in compliance with the Licence. | ||
You may obtain a copy of the Licence at: | ||
https://joinup.ec.europa.eu/sites/default/files/inline-files/EUPL%20v1_2%20EN(1).txt | ||
Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the Licence for the specific language governing permissions and limitations under the Licence. | ||
""" | ||
|
||
import sys | ||
import os | ||
from pathlib import Path | ||
from argparse import ArgumentParser, ArgumentTypeError | ||
import pandas as pd | ||
import json | ||
from lisfloodutilities.gridding.lib.utils import FileUtils | ||
|
||
|
||
# Column names used when handling the incidents tables.
# NOTE(review): only COL_PROVIDER_ID, COL_INCIDENTS and COL_TOTAL_INCIDENTS are
# referenced by the code visible in this module; the others are presumably
# columns of the input CSV files - confirm before relying on them.
COL_PROVIDER_ID = 'SITE'  # grouping key of the per-file report
COL_STATION_NUM = 'STATION'
COL_PARAMETER = 'PARAM'
COL_TIMESERIES = 'TS'
COL_STATION_NAME = 'NAME'
COL_STATUS = 'Status'
COL_LAT = 'latitude'
COL_LON = 'longitude'
COL_HEIGHT = 'elevation'
COL_RELATED_PATHS = 'RelatedPaths'
COL_RESULTS_TYPE = 'resultsType'
COL_MSG = 'message'
COL_TIME_INTERVAL_START = 'from'
COL_TIME_INTERVAL_END = 'until'
COL_NUM_INVALID = 'nInvalid'
COL_NUM_VALID = 'nValid'
COL_PERCENTAGE_INVALID = 'percInvalid'
COL_INCIDENTS = 'incidents'  # cell parsed as the textual form of a dict
COL_TOTAL_INCIDENTS = 'totalIncidents'  # column computed and added by run()
|
||
|
||
def get_total_incidents(row: pd.Series) -> int:
    """Return the total number of incidents stored in one row.

    The COL_INCIDENTS cell is parsed as the textual form of a Python dict
    (e.g. ``"{'a': 2, 'b': 1}"``) and the values of that dict are summed.

    Args:
        row: Row (or any mapping) providing the COL_INCIDENTS entry.

    Returns:
        The sum of all dict values that can be converted to int. 0 when the
        cell is None, empty, 'nan', '{}', cannot be parsed or is not a dict.
        Values that cannot be converted are reported on stdout and skipped.
    """
    # Imported locally so the function does not depend on a module level
    # import being added elsewhere in the file.
    import ast

    incidents = row[COL_INCIDENTS]
    if incidents is None:
        return 0
    # str() keeps non-string cells (e.g. a float NaN when the column was not
    # cast to str beforehand) from failing on .strip().
    incidents = str(incidents).strip()
    if not incidents or incidents in ('nan', '{}'):
        return 0
    try:
        # SECURITY: the text comes from files on disk, so it must never be
        # handed to eval(). literal_eval only accepts Python literals and
        # cannot execute code.
        incidents_dic = ast.literal_eval(incidents)
    except (ValueError, SyntaxError):
        print(f'ERROR evaluating row: {row}')
        return 0
    if not isinstance(incidents_dic, dict):
        return 0
    total_incidents = 0
    for incident_count in incidents_dic.values():
        try:
            total_incidents += int(incident_count)
        except (TypeError, ValueError, OverflowError):
            # Best effort: report the offending row and keep summing the rest.
            print(f'ERROR evaluating row: {row}')
    return total_incidents
|
||
|
||
def run(infolder: str, outfolder: str):
    """Write one incidents summary per CSV file found under *infolder*.

    Every ``*.csv`` file (searched recursively, read with ';' as delimiter)
    is reduced to one line per COL_PROVIDER_ID holding the 'sum' and 'count'
    of the total incidents of its rows. The result is written as a tab
    separated file under *outfolder*, at the same relative location as the
    input file and with '_out.tsv' appended to the input file name.

    Args:
        infolder: Root folder searched for the input CSV files.
        outfolder: Root folder receiving the output files; missing
            subfolders are created.
    """
    inwildcard = '*.csv'
    inroot = Path(infolder)
    outroot = Path(outfolder)

    for filename in sorted(inroot.rglob(inwildcard)):
        print(f'Processing file: {filename}')
        # Build the output path from the location relative to the input root.
        # Substituting the folder name in the path text fails when infolder
        # has a trailing slash or a './' prefix (Path normalises it away) or
        # when its text occurs more than once in the path.
        relative_file = filename.relative_to(inroot)
        outfilepath = outroot / relative_file.with_name(f'{relative_file.name}_out.tsv')
        # Create the output parent folders if not exist yet
        outfilepath.parent.mkdir(parents=True, exist_ok=True)

        df = pd.read_csv(filename, delimiter=';')
        if df.empty:
            # Only a header line: there is nothing to aggregate.
            print(f'WARNING: No data was found in file {filename}')
            continue
        df = df.astype({COL_INCIDENTS: 'str'})
        df[COL_TOTAL_INCIDENTS] = df.apply(get_total_incidents, axis=1)

        df = df.groupby([COL_PROVIDER_ID])[COL_TOTAL_INCIDENTS].agg(['sum', 'count']).reset_index()

        if df.empty:
            print(f'WARNING: No data was found in file {filename}')
        else:
            df.to_csv(outfilepath, index=False, header=True, sep='\t')
            print(f'Wrote file: {outfilepath}')
|
||
|
||
def main(argv):
    '''Command line options.

    Parses the command line, prints the selected folders and runs the
    analysis over them.

    Args:
        argv: Command line arguments without the program name.

    Returns:
        0 once all files were processed. Invalid arguments make argparse
        terminate the program before the analysis starts.
    '''
    program_version = "v%s" % __version__
    program_build_date = "%s" % __updated__

    program_version_string = 'version %s (%s)\n' % (program_version, program_build_date)
    program_longdesc = '''
    This script parses a list of CSV files containing KIWIS incidents and analyses them to produce a report into tab separated file for each.
    '''
    program_license = """
    Copyright 2019-2020 European Union
    Licensed under the EUPL, Version 1.2 or as soon they will be approved by the European Commission subsequent versions of the EUPL (the "Licence");
    You may not use this work except in compliance with the Licence.
    You may obtain a copy of the Licence at:
    https://joinup.ec.europa.eu/sites/default/files/inline-files/EUPL%20v1_2%20EN(1).txt
    Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the Licence for the specific language governing permissions and limitations under the Licence.
    """

    # setup option parser
    parser = ArgumentParser(epilog=program_license, description=program_version_string + program_longdesc)

    parser.add_argument("-i", "--in", dest="infolder", required=True, type=FileUtils.folder_type,
                        help="Set input folder path with log files (*.csv)",
                        metavar="/input/folder/logfiles/")
    # The metavar shows a folder: this option names the folder that receives
    # the generated files, not a single output file.
    parser.add_argument("-o", "--out", dest="outfolder", required=True, type=FileUtils.folder_type,
                        help="Set output folder where the analysis files will be stored (*_out.tsv).",
                        metavar="/output/folder/")

    # process options
    args = parser.parse_args(argv)

    print(f"Input Folder: {args.infolder}")
    print(f"Output Folder: {args.outfolder}")

    run(args.infolder, args.outfolder)
    print("Finished.")
    # Explicit success status for the sys.exit() call of the caller.
    return 0
|
||
|
||
def main_script():
    """Run main() with the command line arguments and exit with its result."""
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
|
||
|
||
# Start the command line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main_script()