Skip to content

Commit

Permalink
Update storm overflows for new constituencies
Browse files Browse the repository at this point in the history
  • Loading branch information
slowe committed Jul 24, 2024
1 parent 249b69a commit 5577552
Show file tree
Hide file tree
Showing 9 changed files with 1,231 additions and 1,203 deletions.
52 changes: 38 additions & 14 deletions pipelines/environment/dvc.lock
Original file line number Diff line number Diff line change
Expand Up @@ -43,53 +43,77 @@ stages:
hash: md5
md5: ee2acbd709d5d094955dc74f4307c291
size: 2324810
- path: ../../raw-data/EDM_2023_Storm_Overflow_Annual_Return/EDM 2023 Storm Overflow
Annual Return - all water and sewerage companies.xlsx
hash: md5
md5: 912120d1e2a0df1f515aa208cae9ceb9
size: 2456178
- path: ../../raw-data/spills-by-constituency.json
hash: md5
md5: 8684dd65bd630cdcf3a4e65c41b6c590
size: 229328
- path: ../../src/_data/geojson/constituencies-2022.geojson
- path: ../../src/_data/geojson/constituencies-2024.geojson
hash: md5
md5: 4c70e72adbfe31a71d6a504edd2d2509
size: 7461749
md5: 3d9f501977a3bd70dc860acdc0cf9ff7
size: 2462727
- path: ../util.py
hash: md5
md5: 48d28252bb6ca8ba2451792cb1a5709a
size: 653
md5: f3e966c3793136650d3b61c6fbb96e5f
size: 664
- path: storm-overflows.py
hash: md5
md5: d42a6f41a97de92688af17177fdebbdb
size: 5413
md5: ea29901d1e8fb5619493cb0fd7936be3
size: 8218
outs:
- path: ../../src/_data/sources/environment/spills_by_constituency.csv
hash: md5
md5: 15c19b34ee6c9144627bb5e4e247a2aa
size: 100103
- path: ../../src/_data/sources/environment/storm_overflows.csv
- path: ../../src/themes/environment/storm-overflows/_data/storm_overflows.csv
hash: md5
md5: be2d964c9595da21a76a223cd4c2d048
size: 25214
md5: b0472763c761409f0c14e307bf53681d
size: 30261
isexec: true
get-storm-overflows:
cmd:
- wget -q -O ../../raw-data/EDM_2022_Storm_Overflow_Annual_Return.zip
"https://environment.data.gov.uk/api/file/download?fileDataSetId=c55e170e-3c75-49a5-8026-a961ff94c8e0&fileName=EDM_2022_Storm_Overflow_Annual_Return.zip"
- wget -q -O ../../raw-data/EDM_2023_Storm_Overflow_Annual_Return.zip
"https://environment.data.gov.uk/api/file/download?fileDataSetId=c55e170e-3c75-49a5-8026-a961ff94c8e0&fileName=EDM_2023_Storm_Overflow_Annual_Return.zip"
outs:
- path: ../../raw-data/EDM_2022_Storm_Overflow_Annual_Return.zip
hash: md5
md5: bad66d7251a2509333f526099960b45f
size: 2706281
isexec: true
- path: ../../raw-data/EDM_2023_Storm_Overflow_Annual_Return.zip
hash: md5
md5: d341e93f7de5ec0e9537b2681b95686b
size: 2859496
isexec: true
unzip-storm-overflows:
cmd:
- unzip -o ../../raw-data/EDM_2022_Storm_Overflow_Annual_Return.zip "EDM_2022_Storm_Overflow_Annual_Return/EDM
2022 Storm Overflow Annual Return - all water and sewerage companies.xlsx" -d
../../raw-data/
- unzip -o ../../raw-data/EDM_2023_Storm_Overflow_Annual_Return.zip "EDM_2023_Storm_Overflow_Annual_Return/EDM
2023 Storm Overflow Annual Return - all water and sewerage companies.xlsx" -d
../../raw-data/
deps:
- path: ../../raw-data/EDM_2022_Storm_Overflow_Annual_Return.zip
hash: md5
md5: bad66d7251a2509333f526099960b45f
size: 2706281
- path: ../../raw-data/EDM_2023_Storm_Overflow_Annual_Return.zip
hash: md5
md5: d341e93f7de5ec0e9537b2681b95686b
size: 2859496
outs:
- path: ../../raw-data/EDM_2022_Storm_Overflow_Annual_Return/EDM 2022 Storm Overflow
Annual Return - all water and sewerage companies.xlsx
hash: md5
md5: ee2acbd709d5d094955dc74f4307c291
size: 2324810
isexec: true
- path: ../../raw-data/EDM_2023_Storm_Overflow_Annual_Return/EDM 2023 Storm Overflow
Annual Return - all water and sewerage companies.xlsx
hash: md5
md5: 912120d1e2a0df1f515aa208cae9ceb9
size: 2456178
isexec: true
8 changes: 4 additions & 4 deletions pipelines/environment/dvc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ stages:
- storm-overflows.py
- ../util.py
- ../../raw-data/spills-by-constituency.json
- ../../src/_data/geojson/constituencies-2022.geojson
- ../../src/_data/geojson/constituencies-2024.geojson
- ../../raw-data/EDM_2022_Storm_Overflow_Annual_Return/EDM 2022 Storm Overflow Annual Return - all water and sewerage companies.xlsx
- ../../raw-data/EDM_2023_Storm_Overflow_Annual_Return/EDM 2023 Storm Overflow Annual Return - all water and sewerage companies.xlsx
outs:
- ../../src/_data/sources/environment/spills_by_constituency.csv:
cache: false
- ../../src/_data/sources/environment/storm_overflows.csv:
#- ../../src/_data/sources/environment/spills_by_constituency.csv:
# cache: false
- ../../src/themes/environment/storm-overflows/_data/storm_overflows.csv:
cache: false
22 changes: 11 additions & 11 deletions pipelines/environment/storm-overflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,11 @@ def df_latlong2constituency(df, opts={}):
if not 'year' in opts:
opts['year'] = 'XXXX'
if not 'key' in opts:
opts['key'] = 'PCON22CD'
opts['key'] = 'PCON24CD'
if not 'name' in opts:
opts['name'] = 'PCON22NM'
opts['name'] = 'PCON24NM'
if not 'geojson' in opts:
opts['geojson'] = basedir+'../../src/_data/geojson/constituencies-2022.geojson'
opts['geojson'] = basedir+'../../src/_data/geojson/constituencies-2024.geojson'

# load the geojson
with open(opts['geojson']) as f:
Expand Down Expand Up @@ -111,7 +111,7 @@ def df_latlong2constituency(df, opts={}):
print("Can't save file "+basedir+'../../raw-data/storm_overflows_latlong-'+opts['year']+'.csv')

# Limit the columns
df = df.loc[:, ['Total Duration (hrs) all spills prior to processing through 12-24h count method', 'Counted spills using 12-24h count method', 'PCON22CD', 'PCON22NM']]
df = df.loc[:, ['Total Duration (hrs) all spills prior to processing through 12-24h count method', 'Counted spills using 12-24h count method', 'PCON24CD', 'PCON24NM']]
# remove non-numeric entries
df.replace(['#N/a', 'N/a', '-'], '', inplace=True)
#df.to_csv('../../src/_data/sources/environment/storm_overflows_by_constituency.csv')
Expand Down Expand Up @@ -168,12 +168,12 @@ def storm_overflows():
df = df_grid2latlong({'base':basedir,'data':years[y]})

# convert latlong to a constituency using shapely to check polygons
df = df_latlong2constituency(df,{'year':y,'key':'PCON22CD','geojson':basedir+'../../src/_data/geojson/constituencies-2022.geojson'})
df = df_latlong2constituency(df,{'year':y,'key':'PCON24CD','geojson':basedir+'../../src/_data/geojson/constituencies-2024.geojson'})

df['Total Duration (hrs) all spills prior to processing through 12-24h count method'] = pd.to_numeric(df['Total Duration (hrs) all spills prior to processing through 12-24h count method'], errors='coerce')
total_duration = df.groupby(['PCON22CD', 'PCON22NM'])['Total Duration (hrs) all spills prior to processing through 12-24h count method'].sum().reset_index()
total_duration = df.groupby(['PCON24CD', 'PCON24NM'])['Total Duration (hrs) all spills prior to processing through 12-24h count method'].sum().reset_index()
df['Counted spills using 12-24h count method'] = pd.to_numeric(df['Counted spills using 12-24h count method'], errors='coerce')
total_spills = df.groupby(['PCON22CD', 'PCON22NM'])['Counted spills using 12-24h count method'].sum().reset_index()
total_spills = df.groupby(['PCON24CD', 'PCON24NM'])['Counted spills using 12-24h count method'].sum().reset_index()

merged_df = total_spills.merge(total_duration, how='inner')

Expand All @@ -190,15 +190,15 @@ def storm_overflows():
# Join all the data_sets
full = pd.concat(data_sets,ignore_index=True)

pivotted = full.pivot_table(index=['PCON22CD'], columns=['Year'], values=['Counted spills using 12-24h count method','Total Duration (hrs) all spills prior to processing through 12-24h count method'])
pivotted = full.pivot_table(index=['PCON24CD'], columns=['Year'], values=['Counted spills using 12-24h count method','Total Duration (hrs) all spills prior to processing through 12-24h count method'])

# Add a column at the start that duplicates the index (so the index column itself can be left out when saving/printing)
pivotted.insert(0,'PCON22CD',pivotted.index)
pivotted.insert(0,'PCON24CD',pivotted.index)

# Add the Names back
pivotted.insert(1,'PCON22NM',pivotted.PCON22CD.map(full.set_index('PCON22CD')['PCON22NM'].to_dict()),True)
pivotted.insert(1,'PCON24NM',pivotted.PCON24CD.map(full.set_index('PCON24CD')['PCON24NM'].to_dict()),True)

pivotted.pipe(save_tidy_csv, basedir, '../../src/_data/sources/environment/storm_overflows.csv')
pivotted.pipe(save_tidy_csv, basedir, '../../src/themes/environment/storm-overflows/_data/storm_overflows.csv')
print(pivotted);

return pivotted
Expand Down
Loading

0 comments on commit 5577552

Please sign in to comment.