# river_dailies_to_ts.py
import numpy as np
import xarray as xr
import os
import re
import sys
import pandas as pd
from datetime import datetime
import salishsea_tools.river_202108 as rivers  # to use river inputs from a different (future?) rivers version, you'll have to change this import and the files_between_dates fn!
rlist_dict = {
    'test': [['fraser', 'Fraser'], ['skagit', 'Skagit1']],
    'fraser': [['fraser', 'Fraser']],
    'Cassidy': [['fraser', 'Nooksack'], ['skagit', 'Skagit1'], ['skagit', 'SnohomishAllenQuilceda'],
                ['puget', 'NisquallyMcAllister'], ['jdf', 'Elwha'], ['evi_s', 'Cowichan1'],
                ['evi_s', 'Nanaimo1'], ['evi_s', 'Puntledge'], ['evi_n', 'SalmonSayward'],
                ['bute', 'Homathko'], ['howe', 'Squamish']],
    'Camryn': [['puget', 'Johnson'], ['puget', 'Jimmycomelately'], ['puget', 'SalmonSnow'], ['puget', 'Chimacum'],
               ['puget', 'Thorndike'], ['puget', 'Torboo'], ['puget', 'LittleBigQuilcene'], ['puget', 'Dosewalips'],
               ['puget', 'Duckabush'], ['puget', 'Fulton'], ['puget', 'Waketick'], ['puget', 'HammaHamma'],
               ['puget', 'Jorsted'], ['puget', 'Eagle'], ['puget', 'Lilliwaup'], ['puget', 'Finch'],
               ['puget', 'Skokomish'], ['puget', 'Rendsland'], ['puget', 'Tahuya'], ['puget', 'Mission'],
               ['puget', 'Union'], ['puget', 'Coulter'], ['puget', 'Minter'], ['puget', 'Burley'],
               ['puget', 'Olalla'], ['puget', 'Blackjack'], ['puget', 'ClearBarker'], ['puget', 'BigValley'],
               ['puget', 'BigBear'], ['puget', 'Swaback'], ['puget', 'Stavis'], ['puget', 'Anderson'],
               ['puget', 'Dewatta'], ['puget', 'Sherwood'], ['puget', 'DeerJohnsGoldboroughMill'],
               ['puget', 'Skookum'], ['puget', 'KennedySchneider'], ['puget', 'PerryMcClane'],
               ['puget', 'Deschutes'], ['puget', 'Woodward'], ['puget', 'Woodland'], ['puget', 'Chambers'],
               ['puget', 'NisquallyMcAllister'], ['puget', 'Puyallup'], ['puget', 'Hylebas'],
               ['puget', 'Duwamish1'], ['puget', 'Duwamish2'], ['puget', 'CedarSammamish']],
}
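# To extract a different set of rivers, add a new key to rlist_dict and pass it as the river_list
# argument on the command line, e.g. a hypothetical entry built from rivers already listed above:
#     'my_rivers': [['howe', 'Squamish'], ['jdf', 'Elwha']],
# Each pair is [watershed, river_call], matching rivers.prop_dict[watershed][river_call].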
def main(start_str, end_str, source_directory, save_name, rlist_call):
    # Check that the correct number of command-line arguments was provided
    if len(sys.argv) != 6:
        print("Usage: python river_dailies_to_ts.py yyyymmdd_start yyyymmdd_end source_dir save_name river_list"
              "\n files save as 'river_dailies_to_ts_save_name_yyyymmdd_start_yyyymmdd_end.csv'"
              "\n river forcings are in source_dir='/results/forcing/rivers/'")
        sys.exit(1)
    def files_between_dates(start_date, end_date, directory):
        files = []
        file_dates = []
        date_pattern = r'R202108Dailies_y(\d{4})m(\d{2})d(\d{2})'  # regex pattern to specifically match the 202108Dailies - CHANGE IF DIFF VERSION
        for filename in sorted(os.listdir(directory)):
            match = re.search(date_pattern, filename)
            if match:
                year, month, day = map(int, match.groups())
                file_date = datetime(year, month, day).date()
                if start_date <= file_date <= end_date:
                    files.append(filename)
                    file_dates.append(file_date.strftime('%m-%d-%Y'))
        return files, file_dates
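    # For example, a daily forcing file named something like 'R202108Dailies_y2017m01d01.nc'
    # (the exact prefix/extension may vary by setup) would match and be assigned the date 2017-01-01.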
    # the entire set of rivers and their w_shed/r_call pairs is located in
    # /ocean/cdonaldson/MEOPAR/tools/SalishSeaTools/salishsea_tools/river_202108.py
    # this fn looks up and returns the river input coordinates and widths
    def river_bounds(river):
        w_shed = river[0]
        r_call = river[1]
        y = rivers.prop_dict[w_shed][r_call]['i']    # model grid Y-axis
        x = rivers.prop_dict[w_shed][r_call]['j']    # model grid X-axis
        dy = rivers.prop_dict[w_shed][r_call]['di']  # the number of boxes in Y
        dx = rivers.prop_dict[w_shed][r_call]['dj']  # the number of boxes in X
        return y, dy, x, dx
    # when selecting from the big array, do it like [y:y+dy, x:x+dx]
    # np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])[0:1, 2:3] = array([[3]]); specifies the row then the column
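    # For reference, a prop_dict entry is assumed to look roughly like (illustrative keys/values only):
    #     rivers.prop_dict['fraser']['Fraser'] -> {'i': ..., 'j': ..., 'di': ..., 'dj': ..., ...}
    # so river_bounds(['fraser', 'Fraser']) returns (i, di, j, dj) ready for the slicing shown above.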
    def files_to_timeseries(directory, file_names, river_list):
        num_rows = len(file_names)
        num_cols = len(river_list)
        result = np.zeros((num_rows, num_cols), dtype=float)  # allocate memory based on # days and # rivers
        row_idx = 0
        col_idx = 0
        for file in file_names:
            fname = os.path.join(directory, file)
            ds = xr.open_dataset(fname)
            array = ds['rorunoff'].values[0, :, :]  # og shape is (1, 898, 398)
            for river in river_list:
                y, dy, x, dx = river_bounds(river)
                result[row_idx, col_idx] = array[y:y+dy, x:x+dx].sum()  # take the sum in the box, slices are not inclusive
                col_idx += 1
            ds.close()   # close the dataset for this day before opening the next one
            col_idx = 0  # reset the river idx to loop again
            row_idx += 1  # add one to the file idx
        return result
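    # files_to_timeseries returns an array of shape (n_days, n_rivers): result[d, r] is the
    # 'rorunoff' field summed over river r's grid box on day d.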
    start_date = datetime.strptime(start_str, '%Y%m%d').date()
    end_date = datetime.strptime(end_str, '%Y%m%d').date()
    rivers_list = rlist_dict[rlist_call]
    file_names, file_dates = files_between_dates(start_date, end_date, source_directory)
    result = files_to_timeseries(source_directory, file_names, rivers_list)
    data_dict = {}
    for i in np.arange(len(rivers_list)):
        river_name = rivers_list[i][1] + ' [kg/m2/s]'
        data_dict[river_name] = result[:, i]
    df_data = pd.DataFrame(data_dict)
    metas = {'filename': file_names, 'date': file_dates}
    df_metas = pd.DataFrame(metas)
    df_all = pd.concat([df_metas, df_data], axis=1)
    file_to_save_name = 'river_dailies_to_ts_{}_{}_{}.csv'.format(save_name, start_str, end_str)
    df_all.to_csv(file_to_save_name)
    print('File saved as {}'.format(file_to_save_name))
if __name__ == "__main__":
    start_str = sys.argv[1]
    end_str = sys.argv[2]
    source_directory = sys.argv[3]
    save_name = sys.argv[4]
    rlist_call = sys.argv[5]
    main(start_str, end_str, source_directory, save_name, rlist_call)
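# Example invocation (dates and save name are placeholders; source_dir matches the usage message above):
#     python river_dailies_to_ts.py 20170101 20171231 /results/forcing/rivers/ test_run test
# which would write 'river_dailies_to_ts_test_run_20170101_20171231.csv'.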