Skip to content

Commit

Permalink
Merge pull request #35 from coecms/stats
Browse files Browse the repository at this point in the history
added calendar, fixed issue with tdim in detect
  • Loading branch information
paolap authored Jun 11, 2021
2 parents 818e052 + e8c925a commit 8306462
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 28 deletions.
3 changes: 2 additions & 1 deletion tests/test_identify.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ def test_get_calendar(calendars):
del calendars
# test retrieving calendar attribute
for calendar,timerange in locals().items():
assert get_calendar(timerange) == ndays_year[calendar]
var = xr.DataArray(coords={'time': timerange}, dims=['time'])
assert get_calendar(var.time) == ndays_year[calendar]
# test guessing number of days per year if calendar not present
# test working with different calendars
16 changes: 8 additions & 8 deletions tests/xmhw_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,14 +210,14 @@ def inter_data():

@pytest.fixture
def calendars():
noleap = xr.cftime_range("2000", periods=6, calendar="noleap").to_datetimeindex()
all_leap = xr.cftime_range("2000", periods=6, calendar="all_leap").to_datetimeindex()
day_365 = xr.cftime_range("2000", periods=6, calendar="365_day").to_datetimeindex()
day_366 = xr.cftime_range("2000", periods=6, calendar="366_day").to_datetimeindex()
gregorian = xr.cftime_range("2000", periods=6, calendar="gregorian").to_datetimeindex()
standard = xr.cftime_range("2000", periods=6, calendar="standard").to_datetimeindex()
julian = xr.cftime_range("2000", periods=6, calendar="julian").to_datetimeindex()
proleptic = xr.cftime_range("2000", periods=6, calendar="proleptic_gregorian").to_datetimeindex()
noleap = xr.cftime_range("2000", periods=6, calendar="noleap")
all_leap = xr.cftime_range("2000", periods=6, calendar="all_leap")
day_365 = xr.cftime_range("2000", periods=6, calendar="365_day")
day_366 = xr.cftime_range("2000", periods=6, calendar="366_day")
gregorian = xr.cftime_range("2000", periods=6, calendar="gregorian")
standard = xr.cftime_range("2000", periods=6, calendar="standard")
julian = xr.cftime_range("2000", periods=6, calendar="julian")
proleptic = xr.cftime_range("2000", periods=6, calendar="proleptic_gregorian")
ndays_year = {'noleap': 365, 'all_leap': 366, 'day_365' : 365, 'day_366': 366, 'day_360': 360,
'gregorian': 365.25, 'standard': 365.25, 'julian': 365.25, 'proleptic': 365.25}
return noleap, all_leap, day_365, day_366, gregorian, standard, julian, proleptic, ndays_year
Expand Down
31 changes: 17 additions & 14 deletions xmhw/identify.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,23 +62,26 @@ def get_calendar(tdim):
# for these we ant to use add_doy
# for 360/ 365 /366 we need different approach, they all can stay as they are but I should then use the original day ofyear admititng this is calculated differently and consistently each time
ndays = {'standard': 365.25, 'gregorian': 365.25, 'proleptic_gregorian': 365.25, 'all_leap': 366,
'no_leap': 365, '365_day': 365, '360_day': 360, 'julian': 365.25}
try:
calendar = tdim.econding['calendar']
except:
calendar=''
'noleap': 365, '365_day': 365, '360_day': 360, 'julian': 365.25}
if 'calendar' in tdim.encoding.keys():
calendar = tdim.encoding['calendar']
elif 'calendar' in tdim.attrs.keys():
calendar = tdim.attrs['calendar']
else:
calendar = getattr(tdim.values[0], 'calendar', '')
if calendar == '':
if 'calendar' in tdim.attrs:
calendar = tdim.calendar
if calendar in ['360', '365', '366']:
calendar = f'{calendar}_day'
elif calendar == 'leap':
calendar = 'standard'
#calendar = infer_calendar(tdim)
pass
# if calendar was retrieved by variable attributes is possible it was wrongly defined
if calendar in ['360', '365', '366']:
calendar = f'{calendar}_day'
elif calendar == 'leap':
calendar = 'standard'
if calendar not in ndays.keys():
# try to work out from array whatit could be
print('calendar not in keys')
ndays_year = 365.25 # just to retunr something now
else:
ndays_year = ndays[calendar]
ndays_year = ndays[calendar]
return ndays_year


Expand Down Expand Up @@ -181,7 +184,7 @@ def define_events(ds, idxarr, minDuration, joinAcrossGaps, maxGap, intermediate
# Prepare dataframe to get features before groupby operation
df = mhw_df(pd.concat([df,dfev], axis=1))
# Calculate mhw properties, for each event using groupby
dfmhw = mhw_features(df, len(ds.time)-1)
dfmhw = mhw_features(df, len(idxarr)-1)

# convert back to xarray dataset and reindex (?) so all cells have same event axis
mhw = xr.Dataset.from_dataframe(dfmhw, sparse=False)
Expand Down
12 changes: 7 additions & 5 deletions xmhw/xmhw.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,8 @@ def detect(temp, th, se, minDuration=5, joinAcrossGaps=True, maxGap=2, maxPadLen
# check maxGap < minDuration
if maxGap >= minDuration:
raise XmhwException("Maximum gap between mhw events should be smaller than event minimum duration")

# if time dimension different from time, rename it
temp = temp.rename({tdim: 'time'})
# save original attributes in a dictionary to be assigned to final dataset
ds_attrs = {}
ds_attrs['ts'] = temp.attrs
Expand All @@ -249,7 +250,7 @@ def detect(temp, th, se, minDuration=5, joinAcrossGaps=True, maxGap=2, maxPadLen
# return an array stacked on all dimensions excluding time
# Land cells are removed
# new dimensions are (time, cell)
ts = land_check(temp, tdim=tdim)
ts = land_check(temp)
th = land_check(th, tdim='doy')
se = land_check(se, tdim='doy')
# assign doy
Expand All @@ -276,17 +277,18 @@ def detect(temp, th, se, minDuration=5, joinAcrossGaps=True, maxGap=2, maxPadLen
# so data can be split by chunks
ds = xr.Dataset({'ts': ts, 'seas': seas, 'thresh': thresh, 'bthresh': bthresh})
ds = ds.reset_coords(drop=True)
ds = ds.chunk(chunks={tdim: -1, 'cell': 1})
ds = ds.chunk(chunks={'time': -1, 'cell': 1})
# Build a pandas series with the positional indexes as values
# [0,1,2,3,4,5,6,7,8,9,10,..]
idxarr = pd.Series(data=np.arange(len(ds[tdim])), index=ds[tdim].values)
idxarr = pd.Series(data=np.arange(len(ds.time)), index=ds.time.values)
mhwls = []
for c in ds.cell:
mhwls.append( define_events(ds.sel(cell=c), idxarr,
minDuration, joinAcrossGaps, maxGap, intermediate))
results = dask.compute(mhwls)
mhw_results = [r[0] for r in results[0]]
mhw = xr.concat(mhw_results, dim=ds.cell).unstack('cell')
mhw = xr.concat(mhw_results, dim=ds.cell)
mhw = mhw.unstack('cell')
if intermediate:
inter_results = [r[1] for r in results[0]]
mhw_inter = xr.concat(inter_results, dim=ds.cell).unstack('cell')
Expand Down

0 comments on commit 8306462

Please sign in to comment.