Skip to content

Commit

Permalink
Merge pull request #81 from sam-hartke-ucar/main
Browse files Browse the repository at this point in the history
Update writeERA5file.py to run in parallel
  • Loading branch information
gutmann authored Feb 28, 2023
2 parents e57e056 + 4fe96ce commit a095865
Showing 1 changed file with 95 additions and 24 deletions.
119 changes: 95 additions & 24 deletions helpers/writeERA5file.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,43 @@
from dask_jobqueue import PBSCluster
from dask.distributed import Client
import time

cluster = PBSCluster(
queue="casper",
walltime="03:00:00",
project="P48500028",
memory="30GB",
cores=1,
processes=1,
)

cluster.scale(12)

client=Client(cluster)
time.sleep(30) # wait 30 seconds to give all dask workers time to populate
print('Cluster created and assigned to dask client')

import os
import numpy as np
import pandas as pd
import xarray as xr
#import qfgrib
from datetime import timedelta,datetime
from dateutil.relativedelta import relativedelta


era5pl_template = "/glade/collections/rda/data/ds633.0/e5.oper.an.pl/{yr}{mth}/e5.oper.an.pl.{var_id}.{dt}00_{dt}23.grb"
era5sfc_template = "/glade/collections/rda/data/ds633.0/e5.oper.an.sfc/{yr}{mth}/e5.oper.an.sfc.{var_id}.{dt}00_{dt2}23.grb"
w_format = "128_135_w.ll025sc"
v_format = "128_132_v.ll025uv"
u_format = "128_131_u.ll025uv"
q_format = "128_133_q.ll025sc"
t_format = "128_130_t.ll025sc"
tcrw_format = "228_089_tcrw.ll025sc"
#--------------------------------------------------------------------------
# Function for writing ERA5 data to yearly input files for GARD


era5pl_template = "/glade/collections/rda/data/ds633.0/e5.oper.an.pl/{yr}{mth}/e5.oper.an.pl.{var_id}.{dt}00_{dt}23.grb"
era5sfc_template = "/glade/collections/rda/data/ds633.0/e5.oper.an.sfc/{yr}{mth}/e5.oper.an.sfc.{var_id}.{dt}00_{dt2}23.grb"

var_formats = {'v':v_format,
'u':u_format,
'w':w_format,
'q':q_format,
't':t_format,
'tcrw':tcrw_format,
var_formats = {'v':"128_132_v.ll025uv",
'u':"128_131_u.ll025uv",
'w':"128_135_w.ll025sc",
'q':"128_133_q.ll025sc",
't':"128_130_t.ll025sc",
'tcrw':"228_089_tcrw.ll025sc",
}

# new variable names to match those from CESM LENS 2
Expand All @@ -35,7 +49,7 @@
'tcrw':'PRECT',
}

def createInputDataset(yr_st,yr_end,varlist):
def createERA5Dataset(yr_st,yr_end,varlist):

m=0
for var in varlist:
Expand All @@ -45,7 +59,8 @@ def createInputDataset(yr_st,yr_end,varlist):
yr = dt.year
mth = "%.02d"%dt.month
if var in ('u','v','w','q','t'):
var_files.append(era5pl_template.format(yr=yr,mth=mth,var_id=var_formats[var],dt=dt.strftime('%Y%m%d')))
var_files.append(era5pl_template.format(yr=yr,mth=mth,var_id=var_formats[var],
dt=dt.strftime('%Y%m%d')))
dt = dt + timedelta(days=1)

elif var == 'tcrw':
Expand All @@ -56,7 +71,7 @@ def createInputDataset(yr_st,yr_end,varlist):
dt = dt + relativedelta(months=+1)

vardata = xr.open_mfdataset(var_files,concat_dim='time',combine='nested',
backend_kwargs={"indexpath":""}).sel(latitude=slice(50,20),
backend_kwargs={"indexpath":""},parallel=True).sel(latitude=slice(50,20),
longitude=slice(360-120,360-60))[var]

if var in ('u','v','w','q','t'):
Expand All @@ -69,23 +84,79 @@ def createInputDataset(yr_st,yr_end,varlist):

if m==0:
era5_ds = vardata.to_dataset()
era5_ds = era5_ds.rename({var:newvarname[var]})
else:
era5_ds = era5_ds.assign(var=vardata)

era5_ds = era5_ds.rename({'var':newvarname[var]})
era5_ds = era5_ds.rename({'var':newvarname[var]})

m+=1

era5_ds = era5_ds.drop_vars(('number','step','surface'))

if yr_st!=yr_end:
outfile = '/glade/scratch/shartke/gard/era5/era5_daily_%d_%d.nc'%(yr_st,yr_end)
else:
outfile = '/glade/scratch/shartke/gard/era5/era5_daily_%d.nc'%yr_st
era5_ds.to_netcdf(outfile)



#--------------------------------------------------------------------------
# Function for writing CESM LENS2 data to decadal input files for GARD


cesm_template = "/glade/campaign/cgd/cesm/CESM2-LE/atm/proc/tseries/day_1/{var}/b.e21.B{scen}{forcing}.f09_g17.LE2-{styr}.0{ens}.cam.h{i}.{var}.{yr1}0101-{yr2}1231.nc"
scen="HIST"
f="cmip6"

def createCESM2Dataset(yr,styr,varlist,enslist):

yr1 = yr-yr%10

for e in enslist:
m=0
for var in varlist:
if var in ('U','V','T','Q'):
ds = xr.open_dataset(cesm_template.format(var=var,scen=scen,forcing=f,styr=styr,
ens="%.02d"%e,yr1=yr1,yr2=yr1+9,i=6))[var]
# select data over CONUS at ~450 mb level
vardata = ds.sel(lev=ds.lev[19],lat=slice(20,50),lon=slice(360.-120.,360.-60.)) # ,time=slice(str(yr),str(yr))
vardata = vardata.drop_vars('lev')
elif var in ('PSL','PRECT'):
ds = xr.open_dataset(cesm_template.format(var=var,scen=scen,forcing=f,styr=styr,
ens="%.02d"%e,yr1=yr1,yr2=yr1+9,i=1))[var]
vardata = ds.sel(lat=slice(20,50),lon=slice(360.-120.,360.-60.)) # ,time=slice(str(yr),str(yr))
# convert m/s to mm/d
vardata = vardata*3600*24*1000

if m==0:
cesm_ds = vardata.to_dataset()
else:
cesm_ds = cesm_ds.assign(var=vardata)
cesm_ds = cesm_ds.rename({'var':var})

m+=1

outfile = '/glade/scratch/shartke/gard/cesmlens2/cesm_daily_%d_%d_%d_%.02d.nc'%(yr,yr+9,styr,e)
cesm_ds.to_netcdf(outfile)


#--------------------------------------------------------------------------

# Note: Generating the ERA5 datasets will take the bulk of the time for this program

print(datetime.now())
createInputDataset(1982,1982,['u','v','w','q','t','tcrw'])
print('1982 dataset complete at: ',datetime.now())
createInputDataset(1983,1983,['u','v','w','q','t','tcrw'])
print('1983 dataset complete at: ',datetime.now())
styr = 1301 # 1231, 1251, 1281, or 1301
createCESM2Dataset(1960,styr,['U','V','W','Q','T','PRECT'],np.arange(1,3))
createCESM2Dataset(1970,styr,['U','V','W','Q','T','PRECT'],np.arange(1,3))
print('CESM LENS2 datasets complete at: ',datetime.now())

for yr in (1980,1981,1982,1983):
createERA5Dataset(yr,yr,['u','v','w','q','t','tcrw'])
print('ERA5 %s dataset complete at: '%yr,datetime.now())




# now you should be able to train GARD using 1980-1999 ERA5 data
# and predict downscaled 1960-1979 precip or temp using CESM LENS2 data

0 comments on commit a095865

Please sign in to comment.