-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
328 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,328 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Query the ESGF errata service for datasets of a data versions dictionnary (only lowest realization number), and organizes the reported issues by variable, severity and description" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"data_versions_tag = \"20210201_derived\"\n", | ||
"data_versions_dir = \"/home/ssenesi/CAMMAC/select_data_versions\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from IPython.core.display import display, HTML, Image\n", | ||
"display(HTML(\"<style>.container { width:100% !important; }</style>\"))\n", | ||
"import requests # use pip or conda to install it if needed\n", | ||
"import json\n", | ||
"import sys" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Climaf version >= 1.2.13 (see https://climaf.readthedocs.io)\n", | ||
"climaf_lib = \"/home/ssenesi/climaf_installs/running\" \n", | ||
"# AR6/WGI/chapter8 CliMAF-based package\n", | ||
"CAMMAC = \"/home/ssenesi/CAMMAC\"\n", | ||
"sys.path.append(climaf_lib) \n", | ||
"sys.path.append(CAMMAC)\n", | ||
"from CAMMAClib.ancillary import feed_dic\n", | ||
"from CAMMAClib.mips_et_al import institute_for_model, mip_for_experiment,\\\n", | ||
" models_for_experiments,read_versions_dictionnary, prefered_variant " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def errata(dataset_drs,base_url=\"https://errata.es-doc.org/1/\"):\n", | ||
" \"\"\"\n", | ||
" Query the errata service for erratas on a dataset DRS, such as\n", | ||
" >>> dataset_drs=\"CMIP6.DAMIP.NASA-GISS.GISS-E2-1-G.hist-sol.r1i1p1f1.AERmon.bldep.gn.v20180912\"\n", | ||
" and returns a list of pairs (severity, description) for relevant erratas\n", | ||
" \"\"\"\n", | ||
" erratas=[] \n", | ||
" resolve_url=base_url+\"resolve/simple-pid?datasets=\"+dataset_drs\n", | ||
" r=requests.get(resolve_url)\n", | ||
" #print resolve_url\n", | ||
" try :\n", | ||
" r=r.json()\n", | ||
" except ValueError :\n", | ||
" print \"\\nNo Json object for \"+dataset_drs\n", | ||
" return None\n", | ||
" if 'errorCode' not in r :\n", | ||
" for handle in r :\n", | ||
" #print handle\n", | ||
" #print r[handle]['errataIds'] \n", | ||
" #print type(r[handle]['errataIds'])\n", | ||
" l=r[handle]['errataIds']\n", | ||
" if type(l) != type([]) :\n", | ||
" l=eval(l)\n", | ||
" for uid in l :\n", | ||
" #print uid\n", | ||
" e=requests.get(base_url+\"issue/retrieve?uid=\"+uid).json()['issue']\n", | ||
" erratas.append((e['severity'],e['description']))\n", | ||
" else :\n", | ||
" #print \"No entry for \"+ds\n", | ||
" return None\n", | ||
" return erratas" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## A simple example of query" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"errata('CMIP6.DAMIP.NASA-GISS.GISS-E2-1-G.hist-sol.r1i1p1f1.AERmon.bldep.gn.v20180912')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"errata('CMIP6.CMIP.THU.CIESM.historical.r1i1p1f1.Amon.pr.gr.v20200417')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"errata('CMIP6.ScenarioMIP.THU.CIESM.ssp585.r1i1p1f1.Amon.tas.gr.v20200417')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## A function for querying the erratas for a versions dict, and organizing the outputs in a dict berrata2models[rvariable][severity][description] which values are lists of model.experiment" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def analyze_erratas(dic,experiments=None,variables=None, max_count=None, do_print=True, print_expids=False ) :\n", | ||
" \"\"\"\n", | ||
" Use a data versions dictionnary such as produced by notebook select_data_versions and\n", | ||
" query the ESGF errata service for all corresponding datasets\n", | ||
" \n", | ||
" Returns a dictionnary of expid DRS with an errata, grouped that way :\n", | ||
" \n", | ||
" >>> d[variable][severity][errata_description] = [ ... set of expid DRS ...]\n", | ||
" \n", | ||
" Arg dic is a data versions dictionnary organized that way : \n", | ||
" data_versions[expid][variable][table][model][variant]=(grid,version,data_period)\n", | ||
" Arg variable allows to restrict the analysis to those metadata lines which are for a given variable\n", | ||
" Arg max_count allows to restrict the number of processed cases\n", | ||
" \n", | ||
" \"\"\"\n", | ||
" errata_base_url=\"https://errata.es-doc.org/1/\"\n", | ||
" count=0\n", | ||
" berrata2models=dict()\n", | ||
" already_done=[]\n", | ||
" if experiments is None :\n", | ||
" experiments=dic.keys()\n", | ||
" for experiment in experiments :\n", | ||
" print experiment,\n", | ||
" if variables is None :\n", | ||
" variables=dic[experiment].keys()\n", | ||
" #print variables\n", | ||
" for variable in variables :\n", | ||
" print variable,\n", | ||
" for table in dic[experiment][variable] :\n", | ||
" for model in dic[experiment][variable][table] :\n", | ||
" variants=set(dic[experiment][variable][table][model].keys())\n", | ||
" variant=prefered_variant(variants,\"\",model) \n", | ||
" if variant is None :\n", | ||
" raise ValueError(\"Issue with prefered variant for %s %s %s %s\"%(experiment,variable,model,variants) )\n", | ||
" grid,version,data_period = dic[experiment][variable][table][model][variant]\n", | ||
" #CMIP6.CMIP.MPI-M.MPI-ESM1-2-HR.piControl\n", | ||
" expid=\"CMIP6.%s.%s.%s.%s\"%(mip_for_experiment(experiment),institute_for_model(model),model,experiment)\n", | ||
" nuple=(expid,variant,table,variable,grid,version)\n", | ||
" if nuple not in already_done :\n", | ||
" already_done.append(nuple)\n", | ||
" #print \"processing \",nuple\n", | ||
" drs=\"%s.%s.%s.%s.%s.%s\"%(expid,variant,table,variable,grid,version)\n", | ||
" #print drs\n", | ||
" #continue\n", | ||
" count +=1\n", | ||
" err_list=errata(drs,errata_base_url)\n", | ||
" #print err_list\n", | ||
" if err_list is not None :\n", | ||
" if variable not in berrata2models :\n", | ||
" berrata2models[variable]=dict()\n", | ||
" for severity,description in err_list :\n", | ||
" if severity not in berrata2models[variable]:\n", | ||
" berrata2models[variable][severity]=dict()\n", | ||
" if description not in berrata2models[variable][severity] :\n", | ||
" berrata2models[variable][severity][description]=set()\n", | ||
" expid_short=expid.split(\".\")[3]+\".\"+expid.split(\".\")[4]\n", | ||
" berrata2models[variable][severity][description].add(expid_short)\n", | ||
" if max_count is not None and count > max_count :\n", | ||
" break\n", | ||
" print\n", | ||
"\n", | ||
" print\n", | ||
" \n", | ||
" for variable in berrata2models :\n", | ||
" for severity in berrata2models[variable] :\n", | ||
" for description in berrata2models[variable][severity] :\n", | ||
" expids=berrata2models[variable][severity][description]\n", | ||
" berrata2models[variable][severity][description]=list(expids)\n", | ||
"\n", | ||
" print \"%d distinct cases scrutinized\"%len(already_done)\n", | ||
" if do_print :\n", | ||
" print_errata2models(berrata2models,print_expids)\n", | ||
" #\n", | ||
" from datetime import datetime\n", | ||
" berrata2models[\"Errata service query date\"] = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n", | ||
" berrata2models[\"Errata service query url\"] = errata_base_url\n", | ||
" #\n", | ||
" return berrata2models" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def print_errata2models(berrata2models,print_expids=True,severities=[\"high\",\"medium\"]):\n", | ||
" for variable in berrata2models :\n", | ||
" if \"Errata\" in variable : continue\n", | ||
" print \"\\nvariable\",variable\n", | ||
" for severity in berrata2models[variable] :\n", | ||
" if severity not in severities : continue\n", | ||
" print \"\\n\\tseverity\",severity #, berrata2models[variable][severity]\n", | ||
" for description in berrata2models[variable][severity] :\n", | ||
" expids=berrata2models[variable][severity][description]\n", | ||
" if print_expids :\n", | ||
" print \"\\n\\t\\t\",description,expids\n", | ||
" else:\n", | ||
" print \"\\n\\t\\t\",description,len(expids)\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## A small scale example of using analyze_erratas " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"data_versions=read_versions_dictionnary(data_versions_tag,data_versions_dir)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"a=analyze_erratas(data_versions,experiments=[\"historical\"],variables=[\"pr\"],do_print=False)#,max_count=1)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"print_errata2models(a,print_expids=True,severities=[\"medium\",\"high\"])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Let us run the analysis for the whole dict" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"scrolled": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"allvars=dict()\n", | ||
"for variable in [u'pr', u'tas', u'mrro', u'evspsbl', u'mrso', u'P-E', u'prw', u'mrsos', u'sos'] :\n", | ||
" allvars[variable]=analyze_erratas(data_versions,do_print=False,variables=[variable])\n", | ||
" print_errata2models(allvars[variable],print_expids=True,severities=[\"medium\",\"high\"])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Let us save the result" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"jsfile=\"all_erratas_%s.json\"%data_versions_tag\n", | ||
"#a[\"files\"]=l\n", | ||
"a[\"doc\"]=\"list_of_model.experiment[variable][severity][description]\"\n", | ||
"with open(jsfile,\"w\") as f :\n", | ||
" json.dump(a,f,separators=(',', ': '),indent=3,ensure_ascii=True)\n" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"language_info": { | ||
"name": "python", | ||
"pygments_lexer": "ipython2" | ||
}, | ||
"kernelspec": { | ||
"name": "python2", | ||
"display_name": "Python 2", | ||
"language": "python" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |