Skip to content

Commit

Permalink
Add notebook Check_errata
Browse files Browse the repository at this point in the history
  • Loading branch information
senesis committed Apr 2, 2021
1 parent 3c67825 commit a466064
Showing 1 changed file with 328 additions and 0 deletions.
328 changes: 328 additions & 0 deletions select_data_versions/Check_errata.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,328 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Query the ESGF errata service for datasets of a data versions dictionnary (only lowest realization number), and organizes the reported issues by variable, severity and description"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data_versions_tag = \"20210201_derived\"\n",
"data_versions_dir = \"/home/ssenesi/CAMMAC/select_data_versions\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython.core.display import display, HTML, Image\n",
"display(HTML(\"<style>.container { width:100% !important; }</style>\"))\n",
"import requests # use pip or conda to install it if needed\n",
"import json\n",
"import sys"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Climaf version >= 1.2.13 (see https://climaf.readthedocs.io)\n",
"climaf_lib = \"/home/ssenesi/climaf_installs/running\" \n",
"# AR6/WGI/chapter8 CliMAF-based package\n",
"CAMMAC = \"/home/ssenesi/CAMMAC\"\n",
"sys.path.append(climaf_lib) \n",
"sys.path.append(CAMMAC)\n",
"from CAMMAClib.ancillary import feed_dic\n",
"from CAMMAClib.mips_et_al import institute_for_model, mip_for_experiment,\\\n",
" models_for_experiments,read_versions_dictionnary, prefered_variant "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def errata(dataset_drs,base_url=\"https://errata.es-doc.org/1/\"):\n",
" \"\"\"\n",
" Query the errata service for erratas on a dataset DRS, such as\n",
" >>> dataset_drs=\"CMIP6.DAMIP.NASA-GISS.GISS-E2-1-G.hist-sol.r1i1p1f1.AERmon.bldep.gn.v20180912\"\n",
" and returns a list of pairs (severity, description) for relevant erratas\n",
" \"\"\"\n",
" erratas=[] \n",
" resolve_url=base_url+\"resolve/simple-pid?datasets=\"+dataset_drs\n",
" r=requests.get(resolve_url)\n",
" #print resolve_url\n",
" try :\n",
" r=r.json()\n",
" except ValueError :\n",
" print \"\\nNo Json object for \"+dataset_drs\n",
" return None\n",
" if 'errorCode' not in r :\n",
" for handle in r :\n",
" #print handle\n",
" #print r[handle]['errataIds'] \n",
" #print type(r[handle]['errataIds'])\n",
" l=r[handle]['errataIds']\n",
" if type(l) != type([]) :\n",
" l=eval(l)\n",
" for uid in l :\n",
" #print uid\n",
" e=requests.get(base_url+\"issue/retrieve?uid=\"+uid).json()['issue']\n",
" erratas.append((e['severity'],e['description']))\n",
" else :\n",
" #print \"No entry for \"+ds\n",
" return None\n",
" return erratas"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## A simple example of query"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"errata('CMIP6.DAMIP.NASA-GISS.GISS-E2-1-G.hist-sol.r1i1p1f1.AERmon.bldep.gn.v20180912')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"errata('CMIP6.CMIP.THU.CIESM.historical.r1i1p1f1.Amon.pr.gr.v20200417')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"errata('CMIP6.ScenarioMIP.THU.CIESM.ssp585.r1i1p1f1.Amon.tas.gr.v20200417')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## A function for querying the erratas for a versions dict, and organizing the outputs in a dict berrata2models[rvariable][severity][description] which values are lists of model.experiment"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def analyze_erratas(dic,experiments=None,variables=None, max_count=None, do_print=True, print_expids=False ) :\n",
" \"\"\"\n",
" Use a data versions dictionnary such as produced by notebook select_data_versions and\n",
" query the ESGF errata service for all corresponding datasets\n",
" \n",
" Returns a dictionnary of expid DRS with an errata, grouped that way :\n",
" \n",
" >>> d[variable][severity][errata_description] = [ ... set of expid DRS ...]\n",
" \n",
" Arg dic is a data versions dictionnary organized that way : \n",
" data_versions[expid][variable][table][model][variant]=(grid,version,data_period)\n",
" Arg variable allows to restrict the analysis to those metadata lines which are for a given variable\n",
" Arg max_count allows to restrict the number of processed cases\n",
" \n",
" \"\"\"\n",
" errata_base_url=\"https://errata.es-doc.org/1/\"\n",
" count=0\n",
" berrata2models=dict()\n",
" already_done=[]\n",
" if experiments is None :\n",
" experiments=dic.keys()\n",
" for experiment in experiments :\n",
" print experiment,\n",
" if variables is None :\n",
" variables=dic[experiment].keys()\n",
" #print variables\n",
" for variable in variables :\n",
" print variable,\n",
" for table in dic[experiment][variable] :\n",
" for model in dic[experiment][variable][table] :\n",
" variants=set(dic[experiment][variable][table][model].keys())\n",
" variant=prefered_variant(variants,\"\",model) \n",
" if variant is None :\n",
" raise ValueError(\"Issue with prefered variant for %s %s %s %s\"%(experiment,variable,model,variants) )\n",
" grid,version,data_period = dic[experiment][variable][table][model][variant]\n",
" #CMIP6.CMIP.MPI-M.MPI-ESM1-2-HR.piControl\n",
" expid=\"CMIP6.%s.%s.%s.%s\"%(mip_for_experiment(experiment),institute_for_model(model),model,experiment)\n",
" nuple=(expid,variant,table,variable,grid,version)\n",
" if nuple not in already_done :\n",
" already_done.append(nuple)\n",
" #print \"processing \",nuple\n",
" drs=\"%s.%s.%s.%s.%s.%s\"%(expid,variant,table,variable,grid,version)\n",
" #print drs\n",
" #continue\n",
" count +=1\n",
" err_list=errata(drs,errata_base_url)\n",
" #print err_list\n",
" if err_list is not None :\n",
" if variable not in berrata2models :\n",
" berrata2models[variable]=dict()\n",
" for severity,description in err_list :\n",
" if severity not in berrata2models[variable]:\n",
" berrata2models[variable][severity]=dict()\n",
" if description not in berrata2models[variable][severity] :\n",
" berrata2models[variable][severity][description]=set()\n",
" expid_short=expid.split(\".\")[3]+\".\"+expid.split(\".\")[4]\n",
" berrata2models[variable][severity][description].add(expid_short)\n",
" if max_count is not None and count > max_count :\n",
" break\n",
" print\n",
"\n",
" print\n",
" \n",
" for variable in berrata2models :\n",
" for severity in berrata2models[variable] :\n",
" for description in berrata2models[variable][severity] :\n",
" expids=berrata2models[variable][severity][description]\n",
" berrata2models[variable][severity][description]=list(expids)\n",
"\n",
" print \"%d distinct cases scrutinized\"%len(already_done)\n",
" if do_print :\n",
" print_errata2models(berrata2models,print_expids)\n",
" #\n",
" from datetime import datetime\n",
" berrata2models[\"Errata service query date\"] = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
" berrata2models[\"Errata service query url\"] = errata_base_url\n",
" #\n",
" return berrata2models"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def print_errata2models(berrata2models,print_expids=True,severities=[\"high\",\"medium\"]):\n",
" for variable in berrata2models :\n",
" if \"Errata\" in variable : continue\n",
" print \"\\nvariable\",variable\n",
" for severity in berrata2models[variable] :\n",
" if severity not in severities : continue\n",
" print \"\\n\\tseverity\",severity #, berrata2models[variable][severity]\n",
" for description in berrata2models[variable][severity] :\n",
" expids=berrata2models[variable][severity][description]\n",
" if print_expids :\n",
" print \"\\n\\t\\t\",description,expids\n",
" else:\n",
" print \"\\n\\t\\t\",description,len(expids)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## A small scale example of using analyze_erratas "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data_versions=read_versions_dictionnary(data_versions_tag,data_versions_dir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"a=analyze_erratas(data_versions,experiments=[\"historical\"],variables=[\"pr\"],do_print=False)#,max_count=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print_errata2models(a,print_expids=True,severities=[\"medium\",\"high\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Let us run the analysis for the whole dict"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"allvars=dict()\n",
"for variable in [u'pr', u'tas', u'mrro', u'evspsbl', u'mrso', u'P-E', u'prw', u'mrsos', u'sos'] :\n",
" allvars[variable]=analyze_erratas(data_versions,do_print=False,variables=[variable])\n",
" print_errata2models(allvars[variable],print_expids=True,severities=[\"medium\",\"high\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Let us save the result"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"jsfile=\"all_erratas_%s.json\"%data_versions_tag\n",
"#a[\"files\"]=l\n",
"a[\"doc\"]=\"list_of_model.experiment[variable][severity][description]\"\n",
"with open(jsfile,\"w\") as f :\n",
" json.dump(a,f,separators=(',', ': '),indent=3,ensure_ascii=True)\n"
]
}
],
"metadata": {
"language_info": {
"name": "python",
"pygments_lexer": "ipython2"
},
"kernelspec": {
"name": "python2",
"display_name": "Python 2",
"language": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit a466064

Please sign in to comment.