Add notebook Check_errata

senesis · Apr 2, 2021 · a466064 · a466064
1 parent 3c67825
commit a466064
Showing 1 changed file with 328 additions and 0 deletions.
diff --git a/select_data_versions/Check_errata.ipynb b/select_data_versions/Check_errata.ipynb
@@ -0,0 +1,328 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Query the ESGF errata service for datasets of a data versions dictionnary (only lowest realization number), and organizes the reported issues by variable, severity and description"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_versions_tag         = \"20210201_derived\"\n",
+    "data_versions_dir         = \"/home/ssenesi/CAMMAC/select_data_versions\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from IPython.core.display import display, HTML, Image\n",
+    "display(HTML(\"<style>.container { width:100% !important; }</style>\"))\n",
+    "import requests  # use pip or conda to install it if needed\n",
+    "import json\n",
+    "import sys"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Climaf version >= 1.2.13 (see https://climaf.readthedocs.io)\n",
+    "climaf_lib     = \"/home/ssenesi/climaf_installs/running\" \n",
+    "# AR6/WGI/chapter8 CliMAF-based package\n",
+    "CAMMAC         = \"/home/ssenesi/CAMMAC\"\n",
+    "sys.path.append(climaf_lib) \n",
+    "sys.path.append(CAMMAC)\n",
+    "from CAMMAClib.ancillary  import feed_dic\n",
+    "from CAMMAClib.mips_et_al import institute_for_model, mip_for_experiment,\\\n",
+    "    models_for_experiments,read_versions_dictionnary, prefered_variant "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def errata(dataset_drs,base_url=\"https://errata.es-doc.org/1/\"):\n",
+    "    \"\"\"\n",
+    "    Query the errata service for erratas on a dataset DRS, such as\n",
+    "    >>> dataset_drs=\"CMIP6.DAMIP.NASA-GISS.GISS-E2-1-G.hist-sol.r1i1p1f1.AERmon.bldep.gn.v20180912\"\n",
+    "    and returns a list of pairs (severity, description) for relevant erratas\n",
+    "    \"\"\"\n",
+    "    erratas=[]    \n",
+    "    resolve_url=base_url+\"resolve/simple-pid?datasets=\"+dataset_drs\n",
+    "    r=requests.get(resolve_url)\n",
+    "    #print resolve_url\n",
+    "    try :\n",
+    "        r=r.json()\n",
+    "    except ValueError :\n",
+    "        print \"\\nNo Json object for \"+dataset_drs\n",
+    "        return None\n",
+    "    if 'errorCode' not in r :\n",
+    "        for handle in r :\n",
+    "            #print handle\n",
+    "            #print r[handle]['errataIds']   \n",
+    "            #print type(r[handle]['errataIds'])\n",
+    "            l=r[handle]['errataIds']\n",
+    "            if type(l) != type([]) :\n",
+    "                l=eval(l)\n",
+    "            for uid in l :\n",
+    "                #print uid\n",
+    "                e=requests.get(base_url+\"issue/retrieve?uid=\"+uid).json()['issue']\n",
+    "                erratas.append((e['severity'],e['description']))\n",
+    "    else :\n",
+    "        #print \"No entry for \"+ds\n",
+    "        return None\n",
+    "    return erratas"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## A simple example of query"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "errata('CMIP6.DAMIP.NASA-GISS.GISS-E2-1-G.hist-sol.r1i1p1f1.AERmon.bldep.gn.v20180912')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "errata('CMIP6.CMIP.THU.CIESM.historical.r1i1p1f1.Amon.pr.gr.v20200417')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "errata('CMIP6.ScenarioMIP.THU.CIESM.ssp585.r1i1p1f1.Amon.tas.gr.v20200417')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## A function for querying the erratas for a versions dict, and organizing the outputs in a dict berrata2models[rvariable][severity][description] which values are lists of model.experiment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def analyze_erratas(dic,experiments=None,variables=None, max_count=None, do_print=True, print_expids=False ) :\n",
+    "    \"\"\"\n",
+    "    Use a data versions dictionnary such as produced by notebook select_data_versions and\n",
+    "    query the ESGF errata service for all corresponding datasets\n",
+    "    \n",
+    "    Returns a dictionnary of expid DRS with an errata, grouped that way :\n",
+    "    \n",
+    "    >>>  d[variable][severity][errata_description] = [ ... set of expid DRS ...]\n",
+    "    \n",
+    "    Arg dic is a data versions dictionnary organized that way : \n",
+    "       data_versions[expid][variable][table][model][variant]=(grid,version,data_period)\n",
+    "    Arg variable allows to restrict the analysis to those metadata lines which are for a given variable\n",
+    "    Arg max_count allows to restrict the number of processed cases\n",
+    "          \n",
+    "    \"\"\"\n",
+    "    errata_base_url=\"https://errata.es-doc.org/1/\"\n",
+    "    count=0\n",
+    "    berrata2models=dict()\n",
+    "    already_done=[]\n",
+    "    if experiments is None :\n",
+    "        experiments=dic.keys()\n",
+    "    for experiment in experiments :\n",
+    "        print experiment,\n",
+    "        if variables is None :\n",
+    "            variables=dic[experiment].keys()\n",
+    "        #print variables\n",
+    "        for variable in variables :\n",
+    "            print variable,\n",
+    "            for table in dic[experiment][variable] :\n",
+    "                for model in dic[experiment][variable][table] :\n",
+    "                    variants=set(dic[experiment][variable][table][model].keys())\n",
+    "                    variant=prefered_variant(variants,\"\",model)               \n",
+    "                    if variant is None :\n",
+    "                        raise ValueError(\"Issue with prefered variant for %s %s %s %s\"%(experiment,variable,model,variants) )\n",
+    "                    grid,version,data_period = dic[experiment][variable][table][model][variant]\n",
+    "                    #CMIP6.CMIP.MPI-M.MPI-ESM1-2-HR.piControl\n",
+    "                    expid=\"CMIP6.%s.%s.%s.%s\"%(mip_for_experiment(experiment),institute_for_model(model),model,experiment)\n",
+    "                    nuple=(expid,variant,table,variable,grid,version)\n",
+    "                    if nuple not in already_done :\n",
+    "                        already_done.append(nuple)\n",
+    "                        #print \"processing \",nuple\n",
+    "                        drs=\"%s.%s.%s.%s.%s.%s\"%(expid,variant,table,variable,grid,version)\n",
+    "                        #print drs\n",
+    "                        #continue\n",
+    "                        count +=1\n",
+    "                        err_list=errata(drs,errata_base_url)\n",
+    "                        #print err_list\n",
+    "                        if err_list is not None :\n",
+    "                            if variable not in berrata2models :\n",
+    "                                berrata2models[variable]=dict()\n",
+    "                            for severity,description in err_list :\n",
+    "                                if severity not in berrata2models[variable]:\n",
+    "                                    berrata2models[variable][severity]=dict()\n",
+    "                                if description not in berrata2models[variable][severity] :\n",
+    "                                    berrata2models[variable][severity][description]=set()\n",
+    "                                expid_short=expid.split(\".\")[3]+\".\"+expid.split(\".\")[4]\n",
+    "                                berrata2models[variable][severity][description].add(expid_short)\n",
+    "                        if max_count is not None and count > max_count :\n",
+    "                            break\n",
+    "            print\n",
+    "\n",
+    "    print\n",
+    "    \n",
+    "    for variable in berrata2models :\n",
+    "        for severity in berrata2models[variable] :\n",
+    "            for description in berrata2models[variable][severity] :\n",
+    "                    expids=berrata2models[variable][severity][description]\n",
+    "                    berrata2models[variable][severity][description]=list(expids)\n",
+    "\n",
+    "    print \"%d distinct cases scrutinized\"%len(already_done)\n",
+    "    if do_print :\n",
+    "        print_errata2models(berrata2models,print_expids)\n",
+    "    #\n",
+    "    from datetime import datetime\n",
+    "    berrata2models[\"Errata service query date\"] = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
+    "    berrata2models[\"Errata service query url\"]  = errata_base_url\n",
+    "    #\n",
+    "    return berrata2models"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def print_errata2models(berrata2models,print_expids=True,severities=[\"high\",\"medium\"]):\n",
+    "    for variable in berrata2models :\n",
+    "        if \"Errata\" in variable : continue\n",
+    "        print \"\\nvariable\",variable\n",
+    "        for severity in berrata2models[variable] :\n",
+    "            if severity not in severities : continue\n",
+    "            print \"\\n\\tseverity\",severity #, berrata2models[variable][severity]\n",
+    "            for description in berrata2models[variable][severity] :\n",
+    "                expids=berrata2models[variable][severity][description]\n",
+    "                if print_expids :\n",
+    "                    print \"\\n\\t\\t\",description,expids\n",
+    "                else:\n",
+    "                    print \"\\n\\t\\t\",description,len(expids)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## A small scale example of using analyze_erratas "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_versions=read_versions_dictionnary(data_versions_tag,data_versions_dir)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "a=analyze_erratas(data_versions,experiments=[\"historical\"],variables=[\"pr\"],do_print=False)#,max_count=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print_errata2models(a,print_expids=True,severities=[\"medium\",\"high\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Let us run the analysis for the whole dict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "allvars=dict()\n",
+    "for variable in [u'pr', u'tas', u'mrro', u'evspsbl', u'mrso', u'P-E', u'prw', u'mrsos', u'sos'] :\n",
+    "    allvars[variable]=analyze_erratas(data_versions,do_print=False,variables=[variable])\n",
+    "    print_errata2models(allvars[variable],print_expids=True,severities=[\"medium\",\"high\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##  Let us save the result"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "jsfile=\"all_erratas_%s.json\"%data_versions_tag\n",
+    "#a[\"files\"]=l\n",
+    "a[\"doc\"]=\"list_of_model.experiment[variable][severity][description]\"\n",
+    "with open(jsfile,\"w\") as f :\n",
+    "        json.dump(a,f,separators=(',', ': '),indent=3,ensure_ascii=True)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python",
+   "pygments_lexer": "ipython2"
+  },
+  "kernelspec": {
+   "name": "python2",
+   "display_name": "Python 2",
+   "language": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}