From 70d4cfd6dc5cc53bb33e24cc37696e6fd63f7070 Mon Sep 17 00:00:00 2001 From: BrettHoover-NOAA <98188219+BrettHoover-NOAA@users.noreply.github.com> Date: Fri, 24 Feb 2023 11:44:25 -0500 Subject: [PATCH] Overhaul to plot_proftrace: options for multiple figure configurations in YAML, plotting per-ob DA penalty statistics (#131) * Bug fix for _get_conv() to pull appropriate ptop, pbot based on variable * plot_proftrace.py for plotting ob type/subtype, add .png to .gitignore * Switched from multi-index slicing to .isin(), inputs can now be provided as a list and aggregated together * Fixed bug with dataframe filters for ==None * Added some pycodestyle compliance to scripts/plot_proftrace, finalized bug fix for ==None filtering options * generalized input filters with some in-line documentation * modified plot_proftrace to use yaml input; pycodestyle and code readability changes * Rearranged into more discrete functions, greater pythonic compliance * Added PyGSI/gsi_stat and scripts/plot_proftrace for plotting penalty data * Fixed collect_statistics() to properly handle case of missing gsistat file in list for a given set * Overhaul to plot_proftrace.py and proftrace_yaml.yaml: YAML provides series of figure-cards to produce multiple figures with different configurations, capability to set a figure name to None to suppress producing that figure for that figure-card * Overhaul to plot_proftrace.py: Rename figure-name keys in YAML, allow for both error (rms/bias) and penalty (cpen) data to be plotted, allow missing figure-name keys in YAML to be interpreted as None (no plotting) * Fixed pyGSI/gsi_stat.py pycodestyle error * Removed legacy pyGSI/git_stat_pen.py test-code * Update proftrace_yaml.yaml --- proftrace_yaml.yaml | 79 +++-- pyGSI/gsi_stat.py | 4 +- scripts/plot_proftrace.py | 592 +++++++++++++++++++++++++++++++------- 3 files changed, 552 insertions(+), 123 deletions(-) diff --git a/proftrace_yaml.yaml b/proftrace_yaml.yaml index 713343a..ae7fdc5 100644 --- a/proftrace_yaml.yaml +++ b/proftrace_yaml.yaml @@ -1,24 +1,55 @@ -set1: - name: 'MDCRS-OMB-t' - var: 't' - it: 1 - use: 'asm' - typ: 133 - styp: None - statdir: /full/path/to/gsistat/files/ -set2: - name: 'MDCRS-OMA-t' - var: 't' - it: 3 - use: 'asm' - typ: 133 - styp: None - statdir: /full/path/to/gsistat/files/ -figures: - pname: 'gsistat_profs.png' - tname: 'gsistat_trace.png' - tskip: 8 -dates: - dt_beg: '2020090800' - dt_end: '2020093018' - hrs: 6 +LWIR_figs: + set1: + name: 'LWIR-G17' + var: 'uv' + it: 1 + use: 'asm' + typ: 245 + styp: '0271' + statdir: /path/to/set1/gsistat/files + set2: + name: 'LWIR-G18' + var: 'uv' + it: 1 + use: 'asm' + typ: 245 + styp: '0272' + statdir: /path/to/set2/gsistat/files + figopts: + errProName: gsistat_errprofs.png + errTraName: gsistat_errtrace.png + penProName: gsistat_penprofs.png + penTraName: gsistat_pentrace.png + tskip: 16 + dates: + dt_beg: '2022100100' + dt_end: '2022112018' + hrs: 6 +WVct_figs: + set1: + name: 'WVct-G17' + var: 'uv' + it: 1 + use: 'asm' + typ: 246 + styp: '0271' + statdir: /path/to/set1/gsistat/files + set2: + name: 'WVct-G18' + var: 'uv' + it: 1 + use: 'asm' + typ: 246 + styp: '0272' + statdir: /path/to/set2/gsistat/files + figopts: + errProName: gsistat_errprofs_wvct.png + errTraName: gsistat_errtrace_wvct.png + penProName: None + penTraName: None + tskip: 16 + dates: + dt_beg: '2022100100' + dt_end: '2022112018' + hrs: 6 + diff --git a/pyGSI/gsi_stat.py b/pyGSI/gsi_stat.py index 9d86858..3f60a01 100644 --- a/pyGSI/gsi_stat.py +++ b/pyGSI/gsi_stat.py @@ -285,8 +285,8 
@@ def _get_conv(self, name): # don't add cpen or qcpen either # careful here, cpen here also removes qcpen # hence the extra space before qcpen and cpen - if any(x in line for x in [' qcpen', ' cpen']): - continue + # if any(x in line for x in [' qcpen', ' cpen']): + # continue tmp.append(line.strip().split()) columns = header.split() diff --git a/scripts/plot_proftrace.py b/scripts/plot_proftrace.py index 1c256b9..a601500 100644 --- a/scripts/plot_proftrace.py +++ b/scripts/plot_proftrace.py @@ -2,24 +2,67 @@ # # YAML FILE CONTENTS # -# yaml file contents come in three types, based on the key name: +# yaml file contents come in several types, based on the key name: # -# figures: Defines settings for profile and trace figures -# pname: Name of profile figure-file -# tname: Name of trace figure-file +# fig_card_1: +# setdict1: +# +# setdict2: +# +# ... +# figopts: +# +# dates: +# +# fig_card_2: +# setdict1: +# +# setdict2: +# +# ... +# figopts: +# +# dates: +# +# +# ... +# +# The top-level keys are FIGURE CARDS, each of which contains all +# settings necessary to produce a single profile and trace figure set. The +# script will iterate through each figure card to produce the full set of +# figures. You can define figure cards to produce profile and trace figures +# for each ob-type you are interested in, for example. This keeps the user +# from having to edit the YAML multiple times to produce figures across multiple +# settings. +# +# Within each FIGURE CARD, you will find three kinds of keys: +# +# figopts: Defines settings for profile and trace figures for the figure +# card +# : +# errProName: Name of error (rms/bias) profile figure-file (None if not +# plotting this figure; defaults to None) +# errTraName: Name of error (rms/bias) trace figure-file (None if not +# plotting this figure; defaults to None) +# penProName: Name of DA penalty (mean) profile figure-file (None if not +# plotting this figure; defaults to None) +# penTraName: Name of DA penalty (mean) trace figure-file (None if not +# plotting this figure; defaults to None) # tskip: Number of ticks that are skipped when plotting tick-labels on # trace-figure (should be large enough to keep labels from -# stepping on each other) +# stepping on each other, defaults to 1, i.e.
no skipping) # # dates: Defines settings for time-period over which statistics are -# collected and plotted +# collected and plotted for figure card +# # dt_beg: Beginning date of time-series (%Y%m%d%H format) # dt_end: Ending date of time-series (%Y%m%d%H format) # hrs: Number of hours between each time-period between dt_beg and # dt_end (usually 6 for collecting every cycle) # # : Each member of the plot is provided as a separate -# dictionary with the following keys: +# SET DICTIONARY with the following keys: +# : # name: Name of member, used in the legend of the plot to distinguish # lines or bars belonging to this set # var: Variable being extracted ('uv', 't', or 'q') @@ -68,17 +111,37 @@ def get_datelist(startdate, enddate, hrs): return datelist -def parse_yaml(yaml_file): +def parse_cards(yaml_file): + import yaml + # Parses top-level keys of YAML file (figure cards), returns list of keys + with open(yaml_file, 'r') as stream: + try: + parsed_yaml = yaml.safe_load(stream) + except yaml.YAMLError as YAMLError: + parsed_yaml = None + print(f'YAMLError: {YAMLError}') + if parsed_yaml is not None: + # Extract figure cards + try: + figureCards = list(parsed_yaml.keys()) + except KeyError as MissingCardsError: + figureCards = None + print(f'MissingCardsError: {MissingCardsError}') + return figureCards + + +def parse_yaml(yaml_file, card): import yaml - # YAML entries come in three types: + # YAML entries come in three types per FIGURE CARD provided: # key='dates' : provides beginning (dt_beg)/ending (dt_end) dates # for get_datelist in %Y%m%d%H format, and # timedelta (hrs) - # key='figures' : provides figure filenames for profile figure - # (pname) and trace figure (tname), and number of + # key='figopts' : provides figure filenames for profile figures + # (errProName, penProName) and trace figures + # (errTraName, penTraName), and number of # times to skip on tick-labels (tskip) - # any other key: provides filtering dictionaries for a dataset to - # be plotted + # any other key: provides SET DICTIONARY for a dataset to + # be plotted (data filters, data name, filedir) with open(yaml_file, 'r') as stream: try: parsed_yaml = yaml.safe_load(stream) @@ -88,29 +151,55 @@ def parse_yaml(yaml_file): if parsed_yaml is not None: # Extract 'dates' data try: - dt_beg = parsed_yaml['dates']['dt_beg'] - dt_end = parsed_yaml['dates']['dt_end'] - hrs = parsed_yaml['dates']['hrs'] + dt_beg = parsed_yaml[card]['dates']['dt_beg'] + dt_end = parsed_yaml[card]['dates']['dt_end'] + hrs = parsed_yaml[card]['dates']['hrs'] except KeyError as MissingDatesError: dt_beg = None dt_end = None hrs = None print(f'MissingDatesError: {MissingDatesError}') - # Extract 'figures' data + # Extract 'figopts' data try: - pname = parsed_yaml['figures']['pname'] - tname = parsed_yaml['figures']['tname'] - tskip = parsed_yaml['figures']['tskip'] + # For figure names, must account for 2 conditions for + # a None results: + # 1. key is 'None' (convert to None) + # 2. 
key does not exist + # Initialize assuming the key does not exist, otherwise + # read the key + errProName = None + errTraName = None + penProName = None + penTraName = None + if 'errProName' in parsed_yaml[card]['figopts']: + errProName = None if \ + parsed_yaml[card]['figopts']['errProName'] == 'None' \ + else parsed_yaml[card]['figopts']['errProName'] + if 'errTraName' in parsed_yaml[card]['figopts']: + errTraName = None if \ + parsed_yaml[card]['figopts']['errTraName'] == 'None' \ + else parsed_yaml[card]['figopts']['errTraName'] + if 'penProName' in parsed_yaml[card]['figopts']: + penProName = None if \ + parsed_yaml[card]['figopts']['penProName'] == 'None' \ + else parsed_yaml[card]['figopts']['penProName'] + if 'penTraName' in parsed_yaml[card]['figopts']: + penTraName = None if \ + parsed_yaml[card]['figopts']['penTraName'] == 'None' \ + else parsed_yaml[card]['figopts']['penTraName'] + tskip = parsed_yaml[card]['figopts']['tskip'] except KeyError as MissingFiguresError: - pname = None - tname = None + errProName = None + errTraName = None + penProName = None + penTraName = None tskip = None - print(f'MissingFiguresError: {MissingFiguresError}') + print(f'MissingFigoptsError: {MissingFiguresError}') # Extract all other keys as filtering dictionaries, store # in setdict list setdict = [] - fdicts = {x: parsed_yaml[x] for x in parsed_yaml - if x not in {'dates', 'figures'}} + fdicts = {x: parsed_yaml[card][x] for x in parsed_yaml[card] + if x not in {'dates', 'figopts'}} fkeys = ['it', 'use', 'typ', 'styp'] # Check filter keys for 'None' and change to None as # appropriate before storing @@ -123,24 +212,24 @@ def parse_yaml(yaml_file): except KeyError as FilterKeyError: print(f'FilterKeyError: {FilterKeyError}') setdict.append(fdict) - return dt_beg, dt_end, hrs, pname, tname, tskip, setdict + return (dt_beg, dt_end, hrs, errProName, errTraName, penProName, + penTraName, tskip, setdict) def collect_statistics(setdict): ###################################################################### # - # Collect rmse, bias, and ob-count statistics for each data (sub)set - # in setdict. Statistics are provided for each individual pressure-bin - # as well as a full-column value. + # Collect rmse, bias, penalty, and ob-count statistics for each data + # (sub)set in setdict. Statistics are provided for each individual + # pressure-bin as well as a full-column value. # - # If a gsistat file is missing for a given cycle, it raises a - # FileNotFoundError and the cycle is skipped, not appearing at all in - # the time-series + # If a gsistat file is missing for a given cycle, the cycle is + # skipped, not appearing at all in the time-series. # # If a gsistat file exists for a given cycle but no data for a data - # (sub)set is present, the rmse, bias, and ob-count are set to zero but - # the cycle is not skipped. Statistics with zero ob-count are - # reassigned to NaN later. + # (sub)set is present, the rmse, bias, penalty, and ob-count are set + # to zero but the cycle is not skipped. Statistics with zero ob-count + # are reassigned to NaN later.
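For reference, the figure-name handling in parse_yaml above (a missing figopts key or the string 'None' both mean "do not plot this figure") can be summarized as a small standalone helper; the helper name _resolve_figname and the example dictionary are illustrative only, not part of this patch:

def _resolve_figname(figopts, key):
    # A missing key or the YAML string 'None' both mean "skip this figure"
    value = figopts.get(key)
    return None if value in (None, 'None') else value

figopts = {'errProName': 'gsistat_errprofs.png', 'penProName': 'None'}
_resolve_figname(figopts, 'errProName')  # 'gsistat_errprofs.png'
_resolve_figname(figopts, 'penProName')  # None (string 'None' in YAML)
_resolve_figname(figopts, 'penTraName')  # None (key missing entirely)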
# # INPUTS: # setdict: List of dictionaries for each subset, including name, @@ -154,6 +243,7 @@ def collect_statistics(setdict): # where a gsistat file exists # biases: average difference between subset of obs and model # for each cycle where a gsistat file exists + # cpens: DA penalty per-observation among subset obs # counts: number of subset obs for each cycle where a gsistat # file exists # levels: pressure levels for each subset of obs @@ -170,6 +260,7 @@ def collect_statistics(setdict): rmses = {} counts = {} biases = {} + cpens = {} levels = {} for sd in setdict: setname = sd['name'] @@ -177,17 +268,29 @@ def collect_statistics(setdict): rmses[setname] = {} counts[setname] = {} biases[setname] = {} + cpens[setname] = {} for cycle in cycles: rmses[setname][cycle] = {} counts[setname][cycle] = {} biases[setname][cycle] = {} + cpens[setname][cycle] = {} inputfile = os.path.join(statdir, 'gsistat.gdas.'+cycle) - try: + # Test if inputfile exists, if not, set all values + # for rmses/counts/cpens/biases to None for this setname + # and cycle for both 'lev' and 'col' values + if os.path.exists(inputfile): gdas = GSIstat(inputfile, cycle) - except FileNotFoundError: - cycles.remove(cycle) - raise FileNotFoundError(f'Unable to find {inputfile}\ - for {cycle}') + else: + rmses[setname][cycle]['lev'] = None + counts[setname][cycle]['lev'] = None + biases[setname][cycle]['lev'] = None + cpens[setname][cycle]['lev'] = None + rmses[setname][cycle]['col'] = None + counts[setname][cycle]['col'] = None + biases[setname][cycle]['col'] = None + cpens[setname][cycle]['col'] = None + # Skip rest of this for-loop entry + continue # statistics are drawn for a single variable var = sd['var'] # all other filters (it,use,typ,styp) could contain multiple @@ -228,8 +331,8 @@ def collect_statistics(setdict): slice(None), slice(None), slice(None), slice(None)), :] # Filter to just the first observation type in the frame - # (index=[0:3] is count, rms, and bias of first entry) - s = s.iloc[0:3, :] + # (index=[0:4] is count, rms, cpen, and bias of first entry) + s = s.iloc[0:4, :] # Zero-out all elements s.loc[(slice(None), slice(None), slice(None), slice(None), slice(None), slice(None), @@ -242,8 +345,8 @@ def collect_statistics(setdict): slice(None), slice(None), slice(None), slice(None)), :] # Filter to just the first observation type in the frame - # (index=[0:3] is count, rms, and bias of first entry) - s = s.iloc[0:3, :] + # (index=[0:4] is count, rms, cpen, and bias of first entry) + s = s.iloc[0:4, :] # Zero-out all elements s.loc[(slice(None), slice(None), slice(None), slice(None), slice(None), slice(None), @@ -267,6 +370,10 @@ def collect_statistics(setdict): slice(None), slice(None), slice(None), slice(None), 'bias'), s.columns[:-1]] + cpens[setname][cycle]['lev'] = s.loc[(slice(None), slice(None), + slice(None), slice(None), + slice(None), slice(None), + 'cpen'), s.columns[:-1]] rmses[setname][cycle]['col'] = s.loc[(slice(None), slice(None), slice(None), slice(None), slice(None), slice(None), @@ -279,21 +386,26 @@ def collect_statistics(setdict): slice(None), slice(None), slice(None), slice(None), 'bias'), s.columns[-1]] + cpens[setname][cycle]['col'] = s.loc[(slice(None), slice(None), + slice(None), slice(None), + slice(None), slice(None), + 'cpen'), s.columns[-1]] # Return output dictionaries - return rmses, biases, counts, levels + return rmses, biases, cpens, counts, levels -def aggregate_figure_data(rmses, biases, counts, levs): +def aggregate_figure_data(rmses, biases, cpens, counts, levs): 
###################################################################### # - # Generate plotting data by aggregating rmse, bias, and count data - # across times (for profiles), or aggregate full-column data (for + # Generate plotting data by aggregating rmse, bias, penalty, and count + # data across times (for profiles), or aggregate full-column data (for # traces). Returned lists of profile and trace data serve as inputs to # plotting functions. # # INPUTS: # rmses: Dictionary of rmse data for each subset # biases: Dictionary of bias data for each subset + # cpens: Dictionary of per-ob penalty data for each subset # counts: Dictionary of ob-count data for each subset # levs: Dictionary of pressure-bin level data for each subset # @@ -302,6 +414,8 @@ def aggregate_figure_data(rmses, biases, counts, levs): # rmse_trace: List of rmse time-series traces to plot # bias_profs: List of bias profiles to plot # bias_trace: List of bias time-series traces to plot + # cpen_profs: List of cpen profiles to plot + # cpen_trace: List of cpen time-series traces to plot # nobs_profs: List of ob-count profiles to plot # nobs_trace: List of ob-count time-series traces to plot # levl_profs: List of pressure-bin levels for profiles @@ -319,6 +433,8 @@ def aggregate_figure_data(rmses, biases, counts, levs): rmse_trace = [] bias_profs = [] bias_trace = [] + cpen_profs = [] + cpen_trace = [] nobs_profs = [] nobs_trace = [] levl_profs = [] @@ -332,37 +448,49 @@ def aggregate_figure_data(rmses, biases, counts, levs): nlev = np.size(plevs) rmse = np.zeros((nlev, )) bias = np.zeros((nlev, )) + cpen = np.zeros((nlev, )) nobs = np.zeros((nlev, )) dates = list(rmses[setname].keys()) date_trace.append(dates) ndate = len(dates) - r_trace = np.zeros((ndate, )) - b_trace = np.zeros((ndate, )) + r_trace = np.zeros((ndate, )) # rms + b_trace = np.zeros((ndate, )) # bias + p_trace = np.zeros((ndate, )) # cpen n_trace = np.zeros((ndate, )) for i in range(ndate): date = dates[i] - n_ele = np.size(rmses[setname][date]['col'].values) + # If rmses[setname][date]['col] is NoneType, set n_ele=0 + if rmses[setname][date]['col'] is None: + n_ele = 0 + # If not NoneType, define n_ele by values in dataframe + else: + n_ele = np.size(rmses[setname][date]['col'].values) for j in range(n_ele): - # Aggregate rms, bias, and count for full-column values at each - # time to define traces + # Aggregate rms, bias, cpen, and count for full-column + # values at each time to define traces rcol = rmses[setname][date]['col'].values[j] bcol = biases[setname][date]['col'].values[j] + pcol = cpens[setname][date]['col'].values[j] ncol = counts[setname][date]['col'].values[j] ncol2 = n_trace[i] + ncol if ncol2 > 0: rcol2 = np.sqrt((n_trace[i] * r_trace[i] ** 2. + ncol * rcol ** 2.) / ncol2) bcol2 = (n_trace[i] * b_trace[i] + ncol * bcol) / ncol2 + pcol2 = (n_trace[i] * p_trace[i] + ncol * pcol) / ncol2 else: rcol2 = 0. bcol2 = 0. + pcol2 = 0. 
r_trace[i] = rcol2 b_trace[i] = bcol2 + p_trace[i] = pcol2 n_trace[i] = ncol2 - # Aggregate rms, bias, and count at each level across all times - # to define profiles - r = rmses[setname][date]['lev'].values[j] - b = biases[setname][date]['lev'].values[j] + # Aggregate rms, bias, cpen, and count at each level across + # all times to define profiles + r = rmses[setname][date]['lev'].values[j] # rms + b = biases[setname][date]['lev'].values[j] # bias + p = cpens[setname][date]['lev'].values[j] # cpen c = counts[setname][date]['lev'].values[j] for k in range(nlev): c2 = nobs[k] + c[k] @@ -370,28 +498,36 @@ def aggregate_figure_data(rmses, biases, counts, levs): r2 = np.sqrt((nobs[k] * rmse[k] ** 2. + c[k] * r[k] ** 2.) / c2) b2 = (nobs[k] * bias[k] + c[k] * b[k]) / c2 + p2 = (nobs[k] * cpen[k] + c[k] * p[k]) / c2 else: r2 = 0. b2 = 0. + p2 = 0. rmse[k] = r2 bias[k] = b2 + cpen[k] = p2 nobs[k] = c2 # Any score with 0 obs should be changed to NaN before entering into # record rmse[nobs == 0] = np.nan bias[nobs == 0] = np.nan + cpen[nobs == 0] = np.nan r_trace[n_trace == 0] = np.nan b_trace[n_trace == 0] = np.nan + p_trace[n_trace == 0] = np.nan # Append profile and trace data to output lists rmse_trace.append(r_trace) bias_trace.append(b_trace) + cpen_trace.append(p_trace) nobs_trace.append(n_trace) rmse_profs.append(rmse) bias_profs.append(bias) + cpen_profs.append(cpen) nobs_profs.append(nobs) # Return output lists - return (rmse_profs, rmse_trace, bias_profs, bias_trace, nobs_profs, - nobs_trace, levl_profs, date_trace, name_list) + return (rmse_profs, rmse_trace, bias_profs, bias_trace, cpen_profs, + cpen_trace, nobs_profs, nobs_trace, levl_profs, date_trace, + name_list) def plot_stat_profiles(rmsList, biasList, countList, nameList, plevList, @@ -521,6 +657,123 @@ def plot_stat_profiles(rmsList, biasList, countList, nameList, plevList, return plt.gcf() +def plot_cpen_profiles(penList, countList, nameList, plevList, + colMap=['tab10', 10]): + ###################################################################### + # Generates 2-panel plot: + # Left: Profiles of penalty for each set + # Right: Horizontal bar-chart of ob-counts for each set + # + # INPUTS + # penList: list of numpy arrays (nlev,) of cpen by pressure level + # for each set + # countList: list of numpy arrays (nlev,) of ob-count by pressure + # level for each set + # nameList: list of names for each set (for figure legend) + # plevList: list of profile pressure levels (str or float) + # colMap: 2-element list containing colormap and colormap-range for + # panels (default: ['tab10',10])) + # OUTPUTS + # plotFig: plot figure, as plt.fig() + # + # DEPENDENCIES + # numpy + # matplotlib.rc + # matplotlib.pyplot + # matplotlib.cm + ###################################################################### + import numpy as np + import matplotlib + import matplotlib.pyplot as plt + import matplotlib.cm as cm + # + # Set font size and type + # + font = {'family': 'DejaVu Sans', + 'weight': 'bold', + 'size': 22} + + matplotlib.rc('font', **font) + # + # Identify number of profile sets (should be indentical for penList, + # countList) + # + n_profiles = len(penList) + # + # Define colormap: The default settings are to select a range of 10 on + # 'tab10', so that 10 penalty profiles can be produced for + # the left panel that correspond to 10 ob-counts on the right panel. + # The user can select a different colormap and range with colMapLeft + # and colMapRight options. 
+ # If you want to sample the entire colorbar, set the range to the + # number of datasets plotted. + # + # scalarMapList is used to select colors for each profile/bar + scalarMap = cm.ScalarMappable(cmap=colMap[0]) + scalarMapList = scalarMap.to_rgba(range(colMap[1])) + # + # Generate figure + plt.figure(figsize=(18, 18)) + # Define offset values (needed to define y-axis limits on both plots, + # for consistency) + y_offset = 8.0*(np.arange(n_profiles)-np.floor(n_profiles/2)) + # LEFT PANEL: penalty scores + plt.subplot(121) + # For each set, plot penalty: plot lines and circular markers + legend_list = [] + for i in range(n_profiles): + pen = penList[i] + levs = plevList[i] + n_levs = np.size(levs) + # Define y-axis limits + y_min = min(levs)+min(y_offset)-8.0 + y_max = max(levs)+max(y_offset)+8.0 + # pen profile + legend_list.append(nameList[i]+' pen') + prof_color = list(scalarMapList[i][0:3]) + plt.plot(pen, levs, color=prof_color, linewidth=3) + plt.plot(pen, levs, 'o', color=prof_color, markersize=8, + label='_nolegend_') + # Zero-line + plt.plot(np.zeros((n_levs, )), levs, color='k', linewidth=1, + linestyle='dashed', label='_nolegend_') + # Set y-limits to entire range + plt.ylim((y_min, y_max)) + plt.yticks(levs) + # Reverse y-axis, if levs is in descending-order (often the case with + # pressure coordinate data) + if (levs[1] < levs[0]): + plt.gca().invert_yaxis() + # Set legend + plt.legend(legend_list, frameon=False, fontsize=10) + # Set x-label + plt.xlabel('Penalty') + # RIGHT PANEL: ob-counts + plt.subplot(122) + # For each set, plot ob-counts and generate legend list + # Counts are asserted to be in thousands + legend_list = [] + for i in range(n_profiles): + count = 0.001*countList[i] + bar_color = list(scalarMapList[i][0:3]) + plt.barh(levs+y_offset[i], count, height=8.0, color=bar_color) + # Set y-limits to entire range + plt.ylim((y_min, y_max)) + plt.yticks(levs) + # Reverse y-axis, if levs is in descending-order (often the case with + # pressure coordinate data) + if (levs[1] < levs[0]): + plt.gca().invert_yaxis() + # Set legend + plt.legend(nameList, frameon=False, fontsize=10) + # Set x-label + plt.xlabel('Ob Count (Thousands)') + # Turn off interactive-mode to suppress plotting figure + plt.ioff() + # Return + return plt.gcf() + + def plot_stat_traces(rmsList, biasList, countList, nameList, dateList, tskip=4, colMap=['tab10', 10]): ###################################################################### @@ -650,55 +903,200 @@ def plot_stat_traces(rmsList, biasList, countList, nameList, dateList, return plt.gcf() -if __name__ == "__main__": +def plot_cpen_traces(penList, countList, nameList, dateList, + tskip=4, colMap=['tab10', 10]): ###################################################################### - import matplotlib.pyplot as plt - # - # Parse yaml file and define settings for plotting + # Generates 2-panel plot: + # Top: Trace of penalty for each set + # Bottom: Bar-chart of ob-counts for each set # - (dt_beg, dt_end, hrs, pname, tname, tskip, - setdict) = parse_yaml('proftrace_yaml.yaml') - cycles = get_datelist(dt_beg, dt_end, hrs) - profs_filename = pname - trace_filename = tname + # INPUTS + # penList: list of numpy arrays (nlev,) of cpen by pressure level + # for each set + # countList: list of numpy arrays (nlev,) of ob-count by pressure + # level for each set + # nameList: list of names for each set (for figure legend) + # dateList: list of dates (str in '%Y%m%d%H' format) + # tskip: number of ticks to skip when writing tick-labels (default: 4) + # 
colMap: 2-element list containing colormap and colormap-range for + # panels (default: ['tab10',10])) + # OUTPUTS + # plotFig: plot figure, as plt.fig() # + # DEPENDENCIES + # numpy + # datetime + # matplotlib.rc + # matplotlib.pyplot + # matplotlib.cm ###################################################################### + import numpy as np + from datetime import datetime + import matplotlib + import matplotlib.pyplot as plt + import matplotlib.cm as cm # - # Collect rmse, bias, and ob-count statistics for each data (sub)set - # in setdict - # - # If a gsistat file is missing for a given cycle, it raises a - # FileNotFoundError and the cycle is skipped, not appearing at all in - # the time-series + # Set font size and type # - # If a gsistat file exists for a given cycle but no data for a data - # (sub)set is present, the rmse, bias, and ob-count are set to zero - # but the cycle is not skipped. Statistics with zero ob-count are - # reassigned to NaN later. + font = {'family': 'DejaVu Sans', + 'weight': 'bold', + 'size': 22} + + matplotlib.rc('font', **font) # - rmses, biases, counts, levels = collect_statistics(setdict) - ###################################################################### + # Identify number of trace sets (should be indentical for penList, + # countList) # - # Generate plotting data by aggregating rmse, bias, and count data - # across times (for profiles), or aggregate full-column data (for - # traces) + n_trace = len(penList) # - (rmse_profs, rmse_trace, bias_profs, bias_trace, - nobs_profs, nobs_trace, levl_profs, date_trace, - name_list) = aggregate_figure_data(rmses, biases, counts, levels) - ###################################################################### + # Define colormap: The default settings are to select a range of 10 on + # 'tab10', so that 10 penalty profiles can be produced for + # the left panel that correspond to 10 ob-counts on the right panel. + # The user can select a different colormap and range with colMapLeft + # and colMapRight options. If you want to sample the entire colorbar, + # set the range to the number of datasets plotted. 
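As a standalone illustration (not part of this patch) of the colormap sampling described above: a ScalarMappable maps the integers 0 through range-1 onto the named colormap, and each plotted set takes one RGBA row. The variable names mirror those used in the plotting functions:

import matplotlib.cm as cm
colMap = ['tab10', 10]                               # colormap name and sampling range
scalarMap = cm.ScalarMappable(cmap=colMap[0])
scalarMapList = scalarMap.to_rgba(range(colMap[1]))  # (10, 4) array of RGBA rows
set0_color = list(scalarMapList[0][0:3])             # RGB triple for the first set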
# - # Generate profile plot + # scalarMapList is used to select colors for each profile/bar + scalarMap = cm.ScalarMappable(cmap=colMap[0]) + scalarMapList = scalarMap.to_rgba(range(colMap[1])) # - fig_prof = plot_stat_profiles(rmse_profs, bias_profs, nobs_profs, - name_list, levl_profs) + # Generate figure + plt.figure(figsize=(18, 18)) + # Define offset values (needed to define x-axis limits on both plots, + # for consistency) + offs = 0.8/n_trace + x_offset = offs*(np.arange(n_trace)-np.floor(n_trace/2)) + # TOP PANEL: penalty traces + plt.subplot(211) + # For each set, plot penalty: plot both lines and circular markers + legend_list = [] + for i in range(n_trace): + pen = penList[i] + dates = dateList[i] + # Convert dates from %Y%m%d%H format to %b%d:%HZ tick-label format + dstr = [] + for d in dates: + dt = datetime.strptime(d, '%Y%m%d%H') + dstr.append(datetime.strftime(dt, '%b%d:%HZ')) + n_dates = np.size(dates) + # Define x-axis limits + x_min = min(x_offset)-offs + x_max = n_dates+max(x_offset)+offs + x_rng = np.arange(1, n_dates+1.0E-05) + # pen trace + legend_list.append(nameList[i]+' pen') + prof_color = list(scalarMapList[i][0:3]) + plt.plot(x_rng, pen, color=prof_color, linewidth=3) + plt.plot(x_rng, pen, 'o', color=prof_color, markersize=8, + label='_nolegend_') + # Zero-line + plt.plot(x_rng, np.zeros((n_dates, )), color='k', linewidth=1, + linestyle='dashed', label='_nolegend_') + # Set x-limits to entire range + plt.xlim((x_min, x_max)) + plt.xticks(ticks=x_rng[::tskip], labels=dstr[::tskip], fontsize=10) + # Set legend + plt.legend(legend_list, frameon=False, fontsize=10) + # Set y-label + plt.ylabel('Penalty') + # BOTTOM PANEL: ob-counts + plt.subplot(212) + # For this plot, bars need to be offset from each other so that they + # all cluster around the pressure + # level. 
This is accomplished with an offset value added to each bar + legend_list = [] + for i in range(n_trace): + count = 0.001*countList[i] + bar_color = list(scalarMapList[i][0:3]) + plt.bar(x_rng+x_offset[i], count, width=offs, color=bar_color) + # Set x-limits to entire range + plt.xlim((x_min, x_max)) + plt.xticks(ticks=x_rng[::tskip], labels=dstr[::tskip], fontsize=10) + # Set legend + plt.legend(nameList, frameon=False, fontsize=10) + # Set y-label + plt.ylabel('Ob Count (Thousands)') + # Turn off interactive-mode to suppress plotting figure plt.ioff() - fig_prof.savefig(profs_filename, bbox_inches='tight', facecolor='w') + # Return + return plt.gcf() + + +if __name__ == "__main__": + ###################################################################### + import matplotlib.pyplot as plt # - # Generate trace plot + # Parse figure cards from yaml file # - fig_trace = plot_stat_traces(rmse_trace, bias_trace, nobs_trace, - name_list, date_trace, tskip=tskip) - plt.ioff() - fig_trace.savefig(trace_filename, bbox_inches='tight', facecolor='w') - ###################################################################### + yamlFile = 'proftrace_yaml.yaml' + figCards = parse_cards(yamlFile) + if figCards is not None: + for card in figCards: + print('Processing Figure Card: ', card) + # Parse yaml file for card and define settings for plotting + (dt_beg, dt_end, hrs, errProName, errTraName, penProName, + penTraName, tskip, setdict) = parse_yaml(yamlFile, card) + cycles = get_datelist(dt_beg, dt_end, hrs) + # + ############################################################## + # + # Collect rmse, bias, penalty, and ob-count statistics for + # each data (sub)set in setdict + # + # If a gsistat file is missing for a given cycle, the cycle is + # skipped, not appearing at all in the time-series + # + # If a gsistat file exists for a given cycle but no data for a + # data (sub)set is present, the rmse, bias, penalty, and + # ob-count are set to zero but the cycle is not skipped. + # Statistics with zero ob-count are reassigned to NaN later. 
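The count-weighted pooling that aggregate_figure_data (called just below) applies when merging cycles into traces and levels into profiles can be sketched as a standalone helper; the name pool_stats and the example numbers are illustrative only, not part of this patch:

import numpy as np

def pool_stats(n1, rms1, mean1, n2, rms2, mean2):
    # Pool rms and a mean statistic (bias or cpen) from two samples,
    # weighting by ob-count; a zero total count returns zeros (NaN'd later)
    n = n1 + n2
    if n == 0:
        return 0., 0., 0
    rms = np.sqrt((n1 * rms1 ** 2. + n2 * rms2 ** 2.) / n)
    mean = (n1 * mean1 + n2 * mean2) / n
    return rms, mean, n

rms, bias, nobs = pool_stats(100, 1.2, 0.1, 50, 1.8, -0.2)  # pool two cycles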
+ # + rmses, biases, cpens, counts, levels = collect_statistics(setdict) + ############################################################## + # + # Generate plotting data by aggregating rmse, bias, penalty, + # and count data across times (for profiles), or aggregate + # full-column data (for traces) + # + (rmse_profs, rmse_trace, bias_profs, bias_trace, + cpen_profs, cpen_trace, nobs_profs, nobs_trace, levl_profs, + date_trace, name_list) = aggregate_figure_data(rmses, biases, + cpens, counts, + levels) + ############################################################## + # + # Generate profile plots + # + if errProName is not None: + fig_prof = plot_stat_profiles(rmse_profs, bias_profs, + nobs_profs, name_list, + levl_profs) + plt.ioff() + fig_prof.savefig(errProName, bbox_inches='tight', + facecolor='w') + if penProName is not None: + fig_prof = plot_cpen_profiles(cpen_profs, nobs_profs, + name_list, levl_profs) + plt.ioff() + fig_prof.savefig(penProName, bbox_inches='tight', + facecolor='w') + # + # Generate trace plots + # + if errTraName is not None: + fig_trace = plot_stat_traces(rmse_trace, bias_trace, + nobs_trace, name_list, + date_trace, tskip=tskip) + plt.ioff() + fig_trace.savefig(errTraName, bbox_inches='tight', + facecolor='w') + if penTraName is not None: + fig_trace = plot_cpen_traces(cpen_trace, nobs_trace, + name_list, date_trace, + tskip=tskip) + plt.ioff() + fig_trace.savefig(penTraName, bbox_inches='tight', + facecolor='w') + ############################################################## + else: + print('No Figure Cards Found, Exiting...')
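For users adapting to the new YAML layout, a quick standalone sanity check of a figure-card file before running the script might look like the sketch below. Only the key layout (card, set dictionaries, figopts, dates) follows the format documented at the top of scripts/plot_proftrace.py; the function name check_cards is hypothetical and not part of this patch:

import yaml

def check_cards(yaml_file):
    # Load the YAML and report cards missing 'figopts', 'dates', or set dictionaries
    with open(yaml_file, 'r') as stream:
        cards = yaml.safe_load(stream)
    for card, contents in cards.items():
        missing = [k for k in ('figopts', 'dates') if k not in contents]
        sets = [k for k in contents if k not in ('figopts', 'dates')]
        if missing:
            print(f'{card}: missing required key(s) {missing}')
        if not sets:
            print(f'{card}: no set dictionaries found')

check_cards('proftrace_yaml.yaml')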