From 12fa2277076b6ac8d3e9bb51986432cfc8936697 Mon Sep 17 00:00:00 2001 From: Darin Comeau Date: Tue, 3 Oct 2023 17:59:48 -0500 Subject: [PATCH] Adding CICE-QC testing script and documentation --- .../mpas-seaice/testing/cice-qc/README.md | 25 + .../testing/cice-qc/job_script.cice-qc.anvil | 20 + .../cice-qc/job_script.cice-qc.chrysalis | 20 + .../testing/cice-qc/mpas-seaice.t-test.py | 747 ++++++++++++++++++ 4 files changed, 812 insertions(+) create mode 100644 components/mpas-seaice/testing/cice-qc/README.md create mode 100644 components/mpas-seaice/testing/cice-qc/job_script.cice-qc.anvil create mode 100644 components/mpas-seaice/testing/cice-qc/job_script.cice-qc.chrysalis create mode 100644 components/mpas-seaice/testing/cice-qc/mpas-seaice.t-test.py diff --git a/components/mpas-seaice/testing/cice-qc/README.md b/components/mpas-seaice/testing/cice-qc/README.md new file mode 100644 index 000000000000..989deee96787 --- /dev/null +++ b/components/mpas-seaice/testing/cice-qc/README.md @@ -0,0 +1,25 @@ +# Quality Control (QC) testing for MPAS-Seaice + +This testing script to determine if the answers are climate-changing as determined by a 2 stage t-test was adapted from [CICE](https://cice-consortium-cice.readthedocs.io/en/main/user_guide/ug_testing.html#code-validation-test-non-bit-for-bit-validation). + +## Setup + +1. Setup and build a baseline and test case with recommended minimum resolution `RES=TL319_EC30to60E2r2`, and `COMPSET=DTESTM-JRA1p5`. +2. Turn on the ice thickness analysis member with the following namelist options in `user_nl_mpassi`: + + config_AM_thicknesses_enable = .true. + config_AM_thicknesses_compute_on_startup = .true. + config_AM_thicknesses_write_on_startup = .true. + +3. In the run directory's `streams.seaice` file, modify the `output` stream to have daily snapshot output by setting `output_interval="00-00-01_00:00:00"` (default is "none"). Copy this modified `streams.seaice` file to the case directory in `SourceMods/src.mpassi/.`. +4. Run each case for (at least) 5 years. + +## Usage + + python mpas-seaice.t-test.py $BASE $TEST + +where `$BASE`, `$TEST` are the paths to the run directories of the two tests containing the `mpassi.hist.*` files. A sample batch script for Chrysalis and Anvil is provided, `job_script.cice-qc.anvil`, `job_script.cice-qc.chrysalis`. + +## Output + +A `qc_log.txt` file provides testing progress and results. Map *png files are also generated (currently produces blank output, support still needed). \ No newline at end of file diff --git a/components/mpas-seaice/testing/cice-qc/job_script.cice-qc.anvil b/components/mpas-seaice/testing/cice-qc/job_script.cice-qc.anvil new file mode 100644 index 000000000000..5ef27fdfb341 --- /dev/null +++ b/components/mpas-seaice/testing/cice-qc/job_script.cice-qc.anvil @@ -0,0 +1,20 @@ +#!/bin/bash -l +#SBATCH --nodes=1 +#SBATCH --time=1:00:00 +#SBATCH -A condo +#SBATCH -p acme-small +#SBATCH --job-name=cice-qc +#SBATCH --output=log-cice-qc.o%j +#SBATCH --error=log-cice-qc.e%j + +cd $SLURM_SUBMIT_DIR +export OMP_NUM_THREADS=1 + +source /lcrc/soft/climate/e3sm-unified/load_latest_e3sm_unified_anvil.sh + +export HDF5_USE_FILE_LOCKING=FALSE + +export BASE=/lcrc/group/acme/ac.dcomeau/scratch/chrys/20221218.DMPAS-JRA1p5.TL319_EC30to60E2r2.chrysalis.column-package.intel/run +export TEST=/lcrc/group/acme/ac.dcomeau/scratch/chrys/20221218.DMPAS-JRA1p5.TL319_EC30to60E2r2.chrysalis.icepack.intel/run + +srun -N 1 -n 1 python mpas-seaice.t-test.py $BASE $TEST diff --git a/components/mpas-seaice/testing/cice-qc/job_script.cice-qc.chrysalis b/components/mpas-seaice/testing/cice-qc/job_script.cice-qc.chrysalis new file mode 100644 index 000000000000..57a110b7a71e --- /dev/null +++ b/components/mpas-seaice/testing/cice-qc/job_script.cice-qc.chrysalis @@ -0,0 +1,20 @@ +#!/bin/bash -l +#SBATCH --nodes=1 +#SBATCH --time=1:00:00 +#SBATCH -A e3sm +#SBATCH -p compute +#SBATCH --job-name=cice-qc +#SBATCH --output=log-cice-qc.o%j +#SBATCH --error=log-cice-qc.e%j + +cd $SLURM_SUBMIT_DIR +export OMP_NUM_THREADS=1 + +source /lcrc/soft/climate/e3sm-unified/load_latest_e3sm_unified_chrysalis.sh + +export HDF5_USE_FILE_LOCKING=FALSE + +export BASE=/lcrc/group/acme/ac.dcomeau/scratch/chrys/20221218.DMPAS-JRA1p5.TL319_EC30to60E2r2.chrysalis.column-package.intel/run +export TEST=/lcrc/group/acme/ac.dcomeau/scratch/chrys/20221218.DMPAS-JRA1p5.TL319_EC30to60E2r2.chrysalis.icepack.intel/run + +srun -N 1 -n 1 python mpas-seaice.t-test.py $BASE $TEST diff --git a/components/mpas-seaice/testing/cice-qc/mpas-seaice.t-test.py b/components/mpas-seaice/testing/cice-qc/mpas-seaice.t-test.py new file mode 100644 index 000000000000..e3e16a25edca --- /dev/null +++ b/components/mpas-seaice/testing/cice-qc/mpas-seaice.t-test.py @@ -0,0 +1,747 @@ +#!/usr/bin/env python +''' +This script performs the t-test validation for non-bit-for-bit results for the +CICE model. + +Written by: Matthew Turner +Date: October, 2017 + +Adapted for MPAS-Seaice +Darin Comeau +September 2022 +''' + +import os +import sys +import logging +import glob +import numpy as np +import numpy.ma as ma +import netCDF4 as nc + +import re +import fnmatch + +def maenumerate(marr): + ''' + This function provides the enumerate functionality for masked arrays + ''' + mask = ~marr.mask.ravel() + try: # Python 2 + import itertools + for i, m in itertools.izip(np.ndindex(marr.shape[-2:]), mask): + if m: yield i + except: # Python 3 + for i, m in zip(np.ndindex(marr.shape[-2:]), mask): + if m: yield i + +def gen_filenames(base_dir, test_dir): + ''' + This function is passed the directories of the baseline and test history + files and generates a list of filenames for each. + ''' + # The path to output files for simulation 'a' (the '-bc' simulation) + if base_dir.endswith(('run', 'run/')): + path_a = base_dir + else: + path_a = base_dir + '/run/' + + # The path to output files for simulation 'b' (the test simulation) + if test_dir.endswith(('run', 'run/')): + path_b = test_dir + else: + path_b = test_dir + '/run/' + + # Find the number of output files to be read in + files_a = fnmatch.filter(os.listdir(path_a+'/'), '*mpassi.hist.000*') + files_b = fnmatch.filter(os.listdir(path_b+'/'), '*mpassi.hist.000*') + + + if not len(files_a) == len(files_b): + logger.error("Number of output files for baseline simulation does not match the number" + \ + " of files for the test simulation. Exiting...\n" + \ + "Baseline directory: {}\n".format(path_a) + \ + " # of files: {}\n".format(len(files_a)) + \ + "Test directory: {}\n".format(path_b) + \ + " # of files: {}".format(len(files_b))) + sys.exit(-1) + + if len(files_a) < 60: + logger.error("Number of output files too small, expecting at least 60." + \ + " Exiting...\n" + \ + "Baseline directory: {}\n".format(path_a) + \ + " # of files: {}\n".format(len(files_a)) + \ + "Test directory: {}\n".format(path_b) + \ + " # of files: {}".format(len(files_b))) + sys.exit(-1) + + logger.info("Number of files: %d", len(files_a)) + + return path_a, path_b, files_a, files_b + +def get_geom(path, file): + ''' + This function reads the latCell, lonCell variables from a netcdf file + ''' + fid = nc.Dataset("{}/{}".format(path, file), 'r') + latCell = fid.variables['latCell'][:] + lonCell = fid.variables['lonCell'][:] + fid.close() + + return latCell, lonCell + +def read_data(path_a, path_b, files_a, files_b): + ''' + Read the baseline and test data for sea ice thickness. The calculate + the difference for all locations where sea ice thickness is greater + than 0.01 meters. + ''' + def fill_data_array(path, files): + '''Function to fill the data arrays''' + # # Initialize the data array + data = [] + cnt = 0 + logging.info("Reading data") + for fname in sorted(files): + nfid = nc.Dataset("{}/{}".format(path, fname), 'r') + tmp = nfid.variables[var][:][:] + if cnt == 0: + data = tmp + else: + data = np.append(data, tmp, axis=0) + cnt += 1 + nfid.close() + + return data # (nDays, nCells) + + def calc_diff(data_a, data_b): + ''' + Calculate the difference and mask the points where the the difference at + every timestep is 0, or the sea ice thickness for every timestep is < 0.01 meters + for data_a or data_b + ''' + logging.info("Diffing data") + data_d = data_a - data_b + mask_d = np.logical_or(\ + np.logical_or(\ + np.all(np.equal(data_d, 0.), axis=0), np.all(data_a < 0.01, axis=0))\ + , np.all(data_b < 0.01, axis=0)) + mask_array_a = np.zeros_like(data_d) + + for x, value in np.ndenumerate(mask_d): + iCell = x + mask_array_a[:, iCell] = value + del mask_d + + data_a = ma.masked_array(data_a, mask=mask_array_a) + data_b = ma.masked_array(data_b, mask=mask_array_a) + data_d = ma.masked_array(data_d, mask=mask_array_a) + + del mask_array_a + + return data_a, data_b, data_d + + var = 'iceThicknessCell' + + data_a = fill_data_array(path_a, files_a) + data_b = fill_data_array(path_b, files_b) + + data_a, data_b, data_d = calc_diff(data_a, data_b) + + return data_a, data_b, data_d + +def two_stage_test(data_a, num_samp, data_d, fname, path): + ''' + This function performs the Two-Stage Paired Thickness Test + ''' + def stage_one(data_d, num_samp, mean_d, variance_d): + logging.info("Testing data") + logger.debug('Running step 1 of 2-stage test') + + # Calculate the mean from 1:end-1 and 2:end + mean_nm1_d = np.mean(data_d[:-1, :], axis=0) + mean_2n_d = np.mean(data_d[1:, :], axis=0) + + # Calculate equation (5) for both simulations + r1_num = np.zeros_like(mean_d) + r1_den1 = np.zeros_like(mean_d) + r1_den2 = np.zeros_like(mean_d) + for i in np.arange(np.size(data_a, axis=0)-1): + r1_num = r1_num + (data_d[i, :]-mean_nm1_d[:])*(data_d[i+1, :]-mean_2n_d[:]) + r1_den1 = r1_den1 + np.square(data_d[i, :]-mean_nm1_d[:]) + + for i in np.arange(1, np.size(data_a, axis=0)): + r1_den2 = r1_den2 + np.square(data_d[i, :] - mean_2n_d[:]) + + r1 = r1_num / np.sqrt(r1_den1*r1_den2) + + # Calculate the effective sample size + n_eff = num_samp * ((1.-r1) / (1.+r1)) + n_eff[n_eff < 2] = 2 + n_eff[n_eff > num_samp] = num_samp + + # Calculate the t-statistic with n_eff + t_val = mean_d / np.sqrt(variance_d / n_eff) + + # Effective degrees of freedom + df = n_eff - 1 + + # Read in t_crit table + if os.path.exists('/lcrc/group/e3sm/public_html/inputdata/ice/mpas-seaice/general/CICE_t_critical_p0.8.nc'): + nfid = nc.Dataset("/lcrc/group/e3sm/public_html/inputdata/ice/mpas-seaice/general/CICE_t_critical_p0.8.nc", 'r') + else: + logger.error("Cannot locate file CICE_t_critical_p0.8.nc") + df_table = nfid.variables['df'][:] + t_crit_table = nfid.variables['tcrit'][:] + nfid.close() + t_crit = np.zeros_like(t_val) + + # Calculate critical t-value for each grid cell, based on the t_crit table + data_d = np.squeeze(data_d[:,0]) + for x in maenumerate(data_d): + min_val = np.min(np.abs(df[x]-df_table)) + idx = np.where(np.abs(df[x]-df_table) == min_val) + # Handle the cases where the data point falls exactly half way between + # 2 critical T-values (i.e., idx has more than 1 value in it) + while True: + try: + idx = idx[0] + except: + break + t_crit[x] = t_crit_table[idx] + + # Create an array of Pass / Fail values for each grid cell + H1 = np.abs(t_val) > t_crit + + return n_eff, H1, r1, t_crit + + # Calculate the mean of the difference + mean_d = np.mean(data_d, axis=0) + + # Loop through each timestep and calculate the square of the difference. + # This is required (instead of just np.square(data_d - mean_d) to reduce + # the memory footprint of the script. + tmp1 = np.zeros_like(data_d) + for i in np.arange(np.shape(data_d)[0]): + tmp1[i, :] = np.square(data_d[i, :] - mean_d) + variance_d = np.sum(tmp1) / float(num_samp - 1) + + n_eff, H1, r1, t_crit = stage_one(data_d, num_samp, mean_d, variance_d) + + if np.all(H1 == False) and np.all(n_eff >= 30): + # H0 confirmed in all cells, and all effective sample size >= 30 + logger.debug('H0 confirmed in all cells, and all effective sample size >= 30') + logger.info('2 stage test passed') + return True, H1 + + elif np.all(H1): + # H1 in every grid cell + logger.debug('H1 in all cells') + logger.info('2 stage test failed') + return False, H1 + +########### H0 confirmed for some grid cells with n_eff < 30 ############ + logger.debug('Number of H1 grid cells after stage 1 = %d', np.sum(H1)) + + logger.debug('Running step 2 of 2-stage test') + + # Find the indices where n_eff is less than 30, and H0 is confirmed + tmp_idx = np.where(n_eff < 30) and np.where(H1 == False) + + # Calculate the T-statistic using actual sample size + t_val = mean_d / np.sqrt(variance_d / num_samp) + + # Find t_crit from the nearest value on the Lookup Table Test + if os.path.exists('/lcrc/group/e3sm/public_html/inputdata/ice/mpas-seaice/general/CICE_Lookup_Table_p0.8_n1825.nc'): + nfid = nc.Dataset("/lcrc/group/e3sm/public_html/inputdata/ice/mpas-seaice/general/CICE_Lookup_Table_p0.8_n1825.nc", 'r') + else: + logger.error("Cannot locate file CICE_Lookup_Table_p0.8_n1825.nc") + r1_table = nfid.variables['r1'][:] + t_crit_table = nfid.variables['tcrit'][:] + nfid.close() + + # Fill t_crit based on lookup table + data_d = np.squeeze(data_d[:,0]) + for x in maenumerate(data_d): + min_val = np.min(np.abs(r1[x]-r1_table)) + idx = np.where(np.abs(r1[x]-r1_table) == min_val) + # Handle the cases where the data point falls exactly half way between + # 2 critical T-values (i.e., idx has more than 1 value in it) + while True: + try: + idx = idx[0] + except: + break + t_crit[x] = t_crit_table[idx] + + # Create an array showing locations of Pass / Fail grid cells + H1[tmp_idx] = abs(t_val[tmp_idx]) > t_crit[tmp_idx] + + logger.debug('Number of H1 grid cells after stage 2 = %d', np.sum(H1)) + + if np.all(H1): + # H1 in all grid cells + logger.debug('H1 in all cells, stage 2') + logger.info('2 Stage Test Failed') + return False, H1 + + elif np.all(H1 == False): + # H0 confirmed in all grid cells + logger.debug('H0 confirmed in all cells with n_eff < 30') + logger.info('2 Stage Test Passed') + return True, H1 + +####### Some grid cells have H0 confirmed, and some do not ###### + + # Calculate the area-weighted fraction of the test region that failed (f_val). + # If f_val is greater than or equal to the critical fraction, the test fails" + f_val = critical_fraction(data_a, H1, fname, path) + f_crit = 0.5 + if f_val >= f_crit: + logger.info('2 Stage Test Failed') + logger.debug('Area-weighted fraction of failures is greater than ' + \ + 'critical fraction. Test failed.') + logger.debug('Area-weighted fraction of failures = %f', f_val) + return False, H1 + else: + logger.info('2 Stage Test Passed') + logger.debug('Area-weighted fraction of failures = %f', f_val) + return True, H1 + +def critical_fraction(data_a, failures, fname, path_a): + ''' + This function calculates the area-weighted average of cells where H1 is true. + ''' + logger.debug('Calculating area-weighted average of H1 cells') + # First calculate the weight attributed to each grid point (based on Area) + nfid = nc.Dataset("{}/{}".format(path_a, fname), 'r') + tarea = nfid.variables['areaCell'][:] + nfid.close() + tarea = ma.masked_array(tarea, mask=data_a[0, :].mask) + area_weight = tarea / np.sum(tarea) + + # Calculate the area weight of the failing grid cells + weight_tot = 0 + weight_fail = 0 + data_a = np.squeeze(data_a[:,0]) + for x in maenumerate(data_a): + weight_tot += area_weight[x] + if failures[x]: + weight_fail += area_weight[x] + + return weight_fail/weight_tot + +def skill_test(path_a, fname, data_a, data_b, num_samp, hemisphere): + '''Calculate Taylor Skill Score''' + # First calculate the weight attributed to each grid point (based on Area) + nfid = nc.Dataset("{}/{}".format(path_a, fname), 'r') + tarea = nfid.variables['areaCell'][:] + nfid.close() + tarea = ma.masked_array(tarea, mask=data_a[0, :].mask) + area_weight = tarea / np.sum(tarea) + + weighted_mean_a = 0 + weighted_mean_b = 0 + for i in np.arange(num_samp): + weighted_mean_a = weighted_mean_a + np.sum(area_weight*data_a[i, :]) + weighted_mean_b = weighted_mean_b + np.sum(area_weight*data_b[i, :]) + + weighted_mean_a = weighted_mean_a / num_samp + weighted_mean_b = weighted_mean_b / num_samp + + nonzero_weights = np.count_nonzero(area_weight) + area_var_a = 0 + area_var_b = 0 + for t in np.arange(num_samp): + area_var_a = area_var_a + np.sum(area_weight*np.square(data_a[t, :]-weighted_mean_a)) + area_var_b = area_var_b + np.sum(area_weight*np.square(data_b[t, :]-weighted_mean_b)) + + area_var_a = nonzero_weights / (num_samp * nonzero_weights - 1.) * area_var_a + area_var_b = nonzero_weights / (num_samp * nonzero_weights - 1.) * area_var_b + std_a = np.sqrt(area_var_a) + std_b = np.sqrt(area_var_b) + + combined_cov = 0 + for i in np.arange(num_samp): + combined_cov = combined_cov + np.sum(area_weight*(data_a[i, :]-weighted_mean_a)*\ + (data_b[i, :]-weighted_mean_b)) + + combined_cov = nonzero_weights / (num_samp * nonzero_weights - 1.) * combined_cov + + weighted_r = combined_cov / (std_a*std_b) + + s = np.square((1+weighted_r)*(std_a*std_b)/\ + (area_var_a + area_var_b)) + + logger.debug('%s Hemisphere skill score = %f', hemisphere, s) + + s_crit = 0.99 + if s < 0 or s > 1: + logger.error('Skill score out of range for %s Hemisphere', hemisphere) + return False + elif s > s_crit: + logger.info('Quadratic Skill Test Passed for %s Hemisphere', hemisphere) + return True + else: + logger.info('Quadratic Skill Test Failed for %s Hemisphere', hemisphere) + return False + +### DC plot_data not yet working, produces empty plots, needs updating + +def plot_data(data, lat, lon, units, case, plot_type): + '''This function plots MPAS-Seaice data and creates a .png file.''' + + try: + # Load the necessary plotting libraries + import matplotlib.pyplot as plt + import cartopy.crs as ccrs + import cartopy.feature as cfeature + except ImportError: + logger.warning('Error loading necessary Python modules in plot_data function') + return + + # Suppress Matplotlib deprecation warnings + import warnings + warnings.filterwarnings("ignore", category=UserWarning) + + # define north and south polar stereographic coord ref system + npstereo = ccrs.NorthPolarStereo(central_longitude=-90.0) # define projection + spstereo = ccrs.SouthPolarStereo(central_longitude= 90.0) # define projection + + # define figure + fig = plt.figure(figsize=[14,7]) + + # add axis for each hemishpere + ax1 = fig.add_subplot(121,projection=npstereo) + ax2 = fig.add_subplot(122,projection=spstereo) + + # set plot extents + ax1.set_extent([-180.,180.,35.,90.],ccrs.PlateCarree()) + ax2.set_extent([-180.,180.,-90.,-35.],ccrs.PlateCarree()) + + # add land features NH plot + ax1.add_feature(cfeature.LAND, color='lightgray') + ax1.add_feature(cfeature.BORDERS) + ax1.add_feature(cfeature.COASTLINE) + + # add land features SH plot + ax2.add_feature(cfeature.LAND, color='lightgray') + ax2.add_feature(cfeature.BORDERS) + ax2.add_feature(cfeature.COASTLINE) + + # add grid lines + dlon = 30.0 + dlat = 15.0 + mpLons = np.arange(-180. ,180.0+dlon,dlon) + mpLats = np.arange(-90.,90.0+dlat ,dlat) + + g1 = ax1.gridlines(xlocs=mpLons,ylocs=mpLats, + draw_labels=True, + x_inline=False,y_inline=False) + + g2 = ax2.gridlines(xlocs=mpLons,ylocs=mpLats, + draw_labels=True, + x_inline=False,y_inline=False) + + + # Specify Min/max colors for each hemisphere + # check for minus to see if it is a difference plot + if '\n- ' in case: # this is a difference plot + # specify colormap + mycmap = 'seismic' # blue,white,red with white centered colormap + + # determine max absolute value to use for color range + # intent is use same min/max with center zero + dmin = np.abs(data.min()) + dmax = np.abs(data.max()) + clim = np.max([dmin,dmax]) + + # this specifies both hemishperes the same range. + cminNH = -clim + cmaxNH = clim + cminSH = -clim + cmaxSH = clim + + else: # not a difference plot + # specify colormap + mycmap = 'RdBu' + + # arbitrary limits for each Hemishpere + cminNH = 0.0 + cmaxNH = 5.0 + cminSH = 0.0 + cmaxSH = 2.0 + + if plot_type == 'scatter': + # plot NH + scNH = ax1.scatter(lon,lat,c=data,cmap=mycmap,s=4,edgecolors='none', + vmin=cminNH, vmax=cmaxNH, + transform=ccrs.PlateCarree()) + + # plot SH + scSH = ax2.scatter(lon,lat,c=data,cmap=mycmap,s=4,edgecolors='none', + vmin=cminSH, vmax=cmaxSH, + transform=ccrs.PlateCarree()) + + else: + if plot_type == 'contour': + print("contour plot depreciated. using pcolor.") + + scNH = ax1.pcolormesh(lon,lat,data,cmap=mycmap, + vmin=cminNH, vmax=cmaxNH, + transform=ccrs.PlateCarree()) + + scSH = ax2.pcolormesh(lon,lat,data,cmap=mycmap, + vmin=cminSH, vmax=cmaxSH, + transform=ccrs.PlateCarree()) + + plt.suptitle(f'MPAS-Seaice Mean Ice Thickness\n{case:s}') + + # add more whitespace between plots for colorbar. + plt.subplots_adjust(wspace=0.4) + + # add separate axes for colorbars + # first get position/size of current axes + pos1 = ax1.get_position() + pos2 = ax2.get_position() + + # now add new colormap axes using the position ax1, ax2 as reference + cax1 = fig.add_axes([pos1.x0+pos1.width+0.03, + pos1.y0, + 0.02, + pos1.height]) + + cax2 = fig.add_axes([pos2.x0+pos2.width+0.03, + pos2.y0, + 0.02, + pos2.height]) + + + if '\n- ' in case: + # If making a difference plot, use scientific notation for colorbar + cbNH = plt.colorbar(scNH, cax=cax1, orientation="vertical", + pad=0.1, format="%.1e") + cbSH = plt.colorbar(scSH, cax=cax2, orientation="vertical", + pad=0.1, format="%.1e") + + else: + #pass + # If plotting non-difference data, do not use scientific notation for colorbar + cbNH = plt.colorbar(scNH, cax=cax1, orientation="vertical", + pad=0.1, format="%.2f") + cbSH = plt.colorbar(scSH, cax=cax2, orientation="vertical", + pad=0.1, format="%.2f") + + cbNH.set_label(units, loc='center') + cbSH.set_label(units, loc='center') + + outfile = 'ice_thickness_{}.png'.format(case.replace('\n- ','_minus_')) + logger.info('Creating map of the data ({})'.format(outfile)) + plt.savefig(outfile, dpi=300, bbox_inches='tight') + +def plot_two_stage_failures(data, lat, lon): + '''This function plots each grid cell and whether or not it Passed or Failed + the two-stage test. It then either creates a .png file + (two_stage_test_failure_map.png), or saves the failure locations to a + text file. + ''' + + # Convert the boolean array (data) to an integer array + int_data = data.astype(int) + + try: + logger.info('Creating map of the failures (two_stage_test_failure_map.png)') + # Load the necessary plotting libraries + import matplotlib.pyplot as plt + import cartopy.crs as ccrs + import cartopy.feature as cfeature + from mpl_toolkits.axes_grid1 import make_axes_locatable + from matplotlib.colors import LinearSegmentedColormap + + # Suppress Matplotlib deprecation warnings + import warnings + warnings.filterwarnings("ignore", category=UserWarning) + + # Create the figure + fig = plt.figure(figsize=(12, 8)) + + # define plot projection and create axis + pltprj = ccrs.Mollweide(central_longitude=0.0) + ax = fig.add_subplot(111,projection=pltprj) + + # add land + ax.add_feature(cfeature.LAND, color='lightgray') + ax.add_feature(cfeature.BORDERS) + ax.add_feature(cfeature.COASTLINE) + #gshhs = cfeature.GSHHSFeature(scale='auto',facecolor='lightgray',edgecolor='none') + #ax.add_feature(gshhs) + + # Create the custom colormap + colors = [(0, 0, 1), (1, 0, 0)] # Blue, Red + cmap_name = 'RB_2bins' + cm = LinearSegmentedColormap.from_list(cmap_name, colors, N=2) + + # Plot the data as a scatter plot + sc = ax.scatter(lon,lat,c=int_data,cmap=cm,s=4,lw=0, + vmin=0.,vmax=1., + transform=ccrs.PlateCarree()) + + # add grid lines + dlon = 60.0 + dlat = 30.0 + mpLons = np.arange(-180. ,180.0+dlon,dlon) + mpLats = np.arange(-90.,90.0+dlat ,dlat) + mpLabels = {"left": "y", + "bottom": "x"} + + ax.gridlines(xlocs=mpLons,ylocs=mpLats, + draw_labels=mpLabels) + + plt.title('MPAS-Seaice Two-Stage Test Failures') + + # Create the colorbar and add Pass / Fail labels + divider = make_axes_locatable(ax) + cax = divider.append_axes("bottom", size="5%", pad=0.5) + cb = plt.colorbar(sc, cax=cax, orientation="horizontal", format="%.0f") + cb.set_ticks([]) + cb.ax.text(-0.01, -0.5, 'PASS') + cb.ax.text(0.99, -0.5, 'FAIL') + + plt.savefig('two_stage_test_failure_map.png', dpi=300) + except: + logger.warning('') + logger.warning('Unable to plot the data. Saving latitude and longitude') + logger.warning('for ONLY failures to two_stage_failure_locations.txt') + + # Create a file and write the failures only to the file + f = open('two_stage_failure_locations.txt', 'w') + f.write('# CICE Two-stage test failures\n') + f.write('# Longitude,Latitude\n') + for i in range(data.shape[0]): + for j in range(data.shape[1]): + if (not data.mask[i, j]) and data[i, j]: + f.write('{},{}\n'.format(lon[i, j], lat[i, j])) + + f.close() + +def main(): + import argparse + parser = argparse.ArgumentParser(description='This script performs the T-test for \ + CICE simulations that should be bit-for-bit, but are not.') + parser.add_argument('base_dir', \ + help='Path to the baseline history (iceh_inst*) files. REQUIRED') + parser.add_argument('test_dir', \ + help='Path to the test history (iceh_inst*) files. REQUIRED') + parser.add_argument('-v', '--verbose', dest='verbose', help='Print debug output?', \ + action='store_true') + parser.add_argument('-pt','--plot_type', dest='plot_type', help='Specify type of plot \ + to create', choices=['scatter','contour','pcolor']) + + parser.set_defaults(verbose=False) + parser.set_defaults(plot_type='pcolor') + + # If no arguments are provided, print the help message + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + + args = parser.parse_args() + + # Set up the logger + global logger + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + else: + logging.basicConfig(level=logging.INFO) + # Log to log file as well as stdout + fh = logging.FileHandler(r'qc_log.txt', 'w') + logger = logging.getLogger(__name__) + logger.addHandler(fh) + + logger.info('Running QC test on the following directories:') + logger.info(' {}'.format(args.base_dir)) + logger.info(' {}'.format(args.test_dir)) + + dir_a, dir_b, files_base, files_test = gen_filenames(args.base_dir, args.test_dir) + + t_lat, t_lon = get_geom(dir_a, files_base[0]) + + data_base, data_test, data_diff = read_data(dir_a, dir_b, files_base, files_test) + + num_samp, nCells = np.shape(data_base) + + if np.ma.all(data_diff.mask): + logger.info("Data is bit-for-bit. No need to run QC test") + sys.exit(0) + + # Run the two-stage test + PASSED, H1_array = two_stage_test(data_base, num_samp, data_diff, files_base[0], dir_a) + + # Delete arrays that are no longer necessary + del data_diff + + # If test failed, attempt to create a plot of the failure locations + if not PASSED: + plot_two_stage_failures(H1_array, t_lat, t_lon) + + # # Create plots of mean ice thickness + baseDir = os.path.abspath(args.base_dir).rstrip('run/').rstrip(\ + 'run').split('/')[-1] + testDir = os.path.abspath(args.test_dir).rstrip('run/').rstrip( \ + 'run').split('/')[-1] + plot_data(np.mean(data_base,axis=0), t_lat, t_lon, 'm', baseDir, args.plot_type) + plot_data(np.mean(data_test,axis=0), t_lat, t_lon, 'm', testDir, args.plot_type) + plot_data(np.mean(data_base-data_test,axis=0), t_lat, t_lon, 'm', '{}\n- {}'.\ + format(baseDir,testDir), args.plot_type) + + logger.error('Quality Control Test FAILED') + sys.exit(-1) + + # Create a northern hemisphere and southern hemisphere mask + mask_tlat = t_lat < 0 + mask_nh = np.zeros_like(data_base) + mask_sh = np.zeros_like(data_base) + for x, val in np.ndenumerate(mask_tlat): + mask_nh[:, x] = val + mask_sh[:, x] = not val + + # Run skill test on northern hemisphere + data_nh_a = ma.masked_array(data_base, mask=mask_nh) + data_nh_b = ma.masked_array(data_test, mask=mask_nh) + if np.ma.all(data_nh_a.mask) and np.ma.all(data_nh_b.mask): + logger.info("Northern Hemisphere data is bit-for-bit") + PASSED_NH = True + else: + PASSED_NH = skill_test(dir_a, files_base[0], data_nh_a, data_nh_b, num_samp, 'Northern') + + # Run skill test on southern hemisphere + data_sh_a = ma.masked_array(data_base, mask=mask_sh) + data_sh_b = ma.masked_array(data_test, mask=mask_sh) + if np.ma.all(data_sh_a.mask) and np.ma.all(data_sh_b.mask): + logger.info("Southern Hemisphere data is bit-for-bit") + PASSED_SH = True + else: + PASSED_SH = skill_test(dir_a, files_base[0], data_sh_a, data_sh_b, num_samp, 'Southern') + + PASSED_SKILL = PASSED_NH and PASSED_SH + + # Plot the ice thickness data for the base and test cases + baseDir = os.path.abspath(args.base_dir).rstrip('run/').rstrip( \ + 'run').split('/')[-1] + testDir = os.path.abspath(args.test_dir).rstrip('run/').rstrip( \ + 'run').split('/')[-1] + plot_data(np.mean(data_base,axis=0), t_lat, t_lon, 'm', baseDir, args.plot_type) + plot_data(np.mean(data_test,axis=0), t_lat, t_lon, 'm', testDir, args.plot_type) + plot_data(np.mean(data_base-data_test,axis=0), t_lat, t_lon, 'm', '{}\n- {}'.\ + format(baseDir,testDir), args.plot_type) + + logger.info('') + if not PASSED_SKILL: + logger.error('Quality Control Test FAILED') + sys.exit(1) # exit with an error return code + else: + logger.info('Quality Control Test PASSED') + sys.exit(0) # exit with successfull return code + +if __name__ == "__main__": + main()