From 93239ffec2fec336176e531d2d486a29e6199163 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Thu, 21 Oct 2021 22:42:37 +1100 Subject: [PATCH 01/23] Sketch out prototype --- mesmer/prototype/calibrate.py | 148 +++++++++++++++++++++++++ mesmer/prototype/calibrate_multiple.py | 25 +++++ 2 files changed, 173 insertions(+) create mode 100644 mesmer/prototype/calibrate.py create mode 100644 mesmer/prototype/calibrate_multiple.py diff --git a/mesmer/prototype/calibrate.py b/mesmer/prototype/calibrate.py new file mode 100644 index 00000000..8d8075e6 --- /dev/null +++ b/mesmer/prototype/calibrate.py @@ -0,0 +1,148 @@ +import abc + + +class MesmerCalibrateBase(metaclass=abc.ABCMeta): + """ + Abstract base class for calibration + """ + + # then we could build up utils, whether they were static methods of the + # base class or their own utils module doesn't really matter... + # an example is below, but we could have others to handle grouping, adding + # dimension information back to numpy arrays etc. + @staticmethod + def _flatten(target, predictor): + """ + Flatten xarray dataarrays into basic numpy arrays + + Parameters + ---------- + target : xarray.DataArray + + predictor : xarray.DataArray + + Returns + ------- + (np.ndarray, np.ndarray, dict) + Flattened target, flattened predictor and info required to convert + back into xarray if desired + """ + + @staticmethod + def _get_calibration_groups(target, groups): + return target.groupby(groups) + + +class MesmerCalibrateTargetOnly(MesmerCalibrateBase): + @abc.abstractmethod + def calibrate(self, predictor, **kwargs): + """ + Calibrate a model + + Parameters + ---------- + predictor : xarray.DataArray + Predictor data array + + **kwargs + Passed onto the calibration method + + Returns + ------- + xarray.DataArray + Fitting coefficients (have to think about how best to label and + store) + """ + +class MesmerCalibrateTargetPredictor(MesmerCalibrateBase): + @abc.abstractmethod + def calibrate(self, predictor, target, **kwargs): + """ + Calibrate a model + + Parameters + ---------- + predictor : xarray.DataArray + Predictor data array + + target : xarray.DataArray + Target data + + **kwargs + Passed onto the calibration method + + Returns + ------- + xarray.DataArray + Fitting coefficients (have to think about how best to label and + store) + """ + + +class LinearRegression(MesmerCalibrateTargetPredictor): + """ + following + + https://github.com/MESMER-group/mesmer/blob/d73e8f521a2e1d081a48b775ba14dd764cb671e8/mesmer/calibrate_mesmer/train_lt.py#L165 + + All the lines above and below that line are basically just data + preparation, which makes it very hard to see what the model actually is + """ + @staticmethod + def _regress_single_point(target_point, predictor, weights=None): + # this is the method that actually does the regression + args = [predictor, target_point] + if weights is not None: + args.append(weights) + + reg = LinearRegression().fit(*args) + + return reg + + def calibrate(self, target, predictor, weights=None, groups=["gridpoint"]): + # this method handles the pre-processing to get the data ready for the + # regression, does the regression (or calls another method to do so), + # then puts it all back into a DataArray for output + + # would need to be smarter here too to handle multiple predictors and/ + # or target variables but that should be doable + res = { + "intercepts": [], + "coef": [], + } + for target_group in self._get_calibration_groups(target, groups): + res_group = target_group.apply(self._regress_single_point, 
predictor, weights) + # need to be smarter about this, but idea is to store things using + # xarray DataArray + res["intercepts"].append(res_group.intercept_) + res["coef"].append(res_group.coef_) + + res = xr.DataArray(res) + return res + + +class AutoRegression(MesmerCalibrateTargetOnly): + def _regress_single(target, lags): + res = AutoReg(target, lags=lags).fit() + + return res + + def calibrate(self, target, groups=["gridpoint"], lags=1): + # would need to be smarter here too to handle multiple predictors and/ + # or target variables but that should be doable + res = { + "intercepts": [], + "coef": [], + "stds": [], + } + for target_group in self._get_calibration_groups(target, groups): + res_group = target_group.apply(self._regress_single, lags) + # need to be smarter about this, but idea is to store things using + # xarray DataArray + res["intercepts"].append(res_group.params[0]) + res["coef"].append(res_group.params[1]) + res["stds"].append(np.sqrt(res_group.sigma2)) + + res = xr.DataArray(res) + + return res diff --git a/mesmer/prototype/calibrate_multiple.py b/mesmer/prototype/calibrate_multiple.py new file mode 100644 index 00000000..103d454c --- /dev/null +++ b/mesmer/prototype/calibrate_multiple.py @@ -0,0 +1,25 @@ +def calibrate_multiple_scenarios_and_ensemble_members(targets, predictors, calibration_class, calibration_kwargs, weighting_style): + """ + Calibrate based on multiple scenarios and ensemble members per scenario + + Parameters + ---------- + targets : xarray.DataArray + Target variables to calibrate to. This should have a scenario and an ensemble-member dimension (you'd need a different function to prepare things such that the dimensions all worked) + + predictors : xarray.DataArray + Predictor variables, as above re comments about prepping data + + calibration_class : mesmer.calibration.MesmerCalibrateBase + Class to use for calibration + + calibration_kwargs : dict + Keyword arguments to pass to the calibration class + + weighting_style : ["equal", "equal_scenario", "equal_ensemble_member"] + How to weight combinations of scenarios and ensemble members. "equal" --> all scenario - ensemble member combination are treated equally. "equal_scenario" --> all scenarios get equal weight (so if a scenario has more ensemble members then each ensemble member gets less weight). "equal_ensemble_member" --> all ensemble members get the same weight (so if an ensemble member is reported for more than one scenario then each scenario gets less weight for that ensemble member) + """ + # this function (or class) would handle all of the looping over scenarios + # and ensemble members. 
It would call the calibration class on each + # scenario and ensemble member and know how much weight to give each + # before combining them From 9ae897f43c8a048825597412f0fdc4b134b65846 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sun, 24 Oct 2021 17:47:31 +1100 Subject: [PATCH 02/23] Mark path which is never used to simply things --- mesmer/calibrate_mesmer/train_lt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mesmer/calibrate_mesmer/train_lt.py b/mesmer/calibrate_mesmer/train_lt.py index 13ae4e93..9c57317f 100644 --- a/mesmer/calibrate_mesmer/train_lt.py +++ b/mesmer/calibrate_mesmer/train_lt.py @@ -222,5 +222,5 @@ def train_lt(preds, targs, esm, cfg, save_params=True): if len(params_lv) > 0: return params_lt, params_lv - else: - return params_lt + + raise NotImplementedError("This path is never used") From 35b5fed8b08d85ff17dcea0b7f9d892134bd2272 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sun, 24 Oct 2021 17:54:28 +1100 Subject: [PATCH 03/23] Block out another branch --- mesmer/calibrate_mesmer/train_lt.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mesmer/calibrate_mesmer/train_lt.py b/mesmer/calibrate_mesmer/train_lt.py index 9c57317f..41ea51d6 100644 --- a/mesmer/calibrate_mesmer/train_lt.py +++ b/mesmer/calibrate_mesmer/train_lt.py @@ -182,6 +182,9 @@ def train_lt(preds, targs, esm, cfg, save_params=True): ] coef_idx += 1 + else: + raise NotImplementedError() + # save the local trend paramters if requested if save_params: dir_mesmer_params = cfg.dir_mesmer_params From 92f1e4ec681d20d4cf51b13e97d3bf85e9bcd3c2 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sun, 24 Oct 2021 19:46:34 +1100 Subject: [PATCH 04/23] Implemente prototype method --- mesmer/calibrate_mesmer/train_lt.py | 2 +- mesmer/prototype/calibrate.py | 80 ++++++++----- tests/integration/test_prototype.py | 168 ++++++++++++++++++++++++++++ 3 files changed, 222 insertions(+), 28 deletions(-) create mode 100644 tests/integration/test_prototype.py diff --git a/mesmer/calibrate_mesmer/train_lt.py b/mesmer/calibrate_mesmer/train_lt.py index 41ea51d6..31654139 100644 --- a/mesmer/calibrate_mesmer/train_lt.py +++ b/mesmer/calibrate_mesmer/train_lt.py @@ -84,13 +84,13 @@ def train_lt(preds, targs, esm, cfg, save_params=True): assumption is fulfilled or rewrite code such that no longer necessary) """ - targ_names = list(targs.keys()) targ_name = targ_names[0] # because same approach for each targ pred_names = list(preds.keys()) # specify necessary variables from config file wgt_scen_tr_eq = cfg.wgt_scen_tr_eq + # This code will ever only work with a single target variable method_lt = cfg.methods[targ_name]["lt"] method_lv = cfg.methods[targ_name]["lv"] method_lt_each_gp_sep = cfg.method_lt_each_gp_sep diff --git a/mesmer/prototype/calibrate.py b/mesmer/prototype/calibrate.py index 8d8075e6..a64ab2a5 100644 --- a/mesmer/prototype/calibrate.py +++ b/mesmer/prototype/calibrate.py @@ -1,37 +1,54 @@ import abc +import numpy as np +import sklearn.linear_model +import xarray as xr + class MesmerCalibrateBase(metaclass=abc.ABCMeta): """ Abstract base class for calibration """ + def _flatten_predictors(self, predictors, flat_dim_order): + predictors_flat = [] + predictor_order = [] + for v, vals in predictors.items(): + predictor_order.append(v) + predictors_flat.append(self._flatten(vals, flat_dim_order)) - # then we could build up utils, whether they were static methods of the - # base class or their own utils module doesn't really matter... 
- # an example is below, but we could have others to handle grouping, adding - # dimension information back to numpy arrays etc. - @staticmethod - def _flatten(target, predictor): - """ - Flatten xarray dataarrays into basic numpy arrays + predictors_flat = np.vstack(predictors_flat).T - Parameters - ---------- - target : xarray.DataArray + return predictors_flat, predictor_order - predictor : xarray.DataArray + @staticmethod + def _flatten(values, flat_dim_order, dropna=True): + out = values.transpose(*flat_dim_order).values.flatten() + # TODO: better handling of nan + # Probably a better solution is to pass all the flattening up a level + # i.e. should be done before calibration is attempted/have a function + # to handle flattening scenarios + if dropna: + out = out[np.where(~np.isnan(out))] - Returns - ------- - (np.ndarray, np.ndarray, dict) - Flattened target, flattened predictor and info required to convert - back into xarray if desired - """ + return out @staticmethod def _get_calibration_groups(target, groups): return target.groupby(groups) + @staticmethod + def _unflatten_calibrated_values(res, dim_names, coords): + res_flat = np.vstack(list(res.values())) + coords = { + **coords, + "predictor": list(res.keys()) + } + + out = xr.DataArray(res_flat, dims=["predictor"] + dim_names, coords=coords) + + return out + + class MesmerCalibrateTargetOnly(MesmerCalibrateBase): @abc.abstractmethod @@ -95,11 +112,11 @@ def _regress_single_point(target_point, predictor, weights=None): if weights is not None: args.append(weights) - reg = LinearRegression().fit(*args) + reg = sklearn.linear_model.LinearRegression().fit(*args) return reg - def calibrate(self, target, predictor, weights=None, groups=["gridpoint"]): + def calibrate(self, target, predictors, weights=None, groups="gridpoint"): # this method handles the pre-processing to get the data ready for the # regression, does the regression (or calls another method to do so), # then puts it all back into a DataArray for output @@ -107,17 +124,26 @@ def calibrate(self, target, predictor, weights=None, groups=["gridpoint"]): # would need to be smarter here too to handle multiple predictors and/ # or target variables but that should be doable res = { - "intercepts": [], - "coef": [], + **{k: [] for k in predictors}, + "intercept": [], } - for target_group in self._get_calibration_groups(target, groups): - res_group = target_group.apply(self._regress_single_point, predictor, weights) + + flat_dim_order = [d for d in target.dims if d != groups] + + predictor_numpy, predictor_order = self._flatten_predictors(predictors, flat_dim_order) + for group_val, target_group in self._get_calibration_groups(target, groups): + target_group_flat = self._flatten(target_group, flat_dim_order) + + res_group = self._regress_single_point(target_group_flat, predictor_numpy, weights) + # need to be smarter about this, but idea is to store things using # xarray DataArray - res["intercepts"].append(res_group.intercept_) - res["coef"].append(res_group.coef_) + res["intercept"].append(res_group.intercept_) + for i, p in enumerate(predictor_order): + res[p].append(res_group.coef_[i]) + + res = self._unflatten_calibrated_values(res, dim_names=[groups], coords={groups: getattr(target, groups)}) - res = xr.DataArray(res) return res diff --git a/tests/integration/test_prototype.py b/tests/integration/test_prototype.py new file mode 100644 index 00000000..5c059420 --- /dev/null +++ b/tests/integration/test_prototype.py @@ -0,0 +1,168 @@ +import numpy as np +import numpy.testing as 
npt +import xarray as xr + +from mesmer.calibrate_mesmer.train_lt import train_lt +from mesmer.prototype.calibrate import LinearRegression + + +class _MockConfig: + def __init__( + self, + method_lt="OLS", + method_lv="OLS_AR1_sci", + separate_gridpoints=True, + weight_scenarios_equally=True, + target_variable="tas", + ): + self.methods = {} + self.methods[target_variable] = {} + self.methods[target_variable]["lt"] = method_lt + self.methods[target_variable]["lv"] = method_lv + self.method_lt_each_gp_sep = separate_gridpoints + self.wgt_scen_tr_eq = weight_scenarios_equally + + +def _do_legacy_run( + emulator_tas, + emulator_tas_squared, + emulator_hfds, + global_variability, + esm_tas, + cfg, +): + preds_legacy = {} + for name, vals in ( + ("gttas", emulator_tas), + ("gttas2", emulator_tas_squared), + ("gthfds", emulator_hfds), + ("gvtas", global_variability), + ): + preds_legacy[name] = {} + for scenario, vals_scen in vals.groupby("scenario"): + # we have to force this to have an extra dimension, run, for legacy + # to work although this really isn't how it should be because it + # means you have some weird reshaping to do at a really low level + preds_legacy[name][scenario] = vals_scen.dropna(dim="time").values[np.newaxis, :] + + + targs_legacy = {} + for name, vals in ( + ("tas", esm_tas), + ): + targs_legacy[name] = {} + for scenario, vals_scen in vals.groupby("scenario"): + # we have to force this to have an extra dimension, run, for legacy + # to work although this really isn't how it should be + # order of dimensions is very important for legacy too + targs_legacy[name][scenario] = vals_scen.T.dropna(dim="time").values[np.newaxis, :, :] + + + res_legacy = train_lt( + preds_legacy, + targs_legacy, + esm="esm_name", + cfg=cfg, + save_params=False, + ) + + return res_legacy + + +def test_prototype_train_lt( +): + time = [1850, 1950, 2014, 2015, 2050, 2100, 2300] + scenarios = ["hist", "ssp126"] + + pred_dims = ["scenario", "time"] + pred_coords = dict( + time=time, + scenario=scenarios, + ) + + emulator_tas = xr.DataArray( + np.array([ + [0, 0.5, 1, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, 1.1, 1.5, 1.4, 1.2], + ]), + dims=pred_dims, + coords=pred_coords, + ) + emulator_tas_squared = emulator_tas ** 2 + global_variability = xr.DataArray( + np.array([ + [-0.1, 0.1, 0.03, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, 0.04, 0.2, -0.03, 0.0], + ]), + dims=pred_dims, + coords=pred_coords, + ) + emulator_hfds = xr.DataArray( + np.array([ + [0.5, 1.5, 2.0, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, 2.1, 2.0, 1.5, 0.4], + ]), + dims=pred_dims, + coords=pred_coords, + ) + + # we wouldn't actually start like this, but we'd write a utils function + # to simply go from lat-lon to gridpoint and back + targ_dims = ["scenario", "gridpoint", "time"] + targ_coords = dict( + time=time, + scenario=scenarios, + gridpoint=[0, 1], + lat=(["gridpoint"], [-60, 60]), + lon=(["gridpoint"], [120, 240]), + ) + esm_tas = xr.DataArray( + np.array([ + [ + [0.6, 1.6, 2.6, np.nan, np.nan, np.nan, np.nan], + [0.43, 1.13, 2.21, np.nan, np.nan, np.nan, np.nan], + ], + [ + [np.nan, np.nan, np.nan, 2.11, 2.01, 1.54, 1.22], + [np.nan, np.nan, np.nan, 2.19, 2.04, 1.53, 1.21], + ] + ]), + dims=targ_dims, + coords=targ_coords, + ) + + res_legacy = _do_legacy_run( + emulator_tas, + emulator_tas_squared, + emulator_hfds, + global_variability, + esm_tas, + cfg=_MockConfig(), + ) + + + res_updated = LinearRegression().calibrate( + esm_tas, + predictors={ + "emulator_tas": 
emulator_tas, + "emulator_tas_squared": emulator_tas_squared, + "emulator_hfds": emulator_hfds, + "global_variability": global_variability, + } + ) + + # check that calibrated parameters match for each predictor variable + for updated_name, legacy_vals in ( + ("emulator_tas", res_legacy[0]["coef_gttas"]["tas"]), + ("emulator_tas_squared", res_legacy[0]["coef_gttas2"]["tas"]), + ("emulator_hfds", res_legacy[0]["coef_gthfds"]["tas"]), + ("global_variability", res_legacy[1]["coef_gvtas"]["tas"]), + ("intercept", res_legacy[0]["intercept"]["tas"]), + ): + npt.assert_allclose(res_updated.sel(predictor=updated_name), legacy_vals) + + +# things that aren't handled well: +# - checking that ensemble member and scenario actually make a sensible set +# - dropping of nans in prototyped method +# - units (should probably be using dataset rather than dataarray for outputs) From 47fa2c6618162e92e6c74de190d3548a93f99d24 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Mon, 25 Oct 2021 13:12:41 +1100 Subject: [PATCH 05/23] Tidy up --- mesmer/prototype/calibrate.py | 199 ++++++++++++++-------------- tests/integration/test_prototype.py | 8 +- 2 files changed, 106 insertions(+), 101 deletions(-) diff --git a/mesmer/prototype/calibrate.py b/mesmer/prototype/calibrate.py index a64ab2a5..40f6f761 100644 --- a/mesmer/prototype/calibrate.py +++ b/mesmer/prototype/calibrate.py @@ -9,90 +9,80 @@ class MesmerCalibrateBase(metaclass=abc.ABCMeta): """ Abstract base class for calibration """ - def _flatten_predictors(self, predictors, flat_dim_order): - predictors_flat = [] - predictor_order = [] - for v, vals in predictors.items(): - predictor_order.append(v) - predictors_flat.append(self._flatten(vals, flat_dim_order)) + @staticmethod + def _get_stack_coord_name(inp_array): + stack_coord_name = "stacked_coord" + if stack_coord_name in inp_array.dims: + stack_coord_name = "memser_stacked_coord" - predictors_flat = np.vstack(predictors_flat).T + if stack_coord_name in inp_array.dims: + raise NotImplementedError("You have dimensions we can't safely unstack yet") - return predictors_flat, predictor_order + return stack_coord_name @staticmethod - def _flatten(values, flat_dim_order, dropna=True): - out = values.transpose(*flat_dim_order).values.flatten() - # TODO: better handling of nan - # Probably a better solution is to pass all the flattening up a level - # i.e. 
should be done before calibration is attempted/have a function - # to handle flattening scenarios - if dropna: - out = out[np.where(~np.isnan(out))] - - return out + def _check_coords_match(obj, obj_other, check_coord): + coords_match = obj.coords[check_coord].equals(obj_other.coords[check_coord]) + if not coords_match: + raise AssertionError(f"{check_coord} is not the same on {obj} and {obj_other}") @staticmethod - def _get_calibration_groups(target, groups): - return target.groupby(groups) + def _flatten_predictors(predictors, dims_to_flatten, stack_coord_name): + predictors_flat = [] + for v, vals in predictors.items(): + if stack_coord_name in vals.dims: + raise AssertionError(f"{stack_coord_name} is already in {vals.dims}") - @staticmethod - def _unflatten_calibrated_values(res, dim_names, coords): - res_flat = np.vstack(list(res.values())) - coords = { - **coords, - "predictor": list(res.keys()) - } + vals_flat = vals.stack({stack_coord_name: dims_to_flatten}).dropna(stack_coord_name) + vals_flat.name = v + predictors_flat.append(vals_flat) - out = xr.DataArray(res_flat, dims=["predictor"] + dim_names, coords=coords) + out = xr.merge(predictors_flat).to_stacked_array("predictor", sample_dims=[stack_coord_name]) return out + def _flatten_predictors_and_target(self, predictors, target): + dims_to_flatten = self._get_predictor_dims(predictors) + stack_coord_name = self._get_stack_coord_name(target) + target_flattened = target.stack({stack_coord_name: dims_to_flatten}).dropna(stack_coord_name) + predictors_flattened = self._flatten_predictors(predictors, dims_to_flatten, stack_coord_name) + self._check_coords_match(target_flattened, predictors_flattened, stack_coord_name) -class MesmerCalibrateTargetOnly(MesmerCalibrateBase): - @abc.abstractmethod - def calibrate(self, predictor, **kwargs): - """ - Calibrate a model + return predictors_flattened, target_flattened, stack_coord_name - Parameters - ---------- - predictor : xarray.DataArray - Predictor data array + @staticmethod + def _get_calibration_groups(target_flattened, stack_coord_name): + calibration_groups = list(set(target_flattened.dims) - {stack_coord_name}) + if len(calibration_groups) > 1: + raise NotImplementedError() - **kwargs - Passed onto the calibration method - - Returns - ------- - xarray.DataArray - Fitting coefficients (have to think about how best to label and - store) - """ + return calibration_groups[0] class MesmerCalibrateTargetPredictor(MesmerCalibrateBase): @abc.abstractmethod - def calibrate(self, predictor, target, **kwargs): + def calibrate(self, target, predictor, **kwargs): """ Calibrate a model + [TODO: update this based on whatever LinearRegression.calibrate's docs + end up looking like] + Parameters ---------- - predictor : xarray.DataArray - Predictor data array - target : xarray.DataArray Target data + predictor : xarray.DataArray + Predictor data array + **kwargs Passed onto the calibration method Returns ------- xarray.DataArray - Fitting coefficients (have to think about how best to label and - store) + Fitting coefficients [TODO description of how labelled] """ @@ -106,7 +96,7 @@ class LinearRegression(MesmerCalibrateTargetPredictor): preparation, which makes it very hard to see what the model actually is """ @staticmethod - def _regress_single_point(target_point, predictor, weights=None): + def _regress_single_group(target_point, predictor, weights=None): # this is the method that actually does the regression args = [predictor, target_point] if weights is not None: @@ -116,59 +106,74 @@ def 
_regress_single_point(target_point, predictor, weights=None): return reg - def calibrate(self, target, predictors, weights=None, groups="gridpoint"): - # this method handles the pre-processing to get the data ready for the - # regression, does the regression (or calls another method to do so), - # then puts it all back into a DataArray for output - - # would need to be smarter here too to handle multiple predictors and/ - # or target variables but that should be doable - res = { - **{k: [] for k in predictors}, - "intercept": [], - } + @staticmethod + def _get_predictor_dims(predictors): + predictors_dims = {k: v.dims for k, v in predictors.items()} + predictors_dims_unique = set(predictors_dims.values()) + if len(predictors_dims_unique) > 1: + raise AssertionError(f"Dimensions of predictors are not all the same, we have: {predictors_dims}") - flat_dim_order = [d for d in target.dims if d != groups] + return list(predictors_dims_unique)[0] - predictor_numpy, predictor_order = self._flatten_predictors(predictors, flat_dim_order) - for group_val, target_group in self._get_calibration_groups(target, groups): - target_group_flat = self._flatten(target_group, flat_dim_order) + def _calibrate_groups(self, target_flattened, predictor_numpy, predictor_names, weights, calibration_groups): + def _calibrate_group(target_group): + target_group_numpy = target_group.values + res_group = self._regress_single_group(target_group_numpy, predictor_numpy, weights) - res_group = self._regress_single_point(target_group_flat, predictor_numpy, weights) + res_xr = xr.DataArray(np.concatenate([[res_group.intercept_], res_group.coef_]), dims=["predictor"], coords={"predictor": predictor_names}) - # need to be smarter about this, but idea is to store things using - # xarray DataArray - res["intercept"].append(res_group.intercept_) - for i, p in enumerate(predictor_order): - res[p].append(res_group.coef_[i]) + return res_xr - res = self._unflatten_calibrated_values(res, dim_names=[groups], coords={groups: getattr(target, groups)}) + res = target_flattened.groupby(calibration_groups).apply(_calibrate_group) return res + def calibrate(self, target, predictors, weights=None): + """ + Calibrate a linear regression model -class AutoRegression(MesmerCalibrateTargetOnly): - def _regress_single(target, lags): - res = AutoReg(target, lags=lags).fit() + Parameters + ---------- + target : xarray.DataArray + Target data - return res + predictors : dict[str: xarray.DataArray] + Predictors to use, each key gives the name of the predictor, the + values give the values of the predictor. - def calibrate(self, target, groups=["gridpoint"], lags=1): - # would need to be smarter here too to handle multiple predictors and/ - # or target variables but that should be doable - res = { - "intercepts": [], - "coef": [], - "stds": [], - } - for target_group in self._get_calibration_groups(target, groups): - res_group = target_group.apply(self._regress_single, lags) - # need to be smarter about this, but idea is to store things using - # xarray DataArray - res["intercepts"].append(res_group.params[0]) - res["coef"].append(res_group.params[1]) - res["stds"].append(np.sqrt(res_group.sigma2)) - - res = xr.DataArray(res) + weights : optional, xarray.DataArray + Weights to use for dimensions in ``predictors`` - return res + Returns + ------- + xarray.DataArray + Fitting coefficients, the dimensions of which are the combination + of "predictor" and all dimensions in ``target`` which are not in + ``predictors``. 
For example, the resulting dimensions could be + ["gridpoint", "predictor"]. + + Notes + ----- + We flatten ``target``, ``predictors`` and ``weights`` across the + dimensions shared by ``target`` and ``predictors`` to create the input + arrays for the linear regression. + """ + predictors_flattened, target_flattened, stack_coord_name = self._flatten_predictors_and_target( + predictors, target + ) + + # stacked coord has to go first for the regression to be setup correctly + predictors_flattened_dim_order = [stack_coord_name] + [d for d in predictors_flattened.dims if d != stack_coord_name] + predictors_flattened_reordered = predictors_flattened.transpose(*predictors_flattened_dim_order) + predictors_plus_intercept = ["intercept"] + list(predictors_flattened_reordered["variable"].values) + predictor_numpy = predictors_flattened_reordered.values + + calibration_groups = self._get_calibration_groups(target_flattened, stack_coord_name) + + return self._calibrate_groups( + target_flattened, + predictor_numpy, + predictor_names=predictors_plus_intercept, + weights=weights, + calibration_groups=calibration_groups, + ) diff --git a/tests/integration/test_prototype.py b/tests/integration/test_prototype.py index 5c059420..90ea8063 100644 --- a/tests/integration/test_prototype.py +++ b/tests/integration/test_prototype.py @@ -162,7 +162,7 @@ def test_prototype_train_lt( npt.assert_allclose(res_updated.sel(predictor=updated_name), legacy_vals) -# things that aren't handled well: -# - checking that ensemble member and scenario actually make a sensible set -# - dropping of nans in prototyped method -# - units (should probably be using dataset rather than dataarray for outputs) +# things that aren't tested well: +# - what happens if ensemble member and scenario don't actually make a coherent set +# - units (should probably be using dataset rather than dataarray for inputs and outputs?) 
+# - weights From b408db01dedda6a6702f7551061ea5a0fa2a7fa9 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Mon, 25 Oct 2021 13:13:26 +1100 Subject: [PATCH 06/23] Format --- mesmer/prototype/calibrate.py | 74 ++++++++++++++++++++------ mesmer/prototype/calibrate_multiple.py | 4 +- tests/integration/test_prototype.py | 68 ++++++++++++----------- 3 files changed, 97 insertions(+), 49 deletions(-) diff --git a/mesmer/prototype/calibrate.py b/mesmer/prototype/calibrate.py index 40f6f761..e54af425 100644 --- a/mesmer/prototype/calibrate.py +++ b/mesmer/prototype/calibrate.py @@ -9,6 +9,7 @@ class MesmerCalibrateBase(metaclass=abc.ABCMeta): """ Abstract base class for calibration """ + @staticmethod def _get_stack_coord_name(inp_array): stack_coord_name = "stacked_coord" @@ -24,7 +25,9 @@ def _get_stack_coord_name(inp_array): def _check_coords_match(obj, obj_other, check_coord): coords_match = obj.coords[check_coord].equals(obj_other.coords[check_coord]) if not coords_match: - raise AssertionError(f"{check_coord} is not the same on {obj} and {obj_other}") + raise AssertionError( + f"{check_coord} is not the same on {obj} and {obj_other}" + ) @staticmethod def _flatten_predictors(predictors, dims_to_flatten, stack_coord_name): @@ -33,11 +36,15 @@ def _flatten_predictors(predictors, dims_to_flatten, stack_coord_name): if stack_coord_name in vals.dims: raise AssertionError(f"{stack_coord_name} is already in {vals.dims}") - vals_flat = vals.stack({stack_coord_name: dims_to_flatten}).dropna(stack_coord_name) + vals_flat = vals.stack({stack_coord_name: dims_to_flatten}).dropna( + stack_coord_name + ) vals_flat.name = v predictors_flat.append(vals_flat) - out = xr.merge(predictors_flat).to_stacked_array("predictor", sample_dims=[stack_coord_name]) + out = xr.merge(predictors_flat).to_stacked_array( + "predictor", sample_dims=[stack_coord_name] + ) return out @@ -45,9 +52,15 @@ def _flatten_predictors_and_target(self, predictors, target): dims_to_flatten = self._get_predictor_dims(predictors) stack_coord_name = self._get_stack_coord_name(target) - target_flattened = target.stack({stack_coord_name: dims_to_flatten}).dropna(stack_coord_name) - predictors_flattened = self._flatten_predictors(predictors, dims_to_flatten, stack_coord_name) - self._check_coords_match(target_flattened, predictors_flattened, stack_coord_name) + target_flattened = target.stack({stack_coord_name: dims_to_flatten}).dropna( + stack_coord_name + ) + predictors_flattened = self._flatten_predictors( + predictors, dims_to_flatten, stack_coord_name + ) + self._check_coords_match( + target_flattened, predictors_flattened, stack_coord_name + ) return predictors_flattened, target_flattened, stack_coord_name @@ -59,6 +72,7 @@ def _get_calibration_groups(target_flattened, stack_coord_name): return calibration_groups[0] + class MesmerCalibrateTargetPredictor(MesmerCalibrateBase): @abc.abstractmethod def calibrate(self, target, predictor, **kwargs): @@ -95,6 +109,7 @@ class LinearRegression(MesmerCalibrateTargetPredictor): All the lines above and below that line are basically just data preparation, which makes it very hard to see what the model actually is """ + @staticmethod def _regress_single_group(target_point, predictor, weights=None): # this is the method that actually does the regression @@ -111,16 +126,31 @@ def _get_predictor_dims(predictors): predictors_dims = {k: v.dims for k, v in predictors.items()} predictors_dims_unique = set(predictors_dims.values()) if len(predictors_dims_unique) > 1: - raise AssertionError(f"Dimensions 
of predictors are not all the same, we have: {predictors_dims}") + raise AssertionError( + f"Dimensions of predictors are not all the same, we have: {predictors_dims}" + ) return list(predictors_dims_unique)[0] - def _calibrate_groups(self, target_flattened, predictor_numpy, predictor_names, weights, calibration_groups): + def _calibrate_groups( + self, + target_flattened, + predictor_numpy, + predictor_names, + weights, + calibration_groups, + ): def _calibrate_group(target_group): target_group_numpy = target_group.values - res_group = self._regress_single_group(target_group_numpy, predictor_numpy, weights) + res_group = self._regress_single_group( + target_group_numpy, predictor_numpy, weights + ) - res_xr = xr.DataArray(np.concatenate([[res_group.intercept_], res_group.coef_]), dims=["predictor"], coords={"predictor": predictor_names}) + res_xr = xr.DataArray( + np.concatenate([[res_group.intercept_], res_group.coef_]), + dims=["predictor"], + coords={"predictor": predictor_names}, + ) return res_xr @@ -158,17 +188,27 @@ def calibrate(self, target, predictors, weights=None): dimensions shared by ``target`` and ``predictors`` to create the input arrays for the linear regression. """ - predictors_flattened, target_flattened, stack_coord_name = self._flatten_predictors_and_target( - predictors, target - ) + ( + predictors_flattened, + target_flattened, + stack_coord_name, + ) = self._flatten_predictors_and_target(predictors, target) # stacked coord has to go first for the regression to be setup correctly - predictors_flattened_dim_order = [stack_coord_name] + [d for d in predictors_flattened.dims if d != stack_coord_name] - predictors_flattened_reordered = predictors_flattened.transpose(*predictors_flattened_dim_order) - predictors_plus_intercept = ["intercept"] + list(predictors_flattened_reordered["variable"].values) + predictors_flattened_dim_order = [stack_coord_name] + [ + d for d in predictors_flattened.dims if d != stack_coord_name + ] + predictors_flattened_reordered = predictors_flattened.transpose( + *predictors_flattened_dim_order + ) + predictors_plus_intercept = ["intercept"] + list( + predictors_flattened_reordered["variable"].values + ) predictor_numpy = predictors_flattened_reordered.values - calibration_groups = self._get_calibration_groups(target_flattened, stack_coord_name) + calibration_groups = self._get_calibration_groups( + target_flattened, stack_coord_name + ) return self._calibrate_groups( target_flattened, diff --git a/mesmer/prototype/calibrate_multiple.py b/mesmer/prototype/calibrate_multiple.py index 103d454c..f42369b5 100644 --- a/mesmer/prototype/calibrate_multiple.py +++ b/mesmer/prototype/calibrate_multiple.py @@ -1,4 +1,6 @@ -def calibrate_multiple_scenarios_and_ensemble_members(targets, predictors, calibration_class, calibration_kwargs, weighting_style): +def calibrate_multiple_scenarios_and_ensemble_members( + targets, predictors, calibration_class, calibration_kwargs, weighting_style +): """ Calibrate based on multiple scenarios and ensemble members per scenario diff --git a/tests/integration/test_prototype.py b/tests/integration/test_prototype.py index 90ea8063..dbb1dd7e 100644 --- a/tests/integration/test_prototype.py +++ b/tests/integration/test_prototype.py @@ -43,20 +43,20 @@ def _do_legacy_run( # we have to force this to have an extra dimension, run, for legacy # to work although this really isn't how it should be because it # means you have some weird reshaping to do at a really low level - preds_legacy[name][scenario] = 
vals_scen.dropna(dim="time").values[np.newaxis, :] - + preds_legacy[name][scenario] = vals_scen.dropna(dim="time").values[ + np.newaxis, : + ] targs_legacy = {} - for name, vals in ( - ("tas", esm_tas), - ): + for name, vals in (("tas", esm_tas),): targs_legacy[name] = {} for scenario, vals_scen in vals.groupby("scenario"): # we have to force this to have an extra dimension, run, for legacy # to work although this really isn't how it should be # order of dimensions is very important for legacy too - targs_legacy[name][scenario] = vals_scen.T.dropna(dim="time").values[np.newaxis, :, :] - + targs_legacy[name][scenario] = vals_scen.T.dropna(dim="time").values[ + np.newaxis, :, : + ] res_legacy = train_lt( preds_legacy, @@ -69,8 +69,7 @@ def _do_legacy_run( return res_legacy -def test_prototype_train_lt( -): +def test_prototype_train_lt(): time = [1850, 1950, 2014, 2015, 2050, 2100, 2300] scenarios = ["hist", "ssp126"] @@ -81,27 +80,33 @@ def test_prototype_train_lt( ) emulator_tas = xr.DataArray( - np.array([ - [0, 0.5, 1, np.nan, np.nan, np.nan, np.nan], - [np.nan, np.nan, np.nan, 1.1, 1.5, 1.4, 1.2], - ]), + np.array( + [ + [0, 0.5, 1, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, 1.1, 1.5, 1.4, 1.2], + ] + ), dims=pred_dims, coords=pred_coords, ) emulator_tas_squared = emulator_tas ** 2 global_variability = xr.DataArray( - np.array([ - [-0.1, 0.1, 0.03, np.nan, np.nan, np.nan, np.nan], - [np.nan, np.nan, np.nan, 0.04, 0.2, -0.03, 0.0], - ]), + np.array( + [ + [-0.1, 0.1, 0.03, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, 0.04, 0.2, -0.03, 0.0], + ] + ), dims=pred_dims, coords=pred_coords, ) emulator_hfds = xr.DataArray( - np.array([ - [0.5, 1.5, 2.0, np.nan, np.nan, np.nan, np.nan], - [np.nan, np.nan, np.nan, 2.1, 2.0, 1.5, 0.4], - ]), + np.array( + [ + [0.5, 1.5, 2.0, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, 2.1, 2.0, 1.5, 0.4], + ] + ), dims=pred_dims, coords=pred_coords, ) @@ -117,16 +122,18 @@ def test_prototype_train_lt( lon=(["gridpoint"], [120, 240]), ) esm_tas = xr.DataArray( - np.array([ - [ - [0.6, 1.6, 2.6, np.nan, np.nan, np.nan, np.nan], - [0.43, 1.13, 2.21, np.nan, np.nan, np.nan, np.nan], - ], + np.array( [ - [np.nan, np.nan, np.nan, 2.11, 2.01, 1.54, 1.22], - [np.nan, np.nan, np.nan, 2.19, 2.04, 1.53, 1.21], + [ + [0.6, 1.6, 2.6, np.nan, np.nan, np.nan, np.nan], + [0.43, 1.13, 2.21, np.nan, np.nan, np.nan, np.nan], + ], + [ + [np.nan, np.nan, np.nan, 2.11, 2.01, 1.54, 1.22], + [np.nan, np.nan, np.nan, 2.19, 2.04, 1.53, 1.21], + ], ] - ]), + ), dims=targ_dims, coords=targ_coords, ) @@ -140,7 +147,6 @@ def test_prototype_train_lt( cfg=_MockConfig(), ) - res_updated = LinearRegression().calibrate( esm_tas, predictors={ @@ -148,7 +154,7 @@ def test_prototype_train_lt( "emulator_tas_squared": emulator_tas_squared, "emulator_hfds": emulator_hfds, "global_variability": global_variability, - } + }, ) # check that calibrated parameters match for each predictor variable From b93de5877fbe84a39f713b6d0fc1cd36ed78eac6 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Mon, 25 Oct 2021 13:21:12 +1100 Subject: [PATCH 07/23] Tidy up a bit more --- mesmer/prototype/calibrate.py | 60 +++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/mesmer/prototype/calibrate.py b/mesmer/prototype/calibrate.py index e54af425..c403d28e 100644 --- a/mesmer/prototype/calibrate.py +++ b/mesmer/prototype/calibrate.py @@ -10,6 +10,17 @@ class MesmerCalibrateBase(metaclass=abc.ABCMeta): Abstract base class for 
calibration """ + @staticmethod + def _get_predictor_dims(predictors): + predictors_dims = {k: v.dims for k, v in predictors.items()} + predictors_dims_unique = set(predictors_dims.values()) + if len(predictors_dims_unique) > 1: + raise AssertionError( + f"Dimensions of predictors are not all the same, we have: {predictors_dims}" + ) + + return list(predictors_dims_unique)[0] + @staticmethod def _get_stack_coord_name(inp_array): stack_coord_name = "stacked_coord" @@ -121,17 +132,6 @@ def _regress_single_group(target_point, predictor, weights=None): return reg - @staticmethod - def _get_predictor_dims(predictors): - predictors_dims = {k: v.dims for k, v in predictors.items()} - predictors_dims_unique = set(predictors_dims.values()) - if len(predictors_dims_unique) > 1: - raise AssertionError( - f"Dimensions of predictors are not all the same, we have: {predictors_dims}" - ) - - return list(predictors_dims_unique)[0] - def _calibrate_groups( self, target_flattened, @@ -158,6 +158,25 @@ def _calibrate_group(target_group): return res + @staticmethod + def _get_predictors_numpy_and_output_predictor_order( + predictors_flattened, stack_coord_name + ): + # stacked coord has to go first for the regression to be setup correctly + predictors_flattened_dim_order = [stack_coord_name] + [ + d for d in predictors_flattened.dims if d != stack_coord_name + ] + predictors_flattened_reordered = predictors_flattened.transpose( + *predictors_flattened_dim_order + ) + predictors_numpy = predictors_flattened_reordered.values + + predictors_plus_intercept_order = ["intercept"] + list( + predictors_flattened_reordered["variable"].values + ) + + return predictors_numpy, predictors_plus_intercept_order + def calibrate(self, target, predictors, weights=None): """ Calibrate a linear regression model @@ -194,17 +213,12 @@ def calibrate(self, target, predictors, weights=None): stack_coord_name, ) = self._flatten_predictors_and_target(predictors, target) - # stacked coord has to go first for the regression to be setup correctly - predictors_flattened_dim_order = [stack_coord_name] + [ - d for d in predictors_flattened.dims if d != stack_coord_name - ] - predictors_flattened_reordered = predictors_flattened.transpose( - *predictors_flattened_dim_order - ) - predictors_plus_intercept = ["intercept"] + list( - predictors_flattened_reordered["variable"].values + ( + predictors_numpy, + predictors_plus_intercept_order, + ) = self._get_predictors_numpy_and_output_predictor_order( + predictors_flattened, stack_coord_name ) - predictor_numpy = predictors_flattened_reordered.values calibration_groups = self._get_calibration_groups( target_flattened, stack_coord_name @@ -212,8 +226,8 @@ def calibrate(self, target, predictors, weights=None): return self._calibrate_groups( target_flattened, - predictor_numpy, - predictor_names=predictors_plus_intercept, + predictors_numpy, + predictor_names=predictors_plus_intercept_order, weights=weights, calibration_groups=calibration_groups, ) From de1d293cbcd42364c7d88ee78f467f1ac0bd60b3 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Tue, 26 Oct 2021 10:52:48 +1100 Subject: [PATCH 08/23] Change calibration pattern to simplify use of classes --- mesmer/prototype/__init__.py | 0 mesmer/prototype/calibrate.py | 195 ++++--------------------- mesmer/prototype/calibrate_multiple.py | 65 +++++++++ tests/integration/test_prototype.py | 15 +- 4 files changed, 109 insertions(+), 166 deletions(-) create mode 100644 mesmer/prototype/__init__.py diff --git a/mesmer/prototype/__init__.py 
b/mesmer/prototype/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/mesmer/prototype/calibrate.py b/mesmer/prototype/calibrate.py index c403d28e..26dc2cd5 100644 --- a/mesmer/prototype/calibrate.py +++ b/mesmer/prototype/calibrate.py @@ -10,79 +10,6 @@ class MesmerCalibrateBase(metaclass=abc.ABCMeta): Abstract base class for calibration """ - @staticmethod - def _get_predictor_dims(predictors): - predictors_dims = {k: v.dims for k, v in predictors.items()} - predictors_dims_unique = set(predictors_dims.values()) - if len(predictors_dims_unique) > 1: - raise AssertionError( - f"Dimensions of predictors are not all the same, we have: {predictors_dims}" - ) - - return list(predictors_dims_unique)[0] - - @staticmethod - def _get_stack_coord_name(inp_array): - stack_coord_name = "stacked_coord" - if stack_coord_name in inp_array.dims: - stack_coord_name = "memser_stacked_coord" - - if stack_coord_name in inp_array.dims: - raise NotImplementedError("You have dimensions we can't safely unstack yet") - - return stack_coord_name - - @staticmethod - def _check_coords_match(obj, obj_other, check_coord): - coords_match = obj.coords[check_coord].equals(obj_other.coords[check_coord]) - if not coords_match: - raise AssertionError( - f"{check_coord} is not the same on {obj} and {obj_other}" - ) - - @staticmethod - def _flatten_predictors(predictors, dims_to_flatten, stack_coord_name): - predictors_flat = [] - for v, vals in predictors.items(): - if stack_coord_name in vals.dims: - raise AssertionError(f"{stack_coord_name} is already in {vals.dims}") - - vals_flat = vals.stack({stack_coord_name: dims_to_flatten}).dropna( - stack_coord_name - ) - vals_flat.name = v - predictors_flat.append(vals_flat) - - out = xr.merge(predictors_flat).to_stacked_array( - "predictor", sample_dims=[stack_coord_name] - ) - - return out - - def _flatten_predictors_and_target(self, predictors, target): - dims_to_flatten = self._get_predictor_dims(predictors) - stack_coord_name = self._get_stack_coord_name(target) - - target_flattened = target.stack({stack_coord_name: dims_to_flatten}).dropna( - stack_coord_name - ) - predictors_flattened = self._flatten_predictors( - predictors, dims_to_flatten, stack_coord_name - ) - self._check_coords_match( - target_flattened, predictors_flattened, stack_coord_name - ) - - return predictors_flattened, target_flattened, stack_coord_name - - @staticmethod - def _get_calibration_groups(target_flattened, stack_coord_name): - calibration_groups = list(set(target_flattened.dims) - {stack_coord_name}) - if len(calibration_groups) > 1: - raise NotImplementedError() - - return calibration_groups[0] - class MesmerCalibrateTargetPredictor(MesmerCalibrateBase): @abc.abstractmethod @@ -124,110 +51,50 @@ class LinearRegression(MesmerCalibrateTargetPredictor): @staticmethod def _regress_single_group(target_point, predictor, weights=None): # this is the method that actually does the regression - args = [predictor, target_point] + args = [predictor.T, target_point.reshape(-1, 1)] if weights is not None: args.append(weights) - reg = sklearn.linear_model.LinearRegression().fit(*args) + out_array = np.concatenate([reg.intercept_, *reg.coef_]) - return reg + return out_array - def _calibrate_groups( + def calibrate( self, target_flattened, - predictor_numpy, - predictor_names, - weights, - calibration_groups, - ): - def _calibrate_group(target_group): - target_group_numpy = target_group.values - res_group = self._regress_single_group( - target_group_numpy, predictor_numpy, weights - ) - - 
res_xr = xr.DataArray( - np.concatenate([[res_group.intercept_], res_group.coef_]), - dims=["predictor"], - coords={"predictor": predictor_names}, - ) - - return res_xr - - res = target_flattened.groupby(calibration_groups).apply(_calibrate_group) - - return res - - @staticmethod - def _get_predictors_numpy_and_output_predictor_order( - predictors_flattened, stack_coord_name + predictors_flattened, + stack_coord_name, + predictor_name="predictor", + weights=None, + predictor_temporary_name="__pred_store__", ): - # stacked coord has to go first for the regression to be setup correctly - predictors_flattened_dim_order = [stack_coord_name] + [ - d for d in predictors_flattened.dims if d != stack_coord_name - ] - predictors_flattened_reordered = predictors_flattened.transpose( - *predictors_flattened_dim_order - ) - predictors_numpy = predictors_flattened_reordered.values - - predictors_plus_intercept_order = ["intercept"] + list( - predictors_flattened_reordered["variable"].values - ) - - return predictors_numpy, predictors_plus_intercept_order - - def calibrate(self, target, predictors, weights=None): """ - Calibrate a linear regression model - - Parameters - ---------- - target : xarray.DataArray - Target data - - predictors : dict[str: xarray.DataArray] - Predictors to use, each key gives the name of the predictor, the - values give the values of the predictor. + TODO: redo docstring + """ + if predictor_name not in predictors_flattened.dims: + raise AssertionError(f"{predictor_name} not in {predictors_flattened.dims}") - weights : optional, xarray.DataArray - Weights to use for dimensions in ``predictors`` + if predictor_temporary_name in predictors_flattened.dims: + raise AssertionError( + f"{predictor_temporary_name} already in {predictors_flattened.dims}, choose a different temporary name" + ) - Returns - ------- - xarray.DataArray - Fitting coefficients, the dimensions of which are the combination - of "predictor" and all dimensions in ``target`` which are not in - ``predictors``. For example, the resulting dimensions could be - ["gridpoint", "predictor"]. - - Notes - ----- - We flatten ``target``, ``predictors`` and ``weights`` across the - dimensions shared by ``target`` and ``predictors`` to create the input - arrays for the linear regression. 
- """ - ( - predictors_flattened, + res = xr.apply_ufunc( + self._regress_single_group, target_flattened, - stack_coord_name, - ) = self._flatten_predictors_and_target(predictors, target) - - ( - predictors_numpy, - predictors_plus_intercept_order, - ) = self._get_predictors_numpy_and_output_predictor_order( - predictors_flattened, stack_coord_name + predictors_flattened, + input_core_dims=[[stack_coord_name], [predictor_name, stack_coord_name]], + output_core_dims=((predictor_temporary_name,),), + vectorize=True, + kwargs=dict(weights=weights), ) - calibration_groups = self._get_calibration_groups( - target_flattened, stack_coord_name + # assuming that predictor's names are in the 'variable' coordinate + predictors_plus_intercept_order = ["intercept"] + list( + predictors_flattened["variable"].values ) + res = res.assign_coords( + {predictor_temporary_name: predictors_plus_intercept_order} + ).rename({predictor_temporary_name: predictor_name}) - return self._calibrate_groups( - target_flattened, - predictors_numpy, - predictor_names=predictors_plus_intercept_order, - weights=weights, - calibration_groups=calibration_groups, - ) + return res diff --git a/mesmer/prototype/calibrate_multiple.py b/mesmer/prototype/calibrate_multiple.py index f42369b5..231352a5 100644 --- a/mesmer/prototype/calibrate_multiple.py +++ b/mesmer/prototype/calibrate_multiple.py @@ -1,3 +1,68 @@ +import xarray as xr + + +def _get_predictor_dims(predictors): + predictors_dims = {k: v.dims for k, v in predictors.items()} + predictors_dims_unique = set(predictors_dims.values()) + if len(predictors_dims_unique) > 1: + raise AssertionError( + f"Dimensions of predictors are not all the same, we have: {predictors_dims}" + ) + + return list(predictors_dims_unique)[0] + + +def _get_stack_coord_name(inp_array): + stack_coord_name = "stacked_coord" + if stack_coord_name in inp_array.dims: + stack_coord_name = "memser_stacked_coord" + + if stack_coord_name in inp_array.dims: + raise NotImplementedError("You have dimensions we can't safely unstack yet") + + return stack_coord_name + + +def _check_coords_match(obj, obj_other, check_coord): + coords_match = obj.coords[check_coord].equals(obj_other.coords[check_coord]) + if not coords_match: + raise AssertionError(f"{check_coord} is not the same on {obj} and {obj_other}") + + +def _flatten_predictors(predictors, dims_to_flatten, stack_coord_name): + predictors_flat = [] + for v, vals in predictors.items(): + if stack_coord_name in vals.dims: + raise AssertionError(f"{stack_coord_name} is already in {vals.dims}") + + vals_flat = vals.stack({stack_coord_name: dims_to_flatten}).dropna( + stack_coord_name + ) + vals_flat.name = v + predictors_flat.append(vals_flat) + + out = xr.merge(predictors_flat).to_stacked_array( + "predictor", sample_dims=[stack_coord_name] + ) + + return out + + +def flatten_predictors_and_target(predictors, target): + dims_to_flatten = _get_predictor_dims(predictors) + stack_coord_name = _get_stack_coord_name(target) + + target_flattened = target.stack({stack_coord_name: dims_to_flatten}).dropna( + stack_coord_name + ) + predictors_flattened = _flatten_predictors( + predictors, dims_to_flatten, stack_coord_name + ) + _check_coords_match(target_flattened, predictors_flattened, stack_coord_name) + + return predictors_flattened, target_flattened, stack_coord_name + + def calibrate_multiple_scenarios_and_ensemble_members( targets, predictors, calibration_class, calibration_kwargs, weighting_style ): diff --git a/tests/integration/test_prototype.py 
b/tests/integration/test_prototype.py index dbb1dd7e..41dbcc54 100644 --- a/tests/integration/test_prototype.py +++ b/tests/integration/test_prototype.py @@ -4,6 +4,7 @@ from mesmer.calibrate_mesmer.train_lt import train_lt from mesmer.prototype.calibrate import LinearRegression +from mesmer.prototype.calibrate_multiple import flatten_predictors_and_target class _MockConfig: @@ -147,14 +148,24 @@ def test_prototype_train_lt(): cfg=_MockConfig(), ) - res_updated = LinearRegression().calibrate( - esm_tas, + ( + predictors_flattened, + target_flattened, + stack_coord_name, + ) = flatten_predictors_and_target( predictors={ "emulator_tas": emulator_tas, "emulator_tas_squared": emulator_tas_squared, "emulator_hfds": emulator_hfds, "global_variability": global_variability, }, + target=esm_tas, + ) + + res_updated = LinearRegression().calibrate( + target_flattened, + predictors_flattened, + stack_coord_name, ) # check that calibrated parameters match for each predictor variable From 3d97ec16d23a6e0a2d71bd592227932e0fb9da1b Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Tue, 26 Oct 2021 12:46:31 +1100 Subject: [PATCH 09/23] Try doing auto-regression implementation --- mesmer/calibrate_mesmer/train_gv.py | 4 +- mesmer/create_emulations/create_emus_gv.py | 2 + mesmer/prototype/calibrate.py | 82 ++++++++++--- mesmer/prototype/calibrate_multiple.py | 81 +++++++++++++ tests/integration/test_prototype.py | 135 ++++++++++++++++++++- 5 files changed, 280 insertions(+), 24 deletions(-) diff --git a/mesmer/calibrate_mesmer/train_gv.py b/mesmer/calibrate_mesmer/train_gv.py index 3d65ec5d..561bd718 100644 --- a/mesmer/calibrate_mesmer/train_gv.py +++ b/mesmer/calibrate_mesmer/train_gv.py @@ -176,7 +176,7 @@ def train_gv_AR(params_gv, gv, max_lag, sel_crit): gv[scen][run], maxlag=max_lag, ic=sel_crit, old_names=False ).ar_lags # if order > 0 is selected,add selected order to vector - if len(run_ar_lags) > 0: + if run_ar_lags is not None: AR_order_runs_tmp[run] = run_ar_lags[-1] AR_order_scens_tmp[scen_idx] = np.percentile( @@ -201,6 +201,7 @@ def train_gv_AR(params_gv, gv, max_lag, sel_crit): AR_std_innovs_tmp = 0 for run in np.arange(nr_runs): + # AR parameters for scenario are average of runs AR_model_tmp = AutoReg( gv[scen][run], lags=AR_order_sel, old_names=False ).fit() @@ -208,6 +209,7 @@ def train_gv_AR(params_gv, gv, max_lag, sel_crit): AR_coefs_tmp += AR_model_tmp.params[1:] / nr_runs AR_std_innovs_tmp += np.sqrt(AR_model_tmp.sigma2) / nr_runs + # AR parameters are average over scenarios params_gv["AR_int"] += AR_int_tmp / nr_scens params_gv["AR_coefs"] += AR_coefs_tmp / nr_scens params_gv["AR_std_innovs"] += AR_std_innovs_tmp / nr_scens diff --git a/mesmer/create_emulations/create_emus_gv.py b/mesmer/create_emulations/create_emus_gv.py index 326c69a9..b2a02b07 100644 --- a/mesmer/create_emulations/create_emus_gv.py +++ b/mesmer/create_emulations/create_emus_gv.py @@ -149,6 +149,7 @@ def create_emus_gv_AR(params_gv, nr_emus_v, nr_ts_emus_v, seed): np.random.seed(seed) # buffer so that initial start at 0 does not influence overall result + # Should this buffer be based on the length of ar_lags instead of hard-coded? 
buffer = 50 # re-name params for easier reading of code below @@ -175,6 +176,7 @@ def create_emus_gv_AR(params_gv, nr_emus_v, nr_ts_emus_v, seed): for t in np.arange(ar_lags[-1], len(emus_gv[i])): # avoid misleading indices emus_gv[i, t] = ( ar_int + # could probably be replaced with ArmaProcess.generate_samples + sum( ar_coefs[k] * emus_gv[i, t - ar_lags[k]] for k in np.arange(len(ar_lags)) diff --git a/mesmer/prototype/calibrate.py b/mesmer/prototype/calibrate.py index 26dc2cd5..7bb646cb 100644 --- a/mesmer/prototype/calibrate.py +++ b/mesmer/prototype/calibrate.py @@ -2,6 +2,7 @@ import numpy as np import sklearn.linear_model +import statsmodels.tsa.ar_model import xarray as xr @@ -15,26 +16,8 @@ class MesmerCalibrateTargetPredictor(MesmerCalibrateBase): @abc.abstractmethod def calibrate(self, target, predictor, **kwargs): """ - Calibrate a model - - [TODO: update this based on whatever LinearRegression.calibrate's docs - end up looking like] - - Parameters - ---------- - target : xarray.DataArray - Target data - - predictor : xarray.DataArray - Predictor data array - - **kwargs - Passed onto the calibration method - - Returns - ------- - xarray.DataArray - Fitting coefficients [TODO description of how labelled] + [TODO: update this based on however LinearRegression.calibrate's docs + end up looking] """ @@ -98,3 +81,62 @@ def calibrate( ).rename({predictor_temporary_name: predictor_name}) return res + + +class MesmerCalibrateTarget(MesmerCalibrateBase): + @abc.abstractmethod + def calibrate(self, target, **kwargs): + """ + [TODO: update this based on however LinearRegression.calibrate's docs + end up looking] + """ + + @staticmethod + def _check_target_is_one_dimensional(target, return_numpy_values): + if len(target.dims) > 1: + raise AssertionError(f"More than one dimension, found {target.dims}") + + if not return_numpy_values: + return None + + return target.dropna(dim=target.dims[0]).values + + +class AutoRegression1DOrderSelection(MesmerCalibrateTarget): + def calibrate( + self, + target, + maxlag=12, + ic="bic", + ): + target_numpy = self._check_target_is_one_dimensional( + target, return_numpy_values=True + ) + + calibrated = statsmodels.tsa.ar_model.ar_select_order( + target_numpy, maxlag=maxlag, ic=ic, old_names=False + ) + + return calibrated.ar_lags + + +class AutoRegression1D(MesmerCalibrateTarget): + def calibrate( + self, + target, + order, + ): + target_numpy = self._check_target_is_one_dimensional( + target, return_numpy_values=True + ) + + calibrated = statsmodels.tsa.ar_model.AutoReg( + target_numpy, lags=order, old_names=False + ).fit() + + return { + "intercept": calibrated.params[0], + "lag_coefficients": calibrated.params[1:], + # I don't know what this is so a better name could probably be chosen + "standard_innovations": np.sqrt(calibrated.sigma2), + } diff --git a/mesmer/prototype/calibrate_multiple.py b/mesmer/prototype/calibrate_multiple.py index 231352a5..95166b7f 100644 --- a/mesmer/prototype/calibrate_multiple.py +++ b/mesmer/prototype/calibrate_multiple.py @@ -1,5 +1,8 @@ +import numpy as np import xarray as xr +from .calibrate import AutoRegression1DOrderSelection, AutoRegression1D + def _get_predictor_dims(predictors): predictors_dims = {k: v.dims for k, v in predictors.items()} @@ -63,6 +66,84 @@ def flatten_predictors_and_target(predictors, target): return predictors_flattened, target_flattened, stack_coord_name +def _select_auto_regressive_process_order( + target, + maxlag, + ic, + scenario_level="scenario", + ensemble_member_level="ensemble_member", 
+ q=50, + interpolation="nearest", +): + """ + + interpolation : str + Passed to :func:`numpy.percentile`. Interpolation is not a good way to + go here because it could lead to an AR order that wasn't actually chosen by any run. We recommend using the default value i.e. "nearest". + """ + order = [] + for _, scenario_vals in target.groupby(scenario_level): + scenario_orders = [] + for _, em_vals in scenario_vals.groupby(ensemble_member_level): + em_orders = AutoRegression1DOrderSelection().calibrate( + em_vals, maxlag=maxlag, ic=ic + ) + + if em_orders is not None: + scenario_orders.append(em_orders[-1]) + else: + scenario_orders.append(0) + + if scenario_orders: + order.append( + np.percentile(scenario_orders, q=q, interpolation=interpolation) + ) + else: + order.append(0) + + res = int(np.percentile(order, q=q, interpolation=interpolation)) + + return res + + +def _derive_auto_regressive_process_parameters( + target, order, scenario_level="scenario", ensemble_member_level="ensemble_member" +): + # I don't like the fact that I'm duplicating these loops, surely there is a better way + parameters = { + "intercept": [], + "lag_coefficients": [], + "standard_innovations": [], + } + for _, scenario_vals in target.groupby(scenario_level): + scenario_parameters = {k: [] for k in parameters} + for _, em_vals in scenario_vals.groupby(ensemble_member_level): + em_parameters = AutoRegression1D().calibrate(em_vals, order=order) + for k, v in em_parameters.items(): + scenario_parameters[k].append(v) + + scenario_parameters_average = { + k: np.vstack(v).mean(axis=0) for k, v in scenario_parameters.items() + } + for k, v in scenario_parameters_average.items(): + parameters[k].append(v) + + parameters_average = {k: np.vstack(v).mean(axis=0) for k, v in parameters.items()} + + return parameters_average + + +def calibrate_auto_regressive_process_multiple_scenarios_and_ensemble_members( + target, + maxlag=12, + ic="bic", +): + ar_order = _select_auto_regressive_process_order(target, maxlag, ic) + ar_params = _derive_auto_regressive_process_parameters(target, ar_order) + + return ar_params + + def calibrate_multiple_scenarios_and_ensemble_members( targets, predictors, calibration_class, calibration_kwargs, weighting_style ): diff --git a/tests/integration/test_prototype.py b/tests/integration/test_prototype.py index 41dbcc54..bad805dd 100644 --- a/tests/integration/test_prototype.py +++ b/tests/integration/test_prototype.py @@ -1,10 +1,16 @@ import numpy as np import numpy.testing as npt +import pytest import xarray as xr +from statsmodels.tsa.arima_process import ArmaProcess from mesmer.calibrate_mesmer.train_lt import train_lt +from mesmer.calibrate_mesmer.train_gv import train_gv from mesmer.prototype.calibrate import LinearRegression -from mesmer.prototype.calibrate_multiple import flatten_predictors_and_target +from mesmer.prototype.calibrate_multiple import ( + calibrate_auto_regressive_process_multiple_scenarios_and_ensemble_members, + flatten_predictors_and_target, +) class _MockConfig: @@ -12,6 +18,7 @@ def __init__( self, method_lt="OLS", method_lv="OLS_AR1_sci", + method_gv="AR", separate_gridpoints=True, weight_scenarios_equally=True, target_variable="tas", @@ -20,11 +27,18 @@ def __init__( self.methods[target_variable] = {} self.methods[target_variable]["lt"] = method_lt self.methods[target_variable]["lv"] = method_lv + self.methods[target_variable]["gv"] = method_gv + + # this has to be set but isn't actually used + self.preds = {} + self.preds[target_variable] = {} + 
self.preds[target_variable]["gv"] = None + self.method_lt_each_gp_sep = separate_gridpoints self.wgt_scen_tr_eq = weight_scenarios_equally -def _do_legacy_run( +def _do_legacy_run_train_lt( emulator_tas, emulator_tas_squared, emulator_hfds, @@ -139,7 +153,7 @@ def test_prototype_train_lt(): coords=targ_coords, ) - res_legacy = _do_legacy_run( + res_legacy = _do_legacy_run_train_lt( emulator_tas, emulator_tas_squared, emulator_hfds, @@ -179,6 +193,121 @@ def test_prototype_train_lt(): npt.assert_allclose(res_updated.sel(predictor=updated_name), legacy_vals) +def _do_legacy_run_train_gv( + esm_tas_global_variability, + cfg, +): + targs_legacy = {} + var_name = "tas" + + targs_legacy = {} + for scenario, vals_scen in esm_tas_global_variability.groupby("scenario"): + targs_legacy[scenario] = ( + vals_scen.T.dropna(dim="time").transpose("ensemble_member", "time").values + ) + + res_legacy = train_gv( + targs_legacy, + targ=var_name, + esm="esm_name", + cfg=cfg, + save_params=False, + max_lag=2, + ) + + return res_legacy + + +@pytest.mark.parametrize( + "ar", + ( + [1, 0.5, 0.3], + [1, 0.5, 0.3, 0.3, 0.7], + [0.9, 1, 0.2, -0.1], + ), +) +def test_prototype_train_gv(ar): + time_history = range(1850, 2014 + 1) + time_scenario = range(2015, 2100 + 1) + time = list(time_history) + list(time_scenario) + + magnitude = np.array([0.1]) + + scenarios = ["hist", "ssp126"] + + # we wouldn't actually start like this, but we'd write a utils function + # to simply go from lat-lon to gridpoint and back + targ_dims = ["scenario", "ensemble_member", "time"] + targ_coords = dict( + time=time, + scenario=scenarios, + ensemble_member=["r1i1p1f1", "r2i1p1f1"], + ) + esm_tas_global_variability = xr.DataArray( + np.array( + [ + [ + np.concatenate( + [ + ArmaProcess(ar, magnitude).generate_sample( + nsample=len(time_history) + ), + np.nan * np.zeros(len(time_scenario)), + ] + ), + np.concatenate( + [ + ArmaProcess(ar, magnitude).generate_sample( + nsample=len(time_history) + ), + np.nan * np.zeros(len(time_scenario)), + ] + ), + ], + [ + np.concatenate( + [ + np.nan * np.zeros(len(time_history)), + ArmaProcess(ar, magnitude).generate_sample( + nsample=len(time_scenario) + ), + ] + ), + np.concatenate( + [ + np.nan * np.zeros(len(time_history)), + ArmaProcess(ar, magnitude).generate_sample( + nsample=len(time_scenario) + ), + ] + ), + ], + ] + ), + dims=targ_dims, + coords=targ_coords, + ) + + res_legacy = _do_legacy_run_train_gv( + esm_tas_global_variability, + cfg=_MockConfig(), + ) + + res_updated = ( + calibrate_auto_regressive_process_multiple_scenarios_and_ensemble_members( + esm_tas_global_variability, + maxlag=2, + ) + ) + + for key, comparison in ( + ("intercept", res_legacy["AR_int"]), + ("lag_coefficients", res_legacy["AR_coefs"]), + ("standard_innovations", res_legacy["AR_std_innovs"]), + ): + npt.assert_allclose(res_updated[key], comparison) + + # things that aren't tested well: # - what happens if ensemble member and scenario don't actually make a coherent set # - units (should probably be using dataset rather than dataarray for inputs and outputs?) 
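For reference, the round trip that test_prototype_train_gv exercises above can be reproduced in isolation. The following is a minimal sketch rather than part of any commit in this series; it assumes a recent statsmodels and numpy, and the variable names are illustrative. As the patch shows, AutoRegression1DOrderSelection.calibrate and AutoRegression1D.calibrate are thin wrappers around the same ar_select_order and AutoReg calls.

import numpy as np
from statsmodels.tsa.ar_model import AutoReg, ar_select_order
from statsmodels.tsa.arima_process import ArmaProcess

# simulate an AR(1) process x_t = 0.5 * x_{t-1} + e_t
# (ArmaProcess takes lag-polynomial coefficients, hence the sign flip)
sample = ArmaProcess(ar=np.array([1.0, -0.5]), ma=np.array([1.0])).generate_sample(
    nsample=1000
)

# order selection, as wrapped by AutoRegression1DOrderSelection.calibrate
selected = ar_select_order(sample, maxlag=12, ic="bic")
chosen_lags = selected.ar_lags  # e.g. [1]; can be None if no lag is selected

# parameter estimation, as wrapped by AutoRegression1D.calibrate
fitted = AutoReg(sample, lags=1).fit()
intercept = fitted.params[0]  # close to 0 for this process
lag_coefficients = fitted.params[1:]  # close to [0.5]
standard_innovations = np.sqrt(fitted.sigma2)  # close to 1, the innovation std

The prototype then reduces these per-ensemble-member results across scenarios, which is what the comparison against train_gv above checks.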
From 14c429df8f3cf51f11bed6de5355543c733a78d2 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Wed, 27 Oct 2021 12:12:26 +1100 Subject: [PATCH 10/23] Clean up loops --- mesmer/calibrate_mesmer/train_lv.py | 8 +- mesmer/prototype/calibrate_multiple.py | 132 ++++++++++++------------- 2 files changed, 69 insertions(+), 71 deletions(-) diff --git a/mesmer/calibrate_mesmer/train_lv.py b/mesmer/calibrate_mesmer/train_lv.py index 23fce905..938d19d5 100644 --- a/mesmer/calibrate_mesmer/train_lv.py +++ b/mesmer/calibrate_mesmer/train_lv.py @@ -251,6 +251,12 @@ def train_lv_AR1_sci(params_lv, targs, y, wgt_scen_eq, aux, cfg): params_lv["AR1_std_innovs"][targ_name] += AR1_std_innovs_runs / nr_scens # determine localization radius, empirical cov matrix, and localized ecov matrix + import pdb + + pdb.set_trace() + # y_targ has dimensions (draw (flattened version of time and scenario and ensemble member), gridpoint) + # wgt_scen_eq has dimensions (draw (flattened version of time and scenario and ensemble member),) + # aux["phi_gc"][year] has dimensions (gridpoint, gridpoint) ( params_lv["L"][targ_name], params_lv["ecov"][targ_name], @@ -361,6 +367,7 @@ def train_lv_find_localized_ecov(y, wgt_scen_eq, aux, cfg): # each cv sample gets its own likelihood -> can sum them up for overall # likelihood # sum over all samples = wgt average * nr_samples + # TODO: check why this can't be done with sum llh_cv_fold_sum = np.average( llh_cv_each_sample, weights=wgt_scen_eq_cv ) * len(wgt_scen_eq_cv) @@ -369,7 +376,6 @@ def train_lv_find_localized_ecov(y, wgt_scen_eq, aux, cfg): llh_cv_sum[L] += llh_cv_fold_sum idx_L += 1 - if llh_cv_sum[L] > llh_max: L_sel = L llh_max = llh_cv_sum[L] diff --git a/mesmer/prototype/calibrate_multiple.py b/mesmer/prototype/calibrate_multiple.py index 95166b7f..8e40f8ea 100644 --- a/mesmer/prototype/calibrate_multiple.py +++ b/mesmer/prototype/calibrate_multiple.py @@ -1,4 +1,5 @@ import numpy as np +import pandas as pd import xarray as xr from .calibrate import AutoRegression1DOrderSelection, AutoRegression1D @@ -66,6 +67,21 @@ def flatten_predictors_and_target(predictors, target): return predictors_flattened, target_flattened, stack_coord_name +def _loop_levels(inp, levels): + # annoyingly, there doesn't seem to be an inbuilt solution for this + # https://github.com/pydata/xarray/issues/2438 + def _yield_level(inph, levels_left, out_names): + for name, values in inph.groupby(levels_left[0]): + out_names_here = out_names + [name] + if len(levels_left) == 1: + yield tuple(out_names_here), values + else: + yield from _yield_level(values, levels_left[1:], out_names_here) + + for names, values in _yield_level(inp, levels, []): + yield names, values + + def _select_auto_regressive_process_order( target, maxlag, @@ -81,27 +97,31 @@ def _select_auto_regressive_process_order( Passed to :func:`numpy.percentile`. Interpolation is not a good way to go here because it could lead to an AR order that wasn't actually chosen by any run. We recommend using the default value i.e. "nearest". 
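        For example, if the orders selected for two ensemble members were 1 and
        3, a "linear" 50th percentile would give 2.0, an order that neither run
        actually chose, whereas "nearest" returns one of the orders that was
        selected.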
""" - order = [] - for _, scenario_vals in target.groupby(scenario_level): - scenario_orders = [] - for _, em_vals in scenario_vals.groupby(ensemble_member_level): - em_orders = AutoRegression1DOrderSelection().calibrate( - em_vals, maxlag=maxlag, ic=ic - ) - - if em_orders is not None: - scenario_orders.append(em_orders[-1]) - else: - scenario_orders.append(0) + store = [] - if scenario_orders: - order.append( - np.percentile(scenario_orders, q=q, interpolation=interpolation) - ) - else: - order.append(0) + for (scenario, ensemble_member), values in _loop_levels( + target, (scenario_level, ensemble_member_level) + ): + orders = AutoRegression1DOrderSelection().calibrate( + values, maxlag=maxlag, ic=ic + ) + keep_order = 0 if orders is None else orders[-1] + store.append( + { + "scenario": scenario, + "ensemble_member": ensemble_member, + "order": keep_order, + } + ) - res = int(np.percentile(order, q=q, interpolation=interpolation)) + store = pd.DataFrame(store).set_index(["scenario", "ensemble_member"]) + res = ( + store.groupby("scenario")["order"] + # first operation gives result by scenario (i.e. over ensemble members) + .quantile(q=q / 100, interpolation=interpolation) + # second one gives result over all scenarios + .quantile(q=q / 100, interpolation=interpolation) + ) return res @@ -109,28 +129,29 @@ def _select_auto_regressive_process_order( def _derive_auto_regressive_process_parameters( target, order, scenario_level="scenario", ensemble_member_level="ensemble_member" ): - # I don't like the fact that I'm duplicating these loops, surely there is a better way - parameters = { - "intercept": [], - "lag_coefficients": [], - "standard_innovations": [], - } - for _, scenario_vals in target.groupby(scenario_level): - scenario_parameters = {k: [] for k in parameters} - for _, em_vals in scenario_vals.groupby(ensemble_member_level): - em_parameters = AutoRegression1D().calibrate(em_vals, order=order) - for k, v in em_parameters.items(): - scenario_parameters[k].append(v) - - scenario_parameters_average = { - k: np.vstack(v).mean(axis=0) for k, v in scenario_parameters.items() - } - for k, v in scenario_parameters_average.items(): - parameters[k].append(v) - - parameters_average = {k: np.vstack(v).mean(axis=0) for k, v in parameters.items()} - - return parameters_average + store = [] + for (scenario, ensemble_member), values in _loop_levels( + target, (scenario_level, ensemble_member_level) + ): + parameters = AutoRegression1D().calibrate(values, order=order) + parameters["scenario"] = scenario + parameters["ensemble_member"] = ensemble_member + store.append(parameters) + + store = pd.DataFrame(store).set_index(["scenario", "ensemble_member"]) + + def _axis_mean(inp): + return inp.apply(np.mean, axis=0) + + res = ( + store.groupby("scenario") + # first operation gives result by scenario (i.e. 
over ensemble members) + .apply(_axis_mean) + # second one gives result over all scenarios + .apply(np.mean, axis=0).to_dict() + ) + + return res def calibrate_auto_regressive_process_multiple_scenarios_and_ensemble_members( @@ -142,32 +163,3 @@ def calibrate_auto_regressive_process_multiple_scenarios_and_ensemble_members( ar_params = _derive_auto_regressive_process_parameters(target, ar_order) return ar_params - - -def calibrate_multiple_scenarios_and_ensemble_members( - targets, predictors, calibration_class, calibration_kwargs, weighting_style -): - """ - Calibrate based on multiple scenarios and ensemble members per scenario - - Parameters - ---------- - targets : xarray.DataArray - Target variables to calibrate to. This should have a scenario and an ensemble-member dimension (you'd need a different function to prepare things such that the dimensions all worked) - - predictors : xarray.DataArray - Predictor variables, as above re comments about prepping data - - calibration_class : mesmer.calibration.MesmerCalibrateBase - Class to use for calibration - - calibration_kwargs : dict - Keyword arguments to pass to the calibration class - - weighting_style : ["equal", "equal_scenario", "equal_ensemble_member"] - How to weight combinations of scenarios and ensemble members. "equal" --> all scenario - ensemble member combination are treated equally. "equal_scenario" --> all scenarios get equal weight (so if a scenario has more ensemble members then each ensemble member gets less weight). "equal_ensemble_member" --> all ensemble members get the same weight (so if an ensemble member is reported for more than one scenario then each scenario gets less weight for that ensemble member) - """ - # this function (or class) would handle all of the looping over scenarios - # and ensemble members. 
It would call the calibration class on each - # scenario and ensemble member and know how much weight to give each - # before combining them From 4fd57aae90c674a29c17cc1b46a101b3383ed4ee Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Wed, 27 Oct 2021 12:18:34 +1100 Subject: [PATCH 11/23] Remove pdb statement --- mesmer/calibrate_mesmer/train_lv.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/mesmer/calibrate_mesmer/train_lv.py b/mesmer/calibrate_mesmer/train_lv.py index 938d19d5..c023b964 100644 --- a/mesmer/calibrate_mesmer/train_lv.py +++ b/mesmer/calibrate_mesmer/train_lv.py @@ -251,9 +251,6 @@ def train_lv_AR1_sci(params_lv, targs, y, wgt_scen_eq, aux, cfg): params_lv["AR1_std_innovs"][targ_name] += AR1_std_innovs_runs / nr_scens # determine localization radius, empirical cov matrix, and localized ecov matrix - import pdb - - pdb.set_trace() # y_targ has dimensions (draw (flattened version of time and scenario and ensemble member), gridpoint) # wgt_scen_eq has dimensions (draw (flattened version of time and scenario and ensemble member),) # aux["phi_gc"][year] has dimensions (gridpoint, gridpoint) From 1026161bbb6faccea29839fc26b0c7d038abac37 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Wed, 24 Nov 2021 13:06:12 +1100 Subject: [PATCH 12/23] Sketch out test for train lv --- tests/integration/test_prototype.py | 55 +++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/tests/integration/test_prototype.py b/tests/integration/test_prototype.py index bad805dd..db877d3c 100644 --- a/tests/integration/test_prototype.py +++ b/tests/integration/test_prototype.py @@ -5,6 +5,7 @@ from statsmodels.tsa.arima_process import ArmaProcess from mesmer.calibrate_mesmer.train_lt import train_lt +from mesmer.calibrate_mesmer.train_lv import train_lv from mesmer.calibrate_mesmer.train_gv import train_gv from mesmer.prototype.calibrate import LinearRegression from mesmer.prototype.calibrate_multiple import ( @@ -308,6 +309,60 @@ def test_prototype_train_gv(ar): npt.assert_allclose(res_updated[key], comparison) +def _do_legacy_run_train_lv( + esm_tas_global_variability, + cfg, +): + targs_legacy = {} + + targs_legacy = {} + for scenario, vals_scen in esm_tas_global_variability.groupby("scenario"): + targs_legacy[scenario] = ( + vals_scen.T.dropna(dim="time").transpose("ensemble_member", "time").values + ) + + aux = { + "phi_gc": calculate_gaspari_cohn_correlation_function(), + } + res_legacy = train_lv( + preds={}, + targs=targs_legacy, + esm="test", + cfg=cfg, + save_params=False, + aux=aux, + params_lv={}, # unclear why this is passed in + ) + + return res_legacy + + +def test_prototype_train_lv(): + # input is residual local variability we want to reproduce (for Leah, residual + # means after removing local trend and local variability due to global variability + # but it could be whatever in reality) + + # also input the phi gc stuff (that's part of the calibration but doesn't go in the train + # lv function for some reason --> put it in calibrate_local_variability prototype function) + + res_legacy = _do_legacy_run_train_lv( + esm_tas_residual_local_variability, + cfg=_MockConfig(), + ) + + res_updated = ( + calibrate_auto_regressive_process_multiple_scenarios_and_ensemble_members( + esm_tas_global_variability, + maxlag=2, + ) + ) + + for key, comparison in ( + ("intercept", res_legacy["AR_int"]), + ("lag_coefficients", res_legacy["AR_coefs"]), + ("standard_innovations", res_legacy["AR_std_innovs"]), + ): + npt.assert_allclose(res_updated[key], comparison) # 
things that aren't tested well: # - what happens if ensemble member and scenario don't actually make a coherent set # - units (should probably be using dataset rather than dataarray for inputs and outputs?) From d4e4e7fa1d0ae6ca0a8a47ba8c084403736cda6a Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Thu, 25 Nov 2021 18:04:27 +1100 Subject: [PATCH 13/23] More notes about how to do train lv --- tests/integration/test_prototype.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_prototype.py b/tests/integration/test_prototype.py index db877d3c..91e3bd4a 100644 --- a/tests/integration/test_prototype.py +++ b/tests/integration/test_prototype.py @@ -310,13 +310,13 @@ def test_prototype_train_gv(ar): def _do_legacy_run_train_lv( - esm_tas_global_variability, + esm_tas_residual_local_variability, cfg, ): targs_legacy = {} targs_legacy = {} - for scenario, vals_scen in esm_tas_global_variability.groupby("scenario"): + for scenario, vals_scen in esm_tas_residual_local_variability.groupby("scenario"): targs_legacy[scenario] = ( vals_scen.T.dropna(dim="time").transpose("ensemble_member", "time").values ) @@ -337,13 +337,25 @@ def _do_legacy_run_train_lv( return res_legacy +# how train_lv works: +# 1. AR1 process for each individual gridpoint (use calibrate_auto_regressive_process_multiple_scenarios_and_ensemble_members) +# 2. find localised empirical covariance matrix +# 3. combine AR1 and localised empirical covariance matrix to get localised empirical covariance matrix +# of innovations (i.e. errors) which can be used for later draws (again, with a bit of custom code +# that feels like it should really be done using an existing library) + + def test_prototype_train_lv(): # input is residual local variability we want to reproduce (for Leah, residual # means after removing local trend and local variability due to global variability # but it could be whatever in reality) # also input the phi gc stuff (that's part of the calibration but doesn't go in the train - # lv function for some reason --> put it in calibrate_local_variability prototype function) + # lv function for some reason --> put it in calibrate_local_variability prototype function + # although split that function to allow for pre-calculating distance between points in future + # as a performance boost) + + # see how much code I can reuse for the AR1 calibration res_legacy = _do_legacy_run_train_lv( esm_tas_residual_local_variability, From 3470ee43543fc088ef533ab4f274c6f0ee977c9b Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Thu, 25 Nov 2021 20:45:30 +1100 Subject: [PATCH 14/23] Start working on geodesic functions --- mesmer/prototype/utils.py | 70 ++++++++++++++++++++++++++ tests/integration/test_prototype.py | 78 +++++++++++++++++++++++++++-- 2 files changed, 144 insertions(+), 4 deletions(-) create mode 100644 mesmer/prototype/utils.py diff --git a/mesmer/prototype/utils.py b/mesmer/prototype/utils.py new file mode 100644 index 00000000..3c3fac79 --- /dev/null +++ b/mesmer/prototype/utils.py @@ -0,0 +1,70 @@ +import numpy as np +import pyproj + + +def calculate_gaspari_cohn_correlation_matrix( + latitudes, + longitudes, +): + """ + Calculate Gaspari-Cohn correlation matrix + + Parameters + ---------- + latitudes : :obj:`xr.DataArray` + Latitudes (one-dimensional) + + longitudes : :obj:`xr.DataArray` + Longitudes (one-dimensional) + """ + # I wonder if xarray can apply a function to all pairs of points in arrays + # or something + geodist = 
calculate_geodistance_exact(longitudes, latitudes) + + +def calculate_geodistance_exact(latitudes, longitudes): + """ + Calculate exact great circle distance based on WSG 84 + + Parameters + ---------- + latitudes : :obj:`xr.DataArray` + Latitudes (one-dimensional) + + longitudes : :obj:`xr.DataArray` + Longitudes (one-dimensional) + + Returns + ------- + geodist : np.array + 2D array of great circle distances. + """ + if longitudes.shape != latitudes.shape or longitudes.ndim != 1: + raise ValueError("lon and lat need to be 1D arrays of the same shape") + + geod = pyproj.Geod(ellps="WGS84") + + n_points = longitudes.shape[0] + + geodist = np.zeros([n_points, n_points]) + + # calculate only the upper right half of the triangle first + for i in range(n_points): + + # need to duplicate gridpoint (required by geod.inv) + lat = np.tile(latitudes[i], n_points - (i + 1)) + lon = np.tile(longitudes[i], n_points - (i + 1)) + + geodist[i, i + 1 :] = geod.inv(lon, lat, longitudes[i + 1 :], latitudes[i + 1 :])[2] + + # convert m to km + geodist /= 1000 + + # fill the lower left half of the triangle (in-place) + geodist += np.transpose(geodist) + + # should be able to keep co-ordinate information here too + import pdb + pdb.set_trace() + + return geodist diff --git a/tests/integration/test_prototype.py b/tests/integration/test_prototype.py index 91e3bd4a..a3e54bfe 100644 --- a/tests/integration/test_prototype.py +++ b/tests/integration/test_prototype.py @@ -12,6 +12,7 @@ calibrate_auto_regressive_process_multiple_scenarios_and_ensemble_members, flatten_predictors_and_target, ) +from mesmer.prototype.utils import calculate_gaspari_cohn_correlation_matrix class _MockConfig: @@ -236,8 +237,6 @@ def test_prototype_train_gv(ar): scenarios = ["hist", "ssp126"] - # we wouldn't actually start like this, but we'd write a utils function - # to simply go from lat-lon to gridpoint and back targ_dims = ["scenario", "ensemble_member", "time"] targ_coords = dict( time=time, @@ -318,11 +317,14 @@ def _do_legacy_run_train_lv( targs_legacy = {} for scenario, vals_scen in esm_tas_residual_local_variability.groupby("scenario"): targs_legacy[scenario] = ( - vals_scen.T.dropna(dim="time").transpose("ensemble_member", "time").values + vals_scen.T.dropna(dim="time").transpose("ensemble_member", "time", "gridpoint").values ) aux = { - "phi_gc": calculate_gaspari_cohn_correlation_function(), + "phi_gc": calculate_gaspari_cohn_correlation_matrix( + latitudes=esm_tas_residual_local_variability.lat, + longitudes=esm_tas_residual_local_variability.lon, + ), } res_legacy = train_lv( preds={}, @@ -356,6 +358,74 @@ def test_prototype_train_lv(): # as a performance boost) # see how much code I can reuse for the AR1 calibration + time_history = range(1850, 2014 + 1) + time_scenario = range(2015, 2100 + 1) + time = list(time_history) + list(time_scenario) + scenarios = ["hist", "ssp126"] + + # we wouldn't actually start like this, but we'd write a utils function + # to simply go from lat-lon to gridpoint and back + targ_dims = ["scenario", "ensemble_member", "gridpoint", "time"] + targ_coords = dict( + time=time, + scenario=scenarios, + ensemble_member=["r1i1p1f1", "r2i1p1f1"], + gridpoint=[0, 1], + lat=(["gridpoint"], [-60, 60]), + lon=(["gridpoint"], [120, 240]), + ) + + ar = np.array([1]) + magnitude = np.array([0.05]) + + def _get_history_sample(): + return np.concatenate( + [ + ArmaProcess(ar, magnitude).generate_sample( + nsample=len(time_history) + ), + np.nan * np.zeros(len(time_scenario)), + ] + ) + + def _get_scenario_sample(): 
+ return np.concatenate( + [ + np.nan * np.zeros(len(time_history)), + ArmaProcess(ar, magnitude).generate_sample( + nsample=len(time_scenario) + ), + ] + ) + + esm_tas_residual_local_variability = xr.DataArray( + np.array( + [ + [ + [ + _get_history_sample(), + _get_history_sample(), + ], + [ + _get_history_sample(), + _get_history_sample(), + ] + ], + [ + [ + _get_scenario_sample(), + _get_scenario_sample(), + ], + [ + _get_scenario_sample(), + _get_scenario_sample(), + ], + ], + ] + ), + dims=targ_dims, + coords=targ_coords, + ) res_legacy = _do_legacy_run_train_lv( esm_tas_residual_local_variability, From 3b23a6da0b2b3a3941406c62afc43f0498a8cf9f Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Fri, 26 Nov 2021 15:58:26 +1100 Subject: [PATCH 15/23] Get legacy training running --- mesmer/prototype/utils.py | 92 +++++++++++++++++++++++++---- tests/integration/test_prototype.py | 33 +++++++---- 2 files changed, 100 insertions(+), 25 deletions(-) diff --git a/mesmer/prototype/utils.py b/mesmer/prototype/utils.py index 3c3fac79..1f749cf9 100644 --- a/mesmer/prototype/utils.py +++ b/mesmer/prototype/utils.py @@ -1,13 +1,15 @@ import numpy as np import pyproj +import xarray as xr -def calculate_gaspari_cohn_correlation_matrix( +def calculate_gaspari_cohn_correlation_matrices( latitudes, longitudes, + localisation_radii, ): """ - Calculate Gaspari-Cohn correlation matrix + Calculate Gaspari-Cohn correlation matrices for a range of localisation radiis Parameters ---------- @@ -16,10 +18,30 @@ def calculate_gaspari_cohn_correlation_matrix( longitudes : :obj:`xr.DataArray` Longitudes (one-dimensional) + + localisation_radii : list-like + Localisation radii to test (in metres) + + Returns + ------- + dict[float: :obj:`xr.DataArray`] + Gaspari-Cohn correlation matrix (values) for each localisation radius (keys) + + Notes + ----- + Values in ``localisation_radii`` should not exceed 10'000 by much because + it can lead to ``ValueError: the input matrix must be positive semidefinite`` """ # I wonder if xarray can apply a function to all pairs of points in arrays # or something - geodist = calculate_geodistance_exact(longitudes, latitudes) + geodistance = calculate_geodistance_exact(latitudes, longitudes) + + gaspari_cohn_correlation_matrices = { + lr : calculate_gaspari_cohn_values(geodistance / lr) + for lr in localisation_radii + } + + return gaspari_cohn_correlation_matrices def calculate_geodistance_exact(latitudes, longitudes): @@ -36,8 +58,9 @@ def calculate_geodistance_exact(latitudes, longitudes): Returns ------- - geodist : np.array - 2D array of great circle distances. 
+ :obj:`xr.DataArray` + 2D array of great circle distances between points represented by ``latitudes`` + and ``longitudes`` """ if longitudes.shape != latitudes.shape or longitudes.ndim != 1: raise ValueError("lon and lat need to be 1D arrays of the same shape") @@ -46,7 +69,7 @@ def calculate_geodistance_exact(latitudes, longitudes): n_points = longitudes.shape[0] - geodist = np.zeros([n_points, n_points]) + geodistance = np.zeros([n_points, n_points]) # calculate only the upper right half of the triangle first for i in range(n_points): @@ -55,16 +78,59 @@ def calculate_geodistance_exact(latitudes, longitudes): lat = np.tile(latitudes[i], n_points - (i + 1)) lon = np.tile(longitudes[i], n_points - (i + 1)) - geodist[i, i + 1 :] = geod.inv(lon, lat, longitudes[i + 1 :], latitudes[i + 1 :])[2] + geodistance[i, i + 1 :] = geod.inv(lon, lat, longitudes.values[i + 1 :], latitudes.values[i + 1 :])[2] # convert m to km - geodist /= 1000 + geodistance /= 1000 # fill the lower left half of the triangle (in-place) - geodist += np.transpose(geodist) + geodistance += np.transpose(geodistance) + + if latitudes.dims != longitudes.dims: + raise AssertionError(f"latitudes and longitudes have different dims: {latitudes.dims} vs. {longitudes.dims}") + + geodistance = xr.DataArray(geodistance, dims=list(latitudes.dims) * 2, coords=latitudes.coords) - # should be able to keep co-ordinate information here too - import pdb - pdb.set_trace() + return geodistance - return geodist + +def calculate_gaspari_cohn_values(inputs): + """ + Calculate smooth, exponentially decaying Gaspari-Cohn values + + Parameters + ---------- + inputs : :obj:`xr.DataArray` + Inputs at which to calculate the value of the smooth, exponentially decaying Gaspari-Cohn + correlation function (these could be e.g. 
normalised geographical distances) + + Returns + ------- + :obj:`xr.DataArray` + Gaspari-Cohn correlation function applied to each point in ``inputs`` + """ + inputs_abs = abs(inputs) + out = np.zeros_like(inputs) + + sel_zero_to_one = (inputs_abs.values >= 0) & (inputs_abs.values < 1) + r_s = inputs_abs.values[sel_zero_to_one] + out[sel_zero_to_one] = ( + 1 - 5 / 3 * r_s ** 2 + 5 / 8 * r_s ** 3 + 1 / 2 * r_s ** 4 - 1 / 4 * r_s ** 5 + ) + + sel_one_to_two = (inputs_abs.values >= 1) & (inputs_abs.values < 2) + r_s = inputs_abs.values[sel_one_to_two] + + out[sel_one_to_two] = ( + 4 + - 5 * r_s + + 5 / 3 * r_s ** 2 + + 5 / 8 * r_s ** 3 + - 1 / 2 * r_s ** 4 + + 1 / 12 * r_s ** 5 + - 2 / (3 * r_s) + ) + + out = xr.DataArray(out, dims=inputs.dims, coords=inputs.coords) + + return out diff --git a/tests/integration/test_prototype.py b/tests/integration/test_prototype.py index a3e54bfe..9eb4f6eb 100644 --- a/tests/integration/test_prototype.py +++ b/tests/integration/test_prototype.py @@ -12,7 +12,7 @@ calibrate_auto_regressive_process_multiple_scenarios_and_ensemble_members, flatten_predictors_and_target, ) -from mesmer.prototype.utils import calculate_gaspari_cohn_correlation_matrix +from mesmer.prototype.utils import calculate_gaspari_cohn_correlation_matrices class _MockConfig: @@ -24,6 +24,7 @@ def __init__( separate_gridpoints=True, weight_scenarios_equally=True, target_variable="tas", + cross_validation_max_iterations=30, ): self.methods = {} self.methods[target_variable] = {} @@ -39,6 +40,8 @@ def __init__( self.method_lt_each_gp_sep = separate_gridpoints self.wgt_scen_tr_eq = weight_scenarios_equally + self.max_iter_cv = cross_validation_max_iterations + def _do_legacy_run_train_lt( emulator_tas, @@ -310,22 +313,25 @@ def test_prototype_train_gv(ar): def _do_legacy_run_train_lv( esm_tas_residual_local_variability, + localisation_radii, cfg, ): - targs_legacy = {} - - targs_legacy = {} + targs_legacy = {"tas": {}} for scenario, vals_scen in esm_tas_residual_local_variability.groupby("scenario"): - targs_legacy[scenario] = ( + targs_legacy["tas"][scenario] = ( vals_scen.T.dropna(dim="time").transpose("ensemble_member", "time", "gridpoint").values ) - aux = { - "phi_gc": calculate_gaspari_cohn_correlation_matrix( - latitudes=esm_tas_residual_local_variability.lat, - longitudes=esm_tas_residual_local_variability.lon, - ), + gaspari_cohn_correlation_matrices = calculate_gaspari_cohn_correlation_matrices( + latitudes=esm_tas_residual_local_variability.lat, + longitudes=esm_tas_residual_local_variability.lon, + localisation_radii=localisation_radii, + ) + gaspari_cohn_correlation_matrices = { + k: v.values for k, v in gaspari_cohn_correlation_matrices.items() } + aux = {"phi_gc": gaspari_cohn_correlation_matrices} + res_legacy = train_lv( preds={}, targs=targs_legacy, @@ -357,6 +363,8 @@ def test_prototype_train_lv(): # although split that function to allow for pre-calculating distance between points in future # as a performance boost) + localisation_radii = np.arange(700, 2000, 1000) + # see how much code I can reuse for the AR1 calibration time_history = range(1850, 2014 + 1) time_scenario = range(2015, 2100 + 1) @@ -429,13 +437,14 @@ def _get_scenario_sample(): res_legacy = _do_legacy_run_train_lv( esm_tas_residual_local_variability, + localisation_radii, cfg=_MockConfig(), ) res_updated = ( calibrate_auto_regressive_process_multiple_scenarios_and_ensemble_members( - esm_tas_global_variability, - maxlag=2, + esm_tas_residual_local_variability, + localisation_radii, ) ) From 
88ec4ad5f9191f5bdb3f4356b5601af5b3deedaf Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Fri, 26 Nov 2021 17:37:55 +1100 Subject: [PATCH 16/23] Finish reimplementing train_lv --- mesmer/prototype/calibrate_multiple.py | 115 +++++++++++++++++++++++++ tests/integration/test_prototype.py | 8 +- 2 files changed, 119 insertions(+), 4 deletions(-) diff --git a/mesmer/prototype/calibrate_multiple.py b/mesmer/prototype/calibrate_multiple.py index 8e40f8ea..71c196fc 100644 --- a/mesmer/prototype/calibrate_multiple.py +++ b/mesmer/prototype/calibrate_multiple.py @@ -1,8 +1,10 @@ import numpy as np import pandas as pd +import scipy.stats import xarray as xr from .calibrate import AutoRegression1DOrderSelection, AutoRegression1D +from .utils import calculate_gaspari_cohn_correlation_matrices def _get_predictor_dims(predictors): @@ -33,6 +35,15 @@ def _check_coords_match(obj, obj_other, check_coord): raise AssertionError(f"{check_coord} is not the same on {obj} and {obj_other}") +def _flatten(inp, dims_to_flatten): + stack_coord_name = _get_stack_coord_name(inp) + inp_flat = inp.stack({stack_coord_name: dims_to_flatten}).dropna( + stack_coord_name + ) + + return inp_flat, stack_coord_name + + def _flatten_predictors(predictors, dims_to_flatten, stack_coord_name): predictors_flat = [] for v, vals in predictors.items(): @@ -163,3 +174,107 @@ def calibrate_auto_regressive_process_multiple_scenarios_and_ensemble_members( ar_params = _derive_auto_regressive_process_parameters(target, ar_order) return ar_params + + +def calibrate_auto_regressive_process_with_spatially_correlated_errors_multiple_scenarios_and_ensemble_members( + target, + localisation_radii, + max_cross_validation_iterations=30, + gridpoint_dim_name="gridpoint", +): + gridpoint_autoregression_parameters = { + gridpoint: _derive_auto_regressive_process_parameters(gridpoint_vals, order=1) + for gridpoint, gridpoint_vals in target.groupby("gridpoint") + } + + gaspari_cohn_correlation_matrices = calculate_gaspari_cohn_correlation_matrices( + target.lat, + target.lon, + localisation_radii, + ) + + localised_empirical_covariance_matrix = _calculate_localised_empirical_covariance_matrix( + target, + localisation_radii, + gaspari_cohn_correlation_matrices, + max_cross_validation_iterations, + gridpoint_dim_name=gridpoint_dim_name, + ) + + gridpoint_autoregression_coeffcients = np.hstack([v["lag_coefficients"] for v in gridpoint_autoregression_parameters.values()]) + + localised_empirical_covariance_matrix_with_ar1_errors = ( + (1 - gridpoint_autoregression_coeffcients ** 2) + * localised_empirical_covariance_matrix + ) + + return localised_empirical_covariance_matrix_with_ar1_errors + + +def _calculate_localised_empirical_covariance_matrix( + target, + localisation_radii, + gaspari_cohn_correlation_matrices, + max_cross_validation_iterations, + gridpoint_dim_name="gridpoint", +): + dims_to_flatten = [d for d in target.dims if d != gridpoint_dim_name] + target_flattened, stack_coord_name = _flatten(target, dims_to_flatten) + target_flattened = target_flattened.transpose(stack_coord_name, gridpoint_dim_name) + + number_samples = target_flattened[stack_coord_name].shape[0] + number_iterations = min([number_samples, max_cross_validation_iterations]) + + # setup cross-validation stuff + index_cross_validation_out = np.zeros([number_iterations, number_samples], dtype=bool) + + for i in range(number_iterations): + index_cross_validation_out[i, i::max_cross_validation_iterations] = True + + # No idea what these are either + 
log_likelihood_cross_validation_sum_max = -10000 + + for lr in localisation_radii: + log_likelihood_cross_validation_sum = 0 + + for i in range(number_iterations): + # extract folds (no idea why these are called folds) + target_estimator = target_flattened.isel(**{stack_coord_name: ~index_cross_validation_out[i]}).values + target_cross_validation = target_flattened.isel(**{stack_coord_name: index_cross_validation_out[i]}).values + # selecting relevant weights goes in here + + empirical_covariance = np.cov(target_estimator, rowvar=False) + # must be a better way to handle ensuring that the dimensions line up correctly (rather than + # just cheating by using `.values`) + empirical_covariance_localised = empirical_covariance * gaspari_cohn_correlation_matrices[lr].values + + # calculate likelihood of cross validation samples + log_likelihood_cross_validation_samples = scipy.stats.multivariate_normal.logpdf( + target_cross_validation, + mean=np.zeros(gaspari_cohn_correlation_matrices[lr].shape[0]), + cov=empirical_covariance_localised, + allow_singular=True, + ) + log_likelihood_cross_validation_samples_weighted_sum = np.average( + log_likelihood_cross_validation_samples, + # weights=wgt_scen_eq_cv # TODO: weights handling + ) * log_likelihood_cross_validation_samples.shape[0] + + # add to full sum over all folds + log_likelihood_cross_validation_sum += log_likelihood_cross_validation_samples_weighted_sum + + if log_likelihood_cross_validation_sum > log_likelihood_cross_validation_sum_max: + log_likelihood_cross_validation_sum_max = log_likelihood_cross_validation_sum + else: + # experience tells us that once we start selecting large localisation radii, performance + # will not improve ==> break (reduces computational effort and number of singular matrices + # encountered) + break + + # TODO: replace print with logging + print(f"Selected localisation radius: {lr}") + + empirical_covariance = np.cov(target_flattened.values, rowvar=False) + empirical_covariance_localised = empirical_covariance * gaspari_cohn_correlation_matrices[lr].values + + return empirical_covariance_localised diff --git a/tests/integration/test_prototype.py b/tests/integration/test_prototype.py index 9eb4f6eb..e4e8c9f5 100644 --- a/tests/integration/test_prototype.py +++ b/tests/integration/test_prototype.py @@ -10,6 +10,7 @@ from mesmer.prototype.calibrate import LinearRegression from mesmer.prototype.calibrate_multiple import ( calibrate_auto_regressive_process_multiple_scenarios_and_ensemble_members, + calibrate_auto_regressive_process_with_spatially_correlated_errors_multiple_scenarios_and_ensemble_members, flatten_predictors_and_target, ) from mesmer.prototype.utils import calculate_gaspari_cohn_correlation_matrices @@ -442,18 +443,17 @@ def _get_scenario_sample(): ) res_updated = ( - calibrate_auto_regressive_process_multiple_scenarios_and_ensemble_members( + calibrate_auto_regressive_process_with_spatially_correlated_errors_multiple_scenarios_and_ensemble_members( esm_tas_residual_local_variability, localisation_radii, ) ) for key, comparison in ( - ("intercept", res_legacy["AR_int"]), - ("lag_coefficients", res_legacy["AR_coefs"]), - ("standard_innovations", res_legacy["AR_std_innovs"]), + ("localised_empirical_covariance_matrix_with_ar1_errors", res_legacy["loc_ecov_AR1_innovs"]["tas"]), ): npt.assert_allclose(res_updated[key], comparison) + # things that aren't tested well: # - what happens if ensemble member and scenario don't actually make a coherent set # - units (should probably be using dataset rather than 
dataarray for inputs and outputs?) From 08a51348058fe2c180596920573c26d89bba79eb Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Thu, 21 Sep 2023 18:16:58 +0200 Subject: [PATCH 17/23] linting --- mesmer/prototype/calibrate_multiple.py | 85 ++++++++++++++++---------- mesmer/prototype/utils.py | 25 +++++--- tests/integration/test_prototype.py | 32 +++++----- 3 files changed, 85 insertions(+), 57 deletions(-) diff --git a/mesmer/prototype/calibrate_multiple.py b/mesmer/prototype/calibrate_multiple.py index 71c196fc..049b1ce6 100644 --- a/mesmer/prototype/calibrate_multiple.py +++ b/mesmer/prototype/calibrate_multiple.py @@ -3,7 +3,7 @@ import scipy.stats import xarray as xr -from .calibrate import AutoRegression1DOrderSelection, AutoRegression1D +from .calibrate import AutoRegression1D, AutoRegression1DOrderSelection from .utils import calculate_gaspari_cohn_correlation_matrices @@ -37,9 +37,7 @@ def _check_coords_match(obj, obj_other, check_coord): def _flatten(inp, dims_to_flatten): stack_coord_name = _get_stack_coord_name(inp) - inp_flat = inp.stack({stack_coord_name: dims_to_flatten}).dropna( - stack_coord_name - ) + inp_flat = inp.stack({stack_coord_name: dims_to_flatten}).dropna(stack_coord_name) return inp_flat, stack_coord_name @@ -193,20 +191,23 @@ def calibrate_auto_regressive_process_with_spatially_correlated_errors_multiple_ localisation_radii, ) - localised_empirical_covariance_matrix = _calculate_localised_empirical_covariance_matrix( - target, - localisation_radii, - gaspari_cohn_correlation_matrices, - max_cross_validation_iterations, - gridpoint_dim_name=gridpoint_dim_name, + localised_empirical_covariance_matrix = ( + _calculate_localised_empirical_covariance_matrix( + target, + localisation_radii, + gaspari_cohn_correlation_matrices, + max_cross_validation_iterations, + gridpoint_dim_name=gridpoint_dim_name, + ) ) - gridpoint_autoregression_coeffcients = np.hstack([v["lag_coefficients"] for v in gridpoint_autoregression_parameters.values()]) + gridpoint_autoregression_coeffcients = np.hstack( + [v["lag_coefficients"] for v in gridpoint_autoregression_parameters.values()] + ) localised_empirical_covariance_matrix_with_ar1_errors = ( - (1 - gridpoint_autoregression_coeffcients ** 2) - * localised_empirical_covariance_matrix - ) + 1 - gridpoint_autoregression_coeffcients**2 + ) * localised_empirical_covariance_matrix return localised_empirical_covariance_matrix_with_ar1_errors @@ -226,7 +227,9 @@ def _calculate_localised_empirical_covariance_matrix( number_iterations = min([number_samples, max_cross_validation_iterations]) # setup cross-validation stuff - index_cross_validation_out = np.zeros([number_iterations, number_samples], dtype=bool) + index_cross_validation_out = np.zeros( + [number_iterations, number_samples], dtype=bool + ) for i in range(number_iterations): index_cross_validation_out[i, i::max_cross_validation_iterations] = True @@ -239,32 +242,50 @@ def _calculate_localised_empirical_covariance_matrix( for i in range(number_iterations): # extract folds (no idea why these are called folds) - target_estimator = target_flattened.isel(**{stack_coord_name: ~index_cross_validation_out[i]}).values - target_cross_validation = target_flattened.isel(**{stack_coord_name: index_cross_validation_out[i]}).values + target_estimator = target_flattened.isel( + **{stack_coord_name: ~index_cross_validation_out[i]} + ).values + target_cross_validation = target_flattened.isel( + **{stack_coord_name: index_cross_validation_out[i]} + ).values # selecting relevant weights goes in here 
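            # (sketch, not implemented in this patch: given a hypothetical
            # `weights` array aligned with the stacked samples, it would be
            # subset with the same boolean masks as the target, e.g.
            #   weights_estimator = weights[~index_cross_validation_out[i]]
            #   weights_cross_validation = weights[index_cross_validation_out[i]]
            # and could then be passed to np.cov via its aweights argument and
            # to the np.average call below via its weights argument)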
empirical_covariance = np.cov(target_estimator, rowvar=False) # must be a better way to handle ensuring that the dimensions line up correctly (rather than # just cheating by using `.values`) - empirical_covariance_localised = empirical_covariance * gaspari_cohn_correlation_matrices[lr].values + empirical_covariance_localised = ( + empirical_covariance * gaspari_cohn_correlation_matrices[lr].values + ) # calculate likelihood of cross validation samples - log_likelihood_cross_validation_samples = scipy.stats.multivariate_normal.logpdf( - target_cross_validation, - mean=np.zeros(gaspari_cohn_correlation_matrices[lr].shape[0]), - cov=empirical_covariance_localised, - allow_singular=True, + log_likelihood_cross_validation_samples = ( + scipy.stats.multivariate_normal.logpdf( + target_cross_validation, + mean=np.zeros(gaspari_cohn_correlation_matrices[lr].shape[0]), + cov=empirical_covariance_localised, + allow_singular=True, + ) + ) + log_likelihood_cross_validation_samples_weighted_sum = ( + np.average( + log_likelihood_cross_validation_samples, + # weights=wgt_scen_eq_cv # TODO: weights handling + ) + * log_likelihood_cross_validation_samples.shape[0] ) - log_likelihood_cross_validation_samples_weighted_sum = np.average( - log_likelihood_cross_validation_samples, - # weights=wgt_scen_eq_cv # TODO: weights handling - ) * log_likelihood_cross_validation_samples.shape[0] # add to full sum over all folds - log_likelihood_cross_validation_sum += log_likelihood_cross_validation_samples_weighted_sum + log_likelihood_cross_validation_sum += ( + log_likelihood_cross_validation_samples_weighted_sum + ) - if log_likelihood_cross_validation_sum > log_likelihood_cross_validation_sum_max: - log_likelihood_cross_validation_sum_max = log_likelihood_cross_validation_sum + if ( + log_likelihood_cross_validation_sum + > log_likelihood_cross_validation_sum_max + ): + log_likelihood_cross_validation_sum_max = ( + log_likelihood_cross_validation_sum + ) else: # experience tells us that once we start selecting large localisation radii, performance # will not improve ==> break (reduces computational effort and number of singular matrices @@ -275,6 +296,8 @@ def _calculate_localised_empirical_covariance_matrix( print(f"Selected localisation radius: {lr}") empirical_covariance = np.cov(target_flattened.values, rowvar=False) - empirical_covariance_localised = empirical_covariance * gaspari_cohn_correlation_matrices[lr].values + empirical_covariance_localised = ( + empirical_covariance * gaspari_cohn_correlation_matrices[lr].values + ) return empirical_covariance_localised diff --git a/mesmer/prototype/utils.py b/mesmer/prototype/utils.py index 1f749cf9..397bb5ed 100644 --- a/mesmer/prototype/utils.py +++ b/mesmer/prototype/utils.py @@ -37,8 +37,7 @@ def calculate_gaspari_cohn_correlation_matrices( geodistance = calculate_geodistance_exact(latitudes, longitudes) gaspari_cohn_correlation_matrices = { - lr : calculate_gaspari_cohn_values(geodistance / lr) - for lr in localisation_radii + lr: calculate_gaspari_cohn_values(geodistance / lr) for lr in localisation_radii } return gaspari_cohn_correlation_matrices @@ -78,7 +77,9 @@ def calculate_geodistance_exact(latitudes, longitudes): lat = np.tile(latitudes[i], n_points - (i + 1)) lon = np.tile(longitudes[i], n_points - (i + 1)) - geodistance[i, i + 1 :] = geod.inv(lon, lat, longitudes.values[i + 1 :], latitudes.values[i + 1 :])[2] + geodistance[i, i + 1 :] = geod.inv( + lon, lat, longitudes.values[i + 1 :], latitudes.values[i + 1 :] + )[2] # convert m to km geodistance /= 
1000 @@ -87,9 +88,13 @@ def calculate_geodistance_exact(latitudes, longitudes): geodistance += np.transpose(geodistance) if latitudes.dims != longitudes.dims: - raise AssertionError(f"latitudes and longitudes have different dims: {latitudes.dims} vs. {longitudes.dims}") + raise AssertionError( + f"latitudes and longitudes have different dims: {latitudes.dims} vs. {longitudes.dims}" + ) - geodistance = xr.DataArray(geodistance, dims=list(latitudes.dims) * 2, coords=latitudes.coords) + geodistance = xr.DataArray( + geodistance, dims=list(latitudes.dims) * 2, coords=latitudes.coords + ) return geodistance @@ -115,7 +120,7 @@ def calculate_gaspari_cohn_values(inputs): sel_zero_to_one = (inputs_abs.values >= 0) & (inputs_abs.values < 1) r_s = inputs_abs.values[sel_zero_to_one] out[sel_zero_to_one] = ( - 1 - 5 / 3 * r_s ** 2 + 5 / 8 * r_s ** 3 + 1 / 2 * r_s ** 4 - 1 / 4 * r_s ** 5 + 1 - 5 / 3 * r_s**2 + 5 / 8 * r_s**3 + 1 / 2 * r_s**4 - 1 / 4 * r_s**5 ) sel_one_to_two = (inputs_abs.values >= 1) & (inputs_abs.values < 2) @@ -124,10 +129,10 @@ def calculate_gaspari_cohn_values(inputs): out[sel_one_to_two] = ( 4 - 5 * r_s - + 5 / 3 * r_s ** 2 - + 5 / 8 * r_s ** 3 - - 1 / 2 * r_s ** 4 - + 1 / 12 * r_s ** 5 + + 5 / 3 * r_s**2 + + 5 / 8 * r_s**3 + - 1 / 2 * r_s**4 + + 1 / 12 * r_s**5 - 2 / (3 * r_s) ) diff --git a/tests/integration/test_prototype.py b/tests/integration/test_prototype.py index e4e8c9f5..0f250896 100644 --- a/tests/integration/test_prototype.py +++ b/tests/integration/test_prototype.py @@ -4,9 +4,9 @@ import xarray as xr from statsmodels.tsa.arima_process import ArmaProcess +from mesmer.calibrate_mesmer.train_gv import train_gv from mesmer.calibrate_mesmer.train_lt import train_lt from mesmer.calibrate_mesmer.train_lv import train_lv -from mesmer.calibrate_mesmer.train_gv import train_gv from mesmer.prototype.calibrate import LinearRegression from mesmer.prototype.calibrate_multiple import ( calibrate_auto_regressive_process_multiple_scenarios_and_ensemble_members, @@ -110,7 +110,7 @@ def test_prototype_train_lt(): dims=pred_dims, coords=pred_coords, ) - emulator_tas_squared = emulator_tas ** 2 + emulator_tas_squared = emulator_tas**2 global_variability = xr.DataArray( np.array( [ @@ -320,7 +320,9 @@ def _do_legacy_run_train_lv( targs_legacy = {"tas": {}} for scenario, vals_scen in esm_tas_residual_local_variability.groupby("scenario"): targs_legacy["tas"][scenario] = ( - vals_scen.T.dropna(dim="time").transpose("ensemble_member", "time", "gridpoint").values + vals_scen.T.dropna(dim="time") + .transpose("ensemble_member", "time", "gridpoint") + .values ) gaspari_cohn_correlation_matrices = calculate_gaspari_cohn_correlation_matrices( @@ -390,9 +392,7 @@ def test_prototype_train_lv(): def _get_history_sample(): return np.concatenate( [ - ArmaProcess(ar, magnitude).generate_sample( - nsample=len(time_history) - ), + ArmaProcess(ar, magnitude).generate_sample(nsample=len(time_history)), np.nan * np.zeros(len(time_scenario)), ] ) @@ -401,9 +401,7 @@ def _get_scenario_sample(): return np.concatenate( [ np.nan * np.zeros(len(time_history)), - ArmaProcess(ar, magnitude).generate_sample( - nsample=len(time_scenario) - ), + ArmaProcess(ar, magnitude).generate_sample(nsample=len(time_scenario)), ] ) @@ -418,7 +416,7 @@ def _get_scenario_sample(): [ _get_history_sample(), _get_history_sample(), - ] + ], ], [ [ @@ -442,18 +440,20 @@ def _get_scenario_sample(): cfg=_MockConfig(), ) - res_updated = ( - 
calibrate_auto_regressive_process_with_spatially_correlated_errors_multiple_scenarios_and_ensemble_members( - esm_tas_residual_local_variability, - localisation_radii, - ) + res_updated = calibrate_auto_regressive_process_with_spatially_correlated_errors_multiple_scenarios_and_ensemble_members( + esm_tas_residual_local_variability, + localisation_radii, ) for key, comparison in ( - ("localised_empirical_covariance_matrix_with_ar1_errors", res_legacy["loc_ecov_AR1_innovs"]["tas"]), + ( + "localised_empirical_covariance_matrix_with_ar1_errors", + res_legacy["loc_ecov_AR1_innovs"]["tas"], + ), ): npt.assert_allclose(res_updated[key], comparison) + # things that aren't tested well: # - what happens if ensemble member and scenario don't actually make a coherent set # - units (should probably be using dataset rather than dataarray for inputs and outputs?) From fe1d1bcd61631b4ade407a9487a6c02c712f735b Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Thu, 21 Sep 2023 18:18:31 +0200 Subject: [PATCH 18/23] fix: test_prototype_train_lv --- tests/integration/test_prototype.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_prototype.py b/tests/integration/test_prototype.py index 0f250896..e23b3418 100644 --- a/tests/integration/test_prototype.py +++ b/tests/integration/test_prototype.py @@ -445,13 +445,8 @@ def _get_scenario_sample(): localisation_radii, ) - for key, comparison in ( - ( - "localised_empirical_covariance_matrix_with_ar1_errors", - res_legacy["loc_ecov_AR1_innovs"]["tas"], - ), - ): - npt.assert_allclose(res_updated[key], comparison) + # check localised_empirical_covariance_matrix_with_ar1_errors + npt.assert_allclose(res_updated, res_legacy["loc_ecov_AR1_innovs"]["tas"]) # things that aren't tested well: From c854fa480618844c34c1de5024ff4a8baf91e577 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Thu, 21 Sep 2023 23:28:20 +0200 Subject: [PATCH 19/23] clean train_lt.py --- mesmer/calibrate_mesmer/train_lt.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/mesmer/calibrate_mesmer/train_lt.py b/mesmer/calibrate_mesmer/train_lt.py index 05e1dfe6..198d970e 100644 --- a/mesmer/calibrate_mesmer/train_lt.py +++ b/mesmer/calibrate_mesmer/train_lt.py @@ -89,6 +89,7 @@ def train_lt(preds, targs, esm, cfg, save_params=True): assumption is fulfilled or rewrite code such that no longer necessary) """ + targ_names = list(targs.keys()) targ_name = targ_names[0] # because same approach for each targ pred_names = list(preds.keys()) @@ -185,9 +186,6 @@ def train_lt(preds, targs, esm, cfg, save_params=True): else: raise NotImplementedError() - else: - raise NotImplementedError() - # save the local trend paramters if requested if save_params: save_mesmer_data( @@ -225,5 +223,5 @@ def train_lt(preds, targs, esm, cfg, save_params=True): if len(params_lv) > 0: return params_lt, params_lv - - raise NotImplementedError("This path is never used") + else: + return params_lt From c3bf5e094a22aa88cfcdbf4a37678b54d99b228a Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Thu, 21 Sep 2023 23:54:54 +0200 Subject: [PATCH 20/23] remove prototype/utils.py after #298, #299, and #300 --- mesmer/prototype/calibrate_multiple.py | 12 ++- mesmer/prototype/utils.py | 141 ------------------------- tests/integration/test_prototype.py | 15 ++- 3 files changed, 17 insertions(+), 151 deletions(-) delete mode 100644 mesmer/prototype/utils.py diff --git a/mesmer/prototype/calibrate_multiple.py b/mesmer/prototype/calibrate_multiple.py index 
049b1ce6..a6d847e1 100644 --- a/mesmer/prototype/calibrate_multiple.py +++ b/mesmer/prototype/calibrate_multiple.py @@ -3,8 +3,11 @@ import scipy.stats import xarray as xr +from ..core.computation import ( + calc_gaspari_cohn_correlation_matrices, + calc_geodist_exact, +) from .calibrate import AutoRegression1D, AutoRegression1DOrderSelection -from .utils import calculate_gaspari_cohn_correlation_matrices def _get_predictor_dims(predictors): @@ -185,10 +188,9 @@ def calibrate_auto_regressive_process_with_spatially_correlated_errors_multiple_ for gridpoint, gridpoint_vals in target.groupby("gridpoint") } - gaspari_cohn_correlation_matrices = calculate_gaspari_cohn_correlation_matrices( - target.lat, - target.lon, - localisation_radii, + geodist = calc_geodist_exact(target.lon, target.lat) + gaspari_cohn_correlation_matrices = calc_gaspari_cohn_correlation_matrices( + geodist, localisation_radii ) localised_empirical_covariance_matrix = ( diff --git a/mesmer/prototype/utils.py b/mesmer/prototype/utils.py deleted file mode 100644 index 397bb5ed..00000000 --- a/mesmer/prototype/utils.py +++ /dev/null @@ -1,141 +0,0 @@ -import numpy as np -import pyproj -import xarray as xr - - -def calculate_gaspari_cohn_correlation_matrices( - latitudes, - longitudes, - localisation_radii, -): - """ - Calculate Gaspari-Cohn correlation matrices for a range of localisation radiis - - Parameters - ---------- - latitudes : :obj:`xr.DataArray` - Latitudes (one-dimensional) - - longitudes : :obj:`xr.DataArray` - Longitudes (one-dimensional) - - localisation_radii : list-like - Localisation radii to test (in metres) - - Returns - ------- - dict[float: :obj:`xr.DataArray`] - Gaspari-Cohn correlation matrix (values) for each localisation radius (keys) - - Notes - ----- - Values in ``localisation_radii`` should not exceed 10'000 by much because - it can lead to ``ValueError: the input matrix must be positive semidefinite`` - """ - # I wonder if xarray can apply a function to all pairs of points in arrays - # or something - geodistance = calculate_geodistance_exact(latitudes, longitudes) - - gaspari_cohn_correlation_matrices = { - lr: calculate_gaspari_cohn_values(geodistance / lr) for lr in localisation_radii - } - - return gaspari_cohn_correlation_matrices - - -def calculate_geodistance_exact(latitudes, longitudes): - """ - Calculate exact great circle distance based on WSG 84 - - Parameters - ---------- - latitudes : :obj:`xr.DataArray` - Latitudes (one-dimensional) - - longitudes : :obj:`xr.DataArray` - Longitudes (one-dimensional) - - Returns - ------- - :obj:`xr.DataArray` - 2D array of great circle distances between points represented by ``latitudes`` - and ``longitudes`` - """ - if longitudes.shape != latitudes.shape or longitudes.ndim != 1: - raise ValueError("lon and lat need to be 1D arrays of the same shape") - - geod = pyproj.Geod(ellps="WGS84") - - n_points = longitudes.shape[0] - - geodistance = np.zeros([n_points, n_points]) - - # calculate only the upper right half of the triangle first - for i in range(n_points): - - # need to duplicate gridpoint (required by geod.inv) - lat = np.tile(latitudes[i], n_points - (i + 1)) - lon = np.tile(longitudes[i], n_points - (i + 1)) - - geodistance[i, i + 1 :] = geod.inv( - lon, lat, longitudes.values[i + 1 :], latitudes.values[i + 1 :] - )[2] - - # convert m to km - geodistance /= 1000 - - # fill the lower left half of the triangle (in-place) - geodistance += np.transpose(geodistance) - - if latitudes.dims != longitudes.dims: - raise AssertionError( - f"latitudes 
-            f"latitudes and longitudes have different dims: {latitudes.dims} vs. {longitudes.dims}"
-        )
-
-    geodistance = xr.DataArray(
-        geodistance, dims=list(latitudes.dims) * 2, coords=latitudes.coords
-    )
-
-    return geodistance
-
-
-def calculate_gaspari_cohn_values(inputs):
-    """
-    Calculate smooth, exponentially decaying Gaspari-Cohn values
-
-    Parameters
-    ----------
-    inputs : :obj:`xr.DataArray`
-        Inputs at which to calculate the value of the smooth, exponentially decaying Gaspari-Cohn
-        correlation function (these could be e.g. normalised geographical distances)
-
-    Returns
-    -------
-    :obj:`xr.DataArray`
-        Gaspari-Cohn correlation function applied to each point in ``inputs``
-    """
-    inputs_abs = abs(inputs)
-    out = np.zeros_like(inputs)
-
-    sel_zero_to_one = (inputs_abs.values >= 0) & (inputs_abs.values < 1)
-    r_s = inputs_abs.values[sel_zero_to_one]
-    out[sel_zero_to_one] = (
-        1 - 5 / 3 * r_s**2 + 5 / 8 * r_s**3 + 1 / 2 * r_s**4 - 1 / 4 * r_s**5
-    )
-
-    sel_one_to_two = (inputs_abs.values >= 1) & (inputs_abs.values < 2)
-    r_s = inputs_abs.values[sel_one_to_two]
-
-    out[sel_one_to_two] = (
-        4
-        - 5 * r_s
-        + 5 / 3 * r_s**2
-        + 5 / 8 * r_s**3
-        - 1 / 2 * r_s**4
-        + 1 / 12 * r_s**5
-        - 2 / (3 * r_s)
-    )
-
-    out = xr.DataArray(out, dims=inputs.dims, coords=inputs.coords)
-
-    return out
diff --git a/tests/integration/test_prototype.py b/tests/integration/test_prototype.py
index e23b3418..efef96a5 100644
--- a/tests/integration/test_prototype.py
+++ b/tests/integration/test_prototype.py
@@ -7,13 +7,16 @@
 from mesmer.calibrate_mesmer.train_gv import train_gv
 from mesmer.calibrate_mesmer.train_lt import train_lt
 from mesmer.calibrate_mesmer.train_lv import train_lv
+from mesmer.core.computation import (
+    calc_gaspari_cohn_correlation_matrices,
+    calc_geodist_exact,
+)
 from mesmer.prototype.calibrate import LinearRegression
 from mesmer.prototype.calibrate_multiple import (
     calibrate_auto_regressive_process_multiple_scenarios_and_ensemble_members,
     calibrate_auto_regressive_process_with_spatially_correlated_errors_multiple_scenarios_and_ensemble_members,
     flatten_predictors_and_target,
 )
-from mesmer.prototype.utils import calculate_gaspari_cohn_correlation_matrices


 class _MockConfig:
@@ -325,11 +328,13 @@ def _do_legacy_run_train_lv(
         .values
     )

-    gaspari_cohn_correlation_matrices = calculate_gaspari_cohn_correlation_matrices(
-        latitudes=esm_tas_residual_local_variability.lat,
-        longitudes=esm_tas_residual_local_variability.lon,
-        localisation_radii=localisation_radii,
+    geodist = calc_geodist_exact(
+        esm_tas_residual_local_variability.lon, esm_tas_residual_local_variability.lat
+    )
+    gaspari_cohn_correlation_matrices = calc_gaspari_cohn_correlation_matrices(
+        geodist, localisation_radii
     )
+
     gaspari_cohn_correlation_matrices = {
         k: v.values for k, v in gaspari_cohn_correlation_matrices.items()
     }
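For reference, the Gaspari-Cohn correlation function implemented by the removed utils module (and imported from mesmer.core.computation in the patch above) is a compactly supported, piecewise fifth-order polynomial of the distance normalised by the localisation radius: it is 1 at zero distance, decays smoothly, and is exactly 0 once the normalised distance reaches 2. A minimal numpy-only sketch of that same formula, kept outside the patch series and using a hypothetical helper name, is:

import numpy as np

def gaspari_cohn_sketch(r):
    # r: distances already normalised by the localisation radius; same
    # piecewise polynomial as the removed calculate_gaspari_cohn_values
    r = np.abs(np.asarray(r, dtype=float))
    out = np.zeros_like(r)  # zero correlation for r >= 2 (compact support)

    near = r < 1
    s = r[near]
    out[near] = 1 - 5 / 3 * s**2 + 5 / 8 * s**3 + 1 / 2 * s**4 - 1 / 4 * s**5

    mid = (r >= 1) & (r < 2)
    s = r[mid]
    out[mid] = (
        4 - 5 * s + 5 / 3 * s**2 + 5 / 8 * s**3
        - 1 / 2 * s**4 + 1 / 12 * s**5 - 2 / (3 * s)
    )
    return out

print(gaspari_cohn_sketch([0.0, 0.5, 1.0, 1.5, 2.5]))  # decays from 1.0 to 0.0

The compact support is what localises the empirical covariance matrices; the removed docstring simply warns that very large localisation radii can trigger a positive-semidefinite error downstream.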
From 26f67619c37d2f4a4435151ab7766c3232919001 Mon Sep 17 00:00:00 2001
From: Mathias Hauser
Date: Mon, 25 Sep 2023 12:16:39 +0200
Subject: [PATCH 21/23] allow selected ar order to be None

---
 mesmer/prototype/calibrate_multiple.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mesmer/prototype/calibrate_multiple.py b/mesmer/prototype/calibrate_multiple.py
index a6d847e1..3a6babd3 100644
--- a/mesmer/prototype/calibrate_multiple.py
+++ b/mesmer/prototype/calibrate_multiple.py
@@ -117,7 +117,10 @@ def _select_auto_regressive_process_order(
         orders = AutoRegression1DOrderSelection().calibrate(
             values, maxlag=maxlag, ic=ic
         )
-        keep_order = 0 if orders is None else orders[-1]
+
+        # orders can be None
+        keep_order = np.nan if orders is None else orders[-1]
+
         store.append(
             {
                 "scenario": scenario,

From d3eb99d59630335ba78fa5a1e2b4cbe85d712d9d Mon Sep 17 00:00:00 2001
From: Mathias Hauser
Date: Tue, 12 Dec 2023 16:27:39 +0100
Subject: [PATCH 22/23] fix for gaspari_cohn and geodist_exact

---
 mesmer/prototype/calibrate_multiple.py | 10 ++++------
 tests/integration/test_prototype.py    |  9 +++------
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/mesmer/prototype/calibrate_multiple.py b/mesmer/prototype/calibrate_multiple.py
index 3a6babd3..4b12f6b6 100644
--- a/mesmer/prototype/calibrate_multiple.py
+++ b/mesmer/prototype/calibrate_multiple.py
@@ -3,10 +3,8 @@
 import scipy.stats
 import xarray as xr

-from ..core.computation import (
-    calc_gaspari_cohn_correlation_matrices,
-    calc_geodist_exact,
-)
+import mesmer
+
 from .calibrate import AutoRegression1D, AutoRegression1DOrderSelection


@@ -191,8 +189,8 @@ def calibrate_auto_regressive_process_with_spatially_correlated_errors_multiple_
         for gridpoint, gridpoint_vals in target.groupby("gridpoint")
     }

-    geodist = calc_geodist_exact(target.lon, target.lat)
-    gaspari_cohn_correlation_matrices = calc_gaspari_cohn_correlation_matrices(
+    geodist = mesmer.geospatial.geodist_exact(target.lon, target.lat)
+    gaspari_cohn_correlation_matrices = mesmer.stats.gaspari_cohn_correlation_matrices(
         geodist, localisation_radii
     )

diff --git a/tests/integration/test_prototype.py b/tests/integration/test_prototype.py
index efef96a5..49fc6750 100644
--- a/tests/integration/test_prototype.py
+++ b/tests/integration/test_prototype.py
@@ -4,13 +4,10 @@
 import xarray as xr
 from statsmodels.tsa.arima_process import ArmaProcess

+import mesmer
 from mesmer.calibrate_mesmer.train_gv import train_gv
 from mesmer.calibrate_mesmer.train_lt import train_lt
 from mesmer.calibrate_mesmer.train_lv import train_lv
-from mesmer.core.computation import (
-    calc_gaspari_cohn_correlation_matrices,
-    calc_geodist_exact,
-)
 from mesmer.prototype.calibrate import LinearRegression
 from mesmer.prototype.calibrate_multiple import (
     calibrate_auto_regressive_process_multiple_scenarios_and_ensemble_members,
     calibrate_auto_regressive_process_with_spatially_correlated_errors_multiple_scenarios_and_ensemble_members,
     flatten_predictors_and_target,
 )
@@ -325,10 +325,10 @@ def _do_legacy_run_train_lv(
         .values
     )

-    geodist = calc_geodist_exact(
+    geodist = mesmer.geospatial.geodist_exact(
         esm_tas_residual_local_variability.lon, esm_tas_residual_local_variability.lat
     )
-    gaspari_cohn_correlation_matrices = calc_gaspari_cohn_correlation_matrices(
+    gaspari_cohn_correlation_matrices = mesmer.stats.gaspari_cohn_correlation_matrices(
         geodist, localisation_radii
     )
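Patch 22 above points the prototype at the public helpers instead of mesmer.core.computation. A small usage sketch of those two calls as they appear in the diff; the lon/lat values and radii below are made up for illustration, and the snippet assumes a mesmer version in which geodist_exact and gaspari_cohn_correlation_matrices are exposed under mesmer.geospatial and mesmer.stats as shown above:

import numpy as np
import xarray as xr

import mesmer

# three gridpoints with 1D lon/lat, mirroring the prototype's target.lon / target.lat
lon = xr.DataArray([0.0, 10.0, 20.0], dims="gridpoint")
lat = xr.DataArray([0.0, 5.0, -5.0], dims="gridpoint")

# pairwise great-circle distances, then one correlation matrix per localisation radius
geodist = mesmer.geospatial.geodist_exact(lon, lat)
localisation_radii = np.arange(700, 2000, 1000)
gaspari_cohn_correlation_matrices = mesmer.stats.gaspari_cohn_correlation_matrices(
    geodist, localisation_radii
)

for radius, corr in gaspari_cohn_correlation_matrices.items():
    print(radius, corr.shape)  # each entry is a (gridpoint x gridpoint) matrix

The legacy-comparison test only needs the raw arrays, which is why the diff unpacks the returned mapping with k: v.values.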
From 8f19cd5ac91b094e8a6ec12b2975a948db13eb44 Mon Sep 17 00:00:00 2001
From: Mathias Hauser
Date: Tue, 12 Dec 2023 17:33:36 +0100
Subject: [PATCH 23/23] small refactor

---
 tests/integration/test_prototype.py | 94 +++++++++++++----------------
 1 file changed, 42 insertions(+), 52 deletions(-)

diff --git a/tests/integration/test_prototype.py b/tests/integration/test_prototype.py
index 49fc6750..123de595 100644
--- a/tests/integration/test_prototype.py
+++ b/tests/integration/test_prototype.py
@@ -1,5 +1,4 @@
 import numpy as np
-import numpy.testing as npt
 import pytest
 import xarray as xr
 from statsmodels.tsa.arima_process import ArmaProcess
@@ -196,7 +195,7 @@ def test_prototype_train_lt():
         ("global_variability", res_legacy[1]["coef_gvtas"]["tas"]),
         ("intercept", res_legacy[0]["intercept"]["tas"]),
     ):
-        npt.assert_allclose(res_updated.sel(predictor=updated_name), legacy_vals)
+        np.testing.assert_allclose(res_updated.sel(predictor=updated_name), legacy_vals)


 def _do_legacy_run_train_gv(
@@ -233,9 +232,9 @@ def _do_legacy_run_train_gv(
     ),
 )
 def test_prototype_train_gv(ar):
-    time_history = range(1850, 2014 + 1)
-    time_scenario = range(2015, 2100 + 1)
-    time = list(time_history) + list(time_scenario)
+    time_history = np.arange(1850, 2014 + 1)
+    time_scenario = np.arange(2015, 2100 + 1)
+    time = np.concatenate((time_history, time_scenario))

     magnitude = np.array([0.1])

@@ -247,47 +246,38 @@ def test_prototype_train_gv(ar):
         scenario=scenarios,
         ensemble_member=["r1i1p1f1", "r2i1p1f1"],
     )
-    esm_tas_global_variability = xr.DataArray(
-        np.array(
+
+    def _get_history_sample():
+        return np.concatenate(
             [
-                [
-                    np.concatenate(
-                        [
-                            ArmaProcess(ar, magnitude).generate_sample(
-                                nsample=len(time_history)
-                            ),
-                            np.nan * np.zeros(len(time_scenario)),
-                        ]
-                    ),
-                    np.concatenate(
-                        [
-                            ArmaProcess(ar, magnitude).generate_sample(
-                                nsample=len(time_history)
-                            ),
-                            np.nan * np.zeros(len(time_scenario)),
-                        ]
-                    ),
-                ],
-                [
-                    np.concatenate(
-                        [
-                            np.nan * np.zeros(len(time_history)),
-                            ArmaProcess(ar, magnitude).generate_sample(
-                                nsample=len(time_scenario)
-                            ),
-                        ]
-                    ),
-                    np.concatenate(
-                        [
-                            np.nan * np.zeros(len(time_history)),
-                            ArmaProcess(ar, magnitude).generate_sample(
-                                nsample=len(time_scenario)
-                            ),
-                        ]
-                    ),
-                ],
+                ArmaProcess(ar, magnitude).generate_sample(nsample=time_history.size),
+                np.full(time_scenario.size, np.nan),
             ]
-        ),
+        )
+
+    def _get_scenario_sample():
+        return np.concatenate(
+            [
+                np.full(time_history.size, np.nan),
+                ArmaProcess(ar, magnitude).generate_sample(nsample=len(time_scenario)),
+            ]
+        )
+
+    data = np.array(
+        [
+            [
+                _get_history_sample(),
+                _get_history_sample(),
+            ],
+            [
+                _get_scenario_sample(),
+                _get_scenario_sample(),
+            ],
+        ],
+    )
+
+    esm_tas_global_variability = xr.DataArray(
+        data,
         dims=targ_dims,
         coords=targ_coords,
     )
@@ -309,7 +299,7 @@ def test_prototype_train_gv(ar):
         ("lag_coefficients", res_legacy["AR_coefs"]),
         ("standard_innovations", res_legacy["AR_std_innovs"]),
     ):
-        npt.assert_allclose(res_updated[key], comparison)
+        np.testing.assert_allclose(res_updated[key], comparison)


 def _do_legacy_run_train_lv(
@@ -371,9 +361,9 @@ def test_prototype_train_lv():
     localisation_radii = np.arange(700, 2000, 1000)

     # see how much code I can reuse for the AR1 calibration
-    time_history = range(1850, 2014 + 1)
-    time_scenario = range(2015, 2100 + 1)
-    time = list(time_history) + list(time_scenario)
+    time_history = np.arange(1850, 2014 + 1)
+    time_scenario = np.arange(2015, 2100 + 1)
+    time = np.concatenate((time_history, time_scenario))

     scenarios = ["hist", "ssp126"]
     # we wouldn't actually start like this, but we'd write a utils function
@@ -394,15 +384,15 @@ def test_prototype_train_lv():
     def _get_history_sample():
         return np.concatenate(
             [
-                ArmaProcess(ar, magnitude).generate_sample(nsample=len(time_history)),
-                np.nan * np.zeros(len(time_scenario)),
+                ArmaProcess(ar, magnitude).generate_sample(nsample=time_history.size),
+                np.full(time_scenario.size, np.nan),
             ]
         )

     def _get_scenario_sample():
         return np.concatenate(
             [
-                np.nan * np.zeros(len(time_history)),
+                np.full(time_history.size, np.nan),
                 ArmaProcess(ar, magnitude).generate_sample(nsample=len(time_scenario)),
             ]
         )
@@ -448,7 +438,7 @@ def _get_scenario_sample():
     )

     # check localised_empirical_covariance_matrix_with_ar1_errors
-    npt.assert_allclose(res_updated, res_legacy["loc_ecov_AR1_innovs"]["tas"])
+    np.testing.assert_allclose(res_updated, res_legacy["loc_ecov_AR1_innovs"]["tas"])


 # things that aren't tested well:
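The small refactor above swaps np.nan * np.zeros(len(...)) for np.full(..., np.nan) and len(x) for x.size when building the synthetic samples. A quick, standalone numpy check (nothing mesmer-specific) that the two spellings produce the same thing:

import numpy as np

time_scenario = np.arange(2015, 2100 + 1)

old_style = np.nan * np.zeros(len(time_scenario))  # spelling removed by the patch
new_style = np.full(time_scenario.size, np.nan)    # spelling introduced by the patch

assert old_style.shape == new_style.shape
assert np.isnan(old_style).all() and np.isnan(new_style).all()
assert len(time_scenario) == time_scenario.size

np.full states the intent (an all-NaN array of a given size) directly rather than relying on NaN propagation through multiplication, which is presumably why the refactor prefers it.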