From b5739eb53495265d1c9a3842862dcf1891ed9f9a Mon Sep 17 00:00:00 2001
From: "Andrew M. Bradley" <ambrad@cs.stanford.edu>
Date: Wed, 21 Jul 2021 00:27:38 -0600
Subject: [PATCH] Methods for the Islet paper.

---
 methods/islet/Makefile                        |   35 +
 methods/islet/cslunstab.cpp                   |  765 ++++++++++++
 methods/islet/figures/amb3.hy                 |  870 ++++++++++++++
 methods/islet/figures/figs-adv-diag.hy        | 1035 +++++++++++++++++
 methods/islet/figures/figs-methods.hy         |  584 ++++++++++
 methods/islet/figures/figs.tex                |  376 ++++++
 methods/islet/figures/figsutils.hy            |  225 ++++
 methods/islet/figures/islet.hy                |   41 +
 methods/islet/figures/poly.hy                 |  190 +++
 methods/islet/figures/run-accuracy.sh         |   58 +
 methods/islet/figures/run-img-filament.sh     |   34 +
 methods/islet/figures/run-isl-footprint.sh    |   35 +
 methods/islet/figures/run-mixing.sh           |   40 +
 .../figures/run-pg-srcterm-midpoint-test.sh   |   36 +
 methods/islet/figures/run-stability-cmp.sh    |   32 +
 .../islet/figures/run-toychem-diagnostic.sh   |   34 +
 methods/islet/figures/run-toychem-imgs.sh     |   23 +
 methods/islet/figures/sl-gpu-perf.hy          |  221 ++++
 methods/islet/islet_interpmethod.hpp          |   14 +
 methods/islet/islet_isl.cpp                   |  364 ++++++
 methods/islet/islet_isl.hpp                   |   54 +
 methods/islet/islet_maxeigcomp.cpp            |  616 ++++++++++
 methods/islet/islet_maxeigcomp.hpp            |   79 ++
 methods/islet/islet_nodalbasis.cpp            |  195 ++++
 methods/islet/islet_nodalbasis.hpp            |   59 +
 methods/islet/islet_np4.cpp                   |   46 +
 methods/islet/islet_np4.hpp                   |   22 +
 methods/islet/islet_npx.cpp                   |   35 +
 methods/islet/islet_npx.hpp                   |  137 +++
 methods/islet/islet_pum.cpp                   |  226 ++++
 methods/islet/islet_pum.hpp                   |   56 +
 methods/islet/islet_studymetrics.cpp          |  134 +++
 methods/islet/islet_studymetrics.hpp          |    6 +
 methods/islet/islet_tables.cpp                |  268 +++++
 methods/islet/islet_tables.hpp                |   27 +
 methods/islet/islet_types.hpp                 |   12 +
 methods/islet/islet_util.hpp                  |  264 +++++
 methods/islet/islet_xnodes_metrics.cpp        |  257 ++++
 methods/islet/islet_xnodes_metrics.hpp        |   53 +
 methods/islet/make-depends.sh                 |    3 +
 methods/islet/make.depends                    |   40 +
 methods/islet/make.inc.gnu                    |    2 +
 methods/islet/pum_sweep.cpp                   |   56 +
 methods/islet/readme.txt                      |   77 ++
 methods/islet/run_meam1_sweep.cpp             |   48 +
 methods/islet/run_np4.cpp                     |  270 +++++
 methods/islet/search.cpp                      |  962 +++++++++++++++
 methods/slmm/Makefile                         |   29 +-
 methods/slmm/make-depends.sh                  |    3 +
 methods/slmm/make.depends                     |  220 ++++
 methods/slmm/slmm_basis.cpp                   |    2 +-
 methods/slmm/slmm_islet.cpp                   |   60 +-
 methods/slmm/slmm_runtests.py                 |  292 +++++
 methods/slmm/slmmir_remapper_isl.cpp          |    6 +-
 54 files changed, 9544 insertions(+), 84 deletions(-)
 create mode 100644 methods/islet/Makefile
 create mode 100644 methods/islet/cslunstab.cpp
 create mode 100644 methods/islet/figures/amb3.hy
 create mode 100644 methods/islet/figures/figs-adv-diag.hy
 create mode 100644 methods/islet/figures/figs-methods.hy
 create mode 100644 methods/islet/figures/figs.tex
 create mode 100644 methods/islet/figures/figsutils.hy
 create mode 100644 methods/islet/figures/islet.hy
 create mode 100644 methods/islet/figures/poly.hy
 create mode 100644 methods/islet/figures/run-accuracy.sh
 create mode 100644 methods/islet/figures/run-img-filament.sh
 create mode 100644 methods/islet/figures/run-isl-footprint.sh
 create mode 100644 methods/islet/figures/run-mixing.sh
 create mode 100644 methods/islet/figures/run-pg-srcterm-midpoint-test.sh
 create mode 100644 methods/islet/figures/run-stability-cmp.sh
 create mode 100644 methods/islet/figures/run-toychem-diagnostic.sh
 create mode 100644 methods/islet/figures/run-toychem-imgs.sh
 create mode 100644 methods/islet/figures/sl-gpu-perf.hy
 create mode 100644 methods/islet/islet_interpmethod.hpp
 create mode 100644 methods/islet/islet_isl.cpp
 create mode 100644 methods/islet/islet_isl.hpp
 create mode 100644 methods/islet/islet_maxeigcomp.cpp
 create mode 100644 methods/islet/islet_maxeigcomp.hpp
 create mode 100644 methods/islet/islet_nodalbasis.cpp
 create mode 100644 methods/islet/islet_nodalbasis.hpp
 create mode 100644 methods/islet/islet_np4.cpp
 create mode 100644 methods/islet/islet_np4.hpp
 create mode 100644 methods/islet/islet_npx.cpp
 create mode 100644 methods/islet/islet_npx.hpp
 create mode 100644 methods/islet/islet_pum.cpp
 create mode 100644 methods/islet/islet_pum.hpp
 create mode 100644 methods/islet/islet_studymetrics.cpp
 create mode 100644 methods/islet/islet_studymetrics.hpp
 create mode 100644 methods/islet/islet_tables.cpp
 create mode 100644 methods/islet/islet_tables.hpp
 create mode 100644 methods/islet/islet_types.hpp
 create mode 100644 methods/islet/islet_util.hpp
 create mode 100644 methods/islet/islet_xnodes_metrics.cpp
 create mode 100644 methods/islet/islet_xnodes_metrics.hpp
 create mode 100644 methods/islet/make-depends.sh
 create mode 100644 methods/islet/make.depends
 create mode 100644 methods/islet/make.inc.gnu
 create mode 100644 methods/islet/pum_sweep.cpp
 create mode 100644 methods/islet/readme.txt
 create mode 100644 methods/islet/run_meam1_sweep.cpp
 create mode 100644 methods/islet/run_np4.cpp
 create mode 100644 methods/islet/search.cpp
 create mode 100644 methods/slmm/make-depends.sh
 create mode 100644 methods/slmm/make.depends
 create mode 100755 methods/slmm/slmm_runtests.py

diff --git a/methods/islet/Makefile b/methods/islet/Makefile
new file mode 100644
index 0000000..10fac11
--- /dev/null
+++ b/methods/islet/Makefile
@@ -0,0 +1,35 @@
+include make.inc
+# Sources of libislet; the suffix rule below compiles each .cpp into a .o.
+SOURCES = islet_isl.cpp islet_tables.cpp islet_npx.cpp islet_maxeigcomp.cpp islet_xnodes_metrics.cpp islet_pum.cpp islet_studymetrics.cpp islet_nodalbasis.cpp islet_np4.cpp
+OBJECTS = $(SOURCES:.cpp=.o)
+.cpp.o:
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+# 'all' builds the library and the drivers listed here; pum_perturb_plot is not among them.
+all: libislet search np4 pum_sweep run_meam1_sweep cslunstab
+# The target is named libislet, but the file it produces is libislet.so.
+libislet: $(OBJECTS)
+	$(CXX) $(OBJECTS) $(LDFLAGS) -fopenmp -shared $(LINK_LAPACK_BLAS) -o libislet.so
+
+search: libislet search.o
+	$(CXX) search.o $(LDFLAGS) $(LINK_LAPACK_BLAS) -L. -lislet -fopenmp -o search
+
+np4: libislet run_np4.o
+	$(CXX) run_np4.o $(LDFLAGS) $(LINK_LAPACK_BLAS) -L. -lislet -fopenmp -o np4
+
+pum_perturb_plot: libislet pum_perturb_plot.o
+	$(CXX) pum_perturb_plot.o $(LDFLAGS) $(LINK_LAPACK_BLAS) -L. -lislet -fopenmp -o pum_perturb_plot
+
+run_meam1_sweep: libislet run_meam1_sweep.o
+	$(CXX) run_meam1_sweep.o $(LDFLAGS) $(LINK_LAPACK_BLAS) -L. -lislet -fopenmp -o run_meam1_sweep
+
+pum_sweep: libislet pum_sweep.o
+	$(CXX) pum_sweep.o $(LDFLAGS) $(LINK_LAPACK_BLAS) -L. -lislet -fopenmp -o pum_sweep
+# cslunstab is standalone: its link line does not use -lislet.
+cslunstab: cslunstab.o
+	$(CXX) cslunstab.o $(LDFLAGS) $(LINK_LAPACK_BLAS) -fopenmp -o cslunstab
+
+clean: # remove objects, the shared library, and every program this Makefile can link
+	rm -f *.o *.so search np4 pum_sweep pum_perturb_plot run_meam1_sweep cslunstab
+
+# make.depends is generated by running `bash make-depends.sh`
+include make.depends
diff --git a/methods/islet/cslunstab.cpp b/methods/islet/cslunstab.cpp
new file mode 100644
index 0000000..aec8ace
--- /dev/null
+++ b/methods/islet/cslunstab.cpp
@@ -0,0 +1,765 @@
+/* This standalone program implements the example unstable 1D and 2D problems
+   that use the classical cubic interpolation semi-Lagrangian method.
+
+   Build it:
+     g++ -O3 -c cslunstab.cpp
+     g++ cslunstab.o -llapack -lblas -o cslunstab
+   Run it:
+     ./cslunstab
+   There will be no output if all the assertions pass. The two lines
+     require(cubic1d_demo_unstable_problem() >= 1 + 1e-3);
+   and
+     require(cubic2d_demo_unstable_problem() >= 1 + 1e-2);
+   assert that the maximum eigenvalue magnitude is at least 1 + a small amount
+   in each problem.
+ */
+
+#include <cassert>
+#include <cmath>
+#include <vector>
+#include <algorithm>
+#include <stdexcept>
+#include <sstream>
+
+using Int = int;
+using Real = double;
+// require / require_msg: if the condition is false, throw std::logic_error with file:line and the condition text.
+#define require(condition) do {                                         \
+    if ( ! (condition)) {                                               \
+      std::stringstream _ss_;                                           \
+      _ss_ << __FILE__ << ":" << __LINE__ << ": FAIL:\n" << #condition  \
+           << "\n";                                                     \
+      throw std::logic_error(_ss_.str());                               \
+    }                                                                   \
+  } while (0)
+#define require_msg(condition, message) do {                            \
+    if ( ! (condition)) {                                               \
+      std::stringstream _ss_;                                           \
+      _ss_ << __FILE__ << ":" << __LINE__ << ": FAIL:\n" << #condition  \
+           << "\nmessage:\n" << message << "\n";                        \
+      throw std::logic_error(_ss_.str());                               \
+    }                                                                   \
+  } while (0)
+
+bool eq(const std::string& a, const char* const b1, const char* const b2 = 0); // NOTE(review): never defined or called in this file
+
+template <typename T> T square (const T& x) { return x*x; } // x squared
+
+inline Real reldif (const Real& a, const Real& b)  // |b - a| / max(|a|, |b|)
+{ const Real scale = std::max(std::abs(a), std::abs(b)); return std::abs(b - a)/scale; }
+
+template <typename T1, typename T2>
+inline bool equal (const T1& a, const T2& b) {
+  const bool same = a == b;  // exact comparison, no tolerance
+  if ( ! same) printf("equal: a,b = %23.16e %23.16e re = %23.16e\n",
+                      Real(a), Real(b), std::abs((a-b)/Real(a)));
+  return same;
+}
+
+template <typename T1, typename T2>
+inline bool almost_equal (const T1& a, const T2& b, const Real tol) {
+  // Difference measured relative to 1 + |a|; on failure the values are printed.
+  const auto re = std::abs(a-b)/(1.0 + std::abs(a));
+  if (re <= tol) return true;
+  printf("equal: a,b = %23.16e %23.16e re = %23.16e tol %9.2e\n",
+         Real(a), Real(b), re, tol);
+  return false;
+}
+
+inline double urand () { return rand() / ((double) RAND_MAX + 1.0); } // in [0, 1)
+// LAPACK DGEEV (Fortran symbol): eigenvalues, optionally eigenvectors, of a general real matrix.
+extern "C" void dgeev_(char* jobvl, char* jobvr, int* n, double* a, int* lda,
+                       double* wr, double* wi,
+                       double* vl, int* ldvl,
+                       double* vr, int* ldvr,
+                       double* work, int* lwork, int* info);
+
+void dgeev (int n, double* a, int lda,
+            double* wr, double* wi,
+            std::vector<double>& work, int& info,
+            double* vl = nullptr, int ldvl = 1,
+            double* vr = nullptr, int ldvr = 1) {
+  // Eigenvectors are requested only for the sides whose storage is supplied.
+  assert( ! vl || ldvl >= n);
+  assert( ! vr || ldvr >= n);
+  char want_vl = vl ? 'v' : 'n', want_vr = vr ? 'v' : 'n';
+  int lwork = 10*n;  // workspace length handed to dgeev_; work grows if shorter
+  if (static_cast<int>(work.size()) < lwork) work.resize(lwork);
+  dgeev_(&want_vl, &want_vr, &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr,
+         work.data(), &lwork, &info);
+}
+// Largest eigenvalue magnitude of the n x n matrix a (overwritten by LAPACK); throws if dgeev fails.
+Real calc_max_eig_amp (int n, double* a, int lda, std::vector<double>& work) {
+  work.resize(12*n);  // [0,10n): dgeev workspace; [10n,11n): real parts; [11n,12n): imaginary parts
+  Real *const wr = work.data() + 10*n, *const wi = wr + n;
+  int info = 0;
+  dgeev(n, a, lda, wr, wi, work, info);
+  require_msg(info == 0, "dgeev failed: info = " << info);  // wr, wi are not valid otherwise
+  Real mea = 0;
+  for (int i = 0; i < n; ++i) mea = std::max(mea, std::sqrt(square(wr[i]) + square(wi[i])));
+  return mea;
+}
+
+static void eval_lagrange_poly_basis (const Int& n, const Real* xsup, const Real& x,
+                                      Real* const y) {
+  // y[k] is the k-th Lagrange basis polynomial on the nodes xsup, evaluated at
+  // x: the product over m != k of (x - xsup[m]) / (xsup[k] - xsup[m]).
+  for (int k = 0; k < n; ++k) {
+    Real basis = 1;
+    for (int m = 0; m < n; ++m)
+      if (m != k) basis *= (x - xsup[m]) / (xsup[k] - xsup[m]);
+    y[k] = basis;
+  }
+}
+
+static Real eval_lagrange_poly (const Int& n, const Real* xsup, const Real* ysup,
+                                const Real& x) {
+  // Value at x of the polynomial through (xsup[k], ysup[k]), k = 0..n-1: the
+  // sum over k of ysup[k] times the k-th Lagrange basis polynomial at x.
+  Real sum = 0;
+  for (int k = 0; k < n; ++k) {
+    Real weight = 1;  // product over m != k of (x - xsup[m]) / (xsup[k] - xsup[m])
+    for (int m = 0; m < n; ++m)
+      if (m != k) weight *= (x - xsup[m]) / (xsup[k] - xsup[m]);
+    sum += weight*ysup[k];
+  }
+  return sum;
+}
+
+// Map x into [x0, x1] by shifting it a whole number of periods x1 - x0.
+static Real calc_periodic_x (const Real& x0, const Real& x1, const Real x) {
+  if (x0 <= x && x <= x1) return x;
+  const auto period = x1 - x0;
+  const auto s = (x - x0)/period;  // offset from x0 measured in periods
+  const auto xper = x0 + (s - std::floor(s))*period;
+  assert(xper >= x0);
+  assert(xper <= x1);
+  return xper;
+}
+
+// Return the index i of the cell [xnode[i], xnode[i+1]] that contains xper,
+// which must already lie in [xnode[0], xnode[nreg]].
+static Int find_cell (const Real* const xnode, const Int nreg, const Real& xper) {
+  if (xper == xnode[0]) return 0;
+  // The cell lies immediately left of the first node that is not less than xper.
+  const Real* const first_ge = std::lower_bound(xnode, xnode + nreg + 1, xper);
+  const Int iper = static_cast<Int>(first_ge - xnode) - 1;
+  assert(iper >= 0   );
+  assert(iper <  nreg);
+  assert(xper >= xnode[iper  ]);
+  assert(xper <= xnode[iper+1]);
+  return iper;
+}
+
+// Evaluate at xeval the periodic piecewise-cubic interpolant of the nodal data
+// (xnode[i], ynode[i]), i = 0..nreg. Node nreg is the periodic image of node 0;
+// ynode[nreg] is read and asserted equal to ynode[0] to catch a nonperiodic setup.
+static Real periodic_cubic1d_interp (
+  const Int nreg, const Real* const xnode, const Real* const ynode, const Real& xeval)
+{
+  assert(ynode[0] == ynode[nreg]);
+
+  const auto xper = calc_periodic_x(xnode[0], xnode[nreg], xeval);
+  const auto iper = find_cell(xnode, nreg, xper);
+
+  // Stencil nodes iper-1 .. iper+2; one that falls off the grid uses its periodic image.
+  Real xsup[4], ysup[4];
+  for (int i = 0; i < 4; ++i) {
+    const Int k = iper - 1 + i;
+    if (k < 0) {            // left of node 0: image of node nreg-1
+      xsup[i] = xnode[0] - (xnode[nreg] - xnode[nreg-1]);
+      ysup[i] = ynode[nreg-1];
+    } else if (k > nreg) {  // right of node nreg: image of node 1
+      xsup[i] = xnode[nreg] + (xnode[1] - xnode[0]);
+      ysup[i] = ynode[1];
+    } else {
+      xsup[i] = xnode[k];
+      ysup[i] = ynode[k];
+    }
+  }
+
+  return eval_lagrange_poly(4, xsup, ysup, xper);
+}
+
+// op[nreg*si + ti] = weight of source node si at target xnode[ti] + xoffset (simple, not fast).
+void periodic_cubic1d_make_translation_matrix (
+  const Int nreg, const Real* const xnode, const Real& xoffset, Real* op, Real* wrk)
+{
+  std::fill(wrk, wrk + nreg, Real(0));
+  for (Int src = 0; src < nreg; ++src) {
+    wrk[src] = 1;  // wrk is now the src-th unit vector
+    if (src == 0) wrk[nreg] = 1;  // node nreg mirrors node 0
+    for (Int tgt = 0; tgt < nreg; ++tgt)
+      op[nreg*src + tgt] = periodic_cubic1d_interp(nreg, xnode, wrk, xnode[tgt] + xoffset);
+    wrk[src] = 0;
+    if (src == 0) wrk[nreg] = 0;
+  }
+}
+
+// Demo a configuration that has associated max eig amp >= 1 + 1e-3: a
+// nonuniform 5-cell periodic grid translated by a fixed offset.
+static Real cubic1d_demo_unstable_problem () {
+  const Real xoffset = 0.33575;
+  const Real x[] = {0, 0.11242, 0.44817, 0.78392, 0.88737, 1};
+  const Int nreg = sizeof(x)/sizeof(x[0]) - 1;
+  std::vector<Real> wrk(nreg+1), op(nreg*nreg);
+  periodic_cubic1d_make_translation_matrix(nreg, x, xoffset, op.data(), wrk.data());
+  return calc_max_eig_amp(nreg, op.data(), nreg, wrk);
+}
+
+static void cubic1d_unittest () {
+  const auto eps = std::numeric_limits<Real>::epsilon();
+
+  {
+    require(equal(calc_periodic_x(-0.1, 1.3, 0.7), 0.7));
+    require(equal(calc_periodic_x(-0.1, 1.3, 1.3), 1.3));
+    require(equal(calc_periodic_x(-0.1, 1.3, -0.1), -0.1));
+    require(almost_equal(calc_periodic_x(-0.1, 1.3, 1.4), 0, eps));
+    const auto x = calc_periodic_x(-0.1, 1.3, 2.7);
+    require(almost_equal(x, -0.1, eps) || almost_equal(x, 1.2, eps));
+    require(almost_equal(calc_periodic_x(1.1, 1.3, 1.4), 1.2, eps));
+  }
+
+  { // In the interior, a cubic is recovered exactly.
+    const auto f = [&] (const Real& x) { return ((((-1.2*x) + 0.1)*x) - 0.3)*x + 11; };
+    const Int nreg = 5, nsamp = 100;
+    std::vector<Real> x(nreg+1), y(nreg+1);
+    for (Int i = 0; i <= nreg; ++i) x[i] = 2*urand() - 1;
+    std::sort(x.begin(), x.end());
+    for (Int i = 0; i < nreg; ++i) y[i] = f(x[i]);
+    y[nreg] = y[0]; // y[nreg] doesn't influence this test.
+    const auto D = x[nreg-2] - x[1];
+    for (Int i = 0; i < nsamp; ++i) {
+      const auto xs = x[1] + i*D/(nsamp - 1);
+      const auto ys = periodic_cubic1d_interp(nreg, x.data(), y.data(), xs);
+      const auto yt = f(xs);
+      require(almost_equal(ys, yt, 10*eps));
+    }
+  }
+
+  { // At each sample point, error decreases at least as fast as nreg^-4 in the
+    // 1-norm, nreg^-3 pointwise.
+    const Real xl = -1.2, xu = 4.2, L = xu - xl;
+    const auto f = [&] (const Real& x) { return std::cos(2*M_PI*(x - xl)/L); };
+    const Int nsamp = 1111;
+    std::vector<Real> xs(nsamp);
+    std::vector<std::vector<Real> > yss(2);
+    for (Int i = 0; i < nsamp; ++i) xs[i] = xl + i*L/(nsamp - 1);
+    for (Int nreg : {31, 80, 211}) {
+      Int cnt = 0;
+      for (int refine = 0; refine < 2; ++refine) {
+        nreg *= 2;
+        std::vector<Real> x(nreg+1), y(nreg+1);
+        for (Int i = 0; i <= nreg; ++i) x[i] = xl + i*L/nreg;
+        for (Int i = 0; i <= nreg; ++i) y[i] = f(x[i]);
+        auto& ys = yss[refine];
+        ys.resize(nsamp);
+        for (Int i = 0; i < nsamp; ++i)
+          ys[i] = periodic_cubic1d_interp(nreg, x.data(), y.data(), xs[i]);
+      }
+      Real err[2] = {0};
+      for (Int i = 0; i < nsamp; ++i) {
+        const auto yt = f(xs[i]);
+        const auto e1 = std::abs(yss[0][i] - yt);
+        const auto e2 = std::abs(yss[1][i] - yt);
+        require(e1 >= e2);
+        if ( ! (e1 == e2 || e1 >= 8*e2)) // 8 = 2^3: nreg doubled, pointwise order 3
+          ++cnt;
+        if (i < nsamp-1) {
+          err[0] += e1;
+          err[1] += e2;
+        }
+      }
+      // A few points not showing 3rd-order convergence is OK, since this means
+      // the less-accurate solution is more accurate than anticipated at these
+      // points.
+      require(cnt < 0.1*nsamp);
+      require(err[0] > 15.9*err[1]); // 16 = 2^4: nreg doubled, 1-norm order 4
+      require(err[0] < 16.1*err[1]);
+    }
+  }
+
+  { // The matrix gives the same answer as calls to periodic_cubic1d_interp.
+    const Int nreg = 17;
+    std::vector<Real> x(nreg+1), op(nreg*nreg), wrk(nreg+1),
+      ys(nreg+1), yt1(nreg+1), yt2(nreg+1);
+
+    for (Int i = 0; i <= nreg; ++i) x[i] = 2*urand() - 1;
+    std::sort(x.begin(), x.end());
+    for (Int i = 0; i <  nreg; ++i) ys[i] = 2*urand() - 1;
+    ys[nreg] = ys[0];
+    
+    for (const auto xoffset : {0.0, 0.01, -0.02, 0.1, -0.42, 1.7, -4.2}) {
+      periodic_cubic1d_make_translation_matrix(nreg, x.data(), xoffset, op.data(), wrk.data());
+      for (Int i = 0; i < nreg; ++i) yt1[i] = 0;
+      for (Int j = 0; j < nreg; ++j)
+        for (Int i = 0; i < nreg; ++i)
+          yt1[i] += op[nreg*j + i]*ys[j];
+      for (Int i = 0; i < nreg; ++i)
+        yt2[i] = periodic_cubic1d_interp(nreg, x.data(), ys.data(), x[i] + xoffset);
+      for (Int i = 0; i < nreg; ++i)
+        require(almost_equal(yt1[i], yt2[i], 10*eps));
+    }
+  }
+
+  { // The matrix for a uniform grid has max eig 1.
+    const Int nreg = 6;
+    std::vector<Real> x(nreg+1), op(nreg*nreg), wrk(nreg+1);
+    for (Int i = 0; i <= nreg; ++i) x[i] = i;
+    const Real xoffset = 1.2;
+    periodic_cubic1d_make_translation_matrix(nreg, x.data(), xoffset, op.data(), wrk.data());
+    const auto mea = calc_max_eig_amp(nreg, op.data(), nreg, wrk);
+    require(almost_equal(mea, 1, 2*eps));
+  }
+
+  { // Cubic ISL on the uniform-grid periodic translation problem has OOA 3.
+    const Real xl = -4.2, xu = 1.7, L = xu - xl;
+    const auto f = [&] (const Real& x) { return std::cos(2*M_PI*(x - xl)/L); };
+    const auto error = [&] (const std::vector<Real>& x, const std::vector<Real>& y) {
+      Real num = 0, den = 0;
+      // Ignore the last point, which is periodic; we don't update it.
+      for (size_t i = 0; i < y.size(); ++i) {
+        const auto yt = f(x[i]);
+        num += square(y[i] - yt);
+        den += square(yt);
+      }
+      return std::sqrt(num/den);
+    };
+    Int nstep = 37;
+    Int nreg = 20;
+    Real e[2];
+    for (Int refine = 0; refine < 2; ++refine) {
+      nreg *= 2;
+      nstep *= 2;
+      const Real xoffset = L/nstep;
+      std::vector<Real> x(nreg+1), op(nreg*nreg), wrk(nreg+1);
+      std::vector<Real> ys[2];
+      // Size is nreg rather than nreg+1 because we don't maintain the periodic
+      // point y[nreg] = y[0].
+      for (Int k = 0; k < 2; ++k) ys[k].resize(nreg);
+      // Make the space-time operator A.
+      for (Int i = 0; i <= nreg; ++i) x[i] = xl + i*L/nreg;
+      periodic_cubic1d_make_translation_matrix(nreg, x.data(), xoffset, op.data(), wrk.data());
+      // Initial condition.
+      Int i0 = 0, i1 = 1;
+      for (Int i = 0; i < nreg; ++i) ys[i0][i] = f(x[i]);
+      for (Int si = 0; si < nstep; ++si) {
+        // y1 = A y0
+        for (Int i = 0; i < nreg; ++i) ys[i1][i] = 0;
+        for (Int j = 0; j < nreg; ++j)
+          for (Int i = 0; i < nreg; ++i)
+            ys[i1][i] += op[nreg*j + i]*ys[i0][j];
+        // At a time t < T, check that the error is large.
+        if (si == nstep/2) require(error(x, ys[i1]) > 1);
+        std::swap(i0, i1);
+      }
+      e[refine] = error(x, ys[i0]);
+    }
+    // At time T, the error decreases with OOA 3.
+    require(e[0] > 7.95*e[1]); // 8 = 2^3: nreg and nstep both doubled
+    require(e[0] < 8.05*e[1]);
+    // Check that we solved the problem to reasonable accuracy.
+    require(e[1] <= 1e-3);
+  }
+
+  // The primary purpose of periodic_cubic1d_make_translation_matrix is to
+  // demonstrate that there is a 1D periodic translation problem for which the
+  // associated classical cubic ISL space-time matrix has maximum eigenvalue
+  // amplitude > 1. In this demo, it is > 1 + 1e-3.
+  require(cubic1d_demo_unstable_problem() >= 1 + 1e-3);
+}
+
+struct NonUniMesh1d {
+  using Array = std::vector<Real>;
+
+  // xb holds the ne+1 cell boundaries; the period is xb[ne] - xb[0].
+  NonUniMesh1d (const Real* xb, const Int ne)
+    : ne_(ne), L_(xb[ne] - xb[0]), xb_(xb, xb + ne + 1)
+  {}
+
+  Int get_ne () const { return ne_; }
+  const Array& get_xb() const { return xb_; }
+
+  // Map x into [xb[0], xb[ne]] by removing whole periods.
+  Real to_periodic (const Real& x) const {
+    if (xb_[0] <= x && x <= xb_[ne_]) return x;
+    const auto s = (x - xb_[0])/L_;
+    return xb_[0] + (s - std::floor(s))*L_;
+  }
+
+  // Cell holding the periodic image of x; an interior boundary goes to the cell on its left.
+  Int in_cell (const Real& x) const {
+    const auto xp = to_periodic(x);
+    if (xp <= xb_[0]) return 0;
+    const auto first_ge = std::lower_bound(xb_.begin(), xb_.end(), xp);
+    const Int iper = (first_ge - xb_.begin()) - 1;
+    assert(iper >= 0 && iper < ne_);
+    assert(xb_[iper] <= xp && xp <= xb_[iper+1]);
+    return iper;
+  }
+
+  // Coordinate of boundary ie, i.e. the left edge of cell ie.
+  Real to_physical (const Int& ie) const { return xb_[ie]; }
+
+  // Returns the number of failed checks.
+  static Int unittest () {
+    const Real eps = std::numeric_limits<Real>::epsilon();
+    const Real xb[] = {-1, 1, 5};
+    const NonUniMesh1d m(xb, 2);
+    Int nerr = 0;
+    nerr += m.in_cell(1.1) != 1;
+    nerr += m.in_cell(-1) != 0;
+    nerr += reldif(m.to_periodic(5.1), -0.9) > 10*eps;
+    nerr += m.in_cell(5.1) != 0;
+    nerr += m.in_cell(-1.1) != 1;
+    return nerr;
+  }
+
+private:
+  const Int ne_;
+  const Real L_;
+  Array xb_;
+};
+
+class NonUniMesh2d {
+  const NonUniMesh1d mx_, my_;
+  const Int nx_;
+
+public:
+  // Tensor product of a 1D mesh in x (nx cells) and a 1D mesh in y (ny cells).
+  NonUniMesh2d (const Real* xb, const Int nx, const Real* yb, const Int ny)
+    : mx_(xb, nx), my_(yb, ny), nx_(nx) {}
+
+  Int get_ne () const { return mx_.get_ne() * my_.get_ne(); }
+
+  const NonUniMesh1d& get_mx () const { return mx_; }
+  const NonUniMesh1d& get_my () const { return my_; }
+
+  void to_periodic (const Real& x, const Real& y,
+                    Real& xp, Real& yp) const {
+    xp = mx_.to_periodic(x);
+    yp = my_.to_periodic(y);
+  }
+
+  void ncell (Int& nx, Int& ny) const {
+    nx = nx_;
+    ny = my_.get_ne();
+  }
+
+  // Cells are numbered row by row, with the x index varying fastest.
+  Int in_cell (const Real& x, const Real& y) const {
+    const Int col = mx_.in_cell(x), row = my_.in_cell(y);
+    return row*nx_ + col;
+  }
+
+  // (x, y) of the corner of cell ci at the left boundary in each direction.
+  void to_physical (const Int& ci, Real& x, Real& y) const {
+    x = mx_.to_physical(ci % nx_);
+    y = my_.to_physical(ci / nx_);
+  }
+
+  static Int unittest () {
+    const Real eps = std::numeric_limits<Real>::epsilon();
+    const Real xb[] = {-1, 1, 5}, yb[] = {-1, 1, 6};
+    const NonUniMesh2d m(xb, 2, yb, 2);
+    Int nerr = 0;  // number of failed checks
+    nerr += m.in_cell(1.1, 1.1) != 3;
+    nerr += m.in_cell(5, 6) != 3;
+    nerr += m.in_cell(-1, 1) != 0;
+    nerr += m.in_cell(1, 1) != 0;
+    Real xp, yp;
+    m.to_periodic(5.1, -1.2, xp, yp);
+    nerr += reldif(xp, -0.9) > 10*eps;
+    nerr += reldif(yp, 5.8) > 10*eps;
+    nerr += m.in_cell(5.1, 6.1) != 0;
+    nerr += m.in_cell(-1.1, -1.1) != 3;
+    return nerr;
+  }
+};
+
+// Sparse matrix data structures and operations.
+struct SparseTriple { // row r owns entries rp[r] .. rp[r+1]-1 of ci and d
+  Int m, n; // number of rows, number of columns
+  std::vector<Int> rp, ci; // row pointer, column index
+  std::vector<Real> d; // matrix entries
+};
+
+// Expand s into the dense row-major m x n array d; entries absent from s are 0.
+static void sparse2dense (const SparseTriple& s, std::vector<Real>& d) {
+  d.assign(s.m*s.n, Real(0));  // assign, not resize: also zeros prior content of d
+  for (Int r = 0; r < s.m; ++r)
+    for (Int j = s.rp[r]; j < s.rp[r+1]; ++j)
+      d[s.n*r + s.ci[j]] = s.d[j];
+}
+// Sparse matrix-vector product: y = s*x, with y resized to s.m entries.
+static void apply (const SparseTriple& s, const std::vector<Real>& x,
+                   std::vector<Real>& y) {
+  assert(s.n == static_cast<Int>(x.size()));
+  y.resize(s.m);
+  for (Int row = 0; row < s.m; ++row) {
+    Real acc = 0;
+    const Int row_end = s.rp[row+1];
+    for (Int k = s.rp[row]; k < row_end; ++k) acc += s.d[k]*x[s.ci[k]];
+    y[row] = acc;
+  }
+}
+
+// Write into coord[0..np-1] the coordinates of nodes cell, cell+1, ...,
+// cell+np-1 of the periodic grid xb, unwrapped so that they increase
+// monotonically (asserted in debug builds).
+static void fill_lag_coord (const NonUniMesh1d::Array& xb, const Int& np,
+                            const Int& cell, Real* coord) {
+  const Int ne = static_cast<Int>(xb.size()) - 1;
+  const Real L = xb[ne] - xb[0];
+  for (Int i = 0; i < np; ++i) {
+    const Int k = cell + i;
+    Real& c = coord[i];
+    // A node outside [0, ne) takes the coordinate of its periodic image,
+    // shifted by one period L toward the side it fell off.
+    if (0 <= k && k < ne) c = xb[k];
+    else if (k < 0)       c = xb[(k + ne) % ne] - L;
+    else                  c = xb[k % ne] + L;
+  }
+#ifndef NDEBUG
+  for (Int i = 1; i < np; ++i) assert(coord[i] > coord[i-1]);
+#endif
+}
+
+// Index of element (xe, ye) in the operator: x fastest, each index taken modulo its grid size.
+static Int dof (const Int& nx, const Int& ny,
+                const Int& xe, const Int& ye) {
+  return (xe % nx) + nx*(ye % ny);
+}
+// Build in s the np-point (per direction) Lagrange-interpolation operator for one step dt.
+template <typename Function>
+void make_ccsl_op_nondiv2d (const NonUniMesh2d& mesh, const Int np,
+                            const Function& integrate,
+                            const Real& dt, SparseTriple& s)
+{
+  const Int os = (np-1)/2;
+  Int nex, ney;
+  mesh.ncell(nex, ney);
+  const Int ne = nex*ney, n = ne;
+  const auto& xb = mesh.get_mx().get_xb();
+  const auto& yb = mesh.get_my().get_xb();
+  // Start from an empty matrix so that a reused s cannot keep stale entries.
+  s = SparseTriple();
+  s.m = s.n = ne;
+  s.rp.assign(ne+1, 0);
+  for (Int tie = 0; tie < ne; ++tie) {
+    const Int tye = tie / nex, txe = tie % nex;
+    const Int tdof = dof(nex, ney, txe, tye);
+    assert(tdof >= 0 && tdof < n);
+
+    Real x0, y0;
+    mesh.to_physical(tie, x0, y0);
+    Real tx, ty;
+    integrate(x0, y0, dt, tx, ty);  // (tx, ty): point at which row tie interpolates
+
+    const Int sie = mesh.in_cell(tx, ty);
+    const Int sxe0 = sie % nex, sye0 = sie / nex;
+
+    Real txp, typ;
+    mesh.to_periodic(tx, ty, txp, typ);
+    Real xv[12], yv[12], lag_coord[12];
+    assert(np >= 2 && np <= 12 && np % 2 == 0);  // stencil width 2*os+2 equals np only for even np
+    fill_lag_coord(xb, np, sxe0 - os, lag_coord);
+    assert(txp >= lag_coord[0] && txp <= lag_coord[np-1]);
+    eval_lagrange_poly_basis(np, lag_coord, txp, xv);
+    fill_lag_coord(yb, np, sye0 - os, lag_coord);
+    assert(typ >= lag_coord[0] && typ <= lag_coord[np-1]);
+    eval_lagrange_poly_basis(np, lag_coord, typ, yv);
+
+    Int k = s.rp[tie];
+    for (Int dy = -os; dy <= os+1; ++dy) {
+      const Int sye = (sye0 + dy + ney) % ney;
+      for (Int dx = -os; dx <= os+1; ++dx) {
+        const Int sxe = (sxe0 + dx + nex) % nex;
+        const Int sdof = dof(nex, ney, sxe, sye);
+        assert(sdof >= 0 && sdof < n);
+        s.ci.push_back(sdof);
+        s.d.push_back(xv[dx+os]*yv[dy+os]);  // tensor-product weight
+        k++;
+      }
+    }
+    s.rp[tie+1] = k;
+  }
+
+  assert(s.d.size() == s.ci.size());
+  assert(s.rp[s.m] == static_cast<Int>(s.d.size()));
+}
+
+// Check that the 'integrate' function provides doubly-periodic outputs over the
+// domain of the mesh.
+template <typename Function>
+void check_periodicity (const NonUniMesh2d& mesh, const Real& dt, const Function& integrate) {
+  static const Real rtol = 1e2*std::numeric_limits<Real>::epsilon();
+  static const Real atol = std::numeric_limits<Real>::epsilon();
+
+  Int nex, ney;
+  mesh.ncell(nex, ney);
+  const auto& xb = mesh.get_mx().get_xb();
+  const auto& yb = mesh.get_my().get_xb();
+
+  // Check that the domain has unit extent in x and in y; it need not start at 0.
+  require(std::abs(xb[nex] - xb[0] - 1) <= atol);
+  require(std::abs(yb[ney] - yb[0] - 1) <= atol);
+
+  // Relative/absolute error checks.
+  const auto check_error = [=] (Real x0, Real tx0, Real x1, Real tx1) {
+    require(reldif(tx0 - x0, tx1 - x1) <= rtol ||
+            std::abs((tx0 - x0) - (tx1 - x1)) <= atol);
+  };
+
+  // Run over the sides of the domain and check periodicity.
+  const auto check = [&] (Real x0, Real x1, Real y0, Real y1) {
+    Real tx0, ty0, tx1, ty1;
+    integrate(x0, y0, dt, tx0, ty0);
+    integrate(x1, y1, dt, tx1, ty1);
+    check_error(x0, tx0, x1, tx1);
+    check_error(y0, ty0, y1, ty1);
+  };
+  Int n = 7*std::max(nex, ney);
+  for (int i = 0; i <= n; ++i) {
+    const Real a = Real(i)/n, x = a*xb[0] + (1-a)*xb[nex];
+    check(x, x, yb[0], yb[ney]);
+  }
+  for (int i = 0; i <= n; ++i) {
+    const Real a = Real(i)/n, y = a*yb[0] + (1-a)*yb[ney];
+    check(xb[0], xb[nex], y, y);
+  }
+}
+// Build in s the np-point operator for the demo shear flow on mesh (xb, yb) with step dt.
+static void setup_demo_problem (const std::vector<Real>& xb, const std::vector<Real>& yb,
+                                const Real dt, SparseTriple& s, const Int np,
+                                const bool check = false) {
+  const Int nex = xb.size() - 1, ney = yb.size() - 1;
+  // Shear (nondivergent) flow. Parameter values were obtained from a search for
+  // an unstable operator.
+  const auto integrate = [] (const Real& x0, const Real& y0, const Real& step,
+                             Real& xf, Real& yf) {
+    const auto speed = 1 + std::cos(2*M_PI*(0.342 + x0 - y0));
+    xf = x0 + speed*step;
+    yf = y0 + speed*step;
+  };
+  NonUniMesh2d mesh(xb.data(), nex, yb.data(), ney);
+  if (check) check_periodicity(mesh, dt, integrate);  // optional sanity check of the flow
+  make_ccsl_op_nondiv2d(mesh, np, integrate, dt, s);
+}
+
+static Real cubic2d_demo_unstable_problem (const bool unstable = true) {
+  const Int nex = 15, ney = 13, ne = nex*ney;
+  const Real dx = 1.0/nex, dy = 1.0/ney;
+  std::vector<Real> xb(nex+1), yb(ney+1);
+  for (Int i = 0; i <= nex; ++i) xb[i] = i*dx;
+  for (Int i = 0; i <= ney; ++i) yb[i] = i*dy;
+  // Time step 0.2761 when unstable is true, 0.1 otherwise; cubic (np = 4) operator.
+  SparseTriple s;
+  setup_demo_problem(xb, yb, unstable ? 0.2761 : 0.1, s, 4, true);
+  std::vector<Real> op(ne*ne), wrk;
+  sparse2dense(s, op);
+  return calc_max_eig_amp(ne, op.data(), ne, wrk);
+}
+
+static void set_ic (const std::vector<Real>& xb, const std::vector<Real>& yb,
+                    std::vector<Real>& z) {
+  const Int nex = xb.size() - 1, ney = yb.size() - 1;
+  z.resize(nex*ney);  // one value per cell, x index varying fastest
+  for (Int row = 0; row < ney; ++row) {
+    const Real fy = std::cos(2*M_PI*(yb[row] - yb[0])/(yb[ney] - yb[0]));
+    for (Int col = 0; col < nex; ++col)
+      z[nex*row + col] = fy*std::cos(2*M_PI*(xb[col] - xb[0])/(xb[nex] - xb[0]));
+  }
+}
+
+static Real calc_l2_reldif (const std::vector<Real>& a, const std::vector<Real>& b) {
+  Real diff2 = 0, ref2 = 0;  // squared 2-norms of a - b and of a
+  for (size_t k = 0, len = a.size(); k < len; ++k) {
+    diff2 += square(a[k] - b[k]);
+    ref2 += square(a[k]);
+  }
+  return std::sqrt(diff2/ref2);
+}
+
+// The example problem generically has order of accuracy np-1 as long as shear
+// is not permitted to continue for too long. Demonstrate this:
+//   If reverse, integrate a smooth field forward for time T/2, then backward to
+// time T, and compare the result with the initial condition.
+//   If not reverse, integrate forward for time T, then compare with the
+// analytical solution at time T.
+static Real measure_ooa (const Int np, const bool reverse) {
+  Int nex = 35, ney = 32, nstep = 22;
+  const Real x0 = -0.4, y0 = 0.2;
+  Real dt = 0.012, err[2];
+  const Real T = dt*nstep;
+  for (Int refine = 0; refine < 2; ++refine) {
+    // Refinement parameters.
+    nex *= 2;
+    ney *= 2;
+    nstep *= 2;
+    dt /= 2;
+    // dt*nstep is unchanged by the refinement: dt halves as nstep doubles.
+    // Make forward and backward ops.
+    std::vector<Real> xb(nex+1), yb(ney+1);
+    for (Int i = 0; i <= nex; ++i) xb[i] = x0 + Real(i)/nex;
+    for (Int i = 0; i <= ney; ++i) yb[i] = y0 + Real(i)/ney;
+    SparseTriple op1, op2;
+    setup_demo_problem(xb, yb, dt, op1, np, refine == 0);
+    setup_demo_problem(xb, yb, reverse ? -dt : T, op2, np, refine == 0);
+    // Initial conditions.
+    std::vector<Real> z0, zs[2];
+    set_ic(xb, yb, z0);
+    zs[0] = z0;
+    zs[1].resize(z0.size());
+    // Time step.
+    Int i0 = 0, i1 = 1;
+    for (Int ti = 0; ti < nstep; ++ti) {
+      apply(op1, zs[i0], zs[i1]);
+      std::swap(i0, i1);
+    }
+    if (reverse) {
+      for (Int ti = 0; ti < nstep; ++ti) {
+        apply(op2, zs[i0], zs[i1]);
+        std::swap(i0, i1);
+      }
+      err[refine] = calc_l2_reldif(z0, zs[i0]);
+    } else {
+      apply(op2, z0, zs[i1]);
+      err[refine] = calc_l2_reldif(zs[i1], zs[i0]);
+    }
+  }
+  return std::log2(err[0]/err[1]);
+}
+
+void cubic2d_unittest () {
+  const auto eps = std::numeric_limits<Real>::epsilon();
+  require(NonUniMesh1d::unittest() == 0);
+  require(NonUniMesh2d::unittest() == 0);
+  // The primary purpose of make_ccsl_op_nondiv2d is to demonstrate that there
+  // is a 2D periodic nondivergent-flow problem, with uniform grid, for which
+  // the associated classical cubic ISL space-time matrix has maximum eigenvalue
+  // amplitude > 1. In this demo, it is > 1 + 1e-2.
+  require(cubic2d_demo_unstable_problem() >= 1 + 1e-2);
+  // Make sure it's stable if, for example, dt is not from the search for
+  // unstable parameter values.
+  require(almost_equal(cubic2d_demo_unstable_problem(false), 1, 50*eps));
+  // Order of accuracy is np-1.
+  for (const Int np : {4, 6, 8})
+    for (const bool reverse : {true, false}) {
+      const auto ooa = measure_ooa(np, reverse);
+      const Real d = 0.025; // permit 2.5% deviation from theoretical OOA
+      require(ooa > (1 - d)*(np - 1));
+      require(ooa < (1 + d)*(np - 1));
+    }
+}
+
+int main (int argc, char** argv) {
+  // Show a 1D periodic translation problem on a nonuniform grid for which the
+  // associated classical cubic ISL space-time matrix has maximum eigenvalue
+  // amplitude > 1 + 1e-3.
+  cubic1d_unittest(); // a failed require throws std::logic_error, which is not caught here
+  // Show a 2D periodic nondivergent-flow problem on a uniform grid for which
+  // the associated classical cubic ISL space-time matrix has maximum eigenvalue
+  // amplitude > 1 + 1e-2.
+  cubic2d_unittest();
+}
diff --git a/methods/islet/figures/amb3.hy b/methods/islet/figures/amb3.hy
new file mode 100644
index 0000000..6382bc9
--- /dev/null
+++ b/methods/islet/figures/amb3.hy
@@ -0,0 +1,870 @@
+;;; Collection of utils.
+
+;;(require [hy.contrib.walk [let]])
+(import sys copy math os re)
+
+;;   when passing kwargs to another function like pl.plot, the dictionary should
+;; be like {'option value}, not {:option value}.
+
+(defmacro sdo [&rest code]
+  "Scoped do. Just like do, but vars def'ed inside stay there."
+  `((fn [] ~@code)))
+
+(defmacro sv [&rest code]
+  "The ultimate in laziness."
+  `(setv ~@code))
+
+(defmacro svb [&rest args]
+  "Sub-bracketed setv, like Lisp's let."
+  `(do ~@(map (fn [e]
+                `(setv ~(first e) ~(last e)))
+              args)))
+
+(defn symbol [string] (HySymbol string))
+
+(defmacro/g! svifn [&rest args]
+  "setv if none"
+  `(do ~@(map (fn [e]
+                `(if (none? ~(first e)) (setv ~(first e) ~(last e))))
+              (zip (cut args 0 (len args) 2)
+                   (cut args 1 (len args) 2)))))
+
+(defmacro/g! expect [expr &optional [answer True]]
+  (setv expr-code (str expr))
+  (setv answer-code (str answer))
+  `(sdo (setv ~g!got ~expr)
+        (setv ~g!want ~answer)
+        (if (not (= ~g!got ~g!want))
+          (print (.format "ERROR: {0:s} = {1:s} NOT EQUAL TO {2:s} = {3:s}"
+                          ~expr-code (str ~g!got) ~answer-code (str ~g!want))))))
+
+(defmacro/g! in-require [expr]
+  (setv expr-code (str expr))
+  `(sdo (setv ~g!value ~expr)
+        (unless ~g!value
+          (sdo (import inspect)
+               (setv ~g!frame (inspect.currentframe)
+                     ~g!file (. ~g!frame f-code co-filename)
+                     ~g!lineno (. ~g!frame f-lineno))
+               (raise (Exception (.format "IN-REQUIRE {:s} {:d}: {:s}"
+                                          ~g!file ~g!lineno ~expr-code)))))))
+
+(defmacro assert-type [v t] `(assert (= (type ~v) ~t)))
+
+(defmacro dont [&rest code]
+  "No-op."
+  `((fn [])))
+
+(defmacro raisefmt [&rest args]
+  `(raise (Exception (.format ~@args))))
+
+(defmacro interact [&rest code]
+  "Block code for interactive eval, but that is silenced when the .hy file is
+  run as a program."
+  `(if (= --name-- "__main__")
+    (dont ~@code)
+    (do ~@code)))
+
+(defmacro if-main [&rest code]
+  "Block code when run with a main, but silence on import."
+  `(if (= --name-- "__main__")
+    (do ~@code)
+    (dont ~@code)))
+
+(defmacro prf [&rest args]
+  `(print (.format ~@args)))
+(defmacro prfno [&rest args]
+  `(print (.format ~@args) :end ""))
+(defmacro prff [fid &rest args]
+  `(print (.format ~@args) :file ~fid))
+(defmacro prffno [fid &rest args]
+  `(print (.format ~@args) :end "" :file ~fid))
+
+(defmacro prc [sym]
+  `(prf ~(+ (name sym) " {}") ~sym))
+
+(defmacro mapply [func &rest args]
+  "Apply func. Works for keyword args, unlike apply. (Probably broken in many
+  ways compared with apply, however.)"
+  `(~func ~@args))
+
+(defmacro dispatch-dict [d m]
+  "Function object dispatch helper."
+  `(try (get ~d ~m)
+        (except [] (print "No function" (name ~m)) None)))
+
+(defmacro/g! inc! [x &optional [increment 1]]
+  `(do (setv ~x (+ ~x ~increment))
+       ~x))
+
+(defmacro/g! dec! [x &optional [increment 1]]
+  `(do (setv ~x (- ~x ~increment))
+       ~x))
+
+(defmacro/g! case/test [op keyform &rest entries]
+  "Case based on a test with op. Use :else as default action."
+  `(do (setv ~g!keyform-result ~keyform)
+       (cond ~@(map (fn [entry]
+                      (+ (if (= (first entry) ':else)
+                           '[True] ; If :else, use True in cond.
+                           `[(~op ~g!keyform-result ~(first entry))])
+                         `[~@(rest entry)]))
+                    entries)
+             ;; If no case matches, return None.
+             [True None])))
+
+(defmacro/g! case/eq [&rest forms]
+  "Case using = for the key. Thus, the 'keylist' is not really a list, but an
+  atom."
+  `(case/test = ~@forms))
+
+(defmacro/g! case/in [&rest forms]
+  "Case using 'in' for the key. Thus, each keylist *must* indeed be a list."
+  `(case/test in ~@forms))
+
+(defmacro geton [&rest forms]
+  "Like get, but return None if the lookup raises (any exception, not just KeyError)."
+  `(try (get ~@forms)
+        (except [] None)))
+
+;; I want to switch to lfor in new code, but so old code doesn't break, provide
+;; list-comp, which was removed in Hy 0.15.
+(defmacro list-comp [transform range]
+  `(lfor ~(first range) ~(second range) ~transform))
+
+;; Inject the variable(s) first? or last?.
+(defmacro/g! for-last [it &rest body]
+  `(do
+     (setv ~g!last (first (last (enumerate ~(second it)))))
+     (for [(, ~g!i ~(first it)) (enumerate ~(second it))]
+       (setv last? (= ~g!i ~g!last))
+       ~@body)))
+
+(defmacro/g! for-first-last [it &rest body]
+  `(do
+     (setv ~g!first (first (first (enumerate ~(second it))))
+           ~g!last (first (last (enumerate ~(second it)))))
+     (for [(, ~g!i ~(first it)) (enumerate ~(second it))]
+       (setv first? (= ~g!i ~g!first))
+       (setv last? (= ~g!i ~g!last))
+       ~@body)))
+
+;; Use this instead of macroexpand to get output stripped of the Hy object
+;; ctors.
+(defn ppme [quoted-form]
+  (sv sym-dict {} b (Box) b.sym-num 0
+      b.after-open True)
+  (defn sym [e]
+    (unless (in e sym-dict)
+      (assoc sym-dict e (.format "sym-{:d}" b.sym-num))
+      (inc! b.sym-num))
+    (get sym-dict e))
+  (defn prl [e ldelim rdelim]
+    (prfno "{:s}" (+ (if b.after-open "" " ") ldelim))
+    (sv b.after-open True)
+    (for [li e] (rec li))
+    (prfno "{:s}" rdelim))
+  (defn atom? [e]
+    (in "quote" (first e)))
+  (defn rec [e]
+    (setv t (type e))
+    (case/in t
+             [[hy.models.HyFloat HyInteger] (print (+ " " (str e)) :end "")]
+             [[HyExpression]
+              (if (atom? e)
+                (for [li e] (rec li))
+                (prl e "(" ")"))]
+             [[HyList] (prl e "[" "]")]
+             [[HyString] (print (.format " \"{:s}\"" e) :end "")]
+             [:else
+              (unless b.after-open (print " " :end ""))
+              (sv b.after-open False)
+              (cond [(in "keyform" e)
+                     (print (sym e) :end "")]
+                    [(in "quote" e)
+                     (print "'" :end "")
+                     (sv b.after-open True)]
+                    [:else (print e :end "")])]))
+  (setv h (macroexpand quoted-form))
+  (print h)
+  (rec h)
+  (print))
+
+(defclass Box []
+  "A box to hold values to be written in closures."
+  (defn --repr-- [me]
+    (str me.--dict--)))
+(defn class? [o] (= (type o) (type (Box))))
+(defn has-field? [o field] (in field o.--dict--))
+(defn pod-number? [n] (in (type n) (, int float)))
+(defn pod-or-len [n] (if (pod-number? n) n (len n)))
+(defn list? [coll] (= (type coll) list))
+(defn tuple? [coll] (= (type coll) tuple))
+(defn dict? [coll] (= (type coll) dict))
+(defn fn? [f] (= (type f) (type (fn []))))
+
+(defmacro/g! box-slots [&rest slots]
+  "Example: (setv hi 3 bye \"so long\" b (box-slots 'hi 'bye))"
+  `(do (setv ~g!box (Box))
+       ~@(map (fn [s]
+                ;;            handles _ vs - in names
+                (setv g!field ((. (str (second s)) replace) "-" "_")
+                      ;;      inject symbol directly
+                      g!value (second s))
+                `(assoc (. ~g!box --dict--) ~g!field ~g!value))
+              slots)
+       ~g!box))
+
+(if-main
+  (expect (sdo (setv hi "so long" foo-bar 3 b (box-slots 'hi 'foo-bar))
+               b.foo-bar)
+          3))
+
+(defn strleq [s ref]
+  (if (< (len s) (len ref))
+    False
+    (= (cut s 0 (len ref)) ref)))
+
+(defn mapl [fun &rest args]
+  (list (map fun #*args)))
+
+(defn assoc-nested [d keys val]
+  "Associate in a nested dict, creating new sub-dicts as needed."
+  (setv dn d)
+  (for [key (cut keys 0 -1)]
+    (if-not (in key dn)
+            (assoc dn key {}))
+    (setv dn (get dn key)))
+  (assoc dn (get keys -1) val))
+
+(defn assoc-nested-append [d keys val]
+  "Associate in a nested dict, creating new sub-dicts as needed. The value is
+  intended to be an item to go into a list. If the list exists, append to it; if
+  not, create it with the item as the only element."
+  (assoc-nested d keys (+ (or (geton d #*keys) []) [val])))
+
+;; Very nice for writing .py files from C++ with data, and then running in a
+;; parse/plot program to load the data.
+(defn get-python-mod [filename &optional [basedir ""]]
+  "Return the module for basedir/filename."
+  (defn get-module-basename [fn]
+    (setv name ((. os path basename) fn))
+    (get (.split name ".") 0))
+  (setv name (get-module-basename filename))
+  (if (not (empty? basedir))
+    (+= basedir "."))
+  (import importlib)
+  (importlib.import-module (.format "{:s}{:s}" basedir name)))
+
+;; Set up the path so that os.* calls know where to find .so files.
+(defn set-ld-path []
+  "Set os.environ LD_LIBRARY_PATH from a shell that sourced ~/sems/source.sh."
+  (with [stdout (os.popen ". ~/sems/source.sh; env|grep LD_LIBRARY_PATH")]
+    (setv paths (stdout.read))) ; os.popen2 is gone in Python 3; popen reads stdout
+  (assoc os.environ "LD_LIBRARY_PATH" (cut paths (inc (.find paths "=")) -1)))
+
+;; Probably already exists, but haven't looked.
+(defn unzip [coll-of-tups]
+  "Unzip [(a1 b1 ...) (a2 b2 ...) ...] into [[a1 a2 ...] [b1 b2 ...] ...]."
+  (if (empty? coll-of-tups)
+    coll-of-tups
+    (do (setv uz [])
+        (for [i (range (len (get coll-of-tups 0)))]
+          (.append uz (list (list-comp (get e i) [e coll-of-tups]))))
+        uz)))
+
+;; Basic cmp function that wrappers can call.
+(defn cmp-fn [a b]
+  (int (cond [(< a b) -1]
+             [(> a b) 1]
+             [:else 0])))
+
+(defn sort! [coll]
+  "Functional sort."
+  (.sort coll)
+  coll)
+
+(defn sort [coll]
+  "Functional sort."
+  (setv c (copy.deepcopy coll))
+  (sort! c))
+
+
+(defn extend! [coll1 coll2]
+  (.extend coll1 coll2)
+  coll1)
+
+(defn extend [coll1 coll2]
+  (setv c (copy.deepcopy coll1))
+  (extend! c coll2))
+
+(defn find [e coll &optional [always-list False]]
+  (setv f [])
+  (for [i (range (len coll))]
+    (if (= e (get coll i))
+      (.append f i)))
+  (if (and (not always-list) (= 1 (len f))) (first f) f))
+
+(defn first-or-none [coll]
+  (if (empty? coll) None (first coll)))
+
+(defn safe-len [x]
+  (try (len x) (except [] 1)))
+
+(defn sscanf [str-to-parse fmt &optional split]
+  """
+  Kind of like sscanf. Format is like this: 'i,if,f,s', where if is for
+  int(float(.)).
+  """
+  (setv str2type {"i" int
+                  "if" (fn [x] (int (float x)))
+                  "f" float
+                  "s" str})
+  (setv ts (list-comp (get str2type s)
+                      [s (.split fmt ",")]))
+  (list-comp (t s)
+             [(, t s) (zip ts (.split str-to-parse split))]))
+
+(defn split-convert [ln conversion]
+  "Example:
+      (split-convert \"COMPOSE> ne 24 nmax 480 qsize 5 slmpi 1\"
+                     \"ssisisisi\")
+   'conversion' can be shorter than (.split ln), in which case the remainder
+   of line is omitted."
+  (try
+   (list-comp ((get {"i" int "s" identity "f" float}
+                    (get conversion i)) tkn)
+              [(, i tkn) (enumerate (cut (.split ln) 0 (len conversion)))])
+   (except []
+     (prf "split-convert failed on:\n  {:s}\nlen ln {:d} len conversion {:d}"
+          ln (len (.split ln)) (len conversion)))))
+
+(defn cc [x] (* 0.5 (+ (cut x 0 -1) (cut x 1))))
+
+(defn mean [coll]
+  (/ (sum coll) (len coll)))
+
+(defn median [coll]
+  (setv c (list (copy.deepcopy coll)))
+  (.sort c)
+  (if (odd? (len c))
+    (get c (int (math.floor (/ (len c) 2))))
+    (sdo (setv i (dec (int (/ (len c) 2))))
+         (mean (cut c i (+ i 2))))))
+
+(defn variance [coll]
+  (setv mu (mean coll))
+  (/ (sum (map (fn [e] (** (- e mu) 2)) coll))
+     (len coll)))
+
+(defn cross-prod [x y]
+  (defn one [i0 i1]
+    (- (* (get x i0) (get y i1)) (* (get x i1) (get y i0))))
+  [(one 1 2) (one 2 0) (one 0 1)])
+
+(defn readall [filename]
+  (with [f (open filename "r")]
+    (f.read)))
+
+(defn grep-str [pattern str]
+  (import re)
+  (re.findall (+ "(?:" pattern ").*") str :flags re.MULTILINE))
+
+(defn grep [pattern filename]
+  (grep-str pattern (with [f (open filename "r")] (f.read))))
+
+(defn sed-str [pat-repls str]
+  (import re)
+  (for [pr pat-repls]
+    (sv str (re.sub (first pr) (second pr) str :flags re.MULTILINE)))
+  str)
+
+(defn sed [pat-repls file-in file-out]
+  (sv str (sed-str pat-repls (with [f (open file-in "r")] (f.read))))
+  (with [f (open file-out "w")] (f.write str)))
+
+(if-main
+ (sv s (sed-str (, (,"BAR" "yes") (, "FOO" "cow"))
+                "foo BAR FOO BAR\nBAR hold\nbar FOO moo"))
+ (expect s "foo yes cow yes\nyes hold\nbar cow moo"))
+
+(defn re-split-convert [converts pat ln]
+  "Ex: (re-split-convert (, int float) regex ln)"
+  (try
+   (list-comp (convert e)
+              [(, convert e) (zip converts
+                                  (first (re.findall pat ln)))])
+   (except [e Exception]
+     (print ln))))
+
+(defn ooa [y &optional [xfac 2] [x None]]
+  (setv r [])
+  (for [i (range (dec (len y)))]
+    (.append r (/ (- (math.log (get y i)) (math.log (get y (inc i))))
+                  (if x
+                    (- (math.log (get x i)) (math.log (get x (inc i))))
+                    (math.log xfac)))))
+  r)
+
+(defn ooa-from-file [fname fieldno &optional anchor]
+  "Read text from file fname, optionally scan for lines beginning with anchor,
+  and read symbol fieldno, starting from 0. Return a list of OOAs."
+  (sv txt (.split (readall fname) "\n")
+      errs [])
+  (for [ln txt]
+    (unless (none? anchor)
+      (when (or (< (len ln) (len anchor))
+                (!= anchor (cut ln 0 (len anchor))))
+        (continue)))
+    (.append errs (float (get (.split ln) fieldno))))
+  (, errs (ooa errs)))
+
+(defn single? [coll]
+  (or (not (coll? coll))
+      (= (len coll) 1)))
+
+(if-main
+  (expect (single? (, 'gauss)))
+  (expect (single? 'gauss))
+  (expect (not (single? (, 'sin 'gauss)))))
+
+(setv *when-inp-verbosity* 2)
+
+(defn inp [name]
+  (setv b (in name sys.argv))
+  (when (or (and b (> *when-inp-verbosity* 0))
+            (> *when-inp-verbosity* 1)
+            (= (len sys.argv) 1))
+    (prf "{:s}: {:s}" (if b "DO" "av") name))
+  b)
+
+(defmacro/g! when-inp [fn-name-args &rest body]
+  "Example:
+     (when-inp [\"hi\" {:hi int :bye float}]
+     (print hi bye))"
+  (if (> (len fn-name-args) 2)
+    (raise (Exception "(when-inp [fn-name &optional args] body)")))
+  (unless (or (= (len fn-name-args) 1)
+              (is (type (second fn-name-args)) hy.models.HyDict))
+    (raise (Exception "args must be a dict")))
+  (setv fn-name (first fn-name-args)
+        args (if (> (len fn-name-args) 1) (second fn-name-args)))
+  (defn dissect-args [args]
+    (setv alist [] arg-str "")
+    (for [(, i e) (enumerate (zip (cut args 0 (len args) 2)
+                                  (cut args 1 (len args) 2)))]
+      (.append alist
+               `(setv
+                 ;; grab "kw" from ":kw" and make it a symbol
+                 ;;~(HySymbol (cut (first e) 2))
+                 ~(HySymbol (name (first e)))
+                 ;; apply type conversion
+                 (try (~(second e) (get sys.argv (+ ~i 2)))
+                      (except []
+                        (.format "Could not parse sys.argv {:d}: {:s}"
+                                 (+ ~i 2)
+                                 (get sys.argv (+ ~i 2)))))))
+      (+= arg-str (+ " " (name (first e)) ": " (name (second e)))))
+    (, alist arg-str))
+  (if args
+    (do
+     (setv (, alist arg-str) (dissect-args args))
+     `(sdo
+       (import amb3)
+       (setv ~g!b (in ~fn-name sys.argv))
+       (when (or (and ~g!b (> amb3.*when-inp-verbosity* 0))
+                 (> amb3.*when-inp-verbosity* 1)
+                 (= (len sys.argv) 1))
+         (prf "{:s}: {:s}:{:s}" (if ~g!b "DO" "av")
+              ~fn-name
+              ~arg-str))
+       (when ~g!b
+         (if (< (- (len sys.argv) 2) (len ~args))
+           (raise (Exception (+ "in " ~fn-name
+                                " args has more entries than"
+                                " are available in sys.argv"))))
+         ~@alist
+         ~@body)))
+    `(sdo (when (inp ~fn-name) ~@body))))
+
+(if-main
+ (when-inp ["hi" {:bye int :aye float}] (print bye aye))
+ (when-inp ["hello"] (print "hello")))
+
+(defn and-coll [pred coll]
+  (reduce (fn [accum e] (and accum (pred e))) coll True))
+
+(defn or-coll [pred coll]
+  (reduce (fn [accum e] (or accum (pred e))) coll False))
+
+(defn none-in [items coll]
+  (and-coll (fn [e] (not (in e coll))) items))
+
+(defn any-in [items coll]
+  (or-coll (fn [e] (in e coll)) items))
+
+(if-main
+  (expect (none-in (, "hi" "bye") "bye hello") False)
+  (expect (any-in (, "hi" "bye") "bye hello"))
+  (expect (none-in (, "hi" "bye") "adieu hello"))
+  (expect (any-in (, "hi" "bye") "adieu hello") False)
+  (expect (none-in '(1 2 3) (range 10)) False)
+  (expect (none-in '(1 2 3) (range 4 10)))
+  (expect (none-in (, "hi" "bye") ["bye" "hello"]) False)
+  (expect (none-in (, "hi" "bye") ["adieu" "hello"])))
+
+(defn str-ctypes [s] (ctypes.c-char-p (bytes s :encoding "ascii")))
+
+;;; Numpy utils.
+
+(try
+ (do
+  (import [numpy :as npy] ctypes)
+
+  (defn array-range [&rest args] "Integer npy array of (range #*args)."
+    (npy.array (list (range #*args)) :dtype int)) ; #*: apply was removed from Hy
+
+  (defn array-if-not [A &optional [dtype float]]
+    "Return A if it is already an npy.ndarray; else convert it using dtype."
+    (if (= (type A) npy.ndarray) A (npy.array A :dtype dtype)))
+
+  (defn as-ctypes [x]
+    (npy.ctypeslib.as-ctypes x))
+
+  (defn vectorize [A] (npy.reshape A A.size))
+  (defn row-vec [v] (npy.reshape v (, 1 v.size)))
+  (defn col-vec [v] (npy.reshape v (, v.size 1)))
+  (defn vector? [v]
+    (or (and (= (len v.shape) 2) (= (min v.shape) 1))
+        (= (len v.shape) 1)))
+  (defn ones-row-vec [n] (npy.ones (, 1 (pod-or-len n))))
+  (defn ones-col-vec [n] (npy.ones (, (pod-or-len n) 1)))
+
+  (defn sort-with-p [ai]
+    "Return sorted ai and permutation array. Each entry of ai must have the same
+    type."
+    (if (empty? ai)
+      (, ai [])
+      (do (setv dtype [(, (str "data") (type (get ai 0))) (, (str "p") int)]
+                a (npy.array (list-comp (, e i) [(, i e) (enumerate ai)])
+                            :dtype dtype))
+          (setv a (npy.sort a :order (str "data")))
+          (tuple (unzip a)))))
+
+  (defn epsilon [&optional [type float]]
+    (. (npy.finfo type) eps))
+
+  (defn np-map [f a]
+    (npy.array (list (map f a))))
+
+  (defn dbg-array->np-array [a m n]
+    (npy.transpose (npy.reshape (npy.array a) (, n m))))
+
+  (defn reldif [a b &optional [norm None]]
+    (if (and (pod-number? a) (pod-number? b))
+      (/ (abs (- a b)) (max (abs a) (abs b)))
+      (sdo (setv aa (npy.array a)
+                 ba (npy.array b))
+           (/ (npy.linalg.norm (- aa ba) :ord norm)
+              (npy.linalg.norm aa :ord norm)))))
+
+  (defn np-set-print []
+    (setv float-format (fn [x]
+                         (cond [(zero? x) (+ " ." (* " " 8))]
+                               [(= x 1) (+ " 1" (* " " 8))]
+                               [:else (.format "{:10.3e}" x)]))
+          complex-format (fn [x]
+                           (cond [(zero? x) (+ " ." (* " " 19))]
+                                 [(= x 1) (+ " 1" (* " " 19))]
+                                 [:else (if (zero? x.imag)
+                                          (.format (+ "{:10.3e}" (* " " 11)) x.real)
+                                          (.format "{:21.3e}" x))]))
+          int-format (fn [x]
+                       (if (zero? x)
+                         (+ " ." (* " " 1))
+                         (.format "{:2d} " x))))
+    (npy.set-printoptions
+     :precision 2
+     :linewidth 1000
+     :formatter {"float" float-format
+                 "complexfloat" complex-format
+                 "int" int-format}))
+
+  (defn triple-read-file [fn]
+    (setv triple [])
+    (with [f (open fn "r")]
+      (while True
+        (setv ln (f.readline))
+        (if (empty? ln) (break))
+        (setv d (sscanf ln "i,i,f"))
+        (.append triple (tuple d))))
+    triple)
+
+  (defn triple->dense [triple &optional [base 0]]
+    (setv (, row col val) (unzip triple)
+          A (sdo (setv d (if (= base 0) 1 0)
+                       M (+ (max row) d)
+                       N (+ (max col) d))
+                 (npy.zeros (, M N))))
+    (for [e triple]
+      (setv r (get e 0)
+            c (get e 1))
+      (if (> base 0)
+        (setv r (- r base)
+              c (- c base)))
+      (setv v (get e 2)
+            (get A r c) v))
+    A)
+
+  (defn dense-extract-block-diag [A bs]
+    (setv D (npy.zeros (npy.shape A)))
+    (for [br (range (// (npy.size A 0) bs))]
+      (setv r0 (* br bs)
+            cs (list (range r0 (+ r0 bs))))
+      (for [i (range bs)]
+        (setv r (+ r0 i)
+              (get (get D r) cs) (get (get A r) cs))))
+    D)
+
+  (defn dense-max-norm [A]
+    (setv m1n 0)
+    (for [r (range (npy.size A 0))]
+      (setv r1n (as-> (get A r) it
+                      (npy.abs it)
+                      (npy.sum it))
+            m1n (max m1n r1n)))
+    m1n)
+
+  (defn pod-number? [n]
+    (in (type n) (, int float npy.int64 npy.float64)))
+
+  (defn np-array? [a] (= (type a) npy.ndarray))
+
+  (defn conforms? [v u]
+    (and (np-array? u) (= u.shape v.shape)))
+
+  (defn s-all [] (slice None))
+  (defn s-all-rev [] (slice None None -1))
+
+  (defn idx-arr [A rows cols]
+    (get (get A rows) (, (s-all) cols)))
+
+  (defn antidiag [v]
+    (get (npy.diag (get (npy.array v) (s-all-rev))) (s-all-rev)))
+  )
+ (except []
+   (do
+    (defn np-array? [a] False)
+    )))
+
+;;; Matplotlib utils.
+
+(try
+ (do
+  (import matplotlib [matplotlib.pyplot :as pl])
+
+  (defn my-grid [&optional ls zorder]
+    (svifn ls "-" zorder -1)
+    (pl.grid True :lw 0.5 :ls ls :color (, 0.8 0.8 0.8) :zorder zorder
+             :which "both")
+    (.set_axisbelow (pl.gca) True))
+
+  (defn dispfig [&optional fn-prefix [format "pdf"] [tight True] [nowarn False]]
+    (import warnings)
+    (with [(warnings.catch-warnings)]
+      (when nowarn (warnings.filterwarnings "ignore"))
+      (when tight (pl.tight-layout))
+      (if (or (not fn_prefix) (empty? fn-prefix))
+          (pl.show)
+          (pl.savefig (+ fn-prefix (+ "." format))
+                      :format format :bbox-inches "tight"))))
+
+  (defclass pl-plot []
+    (defn --init-- [me figsize filename &optional [format None]
+                    [tight True] [nowarn False]]
+      (setv me.filename filename
+            me.format (if (none? format) "pdf" format)
+            me.tight tight me.nowarn nowarn)
+      (pl.close)
+      (pl.figure :num 1 :figsize figsize))
+    (defn cleanup [me]
+      (dispfig me.filename :format me.format :tight me.tight :nowarn me.nowarn))
+    (defn --enter-- [me] me)
+    (defn --exit-- [me &rest args] (me.cleanup))
+    (defn --del-- [me]))
+
+  ;; To get Type 1 fonts only. From
+  ;;     http://nerdjusttyped.blogspot.com/2010/07/type-1-fonts-and-matplotlib-figures.html
+  ;; The third one in particular really blows up rendering time, so switch this
+  ;; block to False during development iterations.
+  (defn pl-require-type1-fonts []
+    (import matplotlib)
+    (assoc matplotlib.rcParams
+           "ps.useafm" True
+           "pdf.use14corefonts" True
+           "text.usetex" True))
+
+  (defn imshow-matrix [A]
+    (pl.imshow A :interpolation "none")
+    (pl.show))
+
+  (defn iml [A]
+    (pl.imshow (npy.log10 (npy.abs A)) :interpolation "none"))
+
+  (defn pad-lim [lim &optional [pad 0.05] [mult False]]
+    (if mult
+      (do (, (* (first lim) (- 1 pad))
+             (* (second lim) (+ 1 pad))))
+      (do (setv d (- (second lim) (first lim))
+                delta (* pad d))
+          (, (- (first lim) delta)
+             (+ (second lim) delta)))))
+
+  (defn axis-tight-pad [&optional [pad 0.05] [mult False]]
+    (pl.axis "tight")
+    (setv xl (pl.xlim) yl (pl.ylim))
+    (pl.xlim (pad-lim xl pad mult))
+    (pl.ylim (pad-lim yl pad mult)))
+
+  (defn reset-colors [] "Restart the current axes' property (color) cycle."
+    (.set-prop-cycle (pl.gca) None)) ; set_color_cycle is deprecated/removed in mpl
+
+  (defn good-subplot-dims [n &optional [pref-horiz False]]
+    (setv d (cond [(< n 5) (case/eq n [1 (, 1 1)] [2 (, 2 1)]
+                                    [3 (, 3 1)] [4 (, 2 2)])]
+                  [(< n 7) (, 3 2)]
+                  [(< n 10) (, 3 3)]
+                  [(< n 13) (, 4 3)]
+                  [(< n 17) (, 4 4)]
+                  [:else (, 5 (int (math.ceil (/ n 5))))]))
+    (if pref-horiz
+      (, (second d) (first d))
+      d))
+
+  (defn get-linestyle-word [char]
+    (get {"-" "solid" "--" "dashed" ":" "dotted" "-." "dashdot"} char))
+
+  (defn set-tick-fontsize [fs]
+    (for [ax (, "xaxis" "yaxis")]
+      (sv ticks ((. (get (. (pl.gca) --dict--) ax)
+                    get-major-ticks)))
+      (for [tick ticks]
+        ((. tick label set-fontsize) fs))))
+
+  (defn make-reference-slope-triangle [x-span y-span slope pattern
+                                       &optional [kwargs-plot None]
+                                       [kwargs-text None]
+                                       [opposite None]]
+    (assert (= 2 (len x-span)))
+    (assert (= 2 (len y-span)))
+    (svifn kwargs-plot {})
+    (svifn kwargs-text {})
+    (svifn opposite False)
+    (sv (, x0 x1) x-span (, y0 y1) y-span
+        dx (- x1 x0) dy (- y1 y0))
+    (if opposite
+        (do (sv x [x0 x1 x1 x0]
+                y [y1 y1 y0 y1]))
+        (do (sv x [x0 x1 x0 x0]
+                y [y0 y0 y1 y0])))
+    (pl.plot #*[x y pattern] #**kwargs-plot)
+    (when opposite
+      (if (none? kwargs-text) (sv kwargs-text {}))
+      (assoc kwargs-text "horizontalalignment" "right" "verticalalignment" "top"))
+    (pl.text #*[(if opposite (- x1 (* 0.1 dx)) (+ x0 (* 0.1 dx)))
+                (if opposite (- y1 (* 0.1 dy)) (+ y0 (* 0.1 dy))) (str slope)]
+             #**kwargs-text))
+
+  ) (except [] ))
+
+;;; More extensive unit tests.
+(if-main
+  (expect
+    (case/eq 'hi
+             ('hit 'nope)
+             ('hi 'bark 'yep))
+    'yep)
+  (expect
+    (case/in 'hello
+             ['(bye hello) 'nope]
+             ('(hi) 'yep)
+             ('(4) 'another-nope))
+    'nope)
+  (expect
+    (case/in 'hi
+             ('(bye hello) 'nope)
+             ('(hi) 'yep)
+             ('(4) 'another-nope))
+    'yep)
+  (expect
+    (case/in 'hi
+             ('(bye hello) 'nope)
+             ('hi 'yep)
+             ('(4) 'another-nope))
+    'yep)
+  (expect
+    (case/test in 'hi
+               ('(bye hello) 'nope)
+               ('hi 'yep)
+               ('(4) 'another-nope))
+    'yep)
+  (expect (case/eq 4 [5 'bye] [4 'hi])
+          'hi)
+  (expect
+    (case/in 'hi
+             ('(bye hello) 'nope))
+    None)
+  (expect
+    (case/in 'hi
+             ('(bye hello) 'nope)
+             (:else 'woot))
+    'woot)
+  (expect
+    (sdo (setv key 'hi)
+         (case/in key
+                  ('(bye hello) 'nope)
+                  ([key] 'yup)))
+    'yup)
+
+  (expect
+    (do
+      (sv a None b 4 c "hi" d None)
+      (svifn a "hi" b 3 c "bye")
+      (and (= a "hi") (= b 4) (= c "hi") (none? d))))
+  (expect
+    (do
+      (sv a None)
+      (svifn a "hi")
+      (= a "hi")))
+    (expect
+    (do
+      (sv a 5)
+      (svifn a "hi")
+      (= a 5)))
+
+    (expect
+     (do (sv a 1 b "hi" c 'd)
+         (svb (ab 1) (bb "hi") (cb 'd))
+         (and (= a ab) (= b bb) (= c cb))))
+
+  (when-inp ["test-pretty-print"]
+    (ppme '(case/in 'hi
+                    (['hi 'bye] 'hi)
+                    ('[foo] (for [d dinos]
+                              (print (.format "{:s} goes {:d}"
+                                              d.name d.sound))))
+                    ('[bar]
+                     (defn axis-tight-pad [&optional [pad 0.05] [mult False]
+                                           [foo 3]]
+                       (pl.axis "tight")
+                       (setv xl (pl.xlim) yl (pl.ylim))
+                       (pl.xlim (pad-lim xl pad mult))
+                       (pl.ylim (pad-lim yl pad mult)))
+                     (axis-tight-pad)))))
+
+  (when-inp ["ooa-from-file" {:fname str :anchor str :fieldno int}]
+    (sv (, errs ooas) (ooa-from-file fname fieldno :anchor anchor))
+    (for [i (range (len errs))]
+      (prf "{:10.3e} {}" (get errs i)
+           (if (zero? i) " n/a" (.format "{:6.3f}" (get ooas (dec i)))))))
+)
diff --git a/methods/islet/figures/figs-adv-diag.hy b/methods/islet/figures/figs-adv-diag.hy
new file mode 100644
index 0000000..02af0b4
--- /dev/null
+++ b/methods/islet/figures/figs-adv-diag.hy
@@ -0,0 +1,1035 @@
+(require [amb3 [*]])
+(import amb3 [amb3 [*]]
+        [figsutils [*]]
+        math glob struct)
+
+(assoc matplotlib.rcParams "savefig.dpi" 300)  ; every saved figure uses 300 dpi
+(do (pl-require-type1-fonts))  ; helper from the star imports; presumably forces Type 1 fonts -- confirm
+
+;;; parse cmd, L, M, C slmmir output
+
+(defn acc-parse [fname &optional map-nstepfac]  ; parse one slmmir output file into a nested results dict
+  (sv txt (.split (readall fname) "\n")
+      bo None d {})  ; bo: bakeoff record being accumulated; d: the dict that is returned
+  (for [ln txt]
+    (sv ln2 (cut ln 0 2))  ; the first two characters select the record type
+    (cond [(and (= "cm" ln2) (in "cmd>" ln))  ; command line: c stays in effect for the lines after it
+           (sv cmd ln c (parse-cmd ln :map-nstepfac map-nstepfac))]
+          [(= ln2 "M ")  ; midpoint-check line; stored under ... ic cyc ne :M
+           (sv mp (parse-midpoint-check ln))
+           (unless (none? (geton d #*(+ (cmd->key-base c)
+                                        (, (:ic mp) cyc (:ne c) :M))))
+             ;; handle repeated ic used for src-term OOA testing.
+             (assoc mp :ic (+ (:ic mp) "-src")))
+           (assoc-nested d (+ (cmd->key-base c) (, (:ic mp) cyc (:ne c) :M))
+                         {:l1 (:l1 mp) :l2 (:l2 mp)})]
+          [(= ln2 "C ")  ; three sub-kinds: cycle marker, PASS/FAIL verdict, error record
+           (cond [(in "cycle" ln) (sv (, - - cyc) (sscanf ln "s,s,i"))]  ; cyc is reused by later lines
+                 [(or (in "PASS" ln) (in "FAIL" ln))
+                  (sv (, - pf) (sscanf ln "s,s"))
+                  (when (and (= pf "FAIL") (!= (:mono c) "none") (<= cyc 10))  ; FAIL is reported only under these conditions
+                    (prf "FAIL {}" cmd))]
+                 [:else
+                  (sv cl (parse-C ln))
+                  (unless (none? (geton d #*(+ (cmd->key-base c)
+                                               (, (:ic cl) cyc (:ne c) :C))))
+                    ;; handle repeated ic used for src-term OOA testing.
+                    (assoc cl :ic (+ (:ic cl) "-src")))
+                  (assoc-nested d (+ (cmd->key-base c) (, (:ic cl) cyc (:ne c) :C))
+                                cl)])]
+          [(= ln2 "L ")  ; three sub-kinds: mixing file name, phimin/phimax errors, bakeoff diagnostics
+           (cond [(in "L file" ln)
+                  (assoc-nested d (+ (cmd->key-base c)
+                                     (, "cos" cyc (:ne c) :L :mixing-file))
+                                (last (.split ln)))]  ; the file name is the last whitespace-separated token
+                 [(in "phimin" ln)
+                  (sv (, - ic - l1 - l2 - li - phimin - phimax)
+                      (sscanf ln "s,s,s,f,s,f,s,f,s,f,s,f"))
+                  (assoc-nested d (+ (cmd->key-base c) (, ic cyc (:ne c) :Lerr))
+                                {:l1 l1 :l2 l2 :li li :phimin phimin :phimax phimax})]
+                 [:else
+                  (sv bo (parse-bakeoff-diag bo ln (:timeint c)))  ; bo is threaded through successive L lines
+                  (when (:done bo)
+                    (assoc-nested d (+ (cmd->key-base c) (, "cos" cyc (:ne c) :L)) bo)
+                    (sv bo None))])]))  ; start a fresh record after storing a finished one
+  d)  ; NOTE(review): c, cmd and cyc are bound only after their lines are seen; assumes the log emits them first
+
+;;; print a long table of all accuracy results
+
+(defn acc-print-txt-table [c d &optional]  ; print every accuracy table found in d, then an instability summary
+  (sv dent (Box) dent.indentation 0 dent.delta 1)
+  (defn indent [] (+= dent.indentation dent.delta))
+  (defn dedent [] (-= dent.indentation dent.delta))
+  (sv buf [])  ; queued headings; printed by msg-dump only when a table follows
+  (defn msg [s] (.append buf (+ (* " " dent.indentation) s)))
+  (defn msg-pop [] (unless (empty? buf) (.pop buf)))
+  (defn msg-clear [] (.clear buf))
+  (defn msg-dump []
+    (for [e buf] (prf e))
+    (msg-clear))
+  (defmacro dent-fn [title &rest body]  ; queue title, indent, run body, drop a still-queued title, dedent
+    `(do (msg ~title)
+         (indent)
+         ~@body
+         (msg-pop)
+         (dedent)))
+
+  (sv unstab-thr 0.9 unstab {})  ; :C :l2 above unstab-thr is treated as unstable; unstab maps (np cdrglb cdrlcl) to one case
+  (defn study-unstab [e-all cdrglb cdrlcl np ic nstepfac ode timeint]  ; record at most one suspicious case per key
+    (sv e None)
+    (for [ne c.nes]
+      (sv pe e e (geton e-all ne))
+      (when (none? e) (continue))
+      (unless (or (> (get e :C :l2) unstab-thr)  ; candidate: above threshold, or error grew from the previous ne
+                  (and (not (none? pe)) (> (get e :C :l2) (get pe :C :l2))
+                       (> (get e :C :l2) 1e-12)))
+        (continue))
+      (dont prf "u {:2d} {} {:2d} {:2d} {} {} {:7.1e}"
+           np ic ne nstepfac cdrglb cdrlcl (get e :C :l2))
+      (sv key (, np cdrglb cdrlcl)
+          use True)
+      (when (in key unstab)  ; decide whether this candidate replaces the stored one
+        (sv pv (get unstab key))
+        (unless (or (and (= (:ic pv) "slo") (!= ic "slo"))
+                    (and (< ne (:ne pv))
+                         (not (and (!= (:ic pv) "slo") (= ic "slo"))))
+                    (and (not (= ic "slo"))
+                         (> (get e :C :l2) (get pv :l2))
+                         (< (get pv :l2) unstab-thr)))
+          (sv use False)))
+      (when (and (!= np 4)  ; for np other than 4, only this one configuration is recorded
+                 (or (!= timeint "interp") (= cdrglb "none")
+                     (!= ode "divergent") (!= ic "gau") (!= nstepfac 1)))
+        (continue))
+      (when use
+        (assoc unstab key {:ic ic :ne ne :l2 (get e :C :l2)
+                           :nstepfac nstepfac :ode ode}))))
+  (defn print-unstab []  ; one "u ..." line per recorded key, sorted by np
+    (sv keys (list (.keys unstab)))
+    (.sort keys :key first)
+    (for [k keys]
+      (sv e (get unstab k))
+      (prf "u {:2d} {} {:<12s} {:2d} {:2d} {:<9s} {} {:7.1e}"
+           (first k) (:ic e) (:ode e) (:ne e) (:nstepfac e) (second k)
+           (last k) (:l2 e))))
+
+  (defn pr [&rest args]
+    (prfno (* " " dent.indentation))
+    (print #*args))
+  (defn praccv [pne ne vp v]  ; print v and, when a usable previous value vp exists, the order of accuracy
+    (if (or (none? vp) (zero? vp) (zero? v))
+        (prfno " {:7.1e} ( ----)" v)
+        (prfno " {:7.1e} ({:5.2f})" v (first (ooa [vp v] :x [ne pne])))))
+  (defn pracc [diagnostic norm pne pe ne e]
+    (sv v (geton e diagnostic norm))
+    (when (none? v) (return))
+    (sv vp (unless (none? pe) (geton pe diagnostic norm)))  ; geton: the previous row may lack this diagnostic
+    (praccv pne ne vp v))
+  (defn print-table [e-all &optional [Ldiags False]]  ; one row per ne present in e-all
+    (sv pe None pne None)
+    (for [ne c.nes]
+      (sv e (geton e-all ne))
+      (when (none? e) (continue))
+      (when (< (get e :C :l2) 1e-13) (continue))
+      (prfno "{}{:3d}" (* " " dent.indentation) ne)
+      (pracc ':C ':masscons pne pe ne e)
+      (for [diag (, :C :M)]
+        (sv first True)  ; local flag: print the " |" separator once per diag group
+        (for [norm (, :l1 :l2 :li)]
+          (when (and (= diag :M) (!= norm :l2)) (continue))
+          (when (none? (geton e diag norm)) (continue))
+          (when first
+            (prfno " |")
+            (sv first False))
+          (pracc diag norm pne pe ne e)))
+      (when Ldiags
+        (sv bo (geton e :L))
+        (unless (none? bo)
+          (for [r (, 0 1)]
+            (unless (or (zero? r) (in :me-mixing bo)) (continue))
+            (prfno " |")
+            (sv s (if (zero? r) :mixing :me-mixing)
+                pmixing (unless (none? pe) (geton pe :L s))  ; geton: the previous row may have no :L record
+                mixing (get bo s))
+            (for [k (, :lr :lu :lo)]
+              (praccv pne ne (unless (none? pmixing) (get pmixing k))
+                      (get mixing k))))))
+      (when (> (get e :C :l2) unstab-thr) (prfno " UNSTABLE"))
+      (print)
+      (sv pe e pne ne)))
+
+  (pr (+ "         ne    mass                l1              l2            linf"
+         "            mid l2"))
+  (for [method c.methods]
+    (dent-fn method
+      (for [timeint c.timeints]
+        (dent-fn (.format "timeint {}" timeint)
+          (for [ode c.odes]
+            (dent-fn ode
+              (for [cdrgl c.cdrs]
+                (sv (, cdrglb cdrlcl) cdrgl)
+                (dent-fn (.format "{} {}" cdrglb cdrlcl)
+                  (for [nstepfac c.nstepfacs]
+                    (dent-fn (.format "nstepfac {}" nstepfac)
+                      (for [ic c.ics]
+                        (dent-fn ic
+                          (for [cyc c.cycs]
+                            (dent-fn (.format "cycle {}" cyc)
+                              (for [np c.nps]
+                                (sv prefine (if (or (= np 4) (= timeint "exact"))
+                                                0 5)
+                                    e (geton d timeint ode nstepfac method
+                                             cdrglb cdrlcl prefine np ic cyc))
+                                (when (none? e) (continue))
+                                (dent-fn (.format "np {}" np)
+                                  (msg-dump)
+                                  (pr method timeint ode cdrglb cdrlcl nstepfac
+                                      ic cyc np)
+                                  (print-table e :Ldiags (= ic "cos"))
+                                  (study-unstab e cdrglb cdrlcl np ic nstepfac
+                                                ode timeint)))))))))))))))))
+  (print-unstab))
+
+;;; accuracy figs
+
+(defclass AccFig []  ; draws error-vs-resolution curves from the nested results dict, plus legends and title
+  (defn --init-- [me])
+
+  (defn get-defaults [me &optional context]  ; fresh options dict; context falls back to (get-context)
+    (svifn context (get-context))
+    (sv c context)
+    {:method "pcsl" :nstepfac 1 :timeint "interp" :ode "nondivergent" :ic "gau"
+     :cdrglb "caas-node" :cdrlcl "caas" :prefine 5 :nonuni 0 :cyc 1 :measure :l2
+     :nps c.nps :nes c.nes :pat-line "-" :pat-clr (.copy c.npclrs) :C-line :C
+     :pat-mark (.copy c.npmarks) :fs 11 :lw 1 :markersize 4 :yform :log2
+     :xticks :degrees :ooa-text False :filter-floor None :figsize (, 4 4)
+     :ref-ooa-2 False :ref-cos-033 True :pg None})
+
+  (defn plot [me ax d-all &optional [o None]]  ; one curve per np in (:nps o); returns [min max] plotted, or None without data
+    (svifn o (me.get-defaults))
+    (sv npa npy.array
+        d1 (geton d-all (:timeint o) (:ode o) (:nstepfac o) (:method o)
+                  (:cdrglb o) (:cdrlcl o) (:prefine o))
+        gray (* 0.5 (npy.ones 3))
+        y-ext [1e10 -1e10])  ; running [min max] over every value plotted
+    (when (none? d1) (return))
+    (for [np (:nps o)]
+      (sv keys (if (none? (:pg o))  ; :pg may be absent, a fixed value, or a function of np
+                   (, np (:ic o) (:cyc o))
+                   (if (fn? (:pg o))
+                       (, ((:pg o) np) np (:ic o) (:cyc o))
+                       (, (:pg o) np (:ic o) (:cyc o))))
+          d2 (geton d1 #*keys))
+      (when (none? d2) (continue))
+      (sv x [] y [])
+      (for [ne (:nes o)]
+        (sv val (geton d2 ne (:C-line o) (:measure o)))
+        (when (none? val) (continue))
+        (unless (none? (:filter-floor o))
+          (when (< val (:filter-floor o)) (continue)))  ; drop values below the floor
+        (.append x ne)
+        (.append y val)
+        (sv (get y-ext 0) (min (get y-ext 0) val)
+            (get y-ext 1) (max (get y-ext 1) val)))
+      (svb (pat (+ (get (:pat-clr o) np) (get (:pat-mark o) np) (:pat-line o)))
+           (xtform (npy.log2 (npa x)))
+           ((, xticks xtick-strs)
+            (case/eq (:xticks o)
+                     [:degrees
+                      (sv d (nes->degstrs x))
+                      (, (npy.log2 (npa (:ne d))) (:deg d))]
+                     [:else (, xtform x)]))
+           ((, ytform y-lbl y-tfn)
+            (case/eq (:yform o)
+                     [:log2 (, (npy.log2 (npa y)) "$\log_2$" npy.log2)]
+                     [:log10 (, (npy.log10 (npa y)) "$\log_{10}$" npy.log10)]  ; transform must agree with the label
+                     [:semilogy (, (npa y) "$\log_{10}$" identity)]))
+           (pl-plot (if (= (:yform o) :semilogy) ax.semilogy ax.plot)))
+      (pl-plot xtform ytform pat
+               :lw (:lw o) :markersize (:markersize o) :fillstyle "none")
+      (when (= np (first (:nps o)))  ; axis decoration happens once, on the first np
+        (pl.xticks xticks xtick-strs :fontsize (:fs o))
+        (pl.ylabel (+ y-lbl " " (get {:l1 "$l_1$" :l2 "$l_2$" :li "$l_{\infty}$"
+                                      :masscons "Mass conservation"}
+                                     (:measure o))
+                      " relative error")
+                   :fontsize (:fs o))
+        (cond [(= (:yform o) :log2)
+               (pl.yticks (list (range -40 10)) :fontsize (:fs o))])
+        (when (and (:ref-cos-033 o) (= (:ic o) "cos") (= (:ode o) "nondivergent"))
+          (pl-plot xtform (* 0.033 (npy.ones (len xtform))) "k-."
+                   :zorder -10 :lw (:lw o) :color gray)))
+      (when (and (:ooa-text o) (>= (len x) 2))  ; the order-of-accuracy label needs the last two points
+        (sv i (- (len x) 2))
+        (pl.text (- (get xtform (inc i)) 0.1) (* (get ytform (inc i)) 2)
+                 (.format "{:1.1f}"
+                          (- (first (ooa (cut y i (+ i 2))
+                                         :x (cut x i (+ i 2))))))))
+      (when (= np (last (:nps o)))  ; finishing touches happen once, on the last np
+        (when (= (:yform o) :semilogy)
+          (sv (, y ys) (pl.yticks))  ; y is rebound to the tick positions from here on
+          (when (> (/ (last y) (first y)) 1e3)
+            (for [i (range (len ys))]
+              (sv (get ys i) (.format "{}" (int (math.log10 (get y i))))))
+            (pl.yticks y ys)))
+        (when (:ref-ooa-2 o)
+          (sv ytref (y-tfn (* 0.7 (first y-ext) (** (/ (last x) (npa x)) 2))))  ; second-order reference curve
+          (pl-plot xtform ytref "k:" :color gray))))
+    y-ext)
+
+  (defn legend [me ax entries &optional [o None] [nps-legend True] bbox]  ; entries: (pattern label) pairs
+    (svifn o (me.get-defaults))
+    (sv xl (pl.xlim) yl (pl.ylim) hs [] delta 2)
+    (unless (empty? entries)
+      (for [e entries]
+        (sv h (ax.plot (- (first xl) delta) (- (first yl) delta)  ; proxy point placed outside the current limits
+                       (first e) :label (second e) :fillstyle "none"
+                       :lw (:lw o) :markersize (:markersize o)))
+        (.extend hs h))
+      (sv l1 (pl.legend :handles hs :fontsize (- (:fs o) 1)
+                        :bbox-to-anchor (if (none? bbox) (, 0 0.08) bbox)
+                        :loc "lower left" :framealpha 1))
+      (ax.add-artist l1))
+    (when nps-legend  ; second legend: one entry per np, spread along the bottom
+      (sv hs [])
+      (for [np (:nps o)]
+        (sv h (ax.plot (- (first xl) delta) (- (first yl) delta)
+                       (+ (get (:pat-clr o) np) (get (:pat-mark o) np))
+                       :lw (:lw o) :markersize (:markersize o)
+                       :label (.format "{}" np) :fillstyle "none"))
+        (.extend hs h))
+      (sv l2 (pl.legend :handles hs :fontsize (- (:fs o) 1)
+                        :ncol (len (:nps o)) :bbox-to-anchor (, 0 -0.01 1 0)
+                        :loc "lower left" :mode "expand" :framealpha 1))
+      (ax.add-artist l2))
+    (pl.xlim xl) (pl.ylim yl))  ; restore the limits captured before the proxy points were added
+
+  (defn title [me s &optional [o None]]
+    (svifn o (me.get-defaults))
+    (pl.title s :fontsize (:fs o))))
+
+(defn make-nps-string [nps]  ; e.g. [4 6 8] gives "$n_p$ 4, 6, 8"
+  (sv head (.format "$n_p$ {}" (first nps))
+      tail (map (fn [np] (.format ", {}" np)) (cut nps 1)))
+  (+ head (.join "" tail)))
+
+(defn nstepfac->word [nstepfac]  ; word used in titles for step factor 1, 3 or 5; any other value raises KeyError
+  (. {1 "long" 3 "medium" 5 "short"} [nstepfac]))
+
+(defn make-title [main o &optional extra]
+  ;; Title text: `main`, then a line of flow / IC / step-length words, then a
+  ;; line of p-refinement and property-preservation flags with `extra` appended.
+  (.join "" [main "\n" (flow-short2long (:ode o)) ", "
+             (ic-short2long (cut (:ic o) 0 3)) ", "
+             (nstepfac->word (:nstepfac o)) " steps,\n"
+             (if (= (:prefine o) 5) "$p$-refinement, " "")
+             (if (= (:cdrglb o) "none") "no " "") "property preservation"
+             (if (none? extra) "" extra)]))
+
+(defn fig-stab-cmp [c d]  ; writes figure "stab-cmp-<measure>" under c.fig-dir
+  (sv p (AccFig)
+      o (p.get-defaults c))
+  (assoc o :ic "gau" :ode "divergent" :cdrglb "caas-node" :cdrlcl "caas"
+         :measure :l2 :timeint "interp" :prefine 5 :yform :semilogy)
+  ;; One axes, three p.plot passes that differ only in method, cycle and line style.
+  (with [(pl-plot (:figsize o) (+ c.fig-dir "stab-cmp-" (name (:measure o))))]
+    (sv ax (pl.subplot 1 1 1))
+    (assoc o :method "pcslu" :pat-line "-." :cyc 1) (p.plot ax d o)
+    (assoc o :method "pcsl" :pat-line "--" :cyc 100) (p.plot ax d o)
+    (assoc o :method "pcsl" :pat-line "-" :cyc 1 :ref-ooa-2 True) (p.plot ax d o)
+    (if (= (:yform o) :semilogy)
+        (pl.ylim (, 9e-7 1))
+        (pl.ylim (, -20 0)))
+    (my-grid)
+    (p.legend ax (, (, "k-" "Islet 1 cycle") (, "k--" "Islet 100 cycles")
+                    (, "k-." "Natural 1 cycle") (, "k:" "OOA 2")) :o o)
+    (p.title (make-title "Islet stability:" o) o)))
+
+(defn nextpow10 [f] (pow 10.0 (math.ceil (math.log10 f))))  ; smallest power of ten >= f, for f > 0
+(defn prevpow10 [f] (pow 10.0 (math.floor (math.log10 f))))  ; largest power of ten <= f, for f > 0
+
+(defn figs-acc [c d &optional prefix ref-ooa-2 legend  ; writes one order-of-accuracy figure plus one figure per (nstepfac ic ode)
+                general-timeint general-prefine show-linf pp]
+  (svifn prefix "" ref-ooa-2 True legend True general-timeint "interp"  ; defaults for arguments left as None
+    general-prefine 5 show-linf True pp True)
+  (sv p (AccFig)
+      o (p.get-defaults c))
+  (defn plot [o title plot-fn &optional [ref-ooa-2 False]]  ; shared frame: file name, axes, grid, legend, labels
+    (sv fname (+ prefix "acc-" (:ode o) "-" (:ic o)
+                 "-" (:timeint o) "-" (if (= (:cdrglb o) "none")
+                                          "nopp" "pp")
+                 "-fac" (str (:nstepfac o))))
+    (print fname)
+    (with [(pl-plot (:figsize o) (+ c.fig-dir fname))]
+      (sv ax (pl.subplot 1 1 1))
+      (plot-fn ax d o)  ; the caller-supplied function draws the curves
+      (my-grid)
+      (sv legs [(, "k-" "$l_2$") (, "k--" "$l_{\infty}$")])
+      (when ref-ooa-2 (.append legs (, "k:" "OOA 2")))
+      (when legend (p.legend ax legs :o o))
+      (pl.ylabel "$\log_{10}$ relative error")
+      (p.title (make-title title o) o)))
+  (assoc o :ode "nondivergent" :ic "gau" :nstepfac 1  ; first figure: exact time integration, no property preservation
+         :yform :semilogy :cdrglb "none" :cdrlcl "none"
+         :timeint "exact" :prefine 0 :filter-floor 1e-11)
+  (plot o "Islet empirical order of accuracy:"
+        (fn [ax d o]
+          (for [norm (, :l2 :li)]
+            (assoc o :measure norm :pat-line (if (= norm :l2) "-" "--")
+                   :ooa-text (= norm :li))  ; order-of-accuracy labels are drawn on the l_inf curves only
+            (p.plot ax d o)
+            (pl.ylim (, 1e-11 1))
+            (sv e (npy.array [0 -2 -4 -6 -8 -10]))
+            (pl.yticks (** 10.0 e) e))))  ; ticks at 10^e, labelled with the exponent
+  (for [nstepfac (, 1 5)
+        ic (, "gau" "cos" "slo")
+        ode (, "nondivergent" "divergent" "rotate")]
+    (assoc o :nstepfac nstepfac :ooa-text False :ode ode
+           :cdrglb (if pp "caas-node" "none") :cdrlcl (if pp "caas" "none")
+           :ic ic :filter-floor None :timeint general-timeint)
+    (dont when (and (= ode "divergent") (= ic "gau")) (continue))
+    (plot o "Islet accuracy:"
+          (fn [ax d o]
+            (sv ye [1e10 -1e10])  ; combined [min max] over all curves, used for the y limits
+            (defn update-ye [ye1]
+              (when (none? ye1) (return))  ; p.plot returns None when the data set is missing
+              (sv (get ye 0) (min (first ye) (first ye1))
+                  (get ye 1) (max (last ye) (last ye1))))
+            (for [norm (, :l2 :li)]
+              (when (and (= norm :li) (not show-linf)) (continue))
+              (assoc o :cdrglb (if pp "caas" "none") :timeint "exact" :prefine 0  ; first pass: exact, prefine 0
+                     :measure norm :pat-line (if (= norm :l2) "-" "--")
+                     :ref-ooa-2 False)
+              (sv ye1 (p.plot ax d o))
+              (update-ye ye1)
+              (assoc o :cdrglb (if pp "caas-node" "none")  ; second pass: the general-* configuration
+                     :timeint general-timeint :prefine general-prefine
+                     :ref-ooa-2 (and ref-ooa-2 (= norm :l2) (!= ic "slo")))
+              (sv ye1 (p.plot ax d o))
+              (update-ye ye1))
+            (if (= ic "slo")
+                (pl.ylim (, (* 0.7 (first ye)) (nextpow10 (second ye))))
+                (pl.ylim (, (/ (first ye) 4) (nextpow10 (second ye))))))
+          :ref-ooa-2 (and ref-ooa-2 (!= ic "slo")))))
+
+(defn fig-acc-midpoint-check [c d]  ; writes figure "midpoint-check" under c.fig-dir
+  (sv p (AccFig)
+      o (p.get-defaults c))
+  (assoc o :ic "gau" :ode "nondivergent" :cdrglb "none" :cdrlcl "none"
+         :measure :l2 :yform :semilogy :nstepfac 1)
+  ;; Solid curves read the :C records, dashed curves the :M (midpoint) records.
+  (with [(pl-plot (:figsize o) (+ c.fig-dir "midpoint-check"))]
+    (sv ax (pl.subplot 1 1 1))
+    (assoc o :pat-line "-" :C-line :C :ooa-text True :prefine 0 :timeint "exact" :nps [4])
+    (p.plot ax d o)
+    (assoc o :prefine 5 :timeint "interp" :nps (cut c.nps 0 -1))
+    (p.plot ax d o)
+    (assoc o :prefine 5 :timeint "interp" :nps (cut c.nps -1) :ooa-text False)
+    (p.plot ax d o)
+    (assoc o :pat-line "--" :C-line :M :prefine 0 :timeint "exact" :nps [4])
+    (p.plot ax d o)
+    (assoc o :prefine 5 :timeint "interp" :nps c.nps)
+    (p.plot ax d o)
+    (pl.ylim (, 5e-10 1))
+    (my-grid)
+    (p.legend ax (, (, "k-" "1 cycle") (, "k--" "1/2 cycle")) :o o)
+    (p.title (make-title "Trajectory interpolation:" o) o)))
+
+(defn fig-acc-mimic-src-term-midpoint [c d &optional [np-minus-2 False]]  ; np-minus-2 adds the n_f = n_p - 2 curves
+  (sv p (AccFig)
+      o (p.get-defaults c))
+  (defn plot [o title plot-fn &optional [ref-ooa-2 False]]  ; shared frame: file name, axes, grid, legend, labels
+    (sv fname (+ "acc-pg-mimic-src-term-midpoint-" (:ode o) "-" (:ic o)
+                 "-" (:timeint o) "-" (if (= (:cdrglb o) "none")
+                                          "nopp" "pp")
+                 "-fac" (str (:nstepfac o)) "-" (name (:measure o)))
+        title (make-title title o :extra ", midpoint"))
+    (print fname)
+    (with [(pl-plot (:figsize o) (+ c.fig-dir fname))]
+      (sv ax (pl.subplot 1 1 1))
+      (plot-fn ax d o)
+      (my-grid)
+      (sv legs [(, "k-" "Reference") (, "k-." "$n_f=n_p$") (, "k--" "$n_f=2$")])
+      (when np-minus-2 (.append legs (, "k:" "$n_f=n_p-2$")))
+      (when ref-ooa-2 (.append legs (, "k:" "OOA 2")))
+      (p.legend ax legs :o o)
+      (pl.ylabel "$\log_{10}$ $l_2$ relative error")
+      (p.title title o)))
+  (for [nstepfac (, 5) ic (, "gau")
+        ode (, "nondivergent")]
+    (assoc o :nstepfac nstepfac :ooa-text False :ode ode :cdrglb "caas-node"
+           :cdrlcl "caas" :ic ic :norm :l2 :timeint "interp"
+           :ref-cos-033 False :yform :semilogy :filter-floor 1e-11 :C-line :M)
+    (plot o "Physics grid source term accuracy:"
+          (fn [ax d o]
+            (sv ye [1e10 -1e10])  ; combined [min max] over all curves, used for the y limits
+            (defn update-ye [ye1]
+              (when (none? ye1) (return))  ; p.plot returns None when the data set is missing
+              (sv (get ye 0) (min (first ye) (first ye1))
+                  (get ye 1) (max (last ye) (last ye1))))
+            (for [suffix (, "" "-src")]  ; "" is the reference run; "-src" is the repeated-ic source-term run
+              (for [(, ipg pg)
+                    (enumerate (if (= suffix "")
+                                   (, 2)
+                                   (, 2
+                                      (fn [np] np)
+                                      (fn [np] (- np 2)))))]
+                (when (and (not np-minus-2) (= ipg 2)) (continue))
+                (assoc o :cdrglb "caas-node" :timeint "exact" :prefine 0
+                       :measure :l2 :ref-ooa-2 False :ic (+ ic suffix)
+                       :pg pg :pat-line (if (= suffix "")
+                                            "-"
+                                            (case/eq ipg [0  "--"] [1 "-."] [2 ":"])))
+                (sv ye1 (p.plot ax d o))
+                (update-ye ye1)
+                (assoc o :timeint "interp" :prefine 5 :ref-ooa-2 (= suffix ""))
+                (sv ye1 (p.plot ax d o))
+                (update-ye ye1)))
+            (pl.ylim (, (/ (first ye) 4) (nextpow10 (second ye)))))
+          :ref-ooa-2 True)))
+
+;;; filament diagnostic
+
+(defn fig-filament [c d-all]  ; writes figure "filament": a 2 x (len c.nps) grid, one row per grid kind
+  (defn get-pat [nstepfac ne]  ; colour/marker chosen by ne, line style by nstepfac
+    (+ (get {10 "b" 20 "r." 40 "k"} ne)
+       (get {1 "-" 5 "--"} nstepfac)))
+  (sv p (AccFig) o (p.get-defaults c)
+      nes (, 20 40)
+      degs (nes->degstrs nes))
+  (with [(pl-plot (, 10 4) (+ c.fig-dir "filament"))]
+    (for [(, igrid grid) (enumerate (, :v :t))]
+      (for [(, inp np) (enumerate c.nps)]
+        (sv spi (inc (+ (* igrid (len c.nps)) inp))  ; 1-based subplot index, row-major
+            ax (pl.subplot 2 (len c.nps) spi))
+        (for [(, ine ne) (enumerate nes)
+              nstepfac (, 1 5)]
+          (sv d (get d-all (if (= np 4) "exact" "interp") "nondivergent" nstepfac  ; np 4 reads the exact / caas / prefine 0 data set
+                     "pcsl" (if (= np 4) "caas" "caas-node") "caas"
+                     (if (= np 4) 0 5) np "cos" 1))
+          (when (none? (geton d ne)) (continue))
+          (sv thr (get d ne :L :thr)
+              fil (get d ne :L (if (or (= grid :v) (= np 4)) :fil :me-fil)))  ; :me-fil only on the :t row with np other than 4
+          (pl.plot thr fil (get-pat nstepfac ne)
+                   :label (.format "{} {}" (get (:deg degs) ine)
+                                   (nstepfac->word nstepfac)))
+          (pl.xticks [0 0.2 0.4 0.6 0.8 1] :fontsize (:fs o))
+          (pl.xlim (, 0.05 1))
+          (pl.yticks (if (< np 8)
+                         [0 20 40 60 80 100 120]
+                         [50 60 70 80 90 100 110])
+                     :fontsize (:fs o))
+          (my-grid)
+          (when (= spi (* 2 (len c.nps)))  ; the legend goes on the last subplot only
+            (pl.legend :loc "center left" :fontsize (:fs o) :frameon False))
+          (pl.ylim (case/in np
+                            [(, 9 12) (if (= grid :v) (, 75 115) (, 75 105))]
+                            [(, 8) (, 50 115)]
+                            [:else (, -5 125)]))
+          (sv yl (pl.ylim)
+              extra "")
+          (when (> np 4) (sv extra (.format " {} grid"
+                                            (if (= grid :v) "v" "tracer")))))
+        (pl.text 0.1 (+ (first yl) (* 0.05 (npy.diff yl)))  ; NOTE(review): yl and extra are set only if the inner loop plotted something
+                 (.format (if 0 "$n_p$ {}{}" "$n_p$ {} {}") np extra)
+                 :fontsize (+ (:fs o) 2))))))
+
+;;; mixing diagnostic
+
+(defn triplot-read-dat [fname]
+  ;; Read a binary file holding a native int count n followed by 2n native
+  ;; 4-byte floats. Returns (, cb ccb): the first n and the last n values,
+  ;; each as a float64 array.
+  (sv raw (with [f (open fname "rb")] (.read f))
+      n (first (struct.unpack "i" (cut raw 0 4)))
+      ;; Decode all 2n floats in one call instead of one struct.unpack per
+      ;; value; widening float32 to float64 is exact, so the values match.
+      data (.astype (npy.frombuffer raw :dtype "f4" :count (* 2 n) :offset 4)
+                    npy.float64))
+  (, (cut data 0 n) (cut data n (* 2 n))))
+
+(defn triplot [cb ccb &optional [data None]]  ; scatter of (cb, ccb) over a fixed outline; data adds text labels
+  (defn triplot-curve [x]  ; the curved edge of the outline: 0.9 - 0.8 x^2
+    (+ 0.9 (* -0.8 x x)))
+  (sv x (npy.linspace 0.1 1 100)
+      lw 2)
+  (pl.plot x (triplot-curve x) "k-" :lw lw)
+  (sv tl (triplot-curve (last x))  ; curve value at x = 1
+      br (triplot-curve (first x)))  ; curve value at x = 0.1
+  (pl.plot x (* (triplot-curve (first x)) (npy.ones (len x))) "k-" :lw lw)  ; horizontal edge at height br
+  (pl.plot (npy.ones (len x)) (npy.linspace tl br (len x)) "k-" :lw lw)  ; vertical edge at x = 1
+  (pl.plot x (npy.linspace br tl (len x)) "k-" :lw lw)  ; straight chord between the curve's end points
+  (pl.plot cb ccb "r." :markersize 1)
+  (pl.xlim 0.05 1.05)
+  (pl.ylim 0.05 0.95)
+  (sv t [0.2 0.4 0.6 0.8 1])
+  (pl.xticks t []) (pl.yticks t [])  ; tick positions kept, labels suppressed
+  (my-grid)
+  ;(pl.axis "off")
+  (defn text [x y txt sym me-sym]  ; label txt, the value data[sym] tagged "(v)", and data[me-sym] below it if present
+    (pl.text x (+ y 0.1) txt)
+    (sv dx 0.08)
+    (pl.text (+ x dx) (+ y 0.1) (.format "{:1.2e} (v)" (get data sym)))
+    (pl.text (+ x dx) y (if (in me-sym data)
+                             (.format "{:1.2e}" (get data me-sym))
+                             "")))
+  (unless (none? data)  ; data keys read here: 'np 'nstepfac 'lr 'me-lr 'lu 'me-lu
+    (pl.text 0.1 0.91 (.format "$n_p$ {}, {} step" (get data 'np)
+                               (nstepfac->word (get data 'nstepfac)))
+             :fontsize 12)
+    (sv x 0.1)  ; x is rebound from the sample array to the text column
+    (text x 0.32 "$l_r$" 'lr 'me-lr)
+    (text x 0.11 "$l_u$" 'lu 'me-lu)))
+
+(defn figs-mixing [c d]  ; writes PNG figures "mixing-ne20" and "mixing-ne40" under c.fig-dir
+  (sv p (AccFig) o (p.get-defaults c))
+  (for [ne (, 20 40)]
+    (with [(pl-plot (, 10 4) (+ c.fig-dir "mixing-ne" (str ne)) :format "png")]
+      (for [(, instepfac nstepfac) (enumerate (, 1 5))  ; one subplot row per nstepfac, one column per np
+            (, inp np) (enumerate (:nps o))]
+        (sv spi (inc (+ (* instepfac (len (:nps o))) inp))
+            ax (pl.subplot 2 (len (:nps o)) spi)
+            e (get d (if (= np 4) "exact" "interp") "nondivergent" nstepfac "pcsl"  ; np 4 reads the exact / caas / prefine 0 data set
+                   (if (= np 4) "caas" "caas-node") "caas" (if (= np 4) 0 5) np
+                   "cos" 1 ne :L)
+            (, cb ccb) (triplot-read-dat (+ c.data-dir (:mixing-file e)))
+            me-mixing (if (= np 4) :mixing :me-mixing)  ; np 4 reuses :mixing for the second set of values
+            data {'np np 'nstepfac nstepfac
+                  'lr (get e :mixing :lr) 'me-lr (get e me-mixing :lr)
+                  'lu (get e :mixing :lu) 'me-lu (get e me-mixing :lu)})
+        (unless (and (zero? (get e :mixing :lo)) (zero? (get e me-mixing :lo)))  ; report any nonzero :lo value
+          (prf "{}: lo {} {}" (:mixing-file e) (get e :mixing :lo)
+               (get e me-mixing :lo)))
+        (triplot cb ccb :data data)))))
+
+;;; slotted cylinders images
+
+(defn img-slo-filament [c d direc img-idx outname &optional nps nps-right]  ; grid of images read from direc, saved as a PDF named outname
+  (svifn nps (, 4 4 6 8) nps-right nps)  ; nps-right: which np values also get the ne 40 panels
+  (sv degs (nes->degstrs (, 20 40))
+      gap-for-colorbar (!= (len nps) (len nps-right))  ; colorbar placement depends on whether the two lists differ in length
+      fs 7)
+  (with [(pl-plot (, 7.3 (+ (if gap-for-colorbar 0 0.5) (len nps)))
+                  outname :format "pdf" :tight False)]
+    (sv spi 0 axs [])
+    (for [(, inp np) (enumerate nps)
+          (, ine ne) (enumerate (, 20 40))
+          nstepfac (, 1 5)]
+      (inc! spi)  ; counted before the skip below so the grid position stays fixed
+      (when (and (= ne 40) (not (in np nps-right))) (continue))
+      (sv c (% (dec spi) 4)  ; NOTE: c is rebound here from the context argument to the column index
+          r (// (dec spi) 4)
+          w (/ 1 4)
+          h (/ 1 (len nps))
+          ax (pl.axes [(* w c) (- 1 (* h r)) (* 0.95 w) h]))
+      (unless gap-for-colorbar (.append axs ax))
+      (sv timeint (if (= np 4) "exact" "interp")
+          cdrglb (if (and (= np 4) (= inp 0)) "caas" "caas-node")  ; only the first entry of nps, when it is np 4, uses "caas"
+          prefine (if (= np 4) 0 5)
+          fname (+ direc "/" (.format "ne{}-np{}-nstep{}-{}-{}-pr{}.bin"
+                                      ne np (* ne 6 nstepfac) timeint cdrglb prefine))
+          img (try (get (read-slmmir-io-arrays fname) img-idx)
+                   (except [e Exception] (print e "couldn't read" fname img-idx))))  ; on failure img is print's result, None
+      (unless (none? img)
+        (print ne nstepfac np (/ (- 0.1 (npy.min img)) 0.1) (- (npy.max img) 1))
+        (draw-slmmir-image img)
+        (pl.text (- (last img.shape) 500) (- (first img.shape) 80)
+                 (.format "$n_p$ {} {}" np (cdr-name cdrglb)) :fontsize fs)
+        (defn write [x] (if (zero? x) "0" (.format "{:1.1e}" x)))
+        (unless (none? d)
+          (try
+            (sv keys (, timeint "nondivergent" nstepfac "pcsl" cdrglb "caas" prefine
+                        np "slo" 1 ne)
+                e (get d #*(+ keys (, :Lerr))))
+            (pl.text 10 15 (.format (+ "$l_2$ {:1.1e} $l_\infty$ {:1.1e}\n"
+                                       "$\phi_{{min}}$ {:1.1e} $\phi_{{max}}$ {:1.1e}")
+                                    (:l2 e) (:li e) (:phimin e) (:phimax e))
+                     :fontsize fs)
+            (except [] (print "no data for" timeint "nondivergent" nstepfac "pcsl"  ; any failure here is only reported
+                              cdrglb "caas" prefine np "slo" 1 ne))))
+        (when (<= spi 4)  ; column headings above the first four panels
+          (pl.text (/ (second img.shape) 2) (+ 30 (first img.shape))
+                   (.format "{}, {} step" (get (:deg degs) ine)
+                            (nstepfac->word nstepfac))
+                   :ha "center" :fontsize (inc fs)))))
+    (sv bdys (npy.linspace -0.05 1.05 23))
+    (if gap-for-colorbar
+        (do
+          (sv c 2 r (+ 0.5 (- (len nps) 2))  ; reuses w and h left over from the loop above
+              ax (pl.axes [(* w c) (- 1 (* h r)) (* 2 0.95 w) (* 0.2 h)])
+              c (pl.colorbar :cax ax :orientation "horizontal" :aspect 30 :shrink 0.9
+                             :ticks (npy.linspace 0 1.1 12)
+                             :boundaries bdys)))
+        (sv c (pl.colorbar :ax axs :orientation "horizontal" :aspect 50 :shrink 0.7
+                           :ticks (npy.linspace 0 1.1 12) :pad 0.025
+                           :boundaries bdys)))
+    (c.ax.tick-params :labelsize (inc fs))))  ; c is the colorbar at this point
+
+(defn img-slocyl-slide [c d direc img-idxs outname &optional [ylabel False]]  ; img-idxs: one image index per column
+  ;; 3 cols: IC above vertical colorbar, midpoint, endpoint
+  (sv ne 20 deg (first (:deg (nes->degstrs (, 20)))) nstepfac 5 fs 7
+      times (, "start" "middle" "end")
+      w (/ 1 3)
+      h (/ 1 (len c.nps)))
+  (with [(pl-plot (, 5.5 (len c.nps)) outname :tight False)]
+    (for [col (range 3)
+          row (range (len c.nps))]
+      (when (and ylabel (zero? col))
+        ((. (pl.gcf) text)
+         -0.01 (- 1 (* (- row 0.5) h))
+         (if (zero? row) "Standard" "$p$-refined")
+         :fontsize (+ fs 2) :rotation 90 :ha "right" :va "center"))
+      (when (and (zero? col) (not (zero? row))) (continue))  ; column 0 holds an image in its first row only
+      (sv ax (pl.axes [(* w col) (- 1 (* h row)) (* 0.95 w) h])
+          np (nth c.nps row)
+          timeint (if (= np 4) "exact" "interp")
+          cdrglb (if (and (= np 4) (= row 0)) "caas" "caas-node")  ; row is this panel's index into c.nps
+          prefine (if (= np 4) 0 5)
+          fname (+ direc "/" (.format "ne{}-np{}-nstep{}-{}-{}-pr{}.bin"
+                                      ne np (* ne 6 nstepfac) timeint cdrglb prefine))
+          img (try (get (read-slmmir-io-arrays fname) (nth img-idxs col))
+                   (except [e Exception]
+                     (print e "couldn't read" fname (nth img-idxs col)))))
+      (when (none? img) (continue))  ; a failed read was already reported; leave this panel empty
+      (when (zero? col) (sv (get img (= img 1)) (- 1 1e-16)))  ; overwrite entries equal to 1 in the first-column image
+      (draw-slmmir-image img)
+      (pl.text (- (last img.shape) 500) (- (first img.shape) 80)
+               (.format "$n_p$ {} {}" np (cdr-name cdrglb)) :fontsize fs)
+      (when (zero? row)  ; column headings on the top row
+        (pl.text (/ (second img.shape) 2) (+ 30 (first img.shape))
+                 (.format "{}, {} step, {}" deg (nstepfac->word nstepfac)
+                          (nth times col))
+                 :ha "center" :fontsize (inc fs))))
+    (sv bdys (npy.linspace -0.05 1.05 23)
+        col 0 row (dec (len c.nps)))
+    (if (= (len c.nps) 2)  ; two rows: horizontal colorbar; otherwise a vertical one in column 0
+        (sv ax (pl.axes [(* w col) (- 1 (* 0.5 h row)) (* 0.95 w) (* 0.15 h)])
+            cb (pl.colorbar :cax ax :orientation "horizontal" :aspect 30 :shrink 0.9
+                            :ticks (npy.linspace 0 1 6)
+                            :boundaries bdys))
+        (sv col 0 row (dec (len c.nps))
+            ax (pl.axes [(* w (+ col 0.35)) (- 1.02 (* h row)) (* 0.1 w)
+                         (* (dec (len c.nps)) 0.95 h)])
+            cb (pl.colorbar :cax ax :orientation "vertical" :aspect 30 :shrink 0.9
+                            :ticks (npy.linspace 0 1 11)
+                            :boundaries bdys)))
+    (cb.ax.tick-params :labelsize (inc fs))))
+
+;;; toy chemistry
+
+(defn toychem-diagnostic-parse [fname]
+  ;; Parse a toy-chemistry diagnostic log into a nested dict. Each "cmd>" line
+  ;; starts a new entry keyed by (cmd->key-base c) plus the command's :ne, and
+  ;; the entry holds :cmd (the raw line) and the lists :l2 and :li, which are
+  ;; filled from the "toy " lines that follow. Returns the dict.
+  (sv txt (.split (readall fname) "\n")
+      d {}
+      skip 0 l2s [] lis [])
+  ;; Count the lines that precede the first one containing "cmd> 1".
+  (for [ln txt]
+    (when (in "cmd> 1" ln) (break))
+    (inc! skip))
+  (for [ln (cut txt skip)]
+    (cond [(in "cmd>" ln)
+           ;; The previous command must have produced either no samples or
+           ;; exactly 576*10 of them.
+           (assert (or (empty? l2s) (= (len l2s) (* 576 10))))
+           (sv cmd ln c (parse-cmd ln)
+               key (+ (cmd->key-base c) (, (:ne c)))
+               l2s [] lis [])
+           (unless (none? (geton d #*key))
+             ;; we collected pg<np data but only care about the pg=np data.
+             (print "overwriting:\n" (get d #*(+ key (, :cmd)))))
+           (assoc-nested d (+ key (, :cmd)) cmd)
+           ;; The fresh lists are stored now and appended to in place below.
+           (assoc-nested d (+ key (, :l2)) l2s)
+           (assoc-nested d (+ key (, :li)) lis)]
+          ;; cyc is recorded here but not read again in this function.
+          [(in "C cycle" ln) (sv cyc (int (last (.split ln))))]
+          ;; Any other line starting with "C " is consumed and ignored.
+          [(= "C " (cut ln 0 2))]
+          [(in "toy " ln)
+           ;; Scan from "toy" onward as string, float, string, float, float;
+           ;; only the last two values (l2, li) are kept.
+           (sv pos (.find ln "toy")
+               (, - masscons - l2 li) (sscanf (cut ln pos) "s,f,s,f,f"))
+           (.append l2s l2)
+           (.append lis li)]))
+  d)
+
+(defn fig-toychem-diagnostic [c d]
+  ;; Plot the l2 (solid) and l_inf (dotted) toy-chemistry diagnostic series
+  ;; against cycle number on a semilog-y axis, one color per np, and save it
+  ;; as "toychem-diagnostic" under c.fig-dir. d is a nested dict with :l2 and
+  ;; :li lists at the key path used below.
+  (sv npa npy.array
+      p (AccFig) o (p.get-defaults c))
+  (with [(pl-plot (:figsize o) (+ c.fig-dir "toychem-diagnostic"))]
+    (sv ax (pl.subplot 1 1 1))
+    ;; (if 0 ...) always selects the second tuple; the first is a debugging
+    ;; shortcut left in place.
+    (for [npi (if 0 (, 8) (, 4.1 6 8 9 12))]
+      ;; NOTE(review): npi 4.1 floors to np 4 but fails the (= npi 4) test, so
+      ;; the np 4 curve is looked up under "caas-node" rather than "caas".
+      ;; This looks deliberate -- confirm.
+      (sv np (math.floor npi)
+          e (get d (if (< npi 5) "exact" "interp") "nondivergent" 3 "pcsl"
+                 (if (= npi 4) "caas" "caas-node") "caas" (if (< np 5) 0 5)
+                 np np 30)
+          l2 (npa (:l2 e))
+          li (npa (:li e))
+          ;; n samples per cycle: x is the sample index in units of cycles, and
+          ;; markers are drawn only on every n-th sample.
+          n 576
+          x (/ (npa (list (range (len l2)))) n)
+          subset (list (range 0 (len l2) n))
+          xsparse (/ (npa subset) n)
+          clr (get c.npclrs np)
+          mrk (get c.npmarks np))
+      (pl.semilogy x l2 (+ clr "-") x li (+ clr ":")
+                   xsparse (get l2 subset) (+ clr mrk)
+                   xsparse (get li subset) (+ clr mrk)
+                   :fillstyle "none" :lw (:lw o) :markersize (:markersize o)))
+    (pl.ylabel "$\log_{10}$ Toy chemistry diagnostic" :fontsize (:fs o))
+    (pl.xlabel "Cycles" :fontsize (:fs o))
+    ;; The #_ prefix discards the following form, so no tick label is blanked.
+    (svb (x (list (range 0 11)))
+         (xt (cut x))
+         #_((get xt 10) ""))
+    (pl.xticks x xt :fontsize (:fs o))
+    ;; Tick positions are powers of ten; the labels are the exponents.
+    (sv y (npa (list (range -16 1))))
+    (pl.yticks (** 10.0 y) y :fontsize (:fs o))
+    (pl.title (+ "Toy chemistry diagnostic:\n"
+                 "nondivergent flow, 1$^\circ$, 576 time steps/cycle\n"
+                 "$p$-refinement, property preservation")
+              :fontsize (:fs o))
+    (p.legend ax (, (, "k-" "$l_2$") (, "k:" "$l_\infty$"))
+              :o o :bbox (, 0 0.8))
+    (pl.ylim (, (** 10.0 -16) (** 10.0 -3)))
+    (pl.xlim (, -0.2 10.2))
+    (my-grid)))
+
+(defn img-toychem [c img-files outname &optional [diagnostic False]]
+  ;; Draw one image per file in img-files on a 2-column grid and save the
+  ;; figure to outname. With diagnostic false, the second-to-last array of each
+  ;; file is shown and one shared colorbar is added. With diagnostic true, the
+  ;; relative deviation of the sum of the last two arrays from xtbar is shown
+  ;; with a symmetric color range and a colorbar per image.
+  ;; NOTE(review): the parameter c is rebound inside the body (column index,
+  ;; then colorbar); the caller's context object is never read here.
+  (defn parse-filename [fname]
+    ;; Derive label fields from substrings of the file name. "nondiv" is tested
+    ;; before "div" because the latter is a substring of the former; likewise
+    ;; "caas-node" before "caas".
+    (sv ode (cond [(in "nondiv" fname) "nondivergent flow"]
+                  [(in "div" fname) "divergent flow"]
+                  [:else "unknown-ode"])
+        ;; NOTE(review): there is no fallback branch, so a name matching
+        ;; neither pattern yields no value to destructure.
+        (, np pgtype pg) (cond [(in "np8pg8" fname) (, 8 "$n_f$" 8)]
+                               [(in "np4pg0" fname) (, 4 "$n_p$" 4)])
+        cdrtype (cond [(in "caas-node" fname) "CAAS-point"]
+                      [(in "caas" fname) "CAAS-CAAS"])
+        res (cond [(in "ne30" fname) "1$^\circ$"]
+                  [:else "unknown-res"]))
+    {:np np :pgtype pgtype :pg pg :dt "30min" :res res :ode ode :cdr cdrtype})
+  (sv npa npy.array p (AccFig) o (p.get-defaults)
+      ;; nrow is the ceiling of (number of files) / ncol.
+      ncol 2 nrow (// (+ (len img-files) ncol -1) ncol)
+      vmin 0 vmax 4.5e-6 ticks (npy.linspace vmin vmax 10)
+      ncolor (if diagnostic 8 18) fs 12
+      xtbar 4e-6)
+  (with [(pl-plot (, (* 3.65 ncol) (+ (* 2 nrow) 0.25)) outname :tight False)]
+    (sv axs [])
+    (for [spi (range (len img-files))]
+      (sv c (% spi ncol) r (// spi ncol)
+          w (/ 1 ncol) h (/ 1 nrow)
+          ax (pl.axes [(* w c) (- 1 (* h r)) (* 0.95 w) h])
+          img (read-slmmir-io-arrays (nth img-files spi))
+          img (if diagnostic
+                  (/ (- (+ (nth img (- (len img) 2))
+                           (nth img (- (len img) 1)))
+                        xtbar)
+                     xtbar)
+                  (nth img (- (len img) 2))))
+      (sv imin (npy.min img) imax (npy.max img))
+      ;; the toy chem paper draws images circle-shifted by 1/2 the image
+      ;; relative to the bakeoff and test suite paper. do that here using
+      ;; :switch-halves False if desired.
+      (sv sh True)
+      (if diagnostic
+          (do
+            ;; Symmetric range around zero, capped at thr.
+            (sv vmax (max (- imin) imax)
+                thr (/ 0.3e-6 xtbar)
+                vmax (if (> vmax thr) thr vmax)
+                vmin (- vmax)
+                ticks (npy.linspace vmin vmax 5))
+            (draw-slmmir-image img :vmin vmin :vmax vmax :ncolor ncolor
+                               :colorsym True :switch-halves sh))
+          (draw-slmmir-image img :vmin vmin :vmax vmax :ncolor ncolor
+                              :switch-halves sh))
+      (sv d (parse-filename (nth img-files spi)))
+      (pl.text 10 70
+               (.format "$n_p$ {} {} {} {}"
+                        (:np d) (:pgtype d) (:pg d) (:cdr d))
+               :fontsize fs)
+      (pl.text (- (last img.shape) 500) (- (first img.shape) 60)
+               (.format "min {:8.1e} max {:8.1e}" imin imax)
+               :fontsize (dec fs))
+      (.append axs ax)
+      (when diagnostic
+        (sv c (pl.colorbar :ax ax :orientation "horizontal" :aspect 30 :shrink 0.8
+                           :pad 0.05 :ticks ticks))
+        (c.ax.tick-params :labelsize fs)
+        (c.ax.set-xticklabels (lfor e ticks (.format "{:1.1e}"  e)))))
+    ;; The figure title reuses img and d from the last loop iteration, so
+    ;; img-files must be non-empty.
+    (pl.text -20 (+ 15 (first img.shape))
+             (.format "Toy chemistry: {}, $\Delta t$ {}, {}"
+                      (:res d) (:dt d) (:ode d))
+             :ha "center" :fontsize fs)
+    (unless diagnostic
+      (sv c (pl.colorbar :ax axs :orientation "horizontal" :aspect 50 :shrink 0.8
+                         :pad 0.05 :ticks ticks))
+      (c.ax.tick-params :labelsize fs)
+      (c.ax.set-xticklabels (lfor e ticks (.format "{:1.1e}"  e))))))
+
+;;; ISL comm footprint
+
+(defn parse-footprint [fname]
+  ;; Parse a communication-footprint log. For every "footprint>" line that
+  ;; follows a "cmd>" line, four values (int, int, float, int) are appended to
+  ;; the lists stored under the command's key plus :min, :median, :mean and
+  ;; :max respectively. Returns the nested dict.
+  (sv d {}
+      txt (.split (readall fname) "\n")
+      c None cnt 0)
+  (for [ln txt]
+    (sv ln4 (cut ln 0 4))
+    (cond [(= ln4 "cmd>")
+           ;; NOTE(review): dont is a project macro; presumably it disables
+           ;; the wrapped debugging form -- confirm in amb3.
+           (dont unless (none? c)
+             (print cmd)
+             (print cnt))
+           (sv cmd ln c (parse-cmd ln) cnt 0)]
+          ;; Footprint lines seen before the first command are ignored.
+          [(and (not (none? c)) (in "footprint>" ln))
+           ;; The values start right after the "t>" that ends "footprint>".
+           (sv pos (.find ln "t>")
+               vals (sscanf (cut ln (+ pos 2)) "i,i,f,i"))
+           (for [(, isym sym) (enumerate (, :min :median :mean :max))]
+             (assoc-nested-append d (+ (cmd->key-base c) (, (:ne c) sym))
+                                  (nth vals isym)))
+           (inc! cnt)]))
+  d)
+
+(defn fig-comm-footprint [c d]
+  ;; Plot the median and max footprint series from d for np 4, 8 and 12 in two
+  ;; side-by-side axes (nstepfac 1 and 5) and save the figure as
+  ;; "isl-footprint" under c.fig-dir. A dotted horizontal line at np^2 is drawn
+  ;; for each np as a reference.
+  (sv npa npy.array
+      p (AccFig) o (p.get-defaults c))
+  (assoc o :nps (, 4 8 12) :lw 1.5)
+  (with [(pl-plot (:figsize o) (+ c.fig-dir "isl-footprint") :tight False)]
+    ;; Only the :median and :max styles are used below.
+    (sv pat-line {:min ":" :median "-" :max "--"})
+    (for [(, spi nstepfac) (enumerate (, 1 5))]
+      (sv ax (pl.axes (, (/ spi 2) 0 0.41 0.8)))
+      (for [np (:nps o)]
+        (sv e (get d (if (= np 4) "exact" "interp") "nondivergent" nstepfac "pcsl"
+                   (if (= np 4) "caas" "caas-node") "caas" (if (= np 4) 0 5)
+                   np ; accidentally ran with pg; no effect but changes the dict nesting
+                   np 30)
+            ;; Map the sample index onto [0, 12) for the "Days" axis.
+            x (* 12 (/ (npa (list (range (len (get e :min))))) (len (get e :min))))
+            ;; Markers go on a subsample of about five points, starting half
+            ;; a stride in.
+            step (// (len x) 5)
+            s (slice (// step 2) -1 step)
+            xsparse (get x s))
+        (for [sym (, :median :max)]
+          (sv y (npa (get e sym))
+              ysparse (get y s))
+          ;; The np^2 reference line is redrawn on each pass of this loop.
+          (pl.plot [(first x) (last x)] (* (** np 2) (npa [1 1])) (+ (get c.npclrs np) ":"))
+          (pl.plot x y
+                   (+ (get c.npclrs np) (get pat-line sym))
+                   xsparse ysparse
+                   (+ (get c.npclrs np) (get c.npmarks np)) :fillstyle "none"))
+        (my-grid)
+        (pl.xticks (, 0 3 6 9 12) :fontsize (:fs o))
+        (pl.yticks (npy.linspace 0 160 17) :fontsize (:fs o))
+        (pl.ylim (, 0 160))
+        (pl.xlabel "Days" :fontsize (:fs o))
+        (pl.title (.format "{} time step" (get {1 "Long" 5 "Short"} nstepfac))
+                  :fontsize (:fs o)))
+      ;; The right axes get the line-style legend; the left axes get a legend
+      ;; call with no extra entries.
+      (cond [(= nstepfac 5)
+             (p.legend ax (, (, (+ "k" (get pat-line :max)) "max")
+                             (, (+ "k" (get pat-line :median)) "median")
+                             (, (+ "k" ":") "$n_p^2$ reference"))
+                       :o o :bbox (, 0 0.75) :nps-legend False)]
+            [(= nstepfac 1)
+             (p.legend ax (, ) :o o)])
+      ;; Overall title, placed in the data coordinates of the left axes.
+      (when (= nstepfac 1)
+        (pl.text 13 172
+                 (+ "Islet: Number of transmitted scalars per tracer per element\n"
+                    "Nondivergent flow")
+                 :ha "center" :fontsize (:fs o))))))
+
+;;; miscellaneous figs likely not to go in the paper
+
+(defn img-slo-cyl-tracer-grid [c direc outname &optional [ne 10]]
+  ;; Draw a 3x3 grid of slmmir images: rows are the arrays at indices 1, 3 and
+  ;; 5 of each file, columns are three (np, timeint, cdr, prefine)
+  ;; configurations read from direc. Only the bottom row is annotated.
+  ;; NOTE(review): the parameter c is shadowed by the loop variable below and
+  ;; is not otherwise read.
+  (defn make-fname [direc ne np nstepfac timeint cdrglb prefine]
+    (+ direc "/" (.format "ne{}-np{}-nstep{}-{}-{}-pr{}.bin"
+                          ne np (* ne 6 nstepfac) timeint cdrglb prefine)))
+  (sv nstepfac 5)
+  ;; NOTE(review): degs is computed for ne 10 regardless of the ne argument
+  ;; and is not used afterwards.
+  (sv degs (nes->degstrs (, 10))
+      fs 7)
+  (with [(pl-plot (, 8.25 4.5) outname)]
+    (sv imgss [] nps [])
+    (for [c (, (, 4 "exact" "caas" 0)
+               (, 16 "interp" "caas-node" 5)
+               (, 16 "interp" "caas-node" 1))]
+      (.append nps (first c))
+      (.append imgss (read-slmmir-io-arrays (make-fname direc ne (first c)
+                                              nstepfac (second c) (nth c 2) (last c)))))
+    (sv spi 0)
+    (for [idx (, 1 3 5)
+          (, i imgs) (enumerate imgss)]
+      (sv np (nth nps i)
+          img (nth imgs idx))
+      (pl.subplot 3 3 (inc! spi))
+      ;; For the first array, values exactly equal to 1 are nudged just below 1.
+      (when (= idx 1)
+        (sv (get img (= img 1)) (- 1 1e-16)))
+      (draw-slmmir-image img)
+      ;; Subplots 7-9 form the bottom row.
+      (when (>= spi 7)
+        (pl.text 10 25 (.format (+ "$n_e$ {} $n_p$ {} {} time step\n"
+                                   "{}")
+                                ne np (if (= nstepfac 5) "short" "long")
+                                (case/in spi
+                                         [(, 7 8) "on dynamics grid"]
+                                         [:else "on tracer grid"]))
+                 :fontsize fs)))))
+
+;;; drivers
+
+;; Each when-inp form below names a command and its typed arguments, then
+;; builds the context, parses the named data file or directory relative to
+;; c.data-dir, and calls the matching figure routine. when-inp is a project
+;; macro; presumably it runs its body only when that command is given on the
+;; command line -- confirm in amb3.
+(when-inp ["acc-print-txt-table" {:fname str}]
+  (sv c (get-context)
+      d (acc-parse (+ c.data-dir fname)))
+  ;; Stability-comparison data files are restricted to the "gau" initial
+  ;; condition.
+  (when (in "stab-cmp" fname) (sv c.ics (, "gau")))
+  (acc-print-txt-table c d))
+
+(when-inp ["fig-stab-cmp" {:fname str}]
+  (sv c (get-context)
+      d (acc-parse (+ c.data-dir fname)))
+  (fig-stab-cmp c d))
+
+(when-inp ["figs-acc" {:fname str}]
+  (sv c (get-context)
+      d (acc-parse (+ c.data-dir fname)))
+  (figs-acc c d))
+
+(when-inp ["fig-midpoint" {:fname str}]
+  (sv c (get-context)
+      d (acc-parse (+ c.data-dir fname)))
+  (fig-acc-midpoint-check c d))
+
+(when-inp ["fig-filament" {:fname str}]
+  (sv c (get-context)
+      d (acc-parse (+ c.data-dir fname)))
+  (fig-filament c d))
+
+(when-inp ["figs-mixing" {:fname str}]
+  (sv c (get-context)
+      d (acc-parse (+ c.data-dir fname)))
+  (figs-mixing c d))
+
+;; Two image sets: the midpoint images are drawn without parsed data (None),
+;; the final-point images with it.
+(when-inp ["img-filament" {:fname str :direc str}]
+  (sv c (get-context)
+      c.nps (, 4 6 8)
+      d (acc-parse (+ c.data-dir fname)))
+  (img-slo-filament c None (+ c.data-dir direc) 3 (+ c.fig-dir "slo-midpoint")
+                    :nps (, 4 6 8 12) :nps-right (, 4 6 8))
+  (img-slo-filament c d (+ c.data-dir direc) 5 (+ c.fig-dir "slo-finpoint")
+                    :nps (, 4 6 8 12) :nps-right (, 4 6 8)))
+
+;; A full slide with four np rows, then a brief two-row version with row
+;; labels.
+(when-inp ["img-filament-slide" {:direc str}]
+  (sv c (get-context)
+      c.nps (, 4 6 8 12))
+  (img-slocyl-slide c None (+ c.data-dir direc) (, 1 3 5)
+                    (+ c.fig-dir "slo-imgs-slide"))
+  (sv c.nps (, 4 12))
+  (img-slocyl-slide c None (+ c.data-dir direc) (, 1 3 5)
+                    (+ c.fig-dir "slo-imgs-slide-brief")
+                    :ylabel True))
+
+(when-inp ["fig-pg-mimic-src-term" {:fname str}]
+  (sv c (get-context)
+      d (acc-parse (+ c.data-dir fname)))
+  (fig-acc-mimic-src-term-midpoint c d))
+
+(when-inp ["fig-toychem-diagnostic" {:fname str}]
+  (sv c (get-context)
+      d (toychem-diagnostic-parse (+ c.data-dir fname)))
+  (fig-toychem-diagnostic c d))
+
+;; Uses the first file matching *np4*bin and *np8*bin in the directory; an
+;; empty glob result is not handled.
+(when-inp ["fig-toychem-finpoint" {:direc str}]
+  (sv c (get-context)
+      d (+ c.data-dir direc "/")
+      img-files [])
+  (for [np (, 4 8)]
+    (.append img-files (first (glob.glob (+ d (.format "*np{}*bin" np))))))
+  (img-toychem c img-files (+ c.fig-dir "toychem-finpoint"))
+  (img-toychem c img-files (+ c.fig-dir "toychem-finpoint-diagnostic")
+               :diagnostic True))
+
+(when-inp ["fig-comm-footprint" {:fname str}]
+  (sv c (get-context)
+      d (parse-footprint (+ c.data-dir fname)))
+  (fig-comm-footprint c d))
diff --git a/methods/islet/figures/figs-methods.hy b/methods/islet/figures/figs-methods.hy
new file mode 100644
index 0000000..cbd9107
--- /dev/null
+++ b/methods/islet/figures/figs-methods.hy
@@ -0,0 +1,584 @@
+(require [amb3 [*]])
+(import amb3 [amb3 [*]] [figsutils :as futils]
+        [scipy.linalg :as linalg]
+        math re poly)
+
+;; Save figures at 300 dpi.
+(assoc matplotlib.rcParams "savefig.dpi" 300)
+;; pl-require-type1-fonts is defined outside this file; presumably it
+;; configures matplotlib to emit Type 1 fonts -- confirm in amb3.
+(do (pl-require-type1-fonts))
+
+;;; tables
+
+(defn read-last-array [ln]
+  ;; Return the last brace-delimited array literal on the line ln as a Python
+  ;; list, or the empty match list when the line contains no braces. The
+  ;; regex is greedy, so a match runs from the first "{" to the last "}" on
+  ;; the line.
+  (sv arr (re.findall "{.*}" ln))
+  (when (empty? arr) (return arr))
+  ;; Rewrite the C-style initializer into Hy list syntax, then read and
+  ;; evaluate it.
+  ;; NOTE(review): eval runs text taken from the input file; only use this on
+  ;; trusted files.
+  (as-> (last arr) it
+        (.replace it "," " ")
+        (.replace it "{" "[")
+        (.replace it "}" "]")
+        (read-str it)
+        (eval it)))
+
+(defn parse-cpp-methods [fname]  
+  ;; Scan a text dump of the C++ method definitions line by line and build a
+  ;; dict keyed by (xnodes, np). Each value records :subtype, :subnp and either
+  ;; :offst (subtype 'offset) or :nodes (subtype 'general). The parser is a
+  ;; small state machine: earlier lines set xnodes, np, subnp, offst and nodes,
+  ;; and a line containing "eval" commits the current state.
+  (sv txt (.split (readall fname) "\n")
+      d {})
+  (for [ln txt]
+    (cond [(in "xnodes: " ln)
+           (sv xnodes (cond [(in "GLL" ln) 'gll]
+                           [(in "Uniform" ln) 'uniform]))]
+          [(in "case " ln)
+           (sv np (int (first (re.findall "case (\d*):" ln))))]
+          ;; A subnp[] line starts a new method, so the node-subset
+          ;; accumulator is reset here.
+          [(in "subnp[]" ln)
+           (sv subnp (read-last-array ln)
+               nodes [])]
+          [(in "offst[]" ln)
+           (sv offst (read-last-array ln)
+               subtype 'offset)]
+          ;; Any other line carrying an array literal, except one mentioning
+          ;; "nodes[]", is one row of an explicit nodal-subset list. The test
+          ;; itself binds arr as a side effect for use in the branch body.
+          [(and (not (in "nodes[]" ln))
+                (not (empty? (do (sv arr (read-last-array ln))
+                                 arr))))
+           (sv subtype 'general)
+           (.append nodes arr)]
+          [(in "eval" ln)
+           (assoc-nested d (, xnodes np)
+                         (if (= subtype 'general)
+                             {:subtype subtype :subnp subnp :nodes nodes}
+                             {:subtype subtype :subnp subnp :offst offst}))]))
+  d)
+
+(defn make-array-str [a]
+  ;; Render the sequence a as a LaTeX brace-delimited list, e.g. \{1, 2, 3\}.
+  (setv body (str.join ", " (lfor elem a (str elem))))
+  (+ "\{" body "\}"))
+
+(defn make-subsets-str [ns]
+  ;; Build a one-column LaTeX tabular listing the nodal subsets in ns, one
+  ;; brace-delimited array per row, with the whole list wrapped in an outer
+  ;; pair of braces.
+  (sv s "\n\\begin{tabular}{l}\n\!\!\!nodal subsets \\\\\n\{")
+  ;; for-first-last is a project macro; first? and last? are used below as
+  ;; flags for the first and last iteration. Rows after the first start with a
+  ;; phantom brace so they line up under the first one; the last row closes
+  ;; the outer brace instead of ending with a comma and line break.
+  (for-first-last
+    [n ns]
+    (+= s
+        (if first? "" "\\phantom{\{}")
+        (make-array-str n)
+        (if last? "\}\n" ",\\\\\n")))
+  (+ s "\end{tabular}"))
+
+(defn write-methods-latex-tables [d xnodes fname]
+  ;; Write a LaTeX table of the methods stored in d under the node type xnodes
+  ;; to the file fname. There is one row per np, with columns for np, order of
+  ;; accuracy, the subnp array, and either the offsets or the explicit nodal
+  ;; subsets.
+  (sv col-hdrs (, "$\\numg$" "OOA" "$\\npsub$" "Supports")
+      d (get d xnodes)
+      nps (list (.keys d)))
+  (with [f (open fname "w")]
+    (defn write [s] (.write f s))
+    (write "\\begin{center}\n")
+    (write "\\begin{tabular}{r|c|l|l}\n")
+    (write (+ (str.join " & " col-hdrs) " \\\\\n\\hline\n"))
+    ;; The GLL table gets a fixed first row for np 4 that refers to the text.
+    (when (= xnodes 'gll)
+      (write "4 & 2 & see text & see text \\\\\n\\hline\n"))
+    (for [np nps]
+      (sv e (get d np))
+      ;; nodes-str is computed but not written to the table. The order of
+      ;; accuracy is reported as the smallest subnp minus one.
+      (svb (np-str (str np))
+           (nodes-str (case/eq xnodes ['gll "GLL"] ['uniform "Uniform"]))
+           (ooa-str (str (dec (min (:subnp e)))))
+           (npsub-str (make-array-str (:subnp e)))
+           (subsets-str (if (= (:subtype e) 'offset)
+                            (+ "offsets " (make-array-str (:offst e)))
+                            (make-subsets-str (:nodes e)))))
+      (write (+ (str.join " & " (, np-str ooa-str npsub-str subsets-str))
+                " \\\\\n"))
+      ;; Rows are separated by \hline; none follows the last row.
+      (unless (= np (last nps)) (write "\\hline\n")))
+    (write "\\end{tabular}\n")
+    (write "\\end{center}\n")))
+
+;;; illustrations
+
+(defn illustrate-grids [img-fname]
+  ;; Draw one reference element [-1,1]^2 with its outline, a dashed nf-by-nf
+  ;; uniform subcell grid, and the tensor-product GLL points for np 4 (large
+  ;; circles) and np 8 (red dots). The figure is saved as a PDF.
+  (defn draw-gll-dots [np marker]
+    ;; X and Y are np-by-np arrays holding the tensor-product point
+    ;; coordinates, formed as outer products with a vector of ones.
+    (sv xgll (col-vec (poly.get-gll-x np))
+        e (npy.ones (, np 1))
+        X (npy.dot e (.transpose xgll))
+        Y (npy.dot xgll (.transpose e)))
+    (pl.plot X Y marker :markersize 12))
+  (sv nf 6 elc "k")
+  (with [(pl-plot (, 5 5) img-fname :format "pdf")]
+    ;; Element outline: four edges. (if 0 ...) always selects the second color.
+    (pl.plot [-1 1] [1 1] elc [-1 1] [-1 -1] elc [1 1] [-1 1] elc [-1 -1] [-1 1] elc
+               :linewidth 2
+               :color (if 0 "#1D4BFA" "#3838FF"))
+    (sv lw 2
+        line "g--"
+        d 0.99 nd (- d))
+    ;; Pass 0 draws the vertical grid lines and pass 1 the horizontal ones.
+    ;; Lines on the element boundary are pulled in to +/-0.99.
+    (for [it (range 2)]
+      (for [i (range (inc nf))]
+        (sv x (as-> (- (* 2 (/ i nf)) 1) x
+                    (if (= x -1) nd
+                        (= x 1) d
+                        x)))
+        (if (zero? it)
+            (pl.plot [x x] [nd d] line :linewidth lw)
+            (pl.plot [nd d] [x x] line :linewidth lw))))
+    (draw-gll-dots 4 (+ elc "o"))
+    (draw-gll-dots 8 "r.")
+    (pl.axis "equal")
+    (pl.axis "off")))
+
+(defn draw-np4-schematic [img-fname]
+  ;; Two-panel schematic of the np 4 basis. The top panel shows the convex
+  ;; combination parameter over the two outer regions and carries the legend.
+  ;; The bottom panel shows the four basis functions for three methods:
+  ;; dotted (method 0), dashed (method 1) and solid (method 3).
+  (import islet)
+  (sv np 4 fs 12
+      x-gll (poly.get-gll-x np)
+      nx 256
+      x (npy.linspace -1 1 nx)
+      isl (islet.Islet)
+      ;; yn, yo and yb are transposed so that (get y i) is basis function i.
+      (, yn yo yb) (lfor m (, 0 1 3) (.transpose (isl.eval m np x)))
+      clrs "kgrb"
+      d 1.04)
+  (with [(pl-plot (, 4 4) img-fname :tight True)]
+    (pl.axes [0 0.6 1 0.4])
+    ;; alpha is a quadratic through (0, 0.306, 1) at the 3-point GLL nodes,
+    ;; evaluated on the interval from the third GLL node to 1 after mapping
+    ;; that interval to [-1, 1]. It is also plotted mirrored about x = 0.
+    (sv xsub (npy.linspace (nth x-gll 2) 1 (// nx 4))
+        xscale (- (* 2 (/ (- xsub (nth x-gll 2)) (- 1 (nth x-gll 2)))) 1)
+        alpha (poly.eval-lagrange-poly (poly.get-gll-x 3) (, 0 0.306 1) xscale))
+    (pl.plot (- (get xsub (s-all-rev))) (get alpha (s-all-rev)) "k-."
+             xsub alpha "k-.")
+    ;; Points plotted at (-2, -2) lie outside the axis limits set below; they
+    ;; exist only to create legend entries.
+    (do
+      (pl.plot -2 -2 "k:"  :label "Natural")
+      (pl.plot -2 -2 "k--" :label "Offset nodal subset: $\{3,4\}$, $\{0,0\}$")
+      (pl.plot -2 -2 "k-"  :label "Optimized")
+      (pl.plot -2 -2 "k-." :label "Convex combination parameter")
+      (pl.legend :loc "center" :fontsize fs :bbox-to-anchor (, 0.49 0.64)
+                 :frameon False))
+    (my-grid)
+    (axis-tight-pad :pad 0)
+    (pl.xlim (, (- d) d))
+    (pl.ylim (, -0.05 1.05))
+    (pl.yticks (, 0 0.5 1) :fontsize fs)
+    (pl.plot (nth x-gll 2) 0 "ko" 1 1 "ko")
+    (pl.text 0.57 1 "Fully $n_p$ 4" :fontsize fs :va "top")
+    (pl.text 0.05 0 "Fully $n_p$ 3" :fontsize fs)
+    (pl.title "Optimized $n_p$ 4 basis" :fontsize fs)
+    ;; The top panel keeps the x tick positions but blanks their labels.
+    (sv xt (npy.linspace -1 1 9))
+    (pl.xticks xt (lfor e (range (len xt)) "") :fontsize fs)
+    (pl.axes [0 0 1 0.57])
+    ;; Basis functions are plotted from the last index down to the first.
+    (for [i (range (dec np) -1 -1)]
+      (sv c (nth clrs i))
+      (pl.plot x (get yn i) (+ c ":"))
+      (pl.plot x (get yo i) (+ c "--"))
+      (pl.plot x (get yb i) (+ c "-")))
+    (my-grid)
+    (axis-tight-pad :pad 0)
+    (pl.xlim (, (- d) d))
+    (pl.xticks xt :fontsize fs)
+    (pl.yticks (npy.linspace -0.2 1 7) :fontsize fs)
+    (pl.xlabel "Reference coordinate" :fontsize fs)))
+
+;;; utils for search
+
+(defn parse-search-list [fname]
+  ;; Read the search-output file fname and return a list with one parsed basis
+  ;; description per qualifying line. A line qualifies when it is at least 100
+  ;; characters long and starts with "meam1"; everything else is skipped.
+  (lfor line (.split (readall fname) "\n")
+        :if (and (>= (len line) 100) (= (cut line 0 5) "meam1"))
+        (futils.parse-search-basis-line line)))
+
+(defn uniquify-search-list [c]
+  ;; Return the entries of c with duplicates removed, keeping the first
+  ;; occurrence and the original order. Two entries are duplicates when their
+  ;; (np, nodes) pair produces the same string via futils.Nodes->string.
+  (sv seen (set) kept [])
+  (for [basis c]
+    (sv key (futils.Nodes->string (:np basis) (:nodes basis)))
+    (unless (in key seen)
+      (.add seen key)
+      (.append kept basis)))
+  kept)
+
+(defn get-slmmir-builtin [np]
+  ;; Return the nodal-subset description hard-coded here for the given np,
+  ;; for np in 4..13. The entries for np 4 and 5 are None. Most entries are
+  ;; built from (subnp, offset) tuples via futils.offst->Nodes; np 6 and np 9
+  ;; are given as explicit node lists. Any other np raises through get.
+  (get
+    {4  None
+     5  None
+     6  [[0 1 2 3 4] [ 0 1 2 3 5] [0 1 2 3 4 5]]
+     7  (futils.offst->Nodes (, 5 5 6) (, 0 0 0))
+     8  (futils.offst->Nodes (, 6 6 7 6) (, 0 0 0 1))
+     9  [[0 1 2 3 4 5 8] [0 1 2 3 4 5 7 8] [0 1 2 3 4 5 6 8] [1 2 3 4 5 6 7]]
+     10 (futils.offst->Nodes (, 7 7 7 8 8) (, 0 0 0 0 1))
+     11 (futils.offst->Nodes (, 8 9 8 9 8) (, 0 0 0 0 1))
+     12 (futils.offst->Nodes (, 9 9 10 10 9 10) (, 0 0 0 0 1 1))
+     13 (futils.offst->Nodes (, 10 10 10 10 11 10) (, 0 0 0 0 0 1))}
+    np))
+
+(defn nodes= [a-np a-nodes b-np b-nodes]
+  ;; Return True when two nodal-subset descriptions are identical: same np,
+  ;; same number of subsets, and element-wise equal subsets in the same order.
+  ;; Either node list may be None (get-slmmir-builtin returns None for np 4
+  ;; and 5); a missing description never equals anything, so the result is
+  ;; False rather than a TypeError from calling len on None.
+  (when (or (none? a-nodes) (none? b-nodes))
+    (return False))
+  (unless (and (= a-np b-np)
+               (= (len a-nodes) (len b-nodes)))
+    (return False))
+  (sv npa npy.array)
+  (for [i (range (len a-nodes))]
+    (sv ai (npa (nth a-nodes i)) bi (npa (nth b-nodes i)))
+    ;; Compare lengths first so the element-wise comparison is only made
+    ;; between arrays of equal size.
+    (unless (and (= (len ai) (len bi)) (npy.all (= ai bi)))
+      (return False)))
+  True)
+
+;;; run slmmir on a bunch of formulas
+
+(defn write-slmmir-script [blns script-fname]
+  ;; Write a shell script to script-fname that runs the slmmir executable once
+  ;; per (basis, limiter setting) pair, for every basis in blns and for both
+  ;; limiter settings in lims. Before each run the script echoes a "line>"
+  ;; record with the basis text and a "cmd>" record with a counter and the
+  ;; command.
+  ;; cmdstr returns the command template; the -basis option is included only
+  ;; when its argument is true. The output is piped with "|&" (a bash-style
+  ;; pipe of stdout and stderr) through grep for lines starting with "C ".
+  (svb (cmdstr
+         (fn [basis]
+           (+ "KMP_AFFINITY=balanced OMP_NUM_THREADS=48 $exe "
+              "-method pcsl -ode {ode:s} -ic gaussianhills -ic cosinebells -ic slottedcylinders "
+              "-we 0 -rit -dmc eh -mono {mono:s} -lim {lim:s} -nsteps {nstep:d} -T 12 -ne {ne:d} "
+              "-np {np:d} -timeint {timeint:s} -prefine {prefine:d} -d2c "
+              (if basis "-basis \"{basis:s}\" " "")
+              "|& grep \"^C \"")))
+       (ode "nondivergent") (ne 20) (nstep (* 20 6))
+       (lims (, (, "caas-node" "caas") (, "none" "none"))))
+  (with [f (open script-fname "w")]
+    ;; The script starts with an empty exe= assignment (presumably to be
+    ;; filled in by hand before running -- confirm).
+    (f.write "exe=\n")
+    (sv ctr 0)
+    (defn write1 [b lim str-basis]
+      ;; np 4 uses exact time integration with no p-refinement; every other np
+      ;; uses interpolation with prefine 5.
+      (sv cmd (.format (cmdstr str-basis) :ode ode :mono (first lim)
+                       :lim (last lim) :nstep nstep :ne ne :np (:np b)
+                       :timeint (if (= (:np b) 4) "exact" "interp")
+                       :prefine (if (= (:np b) 4) 0 5)
+                       :basis (if str-basis (futils.Nodes->string (:np b) (:nodes b)))))
+      (f.write (.format "echo 'line> {}'\n" (if str-basis
+                                                (:txt b)
+                                                (.format "builtin np {}" (:np b)))))
+      (f.write (.format "echo 'cmd> {} {}'\n" ctr cmd))
+      (f.write (.format "{}\n" cmd)))
+    ;; NOTE(review): dont is a project macro; presumably it disables this loop
+    ;; over the built-in bases -- confirm in amb3.
+    (dont for [np (range 4 13) lim lims]
+      (write1 {:np np} lim False)
+      (inc! ctr))
+    (for [b blns lim lims]
+      (write1 b lim True)
+      (inc! ctr))))
+
+(defn parse-slmmir-output [fname &optional d [lebesgue False]]
+  ;; Parse the output of a script written by write-slmmir-script and add the
+  ;; results to the nested dict d, which is returned. Each "C PASS" or
+  ;; "C FAIL" line appends {:b basis :cls per-IC-results} under the key formed
+  ;; from the command, the current cycle and ne. With lebesgue true, each
+  ;; basis also gets a :lebesgue entry computed through the islet module.
+  (import islet)
+  (svifn d {})
+  (sv txt (.split (readall fname) "\n")
+      isl (islet.Islet))
+  (for [ln txt]
+    (cond [(in "line>" ln)
+           ;; Drop the first six characters, i.e. a leading "line> ".
+           (sv b (futils.parse-search-basis-line (cut ln 6)))
+           (when lebesgue
+             (sv bstr (futils.Nodes->string (:np b) (:nodes b))
+                 npm (isl.calc-xnodes-metrics-from-basis-string bstr)
+                 npa npy.array)
+             ;; Check that the metrics recomputed from the basis string match
+             ;; the ones stored on the line to within a reldif of 1e-2.
+             (assert (< (reldif (npa (:npm b)) (npa npm)) 1e-2))
+             (assoc b :lebesgue (isl.calc-lebesgue-consts-from-basis-string bstr))
+             (sv strp (fn [t]
+                        (sv s "")
+                        (for [e t] (+= s (.format " {:1.2e}" e)))
+                        s))
+             (dont prf (+ (strp (:npm b)) " |" (strp (:lebesgue b)))))]
+          ;; A new command starts a fresh per-IC result dict.
+          [(in "cmd>" ln)
+           (sv cmd ln c (futils.parse-cmd ln)
+               cls {})]
+          [(in "C cycle" ln)
+           (sv cyc (int (last (.split ln))))]
+          [(or (in "C PASS" ln) (in "C FAIL" ln))
+           (assoc-nested-append d (+ (futils.cmd->key-base c)
+                                     (, cyc (:ne c)))
+                                {:b b :cls cls})]
+          ;; Any other sufficiently long "C " line is a result line, stored
+          ;; under its :ic field.
+          [(and (> (len ln) 10) (= (cut ln 0 2) "C "))
+           (sv cl (futils.parse-C ln))
+           (assoc cls (:ic cl) cl)]))
+  d)
+
+(defn basic-key [np &optional ode nstepfac prop-preserve cyc ne]
+  ;; Build the key tuple used to index the dict returned by
+  ;; parse-slmmir-output: (timeint ode nstepfac "pcsl" mono lim prefine np
+  ;; cyc ne). np 4 maps to "exact"/prefine 0 and every other np to
+  ;; "interp"/prefine 5. prop-preserve selects "caas-node"/"caas" over
+  ;; "none"/"none".
+  ;; svifn is a project macro; presumably it assigns these defaults only to
+  ;; arguments left as None -- confirm in amb3.
+  (svifn ode "nondivergent" nstepfac 1 cyc 1 ne 20)
+  (, (if (= np 4) "exact" "interp") ode nstepfac "pcsl"
+     (if prop-preserve "caas-node" "none") (if prop-preserve "caas" "none")
+     (if (= np 4) 0 5) np cyc ne))
+
+(defn get-tick-labels [x]
+  ;; Convert tick positions x into labels showing log10 of each position: an
+  ;; int when the exponent is integral, otherwise a string with one decimal.
+  (defn label [tick]
+    (sv exponent (npy.log10 tick))
+    (if (= exponent (int exponent))
+        (int exponent)
+        (.format "{:1.1f}" exponent)))
+  (lfor tick x (label tick)))
+
+(defn set-semilogy-ticks [&optional fs]
+  ;; Relabel the current y ticks with their log10 values, using font size fs.
+  (sv ylocs (first (pl.yticks)))
+  (pl.yticks ylocs (get-tick-labels ylocs) :fontsize fs))
+
+(defn set-loglog-ticks [&optional fs]
+  ;; Relabel the current x and y ticks with their log10 values, using font
+  ;; size fs. The x axis is handled here; the y axis is handled exactly as in
+  ;; set-semilogy-ticks.
+  (sv xlocs (first (pl.xticks)))
+  (pl.xticks xlocs (get-tick-labels xlocs) :fontsize fs)
+  (set-semilogy-ticks :fs fs))
+
+(defn norm->str [n]
+  ;; Map a norm keyword (:l1, :l2 or :li) to its LaTeX label.
+  (get {:l1 "$l_1$" :l2 "$l_2$" :li "$l_{\infty}$"} n))
+
+(defn plot-slmmir-vs-heuristic [c d img-fname
+                                &optional nps prop-preserve ic norm pum-thr lebesgue]
+  ;; Scatter-plot the measured test-problem error (y) against the search
+  ;; heuristic (x) for every basis in d, one marker style per np, and save the
+  ;; figure as a PDF at img-fname.
+  ;;   nps           np values to plot (default 4..13)
+  ;;   prop-preserve choose the property-preserving runs (default False)
+  ;;   ic            initial-condition key into each result dict (default "gau")
+  ;;   norm          :l1, :l2 or :li (default :l2)
+  ;;   pum-thr       bases whose :pum exceeds this are skipped (default 1e-6)
+  ;;   lebesgue      use the :lebesgue values on a semilog-y plot instead of
+  ;;                 the :npm values on a log-log plot (default False)
+  ;; Bases that match the slmmir built-in for their np are circled in red.
+  (svifn nps (list (range 4 14)) prop-preserve False ic "gau" norm :l2
+         pum-thr 1e-6 lebesgue False)
+  (sv npa npy.array fs 11
+      plot (if lebesgue pl.semilogy pl.loglog))
+  (print img-fname)
+  (with [(pl-plot (, 4 4.2) img-fname :format "pdf")]
+    (for [np nps]
+      ;; geton is used so that an np with no data is skipped by the none?
+      ;; check; a plain get raises on a missing key before that check runs.
+      (sv es (geton d #*(basic-key np :prop-preserve prop-preserve)))
+      (when (none? es) (continue))
+      (sv x [] y [])
+      (for [e es]
+        (sv cls (:cls e) b (:b e))
+        (when (> (:pum b) pum-thr) (continue))
+        ;; The heuristic tuples are ordered (l1, l2, li).
+        (.append x (nth (get b (if lebesgue :lebesgue :npm))
+                        (get {:l1 0 :l2 1 :li 2} norm)))
+        (.append y (get cls ic norm))
+        (when (nodes= (:np b) (:nodes b) (:np b) (get-slmmir-builtin (:np b)))
+          (plot (last x) (last y) "ro" :fillstyle "none" :markersize 12
+                :zorder 20)))
+      ;; Lower np values are drawn on top of higher ones.
+      (plot (npa x) (npa y) (+ (get c.npclrs np) (get c.npmarks np))
+            :fillstyle "none" :zorder (- 20 np) :label (.format "$n_p$ {}" np)))
+    (if lebesgue
+        (do (set-semilogy-ticks :fs fs)
+            (pl.xlabel (+ (norm->str norm) " heuristic") :fontsize fs))
+        (do (set-loglog-ticks :fs fs)
+            (pl.xlabel (+ "$\log_{10}$ " (norm->str norm) " heuristic") :fontsize fs)))
+    (my-grid)
+    (pl.title (.format (+ "Test problem vs.~heuristic:\n"
+                          "nondivergent flow, 1.5$^\circ$, {}, long steps\n"
+                          "$p$-refinement, {}")
+                       (futils.ic-short2long ic)
+                       (+ (if prop-preserve "" "no ") "property preservation"))
+              :fontsize fs)
+    (pl.legend :loc "best" :fontsize fs)
+    (pl.axis "tight")
+    (pl.ylabel (+ "$\log_{10}$ " (norm->str norm) " relative error") :fontsize fs)))
+
+;;; pum vs perturb
+
+(defn parse-pum-vs-perturb [fname]
+  ;; Parse a perturbation-sweep log into a dict keyed by (basis, np) whose
+  ;; values are (perturb, meam1) pairs of lists. Lines starting with ">>> "
+  ;; name the basis and np; lines starting with ">> " carry a bracketed list.
+  (sv txt (.split (readall fname) "\n")
+      perturb None
+      d {})
+  (for [ln txt]
+    (cond [(= ">>> " (cut ln 0 4))
+           (sv (, - basis np) (sscanf ln "s,s,i"))]
+          [(= ">> " (cut ln 0 3))
+           ;; NOTE(review): eval runs text taken from the input file; only use
+           ;; this on trusted files.
+           (sv pos (.find ln "[")
+               arr (eval (read-str (cut ln pos))))
+           ;; The first ">> " list in the whole file is taken as the
+           ;; perturbation sizes and reused for every basis; every later one
+           ;; is stored as that basis's meam1 values, overwriting any earlier
+           ;; entry for the same key.
+           ;; NOTE(review): this presumably relies on all perturbation lists
+           ;; in the file being identical -- confirm against the file format.
+           (if (none? perturb)
+               (sv perturb arr)
+               (do (sv meam1 arr)
+                   (assoc-nested d (, basis np) (, perturb meam1))))]))
+  d)
+
+(defn plot-pum-vs-perturb [c d fname]
+  ;; Log-log plot of (max |lambda| - 1) against the relative mesh perturbation
+  ;; for the "gll_best" basis, one curve per np, with a slope-4 reference
+  ;; triangle. The figure is saved to fname.
+  ;; f is the identity here; it is rebound to a number further down.
+  (sv f identity fs 11)
+  (with [(pl-plot (, 4 4) fname)]
+    (for [basis (, "gll_best")]
+      ;; Bases with "uni" in the name would start at np 8, be drawn dashed and
+      ;; get a prime on the label; only "gll_best" is plotted at present.
+      (sv uni (in "uni" basis))
+      (for [np (range (if uni 8 6) 14)]
+        (sv e (geton d basis np))
+        (when (none? e) (continue))
+        (pl.loglog (f (first e)) (f (second e))
+                   (+ (get c.npclrs np) (if uni "--" "-"))
+                   :label (+ "$n_p$ " (str np) (if uni "'" "")))))
+    ;; From here on f is a scale factor for the corners of the reference
+    ;; triangle.
+    (sv f 1.7 slope 4)
+    (make-reference-slope-triangle [(/ 2.8e-3 f) (* 2.8e-2 f)]
+                                   [(* 3e-8 (** f slope)) (/ 3e-12 (** f slope))]
+                                   slope "k-"
+                                   :opposite True
+                                   :kwargs-text {"fontsize" (+ fs 4)})
+    (set-loglog-ticks)
+    (my-grid)
+    (pl.legend :loc "upper left" :fontsize (dec fs) :ncol 1)
+    (pl.xlabel "$\log_{10}$ Element size relative random perturbation $\delta$"
+               :fontsize fs)
+    (pl.ylabel "$\log_{10}$ (max $|\lambda|$ - 1)" :fontsize fs)
+    (pl.title "Perturbed uniform mesh metric" :fontsize fs)
+    (pl.xlim (, 1e-4 0.1))
+    (pl.ylim (, 1e-14 0.1))))
+
+;;; meam1 and pum vs dx
+
+(defn parse-meam1-sweep [fname]
+  ;; Parse a meam1 sweep log into a dict mapping each method name to
+  ;; (dxs, meam1s, np). A line whose first token is one of the known method
+  ;; names starts a new section, with np as its second token; lines with
+  ;; exactly two tokens are (dx, meam1) samples for the current section.
+  (sv txt (.split (readall fname) "\n")
+      methods (, "gll_natural" "gll_best" "uniform_offset_nodal_subset")
+      fst True
+      d {})
+  (defn fill []
+    ;; Store the section accumulated so far. This reads method, dxs, meam1s
+    ;; and np from the enclosing function.
+    (print method (len dxs) (len meam1s))
+    (assoc d method (, dxs meam1s np)))
+  (for [ln txt]
+    (sv toks (.split ln))
+    (cond [(in (first toks) methods)
+           ;; Flush the previous section, unless this is the first header.
+           (unless fst (fill))
+           (sv method (first toks)
+               np (int (second toks))
+               fst False
+               dxs [] meam1s [])]
+          [(= (len toks) 2)
+           (sv (, dx meam1) (sscanf ln "f,f"))
+           (.append dxs dx)
+           (.append meam1s meam1)]))
+  ;; Flush the final section; this assumes at least one header line was seen.
+  (fill)
+  d)
+
+(defn parse-pum-sweep [method-fnames]
+  ;; For each (method, fname) pair, read the table that follows the first line
+  ;; containing "final" in that file, and return a dict mapping method to
+  ;; (dxs, meam1s).
+  (sv d {})
+  (for [(, method fname) method-fnames]
+    (sv txt (.split (readall fname) "\n")
+        dxs [] meam1s [] skip 1)
+    ;; skip starts at 1, so after this loop it indexes the line just after
+    ;; the "final" line.
+    (for [ln txt]
+      (when (in "final" ln) (break))
+      (inc! skip))
+    ;; Table rows have exactly four tokens; the first row that does not ends
+    ;; the table. Tokens 1 and 3 hold dx and meam1.
+    (for [ln (cut txt skip)]
+      (sv toks (.split ln))
+      (unless (= (len toks) 4) (break))
+      (.append dxs (float (nth toks 1)))
+      (.append meam1s (float (nth toks 3))))
+    (assoc d method (, dxs meam1s)))
+  d)
+
+(defn plot-meam1-and-pum-vs-dx [c dmeam1 dpum fname]
+  ;; Semilog-y plot of (max |lambda| - 1) against translation dx for three
+  ;; methods: markers come from dpum and solid lines from dmeam1. The figure
+  ;; is saved to fname.
+  ;; symx and symy extend data given on part of the dx range to the full
+  ;; range: x is concatenated with 1 minus its reverse, and y with its
+  ;; reverse. s-all-rev is defined elsewhere, presumably a reversing slice.
+  (defn symx [x]
+    (sv x (npy.array x))
+    (npy.concatenate (, x (- 1 (get x (s-all-rev))))))
+  (defn symy [y]
+    (sv y (npy.array y))
+    (npy.concatenate (, y (get y (s-all-rev)))))
+  (sv fs 11
+      ms (, "gll_natural" "uniform_offset_nodal_subset" "gll_best")
+      clrs {(nth ms 0) "r" (nth ms 1) "g" (nth ms 2) "k"}
+      mrks {(nth ms 0) "." (nth ms 1) "x" (nth ms 2) "o"}
+      lbls {(nth ms 0) "GLL natural" (nth ms 1) "Uniform offset nodal subset"
+            (nth ms 2) "GLL nodal subset"})
+  (with [(pl-plot (, 4 4) fname)]
+    (for [method ms]
+      (sv e (get dpum method))
+      (pl.semilogy (first e) (second e)
+                   (+ (get clrs method) (get mrks method))
+                   :label (get lbls method))
+      (sv e (get dmeam1 method))
+      (pl.semilogy (symx (first e)) (symy (second e))
+                   (+ (get clrs method) "-")))
+    ;; np is taken from the dmeam1 entry of the last method in the loop.
+    (sv np (last e))
+    ;; Dotted vertical lines at i/(np-1) for i from np//2 to np-1, drawn in
+    ;; the color of the uniform method.
+    (for [i (range (// np 2) np)]
+      (sv p (/ i (dec np)))
+      (pl.semilogy [p p] [1e-15 1] (+ (get clrs (nth ms 1)) ":") :zorder -1))
+    (set-semilogy-ticks)
+    (pl.ylim (, 1e-16 1))
+    (pl.ylabel "$\log_{10}$ (max $|\lambda|$ - 1)" :fontsize fs)
+    (pl.xlabel "Translation, $\Delta x$, relative to element size 1" :fontsize fs)
+    (pl.title (.format "$n_p$ {} methods" np) :fontsize fs)
+    (pl.legend :loc (, 0.02 0.15) :fontsize fs)
+    (pl.xticks (npy.linspace 0 1 11))
+    (my-grid)))
+
+;;; drivers
+
+(when-inp ["dev-parse"]
+  (sv fname "data/search-0.txt"
+      blns (parse-search-list fname))
+  (print (len blns))
+  (for [b blns]
+    (when (and (= (:np b) 13) (= (:type b) :offst-nodal-subset) (< (:pum b) 1e-6))
+      (print b) (break)))
+  (for [b blns]
+    (when (and (= (:np b) 8) (= (:type b) :nodal-subset) (< (:pum b) 1e-7))
+      (print b) (break))))
+
+(when-inp ["write-slmmir-script"]  ; gather entries from all listed search files and hand them to write-slmmir-script
+  (sv script-fname "../../slmm/meas/run-slmmir-on-basis-lines.sh"
+      data-fnames (, "search-0.txt"
+                     "search-findnodal_given_bestosn-0.txt"
+                     "search-findnodal_given_bestosn-1.txt"
+                     "search-findnodal_given_bestosn-2.txt"
+                     "search-findnodal_given_bestosn-3.txt"
+                     "search-findnodal_given_bestosn-7.txt")
+      blns [])
+  (for [fname data-fnames]  ; each file name is resolved under data/
+    (.extend blns (parse-search-list (+ "data/" fname))))
+  (sv blns (uniquify-search-list blns))  ; uniquify-search-list is defined elsewhere
+  (write-slmmir-script blns script-fname))
+
+(when-inp ["plot-slmmir-vs-heuristic"]  ; parse slmmir output, then draw one figure per (norm, property-preservation, IC) configuration
+  (sv fnames (, "slmmir-on-basis-lines-2.txt")
+      c (futils.get-context)
+      lebesgue False
+      d {})
+  (for [fname fnames]  ; d is passed back in on each call and rebound to the result
+    (sv d (parse-slmmir-output (+ "data/" fname) :d d :lebesgue lebesgue)))
+  (for [(, norm pp ic) (, (, :l2 False "gau") (, :l2 True "cos"))]
+    (plot-slmmir-vs-heuristic  ; output name: <fig-dir>slmmir-vs-heuristic-<ic>-<pp|nopp>-<norm>, plus "-leb" when lebesgue is set
+      c d (.format (+ "{}slmmir-vs-heuristic-{}-{}-{}" (if lebesgue "-leb" ""))
+                   c.fig-dir ic (if pp "pp" "nopp") (name norm))
+      :nps [6 7 8 9 10]
+      :prop-preserve pp :norm norm :ic ic :lebesgue lebesgue)))
+
+(when-inp ["illustrations"]  ; draw the grid illustration under <fig-dir>illustrate-grids
+  (sv c (futils.get-context))
+  (illustrate-grids (+ c.fig-dir "illustrate-grids")))
+
+(when-inp ["tables"]  ; write one LaTeX methods table per node type; only 'gll is listed
+  (for [xnodes (, 'gll)]
+    (write-methods-latex-tables (parse-cpp-methods "islet-methods.txt")  ; the methods file is re-parsed on every iteration
+                                xnodes (.format "figs/methods-table-{}.tex"
+                                                (name xnodes)))))
+
+(when-inp ["pum-vs-perturb"]  ; parse data/pum_perturb_plot-041021.txt and plot it to <fig-dir>pum-vs-perturb
+  (sv fname "pum_perturb_plot-041021.txt"
+      d (parse-pum-vs-perturb (+ "data/" fname))
+      c (futils.get-context))
+  (plot-pum-vs-perturb c d (+ c.fig-dir "pum-vs-perturb")))
+
+(when-inp ["meam1-and-pum-vs-dx"]  ; parse the np = 8 sweep files and plot them to <fig-dir>meam1-and-pum-vs-dx
+  (sv c (futils.get-context)
+      data-dir "data/"
+      meam1-fname (+ data-dir "run_meam1_sweep-np8.txt")
+      method-fnames (zip (, "gll_best" "gll_natural" "uniform_offset_nodal_subset")  ; pairs each method key with its data file; zip is single-pass
+                         (lfor fname (, "pum_sweep-np8-gll_best.txt"
+                                        "pum_sweep-np8-gll_natural.txt"
+                                        "pum_sweep-np8-uni.txt")
+                               (+ data-dir fname)))
+      d (parse-meam1-sweep meam1-fname)
+      d-pum (parse-pum-sweep method-fnames))
+  (plot-meam1-and-pum-vs-dx c d d-pum (+ c.fig-dir "meam1-and-pum-vs-dx")))
+
+(when-inp ["np4-schematic"]  ; draw the np = 4 schematic under <fig-dir>np4-schematic
+  (sv c (futils.get-context))
+  (draw-np4-schematic (+ c.fig-dir "np4-schematic")))
+
+(defn plot-basis-schematic [np &optional [annotate False]]  ; plot the np basis functions of methods 0 and 3 on [-1, 1]; annotate adds region/support markup
+  (import islet)
+  (sv c (futils.get-context)
+      pats {0 "--" 3 "-"}  ; line pattern per method id
+      clrs "krbcgm"  ; one color letter per basis function, reused cyclically
+      x (npy.linspace -1 1 512)
+      isl (islet.Islet)
+      fs 12)
+  (with [(pl-plot (, 6 (if annotate 4 3))  ; output name: <fig-dir>basis-schematic-np<np>, plus "-annotated" when annotate is set
+                  (+ c.fig-dir "basis-schematic-np" (str np)
+                     (if annotate "-annotated" "")))]
+    (for [method (, 0 3)]
+      (sv y (.transpose (isl.eval method np x))  ; eval returns a (len x) by np array; transpose so row i is basis function i
+          pat (get pats method))
+      (for [i (range np)]
+        (pl.plot x (get y i) (+ (nth clrs (% i (len clrs))) pat)
+                 :label (if (zero? i) (get {0 "Natural GLL" 3 "Islet GLL"} method))))  ; only the first curve of each method carries a legend label
+      (my-grid)
+      (sv d 1.04)
+      (pl.xlim (, (- d) d))
+      (pl.ylim (, (if annotate -0.64 -0.22) 1.03)))  ; extra room below the curves when annotations are drawn
+    (pl.xlabel "Reference coordinate" :fontsize fs)
+    (pl.ylabel "Basis function value" :fontsize fs)
+    (pl.text -1 1.1 (.format "Basis functions, $n_p$ = {}" np) :fontsize fs)
+    (pl.figlegend :loc (, 0.49 (if annotate 0.91 0.88)) :fontsize fs :ncol 2)
+    (when annotate
+      (sv npa npy.array
+          xgll (npa (poly.get-gll-x np))
+          ireg 1  ; highlighted region: spans xgll[ireg] to xgll[ireg+1]
+          xs (cut xgll ireg (+ ireg 2))
+          xc (cc xs)  ; cc is defined elsewhere; presumably the center of the span -- TODO confirm
+          clr "g" y -0.3 w 0.006 lw 2 ones (npa [1 1]))
+      (for [i (range 2)]  ; one upward arrow at each end of the region
+        (pl.arrow (nth xgll (+ ireg i)) y 0 0.1 :width w :color clr))
+      (pl.plot xs (* y ones) (+ clr "-") (* xc ones) [-0.36 y] :color clr :lw lw)  ; horizontal span plus a vertical tick at xc
+      (pl.text xc -0.45 (.format "Region {}" ireg)
+               :color clr :ha "center" :fontsize fs)
+      (when (= np 6)  ; support markup is hard-coded for the np = 6 case
+        (sv support [0 1 2 3 5]  ; nodes that get an arrow; node 4 gets the cross below
+            clr "r")
+        (for [i support]
+          (pl.arrow (nth xgll i) -0.5 0 0.1 :width w :color clr))
+        (sv x (nth xgll 4))  ; rebinds x; the sample grid is no longer needed here
+        (pl.plot x -0.45 (+ clr "x") :markersize 14)
+        (pl.text 0 -0.59 (.format "Support nodes for region {}" ireg)
+                 :color clr :ha "center" :fontsize fs)))))
+
+(when-inp ["basis-schematic" {:np int}]  ; e.g. "hy figs-methods.hy basis-schematic 6"; np is supplied as an int
+  (plot-basis-schematic np :annotate True))
diff --git a/methods/islet/figures/figs.tex b/methods/islet/figures/figs.tex
new file mode 100644
index 0000000..5feda17
--- /dev/null
+++ b/methods/islet/figures/figs.tex
@@ -0,0 +1,376 @@
+% hy figs-methods.hy basis-schematic 6
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=0.75\linewidth]{basis-schematic-np6-annotated}
+  \caption{
+    Basis functions for the Islet $\np=6$ GLL nodal subset basis listed in Table \ref{tbl:gll}.
+    Each curve's color corresponds to a basis function.
+    Each line pattern corresponds to a basis type, as listed in the legend.
+    The green span shows region 1.
+    The red arrows point to the nodes in the support of region 1;
+    the red $\times$ is beneath the one node not in region 1's support.
+  }
+  \label{fig:np6-basis}
+\end{figure}
+
+\begin{table}[tbh]
+  \input{figs/methods-table-gll.tex}
+  \caption{
+    Islet GLL nodal subset bases.
+    Each row provides a formula for the row's $\np$ value.
+    Columns are $\np$, order of accuracy (OOA),
+    the support sizes $\npsub$ for each region ordered left to middle,
+    and the supports.
+    For offset nodal subset bases, supports are given by offsets.
+    For general nodal subset bases, supports are given by nodal subsets, again ordered from left region to middle.
+    The case $\np=4$ is described in Sect.~\ref{sec:np4}.
+    In all cases, the support points are GLL points.
+  }
+  \label{tbl:gll}
+\end{table}
+
+% ./pum_sweep 8 512 4 0 > pum_sweep-np8-gll_natural.txt
+% ./pum_sweep 8 512 4 1 > pum_sweep-np8-gll_best.txt
+% ./pum_sweep 8 512 4 2 > pum_sweep-np8-uni.txt
+% ./run_meam1_sweep 8 > run_meam1_sweep-np8.txt
+% hy figs-methods.hy meam1-and-pum-vs-dx
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=0.5\linewidth]{meam1-and-pum-vs-dx}
+  \caption{
+    $\lambdamax(\Delta x)-1$ (solid lines) and $\lambdamaxpum(\Delta x)-1$ (markers) for
+    the natural GLL (red, small circles), uniform-points offset nodal subset (green, $\times$), and
+    Islet GLL nodal subset (black, large circles) $\np=8$ bases.
+    Green dotted vertical lines mark multiples of $1/(\np-1)=1/7$.
+  }
+  \label{fig:meam1-and-pum-vs-dx}
+\end{figure}
+
+% ./pum_perturb_plot > pum_perturb_plot-041021.txt
+% hy figs-methods.hy pum-vs-perturb
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=0.5\linewidth]{pum-vs-perturb}
+  % include only gll_best b/c uniform_offset_nodal_subset was already shown to
+  % be bad at i*1/(np-1) > 0.5, integer i.
+  \caption{
+    $\lambdamaxpum(\delta)-1$ for the bases in Table \ref{tbl:gll} with $\np \ge 6$.
+    The triangle provides a $\delta^4$ reference slope.
+  }
+  \label{fig:pum-vs-perturb}
+\end{figure}
+
+% hy figs-methods.hy np4-schematic
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=0.5\linewidth]{np4-schematic}
+  \caption{
+  Illustration of the optimized Islet GLL $\np=4$ basis (solid line) compared with
+  the natural (dotted) and the best nodal subset (dashed) $\np=4$ bases.
+  Each basis function in a basis has its own color.
+  The top panel shows the convex combination parameter value as a function of reference coordinate
+  that is used to combine the natural and best nodal subset bases
+  to form the optimized basis.
+  }
+  \label{fig:np4-schematic}
+\end{figure}
+
+% hy figs-methods.hy illustrations
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=0.5\linewidth]{illustrate-grids}
+  \caption{
+    One spectral element (blue solid line outlining the full square) with
+    dynamics (black large circles), tracer (small red circles), and physics (green dashed lines) subelement grids.
+  }
+  \label{fig:illustrate-grids}
+\end{figure}
+
+% bash run-stability-cmp.sh > stability-cmp-0.txt
+% hy figs-adv-diag.hy fig-stab-cmp stability-cmp-0.txt
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=0.5\linewidth]{stab-cmp-l2}
+  \caption{
+    Stability of the Islet method with the Islet GLL bases,
+    compared with the instability of the method with the natural GLL bases.
+    The $x$-axis is average dynamics grid point spacing at the equator in degrees for the quasiuniform cubed-sphere grid.
+    The $y$-axis is $\log_{10} l_2$ relative error.
+    A curve's line pattern corresponds to basis type and number of cycles, as listed in the top legend.
+    A curve's marker corresponds to $\npt$, as listed in the bottom legend.
+    The case is divergent flow, Gaussian hills ICs, property preservation, $p$-refinement, and long time steps.
+  }
+  \label{fig:islet-vs-gll}
+\end{figure}
+
+% bash run-accuracy.sh > acc-0.txt
+% hy figs-adv-diag.hy fig-midpoint acc-0.txt
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=0.5\linewidth]{midpoint-check}
+  \caption{
+    Comparison of relative errors calculated at the test simulation's midpoint time of 6 days (1/2 cycle, dashed lines)
+    and endpoint time of 12 days (1 cycle, solid lines).
+    Each number at the right side of the plot is the empirical OOA computed using the final two points of the 1-cycle result.
+  }
+  \label{fig:traj-interp}
+\end{figure}
+
+% hy figs-adv-diag.hy figs-acc acc-0.txt
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=0.5\linewidth]{acc-nondivergent-gau-exact-nopp-fac5}
+  \caption{
+    Empirical verification of the order of accuracy of the Islet GLL bases.
+    Each number at the right side of the plot is the empirical OOA computed using the final two points of the $l_\infty$ curve.
+  }
+  \label{fig:islet-empirical-ooa}
+\end{figure}
+
+% hy figs-methods.hy write-slmmir-script
+% bash run-slmmir-on-basis-lines.sh > slmmir-on-basis-lines-2.txt
+% hy figs-methods.hy plot-slmmir-vs-heuristic # uses slmmir-on-basis-lines-2.txt
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=0.48\linewidth]{slmmir-vs-heuristic-gau-nopp-l2}
+  \caption{$l_2$ norm on the nondivergent flow problem
+    using basis $\basisns_{\np}$ vs.~$a_2(\basisns_{\np})$,
+    for a large number of \abtps~bases and $\np=6$ to $10$.
+    The legend lists the marker type for each $\np$.
+    Large red circles outline the bases in Table \ref{tbl:gll}.
+    The configuration uses the Gaussian hills IC and no property preservation.}
+  \label{fig:slmmir-vs-heuristic-a}
+\end{figure}
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=0.48\linewidth]{slmmir-vs-heuristic-cos-pp-l2}
+  \caption{Same as Fig.~\ref{fig:slmmir-vs-heuristic-a} except that the configuration
+    uses the cosine bells IC with property preservation.}
+  \label{fig:slmmir-vs-heuristic-b}
+\end{figure}
+
+% hy figs-adv-diag.hy figs-acc acc-0.txt
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=0.48\linewidth]{acc-nondivergent-gau-interp-pp-fac1}
+  \caption{
+    Accuracy diagnostic.
+    Compare with Figs.~1, 2 in TR14.
+  }
+  \label{fig:islet-acc-nondiv-gau-a}
+\end{figure}
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=0.48\linewidth]{acc-nondivergent-gau-interp-pp-fac5}
+  \caption{
+    Accuracy diagnostic.
+    Compare with Figs.~1, 2 in TR14.
+  }
+  \label{fig:islet-acc-nondiv-gau-b}
+\end{figure}
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=0.48\linewidth]{acc-nondivergent-cos-interp-pp-fac1}
+  \caption{
+    Accuracy diagnostic.
+    Compare with Fig.~3 in TR14.
+  }
+  \label{fig:islet-acc-nondiv-cos-a}
+\end{figure}
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=0.48\linewidth]{acc-nondivergent-cos-interp-pp-fac5}
+  \caption{
+    Accuracy diagnostic.
+    Compare with Fig.~3 in TR14.
+  }
+  \label{fig:islet-acc-nondiv-cos-b}
+\end{figure}
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=0.48\linewidth]{acc-divergent-cos-interp-pp-fac1}
+  \caption{
+    Accuracy diagnostic.
+    Compare with Fig.~16 in TR14.
+  }
+  \label{fig:islet-acc-div-cos-a}
+\end{figure}
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=0.48\linewidth]{acc-divergent-cos-interp-pp-fac5}
+  \caption{
+    Accuracy diagnostic.
+    Compare with Fig.~16 in TR14.
+  }
+  \label{fig:islet-acc-div-cos-b}
+\end{figure}
+
+% hy figs-adv-diag.hy fig-filament acc-0.txt
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=1\linewidth]{filament}
+  \caption{
+    Filament diagnostic, following Sect.~3.3 of TS12.
+    Compare with Fig.~5 in TR14.
+    The top row shows the diagnostic measured on the $\npv=4$ dynamics grid;
+    the bottom row, on the tracer grid.
+    The legend describes the dynamics-grid resolution and the time step length.
+    The prescribed validation problem is the nondivergent flow with cosine bells IC.
+    Property preservation is on.
+    The $x$-axis is $\tau$, the mixing ratio threshold.
+    The $y$-axis is the percent area having mixing ratio at least $\tau$ relative to that at the initial time.
+  }
+  \label{fig:filament}
+\end{figure}
+
+% bash run-mixing.sh > mixing-0.txt
+% hy figs-adv-diag.hy figs-mixing mixing-0.txt
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=1\linewidth]{mixing-ne20.png}
+  \caption{
+    Mixing diagnostic, following Sect.~3.5 of TS12.
+    Compare with Figs.~11--14 in TR14.
+    This figure shows results for dynamics-grid resolution of 1.5$^\circ$.
+    $l_o$ is exactly 0 in all cases because shape preservation is on, and so is not shown.
+    See the text for further details.}
+  \label{fig:mixing-ne20}
+\end{figure}
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=1\linewidth]{mixing-ne40.png}
+  \caption{Same as Fig.~\ref{fig:mixing-ne20} but with dynamics-grid resolution $0.75^\circ$.}
+  \label{fig:mixing-ne40}
+\end{figure}
+
+% bash run-img-filament.sh > filament-imgs-0.txt
+% hy figs-adv-diag.hy img-filament filament-imgs-0.txt filament-imgs
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=1\linewidth]{slo-midpoint}
+  \caption{
+    Images of the slotted cylinders IC advected by the nondivergent flow at the simulation's midpoint.
+    Each column corresponds to a spatial resolution and time step length configuration,
+    as stated at the top of each column.
+    Each row corresponds to a particular value of $\npt$, as stated in the text at the top-right of each image.
+    We omit $\npt=12$ results for the $0.75^\circ$ resolution because, at the resolution of the figure, they are essentially identical to the $\npt=8$ images.
+  }
+  \label{fig:slocyl-midpoint}
+\end{figure}
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=1\linewidth]{slo-finpoint}
+  \caption{
+    Same as Fig.~\ref{fig:slocyl-midpoint} but for the simulation's final point.
+    Error measures are printed at the bottom-left of each image; see text for details.
+  }
+  \label{fig:slocyl-finpoint}
+\end{figure}
+
+% bash run-pg-srcterm-midpoint-test.sh > pg-srcterm-midpoint-test-nbdy3-1.txt
+% hy figs-adv-diag.hy fig-pg-mimic-src-term pg-srcterm-midpoint-test-nbdy3-1.txt
+% nbdy3 => edge_np = interior_np = 3
+\begin{figure}[tbh]
+  \centering
+  % pg = np and pg = 2
+  \includegraphics[width=0.5\linewidth]{acc-pg-mimic-src-term-midpoint-nondivergent-gau-interp-pp-fac5-l2}
+  \caption{
+    Validation of the remap of tendencies from physics to tracer grids and state from tracer to dynamics grids.
+    See Sect.~\ref{sec:results:sources} for a description of the problem.
+  }
+  \label{fig:pg-mimic-src-term}
+\end{figure}
+
+% bash run-toychem-diagnostic.sh > toychem-diagnostic-nbdy3-0.txt
+% hy figs-adv-diag.hy fig-toychem-diagnostic toychem-diagnostic-nbdy3-0.txt
+\begin{figure}[tbh]
+  \centering
+  % pg = np-2
+  \includegraphics[width=0.5\linewidth]{toychem-diagnostic}
+  \caption{
+    Toy chemistry diagnostic values as a function of time for ten cycles of the nondivergent flow.
+    Time is on the $x$-axis and measured in cycles.
+    Diagnostic values for the $l_2$-norm (solid lines) and $l_\infty$-norm (dashed lines) are on the $y$-axis.
+    Markers as listed in the bottom legend are placed at the start of each cycle to differentiate the curves.
+  }
+  \label{fig:toychem-diagnostic}
+\end{figure}
+
+% bash run-toychem-imgs.sh
+% hy figs-adv-diag.hy fig-toychem-finpoint toychem-imgs-nbdy3
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=1\linewidth]{toychem-finpoint}
+  \caption{
+    Images of the monatomic tracer at the end of the first cycle.
+    Text at the lower left of each image states the configuration.
+    Text at the upper right reports global extremal values.
+  }
+  \label{fig:toychem-finpoint}
+\end{figure}
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=1\linewidth]{toychem-finpoint-diagnostic}
+  \caption{
+    Same as Fig.~\ref{fig:toychem-finpoint}, but now the images are of $(X_T - \bar{X}_T)/\bar{X}_T$.
+  }
+  \label{fig:toychem-finpoint-diagnostic}
+\end{figure}
+
+% bash run-isl-footprint.sh > isl-footprint-1.txt
+% hy figs-adv-diag.hy fig-comm-footprint isl-footprint-1.txt
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=0.5\linewidth]{isl-footprint}
+  \caption{
+    Communication volume, in number of real scalars transmitted in $q$-messages
+    per tracer per element per time step ($y$-axis)
+    vs.~time in days of the simulation ($x$-axis),
+    in the case of one element per process,
+    for the nondivergent flow,
+    with long (left) and short (right) time steps.
+    Statistic and $\npt$ line patterns are stated in the legends.
+  }
+  \label{fig:footprint}
+\end{figure}
+
+% code branch: https://github.com/ambrad/E3SM/releases/tag/islet-2d-paper-summit-sl-gpu-timings
+% data: https://github.com/E3SM-Project/perf-data/tree/main/nhxx-sl-summit-mar2021
+% generate a table of data:
+%   hy sl-gpu-perf.hy table "perf-data/nhxx-sl-summit-mar2021/data/qsize10/*"
+%   hy sl-gpu-perf.hy table "perf-data/nhxx-sl-summit-mar2021/data/qsize40/*"
+% we use these table entries to make the figure in addition to the SC20 paper's
+% data:
+% >>> ne 1024 qsize 10 nmax  4096 alg Eul    main_loop
+% 1024  383.49   0.29
+% 2048  225.43   0.50
+% 4096  132.30   0.85
+% 4600  120.84   0.93
+% >>> ne 1024 qsize 10 nmax  4096 alg SL     main_loop
+% 1024  253.64   0.44  1.51
+% 2048  146.66   0.77  1.54
+% 4096   89.18   1.26  1.48
+% 4600   81.39   1.38  1.48
+% >>> ne 1024 qsize 40 nmax  4096 alg Eul    main_loop
+% 2048  461.20   0.24
+% 4096  274.52   0.41
+% 4600  257.60   0.44
+% >>> ne 1024 qsize 40 nmax  4096 alg SL     main_loop
+% 2048  167.22   0.67  2.76
+% 4096   99.70   1.13  2.75
+% 4600   90.23   1.24  2.85
+% hy sl-gpu-perf.hy fig
+\begin{figure}[tbh]
+  \centering
+  \includegraphics[width=0.5\linewidth]{sl-gpu-perf-032521-islet}
+  \caption{
+    Performance comparison of SL transport with $\npv=\npt=4$ vs.~Eulerian transport
+    in the E3SM Atmosphere Model's dynamical core on the Summit supercomputer.
+    The $x$-axis is number of NVIDIA V100 GPUs on Summit used in a run;
+    the $y$-axis is dycore throughput reported in simulated years per wallclock day (SYPD).
+    The black curves are for Eulerian transport; the red, for SL.
+    Dashed lines are for 40 tracers; the solid lines and the dotted black line are for 10.
+    A number above a data point reports the $y$-value of that point.
+  }
+  \label{fig:summit-perf}
+\end{figure}
diff --git a/methods/islet/figures/figsutils.hy b/methods/islet/figures/figsutils.hy
new file mode 100644
index 0000000..69e830e
--- /dev/null
+++ b/methods/islet/figures/figsutils.hy
@@ -0,0 +1,225 @@
+(require [amb3 [*]])
+(import amb3 [amb3 [*]] struct math)
+
+(defn get-context []  ; build and return a Box of shared settings: directories, option tuples, and per-np plot colors/markers
+  (sv c (Box)
+      c.data-dir "data/"
+      c.fig-dir "figs/"
+      c.odes (, "rotate" "divergent" "nondivergent")
+      c.cdrs (, (, "none" "none") (, "caas-node" "caas") (, "caas" "caas"))
+      c.nstepfacs (, 1 5)
+      c.methods (, "pcsl" "pcslu")
+      c.cycs (, 1 100)
+      c.timeints (, "exact" "interp")
+      c.nes (, 5 10 20 40 80)
+      c.nps (, 4 6 8 9 12) ;(, 4 5 6 7 8 9 10 11 12 13 16)
+      c.ics (, "gau" "cos" "slo")
+      c.npclrs  {4 "g" 5 "m" 6 "r" 7 "c" 8 "k" 9 "b" 10 "g" 11 "c" 12 "r" 13 "m" 16 "g"}  ; color letter per np
+      c.npmarks {4 "o" 5 "x" 6 "s" 7 "x" 8 "p" 9 "+" 10 "." 11 "^" 12 "." 13 "*" 16 "."})  ; marker per np
+  c)
+
+(defn flow-short2long [flow]  ; map a short flow key to its display name; get fails on a key not listed here
+  (get {"divergent" "divergent flow"
+        "nondivergent" "nondivergent flow"
+        "rotate" "solid-body rotation"} flow))
+
+(defn ic-short2long [ic]  ; map a short initial-condition key to its display name; get fails on a key not listed here
+  (get {"gau" "Gaussian hills"
+        "cos" "cosine bells"
+        "slo" "slotted cylinders"} ic))
+
+(defn nes->degstrs [nes]  ; return {:ne [...] :deg [...]}: the ne values that have a table entry, and their degree labels
+  (sv x [] xstr [])
+  (for [ne nes]
+    (sv deg (geton {5 6 10 3 20 "1.5" 40 "0.75" 80 "0.375" 160 "0.1875"} ne))  ; ne -> degrees; geton is defined elsewhere, presumably yielding None for a missing key
+    (when (none? deg) (continue))  ; ne values without a table entry are dropped
+    (.append x ne)
+    (.append xstr (.format "${}^{{\circ}}$" deg)))  ; doubled braces survive .format as literal braces
+  {:ne x :deg xstr})
+
+(defn cdr-name [short]  ; display name for a short CDR key; only "caas" and "caas-node" are listed
+  (get {"caas" "CAAS-CAAS" "caas-node" "CAAS-point"} short))
+
+;;; slmmir I/O
+
+(defn read-slmmir-io-arrays [fname &optional beg end stride]  ; read arrays from binary file fname; return those at record indices beg, beg+stride, ...
+  (defn vis-read-array [f]  ; read one record: int ndim, then ndim int dims, then prod(dims) values; None at end of file
+    (sv b (.read f 4))
+    (when (zero? (len b)) (return None))
+    (sv ndim (first (struct.unpack "@i" b))  ; "@i": native byte order and size
+        b (.read f (* 4 ndim))
+        dims (struct.unpack (+ "@" (* "i" ndim)) b))
+    (.reshape (npy.fromfile f :count (npy.prod dims)) dims))  ; no dtype is passed, so numpy's default for fromfile applies
+  (svifn beg 0 end -1 stride 1)  ; defaults; NOTE(review): end is never used below, so reading always continues to end of file
+  (sv d [])
+  (with [f (open fname "rb")]
+    (sv i 0 i-next beg)
+    (while True
+      (sv a (vis-read-array f))  ; every record is read, including those not kept
+      (when (none? a) (break))
+      (when (= i i-next)
+        (.append d a)
+        (sv i-next (+ i-next stride)))
+      (inc! i)))
+  d)
+
+(defn draw-slmmir-image [f &optional vmin vmax ncolor colorsym switch-halves]  ; draw 2-D array f with imshow on the current axes; ticks are unlabeled
+  (svifn vmin -0.05 vmax 1.15 ncolor 24 colorsym False switch-halves True)  ; defaults for the optional arguments
+  (sv (, m n) f.shape
+      lon-idx (if switch-halves  ; column order: second half of the columns first, then the first half
+                  (+ (list (range (// n 2) n)) (list (range 0 (// n 2))))
+                  (s-all))
+      x (* (npy.array [(/ -0.5 n) 0.25 0.5 0.75 1]) n)  ; tick positions in column units; the first is -0.5
+      xticks [] ;(, "0" "$\pi$/2" "$\pi$" "3$\pi$/2" "$2\pi$")
+      y (* (npy.array [0 0.5 1]) m)
+      yticks []; (, "-$\pi$/2" "0" "$\pi$/2")
+      fs 8
+      colors (if colorsym  ; blue-white-red list when colorsym is set, otherwise a five-color list
+                 [(, 0 0 1) (, 1 1 1) (, 1 0 0)]
+                 [(, .85 .85 .95) (, 0 0 1) (, 0 1 0) (, 1 1 0) (, 1 0 0)]))
+  (if 0  ; the contour branch is disabled by the constant test; imshow is always used
+      (pl.contour (get f (, (s-all) lon-idx))
+                  (npy.linspace -0.05 1.15 25))
+      (pl.imshow (get f (, (s-all) lon-idx))
+                 (matplotlib.colors.LinearSegmentedColormap.from-list
+                   "filament" colors ncolor)
+                 :vmin vmin :vmax vmax))
+  (pl.xlim (, (first x) (last x))) (pl.xticks x xticks :fontsize fs)
+  (pl.ylim (, (first y) (last y))) (pl.yticks y yticks :fontsize fs)
+  (my-grid :ls ":"))
+
+;;; parse slmmir text output
+
+(defn parse-cmd [cmd &optional map-nstepfac]  ; parse "-key value" options from a command string into a keyword-keyed dict
+  (sv toks (.split cmd))
+  (defn int-or-none [x]  ; int conversion that lets a missing value through
+    (unless (none? x) (int x)))
+  (defn get-key-val [key]  ; token following the first "-<key>" token; no explicit return when key is absent
+    (for [(, i t) (enumerate toks)]
+      (unless (and (= (first t) "-") (= (cut t 1) key)) (continue))
+      (return (get toks (inc i)))))
+  (sv keys {"ode" str "ne" int "np" int "nsteps" int "prefine" int  ; option name -> converter applied to its value
+            "mono" str "lim" str "timeint" str "method" str "pg" int-or-none}
+      d {})
+  (for [e (.items keys)]
+    (assoc d (keyword (first e)) ((second e) (get-key-val (first e)))))
+  (sv nstepfac (/ (:nsteps d) (:ne d) 6))  ; nsteps / ne / 6
+  (unless (none? map-nstepfac) (sv nstepfac (map-nstepfac nstepfac)))  ; optional caller-supplied remapping
+  (assoc d :nstepfac (int nstepfac))
+  (when (= (:timeint d) "exact") (assoc d :prefine 0))  ; :prefine is forced to 0 for the "exact" time integrator
+  d)
+
+(defn cmd->key-base [c]  ; tuple key built from a parse-cmd dict; :pg is inserted before :np only when it is not None
+  (sv c1 (, (:timeint c) (:ode c) (:nstepfac c) (:method c) (:mono c) (:lim c)
+            (:prefine c)))
+  (if (none? (:pg c))
+      (+ c1 (, (:np c)))
+      (+ c1 (, (:pg c) (:np c)))))
+
+(defn parse-midpoint-check [ln]  ; keep fields 2, 4, 5 of the line as :ic, :l1, :l2; sscanf is defined elsewhere (s/f presumably string/float)
+  (sv (, - ic - l1 l2) (sscanf ln "s,s,s,f,f"))
+  {:ic ic :l1 l1 :l2 l2})
+
+(defn parse-C [ln]  ; split one line with sscanf (defined elsewhere) and return the named fields as a dict
+  (sv (, - ic - masscons - limmin limmax - l1 l2 li - massredist massdisc)  ; fields bound to - are discarded
+      (sscanf ln "s,s,s,f,s,f,f,s,f,f,f,s,f,f"))
+  {:ic ic :masscons masscons :limerr (, limmin limmax)
+   :l1 l1 :l2 l2 :li li :massredist massredist :massdisc massdisc})
+
+(defn parse-bakeoff-diag [d ln timeint]  ; fold one diagnostic line into dict d (created when d is None) and return it
+  (defn parse-mixing [ln]  ; skip the first 4 characters, then keep fields 2, 4, 6 as :lr, :lu, :lo
+    (sv (, - lr - lu - lo) (sscanf (cut ln 4) "s,f,s,f,s,f"))
+    {:lr lr :lu lu :lo lo})
+  (defn parse-arr [ln]  ; skip the first 8 characters, then read whitespace-separated floats
+    (sv toks (.split (cut ln 8)))
+    (lfor t toks (float t)))
+  (when (none? d) (sv d {}))
+  (assoc d :done False)  ; reset on every call; set again below by the closing line of a group
+  (cond [(in "   l_r" ln) (assoc d :mixing    (parse-mixing ln))]  ; dispatch on the marker text found in the line
+        [(in "me l_r" ln) (assoc d :me-mixing (parse-mixing ln))]
+        [(in "   thr" ln) (assoc d :thr       (parse-arr ln))]
+        [(in "   fil" ln) (assoc d :fil       (parse-arr ln) :done (= timeint "exact"))]  ; closes the group only when timeint is "exact"
+        [(in "me fil" ln) (assoc d :me-fil    (parse-arr ln) :done True)])
+  d)
+
+;;; parse and write basis strings from search
+
+(defn offst->Nodes [subnp offst]  ; entry i becomes the list offst[i], offst[i]+1, ..., offst[i]+subnp[i]-1
+  (sv nodes [])
+  (for [(, i e) (enumerate subnp)]
+    (sv os (get offst i))
+    (.append nodes (list (range os (+ os e)))))
+  nodes)
+
+(defn Nodes->string [np nodes]  ; serialize as "<np> <bdy> | <i> <count>: <node> ..." with one "|" group per node list
+  (sv bdy 1 s (.format "{:d} {:d}" np bdy))  ; the second header field is always written as 1
+  (for [i (range (len nodes))]
+    (+= s (.format " | {:d} {:d}:" i (len (get nodes i))))
+    (for [e (get nodes i)] (+= s (.format " {:d}" e))))
+  s)
+
+(defn string->Nodes [basis-str]  ; inverse of Nodes->string: return (, np nodes) parsed from the basis string
+  (sv toks (.split basis-str)
+      np (int (first toks))
+      on-bdy (int (second toks))
+      nodes [] n [] ctr 0 i 3 start True)  ; i starts at 3, just past the header fields and the first "|"
+  (assert on-bdy)  ; only strings with a nonzero second field are accepted
+  (while (< i (len toks))
+    (sv t (get toks i))
+    (cond [(= t "|")  ; group separator: store the finished list and start a new one
+           (.append nodes n)
+           (sv n [] start True)]
+          [start  ; first token of a group is its index, which must count up from 0
+           (assert (= (int t) ctr))
+           (inc! ctr)
+           (inc! i)  ; extra step over the "<count>:" token
+           (sv start False)]
+          [:else
+           (.append n (int t))])
+    (inc! i))
+  (.append nodes n)  ; the last group has no trailing "|"
+  (, np nodes))
+
+(defn offset-nodal? [nodes]
+  ;; True iff no node list in nodes has a step larger than 1 between neighboring entries.
+  (for [n nodes]  ; lists with fewer than 2 entries have no steps; skip them so npy.max never sees an empty array
+    (when (and (> (len n) 1) (> (npy.max (npy.diff (npy.array n))) 1)) (return False)))
+  True)
+
+(defn parse-ints [s] (lfor t (.split s) (int t)))  ; whitespace-separated ints in s, as a list
+
+(defn parse-search-offset-nodal-subset-line [ln]  ; parse one search-output line that carries "subnp" and "offst" lists into a dict
+  (sv (, - meam1 - - - - - - - wtr - npm1 npm2 npm3 - pum - - np)  ; fields bound to - are discarded; sscanf is defined elsewhere
+      (sscanf ln "s,f,s,s,s,s,s,s,s,f,s,f,f,f,s,f,s,s,i")
+      p1 (.find ln "subnp ")
+      p2 (.find ln "offst ")
+      subnp (parse-ints (cut ln (+ p1 5) p2))  ; +5 starts on the space after "subnp"; the split in parse-ints ignores it
+      offst (parse-ints (cut ln (+ p2 5))))
+  {:txt ln :np np :meam1 meam1 :wtr wtr :npm (, npm1 npm2 npm3) :pum pum
+   :nodes (offst->Nodes subnp offst) :type :offst-nodal-subset})
+
+(defn parse-search-nodal-subset-line [ln]  ; parse one search-output line that carries "subnp" and "nodes" lists into a dict
+  (defn parse-nodes [s]  ; split s into lists of ints, one list per "|"-separated group
+    (sv toks (.split s) nodess [] i 0)
+    (while (< i (len toks))
+      (inc! i)  ; steps over the first token of s, and over each "|" on later passes
+      (sv nodes [])
+      (.append nodess nodes)
+      (while (and (< i (len toks)) (!= (nth toks i) "|"))
+        (.append nodes (int (nth toks i)))
+        (inc! i)))
+    nodess)
+  (sv (, - meam1 - - - wtr - npm1 npm2 npm3 - pum - - np)  ; fields bound to - are discarded; sscanf is defined elsewhere
+      (sscanf ln "s,f,s,s,s,f,s,f,f,f,s,f,s,s,i")
+      p1 (.find ln "subnp ")
+      p2 (.find ln "nodes ")
+      subnp (parse-ints (cut ln (+ p1 5) p2))  ; parsed, but not part of the returned dict
+      nodes (parse-nodes (cut ln (+ p2 6))))
+  {:txt ln :np np :meam1 meam1 :wtr wtr :npm (, npm1 npm2 npm3) :pum pum
+   :nodes nodes :type :nodal-subset})
+
+;; Parse one output line from search findoffsetnodal|findnodal.
+(defn parse-search-basis-line [ln]  ; lines containing " offst " go to the offset-nodal parser, all others to the nodal-subset parser
+  (if (in " offst " ln)
+      (parse-search-offset-nodal-subset-line ln)
+      (parse-search-nodal-subset-line ln)))
diff --git a/methods/islet/figures/islet.hy b/methods/islet/figures/islet.hy
new file mode 100644
index 0000000..0fca082
--- /dev/null
+++ b/methods/islet/figures/islet.hy
@@ -0,0 +1,41 @@
+(require [amb3 [*]])
+(import amb3 [amb3 [*]]
+        [scipy.linalg :as linalg]
+        scipy.integrate
+        re math sys ctypes)
+
+(defn nelem [xb yb]  ; (len(xb) - 1) * (len(yb) - 1)
+  (* (dec (len xb)) (dec (len yb))))
+
+(defn ndof [method ne np]  ; ne * (np - 1)^2 for method < 2, ne for method 2; any other method raises via raisefmt
+  (cond [(< method 2) (* ne (** (dec np) 2))]
+        [(= method 2) ne]
+        [:else (raisefmt "nope")]))
+
+(defclass Islet []  ; thin wrapper over the shared library "libislet"
+  (defn --init-- [me]
+    (try (sv lib (npy.ctypeslib.load-library "libislet" ".")  ; looked up in the current directory
+             me.lib lib)
+         (except [e [Exception]]
+           (print e)
+           (sv me.lib None))))  ; load failure is only printed; the methods below then have no library to call
+  (defn unittest [me]  ; call the library's unit-test entry point
+    (me.lib.islet-unittest))
+  (defn eval [me method np x]  ; return a (len x) by np array filled in by the library's eval-interpolant for the points x
+    (sv c-int ctypes.c-int
+        y (npy.zeros (, (len x) np)))
+    (me.lib.eval-interpolant (c-int method) (c-int np) (c-int (len x))
+                             (as-ctypes x) (as-ctypes y))  ; y is the output buffer; as-ctypes is defined elsewhere
+    y)
+  (defn get-xnodes [me method np]  ; return a length-np array filled in by the library's get-xnodes
+    (sv xnodes (npy.zeros np))
+    (me.lib.get-xnodes (ctypes.c-int method) (ctypes.c-int np) (as-ctypes xnodes))
+    xnodes)
+  (defn calc-xnodes-metrics-from-basis-string [me basis]  ; return a length-3 float array filled in by the library for the basis string
+    (sv metrics (npy.zeros 3 :dtype float))
+    (me.lib.calc-xnodes-metrics-from-basis-string
+      (str-ctypes basis) (as-ctypes metrics))
+    metrics))
+
+(defn diff [x] (- (cut x 1) (cut x 0 -1)))  ; x[1:] - x[:-1]; the subtraction requires slices that support it (e.g. numpy arrays)
+
diff --git a/methods/islet/figures/poly.hy b/methods/islet/figures/poly.hy
new file mode 100644
index 0000000..e1e2a2d
--- /dev/null
+++ b/methods/islet/figures/poly.hy
@@ -0,0 +1,190 @@
+(require [amb3 [*]])
+(import [amb3 [*]]
+        [numpy :as npy]
+        math sys)
+
+(defn eval-lagrange-poly [x y xi]  ; sum_i y[i] L_i(xi), with L_i the Lagrange basis on nodes x
+  (setv n (len x)
+        scalar (pod-number? xi)
+        acc (if scalar 0 (npy.zeros (len xi))))
+  (for [i (range n)]
+    (setv yv (get y i) xn (get x i))
+    (when (= yv 0) (continue))  ; a zero nodal value contributes nothing
+    (setv basis (if scalar 1 (npy.ones (len xi))))
+    (for [j (range n)]
+      (unless (= i j)  ; L_i is the product over all nodes except the i-th
+        (*= basis (/ (- xi (get x j)) (- xn (get x j))))))
+    (+= acc (* yv basis)))
+  acc)
+
+(defn eval-lagrange-poly-basis [x xi]  ; entry/row i holds the Lagrange basis function L_i at xi
+  (setv n (len x)
+        scalar (pod-number? xi)
+        out (if scalar (npy.zeros n) (npy.zeros (, n (len xi)))))
+  (for [i (range n)]
+    (setv xn (get x i)
+          basis (if scalar 1 (npy.ones (len xi))))
+    (for [j (range n)]
+      (unless (= i j)  ; skip the i-th factor
+        (*= basis (/ (- xi (get x j)) (- xn (get x j))))))
+    (setv (get out i) basis))
+  out)
+
+(defn eval-poly [coef x]  ; Horner evaluation of sum_p coef[p] x^p
+  (setv scalar (pod-number? x)
+        acc (* (last coef) (if scalar 1 (npy.ones (len x)))))
+  (for [c (reversed (cut coef 0 -1))]  ; remaining coefficients, highest degree first
+    (setv acc (+ (* x acc) c)))
+  acc)
+
+(defn eval-lagrange-poly-basis-derivative [x xi]  ; entry/row i holds dL_i/dxi at xi
+  (setv n (len x)
+        scalar (pod-number? xi)
+        out (if scalar (npy.zeros n) (npy.zeros (, n (len xi)))))
+  (for [i (range n)]
+    ;; Product rule: each term swaps the factor (xi - x[j]) for its derivative, 1.
+    (setv xn (get x i)
+          total (if scalar 0 (npy.zeros (len xi))))
+    (for [j (range n)]
+      (when (= j i) (continue))
+      (setv term (if scalar 1 (npy.ones (len xi))))
+      (for [k (range n)]
+        (unless (= k i)
+          (setv numer (if (= k j) 1 (- xi (get x k))))
+          (*= term (/ numer (- xn (get x k))))))
+      (+= total term))
+    (setv (get out i) total))
+  out)
+
+(defn eval-lagrange-poly-derivative [x y xi]  ; sum_i y[i] dL_i/dxi at xi
+  (setv n (len x)
+        scalar (pod-number? xi)
+        acc (if scalar 0 (npy.zeros (len xi))))
+  (for [i (range n)]
+    ;; total accumulates dL_i/dxi by the product rule, one term per j != i.
+    (setv xn (get x i)
+          total (if scalar 0 (npy.zeros (len xi))))
+    (for [j (range n)]
+      (when (= j i) (continue))
+      (setv term (if scalar 1 (npy.ones (len xi))))
+      (for [k (range n)]
+        (unless (= k i)
+          (setv numer (if (= k j) 1 (- xi (get x k))))
+          (*= term (/ numer (- xn (get x k))))))
+      (+= total term))
+    (+= acc (* (get y i) total)))
+  acc)
+
+(defn get-gll-x [np]  ; Gauss-Lobatto-Legendre (GLL) nodes on [-1, 1] as an array, np = 1..12
+  (npy.array
+   (case/eq np
+            [1 (, 0)]
+            [2 (, -1 1)]
+            [3 (, -1 0 1)]
+            [4 (sdo (setv oosqrt5 (/ (math.sqrt 5)))  ; one-argument / gives 1/sqrt(5)
+                    (, -1 (- oosqrt5) oosqrt5 1))]
+            [5 (sdo (setv sqrt3o7 (math.sqrt (/ 3 7)))
+                    (, -1 (- sqrt3o7) 0 sqrt3o7 1))]
+            [6 (sdo (setv e (fn [sgn]  ; sqrt(1/3 + sgn*2*sqrt(7)/21)
+                              (math.sqrt (+ (/ 1 3)
+                                            (* sgn 2 (/ (math.sqrt 7)
+                                                        21)))))
+                          np6a (e 1)
+                          np6b (e -1))
+                    (, -1 (- np6a) (- np6b) np6b np6a 1))]
+            [7 (sdo (setv e (fn [sgn]  ; sqrt((5 + sgn*2*sqrt(5/3))/11)
+                              (math.sqrt (/ (+ 5 (* sgn 2 (math.sqrt (/ 5 3))))
+                                            11)))
+                          np7a (e 1)
+                          np7b (e -1))
+                    (, -1 (- np7a) (- np7b) 0 np7b np7a 1))]
+            [8 (sdo (setv c1 0.8717401485096066153  ; np >= 8: tabulated values, no closed form used
+                          c2 0.59170018143314230214
+                          c3 0.20929921790247886877)
+                    (, -1 (- c1) (- c2) (- c3) c3 c2 c1 1))]
+            [9 (sdo (setv c1 0.89975799541146015731
+                          c2 0.67718627951073775345
+                          c3 0.36311746382617815871)
+                    (, -1 (- c1) (- c2) (- c3) 0 c3 c2 c1 1))]
+            [10 (sdo (setv c1 0.91953390816645881383
+                           c2 0.73877386510550507500
+                           c3 0.47792494981044449566
+                           c4 0.16527895766638702463)
+                     (, -1 (- c1) (- c2) (- c3) (- c4) c4 c3 c2 c1 1))]
+            [11 (sdo (setv c1 0.93400143040805913433
+                           c2 0.78448347366314441862
+                           c3 0.56523532699620500647
+                           c4 0.29575813558693939143)
+                     (, -1 (- c1) (- c2) (- c3) (- c4) 0 c4 c3 c2 c1 1))]
+            [12 (sdo (setv c1 0.94489927222288222341
+                           c2 0.81927932164400667835
+                           c3 0.63287615303186067766
+                           c4 0.39953094096534893226
+                           c5 0.13655293285492755486)
+                     (, -1 (- c1) (- c2) (- c3) (- c4) (- c5) c5 c4 c3 c2 c1 1))]
+            [:else (raise (Exception (.format "bad np: {}" np)))])))  ; any other np is rejected
+
+(defn get-gll-w [np]  ; Gauss-Lobatto-Legendre weights as an array, np = 1..12; they sum to 2
+  (defn reverse [coll]
+    (cut coll None None -1))
+  (defn expand-sym [np a]  ; mirror the stored first half; for odd np the middle entry is not repeated
+    (if (zero? (% np 2))
+      (+ a (reverse a))
+      (+ a (reverse (cut a 0 -1)))))
+  (when (= np 1)  ; single node: return an array, like every other np, not a bare tuple
+    (return (npy.array (, 2))))
+  (npy.array
+   (expand-sym
+    np (case/eq
+        np
+        [2 (, 1)]
+        [3 (, (/ 1 3) (/ 4 3))]
+        [4 (, (/ 1 6) (/ 5 6))]
+        [5 (, (/ 1 10) (/ 49 90) (/ 32 45))]
+        [6 (sv v (math.sqrt 7))
+         (, (/ 1 15) (/ (- 14 v) 30) (/ (+ 14 v) 30))]
+        [7 (sv v (* 7 (math.sqrt 15)))
+         (, (/ 1 21) (/ (- 124 v) 350) (/ (+ 124 v) 350) (/ 256 525))]
+        [ 8 (, 0.03571428571428571429 0.21070422714350603938 0.34112269248350436476
+               0.41245879465870388157)]  ; np >= 8: tabulated; no stray "," glued to the literals
+        [ 9 (, 0.02777777777777777778 0.16549536156080552505 0.27453871250016173528
+               0.34642851097304634512 0.37151927437641723356)]
+        [10 (, 0.02222222222222222222 0.13330599085107011113 0.22488934206312645212
+               0.29204268367968375788 0.32753976118389745666)]
+        [11 (, 0.01818181818181818182 0.10961227326699486446 0.18716988178030520411
+               0.24804810426402831404 0.28687912477900808868 0.30021759545569069379)]
+        [12 (, 0.01515151515151515152 0.09168451741319613067 0.15797470556437011517
+               0.21250841776102114536 0.25127560319920128029 0.27140524091069617700)]
+        [:else (raisefmt "bad np: {}" np)]))))
+
+(if-main  ; command-line self-tests and plots for this module
+ (when-inp ["test-lag-basis" {:np int}]  ; vectorized vs. scalar evaluation must agree exactly
+   (for [f (, eval-lagrange-poly-basis-derivative eval-lagrange-poly-basis)]
+     (setv x (npy.linspace -1 1 1000)
+           v (f (get-gll-x np) x)
+           v1 (f (get-gll-x np) (get x 11)))
+     (expect (npy.all (= v1 (get v (, (slice None) 11))))))  ; inside the loop: checks both f
+   (pl.plot x (.transpose v) "-")  ; v is from the last f, the basis functions
+   (dispfig "test-lag-basis"))
+
+ (when-inp ["plot-lagp" {:np int}]
+   ;; just plot lagrange poly basis functions
+   (setv x-gll (get-gll-x np)
+         y-gll (npy.random.rand np)  ; overwritten by the unit vectors in the loop below
+         xi (npy.linspace -1 1 100)
+         clrs "bgrcmybgrcmybgrcmy")
+   (with [(pl-plot (, 6 6) "csl-plot-lagp")]
+     (for [i (range np)]
+       (setv y-gll (npy.zeros np)  ; unit vector e_i selects basis function i
+             (get y-gll i) 1
+             yi (eval-lagrange-poly x-gll y-gll xi)
+             yip (eval-lagrange-poly-derivative x-gll y-gll xi)
+             c (get clrs i))
+       (pl.plot xi yi (+ c "-")  ; solid: basis function; dashed: its derivative
+                xi yip (+ c "--")))
+     (pl.plot x-gll (npy.zeros np) "ko")))  ; mark the nodes on the x axis
+
+ (when-inp ["test-gll-w"]  ; weights must sum to 2; only np = 1..7 are checked
+   (for [np (range 1 8)]
+     (sv w (get-gll-w np))
+     (assert (<= (reldif 2 (sum w)) (* 1 (epsilon)))))))
diff --git a/methods/islet/figures/run-accuracy.sh b/methods/islet/figures/run-accuracy.sh
new file mode 100644
index 0000000..9b2639e
--- /dev/null
+++ b/methods/islet/figures/run-accuracy.sh
@@ -0,0 +1,58 @@
+cat $0  # echo this script into the output
+
+exe=../../slmm/slmmir
+
+ctr=0
+run() {  # build one slmmir command from the current loop variables, echo it, run it
+    ctr=$((ctr + 1))
+    cmd="OMP_NUM_THREADS=48 KMP_AFFINITY=balanced $exe -method pcsl -ic gaussianhills -ic cosinebells -ic correlatedcosinebells -ic slottedcylinders -we 0 -rit -dmc eh -T 12 -d2c -lauritzen -midpoint-check -ode $ode -ne $ne -np $np -nsteps $nstep -prefine $prefine -mono $cdrglb -lim $cdrlcl -timeint $timeint"
+    grepcmd='grep "^C \|^L \|^M "'
+    echo "cmd> $ctr $cmd"
+    eval "$cmd | $grepcmd"
+}
+
+cdrglbs=(none caas-node);  # index 0: no -mono/-lim setting; index 1: caas-node/caas
+cdrlcls=(none caas     );
+for ne in 5 10 20 40 80; do
+    for nstepfac in 1 5; do
+        for ode in rotate nondivergent divergent; do
+            # 6*ne steps, times the step factor
+            nstep=$((ne * 6 * nstepfac))
+            timeint=exact
+            for icdr in 1; do  # np=4 with caas-node/caas
+                cdrglb=${cdrglbs[$icdr]}
+                cdrlcl=${cdrlcls[$icdr]}
+                prefine=0
+                np=4
+                run
+            done
+            for icdr in 0 1; do  # np=4 with none/none, then caas/caas
+                cdrglb=${cdrglbs[$icdr]}
+                cdrlcl=${cdrlcls[$icdr]}
+                prefine=0
+                for np in 4; do
+                    cdrglb=${cdrglb:0:4}  # first 4 chars: caas-node becomes caas, none is unchanged
+                    run
+                done
+            done
+            timeint=interp
+            for icdr in 0 1; do  # np 5..13 and 16 with -timeint interp -prefine 5
+                cdrglb=${cdrglbs[$icdr]}
+                cdrlcl=${cdrlcls[$icdr]}
+                prefine=5
+                for np in $(seq 5 13) 16; do
+                    run
+                done
+            done
+            timeint=exact
+            for icdr in 0; do  # np 5..13 and 16 with -timeint exact, none/none only
+                cdrglb=${cdrglbs[$icdr]}
+                cdrlcl=${cdrlcls[$icdr]}
+                prefine=0
+                for np in $(seq 5 13) 16; do
+                    run
+                done
+            done
+        done
+    done
+done
diff --git a/methods/islet/figures/run-img-filament.sh b/methods/islet/figures/run-img-filament.sh
new file mode 100644
index 0000000..5a2b8fb
--- /dev/null
+++ b/methods/islet/figures/run-img-filament.sh
@@ -0,0 +1,34 @@
+cat $0  # echo this script into the output
+exe=../../slmm/slmmir
+
+run() {  # build one slmmir command from the current variables, echo it, run it single-threaded
+    cmd="$exe -method pcsl -ode nondivergent -ic slottedcylinders -T 12 -ne $ne -nsteps $nsteps -timeint $timeint -nonunimesh 0 -np $np -dmc eh -mono $cdrglb -lim caas -lauritzen -we $we -io internal -o ../data/mar21/filament-imgs/ne$ne-np$np-nstep$nsteps-$timeint-$cdrglb-pr$prefine -res 256 -rit -prefine $prefine $d2c"
+    echo "cmd> $cmd"
+    eval "OMP_NUM_THREADS=1 $cmd"
+}
+
+d2c="-d2c -io-nodss"
+for ne in 20 40; do
+    for nstepfac in 1 5; do
+        # 6*ne steps, times the step factor
+        nsteps=$((ne * 6 * nstepfac))
+        we=$((nsteps / 2))  # passed as -we: half the step count
+        timeint=exact
+        prefine=0
+        cdrglb=caas
+        np=4
+        run
+        cdrglb=caas-node
+        run
+        timeint=interp
+        prefine=5
+        cdrglb=caas-node
+        for np in 6 8; do
+            run
+        done
+        if [ "$ne" -eq 20 ]; then  # np=12 is run only at ne=20
+            np=12
+            run
+        fi
+    done
+done
diff --git a/methods/islet/figures/run-isl-footprint.sh b/methods/islet/figures/run-isl-footprint.sh
new file mode 100644
index 0000000..8ac0081
--- /dev/null
+++ b/methods/islet/figures/run-isl-footprint.sh
@@ -0,0 +1,35 @@
+cat $0  # echo this script into the output
+
+exe=../../slmm/slmmir
+
+ctr=0
+run() {  # build one slmmir -footprint command, echo it, run it; ics is never set in this script
+    ctr=$((ctr + 1))
+    cmd="OMP_NUM_THREADS=48 KMP_AFFINITY=balanced $exe -method pcsl $ics -ic gaussianhills -we 0 -rit -dmc eh -T 12 -d2c -ode $ode -ne $ne -np $np -nsteps $nstep -prefine $prefine -mono $cdrglb -lim $cdrlcl -timeint $timeint -footprint"
+    grepcmd='grep "^C \|^L \|^M \|footprint>"'
+    echo "cmd> $ctr $cmd"
+    eval "$cmd | $grepcmd"
+}
+
+cdrlcl=caas
+ncycle=1  # not referenced anywhere in this script
+ode=nondivergent  # reassigned by the ode loop below
+for ne in 30; do
+    for nstepfac in 1 5; do
+        # 6*ne steps, times the step factor
+        nstep=$((ne * 6 * nstepfac))
+        for ode in nondivergent; do
+            timeint=exact
+            prefine=0
+            cdrglb=caas
+            np=4
+            run
+            timeint=interp
+            prefine=5
+            cdrglb=caas-node
+            for np in 6 8 12; do
+                run
+            done
+        done
+    done
+done
diff --git a/methods/islet/figures/run-mixing.sh b/methods/islet/figures/run-mixing.sh
new file mode 100644
index 0000000..a05f7e6
--- /dev/null
+++ b/methods/islet/figures/run-mixing.sh
@@ -0,0 +1,40 @@
+cat $0  # echo this script into the output
+
+exe=../../slmm/slmmir
+
+ctr=0
+run() {  # build one slmmir command writing under mixing-0/, echo it, run it
+    ctr=$((ctr + 1))
+    cmd="OMP_NUM_THREADS=48 KMP_AFFINITY=balanced $exe -method pcsl -ic gaussianhills -ic cosinebells -ic correlatedcosinebells -ic slottedcylinders -we 0 -rit -dmc eh -T 12 -d2c -lauritzen -lauritzen-io -ode $ode -ne $ne -np $np -nsteps $nstep -prefine $prefine -mono $cdrglb -lim $cdrlcl -timeint $timeint -o mixing-0/${ode}-$timeint-nsteps${nstep}-prefine${prefine}-$cdrglb-$cdrlcl-ne$ne-np$np"
+    grepcmd='grep "^C \|^L \|^M "'
+    echo "cmd> $ctr $cmd"
+    eval "$cmd | $grepcmd"
+}
+
+cdrglbs=(none caas-node);
+cdrlcls=(none caas     );
+for ne in 20 40; do
+    for nstepfac in 1 5; do
+        for ode in nondivergent; do
+            # 6*ne steps, times the step factor
+            nstep=$((ne * 6 * nstepfac))
+            icdr=1  # only the caas-node/caas pair is used here
+            timeint=exact
+            prefine=0
+            cdrglb=${cdrglbs[$icdr]}
+            cdrlcl=${cdrlcls[$icdr]}
+            np=4
+            run
+            timeint=interp
+            prefine=5
+            for np in 6 8 9 12; do
+                run
+            done
+            cdrglb=${cdrglb:0:4}  # first 4 chars: caas-node becomes caas
+            timeint=exact
+            prefine=0
+            np=4
+            run
+        done
+    done
+done
diff --git a/methods/islet/figures/run-pg-srcterm-midpoint-test.sh b/methods/islet/figures/run-pg-srcterm-midpoint-test.sh
new file mode 100644
index 0000000..4f6f4a5
--- /dev/null
+++ b/methods/islet/figures/run-pg-srcterm-midpoint-test.sh
@@ -0,0 +1,36 @@
+cat $0  # echo this script into the output
+
+exe=../../slmm/slmmir
+
+ctr=0
+function run {  # one slmmir command per distinct pg value for the current np; ctr counts run calls
+    ctr=$(expr $ctr + 1)
+    ics1="-ic gaussianhills -ic cosinebells -ic slottedcylinders"
+    ics2="-ic zero -ic zero -ic zero"
+    for pg in $(printf '%s\n' 2 $(expr $np - 2) $np | sort -nu); do  # sort -nu: np=4 gives 2 4, not 2 2 4
+        cmd="OMP_NUM_THREADS=48 KMP_AFFINITY=balanced $exe -method pcsl $ics1 -ic toychem1 -ic toychem2 $ics2 -we 0 -rit -dmc eh -T 12 -d2c -ode $ode -ne $ne -np $np -pg $pg -nsteps $nstep -prefine $prefine -mono $cdrglb -lim $cdrlcl -timeint $timeint -midpoint-check"
+        grepcmd='grep "^C \|^L \|^M "'
+        echo "cmd> $ctr $cmd"
+        eval "$cmd | $grepcmd"
+    done
+}
+
+cdrglb=caas-node
+cdrlcl=caas
+for ne in 5 10 20 40 80; do
+    for nstepfac in 1 5; do
+        for ode in nondivergent; do #divergent; do
+            nstep=$(expr $ne \* 6)  # 6*ne steps, then times the step factor
+            nstep=$(expr $nstep \* $nstepfac)
+            timeint=exact
+            prefine=0
+            np=4
+            run
+            timeint=interp
+            prefine=5
+            for np in 6 8 9 12; do
+                run
+            done
+        done
+    done
+done
diff --git a/methods/islet/figures/run-stability-cmp.sh b/methods/islet/figures/run-stability-cmp.sh
new file mode 100644
index 0000000..89e7237
--- /dev/null
+++ b/methods/islet/figures/run-stability-cmp.sh
@@ -0,0 +1,32 @@
+cat $0  # echo this script into the output
+
+exe=../../slmm/slmmir
+
+ctr=0
+run() {  # build one slmmir command for $method over 12*ncycle time units, echo it, run it
+    ctr=$((ctr + 1))
+    cmd="OMP_NUM_THREADS=48 KMP_AFFINITY=balanced $exe -method $method -ic gaussianhills -ic cosinebells -ic correlatedcosinebells -ic slottedcylinders -we 0 -rit -dmc eh -T $(expr 12 \* $ncycle) -d2c -ode $ode -ne $ne -np $np -nsteps $nstep -prefine $prefine -mono $cdrglb -lim $cdrlcl -timeint $timeint"
+    grepcmd='grep "^C \|^L \|^M "'
+    echo "cmd> $ctr $cmd"
+    eval "$cmd | $grepcmd"
+}
+
+cdrglb=caas-node
+cdrlcl=caas
+for ne in 5 10 20 40 80; do
+    for nstepfac in 1; do
+        for ode in divergent; do
+            # 6*ne steps, times the step factor
+            nstep=$((ne * 6 * nstepfac))
+            timeint=interp
+            prefine=5
+            ncycle=10  # the first method, pcslu, runs with ncycle=10
+            for method in pcslu pcsl; do
+                for np in 4 6 8 9 12; do
+                    run
+                done
+                ncycle=100  # takes effect for the next method, pcsl
+            done
+        done
+    done
+done
diff --git a/methods/islet/figures/run-toychem-diagnostic.sh b/methods/islet/figures/run-toychem-diagnostic.sh
new file mode 100644
index 0000000..3383846
--- /dev/null
+++ b/methods/islet/figures/run-toychem-diagnostic.sh
@@ -0,0 +1,34 @@
+cat $0  # echo this script into the output
+
+exe=../../slmm/slmmir
+
+ctr=0
+run() {  # two slmmir commands per call (pg = np-2 and pg = np); ics is never set in this script
+    ctr=$((ctr + 1))
+    for pg in $((np - 2)) $np; do
+        cmd="OMP_NUM_THREADS=48 KMP_AFFINITY=balanced $exe -method pcsl $ics -ic gaussianhills -ic toychem1 -ic toychem2 -we 0 -rit -dmc eh -T $(expr 12 \* $ncycle) -d2c -ode $ode -ne $ne -np $np -pg $pg -nsteps $nstep -prefine $prefine -mono $cdrglb -lim $cdrlcl -timeint $timeint"
+        grepcmd='grep "^C \|^L \|^M \|toy "'
+        echo "cmd> $ctr $cmd"
+        eval "$cmd | $grepcmd"
+    done
+}
+
+cdrlcl=caas
+ncycle=10
+nstep=576  # fixed step count, not derived from ne as in the sibling scripts
+for ne in 30; do
+    for ode in nondivergent; do
+        timeint=exact
+        prefine=0
+        cdrglb=caas
+        np=4
+        run
+        cdrglb=caas-node
+        run
+        timeint=interp
+        prefine=5
+        for np in 6 8 9 12; do
+            run
+        done
+    done
+done
diff --git a/methods/islet/figures/run-toychem-imgs.sh b/methods/islet/figures/run-toychem-imgs.sh
new file mode 100644
index 0000000..ae54dc8
--- /dev/null
+++ b/methods/islet/figures/run-toychem-imgs.sh
@@ -0,0 +1,23 @@
+exe=../../slmm/slmmir
+datadir=toychem-imgs  # prefix for the -o output path
+
+ne=30
+nstep=$((48 * 12))
+dt=dt30min  # used only as a label in the output name
+we=$((48 * 6))  # passed as -we: half of nstep
+
+np=8  # first run: np=8, pg=np, caas-node, -timeint interp, -prefine 5
+glbcdr=caas-node
+pg=$np
+name=toychem-nondiv-ne${ne}pr5np${np}pg${pg}-${glbcdr}-caas-$dt
+cmd="$exe -method pcsl -ode nondivergent -ic gaussianhills -ic toychem1 -ic toychem2 -T 12 -nsteps $nstep -timeint interp -ne $ne -np ${np} -dmc eh -d2c -mono $glbcdr -lim caas -we $we -io internal -res 256 -o $datadir/$name -rit -prefine 5 -pg $pg"
+echo "cmd> $cmd"
+eval "$cmd"
+
+glbcdr=caas  # second run: np=4, pg=0, caas, -timeint exact, -prefine 0
+np=4
+pg=0
+name=toychem-nondiv-ne${ne}pr0np${np}pg${pg}-${glbcdr}-caas-$dt
+cmd="$exe -method pcsl -ode nondivergent -ic gaussianhills -ic toychem1 -ic toychem2 -T 12 -nsteps $nstep -timeint exact -ne $ne -np ${np} -dmc eh -d2c -mono $glbcdr -lim caas -we $we -io internal -res 256 -o $datadir/$name -rit -prefine 0 -pg $pg"
+echo "cmd> $cmd"
+eval "$cmd"
diff --git a/methods/islet/figures/sl-gpu-perf.hy b/methods/islet/figures/sl-gpu-perf.hy
new file mode 100644
index 0000000..d404673
--- /dev/null
+++ b/methods/islet/figures/sl-gpu-perf.hy
@@ -0,0 +1,221 @@
+(require [amb3 [*]])
+(import [amb3 [*]] math glob re)
+
+(defn get-context []  ; Box with the hard-coded template/run directories, node cap and timer names
+  (sv ctx (Box))
+  (sv ctx.template-dir "/ccs/home/ambradl/repo/compy-goodies/slgpu/"
+      ctx.run-dir "/ccs/home/ambradl/sl/gpu/"
+      ctx.max-nnode 4600
+      ctx.timers (, "main_loop" "dirk" "RK2" "tracers_compose"))
+  ctx)
+
+(defn get-tstep [ne] (* (/ 1024 ne) 10))  ; 10 at ne=1024, scaled by 1024/ne
+(defn get-nu [ne] (* 2.5e10 (** (/ 1024 ne) 3)))  ; 2.5e10 at ne=1024, scaled by (1024/ne)^3
+(defn get-hv-subcyc [ne]  ; 3 for ne <= 128, 2 for ne <= 512, else 1
+  (cond [(<= ne 128) 3]
+        [(<= ne 512) 2]
+        [:else 1]))
+(defn get-remap-fac [ne] 2)  ; the getters below ignore ne and return a constant
+(defn get-tracer-fac [ne] 8)
+(defn get-qsize [ne] 40)
+(defn get-nmax [ne] (* 1 4096))
+;(defn get-nmax [ne] (* 1 2048))
+(defn get-nnode [ne]  ; node count: 6*ne^2 elements at 6*256 per node, rounded up, capped at 4096
+  (sv per-node (* 6 256)
+      nelem (* 6 (** ne 2)))
+  (min 4096 (math.ceil (/ nelem per-node))))
+
+(defn timer->sypd [nmax tstep timer]  ; (nmax*tstep/365)/timer; SYPD if tstep, timer share a time unit
+  (setv sim-over-365 (/ (* nmax tstep) 365))
+  (/ sim-over-365 timer))
+
+(defn write-files [c ne nnode-fac &optional nnode]  ; write one job script and two namelists for ne
+  ;; job: NNODE WALLTIME JOBNAME INPUTSL INPUTEUL
+  ;;  nl: NE NMAX QSIZE TSTEP NUVAL HVSUB REMAPFAC TRACERFAC
+  (svifn nnode -1)  ; presumably defaults nnode to -1 when it was not given -- confirm in amb3
+  (defn make-input-name [job-name talg] (+ c.run-dir job-name "-" talg ".nl"))
+  (sv nnode (if (= nnode -1) (int (* nnode-fac (get-nnode ne))) nnode)  ; -1: derive from ne and nnode-fac
+      nmax (get-nmax ne)
+      qsize (get-qsize ne)
+      tstep (get-tstep ne)
+      nu (get-nu ne)
+      remap-fac (get-remap-fac ne)
+      tracer-fac (get-tracer-fac ne)
+      job-name (.format "r5-ne{}-nmax{}-qsize{}-nnode{}" ne nmax qsize nnode))
+  (when (> nnode c.max-nnode) (return))  ; write nothing for jobs above the node cap
+  (sed (, (, "NNODE" (str nnode)) (, "WALLTIME" "15") (, "JOBNAME" job-name)  ; (placeholder, value) pairs
+          (, "INPUTSL" (make-input-name job-name "sl"))
+          (, "INPUTEUL" (make-input-name job-name "eul")))
+       (+ c.template-dir "job.sh.template")
+       (+ c.run-dir (+ job-name "-job.sh")))
+  (for [talg (, "eul" "sl")]  ; one namelist per transport algorithm
+    (sv nl-template (+ "theta-" talg ".nl.template"))
+    (sed (, (, "NE" (str ne)) (, "NMAX" (str nmax)) (, "QSIZE" (str qsize))
+            (, "TSTEP" (str tstep)) (, "NUVAL" (.format "{:1.2e}" nu))
+            (, "HVSUB" (str (get-hv-subcyc ne)))
+            (, "REMAPFAC" (str remap-fac)) (, "TRACERFAC" (str tracer-fac)))
+         (+ c.template-dir nl-template)
+         (make-input-name job-name talg))))
+
+(defn parse-out [c fname &optional d]  ; add the timer records found in file fname to d; return d
+  (defn list->dict [c s]  ; s: (key, converter) pairs applied positionally to the values in c
+    (assert (= (len c) (len s)))
+    (sv d {})
+    (for [i (range (len c))]
+      (assoc d (first (nth s i))
+             ((second (nth s i)) (nth c i))))
+    d)
+  (defn parse-fname [fname]  ; read ne, nmax, qsize, nnode back out of the job-style file name
+    (sv f (first (re.findall ".*ne(\d+)-nmax(\d+)-qsize(\d+)-nnode(\d+)" fname)))
+    (list->dict f (, (, :ne int) (, :nmax int) (, :qsize int) (, :nnode int))))
+  (defn parse-timer-line [ln]  ; fields bound to - are not used
+    (sv (, - ngpu - ncall sum max - - - min)
+        (sscanf ln "s,i,s,f,f,f,s,s,s,f"))
+    {:ngpu ngpu :ncall ncall :sum sum :max max :min min})
+  (svifn d {})  ; presumably starts a fresh dict when d was not given -- confirm in amb3
+  (sv m (parse-fname fname)
+      pat ">>>.*full")
+  (for [t c.timers] (+= pat (+ "|" t ".*0\)")))  ; one regex alternative per timer name
+  (sv txt (grep pat fname))
+  (for [ln txt]
+    (cond [(= ">>>" (cut ln 0 3))  ; marker line: its second word names the algorithm for later lines
+           (sv talg (second (.split ln)))]
+          [:else
+           (for [t c.timers]
+             (when (= t (cut ln 0 (len t)))  ; the line must start with the timer name
+               (when (or (and (= t "RK2") (= talg "SL"))
+                         (and (= t "tracers_compose") (= talg "Eul")))
+                 (raisefmt "inconsistent timers: {}" fname))
+               (sv timer t
+                   p (parse-timer-line (cut ln (inc (len t)))))
+               (break)))
+           (assoc-nested-append d (, (:ne m) (:qsize m) (:nmax m) talg  ; NOTE(review): if no timer
+                                     timer (:nnode m))  ; prefixed ln, timer and p are stale or unbound
+                                p)]))
+  d)
+
+(defn parse-from-glob [c globpat]
+  ;; Run parse-out over every file matching globpat, threading one dict through.
+  (sv acc {})
+  (for [fname (glob.glob globpat)]
+    (sv acc (parse-out c fname :d acc)))
+  acc)
+
+(defn write-table [c d &optional talgs]  ; print time, SYPD and speedup per node count from d
+  (svifn talgs (, "Eul" "SL"))
+  (for [ne (sort (list (.keys d)))]
+    (for [timer (, "main_loop" "advection")]  ; "advection" is mapped to a real timer name below
+      (for [qsize (.keys (get d ne))]
+        (for [nmax (.keys (get d ne qsize))]
+          (sv vfirst {} first True)  ; this local first shadows the core function inside write-table
+          (for [talg talgs]
+            (prf ">>> ne {:4d} qsize {:2d} nmax {:5d} alg {:6s} {}"
+                 ne qsize nmax talg timer)
+            (sv t (if (= timer "advection")
+                    (if (in "SL" talg) "tracers_compose" "RK2")
+                    timer)
+                e (get d ne qsize nmax talg t)
+                nnodes (sort (list (.keys e))))
+            (for [nnode nnodes]
+              (sv ps (get e nnode)
+                  vs [])
+              (for [p ps] (.append vs (:max p)))
+              (sv v (min vs)  ; smallest :max over the records stored for this node count
+                  sypd (timer->sypd nmax (get-tstep ne) v)
+                  speedup "")
+              (if first
+                (assoc vfirst nnode v)  ; first algorithm: remember its time as the baseline
+                (sv speedup (.format "{:6.2f}" (/ (get vfirst nnode) v))))  ; later: baseline / v
+              (prf "{:4d} {:7.2f} {:6.2f}{}" nnode v sypd speedup))
+            (sv first False)))))))
+
+;;; drivers
+
+(when-inp ["gen"]  ; write job files for each ne at 0.25x to 4x the node count from get-nnode
+  (sv c (get-context))
+  (for [ne (, 32 64 128 256 512 1024)
+        fac (, 0.25 0.5 1 2 4)]
+    (write-files c ne fac)))
+
+(when-inp ["gen-4600"]  ; write job files for ne=1024 on exactly 4600 nodes
+  (sv c (get-context))
+  (for [ne (, 1024)]
+    (write-files c ne 1 :nnode 4600)))
+
+(when-inp ["table" {:globpat str}]  ; parse the files matching globpat and print the table
+  (sv c (get-context)
+      d (parse-from-glob c globpat))
+  (write-table c d :talgs (, "Eul" "SL")))
+
+(when-inp ["sypd" {:ne int :timer float}]  ; print timer->sypd for one timer value at this ne
+  (sv nmax (get-nmax ne)
+      tstep (get-tstep ne))
+  (print (timer->sypd nmax tstep timer)))
+
+(when-inp ["fig"]  ; draw the scaling figure from the hard-coded SYPD numbers below; png and pdf
+  (assoc matplotlib.rcParams "savefig.dpi" 300)
+  (do (pl-require-type1-fonts))
+  (sv fs 16 fsl 18)  ; font sizes: ticks/annotations and labels/title
+  (defn text1 [x y dx dy data]  ; annotate every point with its data value
+    (for [i (range (len x))]
+      (pl.text (+ (nth x i) dx) (+ (nth y i) dy)
+               (.format "{:4.2f}" (nth data i))
+               :fontsize fs)))
+  (defn text2 [x y dx dy data]  ; annotate only the last point
+    (sv i (dec (len x)))
+    (pl.text (+ (nth x i) dx) (+ (nth y i) dy)
+             (.format "{:4.2f}" (nth data i))
+             :fontsize fs))
+  (defn int->str [i]  ; thousands separator for 4- and 5-digit integers
+    (sv s (str i)
+        s (case/eq (len s)
+                   [4 (+ (first s) "," (cut s 1))]
+                   [5 (+ (cut s 0 2) "," (cut s 2))]
+                   [:else s]))
+    s)
+  (sv npa npy.array
+      x (npa      [1024 2048 4096 4600])
+      xfac 6  ; multiplies the counts above; the x label reads GPUs when xfac is 6
+      x (* xfac x)
+      sc20-y (npa [0.31 0.54 0.90 0.97])
+      eul-y (npa  [0.29 0.50 0.85 0.93])
+      sl-y (npa   [0.44 0.77 1.26 1.38])
+      q40 (Box)  ; second data set, drawn dashed
+      q40.x (* xfac (npa [2048 4096 4600]))
+      q40.eul-y (npa     [0.24 0.41 0.44])
+      q40.sl-y (npa      [0.67 1.13 1.24])
+      perf-x (* xfac (npa [1024 4600]))
+      perf-y (do (sv b 0.2)  ; reference line: b at the first x, scaled linearly in x
+                 (npa [b (* (/ (last perf-x) (first perf-x)) b)]))
+      yt (/ (npy.linspace 1 15 15) 10))  ; y tick values 0.1, 0.2, ..., 1.5
+  (for [format (, "png" "pdf")]
+    (with [(pl-plot (, 6 (if (= format "png") 6.6 6.4))
+                    "sl-gpu-perf-032521-islet"
+                    :format format)]
+      (pl.plot (npy.log2 x) (npy.log2 sc20-y) "ko-" :label "SC20 data, Eulerian transport")
+      (text1 (npy.log2 x) (npy.log2 sc20-y) -0.13 0.08 sc20-y)
+      (pl.plot (npy.log2 x) (npy.log2 eul-y) "k.:" :label "SC20 config., Eulerian transport")
+      (pl.plot (npy.log2 x) (npy.log2 sl-y) "rs-"
+               :label "SC20 config., SL transport")
+      (text1 (npy.log2 x) (npy.log2 sl-y) -0.13 0.08 sl-y)
+      (unless (none? q40)
+        (pl.plot (npy.log2 q40.x) (npy.log2 q40.eul-y) "ko--")
+        (text2 (npy.log2 q40.x) (npy.log2 q40.eul-y) -0.13 0.08 q40.eul-y)
+        (pl.plot (npy.log2 q40.x) (npy.log2 q40.sl-y) "rs--")
+        (text2 (npy.log2 q40.x) (npy.log2 q40.sl-y) -0.03 0.03 q40.sl-y))
+      (pl.plot (npy.log2 perf-x) (npy.log2 perf-y) "g:" :label "Perfect scaling")
+      (pl.xticks (npy.log2 x) (lfor e x (int->str e)) :fontsize fs :rotation 45)
+      (pl.yticks (npy.log2 yt) yt :fontsize fs)
+      (pl.xlabel (+ "Number of Summit " (if (= xfac 6) "GPUs" "nodes"))
+                 :fontsize fsl)
+      (pl.ylabel "Simulated Years Per Day (SYPD)" :fontsize fsl)
+      (pl.title (+ "Semi-Lagrangian tracer transport on GPU:\n"
+                   "Dycore performance of SCREAM 3.25km configuration\n"
+                   "solid line: 10 tracers, dashed line: 40 tracers")
+                :fontsize fsl)
+      (pl.legend :loc "upper left" :fontsize (dec fs) :framealpha 0)
+      (my-grid)
+      (sv d 0.18)  ; margin beyond the first and last x, in log2 units
+      (pl.xlim (, (- (npy.log2 (* xfac 1024)) d) (+ (npy.log2 (* xfac 4600)) d)))
+      (sv d 0.27)  ; y margin; limits use log2 like the plotted data and the ticks
+      (pl.ylim (, (- (npy.log2 (if (none? q40) 0.2 0.15)) d) (+ (npy.log2 1.5) d))))))
diff --git a/methods/islet/islet_interpmethod.hpp b/methods/islet/islet_interpmethod.hpp
new file mode 100644
index 0000000..8e6485c
--- /dev/null
+++ b/methods/islet/islet_interpmethod.hpp
@@ -0,0 +1,14 @@
+#ifndef INCLUDE_ISLET_INTERPMETHOD_HPP
+#define INCLUDE_ISLET_INTERPMETHOD_HPP
+
+#include <memory>
+
+struct UserInterpMethod {  // abstract interface for a caller-supplied interpolation method
+  using Ptr = std::shared_ptr<UserInterpMethod>;
+  virtual ~UserInterpMethod () = default;
+  virtual void eval(const Real& x, Real* const v) = 0;  // writes into v; presumably get_np() values
+  virtual const Real* get_xnodes() const = 0;  // presumably get_np() node coordinates -- confirm
+  virtual Int get_np() const = 0;
+};
+
+#endif
diff --git a/methods/islet/islet_isl.cpp b/methods/islet/islet_isl.cpp
new file mode 100644
index 0000000..3cf307f
--- /dev/null
+++ b/methods/islet/islet_isl.cpp
@@ -0,0 +1,364 @@
+#include <cassert>
+
+#include <array>
+#include <vector>
+#include <limits>
+
+#include "islet_tables.hpp"
+#include "islet_util.hpp"
+#include "islet_isl.hpp"
+#include "islet_xnodes_metrics.hpp"
+#include "islet_npx.hpp"
+
+// Fortran BLAS/LAPACK entry points used by the wrappers in namespace islet
+// below. Every argument, including scalars, is passed by pointer.
+extern "C" {
+  // General matrix-matrix multiply.
+  void dgemm_(const char* transa, const char* transb, const int* m,
+              const int* n, const int* k, const double* alpha, const double* a,
+              const int* lda, const double* b, const int* ldb,
+              const double* beta, double* c, const int* ldc);
+  // Cholesky factorization; status is returned through info.
+  void dpotrf_(const char* uplo, const int* n, double* a, const int* lda,
+               int* info);
+  // Solve using a factorization from dpotrf_; b is overwritten.
+  void dpotrs_(const char* uplo, const int* n, const int* nrhs, const double* a,
+               const int* lda, double* b, const int* ldb, int* info);
+  // Triangular solve with multiple right-hand sides (BLAS; no info argument).
+  void dtrsm_(const char* side, const char* uplo, const char* transa, const char* diag,
+              const int* n, const int* nrhs, const double* alpha, const double* a,
+              const int* lda, double* b, const int* ldb);
+  // Triangular solve (LAPACK; status is returned through info).
+  void dtrtrs_(const char* uplo, const char* trans, const char* diag,
+               const int* n, const int* nrhs, double* a, const int* lda,
+               double* b, const int* ldb, int* info);
+  // QR factorization. NOTE(review): the argument named iwrk here sits in the
+  // position of LAPACK's workspace-length argument -- confirm.
+  void dgeqrf_(const int* m, const int* n, double* a, const int* lda,
+               double* tau, double* wrk, int* iwrk, int* info);
+  // Apply Q from dgeqrf_ to a matrix c.
+  void dormqr_(const char* side, const char* trans,
+               const int* m, const int* n, const int* k,
+               double* a, const int* lda,
+               double* tau, double* c, const int* ldc,
+               double* wrk, const int* iwrk, int* info);
+}
+
+namespace islet {
+// By-value convenience wrapper over dgemm_:
+//   C = alpha op(A) op(B) + beta C.
+// c is declared const in this signature but is handed to dgemm_ as its
+// writable output argument, so constness is cast away for it.
+void dgemm (char transa, char transb, int m, int nrhs, int n, double alpha,
+            const double* a, int lda, const double* b, int ldb, double beta,
+            const double* c, int ldc) {
+  double* const c_out = const_cast<double*>(c);
+  dgemm_(&transa, &transb, &m, &nrhs, &n, &alpha,
+         a, &lda,
+         b, &ldb,
+         &beta, c_out, &ldc);
+}
+
+// By-value wrapper over dpotrf_. a is overwritten in place; the routine's info
+// code is returned.
+int dpotrf (char uplo, int n, double* a, int lda) {
+  int status;
+  dpotrf_(&uplo, &n, a, &lda, &status);
+  return status;
+}
+
+// By-value wrapper over dpotrs_. bx holds the right-hand sides on entry and is
+// overwritten by the routine; the routine's info code is returned.
+int dpotrs (char uplo, int n, int nrhs, const double* a, int lda, double* bx,
+            int ldb) {
+  int status;
+  dpotrs_(&uplo, &n, &nrhs, a, &lda, bx, &ldb, &status);
+  return status;
+}
+
+// By-value wrapper over dtrsm_. bx is overwritten by the routine. There is no
+// status to report: dtrsm_ has no info argument.
+void dtrsm (char side, char uplo, char transa, char diag, int n, int nrhs,
+            double alpha, const double* a, int lda, double* bx, int ldb) {
+  dtrsm_(&side, &uplo, &transa, &diag,
+         &n, &nrhs, &alpha,
+         a, &lda,
+         bx, &ldb);
+}
+
+// By-value wrapper over dtrtrs_. b is overwritten by the routine; the
+// routine's info code is returned.
+int dtrtrs (char uplo, char trans, char diag, int n, int nrhs,
+            double* a, int lda, double* b, int ldb) {
+  int status;
+  dtrtrs_(&uplo, &trans, &diag,
+          &n, &nrhs,
+          a, &lda,
+          b, &ldb,
+          &status);
+  return status;
+}
+
+// By-value wrapper over dgeqrf_; returns the routine's info code.
+// Array sizes: tau[min(m,n)], wrk[>= n]. iwrk is forwarded in the slot that
+// follows wrk in the dgeqrf_ declaration.
+int dgeqrf (int m, int n, double* a, int lda,
+            double* tau, double* wrk, int iwrk) {
+  int status;
+  dgeqrf_(&m, &n,
+          a, &lda,
+          tau, wrk, &iwrk,
+          &status);
+  return status;
+}
+
+// By-value wrapper over dormqr_; returns the routine's info code.
+// Array sizes: tau[min(m,n)], wrk[>= max(m,n)].
+int dormqr (char side, char trans, int m, int n, int k, double* a, int lda,
+            double* tau, double* c, int ldc, double* wrk, int iwrk) {
+  int status;
+  dormqr_(&side, &trans,
+          &m, &n, &k,
+          a, &lda,
+          tau,
+          c, &ldc,
+          wrk, &iwrk,
+          &status);
+  return status;
+}
+
+// Operator whose basis is the Lagrange polynomial basis over all np nodes
+// returned by get_xnodes(np).
+struct GllNatural : public Operator {
+  void eval (const Int& np, const Real& x, Real* const v) const override {
+    const Real* const nodes = get_xnodes(np);
+    eval_lagrange_poly(nodes, np, x, v);
+  }
+};
+
+// Operator that forwards evaluation to npxstab<Real>::eval.
+struct GllOffsetNodalSubset : public Operator, public npxstab<Real> {
+  void eval (const Int& np, const Real& x, Real* const v) const override {
+    using Impl = npxstab<Real>;
+    Impl::eval(np, x, v);
+  }
+};
+
+// Evaluate an offset-nodal-subset basis at x, writing np values to v.
+//   Region i is selected when x < xnodes[i+1], for i in [0, nreg). In region
+// i, the basis is the Lagrange basis over the subnp[i] nodes starting at
+// xnodes[os[i]]; all other entries of v are zero. If no region matches, the
+// full np-node Lagrange basis is used.
+//   Only x <= 0 is handled directly; x > 0 is evaluated at -x and the result
+// is reversed.
+void eval_offset_nodal_subset (
+  const Int np, const Int nreg, const Int* subnp, const Int* os, const Real* xnodes,
+  const Real& x, Real* const v)
+{
+  if (x > 0) {
+    eval_offset_nodal_subset(np, nreg, subnp, os, xnodes, -x, v);
+    for (Int lo = 0, hi = np-1; lo < hi; ++lo, --hi)
+      std::swap(v[lo], v[hi]);
+    return;
+  }
+  // Find the first region whose right edge is to the right of x.
+  Int region = 0;
+  while (region < nreg && ! (x < xnodes[region+1]))
+    ++region;
+  if (region == nreg) {
+    eval_lagrange_poly(xnodes, np, x, v);
+    return;
+  }
+  std::fill(v, v + np, 0);
+  const Int first = os[region];
+  eval_lagrange_poly(xnodes + first, subnp[region], x, v + first);
+}
+
+// Same scheme as eval_offset_nodal_subset, but with the number of regions
+// fixed at np/2: region i is selected when x < xnodes[i+1], and its basis is
+// the Lagrange basis over subnp[i] nodes starting at xnodes[offst[i]], with
+// zeros elsewhere. Falls back to the full np-node basis if no region matches.
+// x > 0 is handled by evaluating at -x and reversing v.
+static void eval_offset (const Int& np, const Real* const xnodes,
+                         const Int* const subnp, const Int* const offst,
+                         const Real& x, Real* const v) {
+  if (x > 0) {
+    eval_offset(np, xnodes, subnp, offst, -x, v);
+    for (Int lo = 0, hi = np-1; lo < hi; ++lo, --hi)
+      std::swap(v[lo], v[hi]);
+    return;
+  }
+  const Int nreg = np/2;
+  Int region = 0;
+  while (region < nreg && ! (x < xnodes[region+1]))
+    ++region;
+  if (region == nreg) {
+    eval_lagrange_poly(xnodes, np, x, v);
+    return;
+  }
+  std::fill(v, v + np, 0);
+  const Int first = offst[region];
+  eval_lagrange_poly(xnodes + first, subnp[region], x, v + first);
+}
+
+// Operator with a hand-picked basis for each np in [4, 13]. For np >= 5 the
+// per-region node subsets used by eval are the same ones spelled out in the
+// strings returned by get_basis_string.
+struct GllBest : public Operator {
+  // np = 4. Between xnodes[1] and xnodes[2] this is the plain 4-node Lagrange
+  // basis. Outside that interval it is a blend, with weight a, of a 3-node
+  // basis on the nearer three nodes and the 4-node basis.
+  static void eval_np4 (const Real* const xnodes, const Real& x, Real* const y) {
+    static const Real c1 = 0.306;
+    const bool left = x < xnodes[1];
+    if ( ! left && ! (x > xnodes[2])) {
+      eval_lagrange_poly(xnodes, 4, x, y);
+      return;
+    }
+    const Int os = left ? 0 : 1;
+    y[0] = y[3] = 0;
+    eval_lagrange_poly(xnodes + os, 3, x, y + os);
+    Real y4[4];
+    eval_lagrange_poly(xnodes, 4, x, y4);
+    const Real x0 = 2*(1 - std::abs(x))/(1 - xnodes[2]) - 1;
+    const Real a = (c1 + (0.5 - c1)*x0)*(x0 + 1);
+    for (int i = 0; i < 4; ++i)
+      y[i] = a*y[i] + (1 - a)*y4[i];
+  }
+
+  // Dispatch on np. Most cases use contiguous node subsets (eval_offset with a
+  // per-region count and first-node offset); np = 6 and np = 9 use explicit
+  // node lists passed to ::eval. Throws for any other np.
+  void eval (const Int& np, const Real& x, Real* const v) const override {
+    const Real* xnodes = get_xnodes(np);
+    switch (np) {
+    case 4: // 2
+      eval_np4(xnodes, x, v);
+      break;
+    case 5: { // 2
+      const Int count[] = {3,4};
+      const Int first[] = {0,0};
+      eval_offset(5, xnodes, count, first, x, v);
+    } break;
+    case 6: { // 4
+      const Int count[] = {5,5,6};
+      const Int r0[] = { 0, 1, 2, 3, 4    };
+      const Int r1[] = { 0, 1, 2, 3,    5 };
+      const Int r2[] = { 0, 1, 2, 3, 4, 5 };
+      const Int* sets[] = {r0,r1,r2};
+      ::eval(6, true, xnodes, count, sets, x, v);
+    } break;
+    case 7: { // 4
+      const Int count[] = {5,5,6};
+      const Int first[] = {0,0,0};
+      eval_offset(7, xnodes, count, first, x, v);
+    } break;
+    case 8: { // 5
+      const Int count[] = {6,6,7,6};
+      const Int first[] = {0,0,0,1};
+      eval_offset(8, xnodes, count, first, x, v);
+    } break;
+    case 9: { // 6
+      const Int count[] = {7,8,8,7};
+      const Int r0[] = { 0, 1, 2, 3, 4, 5,       8 };
+      const Int r1[] = { 0, 1, 2, 3, 4, 5,    7, 8 };
+      const Int r2[] = { 0, 1, 2, 3, 4, 5, 6,    8 };
+      const Int r3[] = {    1, 2, 3, 4, 5, 6, 7    };
+      const Int* sets[] = {r0,r1,r2,r3};
+      ::eval(9, true, xnodes, count, sets, x, v);
+    } break;
+    case 10: { // 6
+      const Int count[] = {7,7,7,8,8};
+      const Int first[] = {0,0,0,0,1};
+      eval_offset(10, xnodes, count, first, x, v);
+    } break;
+    case 11: { // 7
+      const Int count[] = {8,9,8,9,8};
+      const Int first[] = {0,0,0,0,1};
+      eval_offset(11, xnodes, count, first, x, v);
+    } break;
+    case 12: { // 8
+      const Int count[] = {9,9,10,10,9,10};
+      const Int first[] = {0,0,0,0,1,1};
+      eval_offset(12, xnodes, count, first, x, v);
+    } break;
+    case 13: { // 9
+      const Int count[] = {10,10,10,10,11,10};
+      const Int first[] = {0,0,0,0,0,1};
+      eval_offset(13, xnodes, count, first, x, v);
+    } break;
+    default:
+      throw_if(true, "not impl'ed");
+    }
+  }
+
+  // Text description of the basis for np in [5, 13]; empty string otherwise
+  // (including np = 4, whose blended basis has no string form here).
+  std::string get_basis_string (const Int& np) const override {
+    const char* s = "";
+    switch (np) {
+    case 5:
+      s = "5 1 | 0 3: 0 1 2 | 1 4: 0 1 2 3";
+      break;
+    case 6:
+      s = "6 1 | 0 5: 0 1 2 3 4 | 1 5: 0 1 2 3 5 | 2 6: 0 1 2 3 4 5";
+      break;
+    case 7:
+      s = "7 1 | 0 5: 0 1 2 3 4 | 1 5: 0 1 2 3 4 | 2 6: 0 1 2 3 4 5";
+      break;
+    case 8:
+      s = "8 1 | 0 6: 0 1 2 3 4 5 | 1 6: 0 1 2 3 4 5 | 2 7: 0 1 2 3 4 5 6 | 3 6: 1 2 3 4 5 6";
+      break;
+    case 9:
+      s = "9 1 | 0 7: 0 1 2 3 4 5 8 | 1 8: 0 1 2 3 4 5 7 8 | 2 8: 0 1 2 3 4 5 6 8 | 3 7: 1 2 3 4 5 6 7";
+      break;
+    case 10:
+      s = "10 1 | 0 7: 0 1 2 3 4 5 6 | 1 7: 0 1 2 3 4 5 6 | 2 7: 0 1 2 3 4 5 6 | 3 8: 0 1 2 3 4 5 6 7 | 4 8: 1 2 3 4 5 6 7 8";
+      break;
+    case 11:
+      s = "11 1 | 0 8: 0 1 2 3 4 5 6 7 | 1 9: 0 1 2 3 4 5 6 7 8 | 2 8: 0 1 2 3 4 5 6 7 | 3 9: 0 1 2 3 4 5 6 7 8 | 4 8: 1 2 3 4 5 6 7 8";
+      break;
+    case 12:
+      s = "12 1 | 0 9: 0 1 2 3 4 5 6 7 8 | 1 9: 0 1 2 3 4 5 6 7 8 | 2 10: 0 1 2 3 4 5 6 7 8 9 | 3 10: 0 1 2 3 4 5 6 7 8 9 | 4 9: 1 2 3 4 5 6 7 8 9 | 5 10: 1 2 3 4 5 6 7 8 9 10";
+      break;
+    case 13:
+      s = "13 1 | 0 10: 0 1 2 3 4 5 6 7 8 9 | 1 10: 0 1 2 3 4 5 6 7 8 9 | 2 10: 0 1 2 3 4 5 6 7 8 9 | 3 10: 0 1 2 3 4 5 6 7 8 9 | 4 11: 0 1 2 3 4 5 6 7 8 9 10 | 5 10: 1 2 3 4 5 6 7 8 9 10";
+      break;
+    default:
+      break;
+    }
+    return s;
+  }
+};
+
+// Offset-nodal-subset operator on uniformly spaced nodes in [-1, 1], for np in
+// [2, 13].
+struct UniformOffsetNodalSubset : public Operator {
+  // Return np uniformly spaced nodes on [-1, 1], or nullptr if np is outside
+  // [2, np_max]. Rows of the static table are filled on first request; a row
+  // is recognized as unfilled by its first entry still being 0, since a filled
+  // row always starts at -1.
+  virtual const Real* get_xnodes (const Int& np) const override {
+    // The table has np_max+1 rows, so np_max is the largest valid row index.
+    if (np < 2 || np > np_max) return nullptr;
+    static Real xnode[np_max+1][np_max+1] = {0};
+    if (xnode[np][0] == 0) {
+      for (Int i = 0; i < np; ++i)
+        xnode[np][i] = 2*(Real(i)/(np-1)) - 1;
+    }
+    return xnode[np];
+  }
+
+  // Evaluate the basis at x, writing np values to v. For each np, subnp[i] and
+  // offst[i] give the node count and first node index of the subset used in
+  // region i (see eval_offset). Pairs of np values that share the same tables
+  // share a case. Throws for np outside [2, 13].
+  virtual void eval (const Int& np, const Real& x, Real* const v) const override {
+    const Real* xnodes = get_xnodes(np);
+    switch (np) {
+    case 2: {
+      const Int subnp[] = {2};
+      const Int offst[] = {0};
+      eval_offset(np, xnodes, subnp, offst, x, v);
+    } break;
+    case 3: {
+      const Int subnp[] = {3};
+      const Int offst[] = {0};
+      eval_offset(np, xnodes, subnp, offst, x, v);
+    } break;
+    case 4: case 5: {
+      const Int subnp[] = {3,4};
+      const Int offst[] = {0,0};
+      eval_offset(np, xnodes, subnp, offst, x, v);
+    } break;
+    case 6: {
+      const Int subnp[] = {3,4,6};
+      const Int offst[] = {0,0,0};
+      eval_offset(np, xnodes, subnp, offst, x, v);
+    } break;
+    case 7: {
+      const Int subnp[] = {3,4,4};
+      const Int offst[] = {0,0,1};
+      eval_offset(np, xnodes, subnp, offst, x, v);
+    } break;
+    case 8: case 9: {
+      const Int subnp[] = {4,4,4,4};
+      const Int offst[] = {0,0,1,2};
+      eval_offset(np, xnodes, subnp, offst, x, v);
+    } break;
+    case 10: case 11: {
+      const Int subnp[] = {4,4,4,4,4};
+      const Int offst[] = {0,0,1,2,3};
+      eval_offset(np, xnodes, subnp, offst, x, v);
+    } break;
+    case 12: case 13: {
+      const Int subnp[] = {4,4,4,4,4,4};
+      const Int offst[] = {0,0,1,2,3,4};
+      eval_offset(np, xnodes, subnp, offst, x, v);
+    } break;
+    default: throw_if(true, "not impl'ed");
+    }
+  }
+};
+
+// Factory: build the Operator implementation selected by m. Throws for any
+// Method value without an implementation here (xnodal included).
+Operator::ConstPtr Operator::create (Operator::Method m) {
+  if (m == gll_natural)
+    return std::make_shared<GllNatural>();
+  if (m == gll_offset_nodal_subset)
+    return std::make_shared<GllOffsetNodalSubset>();
+  if (m == gll_best)
+    return std::make_shared<GllBest>();
+  if (m == uniform_offset_nodal_subset)
+    return std::make_shared<UniformOffsetNodalSubset>();
+  throw_if(true, "Operator::create: not a method: " << m);
+  return nullptr;
+}
+
+// Check that GllOffsetNodalSubset and GllBest produce bitwise-identical basis
+// values at 101 uniformly spaced points in [-1, 1] for a set of np values.
+// Returns the total number of mismatching entries.
+Int unittest_eval () {
+  static const Int nps[] = {5,7,8,10,11,12,13};
+  const Int nsamp = 100;
+  GllOffsetNodalSubset subset_op;
+  GllBest best_op;
+  Int nerr = 0;
+  for (const Int np : nps) {
+    Int nmismatch = 0;
+    for (Int i = 0; i <= nsamp; ++i) {
+      const Real x = 2*(Real(i)/nsamp) - 1;
+      Real v1[np_max], v2[np_max];
+      subset_op.eval(np, x, v1);
+      best_op.eval(np, x, v2);
+      for (Int j = 0; j < np; ++j)
+        if (v1[j] != v2[j])
+          ++nmismatch;
+    }
+    if (nmismatch) printf("GllOffsetNodalSubset vs GllBest np %d failed\n", np);
+    nerr += nmismatch;
+  }
+  return nerr;
+}
+} // namespace islet
+
+using namespace islet;
+extern "C" { // For python ctypes.
+// Copy the np node locations of the given method into xnodes. method is cast
+// to Operator::Method; xnodes must have room for np values.
+void get_xnodes (const Int method, const Int np, Real* xnodes) {
+  const auto kind = static_cast<Operator::Method>(method);
+  const auto op = Operator::create(kind);
+  const Real* const src = op->get_xnodes(np);
+  for (Int i = 0; i < np; ++i)
+    xnodes[i] = src[i];
+}
+
+// Evaluate the given method's np basis values at each of the nx points in x.
+void eval_interpolant (const Int method, const Int np, const Int nx,
+                       // y is np x nx, np the fast index.
+                       const Real* const x, Real* const y) {
+  const auto kind = static_cast<Operator::Method>(method);
+  const auto op = Operator::create(kind);
+  Real* out = y;
+  for (Int ix = 0; ix < nx; ++ix, out += np)
+    op->eval(np, x[ix], out);
+}
+} // extern "C"
diff --git a/methods/islet/islet_isl.hpp b/methods/islet/islet_isl.hpp
new file mode 100644
index 0000000..1cde180
--- /dev/null
+++ b/methods/islet/islet_isl.hpp
@@ -0,0 +1,54 @@
+#ifndef INCLUDE_ISLET_ISL_HPP
+#define INCLUDE_ISLET_ISL_HPP
+
+#include "islet_types.hpp"
+#include "islet_interpmethod.hpp"
+
+#include <memory>
+
+namespace islet {
+// Node locations for np nodes; this is the default source of nodes for
+// Operator::get_xnodes. Defined elsewhere.
+const Real* get_x_gll(const Int np);
+// NOTE(review): presumably the weights that go with get_x_gll(np) -- confirm
+// against the definition.
+const Real* get_w_gll(const Int np);
+
+// Evaluate at x the np Lagrange basis polynomials defined by the nodes
+// x_gll[0..np-1]:
+//   y[i] = prod_{j != i} (x - x_gll[j]) / (x_gll[i] - x_gll[j]).
+template <typename Scalar>
+void eval_lagrange_poly (const Scalar* x_gll, const Int& np, const Scalar& x,
+                         Scalar* const y) {
+  for (int i = 0; i < np; ++i) {
+    const Scalar xi = x_gll[i];
+    Scalar prod = 1;
+    for (int j = 0; j < np; ++j) {
+      if (j == i) continue;
+      prod *= (x - x_gll[j]) / (xi - x_gll[j]);
+    }
+    y[i] = prod;
+  }
+}
+
+// Interface for a family of interpolation bases indexed by the number of nodes
+// np. Instances are obtained through create().
+struct Operator {
+  typedef std::shared_ptr<Operator> Ptr;
+  typedef std::shared_ptr<const Operator> ConstPtr;
+
+  // Operator is a polymorphic base handed out through shared_ptr, so give it
+  // a virtual destructor, as UserInterpMethod has.
+  virtual ~Operator () {}
+
+  // Evaluate the np basis functions at x, writing np values to v.
+  virtual void eval(const Int& np, const Real& x, Real* const v) const = 0;
+  // Node locations for np nodes; defaults to get_x_gll(np).
+  virtual const Real* get_xnodes (const Int& np) const { return get_x_gll(np); }
+  // Optional text description of the basis; empty by default.
+  virtual std::string get_basis_string (const Int& np) const { return ""; }
+
+  // Selector for create(). The numeric values are part of the extern "C"
+  // interface, which passes a Method as an integer.
+  enum Method { gll_natural = 0, gll_offset_nodal_subset, xnodal, gll_best,
+                uniform_offset_nodal_subset };
+  static ConstPtr create(Method m);
+};
+
+// Adapter that binds an Operator to a fixed np so it can be used through the
+// UserInterpMethod interface.
+struct OperatorInterpMethod : public UserInterpMethod {
+  using Ptr = std::shared_ptr<OperatorInterpMethod>;
+
+  OperatorInterpMethod (const Int np_, const Operator::ConstPtr& op_)
+    : np(np_), op(op_)
+  {}
+
+  void eval (const Real& x, Real* const v) override {
+    op->eval(np, x, v);
+  }
+
+  const Real* get_xnodes () const override {
+    return op->get_xnodes(np);
+  }
+
+  Int get_np () const override {
+    return np;
+  }
+
+private:
+  Int np;                 // number of nodes forwarded to the operator
+  Operator::ConstPtr op;  // wrapped operator
+};
+
+Int unittest_eval();
+
+} // namespace islet
+
+#endif
diff --git a/methods/islet/islet_maxeigcomp.cpp b/methods/islet/islet_maxeigcomp.cpp
new file mode 100644
index 0000000..f9afc64
--- /dev/null
+++ b/methods/islet/islet_maxeigcomp.cpp
@@ -0,0 +1,616 @@
+#include <omp.h>
+
+#include "islet_maxeigcomp.hpp"
+#include "islet_tables.hpp"
+#include "islet_npx.hpp"
+#include "islet_util.hpp"
+
+// LAPACK eigendecomp routine for real unsymmetric matrix.
+// fint is the integer type used for the dimension arguments in these
+// declarations.
+typedef int fint;
+// Real version: eigenvalues are returned as separate real (wr) and imaginary
+// (wi) arrays.
+extern "C" void dgeev_ (char* jobvl, char* jobvr, fint* n, double* a, int* lda,
+                        double* wr, double* wi,
+                        double* vl, int* ldvl,
+                        double* vr, int* ldvr,
+                        double* work, int* lwork, int* info);
+// Complex version: eigenvalues are returned in the single complex array w; it
+// takes an extra real workspace rwork.
+extern "C" void zgeev_ (char* jobvl, char* jobvr, fint* n, Complex* a, int* lda,
+                        Complex* w,
+                        Complex* vl, int* ldvl,
+                        Complex* vr, int* ldvr,
+                        Complex* work, int* lwork, double* rwork, int* info);
+// LAPACK SVD routine for real unsymmetric matrix.
+extern "C" void dgesvd_ (char* jobu, char* jobvt, fint* m, fint* n, double* a, int* lda,
+                         double* s, double* u, int* ldu, double* vt, int* ldvt,
+                         double* work, int* lwork, int* info);
+// Complex SVD; singular values s are real, and it takes an extra real
+// workspace rwork.
+extern "C" void zgesvd_ (char* jobu, char* jobvt, fint* m, fint* n, Complex* a, int* lda,
+                         double* s, Complex* u, int* ldu, Complex* vt, int* ldvt,
+                         Complex* work, int* lwork, double* rwork, int* info);
+
+// By-value wrapper over dgeev_. The routine's status is returned through info.
+static
+void dgeev (char jobvl, char jobvr, int n, double* a, int lda,
+            double* wr, double* wi,
+            double* vl, int ldvl,
+            double* vr, int ldvr,
+            double* work, int lwork, int& info) {
+  dgeev_(&jobvl, &jobvr,
+         &n, a, &lda,
+         wr, wi,
+         vl, &ldvl,
+         vr, &ldvr,
+         work, &lwork,
+         &info);
+}
+
+// By-value wrapper over zgeev_. The routine's status is returned through info.
+static
+void zgeev (char jobvl, char jobvr, int n, Complex* a, int lda,
+            Complex* w,
+            Complex* vl, int ldvl,
+            Complex* vr, int ldvr,
+            Complex* work, int lwork, double* rwork, int& info) {
+  zgeev_(&jobvl, &jobvr,
+         &n, a, &lda,
+         w,
+         vl, &ldvl,
+         vr, &ldvr,
+         work, &lwork, rwork,
+         &info);
+}
+
+// By-value wrapper over dgesvd_. The routine's status is returned through
+// info.
+static
+void dgesvd (char jobu, char jobvt, fint m, fint n, double* a, int lda,
+             double* s, double* u, int ldu, double* vt, int ldvt,
+             double* work, int lwork, int& info) {
+  dgesvd_(&jobu, &jobvt,
+          &m, &n, a, &lda,
+          s,
+          u, &ldu,
+          vt, &ldvt,
+          work, &lwork,
+          &info);
+}
+
+// By-value wrapper over zgesvd_. The routine's status is returned through
+// info.
+static
+void zgesvd (char jobu, char jobvt, fint m, fint n, Complex* a, int lda,
+             double* s, Complex* u, int ldu, Complex* vt, int ldvt,
+             Complex* work, int lwork, double* rwork, int& info) {
+  zgesvd_(&jobu, &jobvt,
+          &m, &n, a, &lda,
+          s,
+          u, &ldu,
+          vt, &ldvt,
+          work, &lwork, rwork,
+          &info);
+}
+
+// Uniform periodic mesh of nc cells covering [0, 1). Provides conversions
+// between a physical coordinate and a (cell index, reference coordinate in
+// [-1, 1)) pair.
+class Mesh {
+  Int nc_;   // number of cells
+  Real dx_;  // cell width, 1/nc_
+public:
+  Mesh (const Int nc) : nc_(nc), dx_(1.0/nc) {}
+
+  void init (const Int nc) {
+    nc_ = nc;
+    dx_ = 1.0/nc;
+  }
+
+  // Map x into [0, 1) by removing its integer part.
+  Real toperiodic (const Real& x) const {
+    const Real whole = std::floor(x);
+    return x - whole;
+  }
+
+  Int ncell () const { return nc_; }
+
+  Real dx () const { return dx_; }
+
+  // Index of the cell containing the periodic image of x.
+  Int incell (const Real& x) const {
+    const Real xp = toperiodic(x);
+    return std::floor(xp / dx_);
+  }
+
+  // Physical x -> cell index ci and reference coordinate a.
+  void toref (const Real& x, Int& ci, Real& a) const {
+    ci = incell(x);
+    const Real xp = toperiodic(x);
+    a = 2*(xp*nc_ - ci) - 1;
+  }
+
+  // Cell index ci and reference coordinate a -> periodic physical coordinate.
+  Real tophysical (const Int& ci, const Real& a) const {
+    const Real x = (ci + 0.5*(a+1))*dx_;
+    return toperiodic(x);
+  }
+
+  // Self-test of the conversions on a 42-cell mesh; returns the number of
+  // failed checks.
+  static Int unittest () {
+    using islet::reldif;
+    const Real eps = std::numeric_limits<Real>::epsilon();
+    Mesh m(42);
+    const Int nc = m.ncell();
+    const Real dx = m.dx();
+    Int nfail = 0;
+    // Periodic wrapping, for positive and negative input.
+    if (reldif(m.toperiodic( 1.7), 0.7) > 10*eps) ++nfail;
+    if (reldif(m.toperiodic(-0.8), 0.2) > 10*eps) ++nfail;
+    // Cell lookup at and just below an integer coordinate.
+    if (m.incell(2) != 0) ++nfail;
+    if (m.incell(2 - 0.5*dx) != nc-1) ++nfail;
+    // Round trip physical -> reference -> physical.
+    {
+      const Real x = 4.4*dx;
+      Int ic;
+      Real a;
+      m.toref(x, ic, a);
+      if (ic != 4) ++nfail;
+      if (reldif(a, -0.2) > 1e2*eps) ++nfail;
+      if (reldif(m.tophysical(ic, a), x) > 1e2*eps) ++nfail;
+    }
+    return nfail;
+  }
+};
+
+// Apply the interpolation operator once. For each of the first np-1 nodes of
+// each of the ne cells, shift the node's physical location by dx_flow, find
+// the source cell and reference coordinate of the shifted point, and
+// interpolate src there. Results are written consecutively to tgt, so tgt
+// receives ne*(np-1) values.
+//   src is indexed with a stride of np-1 per cell and np values are read per
+// cell, so the last cell reads one entry past index ne*(np-1) - 1.
+void op_apply (const Mesh& m, const Int ne, const InterpMethod& im,
+               const Real dx_flow, const Real* const src, Real* const tgt) {
+  const auto xnodes = im.get_xnodes();
+  Real* out = tgt;
+  for (Int ci = 0; ci < ne; ++ci) {
+    for (Int i = 0; i < im.np-1; ++i) {
+      const Real x_src = m.tophysical(ci, xnodes[i]) + dx_flow;
+      Int ci_src;
+      Real a_src;
+      m.toref(x_src, ci_src, a_src);
+      Real v[32];
+      op_eval(im, a_src, v);
+      const Real* src_cell = src + (im.np - 1)*ci_src;
+      Real val = 0;
+      for (Int i_src = 0; i_src < im.np; ++i_src)
+        val += v[i_src]*src_cell[i_src];
+      *out++ = val;
+    }
+  }
+}
+
+// Form the dense N x N matrix of the operator on a periodic mesh of ne cells
+// with np nodes per cell, where N = ne*(np-1). Column b of A (stored
+// contiguously at A.data() + b*N) is the result of applying op_apply to the
+// b'th unit vector.
+//   The source vector carries one extra trailing entry, u0[N], which is the
+// periodic image of u0[0]; it is set together with u0[0] when b == 0.
+static void get_matrix (
+  const Int& ne, const Int& np, const Real& dx, const InterpMethod& interp_method,
+  Array<Real>& A)
+{
+  // Get the CSL operator as a matrix.
+  const Int N = ne*(np-1), Np1 = N+1;
+  A.optclear_and_resize(N*N);
+  Mesh m(ne);
+  const Real dx_flow = -dx/ne;
+  Array<Real> u0(Np1, 0);
+  const Real* const src = u0.data();
+  for (Int b = 0; b < N; ++b) {
+    // Get b'th column. b never reaches N in this loop, so only b == 0 needs
+    // the periodic copy.
+    const bool periodic_node = b == 0;
+    u0[b] = 1;
+    if (periodic_node) u0[N] = 1;
+    Real* const tgt = A.data() + b*N;
+    op_apply(m, ne, interp_method, dx_flow, src, tgt);
+    // Restore u0 to all zeros for the next column.
+    u0[b] = 0;
+    if (periodic_node) u0[N] = 0;
+  }
+}
+
+// A is overwritten.
+// Return the ratio of the first to the last singular value of the n x n
+// complex matrix A, as returned by zgesvd with no singular vectors requested.
+// A is overwritten.
+//   work must hold at least 12 n Reals and is carved up as:
+//     [0, 5n)      rwork for zgesvd,
+//     [5n, 6n)     singular values,
+//     [6n, nwork)  complex workspace for zgesvd.
+// Throws if work is too small or if zgesvd reports a nonzero info.
+static Real cond_2norm (Complex* A, const Int& n, Real* work, const Int& nwork) {
+  throw_if(nwork < 12*n, "work should be >= 12 n");
+  Real* rwork = work;
+  Real* s = work + 5*n;
+  Complex* cwork = reinterpret_cast<Complex*>(work + 6*n);
+  const Int lwork = (nwork - 6*n)/2;
+  int info;
+  zgesvd('n', 'n', n, n, A, n, s,
+         nullptr, 1, nullptr, 1,
+         cwork, lwork, rwork,
+         info);
+  // Without this check a failed SVD would silently produce a ratio of
+  // whatever happens to be in s.
+  throw_if(info != 0, "cond_2norm: zgesvd returned info = " << info);
+  return s[0]/s[n-1];
+}
+
+namespace bloch {
+// K is the number of nodes still in the element after shifting right by the
+// fraction dx of an element. Equivalently, it is the index of the first node
+// for which x_gll[K] + 2*dx >= 1, or np if there is no such node.
+static Int get_K (const Int np, const Real* x_gll, const Real dx) {
+  assert(dx > 0 && dx < 1);
+  const Real shift = 2*dx;
+  Int K = 0;
+  while (K < np && ! (x_gll[K] + shift >= 1))
+    ++K;
+  assert(x_gll[0] > -1 || (K > 0 && K < np));
+  return K;
+}
+
+// Immutable parameters of one Bloch-wave eigenproblem.
+// K is the opposite of what it is in csl.hy.
+struct Data {
+  const Int np;               // nodes per element
+  const Real* const x_gll;    // node locations
+  const Int K;                // see get_K
+  const Real dx;              // shift as a fraction of an element
+
+  // If x_gll_ is null, the nodes default to islet::get_x_gll(np). Members are
+  // initialized in declaration order, so np and x_gll are already set when K
+  // is computed.
+  Data (const Int& inp, const Real& idx, const Real* const x_gll_ = nullptr)
+    : np(inp),
+      x_gll(x_gll_ != nullptr ? x_gll_ : islet::get_x_gll(np)),
+      K(get_K(inp, x_gll, idx)),
+      dx(idx)
+  {}
+};
+
+// Get the (np-1)xnp block matrix that is repeated in A. The block is stored
+// row-major starting at A: row ip holds the np basis values at node ip's
+// location shifted by 2*dx and wrapped back into [-1, 1).
+void form_kernel_block (const Data& d, const InterpMethod& im, Real* const A) {
+  const Int nrow = d.np-1;
+  for (Int ip = 0; ip < nrow; ++ip) {
+    Real ref = d.x_gll[ip] + 2*d.dx;
+    const bool wraps = ref >= 1;
+    if (wraps) {
+      // The shifted node left the element; it must be one of the nodes at or
+      // past index K.
+      assert(ip >= d.K);
+      ref -= 2;
+    } else {
+      // Print diagnostics before the assertion below fires.
+      if (ip >= d.K) {
+        pr(puf(d.np) pu(d.K) pu(d.dx) pu(ip) pu(ref));
+        islet::prarr("d.x_gll", d.x_gll, d.np);
+      }
+      assert(ip < d.K);
+    }
+    assert(ref >= -1 && ref < 1);
+    Real* const row = A + d.np*ip;
+    op_eval(im, ref, row);
+  }
+}
+
+// Get B(mu) from A. A is the (np-1) x np row-major kernel block; Bmu is the
+// (np-1) x (np-1) column-major result. Rows r >= K are scaled by mu. Column
+// np-1 of A is folded into column 0 of Bmu with one additional factor of mu.
+void form_Bmu (const Data& d, const Complex& mu, const Real* const A,
+               Complex* const Bmu) {
+  const Int n = d.np-1;
+  // Columns 0 .. np-2 of A.
+  for (Int c = 0; c < n; ++c) {
+    Complex* const col = Bmu + n*c;
+    for (Int r = 0; r < n; ++r) {
+      const Real a = A[d.np*r + c];
+      if (r < d.K) col[r] = a;
+      else col[r] = mu*a;
+    }
+  }
+  // Last column of A, added to the first column of Bmu.
+  Complex* const col0 = Bmu;
+  for (Int r = 0; r < n; ++r) {
+    const Real a = A[d.np*r + n];
+    if (r < d.K) col0[r] += mu*a;
+    else col0[r] += mu*mu*a;
+  }
+}
+
+// Form B(mu) for mu = exp(2 pi i ie/ne), i.e. the ie'th of ne points on the
+// unit circle.
+void form_Bmu (const Data& d, const Int& ne, const Int& ie, const Real* const A,
+               Complex* const Bmu) {
+  const Real theta = 2 * M_PI * (Real(ie)/ne);
+  const Real re = std::cos(theta);
+  const Real im = std::sin(theta);
+  form_Bmu(d, Complex(re, im), A, Bmu);
+}
+
+// Eigendecomposition of the (np-1) x (np-1) complex matrix Bmu using zgeev.
+// Bmu is passed to zgeev as the matrix argument. If V is non-null, right
+// eigenvectors are requested and written to V.
+//   work must hold at least 22 (np-1) Reals: the last 2 (np-1) are zgeev's
+// real workspace, and the rest is reinterpreted as its complex workspace.
+void edecomp (const Data& d, Complex* const Bmu,
+              Real* const work, Int lwork,
+              // (w[2*i], w[2*i+1]) contains the i'th eigenvalue.
+              Real* const w, Complex* const V = nullptr) {
+  const Int n = d.np-1;
+  assert(lwork >= 22*n);
+  lwork -= 2*n;
+  Complex* const evals = reinterpret_cast<Complex*>(w);
+  Complex* const cwork = reinterpret_cast<Complex*>(work);
+  Real* const rwork = work + lwork;
+  const char jobvr = V ? 'v' : 'n';
+  Int eig_info;
+  zgeev('n', jobvr, n, Bmu, n,
+        evals,
+        nullptr, 1, V, n,
+        cwork, lwork/2,
+        rwork, eig_info);
+  assert(eig_info == 0);
+}
+} // namespace bloch
+
+// Record max_ne and make sure there is one workspace per thread: the OpenMP
+// maximum thread count if threaded, otherwise one.
+void MaxEigComputer::setup_workspace (const Int max_ne_) {
+  int nthr = 1;
+  if (threaded)
+    nthr = omp_get_max_threads();
+  wss.resize(nthr);
+  max_ne = max_ne_;
+}
+
+// Maps (iteration, thread id) pairs onto the indices [0, n) so that each index
+// is produced exactly once over nit() iterations of P threads, with early
+// iterations strided across the range rather than consecutive.
+struct IndexCover {
+  // Members are initialized in declaration order (P, n, N, split), so N is
+  // computed from the constructor arguments rather than from split.
+  IndexCover (Int n_, Int P_)
+    : P(P_),
+      n(n_),
+      N(std::max(1, n_/std::max(10, P_))),
+      split(std::max(10, P_))
+  {}
+
+  // Number of iterations needed: ceil(n/P).
+  Int nit () const { return (n+P-1)/P; }
+
+  // Index assigned to thread tid in iteration it, or -1 if there is none.
+  Int idx (const Int it, const Int tid) const {
+    const Int k = P*it + tid;
+    Int i = k;
+    // The first split*N slots are permuted into split strided chunks; any
+    // remaining slots map to themselves.
+    if (k < split*N)
+      i = (k % split)*N + (k / split);
+    if (i >= n) return -1;
+    return i;
+  }
+
+private:
+  Int P, n, N, split;
+};
+
+// Check, for a range of (n, P), that IndexCover never returns an index >= n
+// and returns every index in [0, n) exactly once. Returns the number of
+// violations.
+static Int test_index_cover () {
+  const auto count_errors = [] (const Int n, const Int P) -> Int {
+    const IndexCover cover(n, P);
+    std::vector<int> hits(n, 0);
+    Int bad = 0;
+    const Int nit = cover.nit();
+    for (Int it = 0; it < nit; ++it) {
+      for (Int tid = 0; tid < P; ++tid) {
+        const Int i = cover.idx(it, tid);
+        if (i >= n) ++bad;
+        else if (i >= 0) ++hits[i];
+      }
+    }
+    for (const int h : hits)
+      if (h != 1)
+        ++bad;
+    return bad;
+  };
+  Int nerr = 0;
+  for (const Int n : {15, 33, 128, 1111, 3333, 4000, 7777})
+    for (const Int P : {1, 2, 3, 8, 11, 48, 272})
+      nerr += count_errors(n, P);
+  return nerr;
+}
+
+// Search over shifts dx in (0, 0.5] and over increasingly fine (ne, ndx)
+// resolutions for the maximum eigenvalue amplitude of the method im.
+//   ne_max, ndx_max: finest resolutions to try.
+//   maxeigampm1: threshold on (max amplitude - 1); the search stops as soon as
+//     it is exceeded.
+//   quiet: suppress per-resolution progress output.
+// Returns (max amplitude found) - 1.
+Real MaxEigComputer::
+run (const Int& ne_max, const Int& ndx_max, const Real& maxeigampm1,
+     const bool quiet, const InterpMethod& im) {
+  setup_workspace(ne_max);
+  // Search dx in (0, 0.5], in parallel, to see if there's a max |lambda| - 1
+  // bigger than tol.
+  const auto cdxeig = [=] (Int ne, Int ndx, Real tol) {
+    Real mme = 0;
+    const int P = threaded ? omp_get_max_threads() : 1;
+    // Chunk up the search space so we explore widely as early as possible.
+    const bool both_dir = false;
+    const auto fac = both_dir ? 2 : 1;
+    const Int n = fac*ndx;
+    IndexCover ic(n, P);
+    const Int nit = ic.nit();
+    assert(P <= max_nthread);
+    const auto run1 = [&] (const int it, const int tid) -> Real {
+      // IndexCover reports "no work for this slot" as -1. Test that before
+      // the shift to 1-based indexing; after the shift it would be 0 and
+      // indistinguishable from a valid slot by sign.
+      const Int slot = ic.idx(it, tid);
+      if (slot < 0) return 1;
+      const Int i = slot + 1;
+      // dx is in (-0.5, 0.5] or (0, 0.5].
+      const Real dx = both_dir ? (Real(i)/n - 0.5) : (0.5*i)/n;
+      if (dx == 0) return 1;
+      Real me;
+      compute(im, dx, ne, &me);
+      return me;
+    };
+    std::array<Real,max_nthread> mes;
+    // Entries not written by a thread must not contribute garbage to the max.
+    mes.fill(0);
+    for (int it = 0; it < nit; ++it) {
+      if (threaded) {
+#       pragma omp parallel
+        {
+          const int tid = omp_get_thread_num();
+          mes[tid] = run1(it, tid);
+        }
+        for (int j = 0; j < std::min(P, n); ++j)
+          mme = std::max(mme, mes[j]);
+      } else {
+        // Accumulate the running max, as the threaded path does, rather than
+        // keeping only the most recent value.
+        mme = std::max(mme, run1(it, 0));
+      }
+      if (mme - 1 >= tol)
+        break;
+    }
+    return mme;
+  };
+
+  Real maxeigamp = -1;
+  // Ramp up precision of search to encourage an early exit when the method
+  // (np, order, offset) is not stable.
+  for (int ne : {2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, ne_max}) {
+    if (ne > ne_max) break;
+    bool toobig = false, first = true;
+    for (int ndx : {32, 64, 128, 256, 512, 1024, ndx_max}) {
+      // Always run at least the coarsest ndx; the finest ndx is reserved for
+      // the finest ne.
+      if ( ! first && ndx > ndx_max) break;
+      if (ne < ne_max && ndx == ndx_max) break;
+      first = false;
+      maxeigamp = std::max(maxeigamp,
+                           cdxeig(ne, ndx, maxeigampm1));
+      if ( ! quiet)
+        printf("ne %2d nsamp %3d %1.3e\n", ne, ndx, maxeigamp-1);
+      toobig = maxeigamp - 1 > maxeigampm1;
+      if (toobig) break;
+    }
+    if (toobig) break;
+  }
+  return maxeigamp-1;
+}
+
+// Compute, over dx = 0.5*ix/ndx for ix in [0, ndx], the maximum eigenvalue
+// amplitude and the maximum condition number of the eigenvector matrix, using
+// nmu as the third argument to compute(). The third field of the result is
+// always 1.
+//   Because compute() is asked for max_condv, this requires the Bloch-wave
+// mode; compute() throws otherwise.
+MaxEigComputer::Analysis MaxEigComputer::
+calc_max_vals (const Int& nmu, const Int& ndx,
+               const InterpMethod& im) {
+  const auto calc1 = [=] (const Int& idx) -> Analysis {
+    const Real dx = (0.5*idx)/ndx;
+    if (dx == 0) return {1,1,1};
+    Analysis vals;
+    // compute() does not write through its max_defect_ub argument, so give
+    // the field a defined value before vals is copied out.
+    vals.max_defect_ub = 1;
+    compute(im, dx, nmu, &vals.max_eig_amp, &vals.max_condv,
+            &vals.max_defect_ub);
+    return vals;
+  };
+  Real max_eig_amp = 0, max_condv = 0;
+  // Use <= ndx below to get dx in [0, 0.5] instead of [0, 0.5).
+  if (threaded) {
+    // The per-thread accumulators are indexed by thread id.
+    assert(omp_get_max_threads() <= max_nthread);
+    Real mea[max_nthread] = {0}, mcv[max_nthread] = {0};
+#   pragma omp parallel for
+    for (Int ix = 0; ix <= ndx; ++ix) {
+      const int tid = omp_get_thread_num();
+      const auto mv = calc1(ix);
+      mea[tid] = std::max(mea[tid], mv.max_eig_amp);
+      mcv[tid] = std::max(mcv[tid], mv.max_condv);
+    }
+    // Reduce the per-thread maxima.
+    for (Int i = 0; i < omp_get_max_threads(); ++i) {
+      max_eig_amp = std::max(max_eig_amp, mea[i]);
+      max_condv = std::max(max_condv, mcv[i]);
+    }
+  } else {
+    for (Int ix = 0; ix <= ndx; ++ix) {
+      const auto mv = calc1(ix);
+      max_eig_amp = std::max(max_eig_amp, mv.max_eig_amp);
+      max_condv = std::max(max_condv, mv.max_condv);
+    }
+  }
+  return {max_eig_amp, max_condv, 1};
+}
+
+// Compute the maximum eigenvalue amplitude of the operator for shift dx.
+//   max_amp_out (required): receives the maximum |lambda|.
+//   max_condv (optional, Bloch mode only): receives the maximum over ie of the
+//     2-norm condition number of the eigenvector matrix.
+//   max_defect_ub (optional, Bloch mode only): accepted but not written here.
+//   lam (optional): receives the eigenvalues; ne*(np-1) of them in either
+//     mode.
+//   V (optional): receives the eigenvectors; in Bloch mode, ne blocks of
+//     (np-1)^2 complex values; otherwise it is passed to dgeev as a real
+//     array.
+// In Bloch mode, ne is the number of values of mu sampled; otherwise it is the
+// number of cells of the mesh whose N x N operator is decomposed directly.
+// Uses the calling thread's workspace.
+void MaxEigComputer::
+compute (const InterpMethod& im, const Real& dx, const Int& ne,
+         Real* max_amp_out, Real* max_condv,
+         Real* max_defect_ub,
+         Complex* lam, Complex* V) {
+  auto& ws = wss[threaded ? omp_get_thread_num() : 0];
+  Real max_amp = 0;
+  const Int np = im.np, npm1 = np - 1;
+  if (bloch) {
+    // Workspace sizes, in Reals. ws.A holds the kernel block followed by the
+    // complex matrix B(mu); ws.work holds, in order, the edecomp workspace,
+    // the eigenvectors, and the cond_2norm workspace.
+    const Int N = 3*npm1*np, edecomp_ws = 22*npm1, evecs_ws = 2*npm1*npm1,
+      cond_2norm_ws = 12*npm1;
+    if (static_cast<int>(ws.A.size()) < N) {
+      ws.A.optclear_and_resize(N);
+      ws.wr.optclear_and_resize(2*npm1);
+      ws.work.optclear_and_resize(edecomp_ws + evecs_ws + cond_2norm_ws);
+    }
+    bloch::Data bd(np, dx, im.uim == nullptr ? nullptr : im.uim->get_xnodes());
+    bloch::form_kernel_block(bd, im, ws.A.data());
+    Complex* const Bmu = reinterpret_cast<Complex*>(ws.A.data() + npm1*np);
+    if (max_condv) *max_condv = 0;
+    Complex* evecs = reinterpret_cast<Complex*>(ws.work.data() + edecomp_ws);
+    for (int ie = 0; ie < ne; ++ie) {
+      bloch::form_Bmu(bd, ne, ie, ws.A.data(), Bmu);
+      // Eigenvalues land in ws.wr as interleaved (re, im) pairs.
+      bloch::edecomp(bd, Bmu, ws.work.data(), edecomp_ws, ws.wr.data(),
+                     V || max_condv ? evecs : nullptr);
+      for (Int i = 0; i < npm1; ++i) {
+        // Note: this local im shadows the InterpMethod parameter inside this
+        // loop body.
+        const Real re = ws.wr[2*i], im = ws.wr[2*i+1];
+        max_amp = std::max(max_amp, std::sqrt(re*re + im*im));
+      }
+      // Copy out the eigenvectors before cond_2norm overwrites evecs.
+      if (V)
+        for (Int i = 0; i < npm1*npm1; ++i)
+          V[npm1*npm1*ie + i] = evecs[i];
+      if (max_condv) {
+        const Real condv = cond_2norm(evecs, npm1,
+                                      ws.work.data() + edecomp_ws + evecs_ws,
+                                      cond_2norm_ws);
+        *max_condv = std::max(*max_condv, condv);
+      }
+      if (lam)
+        for (Int i = 0; i < npm1; ++i)
+          lam[ie*npm1 + i] = Complex(ws.wr[2*i], ws.wr[2*i+1]);
+    }
+  } else {
+    throw_if(max_condv || max_defect_ub,
+             "Only Bloch-wave-based edecomp supports"
+             " max cond(V) and max defect_ub(lam).");
+    // Compute eigenvalues of discrete space-time operator with ne cells.
+    const Int N = ne*npm1;
+    if (static_cast<int>(ws.wr.size()) < N) {
+      // Size once for the largest problem expected, to avoid repeated
+      // resizing as ne grows.
+      const int Nonce = std::max(ne, max_ne)*npm1;
+      ws.A.optclear_and_resize(Nonce*Nonce);
+      ws.wr.optclear_and_resize(Nonce);
+      ws.wi.optclear_and_resize(Nonce);
+      ws.work.optclear_and_resize(10*Nonce);
+    }
+    // -dx to match Bloch-wave computation.
+    get_matrix(ne, np, -dx, im, ws.A);
+    // NOTE(review): eig_info is not checked after dgeev.
+    int eig_info;
+    Real* Vreal = V ? reinterpret_cast<Real*>(V) : nullptr;
+    dgeev('n', V ? 'v' : 'n', N, ws.A.data(), N, ws.wr.data(), ws.wi.data(),
+          nullptr, 1, Vreal, N, ws.work.data(), ws.work.size(), eig_info);
+    for (Int i = 0; i < N; ++i)
+      max_amp = std::max(max_amp,
+                         std::sqrt(ws.wr[i]*ws.wr[i] + ws.wi[i]*ws.wi[i]));
+    if (lam)
+      for (Int i = 0; i < N; ++i)
+        lam[i] = Complex(ws.wr[i], ws.wi[i]);
+  }
+  assert(max_amp_out);
+  *max_amp_out = max_amp;
+}
+
+// Scale the length-n complex vector v in place to unit 2-norm.
+static void normalize (Complex* v, const Int& n) {
+  Real sumsq = 0;
+  for (Int k = 0; k < n; ++k) {
+    const Real mag = std::abs(v[k]);
+    sumsq += mag*mag;
+  }
+  const Real scale = 1/std::sqrt(sumsq);
+  for (Complex* p = v; p != v + n; ++p)
+    *p *= scale;
+}
+
+// Multiply the length-n complex vector v in place by exp(-i arg(v[0])), which
+// rotates v[0] onto the nonnegative real axis.
+static void remove_phase (Complex* v, const Int& n) {
+  const Real theta = std::atan2(v[0].imag(), v[0].real());
+  const Complex rot(std::cos(theta), -std::sin(theta));
+  for (Complex* p = v; p != v + n; ++p)
+    *p *= rot;
+}
+
+// Build the complex eigenvector v for eigenvalue lam from dgeev's real
+// eigenvector array V, whose columns have length n. For a real eigenvalue,
+// column vi is the eigenvector. Otherwise columns vi and vi+1 hold the real
+// and imaginary parts, and pair selects the sign of the imaginary part
+// (pair == 0: +, else -). The result is normalized and its phase removed.
+static void write_dgeev_evec (const Complex& lam, const Real* V,
+                              const Int& vi, const Int& pair, const Int& n,
+                              Complex* v) {
+  const Real* const re = V + vi*n;
+  if (lam.imag() == 0) {
+    for (Int k = 0; k < n; ++k)
+      v[k] = re[k];
+  } else {
+    const Real* const im = V + (vi+1)*n;
+    Int sign = -1;
+    if (pair == 0) sign = 1;
+    for (Int k = 0; k < n; ++k)
+      v[k] = Complex(re[k], sign*im[k]);
+  }
+  normalize(v, n);
+  remove_phase(v, n);
+}
+
+// Expand Bloch evec vi to all ne cells; cell ie is scaled by exp(i*ie*theta).
+static void write_bloch_evec (const Complex* v_bloch, const Int& vi,
+                              const Int& ne, const Int& np, Complex* v) {
+  const Int npm1 = np-1;
+  const Real theta = 2 * M_PI * (Real(vi / npm1)/ne);
+  const Complex* const src = v_bloch + npm1*vi;
+  Complex* dst = v;
+  for (Int ie = 0; ie < ne; ++ie) {
+    const Complex mu(std::cos(ie*theta), std::sin(ie*theta));
+    for (Int i = 0; i < npm1; ++i) *dst++ = mu*src[i];
+  }
+  normalize(v, ne*npm1);
+  remove_phase(v, ne*npm1);
+}
+
+// Check that the eigenvalues and eigenvectors derived from Bloch-wave analysis
+// -- the size-(np-1) eigenvalue problem -- match those derived from brute-force
+// eigendecomp of the ne-mesh space-time operator. Returns the failure count.
+static int
+check_bloch_edecomp (const int np, const int ne,
+                     const Complex* lam, const Complex* lam_bloch,
+                     const Real* V, const Complex* v_bloch,
+                     // >= 2 ne (np-1)
+                     Complex* work) {
+  const Int N = ne*(np-1);
+  Complex* const v_bf = work;
+  Complex* const v_full_bloch = work + N;
+  int nerr = 0;
+  Array<bool> used(N, false);
+  for (Int i = 0, i_vec = 0, pair = 0; i < N; ++i) {
+    // Find the Bloch eigenvalue nearest lam[i]. Seeding the search with j = 0
+    // keeps jmin a valid index even when no candidate is close.
+    Int jmin = 0;
+    Real min_diff = std::abs(lam_bloch[0] - lam[i]);
+    for (Int j = 1; j < N; ++j) {
+      const Real diff = std::abs(lam_bloch[j] - lam[i]);
+      if (diff < min_diff) {
+        min_diff = diff;
+        jmin = j;
+      }
+    }
+    used[jmin] = true;
+    if (min_diff > 1e-13)
+      ++nerr;
+    // Compare eigenvectors. For dgeev, some arithmetic is needed to account
+    // for how evecs are packed.
+    write_dgeev_evec(lam[i], V, i_vec, pair, N, v_bf);
+    if (pair == 1) {
+      assert(lam[i].imag() != 0);
+      i_vec += 2;
+      pair = 0;
+    } else if (lam[i].imag() == 0) {
+      i_vec++;
+      assert(pair == 0);
+    } else {
+      assert(pair == 0);
+      pair++;
+    }
+    write_bloch_evec(v_bloch, jmin, ne, np, v_full_bloch);
+    Real num = 0, den = 0;
+    for (Int k = 0; k < N; ++k) {
+      Real d = std::abs(v_full_bloch[k] - v_bf[k]);
+      num += d*d;
+      d = std::abs(v_bf[k]);
+      den += d*d;
+    }
+    if (std::sqrt(num/den) >= 3e-9) {
+      pr(puf(i) pu(jmin) pu(min_diff) pu(std::sqrt(num/den)));
+      ++nerr;
+    }
+  }
+  // Check that all eigenvalues were matched.
+  for (Int j = 0; j < N; ++j)
+    if ( ! used[j])
+      ++nerr;
+  return nerr;
+}
+
+int MaxEigComputer::unittest () {
+  int nerr = 0;
+  MaxEigComputer mec(false, false), mec_bloch(false, true);
+  const int np_max = std::min(13, islet::np_max), ne_max = np_max,
+    Nmax = (np_max-1)*ne_max;
+  Array<Complex> lam(Nmax), lam_bloch(Nmax),
+    V((Nmax*Nmax)/2), v_bloch((np_max-1)*(np_max-1)*ne_max), work(2*Nmax);
+  for (int np = 4; np <= np_max; ++np) {
+    InterpMethod im(np, InterpMethod::npxstab);
+    for (int ne : {1, 3, ne_max}) {
+      const Int N = ne*(np-1);
+      assert(N <= Nmax);
+      for (Real dx : {0.05, 0.42, 0.7}) {
+        Real max_amp;
+        mec.compute(im, dx, ne, &max_amp, nullptr, nullptr,
+                    lam.data(), V.data());
+        mec_bloch.compute(im, dx, ne, &max_amp, nullptr, nullptr,
+                          lam_bloch.data(), v_bloch.data());
+        nerr += check_bloch_edecomp(np, ne, lam.data(), lam_bloch.data(),
+                                    reinterpret_cast<Real*>(V.data()),
+                                    v_bloch.data(), work.data());
+      }
+    }
+  }
+  nerr += test_index_cover();
+  return nerr;
+}
diff --git a/methods/islet/islet_maxeigcomp.hpp b/methods/islet/islet_maxeigcomp.hpp
new file mode 100644
index 0000000..a673998
--- /dev/null
+++ b/methods/islet/islet_maxeigcomp.hpp
@@ -0,0 +1,79 @@
+#ifndef INCLUDE_ISLET_MAXEIGCOMP_HPP
+#define INCLUDE_ISLET_MAXEIGCOMP_HPP
+
+#include "islet_types.hpp"
+#include "islet_util.hpp"
+#include "islet_interpmethod.hpp"
+#include "islet_npx.hpp"
+
+using islet::Array;
+
+#include <memory>
+
+class MaxEigComputer {
+  static constexpr int max_nthread = 136;
+  struct Workspace { Array<Real> A, wr, wi, work; };
+  std::vector<Workspace> wss;
+  Int max_ne;
+  bool threaded, bloch;
+  // Wrap a caller-owned method in a shared_ptr whose deleter does nothing.
+  static InterpMethod wrap_unowned (UserInterpMethod* uim) {
+    return InterpMethod(std::shared_ptr<UserInterpMethod>(
+                          uim, [] (UserInterpMethod*) {}));
+  }
+
+public:
+  MaxEigComputer (const bool ithreaded = true, const bool ibloch = true)
+    : max_ne(0), threaded(ithreaded), bloch(ibloch)
+  { setup_workspace(); }
+
+  bool is_threaded () const { return threaded; }
+
+  void setup_workspace(const Int max_ne_ = 64);
+
+  Real run(const Int& ne_max, const Int& ndx_max, const Real& maxeigampm1,
+           const bool quiet, const InterpMethod& im);
+
+  // Overloads for user-defined methods; the np argument is not used.
+  Real run (const Int& np, const Int& ne_max, const Int& ndx_max,
+            const Real& maxeigampm1, const bool quiet,
+            UserInterpMethod* uim) {
+    return run(ne_max, ndx_max, maxeigampm1, quiet, wrap_unowned(uim));
+  }
+  Real run (const Int& np, const Int& ne_max, const Int& ndx_max,
+            const Real& maxeigampm1, const bool quiet,
+            const UserInterpMethod::Ptr& uim) {
+    return run(ne_max, ndx_max, maxeigampm1, quiet, InterpMethod(uim));
+  }
+
+  struct Analysis {
+    static constexpr Real condv_switch = 1e2;
+    Real max_eig_amp, max_condv, max_defect_ub;
+  };
+
+  Analysis calc_max_vals(const Int& nmu, const Int& ndx,
+                         const InterpMethod& im);
+
+  Analysis calc_max_vals (const Int& nmu, const Int& ndx, const Int& np,
+                          UserInterpMethod* uim) {
+    return calc_max_vals(nmu, ndx, wrap_unowned(uim));
+  }
+
+  // dx is fraction of an element, so [0,1], *not* reference-element coordinate.
+  void compute(const InterpMethod& im, const Real& dx, const Int& ne,
+               Real* max_amp_out, Real* max_condv = nullptr,
+               Real* max_defect_ub = nullptr,
+               Complex* lam = nullptr, Complex* V = nullptr);
+
+  void compute(UserInterpMethod* uim, const Real& dx, const Int& ne,
+               Real* max_amp_out, Real* max_condv = nullptr,
+               Real* max_defect_ub = nullptr,
+               Complex* lam = nullptr, Complex* V = nullptr) {
+    compute(wrap_unowned(uim), dx, ne, max_amp_out, max_condv,
+            max_defect_ub, lam, V);
+  }
+
+  static int unittest();
+};
+
+#endif
diff --git a/methods/islet/islet_nodalbasis.cpp b/methods/islet/islet_nodalbasis.cpp
new file mode 100644
index 0000000..16ef25b
--- /dev/null
+++ b/methods/islet/islet_nodalbasis.cpp
@@ -0,0 +1,195 @@
+#include "islet_nodalbasis.hpp"
+#include "islet_npx.hpp"
+#include "islet_util.hpp"
+
+// Default state: no regions (nh = 0) until one of the init() overloads runs.
+Nodes::Nodes () : np(-1), nh(0), include_bdy_val(true) {}
+// Deep copy: duplicate the storage, then rebuild the row pointers into it.
+Nodes::Nodes (const Nodes& s)
+  : np(s.np), nh(s.nh), include_bdy_val(s.include_bdy_val),
+    data(s.data), subnp(s.subnp)
+{ set_ptrs(); }
+Nodes::Nodes (const Int np_, const bool include_bdy) { init(np_, include_bdy); }
+
+void Nodes::set_ptrs () { // nodes[i] -> row i of data, np entries per row
+  nodes.resize(nh);
+  Int* row = data.data();
+  for (auto& p : nodes) { p = row; row += np; }
+}
+
+// Allocate nh regions with room for np indices each; new subnp entries are -1.
+void Nodes::init (const Int np_, const bool include_bdy_) {
+  np = np_; include_bdy_val = include_bdy_;
+  nh = include_bdy_ ? np_/2 : np_/2 + 1;
+  data.resize(np_*nh);
+  set_ptrs();
+  subnp.resize(nh, -1);
+}
+
+void Nodes::set (const Int i, const std::initializer_list<Int>& il) {
+  set(i, std::vector<Int>(il)); // forward to the std::vector overload
+}
+// Store il as region i's node list; i must be a valid region, il.size() <= np.
+void Nodes::set (const Int i, const std::vector<Int>& il) {
+  assert(i >= 0 && i < static_cast<Int>(nodes.size())); // strict: i == size is out of range
+  assert(static_cast<Int>(il.size()) <= np);
+  subnp[i] = static_cast<Int>(il.size());
+  for (Int j = 0; j < subnp[i]; ++j) nodes[i][j] = il[j];
+}
+
+// Serialize as "np bdy | i subnp: ids | ...", the format init(string) parses.
+std::string Nodes::string (const bool newline) const {
+  std::stringstream out;
+  out << np << " " << int(include_bdy_val) << " | ";
+  for (size_t ir = 0, nreg = nodes.size(); ir < nreg; ++ir) {
+    if (subnp[ir] == -1) continue; // still at init()'s -1 default
+    out << ir << " " << subnp[ir] << ":";
+    for (Int j = 0; j < subnp[ir]; ++j) out << " " << nodes[ir][j];
+    if (ir+1 != nreg) out << " | ";
+  }
+  if (newline) out << "\n";
+  return out.str();
+}
+
+// Parse string()'s format. Returns false, leaving *this untouched, if np is
+// unreadable or < 2; false on a malformed region; otherwise ok_to_eval().
+bool Nodes::init (const std::string& s) {
+  std::stringstream ss(s);
+  const auto read_int = [&] () -> int { int i = 0; ss >> i; return i; };
+  const auto eat_until_after_bar = [&] () {
+    while ( ! ss.eof() && ss.peek() != '|') ss.get();
+    ss.get();
+  };
+  // Validate the header first so a bad np never reaches the allocating init().
+  const Int np_in = read_int();
+  const bool bdy_in = read_int();
+  if (ss.fail() || np_in < 2) return false;
+  init(np_in, bdy_in);
+  for (Int ni = 0; ni < get_nh(); ++ni) {
+    eat_until_after_bar();
+    const auto ni_check = read_int();
+    if (ni_check != ni) return false;
+    subnp[ni] = read_int();
+    if (subnp[ni] < 2 || subnp[ni] > np) return false;
+    ss.get(); // colon
+    for (Int i = 0; i < subnp[ni]; ++i)
+      nodes[ni][i] = read_int();
+  }
+  return ok_to_eval();
+}
+
+// True iff each region has >= 2 strictly increasing ids containing ni, ni+1.
+bool Nodes::ok_to_eval () const {
+  for (Int ni = 0; ni < get_nh(); ++ni) {
+    if (subnp[ni] < 2) return false;
+    const Int* const ids = nodes[ni];
+    Int nfound = (ids[0] == ni || ids[0] == ni+1) ? 1 : 0;
+    for (Int i = 1; i < subnp[ni]; ++i) {
+      if (ids[i] <= ids[i-1]) return false;
+      if (ids[i] == ni || ids[i] == ni+1) ++nfound;
+    }
+    if (nfound != 2) return false;
+  }
+  return true;
+}
+
+// Nodes are equal when np, the boundary flag, and every region's list agree.
+bool operator== (const Nodes& a, const Nodes& b) {
+  if (a.get_np() != b.get_np() || a.include_bdy() != b.include_bdy())
+    return false;
+  const auto a_ids = a.get_nodes();
+  const auto b_ids = b.get_nodes();
+  const auto a_cnt = a.get_subnp();
+  const auto b_cnt = b.get_subnp();
+  for (Int ni = 0; ni < a.get_nh(); ++ni) {
+    if (a_cnt[ni] != b_cnt[ni]) return false;
+    for (Int i = 0; i < a_cnt[ni]; ++i)
+      if (a_ids[ni][i] != b_ids[ni][i]) return false;
+  }
+  return true;
+}
+bool operator!= (const Nodes& a, const Nodes& b) { return ! (a == b); }
+
+// Evaluate the np basis functions at x into v. x > 0 is handled by evaluating
+// at -x and reversing v; otherwise a region is selected by scanning xnodes.
+void eval (const Int& np, const bool bdy, const Real* const xnodes,
+           const Int* subnp, Int const* const* nodes,
+           const Real& x, Real* const v) {
+  if (x > 0) {
+    eval(np, bdy, xnodes, subnp, nodes, -x, v);
+    for (Int lo = 0, hi = np-1; lo < hi; ++lo, --hi)
+      std::swap(v[lo], v[hi]);
+    return;
+  }
+  const Int nreg = bdy ? np-1 : np+1;
+  const Int ios = bdy ? 1 : 0;
+  Int ir = 0;
+  while (ir < nreg && ir < np-2 && x > xnodes[ir+ios]) ++ir;
+  if (ir >= nreg) return;
+  if (subnp[ir] == np) {
+    eval_lagrange_poly(xnodes, np, x, v);
+    return;
+  }
+  Real xsub[islet::np_max], vsub[islet::np_max];
+  const Int* const ids = nodes[ir];
+  for (Int j = 0; j < subnp[ir]; ++j)
+    xsub[j] = xnodes[ids[j]];
+  std::fill(v, v + np, 0);
+  eval_lagrange_poly(xsub, subnp[ir], x, vsub);
+  for (Int j = 0; j < subnp[ir]; ++j) {
+    assert(ids[j] >= 0 && ids[j] < np);
+    v[ids[j]] = vsub[j];
+  }
+}
+
+// Convenience overload: unpack a Nodes object and call the raw-array eval.
+void eval (const Nodes& nodes, const Real* const xnodes, const Real& x,
+           Real* const v) {
+  eval(nodes.get_np(), nodes.include_bdy(), xnodes, nodes.get_subnp(),
+       nodes.get_nodes(), x, v);
+}
+void eval (const Nodes& nodes, const Real& x, Real* const v) {
+  const auto xnodes = islet::get_x_gll_special(nodes.get_np());
+  eval(nodes, xnodes, x, v);
+}
+
+void unittest_Nodes () { // validation, copy/compare, string round trip, eval
+  Nodes n(10);
+  require( ! n.ok_to_eval()); // No region has been set yet.
+  n.set(0, {0,1,9});
+  n.set(1, {1,2,9});
+  require( ! n.ok_to_eval()); // Regions 2-4 are still unset.
+  n.set(2, {0,2,3,9});
+  n.set(3, {0,2,3,4,9});
+  n.set(4, {0,2,4,5,6});
+  // Test order checking: ids must be strictly increasing.
+  require(n.ok_to_eval()); n.set(1, {2,1,9}); require( ! n.ok_to_eval()); n.set(1, {1,2,9});
+  // Test interpolatory checking: region 2 must contain nodes 2 and 3.
+  require(n.ok_to_eval()); n.set(2, {4,5,9}); require( ! n.ok_to_eval()); n.set(2, {0,2,3,9});
+  require(n.ok_to_eval());
+  { Nodes n1(n); require(n1.ok_to_eval()); require(n1 == n); }
+  {
+    Nodes n1(n);
+    n1.set(2, {1,2,3,9}  ); require(n1.ok_to_eval()); require(n1 != n);
+    n1.set(2, {0,1,2,3,9}); require(n1.ok_to_eval()); require(n1 != n);
+  }
+  { Nodes n1; require(n1.init(n.string())); require(n1.ok_to_eval()); require(n1 == n); }
+  {
+    Nodes n1;
+    n1.init("12 1 | 0 12: 0 1 2 3 4 5 6 7 8 9 10 11 | 1 9: 0 1 2 3 4 5 6 7 8 | "
+            "2 10: 0 1 2 3 4 5 6 7 8 9 | 3 10: 0 1 2 3 4 5 6 7 8 9 | "
+            "4 9: 1 2 3 4 5 6 7 8 9 | 5 10: 1 2 3 4 5 6 7 8 9 10");
+    const Int nx = 71;
+    Int ne = 0; // number of entries that disagree with npxstab
+    for (Int ix = 0; ix <= nx; ++ix) {
+      const Real x = -1 + Real(ix)/nx;
+      Real v1[12], v2[12];
+      npxstab<Real>::eval<12,6>({12, 9, 10, 10,  9, 10}, {0, 0, 0, 0, 1, 1}, x, v1);
+      require(n1.ok_to_eval());
+      eval(n1, x, v2);
+      for (Int i = 0; i < 12; ++i)
+        if (islet::reldif(v1[i], v2[i]) > 4*std::numeric_limits<Real>::epsilon()) ++ne;
+    }
+    require(ne == 0);
+  }
+}
diff --git a/methods/islet/islet_nodalbasis.hpp b/methods/islet/islet_nodalbasis.hpp
new file mode 100644
index 0000000..38a9ba7
--- /dev/null
+++ b/methods/islet/islet_nodalbasis.hpp
@@ -0,0 +1,59 @@
+#ifndef INCLUDE_ISLET_NODALBASIS_HPP
+#define INCLUDE_ISLET_NODALBASIS_HPP
+
+#include "islet_types.hpp"
+#include "islet_util.hpp"
+
+class Nodes { // region i's node ids are nodes[i][0..subnp[i]-1], stored in data
+  Int np, nh;
+  bool include_bdy_val;
+  std::vector<Int*> nodes;
+  std::vector<Int> data, subnp;
+  void set_ptrs();
+
+public:
+  typedef std::shared_ptr<Nodes> Ptr;
+  Nodes();
+  Nodes(const Nodes& s);
+  Nodes(const Int np_, const bool include_bdy = true);
+  // Deep copy; the implicit operator= would leave nodes pointing into s.data.
+  Nodes& operator= (const Nodes& s) {
+    np = s.np; nh = s.nh; include_bdy_val = s.include_bdy_val;
+    data = s.data; subnp = s.subnp;
+    set_ptrs();
+    return *this;
+  }
+  void init(const Int np, const bool include_bdy);
+  bool init(const std::string& s);
+
+  Int get_np () const { return np; }
+  Int get_nh () const { return nh; }
+  bool include_bdy () const { return include_bdy_val; }
+  Int const* const* get_nodes () const { return nodes.data(); }
+  const Int* get_subnp () const { return subnp.data(); }
+  void set(const Int i, const std::initializer_list<Int>& il);
+  void set(const Int i, const std::vector<Int>& il);
+  template <typename IntT> // ireg must be an existing region, isubnp <= np
+  void set(const Int ireg, const IntT* const inodes, const Int isubnp) {
+    assert(ireg >= 0 && ireg < static_cast<Int>(nodes.size()));
+    assert(isubnp <= np);
+    subnp[ireg] = isubnp;
+    for (Int j = 0; j < isubnp; ++j) nodes[ireg][j] = inodes[j];
+  }
+  bool ok_to_eval() const;
+  std::string string(const bool newline = true) const;
+};
+
+bool operator==(const Nodes&, const Nodes&);
+bool operator!=(const Nodes&, const Nodes&);
+
+void eval(const Int& np, const bool bdy, const Real* const xnodes,
+          const Int* subnp, Int const* const* nodes,
+          const Real& x, Real* const v);
+void eval(const Nodes& nodes, const Real* const xnodes,
+          const Real& x, Real* const v);
+void eval(const Nodes& nodes, const Real& x, Real* const v);
+
+void unittest_Nodes();
+
+#endif
diff --git a/methods/islet/islet_np4.cpp b/methods/islet/islet_np4.cpp
new file mode 100644
index 0000000..b2f4894
--- /dev/null
+++ b/methods/islet/islet_np4.cpp
@@ -0,0 +1,46 @@
+#include "islet_np4.hpp"
+#include "islet_isl.hpp"
+
+static const Real oosqrt5 = 0.44721359549995793928; // 1/sqrt(5)
+
+// Value at x of the Lagrange interpolant through the n points
+// (xsup[i], ysup[i]).
+static Real eval_lagrange_poly (const Int& n, const Real* xsup, const Real* ysup,
+                                const Real& x) {
+  Real sum = 0;
+  for (int i = 0; i < n; ++i) {
+    Real basis = 1; // i-th Lagrange basis polynomial evaluated at x
+    for (int j = 0; j < n; ++j)
+      if (j != i) basis *= (x - xsup[j]) / (xsup[i] - xsup[j]);
+    sum += basis*ysup[i];
+  }
+  return sum;
+}
+
+Np4InterpMethod::Np4InterpMethod (Real c0, Real c1, Real c2) {
+  c[0] = c0; c[1] = c1; c[2] = c2; // same assignments as reset_c
+}
+
+void Np4InterpMethod::reset_c (Real c0, Real c1, Real c2) {
+  c[0] = c0; c[1] = c1; c[2] = c2; // the values eval_a interpolates
+}
+
+Real Np4InterpMethod::eval_a (const Real& x) const { // r: |x| mapped to [1,-1]
+  const Real r = 2*(1 - std::abs(x))/(1 - oosqrt5) - 1;
+  return eval_lagrange_poly(3, islet::get_x_gll(3), c, r);
+}
+
+// Middle region: the 4-node Lagrange basis. End regions: blend a 3-node basis
+// (weight a) with the 4-node basis (weight 1-a).
+void Np4InterpMethod::eval (const Real& x, Real* const y) {
+  const auto* x_gll = get_xnodes();
+  const bool in_end_region = x < -oosqrt5 || x > oosqrt5;
+  if ( ! in_end_region) { islet::eval_lagrange_poly(x_gll, 4, x, y); return; }
+  const Int os = x < -oosqrt5 ? 0 : 1; // first of the 3 nodes used
+  y[0] = y[3] = 0;
+  islet::eval_lagrange_poly(x_gll + os, 3, x, y + os);
+  Real y4[4];
+  islet::eval_lagrange_poly(x_gll, 4, x, y4);
+  const Real a = eval_a(x);
+  for (int i = 0; i < 4; ++i) y[i] = a*y[i] + (1 - a)*y4[i];
+}
diff --git a/methods/islet/islet_np4.hpp b/methods/islet/islet_np4.hpp
new file mode 100644
index 0000000..eb327cc
--- /dev/null
+++ b/methods/islet/islet_np4.hpp
@@ -0,0 +1,22 @@
+#ifndef INCLUDE_ISLET_NP4_HPP
+#define INCLUDE_ISLET_NP4_HPP
+
+#include "islet_types.hpp"
+#include "islet_isl.hpp"
+#include "islet_interpmethod.hpp"
+
+struct Np4InterpMethod : public UserInterpMethod {
+  typedef std::shared_ptr<Np4InterpMethod> Ptr;
+
+  Np4InterpMethod(Real c0, Real c1, Real c2);
+  void reset_c(Real c0, Real c1, Real c2);
+  Real eval_a(const Real& x) const;
+  void eval(const Real& x, Real* const y) override; // UserInterpMethod interface
+  const Real* get_xnodes () const override { return islet::get_x_gll(4); }
+  Int get_np () const override { return 4; }
+
+private:
+  Real c[3];
+};
+
+#endif
diff --git a/methods/islet/islet_npx.cpp b/methods/islet/islet_npx.cpp
new file mode 100644
index 0000000..8eb00d0
--- /dev/null
+++ b/methods/islet/islet_npx.cpp
@@ -0,0 +1,35 @@
+#include "islet_npx.hpp"
+#include "islet_util.hpp"
+
+// Dispatch on im.type to the matching eval(a_src, v) implementation.
+void op_eval (const InterpMethod& im, const Real a_src, Real* v) {
+  if (im.type == InterpMethod::npx) npx<Real>::eval(im.np, a_src, v);
+  else if (im.type == InterpMethod::npxstab) npxstab<Real>::eval(im.np, a_src, v);
+  else if (im.type == InterpMethod::user) im.uim->eval(a_src, v);
+  else {
+    throw_if(true, "op_eval: invalid im.type " << im.type);
+  }
+}
+
+template <typename Scalar> // Dispatch np to its tabulated (order, os) arrays.
+void npxstab<Scalar>::eval (const Int& np, const Scalar& x, Scalar* const v) {
+  switch (np) {                                                   // order of accuracy
+  case  2: return eval< 2,0>({                      }, {                }, x, v); // 1
+  case  3: return eval< 3,0>({                      }, {                }, x, v); // 2
+  case  4: return eval< 4,1>({ 3                    }, {0,              }, x, v); // 2
+  case  5: return eval< 5,2>({ 3,  4                }, {0, 0            }, x, v); // 2
+  case  6: return eval< 6,2>({ 6,  5                }, {0, 0            }, x, v); // 4
+  case  7: return eval< 7,3>({ 5,  5,  6            }, {0, 0, 0         }, x, v); // 4
+  case  8: return eval< 8,4>({ 6,  6,  7,  6        }, {0, 0, 0, 1      }, x, v); // 5
+  case  9: return eval< 9,4>({ 7,  8,  7,  7        }, {0, 0, 0, 1      }, x, v); // 6
+  case 10: return eval<10,5>({ 7,  7,  7,  8,  8    }, {0, 0, 0, 0, 1   }, x, v); // 6
+  case 11: return eval<11,5>({ 8,  9,  8,  9,  8    }, {0, 0, 0, 0, 1   }, x, v); // 7
+  case 12: return eval<12,6>({ 9,  9, 10, 10,  9, 10}, {0, 0, 0, 0, 1, 1}, x, v); // 8
+  case 13: return eval<13,6>({10, 10, 10, 10, 11, 10}, {0, 0, 0, 0, 0, 1}, x, v); // 9
+  case 16: return eval<16,8>({12, 13, 13, 13, 13, 14, 13, 12},
+                              { 0,  0,  0,  0,  0,  0,  1,  2}, x, v); // 11
+  default: throw_if(true, "Only 2 <= np <= 13, np = 16 are supported.");
+  }
+}
+
+template class npxstab<Real>;
diff --git a/methods/islet/islet_npx.hpp b/methods/islet/islet_npx.hpp
new file mode 100644
index 0000000..2c24b8a
--- /dev/null
+++ b/methods/islet/islet_npx.hpp
@@ -0,0 +1,137 @@
+#ifndef INCLUDE_ISLET_NPX_HPP
+#define INCLUDE_ISLET_NPX_HPP
+
+#include "islet_util.hpp"
+#include "islet_types.hpp"
+#include "islet_tables.hpp"
+#include "islet_interpmethod.hpp"
+
+// y[i] receives the value at x of the i-th Lagrange basis polynomial on the
+// nodes x_gll[0..np-1].
+template <typename Scalar>
+void eval_lagrange_poly (const Scalar* x_gll, const Int& np, const Scalar& x,
+                         Scalar* const y) {
+  for (int i = 0; i < np; ++i) {
+    Scalar prod = 1;
+    for (int j = 0; j < np; ++j)
+      if (j != i) prod *= (x - x_gll[j]) / (x_gll[i] - x_gll[j]);
+    y[i] = prod;
+  }
+}
+
+template <typename Scalar=Real>
+struct npx { // Lagrange basis on the np nodes from islet::get_x_gll(np)
+  static void eval (const Int np, const Scalar& x, Scalar* const v) {
+    eval_lagrange_poly(islet::get_x_gll(np), np, x, v);
+  }
+};
+
+template <typename Scalar=Real>
+struct npxstab : public npx<Scalar> { // bases built on per-region node subsets
+  template <int np, int nreg> // region i uses order[i] nodes starting at os[i]
+  static void eval (const std::array<int, nreg>& order,
+                    const std::array<int, nreg>& os,
+                    const Scalar& x, Scalar* const v) {
+    if (x > 0) {
+      eval<np, nreg>(order, os, -x, v);
+      for (int i = 0; i < np/2; ++i)
+        std::swap(v[i], v[np-i-1]);
+      return;
+    }
+    const auto x_gll = islet::get_x_gll(np);
+    bool done = false;
+    for (Int i = 0; i < nreg; ++i)
+      if (x < x_gll[i+1]) {
+        std::fill(v, v + np, 0);
+        eval_lagrange_poly(x_gll + os[i], order[i], x, v + os[i]);
+        done = true;
+        break;
+      }
+    if ( ! done)
+      eval_lagrange_poly(x_gll, np, x, v);
+  }
+  // Runtime dispatch on np to the tabulated order/os arrays (islet_npx.cpp).
+  static void eval(const Int& np, const Scalar& x, Scalar* const v);
+  // NOTE(review): gives 8, 10 for np = 13, 16; the np table notes 9, 11.
+  static int ooa_vs_np (const int np) {
+    if (np == 5) return 2;
+    return np - 1 - ((np-1)/3);
+  }
+  // Blend the full-np basis (weight alpha) with the subset basis (1 - alpha).
+  template <int np, int nreg, int alphanp>
+  static void eval (const std::array<int, nreg>& subnp,
+                    const std::array<int, nreg>& os,
+                    const std::array<Scalar, nreg*alphanp>& alphac,
+                    const Scalar& x, Scalar* const v) {
+    if (x > 0) {
+      eval<np, nreg, alphanp>(subnp, os, alphac, -x, v);
+      for (int i = 0; i < np/2; ++i)
+        std::swap(v[i], v[np-i-1]);
+      return;
+    }
+    const auto x_gll = islet::get_x_gll(np);
+    bool done = false;
+    for (int i = 0; i < nreg; ++i)
+      if (x < x_gll[i+1]) {
+        eval_lagrange_poly(x_gll, np, x, v);
+        if (subnp[i] < np) {
+          Real w[np] = {0}; // sized by np: a fixed 12 overflows for np > 12
+          eval_lagrange_poly(x_gll + os[i], subnp[i], x, w + os[i]);
+          Real alpha = 0;
+          if (alphanp == 1)
+            alpha = alphac[i];
+          else {
+            assert(alphanp <= 3);
+            const auto alpha_r_gll = islet::get_x_gll(alphanp);
+            const auto r = (x - x_gll[i]) / (x_gll[i+1] - x_gll[i]);
+            Real a[3];
+            eval_lagrange_poly(alpha_r_gll, alphanp, r, a);
+            for (int j = 0; j < alphanp; ++j)
+              alpha += alphac[alphanp*i + j] * a[j];
+          }
+          for (int j = 0; j < np; ++j)
+            v[j] = alpha*v[j] + (1 - alpha)*w[j];
+        }
+        done = true;
+        break;
+      }
+    if ( ! done)
+      eval_lagrange_poly(x_gll, np, x, v);
+  }
+};
+
+struct InterpMethod { // built-in method (type, np) or user-supplied uim
+  typedef std::shared_ptr<InterpMethod> Ptr;
+
+  enum Type { notype, npx, npxstab, user };
+  Int np;
+  Type type;
+  std::shared_ptr<UserInterpMethod> uim;
+
+  // Name -> Type; throws std::runtime_error for an unrecognized name.
+  static Type convert (const std::string& s) {
+    for (const Type t : {npx, npxstab, user})
+      if (s == convert(t)) return t;
+    throw std::runtime_error(std::string("Not an InterpMethod::Type: ") + s);
+  }
+  // Type -> name; throws std::runtime_error for notype or any other value.
+  static std::string convert (const Type& t) {
+    switch (t) {
+    case npx: return "npx";
+    case npxstab: return "npxstab";
+    case user: return "user";
+    default: throw std::runtime_error("Not an InterpMethod::Type.");
+    }
+  }
+  InterpMethod () : np(-1), type(notype) {}
+  InterpMethod (Int inp, Type itype) : np(inp), type(itype) {}
+  InterpMethod (const std::shared_ptr<UserInterpMethod>& iuim)
+    : np(iuim->get_np()), type(user), uim(iuim) {}
+  const Real* get_xnodes() const {
+    return uim ? uim->get_xnodes() : islet::get_x_gll(np);
+  }
+};
+
+void op_eval(const InterpMethod& im, const Real a_src, Real* v);
+
+#endif
diff --git a/methods/islet/islet_pum.cpp b/methods/islet/islet_pum.cpp
new file mode 100644
index 0000000..8162ce7
--- /dev/null
+++ b/methods/islet/islet_pum.cpp
@@ -0,0 +1,226 @@
+#include "islet_pum.hpp"
+
+#include <omp.h>
+
+#include "islet_util.hpp"
+#include "islet_isl.hpp"
+
+namespace pum {
+
+// Uniform boundaries on [-1,1], with the interior ones randomly perturbed.
+static void init_xb (const Int ne, const Real& perturb, Real* const xb) {
+  const Real amp = perturb/ne;
+  for (Int i = 0; i <= ne; ++i) xb[i] = 2*(Real(i)/ne) - 1;
+  for (Int i = 1; i < ne; ++i) xb[i] += amp*(2*islet::urand() - 1);
+}
+
+// Map the reference nodes into each element [xb[i], xb[i+1]] of the grid x.
+static void fill_xgrid (const Int ne, const Real* const xb, const Int np,
+                        const Real* const xnodes, Real* const x) {
+  for (Int i = 0, k = 0; i < ne; ++i, k += np - 1)
+    for (Int j = 0; j < np; ++j) {
+      const Real w = (1 + xnodes[j])/2;
+      x[k + j] = (1 - w)*xb[i] + w*xb[i+1];
+    }
+}
+
+// Locate x's element ie, set xr to its reference coordinate there, zero v.
+static void prep_for_eval (const Int ne, const Real* const xb, const Int np,
+                           const Real& x, Int& ie, Real& xr, Real* const v) {
+  ie = 0;
+  while (ie < ne && ! (x >= xb[ie] && (ie == ne-1 || x < xb[ie+1]))) ++ie;
+  assert(ie >= 0 && ie < ne);
+  xr = 2*(x - xb[ie]) / (xb[ie+1] - xb[ie]) - 1;
+  std::fill(v, v + ne*(np-1) + 1, 0);
+}
+
+// Defaults: not threaded; ne = 3, ntrial = 111, mec_ne = 11; and
+// perturb = 0.05.
+Options::Options ()
+  : threaded(false), ne(3), ntrial(111), mec_ne(11), perturb(0.05)
+{}
+
+PerturbedUniformMeshMetric
+::PerturbedUniformMeshMetric (const InterpMethod::Ptr& im, const Options opts_)
+  : opts(opts_), base_im(im), mec(opts.threaded)
+{ init(); } // init() sizes the xbs and xnodess buffers
+
+// Build the metric for a user-supplied method, wrapping it in an InterpMethod
+// that becomes base_im.
+PerturbedUniformMeshMetric
+::PerturbedUniformMeshMetric (const UserInterpMethod::Ptr& uim, const Options opts_)
+  : opts(opts_), base_im(std::make_shared<InterpMethod>(uim)),
+    mec(opts.threaded)
+{ init(); }
+
+void PerturbedUniformMeshMetric::reset_opts (const Options& o) {
+  opts = o;
+  init(); // resize xbs and xnodess for the new options
+}
+
+void PerturbedUniformMeshMetric::init () { // per-thread xb and xnodes buffers
+  const int nslot = opts.threaded ? omp_get_max_threads() : 1;
+  xbs.resize(nslot); xnodess.resize(nslot);
+  for (auto& xb : xbs) xb.resize(opts.ne+1);
+  for (auto& xnodes : xnodess) xnodes.resize(get_np());
+}
+
+// Zero v, then write the base method's values for the element containing x.
+void PerturbedUniformMeshMetric::eval(const Real& x, Real* const v) {
+  const Int tid = opts.threaded ? omp_get_thread_num() : 0;
+  const Int np = base_im->np;
+  Int ie; Real xr; // outputs of prep_for_eval
+  prep_for_eval(opts.ne, xbs[tid].data(), np, x, ie, xr, v);
+  op_eval(*base_im, xr, v + ie*(np-1));
+}
+
+const Real* PerturbedUniformMeshMetric::get_xnodes() const {
+  if ( ! opts.threaded) return xnodess[0].data();
+  return xnodess[omp_get_thread_num()].data();
+}
+
+Int PerturbedUniformMeshMetric::get_np() const { // ne*(np-1) + 1, ne = size-1
+  return (xbs[0].size() - 1)*(base_im->np - 1) + 1;
+}
+
+Real PerturbedUniformMeshMetric::run (const Real stop_if_above,
+                                      const bool one_elem_hop_only) {
+  assert(base_im->np > 0);
+  std::vector<Real> dxs; { // the dx values handed to MaxEigComputer::compute
+    dxs.push_back(1.0/opts.ne); // a one-element hop
+    if ( ! one_elem_hop_only) {
+      for (const auto dx : {2.0/opts.ne, 0.5/opts.ne, 0.5})
+        dxs.push_back(dx);
+      const Real* xnodes = base_im->get_xnodes();
+      for (int i = 0; i < base_im->np; ++i) {
+        const Real xn = xnodes[i];
+        if (xn == 1 || xn == -1) continue; // skip the end nodes
+        dxs.push_back((0.5*(1 - xn))/opts.ne);
+      }
+    }
+  }
+  const auto run1 = [&] () { // one trial: new random mesh, max over dxs
+    const int tid = opts.threaded ? omp_get_thread_num() : 0;
+    auto& xb = xbs[tid];
+    auto& xnodes = xnodess[tid];
+    init_xb(opts.ne, opts.perturb, xb.data());
+    fill_xgrid(opts.ne, xb.data(), base_im->np, base_im->get_xnodes(),
+               xnodes.data());
+    MaxEigComputer mec(false); // local and unthreaded; shadows the member mec
+    Real mea, max_mea = 0;
+    for (const auto dx : dxs) {
+      mec.compute(this, dx, opts.mec_ne, &mea);
+      max_mea = std::max(max_mea, mea);
+      if (max_mea > 1 + stop_if_above) break; // threshold exceeded; stop early
+    }
+    return max_mea;
+  };
+  Real max_mea = 0;
+  if (opts.threaded) {
+#   pragma omp parallel for
+    for (Int trial = 0; trial < opts.ntrial; ++trial) {
+      // NOTE(review): max_mea is read below outside the critical section.
+      if (max_mea > 1 + stop_if_above) continue;
+      const auto mea = run1();
+      if (mea > max_mea) {
+#       pragma omp critical (PerturbedUniformMeshMetric_run)
+        max_mea = std::max(max_mea, mea);
+#       pragma omp flush
+      }
+    }
+  } else {
+    for (Int trial = 0; trial < opts.ntrial; ++trial) {
+      const auto mea = run1();
+      max_mea = std::max(max_mea, mea);
+      if (max_mea > 1 + stop_if_above) break;
+    }    
+  }
+  return max_mea - 1; // the largest amplitude found, minus 1
+}
+
+// For each trial mesh and each dx, compute the max eigenvalue amplitude mea and
+// record mea-1 in dx2meam1, keyed by dx*ne, when mea >= 1 + threshold and it
+// exceeds the value already stored for that key.
+void PerturbedUniformMeshMetric
+::sweep_and_collect_amplitudes (
+  const Int npts, const Real threshold, std::map<Real,Real>& dx2meam1,
+  const bool verbose)
+{
+  // Shifts to test: a whole and a half element, each interior node offset, and
+  // npts-1 evenly spaced fractions of an element.
+  std::vector<Real> dxs; {
+    for (const auto dx : {1.0/opts.ne, 0.5/opts.ne})
+      dxs.push_back(dx);
+    const Real* xnodes = base_im->get_xnodes();
+    for (Int i = 0; i < base_im->np; ++i) {
+      const Real xn = xnodes[i];
+      if (xn == 1 || xn == -1) continue;
+      dxs.push_back((0.5*(1 - xn))/opts.ne);
+    }
+    for (Int i = 1; i < npts; ++i)
+      dxs.push_back(Real(i)/(opts.ne*npts));
+  }
+  const Int ndx = dxs.size();
+  // xbs and xnodess have one slot unless opts.threaded, and eval/get_xnodes
+  // then use slot 0, so run the trials in parallel only when threaded.
+# pragma omp parallel for schedule(static,1) if (opts.threaded)
+  for (Int trial = 0; trial < opts.ntrial; ++trial) {
+    const int tid = opts.threaded ? omp_get_thread_num() : 0;
+    auto& xb = xbs[tid];
+    auto& xnodes = xnodess[tid];
+    init_xb(opts.ne, opts.perturb, xb.data());
+    fill_xgrid(opts.ne, xb.data(), base_im->np, base_im->get_xnodes(), xnodes.data());
+    // One serial computer per trial, reused for every dx.
+    MaxEigComputer mec(false /* threaded */);
+    for (Int i = 0; i < ndx; ++i) {
+      Real mea;
+      mec.compute(this, dxs[i], opts.mec_ne, &mea);
+      if ( ! (mea >= 1 + threshold)) continue;
+#     pragma omp critical
+      {
+        const Real dx = dxs[i]*opts.ne;
+        // Keep the largest amplitude seen for this dx.
+        const auto it = dx2meam1.find(dx);
+        const bool insert = it == dx2meam1.end() || mea > 1 + it->second;
+        if (insert) dx2meam1[dx] = mea-1;
+        if (insert && verbose) printf("dx %1.16e meam1 %1.16e\n", dx, mea-1);
+      }
+    }
+  }
+}
+
+void demo () { // print meam1 and pum_meam1 for two operator types, np = 4..13
+  pum::Options o;
+  o.threaded = true;
+  o.ne = 4;
+  o.ntrial = 71;
+  o.mec_ne = 3;
+  o.perturb = 0.01;
+  for (Int np = 4; np <= 13; ++np) {
+    Real pum_meam1_m0 = 0;
+    for (Int method = 0; method < 2; ++method) {
+      if (method == 1 && (np == 9 || np == 11)) continue;
+      const auto op_type = method == 0 ?
+        islet::Operator::gll_offset_nodal_subset : islet::Operator::xnodal;
+      const auto oim = std::make_shared<islet::OperatorInterpMethod>(
+        np, islet::Operator::create(op_type));
+      const auto im = std::make_shared<InterpMethod>(oim);
+      Real pum_meam1; {
+        pum::PerturbedUniformMeshMetric pum(im, o);
+        pum_meam1 = pum.run();
+      }
+      if (method == 0) pum_meam1_m0 = pum_meam1;
+      Real meam1 = -1; {
+        // Check meam1 to be sure we transcribed the new methods correctly.
+        MaxEigComputer mec;
+        const Int ns = 4111;
+        meam1 = mec.run(np, ns, ns, 1e-14, true, im->uim);
+      }
+      printf("np %2d method %d meam1 %10.3e pum_meam1 %10.3e",
+             np, method, meam1, pum_meam1);
+      if (method == 0) printf("\n");
+      else printf(" better %6.2f\n", pum_meam1_m0/pum_meam1);
+    }
+  }
+}
+
+} // namespace pum
diff --git a/methods/islet/islet_pum.hpp b/methods/islet/islet_pum.hpp
new file mode 100644
index 0000000..fb9f2f4
--- /dev/null
+++ b/methods/islet/islet_pum.hpp
@@ -0,0 +1,56 @@
+#ifndef INCLUDE_ISLET_PUM_HPP
+#define INCLUDE_ISLET_PUM_HPP
+
+#include <map>
+
+#include "islet_types.hpp"
+#include "islet_maxeigcomp.hpp"
+
+namespace pum {
+
+struct Options {
+  bool threaded; // NOTE(review): presumably enables threaded evaluation -- confirm in islet_pum.cpp
+  Int ne, ntrial, mec_ne; // assigned by callers before a PerturbedUniformMeshMetric is built
+  Real perturb; // the callers visible in this patch set 0.01
+  Options(); // defined out of line; the default values are not visible in this header
+};
+
+struct PerturbedUniformMeshMetric : public UserInterpMethod {
+  typedef std::shared_ptr<PerturbedUniformMeshMetric> Ptr;
+
+  PerturbedUniformMeshMetric(const InterpMethod::Ptr& im, // method under study
+                             const Options opts = Options()); // options are taken by value
+  PerturbedUniformMeshMetric(const UserInterpMethod::Ptr& im, // same, from a user-defined method
+                             const Options opts = Options());
+  Real run(Real stop_if_above = 1e3, const bool one_elem_hop_only = false); // returns the metric value
+
+  // Replace the options. opts.threaded cannot be changed through this call.
+  void reset_opts(const Options& o);
+
+  // UserInterpMethod interface
+  void eval(const Real& x, Real* const v) override;
+  const Real* get_xnodes() const override;
+  Int get_np() const override;
+
+  // Illustrate why a 1-element hop is the key thing to study.
+  void sweep_and_collect_amplitudes(
+    const Int npts, const Real threshold,
+    // Report meam1 at dx in [0,1] if meam1 >= threshold. This routine does not
+    // clear what is already in dx2meam1; an existing entry is only replaced by
+    std::map<Real,Real>& dx2meam1, // a larger value.
+    const bool verbose = true); // when true, each inserted (dx, meam1) pair is printed
+
+private:
+  Options opts;
+  InterpMethod::Ptr base_im;
+  MaxEigComputer mec;
+  std::vector<std::vector<Real> > xbs, xnodess; // NOTE(review): presumably per-trial data -- confirm
+
+  void init();
+};
+
+void demo();
+
+} // namespace pum
+
+#endif
diff --git a/methods/islet/islet_studymetrics.cpp b/methods/islet/islet_studymetrics.cpp
new file mode 100644
index 0000000..8662384
--- /dev/null
+++ b/methods/islet/islet_studymetrics.cpp
@@ -0,0 +1,134 @@
+#include "islet_pum.hpp"
+#include "islet_xnodes_metrics.hpp"
+
+namespace {
+
+// Build the Nodes for np: region ireg < n uses the subnp[ireg] consecutive node
+// indices starting at offst[ireg]; every remaining region uses all of 0..np-1.
+Nodes make_offset_nodal (int np, int n, const int* subnp, const int* offst) {
+  Nodes result(np);
+  const auto nregion = result.get_nh();
+  std::vector<Int> support;
+  support.reserve(np);
+  for (Int ireg = 0; ireg < nregion; ++ireg) {
+    // Regions past the first n have no entry in subnp/offst.
+    const bool custom = ireg < n;
+    const Int count = custom ? subnp[ireg] : np;
+    const Int first = custom ? offst[ireg] : 0;
+    support.resize(count);
+    for (Int i = 0; i < count; ++i) support[i] = first + i;
+    result.set(ireg, support);
+  }
+  return result;
+}
+
+// Read np coordinates from the text after the first "x" in s into xnodes;
+// false if "x" is absent or the input ends or fails to parse before np values.
+bool read_xnodes (const Int np, const std::string& s, Real* const xnodes) {
+  const auto xpos = s.find("x");
+  if (xpos == std::string::npos) return false;
+  std::stringstream coords(s.substr(xpos+1));
+  for (Int k = 0; k < np; ++k)
+    if ((coords.rdstate() & std::istream::eofbit) ||
+        ((coords >> xnodes[k]).rdstate() & std::istream::failbit)) return false;
+  return true;
+}
+
+// UserInterpMethod defined by a basis string. Node locations come from the
+// string when read_xnodes finds them, else from islet::get_x_gll_special.
+class Basis : public UserInterpMethod {
+  Nodes nodes;
+  // Start false so get_xnodes() never reads an indeterminate flag when the
+  // constructor stops early on an invalid string.
+  bool ok = false, free_nodal = false;
+  Real xnodes[islet::np_max];
+
+public:
+  explicit Basis (const std::string& basis) {
+    ok = nodes.init(basis);
+    if (ok) free_nodal = read_xnodes(nodes.get_np(), basis, xnodes);
+    else printf("Invalid basis string: %s\n", basis.c_str());
+  }
+  // False when the basis string was rejected; callers must check before use.
+  bool is_ok () const { return ok; }
+  void eval(const Real& x, Real* const v) override { ::eval(nodes, get_xnodes(), x, v); }
+  const Real* get_xnodes() const override {
+    return free_nodal ? xnodes : islet::get_x_gll_special(nodes.get_np());
+  }
+  Int get_np() const override { return nodes.get_np(); }
+};
+
+} // namespace anon
+
+extern "C" {
+
+void offset_nodal_calc_xnodes_metrics (int np, int n, const int* subnp, const int* offst,
+                                       double* metrics) { // metrics needs room for 3 values
+  const auto nodes = make_offset_nodal(np, n, subnp, offst); // first n regions use subnp/offst
+  calc_xnodes_metrics(nodes, islet::get_x_gll(np), metrics); // evaluated on the GLL nodes for np
+}
+
+// C entry point: parse basis and write its three x-node metrics to metrics.
+// An invalid string is reported on stdout and metrics is left untouched.
+void calc_xnodes_metrics_from_basis_string (const char* basis, double* metrics) {
+  Nodes parsed;
+  if ( ! parsed.init(basis)) {
+    printf("Invalid basis string: %s\n", basis);
+    return;
+  }
+  Real custom_x[islet::np_max];
+  const bool free_nodal = read_xnodes(parsed.get_np(), basis, custom_x);
+  const Real* const x = free_nodal ? custom_x : islet::get_x_gll_special(parsed.get_np());
+  calc_xnodes_metrics(parsed, x, metrics);
+}
+
+// C entry point: parse basis and pass it to calc_lebesgue_consts, which writes
+// to metrics. An invalid string is reported on stdout; metrics is untouched.
+void calc_lebesgue_consts_from_basis_string (const char* basis, double* metrics) {
+  Nodes parsed;
+  if ( ! parsed.init(basis)) {
+    printf("Invalid basis string: %s\n", basis);
+    return;
+  }
+  Real custom_x[islet::np_max];
+  const bool free_nodal = read_xnodes(parsed.get_np(), basis, custom_x);
+  const Real* const x = free_nodal ? custom_x : islet::get_x_gll_special(parsed.get_np());
+  calc_lebesgue_consts(parsed, x, metrics);
+}
+
+// Print diagnostics for a basis string: x-node metrics ("npm"), meam1 from
+// MaxEigComputer, and the perturbed-uniform-mesh metric for ne = 3..15.
+void run_thorough_diagnostics_from_basis_string (const char* basis) {
+  // Validate first so an invalid string cannot print an uninitialized npm line.
+  auto b = std::make_shared<Basis>(basis);
+  if ( ! b->is_ok()) return;
+  {
+    Real m[3] = {0, 0, 0};
+    calc_xnodes_metrics_from_basis_string(basis, m);
+    printf("npm %1.4e %1.4e %1.4e\n", m[0], m[1], m[2]);
+  }
+  static const int ne_max = 11111;
+  pum::Options po;
+  po.threaded = true; po.ntrial = 33; po.mec_ne = 333; po.perturb = 0.01;
+  printf("ne,ndx_max %d po.ntrial %d po.mec_ne %d po.perturb %1.4f\n",
+         ne_max, po.ntrial, po.mec_ne, po.perturb);
+  {
+    MaxEigComputer mec;
+    const auto meam1 = mec.run(b->get_np(), ne_max, ne_max, 1e-13, true, b);
+    printf("meam1 %1.4e\n", meam1);
+  }
+  {
+    Real pum_max = 0;
+    printf("pum:"); fflush(stdout);
+    for (Int ne = 3; ne <= 15; ++ne) {
+      po.ne = ne;
+      pum::PerturbedUniformMeshMetric pum(b, po);
+      const auto pum_val = pum.run();
+      printf(" %1.1e", pum_val); fflush(stdout);
+      pum_max = std::max(pum_max, pum_val);
+    }
+    printf("\npum_max %1.4e\n", pum_max);
+  }
+}
+
+} // extern "C"
diff --git a/methods/islet/islet_studymetrics.hpp b/methods/islet/islet_studymetrics.hpp
new file mode 100644
index 0000000..9029932
--- /dev/null
+++ b/methods/islet/islet_studymetrics.hpp
@@ -0,0 +1,6 @@
+#ifndef INCLUDE_ISLET_STUDYMETRICS_HPP
+#define INCLUDE_ISLET_STUDYMETRICS_HPP
+
+extern "C" void run_thorough_diagnostics_from_basis_string(const char* basis);
+
+#endif
diff --git a/methods/islet/islet_tables.cpp b/methods/islet/islet_tables.cpp
new file mode 100644
index 0000000..c66366f
--- /dev/null
+++ b/methods/islet/islet_tables.cpp
@@ -0,0 +1,268 @@
+#include "islet_types.hpp"
+#include "islet_util.hpp"
+#include "islet_tables.hpp"
+
+namespace islet {
+
+static const Real sqrt5 = std::sqrt(5.0);
+static const Real oosqrt5 = 1.0/sqrt5;
+static const Real sqrt3o7 = std::sqrt(3.0/7.0);
+static const Real np6a = std::sqrt(1.0/3.0 + 2.0*std::sqrt(7.0)/21.0);
+static const Real np6b = std::sqrt(1.0/3.0 - 2.0*std::sqrt(7.0)/21.0);
+static const Real np7a = std::sqrt((5.0 + 2.0*std::sqrt(5.0/3.0))/11.0);
+static const Real np7b = std::sqrt((5.0 - 2.0*std::sqrt(5.0/3.0))/11.0);
+
+Real x_gll_table[] = {
+  -1, 1,
+  -1, 0, 1,
+  -1, -oosqrt5, oosqrt5, 1,
+  -1, -sqrt3o7, 0, sqrt3o7, 1,
+  -1, -np6a, -np6b, np6b, np6a, 1,
+  -1, -np7a, -np7b, 0, np7b, np7a, 1,
+  // The rest are obtained from Michels, H. H. "Abscissas and weight
+  // coefficients for Lobatto quadrature." Mathematics of Computation 17.83
+  // (1963): 237-244.
+  // np 8
+  -1, -0.8717401485096066153, -0.59170018143314230214, -0.20929921790247886877,
+  0.20929921790247886877, 0.59170018143314230214, 0.87174014850960661534, 1,
+  // np 9
+  -1, -0.89975799541146015731, -0.67718627951073775345, -0.36311746382617815871,
+  0, 0.36311746382617815871, 0.67718627951073775345, 0.89975799541146015731, 1,
+  // np 10
+  -1, -0.91953390816645881383, -0.73877386510550507500, -0.47792494981044449566,
+  -0.16527895766638702463, 0.16527895766638702463, 0.47792494981044449566,
+  0.73877386510550507500, 0.91953390816645881383, 1,
+  // np 11
+  -1, -0.93400143040805913433, -0.78448347366314441862, -0.56523532699620500647,
+  -0.29575813558693939143, 0, 0.29575813558693939143, 0.56523532699620500647,
+  0.78448347366314441862, 0.93400143040805913433, 1,
+  // np 12
+  -1, -0.94489927222288222341, -0.81927932164400667835, -0.63287615303186067766,
+  -0.39953094096534893226, -0.13655293285492755486, 0.13655293285492755486,
+  0.39953094096534893226, 0.63287615303186067766, 0.81927932164400667835,
+  0.94489927222288222341, 1,
+  // np 13
+  -1, -0.95330984664216391190, -0.84634756465187231687, -0.68618846908175742607,
+  -0.48290982109133620175, -0.24928693010623999257, 0, 0.24928693010623999257,
+  0.48290982109133620175, 0.68618846908175742607, 0.84634756465187231687,
+  0.95330984664216391190, 1,
+  // np 16
+  -1, -0.96956804627021793295, -0.89920053309347209299, -0.79200829186181506393,
+  -0.65238870288249308947, -0.48605942188713761178, -0.29983046890076320810,
+  -0.10132627352194944784, 0.10132627352194944784, 0.29983046890076320810,
+  0.48605942188713761178, 0.65238870288249308947, 0.79200829186181506393,
+  0.89920053309347209299, 0.96956804627021793295, 1
+};
+
+Real w_gll_table[] = {
+  1, 1,
+#define v0 1.0/3.0
+  v0, 4.0/3.0, v0,
+#undef v0
+#define v0 1.0/6.0
+#define v1 5.0/6.0
+  v0, v1, v1, v0,
+#undef v0
+#undef v1
+#define v0 1.0/10.0
+#define v1 49.0/90.0
+  v0, v1, 32.0/45.0, v1, v0,
+#undef v0
+#undef v1
+#define v0 1.0/15.0
+#define v1 (14 - std::sqrt(7.0))/30.0
+#define v2 (14 + std::sqrt(7.0))/30.0
+  v0, v1, v2, v2, v1, v0,
+#undef v0
+#undef v1
+#undef v2
+#define v0 1.0/21.0
+#define v1 (124 - 7*std::sqrt(15.0))/350.0
+#define v2 (124 + 7*std::sqrt(15.0))/350.0
+  v0, v1, v2, 256.0/525.0, v2, v1, v0,
+#undef v0
+#undef v1
+#undef v2
+  // The rest are obtained from the reference in x_gll_table.
+  // np 8
+  0.03571428571428571429, 0.21070422714350603938, 0.34112269248350436476,
+  0.41245879465870388157, 0.41245879465870388157, 0.34112269248350436476,
+  0.21070422714350603938, 0.03571428571428571429,
+  // np 9
+  0.02777777777777777778, 0.16549536156080552505, 0.27453871250016173528,
+  0.34642851097304634512, 0.37151927437641723356, 0.34642851097304634512,
+  0.27453871250016173528, 0.16549536156080552505, 0.02777777777777777778,
+  // np 10
+  0.02222222222222222222, 0.13330599085107011113, 0.22488934206312645212,
+  0.29204268367968375788, 0.32753976118389745666, 0.32753976118389745666,
+  0.29204268367968375788, 0.22488934206312645212, 0.13330599085107011113,
+  0.02222222222222222222,
+  // np 11
+  0.01818181818181818182, 0.10961227326699486446, 0.18716988178030520411,
+  0.24804810426402831404, 0.28687912477900808868, 0.30021759545569069379,
+  0.28687912477900808868, 0.24804810426402831404, 0.18716988178030520411,
+  0.10961227326699486446, 0.01818181818181818182,
+  // np 12
+  0.01515151515151515152, 0.09168451741319613067, 0.15797470556437011517,
+  0.21250841776102114536, 0.25127560319920128029, 0.27140524091069617700,
+  0.27140524091069617700, 0.25127560319920128029, 0.21250841776102114536,
+  0.15797470556437011517, 0.09168451741319613067, 0.01515151515151515152,
+  // np 13
+  0.01282051282051282051, 0.07780168674681892779, 0.13498192668960834912,
+  0.18364686520355009201, 0.22076779356611008609, 0.24401579030667635646,
+  0.25193084933344673604, 0.24401579030667635646, 0.22076779356611008609,
+  0.18364686520355009201, 0.13498192668960834912, 0.07780168674681892779,
+  0.01282051282051282051,
+  // np 16
+  0.00833333333333333333, 0.05085036100591990540, 0.08939369732593080099,
+  0.12425538213251409835, 0.15402698080716428081, 0.17749191339170412530,
+  0.19369002382520358432, 0.20195830817822987149, 0.20195830817822987149,
+  0.19369002382520358432, 0.17749191339170412530, 0.15402698080716428081,
+  0.12425538213251409835, 0.08939369732593080099, 0.05085036100591990540,
+  0.00833333333333333333
+};
+
+/* These Gauss-Legendre tables were obtained using
+   $ gsl-config --version
+   1.15
+   with calls to
+   gsl_integration_glfixed_table_alloc
+   gsl_integration_glfixed_point
+       gsl_integration_glfixed_table_free.
+ */
+
+Real x_gl_table[] = {
+  // np 1
+  0.000000000000000000,
+  // np 2
+  -0.577350269189625731,  0.577350269189625731,
+  // np 3
+  -0.774596669241483404,  0.000000000000000000,  0.774596669241483404,
+  // np 4
+  -0.861136311594052573, -0.339981043584856257,  0.339981043584856257,
+  0.861136311594052573,
+  // np 5
+  -0.906179845938663964, -0.538469310105683108,  0.000000000000000000,
+  0.538469310105683108,  0.906179845938663964,
+  // np 6
+  -0.932469514203152050, -0.661209386466264482, -0.238619186083196905,
+  0.238619186083196905,  0.661209386466264482,  0.932469514203152050,
+  // np 7
+  -0.949107912342758486, -0.741531185599394460, -0.405845151377397184,
+  0.000000000000000000,  0.405845151377397184,  0.741531185599394460,
+  0.949107912342758486,
+  // np 8
+  -0.960289856497536287, -0.796666477413626728, -0.525532409916328991,
+  -0.183434642495649808,  0.183434642495649808,  0.525532409916328991,
+  0.796666477413626728,  0.960289856497536287,
+  // np 9
+  -0.968160239507626086, -0.836031107326635770, -0.613371432700590358,
+  -0.324253423403808916,  0.000000000000000000,  0.324253423403808916,
+  0.613371432700590358,  0.836031107326635770,  0.968160239507626086,
+  // np 10
+  -0.973906528517171743, -0.865063366688984536, -0.679409568299024436,
+  -0.433395394129247213, -0.148874338981631216,  0.148874338981631216,
+  0.433395394129247213,  0.679409568299024436,  0.865063366688984536,
+  0.973906528517171743,
+  // np 11
+  -0.978228658146056973, -0.887062599768095317, -0.730152005574049356,
+  -0.519096129206811807, -0.269543155952344959,  0.000000000000000000,
+  0.269543155952344959,  0.519096129206811807,  0.730152005574049356,
+  0.887062599768095317,  0.978228658146056973,
+  // np 12
+  -0.981560634246719244, -0.904117256370474909, -0.769902674194304693,
+  -0.587317954286617483, -0.367831498998180184, -0.125233408511468913,
+  0.125233408511468913,  0.367831498998180184,  0.587317954286617483,
+  0.769902674194304693,  0.904117256370474909,  0.981560634246719244  
+};
+
+Real w_gl_table[] = {
+  // np 1
+  2.000000000000000000,
+  // np 2
+  1.000000000000000000,  1.000000000000000000,
+  // np 3
+  0.555555555555555580,  0.888888888888888840,  0.555555555555555580,
+  // np 4
+  0.347854845137453850,  0.652145154862546095,  0.652145154862546095,
+  0.347854845137453850,
+  // np 5
+  0.236926885056189085,  0.478628670499366471,  0.568888888888888888,
+  0.478628670499366471,  0.236926885056189085,
+  // np 6
+  0.171324492379170357,  0.360761573048138606,  0.467913934572691037,
+  0.467913934572691037,  0.360761573048138606,  0.171324492379170357,
+  // np 7
+  0.129484966168869703,  0.279705391489276645,  0.381830050505118923,
+  0.417959183673469403,  0.381830050505118923,  0.279705391489276645,
+  0.129484966168869703,
+  // np 8
+  0.101228536290376259,  0.222381034453374482,  0.313706645877887269,
+  0.362683783378361990,  0.362683783378361990,  0.313706645877887269,
+  0.222381034453374482,  0.101228536290376259,
+  // np 9
+  0.081274388361574412,  0.180648160694857396,  0.260610696402935438,
+  0.312347077040002863,  0.330239355001259782,  0.312347077040002863,
+  0.260610696402935438,  0.180648160694857396,  0.081274388361574412,
+  // np 10
+  0.066671344308688138,  0.149451349150580587,  0.219086362515982042,
+  0.269266719309996350,  0.295524224714752870,  0.295524224714752870,
+  0.269266719309996350,  0.219086362515982042,  0.149451349150580587,
+  0.066671344308688138,
+  // np 11
+  0.055668567116173663,  0.125580369464904612,  0.186290210927734262,
+  0.233193764591990482,  0.262804544510246652,  0.272925086777900616,
+  0.262804544510246652,  0.233193764591990482,  0.186290210927734262,
+  0.125580369464904612,  0.055668567116173663,
+  // np 12
+  0.047175336386511828,  0.106939325995318427,  0.160078328543346221,
+  0.203167426723065925,  0.233492536538354806,  0.249147045813402773,
+  0.249147045813402773,  0.233492536538354806,  0.203167426723065925,
+  0.160078328543346221,  0.106939325995318427,  0.047175336386511828
+};
+
+bool get_gll_supported (const Int np) { return np == 16 || (2 <= np && np <= 13); } // np with table rows
+
+// GLL nodes for np points: a row of the packed x_gll_table when np <= 13,
+// otherwise whatever get_x_gll_special provides. Throws if np < 2.
+const Real* get_x_gll (const Int np) {
+  throw_if(np <  2, "get_x_gll: np <  2 not supported.");
+  if (np > 13) return get_x_gll_special(np);
+  return x_gll_table + (np*(np-1))/2 - 1; // rows for np = 2, 3, ... are stored back to back
+}
+
+// GLL weights for np points: a row of the packed w_gll_table when np <= 13,
+// otherwise whatever get_w_gll_special provides. Throws if np < 2.
+const Real* get_w_gll (const Int np) {
+  throw_if(np <  2, "get_w_gll: np <  2 not supported.");
+  if (np > 13) return get_w_gll_special(np);
+  return w_gll_table + (np*(np-1))/2 - 1; // rows for np = 2, 3, ... are stored back to back
+}
+
+// GLL nodes; np = 16 is stored right after the np = 13 row, i.e. at the np = 14 offset.
+const Real* get_x_gll_special (const Int np) {
+  if (np <= 13) return get_x_gll(np);
+  throw_if(np != 16, "np 16 only is supported");
+  return x_gll_table + (14*(14-1))/2 - 1;
+}
+
+// GLL weights; np = 16 is stored right after the np = 13 row, i.e. at the np = 14 offset.
+const Real* get_w_gll_special (const Int np) {
+  if (np <= 13) return get_w_gll(np);
+  throw_if(np != 16, "np 16 only is supported");
+  return w_gll_table + (14*(14-1))/2 - 1;
+}
+
+// Gauss-Legendre nodes for 1 <= np <= 12; row np of the packed table starts at np*(np-1)/2.
+const Real* get_x_gl (const Int np) {
+  throw_if(np < 1 || np > 12, "get_x_gl: np must satisfy 1 <= np <= 12.");
+  return x_gl_table + (np*(np-1))/2;
+}
+
+// Gauss-Legendre weights for 1 <= np <= 12; row np of the packed table starts at np*(np-1)/2.
+const Real* get_w_gl (const Int np) {
+  throw_if(np < 1 || np > 12, "get_w_gl: np must satisfy 1 <= np <= 12.");
+  return w_gl_table + (np*(np-1))/2;
+}
+
+} // namespace islet
diff --git a/methods/islet/islet_tables.hpp b/methods/islet/islet_tables.hpp
new file mode 100644
index 0000000..04a5c7d
--- /dev/null
+++ b/methods/islet/islet_tables.hpp
@@ -0,0 +1,27 @@
+#ifndef INCLUDE_ISLET_TABLES_HPP
+#define INCLUDE_ISLET_TABLES_HPP
+
+#include "islet_types.hpp"
+
+namespace islet {
+static const Int np_max = 16;
+
+// Gauss-Lobatto-Legendre
+bool get_gll_supported(const Int np);
+const Real* get_x_gll(const Int np);
+const Real* get_w_gll(const Int np);
+const Real* get_x_gll_special(const Int np);
+const Real* get_w_gll_special(const Int np);
+// Gauss-Legendre
+const Real* get_x_gl (const Int np);
+const Real* get_w_gl (const Int np);
+
+// Gauss-Lobatto-Legendre
+extern Real x_gll_table[];
+extern Real w_gll_table[];
+// Gauss-Legendre
+extern Real x_gl_table[];
+extern Real w_gl_table[];
+}
+
+#endif
diff --git a/methods/islet/islet_types.hpp b/methods/islet/islet_types.hpp
new file mode 100644
index 0000000..3137009
--- /dev/null
+++ b/methods/islet/islet_types.hpp
@@ -0,0 +1,12 @@
+#ifndef INCLUDE_ISLET_TYPES_HPP
+#define INCLUDE_ISLET_TYPES_HPP
+
+#include <complex>
+
+typedef int Int;
+typedef double Real;
+typedef std::complex<Real> Complex;
+typedef int fint;
+typedef Int Size;
+
+#endif
diff --git a/methods/islet/islet_util.hpp b/methods/islet/islet_util.hpp
new file mode 100644
index 0000000..6c17e3b
--- /dev/null
+++ b/methods/islet/islet_util.hpp
@@ -0,0 +1,264 @@
+#ifndef INCLUDE_ISLET_UTIL_HPP
+#define INCLUDE_ISLET_UTIL_HPP
+
+#include <cassert>
+#include <cstring>
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <limits>
+#include <memory>
+
+#include "islet_types.hpp"
+
+namespace islet {
+#define throw_if(condition, message) do {                               \
+    if (condition) {                                                    \
+      std::stringstream _ss_;                                           \
+      _ss_ << __FILE__ << ":" << __LINE__ << ": The condition:\n"       \
+           << #condition "\nled to the exception\n" << message << "\n"; \
+        throw std::logic_error(_ss_.str());                             \
+    }                                                                   \
+  } while (0)
+
+#define require(condition) do {                                         \
+    if ( ! (condition)) {                                               \
+      std::stringstream _ss_;                                           \
+      _ss_ << __FILE__ << ":" << __LINE__ << ": FAIL:\n" << #condition  \
+        << "\n";                                                        \
+      throw std::logic_error(_ss_.str());                               \
+    }                                                                   \
+  } while (0)
+#define require_msg(condition, message) do {                            \
+    if ( ! (condition)) {                                               \
+      std::stringstream _ss_;                                           \
+      _ss_ << __FILE__ << ":" << __LINE__ << ": FAIL:\n" << #condition  \
+           << "\nmessage:\n" << message << "\n";                        \
+      throw std::logic_error(_ss_.str());                               \
+    }                                                                   \
+  } while (0)
+
+template<typename T> inline T sign (const T& a) { return a >= 0 ? 1 : -1; }
+template<typename T> inline constexpr T square (const T& x) { return x*x; }
+inline Real reldif (const Real a, const Real b, const Real abstol = 0)
+{ return std::abs(b - a)/(abstol + std::abs(a)); }
+
+#define pr(m) do {                              \
+    std::stringstream _ss_;                     \
+    _ss_ << m << std::endl;                     \
+    std::cerr << _ss_.str();                    \
+  } while (0)
+#define prc(m) pr(#m << " | " << (m))
+#define puf(m) "(" << #m << " " << (m) << ")"
+#define pu(m) << " " << puf(m)
+
+template <typename T>
+static void prarr (const std::string& name, const T* const v, const size_t n) {
+  std::stringstream ss;
+  ss << name << " = [";
+  for (size_t i = 0; i < n; ++i) ss << " " << v[i];
+  ss << "]";
+  pr(ss.str());
+}
+template <typename Array>
+static void prarr (const std::string& name, const Array& a) {
+  prarr(name, a.data(), a.size());
+}
+
+#define mprarr(a) prarr(#a, a)
+
+/*! \brief RAII std stream state saver.
+ *
+ * Example: Preserve std::cout's state so manipulations don't affect others' use
+ * of cout.
+ */
+template<typename Stream> class IosSaver {
+  Stream& s_;
+  std::ios state_;
+public:
+  IosSaver (Stream& s) : s_(s), state_(nullptr) { state_.copyfmt(s); }
+  IosSaver (const IosSaver& ios) : s_(ios.s_), state_(nullptr)
+  { state_.copyfmt(ios.state_); }
+  IosSaver operator= (const IosSaver&) = delete;
+  ~IosSaver () { s_.copyfmt(state_); }
+};
+template<typename Stream> inline IosSaver<Stream> save_ios (Stream& s)
+{ return IosSaver<Stream>(s); }
+
+inline double urand () { return rand() / ((double) RAND_MAX + 1.0); }
+
+// Write the raw bytes of s; false on any stream failure. fail() rather than
+// bad() is tested because a stream that never opened has only failbit set.
+template <typename T> bool write (std::ofstream& os, const T s) {
+  return ! os.write(reinterpret_cast<const char*>(&s), sizeof(T)).fail();
+}
+// Write the count n, then the raw bytes of d[0..n); false on any failure.
+template <typename T> bool write (std::ofstream& os, const Int n, const T* const d) {
+  return (write(os, n) &&
+          ! os.write(reinterpret_cast<const char*>(d), n*sizeof(T)).fail());
+}
+
+// Read sizeof(T) raw bytes into s. Returns false on a short read as well:
+// fail() is tested because hitting end-of-file sets failbit, not badbit.
+template <typename T> bool read (std::ifstream& os, T& s) {
+  return ! os.read(reinterpret_cast<char*>(&s), sizeof(T)).fail();
+}
+// Read a count into n, then n elements into d; d must have room for them.
+template <typename T> bool read (std::ifstream& os, Int& n, T* const d) {
+  return (read(os, n) && n >= 0 &&
+          ! os.read(reinterpret_cast<char*>(d), n*sizeof(T)).fail());
+}
+
+// Minimal owning array of T with explicit capacity control (see the optclear_* methods).
+template <typename T> class Array {
+  T* p_;
+  std::size_t n_, cap_;
+public:
+  Array () { init(); }
+  Array(std::size_t n);
+  Array(std::size_t n, const T& init);
+  Array(const Array<T>& a);
+  Array& operator= (const Array<T>& a) { // deep copy; the implicit one would double-delete p_
+    if (this != &a) { optclear_and_resize(a.n_); std::copy(a.p_, a.p_ + a.n_, p_); }
+    return *this; }
+  ~Array () { clear(); }
+  // Set the empty state; assumes all members are uninitialized on entry.
+  void init();
+  void clear();
+  // optclear: has the semantics of clearing, but may not release the memory.
+  void optclear_and_resize(std::size_t n);
+  void optclear_and_resize_ft(std::size_t n); // _ft indicates first touch.
+  void optclear_and_resize(std::size_t n, const T& i);
+  void optclear_and_reserve(std::size_t n);
+  void optclear_and_reserve_ft(std::size_t n);
+  T& operator[] (std::size_t i) { return p_[i]; }
+  const T& operator[] (std::size_t i) const { return p_[i]; }
+  T& back () { return p_[n_-1]; }
+  const T& back () const { return p_[n_-1]; }
+  std::size_t size () const { return n_; }
+  bool empty () const { return size() == 0; }
+  T* data () const { return p_; }
+  // No realloc and no throw: asserts that reserve already provided the room.
+  void unsafe_push_back(const T& e);
+  T* begin () { return p_; }
+  T* end () { return p_ + n_; }
+  const T* begin () const { return p_; }
+  const T* end () const { return p_ + n_; }
+  void set (const T& v) { for (std::size_t i = 0; i < n_; ++i) p_[i] = v; }
+};
+
+template<typename T> inline int len (const Array<T>& v)
+{ return static_cast<int>(v.size()); }
+
+// Assign init to one element per ~1 KB of p[0..n) and to the last element, so
+// every page is touched (1 KB is taken as a safe lower bound on page size).
+template<typename T> inline void touch (T* const p, const size_t n,
+                                        const T& init = T()) {
+  // Stride >= 1: for sizeof(T) > 1024 the quotient is 0 and the loop would never end.
+  const size_t stride = sizeof(T) < 1024 ? 1024 / sizeof(T) : 1;
+  for (size_t i = 0; i < n; i += stride) p[i] = init;
+  if (n) p[n-1] = init;
+}
+// new[] n default-initialized T's (null when n == 0); optionally touch() the block first.
+template<typename T> inline T*
+allocn (const size_t n, const bool first_touch = false) {
+  if (n == 0) return nullptr;
+  T* const block = new T[n];
+  if (first_touch) touch(block, n);
+  return block;
+}
+// delete[] the array and null the caller's pointer (deleting null is a no-op).
+template<typename T> inline void deln (T*& p) {
+  delete[] p;
+  p = nullptr;
+}
+// delete[] through a pointer-to-const; the caller's copy is left unchanged.
+template<typename T> inline void deln_const (const T* p) {
+  delete[] p;
+}
+// delete a single object and null the caller's pointer.
+template<typename T> inline void del (T*& p) { delete p; p = nullptr; }
+
+template<typename T>
+inline void Array<T>::init () { // empty state: no storage, zero size and capacity
+  p_ = nullptr;
+  n_ = 0; cap_ = 0;
+}
+
+template<typename T>
+inline Array<T>::Array (std::size_t n)
+  : p_(0), n_(0), cap_(0) // start empty so the resize below sees cap_ == 0
+{ optclear_and_resize(n); } // n elements, default-initialized by new[]
+
+template<typename T>
+inline Array<T>::Array (std::size_t n, const T& init)
+  : p_(0), n_(0), cap_(0) // start empty so the resize below sees cap_ == 0
+{ optclear_and_resize(n, init); } // n elements, each set from init
+
+// Deep copy: size this array to a.size() and copy a's elements into it.
+template<typename T> inline Array<T>::Array (const Array<T>& a)
+  : p_(nullptr), n_(0), cap_(0) {
+  optclear_and_resize(a.n_);
+  std::copy(a.p_, a.p_ + a.n_, p_);
+}
+
+// Release the storage and return to the empty state (deln also nulls p_).
+template<typename T> inline void Array<T>::clear () {
+  n_ = 0; cap_ = 0;
+  deln(p_);
+}
+
+// Empty the array; keep the storage if capacity >= n, else free and reallocate.
+template<typename T> inline void Array<T>::optclear_and_reserve (std::size_t n) {
+  n_ = 0;
+  if (cap_ >= n) return;
+  clear(); // drops the too-small buffer before the new one is allocated
+  p_ = allocn<T>(n);
+  cap_ = n;
+}
+
+// Same as optclear_and_reserve, but a newly allocated buffer is first-touched.
+template<typename T> inline void Array<T>::optclear_and_reserve_ft (std::size_t n) {
+  n_ = 0;
+  if (cap_ >= n) return;
+  clear(); // drops the too-small buffer before the new one is allocated
+  p_ = allocn<T>(n, true);
+  cap_ = n;
+}
+
+// Set the size to n. Elements are kept when n fits in the current capacity;
+// otherwise the storage is replaced and the old contents are dropped.
+template<typename T>
+inline void Array<T>::optclear_and_resize (std::size_t n) {
+  // optclear_and_reserve zeroes n_ and replaces the buffer, so it is called
+  // only when growth is unavoidable.
+  if (n > cap_) optclear_and_reserve(n);
+  n_ = n;
+}
+
+// Set the size to n, first-touching any newly allocated storage. Elements are
+// kept when n fits in the current capacity; otherwise they are dropped.
+template<typename T>
+inline void Array<T>::optclear_and_resize_ft (std::size_t n) {
+  // optclear_and_reserve_ft zeroes n_ and replaces the buffer, so it is
+  // called only when growth is unavoidable.
+  if (n > cap_) optclear_and_reserve_ft(n);
+  n_ = n;
+}
+
+// Resize to n, then copy-assign init to every element (memcpy is UB for non-trivial T).
+template<typename T>
+inline void Array<T>::optclear_and_resize (std::size_t n, const T& init) {
+  optclear_and_resize(n);
+  set(init);
+}
+
+template<typename T>
+inline void Array<T>::unsafe_push_back (const T& e) {
+  assert(n_ < cap_); // room must already be reserved; unchecked when NDEBUG is defined
+  p_[n_++] = e; // append by copy-assignment and bump the size
+}
+
+} // namespace islet
+
+#endif
diff --git a/methods/islet/islet_xnodes_metrics.cpp b/methods/islet/islet_xnodes_metrics.cpp
new file mode 100644
index 0000000..9c516ab
--- /dev/null
+++ b/methods/islet/islet_xnodes_metrics.cpp
@@ -0,0 +1,257 @@
+#include "islet_xnodes_metrics.hpp"
+#include "islet_tables.hpp"
+#include "islet_npx.hpp"
+#include "islet_maxeigcomp.hpp"
+#include "islet_pum.hpp"
+#include "islet_isl.hpp"
+#include "islet_util.hpp"
+
+static Real factorial (const Int n) {
+  Real prod = 1; // n <= 1 yields 1
+  for (Int k = 1; k < n; ++k) prod *= k + 1; // multiplies by 2, 3, ..., n in that order
+  return prod;
+}
+
+void calc_xnodes_metrics (const Nodes& nodes, const Real* const xnodes, Real* metrics) {
+  const Int np = nodes.get_np(), nph = np/2, nseg = 100; // nseg midpoint samples per region
+  Real npm1 = 0, npm2 = 0, npm_max = 0; // running l1 sum, squared-l2 sum, and max
+  for (Int ireg = 0; ireg < nph; ++ireg) { // only the first np/2 regions are visited
+    const bool center = np % 2 == 0 && ireg == nph-1; // last visited region when np is even
+    const auto xs = xnodes[ireg], xe = xnodes[ireg+1]; // region end points
+    const auto subnp  = nodes.get_subnp()[ireg]; // number of support nodes in this region
+    const auto active = nodes.get_nodes()[ireg]; // indices of those nodes into xnodes
+    Real npm1_reg = 0, npm2_reg = 0, npm_max_reg = 0;
+    for (Int seg = 0; seg < nseg; ++seg) {
+      const auto x = xs + (seg + 0.5)*(xe - xs)/nseg; // midpoint of segment seg
+      Real f = 1;
+      for (Int i = 0; i < subnp; ++i) // f = prod_i (x - xnodes[active[i]])
+        f *= x - xnodes[active[i]];
+      npm1_reg += std::abs(f);
+      npm2_reg += islet::square(f);
+      npm_max_reg = std::max(npm_max_reg, std::abs(f));
+    }
+    const auto fac = factorial(subnp);
+    const auto f = (center ? 1 : 2)*(xe - xs)/fac/nseg; // segment width / subnp!, doubled unless center
+    npm1 += f*npm1_reg;
+    npm2 += f*npm2_reg/fac; // need an extra fac b/c of square
+    npm_max = std::max(npm_max, npm_max_reg/fac);
+  }
+  metrics[0] = npm1;            // l1
+  metrics[1] = std::sqrt(npm2); // l2
+  metrics[2] = npm_max;         // linf
+}
+
+Real calc_xnodes_metric (const Nodes& nodes, const Real* const xnodes) {
+  Real all[3]; // filled by calc_xnodes_metrics with three values
+  calc_xnodes_metrics(nodes, xnodes, all);
+  return all[0]; // only the first entry is returned
+}
+
+void calc_lebesgue_consts (const Nodes& nodes, const Real* const xnodes, Real* metrics) {
+  const Int np = nodes.get_np(), nph = np/2, nseg = 100; // nseg midpoint samples per region
+  Real npm1 = 0, npm2 = 0, npm_max = 0; // running l1 sum, squared-l2 sum, and max
+  for (Int ireg = 0; ireg < nph; ++ireg) { // only the first np/2 regions are visited
+    const bool center = np % 2 == 0 && ireg == nph-1; // last visited region when np is even
+    const auto xs = xnodes[ireg], xe = xnodes[ireg+1]; // region end points
+    const auto subnp  = nodes.get_subnp()[ireg]; // number of support nodes in this region
+    const auto active = nodes.get_nodes()[ireg]; // indices of those nodes into xnodes
+    Real npm1_reg = 0, npm2_reg = 0, npm_max_reg = 0;
+    for (Int seg = 0; seg < nseg; ++seg) {
+      const auto x = xs + (seg + 0.5)*(xe - xs)/nseg; // midpoint of segment seg
+      Real f = 0; // sum over i of |l_i(x)|, the Lebesgue function at x
+      for (Int i = 0; i < subnp; ++i) {
+        Real g = 1; // Lagrange basis function of node active[i], evaluated at x
+        for (Int j = 0; j < subnp; ++j) {
+          if (j == i) continue;
+          g *= (x - xnodes[active[j]])/(xnodes[active[i]] - xnodes[active[j]]);
+        }
+        f += std::abs(g);
+      }
+      npm1_reg += f;
+      npm2_reg += islet::square(f);
+      npm_max_reg = std::max(npm_max_reg, f);
+    }
+    const auto f = (center ? 1 : 2)*(xe - xs)/nseg; // segment width, doubled unless center
+    npm1 += f*npm1_reg;
+    npm2 += f*npm2_reg; // squared sum; no factorial factor is applied in this function
+    npm_max = std::max(npm_max, npm_max_reg);
+  }
+  metrics[0] = npm1;            // l1
+  metrics[1] = std::sqrt(npm2); // l2
+  metrics[2] = npm_max;         // linf
+}
+
+MetricsTracker::MetricsTracker (const Int np, bool very_strict) {
+  pum_min = pum_max = 1; // defaults: no cap on pum, and no pum seen yet
+  const Real fac = std::pow(std::numeric_limits<Real>::epsilon(), 1.0/nbin);
+  pum_bins[0] = 1; // bin edges decrease geometrically from 1 to epsilon
+  for (Int i = 0; i < nbin; ++i) pum_bins[i+1] = pum_bins[i]*fac;
+  //islet::prarr("pum_bins",pum_bins,nbin);
+  // From findbasic.
+  Real iv[3]; // initial best metrics, replicated into every bin below
+  if (false) { // disabled: per-np starting values instead of 1
+    switch (np) {
+    case  4: iv[0] = 1.575830e-02; iv[1] = 1.278167e-02; iv[2] = 1.510916e-02; break;
+    case  5: iv[0] = 2.549179e-03; iv[1] = 2.582596e-03; iv[2] = 4.154765e-03; break;
+    case  6: iv[0] = 2.393393e-04; iv[1] = 2.104595e-04; iv[2] = 2.816403e-04; break; // use early findcombo results too
+    case  7: iv[0] = 5.557714e-05; iv[1] = 4.790768e-05; iv[2] = 6.934868e-05; break;
+    case  8: iv[0] = 7.265137e-06; iv[1] = 7.988089e-06; iv[2] = 1.618560e-05; break;
+    case  9: iv[0] = 7.860606e-07; iv[1] = 7.683143e-07; iv[2] = 1.179540e-06; break;
+    case 10: iv[0] = 1.075794e-07; iv[1] = 9.532486e-08; iv[2] = 1.540700e-07; break;
+    case 11: iv[0] = 1.589070e-08; iv[1] = 1.867321e-08; iv[2] = 3.345386e-08; break;
+    case 12: iv[0] = 6.963036e-10; iv[1] = 8.920290e-10; iv[2] = 1.715838e-09; break;
+    case 13: iv[0] = 4.127583e-11; iv[1] = 4.809655e-11; iv[2] = 9.223544e-11; break;
+    default: iv[0] = iv[1] = iv[2] = 1;
+    }
+    // 12 is taking too long, and incomplete search strongly suggests we can
+    // restrict our attention to pum < 1e-6.
+    if (np == 12) set_pum_max(1e-6);
+  } else {
+    iv[0] = iv[1] = iv[2] = 1;
+  }
+  if (very_strict) {
+    // Use GLL nodal search results to make this as small as possible.
+    Real pum_max = 1; // local (shadows the member); 1 is a defined fallback if NDEBUG removes assert(0)
+    switch (np) {
+    case  4: pum_max = 2.6609e-15; break;
+    case  5: pum_max = 1.0509e-07; break;
+    case  6: pum_max = 1.0809e-09; break;
+    case  7: pum_max = 4.7909e-09; break;
+    case  8: pum_max = 8.8109e-09; break;
+    case  9: pum_max = 3.6409e-09; break;
+    case 10: pum_max = 1.4409e-08; break;
+    case 11: pum_max = 3.5009e-07; break;
+    case 12: pum_max = 1.4509e-07; break;
+    default: assert(0); // no table entry for this np
+    }
+    set_pum_max(pum_max);
+  }
+  for (Int i = 0; i < nmet*nbin; ++i) best_metrics[i] = iv[i % nmet];
+}
+
+void MetricsTracker::set_pum_max (const Real pum_max_) {
+  assert(pum_max_ <= 1 && pum_max_ > 0); // accepted range is (0, 1]
+  pum_max = pum_max_;
+}
+
+bool MetricsTracker::acceptable_metrics (const Nodes& nodes, const Real* xnodes,
+                                         const Real* metrics) const { // nodes, xnodes unused
+  for (Int bin = 0; bin < nbin; ++bin)
+    for (Int m = 0; m < nmet; ++m)
+      if (metrics[m] < best_metrics[nmet*bin + m])
+        return true; // beats the stored best in at least one bin
+  return false;
+}
+
+Real MetricsTracker::pum_to_accept (const Nodes& nodes, const Real* xnodes,
+                                    const Real* metrics) const { // nodes, xnodes unused
+  for (Int bin = 0; bin < nbin; ++bin)
+    for (Int m = 0; m < nmet; ++m)
+      if (metrics[m] < best_metrics[nmet*bin + m])
+        return std::min(pum_max, pum_bins[bin]); // first improving bin sets the bound
+  return 0;
+}
+
+bool MetricsTracker
+::would_update (const Real* metrics, const Real& pum) const {
+  if (pum > pum_max) return false;
+  // Advance to the first bin whose lower edge pum_bins[bin+1] does not
+  // exceed pum; the last bin takes everything that remains.
+  Int bin = 0;
+  while (bin < nbin-1 && ! (pum >= pum_bins[bin+1])) ++bin;
+  const Real* const best = best_metrics + nmet*bin;
+  for (Int i = 0; i < nmet; ++i)
+    if (metrics[i] < best[i]) return true;
+  return false;
+}
+
+void MetricsTracker::update (const Real* metrics, const Real& pum) {
+  bool improved = false;
+  for (Int bin = 0; bin < nbin && ! (pum > pum_bins[bin]); ++bin) {
+    Real* const best = best_metrics + nmet*bin; // this bin's nmet slots
+    for (Int i = 0; i < nmet; ++i) {
+      if ( ! (metrics[i] < best[i])) continue;
+      best[i] = metrics[i];
+      improved = true;
+    }
+  }
+  if (improved) pum_min = std::min(pum_min, pum);
+}
+
+void MetricsTracker::get_metrics (Real pum, Real* metrics) const {
+  Int bin = 0;
+  while (bin < nbin && ! (pum > pum_bins[bin])) ++bin;
+  if (bin > 0) --bin; // step back to the last bin whose edge pum does not exceed; clamp at 0
+  const Real* const src = best_metrics + nmet*bin;
+  for (Int i = 0; i < nmet; ++i)
+    metrics[i] = src[i];
+}
+
+bool MetricsTracker::write (std::ofstream& os) {
+  using islet::write;
+  return (write(os, nmet) && // field order here must match MetricsTracker::read
+          write(os, nbin) &&
+          write(os, nmet*nbin, best_metrics) &&
+          write(os, nbin+1, pum_bins) &&
+          write(os, pum_max) &&
+          write(os, pum_min)); // && short-circuits: stops at the first failed write
+}
+
+bool MetricsTracker::read (std::ifstream& os) {
+  using islet::read;
+  Int lnmet, lnbin, n; // sizes as stored in the stream
+  const bool ok = (read(os, lnmet) && lnmet == nmet && // stored dimensions must equal the compiled-in ones
+                   read(os, lnbin) && lnbin == nbin &&
+                   read(os, n, best_metrics) && n == nmet*nbin && // NOTE(review): n is checked only after the array read; confirm islet::read bounds the copy
+                   read(os, n, pum_bins) && n == nbin+1 &&
+                   read(os, pum_max) &&
+                   read(os, pum_min));
+  return ok; // false on the first failed read or size mismatch
+}
+
+static void symmetrize (const Int n, Real* x) {
+  // Write the negated first n/2 entries, mirrored, onto the tail of x.
+  for (Int lo = 0, hi = n-1; lo < n/2; ++lo, --hi) x[hi] = -x[lo];
+}
+
+void calc_weights (const Nodes& nodes, const Real* const xnode, Real* const wt) {
+  // Quadrature coefficients.
+  const Int qn = 7; // number of GLL quadrature points used per region
+  const Real* const qx = islet::get_x_gll(qn);
+  const Real* const qw = islet::get_w_gll(qn);
+  const Int np = nodes.get_np();
+  Real v[islet::np_max], integral[islet::np_max] = {0}; // v: values from eval at one point
+  for (Int ireg = 0; ireg < np-1; ++ireg) { // all np-1 regions between consecutive nodes
+    Real reg_integral[islet::np_max] = {0};
+    for (Int qi = 0; qi < qn; ++qi) {
+      const auto alpha = 0.5*(qx[qi] + 1); // map the reference point from [-1,1] to [0,1]
+      const auto x = (1 - alpha)*xnode[ireg] + alpha*xnode[ireg+1]; // point inside the region
+      eval(nodes.get_np(), nodes.include_bdy(), xnode,
+           nodes.get_subnp(), nodes.get_nodes(), x, v);
+      for (Int i = 0; i < np; ++i)
+        reg_integral[i] += qw[qi]*v[i];
+    }
+    const auto fac = 0.5*(xnode[ireg+1] - xnode[ireg]); // half the region width (change of variables)
+    for (Int i = 0; i < np; ++i)
+      integral[i] += fac*reg_integral[i];
+  }
+  // Numerically symmetrize.
+  for (Int ireg = 0; ireg < np/2; ++ireg) {
+    // std::min is to prevent a spurious -Warray-bounds warning.
+    const Int other = std::min(islet::np_max-1, np-ireg-1);
+    integral[ireg] = integral[other] = // both mirror entries get their average
+      0.5*(integral[ireg] + integral[other]);
+  }
+  for (Int i = 0; i < np; ++i) wt[i] = integral[i]; // wt must hold at least np entries
+}
+
+static bool has_all_positive_weights (const Nodes& nodes, const Real* const xnode) {
+  Real weights[islet::np_max];
+  calc_weights(nodes, xnode, weights);
+  // Only the first np entries are filled; a zero weight also fails the check.
+  const Int count = nodes.get_np();
+  for (Int k = 0; k < count; ++k)
+    if (weights[k] <= 0)
+      return false;
+  return true;
+}
diff --git a/methods/islet/islet_xnodes_metrics.hpp b/methods/islet/islet_xnodes_metrics.hpp
new file mode 100644
index 0000000..14bcbe3
--- /dev/null
+++ b/methods/islet/islet_xnodes_metrics.hpp
@@ -0,0 +1,53 @@
+#ifndef INCLUDE_ISLET_XNODES_METRICS_HPP
+#define INCLUDE_ISLET_XNODES_METRICS_HPP
+
+#include "islet_types.hpp"
+#include "islet_nodalbasis.hpp"
+
+#include <vector>
+#include <sstream>
+
+// l1 only
+Real calc_xnodes_metric(const Nodes& nodes, const Real* const xnodes);
+// l1, l2, linf
+void calc_xnodes_metrics(const Nodes& nodes, const Real* const xnodes, Real* metrics);
+
+void calc_lebesgue_consts(const Nodes& nodes, const Real* const xnodes, Real* metrics);
+
+void calc_weights(const Nodes& nodes, const Real* const xnode, Real* const wt);
+
+struct MetricsTracker {
+  typedef std::shared_ptr<MetricsTracker> Ptr;
+  // very_strict sets pum_max from a built-in per-np table (np 4..12 only).
+  MetricsTracker(Int np, bool very_strict = false);
+
+  void set_pum_max(Real pum); // optional; default is 1; must be in (0, 1]
+  Real get_pum_max () const { return pum_max; }
+
+  // Min pum seen so far. If none, return 1.
+  Real get_pum_min () const { return pum_min; }
+  // Return whether the given metrics beat the stored best in at least one
+  // bin, i.e. are provisionally acceptable. nodes and xnodes are not used.
+  bool acceptable_metrics(const Nodes& nodes, const Real* xnodes,
+                          const Real* metrics) const;
+  // pum needs to be <= this value to update.
+  Real pum_to_accept(const Nodes& nodes, const Real* xnodes,
+                     const Real* metrics) const;
+  // Would update based on metrics and pum?
+  bool would_update(const Real* metrics, const Real& pum) const;
+  // Do the update.
+  void update(const Real* metrics, const Real& pum);
+  // Copy into metrics the stored best values of the bin selected by pum.
+  void get_metrics(Real pum, Real* metrics) const;
+  // Save/restore the tracker state; read also fails on a size mismatch.
+  bool write(std::ofstream& os);
+  bool read(std::ifstream& os);
+
+private:
+  static const Int nmet = 3, nbin = 30;
+  Real best_metrics[nmet*nbin]; // l1, l2, linf
+  Real pum_bins[nbin+1]; // decreasing bin edges; pum_bins[0] = 1
+  Real pum_max, pum_min; // cap on accepted pum; smallest pum that caused an update
+};
+
+#endif
diff --git a/methods/islet/make-depends.sh b/methods/islet/make-depends.sh
new file mode 100644
index 0000000..3f50949
--- /dev/null
+++ b/methods/islet/make-depends.sh
@@ -0,0 +1,3 @@
+for i in *.cpp; do
+    g++ -MM "$i"
+done > make.depends
diff --git a/methods/islet/make.depends b/methods/islet/make.depends
new file mode 100644
index 0000000..c910667
--- /dev/null
+++ b/methods/islet/make.depends
@@ -0,0 +1,40 @@
+cslunstab.o: cslunstab.cpp
+islet_isl.o: islet_isl.cpp islet_tables.hpp islet_types.hpp \
+ islet_util.hpp islet_isl.hpp islet_interpmethod.hpp \
+ islet_xnodes_metrics.hpp islet_nodalbasis.hpp islet_npx.hpp
+islet_maxeigcomp.o: islet_maxeigcomp.cpp islet_maxeigcomp.hpp \
+ islet_types.hpp islet_util.hpp islet_interpmethod.hpp islet_npx.hpp \
+ islet_tables.hpp
+islet_nodalbasis.o: islet_nodalbasis.cpp islet_nodalbasis.hpp \
+ islet_types.hpp islet_util.hpp islet_npx.hpp islet_tables.hpp \
+ islet_interpmethod.hpp
+islet_np4.o: islet_np4.cpp islet_np4.hpp islet_types.hpp islet_isl.hpp \
+ islet_interpmethod.hpp
+islet_npx.o: islet_npx.cpp islet_npx.hpp islet_util.hpp islet_types.hpp \
+ islet_tables.hpp islet_interpmethod.hpp
+islet_pum.o: islet_pum.cpp islet_pum.hpp islet_types.hpp \
+ islet_maxeigcomp.hpp islet_util.hpp islet_interpmethod.hpp islet_npx.hpp \
+ islet_tables.hpp islet_isl.hpp
+islet_studymetrics.o: islet_studymetrics.cpp islet_pum.hpp \
+ islet_types.hpp islet_maxeigcomp.hpp islet_util.hpp \
+ islet_interpmethod.hpp islet_npx.hpp islet_tables.hpp \
+ islet_xnodes_metrics.hpp islet_nodalbasis.hpp
+islet_tables.o: islet_tables.cpp islet_types.hpp islet_util.hpp \
+ islet_tables.hpp
+islet_xnodes_metrics.o: islet_xnodes_metrics.cpp islet_xnodes_metrics.hpp \
+ islet_types.hpp islet_nodalbasis.hpp islet_util.hpp islet_tables.hpp \
+ islet_npx.hpp islet_interpmethod.hpp islet_maxeigcomp.hpp islet_pum.hpp \
+ islet_isl.hpp
+pum_sweep.o: pum_sweep.cpp islet_isl.hpp islet_types.hpp \
+ islet_interpmethod.hpp islet_pum.hpp islet_maxeigcomp.hpp islet_util.hpp \
+ islet_npx.hpp islet_tables.hpp
+run_meam1_sweep.o: run_meam1_sweep.cpp islet_isl.hpp islet_types.hpp \
+ islet_interpmethod.hpp islet_maxeigcomp.hpp islet_util.hpp islet_npx.hpp \
+ islet_tables.hpp islet_pum.hpp
+run_np4.o: run_np4.cpp islet_np4.hpp islet_types.hpp islet_isl.hpp \
+ islet_interpmethod.hpp islet_util.hpp islet_npx.hpp islet_tables.hpp \
+ islet_xnodes_metrics.hpp islet_nodalbasis.hpp islet_maxeigcomp.hpp \
+ islet_pum.hpp
+search.o: search.cpp islet_tables.hpp islet_types.hpp islet_npx.hpp \
+ islet_util.hpp islet_interpmethod.hpp islet_maxeigcomp.hpp \
+ islet_xnodes_metrics.hpp islet_nodalbasis.hpp islet_pum.hpp
diff --git a/methods/islet/make.inc.gnu b/methods/islet/make.inc.gnu
new file mode 100644
index 0000000..acd2a46
--- /dev/null
+++ b/methods/islet/make.inc.gnu
@@ -0,0 +1,2 @@
+CXXFLAGS = -g -O3 -std=c++11 -fPIC -fopenmp
+LINK_LAPACK_BLAS = -llapack -lblas
diff --git a/methods/islet/pum_sweep.cpp b/methods/islet/pum_sweep.cpp
new file mode 100644
index 0000000..f0c8ab9
--- /dev/null
+++ b/methods/islet/pum_sweep.cpp
@@ -0,0 +1,56 @@
+#include <cstdlib>
+
+#include "islet_isl.hpp"
+#include "islet_pum.hpp"
+
+// Illustrate why a 1-element hop is the key thing to study. Report meam1 at x
+// in [-1,1] if meam1 >= tol. x, meam1 are 1-1.
+static void run (const islet::Operator::ConstPtr& op,
+                 const Int np, const Int nx, const Int ntrial,
+                 std::map<Real,Real>& dx2meam1) { // dx2meam1 accumulates results over all sweeps
+  const auto oim = std::make_shared<islet::OperatorInterpMethod>(np, op);
+  printf("%s\n", op->get_basis_string(np).c_str());
+  const auto x = op->get_xnodes(np);
+  printf("x:");
+  for (Int i = np/2; i < np; ++i) // print only the upper half of the nodes
+    printf(" %7.5f", x[i]);
+  printf("\n");
+  pum::Options o;
+  o.threaded = true;
+  o.perturb = 0.01;
+  for (const Int mec_ne : {3, 33, 333}) // every (mec_ne, ne) combination is swept
+    for (const Int ne : {4, 7, 15}) {
+      printf("ntrial %d mec_ne %d ne %d\n", ntrial, mec_ne, ne);
+      o.ntrial = ntrial;
+      o.mec_ne = mec_ne;
+      o.ne = ne;
+      pum::PerturbedUniformMeshMetric pum(oim, o); // fresh metric object per combination
+      pum.sweep_and_collect_amplitudes(nx, 1e-13, dx2meam1, false); // 1e-13: presumably the meam1 reporting tolerance — confirm
+    }
+  printf("final\n");
+  for (const auto& e : dx2meam1) // std::map iterates in increasing key (dx) order
+    printf("dx %1.16e meam1 %1.16e\n", e.first, e.second);
+}
+
+int main (int argc, char** argv) {
+  if (argc < 5) { // need np, nx, ntrial, and the operator code
+    printf("%s np nx ntrial (0 - natural, 1 - gll_best, 2 - uniform)\n", argv[0]);
+    return -1;
+  }
+  const Int np = std::atoi(argv[1]);
+  const Int nx = std::atoi(argv[2]);
+  const Int ntrial = std::atoi(argv[3]);
+  const Int opcode = std::atoi(argv[4]);
+  if (np < 4 || nx < 2 || ntrial < 1 || opcode < 0 || opcode > 2) {
+    printf("bad input\n"); // newline-terminated so the message ends its own line
+    return -1;
+  }
+  const auto op = islet::Operator::create(opcode == 0 ? // map the code to an operator kind
+                                          islet::Operator::gll_natural :
+                                          opcode == 1 ?
+                                          islet::Operator::gll_best :
+                                          islet::Operator::uniform_offset_nodal_subset);
+  std::map<Real,Real> dx2meam1; // filled and printed by run
+  run(op, np, nx, ntrial, dx2meam1);
+  return 0;
+}
diff --git a/methods/islet/readme.txt b/methods/islet/readme.txt
new file mode 100644
index 0000000..63b8f67
--- /dev/null
+++ b/methods/islet/readme.txt
@@ -0,0 +1,77 @@
+This directory and the directory methods/slmm contain the code used to generate
+the results in the Islet 2D paper, except the E3SM code for the GPU performance
+results.
+
+The directory methods/islet/figures contains the scripts used to generate data
+and figures. The file "figures/figs.tex" contains the latex for the figures.
+Comments before each figure explain how to generate the data and then the figure
+from these data. Bash and hy scripts are those in the "figures" directory.
+
+Programs need BLAS, LAPACK, and for slmmir, Kokkos
+(https://github.com/kokkos/kokkos). NetCDF is optional and was not used for the
+results in this paper. We used Kokkos version 3.3.01
+(https://github.com/kokkos/kokkos/tree/3.3.01) in our build.
+
+For the methods/islet programs, on a standard Linux system with GNU compiler suite,
+    ln -s make.inc.gnu make.inc
+    make
+
+The program "cslunstab" demonstrates the unstable classical cubic interpolation
+semi-Lagrangian instances. Running it should produce no output, as in this case
+all assertions pass. The program is self-contained and is meant to be read. See
+the top of cslunstab.cpp for instructions.
+
+The program "search" is used to find the Islet bases. Run as follows, in this
+example for np = 8:
+    OMP_NUM_THREADS=48 KMP_AFFINITY=balanced ./search findnodal_given_bestosn 8
+This produces output of the following form:
+
+    np  8
+    min_np  8
+    min_np  7
+    min_np  6
+    meam1 4.9e-15 mcV 9.6e+01 mdef 1.0e+00 w>0 1 wtr  9.44e+00 npm  7.27e-06  7.99e-06  1.62e-05 pum  5.41e-08 | np  8 subnp 6 6 7 6 offst 0 0 0 1
+    meam1 4.4e-15 mcV 1.3e+02 mdef 1.0e+00 w>0 1 wtr  7.48e+00 npm  1.08e-05  9.61e-06  1.62e-05 pum  1.60e-08 | np  8 subnp 6 6 6 6 offst 0 0 0 1
+    min_np  5
+    meam1 4.9e-15 mcV 1.3e+02 mdef 1.0e+00 w>0 1 wtr  8.31e+00 npm  7.21e-06  8.39e-06  1.62e-05 pum  1.10e-06 | np  8 subnp 5 7 7 6 offst 0 0 0 1
+    meam1 4.7e-15 mcV 2.5e+02 mdef 1.0e+00 w>0 1 wtr  1.06e+01 npm  1.46e-05  1.42e-05  2.16e-05 pum  5.78e-09 | np  8 subnp 5 5 7 6 offst 0 0 0 1
+    meam1 4.2e-15 mcV 4.1e+02 mdef 1.0e+00 w>0 1 wtr  1.89e+01 npm  5.08e-05  5.51e-05  8.70e-05 pum  3.04e-09 | np  8 subnp 5 6 5 6 offst 0 0 1 1
+    np  8
+    min_np  6 max_np  8
+    min_np  6 max_np  7
+    meam1  1.55e-15 w>0 1 wtr 9.30e+00 npm 7.51e-06 8.07e-06 1.62e-05 pum  2.71e-08 | np  8 subnp 6 6 7 6 nodes | 0 1 2 3 5 6 | 0 1 2 3 4 5 | 0 1 2 3 4 5 6 | 1 2 3 4 5 6
+    meam1  1.78e-15 w>0 1 wtr 9.60e+00 npm 7.71e-06 8.20e-06 1.62e-05 pum  4.29e-09 | np  8 subnp 6 6 7 6 nodes | 0 1 2 3 4 6 | 0 1 2 3 4 6 | 0 1 2 3 4 5 6 | 1 2 3 4 5 6
+    meam1  1.78e-15 w>0 1 wtr 9.58e+00 npm 7.74e-06 8.21e-06 1.62e-05 pum  3.65e-08 | np  8 subnp 6 6 7 6 nodes | 0 1 2 3 4 7 | 0 1 2 3 4 6 | 0 1 2 3 4 5 6 | 1 2 3 4 5 6
+    ...
+    meam1  8.88e-16 w>0 1 wtr 8.74e+00 npm 9.32e-06 1.05e-05 2.13e-05 pum  1.17e-09 | np  8 subnp 6 6 7 6 nodes | 0 1 2 3 5 7 | 0 1 2 3 4 6 | 0 1 2 3 4 5 6 | 0 2 3 4 5 7
+    meam1  1.55e-15 w>0 1 wtr 8.83e+00 npm 9.45e-06 1.05e-05 2.13e-05 pum  7.66e-10 | np  8 subnp 6 6 7 6 nodes | 0 1 2 3 4 7 | 0 1 2 3 4 6 | 0 1 2 3 4 6 7 | 0 2 3 4 5 7
+    min_np  6 max_np  6
+    meam1  1.78e-15 w>0 1 wtr 7.15e+00 npm 1.24e-05 1.07e-05 1.62e-05 pum  1.02e-09 | np  8 subnp 6 6 6 6 nodes | 0 1 2 3 4 6 | 0 1 2 3 4 6 | 0 1 2 3 4 6 | 1 2 3 4 5 6
+    min_np  7 max_np  8
+    min_np  7 max_np  7
+    min_np  8 max_np  8
+
+In this output, each line beginning with "meam1" corresponds to a t.p.s. basis.
+"meam1" means "maximum eigenvalue amplitude minus 1", and the following value
+is this quantity. It should be near machine precision. Then come a few
+unused entries. Next is "w>0", which reports that all basis weights are > 0.
+"wtr" is unused. "npm" lists the a_1,2,infty values for the basis. "pum" gives
+the lambda_max^PUM value. After the "|" is the encoding of the basis, either
+o.n.s. ("offst" is in the encoding) or general n.s.
+
+For the methods/slmm/slmmir program, modify make.inc to point to your Kokkos
+installation, then
+    make
+Optionally run regression tests:
+    python2 slmm_runtests.py
+Bash scripts in the methods/islet/figures directory call the slmmir program.
+
+We use the language hy to create the figures. hy is a Lisp that compiles to
+Python AST. We used hy 0.18.0 ('pip install hy' for the latest version) with
+CPython 3.7.6 provided by Anaconda 3.
+
+The code used to obtain performance data on Summit will be part of main E3SM
+soon. The exact version used to generate the data is archived here:
+    https://github.com/ambrad/E3SM/releases/tag/islet-2d-paper-summit-sl-gpu-timings
+The data are here:
+    https://github.com/E3SM-Project/perf-data/tree/main/nhxx-sl-summit-mar2021
diff --git a/methods/islet/run_meam1_sweep.cpp b/methods/islet/run_meam1_sweep.cpp
new file mode 100644
index 0000000..5307a8c
--- /dev/null
+++ b/methods/islet/run_meam1_sweep.cpp
@@ -0,0 +1,48 @@
+#include "islet_isl.hpp"
+#include "islet_maxeigcomp.hpp"
+#include "islet_pum.hpp"
+
+#include <cstdio>
+
+static void
+run_sweep (const islet::Operator::ConstPtr& op, const Int np) {
+  static const Int nx = 192; // number of dx samples
+  std::vector<Real> dxs(nx), meam1s(nx); // one slot per sample, so threads never share an element
+  const auto oim = std::make_shared<islet::OperatorInterpMethod>(np, op);
+  const auto im = std::make_shared<InterpMethod>(oim);
+# pragma omp parallel for schedule(static,1)
+  for (Int ix = 0; ix < nx; ++ix) {
+    MaxEigComputer mec(false); // constructed inside the loop body, one per iteration
+    const Real dx = 0.5*(Real(ix+1)/nx); // uniform samples in (0, 1/2]
+    dxs[ix] = dx;
+    Real mea;
+    mec.compute(*im, dx, 1024, &mea);
+    meam1s[ix] = mea - 1; // store mea minus 1
+  }
+  for (Int ix = 0; ix < nx; ++ix) // print serially, in dx order
+    printf("%23.15e %23.15e\n", dxs[ix], meam1s[ix]);
+}
+
+int main (int argc, char** argv) {
+  const int np = argc == 2 ? std::atoi(argv[1]) : -1; // -1 marks a missing argument
+  if (argc != 2 || np == -1) { // an explicit "-1" is rejected too
+    printf("%s np\n", argv[0]);
+    return -1;
+  }
+  { // each operator gets a "<name> <np>" header line followed by its sweep table
+    const auto gll_natural = islet::Operator::create(islet::Operator::gll_natural);
+    printf("gll_natural %d\n", np);
+    run_sweep(gll_natural, np);
+  }
+  {
+    const auto gll_best = islet::Operator::create(islet::Operator::gll_best);
+    printf("gll_best %d\n", np);
+    run_sweep(gll_best, np);
+  }
+  {
+    const auto uofs = islet::Operator::create(islet::Operator::uniform_offset_nodal_subset);
+    printf("uniform_offset_nodal_subset %d\n", np);
+    run_sweep(uofs, np);
+  }
+  return 0;
+}
diff --git a/methods/islet/run_np4.cpp b/methods/islet/run_np4.cpp
new file mode 100644
index 0000000..a9fcdd2
--- /dev/null
+++ b/methods/islet/run_np4.cpp
@@ -0,0 +1,270 @@
+#include "islet_np4.hpp"
+
+#include "islet_util.hpp"
+#include "islet_npx.hpp"
+#include "islet_xnodes_metrics.hpp"
+#include "islet_maxeigcomp.hpp"
+#include "islet_pum.hpp"
+
+static const Real oosqrt5 = 0.44721359549995793928;
+
+static Real eval_lagrange_poly (const Int& n, const Real* xsup, const Real* ysup,
+                                const Real& x) {
+  // Evaluate at x the Lagrange interpolant through the n points
+  // (xsup[i], ysup[i]): sum over i of ysup[i] times the i-th basis function.
+  Real sum = 0;
+  for (int i = 0; i < n; ++i) {
+    Real basis = 1;
+    for (int j = 0; j < n; ++j)
+      if (j != i) basis *= (x - xsup[j]) / (xsup[i] - xsup[j]);
+    sum += basis*ysup[i];
+  }
+  return sum;
+}
+
+static Real normalize_x (const Real* gll_x, const Real& x) {
+  // Affine map that sends gll_x[1] to 0 and 1 to 1.
+  return (x - gll_x[1]) / (1 - gll_x[1]);
+}
+
+static void outer_eval (const Real* gll_x, const Real& x, Real v[4]) {
+  const Real
+    xbar = normalize_x(gll_x, gll_x[2]), // gll_x[2] in the normalized coordinate
+    ooxbar = 1 / xbar,
+    ybar = 1 / (xbar - 1),
+    xn = normalize_x(gll_x, x); // evaluation point in the normalized coordinate
+  v[0] = 0; // the first node never contributes
+  v[1] = 1 + ybar*xn*((1 - ooxbar)*xn + ooxbar - xbar);
+  v[2] = ybar*ooxbar*xn*(xn - 1);
+  v[3] = ybar*xn*(xbar - xn);
+}
+
+// Convex combination parameter for np=3 and np=4 combination that gives exactly
+// 1 for the interpolant at gll_x[2] - 1 for the antisymmetric function with GLL
+// point values (0, 1, -1, 0).
+static Real calc_alpha () {
+  const auto x_gll = islet::get_x_gll(4);
+  const Real x0 = x_gll[1] + 1; // evaluation point used for both interpolants
+  Real y[4];
+  outer_eval(x_gll, x0, y);
+  const Real y3sum = y[2] - y[1]; // outer_eval result applied to node values (0, 1, -1, 0), up to sign — confirm
+  eval_lagrange_poly(x_gll, 4, x0, y); // NOTE(review): argument order differs from the static eval_lagrange_poly in this file; presumably an overload from an included header — confirm
+  const Real y4sum = y[2] - y[1]; // same combination for the second set of basis values
+  return (1 - y4sum) / (y3sum - y4sum); // solves alpha*y3sum + (1 - alpha)*y4sum = 1
+}
+
+struct Options {
+  // Handed to MaxEigComputer::run (twice per call) and, divided by 10, used
+  // as the pum mec_ne setting.
+  Int ne_max = 1001;
+  // Tolerances: meam1_tol goes to MaxEigComputer::run; pum_tol bounds the
+  // accepted pum value in optimize().
+  Real meam1_tol = 1e-14, pum_tol = 1e-7;
+};
+
+static Real run_maxeigcomp (const Np4InterpMethod::Ptr& uim, const Options& o) {
+  MaxEigComputer computer;
+  const auto result = computer.run(4, o.ne_max, o.ne_max, o.meam1_tol, true, uim);
+  printf("mec %1.3e\n", result); // report the value before handing it back
+  return result;
+}
+
+static Real run_pum (const UserInterpMethod::Ptr& uim, const Options& o) {
+  pum::Options po;
+  po.threaded = true;
+  po.ntrial = 31;
+  po.mec_ne = o.ne_max/10; // integer division if Int is an integer type
+  po.perturb = 0.01;
+  pum::PerturbedUniformMeshMetric pum(uim);
+  Real pum_max = 0; // largest pum value over all ne tried
+  printf("pum:"); fflush(stdout);
+  for (Int ne = 3; ne <= 15; ++ne) { // sweep ne = 3..15
+    po.ne = ne;
+    pum.reset_opts(po); // reuse the same metric object with the new ne
+    const auto pum_val = pum.run();
+    printf(" %1.1e", pum_val); fflush(stdout); // flush so progress is visible during long runs
+    pum_max = std::max(pum_max, pum_val);
+  }
+  printf("\npum_max %1.4e\n", pum_max);
+  return pum_max;
+}
+
+/*
+  |f(x) - p_i(x)| <= e_i(x)
+  sum_i a_i = 1
+  |f(x) - sum_i a_i p_i(x)|
+    = |sum_i a_i f(x) - sum_i a_i p_i(x)|
+    = |sum_i a_i (f(x) - p_i(x))|
+   <= sum_i |a_i| |f(x) - p_i(x)|
+    = sum_i |a_i| e_i(x)
+ */
+static void calc_metrics (const Np4InterpMethod& uim, Real metrics[3]) {
+  const Int nseg = 100; // midpoint samples per region
+  const auto* xnodes = islet::get_x_gll(4);
+  Real npm1 = 0, npm2 = 0, npm_max = 0; // running l1 sum, squared-l2 sum, and max
+  for (Int ireg = 0; ireg < 2; ++ireg) { // regions 0 and 1 only
+    const bool center = ireg == 1;
+    const auto xs = xnodes[ireg], xe = xnodes[ireg+1]; // region end points
+    Real npm1_reg = 0, npm2_reg = 0, npm_max_reg = 0;
+    for (Int seg = 0; seg < nseg; ++seg) {
+      const auto x = xs + (seg + 0.5)*(xe - xs)/nseg; // midpoint of segment seg
+      Real f = 1;
+      if (ireg == 0) { // blend of the 3-node and 4-node terms, weighted by a(x)
+        Real f3 = 1, f4 = 1;
+        for (Int i = 0; i < 3; ++i) f3 *= x - xnodes[i];
+        for (Int i = 0; i < 4; ++i) f4 *= x - xnodes[i];
+        const Real a = uim.eval_a(x);
+        // Divide by 3!, 4!.
+        f = std::abs(a*f3)/6 + std::abs((1 - a)*f4)/24;
+      } else { // region 1: the 4-node term alone
+        for (Int i = 0; i < 4; ++i) f *= x - xnodes[i];
+        f /= 24;
+      }
+      npm1_reg += std::abs(f);
+      npm2_reg += islet::square(f);
+      npm_max_reg = std::max(npm_max_reg, std::abs(f));
+    }
+    const auto f = (center ? 1 : 2)*(xe - xs)/nseg; // segment width, doubled unless center
+    npm1 += f*npm1_reg;
+    npm2 += f*npm2_reg;
+    npm_max = std::max(npm_max, npm_max_reg);
+  }
+  metrics[0] = npm1;            // l1
+  metrics[1] = std::sqrt(npm2); // l2
+  metrics[2] = npm_max;         // linf
+}
+
+static void optimize (Real best_metrics[3], const Options& o,
+                      const bool c0_zero, const bool c2_one) { // flags pin c0 = 0 and/or c2 = 1
+  const Real alpha = calc_alpha();
+  auto uim = std::make_shared<Np4InterpMethod>(0, alpha, alpha); // starting coefficients; reset every iteration
+  MaxEigComputer mec;
+  pum::Options po;
+  po.threaded = true;
+  po.ntrial = 31;
+  po.mec_ne = o.ne_max/10;
+  po.perturb = 0.01;
+  pum::PerturbedUniformMeshMetric pum(uim);
+  Int iteration = 0, expensive = 0; // samples drawn; samples that reached the costly checks
+  for (;;) { // random search with no exit: runs until the process is stopped
+    const Real
+      c0 = c0_zero ? 0 : islet::urand(),
+      c1 = islet::urand(),
+      c2 = c2_one ? 1 : islet::urand();
+    uim->reset_c(c0, c1, c2);
+
+    Real metrics[3], meam1 = 1, pum_max = 1; // meam1 and pum_max print as 1 until computed
+    calc_metrics(*uim, metrics);
+
+    const auto print = [&] (const char* s) {
+      printf("%14s: %1.16e %1.16e %1.16e | "
+             "pum %1.3e meam1 %1.3e npm %1.3e %1.3e %1.3e %d %d\n",
+             s, c0, c1, c2, pum_max, meam1, metrics[0], metrics[1], metrics[2],
+             iteration, expensive);
+    };
+
+    bool fnd = false; // true if any one metric beats the current best
+    for (Int i = 0; i < 3; ++i)
+      if (metrics[i] < best_metrics[i])
+        fnd = true;
+    ++iteration;
+    if ( ! fnd) continue; // cheap filter before the expensive computations
+    ++expensive;
+
+    meam1 = mec.run(4, o.ne_max, o.ne_max, o.meam1_tol, true, uim);
+    if (meam1 > o.meam1_tol) {
+      if (meam1 < 1e-12) print("reject meam1"); // log near misses only
+      continue;
+    }
+
+    pum_max = 0;
+    for (Int ne = 3; ne <= 15; ++ne) {
+      po.ne = ne;
+      pum.reset_opts(po);
+      const auto pum_val = pum.run(o.pum_tol);
+      pum_max = std::max(pum_max, pum_val);
+      if (pum_max > o.pum_tol) break; // already over the tolerance; skip the remaining ne
+    }
+    if (pum_max > o.pum_tol) {
+      print("reject pum");
+      continue;
+    }
+
+    bool all = true; // best_metrics is replaced only if no metric got worse
+    for (Int i = 0; i < 3; ++i)
+      if (metrics[i] > best_metrics[i])
+        all = false;
+    if (all)
+      for (Int i = 0; i < 3; ++i)
+        best_metrics[i] = metrics[i];
+
+    print("accept"); // printed even when best_metrics was left unchanged
+  }
+}
+
+// Test that the metrics specialization above matches the one we use everywhere
+// else when alpha = 0.
+static void test_metrics () {
+  Real m[3], m4[3]; // m: general routine; m4: the np=4 specialization in this file
+  Nodes nodes;
+  nodes.init("4 1 | 0 4: 0 1 2 3 | 1 4: 0 1 2 3");
+  calc_xnodes_metrics(nodes, islet::get_x_gll(4), m);
+  Np4InterpMethod op(0, 0, 0); // all three coefficients zero
+  calc_metrics(op, m4);
+  for (Int i = 0; i < 3; ++i) // absolute tolerance of 10 machine epsilons
+    if (std::abs(m4[i] - m[i]) > 10*std::numeric_limits<Real>::epsilon())
+      printf("FAIL test_metrics %d %1.16e %1.16e\n", i, m[i], m4[i]); // failures are reported, not fatal
+}
+
+int main (int argc, char** argv) {
+  printf("MaxEigComputer::unittest() %d\n", MaxEigComputer::unittest());
+  test_metrics();
+  Real best_metrics[3]; {
+    // We have a very good value; use this to make the 2- and 3-dimensional
+    // searches faster.
+    const auto uim = std::make_shared<Np4InterpMethod>(0, 0.306, 1);
+    calc_metrics(*uim, best_metrics);
+    printf("start: npm l1 %1.4e l2 %1.4e li %1.4e\n",
+           best_metrics[0], best_metrics[1], best_metrics[2]);
+  }
+  if (argc > 1 && std::string(argv[1]) == "opt1") { // search over c1 only (c0 = 0, c2 = 1)
+    Options o;
+    o.ne_max = 3333;
+    optimize(best_metrics, o, true, true);
+  } else if (argc > 1 && std::string(argv[1]) == "opt2") { // search over c1 and c2 (c0 = 0)
+    Options o;
+    o.ne_max = 3333;
+    optimize(best_metrics, o, true, false);
+  } else if (argc > 1 && std::string(argv[1]) == "opt3") { // search over c0, c1, and c2
+    Options o;
+    o.ne_max = 3333;
+    optimize(best_metrics, o, false, false);
+  } else { // no search: evaluate two fixed coefficient sets
+    const auto eval = [&] (const Real c[3]) {
+      printf("c %1.16e %1.16e %1.16e\n", c[0], c[1], c[2]);
+      const auto uim = std::make_shared<Np4InterpMethod>(c[0], c[1], c[2]);
+      printf("eval_a(1/sqrt(5) - 1) %1.16e\n", uim->eval_a(oosqrt5-1));
+      Real metrics[3];
+      calc_metrics(*uim, metrics);
+      printf("npm l1 %1.4e l2 %1.4e li %1.4e\n", metrics[0], metrics[1], metrics[2]);
+      Options o;
+      o.ne_max = (argc > 1 && std::string(argv[1]) == "dense") ? 1024 : 111; // "dense" selects the larger ne_max
+      const auto mec = run_maxeigcomp(uim, o); // value is printed inside; mec itself is not used further
+      run_pum(uim, o);
+    };
+
+    // (1, 1, 1)     1.332e-15 pum 1.2516e-08 npm l1 1.5758e-02 l2 1.2782e-02 li 1.5109e-02
+    // (0, 0.306, 1) 1.110e-15 pum 9.9197e-09 npm l1 1.1611e-02 l2 9.0249e-03 li 9.0817e-03
+
+    {
+      Real c[3];
+      c[0] = c[1] = c[2] = 1;
+      eval(c);
+    }
+
+    {
+      const Real c[] = {0,0.306,1};
+      eval(c);
+    }
+  }
+}
diff --git a/methods/islet/search.cpp b/methods/islet/search.cpp
new file mode 100644
index 0000000..8c769e8
--- /dev/null
+++ b/methods/islet/search.cpp
@@ -0,0 +1,962 @@
+#ifndef SLMM_NP_GT_4
+# define SLMM_NP_GT_4
+#endif
+
+#include "islet_tables.hpp"
+#include "islet_npx.hpp"
+#include "islet_maxeigcomp.hpp"
+#include "islet_xnodes_metrics.hpp"
+#include "islet_pum.hpp"
+#include "islet_util.hpp"
+
+#include <omp.h>
+#include <array>
+#include <algorithm>
+
+// Interpolation method defined by an offset nodal subset basis. For x <= 0, in
+// region i (the interval ending at node i+1) the basis is the Lagrange
+// interpolant on the stabnp[i] adjacent nodes starting at node staboffset[i];
+// regions at or beyond nmodregions use all np nodes. Values for x > 0 are
+// obtained by mirror symmetry.
+class SearchAtom : public UserInterpMethod {
+public:
+  struct Input {
+    // Node families this atom can be built on.
+    enum Basis { gll, uniform, legendre, cheb };
+
+    // Capacity of stabnp and staboffset.
+    static const int np_max = 12;
+
+    // Number of nodes and number of regions that have a modified (subset)
+    // basis.
+    Int np, nmodregions;
+    Basis basis;
+    // Per region: number of nodes in the subset and index of its first node.
+    Int stabnp[np_max], staboffset[np_max];
+    // Looking for at least this max |lambda| - 1.
+    Real maxeigampm1;
+    // Conclude the search when it succeeds with these parameters. The second is
+    // the number of eigenvalues to sample in (0, 1/2].
+    Int ne, neigdx;
+    bool quiet, unittest;
+
+    Input () { init(); }
+
+  private:
+    // Defaults: np 6 GLL basis with two modified regions of 6 and 5 nodes.
+    void init () {
+      quiet = unittest = false;
+      np = 6;
+      basis = gll;
+      nmodregions = 2;
+      stabnp[0] = 6;
+      stabnp[1] = 5;
+      staboffset[0] = staboffset[1] = 0;
+      maxeigampm1 = 1e-13;
+      ne = 1000;
+      neigdx = 1000;
+    }
+  };
+
+  // Configure from iin; if iin.unittest is set, also run and report the unit
+  // test on stderr.
+  SearchAtom (const Input& iin) : x_gll(nullptr) {
+    reset(iin);
+    if (in.unittest)
+      std::cerr << (unittest() > 0 ? "FAIL" : "PASS")
+                << ": SearchAtom::unittest.\n";
+  }
+
+  // A default-constructed atom has no nodes (get_xnodes() returns null) until
+  // reset() is called.
+  explicit SearchAtom () : x_gll(nullptr) {}
+
+  // Adopt a new configuration and set up the node coordinates for its basis.
+  void reset (const Input& iin) {
+    in = iin;
+    // Nodes come either from a library table (x_gll) or from this object's own
+    // buffer (x_gll_buf). The buffer is per instance, so two atoms with
+    // different np or basis cannot overwrite each other's nodes, and it is
+    // sized by in.np rather than by a fixed capacity.
+    x_gll = nullptr;
+    x_gll_buf.clear();
+    switch (in.basis) {
+    case Input::gll: x_gll = islet::get_x_gll_special(in.np); break;
+    case Input::uniform: {
+      if (in.np > 0) x_gll_buf.resize(in.np);
+      for (Int i = 0; i < in.np; ++i)
+        x_gll_buf[i] = 2*(Real(i)/(in.np-1)) - 1;
+    } break;
+    case Input::legendre: x_gll = islet::get_x_gl(in.np); break;
+    case Input::cheb: {
+      if (in.np > 0) x_gll_buf.resize(in.np);
+      for (Int i = 0; i < in.np; ++i)
+        x_gll_buf[i] = -std::cos(M_PI*Real(2*(i+1) - 1)/Real(2*in.np));
+    } break;
+    }
+  }
+
+  // Run the max-eigenvalue-amplitude search with the configured ne, neigdx,
+  // and threshold; returns what MaxEigComputer::run returns.
+  Real run () {
+    return max_eig_amp.run(in.np, in.ne, in.neigdx, in.maxeigampm1,
+                           in.quiet, this);
+  }
+
+  MaxEigComputer::Analysis calc_max_vals (const Int& nmu, const Int& ndx) {
+    return max_eig_amp.calc_max_vals(nmu, ndx, in.np, this);
+  }
+
+  // Check that the default Input reproduces npxstab<Real>::eval for np = 6 at
+  // a few sample points. Returns the number of mismatching basis values.
+  static int unittest () {
+    int nerr = 0;
+    Input in;
+    SearchAtom sa(in);
+    Real v0[32], v1[32];
+    const Int np = 6;
+    for (Int ix = 0, nx = 11; ix < nx; ++ix) {
+      const auto x = -1 + (2.0*ix)/nx;
+      npxstab<Real>::eval(np, x, v0);
+      sa.eval(x, v1);
+      for (Int j = 0; j < np; ++j)
+        if (v0[j] != v1[j])
+          ++nerr;
+    }
+    return nerr;
+  }
+
+  // Evaluate the np basis functions at reference coordinate x into v.
+  void eval (const Real& x, Real* const v) override {
+    eval(in.np, in.nmodregions, get_xnodes(),
+         in.stabnp, in.staboffset,
+         x, v);
+  }
+
+  Int get_np () const override { return in.np; }
+
+  // Node coordinates for the current configuration: the library table if one
+  // is in use, otherwise this object's own buffer.
+  const Real* get_xnodes () const override {
+    return x_gll ? x_gll : x_gll_buf.data();
+  }
+
+private:
+  Input in;
+  MaxEigComputer max_eig_amp;
+  // Library-owned node table, or null when x_gll_buf holds the nodes.
+  const Real* x_gll;
+  // Storage for node sets computed in reset() (uniform and cheb).
+  std::vector<Real> x_gll_buf;
+
+  static void eval (
+    const Int& np, const Int& nreg, const Real* x_gll,
+    const Int* const subnp, const Int* const os,
+    const Real& x, Real* const v)
+  {
+    if (x > 0) {
+      // Evaluate at the mirrored point and reverse the basis values.
+      eval(np, nreg, x_gll, subnp, os, -x, v);
+      for (int i = 0; i < np/2; ++i)
+        std::swap(v[i], v[np-i-1]);
+      return;
+    }
+    bool done = false;
+    for (Int i = 0; i < nreg; ++i) {
+      // Find the first modified region whose right node is at or beyond x.
+      if (x > x_gll[i+1]) continue;
+      assert(i == 0 || x >= x_gll[i]);
+      assert( ! done);
+      done = true;
+      if (subnp[i] == np) {
+        eval_lagrange_poly(x_gll, np, x, v);
+      } else {
+        // Nodes outside the subset get zero weight.
+        std::fill(v, v + np, 0);
+        eval_lagrange_poly(x_gll + os[i], subnp[i], x, v + os[i]);
+      }
+      break;
+    }
+    // x lies beyond the modified regions: use the full-order interpolant.
+    if ( ! done)
+      eval_lagrange_poly(x_gll, np, x, v);
+  }
+};
+
+// Summarize the np weights in wt: all_pve_wts is true iff every weight is
+// strictly positive, and ratio is the largest weight divided by the smallest.
+// The extrema start from the sentinels 10 (min) and -1 (max).
+static void calc_wts_metrics (const Int np, const Real* wt,
+                              bool& all_pve_wts, Real& ratio) {
+  bool positive = true;
+  Real lo = 10, hi = -1;
+  for (Int k = 0; k < np; ++k) {
+    const Real w = wt[k];
+    if (w <= 0) positive = false;
+    lo = std::min(lo, w);
+    hi = std::max(hi, w);
+  }
+  all_pve_wts = positive;
+  ratio = hi/lo;
+}
+
+// Compute the perturbed-uniform-mesh (PUM) metric of im as the maximum over a
+// fixed sweep of (mec_ne, ne) settings.
+//   threaded is forwarded to pum::Options::threaded.
+//   stop_if_above is forwarded to each PerturbedUniformMeshMetric::run call.
+// If any single run reports a value above stop_if_above, 1 is returned
+// immediately instead of the maximum.
+static Real calc_pum_metric (UserInterpMethod& im, const bool threaded = true,
+                             const Real stop_if_above = 1e3) {
+  // Non-owning shared_ptr: the no-op deleter leaves im's lifetime to the
+  // caller.
+  std::shared_ptr<UserInterpMethod> uim(&im, [] (UserInterpMethod*) {});
+  const auto wrapper = std::make_shared<InterpMethod>(uim);
+  pum::Options o;
+  o.threaded = threaded;
+  o.ntrial = 48;
+  o.perturb = 0.01;
+  Real pum_metric = 0;
+  for (const Int mec_ne: {2, 4, 8, 16, 32, 64})
+    for (const Int ne: {3, 5, 10}) {
+      o.mec_ne = mec_ne;
+      o.ne = ne;
+      pum::PerturbedUniformMeshMetric pum(wrapper, o);
+      const auto pum_metric_ne = pum.run(stop_if_above);
+      // If we stopped, then we have no idea what the actual pum is, so return 1
+      // to be safe. Test the value just computed, not the running maximum:
+      // testing the maximum would notice a stop only after one more run, and
+      // never for the final run.
+      if (pum_metric_ne > stop_if_above) return 1;
+      pum_metric = std::max(pum_metric, pum_metric_ne);
+    }
+  return pum_metric;
+}
+
+// Restriction of nodal subset bases to an offset followed by adjacent nodes.
+namespace find_offset_nodal_subset_bases {
+// Values of np searched when runall() is given np <= -1.
+static const int nps[] = {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16};
+
+// One candidate offset nodal subset basis: for each of the first n regions,
+// the number of nodes in its support (subnp) and the index of the first node
+// of that support (offst). Only entries [0, n) are meaningful.
+struct Basis {
+  static const int M = islet::np_max;
+  std::array<int,M> subnp, offst;
+  int n;
+};
+
+// True iff a and b hold the same (subnp, offst) pair in every one of their n
+// regions. Both bases must have the same n.
+bool same (const Basis& a, const Basis& b) {
+  assert(a.n == b.n);
+  for (int k = 0; k < a.n; ++k) {
+    const bool slot_matches =
+      a.subnp[k] == b.subnp[k] && a.offst[k] == b.offst[k];
+    if ( ! slot_matches) return false;
+  }
+  return true;
+}
+
+// True iff a run of subnp adjacent nodes starting at node offst is centered
+// among np nodes, i.e., it leaves the same number of unused nodes on each side.
+static bool issymmetric (const int np, const int subnp, const int offst) {
+  const int unused = np - subnp;
+  if (unused % 2 != 0) return false;
+  return offst == unused/2;
+}
+
+// True for the node families treated here as having nodes on the element
+// boundary: gll and uniform.
+static bool nodes_on_bdy (const SearchAtom::Input::Basis b) {
+  switch (b) {
+  case SearchAtom::Input::gll:
+  case SearchAtom::Input::uniform:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// Depth-first enumeration of offset nodal subset bases. Regions are filled in
+// from index pos down to 0; once pos reaches -1 the candidate b is complete and
+// is analyzed.
+//   np          number of nodes.
+//   min_np      smallest subset size permitted in any region.
+//   min_np_pos  region that is forced to use exactly min_np nodes.
+//   good_bases  accepted bases; appended to on success.
+//   b           candidate under construction (passed by value so that each
+//               branch of the recursion works on its own copy).
+//   sa          atom that is reconfigured for and used to analyze b.
+//   basis       node family.
+//   mt          tracker used to filter candidates and updated on success.
+void recur (const int np, const int min_np, const int min_np_pos,
+            std::vector<Basis>& good_bases, Basis b, const int pos,
+            SearchAtom& sa, const SearchAtom::Input::Basis basis,
+            MetricsTracker& mt) {
+  if (pos == -1) {
+    // Skip the analysis if this basis has already been found to be good.
+    bool already = false;
+    for (const auto& gb : good_bases)
+      if (same(gb, b)) {
+        already = true;
+        break;
+      }
+    if (already) return;
+
+    // Configure the search atom for this candidate.
+    SearchAtom::Input in;
+    in.quiet = true;
+    in.basis = basis;
+    in.np = np;
+    in.nmodregions = b.n;
+    for (int i = 0; i < b.n; ++i) in.stabnp[i] = b.subnp[i];
+    for (int i = 0; i < b.n; ++i) in.staboffset[i] = b.offst[i];
+    in.ne = 500;
+    in.neigdx = 500;
+    sa.reset(in);
+
+    // Describe the same candidate as explicit node lists: region ireg uses
+    // nodes staboffset[ireg], ..., staboffset[ireg] + stabnp[ireg] - 1.
+    Nodes nodes(np, nodes_on_bdy(basis)); {
+      assert(nodes.get_nh() == b.n);
+      std::vector<Int> ns;
+      ns.reserve(np);
+      for (Int ireg = 0; ireg < nodes.get_nh(); ++ireg) {
+        ns.resize(in.stabnp[ireg]);
+        for (Int i = 0; i < in.stabnp[ireg]; ++i)
+          ns[i] = in.staboffset[ireg] + i;
+        nodes.set(ireg, ns);
+      }
+    }
+    // Filters that run before the expensive analysis: the tracker must accept
+    // the node metrics, and every weight must be positive.
+    Real xnodes_metric[3];
+    calc_xnodes_metrics(nodes, sa.get_xnodes(), xnodes_metric);
+    if ( ! mt.acceptable_metrics(nodes, sa.get_xnodes(), xnodes_metric)) return;
+    const Real pum_to_accept = mt.pum_to_accept(nodes, sa.get_xnodes(),
+                                                xnodes_metric);
+    bool all_pve_wts = false;
+    Real wtr = 0; {
+      Real wt[islet::np_max];
+      calc_weights(nodes, sa.get_xnodes(), wt);
+      calc_wts_metrics(np, wt, all_pve_wts, wtr);
+    }
+    if ( ! all_pve_wts) return;
+
+    // Run the potentially expensive analysis.
+    Real maxeigampm1 = sa.run();
+    Real maxcondV = 0, maxdefub = 0;
+    Real pum_metric = 0;
+    if (maxeigampm1 <= in.maxeigampm1) {
+      // The first pass succeeded. Compute the PUM metric and, if the tracker
+      // would record this candidate, recompute the maximum values with 1111
+      // samples in each argument.
+      pum_metric = calc_pum_metric(sa, true, pum_to_accept);
+      if ( ! mt.would_update(xnodes_metric, pum_metric)) return;
+      const auto max_vals = sa.calc_max_vals(1111, 1111);
+      maxeigampm1 = max_vals.max_eig_amp - 1;
+      maxcondV = max_vals.max_condv;
+      maxdefub = max_vals.max_defect_ub;
+    }
+    // Re-test the threshold because maxeigampm1 may have been overwritten by
+    // the second analysis above.
+    if (maxeigampm1 <= in.maxeigampm1) {
+      printf("meam1 %1.1e mcV %1.1e mdef %1.1e w>0 %d wtr %9.2e "
+             "npm %9.2e %9.2e %9.2e pum %9.2e",
+             maxeigampm1, maxcondV, maxdefub, all_pve_wts, wtr,
+             xnodes_metric[0], xnodes_metric[1], xnodes_metric[2],
+             pum_metric);
+      printf(" | np %2d subnp", np);
+      for (int i = 0; i < b.n; ++i) printf(" %d", b.subnp[i]);
+      printf(" offst");
+      for (int i = 0; i < b.n; ++i) printf(" %d", b.offst[i]);
+      printf("\n");
+      fflush(stdout);
+      good_bases.push_back(b);
+      mt.update(xnodes_metric, pum_metric);
+    }
+    return;
+  }
+  // Set up a basis. Avoid some, but not all, redundant trials by making one
+  // slot have np = min_np.
+  // For even np, region np/2 - 1 is the middle region; only supports for which
+  // issymmetric() holds are tried there.
+  const bool middle = np % 2 == 0 && pos == np/2 - 1;
+  for (
+#if 0
+    // Disabled alternative: try subset sizes from largest to smallest.
+    int subnp = pos == min_np_pos ? min_np : np;
+    subnp >= min_np;
+    --subnp
+#else
+    // Try subset sizes from smallest to largest; the pinned region tries only
+    // min_np.
+    int subnp = min_np;
+    subnp <= (pos == min_np_pos ? min_np : np);
+    ++subnp
+#endif
+       )
+    // The offset range keeps the support inside [0, np) and makes it contain
+    // nodes pos and pos+1.
+    for (int offst = std::max(0, pos - subnp + 2);
+         offst <= std::min(pos, np - subnp);
+         ++offst) {
+      if (middle && ! issymmetric(np, subnp, offst)) continue;
+      b.subnp[pos] = subnp;
+      b.offst[pos] = offst;
+      recur(np, min_np, min_np_pos, good_bases, b, pos-1, sa, basis, mt);
+    }
+}
+
+// Search for stable offset nodal subset bases with np nodes from the given
+// node family. The minimum subset size min_np is lowered from np toward 2, and
+// the search stops at the first min_np for which at least one good basis has
+// been found. Returns that min_np, or -1 if no level produced a good basis. A
+// MetricsTracker is created if mt is null.
+static Int run (const int np, MetricsTracker::Ptr mt = nullptr,
+                const SearchAtom::Input::Basis basis = SearchAtom::Input::gll) {
+  if ( ! mt) mt = std::make_shared<MetricsTracker>(np);
+  // Smallest subset size the search will go down to.
+  const int min_np_lim = 2;
+  SearchAtom sa;
+  std::vector<Basis> good_bases;
+  printf("np %2d\n", np);
+  const int nregion = nodes_on_bdy(basis) ? np/2 : (np+2)/2;
+  // Search from highest- to lowest-OOA bases.
+  for (int min_np = np; min_np >= min_np_lim; --min_np) {
+    fprintf(stdout, "min_np %2d\n", min_np); fflush(stdout);
+    // Start every region at the full node set with offset 0; recur overwrites
+    // these as it enumerates.
+    Basis b;
+    b.n = nregion;
+    for (int i = 0; i < b.n; ++i) {
+      b.subnp[i] = np;
+      b.offst[i] = 0;
+    }
+    for (int min_np_pos = 0; min_np_pos < b.n; ++min_np_pos)
+      recur(np, min_np, min_np_pos, good_bases, b, b.n-1, sa, basis, *mt);
+    // We don't want to reduce order to improve other heuristics; instead, we'll
+    // use the more general nodal subset basis if needed. So stop at the highest
+    // min_np having a good basis.
+    if ( ! good_bases.empty()) return min_np;
+  }
+  return -1;
+}
+
+// Run the offset nodal subset search for a single np, or for every entry of
+// nps when np <= -1.
+static void runall (const Int np,
+                    const SearchAtom::Input::Basis basis = SearchAtom::Input::gll,
+                    const MetricsTracker::Ptr& mt = nullptr) {
+  if (np > -1) {
+    run(np, mt, basis);
+    return;
+  }
+  for (const int each_np : nps)
+    run(each_np, mt, basis);
+}
+} // namespace find_offset_nodal_subset_bases
+
+// List of region's valid node supports. Use a vector of ints, where in an int,
+// the 0/1 pattern gives the nodes.
+struct ValidNodesList {
+  typedef std::shared_ptr<ValidNodesList> Ptr;
+
+  // Cursor over the stored masks that visits only those containing both nodes
+  // pin0 and pin0+1.
+  struct Iterator {
+    // Cursor positioned before the first mask; advance once to reach the first
+    // valid one.
+    Iterator (const std::vector<int>& nodes, const int& ntot, const int& nsub,
+              const int& pin0)
+      : nodes_(nodes), ntot_(ntot), nsub_(nsub), pin0_(pin0), idx_(-1)
+    {}
+
+    // One-past-the-end cursor.
+    Iterator (const std::vector<int>& nodes, bool)
+      : nodes_(nodes), ntot_(-1), nsub_(-1), pin0_(-1), idx_(nodes.size())
+    {}
+
+    // Move to the next valid mask, or to the end if there is none.
+    Iterator operator++ () {
+      const int count = static_cast<int>(nodes_.size());
+      int next = idx_ + 1;
+      while (next < count && ! valid(nodes_[next], pin0_))
+        ++next;
+      idx_ = next;
+      return *this;
+    }
+
+    // Cursors compare by position only.
+    bool operator== (const Iterator& it) const { return idx_ == it.idx_; }
+    bool operator!= (const Iterator& it) const { return ! (*this == it); }
+
+    // Convert a bit string to a list of element node indices.
+    void get_nodes (int* nodes) {
+      assert(idx_ < static_cast<int>(nodes_.size()));
+      const auto mask = nodes_[idx_];
+      int nfound = 0;
+      for (int bit = 0; bit < ntot_; ++bit) {
+        if ( ! (mask & (1 << bit))) continue;
+        nodes[nfound] = bit;
+        ++nfound;
+      }
+      assert(nfound == nsub_);
+    }
+
+  private:
+    const std::vector<int>& nodes_;
+    const int ntot_, nsub_, pin0_;
+    int idx_;
+
+    // Do the selected nodes include the two bounding the region?
+    static bool valid (const int nodemask, const int pin0) {
+      const int both = 3 << pin0;
+      return (nodemask & both) == both;
+    }
+  };
+
+  // Build the list of all masks over ntot nodes having exactly nsub bits set;
+  // if symmetric, keep only masks that read the same in both directions.
+  ValidNodesList (const int ntot, const int nsub,
+                  const bool symmetric = false) {
+    assert(ntot <= islet::np_max);
+    init(ntot, nsub, symmetric);
+  }
+
+  // Append the qualifying masks, in increasing order, to the stored list.
+  void init (const int ntot, const int nsub, const bool symmetric) {
+    ntot_ = ntot;
+    nsub_ = nsub;
+    symmetric_ = symmetric;
+    for (int mask = 0; mask < (1 << ntot); ++mask) {
+      if (num1s(mask) != nsub) continue;
+      if (symmetric_ && ! issymmetric(mask, ntot)) continue;
+      nodes_.push_back(mask);
+    }
+  }
+
+  // First mask that contains nodes pin0 and pin0+1.
+  Iterator begin (const int& pin0) const {
+    Iterator first(nodes_, ntot_, nsub_, pin0);
+    ++first;
+    return first;
+  }
+
+  Iterator end () const { return Iterator(nodes_, true); }
+
+  // Self-test of issymmetric. Returns 0 on success; otherwise reports on
+  // stderr and returns 1.
+  static Int test () {
+    const bool ok =
+      issymmetric(1, 1) && issymmetric(3, 2) && ! issymmetric(3, 3) &&
+      issymmetric(0x65a6, 16) && ! issymmetric(0x65a6, 20);
+    if (ok) return 0;
+    std::cerr << "ValidNodesList::test FAILed.\n";
+    return 1;
+  }
+
+private:
+  int ntot_, nsub_;
+  bool symmetric_;
+  std::vector<int> nodes_;
+
+  // Number of 1 bits in n.
+  static int num1s (int n) {
+    int ones = 0;
+    for ( ; n; n = n >> 1)
+      if (1 & n) ++ones;
+    return ones;
+  }
+
+  // Is the low nslot-bit pattern of n equal to n when its bit order is
+  // reversed?
+  static bool issymmetric (const int n, const int nslot) {
+    int remaining = n, reversed = 0;
+    for (int slot = 0; slot < nslot; ++slot) {
+      reversed = (reversed << 1) | (1 & remaining);
+      remaining = remaining >> 1;
+    }
+    return reversed == n;
+  }
+};
+
+namespace find_nodal_subset_bases {
+// Interpolation method defined by a general nodal subset basis on GLL nodes:
+// each region carries an explicit list of the nodes that support its Lagrange
+// interpolant. Also runs the staged analysis that decides whether a candidate
+// basis is worth recording.
+struct NsbSearchAtom : public UserInterpMethod {
+  struct Input {
+    static const int np_max = islet::np_max;
+    // Small integer type for the node tables.
+    // NOTE(review): plain char has implementation-defined signedness, and the
+    // constructor stores -1 in np; confirm this is acceptable on all targets.
+    typedef char SInt;
+
+    // Number of nodes; -1 until set by the caller.
+    SInt np;
+    // nodes[i][0..subnp[i]) are the node indices supporting region i.
+    SInt nodes[np_max-1][np_max];
+    SInt subnp[np_max-1];
+    // Threshold on max |lambda| - 1 for a basis to pass.
+    Real maxeigampm1;
+    // Parameters of the final max-eigenvalue-amplitude run.
+    Int ne, neigdx;
+    bool quiet;
+
+    Input () {
+      np = -1;
+      maxeigampm1 = 1e-13;
+      ne = 1111;
+      neigdx = ne;
+      quiet = true;
+    }
+  };
+
+  // mea_threaded is forwarded to the MaxEigComputer constructor.
+  NsbSearchAtom (const bool mea_threaded = false)
+    : max_eig_amp_(mea_threaded)
+  {}
+
+  // Analyze the basis described by in, cheapest tests first.
+  // Outputs: metrics always; all_pve_wts and wtr once the node metrics are
+  // accepted; pum_metric only if the first eigenvalue pass succeeds.
+  // Returns 2 if the node metrics are rejected or the tracker would not record
+  // the result, 1 if some weight is not positive, and otherwise the value
+  // returned by a max-eigenvalue-amplitude run. Callers compare the result
+  // against in.maxeigampm1, so 1 and 2 act as rejections.
+  Real run (const Input& in,
+            bool& all_pve_wts, Real& wtr,
+            MetricsTracker& mt, Real* metrics, Real& pum_metric) {
+    in_ = in;
+    Nodes nodes(in.np);
+    for (Int i = 0; i < nodes.get_nh(); ++i)
+      nodes.set(i, in.nodes[i], in.subnp[i]);
+    assert(nodes.ok_to_eval());
+    calc_xnodes_metrics(nodes, get_xnodes(), metrics);
+    if ( ! mt.acceptable_metrics(nodes, get_xnodes(), metrics)) return 2;
+    const Real pum_to_accept = mt.pum_to_accept(nodes, get_xnodes(), metrics);
+    {
+      Real wt[islet::np_max];
+      calc_weights(nodes, get_xnodes(), wt);
+      calc_wts_metrics(in.np, wt, all_pve_wts, wtr);
+      if ( ! all_pve_wts) return 1;
+    }
+    // Coarse first pass (ne = neigdx = 11) to reject quickly.
+    Int ne, neigdx;
+    ne = neigdx = 11;
+    auto maxeigampm1 = max_eig_amp_.run(in.np, ne, neigdx, in.maxeigampm1,
+                                        in.quiet, this);
+    if (maxeigampm1 > in.maxeigampm1) return maxeigampm1;
+    pum_metric = calc_pum_metric(*this, max_eig_amp_.is_threaded(),
+                                 pum_to_accept);
+    if ( ! mt.would_update(metrics, pum_metric)) return 2;
+    // Final pass with the requested ne and neigdx.
+    return max_eig_amp_.run(in.np, in.ne, in.neigdx, in.maxeigampm1,
+                            in.quiet, this);
+  }
+
+  // Evaluate the np basis functions at x into v, using the Input most recently
+  // passed to run().
+  void eval (const Real& x, Real* const v) override {
+    eval(in_.np, in_.nodes, in_.subnp, x, v);
+  }
+
+  Int get_np () const override { return in_.np; }
+
+  const Real* get_xnodes () const override {
+    return islet::get_x_gll(in_.np);
+  }
+
+  // Evaluate the nodal subset basis at x. nodes and subnp have the layout
+  // described in Input.
+  static void eval (
+    const Int& np, const Input::SInt nodes[][Input::np_max],
+    const Input::SInt subnp[], const Real& x, Real* const v)
+  {
+    if (x > 0) {
+      // Evaluate at the mirrored point and reverse the basis values.
+      eval(np, nodes, subnp, -x, v);
+      for (int i = 0; i < np/2; ++i)
+        std::swap(v[i], v[np-i-1]);
+      return;
+    }
+    const auto x_gll = islet::get_x_gll(np);
+    Real xsub[Input::np_max], vsub[Input::np_max];
+    for (Int i = 0; i < np-1; ++i) {
+      // Advance to the region containing x; the last region takes whatever
+      // remains.
+      if (i < np-2 && x > x_gll[i+1]) continue;
+      if (subnp[i] == np) {
+        eval_lagrange_poly(x_gll, np, x, v);
+      } else {
+#ifndef NDEBUG
+        { // Subregion's nodes must be included for the basis to be
+          // interpolatory.
+          int fnd = 0;
+          for (Int j = 0; j < subnp[i]; ++j) {
+            const auto nij = nodes[i][j];
+            if (nij == i || nij == i+1) ++fnd;
+          }
+          if (fnd != 2) {
+            pr(puf(np) pu(i) pu(x));
+            islet::prarr("nodes[i]", nodes[i], subnp[i]);
+          }
+          assert(fnd == 2);
+        }
+#endif
+        // Gather the coordinates of the region's support nodes.
+        for (Int j = 0; j < subnp[i]; ++j)
+          xsub[j] = x_gll[nodes[i][j]];
+        // Lagrange polynomial basis.
+        std::fill(v, v + np, 0);
+        eval_lagrange_poly(xsub, subnp[i], x, vsub);
+        // Scatter the subset values to their node slots; the rest stay 0.
+        for (Int j = 0; j < subnp[i]; ++j)
+          v[nodes[i][j]] = vsub[j];
+      }
+      break;
+    }
+  }
+
+private:
+  Input in_;
+  MaxEigComputer max_eig_amp_;
+};
+
+// Values of np searched by the zero-argument run().
+static const int nps[] = {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16};
+
+// One candidate nodal subset basis: for region i, subnp[i] is the number of
+// support nodes and nodes[i][0..subnp[i]) are their indices.
+struct Basis {
+  static const int np_max = NsbSearchAtom::Input::np_max;
+  int np;
+  std::array<int,np_max-1> subnp;
+  std::array<std::array<int,np_max>,np_max-1> nodes;
+};
+
+// Progress bookkeeping for the search. While just_count is true, candidates
+// are only tallied in value; total is printed as the denominator of the
+// progress report.
+struct Count {
+  typedef std::shared_ptr<Count> Ptr;
+  bool just_count;
+  size_t value, total;
+  // Starts in counting mode with both tallies at zero.
+  Count () {
+    just_count = true;
+    value = 0;
+    total = 0;
+  }
+};
+
+// Fill p[0..n) with a pseudo-random permutation of 0..n-1 by starting from the
+// identity and applying 5*n swaps of randomly chosen pairs. Does nothing for
+// n <= 0.
+void randperm (Int* const p, const Int n) {
+  using islet::urand;
+  // Use signed counters: comparing an unsigned counter against a negative n
+  // would convert n to a huge value and run far past the end of p.
+  if (n <= 0) return;
+  for (Int i = 0; i < n; ++i) p[i] = i;
+  const Int nswap = 5*n;
+  for (Int s = 0; s < nswap; ++s) {
+    // Clamp the scaled draws to n-1 so that a draw of exactly 1 cannot index
+    // one past the end. NOTE(review): confirm whether urand() can return 1.
+    const Int j = std::min<Int>(n-1, static_cast<Int>(urand()*n));
+    const Int k = std::min<Int>(n-1, static_cast<Int>(urand()*n));
+    std::swap(p[j], p[k]);
+  }
+}
+
+// Enumerate every assignment of a value in [np_min, np] to slots pos, pos-1,
+// ..., 0 of subnp. Each time slot 0 is assigned, the first np/2 entries of
+// subnp are appended to subnp_list.
+void make_subnp_list_recur (const Int np, const Int np_min,
+                            std::vector<char>& subnp_list,
+                            char* subnp, const Int pos) {
+  const bool last_slot = pos <= 0;
+  const Int ntuple = np/2;
+  for (Int order = np_min; order <= np; ++order) {
+    subnp[pos] = order;
+    if ( ! last_slot) {
+      make_subnp_list_recur(np, np_min, subnp_list, subnp, pos-1);
+      continue;
+    }
+    // Tuple complete: record it.
+    for (Int slot = 0; slot < ntuple; ++slot)
+      subnp_list.push_back(subnp[slot]);
+  }
+}
+
+// Append to subnp_list every tuple of np/2 subset sizes, each in
+// [np_min, np], stored back to back (np/2 chars per tuple). Appends nothing
+// when np < 2, since there is then no slot to fill.
+void make_subnp_list (const Int np, const Int np_min,
+                      std::vector<char>& subnp_list) {
+  const Int ntuple = np/2;
+  // Without this guard the recursion would start at slot -1 and write before
+  // the start of the scratch array.
+  if (ntuple < 1) return;
+  // The scratch array below holds at most islet::np_max slots.
+  assert(ntuple <= islet::np_max);
+  char subnp[islet::np_max];
+  make_subnp_list_recur(np, np_min, subnp_list, subnp, ntuple-1);
+}
+
+// State needed to resume an interrupted nodal subset basis search.
+struct Restarter {
+  typedef std::shared_ptr<Restarter> Ptr;
+  // Tracker whose state is saved alongside the counters.
+  MetricsTracker::Ptr mt;
+  int np, min_ooa;
+  // Batches are numbered by eval_count; batches numbered below
+  // dont_eval_if_below are skipped rather than evaluated.
+  size_t dont_eval_if_below, eval_count;
+
+  // Note that dont_eval_if_below_ arrives as int and is stored as size_t.
+  Restarter (const MetricsTracker::Ptr& mt_, const int np_, const int min_ooa_,
+             const int dont_eval_if_below_)
+    : mt(mt_), np(np_), min_ooa(min_ooa_), dont_eval_if_below(dont_eval_if_below_),
+      eval_count(0)
+  {}
+
+  // Save the state to filename, or to a default name derived from np if
+  // filename is empty. Returns true on success.
+  bool write(std::string filename = "");
+};
+
+// Write the restart state as a binary file: np, min_ooa, eval_count (twice),
+// then the MetricsTracker. An empty filename selects
+// "NsbSearchAtomRestart_np<np>.dat". Returns false as soon as any write fails.
+bool Restarter::write (std::string filename) {
+  using islet::write;
+  if (filename == "") {
+    std::stringstream name;
+    name << "NsbSearchAtomRestart_np" << np << ".dat";
+    filename = name.str();
+  }
+  std::ofstream os(filename.c_str(), std::ofstream::binary);
+  assert(mt);
+  if ( ! write(os, np)) return false;
+  if ( ! write(os, min_ooa)) return false;
+  // The current eval_count is stored twice on purpose: the first copy is read
+  // back as the new dont_eval_if_below value, the second as eval_count.
+  if ( ! write(os, eval_count)) return false;
+  if ( ! write(os, eval_count)) return false;
+  if ( ! mt->write(os)) return false;
+  return true;
+}
+
+// Load the restart state for np from "NsbSearchAtomRestart_np<np>.dat".
+// Returns null if the file cannot be opened, if the np stored in it differs
+// from the requested np, or if any read fails.
+static Restarter::Ptr read_restart (const int np) {
+  using islet::read;
+  std::stringstream ss;
+  ss << "NsbSearchAtomRestart_np" << np << ".dat";
+  std::ifstream is(ss.str().c_str(), std::ifstream::binary);
+  if ( ! is.is_open()) return nullptr;
+  const auto mt = std::make_shared<MetricsTracker>(np);
+  const auto r = std::make_shared<Restarter>(mt, np, 0, 0);
+  // Fields are read in the order Restarter::write stores them.
+  const bool ok = (read(is, r->np) && r->np == np &&
+                   read(is, r->min_ooa) &&
+                   read(is, r->dont_eval_if_below) &&
+                   read(is, r->eval_count) &&
+                   r->mt->read(is));
+  if ( ! ok) return nullptr;
+  return r;
+}
+
+// Analyze the first ninput entries of input_list, in random order, and print
+// every basis that passes.
+//   esa            one NsbSearchAtom per thread; if it has more than one
+//                  entry, the inputs are processed in an OpenMP parallel loop.
+//   mt             tracker consulted by each analysis and updated on success.
+//   show_progress  print a progress line roughly every 5% of the inputs.
+//   count          optional tally; in just_count mode the inputs are counted
+//                  and nothing is analyzed.
+void eval (std::vector<NsbSearchAtom>& esa,
+           const std::vector<NsbSearchAtom::Input>& input_list,
+           const Int ninput, MetricsTracker& mt, const bool show_progress,
+           const Count::Ptr& count) {
+  if (count) count->value += ninput;
+  if (count && count->just_count) return;
+  // value and total are size_t, so they are printed with %zu.
+  if (count)
+    printf("NsbSearchAtom::eval %zu/%zu (%5.1f%%)\n",
+           count->value, count->total, 100*Real(count->value)/count->total);
+  // Visit the inputs in a random order.
+  std::vector<Int> perm(ninput);
+  randperm(perm.data(), ninput);
+  Real progress = 0;
+  const auto run1 = [&] (const Int ili) {
+    {
+      const Real pdelta = 0.05;
+      const Real p = Real(ili)/ninput;
+      if (show_progress && p >= progress + pdelta) {
+        // The condition is re-tested inside the critical section so that only
+        // one thread reports each step.
+#       pragma omp critical (NsbSearchAtom_progress)
+        {
+          const Real p = Real(ili)/ninput;
+          if (p >= progress + pdelta) {
+            printf("progress: %5.1f%% (%8d)\n", 100*p, ili);
+            progress = p;
+          }
+        }
+      }
+    }
+    const auto& in = input_list[perm[ili]];
+    auto& my_esa = esa[omp_get_thread_num()];
+    bool all_pve_wts;
+    Real wtr, metrics[3], pum_metric;
+    const auto maxeigampm1 = my_esa.run(in, all_pve_wts, wtr, mt, metrics,
+                                        pum_metric);
+    if (maxeigampm1 > in.maxeigampm1) return;
+    // The basis passed: record and print it, one thread at a time.
+#   pragma omp critical (NsbSearchAtom_eval)
+    {
+      mt.update(metrics, pum_metric);
+      // Only the first np/2 regions are printed.
+      const int n = in.np/2;
+      printf("meam1 %9.2e w>0 %d wtr %8.2e npm %8.2e %8.2e %8.2e pum %9.2e | ",
+             maxeigampm1, all_pve_wts, wtr, metrics[0], metrics[1], metrics[2],
+             pum_metric);
+      printf("np %2d subnp", in.np);
+      for (int i = 0; i < n; ++i) printf(" %d", in.subnp[i]);
+      printf(" nodes");
+      for (int i = 0; i < n; ++i) {
+        printf(" |");
+        for (int j = 0; j < in.subnp[i]; ++j)
+          printf(" %d", in.nodes[i][j]);
+      }
+      printf("\n");
+    }
+  };
+
+  if (esa.size() > 1) {
+#   pragma omp parallel for schedule(dynamic,1)
+    for (Int ili = 0; ili < ninput; ++ili)
+      run1(ili);
+  } else {
+    for (Int ili = 0; ili < ninput; ++ili)
+      run1(ili);
+  }
+}
+
+// Depth-first enumeration of node supports for regions pos, pos-1, ..., 0 of
+// basis b, whose subnp entries are already set. Each completed candidate
+// (pos == -1) is stored in input_list; when the list is full, the batch is
+// either evaluated or, when resuming, skipped.
+//   vnls, vnls_mid_reg  node-support lists indexed by subset size; the second
+//                       is used for the middle region of an even-np element.
+//   input_list_pos      number of candidates currently queued in input_list.
+//   restarter           batch counters and restart-file writer.
+void recur (const int np, const std::vector<ValidNodesList::Ptr>& vnls,
+            const std::vector<ValidNodesList::Ptr>& vnls_mid_reg,
+            Basis b, const int pos, std::vector<NsbSearchAtom>& esa,
+            std::vector<NsbSearchAtom::Input>& input_list,
+            Int& input_list_pos, MetricsTracker& mt, const Count::Ptr& count,
+            Restarter& restarter) {
+  if (pos == -1) {
+    // Queue the completed candidate.
+    NsbSearchAtom::Input& in = input_list[input_list_pos];
+    in.np = np;
+    in.maxeigampm1 = 1e-13;
+    for (int i = 0; i < b.np-1; ++i)
+      in.subnp[i] = b.subnp[i];
+    for (int i = 0; i < b.np-1; ++i)
+      for (int j = 0; j < b.subnp[i]; ++j)
+        in.nodes[i][j] = b.nodes[i][j];
+    ++input_list_pos;
+    if (static_cast<size_t>(input_list_pos) == input_list.size()) {
+      if (restarter.eval_count >= restarter.dont_eval_if_below) {
+        // Run a bunch of analyses in parallel.
+        eval(esa, input_list, input_list_pos, mt, false, count);
+        if ( ! (count && count->just_count)) {
+          // eval_count is size_t, so it is printed with %zu.
+          if ( ! count)
+            printf("restart eval_count %zu\n", restarter.eval_count);
+          ++restarter.eval_count;
+          restarter.write();
+        }
+      } else {
+        // This batch was handled before the restart: account for it without
+        // evaluating it.
+        if (count) count->value += input_list_pos;
+        if ( ! (count && count->just_count)) ++restarter.eval_count;
+      }
+      input_list_pos = 0;
+    }
+    return;
+  }
+  // Set up a basis.
+  const auto& vnl = (np % 2 == 0 && pos == np/2-1) ?
+    vnls_mid_reg[b.subnp[pos]] : vnls[b.subnp[pos]];
+  // begin(pos) visits only supports that contain nodes pos and pos+1.
+  for (auto it = vnl->begin(pos); it != vnl->end(); ++it) {
+    it.get_nodes(b.nodes[pos].data());
+    recur(np, vnls, vnls_mid_reg, b, pos-1, esa, input_list, input_list_pos,
+          mt, count, restarter);
+  }
+}
+
+// Enumerate and analyze nodal subset bases with np nodes.
+//   min_ooa             every region uses at least min_ooa+1 nodes; a value
+//                       <= 0 selects a default that depends on np.
+//   mt                  tracker to use; one is created if null.
+//   count               optional tally; in just_count mode candidates are only
+//                       counted.
+//   dont_eval_if_below  number of leading batches to skip when resuming.
+// The return value carries no information; it is always 0.
+static Int run (const int np, int min_ooa = -1,
+                MetricsTracker::Ptr mt = nullptr,
+                const Count::Ptr count = nullptr,
+                const int dont_eval_if_below = 0) {
+  assert(np <= NsbSearchAtom::Input::np_max);
+  // With np < 2 there is no region to search, and the tuple size np/2 used as
+  // a divisor below would be 0.
+  if (np < 2) {
+    std::cerr << "find_nodal_subset_bases::run: np must be >= 2 but is "
+              << np << "\n";
+    return 0;
+  }
+  if ( ! mt) mt = std::make_shared<MetricsTracker>(np);
+  // For np >= 9, parallelize over candidates with one atom per thread;
+  // otherwise use a single atom constructed with mea_threaded = true.
+  const bool thread_toplevel = np >= 9;
+  std::vector<NsbSearchAtom> esa;
+  if (thread_toplevel)
+    esa.resize(omp_get_max_threads());
+  else
+    esa.emplace_back(true);
+  // Candidates are queued here and evaluated one full batch at a time.
+  std::vector<NsbSearchAtom::Input> input_list(thread_toplevel ?
+                                               1 << 22 :
+                                               1 << 12);
+  Int input_list_pos = 0;
+  const bool just_count = count && count->just_count;
+  if ( ! just_count) printf("np %2d\n", np);
+  if (min_ooa <= 0) {
+    if (np == 5)      min_ooa = 2;
+    else if (np <= 6) min_ooa = np-2;
+    else if (np <= 9) min_ooa = np-3;
+    else              min_ooa = np-4;
+  }
+  // Node-support lists for each permitted subset size; the second family holds
+  // only symmetric supports, for the middle region.
+  std::vector<ValidNodesList::Ptr> vnls(np+1), vnls_mid_reg(np+1);
+  for (int min_np = np; min_np >= min_ooa+1; --min_np)
+    vnls[min_np] = std::make_shared<ValidNodesList>(np, min_np);
+  for (int min_np = np; min_np >= min_ooa+1; --min_np)
+    vnls_mid_reg[min_np] = std::make_shared<ValidNodesList>(np, min_np, true);
+  std::vector<char> subnp_list;
+  make_subnp_list(np, min_ooa+1, subnp_list);
+  Restarter restarter(mt, np, min_ooa, dont_eval_if_below);
+  const Int sz = np/2;
+  const Int n = subnp_list.size()/sz;
+  for (Int isubnp = 0; isubnp < n; ++isubnp) {
+    const char* subnp = &subnp_list[isubnp*sz];
+    // Value-initialize so that node rows the enumeration never writes hold
+    // zeros rather than indeterminate values when the basis is copied.
+    Basis b = Basis();
+    b.np = np;
+    // Mirror the subset sizes onto the right half of the element.
+    for (Int j = 0; j < sz; ++j) {
+      b.subnp[j] = subnp[j];
+      b.subnp[np-2-j] = subnp[j];
+    }
+    recur(np, vnls, vnls_mid_reg, b, b.np/2 - 1, esa,
+          input_list, input_list_pos, *mt, count, restarter);
+  }
+  // Evaluate the final, partially filled batch.
+  if (input_list_pos > 0)
+    eval(esa, input_list, input_list_pos, *mt, true, count);
+  return 0;
+}
+
+// Run the nodal subset basis search, with default arguments, for every entry
+// of nps in order.
+static void run () {
+  const int num_np = sizeof(nps)/sizeof(nps[0]);
+  for (int k = 0; k < num_np; ++k)
+    run(nps[k]);
+}
+} // namespace find_nodal_subset_bases
+
+// Run the nodal subset basis search for np with the given tracker. For
+// np <= 10 the candidates are first counted in a dry run so that the real run
+// can report progress against the total; for larger np the search is run
+// directly, without a count. eval_count is the number of leading batches to
+// skip when resuming.
+static void find_nodal_subset_bases_given_mt (
+  const int np, const MetricsTracker::Ptr& mt, const int min_ooa = -1,
+  const int eval_count = 0)
+{
+  if (np > 10) {
+    find_nodal_subset_bases::run(np, min_ooa, mt, nullptr, eval_count);
+    return;
+  }
+  // Pass 1: count only. A new Count starts with just_count set.
+  const auto count = std::make_shared<find_nodal_subset_bases::Count>();
+  find_nodal_subset_bases::run(np, min_ooa, mt, count);
+  // value is size_t, so it is printed with %zu.
+  printf("count %zu\n", count->value);
+  // Pass 2: the real search, reporting progress against the total.
+  count->total = count->value;
+  count->value = 0;
+  count->just_count = false;
+  find_nodal_subset_bases::run(np, min_ooa, mt, count, eval_count);
+}
+
+// Search for nodal subset bases for np, seeded by the offset nodal subset
+// search. If a restart file is available (and restart_if_available is set),
+// resume from it instead of running the seeding search.
+static void find_nodal_given_best_offset_nodal (
+  const int np, const bool restart_if_available = true)
+{
+  typedef find_nodal_subset_bases::Restarter::Ptr RestarterPtr;
+  const RestarterPtr restarter = restart_if_available ?
+    find_nodal_subset_bases::read_restart(np) : RestarterPtr();
+  if (restarter) {
+    find_nodal_subset_bases_given_mt(np, restarter->mt, restarter->min_ooa,
+                                     restarter->eval_count);
+    return;
+  }
+  // Fresh start: the offset search populates the tracker and returns the
+  // subset size at which it found good bases; the general search then uses one
+  // less than that as its min_ooa.
+  const auto mt = std::make_shared<MetricsTracker>(np);
+  const Int max_good_np = find_offset_nodal_subset_bases::run(np, mt);
+  const Int min_ooa = max_good_np-1;
+  mt->set_pum_max(mt->get_pum_min());
+  find_nodal_subset_bases_given_mt(np, mt, min_ooa);
+}
+
+// Run the unit tests of this program and print one PASS/FAIL line to stdout.
+static void run_general_unittests () {
+  int nerr = 0;
+  // GLL tables for np = 2..7: the weights sum to 2, nodes and weights are
+  // mirror symmetric, and the nodes are in nondecreasing order.
+  for (int np = 2; np <= 7; ++np) {
+    const auto x = islet::get_x_gll(np);
+    const auto w = islet::get_w_gll(np);
+    Real wsum = 0;
+    for (int j = 0; j < np; ++j) wsum += w[j];
+    if (islet::reldif(2, wsum) >= 1e-14) ++nerr;
+    for (int j = 0; j < np/2; ++j) {
+      const int mirror = np-j-1;
+      if (w[j] != w[mirror]) ++nerr;
+      if (x[j] != -x[mirror]) ++nerr;
+    }
+    for (int j = 1; j < np; ++j)
+      if (x[j] < x[j-1]) ++nerr;
+  }
+  nerr += MaxEigComputer::unittest();
+  nerr += SearchAtom::unittest();
+  nerr += ValidNodesList::test();
+  std::cout << (nerr ? "FAIL" : "PASS") << " unit test\n";
+}
+
+// Commands accepted as the first command-line argument.
+struct Command {
+  enum Enum { unittest, findoffsetnodal, findnodal,
+              finduniform, findlegendre, findcheb,
+              findnodal_given_bestosn};
+  // Map a command name to its enumerator; throws std::logic_error if s is not
+  // a command name.
+  static Enum convert (const std::string& s) {
+    static const struct { const char* name; Enum value; } commands[] = {
+      {"unittest", unittest},
+      {"findoffsetnodal", findoffsetnodal},
+      {"findnodal", findnodal},
+      {"findnodal_given_bestosn", findnodal_given_bestosn},
+      {"finduniform", finduniform},
+      {"findlegendre", findlegendre},
+      {"findcheb", findcheb}
+    };
+    for (const auto& c : commands)
+      if (s == c.name) return c.value;
+    throw std::logic_error("Not a command.");
+  }
+};
+
+// Usage: <program> <command> [np]
+// <command> is one of the names accepted by Command::convert. np is optional
+// for unittest and the findoffsetnodal/finduniform/findlegendre/findcheb
+// searches (which then run over a built-in list of np values) and is required
+// by findnodal and findnodal_given_bestosn. Returns -1 on a usage error.
+int main (int argc, char** argv) {
+  if (argc < 2) {
+    std::cerr << argv[0] << " <command> <options>\n";
+    return -1;
+  }
+
+  using NosbBasis = SearchAtom::Input;
+
+  // Report an unknown command as a usage error rather than letting the
+  // exception from convert() escape main.
+  Command::Enum command;
+  try {
+    command = Command::convert(argv[1]);
+  } catch (const std::logic_error&) {
+    std::cerr << argv[0] << ": '" << argv[1] << "' is not a command.\n";
+    return -1;
+  }
+
+  // np == -1 means that no np was given on the command line.
+  Int np = -1;
+  if (argc > 2) np = std::atoi(argv[2]);
+  switch (command) {
+  case Command::unittest: {
+    run_general_unittests();
+  } break;
+  case Command::findoffsetnodal: {
+    // Pretty efficient search for stable offset nodal bases.
+    find_offset_nodal_subset_bases::runall(np);
+  } break;
+  case Command::findnodal: {
+    // This search has no all-np mode, and running it with np = -1 would index
+    // out of bounds, so require np.
+    if (np == -1) {
+      std::cerr << argv[0] << " findnodal np\n";
+      return -1;
+    }
+    // Search for stable nodal subset bases.
+    const auto restarter = find_nodal_subset_bases::read_restart(np);
+    if (restarter)
+      find_nodal_subset_bases_given_mt(np, restarter->mt, restarter->min_ooa,
+                                       restarter->eval_count);
+    else
+      find_nodal_subset_bases_given_mt(np, nullptr);
+  } break;
+  case Command::findnodal_given_bestosn: {
+    if (np == -1) {
+      std::cerr << argv[0] << " findnodal_given_bestosn np\n";
+      return -1;
+    }
+    find_nodal_given_best_offset_nodal(np);
+  } break;
+  case Command::finduniform:
+    find_offset_nodal_subset_bases::runall(np, NosbBasis::uniform); break;
+  case Command::findlegendre:
+    find_offset_nodal_subset_bases::runall(np, NosbBasis::legendre); break;
+  case Command::findcheb:
+    find_offset_nodal_subset_bases::runall(np, NosbBasis::cheb); break;
+  default:
+    throw std::logic_error("Not a command.");
+  }
+}
diff --git a/methods/slmm/Makefile b/methods/slmm/Makefile
index 0a231de..28f0f15 100644
--- a/methods/slmm/Makefile
+++ b/methods/slmm/Makefile
@@ -59,30 +59,5 @@ libslmm: $(SLMM_OBJECTS) slmm_c_compat.o
 clean:
 	rm -f *.o *.mod slmm_test slmmir libslmm.so *.gcov *.gcda *.gcno
 
-slmm_test.o: slmm_defs.hpp slmm_mesh.hpp slmm_gll.hpp slmm_io.hpp slmm_time_int.hpp slmm_gallery.hpp slmm_islet.hpp $(SIQK)/siqk.hpp $(SIQK)/siqk_sqr.hpp
-slmmir.o: slmm_defs.hpp slmm_util.hpp slmm_mesh.hpp slmm_gll.hpp slmm_io.hpp slmm_time_int.hpp slmm_gallery.hpp slmm_nla.hpp slmm_spf.hpp slmm_fit_extremum.hpp slmmir_time_int.hpp $(SIQK)/siqk.hpp $(SIQK)/siqk_quadrature.hpp $(SIQK)/siqk_sqr.hpp slmmir_remapper.hpp slmmir_remap_data.hpp slmmir_mono_data.hpp slmm_islet.hpp slmm_accum.hpp slmm_vis.hpp slmmir_time_int.hpp
-slmmir_time_int.o: slmm_gallery.hpp slmm_time_int.hpp slmmir_p_refine.hpp slmmir_time_int.hpp
-slmmir_time_int_exp.o: slmm_gallery.hpp slmm_time_int.hpp
-slmm_test.o: slmm_defs.hpp slmm_util.hpp slmm_mesh.hpp slmm_gll.hpp slmm_io.hpp slmm_time_int.hpp slmm_gallery.hpp slmm_nla.hpp slmm_spf.hpp $(SIQK)/siqk.hpp $(SIQK)/siqk_sqr.hpp
-slmm_mesh.o: slmm_mesh.hpp $(SIQK)/siqk.hpp slmm_array.hpp
-slmm_spf.o: slmm_spf.hpp $(SIQK)/siqk.hpp
-slmm_io.o: slmm_io.hpp
-slmm_nla.o: slmm_nla.hpp
-slmm_time_int.o: slmm_time_int.hpp
-slmm_gallery.o: slmm_gallery.hpp
-slmm_util.o: slmm_util.hpp
-slmmir_remap_data.o: slmmir_remap_data.hpp
-slmmir_p_refine.o: slmmir_p_refine.hpp
-slmmir_mono_data.o: slmmir_mono_data.hpp
-slmmir_remapper.o: slmmir_remapper.hpp slmmir_remap_data.hpp slmmir_mono_data.hpp slmmir_util.hpp slmmir.hpp slmmir_physgrid.hpp
-slmmir_remapper_isl.o: slmmir_remapper.hpp slmmir_remap_data.hpp slmmir_mono_data.hpp slmmir_p_refine.hpp slmm_islet.hpp slmmir.hpp slmmir_physgrid.hpp slmm_accum.hpp
-slmmir_util.o: slmmir_util.hpp
-slmmir_p_refine.o: slmmir_p_refine.hpp slmm_islet.hpp
-slmm_islet.o: slmm_islet.hpp
-slmm_islet_string.o: slmm_islet.hpp
-slmm_accum.o: slmm_accum.hpp
-slmm_vis.o: slmm_vis.hpp
-slmmir_physgrid.o: slmmir_physgrid.hpp
-physgrid.o: slmmir_physgrid.hpp
-slmm_basis.o: slmm_basis.hpp
-slmm_basis_reduced.o: slmm_islet.hpp slmm_basis_reduced.hpp
+# Generated file; regenerate by running `bash make-depends.sh`.
+include make.depends
diff --git a/methods/slmm/make-depends.sh b/methods/slmm/make-depends.sh
new file mode 100644
index 0000000..0bb5cf3
--- /dev/null
+++ b/methods/slmm/make-depends.sh
@@ -0,0 +1,3 @@
+for i in *.cpp; do  # One dependency rule per translation unit in this directory.
+    g++ -I../../siqk -MM "$i"  # -MM omits system headers; quoting guards against word splitting.
+done > make.depends  # Overwrites the file that the Makefile includes.
diff --git a/methods/slmm/make.depends b/methods/slmm/make.depends
new file mode 100644
index 0000000..b938f11
--- /dev/null
+++ b/methods/slmm/make.depends
@@ -0,0 +1,220 @@
+physgrid.o: physgrid.cpp slmm_gll.hpp slmm_basis.hpp slmm_defs.hpp \
+ ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_mesh.hpp slmm_array_tree.hpp \
+ slmm_vis.hpp slmmir_util.hpp slmmir_mesh.hpp slmmir.hpp slmm_util.hpp \
+ slmm_spf.hpp slmmir_physgrid.hpp slmm_nla.hpp slmm_gallery.hpp \
+ slmm_time_int.hpp slmmir_d2c.hpp
+slmm_accum.o: slmm_accum.cpp slmm_defs.hpp ../../siqk/siqk.hpp \
+ ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \
+ ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \
+ ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \
+ slmm_accum.hpp
+slmm_array_tree.o: slmm_array_tree.cpp slmm_array_tree.hpp slmm_defs.hpp \
+ ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp
+slmm_basis.o: slmm_basis.cpp slmm_basis.hpp slmm_defs.hpp \
+ ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_gll.hpp slmm_islet.hpp \
+ slmm_basis_reduced.hpp slmm_util.hpp
+slmm_basis_reduced.o: slmm_basis_reduced.cpp slmm_basis_reduced.hpp \
+ slmm_islet.hpp slmm_gll.hpp slmm_basis.hpp slmm_defs.hpp \
+ ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp
+slmm_c_compat.o: slmm_c_compat.cpp slmm_c_compat.hpp slmm_spf.hpp \
+ slmm_defs.hpp ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_gll.hpp slmm_basis.hpp \
+ slmm_mesh.hpp slmm_array_tree.hpp slmm_util.hpp
+slmm_fit_extremum.o: slmm_fit_extremum.cpp slmm_fit_extremum.hpp \
+ slmm_defs.hpp ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_nla.hpp slmm_util.hpp \
+ slmm_gll.hpp slmm_basis.hpp ../../siqk/siqk_geometry.hpp
+slmm_gallery.o: slmm_gallery.cpp slmm_gallery.hpp slmm_defs.hpp \
+ ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_time_int.hpp slmm_util.hpp
+slmm_io.o: slmm_io.cpp slmm_io.hpp slmm_defs.hpp ../../siqk/siqk.hpp \
+ ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \
+ ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \
+ ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \
+ slmm_util.hpp
+slmmir.o: slmmir.cpp slmm_defs.hpp ../../siqk/siqk.hpp \
+ ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \
+ ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \
+ ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \
+ slmm_mesh.hpp slmm_array_tree.hpp slmm_spf.hpp slmm_gll.hpp \
+ slmm_basis.hpp slmm_io.hpp slmm_nla.hpp slmm_util.hpp slmm_gallery.hpp \
+ slmm_time_int.hpp slmm_accum.hpp slmm_debug.hpp slmm_fit_extremum.hpp \
+ slmm_vis.hpp slmmir_util.hpp slmmir_mesh.hpp slmmir.hpp \
+ slmmir_remap_data.hpp slmmir_mono_data.hpp slmmir_remapper.hpp \
+ slmmir_p_refine.hpp slmmir_physgrid.hpp slmmir_d2c.hpp \
+ slmmir_time_int.hpp slmmir_lauritzen_diag.hpp slmmir_snapshot.hpp
+slmmir_d2c.o: slmmir_d2c.cpp slmmir_d2c.hpp slmmir.hpp slmm_defs.hpp \
+ ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_util.hpp slmm_spf.hpp
+slmmir_lauritzen_diag.o: slmmir_lauritzen_diag.cpp slmmir_p_refine.hpp \
+ slmmir_mono_data.hpp slmm_gll.hpp slmm_basis.hpp slmm_defs.hpp \
+ ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_gallery.hpp \
+ slmm_time_int.hpp slmm_util.hpp slmmir_mesh.hpp slmmir_remap_data.hpp \
+ slmm_nla.hpp slmmir.hpp slmm_spf.hpp slmmir_lauritzen_diag.hpp \
+ slmmir_d2c.hpp
+slmmir_mono_data.o: slmmir_mono_data.cpp slmmir_mono_data.hpp \
+ slmm_gll.hpp slmm_basis.hpp slmm_defs.hpp ../../siqk/siqk.hpp \
+ ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \
+ ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \
+ ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \
+ slmm_gallery.hpp slmm_time_int.hpp slmm_util.hpp slmmir_mesh.hpp \
+ slmmir_remap_data.hpp slmm_nla.hpp slmmir.hpp slmm_spf.hpp
+slmmir_physgrid.o: slmmir_physgrid.cpp slmmir_physgrid.hpp slmm_nla.hpp \
+ slmm_array.hpp slmm_util.hpp slmm_defs.hpp ../../siqk/siqk.hpp \
+ ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \
+ ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \
+ ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_gallery.hpp \
+ slmm_time_int.hpp slmm_vis.hpp slmmir.hpp slmm_spf.hpp slmmir_d2c.hpp \
+ slmmir_mesh.hpp slmm_basis.hpp slmm_mesh.hpp slmm_array_tree.hpp \
+ slmm_gll.hpp slmm_io.hpp slmm_debug.hpp slmm_basis_reduced.hpp \
+ slmm_islet.hpp slmmir_util.hpp
+slmmir_p_refine.o: slmmir_p_refine.cpp slmmir_p_refine.hpp \
+ slmmir_mono_data.hpp slmm_gll.hpp slmm_basis.hpp slmm_defs.hpp \
+ ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_gallery.hpp \
+ slmm_time_int.hpp slmm_util.hpp slmmir_mesh.hpp slmmir_remap_data.hpp \
+ slmm_nla.hpp slmmir.hpp slmm_spf.hpp slmmir_util.hpp
+slmmir_remap_data.o: slmmir_remap_data.cpp slmmir_remap_data.hpp \
+ slmm_nla.hpp slmm_array.hpp slmm_util.hpp slmm_defs.hpp \
+ ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmmir.hpp slmm_spf.hpp slmmir_mesh.hpp \
+ slmm_basis.hpp slmmir_util.hpp slmm_gll.hpp
+slmmir_remapper.o: slmmir_remapper.cpp slmm_mesh.hpp slmm_defs.hpp \
+ ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_array_tree.hpp \
+ slmmir_remapper.hpp slmm_fit_extremum.hpp slmmir_p_refine.hpp \
+ slmmir_mono_data.hpp slmm_gll.hpp slmm_basis.hpp slmm_gallery.hpp \
+ slmm_time_int.hpp slmm_util.hpp slmmir_mesh.hpp slmmir_remap_data.hpp \
+ slmm_nla.hpp slmmir.hpp slmm_spf.hpp slmmir_physgrid.hpp slmm_vis.hpp \
+ slmmir_d2c.hpp slmmir_util.hpp
+slmmir_remapper_isl.o: slmmir_remapper_isl.cpp slmm_mesh.hpp \
+ slmm_defs.hpp ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_array_tree.hpp \
+ slmm_accum.hpp slmmir_remapper.hpp slmm_fit_extremum.hpp \
+ slmmir_p_refine.hpp slmmir_mono_data.hpp slmm_gll.hpp slmm_basis.hpp \
+ slmm_gallery.hpp slmm_time_int.hpp slmm_util.hpp slmmir_mesh.hpp \
+ slmmir_remap_data.hpp slmm_nla.hpp slmmir.hpp slmm_spf.hpp \
+ slmmir_physgrid.hpp slmm_vis.hpp slmmir_d2c.hpp slmmir_util.hpp
+slmmir_snapshot.o: slmmir_snapshot.cpp slmmir_snapshot.hpp slmm_defs.hpp \
+ ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_util.hpp slmm_gallery.hpp \
+ slmm_time_int.hpp slmmir_mesh.hpp slmm_basis.hpp slmmir_p_refine.hpp \
+ slmmir_mono_data.hpp slmm_gll.hpp slmmir_remap_data.hpp slmm_nla.hpp \
+ slmmir.hpp slmm_spf.hpp slmm_accum.hpp
+slmmir_test.o: slmmir_test.cpp slmm_defs.hpp ../../siqk/siqk.hpp \
+ ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \
+ ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \
+ ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \
+ slmm_mesh.hpp slmm_array_tree.hpp slmm_debug.hpp
+slmmir_time_int.o: slmmir_time_int.cpp slmmir_time_int.hpp \
+ slmm_time_int.hpp slmm_defs.hpp ../../siqk/siqk.hpp \
+ ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \
+ ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \
+ ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \
+ slmm_util.hpp slmm_gallery.hpp slmmir_mesh.hpp slmm_basis.hpp \
+ slmmir_p_refine.hpp slmmir_mono_data.hpp slmm_gll.hpp \
+ slmmir_remap_data.hpp slmm_nla.hpp slmmir.hpp slmm_spf.hpp
+slmmir_time_int_exp.o: slmmir_time_int_exp.cpp slmmir_time_int.hpp \
+ slmm_time_int.hpp slmm_defs.hpp ../../siqk/siqk.hpp \
+ ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \
+ ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \
+ ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \
+ slmm_util.hpp slmm_gallery.hpp slmmir_mesh.hpp slmm_basis.hpp \
+ slmmir_p_refine.hpp slmmir_mono_data.hpp slmm_gll.hpp \
+ slmmir_remap_data.hpp slmm_nla.hpp slmmir.hpp slmm_spf.hpp
+slmmir_util.o: slmmir_util.cpp slmmir_util.hpp slmm_basis.hpp \
+ slmm_defs.hpp ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp slmmir_mesh.hpp slmmir.hpp \
+ slmm_util.hpp slmm_spf.hpp slmm_gll.hpp
+slmm_islet.o: slmm_islet.cpp slmm_islet.hpp slmm_gll.hpp slmm_basis.hpp \
+ slmm_defs.hpp ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp
+slmm_islet_string.o: slmm_islet_string.cpp slmm_islet.hpp slmm_gll.hpp \
+ slmm_basis.hpp slmm_defs.hpp ../../siqk/siqk.hpp \
+ ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \
+ ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \
+ ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \
+ slmm_util.hpp
+slmm_mesh.o: slmm_mesh.cpp slmm_mesh.hpp slmm_defs.hpp \
+ ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_array_tree.hpp slmm_gll.hpp \
+ slmm_basis.hpp slmm_util.hpp slmm_islet.hpp
+slmm_nla.o: slmm_nla.cpp slmm_util.hpp slmm_defs.hpp ../../siqk/siqk.hpp \
+ ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \
+ ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \
+ ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \
+ slmm_nla.hpp
+slmm_spf.o: slmm_spf.cpp slmm_gll.hpp slmm_basis.hpp slmm_defs.hpp \
+ ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_spf.hpp slmm_nla.hpp \
+ slmm_util.hpp slmm_mesh.hpp slmm_array_tree.hpp slmm_accum.hpp
+slmm_spf_lqlt.o: slmm_spf_lqlt.cpp slmm_spf.hpp slmm_defs.hpp \
+ ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_array_tree.hpp
+slmm_test.o: slmm_test.cpp slmm_defs.hpp ../../siqk/siqk.hpp \
+ ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \
+ ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \
+ ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \
+ slmm_mesh.hpp slmm_array_tree.hpp slmm_io.hpp slmm_nla.hpp slmm_util.hpp \
+ slmm_spf.hpp slmm_time_int.hpp slmm_gallery.hpp slmm_debug.hpp \
+ slmm_fit_extremum.hpp slmm_gll.hpp slmm_basis.hpp slmm_islet.hpp \
+ slmm_basis_reduced.hpp
+slmm_time_int.o: slmm_time_int.cpp slmm_time_int.hpp slmm_defs.hpp \
+ ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_util.hpp
+slmm_util.o: slmm_util.cpp slmm_util.hpp slmm_defs.hpp \
+ ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \
+ ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \
+ ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp
+slmm_vis.o: slmm_vis.cpp slmm_vis.hpp slmm_defs.hpp ../../siqk/siqk.hpp \
+ ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \
+ ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \
+ ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \
+ slmm_io.hpp slmm_util.hpp ../../siqk/siqk_search.hpp \
+ ../../siqk/siqk_geometry.hpp
diff --git a/methods/slmm/slmm_basis.cpp b/methods/slmm/slmm_basis.cpp
index 5ba6ed9..dca8142 100644
--- a/methods/slmm/slmm_basis.cpp
+++ b/methods/slmm/slmm_basis.cpp
@@ -108,7 +108,7 @@ Int Basis::compute_and_print_weights (const Basis& basis, bool print_x, bool tes
   const Real* xnode;
   for (Int np = 2; np <= Basis::np_max; ++np) {
     // Basis need not support every np.
-    if ( ! basis.get_x(np, xnode)) continue;
+    if ( ! basis.get_w(np, xnode)) continue;
     Real integral[np_max] = {0};
     compute_weights(basis, np, integral);
     if ( ! test) {
diff --git a/methods/slmm/slmm_islet.cpp b/methods/slmm/slmm_islet.cpp
index c24a10f..6584731 100644
--- a/methods/slmm/slmm_islet.cpp
+++ b/methods/slmm/slmm_islet.cpp
@@ -28,55 +28,6 @@ Int GllOffsetNodal::max_degree (const Int& np) const {
   return degrees[np];
 }
 
-static Real normalize_x (const Real* gll_x, const Real& x) {
-  const Real x0 = gll_x[1];
-  return (x - x0) / (1 - x0);
-}
-
-static void outer_eval (const Real* gll_x, const Real& x, Real v[4]) {
-  const Real
-    xbar = normalize_x(gll_x, gll_x[2]),
-    ooxbar = 1 / xbar,
-    ybar = 1 / (xbar - 1),
-    xn = normalize_x(gll_x, x);
-  v[0] = 0;
-  v[1] = 1 + ybar*xn*((1 - ooxbar)*xn + ooxbar - xbar);
-  v[2] = ybar*ooxbar*xn*(xn - 1);
-  v[3] = ybar*xn*(xbar - xn);
-}
-
-#if 0
-static bool np4_subgrid_eval (const Real* const x_gll, const Real& x,
-                              Real y[4]) {
-  static constexpr Real
-    alpha = 0.5527864045000416708,
-    v = 0.427*(1 + alpha),
-    x2 = 0.4472135954999579277, // 1/sqrt(5)
-    x3 = 1 - x2,
-    det = x2*x3*(x2 - x3),
-    y2 = alpha,
-    y3 = v,
-    c1 = (x3*y2 - x2*y3)/det,
-    c2 = (-x3*x3*y2 + x2*x2*y3)/det;
-  if (x < x_gll[1] || x > x_gll[2]) {
-    Real y4[4];
-    GLL::eval_lagrange_poly(4, x_gll, x, y4);
-    if (x < x_gll[1]) {
-      outer_eval(x_gll, -x, y);
-      std::swap(y[0], y[3]);
-      std::swap(y[1], y[2]);
-    } else
-      outer_eval(x_gll, x, y);
-    const Real x0 = 1 - std::abs(x);
-    const Real a = (c1*x0 + c2)*x0;
-    for (int i = 0; i < 4; ++i)
-      y[i] = a*y[i] + (1 - a)*y4[i];
-  }
-  else
-    GLL::eval_lagrange_poly(4, x_gll, x, y);
-  return true;
-}
-#else
 static bool np4_subgrid_eval (const Real* const x_gll, const Real& x,
                               Real y[4]) {
   static const Real c1 = 0.306;
@@ -94,7 +45,6 @@ static bool np4_subgrid_eval (const Real* const x_gll, const Real& x,
     GLL::eval_lagrange_poly(4, x_gll, x, y);
   return true;
 }
-#endif
 
 bool GllOffsetNodal::eval (const Int& np, const Real& x, Real* const v) const {
   const Real* xnode;
@@ -254,13 +204,13 @@ ::eval (const Int& np, const Real& x, Real* const v) const {
   case  4: return evalon< 4,2>(xnode, {3,4        }, {0,0        }, x, v);
   case  5: return evalon< 5,2>(xnode, {3,4        }, {0,0        }, x, v);
   case  6: return evalon< 6,3>(xnode, {3,4,6      }, {0,0,0      }, x, v);
-  case  7: return evalon< 7,3>(xnode, {3,4,6      }, {0,0,0      }, x, v);
+  case  7: return evalon< 7,3>(xnode, {3,4,4      }, {0,0,1      }, x, v);
   case  8: return evalon< 8,4>(xnode, {4,4,4,4    }, {0,0,1,2    }, x, v);
   case  9: return evalon< 9,4>(xnode, {4,4,4,4    }, {0,0,1,2    }, x, v);
-  case 10: return evalon<10,5>(xnode, {4,5,4,4,4  }, {0,0,1,2,3  }, x, v);
-  case 11: return evalon<11,5>(xnode, {4,5,4,4,4  }, {0,0,1,2,3  }, x, v);
-  case 12: return evalon<12,6>(xnode, {4,5,5,4,4,4}, {0,0,1,2,3,4}, x, v);
-  case 13: return evalon<13,6>(xnode, {4,5,6,4,4,4}, {0,0,0,2,3,4}, x, v);
+  case 10: return evalon<10,5>(xnode, {4,4,4,4,4  }, {0,0,1,2,3  }, x, v);
+  case 11: return evalon<11,5>(xnode, {4,4,4,4,4  }, {0,0,1,2,3  }, x, v);
+  case 12: return evalon<12,6>(xnode, {4,4,4,4,4,4}, {0,0,1,2,3,4}, x, v);
+  case 13: return evalon<13,6>(xnode, {4,4,4,4,4,4}, {0,0,1,2,3,4}, x, v);
   }
   return false;
 }
diff --git a/methods/slmm/slmm_runtests.py b/methods/slmm/slmm_runtests.py
new file mode 100755
index 0000000..e210180
--- /dev/null
+++ b/methods/slmm/slmm_runtests.py
@@ -0,0 +1,292 @@
+#!/usr/bin/python
+
+import os, sys, re, optparse
+
+def readall (fn):  # Returns the full contents of file fn as a string.
+    # Shorthand for reading in all the text in a file.
+    try:
+        with open(fn, 'r') as f:
+            text = f.read()
+    except:  # Bare except: any failure at all (missing file, read error, ...) yields ''.
+        text = ''
+    return text
+
+def writeall (text, fn, for_real):  # Write text to file fn, but only when for_real is truthy.
+    if for_real:  # Otherwise the call is a no-op.
+        with open(fn, 'w') as f:  # 'w' truncates any existing file.
+            f.write(text)
+
+def parse_one_liner (text):  # Pull the five metrics out of the first '<OL>' line found in text.
+    class struct:  # Empty class used purely as an attribute container.
+        pass
+    hits = re.findall('<OL>.*', text)  # Each hit runs from '<OL>' to the end of its line.
+    hits = re.findall('l2 (?P<l2>[^ ]*) .* cv re (?P<cv>[^ ]*)' +
+                      '.* cvgll re (?P<cvgll>[^ ]*)' +
+                      '.* mo min [0-9.e\-+]+ (?P<min>[^ ]*) .* ' +
+                      'max [0-9.e\-+]+ (?P<max>[^ ]*)', hits[0])  # hits[0] raises IndexError when text has no '<OL>' line.
+    o = struct  # The class object itself (not an instance) carries the fields.
+    o.l2 = float(hits[0][0])  # findall returns tuples of the groups in pattern order; the first match is used.
+    o.cv = float(hits[0][1])
+    o.cv_gll = float(hits[0][2])
+    o.mo_min = float(hits[0][3])  # Second value after 'mo min'; the first is matched but not captured.
+    o.mo_max = float(hits[0][4])  # Second value after 'max'; the first is matched but not captured.
+    return o
+
+def runtest (cmd):  # Run cmd through the shell and return its captured stdout+stderr text.
+    outfn = 'runtests.tmp'  # Fixed scratch file in the current directory, overwritten by each call.
+    os.system(cmd + ' > ' + outfn + ' 2>&1')  # The exit status returned by os.system is discarded.
+    return readall(outfn)
+
+long_output = False  # Overwritten from the -l/--long option after argument parsing below.
+
+def print_test (cmd):  # Print a numbered, dot-padded label for cmd without ending the line.
+    print_test.ctr += 1  # Running test number, stored as a function attribute.
+    ll = 87;  # Maximum label width in characters.
+    if long_output:
+        ll = 240
+    if len(cmd) > ll:
+        cmd = cmd[len(cmd)-ll+1:]  # Over-long commands keep only their last ll-1 characters.
+    fmt = '{{0:.<{0:d}s}}'.format(ll+1)  # Produces e.g. '{0:.<88s}': left-aligned, padded with '.'.
+    print '{:3d} '.format(print_test.ctr) + fmt.format(cmd + ' '),  # Python 2: trailing comma suppresses the newline.
+    sys.stdout.flush()  # Show the label before the test starts running.
+print_test.ctr = 0;
+
+def print_result (passed):  # Print the verdict; return 1 for a failure and 0 for a pass.
+    if not passed:
+        print '***FAILED'
+        return 1
+    else:
+        print '   PASSED'
+        return 0
+
+def check_passed (cmd):  # Run cmd; it passes iff its output contains 'PASSED'. Returns 0 (pass) or 1 (fail).
+    print_test(cmd)
+    out = runtest(cmd)
+    hits = re.findall('PASSED', out)  # Plain substring search over the captured output.
+    passed = len(hits) > 0
+    return print_result(passed)
+
+def check_errs (cmd, l2_err, cv=10, cv_gll=10, min=-float('Inf'), max=float('Inf'),
+                l2_err_is_0=False):  # Run cmd and compare its '<OL>' metrics with these bounds; returns 0 (pass) or 1 (fail).
+    print_test(cmd)
+    out = runtest(cmd)
+    o = parse_one_liner(out)
+    passed = ((o.l2 > 0 or l2_err_is_0) and o.l2 <= l2_err  # A non-positive l2 fails unless l2_err_is_0 is set.
+              and o.cv <= cv
+              and o.cv_gll <= cv_gll
+              and o.mo_min >= min and o.mo_max <= max)  # min/max parameters shadow the builtins inside this function.
+    result = print_result(passed)
+    if not passed:  # On failure, echo the untruncated command and measured-vs-allowed values.
+        print '   ' + cmd
+        print (('    l2 {:1.2e} cv {:1.2e} cv_gll {:1.2e} mo_min {:1.2e} mo_max {:1.2e}' +
+                ' but l2_err {:1.2e} cv {:1.2e} cv_gll {:1.2e} min {:1.2e} max {:1.2e}').
+               format(o.l2, o.cv, o.cv_gll, o.mo_min, o.mo_max,
+                      l2_err, cv, cv_gll, min, max))
+    return result
+
+p = optparse.OptionParser()  # Command-line handling: a single optional flag.
+p.add_option('-l', '--long', dest='long', action='store_true', default=False,
+             help='Long-line output.')
+opts, args = p.parse_args()
+long_output = opts.long  # Read by print_test to widen the command label.
+
+try: os.mkdir('tmp')  # Best effort: any failure (e.g. the directory already exists) is ignored.
+except: pass
+
+nerr = 0  # Running count of failed tests; each check_* call adds 0 or 1.
+# Unit tests.
+nerr += check_passed('./slmm_test -q -c test_make_cubedsphere')
+nerr += check_passed('./slmm_test -q -c test_gll')
+nerr += check_passed('./slmm_test -q -c test_gll_2d')
+nerr += check_passed('./slmm_test -q -c test_time_int')
+nerr += check_passed('./slmm_test -q -c test_make_gll_mesh')
+nerr += check_passed('./slmm_test -q -c test_make_gll_subcell_mesh')
+nerr += check_passed('./slmm_test -q -c test_qp_limiter')
+nerr += check_passed('./slmm_test -q -c test_face_tree')
+nerr += check_passed('./slmm_test -q -c test_spf')
+nerr += check_passed('./slmm_test -q -c test_nla')
+nerr += check_passed('./slmm_test -q -c test_mass_matrix')
+#nerr += check_passed('./slmm_test -q -c test_fit_extremum')
+
+# Test classical semi-Lagrangian with global filters QLT, CAAS, min-norm2.
+base = ('./slmmir -method {method:s} -ode divergent -ic slottedcylinders ' +
+        '-ic cosinebells -ic gaussianhills -we 0 -np {np:d} -dmc f -mono {mono:s} ' +
+        '-nsteps 12 -ne {ne:d}')
+nerr += check_errs(base.format(method='pcsl', np=4, ne=10, mono='qlt'),
+                   3.34e-1, cv_gll=5e-14, min=0.1, max=1)  # rho is also done with CSL
+nerr += check_errs(base.format(method='pcsl', np=6, ne=6, mono='qlt'),
+                   3.34e-1, cv_gll=5e-14, min=0.1, max=1)  # rho is also done with CSL
+nerr += check_errs(base.format(method='csl', np=4, ne=10, mono='qlt'),
+                   3.47e-1, cv_gll=5e-14, min=0.1, max=1)  # rho is remapped
+nerr += check_errs(base.format(method='pcsl', np=4, ne=10, mono='qlt-pve'),
+                   3.36e-1, cv_gll=5e-14, min=0, max=2)    # >= 0 constraint only
+nerr += check_errs(base.format(method='pcsl', np=4, ne=10, mono='caas'),
+                   3.47e-1, cv_gll=5e-14, min=0.1, max=1)  # rho is also done with CSL
+nerr += check_errs(base.format(method='csl', np=4, ne=10, mono='caas'),
+                   3.47e-1, cv_gll=5e-14, min=0.1, max=1)  # rho is remapped
+nerr += check_errs(base.format(method='csl', np=4, ne=10, mono='mn2'),
+                   3.47e-1, cv_gll=5e-14, min=0.1, max=1)  # rho is remapped
+#   Tracer consistency test. Apply CSL to constant q but remap rho.
+nerr += check_errs('./slmmir -method csl -ode divergent -ic constant -we 0 -np 4 ' +
+                   '-dmc f -mono qlt -rit -nsteps 12 -ne 10',
+                   3e-15, cv_gll=1e-13, min=0.42, max=0.42, l2_err_is_0=True)
+
+# Test ISL with p-refinement.
+base = ('./slmmir -method pcsl -ode divergent -ic gaussianhills ' +
+        '-we 0 -np {np:d} -dmc f -mono {mono:s} ' +
+        '-nsteps 12 -ne {ne:d} -timeint {timeint:s}')
+nerr += check_errs(base.format(np=12, ne=3, mono='none', timeint='interp'),
+                   9.939e-3)
+nerr += check_errs(base.format(np=12, ne=3, mono='none', timeint='exact'),
+                   8.793e-3)
+base = ('./slmmir -method pcsl -ode divergent -ic slottedcylinders '
+        '-we 0 -np {np:d} -dmc f -mono {mono:s} ' +
+        '-nsteps 12 -ne {ne:d} -timeint interp')
+nerr += check_errs(base.format(np=12, ne=3, mono='caas', timeint='interp'),
+                   2.896e-1, cv_gll=5e-14, min=0.1, max=1)
+
+# ISL with p-refinement and separate t and v meshes.
+base = ('./slmmir -method pcsl -ode divergent -ic gaussianhills ' +
+        '-we 0 -rit -dmc {dmc:s} -mono {mono:s} -lim {lim:s} -nsteps 13 -T 12 ' +
+        '-ne 6 -np 8 -timeint interp -prefine {prefine:d} -d2c')
+nerr += check_errs(base.format(prefine=0, dmc='es', mono='caas', lim='caas'),  5.968e-03, cv=2e-14)
+nerr += check_errs(base.format(prefine=5, dmc='es', mono='caas', lim='caas'),  5.885e-03, cv=4e-14)
+nerr += check_errs(base.format(prefine=0, dmc='eh', mono='caas', lim='caas'),  5.968e-03, cv_gll=2e-14)
+nerr += check_errs(base.format(prefine=5, dmc='eh', mono='caas', lim='caas'),  5.886e-03, cv_gll=2e-14)
+# new global-only method
+nerr += check_errs(base.format(prefine=0, dmc='es', mono='caas-node', lim='caas'),  5.968e-03, cv=2e-14)
+nerr += check_errs(base.format(prefine=5, dmc='es', mono='caas-node', lim='caas'),  5.885e-03, cv=4e-14)
+nerr += check_errs(base.format(prefine=0, dmc='eh', mono='caas-node', lim='caas'),  5.968e-03, cv_gll=2e-14)
+nerr += check_errs(base.format(prefine=5, dmc='eh', mono='caas-node', lim='caas'),  5.886e-03, cv_gll=2e-14)
+# don't break the no prop preserve case
+nerr += check_errs(base.format(prefine=5, dmc='es', mono='none', lim='none'),  4.2e-03)
+# GllOffsetNodal
+base += ' -basis GllOffsetNodal'
+nerr += check_errs(base.format(prefine=5, dmc='es', mono='caas', lim='caas'),  5.885e-03, cv=4e-14)
+nerr += check_errs(base.format(prefine=5, dmc='eh', mono='caas', lim='caas'),  5.886e-03, cv_gll=2e-14)
+nerr += check_errs(base.format(prefine=5, dmc='es', mono='caas-node', lim='caas'),  5.885e-03, cv=4e-14)
+nerr += check_errs(base.format(prefine=5, dmc='eh', mono='caas-node', lim='caas'),  5.886e-03, cv_gll=2e-14)
+
+base = './slmmir -nsteps 12 -ne 10 -we 0 -ode divergent -ic gaussianhills '
+
+# DSS for QOF rho, CSL tracer, with QLT.
+nerr += check_errs(base + '-np 3 -d2c -method csl -dmc f -mono qlt',
+                   9.05e-2, cv_gll=2e-14)
+
+# Cell-integrated method basics.
+nerr += check_errs(base + '-np 3', 2.43e-2, 1e-14)
+nerr += check_errs(base + '-np 3 -xyz -mono qlt',
+                   3.18e-2, 4e-15, min=1.495e-08, max=9.518e-01)
+nerr += check_errs(base + '-np 3 -xyz -mono caas',
+                   3.18e-2, 4e-15, min=1.495e-08, max=9.518e-01)
+nerr += check_errs(base + '-np 3 -xyz -mono mn2',
+                   3.18e-2, 4e-15, min=1.495e-08, max=9.518e-01)
+nerr += check_errs(base + '-np 3 -xyz -d2c', 3.64e-2, 3e-15)
+nerr += check_errs(base + '-np 4 -xyz -d2c', 1.02e-2, 8e-15)
+nerr += check_errs(base + '-np 4 -xyz -d2c -method cdg', 1.02e-2, 3e-15)
+
+# Limiter.
+nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' +
+                   '-ic slottedcylinders -np 4 -mono qlt -method ir',
+                   3.0e-1, cv=3e-14, min=0.1, max=1.0)
+nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' +
+                   '-ic slottedcylinders -np 4 -mono qlt -method ir -lim caas',
+                   3.0e-1, cv=3e-14, min=0.1, max=1.0)
+nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' +
+                   '-ic slottedcylinders -np 4 -mono qlt -method cdg',
+                   3.03e-1, cv=3e-14, min=0.1, max=1.0)
+# Multiple tracers.
+nerr += check_errs(base + '-np 4 -ic correlatedcosinebells 2', 1.02e-2, 2e-7)
+# Local DMC with internal mass definition.
+nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' +
+                   '-ic gaussianhills -np 4 -dmc es -method ir',
+                   9.1e-3, cv=2e-13)
+nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' +
+                   '-ic gaussianhills -np 4 -dmc es -method cdg',
+                   9.1e-3, cv=2e-13)
+# Local DMC with Homme mass definition.
+nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' +
+                   '-ic gaussianhills -np 4 -dmc eh',
+                   9.1e-3, cv_gll=4e-15)
+# Global (weaker than local) DMC with Homme mass definition.
+nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' +
+                   '-ic gaussianhills -np 4 -dmc geh',
+                   9.1e-3, cv_gll=2e-14)
+# Local DMC, limiter, internal mass def.
+nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' +
+                   '-ic slottedcylinders -np 4 -mono qlt -dmc es',
+                   3.1e-1, cv=2.3e-13, min=0.1, max=1.0)
+# Local DMC, limiter, Homme mass def.
+nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' +
+                   '-ic slottedcylinders -np 4 -mono qlt -dmc eh',
+                   3e-1, cv_gll=5e-14, min=0.1, max=1.0)
+# Local DMC, facet transport.
+nerr += check_errs(base + '-np 4 -dmc f', 1.42e-2, cv_gll=6e-14)
+nerr += check_errs('./slmmir -nsteps 12 -ne 30 -we 0 -ode divergent ' +
+                   '-ic gaussianhills -np 2 -dmc f',
+                   6.49e-2, cv_gll=1.4e-13)
+# With limiter.
+nerr += check_errs('./slmmir -nsteps 96 -ne 5 -we 0 -ode divergent -method cdg ' +
+                   '-ic slottedcylinders -np 4 -mono qlt -dmc f',
+                   4.6e-1, cv_gll=4e-14, min=0.1, max=1.0)
+nerr += check_errs('./slmmir -nsteps 96 -ne 5 -we 0 -ode divergent -method cdg ' +
+                   '-ic slottedcylinders -np 4 -mono qlt -dmc f -lim caas',
+                   4.6e-1, cv_gll=4e-14, min=0.1, max=1.0)
+nerr += check_errs('./slmmir -nsteps 96 -ne 5 -we 0 -ode divergent -method cdg ' +
+                   '-ic slottedcylinders -np 4 -mono qlt -dmc f -lim caags',
+                   4.6e-1, cv_gll=4e-14, min=0.1, max=1.0)
+nerr += check_errs('./slmmir -nsteps 96 -ne 5 -we 0 -ode divergent -method ir ' +
+                   '-ic slottedcylinders -np 4 -mono qlt -dmc f',
+                   4.6e-1, cv_gll=4e-14, min=0.1, max=1.0)
+# Add an equality constraint to nail DMC even more. In addition, output scalar
+# measurements by time step (this is just a test that it runs).
+nerr += check_errs('./slmmir -nsteps 96 -ne 5 -we 0 -ode divergent -method cdg ' +
+                   '-ic slottedcylinders -np 4 -mono qlt -dmc ef -o rittest -rit',
+                   4.6e-1, cv_gll=2e-14, min=0.1, max=1.0)
+nerr += check_errs('./slmmir -nsteps 96 -ne 5 -we 0 -ode divergent -method ir ' +
+                   '-ic slottedcylinders -np 4 -mono qlt -dmc ef -o rittest -rit',
+                   4.6e-1, cv_gll=2e-14, min=0.1, max=1.0)
+nerr += check_errs('./slmmir -nsteps 96 -ne 15 -we 0 -ode divergent ' +
+                   '-ic slottedcylinders -np 2 -mono qlt -dmc ef -o rittest -rit',
+                   4.5e-1, cv_gll=2.2e-14, min=0.1, max=1.0)
+# Test the more complicated mono method.
+nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' +
+                   '-ic gaussianhills -ic slottedcylinders -np 4 -mono qlt -dmc f',
+                   1.5e-2, cv_gll=8e-14, min=0, max=0.957)
+# 3-1 subcell mesh, with new vertices at GLL points.
+nerr += check_errs('./slmmir -nsteps 96 -ne 5 -we 0 -ode divergent -tq 4 ' +
+                   '-ic slottedcylinders -np 4 -mesh gllsubcell -mono qlt -dmc ef',
+                   4.6e-1, cv_gll=2e-14, min=0.1, max=1.0)
+# 3-1 subcell mesh, with new vertices at non-GLL points.
+nerr += check_errs('./slmmir -nsteps 96 -ne 5 -we 0 -ode divergent -tq 4 ' +
+                   '-ic slottedcylinders -np 4 -mesh runisubcell -mono qlt -dmc ef',
+                   4.5e-1, cv_gll=2e-14, min=0.1, max=1.0)
+# Same, but now looking for accuracy difference.
+nerr += check_errs('./slmmir -nsteps 12 -ne 5 -we 0 -ode divergent -tq 4 ' +
+                   '-ic gaussianhills -np 4 -mesh gllsubcell -mono qlt -dmc ef',
+                   7.40e-2, cv_gll=9e-15, min=0, max=0.96)
+nerr += check_errs('./slmmir -nsteps 12 -ne 5 -we 0 -ode divergent -tq 4 ' +
+                   '-ic gaussianhills -np 4 -mesh runisubcell -mono qlt -dmc ef',
+                   5.41e-2, cv_gll=5e-15, min=0, max=0.96)
+# We can subdivide cells arbitrarily with runisubcell.
+nerr += check_errs('./slmmir -nsteps 12 -ne 2 -we 0 -ode divergent -tq 4 ' +
+                   '-ic gaussianhills -np 10 -mesh runisubcell -mono qlt -dmc ef',
+                   3.5e-2, cv_gll=3e-15, min=0, max=0.96)
+# Tracer-decoupled CMBC tests.
+base = ('./slmmir -nsteps 12 -ne 10 -np 4 -ode divergent ' +
+        '-ic gaussianhills -ic slottedcylinders -ic cosinebells ' +
+        '-ic correlatedcosinebells -ic xyztrig -dmc {0:s} -mono {mono:s} -we 0')
+#  This method also is intended to handle tracer consistency, but I haven't put
+#  together a test for that yet. So test just CMBC.
+nerr += check_errs(base.format('f', mono='qlt'), 1.45e-2, cv_gll=6e-14, min=1.495e-8, max=0.956)
+nerr += check_errs(base.format('es', mono='qlt'), 9.18e-3, cv=2e-13, min=1.495e-8, max=0.956)
+nerr += check_errs(base.format('eh', mono='qlt'), 9.18e-3, cv_gll=1e-14, min=1.495e-8, max=0.956)
+#  Test that if rho is perturbed, a constant q stays a constant.
+nerr += check_errs('./slmmir -nsteps 12 -ne 10 -np 4 -ode nondivergent ' +
+                   '-ic constant -dmc ef -mono qlt -we 0 --perturb-rho 0.05',
+                   1e-14, cv_gll=5e-14, min=0.42, max=0.42, l2_err_is_0=True)
+nerr += check_errs('./slmmir -nsteps 12 -ne 10 -np 4 -ode divergent ' +
+                   '-ic constant -dmc ef -mono qlt -we 0 --perturb-rho 0.05',
+                   1e-14, cv_gll=5e-14, min=0.42, max=0.42, l2_err_is_0=True)
+
+print '{0:d} tests failed'.format(nerr)  # Report only: the script's exit status does not reflect nerr.
diff --git a/methods/slmm/slmmir_remapper_isl.cpp b/methods/slmm/slmmir_remapper_isl.cpp
index ce1daeb..cf93492 100644
--- a/methods/slmm/slmmir_remapper_isl.cpp
+++ b/methods/slmm/slmmir_remapper_isl.cpp
@@ -1167,8 +1167,12 @@ interp (const Mesh& m, const C2DRelations& c2d, const AVec3s& advected_p,
       const Real ta = gll_x[ni % np], tb = gll_x[ni / np];
       Real Jd;
       if (md_) {
+        // This is the case of interest and runs when property preservation is
+        // on. For the case of p-refinement, it runs with np=npv=4, not npt.
         Jd = calc_isoparametric_jacobian(advected_p, cell, np, ta, tb);
       } else {
+        // This happens only when property preservation is off, in which case
+        // density doesn't couple to the mixing ratios.
         const Int corners[] = {cell[0], cell[np-1],
                                cell[np*np-1], cell[np*(np-1)]};
         Jd = calc_jacobian(advected_p, corners, ta, tb);
@@ -1640,7 +1644,7 @@ ::csl (const AVec3s& advected_p, Real* const src_tracer,
               tgt_rho_impl, tgt_tracer_impl, ntracers,
               positive_only, false /* don't apply cdr to rho */, cdr_method);
     }
-    // In the following two cases, continuity does not hold nore need hold on
+    // In the following two cases, continuity does not hold nor need hold on
     // any of the grids.
     //   Map q on tgrid to q on vgrid.
     isl_impl_->transfer_q_to_v_mesh(run_cdr, ntracers,