From b5739eb53495265d1c9a3842862dcf1891ed9f9a Mon Sep 17 00:00:00 2001 From: "Andrew M. Bradley" Date: Wed, 21 Jul 2021 00:27:38 -0600 Subject: [PATCH] Methods for the Islet paper. --- methods/islet/Makefile | 35 + methods/islet/cslunstab.cpp | 765 ++++++++++++ methods/islet/figures/amb3.hy | 870 ++++++++++++++ methods/islet/figures/figs-adv-diag.hy | 1035 +++++++++++++++++ methods/islet/figures/figs-methods.hy | 584 ++++++++++ methods/islet/figures/figs.tex | 376 ++++++ methods/islet/figures/figsutils.hy | 225 ++++ methods/islet/figures/islet.hy | 41 + methods/islet/figures/poly.hy | 190 +++ methods/islet/figures/run-accuracy.sh | 58 + methods/islet/figures/run-img-filament.sh | 34 + methods/islet/figures/run-isl-footprint.sh | 35 + methods/islet/figures/run-mixing.sh | 40 + .../figures/run-pg-srcterm-midpoint-test.sh | 36 + methods/islet/figures/run-stability-cmp.sh | 32 + .../islet/figures/run-toychem-diagnostic.sh | 34 + methods/islet/figures/run-toychem-imgs.sh | 23 + methods/islet/figures/sl-gpu-perf.hy | 221 ++++ methods/islet/islet_interpmethod.hpp | 14 + methods/islet/islet_isl.cpp | 364 ++++++ methods/islet/islet_isl.hpp | 54 + methods/islet/islet_maxeigcomp.cpp | 616 ++++++++++ methods/islet/islet_maxeigcomp.hpp | 79 ++ methods/islet/islet_nodalbasis.cpp | 195 ++++ methods/islet/islet_nodalbasis.hpp | 59 + methods/islet/islet_np4.cpp | 46 + methods/islet/islet_np4.hpp | 22 + methods/islet/islet_npx.cpp | 35 + methods/islet/islet_npx.hpp | 137 +++ methods/islet/islet_pum.cpp | 226 ++++ methods/islet/islet_pum.hpp | 56 + methods/islet/islet_studymetrics.cpp | 134 +++ methods/islet/islet_studymetrics.hpp | 6 + methods/islet/islet_tables.cpp | 268 +++++ methods/islet/islet_tables.hpp | 27 + methods/islet/islet_types.hpp | 12 + methods/islet/islet_util.hpp | 264 +++++ methods/islet/islet_xnodes_metrics.cpp | 257 ++++ methods/islet/islet_xnodes_metrics.hpp | 53 + methods/islet/make-depends.sh | 3 + methods/islet/make.depends | 40 + methods/islet/make.inc.gnu | 
2 + methods/islet/pum_sweep.cpp | 56 + methods/islet/readme.txt | 77 ++ methods/islet/run_meam1_sweep.cpp | 48 + methods/islet/run_np4.cpp | 270 +++++ methods/islet/search.cpp | 962 +++++++++++++++ methods/slmm/Makefile | 29 +- methods/slmm/make-depends.sh | 3 + methods/slmm/make.depends | 220 ++++ methods/slmm/slmm_basis.cpp | 2 +- methods/slmm/slmm_islet.cpp | 60 +- methods/slmm/slmm_runtests.py | 292 +++++ methods/slmm/slmmir_remapper_isl.cpp | 6 +- 54 files changed, 9544 insertions(+), 84 deletions(-) create mode 100644 methods/islet/Makefile create mode 100644 methods/islet/cslunstab.cpp create mode 100644 methods/islet/figures/amb3.hy create mode 100644 methods/islet/figures/figs-adv-diag.hy create mode 100644 methods/islet/figures/figs-methods.hy create mode 100644 methods/islet/figures/figs.tex create mode 100644 methods/islet/figures/figsutils.hy create mode 100644 methods/islet/figures/islet.hy create mode 100644 methods/islet/figures/poly.hy create mode 100644 methods/islet/figures/run-accuracy.sh create mode 100644 methods/islet/figures/run-img-filament.sh create mode 100644 methods/islet/figures/run-isl-footprint.sh create mode 100644 methods/islet/figures/run-mixing.sh create mode 100644 methods/islet/figures/run-pg-srcterm-midpoint-test.sh create mode 100644 methods/islet/figures/run-stability-cmp.sh create mode 100644 methods/islet/figures/run-toychem-diagnostic.sh create mode 100644 methods/islet/figures/run-toychem-imgs.sh create mode 100644 methods/islet/figures/sl-gpu-perf.hy create mode 100644 methods/islet/islet_interpmethod.hpp create mode 100644 methods/islet/islet_isl.cpp create mode 100644 methods/islet/islet_isl.hpp create mode 100644 methods/islet/islet_maxeigcomp.cpp create mode 100644 methods/islet/islet_maxeigcomp.hpp create mode 100644 methods/islet/islet_nodalbasis.cpp create mode 100644 methods/islet/islet_nodalbasis.hpp create mode 100644 methods/islet/islet_np4.cpp create mode 100644 methods/islet/islet_np4.hpp create mode 100644 
methods/islet/islet_npx.cpp create mode 100644 methods/islet/islet_npx.hpp create mode 100644 methods/islet/islet_pum.cpp create mode 100644 methods/islet/islet_pum.hpp create mode 100644 methods/islet/islet_studymetrics.cpp create mode 100644 methods/islet/islet_studymetrics.hpp create mode 100644 methods/islet/islet_tables.cpp create mode 100644 methods/islet/islet_tables.hpp create mode 100644 methods/islet/islet_types.hpp create mode 100644 methods/islet/islet_util.hpp create mode 100644 methods/islet/islet_xnodes_metrics.cpp create mode 100644 methods/islet/islet_xnodes_metrics.hpp create mode 100644 methods/islet/make-depends.sh create mode 100644 methods/islet/make.depends create mode 100644 methods/islet/make.inc.gnu create mode 100644 methods/islet/pum_sweep.cpp create mode 100644 methods/islet/readme.txt create mode 100644 methods/islet/run_meam1_sweep.cpp create mode 100644 methods/islet/run_np4.cpp create mode 100644 methods/islet/search.cpp create mode 100644 methods/slmm/make-depends.sh create mode 100644 methods/slmm/make.depends create mode 100755 methods/slmm/slmm_runtests.py diff --git a/methods/islet/Makefile b/methods/islet/Makefile new file mode 100644 index 0000000..10fac11 --- /dev/null +++ b/methods/islet/Makefile @@ -0,0 +1,35 @@ +include make.inc + +SOURCES = islet_isl.cpp islet_tables.cpp islet_npx.cpp islet_maxeigcomp.cpp islet_xnodes_metrics.cpp islet_pum.cpp islet_studymetrics.cpp islet_nodalbasis.cpp islet_np4.cpp +OBJECTS = $(SOURCES:.cpp=.o) +.cpp.o: + $(CXX) $(CXXFLAGS) -c $< -o $@ + +all: libislet search np4 pum_sweep run_meam1_sweep cslunstab + +libislet: $(OBJECTS) + $(CXX) $(OBJECTS) $(LDFLAGS) -fopenmp -shared $(LINK_LAPACK_BLAS) -o libislet.so + +search: libislet search.o + $(CXX) search.o $(LDFLAGS) $(LINK_LAPACK_BLAS) -L. -lislet -fopenmp -o search + +np4: libislet run_np4.o + $(CXX) run_np4.o $(LDFLAGS) $(LINK_LAPACK_BLAS) -L. 
-lislet -fopenmp -o np4 + +pum_perturb_plot: libislet pum_perturb_plot.o + $(CXX) pum_perturb_plot.o $(LDFLAGS) $(LINK_LAPACK_BLAS) -L. -lislet -fopenmp -o pum_perturb_plot + +run_meam1_sweep: libislet run_meam1_sweep.o + $(CXX) run_meam1_sweep.o $(LDFLAGS) $(LINK_LAPACK_BLAS) -L. -lislet -fopenmp -o run_meam1_sweep + +pum_sweep: libislet pum_sweep.o + $(CXX) pum_sweep.o $(LDFLAGS) $(LINK_LAPACK_BLAS) -L. -lislet -fopenmp -o pum_sweep + +cslunstab: cslunstab.o + $(CXX) cslunstab.o $(LDFLAGS) $(LINK_LAPACK_BLAS) -fopenmp -o cslunstab + +clean: + rm -f *.o *.so search np4 pum_sweep pum_perturb_plot run_meam1_sweep + +# generate by running `bash make.depends` +include make.depends diff --git a/methods/islet/cslunstab.cpp b/methods/islet/cslunstab.cpp new file mode 100644 index 0000000..aec8ace --- /dev/null +++ b/methods/islet/cslunstab.cpp @@ -0,0 +1,765 @@ +/* This standalone program implements the example unstable 1D and 2D problems + that use the classical cubic interpolation semi-Lagrangian method. + + Build it: + g++ -O3 -c cslunstab.cpp + g++ cslunstab.o -llapack -lblas -o cslunstab + Run it: + ./cslunstab + There will be no output if all the assertions pass. The two lines + require(cubic1d_demo_unstable_problem() >= 1 + 1e-3); + and + require(cubic2d_demo_unstable_problem() >= 1 + 1e-2); + assert that the maximum eigenvalue magnitude is at least 1 + a small amount + in each problem. + */ + +#include +#include +#include +#include +#include +#include + +using Int = int; +using Real = double; + +#define require(condition) do { \ + if ( ! (condition)) { \ + std::stringstream _ss_; \ + _ss_ << __FILE__ << ":" << __LINE__ << ": FAIL:\n" << #condition \ + << "\n"; \ + throw std::logic_error(_ss_.str()); \ + } \ + } while (0) +#define require_msg(condition, message) do { \ + if ( ! 
(condition)) { \ + std::stringstream _ss_; \ + _ss_ << __FILE__ << ":" << __LINE__ << ": FAIL:\n" << #condition \ + << "\nmessage:\n" << message << "\n"; \ + throw std::logic_error(_ss_.str()); \ + } \ + } while (0) + +bool eq(const std::string& a, const char* const b1, const char* const b2 = 0); + +template T square (const T& x) { return x*x; } + +inline Real reldif (const Real& a, const Real& b) +{ return std::abs(b - a)/std::max(std::abs(a), std::abs(b)); } + +template +inline bool equal (const T1& a, const T2& b) { + if (a != b) + printf("equal: a,b = %23.16e %23.16e re = %23.16e\n", + Real(a), Real(b), std::abs((a-b)/Real(a))); + return a == b; +} + +template +inline bool almost_equal (const T1& a, const T2& b, const Real tol) { + const auto re = std::abs(a-b)/(1.0 + std::abs(a)); + const bool good = re <= tol; + if ( ! good) + printf("equal: a,b = %23.16e %23.16e re = %23.16e tol %9.2e\n", + Real(a), Real(b), re, tol); + return good; +} + +inline double urand () { return rand() / ((double) RAND_MAX + 1.0); } + +extern "C" void dgeev_(char* jobvl, char* jobvr, int* n, double* a, int* lda, + double* wr, double* wi, + double* vl, int* ldvl, + double* vr, int* ldvr, + double* work, int* lwork, int* info); + +void dgeev (int n, double* a, int lda, + double* wr, double* wi, + std::vector& work, int& info, + double* vl = nullptr, int ldvl = 1, + double* vr = nullptr, int ldvr = 1) { + int lwork = 10*n; + if (static_cast(work.size()) < lwork) work.resize(lwork); + char jobvl = vl ? 'v' : 'n'; + char jobvr = vr ? 
'v' : 'n'; + assert(vl == nullptr || (ldvl >= n)); + assert(vr == nullptr || (ldvr >= n)); + dgeev_(&jobvl, &jobvr, &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr, + work.data(), &lwork, &info); +} + +Real calc_max_eig_amp (int n, double* a, int lda, std::vector& work) { + work.resize(12*n); + Real* const wr = work.data() + 10*n; + Real* const wi = work.data() + 11*n; + int info; + dgeev(n, a, lda, wr, wi, work, info); + Real mea = 0; + for (int i = 0; i < n; ++i) mea = std::max(mea, std::sqrt(square(wr[i]) + square(wi[i]))); + return mea; +} + +static void eval_lagrange_poly_basis (const Int& n, const Real* xsup, const Real& x, + Real* const y) { + for (int i = 0; i < n; ++i) { + Real f = 1; + for (int j = 0; j < n; ++j) + f *= (i == j) ? + 1 : + (x - xsup[j]) / (xsup[i] - xsup[j]); + y[i] = f; + } +} + +static Real eval_lagrange_poly (const Int& n, const Real* xsup, const Real* ysup, + const Real& x) { + Real y = 0; + for (int i = 0; i < n; ++i) { + Real f = 1; + for (int j = 0; j < n; ++j) + f *= (i == j) ? + 1 : + (x - xsup[j]) / (xsup[i] - xsup[j]); + y += f*ysup[i]; + } + return y; +} + +// Move x to its in-bounds periodic point. +static Real calc_periodic_x (const Real& x0, const Real& x1, const Real x) { + if (x >= x0 && x <= x1) return x; + const auto D = x1 - x0; + auto xper = (x - x0)/D; + xper = x0 + (xper - std::floor(xper))*D; + assert(xper >= x0); + assert(xper <= x1); + return xper; +} + +// Find xper's cell. +static Int find_cell (const Real* const xnode, const Int nreg, const Real& xper) { + if (xper == xnode[0]) return 0; + const Int iper = static_cast( + std::lower_bound(xnode, xnode + nreg + 1, xper) + - xnode) + - 1; + assert(iper >= 0 ); + assert(iper < nreg); + assert(xper >= xnode[iper ]); + assert(xper <= xnode[iper+1]); + return iper; +} + +// xnode[0] is identical to xnode[nreg]. ynode[0] must equal ynode[nreg]. We +// take ynode[nreg] as input rather than use ynode[0] to be sure the caller has +// set up a periodic problem. 
+static Real periodic_cubic1d_interp ( + const Int nreg, const Real* const xnode, const Real* const ynode, const Real& xeval) +{ + assert(ynode[0] == ynode[nreg]); + + const auto xper = calc_periodic_x(xnode[0], xnode[nreg], xeval); + const auto iper = find_cell(xnode, nreg, xper); + + Real xsup[4], ysup[4]; + if (iper == 0) { + xsup[0] = xnode[0] - (xnode[nreg] - xnode[nreg-1]); + ysup[0] = ynode[nreg-1]; + for (int i = 1; i < 4; ++i) xsup[i] = xnode[iper-1+i]; + for (int i = 1; i < 4; ++i) ysup[i] = ynode[iper-1+i]; + } else if (iper == nreg-1) { + for (int i = 0; i < 3; ++i) xsup[i] = xnode[iper-1+i]; + for (int i = 0; i < 3; ++i) ysup[i] = ynode[iper-1+i]; + xsup[3] = xnode[nreg] + (xnode[1] - xnode[0]); + ysup[3] = ynode[1]; + } else { + for (int i = 0; i < 4; ++i) xsup[i] = xnode[iper-1+i]; + for (int i = 0; i < 4; ++i) ysup[i] = ynode[iper-1+i]; + } + + return eval_lagrange_poly(4, xsup, ysup, xper); +} + +// Very straightforward, inefficient impl. +void periodic_cubic1d_make_translation_matrix ( + const Int nreg, const Real* const xnode, const Real& xoffset, Real* op, Real* wrk) +{ + for (Int i = 0; i < nreg; ++i) wrk[i] = 0; + for (Int si = 0; si < nreg; ++si) { + wrk[si] = 1; + if (si == 0) wrk[nreg] = 1; + for (Int ti = 0; ti < nreg; ++ti) + op[nreg*si + ti] = periodic_cubic1d_interp(nreg, xnode, wrk, xnode[ti] + xoffset); + wrk[si] = 0; + if (si == 0) wrk[nreg] = 0; + } +} + +// Demo a configuration that has associated max eig amp >= 1 + 1e-3. 
+static Real cubic1d_demo_unstable_problem () { + const Real x[] = {0, 0.11242, 0.44817, 0.78392, 0.88737, 1}; + const Int nreg = sizeof(x)/sizeof(*x) - 1; + const Real xoffset = 0.33575; + std::vector op(nreg*nreg), wrk(nreg+1); + periodic_cubic1d_make_translation_matrix(nreg, x, xoffset, op.data(), wrk.data()); + const auto mea = calc_max_eig_amp(nreg, op.data(), nreg, wrk); + return mea; +} + +static void cubic1d_unittest () { + const auto eps = std::numeric_limits::epsilon(); + + { + require(equal(calc_periodic_x(-0.1, 1.3, 0.7), 0.7)); + require(equal(calc_periodic_x(-0.1, 1.3, 1.3), 1.3)); + require(equal(calc_periodic_x(-0.1, 1.3, -0.1), -0.1)); + require(almost_equal(calc_periodic_x(-0.1, 1.3, 1.4), 0, eps)); + const auto x = calc_periodic_x(-0.1, 1.3, 2.7); + require(almost_equal(x, -0.1, eps) || almost_equal(x, 1.2, eps)); + require(almost_equal(calc_periodic_x(1.1, 1.3, 1.4), 1.2, eps)); + } + + { // In the interior, a cubic is recovered exactly. + const auto f = [&] (const Real& x) { return ((((-1.2*x) + 0.1)*x) - 0.3)*x + 11; }; + const Int nreg = 5, nsamp = 100; + std::vector x(nreg+1), y(nreg+1); + for (Int i = 0; i <= nreg; ++i) x[i] = 2*urand() - 1; + std::sort(x.begin(), x.end()); + for (Int i = 0; i < nreg; ++i) y[i] = f(x[i]); + y[nreg] = y[0]; // y[nreg] doesn't influence this test. + const auto D = x[nreg-2] - x[1]; + for (Int i = 0; i < nsamp; ++i) { + const auto xs = x[1] + i*D/(nsamp - 1); + const auto ys = periodic_cubic1d_interp(nreg, x.data(), y.data(), xs); + const auto yt = f(xs); + require(almost_equal(ys, yt, 10*eps)); + } + } + + { // At each sample point, error decreases at least as fast as nreg^-4 in the + // 1-norm, nreg^-3 pointwise. 
+ const Real xl = -1.2, xu = 4.2, L = xu - xl; + const auto f = [&] (const Real& x) { return std::cos(2*M_PI*(x - xl)/L); }; + const Int nsamp = 1111; + std::vector xs(nsamp); + std::vector > yss(2); + for (Int i = 0; i < nsamp; ++i) xs[i] = xl + i*L/(nsamp - 1); + for (Int nreg : {31, 80, 211}) { + Int cnt = 0; + for (int refine = 0; refine < 2; ++refine) { + nreg *= 2; + std::vector x(nreg+1), y(nreg+1); + for (Int i = 0; i <= nreg; ++i) x[i] = xl + i*L/nreg; + for (Int i = 0; i <= nreg; ++i) y[i] = f(x[i]); + auto& ys = yss[refine]; + ys.resize(nsamp); + for (Int i = 0; i < nsamp; ++i) + ys[i] = periodic_cubic1d_interp(nreg, x.data(), y.data(), xs[i]); + } + Real err[2] = {0}; + for (Int i = 0; i < nsamp; ++i) { + const auto yt = f(xs[i]); + const auto e1 = std::abs(yss[0][i] - yt); + const auto e2 = std::abs(yss[1][i] - yt); + require(e1 >= e2); + if ( ! (e1 == e2 || e1 >= 8*e2)) + ++cnt; + if (i < nsamp-1) { + err[0] += e1; + err[1] += e2; + } + } + // A few points not showing 3rd-order convergence is OK, since this means + // the less-accurate solution is more accurate than anticipated at these + // points. + require(cnt < 0.1*nsamp); + require(err[0] > 15.9*err[1]); + require(err[0] < 16.1*err[1]); + } + } + + { // The matrix gives the same answer as calls to periodic_cubic1d_interp. 
+ const Int nreg = 17; + std::vector x(nreg+1), op(nreg*nreg), wrk(nreg+1), + ys(nreg+1), yt1(nreg+1), yt2(nreg+1); + + for (Int i = 0; i <= nreg; ++i) x[i] = 2*urand() - 1; + std::sort(x.begin(), x.end()); + for (Int i = 0; i < nreg; ++i) ys[i] = 2*urand() - 1; + ys[nreg] = ys[0]; + + for (const auto xoffset : {0.0, 0.01, -0.02, 0.1, -0.42, 1.7, -4.2}) { + periodic_cubic1d_make_translation_matrix(nreg, x.data(), xoffset, op.data(), wrk.data()); + for (Int i = 0; i < nreg; ++i) yt1[i] = 0; + for (Int j = 0; j < nreg; ++j) + for (Int i = 0; i < nreg; ++i) + yt1[i] += op[nreg*j + i]*ys[j]; + for (Int i = 0; i < nreg; ++i) + yt2[i] = periodic_cubic1d_interp(nreg, x.data(), ys.data(), x[i] + xoffset); + for (Int i = 0; i < nreg; ++i) + require(almost_equal(yt1[i], yt2[i], 10*eps)); + } + } + + { // The matrix for a uniform grid has max eig 1. + const Int nreg = 6; + std::vector x(nreg+1), op(nreg*nreg), wrk(nreg+1); + for (Int i = 0; i <= nreg; ++i) x[i] = i; + const Real xoffset = 1.2; + periodic_cubic1d_make_translation_matrix(nreg, x.data(), xoffset, op.data(), wrk.data()); + const auto mea = calc_max_eig_amp(nreg, op.data(), nreg, wrk); + require(almost_equal(mea, 1, 2*eps)); + } + + { // Cubic ISL on the uniform-grid periodic translation problem has OOA 3. + const Real xl = -4.2, xu = 1.7, L = xu - xl; + const auto f = [&] (const Real& x) { return std::cos(2*M_PI*(x - xl)/L); }; + const auto error = [&] (const std::vector& x, const std::vector& y) { + Real num = 0, den = 0; + // Ignore the last point, which is periodic; we don't update it. 
+ for (size_t i = 0; i < y.size(); ++i) { + const auto yt = f(x[i]); + num += square(y[i] - yt); + den += square(yt); + } + return std::sqrt(num/den); + }; + Int nstep = 37; + Int nreg = 20; + Real e[2]; + for (Int refine = 0; refine < 2; ++refine) { + nreg *= 2; + nstep *= 2; + const Real xoffset = L/nstep; + std::vector x(nreg+1), op(nreg*nreg), wrk(nreg+1); + std::vector ys[2]; + // Size is nreg rather than nreg+1 because we don't maintain the periodic + // point y[nreg] = y[0]. + for (Int k = 0; k < 2; ++k) ys[k].resize(nreg); + // Make the space-time operator A. + for (Int i = 0; i <= nreg; ++i) x[i] = xl + i*L/nreg; + periodic_cubic1d_make_translation_matrix(nreg, x.data(), xoffset, op.data(), wrk.data()); + // Initial condition. + Int i0 = 0, i1 = 1; + for (Int i = 0; i < nreg; ++i) ys[i0][i] = f(x[i]); + for (Int si = 0; si < nstep; ++si) { + // y1 = A y0 + for (Int i = 0; i < nreg; ++i) ys[i1][i] = 0; + for (Int j = 0; j < nreg; ++j) + for (Int i = 0; i < nreg; ++i) + ys[i1][i] += op[nreg*j + i]*ys[i0][j]; + // At a time t < T, check that the error is large. + if (si == nstep/2) require(error(x, ys[i1]) > 1); + std::swap(i0, i1); + } + e[refine] = error(x, ys[i0]); + } + // At time T, the error decreases with OOA 3. + require(e[0] > 7.95*e[1]); + require(e[0] < 8.05*e[1]); + // Check that we solved the problem to reasonable accuracy. + require(e[1] <= 1e-3); + } + + // The primary purpose of periodic_cubic1d_make_translation_matrix is to + // demonstrate that there is a 1D periodic translation problem for which the + // associated classical cubic ISL space-time matrix has maximum eigenvalue + // amplitude > 1. In this demo, it is > 1 + 1e-3. 
+ require(cubic1d_demo_unstable_problem() >= 1 + 1e-3); +} + +struct NonUniMesh1d { + using Array = std::vector; + + NonUniMesh1d (const Real* xb, const Int ne) + : ne_(ne), L_(xb[ne] - xb[0]), xb_(ne+1) + { + std::copy(xb, xb+ne+1, xb_.begin()); + } + + Int get_ne () const { return ne_; } + + const Array& get_xb() const { return xb_; } + + Real to_periodic (const Real& x) const { + if (x >= xb_[0] && x <= xb_[ne_]) return x; + auto y = (x - xb_[0])/L_; + y = y - std::floor(y); + return xb_[0] + y*L_; + } + + Int in_cell (const Real& x) const { + const auto xp = to_periodic(x); + if (xp <= xb_[0]) return 0; + const Int iper = std::lower_bound(xb_.begin(), xb_.end(), xp) - xb_.begin() - 1; + assert(iper >= 0 ); + assert(iper < ne_); + assert(xp >= xb_[iper ]); + assert(xp <= xb_[iper+1]); + return iper; + } + + Real to_physical (const Int& ie) const { return xb_[ie]; } + + static Int unittest () { + const Real eps = std::numeric_limits::epsilon(); + Int ne = 0; + std::vector xb({-1, 1, 5.}); + NonUniMesh1d m(xb.data(), 2); + if (m.in_cell(1.1) != 1) ++ne; + if (m.in_cell(-1) != 0) ++ne; + if (reldif(m.to_periodic(5.1), -0.9) > 10*eps) ++ne; + if (m.in_cell(5.1) != 0) ++ne; + if (m.in_cell(-1.1) != 1) ++ne; + return ne; + } + +private: + const Int ne_; + const Real L_; + Array xb_; +}; + +class NonUniMesh2d { + const NonUniMesh1d mx_, my_; + const Int nx_; + +public: + NonUniMesh2d (const Real* xb, const Int nx, const Real* yb, const Int ny) + : mx_(xb, nx), my_(yb, ny), nx_(nx) + {} + + Int get_ne () const { return mx_.get_ne() * my_.get_ne(); } + + const NonUniMesh1d& get_mx () const { return mx_; } + const NonUniMesh1d& get_my () const { return my_; } + + void to_periodic (const Real& x, const Real& y, + Real& xp, Real& yp) const { + xp = mx_.to_periodic(x); + yp = my_.to_periodic(y); + } + + void ncell (Int& nx, Int& ny) const { + nx = nx_; + ny = my_.get_ne(); + } + + Int in_cell (const Real& x, const Real& y) const { + const Int ix = mx_.in_cell(x), iy = 
my_.in_cell(y); + return iy*nx_ + ix; + } + + void to_physical (const Int& ci, Real& x, Real& y) const { + const Int yci = ci / nx_; + const Int xci = ci % nx_; + x = mx_.to_physical(xci); + y = my_.to_physical(yci); + } + + static Int unittest () { + const Real eps = std::numeric_limits::epsilon(); + Int ne = 0; + std::vector xb({-1, 1, 5.}), yb({-1, 1, 6.}); + NonUniMesh2d m(xb.data(), 2, yb.data(), 2); + if (m.in_cell(1.1, 1.1) != 3) ++ne; + if (m.in_cell(5, 6) != 3) ++ne; + if (m.in_cell(-1, 1) != 0) ++ne; + if (m.in_cell(1, 1) != 0) ++ne; + Real xp, yp; + m.to_periodic(5.1, -1.2, xp, yp); + if (reldif(xp, -0.9) > 10*eps) ++ne; + if (reldif(yp, 5.8) > 10*eps) ++ne; + if (m.in_cell(5.1, 6.1) != 0) ++ne; + if (m.in_cell(-1.1, -1.1) != 3) ++ne; + return ne; + } +}; + +// Sparse matrix data structures and operations. +struct SparseTriple { + Int m, n; + std::vector rp, ci; // row pointer, column index + std::vector d; // matrix entries +}; + +// d is row major. +static void sparse2dense (const SparseTriple& s, std::vector& d) { + d.resize(s.m*s.n, 0); + for (Int r = 0; r < s.m; ++r) + for (Int j = s.rp[r]; j < s.rp[r+1]; ++j) + d[s.n*r + s.ci[j]] = s.d[j]; +} + +static void apply (const SparseTriple& s, const std::vector& x, + std::vector& y) { + assert(s.n == static_cast(x.size())); + y.resize(s.m); + for (Int r = 0; r < s.m; ++r) { + Real yr = 0; + for (Int j = s.rp[r]; j < s.rp[r+1]; ++j) + yr += s.d[j]*x[s.ci[j]]; + y[r] = yr; + } +} + +// Fill the polynomial interpolant's support with periodically unwrapped +// coordinate values. 
+static void fill_lag_coord (const NonUniMesh1d::Array& xb, const Int& np, + const Int& cell, Real* coord) { + const Int nx = xb.size(); + const Int ne = nx - 1; + const Real L = xb[ne] - xb[0]; + for (Int i = 0; i < np; ++i) { + const Int k = cell + i; + if (k < 0) + coord[i] = xb[(k + ne) % ne] - L; + else if (k >= ne) + coord[i] = xb[k % ne] + L; + else + coord[i] = xb[k]; + } +#ifndef NDEBUG + for (Int i = 1; i < np; ++i) assert(coord[i] > coord[i-1]); +#endif +} + +// Order the degrees of freedom in the operator. +static Int dof (const Int& nx, const Int& ny, + const Int& xe, const Int& ye) { + return (ye % ny)*nx + (xe % nx); +} + +template +void make_ccsl_op_nondiv2d (const NonUniMesh2d& mesh, const Int np, + const Function& integrate, + const Real& dt, SparseTriple& s) +{ + const Int os = (np-1)/2; + Int nex, ney; + mesh.ncell(nex, ney); + const Int ne = nex*ney, n = ne; + const auto& xb = mesh.get_mx().get_xb(); + const auto& yb = mesh.get_my().get_xb(); + + s.m = s.n = ne; + s.rp.resize(ne+1, 0); + + for (Int tie = 0; tie < ne; ++tie) { + const Int tye = tie / nex, txe = tie % nex; + const Int tdof = dof(nex, ney, txe, tye); + assert(tdof >= 0 && tdof < n); + + Real x0, y0; + mesh.to_physical(tie, x0, y0); + Real tx, ty; + integrate(x0, y0, dt, tx, ty); + + const Int sie = mesh.in_cell(tx, ty); + const Int sxe0 = sie % nex, sye0 = sie / nex; + + Real txp, typ; + mesh.to_periodic(tx, ty, txp, typ); + Real xv[12], yv[12], lag_coord[12]; + assert(np <= 12); + fill_lag_coord(xb, np, sxe0 - os, lag_coord); + assert(txp >= lag_coord[0] && txp <= lag_coord[np-1]); + eval_lagrange_poly_basis(np, lag_coord, txp, xv); + fill_lag_coord(yb, np, sye0 - os, lag_coord); + assert(typ >= lag_coord[0] && typ <= lag_coord[np-1]); + eval_lagrange_poly_basis(np, lag_coord, typ, yv); + + Int k = s.rp[tie]; + for (Int dy = -os; dy <= os+1; ++dy) { + const Int sye = (sye0 + dy + ney) % ney; + for (Int dx = -os; dx <= os+1; ++dx) { + const Int sxe = (sxe0 + dx + nex) % nex; + 
const Int sdof = dof(nex, ney, sxe, sye); + assert(sdof >= 0 && sdof < n); + s.ci.push_back(sdof); + s.d.push_back(xv[dx+os]*yv[dy+os]); + k++; + } + } + s.rp[tie+1] = k; + } + + assert(s.d.size() == s.ci.size()); + assert(s.rp[s.m] == static_cast(s.d.size())); +} + +// Check that the 'integrate' function provides doubly-periodic outputs over the +// domain of the mesh. +template +void check_periodicity (const NonUniMesh2d& mesh, const Real& dt, const Function& integrate) { + static const Real rtol = 1e2*std::numeric_limits::epsilon(); + static const Real atol = std::numeric_limits::epsilon(); + + Int nex, ney; + mesh.ncell(nex, ney); + const auto& xb = mesh.get_mx().get_xb(); + const auto& yb = mesh.get_my().get_xb(); + + // Check that the domain is [0,1]^2. + require(std::abs(xb[nex] - xb[0] - 1) <= atol); + require(std::abs(yb[ney] - yb[0] - 1) <= atol); + + // Relative/absolute error checks. + const auto check_error = [=] (Real x0, Real tx0, Real x1, Real tx1) { + require(reldif(tx0 - x0, tx1 - x1) <= rtol || + std::abs((tx0 - x0) - (tx1 - x1)) <= atol); + }; + + // Run over the sides of the domain and check periodicity. + const auto check = [&] (Real x0, Real x1, Real y0, Real y1) { + Real tx0, ty0, tx1, ty1; + integrate(x0, y0, dt, tx0, ty0); + integrate(x1, y1, dt, tx1, ty1); + check_error(x0, tx0, x1, tx1); + check_error(y0, ty0, y1, ty1); + }; + Int n = 7*std::max(nex, ney); + for (int i = 0; i <= n; ++i) { + const Real a = Real(i)/n, x = a*xb[0] + (1-a)*xb[nex]; + check(x, x, yb[0], yb[ney]); + } + for (int i = 0; i <= n; ++i) { + const Real a = Real(i)/n, y = a*yb[0] + (1-a)*yb[ney]; + check(xb[0], xb[nex], y, y); + } +} + +static void setup_demo_problem (const std::vector& xb, const std::vector& yb, + const Real dt, SparseTriple& s, const Int np, + const bool check = false) { + const Int nex = xb.size() - 1, ney = yb.size() - 1; + // Shear (nondivergent) flow. Parameter values were obtained from a search for + // an unstable operator. 
+ const auto integrate = [&] (const Real& x0, const Real& y0, const Real& dt, + Real& xf, Real& yf) { + const auto speed = 1 + std::cos(2*M_PI*(0.342 + x0 - y0)); + xf = x0 + speed*dt; + yf = y0 + speed*dt; + }; + NonUniMesh2d mesh(xb.data(), xb.size()-1, yb.data(), yb.size()-1); + if (check) check_periodicity(mesh, dt, integrate); + make_ccsl_op_nondiv2d(mesh, np, integrate, dt, s); +} + +static Real cubic2d_demo_unstable_problem (const bool unstable = true) { + const Int nex = 15, ney = 13, ne = nex*ney; + const Real dt = unstable ? 0.2761 : 0.1; + std::vector xb(nex+1), yb(ney+1), op(ne*ne), wrk; + const Real dx = 1.0/nex, dy = 1.0/ney; + for (Int i = 0; i <= nex; ++i) xb[i] = i*dx; + for (Int i = 0; i <= ney; ++i) yb[i] = i*dy; + SparseTriple s; + setup_demo_problem(xb, yb, dt, s, 4, true); + sparse2dense(s, op); + const auto mea = calc_max_eig_amp(ne, op.data(), ne, wrk); + return mea; +} + +static void set_ic (const std::vector& xb, const std::vector& yb, + std::vector& z) { + const Int nex = xb.size() - 1, ney = yb.size() - 1, ne = nex*ney; + z.resize(ne); + for (Int iy = 0; iy < ney; ++iy) { + const Real fy = std::cos(2*M_PI*(yb[iy] - yb[0])/(yb[ney] - yb[0])); + for (Int ix = 0; ix < nex; ++ix) + z[nex*iy + ix] = fy*std::cos(2*M_PI*(xb[ix] - xb[0])/(xb[nex] - xb[0])); + } +} + +static Real calc_l2_reldif (const std::vector& a, const std::vector& b) { + Real num = 0, den = 0; + for (size_t i = 0; i < a.size(); ++i) { + num += square(a[i] - b[i]); + den += square(a[i]); + } + return std::sqrt(num/den); +} + +// The example problem generically has order of accuracy np-1 as long as shear +// is not permitted to continue for too long. Demonstrate this: +// If reverse, integrate a smooth field forward for time T/2, then backward to +// time T, and compare the result with the initial condition. +// If not reverse, integrate forward for time T, then compare with the +// analytical solution at time T. 
+static Real measure_ooa (const Int np, const bool reverse) { + Int nex = 35, ney = 32, nstep = 22; + const Real x0 = -0.4, y0 = 0.2; + Real dt = 0.012, err[2]; + const Real T = dt*nstep; + for (Int refine = 0; refine < 2; ++refine) { + // Refinement parameters. + nex *= 2; + ney *= 2; + nstep *= 2; + dt /= 2; + const Int ne = nex*ney; + // Make forward and backward ops. + std::vector xb(nex+1), yb(ney+1); + for (Int i = 0; i <= nex; ++i) xb[i] = x0 + Real(i)/nex; + for (Int i = 0; i <= ney; ++i) yb[i] = y0 + Real(i)/ney; + SparseTriple op1, op2; + setup_demo_problem(xb, yb, dt, op1, np, refine == 0); + setup_demo_problem(xb, yb, reverse ? -dt : T, op2, np, refine == 0); + // Initial conditions. + std::vector z0, zs[2]; + set_ic(xb, yb, z0); + zs[0] = z0; + zs[1].resize(z0.size()); + // Time step. + Int i0 = 0, i1 = 1; + for (Int ti = 0; ti < nstep; ++ti) { + apply(op1, zs[i0], zs[i1]); + std::swap(i0, i1); + } + if (reverse) { + for (Int ti = 0; ti < nstep; ++ti) { + apply(op2, zs[i0], zs[i1]); + std::swap(i0, i1); + } + err[refine] = calc_l2_reldif(z0, zs[i0]); + } else { + apply(op2, z0, zs[i1]); + err[refine] = calc_l2_reldif(zs[i1], zs[i0]); + } + } + return std::log2(err[0]/err[1]); +} + +void cubic2d_unittest () { + const auto eps = std::numeric_limits::epsilon(); + require(NonUniMesh1d::unittest() == 0); + require(NonUniMesh2d::unittest() == 0); + // The primary purpose of make_ccsl_op_nondiv2d is to demonstrate that there + // is a 2D periodic nondivergent-flow problem, with uniform grid, for which + // the associated classical cubic ISL space-time matrix has maximum eigenvalue + // amplitude > 1. In this demo, it is > 1 + 1e-2. + require(cubic2d_demo_unstable_problem() >= 1 + 1e-2); + // Make sure it's stable if, for example, dt is not from the seach for + // unstable parameter values. + require(almost_equal(cubic2d_demo_unstable_problem(false), 1, 50*eps)); + // Order of accuracy is np-1. 
+ for (const Int np : {4, 6, 8}) + for (const bool reverse : {true, false}) { + const auto ooa = measure_ooa(np, reverse); + const Real d = 0.025; // permit 2.5% deviation from theoretical OOA + require(ooa > (1 - d)*(np - 1)); + require(ooa < (1 + d)*(np - 1)); + } +} + +int main (int argc, char** argv) { + // Show a 1D periodic translation problem on a nonuniform grid for which the + // associated classical cubic ISL space-time matrix has maximum eigenvalue + // amplitude > 1 + 1e-3. + cubic1d_unittest(); + // Show a 2D periodic nondivergent-flow problem on a uniform grid for which + // the associated classical cubic ISL space-time matrix has maximum eigenvalue + // amplitude > 1 + 1e-2. + cubic2d_unittest(); +} diff --git a/methods/islet/figures/amb3.hy b/methods/islet/figures/amb3.hy new file mode 100644 index 0000000..6382bc9 --- /dev/null +++ b/methods/islet/figures/amb3.hy @@ -0,0 +1,870 @@ +;;; Collection of utils. + +;;(require [hy.contrib.walk [let]]) +(import sys copy math os re) + +;; when passing kwargs to another function like pl.plot, the dictionary should +;; be like {'option value}, not {:option value}. + +(defmacro sdo [&rest code] + "Scoped do. Just like do, but vars def'ed inside stay there." + `((fn [] ~@code))) + +(defmacro sv [&rest code] + "The ultimate in laziness." + `(setv ~@code)) + +(defmacro svb [&rest args] + "Sub-bracketed setv, like Lisp's let." + `(do ~@(map (fn [e] + `(setv ~(first e) ~(last e))) + args))) + +(defn symbol [string] (HySymbol string)) + +(defmacro/g! svifn [&rest args] + "setv if none" + `(do ~@(map (fn [e] + `(if (none? ~(first e)) (setv ~(first e) ~(last e)))) + (zip (cut args 0 (len args) 2) + (cut args 1 (len args) 2))))) + +(defmacro/g! 
expect [expr &optional [answer True]] + (setv expr-code (str expr)) + (setv answer-code (str answer)) + `(sdo (setv ~g!got ~expr) + (setv ~g!want ~answer) + (if (not (= ~g!got ~g!want)) + (print (.format "ERROR: {0:s} = {1:s} NOT EQUAL TO {2:s} = {3:s}" + ~expr-code (str ~g!got) ~answer-code (str ~g!want)))))) + +(defmacro/g! in-require [expr] + (setv expr-code (str expr)) + `(sdo (setv ~g!value ~expr) + (unless ~g!value + (sdo (import inspect) + (setv ~g!frame (inspect.currentframe) + ~g!file (. ~g!frame f-code co-filename) + ~g!lineno (. ~g!frame f-lineno)) + (raise (Exception (.format "IN-REQUIRE {:s} {:d}: {:s}" + ~g!file ~g!lineno ~expr-code))))))) + +(defmacro assert-type [v t] `(assert (= (type ~v) ~t))) + +(defmacro dont [&rest code] + "No-op." + `((fn []))) + +(defmacro raisefmt [&rest args] + `(raise (Exception (.format ~@args)))) + +(defmacro interact [&rest code] + "Block code for interactive eval, but that is silenced when the .hy file is + run as a program." + `(if (= --name-- "__main__") + (dont ~@code) + (do ~@code))) + +(defmacro if-main [&rest code] + "Block code when run with a main, but silence on import." + `(if (= --name-- "__main__") + (do ~@code) + (dont ~@code))) + +(defmacro prf [&rest args] + `(print (.format ~@args))) +(defmacro prfno [&rest args] + `(print (.format ~@args) :end "")) +(defmacro prff [fid &rest args] + `(print (.format ~@args) :file ~fid)) +(defmacro prffno [fid &rest args] + `(print (.format ~@args) :end "" :file ~fid)) + +(defmacro prc [sym] + `(prf ~(+ (name sym) " {}") ~sym)) + +(defmacro mapply [func &rest args] + "Apply func. Works for keyword args, unlike apply. (Probably broken in many + ways compared with apply, however.)" + `(~func ~@args)) + +(defmacro dispatch-dict [d m] + "Function object dispatch helper." + `(try (get ~d ~m) + (except [] (print "No function" (name ~m)) None))) + +(defmacro/g! inc! [x &optional [increment 1]] + `(do (setv ~x (+ ~x ~increment)) + ~x)) + +(defmacro/g! dec! 
[x &optional [increment 1]] + `(do (setv ~x (- ~x ~increment)) + ~x)) + +(defmacro/g! case/test [op keyform &rest entries] + "Case based on a test with op. Use :else as default action." + `(do (setv ~g!keyform-result ~keyform) + (cond ~@(map (fn [entry] + (+ (if (= (first entry) ':else) + '[True] ; If :else, use True in cond. + `[(~op ~g!keyform-result ~(first entry))]) + `[~@(rest entry)])) + entries) + ;; If no case matches, return None. + [True None]))) + +(defmacro/g! case/eq [&rest forms] + "Case using = for the key. Thus, the 'keylist' is not really a list, but an + atom." + `(case/test = ~@forms)) + +(defmacro/g! case/in [&rest forms] + "Case using 'in' for the key. Thus, each keylist *must* indeed be a list." + `(case/test in ~@forms)) + +(defmacro geton [&rest forms] + "Like get, but return None instead of raising KeyError on failure." + `(try (get ~@forms) + (except [] None))) + +;; I want to switch to lfor in new code, but so old code doesn't break, provide +;; list-comp, which was removed in Hy 0.15. +(defmacro list-comp [transform range] + `(lfor ~(first range) ~(second range) ~transform)) + +;; Inject the variable(s) first? or last?. +(defmacro/g! for-last [it &rest body] + `(do + (setv ~g!last (first (last (enumerate ~(second it))))) + (for [(, ~g!i ~(first it)) (enumerate ~(second it))] + (setv last? (= ~g!i ~g!last)) + ~@body))) + +(defmacro/g! for-first-last [it &rest body] + `(do + (setv ~g!first (first (first (enumerate ~(second it)))) + ~g!last (first (last (enumerate ~(second it))))) + (for [(, ~g!i ~(first it)) (enumerate ~(second it))] + (setv first? (= ~g!i ~g!first)) + (setv last? (= ~g!i ~g!last)) + ~@body))) + +;; Use this instead of macroexpand to get output stripped of the Hy object +;; ctors. +(defn ppme [quoted-form] + (sv sym-dict {} b (Box) b.sym-num 0 + b.after-open True) + (defn sym [e] + (unless (in e sym-dict) + (assoc sym-dict e (.format "sym-{:d}" b.sym-num)) + (inc! 
b.sym-num)) + (get sym-dict e)) + (defn prl [e ldelim rdelim] + (prfno "{:s}" (+ (if b.after-open "" " ") ldelim)) + (sv b.after-open True) + (for [li e] (rec li)) + (prfno "{:s}" rdelim)) + (defn atom? [e] + (in "quote" (first e))) + (defn rec [e] + (setv t (type e)) + (case/in t + [[hy.models.HyFloat HyInteger] (print (+ " " (str e)) :end "")] + [[HyExpression] + (if (atom? e) + (for [li e] (rec li)) + (prl e "(" ")"))] + [[HyList] (prl e "[" "]")] + [[HyString] (print (.format " \"{:s}\"" e) :end "")] + [:else + (unless b.after-open (print " " :end "")) + (sv b.after-open False) + (cond [(in "keyform" e) + (print (sym e) :end "")] + [(in "quote" e) + (print "'" :end "") + (sv b.after-open True)] + [:else (print e :end "")])])) + (setv h (macroexpand quoted-form)) + (print h) + (rec h) + (print)) + +(defclass Box [] + "A box to hold values to be written in closures." + (defn --repr-- [me] + (str me.--dict--))) +(defn class? [o] (= (type o) (type (Box)))) +(defn has-field? [o field] (in field o.--dict--)) +(defn pod-number? [n] (in (type n) (, int float))) +(defn pod-or-len [n] (if (pod-number? n) n (len n))) +(defn list? [coll] (= (type coll) list)) +(defn tuple? [coll] (= (type coll) tuple)) +(defn dict? [coll] (= (type coll) dict)) +(defn fn? [f] (= (type f) (type (fn [])))) + +(defmacro/g! box-slots [&rest slots] + "Example: (setv hi 3 bye \"so long\" b (box-slots 'hi 'bye))" + `(do (setv ~g!box (Box)) + ~@(map (fn [s] + ;; handles _ vs - in names + (setv g!field ((. (str (second s)) replace) "-" "_") + ;; inject symbol directly + g!value (second s)) + `(assoc (. 
~g!box --dict--) ~g!field ~g!value)) + slots) + ~g!box)) + +(if-main + (expect (sdo (setv hi "so long" foo-bar 3 b (box-slots 'hi 'foo-bar)) + b.foo-bar) + 3)) + +(defn strleq [s ref] + (if (< (len s) (len ref)) + False + (= (cut s 0 (len ref)) ref))) + +(defn mapl [fun &rest args] + (list (map fun #*args))) + +(defn assoc-nested [d keys val] + "Associate in a nested dict, creating new sub-dicts as needed." + (setv dn d) + (for [key (cut keys 0 -1)] + (if-not (in key dn) + (assoc dn key {})) + (setv dn (get dn key))) + (assoc dn (get keys -1) val)) + +(defn assoc-nested-append [d keys val] + "Associate in a nested dict, creating new sub-dicts as needed. The value is + intended to be an item to go into a list. If the list exists, append to it; if + not, create it with the item as the only element." + (assoc-nested d keys (+ (or (geton d #*keys) []) [val]))) + +;; Very nice for writing .py files from C++ with data, and then running in a +;; parse/plot program to load the data. +(defn get-python-mod [filename &optional [basedir ""]] + "Return the module for basedir/filename." + (defn get-module-basename [fn] + (setv name ((. os path basename) fn)) + (get (.split name ".") 0)) + (setv name (get-module-basename filename)) + (if (not (empty? basedir)) + (+= basedir ".")) + (import importlib) + (importlib.import-module (.format "{:s}{:s}" basedir name))) + +;; Set up the path so that os.* calls know where to find .so files. +(defn set-ld-path [] + (setv stdout (os.popen ". ~/sems/source.sh; env|grep LD_LIBRARY_PATH") + paths (stdout.read) + paths (cut paths (inc (.find paths "=")) -1) + (get os.environ "LD_LIBRARY_PATH") paths)) + +;; Probably already exists, but haven't looked. +(defn unzip [coll-of-tups] + "Unzip [(a1 b1 ...) (a2 b2 ...) ...] into [[a1 a2 ...] [b1 b2 ...] ...]." + (if (empty? 
coll-of-tups) + coll-of-tups + (do (setv uz []) + (for [i (range (len (get coll-of-tups 0)))] + (.append uz (list (list-comp (get e i) [e coll-of-tups])))) + uz))) + +;; Basic cmp function that wrappers can call. +(defn cmp-fn [a b] + (int (cond [(< a b) -1] + [(> a b) 1] + [:else 0]))) + +(defn sort! [coll] + "Functional sort." + (.sort coll) + coll) + +(defn sort [coll] + "Functional sort." + (setv c (copy.deepcopy coll)) + (sort! c)) + + +(defn extend! [coll1 coll2] + (.extend coll1 coll2) + coll1) + +(defn extend [coll1 coll2] + (setv c (copy.deepcopy coll1)) + (extend! c coll2)) + +(defn find [e coll &optional [always-list False]] + (setv f []) + (for [i (range (len coll))] + (if (= e (get coll i)) + (.append f i))) + (if (and (not always-list) (= 1 (len f))) (first f) f)) + +(defn first-or-none [coll] + (if (empty? coll) None (first coll))) + +(defn safe-len [x] + (try (len x) (except [] 1))) + +(defn sscanf [str-to-parse fmt &optional split] + """ + Kind of like sscanf. Format is like this: 'i,if,f,s', where if is for + int(float(.)). + """ + (setv str2type {"i" int + "if" (fn [x] (int (float x))) + "f" float + "s" str}) + (setv ts (list-comp (get str2type s) + [s (.split fmt ",")])) + (list-comp (t s) + [(, t s) (zip ts (.split str-to-parse split))])) + +(defn split-convert [ln conversion] + "Example: + (split-convert \"COMPOSE> ne 24 nmax 480 qsize 5 slmpi 1\" + \"ssisisisi\") + 'conversion' can be shorter than (.split ln), in which case the remainder + of line is omitted." + (try + (list-comp ((get {"i" int "s" identity "f" float} + (get conversion i)) tkn) + [(, i tkn) (enumerate (cut (.split ln) 0 (len conversion)))]) + (except [] + (prf "split-convert failed on:\n {:s}\nlen ln {:d} len conversion {:d}" + ln (len (.split ln)) (len conversion))))) + +(defn cc [x] (* 0.5 (+ (cut x 0 -1) (cut x 1)))) + +(defn mean [coll] + (/ (sum coll) (len coll))) + +(defn median [coll] + (setv c (list (copy.deepcopy coll))) + (.sort c) + (if (odd? 
(len c)) + (get c (int (math.floor (/ (len c) 2)))) + (sdo (setv i (dec (int (/ (len c) 2)))) + (mean (cut c i (+ i 2)))))) + +(defn variance [coll] + (setv mu (mean coll)) + (/ (sum (map (fn [e] (** (- e mu) 2)) coll)) + (len coll))) + +(defn cross-prod [x y] + (defn one [i0 i1] + (- (* (get x i0) (get y i1)) (* (get x i1) (get y i0)))) + [(one 1 2) (one 2 0) (one 0 1)]) + +(defn readall [filename] + (with [f (open filename "r")] + (f.read))) + +(defn grep-str [pattern str] + (import re) + (re.findall (+ "(?:" pattern ").*") str :flags re.MULTILINE)) + +(defn grep [pattern filename] + (grep-str pattern (with [f (open filename "r")] (f.read)))) + +(defn sed-str [pat-repls str] + (import re) + (for [pr pat-repls] + (sv str (re.sub (first pr) (second pr) str :flags re.MULTILINE))) + str) + +(defn sed [pat-repls file-in file-out] + (sv str (sed-str pat-repls (with [f (open file-in "r")] (f.read)))) + (with [f (open file-out "w")] (f.write str))) + +(if-main + (sv s (sed-str (, (,"BAR" "yes") (, "FOO" "cow")) + "foo BAR FOO BAR\nBAR hold\nbar FOO moo")) + (expect s "foo yes cow yes\nyes hold\nbar cow moo")) + +(defn re-split-convert [converts pat ln] + "Ex: (re-split-convert (, int float) regex ln)" + (try + (list-comp (convert e) + [(, convert e) (zip converts + (first (re.findall pat ln)))]) + (except [e Exception] + (print ln)))) + +(defn ooa [y &optional [xfac 2] [x None]] + (setv r []) + (for [i (range (dec (len y)))] + (.append r (/ (- (math.log (get y i)) (math.log (get y (inc i)))) + (if x + (- (math.log (get x i)) (math.log (get x (inc i)))) + (math.log xfac))))) + r) + +(defn ooa-from-file [fname fieldno &optional anchor] + "Read text from file fname, optionally scan for lines beginning with anchor, + and read symbol fieldno, starting from 0. Return a list of OOAs." + (sv txt (.split (readall fname) "\n") + errs []) + (for [ln txt] + (unless (none? 
anchor) + (when (or (< (len ln) (len anchor)) + (!= anchor (cut ln 0 (len anchor)))) + (continue))) + (.append errs (float (get (.split ln) fieldno)))) + (, errs (ooa errs))) + +(defn single? [coll] + (or (not (coll? coll)) + (= (len coll) 1))) + +(if-main + (expect (single? (, 'gauss))) + (expect (single? 'gauss)) + (expect (not (single? (, 'sin 'gauss))))) + +(setv *when-inp-verbosity* 2) + +(defn inp [name] + (setv b (in name sys.argv)) + (when (or (and b (> *when-inp-verbosity* 0)) + (> *when-inp-verbosity* 1) + (= (len sys.argv) 1)) + (prf "{:s}: {:s}" (if b "DO" "av") name)) + b) + +(defmacro/g! when-inp [fn-name-args &rest body] + "Example: + (when-inp [\"hi\" {:hi int :bye float}] + (print hi bye))" + (if (> (len fn-name-args) 2) + (raise (Exception "(when-inp [fn-name &optional args] body)"))) + (unless (or (= (len fn-name-args) 1) + (is (type (second fn-name-args)) hy.models.HyDict)) + (raise (Exception "args must be a dict"))) + (setv fn-name (first fn-name-args) + args (if (> (len fn-name-args) 1) (second fn-name-args))) + (defn dissect-args [args] + (setv alist [] arg-str "") + (for [(, i e) (enumerate (zip (cut args 0 (len args) 2) + (cut args 1 (len args) 2)))] + (.append alist + `(setv + ;; grab "kw" from ":kw" and make it a symbol + ;;~(HySymbol (cut (first e) 2)) + ~(HySymbol (name (first e))) + ;; apply type conversion + (try (~(second e) (get sys.argv (+ ~i 2))) + (except [] + (.format "Could not parse sys.argv {:d}: {:s}" + (+ ~i 2) + (get sys.argv (+ ~i 2))))))) + (+= arg-str (+ " " (name (first e)) ": " (name (second e))))) + (, alist arg-str)) + (if args + (do + (setv (, alist arg-str) (dissect-args args)) + `(sdo + (import amb3) + (setv ~g!b (in ~fn-name sys.argv)) + (when (or (and ~g!b (> amb3.*when-inp-verbosity* 0)) + (> amb3.*when-inp-verbosity* 1) + (= (len sys.argv) 1)) + (prf "{:s}: {:s}:{:s}" (if ~g!b "DO" "av") + ~fn-name + ~arg-str)) + (when ~g!b + (if (< (- (len sys.argv) 2) (len ~args)) + (raise (Exception (+ "in " ~fn-name + " 
args has more entries than" + " are available in sys.argv")))) + ~@alist + ~@body))) + `(sdo (when (inp ~fn-name) ~@body)))) + +(if-main + (when-inp ["hi" {:bye int :aye float}] (print bye aye)) + (when-inp ["hello"] (print "hello"))) + +(defn and-coll [pred coll] + (reduce (fn [accum e] (and accum (pred e))) coll True)) + +(defn or-coll [pred coll] + (reduce (fn [accum e] (or accum (pred e))) coll False)) + +(defn none-in [items coll] + (and-coll (fn [e] (not (in e coll))) items)) + +(defn any-in [items coll] + (or-coll (fn [e] (in e coll)) items)) + +(if-main + (expect (none-in (, "hi" "bye") "bye hello") False) + (expect (any-in (, "hi" "bye") "bye hello")) + (expect (none-in (, "hi" "bye") "adieu hello")) + (expect (any-in (, "hi" "bye") "adieu hello") False) + (expect (none-in '(1 2 3) (range 10)) False) + (expect (none-in '(1 2 3) (range 4 10))) + (expect (none-in (, "hi" "bye") ["bye" "hello"]) False) + (expect (none-in (, "hi" "bye") ["adieu" "hello"]))) + +(defn str-ctypes [s] (ctypes.c-char-p (bytes s :encoding "ascii"))) + +;;; Numpy utils. + +(try + (do + (import [numpy :as npy] ctypes) + + (defn array-range [&rest args] + (npy.array (list (range #*args)) :dtype int)) + + (defn array-if-not [A &optional [dtype float]] + (if (= (type A) npy.ndarray) A + (npy.array A :dtype dtype))) + + (defn as-ctypes [x] + (npy.ctypeslib.as-ctypes x)) + + (defn vectorize [A] (npy.reshape A A.size)) + (defn row-vec [v] (npy.reshape v (, 1 v.size))) + (defn col-vec [v] (npy.reshape v (, v.size 1))) + (defn vector? [v] + (or (and (= (len v.shape) 2) (= (min v.shape) 1)) + (= (len v.shape) 1))) + (defn ones-row-vec [n] (npy.ones (, 1 (pod-or-len n)))) + (defn ones-col-vec [n] (npy.ones (, (pod-or-len n) 1))) + + (defn sort-with-p [ai] + "Return sorted ai and permutation array. Each entry of a must have the same + type." + (if (empty? 
ai) + (, ai []) + (do (setv dtype [(, (str "data") (type (get ai 0))) (, (str "p") int)] + a (npy.array (list-comp (, e i) [(, i e) (enumerate ai)]) + :dtype dtype)) + (setv a (npy.sort a :order (str "data"))) + (tuple (unzip a))))) + + (defn epsilon [&optional [type float]] + (. (npy.finfo type) eps)) + + (defn np-map [f a] + (npy.array (list (map f a)))) + + (defn dbg-array->np-array [a m n] + (npy.transpose (npy.reshape (npy.array a) (, n m)))) + + (defn reldif [a b &optional [norm None]] + (if (and (pod-number? a) (pod-number? b)) + (/ (abs (- a b)) (max (abs a) (abs b))) + (sdo (setv aa (npy.array a) + ba (npy.array b)) + (/ (npy.linalg.norm (- aa ba) :ord norm) + (npy.linalg.norm aa :ord norm))))) + + (defn np-set-print [] + (setv float-format (fn [x] + (cond [(zero? x) (+ " ." (* " " 8))] + [(= x 1) (+ " 1" (* " " 8))] + [:else (.format "{:10.3e}" x)])) + complex-format (fn [x] + (cond [(zero? x) (+ " ." (* " " 19))] + [(= x 1) (+ " 1" (* " " 19))] + [:else (if (zero? x.imag) + (.format (+ "{:10.3e}" (* " " 11)) x.real) + (.format "{:21.3e}" x))])) + int-format (fn [x] + (if (zero? x) + (+ " ." (* " " 1)) + (.format "{:2d} " x)))) + (npy.set-printoptions + :precision 2 + :linewidth 1000 + :formatter {"float" float-format + "complexfloat" complex-format + "int" int-format})) + + (defn triple-read-file [fn] + (setv triple []) + (with [f (open fn "r")] + (while True + (setv ln (f.readline)) + (if (empty? 
ln) (break)) + (setv d (sscanf ln "i,i,f")) + (.append triple (tuple d)))) + triple) + + (defn triple->dense [triple &optional [base 0]] + (setv (, row col val) (unzip triple) + A (sdo (setv d (if (= base 0) 1 0) + M (+ (max row) d) + N (+ (max col) d)) + (npy.zeros (, M N)))) + (for [e triple] + (setv r (get e 0) + c (get e 1)) + (if (> base 0) + (setv r (- r base) + c (- c base))) + (setv v (get e 2) + (get A r c) v)) + A) + + (defn dense-extract-block-diag [A bs] + (setv D (npy.zeros (npy.shape A))) + (for [br (range (// (npy.size A 0) bs))] + (setv r0 (* br bs) + cs (list (range r0 (+ r0 bs)))) + (for [i (range bs)] + (setv r (+ r0 i) + (get (get D r) cs) (get (get A r) cs)))) + D) + + (defn dense-max-norm [A] + (setv m1n 0) + (for [r (range (npy.size A 0))] + (setv r1n (as-> (get A r) it + (npy.abs it) + (npy.sum it)) + m1n (max m1n r1n))) + m1n) + + (defn pod-number? [n] + (in (type n) (, int float npy.int64 npy.float64))) + + (defn np-array? [a] (= (type a) npy.ndarray)) + + (defn conforms? [v u] + (and (np-array? u) (= u.shape v.shape))) + + (defn s-all [] (slice None)) + (defn s-all-rev [] (slice None None -1)) + + (defn idx-arr [A rows cols] + (get (get A rows) (, (s-all) cols))) + + (defn antidiag [v] + (get (npy.diag (get (npy.array v) (s-all-rev))) (s-all-rev))) + ) + (except [] + (do + (defn np-array? [a] False) + ))) + +;;; Matplotlib utils. + +(try + (do + (import matplotlib [matplotlib.pyplot :as pl]) + + (defn my-grid [&optional ls zorder] + (svifn ls "-" zorder -1) + (pl.grid True :lw 0.5 :ls ls :color (, 0.8 0.8 0.8) :zorder zorder + :which "both") + (.set_axisbelow (pl.gca) True)) + + (defn dispfig [&optional fn-prefix [format "pdf"] [tight True] [nowarn False]] + (import warnings) + (with [(warnings.catch-warnings)] + (when nowarn (warnings.filterwarnings "ignore")) + (when tight (pl.tight-layout)) + (if (or (not fn_prefix) (empty? fn-prefix)) + (pl.show) + (pl.savefig (+ fn-prefix (+ "." 
format)) + :format format :bbox-inches "tight")))) + + (defclass pl-plot [] + (defn --init-- [me figsize filename &optional [format None] + [tight True] [nowarn False]] + (setv me.filename filename + me.format (if (none? format) "pdf" format) + me.tight tight me.nowarn nowarn) + (pl.close) + (pl.figure :num 1 :figsize figsize)) + (defn cleanup [me] + (dispfig me.filename :format me.format :tight me.tight :nowarn me.nowarn)) + (defn --enter-- [me] me) + (defn --exit-- [me &rest args] (me.cleanup)) + (defn --del-- [me])) + + ;; To get Type 1 fonts only. From + ;; http://nerdjusttyped.blogspot.com/2010/07/type-1-fonts-and-matplotlib-figures.html + ;; The third one in particular really blows up rendering time, so switch this + ;; block to False during development iterations. + (defn pl-require-type1-fonts [] + (import matplotlib) + (assoc matplotlib.rcParams + "ps.useafm" True + "pdf.use14corefonts" True + "text.usetex" True)) + + (defn imshow-matrix [A] + (pl.imshow A :interpolation "none") + (pl.show)) + + (defn iml [A] + (pl.imshow (npy.log10 (npy.abs A)) :interpolation "none")) + + (defn pad-lim [lim &optional [pad 0.05] [mult False]] + (if mult + (do (, (* (first lim) (- 1 pad)) + (* (second lim) (+ 1 pad)))) + (do (setv d (- (second lim) (first lim)) + delta (* pad d)) + (, (- (first lim) delta) + (+ (second lim) delta))))) + + (defn axis-tight-pad [&optional [pad 0.05] [mult False]] + (pl.axis "tight") + (setv xl (pl.xlim) yl (pl.ylim)) + (pl.xlim (pad-lim xl pad mult)) + (pl.ylim (pad-lim yl pad mult))) + + (defn reset-colors [] + (.set-prop-cycle (pl.gca) None)) + + (defn good-subplot-dims [n &optional [pref-horiz False]] + (setv d (cond [(< n 5) (case/eq n [1 (, 1 1)] [2 (, 2 1)] + [3 (, 3 1)] [4 (, 2 2)])] + [(< n 7) (, 3 2)] + [(< n 10) (, 3 3)] + [(< n 13) (, 4 3)] + [(< n 17) (, 4 4)] + [:else (, 5 (int (math.ceil (/ n 5))))])) + (if pref-horiz + (, (second d) (first d)) + d)) + + (defn get-linestyle-word [char] + (get {"-" "solid" "--" "dashed" ":" 
"dotted" "-." "dashdot"} char)) + + (defn set-tick-fontsize [fs] + (for [ax (, "xaxis" "yaxis")] + (sv ticks ((. (get (. (pl.gca) --dict--) ax) + get-major-ticks))) + (for [tick ticks] + ((. tick label set-fontsize) fs)))) + + (defn make-reference-slope-triangle [x-span y-span slope pattern + &optional [kwargs-plot None] + [kwargs-text None] + [opposite None]] + (assert (= 2 (len x-span))) + (assert (= 2 (len y-span))) + (svifn kwargs-plot {}) + (svifn kwargs-text {}) + (svifn opposite False) + (sv (, x0 x1) x-span (, y0 y1) y-span + dx (- x1 x0) dy (- y1 y0)) + (if opposite + (do (sv x [x0 x1 x1 x0] + y [y1 y1 y0 y1])) + (do (sv x [x0 x1 x0 x0] + y [y0 y0 y1 y0]))) + (pl.plot #*[x y pattern] #**kwargs-plot) + (when opposite + (if (none? kwargs-text) (sv kwargs-text {})) + (assoc kwargs-text "horizontalalignment" "right" "verticalalignment" "top")) + (pl.text #*[(if opposite (- x1 (* 0.1 dx)) (+ x0 (* 0.1 dx))) + (if opposite (- y1 (* 0.1 dy)) (+ y0 (* 0.1 dy))) (str slope)] + #**kwargs-text)) + + ) (except [] )) + +;;; More extensive unit tests. +(if-main + (expect + (case/eq 'hi + ('hit 'nope) + ('hi 'bark 'yep)) + 'yep) + (expect + (case/in 'hello + ['(bye hello) 'nope] + ('(hi) 'yep) + ('(4) 'another-nope)) + 'nope) + (expect + (case/in 'hi + ('(bye hello) 'nope) + ('(hi) 'yep) + ('(4) 'another-nope)) + 'yep) + (expect + (case/in 'hi + ('(bye hello) 'nope) + ('hi 'yep) + ('(4) 'another-nope)) + 'yep) + (expect + (case/test in 'hi + ('(bye hello) 'nope) + ('hi 'yep) + ('(4) 'another-nope)) + 'yep) + (expect (case/eq 4 [5 'bye] [4 'hi]) + 'hi) + (expect + (case/in 'hi + ('(bye hello) 'nope)) + None) + (expect + (case/in 'hi + ('(bye hello) 'nope) + (:else 'woot)) + 'woot) + (expect + (sdo (setv key 'hi) + (case/in key + ('(bye hello) 'nope) + ([key] 'yup))) + 'yup) + + (expect + (do + (sv a None b 4 c "hi" d None) + (svifn a "hi" b 3 c "bye") + (and (= a "hi") (= b 4) (= c "hi") (none? 
d)))) + (expect + (do + (sv a None) + (svifn a "hi") + (= a "hi"))) + (expect + (do + (sv a 5) + (svifn a "hi") + (= a 5))) + + (expect + (do (sv a 1 b "hi" c 'd) + (svb (ab 1) (bb "hi") (cb 'd)) + (and (= a ab) (= b bb) (= c cb)))) + + (when-inp ["test-pretty-print"] + (ppme '(case/in 'hi + (['hi 'bye] 'hi) + ('[foo] (for [d dinos] + (print (.format "{:s} goes {:d}" + d.name d.sound)))) + ('[bar] + (defn axis-tight-pad [&optional [pad 0.05] [mult False] + [foo 3]] + (pl.axis "tight") + (setv xl (pl.xlim) yl (pl.ylim)) + (pl.xlim (pad-lim xl pad mult)) + (pl.ylim (pad-lim yl pad mult))) + (axis-tight-pad))))) + + (when-inp ["ooa-from-file" {:fname str :anchor str :fieldno int}] + (sv (, errs ooas) (ooa-from-file fname fieldno :anchor anchor)) + (for [i (range (len errs))] + (prf "{:10.3e} {}" (get errs i) + (if (zero? i) " n/a" (.format "{:6.3f}" (get ooas (dec i))))))) +) diff --git a/methods/islet/figures/figs-adv-diag.hy b/methods/islet/figures/figs-adv-diag.hy new file mode 100644 index 0000000..02af0b4 --- /dev/null +++ b/methods/islet/figures/figs-adv-diag.hy @@ -0,0 +1,1035 @@ +(require [amb3 [*]]) +(import amb3 [amb3 [*]] + [figsutils [*]] + math glob struct) + +(assoc matplotlib.rcParams "savefig.dpi" 300) +(do (pl-require-type1-fonts)) + +;;; parse cmd, L, M, C slmmir output + +(defn acc-parse [fname &optional map-nstepfac] + (sv txt (.split (readall fname) "\n") + bo None d {}) + (for [ln txt] + (sv ln2 (cut ln 0 2)) + (cond [(and (= "cm" ln2) (in "cmd>" ln)) + (sv cmd ln c (parse-cmd ln :map-nstepfac map-nstepfac))] + [(= ln2 "M ") + (sv mp (parse-midpoint-check ln)) + (unless (none? (geton d #*(+ (cmd->key-base c) + (, (:ic mp) cyc (:ne c) :M)))) + ;; handle repeated ic used for src-term OOA testing. 
+ (assoc mp :ic (+ (:ic mp) "-src"))) + (assoc-nested d (+ (cmd->key-base c) (, (:ic mp) cyc (:ne c) :M)) + {:l1 (:l1 mp) :l2 (:l2 mp)})] + [(= ln2 "C ") + (cond [(in "cycle" ln) (sv (, - - cyc) (sscanf ln "s,s,i"))] + [(or (in "PASS" ln) (in "FAIL" ln)) + (sv (, - pf) (sscanf ln "s,s")) + (when (and (= pf "FAIL") (!= (:mono c) "none") (<= cyc 10)) + (prf "FAIL {}" cmd))] + [:else + (sv cl (parse-C ln)) + (unless (none? (geton d #*(+ (cmd->key-base c) + (, (:ic cl) cyc (:ne c) :C)))) + ;; handle repeated ic used for src-term OOA testing. + (assoc cl :ic (+ (:ic cl) "-src"))) + (assoc-nested d (+ (cmd->key-base c) (, (:ic cl) cyc (:ne c) :C)) + cl)])] + [(= ln2 "L ") + (cond [(in "L file" ln) + (assoc-nested d (+ (cmd->key-base c) + (, "cos" cyc (:ne c) :L :mixing-file)) + (last (.split ln)))] + [(in "phimin" ln) + (sv (, - ic - l1 - l2 - li - phimin - phimax) + (sscanf ln "s,s,s,f,s,f,s,f,s,f,s,f")) + (assoc-nested d (+ (cmd->key-base c) (, ic cyc (:ne c) :Lerr)) + {:l1 l1 :l2 l2 :li li :phimin phimin :phimax phimax})] + [:else + (sv bo (parse-bakeoff-diag bo ln (:timeint c))) + (when (:done bo) + (assoc-nested d (+ (cmd->key-base c) (, "cos" cyc (:ne c) :L)) bo) + (sv bo None))])])) + d) + +;;; print a long table of all accuracy results + +(defn acc-print-txt-table [c d &optional] + (sv dent (Box) dent.indentation 0 dent.delta 1) + (defn indent [] (+= dent.indentation dent.delta)) + (defn dedent [] (-= dent.indentation dent.delta)) + (sv buf []) + (defn msg [s] (.append buf (+ (* " " dent.indentation) s))) + (defn msg-pop [] (unless (empty? buf) (.pop buf))) + (defn msg-clear [] (.clear buf)) + (defn msg-dump [] + (for [e buf] (prf e)) + (msg-clear)) + (defmacro dent-fn [title &rest body] + `(do (msg ~title) + (indent) + ~@body + (msg-pop) + (dedent))) + + (sv unstab-thr 0.9 unstab {}) + (defn study-unstab [e-all cdrglb cdrlcl np ic nstepfac ode timeint] + (sv e None) + (for [ne c.nes] + (sv pe e e (geton e-all ne)) + (when (none? 
e) (continue)) + (unless (or (> (get e :C :l2) 0.9) + (and (not (none? pe)) (> (get e :C :l2) (get pe :C :l2)) + (> (get e :C :l2) 1e-12))) + (continue)) + (dont prf "u {:2d} {} {:2d} {:2d} {} {} {:7.1e}" + np ic ne nstepfac cdrglb cdrlcl (get e :C :l2)) + (sv key (, np cdrglb cdrlcl) + use True) + (when (in key unstab) + (sv pv (get unstab key)) + (unless (or (and (= (:ic pv) "slo") (!= ic "slo")) + (and (< ne (:ne pv)) + (not (and (!= (:ic pv) "slo") (= ic "slo")))) + (and (not (= ic "slo")) + (> (get e :C :l2) (get pv :l2)) + (< (get pv :l2) unstab-thr))) + (sv use False))) + (when (and (!= np 4) + (or (!= timeint "interp") (= cdrglb "none") + (!= ode "divergent") (!= ic "gau") (!= nstepfac 1))) + (continue)) + (when use + (assoc unstab key {:ic ic :ne ne :l2 (get e :C :l2) + :nstepfac nstepfac :ode ode})))) + (defn print-unstab [] + (sv keys (list (.keys unstab))) + (.sort keys :key first) + (for [k keys] + (sv e (get unstab k)) + (prf "u {:2d} {} {:<12s} {:2d} {:2d} {:<9s} {} {:7.1e}" + (first k) (:ic e) (:ode e) (:ne e) (:nstepfac e) (second k) + (last k) (:l2 e)))) + + (defn pr [&rest args] + (prfno (* " " dent.indentation)) + (print #*args)) + (defn praccv [pne ne vp v] + (if (or (none? vp) (zero? vp) (zero? v)) + (prfno " {:7.1e} ( ----)" v) + (prfno " {:7.1e} ({:5.2f})" v (first (ooa [vp v] :x [ne pne]))))) + (defn pracc [diagnostic norm pne pe ne e] + (sv v (geton e diagnostic norm)) + (when (none? v) (return)) + (sv vp (unless (none? pe) (get pe diagnostic norm))) + (praccv pne ne vp v)) + (defn print-table [e-all &optional [Ldiags False]] + (sv pe None pne None) + (for [ne c.nes] + (sv e (geton e-all ne)) + (when (none? e) (continue)) + (when (< (get e :C :l2) 1e-13) (continue)) + (prfno "{}{:3d}" (* " " dent.indentation) ne) + (pracc ':C ':masscons pne pe ne e) + (for [diag (, :C :M)] + (sv first True) + (for [norm (, :l1 :l2 :li)] + (when (and (= diag :M) (!= norm :l2)) (continue)) + (when (none? 
(geton e diag norm)) (continue)) + (when first + (prfno " |") + (sv first False)) + (pracc diag norm pne pe ne e))) + (when Ldiags + (sv bo (geton e :L)) + (unless (none? bo) + (for [r (, 0 1)] + (unless (or (zero? r) (in :me-mixing bo)) (continue)) + (prfno " |") + (sv s (if (zero? r) :mixing :me-mixing) + pmixing (unless (none? pe) (get pe :L s)) + mixing (get bo s)) + (for [k (, :lr :lu :lo)] + (praccv pne ne (unless (none? pmixing) (get pmixing k)) + (get mixing k)))))) + (when (> (get e :C :l2) 0.9) (prfno " UNSTABLE")) + (print) + (sv pe e pne ne))) + + (pr (+ " ne mass l1 l2 linf" + " mid l2")) + (for [method c.methods] + (dent-fn method + (for [timeint c.timeints] + (dent-fn (.format "timeint {}" timeint) + (for [ode c.odes] + (dent-fn ode + (for [cdrgl c.cdrs] + (sv (, cdrglb cdrlcl) cdrgl) + (dent-fn (.format "{} {}" cdrglb cdrlcl) + (for [nstepfac c.nstepfacs] + (dent-fn (.format "nstepfac {}" nstepfac) + (for [ic c.ics] + (dent-fn ic + (for [cyc c.cycs] + (dent-fn (.format "cycle {}" cyc) + (for [np c.nps] + (sv prefine (if (or (= np 4) (= timeint "exact")) + 0 5) + e (geton d timeint ode nstepfac method + cdrglb cdrlcl prefine np ic cyc)) + (when (none? 
e) (continue)) + (dent-fn (.format "np {}" np) + (msg-dump) + (pr method timeint ode cdrglb cdrlcl nstepfac + ic cyc np) + (print-table e :Ldiags (= ic "cos")) + (study-unstab e cdrglb cdrlcl np ic nstepfac + ode timeint))))))))))))))))) + (print-unstab)) + +;;; accuracy figs + +(defclass AccFig [] + (defn --init-- [me]) + + (defn get-defaults [me &optional context] + (svifn context (get-context)) + (sv c context) + {:method "pcsl" :nstepfac 1 :timeint "interp" :ode "nondivergent" :ic "gau" + :cdrglb "caas-node" :cdrlcl "caas" :prefine 5 :nonuni 0 :cyc 1 :measure :l2 + :nps c.nps :nes c.nes :pat-line "-" :pat-clr (.copy c.npclrs) :C-line :C + :pat-mark (.copy c.npmarks) :fs 11 :lw 1 :markersize 4 :yform :log2 + :xticks :degrees :ooa-text False :filter-floor None :figsize (, 4 4) + :ref-ooa-2 False :ref-cos-033 True :pg None}) + + (defn plot [me ax d-all &optional [o None]] + (svifn o (me.get-defaults)) + (sv npa npy.array + d1 (geton d-all (:timeint o) (:ode o) (:nstepfac o) (:method o) + (:cdrglb o) (:cdrlcl o) (:prefine o)) + gray (* 0.5 (npy.ones 3)) + y-ext [1e10 -1e10]) + (when (none? d1) (return)) + (for [np (:nps o)] + (sv keys (if (none? (:pg o)) + (, np (:ic o) (:cyc o)) + (if (fn? (:pg o)) + (, ((:pg o) np) np (:ic o) (:cyc o)) + (, (:pg o) np (:ic o) (:cyc o)))) + d2 (geton d1 #*keys)) + (when (none? d2) (continue)) + (sv x [] y []) + (for [ne (:nes o)] + (sv val (geton d2 ne (:C-line o) (:measure o))) + (when (none? val) (continue)) + (unless (none? 
(:filter-floor o)) + (when (< val (:filter-floor o)) (continue))) + (.append x ne) + (.append y val) + (sv (get y-ext 0) (min (get y-ext 0) val) + (get y-ext 1) (max (get y-ext 1) val))) + (svb (pat (+ (get (:pat-clr o) np) (get (:pat-mark o) np) (:pat-line o))) + (xtform (npy.log2 (npa x))) + ((, xticks xtick-strs) + (case/eq (:xticks o) + [:degrees + (sv d (nes->degstrs x)) + (, (npy.log2 (npa (:ne d))) (:deg d))] + [:else (, xtform x)])) + ((, ytform y-lbl y-tfn) + (case/eq (:yform o) + [:log2 (, (npy.log2 (npa y)) "$\log_2$" npy.log2)] + [:log10 (, (npy.log10 (npa y)) "$\log_{10}$" npy.log10)] + [:semilogy (, (npa y) "$\log_{10}$" identity)])) + (pl-plot (if (= (:yform o) :semilogy) ax.semilogy ax.plot))) + (pl-plot xtform ytform pat + :lw (:lw o) :markersize (:markersize o) :fillstyle "none") + (when (= np (first (:nps o))) + (pl.xticks xticks xtick-strs :fontsize (:fs o)) + (pl.ylabel (+ y-lbl " " (get {:l1 "$l_1$" :l2 "$l_2$" :li "$l_{\infty}$" + :masscons "Mass conservation"} + (:measure o)) + " relative error") + :fontsize (:fs o)) + (cond [(= (:yform o) :log2) + (pl.yticks (list (range -40 10)) :fontsize (:fs o))]) + (when (and (:ref-cos-033 o) (= (:ic o) "cos") (= (:ode o) "nondivergent")) + (pl-plot xtform (* 0.033 (npy.ones (len xtform))) "k-." 
+ :zorder -10 :lw (:lw o) :color gray))) + (when (:ooa-text o) + (sv i (- (len x) 2)) + (pl.text (- (get xtform (inc i)) 0.1) (* (get ytform (inc i)) 2) + (.format "{:1.1f}" + (- (first (ooa (cut y i (+ i 2)) + :x (cut x i (+ i 2)))))))) + (when (= np (last (:nps o))) + (when (= (:yform o) :semilogy) + (sv (, y ys) (pl.yticks)) + (when (> (/ (last y) (first y)) 1e3) + (for [i (range (len ys))] + (sv (get ys i) (.format "{}" (int (math.log10 (get y i)))))) + (pl.yticks y ys))) + (when (:ref-ooa-2 o) + (sv ytref (y-tfn (* 0.7 (first y-ext) (** (/ (last x) (npa x)) 2)))) + (pl-plot xtform ytref "k:" :color gray)))) + y-ext) + + (defn legend [me ax entries &optional [o None] [nps-legend True] bbox] + (svifn o (me.get-defaults)) + (sv xl (pl.xlim) yl (pl.ylim) hs [] delta 2) + (unless (empty? entries) + (for [e entries] + (sv h (ax.plot (- (first xl) delta) (- (first yl) delta) + (first e) :label (second e) :fillstyle "none" + :lw (:lw o) :markersize (:markersize o))) + (.extend hs h)) + (sv l1 (pl.legend :handles hs :fontsize (- (:fs o) 1) + :bbox-to-anchor (if (none? 
bbox) (, 0 0.08) bbox)
+                           :loc "lower left" :framealpha 1))
+      (ax.add-artist l1))
+    ;; Optional second legend: one proxy line per entry of (:nps o), labelled
+    ;; with the np value. The proxies are plotted at (xl[0]-delta, yl[0]-delta),
+    ;; i.e. relative to the limits captured in xl/yl.
+    (when nps-legend
+      (sv hs [])
+      (for [np (:nps o)]
+        (sv h (ax.plot (- (first xl) delta) (- (first yl) delta)
+                       (+ (get (:pat-clr o) np) (get (:pat-mark o) np))
+                       :lw (:lw o) :markersize (:markersize o)
+                       :label (.format "{}" np) :fillstyle "none"))
+        (.extend hs h))
+      (sv l2 (pl.legend :handles hs :fontsize (- (:fs o) 1)
+                        :ncol (len (:nps o)) :bbox-to-anchor (, 0 -0.01 1 0)
+                        :loc "lower left" :mode "expand" :framealpha 1))
+      (ax.add-artist l2))
+    ;; Re-apply the captured axis limits after the proxy plots.
+    (pl.xlim xl) (pl.ylim yl))
+
+  ;; Set the plot title to s with font size (:fs o).
+  ;; o: options dict; when not supplied, (me.get-defaults) is used
+  ;; (presumably svifn assigns only when o is None -- confirm in amb3).
+  (defn title [me s &optional [o None]]
+    (svifn o (me.get-defaults))
+    (pl.title s :fontsize (:fs o))))
+
+;; Return the string "$n_p$ a, b, c" built from the sequence nps: the first
+;; element follows the "$n_p$ " prefix and each later one is appended as ", x".
+(defn make-nps-string [nps]
+  (sv s (.format "$n_p$ {}" (first nps)))
+  (for [ne (cut nps 1)] (sv s (+ s (.format ", {}" ne))))
+  s)
+
+;; Map a time-step factor to its word: 1 -> "long", 3 -> "medium", 5 -> "short".
+;; Only these three keys are present in the lookup table.
+(defn nstepfac->word [nstepfac]
+  (get {1 "long" 3 "medium" 5 "short"} nstepfac))
+
+;; Compose a multi-line figure title from `main` and the options dict o.
+;; Reads (:ode o), the first three characters of (:ic o), (:nstepfac o),
+;; (:prefine o) and (:cdrglb o). "$p$-refinement, " is included only when
+;; :prefine is 5; "no " precedes "property preservation" only when :cdrglb
+;; is "none". `extra` (default "") is appended verbatim at the end.
+(defn make-title [main o &optional extra]
+  (svifn extra "")
+  (+ main "\n"
+     (flow-short2long (:ode o)) ", "
+     (ic-short2long (cut (:ic o) 0 3)) ", "
+     (nstepfac->word (:nstepfac o)) " steps,\n"
+     (if (= (:prefine o) 5) "$p$-refinement, " "")
+     (+ (if (= (:cdrglb o) "none") "no " "") "property preservation")
+     extra))
+
+;; Stability comparison figure, written to c.fig-dir + "stab-cmp-<measure>".
+;; c: context (supplies fig-dir and the defaults); d: data passed to p.plot.
+;; Three curves are drawn on one axes by mutating o between p.plot calls:
+;;   method "pcslu", 1 cycle ("-.");  method "pcsl", 100 cycles ("--");
+;;   method "pcsl", 1 cycle ("-") with the OOA-2 reference enabled.
+;; NOTE(review): nps-str is computed but not used in this function.
+(defn fig-stab-cmp [c d]
+  (sv p (AccFig)
+      o (p.get-defaults c))
+  (assoc o :ic "gau" :ode "divergent" :cdrglb "caas-node" :cdrlcl "caas"
+         :measure :l2 :timeint "interp" :prefine 5 :yform :semilogy)
+  (sv nps-str (make-nps-string (:nps o)))
+  (with [(pl-plot (:figsize o) (+ c.fig-dir "stab-cmp-" (name (:measure o))))]
+    (sv ax (pl.subplot 1 1 1))
+    (assoc o :method "pcslu" :pat-line "-." :cyc 1) (p.plot ax d o)
+    (assoc o :method "pcsl" :pat-line "--" :cyc 100) (p.plot ax d o)
+    (assoc o :method "pcsl" :pat-line "-" :cyc 1 :ref-ooa-2 True) (p.plot ax d o)
+    ;; :yform was set to :semilogy above, so the first branch is the one taken.
+    (if (= (:yform o) :semilogy)
+        (pl.ylim (, 9e-7 1))
+        (pl.ylim (, -20 0)))
+    (my-grid)
+    (p.legend ax (, (, "k-" "Islet 1 cycle") (, "k--" "Islet 100 cycles")
+                    (, "k-." "Natural 1 cycle") (, "k:" "OOA 2")) :o o)
+    (p.title (make-title "Islet stability:" o) o)))
+
+;; Smallest power of ten that is >= f (f must be positive for log10).
+(defn nextpow10 [f] (** 10.0 (math.ceil (math.log10 f))))
+;; Largest power of ten that is <= f (f must be positive for log10).
+(defn prevpow10 [f] (** 10.0 (math.floor (math.log10 f))))
+
+;; Accuracy figures. c: context; d: data passed through to p.plot.
+;; Optional arguments and the values substituted when they are not given:
+;;   prefix "" (file-name prefix), ref-ooa-2 True, legend True,
+;;   general-timeint "interp", general-prefine 5, show-linf True, pp True.
+;; First one "empirical order of accuracy" figure is produced, then one
+;; figure per (nstepfac, ic, ode) combination in the loop below.
+(defn figs-acc [c d &optional prefix ref-ooa-2 legend
+                general-timeint general-prefine show-linf pp]
+  (svifn prefix "" ref-ooa-2 True legend True general-timeint "interp"
+         general-prefine 5 show-linf True pp True)
+  (sv p (AccFig)
+      o (p.get-defaults c))
+  ;; Helper: open one figure named from the current options, let plot-fn draw
+  ;; into it, then add grid, legend (if the outer `legend` flag is set),
+  ;; y-label and title. Its own ref-ooa-2 parameter only controls whether the
+  ;; "OOA 2" legend entry is listed.
+  (defn plot [o title plot-fn &optional [ref-ooa-2 False]]
+    (sv fname (+ prefix "acc-" (:ode o) "-" (:ic o)
+                 "-" (:timeint o) "-" (if (= (:cdrglb o) "none")
+                                        "nopp" "pp")
+                 "-fac" (str (:nstepfac o))))
+    (print fname)
+    (with [(pl-plot (:figsize o) (+ c.fig-dir fname))]
+      (sv ax (pl.subplot 1 1 1))
+      (plot-fn ax d o)
+      (my-grid)
+      (sv legs [(, "k-" "$l_2$") (, "k--" "$l_{\infty}$")])
+      (when ref-ooa-2 (.append legs (, "k:" "OOA 2")))
+      (when legend (p.legend ax legs :o o))
+      (pl.ylabel "$\log_{10}$ relative error")
+      (p.title (make-title title o) o)))
+  ;; Figure 1: nondivergent flow, "gau" IC, nstepfac 1, exact time
+  ;; integration, no property preservation; l2 solid, l_inf dashed.
+  (assoc o :ode "nondivergent" :ic "gau" :nstepfac 1
+         :yform :semilogy :cdrglb "none" :cdrlcl "none"
+         :timeint "exact" :prefine 0 :filter-floor 1e-11)
+  (plot o "Islet empirical order of accuracy:"
+        (fn [ax d o]
+          (for [norm (, :l2 :li)]
+            (assoc o :measure norm :pat-line (if (= norm :l2) "-" "--")
+                   :ooa-text (= norm :li))
+            (p.plot ax d o)
+            (pl.ylim (, 1e-11 1))
+            ;; Ticks at 10^e, labelled with the exponent e.
+            (sv e (npy.array [0 -2 -4 -6 -8 -10]))
+            (pl.yticks (** 10.0 e) e))))
+  (for [nstepfac (, 1 5)
+        ic (, "gau" "cos" "slo")
+        ode (, "nondivergent" "divergent" "rotate")]
+    (assoc o :nstepfac nstepfac :ooa-text False :ode ode
+           :cdrglb (if pp "caas-node" "none") :cdrlcl (if pp "caas" "none")
+           :ic ic :filter-floor None :timeint general-timeint)
+    ;; `dont` presumably disables the wrapped form (skip of divergent/gau is
+    ;; switched off) -- confirm against the amb3 macro.
+    (dont when (and (= ode "divergent") (= ic "gau")) (continue))
+    (plot o "Islet accuracy:"
+          (fn [ax d o]
+            ;; ye accumulates [min, max] over the extents returned by p.plot.
+            (sv ye [1e10 -1e10])
+            (defn update-ye [ye1]
+              (when (none? 
ye1) (return)) + (sv (get ye 0) (min (first ye) (first ye1)) + (get ye 1) (max (last ye) (last ye1)))) + (for [norm (, :l2 :li)] + (when (and (= norm :li) (not show-linf)) (continue)) + (assoc o :cdrglb (if pp "caas" "none") :timeint "exact" :prefine 0 + :measure norm :pat-line (if (= norm :l2) "-" "--") + :ref-ooa-2 False) + (sv ye1 (p.plot ax d o)) + (update-ye ye1) + (assoc o :cdrglb (if pp "caas-node" "none") + :timeint general-timeint :prefine general-prefine + :ref-ooa-2 (and ref-ooa-2 (= norm :l2) (!= ic "slo"))) + (sv ye1 (p.plot ax d o)) + (update-ye ye1)) + (if (= ic "slo") + (pl.ylim (, (* 0.7 (first ye)) (nextpow10 (second ye)))) + (pl.ylim (, (/ (first ye) 4) (nextpow10 (second ye)))))) + :ref-ooa-2 (and ref-ooa-2 (!= ic "slo"))))) + +(defn fig-acc-midpoint-check [c d] + (sv p (AccFig) + o (p.get-defaults c)) + (assoc o :ic "gau" :ode "nondivergent" :cdrglb "none" :cdrlcl "none" + :measure :l2 :yform :semilogy :nstepfac 1) + (sv nps-str (make-nps-string (:nps o))) + (with [(pl-plot (:figsize o) (+ c.fig-dir "midpoint-check"))] + (sv ax (pl.subplot 1 1 1)) + (assoc o :pat-line "-" :C-line :C :ooa-text True :prefine 0 :timeint "exact" :nps [4]) + (p.plot ax d o) + (assoc o :prefine 5 :timeint "interp" :nps (cut c.nps 0 -1)) + (p.plot ax d o) + (assoc o :prefine 5 :timeint "interp" :nps (cut c.nps -1) :ooa-text False) + (p.plot ax d o) + (assoc o :pat-line "--" :C-line :M :prefine 0 :timeint "exact" :nps [4]) + (p.plot ax d o) + (assoc o :prefine 5 :timeint "interp" :nps c.nps) + (p.plot ax d o) + (pl.ylim (, 5e-10 1)) + (my-grid) + (p.legend ax (, (, "k-" "1 cycle") (, "k--" "1/2 cycle")) :o o) + (p.title (make-title "Trajectory interpolation:" o) o))) + +(defn fig-acc-mimic-src-term-midpoint [c d &optional [np-minus-2 False]] + (sv p (AccFig) + o (p.get-defaults c)) + (defn plot [o title plot-fn &optional [ref-ooa-2 False]] + (sv fname (+ "acc-pg-mimic-src-term-midpoint-" (:ode o) "-" (:ic o) + "-" (:timeint o) "-" (if (= (:cdrglb o) "none") + "nopp" 
"pp") + "-fac" (str (:nstepfac o)) "-" (name (:measure o))) + title (make-title title o :extra ", midpoint")) + (print fname) + (with [(pl-plot (:figsize o) (+ c.fig-dir fname))] + (sv ax (pl.subplot 1 1 1)) + (plot-fn ax d o) + (my-grid) + (sv legs [(, "k-" "Reference") (, "k-." "$n_f=n_p$") (, "k--" "$n_f=2$")]) + (when np-minus-2 (.append legs (, "k:" "$n_f=n_p-2$"))) + (when ref-ooa-2 (.append legs (, "k:" "OOA 2"))) + (p.legend ax legs :o o) + (pl.ylabel "$\log_{10}$ $l_2$ relative error") + (p.title title o))) + (for [nstepfac (, 5) + ic (, "gau") + ode (, "nondivergent")] + (assoc o :nstepfac nstepfac :ooa-text False :ode ode :cdrglb "caas-node" + :cdrlcl "caas" :ic ic :filter-floor None :norm :l2 :timeint "interp" + :ref-cos-033 False :yform :semilogy :filter-floor 1e-11 :C-line :M) + (plot o "Physics grid source term accuracy:" + (fn [ax d o] + (sv ye [1e10 -1e10]) + (defn update-ye [ye1] + (sv (get ye 0) (min (first ye) (first ye1)) + (get ye 1) (max (last ye) (last ye1)))) + (for [suffix (, "" "-src")] + (for [(, ipg pg) + (enumerate (if (= suffix "") + (, 2) + (, 2 + (fn [np] np) + (fn [np] (- np 2)))))] + (when (and (not np-minus-2) (= ipg 2)) (continue)) + (assoc o :cdrglb "caas-node" :timeint "exact" :prefine 0 + :measure :l2 :ref-ooa-2 False :ic (+ ic suffix) + :pg pg :pat-line (if (= suffix "") + "-" + (case/eq ipg [0 "--"] [1 "-."] [2 ":"]))) + (sv ye1 (p.plot ax d o)) + (update-ye ye1) + (assoc o :timeint "interp" :prefine 5 :ref-ooa-2 (= suffix "")) + (sv ye1 (p.plot ax d o)) + (update-ye ye1))) + (pl.ylim (, (/ (first ye) 4) (nextpow10 (second ye))))) + :ref-ooa-2 True))) + +;;; filament diagnostic + +(defn fig-filament [c d-all] + (defn get-pat [nstepfac ne] + (+ (get {10 "b" 20 "r." 
40 "k"} ne) + (get {1 "-" 5 "--"} nstepfac))) + (sv p (AccFig) o (p.get-defaults c) + nes (, 20 40) + degs (nes->degstrs nes)) + (with [(pl-plot (, 10 4) (+ c.fig-dir "filament"))] + (for [(, igrid grid) (enumerate (, :v :t))] + (for [(, inp np) (enumerate c.nps)] + (sv spi (inc (+ (* igrid (len c.nps)) inp)) + ax (pl.subplot 2 (len c.nps) spi)) + (for [(, ine ne) (enumerate nes) + nstepfac (, 1 5)] + (sv d (get d-all (if (= np 4) "exact" "interp") "nondivergent" nstepfac + "pcsl" (if (= np 4) "caas" "caas-node") "caas" + (if (= np 4) 0 5) np "cos" 1)) + (when (none? (geton d ne)) (continue)) + (sv thr (get d ne :L :thr) + fil (get d ne :L (if (or (= grid :v) (= np 4)) :fil :me-fil))) + (pl.plot thr fil (get-pat nstepfac ne) + :label (.format "{} {}" (get (:deg degs) ine) + (nstepfac->word nstepfac))) + (pl.xticks [0 0.2 0.4 0.6 0.8 1] :fontsize (:fs o)) + (pl.xlim (, 0.05 1)) + (pl.yticks (if (< np 8) + [0 20 40 60 80 100 120] + [50 60 70 80 90 100 110]) + :fontsize (:fs o)) + (my-grid) + (when (= spi (* 2 (len c.nps))) + (pl.legend :loc "center left" :fontsize (:fs o) :frameon False)) + (pl.ylim (case/in np + [(, 9 12) (if (= grid :v) (, 75 115) (, 75 105))] + [(, 8) (, 50 115)] + [:else (, -5 125)])) + (sv yl (pl.ylim) + extra "") + (when (> np 4) (sv extra (.format " {} grid" + (if (= grid :v) "v" "tracer"))))) + (pl.text 0.1 (+ (first yl) (* 0.05 (npy.diff yl))) + (.format (if 0 "$n_p$ {}{}" "$n_p$ {} {}") np extra) + :fontsize (+ (:fs o) 2)))))) + +;;; mixing diagnostic + +(defn triplot-read-dat [fname] + (sv raw (with [f (open fname "rb")] + (.read f)) + n (first (struct.unpack "i" (get raw (slice 0 4)))) + data (npy.zeros (* 2 n))) + (for [i (range (* 2 n))] + (sv os (+ 4 (* 4 i)) + (get data i) (first (struct.unpack "f" (cut raw os (+ os 4)))))) + (sv cb (cut data 0 n) + ccb (cut data n (* 2 n))) + (, cb ccb)) + +(defn triplot [cb ccb &optional [data None]] + (defn triplot-curve [x] + (+ 0.9 (* -0.8 x x))) + (sv x (npy.linspace 0.1 1 100) + lw 2) + 
(pl.plot x (triplot-curve x) "k-" :lw lw) + (sv tl (triplot-curve (last x)) + br (triplot-curve (first x))) + (pl.plot x (* (triplot-curve (first x)) (npy.ones (len x))) "k-" :lw lw) + (pl.plot (npy.ones (len x)) (npy.linspace tl br (len x)) "k-" :lw lw) + (pl.plot x (npy.linspace br tl (len x)) "k-" :lw lw) + (pl.plot cb ccb "r." :markersize 1) + (pl.xlim 0.05 1.05) + (pl.ylim 0.05 0.95) + (sv t [0.2 0.4 0.6 0.8 1]) + (pl.xticks t []) (pl.yticks t []) + (my-grid) + ;(pl.axis "off") + (defn text [x y txt sym me-sym] + (pl.text x (+ y 0.1) txt) + (sv dx 0.08) + (pl.text (+ x dx) (+ y 0.1) (.format "{:1.2e} (v)" (get data sym))) + (pl.text (+ x dx) y (if (in me-sym data) + (.format "{:1.2e}" (get data me-sym)) + ""))) + (unless (none? data) + (pl.text 0.1 0.91 (.format "$n_p$ {}, {} step" (get data 'np) + (nstepfac->word (get data 'nstepfac))) + :fontsize 12) + (sv x 0.1) + (text x 0.32 "$l_r$" 'lr 'me-lr) + (text x 0.11 "$l_u$" 'lu 'me-lu))) + +(defn figs-mixing [c d] + (sv p (AccFig) o (p.get-defaults c)) + (for [ne (, 20 40)] + (with [(pl-plot (, 10 4) (+ c.fig-dir "mixing-ne" (str ne)) :format "png")] + (for [(, instepfac nstepfac) (enumerate (, 1 5)) + (, inp np) (enumerate (:nps o))] + (sv spi (inc (+ (* instepfac (len (:nps o))) inp)) + ax (pl.subplot 2 (len (:nps o)) spi) + e (get d (if (= np 4) "exact" "interp") "nondivergent" nstepfac "pcsl" + (if (= np 4) "caas" "caas-node") "caas" (if (= np 4) 0 5) np + "cos" 1 ne :L) + (, cb ccb) (triplot-read-dat (+ c.data-dir (:mixing-file e))) + me-mixing (if (= np 4) :mixing :me-mixing) + data {'np np 'nstepfac nstepfac + 'lr (get e :mixing :lr) 'me-lr (get e me-mixing :lr) + 'lu (get e :mixing :lu) 'me-lu (get e me-mixing :lu)}) + (unless (and (zero? (get e :mixing :lo)) (zero? 
(get e me-mixing :lo))) + (prf "{}: lo {} {}" (:mixing-file e) (get e :mixing :lo) + (get e me-mixing :lo))) + (triplot cb ccb :data data))))) + +;;; slotted cylinders images + +(defn img-slo-filament [c d direc img-idx outname &optional nps nps-right] + (svifn nps (, 4 4 6 8) nps-right nps) + (sv degs (nes->degstrs (, 20 40)) + gap-for-colorbar (!= (len nps) (len nps-right)) + fs 7) + (with [(pl-plot (, 7.3 (+ (if gap-for-colorbar 0 0.5) (len nps))) + outname :format "pdf" :tight False)] + (sv spi 0 axs []) + (for [(, inp np) (enumerate nps) + (, ine ne) (enumerate (, 20 40)) + nstepfac (, 1 5)] + (inc! spi) + (when (and (= ne 40) (not (in np nps-right))) (continue)) + (sv c (% (dec spi) 4) + r (// (dec spi) 4) + w (/ 1 4) + h (/ 1 (len nps)) + ax (pl.axes [(* w c) (- 1 (* h r)) (* 0.95 w) h])) + (unless gap-for-colorbar (.append axs ax)) + (sv timeint (if (= np 4) "exact" "interp") + cdrglb (if (and (= np 4) (= inp 0)) "caas" "caas-node") + prefine (if (= np 4) 0 5) + fname (+ direc "/" (.format "ne{}-np{}-nstep{}-{}-{}-pr{}.bin" + ne np (* ne 6 nstepfac) timeint cdrglb prefine)) + img (try (get (read-slmmir-io-arrays fname) img-idx) + (except [e Exception] (print e "couldn't read" fname img-idx)))) + (unless (none? img) + (print ne nstepfac np (/ (- 0.1 (npy.min img)) 0.1) (- (npy.max img) 1)) + (draw-slmmir-image img) + (pl.text (- (last img.shape) 500) (- (first img.shape) 80) + (.format "$n_p$ {} {}" np (cdr-name cdrglb)) :fontsize fs) + (defn write [x] (if (zero? x) "0" (.format "{:1.1e}" x))) + (unless (none? 
d) + (try + (sv keys (, timeint "nondivergent" nstepfac "pcsl" cdrglb "caas" prefine + np "slo" 1 ne) + e (get d #*(+ keys (, :Lerr)))) + (pl.text 10 15 (.format (+ "$l_2$ {:1.1e} $l_\infty$ {:1.1e}\n" + "$\phi_{{min}}$ {:1.1e} $\phi_{{max}}$ {:1.1e}") + (:l2 e) (:li e) (:phimin e) (:phimax e)) + :fontsize fs) + (except [] (print "no data for" timeint "nondivergent" nstepfac "pcsl" + cdrglb "caas" prefine np "slo" 1 ne)))) + (when (<= spi 4) + (pl.text (/ (second img.shape) 2) (+ 30 (first img.shape)) + (.format "{}, {} step" (get (:deg degs) ine) + (nstepfac->word nstepfac)) + :ha "center" :fontsize (inc fs))))) + (sv bdys (npy.linspace -0.05 1.05 23)) + (if gap-for-colorbar + (do + (sv c 2 r (+ 0.5 (- (len nps) 2)) + ax (pl.axes [(* w c) (- 1 (* h r)) (* 2 0.95 w) (* 0.2 h)]) + c (pl.colorbar :cax ax :orientation "horizontal" :aspect 30 :shrink 0.9 + :ticks (npy.linspace 0 1.1 12) + :boundaries bdys))) + (sv c (pl.colorbar :ax axs :orientation "horizontal" :aspect 50 :shrink 0.7 + :ticks (npy.linspace 0 1.1 12) :pad 0.025 + :boundaries bdys))) + (c.ax.tick-params :labelsize (inc fs)))) + +(defn img-slocyl-slide [c d direc img-idxs outname &optional [ylabel False]] + ;; 3 cols: IC above vertical colorbar, midpoint, endpoint + (sv ne 20 deg (first (:deg (nes->degstrs (, 20)))) nstepfac 5 fs 7 + times (, "start" "middle" "end") + w (/ 1 3) + h (/ 1 (len c.nps))) + (with [(pl-plot (, 5.5 (len c.nps)) outname :tight False)] + (for [col (range 3) + row (range (len c.nps))] + (when (and ylabel (zero? col)) + ((. (pl.gcf) text) + -0.01 (- 1 (* (- row 0.5) h)) + (if (zero? row) "Standard" "$p$-refined") + :fontsize (+ fs 2) :rotation 90 :ha "right" :va "center")) + (when (and (zero? col) (not (zero? 
row))) (continue))
+      ;; One panel per (col, row): col picks the image index / time label,
+      ;; row picks np from c.nps.
+      (sv ax (pl.axes [(* w col) (- 1 (* h row)) (* 0.95 w) h])
+          np (nth c.nps row)
+          timeint (if (= np 4) "exact" "interp")
+          ;; FIX: this tested (= inp 0), but `inp` is not bound anywhere in
+          ;; this function (it is a loop variable of img-slo-filament). The
+          ;; index into c.nps here is `row`, which plays the same role.
+          cdrglb (if (and (= np 4) (= row 0)) "caas" "caas-node")
+          prefine (if (= np 4) 0 5)
+          fname (+ direc "/" (.format "ne{}-np{}-nstep{}-{}-{}-pr{}.bin"
+                                      ne np (* ne 6 nstepfac) timeint cdrglb prefine))
+          ;; On a read failure the except branch prints and img becomes None.
+          img (try (get (read-slmmir-io-arrays fname) (nth img-idxs col))
+                   (except [e Exception]
+                     (print e "couldn't read" fname (nth img-idxs col)))))
+      ;; FIX: skip the panel when the image could not be read, instead of
+      ;; failing on None below; img-slo-filament guards the same way.
+      (when (none? img) (continue))
+      ;; First column only: entries exactly equal to 1 are replaced by 1 - 1e-16.
+      (when (zero? col)
+        (sv (get img (= img 1)) (- 1 1e-16)))
+      (draw-slmmir-image img)
+      (pl.text (- (last img.shape) 500) (- (first img.shape) 80)
+               (.format "$n_p$ {} {}" np (cdr-name cdrglb)) :fontsize fs)
+      ;; Column headings are written above the top row only.
+      (when (zero? row)
+        (pl.text (/ (second img.shape) 2) (+ 30 (first img.shape))
+                 (.format "{}, {} step, {}" deg (nstepfac->word nstepfac)
+                          (nth times col))
+                 :ha "center" :fontsize (inc fs))))
+    ;; Colorbar placement: horizontal when exactly two np values are shown,
+    ;; vertical otherwise; both sit in column 0 below the first row.
+    (sv bdys (npy.linspace -0.05 1.05 23)
+        col 0 row (dec (len c.nps)))
+    (if (= (len c.nps) 2)
+        (sv ax (pl.axes [(* w col) (- 1 (* 0.5 h row)) (* 0.95 w) (* 0.15 h)])
+            cb (pl.colorbar :cax ax :orientation "horizontal" :aspect 30 :shrink 0.9
+                            :ticks (npy.linspace 0 1 6)
+                            :boundaries bdys))
+        (sv col 0 row (dec (len c.nps))
+            ax (pl.axes [(* w (+ col 0.35)) (- 1.02 (* h row)) (* 0.1 w)
+                         (* (dec (len c.nps)) 0.95 h)])
+            cb (pl.colorbar :cax ax :orientation "vertical" :aspect 30 :shrink 0.9
+                            :ticks (npy.linspace 0 1 11)
+                            :boundaries bdys)))
+    (cb.ax.tick-params :labelsize (inc fs))))
+
+;;; toy chemistry
+
+;; Parse a toy-chemistry diagnostic log. Lines before the first one containing
+;; "cmd> 1" are skipped; each later "cmd>" line starts a new record keyed by
+;; (cmd->key-base c) + (ne).
+(defn toychem-diagnostic-parse [fname]
+  (sv txt (.split (readall fname) "\n")
+      d {}
+      skip 0 l2s [] lis [])
+  (for [ln txt]
+    (when (in "cmd> 1" ln) (break))
+    (inc! skip))
+  (for [ln (cut txt skip)]
+    (cond [(in "cmd>" ln)
+           ;; A finished record must hold either no samples or exactly 576*10.
+           (assert (or (empty? l2s) (= (len l2s) (* 576 10))))
+           (sv cmd ln c (parse-cmd ln)
+               key (+ (cmd->key-base c) (, (:ne c)))
+               l2s [] lis [])
+           (unless (none? 
(geton d #*key)) + ;; we collected pg vmax thr) thr vmax) + vmin (- vmax) + ticks (npy.linspace vmin vmax 5)) + (draw-slmmir-image img :vmin vmin :vmax vmax :ncolor ncolor + :colorsym True :switch-halves sh)) + (draw-slmmir-image img :vmin vmin :vmax vmax :ncolor ncolor + :switch-halves sh)) + (sv d (parse-filename (nth img-files spi))) + (pl.text 10 70 + (.format "$n_p$ {} {} {} {}" + (:np d) (:pgtype d) (:pg d) (:cdr d)) + :fontsize fs) + (pl.text (- (last img.shape) 500) (- (first img.shape) 60) + (.format "min {:8.1e} max {:8.1e}" imin imax) + :fontsize (dec fs)) + (.append axs ax) + (when diagnostic + (sv c (pl.colorbar :ax ax :orientation "horizontal" :aspect 30 :shrink 0.8 + :pad 0.05 :ticks ticks)) + (c.ax.tick-params :labelsize fs) + (c.ax.set-xticklabels (lfor e ticks (.format "{:1.1e}" e))))) + (pl.text -20 (+ 15 (first img.shape)) + (.format "Toy chemistry: {}, $\Delta t$ {}, {}" + (:res d) (:dt d) (:ode d)) + :ha "center" :fontsize fs) + (unless diagnostic + (sv c (pl.colorbar :ax axs :orientation "horizontal" :aspect 50 :shrink 0.8 + :pad 0.05 :ticks ticks)) + (c.ax.tick-params :labelsize fs) + (c.ax.set-xticklabels (lfor e ticks (.format "{:1.1e}" e)))))) + +;;; ISL comm footprint + +(defn parse-footprint [fname] + (sv d {} + txt (.split (readall fname) "\n") + c None cnt 0) + (for [ln txt] + (sv ln4 (cut ln 0 4)) + (cond [(= ln4 "cmd>") + (dont unless (none? c) + (print cmd) + (print cnt)) + (sv cmd ln c (parse-cmd ln) cnt 0)] + [(and (not (none? c)) (in "footprint>" ln)) + (sv pos (.find ln "t>") + vals (sscanf (cut ln (+ pos 2)) "i,i,f,i")) + (for [(, isym sym) (enumerate (, :min :median :mean :max))] + (assoc-nested-append d (+ (cmd->key-base c) (, (:ne c) sym)) + (nth vals isym))) + (inc! 
cnt)])) + d) + +(defn fig-comm-footprint [c d] + (sv npa npy.array + p (AccFig) o (p.get-defaults c)) + (assoc o :nps (, 4 8 12) :lw 1.5) + (with [(pl-plot (:figsize o) (+ c.fig-dir "isl-footprint") :tight False)] + (sv pat-line {:min ":" :median "-" :max "--"}) + (for [(, spi nstepfac) (enumerate (, 1 5))] + (sv ax (pl.axes (, (/ spi 2) 0 0.41 0.8))) + (for [np (:nps o)] + (sv e (get d (if (= np 4) "exact" "interp") "nondivergent" nstepfac "pcsl" + (if (= np 4) "caas" "caas-node") "caas" (if (= np 4) 0 5) + np ; accidentally ran with pg; no effect but changes the dict nesting + np 30) + x (* 12 (/ (npa (list (range (len (get e :min))))) (len (get e :min)))) + step (// (len x) 5) + s (slice (// step 2) -1 step) + xsparse (get x s)) + (for [sym (, :median :max)] + (sv y (npa (get e sym)) + ysparse (get y s)) + (pl.plot [(first x) (last x)] (* (** np 2) (npa [1 1])) (+ (get c.npclrs np) ":")) + (pl.plot x y + (+ (get c.npclrs np) (get pat-line sym)) + xsparse ysparse + (+ (get c.npclrs np) (get c.npmarks np)) :fillstyle "none")) + (my-grid) + (pl.xticks (, 0 3 6 9 12) :fontsize (:fs o)) + (pl.yticks (npy.linspace 0 160 17) :fontsize (:fs o)) + (pl.ylim (, 0 160)) + (pl.xlabel "Days" :fontsize (:fs o)) + (pl.title (.format "{} time step" (get {1 "Long" 5 "Short"} nstepfac)) + :fontsize (:fs o))) + (cond [(= nstepfac 5) + (p.legend ax (, (, (+ "k" (get pat-line :max)) "max") + (, (+ "k" (get pat-line :median)) "median") + (, (+ "k" ":") "$n_p^2$ reference")) + :o o :bbox (, 0 0.75) :nps-legend False)] + [(= nstepfac 1) + (p.legend ax (, ) :o o)]) + (when (= nstepfac 1) + (pl.text 13 172 + (+ "Islet: Number of transmitted scalars per tracer per element\n" + "Nondivergent flow") + :ha "center" :fontsize (:fs o)))))) + +;;; miscellaneous figs likely not to go in the paper + +(defn img-slo-cyl-tracer-grid [c direc outname &optional [ne 10]] + (defn make-fname [direc ne np nstepfac timeint cdrglb prefine] + (+ direc "/" (.format "ne{}-np{}-nstep{}-{}-{}-pr{}.bin" + ne np (* 
ne 6 nstepfac) timeint cdrglb prefine))) + (sv nstepfac 5) + (sv degs (nes->degstrs (, 10)) + fs 7) + (with [(pl-plot (, 8.25 4.5) outname)] + (sv imgss [] nps []) + (for [c (, (, 4 "exact" "caas" 0) + (, 16 "interp" "caas-node" 5) + (, 16 "interp" "caas-node" 1))] + (.append nps (first c)) + (.append imgss (read-slmmir-io-arrays (make-fname direc ne (first c) + nstepfac (second c) (nth c 2) (last c))))) + (sv spi 0) + (for [idx (, 1 3 5) + (, i imgs) (enumerate imgss)] + (sv np (nth nps i) + img (nth imgs idx)) + (pl.subplot 3 3 (inc! spi)) + (when (= idx 1) + (sv (get img (= img 1)) (- 1 1e-16))) + (draw-slmmir-image img) + (when (>= spi 7) + (pl.text 10 25 (.format (+ "$n_e$ {} $n_p$ {} {} time step\n" + "{}") + ne np (if (= nstepfac 5) "short" "long") + (case/in spi + [(, 7 8) "on dynamics grid"] + [:else "on tracer grid"])) + :fontsize fs))))) + +;;; drivers + +(when-inp ["acc-print-txt-table" {:fname str}] + (sv c (get-context) + d (acc-parse (+ c.data-dir fname))) + (when (in "stab-cmp" fname) (sv c.ics (, "gau"))) + (acc-print-txt-table c d)) + +(when-inp ["fig-stab-cmp" {:fname str}] + (sv c (get-context) + d (acc-parse (+ c.data-dir fname))) + (fig-stab-cmp c d)) + +(when-inp ["figs-acc" {:fname str}] + (sv c (get-context) + d (acc-parse (+ c.data-dir fname))) + (figs-acc c d)) + +(when-inp ["fig-midpoint" {:fname str}] + (sv c (get-context) + d (acc-parse (+ c.data-dir fname))) + (fig-acc-midpoint-check c d)) + +(when-inp ["fig-filament" {:fname str}] + (sv c (get-context) + d (acc-parse (+ c.data-dir fname))) + (fig-filament c d)) + +(when-inp ["figs-mixing" {:fname str}] + (sv c (get-context) + d (acc-parse (+ c.data-dir fname))) + (figs-mixing c d)) + +(when-inp ["img-filament" {:fname str :direc str}] + (sv c (get-context) + c.nps (, 4 6 8) + d (acc-parse (+ c.data-dir fname))) + (img-slo-filament c None (+ c.data-dir direc) 3 (+ c.fig-dir "slo-midpoint") + :nps (, 4 6 8 12) :nps-right (, 4 6 8)) + (img-slo-filament c d (+ c.data-dir direc) 5 (+ 
c.fig-dir "slo-finpoint") + :nps (, 4 6 8 12) :nps-right (, 4 6 8))) + +(when-inp ["img-filament-slide" {:direc str}] + (sv c (get-context) + c.nps (, 4 6 8 12)) + (img-slocyl-slide c None (+ c.data-dir direc) (, 1 3 5) + (+ c.fig-dir "slo-imgs-slide")) + (sv c.nps (, 4 12)) + (img-slocyl-slide c None (+ c.data-dir direc) (, 1 3 5) + (+ c.fig-dir "slo-imgs-slide-brief") + :ylabel True)) + +(when-inp ["fig-pg-mimic-src-term" {:fname str}] + (sv c (get-context) + d (acc-parse (+ c.data-dir fname))) + (fig-acc-mimic-src-term-midpoint c d)) + +(when-inp ["fig-toychem-diagnostic" {:fname str}] + (sv c (get-context) + d (toychem-diagnostic-parse (+ c.data-dir fname))) + (fig-toychem-diagnostic c d)) + +(when-inp ["fig-toychem-finpoint" {:direc str}] + (sv c (get-context) + d (+ c.data-dir direc "/") + img-files []) + (for [np (, 4 8)] + (.append img-files (first (glob.glob (+ d (.format "*np{}*bin" np)))))) + (img-toychem c img-files (+ c.fig-dir "toychem-finpoint")) + (img-toychem c img-files (+ c.fig-dir "toychem-finpoint-diagnostic") + :diagnostic True)) + +(when-inp ["fig-comm-footprint" {:fname str}] + (sv c (get-context) + d (parse-footprint (+ c.data-dir fname))) + (fig-comm-footprint c d)) diff --git a/methods/islet/figures/figs-methods.hy b/methods/islet/figures/figs-methods.hy new file mode 100644 index 0000000..cbd9107 --- /dev/null +++ b/methods/islet/figures/figs-methods.hy @@ -0,0 +1,584 @@ +(require [amb3 [*]]) +(import amb3 [amb3 [*]] [figsutils :as futils] + [scipy.linalg :as linalg] + math re poly) + +(assoc matplotlib.rcParams "savefig.dpi" 300) +(do (pl-require-type1-fonts)) + +;;; tables + +(defn read-last-array [ln] + (sv arr (re.findall "{.*}" ln)) + (when (empty? 
arr) (return arr))
+  ;; Turn the last "{...}" match into list syntax ("," -> " ", braces ->
+  ;; brackets), then read and evaluate it.
+  ;; NOTE(review): this evals text taken from the input line; only use on
+  ;; trusted files.
+  (as-> (last arr) it
+        (.replace it "," " ")
+        (.replace it "{" "[")
+        (.replace it "}" "]")
+        (read-str it)
+        (eval it)))
+
+;; Scan the text file fname line by line and collect method definitions into
+;; a nested dict d, stored under the key path (xnodes, np). For each line the
+;; first matching clause applies:
+;;   "xnodes: " -> xnodes becomes 'gll or 'uniform (by "GLL"/"Uniform")
+;;   "case "    -> np is parsed from "case <digits>:"
+;;   "subnp[]"  -> subnp array is read and the nodes list is reset
+;;   "offst[]"  -> offst array is read, subtype becomes 'offset
+;;   any line without "nodes[]" that contains a brace array
+;;              -> array appended to nodes, subtype becomes 'general
+;;   "eval"     -> the accumulated entry is stored in d
+;; Returns d.
+(defn parse-cpp-methods [fname]
+  (sv txt (.split (readall fname) "\n")
+      d {})
+  (for [ln txt]
+    (cond [(in "xnodes: " ln)
+           (sv xnodes (cond [(in "GLL" ln) 'gll]
+                            [(in "Uniform" ln) 'uniform]))]
+          [(in "case " ln)
+           (sv np (int (first (re.findall "case (\d*):" ln))))]
+          [(in "subnp[]" ln)
+           (sv subnp (read-last-array ln)
+               nodes [])]
+          [(in "offst[]" ln)
+           (sv offst (read-last-array ln)
+               subtype 'offset)]
+          ;; The `do` form assigns arr as a side effect so the body can reuse it.
+          [(and (not (in "nodes[]" ln))
+                (not (empty? (do (sv arr (read-last-array ln))
+                                 arr))))
+           (sv subtype 'general)
+           (.append nodes arr)]
+          [(in "eval" ln)
+           (assoc-nested d (, xnodes np)
+                         (if (= subtype 'general)
+                           {:subtype subtype :subnp subnp :nodes nodes}
+                           {:subtype subtype :subnp subnp :offst offst}))]))
+  d)
+
+;; Format the sequence a as "\{e1, e2, ...\}" (LaTeX-escaped braces).
+(defn make-array-str [a] (+ "\{" (str.join ", " (lfor e a (str e))) "\}"))
+
+;; Build a one-column LaTeX tabular listing the nodal subsets ns, one
+;; make-array-str per row. first?/last? are presumably bound by the
+;; for-first-last macro to flag the first and last iteration -- confirm.
+(defn make-subsets-str [ns]
+  (sv s "\n\\begin{tabular}{l}\n\!\!\!nodal subsets \\\\\n\{")
+  (for-first-last
+    [n ns]
+    (+= s
+        (if first? "" "\\phantom{\{}")
+        (make-array-str n)
+        (if last? "\}\n" ",\\\\\n")))
+  (+ s "\end{tabular}"))
+
+;; Write a LaTeX table of methods to the file fname.
+;; d: dict as returned by parse-cpp-methods; xnodes: 'gll or 'uniform, selects
+;; the sub-dict whose keys are the np values (one table row each).
+;; For 'gll an extra fixed row "4 & 2 & see text & see text" comes first.
+;; The OOA column is min(subnp) - 1.
+;; NOTE(review): nodes-str is bound below but not part of the written row.
+(defn write-methods-latex-tables [d xnodes fname]
+  (sv col-hdrs (, "$\\numg$" "OOA" "$\\npsub$" "Supports")
+      d (get d xnodes)
+      nps (list (.keys d)))
+  (with [f (open fname "w")]
+    (defn write [s] (.write f s))
+    (write "\\begin{center}\n")
+    (write "\\begin{tabular}{r|c|l|l}\n")
+    (write (+ (str.join " & " col-hdrs) " \\\\\n\\hline\n"))
+    (when (= xnodes 'gll)
+      (write "4 & 2 & see text & see text \\\\\n\\hline\n"))
+    (for [np nps]
+      (sv e (get d np))
+      (svb (np-str (str np))
+           (nodes-str (case/eq xnodes ['gll "GLL"] ['uniform "Uniform"]))
+           (ooa-str (str (dec (min (:subnp e)))))
+           (npsub-str (make-array-str (:subnp e)))
+           (subsets-str (if (= (:subtype e) 'offset)
+                          (+ "offsets " (make-array-str (:offst e)))
+                          (make-subsets-str (:nodes e)))))
+      (write (+ (str.join " & " (, np-str ooa-str npsub-str subsets-str))
+                " \\\\\n"))
+      ;; Horizontal rule between rows, but not after the last one.
+      (unless (= np (last nps)) (write "\\hline\n")))
+    (write "\\end{tabular}\n")
+    (write "\\end{center}\n")))
+
+;;; illustrations
+
+;; Draw one element outline with an nf x nf dashed sub-grid and two tensor
+;; grids of GLL points (np 4 and np 8), saved as a PDF at img-fname.
+(defn illustrate-grids [img-fname]
+  ;; Plot the np x np tensor grid of GLL points with the given marker.
+  (defn draw-gll-dots [np marker]
+    (sv xgll (col-vec (poly.get-gll-x np))
+        e (npy.ones (, np 1))
+        X (npy.dot e (.transpose xgll))
+        Y (npy.dot xgll (.transpose e)))
+    (pl.plot X Y marker :markersize 12))
+  (sv nf 6 elc "k")
+  (with [(pl-plot (, 5 5) img-fname :format "pdf")]
+    ;; Element boundary; (if 0 ...) always selects the second color.
+    (pl.plot [-1 1] [1 1] elc [-1 1] [-1 -1] elc [1 1] [-1 1] elc [-1 -1] [-1 1] elc
+             :linewidth 2
+             :color (if 0 "#1D4BFA" "#3838FF"))
+    (sv lw 2
+        line "g--"
+        d 0.99 nd (- d))
+    ;; it = 0 and it = 1 select the two branches of the plot call below;
+    ;; the outermost grid lines are pulled in to +-d.
+    (for [it (range 2)]
+      (for [i (range (inc nf))]
+        (sv x (as-> (- (* 2 (/ i nf)) 1) x
+                    (if (= x -1) nd
+                        (= x 1) d
+                        x)))
+        (if (zero? 
it) + (pl.plot [x x] [nd d] line :linewidth lw) + (pl.plot [nd d] [x x] line :linewidth lw)))) + (draw-gll-dots 4 (+ elc "o")) + (draw-gll-dots 8 "r.") + (pl.axis "equal") + (pl.axis "off"))) + +(defn draw-np4-schematic [img-fname] + (import islet) + (sv np 4 fs 12 + x-gll (poly.get-gll-x np) + nx 256 + x (npy.linspace -1 1 nx) + isl (islet.Islet) + (, yn yo yb) (lfor m (, 0 1 3) (.transpose (isl.eval m np x))) + clrs "kgrb" + d 1.04) + (with [(pl-plot (, 4 4) img-fname :tight True)] + (pl.axes [0 0.6 1 0.4]) + (sv xsub (npy.linspace (nth x-gll 2) 1 (// nx 4)) + xscale (- (* 2 (/ (- xsub (nth x-gll 2)) (- 1 (nth x-gll 2)))) 1) + alpha (poly.eval-lagrange-poly (poly.get-gll-x 3) (, 0 0.306 1) xscale)) + (pl.plot (- (get xsub (s-all-rev))) (get alpha (s-all-rev)) "k-." + xsub alpha "k-.") + (do + (pl.plot -2 -2 "k:" :label "Natural") + (pl.plot -2 -2 "k--" :label "Offset nodal subset: $\{3,4\}$, $\{0,0\}$") + (pl.plot -2 -2 "k-" :label "Optimized") + (pl.plot -2 -2 "k-." :label "Convex combination parameter") + (pl.legend :loc "center" :fontsize fs :bbox-to-anchor (, 0.49 0.64) + :frameon False)) + (my-grid) + (axis-tight-pad :pad 0) + (pl.xlim (, (- d) d)) + (pl.ylim (, -0.05 1.05)) + (pl.yticks (, 0 0.5 1) :fontsize fs) + (pl.plot (nth x-gll 2) 0 "ko" 1 1 "ko") + (pl.text 0.57 1 "Fully $n_p$ 4" :fontsize fs :va "top") + (pl.text 0.05 0 "Fully $n_p$ 3" :fontsize fs) + (pl.title "Optimized $n_p$ 4 basis" :fontsize fs) + (sv xt (npy.linspace -1 1 9)) + (pl.xticks xt (lfor e (range (len xt)) "") :fontsize fs) + (pl.axes [0 0 1 0.57]) + (for [i (range (dec np) -1 -1)] + (sv c (nth clrs i)) + (pl.plot x (get yn i) (+ c ":")) + (pl.plot x (get yo i) (+ c "--")) + (pl.plot x (get yb i) (+ c "-"))) + (my-grid) + (axis-tight-pad :pad 0) + (pl.xlim (, (- d) d)) + (pl.xticks xt :fontsize fs) + (pl.yticks (npy.linspace -0.2 1 7) :fontsize fs) + (pl.xlabel "Reference coordinate" :fontsize fs))) + +;;; utils for search + +(defn parse-search-list [fname] + (sv txt (.split (readall 
fname) "\n")
+      c [])
+  ;; Keep only lines that are at least 100 characters long and start with
+  ;; "meam1"; each is parsed into a basis record.
+  (for [ln txt]
+    (when (or (< (len ln) 100) (!= (cut ln 0 5) "meam1")) (continue))
+    (sv b (futils.parse-search-basis-line ln))
+    (.append c b))
+  c)
+
+;; Return the records of c with duplicates removed, keeping the first
+;; occurrence. Two records are duplicates when futils.Nodes->string of their
+;; (:np, :nodes) gives the same string.
+(defn uniquify-search-list [c]
+  (sv uniq (set) c-uniq [])
+  (for [b c]
+    (sv s (futils.Nodes->string (:np b) (:nodes b)))
+    (when (in s uniq) (continue))
+    (.add uniq s)
+    (.append c-uniq b))
+  c-uniq)
+
+;; Look up the built-in nodal-subset table for np in 4..13.
+;; Returns None for np 4 and 5; other np outside the table are not handled.
+(defn get-slmmir-builtin [np]
+  (get
+    {4 None
+     5 None
+     6 [[0 1 2 3 4] [ 0 1 2 3 5] [0 1 2 3 4 5]]
+     7 (futils.offst->Nodes (, 5 5 6) (, 0 0 0))
+     8 (futils.offst->Nodes (, 6 6 7 6) (, 0 0 0 1))
+     9 [[0 1 2 3 4 5 8] [0 1 2 3 4 5 7 8] [0 1 2 3 4 5 6 8] [1 2 3 4 5 6 7]]
+     10 (futils.offst->Nodes (, 7 7 7 8 8) (, 0 0 0 0 1))
+     11 (futils.offst->Nodes (, 8 9 8 9 8) (, 0 0 0 0 1))
+     12 (futils.offst->Nodes (, 9 9 10 10 9 10) (, 0 0 0 0 1 1))
+     13 (futils.offst->Nodes (, 10 10 10 10 11 10) (, 0 0 0 0 0 1))}
+    np))
+
+;; True iff the two node tables describe the same basis: same np, same number
+;; of rows, and every pair of rows equal in length and content.
+;; A missing table (None) on either side compares as not equal.
+(defn nodes= [a-np a-nodes b-np b-nodes]
+  ;; FIX: get-slmmir-builtin yields None for np 4 and 5; without this guard
+  ;; the (len ...) call below raises TypeError on None.
+  (when (or (none? a-nodes) (none? b-nodes))
+    (return False))
+  (unless (and (= a-np b-np)
+               (= (len a-nodes) (len b-nodes)))
+    (return False))
+  (for [i (range (len a-nodes))]
+    (sv npa npy.array ai (npa (nth a-nodes i)) bi (npa (nth b-nodes i)))
+    ;; Length is checked first so the elementwise comparison sees equal shapes.
+    (unless (and (= (len ai) (len bi)) (npy.all (= ai bi)))
+      (return False)))
+  True)
+
+;;; run slmmir on a bunch of formulas
+
+;; Write a shell script to script-fname with one slmmir command per
+;; (basis, limiter pair). blns: basis records. cmdstr builds the command
+;; template; the "-basis" option is included only when its argument is truthy.
+(defn write-slmmir-script [blns script-fname]
+  (svb (cmdstr
+         (fn [basis]
+           (+ "KMP_AFFINITY=balanced OMP_NUM_THREADS=48 $exe "
+              "-method pcsl -ode {ode:s} -ic gaussianhills -ic cosinebells -ic slottedcylinders "
+              "-we 0 -rit -dmc eh -mono {mono:s} -lim {lim:s} -nsteps {nstep:d} -T 12 -ne {ne:d} "
+              "-np {np:d} -timeint {timeint:s} -prefine {prefine:d} -d2c "
+              (if basis "-basis \"{basis:s}\" " "")
+              "|& grep \"^C \"")))
+       (ode "nondivergent") (ne 20) (nstep (* 20 6))
+       (lims (, (, "caas-node" "caas") (, "none" "none"))))
+    (with [f (open script-fname "w")]
+      (f.write "exe=\n")
+      (sv ctr 0)
+      (defn write1 [b lim str-basis]
+        (sv cmd (.format (cmdstr str-basis) :ode ode :mono (first lim)
+                         :lim (last lim) :nstep nstep :ne ne :np 
(:np b)
+                         ;; np 4 runs with exact time integration and no
+                         ;; p-refinement; every other np uses interp / 5.
+                         :timeint (if (= (:np b) 4) "exact" "interp")
+                         :prefine (if (= (:np b) 4) 0 5)
+                         :basis (if str-basis (futils.Nodes->string (:np b) (:nodes b)))))
+        ;; Three script lines per case: an echo of the basis line, an echo of
+        ;; the numbered command, then the command itself.
+        (f.write (.format "echo 'line> {}'\n" (if str-basis
+                                                (:txt b)
+                                                (.format "builtin np {}" (:np b)))))
+        (f.write (.format "echo 'cmd> {} {}'\n" ctr cmd))
+        (f.write (.format "{}\n" cmd)))
+      ;; `dont` presumably disables this loop over built-in bases -- confirm.
+      (dont for [np (range 4 13) lim lims]
+            (write1 {:np np} lim False)
+            (inc! ctr))
+      (for [b blns lim lims]
+        (write1 b lim True)
+        (inc! ctr)))))
+
+;; Parse slmmir output produced by a script from write-slmmir-script.
+;; fname: log file; d: dict to extend (a fresh {} when not given);
+;; lebesgue: when true, also attach :lebesgue constants to each basis record
+;; and assert the recomputed metrics agree with (:npm b) to within 1e-2.
+;; Line handling, first matching clause wins:
+;;   "line>"            -> parse the basis record b from the text after col 6
+;;   "cmd>"             -> parse the command c and reset cls
+;;   "C cycle"          -> cyc = last whitespace-separated field as int
+;;   "C PASS"/"C FAIL"  -> append {:b b :cls cls} to d under
+;;                         cmd->key-base(c) + (cyc, ne)
+;;   other "C " lines longer than 10 chars -> parsed and stored in cls by :ic
+;; Returns d.
+(defn parse-slmmir-output [fname &optional d [lebesgue False]]
+  (import islet)
+  (svifn d {})
+  (sv txt (.split (readall fname) "\n")
+      isl (islet.Islet))
+  (for [ln txt]
+    (cond [(in "line>" ln)
+           (sv b (futils.parse-search-basis-line (cut ln 6)))
+           (when lebesgue
+             (sv bstr (futils.Nodes->string (:np b) (:nodes b))
+                 npm (isl.calc-xnodes-metrics-from-basis-string bstr)
+                 npa npy.array)
+             (assert (< (reldif (npa (:npm b)) (npa npm)) 1e-2))
+             (assoc b :lebesgue (isl.calc-lebesgue-consts-from-basis-string bstr))
+             ;; strp formats a sequence as " x.xxe+yy" fields; it is only used
+             ;; by the disabled print below.
+             (sv strp (fn [t]
+                        (sv s "")
+                        (for [e t] (+= s (.format " {:1.2e}" e)))
+                        s))
+             (dont prf (+ (strp (:npm b)) " |" (strp (:lebesgue b)))))]
+          [(in "cmd>" ln)
+           (sv cmd ln c (futils.parse-cmd ln)
+               cls {})]
+          [(in "C cycle" ln)
+           (sv cyc (int (last (.split ln))))]
+          [(or (in "C PASS" ln) (in "C FAIL" ln))
+           (assoc-nested-append d (+ (futils.cmd->key-base c)
+                                     (, cyc (:ne c)))
+                                {:b b :cls cls})]
+          [(and (> (len ln) 10) (= (cut ln 0 2) "C "))
+           (sv cl (futils.parse-C ln))
+           (assoc cls (:ic cl) cl)]))
+  d)
+
+;; Build the 10-element key tuple used to index parsed results:
+;; (timeint ode nstepfac "pcsl" cdrglb cdrlcl prefine np cyc ne).
+;; Values used when not given: ode "nondivergent", nstepfac 1, cyc 1, ne 20.
+;; np 4 selects "exact"/prefine 0, any other np "interp"/prefine 5;
+;; prop-preserve selects "caas-node"/"caas" versus "none"/"none".
+(defn basic-key [np &optional ode nstepfac prop-preserve cyc ne]
+  (svifn ode "nondivergent" nstepfac 1 cyc 1 ne 20)
+  (, (if (= np 4) "exact" "interp") ode nstepfac "pcsl"
+     (if prop-preserve "caas-node" "none") (if prop-preserve "caas" "none")
+     (if (= np 4) 0 5) np cyc ne))
+
+;; Convert tick positions x into log10 labels: an int when log10 is integral,
+;; otherwise a string with one decimal place.
+(defn get-tick-labels [x]
+  (sv xt [])
+  (for [e x]
+    (sv v (npy.log10 e))
+    (.append xt (if (= v (int v)) (int v) (.format "{:1.1f}" v))))
+  xt)
+
+;; Relabel the current y ticks with their log10 values.
+(defn set-semilogy-ticks [&optional fs]
+  (sv y (first (pl.yticks)))
+  (pl.yticks y (get-tick-labels y) :fontsize fs))
+
+;; Relabel the current x and y ticks with their log10 values.
+(defn set-loglog-ticks [&optional fs]
+  (sv x (first (pl.xticks)))
+  (pl.xticks x (get-tick-labels x) :fontsize fs)
+  (sv y (first (pl.yticks)))
+  (pl.yticks y (get-tick-labels y) :fontsize fs))
+
+;; LaTeX label for a norm keyword (:l1, :l2 or :li).
+(defn norm->str [n]
+  (get {:l1 "$l_1$" :l2 "$l_2$" :li "$l_{\infty}$"} n))
+
+;; Scatter plot of test-problem error against the heuristic metric, one
+;; marker series per np, saved as a PDF at img-fname.
+;; c: context (npclrs, npmarks); d: dict from parse-slmmir-output.
+;; Values used when not given: nps 4..13, prop-preserve False, ic "gau",
+;; norm :l2, pum-thr 1e-6, lebesgue False.
+;; With lebesgue the x value comes from (:lebesgue b) on a semilogy plot,
+;; otherwise from (:npm b) on a loglog plot. Bases whose (:pum b) exceeds
+;; pum-thr are skipped. A basis equal to the slmmir built-in for its np gets
+;; an extra red circle.
+(defn plot-slmmir-vs-heuristic [c d img-fname
+                                &optional nps prop-preserve ic norm pum-thr lebesgue]
+  (svifn nps (list (range 4 14)) prop-preserve False ic "gau" norm :l2
+         pum-thr 1e-6 lebesgue False)
+  (sv npa npy.array fs 11
+      plot (if lebesgue pl.semilogy pl.loglog))
+  (print img-fname)
+  (with [(pl-plot (, 4 4.2) img-fname :format "pdf")]
+    (for [np nps]
+      ;; NOTE(review): `get` on a missing key would presumably raise rather
+      ;; than return None, so the none? guard below may never fire -- confirm
+      ;; whether a None-returning lookup was intended here.
+      (sv es (get d #*(basic-key np :prop-preserve prop-preserve)))
+      (when (none? es) (continue))
+      (sv x [] y [])
+      (for [e es]
+        (sv cls (:cls e) b (:b e))
+        (when (> (:pum b) pum-thr) (continue))
+        ;; The metric tuple is indexed 0/1/2 for :l1/:l2/:li.
+        (.append x (nth (get b (if lebesgue :lebesgue :npm))
+                        (get {:l1 0 :l2 1 :li 2} norm)))
+        (.append y (get cls ic norm))
+        (when (nodes= (:np b) (:nodes b) (:np b) (get-slmmir-builtin (:np b)))
+          (plot (last x) (last y) "ro" :fillstyle "none" :markersize 12
+                :zorder 20)))
+      (plot (npa x) (npa y) (+ (get c.npclrs np) (get c.npmarks np))
+            :fillstyle "none" :zorder (- 20 np) :label (.format "$n_p$ {}" np)))
+    (if lebesgue
+        (do (set-semilogy-ticks :fs fs)
+            (pl.xlabel (+ (norm->str norm) " heuristic") :fontsize fs))
+        (do (set-loglog-ticks :fs fs)
+            (pl.xlabel (+ "$\log_{10}$ " (norm->str norm) " heuristic") :fontsize fs)))
+    (my-grid)
+    (pl.title (.format (+ "Test problem vs.~heuristic:\n"
+                          "nondivergent flow, 1.5$^\circ$, {}, long steps\n"
+                          "$p$-refinement, {}")
+                       (futils.ic-short2long ic)
+                       (+ (if prop-preserve "" "no ") "property preservation"))
+              :fontsize fs)
+    (pl.legend :loc "best" :fontsize fs)
+    (pl.axis "tight")
+    (pl.ylabel (+ "$\log_{10}$ " (norm->str norm) " relative error") :fontsize fs)))
+
+;;; pum vs perturb
+
+(defn parse-pum-vs-perturb [fname] 
+ (sv txt (.split (readall fname) "\n") + perturb None + d {}) + (for [ln txt] + (cond [(= ">>> " (cut ln 0 4)) + (sv (, - basis np) (sscanf ln "s,s,i"))] + [(= ">> " (cut ln 0 3)) + (sv pos (.find ln "[") + arr (eval (read-str (cut ln pos)))) + (if (none? perturb) + (sv perturb arr) + (do (sv meam1 arr) + (assoc-nested d (, basis np) (, perturb meam1))))])) + d) + +(defn plot-pum-vs-perturb [c d fname] + (sv f identity fs 11) + (with [(pl-plot (, 4 4) fname)] + (for [basis (, "gll_best")] + (sv uni (in "uni" basis)) + (for [np (range (if uni 8 6) 14)] + (sv e (geton d basis np)) + (when (none? e) (continue)) + (pl.loglog (f (first e)) (f (second e)) + (+ (get c.npclrs np) (if uni "--" "-")) + :label (+ "$n_p$ " (str np) (if uni "'" ""))))) + (sv f 1.7 slope 4) + (make-reference-slope-triangle [(/ 2.8e-3 f) (* 2.8e-2 f)] + [(* 3e-8 (** f slope)) (/ 3e-12 (** f slope))] + slope "k-" + :opposite True + :kwargs-text {"fontsize" (+ fs 4)}) + (set-loglog-ticks) + (my-grid) + (pl.legend :loc "upper left" :fontsize (dec fs) :ncol 1) + (pl.xlabel "$\log_{10}$ Element size relative random perturbation $\delta$" + :fontsize fs) + (pl.ylabel "$\log_{10}$ (max $|\lambda|$ - 1)" :fontsize fs) + (pl.title "Perturbed uniform mesh metric" :fontsize fs) + (pl.xlim (, 1e-4 0.1)) + (pl.ylim (, 1e-14 0.1)))) + +;;; meam1 and pum vs dx + +(defn parse-meam1-sweep [fname] "Parse the text file fname into a dict mapping method name to the tuple (dxs meam1s np). A line whose first token is gll_natural, gll_best, or uniform_offset_nodal_subset starts a new record and takes np as the integer in its second token. Each other line with exactly two tokens is scanned as a dx, meam1 pair and appended to the current record. A record is stored, and a summary line printed, when the next record starts and once more after the last line. Returns an empty dict when the file contains no method line." + (sv txt (.split (readall fname) "\n") + methods (, "gll_natural" "gll_best" "uniform_offset_nodal_subset") + fst True + d {}) + (defn fill [] + (print method (len dxs) (len meam1s)) + (assoc d method (, dxs meam1s np))) + (for [ln txt] + (sv toks (.split ln)) + (cond [(in (first toks) methods) + (unless fst (fill)) + (sv method (first toks) + np (int (second toks)) + fst False + dxs [] meam1s [])] + [(= (len toks) 2) + (sv (, dx meam1) (sscanf ln "f,f")) + (.append dxs dx) + (.append meam1s meam1)])) + (unless fst (fill)) + d) + +(defn parse-pum-sweep [method-fnames] + (sv d {}) + (for [(, method fname) method-fnames] + (sv txt
(.split (readall fname) "\n") + dxs [] meam1s [] skip 1) + (for [ln txt] + (when (in "final" ln) (break)) + (inc! skip)) + (for [ln (cut txt skip)] + (sv toks (.split ln)) + (unless (= (len toks) 4) (break)) + (.append dxs (float (nth toks 1))) + (.append meam1s (float (nth toks 3)))) + (assoc d method (, dxs meam1s))) + d) + +(defn plot-meam1-and-pum-vs-dx [c dmeam1 dpum fname] + (defn symx [x] + (sv x (npy.array x)) + (npy.concatenate (, x (- 1 (get x (s-all-rev)))))) + (defn symy [y] + (sv y (npy.array y)) + (npy.concatenate (, y (get y (s-all-rev))))) + (sv fs 11 + ms (, "gll_natural" "uniform_offset_nodal_subset" "gll_best") + clrs {(nth ms 0) "r" (nth ms 1) "g" (nth ms 2) "k"} + mrks {(nth ms 0) "." (nth ms 1) "x" (nth ms 2) "o"} + lbls {(nth ms 0) "GLL natural" (nth ms 1) "Uniform offset nodal subset" + (nth ms 2) "GLL nodal subset"}) + (with [(pl-plot (, 4 4) fname)] + (for [method ms] + (sv e (get dpum method)) + (pl.semilogy (first e) (second e) + (+ (get clrs method) (get mrks method)) + :label (get lbls method)) + (sv e (get dmeam1 method)) + (pl.semilogy (symx (first e)) (symy (second e)) + (+ (get clrs method) "-"))) + (sv np (last e)) + (for [i (range (// np 2) np)] + (sv p (/ i (dec np))) + (pl.semilogy [p p] [1e-15 1] (+ (get clrs (nth ms 1)) ":") :zorder -1)) + (set-semilogy-ticks) + (pl.ylim (, 1e-16 1)) + (pl.ylabel "$\log_{10}$ (max $|\lambda|$ - 1)" :fontsize fs) + (pl.xlabel "Translation, $\Delta x$, relative to element size 1" :fontsize fs) + (pl.title (.format "$n_p$ {} methods" np) :fontsize fs) + (pl.legend :loc (, 0.02 0.15) :fontsize fs) + (pl.xticks (npy.linspace 0 1 11)) + (my-grid))) + +;;; drivers + +(when-inp ["dev-parse"] + (sv fname "data/search-0.txt" + blns (parse-search-list fname)) + (print (len blns)) + (for [b blns] + (when (and (= (:np b) 13) (= (:type b) :offst-nodal-subset) (< (:pum b) 1e-6)) + (print b) (break))) + (for [b blns] + (when (and (= (:np b) 8) (= (:type b) :nodal-subset) (< (:pum b) 1e-7)) + (print b) 
(break)))) + +(when-inp ["write-slmmir-script"] + (sv script-fname "../../slmm/meas/run-slmmir-on-basis-lines.sh" + data-fnames (, "search-0.txt" + "search-findnodal_given_bestosn-0.txt" + "search-findnodal_given_bestosn-1.txt" + "search-findnodal_given_bestosn-2.txt" + "search-findnodal_given_bestosn-3.txt" + "search-findnodal_given_bestosn-7.txt") + blns []) + (for [fname data-fnames] + (.extend blns (parse-search-list (+ "data/" fname)))) + (sv blns (uniquify-search-list blns)) + (write-slmmir-script blns script-fname)) + +(when-inp ["plot-slmmir-vs-heuristic"] + (sv fnames (, "slmmir-on-basis-lines-2.txt") + c (futils.get-context) + lebesgue False + d {}) + (for [fname fnames] + (sv d (parse-slmmir-output (+ "data/" fname) :d d :lebesgue lebesgue))) + (for [(, norm pp ic) (, (, :l2 False "gau") (, :l2 True "cos"))] + (plot-slmmir-vs-heuristic + c d (.format (+ "{}slmmir-vs-heuristic-{}-{}-{}" (if lebesgue "-leb" "")) + c.fig-dir ic (if pp "pp" "nopp") (name norm)) + :nps [6 7 8 9 10] + :prop-preserve pp :norm norm :ic ic :lebesgue lebesgue))) + +(when-inp ["illustrations"] + (sv c (futils.get-context)) + (illustrate-grids (+ c.fig-dir "illustrate-grids"))) + +(when-inp ["tables"] + (for [xnodes (, 'gll)] + (write-methods-latex-tables (parse-cpp-methods "islet-methods.txt") + xnodes (.format "figs/methods-table-{}.tex" + (name xnodes))))) + +(when-inp ["pum-vs-perturb"] + (sv fname "pum_perturb_plot-041021.txt" + d (parse-pum-vs-perturb (+ "data/" fname)) + c (futils.get-context)) + (plot-pum-vs-perturb c d (+ c.fig-dir "pum-vs-perturb"))) + +(when-inp ["meam1-and-pum-vs-dx"] + (sv c (futils.get-context) + data-dir "data/" + meam1-fname (+ data-dir "run_meam1_sweep-np8.txt") + method-fnames (zip (, "gll_best" "gll_natural" "uniform_offset_nodal_subset") + (lfor fname (, "pum_sweep-np8-gll_best.txt" + "pum_sweep-np8-gll_natural.txt" + "pum_sweep-np8-uni.txt") + (+ data-dir fname))) + d (parse-meam1-sweep meam1-fname) + d-pum (parse-pum-sweep method-fnames)) + 
(plot-meam1-and-pum-vs-dx c d d-pum (+ c.fig-dir "meam1-and-pum-vs-dx"))) + +(when-inp ["np4-schematic"] + (sv c (futils.get-context)) + (draw-np4-schematic (+ c.fig-dir "np4-schematic"))) + +(defn plot-basis-schematic [np &optional [annotate False]] + (import islet) + (sv c (futils.get-context) + pats {0 "--" 3 "-"} + clrs "krbcgm" + x (npy.linspace -1 1 512) + isl (islet.Islet) + fs 12) + (with [(pl-plot (, 6 (if annotate 4 3)) + (+ c.fig-dir "basis-schematic-np" (str np) + (if annotate "-annotated" "")))] + (for [method (, 0 3)] + (sv y (.transpose (isl.eval method np x)) + pat (get pats method)) + (for [i (range np)] + (pl.plot x (get y i) (+ (nth clrs (% i (len clrs))) pat) + :label (if (zero? i) (get {0 "Natural GLL" 3 "Islet GLL"} method)))) + (my-grid) + (sv d 1.04) + (pl.xlim (, (- d) d)) + (pl.ylim (, (if annotate -0.64 -0.22) 1.03))) + (pl.xlabel "Reference coordinate" :fontsize fs) + (pl.ylabel "Basis function value" :fontsize fs) + (pl.text -1 1.1 (.format "Basis functions, $n_p$ = {}" np) :fontsize fs) + (pl.figlegend :loc (, 0.49 (if annotate 0.91 0.88)) :fontsize fs :ncol 2) + (when annotate + (sv npa npy.array + xgll (npa (poly.get-gll-x np)) + ireg 1 + xs (cut xgll ireg (+ ireg 2)) + xc (cc xs) + clr "g" y -0.3 w 0.006 lw 2 ones (npa [1 1])) + (for [i (range 2)] + (pl.arrow (nth xgll (+ ireg i)) y 0 0.1 :width w :color clr)) + (pl.plot xs (* y ones) (+ clr "-") (* xc ones) [-0.36 y] :color clr :lw lw) + (pl.text xc -0.45 (.format "Region {}" ireg) + :color clr :ha "center" :fontsize fs) + (when (= np 6) + (sv support [0 1 2 3 5] + clr "r") + (for [i support] + (pl.arrow (nth xgll i) -0.5 0 0.1 :width w :color clr)) + (sv x (nth xgll 4)) + (pl.plot x -0.45 (+ clr "x") :markersize 14) + (pl.text 0 -0.59 (.format "Support nodes for region {}" ireg) + :color clr :ha "center" :fontsize fs))))) + +(when-inp ["basis-schematic" {:np int}] + (plot-basis-schematic np :annotate True)) diff --git a/methods/islet/figures/figs.tex 
b/methods/islet/figures/figs.tex new file mode 100644 index 0000000..5feda17 --- /dev/null +++ b/methods/islet/figures/figs.tex @@ -0,0 +1,376 @@ +% hy figs-methods.hy basis-schematic 6 +\begin{figure}[tbh] + \centering + \includegraphics[width=0.75\linewidth]{basis-schematic-np6-annotated} + \caption{ + Basis functions for the Islet $\np=6$ GLL nodal subset basis listed in Table \ref{tbl:gll}. + Each curve's color corresponds to a basis function. + Each line pattern corresponds to a basis type, as listed in the legend. + The green span shows region 1. + The red arrows point to the nodes in the support of region 1; + the red $\times$ is beneath the one node not in region 1's support. + } + \label{fig:np6-basis} +\end{figure} + +\begin{table}[tbh] + \input{figs/methods-table-gll.tex} + \caption{ + Islet GLL nodal subset bases. + Each row provides a formula for the row's $\np$ value. + Columns are $\np$, order of accuracy (OOA), + the support sizes $\npsub$ for each region ordered left to middle, + and the supports. + For offset nodal subset bases, supports are given by offsets. + For general nodal subset bases, supports are given by nodal subsets, again ordered from left region to middle. + The case $\np=4$ is described in Sect.~\ref{sec:np4}. + In all cases, the support points are GLL points. + } + \label{tbl:gll} +\end{table} + +% ./pum_sweep 8 512 4 0 > pum_sweep-np8-gll_natural.txt +% ./pum_sweep 8 512 4 1 > pum_sweep-np8-gll_best.txt +% ./pum_sweep 8 512 4 2 > pum_sweep-np8-uni.txt +% ./run_meam1_sweep 8 > run_meam1_sweep-np8.txt +% hy figs-methods.hy meam1-and-pum-vs-dx +\begin{figure}[tbh] + \centering + \includegraphics[width=0.5\linewidth]{meam1-and-pum-vs-dx} + \caption{ + $\lambdamax(\Delta x)-1$ (solid lines) and $\lambdamaxpum(\Delta x)-1$ (markers) for + the natural GLL (red, small circles), uniform-points offset nodal subset (green, $\times$), and + Islet GLL nodal subset (black, large circle) $\np=8$ bases. 
+ Green dotted vertical lines mark multiples of $1/(\np-1)=1/7$. + } + \label{fig:meam1-and-pum-vs-dx} +\end{figure} + +% ./pum_perturb_plot > pum_perturb_plot-041021.txt +% hy figs-methods.hy pum-vs-perturb +\begin{figure}[tbh] + \centering + \includegraphics[width=0.5\linewidth]{pum-vs-perturb} + % include only gll_best b/c uniform_offset_nodal_subset was already shown to + % be bad at i*1/(np-1) > 0.5, integer i. + \caption{ + $\lambdamaxpum(\delta)-1$ for the bases in Table \ref{tbl:gll} with $\np \ge 6$. + The triangle provides a $\delta^4$ reference slope. + } + \label{fig:pum-vs-perturb} +\end{figure} + +% hy figs-methods.hy np4-schematic +\begin{figure}[tbh] + \centering + \includegraphics[width=0.5\linewidth]{np4-schematic} + \caption{ + Illustration of the optimized Islet GLL $\np=4$ basis (solid line) compared with + the natural (dotted) and the best nodal subset (dashed) $\np=4$ bases. + Each basis function in a basis has its own color. + The top panel shows the convex combination parameter value as a function of reference coordinate + that is used to combine the natural and best nodal subset bases + to form the optimized basis. + } + \label{fig:np4-schematic} +\end{figure} + +% hy figs-methods.hy illustrations +\begin{figure}[tbh] + \centering + \includegraphics[width=0.5\linewidth]{illustrate-grids} + \caption{ + One spectral element (blue solid line outlining the full square) with + dynamics (black large circles), tracer (small red circles), and physics (green dashed lines) subelement grids. + } + \label{fig:illustrate-grids} +\end{figure} + +% bash run-stability-cmp.sh > stability-cmp-0.txt +% hy figs-adv-diag.hy fig-stab-cmp stability-cmp-0.txt +\begin{figure}[tbh] + \centering + \includegraphics[width=0.5\linewidth]{stab-cmp-l2} + \caption{ + Stability of the Islet method with the Islet GLL bases, + compared with the instability of the method with the natural GLL bases. 
+ The $x$-axis is the average dynamics-grid point spacing at the equator in degrees for the quasiuniform cubed-sphere grid. + The $y$-axis is $\log_{10} l_2$ relative error. + A curve's line pattern corresponds to basis type and number of cycles, as listed in the top legend. + A curve's marker corresponds to $\npt$, as listed in the bottom legend. + The case is divergent flow, Gaussian hills ICs, property preservation, $p$-refinement, and long time steps. + } + \label{fig:islet-vs-gll} +\end{figure} + +% bash run-accuracy.sh > acc-0.txt +% hy figs-adv-diag.hy fig-midpoint acc-0.txt +\begin{figure}[tbh] + \centering + \includegraphics[width=0.5\linewidth]{midpoint-check} + \caption{ + Comparison of relative errors calculated at the test simulation's midpoint time of 6 days (1/2 cycle, dashed lines) + and endpoint time of 12 days (1 cycle, solid lines). + Each number at the right side of the plot is the empirical OOA computed using the final two points of the 1-cycle result. + } + \label{fig:traj-interp} +\end{figure} + +% hy figs-adv-diag.hy figs-acc acc-0.txt +\begin{figure}[tbh] + \centering + \includegraphics[width=0.5\linewidth]{acc-nondivergent-gau-exact-nopp-fac5} + \caption{ + Empirical verification of the order of accuracy of the Islet GLL bases. + Each number at the right side of the plot is the empirical OOA computed using the final two points of the $l_\infty$ curve. + } + \label{fig:islet-empirical-ooa} +\end{figure} + +% hy figs-methods.hy write-slmmir-script +% bash run-slmmir-on-basis-lines.sh > slmmir-on-basis-lines-2.txt +% hy figs-methods.hy plot-slmmir-vs-heuristic # uses slmmir-on-basis-lines-2.txt +\begin{figure}[tbh] + \centering + \includegraphics[width=0.48\linewidth]{slmmir-vs-heuristic-gau-nopp-l2} + \caption{$l_2$-norm relative error on the nondivergent flow problem + using basis $\basisns_{\np}$ vs.~$a_2(\basisns_{\np})$, + for a large number of \abtps~bases and $\np=6$ to $10$. + The legend lists the marker type for each $\np$. 
+ Large red circles outline the bases in Table \ref{tbl:gll}. + The configuration uses the Gaussian hills IC and no property preservation.} + \label{fig:slmmir-vs-heuristic-a} +\end{figure} +\begin{figure}[tbh] + \centering + \includegraphics[width=0.48\linewidth]{slmmir-vs-heuristic-cos-pp-l2} + \caption{Same as Fig.~\ref{fig:slmmir-vs-heuristic-a} except that the configuration + uses the cosine bells IC with property preservation.} + \label{fig:slmmir-vs-heuristic-b} +\end{figure} + +% hy figs-adv-diag.hy figs-acc acc-0.txt +\begin{figure}[tbh] + \centering + \includegraphics[width=0.48\linewidth]{acc-nondivergent-gau-interp-pp-fac1} + \caption{ + Accuracy diagnostic. + Compare with Figs.~1, 2 in TR14. + } + \label{fig:islet-acc-nondiv-gau-a} +\end{figure} +\begin{figure}[tbh] + \centering + \includegraphics[width=0.48\linewidth]{acc-nondivergent-gau-interp-pp-fac5} + \caption{ + Accuracy diagnostic. + Compare with Figs.~1, 2 in TR14. + } + \label{fig:islet-acc-nondiv-gau-b} +\end{figure} +\begin{figure}[tbh] + \centering + \includegraphics[width=0.48\linewidth]{acc-nondivergent-cos-interp-pp-fac1} + \caption{ + Accuracy diagnostic. + Compare with Fig.~3 in TR14. + } + \label{fig:islet-acc-nondiv-cos-a} +\end{figure} +\begin{figure}[tbh] + \centering + \includegraphics[width=0.48\linewidth]{acc-nondivergent-cos-interp-pp-fac5} + \caption{ + Accuracy diagnostic. + Compare with Fig.~3 in TR14. + } + \label{fig:islet-acc-nondiv-cos-b} +\end{figure} +\begin{figure}[tbh] + \centering + \includegraphics[width=0.48\linewidth]{acc-divergent-cos-interp-pp-fac1} + \caption{ + Accuracy diagnostic. + Compare with Fig.~16 in TR14. + } + \label{fig:islet-acc-div-cos-a} +\end{figure} +\begin{figure}[tbh] + \centering + \includegraphics[width=0.48\linewidth]{acc-divergent-cos-interp-pp-fac5} + \caption{ + Accuracy diagnostic. + Compare with Fig.~16 in TR14. 
+ } + \label{fig:islet-acc-div-cos-b} +\end{figure} + +% hy figs-adv-diag.hy fig-filament acc-0.txt +\begin{figure}[tbh] + \centering + \includegraphics[width=1\linewidth]{filament} + \caption{ + Filament diagnostic, following Sect.~3.3 of TS12. + Compare with Fig.~5 in TR14. + The top row shows the diagnostic measured on the $\npv=4$ dynamics grid; + the bottom row, on the tracer grid. + The legend describes the dynamics-grid resolution and the time step length. + The prescribed validation problem is the nondivergent flow with cosine bells IC. + Property preservation is on. + The $x$-axis is $\tau$, the mixing ratio threshold. + The $y$-axis is the percent area having mixing ratio at least $\tau$ relative to that at the initial time. + } + \label{fig:filament} +\end{figure} + +% bash run-mixing.sh > mixing-0.txt +% hy figs-adv-diag.hy figs-mixing mixing-0.txt +\begin{figure}[tbh] + \centering + \includegraphics[width=1\linewidth]{mixing-ne20.png} + \caption{ + Mixing diagnostic, following Sect.~3.5 of TS12. + Compare with Figs.~11--14 in TR14. + This figure shows results for dynamics-grid resolution of 1.5$^\circ$. + $l_o$ is exactly 0 in all cases because shape preservation is on, and so is not shown. + See the text for further details.} + \label{fig:mixing-ne20} +\end{figure} +\begin{figure}[tbh] + \centering + \includegraphics[width=1\linewidth]{mixing-ne40.png} + \caption{Same as Fig.~\ref{fig:mixing-ne20} but with dynamics-grid resolution $0.75^\circ$.} + \label{fig:mixing-ne40} +\end{figure} + +% bash run-img-filament.sh > filament-imgs-0.txt +% hy figs-adv-diag.hy img-filament filament-imgs-0.txt filament-imgs +\begin{figure}[tbh] + \centering + \includegraphics[width=1\linewidth]{slo-midpoint} + \caption{ + Images of the slotted cylinders IC advected by the nondivergent flow at the simulation's midpoint. + Each column corresponds to a spatial resolution and time step length configuration, + as stated at the top of each column. 
+ Each row corresponds to a particular value of $\npt$, as stated in the text at the top-right of each image. + We omit $\npt=12$ results for the $0.75^\circ$ resolution because, at the resolution of the figure, they are essentially identical to the $\npt=8$ images. + } + \label{fig:slocyl-midpoint} +\end{figure} +\begin{figure}[tbh] + \centering + \includegraphics[width=1\linewidth]{slo-finpoint} + \caption{ + Same as Fig.~\ref{fig:slocyl-midpoint} but for the simulation's final point. + Error measures are printed at the bottom-left of each image; see text for details. + } + \label{fig:slocyl-finpoint} +\end{figure} + +% bash run-pg-srcterm-midpoint-test.sh > pg-srcterm-midpoint-test-nbdy3-1.txt +% hy figs-adv-diag.hy fig-pg-mimic-src-term pg-srcterm-midpoint-test-nbdy3-1.txt +% nbdy3 => edge_np = interior_np = 3 +\begin{figure}[tbh] + \centering + % pg = np and pg = 2 + \includegraphics[width=0.5\linewidth]{acc-pg-mimic-src-term-midpoint-nondivergent-gau-interp-pp-fac5-l2} + \caption{ + Validation of the remap of tendencies from the physics grid to the tracer grid and of state from the tracer grid to the dynamics grid. + See Sect.~\ref{sec:results:sources} for a description of the problem. + } + \label{fig:pg-mimic-src-term} +\end{figure} + +% bash run-toychem-diagnostic.sh > toychem-diagnostic-nbdy3-0.txt +% hy figs-adv-diag.hy fig-toychem-diagnostic toychem-diagnostic-nbdy3-0.txt +\begin{figure}[tbh] + \centering + % pg = np-2 + \includegraphics[width=0.5\linewidth]{toychem-diagnostic} + \caption{ + Toy chemistry diagnostic values as a function of time for ten cycles of the nondivergent flow. + Time is on the $x$-axis and is measured in cycles. + Diagnostic values for the $l_2$-norm (solid lines) and $l_\infty$-norm (dashed lines) are on the $y$-axis. + Markers as listed in the bottom legend are placed at the start of each cycle to differentiate the curves. 
+ } + \label{fig:toychem-diagnostic} +\end{figure} + +% bash run-toychem-imgs.sh +% hy figs-adv-diag.hy fig-toychem-finpoint toychem-imgs-nbdy3 +\begin{figure}[tbh] + \centering + \includegraphics[width=1\linewidth]{toychem-finpoint} + \caption{ + Images of the monatomic tracer at the end of the first cycle. + Text at the lower left of each image states the configuration. + Text at the upper right reports global extremal values. + } + \label{fig:toychem-finpoint} +\end{figure} +\begin{figure}[tbh] + \centering + \includegraphics[width=1\linewidth]{toychem-finpoint-diagnostic} + \caption{ + Same as Fig.~\ref{fig:toychem-finpoint}, but now the images are of $(X_T - \bar{X}_T)/\bar{X}_T$. + } + \label{fig:toychem-finpoint-diagnostic} +\end{figure} + +% bash run-isl-footprint.sh > isl-footprint-1.txt +% hy figs-adv-diag.hy fig-comm-footprint isl-footprint-1.txt +\begin{figure}[tbh] + \centering + \includegraphics[width=0.5\linewidth]{isl-footprint} + \caption{ + Communication volume, in number of real scalars transmitted in $q$-messages + per tracer per element per time step ($y$-axis) + vs.~time in days of the simulation ($x$-axis), + in the case of one element per process, + for the nondivergent flow, + with long (left) and short (right) time steps. + Statistic and $\npt$ line patterns are stated in the legends. 
+ } + \label{fig:footprint} +\end{figure} + +% code branch: https://github.com/ambrad/E3SM/releases/tag/islet-2d-paper-summit-sl-gpu-timings +% data: https://github.com/E3SM-Project/perf-data/tree/main/nhxx-sl-summit-mar2021 +% generate a table of data: +% hy sl-gpu-perf.hy table "perf-data/nhxx-sl-summit-mar2021/data/qsize10/*" +% hy sl-gpu-perf.hy table "perf-data/nhxx-sl-summit-mar2021/data/qsize40/*" +% we use these table entries to make the figure in addition to the SC20 paper's +% data: +% >>> ne 1024 qsize 10 nmax 4096 alg Eul main_loop +% 1024 383.49 0.29 +% 2048 225.43 0.50 +% 4096 132.30 0.85 +% 4600 120.84 0.93 +% >>> ne 1024 qsize 10 nmax 4096 alg SL main_loop +% 1024 253.64 0.44 1.51 +% 2048 146.66 0.77 1.54 +% 4096 89.18 1.26 1.48 +% 4600 81.39 1.38 1.48 +% >>> ne 1024 qsize 40 nmax 4096 alg Eul main_loop +% 2048 461.20 0.24 +% 4096 274.52 0.41 +% 4600 257.60 0.44 +% >>> ne 1024 qsize 40 nmax 4096 alg SL main_loop +% 2048 167.22 0.67 2.76 +% 4096 99.70 1.13 2.75 +% 4600 90.23 1.24 2.85 +% hy sl-gpu-perf.hy fig +\begin{figure}[tbh] + \centering + \includegraphics[width=0.5\linewidth]{sl-gpu-perf-032521-islet} + \caption{ + Performance comparison of SL transport with $\npv=\npt=4$ vs.~Eulerian transport + in the E3SM Atmosphere Model's dynamical core on the Summit supercomputer. + The $x$-axis is number of NVIDIA V100 GPUs on Summit used in a run; + the $y$-axis is dycore throughput reported in simulated years per wallclock day (SYPD). + The black curves are for Eulerian transport; the red, for SL. + Dashed lines are for 40 tracers; solid and the dotted black line, for 10. + A number above a data point reports the $y$-value of that point. 
+ } + \label{fig:summit-perf} +\end{figure} diff --git a/methods/islet/figures/figsutils.hy b/methods/islet/figures/figsutils.hy new file mode 100644 index 0000000..69e830e --- /dev/null +++ b/methods/islet/figures/figsutils.hy @@ -0,0 +1,225 @@ +(require [amb3 [*]]) +(import amb3 [amb3 [*]] struct math) + +(defn get-context [] + (sv c (Box) + c.data-dir "data/" + c.fig-dir "figs/" + c.odes (, "rotate" "divergent" "nondivergent") + c.cdrs (, (, "none" "none") (, "caas-node" "caas") (, "caas" "caas")) + c.nstepfacs (, 1 5) + c.methods (, "pcsl" "pcslu") + c.cycs (, 1 100) + c.timeints (, "exact" "interp") + c.nes (, 5 10 20 40 80) + c.nps (, 4 6 8 9 12) ;(, 4 5 6 7 8 9 10 11 12 13 16) + c.ics (, "gau" "cos" "slo") + c.npclrs {4 "g" 5 "m" 6 "r" 7 "c" 8 "k" 9 "b" 10 "g" 11 "c" 12 "r" 13 "m" 16 "g"} + c.npmarks {4 "o" 5 "x" 6 "s" 7 "x" 8 "p" 9 "+" 10 "." 11 "^" 12 "." 13 "*" 16 "."}) + c) + +(defn flow-short2long [flow] + (get {"divergent" "divergent flow" + "nondivergent" "nondivergent flow" + "rotate" "solid-body rotation"} flow)) + +(defn ic-short2long [ic] + (get {"gau" "Gaussian hills" + "cos" "cosine bells" + "slo" "slotted cylinders"} ic)) + +(defn nes->degstrs [nes] + (sv x [] xstr []) + (for [ne nes] + (sv deg (geton {5 6 10 3 20 "1.5" 40 "0.75" 80 "0.375" 160 "0.1875"} ne)) + (when (none? deg) (continue)) + (.append x ne) + (.append xstr (.format "${}^{{\circ}}$" deg))) + {:ne x :deg xstr}) + +(defn cdr-name [short] + (get {"caas" "CAAS-CAAS" "caas-node" "CAAS-point"} short)) + +;;; slmmir I/O + +(defn read-slmmir-io-arrays [fname &optional beg end stride] "Read consecutive array records from the binary file fname and return the selected ones as a list. Each record is one native int ndim read from 4 bytes, then ndim native ints giving the dims, then prod(dims) values read by numpy.fromfile with its default dtype and reshaped to dims. Records are numbered from 0 and records beg, beg+stride, beg+2*stride, ... are kept. Defaults are beg 0, end -1, stride 1. A negative end means read to the end of the file; otherwise reading stops before record number end, so end is exclusive." + (defn vis-read-array [f] + (sv b (.read f 4)) + (when (zero?
(len b)) (return None)) + (sv ndim (first (struct.unpack "@i" b)) + b (.read f (* 4 ndim)) + dims (struct.unpack (+ "@" (* "i" ndim)) b)) + (.reshape (npy.fromfile f :count (npy.prod dims)) dims)) + (svifn beg 0 end -1 stride 1) + (sv d []) + (with [f (open fname "rb")] + (sv i 0 i-next beg) + (while (or (< end 0) (< i end)) + (sv a (vis-read-array f)) + (when (none? a) (break)) + (when (= i i-next) + (.append d a) + (sv i-next (+ i-next stride))) + (inc! i))) + d) + +(defn draw-slmmir-image [f &optional vmin vmax ncolor colorsym switch-halves] + (svifn vmin -0.05 vmax 1.15 ncolor 24 colorsym False switch-halves True) + (sv (, m n) f.shape + lon-idx (if switch-halves + (+ (list (range (// n 2) n)) (list (range 0 (// n 2)))) + (s-all)) + x (* (npy.array [(/ -0.5 n) 0.25 0.5 0.75 1]) n) + xticks [] ;(, "0" "$\pi$/2" "$\pi$" "3$\pi$/2" "$2\pi$") + y (* (npy.array [0 0.5 1]) m) + yticks []; (, "-$\pi$/2" "0" "$\pi$/2") + fs 8 + colors (if colorsym + [(, 0 0 1) (, 1 1 1) (, 1 0 0)] + [(, .85 .85 .95) (, 0 0 1) (, 0 1 0) (, 1 1 0) (, 1 0 0)])) + (if 0 + (pl.contour (get f (, (s-all) lon-idx)) + (npy.linspace -0.05 1.15 25)) + (pl.imshow (get f (, (s-all) lon-idx)) + (matplotlib.colors.LinearSegmentedColormap.from-list + "filament" colors ncolor) + :vmin vmin :vmax vmax)) + (pl.xlim (, (first x) (last x))) (pl.xticks x xticks :fontsize fs) + (pl.ylim (, (first y) (last y))) (pl.yticks y yticks :fontsize fs) + (my-grid :ls ":")) + +;;; parse slmmir text output + +(defn parse-cmd [cmd &optional map-nstepfac] + (sv toks (.split cmd)) + (defn int-or-none [x] + (unless (none?
x) (int x))) + (defn get-key-val [key] + (for [(, i t) (enumerate toks)] + (unless (and (= (first t) "-") (= (cut t 1) key)) (continue)) + (return (get toks (inc i))))) + (sv keys {"ode" str "ne" int "np" int "nsteps" int "prefine" int + "mono" str "lim" str "timeint" str "method" str "pg" int-or-none} + d {}) + (for [e (.items keys)] + (assoc d (keyword (first e)) ((second e) (get-key-val (first e))))) + (sv nstepfac (/ (:nsteps d) (:ne d) 6)) + (unless (none? map-nstepfac) (sv nstepfac (map-nstepfac nstepfac))) + (assoc d :nstepfac (int nstepfac)) + (when (= (:timeint d) "exact") (assoc d :prefine 0)) + d) + +(defn cmd->key-base [c] + (sv c1 (, (:timeint c) (:ode c) (:nstepfac c) (:method c) (:mono c) (:lim c) + (:prefine c))) + (if (none? (:pg c)) + (+ c1 (, (:np c))) + (+ c1 (, (:pg c) (:np c))))) + +(defn parse-midpoint-check [ln] + (sv (, - ic - l1 l2) (sscanf ln "s,s,s,f,f")) + {:ic ic :l1 l1 :l2 l2}) + +(defn parse-C [ln] + (sv (, - ic - masscons - limmin limmax - l1 l2 li - massredist massdisc) + (sscanf ln "s,s,s,f,s,f,f,s,f,f,f,s,f,f")) + {:ic ic :masscons masscons :limerr (, limmin limmax) + :l1 l1 :l2 l2 :li li :massredist massredist :massdisc massdisc}) + +(defn parse-bakeoff-diag [d ln timeint] + (defn parse-mixing [ln] + (sv (, - lr - lu - lo) (sscanf (cut ln 4) "s,f,s,f,s,f")) + {:lr lr :lu lu :lo lo}) + (defn parse-arr [ln] + (sv toks (.split (cut ln 8))) + (lfor t toks (float t))) + (when (none? 
d) (sv d {})) + (assoc d :done False) + (cond [(in " l_r" ln) (assoc d :mixing (parse-mixing ln))] + [(in "me l_r" ln) (assoc d :me-mixing (parse-mixing ln))] + [(in " thr" ln) (assoc d :thr (parse-arr ln))] + [(in " fil" ln) (assoc d :fil (parse-arr ln) :done (= timeint "exact"))] + [(in "me fil" ln) (assoc d :me-fil (parse-arr ln) :done True)]) + d) + +;;; parse and write basis strings from search + +(defn offst->Nodes [subnp offst] + (sv nodes []) + (for [(, i e) (enumerate subnp)] + (sv os (get offst i)) + (.append nodes (list (range os (+ os e))))) + nodes) + +(defn Nodes->string [np nodes] + (sv bdy 1 s (.format "{:d} {:d}" np bdy)) + (for [i (range (len nodes))] + (+= s (.format " | {:d} {:d}:" i (len (get nodes i)))) + (for [e (get nodes i)] (+= s (.format " {:d}" e)))) + s) + +(defn string->Nodes [basis-str] + (sv toks (.split basis-str) + np (int (first toks)) + on-bdy (int (second toks)) + nodes [] n [] ctr 0 i 3 start True) + (assert on-bdy) + (while (< i (len toks)) + (sv t (get toks i)) + (cond [(= t "|") + (.append nodes n) + (sv n [] start True)] + [start + (assert (= (int t) ctr)) + (inc! ctr) + (inc! i) + (sv start False)] + [:else + (.append n (int t))]) + (inc! i)) + (.append nodes n) + (, np nodes)) + +(defn offset-nodal? 
[nodes] + (for [n nodes] + (sv d (npy.diff (npy.array n))) + (when (> (npy.max d) 1) (return False))) + True) + +(defn parse-ints [s] (lfor t (.split s) (int t))) + +(defn parse-search-offset-nodal-subset-line [ln] + (sv (, - meam1 - - - - - - - wtr - npm1 npm2 npm3 - pum - - np) + (sscanf ln "s,f,s,s,s,s,s,s,s,f,s,f,f,f,s,f,s,s,i") + p1 (.find ln "subnp ") + p2 (.find ln "offst ") + subnp (parse-ints (cut ln (+ p1 5) p2)) + offst (parse-ints (cut ln (+ p2 5)))) + {:txt ln :np np :meam1 meam1 :wtr wtr :npm (, npm1 npm2 npm3) :pum pum + :nodes (offst->Nodes subnp offst) :type :offst-nodal-subset}) + +(defn parse-search-nodal-subset-line [ln] + (defn parse-nodes [s] + (sv toks (.split s) nodess [] i 0) + (while (< i (len toks)) + (inc! i) + (sv nodes []) + (.append nodess nodes) + (while (and (< i (len toks)) (!= (nth toks i) "|")) + (.append nodes (int (nth toks i))) + (inc! i))) + nodess) + (sv (, - meam1 - - - wtr - npm1 npm2 npm3 - pum - - np) + (sscanf ln "s,f,s,s,s,f,s,f,f,f,s,f,s,s,i") + p1 (.find ln "subnp ") + p2 (.find ln "nodes ") + subnp (parse-ints (cut ln (+ p1 5) p2)) + nodes (parse-nodes (cut ln (+ p2 6)))) + {:txt ln :np np :meam1 meam1 :wtr wtr :npm (, npm1 npm2 npm3) :pum pum + :nodes nodes :type :nodal-subset}) + +;; parse output from search findoffsetnodal|findnodal. 
+(defn parse-search-basis-line [ln] + (if (in " offst " ln) + (parse-search-offset-nodal-subset-line ln) + (parse-search-nodal-subset-line ln))) diff --git a/methods/islet/figures/islet.hy b/methods/islet/figures/islet.hy new file mode 100644 index 0000000..0fca082 --- /dev/null +++ b/methods/islet/figures/islet.hy @@ -0,0 +1,41 @@ +(require [amb3 [*]]) +(import amb3 [amb3 [*]] + [scipy.linalg :as linalg] + scipy.integrate + re math sys ctypes) + +(defn nelem [xb yb] + (* (dec (len xb)) (dec (len yb)))) + +(defn ndof [method ne np] + (cond [(< method 2) (* ne (** (dec np) 2))] + [(= method 2) ne] + [:else (raisefmt "nope")])) + +(defclass Islet [] + (defn --init-- [me] + (try (sv lib (npy.ctypeslib.load-library "libislet" ".") + me.lib lib) + (except [e [Exception]] + (print e) + (sv me.lib None)))) + (defn unittest [me] + (me.lib.islet-unittest)) + (defn eval [me method np x] + (sv c-int ctypes.c-int + y (npy.zeros (, (len x) np))) + (me.lib.eval-interpolant (c-int method) (c-int np) (c-int (len x)) + (as-ctypes x) (as-ctypes y)) + y) + (defn get-xnodes [me method np] + (sv xnodes (npy.zeros np)) + (me.lib.get-xnodes (ctypes.c-int method) (ctypes.c-int np) (as-ctypes xnodes)) + xnodes) + (defn calc-xnodes-metrics-from-basis-string [me basis] + (sv metrics (npy.zeros 3 :dtype float)) + (me.lib.calc-xnodes-metrics-from-basis-string + (str-ctypes basis) (as-ctypes metrics)) + metrics)) + +(defn diff [x] (- (cut x 1) (cut x 0 -1))) + diff --git a/methods/islet/figures/poly.hy b/methods/islet/figures/poly.hy new file mode 100644 index 0000000..e1e2a2d --- /dev/null +++ b/methods/islet/figures/poly.hy @@ -0,0 +1,190 @@ +(require [amb3 [*]]) +(import [amb3 [*]] + [numpy :as npy] + math sys) + +(defn eval-lagrange-poly [x y xi] + (setv np (len x) + pod (pod-number? 
(defn eval-poly [coef x]
  ;; Evaluate the polynomial sum_p coef[p] x^p by Horner's rule. x may be a
  ;; plain number (as decided by pod-number?) or a sequence of points, in
  ;; which case the result is an array with one value per point.
  (setv unit (if (pod-number? x) 1 (npy.ones (len x)))
        acc (* (last coef) unit))
  ;; Fold in the remaining coefficients from the highest degree down.
  (for [c (reversed (cut coef 0 -1))]
    (setv acc (+ (* x acc) c)))
  acc)
(defn get-gll-x [np]
  ;; Return the np GLL nodes on [-1, 1] as a numpy array for 1 <= np <= 12;
  ;; any other np raises Exception("bad np: ...").
  ;;   Every node set is symmetric about 0, so each case below lists only the
  ;; interior positive nodes in ascending order; `mirror` supplies the
  ;; endpoints, the negated copies and, when np is odd, the center node 0.
  (defn mirror [pos]
    (+ [-1]
       (lfor p (reversed pos) (- p))
       (if (= (% np 2) 1) [0] [])
       (list pos)
       [1]))
  ;; Closed-form interior nodes for np = 6 and np = 7; sgn = 1 gives the outer
  ;; node and sgn = -1 the inner one.
  (defn np6-node [sgn]
    (math.sqrt (+ (/ 1 3)
                  (* sgn 2 (/ (math.sqrt 7)
                              21)))))
  (defn np7-node [sgn]
    (math.sqrt (/ (+ 5 (* sgn 2 (math.sqrt (/ 5 3))))
                  11)))
  (npy.array
    (cond
      [(= np 1) [0]]
      [(= np 2) (mirror [])]
      [(= np 3) (mirror [])]
      [(= np 4) (mirror [(/ (math.sqrt 5))])]
      [(= np 5) (mirror [(math.sqrt (/ 3 7))])]
      [(= np 6) (mirror [(np6-node -1) (np6-node 1)])]
      [(= np 7) (mirror [(np7-node -1) (np7-node 1)])]
      [(= np 8) (mirror [0.20929921790247886877
                         0.59170018143314230214
                         0.8717401485096066153])]
      [(= np 9) (mirror [0.36311746382617815871
                         0.67718627951073775345
                         0.89975799541146015731])]
      [(= np 10) (mirror [0.16527895766638702463
                          0.47792494981044449566
                          0.73877386510550507500
                          0.91953390816645881383])]
      [(= np 11) (mirror [0.29575813558693939143
                          0.56523532699620500647
                          0.78448347366314441862
                          0.93400143040805913433])]
      [(= np 12) (mirror [0.13655293285492755486
                          0.39953094096534893226
                          0.63287615303186067766
                          0.81927932164400667835
                          0.94489927222288222341])]
      [True (raise (Exception (.format "bad np: {}" np)))])))
(if-main
  ;; Check that evaluating the Lagrange basis (and its derivative) at a single
  ;; point agrees with the corresponding column of the vectorized evaluation,
  ;; then plot the basis functions.
  (when-inp ["test-lag-basis" {:np int}]
    (for [f (, eval-lagrange-poly-basis-derivative eval-lagrange-poly-basis)]
      (setv x (npy.linspace -1 1 1000)
            v (f (get-gll-x np) x)
            v1 (f (get-gll-x np) (get x 11)))
      ;; The check belongs inside the loop so that both functions are
      ;; verified; outside the loop only the last one was ever tested.
      (expect (npy.all (= v1 (get v (, (slice None) 11))))))
    ;; x and v still hold the values from the last function in the tuple, the
    ;; basis itself, so this plots the basis functions.
    (pl.plot x (.transpose v) "-")
    (dispfig "test-lag-basis"))

  (when-inp ["plot-lagp" {:np int}]
    ;; just plot lagrange poly basis functions
    (setv x-gll (get-gll-x np)
          xi (npy.linspace -1 1 100)
          clrs "bgrcmybgrcmybgrcmy")
    (with [(pl-plot (, 6 6) "csl-plot-lagp")]
      (for [i (range np)]
        ;; y-gll is the i-th unit vector, so yi is basis function i (solid
        ;; line) and yip its derivative (dashed line).
        (setv y-gll (npy.zeros np)
              (get y-gll i) 1
              yi (eval-lagrange-poly x-gll y-gll xi)
              yip (eval-lagrange-poly-derivative x-gll y-gll xi)
              c (get clrs i))
        (pl.plot xi yi (+ c "-")
                 xi yip (+ c "--")))
      (pl.plot x-gll (npy.zeros np) "ko")))

  ;; The weights returned by get-gll-w must sum to 2 to within one epsilon.
  (when-inp ["test-gll-w"]
    (for [np (range 1 8)]
      (sv w (get-gll-w np))
      (assert (<= (reldif 2 (sum w)) (* 1 (epsilon)))))))
# Echo this script first so that the captured output records how it was made.
cat $0

exe=../../slmm/slmmir

# Filter applied to the output of every run.
grepcmd='grep "^C \|^L \|^M \|footprint>"'

ctr=0
# Build, echo and execute one slmmir command from the current values of the
# global settings (ode, ne, np, nstep, prefine, cdrglb, cdrlcl, timeint).
# ics is not assigned anywhere in this script, so it expands to nothing unless
# it is inherited from the environment.
function run {
    ctr=$((ctr + 1))
    cmd="OMP_NUM_THREADS=48 KMP_AFFINITY=balanced $exe -method pcsl $ics -ic gaussianhills -we 0 -rit -dmc eh -T 12 -d2c -ode $ode -ne $ne -np $np -nsteps $nstep -prefine $prefine -mono $cdrglb -lim $cdrlcl -timeint $timeint -footprint"
    echo "cmd> $ctr $cmd"
    eval "$cmd | $grepcmd"
}

cdrlcl=caas
ncycle=1
ode=nondivergent
for ne in 30; do
    for nstepfac in 1 5; do
        nstep=$((ne * 6 * nstepfac))
        for ode in nondivergent; do
            # np 4 with exact time integration and no p-refinement.
            timeint=exact; prefine=0; cdrglb=caas; np=4
            run
            # Higher np with interpolated time integration and p-refinement.
            timeint=interp; prefine=5; cdrglb=caas-node
            for np in 6 8 12; do
                run
            done
        done
    done
done
# Echo this script first so that the captured output records how it was made.
cat $0

exe=../../slmm/slmmir

# Initial-condition flags placed before and after the two toychem tracers.
ics1="-ic gaussianhills -ic cosinebells -ic slottedcylinders"
ics2="-ic zero -ic zero -ic zero"
# Filter applied to the output of every run.
grepcmd='grep "^C \|^L \|^M "'

ctr=0
# For the current global settings, run slmmir once for each -pg value in
# 2, np-2, np. ctr counts calls to run, not individual commands.
function run {
    ctr=$((ctr + 1))
    for pg in 2 $((np - 2)) $np; do
        cmd="OMP_NUM_THREADS=48 KMP_AFFINITY=balanced $exe -method pcsl $ics1 -ic toychem1 -ic toychem2 $ics2 -we 0 -rit -dmc eh -T 12 -d2c -ode $ode -ne $ne -np $np -pg $pg -nsteps $nstep -prefine $prefine -mono $cdrglb -lim $cdrlcl -timeint $timeint -midpoint-check"
        echo "cmd> $ctr $cmd"
        eval "$cmd | $grepcmd"
    done
}

cdrglb=caas-node
cdrlcl=caas
for ne in 5 10 20 40 80; do
    for nstepfac in 1 5; do
        for ode in nondivergent; do #divergent; do
            nstep=$((ne * 6 * nstepfac))
            # np 4 with exact time integration and no p-refinement.
            timeint=exact; prefine=0; np=4
            run
            # Higher np with interpolated time integration and p-refinement.
            timeint=interp; prefine=5
            for np in 6 8 9 12; do
                run
            done
        done
    done
done
# Echo this script first so that the captured output records how it was made.
cat $0

exe=../../slmm/slmmir

# Filter applied to the output of every run.
grepcmd='grep "^C \|^L \|^M "'

ctr=0
# Build, echo and execute one slmmir command from the current global settings.
# The simulated time passed to -T is 12 times ncycle.
function run {
    ctr=$((ctr + 1))
    cmd="OMP_NUM_THREADS=48 KMP_AFFINITY=balanced $exe -method $method -ic gaussianhills -ic cosinebells -ic correlatedcosinebells -ic slottedcylinders -we 0 -rit -dmc eh -T $((12 * ncycle)) -d2c -ode $ode -ne $ne -np $np -nsteps $nstep -prefine $prefine -mono $cdrglb -lim $cdrlcl -timeint $timeint"
    echo "cmd> $ctr $cmd"
    eval "$cmd | $grepcmd"
}

cdrglb=caas-node
cdrlcl=caas
for ne in 5 10 20 40 80; do
    for nstepfac in 1; do
        for ode in divergent; do
            nstep=$((ne * 6 * nstepfac))
            timeint=interp
            prefine=5
            for method in pcslu pcsl; do
                # pcslu is run for 10 cycles and pcsl for 100.
                case $method in
                    pcslu) ncycle=10 ;;
                    pcsl) ncycle=100 ;;
                esac
                for np in 4 6 8 9 12; do
                    run
                done
            done
        done
    done
done
exe=../../slmm/slmmir
datadir=toychem-imgs

ne=30
nstep=$((48 * 12))
dt=dt30min
we=$((48 * 6))

# run TIMEINT PREFINE: build, echo and execute one slmmir image run using the
# current values of np, pg and glbcdr. The output name encodes ne, the
# p-refinement level, np, pg, the global CDR and the time step label.
run () {
    timeint=$1
    prefine=$2
    name=toychem-nondiv-ne${ne}pr${prefine}np${np}pg${pg}-${glbcdr}-caas-$dt
    cmd="$exe -method pcsl -ode nondivergent -ic gaussianhills -ic toychem1 -ic toychem2 -T 12 -nsteps $nstep -timeint $timeint -ne $ne -np ${np} -dmc eh -d2c -mono $glbcdr -lim caas -we $we -io internal -res 256 -o $datadir/$name -rit -prefine $prefine -pg $pg"
    echo "cmd> $cmd"
    eval "$cmd"
}

np=8
glbcdr=caas-node
pg=$np
run interp 5

glbcdr=caas
np=4
pg=0
run exact 0
(defn write-files [c ne nnode-fac &optional nnode]
  ;; Generate the job script and the two namelists ("eul" and "sl") for one
  ;; resolution ne by filling in the templates under c.template-dir and writing
  ;; the results under c.run-dir. When nnode is not given (or is -1) the node
  ;; count is nnode-fac times (get-nnode ne), truncated to an int. Nothing is
  ;; written if the node count exceeds c.max-nnode.
  ;;   Placeholders substituted in each template:
  ;; job: NNODE WALLTIME JOBNAME INPUTSL INPUTEUL
  ;; nl: NE NMAX QSIZE TSTEP NUVAL HVSUB REMAPFAC TRACERFAC
  (svifn nnode -1)
  (when (= nnode -1)
    (sv nnode (int (* nnode-fac (get-nnode ne)))))
  (sv nmax (get-nmax ne)
      qsize (get-qsize ne)
      tstep (get-tstep ne)
      nu (get-nu ne)
      remap-fac (get-remap-fac ne)
      tracer-fac (get-tracer-fac ne)
      job-name (.format "r5-ne{}-nmax{}-qsize{}-nnode{}" ne nmax qsize nnode))
  (when (> nnode c.max-nnode) (return))
  (defn input-name [talg] (+ c.run-dir job-name "-" talg ".nl"))
  (sv job-subs (, (, "NNODE" (str nnode))
                  (, "WALLTIME" "15")
                  (, "JOBNAME" job-name)
                  (, "INPUTSL" (input-name "sl"))
                  (, "INPUTEUL" (input-name "eul")))
      ;; The namelist substitutions are the same for both transport algorithms.
      nl-subs (, (, "NE" (str ne))
                 (, "NMAX" (str nmax))
                 (, "QSIZE" (str qsize))
                 (, "TSTEP" (str tstep))
                 (, "NUVAL" (.format "{:1.2e}" nu))
                 (, "HVSUB" (str (get-hv-subcyc ne)))
                 (, "REMAPFAC" (str remap-fac))
                 (, "TRACERFAC" (str tracer-fac))))
  (sed job-subs
       (+ c.template-dir "job.sh.template")
       (+ c.run-dir job-name "-job.sh"))
  (for [talg (, "eul" "sl")]
    (sed nl-subs
         (+ c.template-dir "theta-" talg ".nl.template")
         (input-name talg))))
(defn parse-from-glob [c globpat]
  ;; Run parse-out on every file matching globpat, threading one accumulator
  ;; dict through all the calls, and return that accumulated dict.
  (sv acc {})
  (for [path (glob.glob globpat)]
    (sv acc (parse-out c path :d acc)))
  acc)
;; Draw the SYPD-versus-GPU-count scaling figure, once as png and once as pdf.
;; Both axes are plotted in log2 units with tick labels in the original units.
(when-inp ["fig"]
  (assoc matplotlib.rcParams "savefig.dpi" 300)
  (do (pl-require-type1-fonts))
  (sv fs 16 fsl 18)
  (defn text1 [x y dx dy data]
    ;; Label every point (x[i], y[i]) with data[i], offset by (dx, dy).
    (for [i (range (len x))]
      (pl.text (+ (nth x i) dx) (+ (nth y i) dy)
               (.format "{:4.2f}" (nth data i))
               :fontsize fs)))
  (defn text2 [x y dx dy data]
    ;; Label only the last point.
    (sv i (dec (len x)))
    (pl.text (+ (nth x i) dx) (+ (nth y i) dy)
             (.format "{:4.2f}" (nth data i))
             :fontsize fs))
  (defn int->str [i]
    ;; Insert a thousands separator into 4- and 5-digit integers.
    (sv s (str i)
        s (case/eq (len s)
                   [4 (+ (first s) "," (cut s 1))]
                   [5 (+ (cut s 0 2) "," (cut s 2))]
                   [:else s]))
    s)
  (sv npa npy.array
      x (npa [1024 2048 4096 4600])
      xfac 6
      x (* xfac x)
      sc20-y (npa [0.31 0.54 0.90 0.97])
      eul-y (npa [0.29 0.50 0.85 0.93])
      sl-y (npa [0.44 0.77 1.26 1.38])
      q40 (Box)
      q40.x (* xfac (npa [2048 4096 4600]))
      q40.eul-y (npa [0.24 0.41 0.44])
      q40.sl-y (npa [0.67 1.13 1.24])
      perf-x (* xfac (npa [1024 4600]))
      ;; Reference line: y proportional to x, anchored at y = b for the
      ;; smallest x.
      perf-y (do (sv b 0.2)
                 (npa [b (* (/ (last perf-x) (first perf-x)) b)]))
      yt (/ (npy.linspace 1 15 15) 10))
  (for [format (, "png" "pdf")]
    (with [(pl-plot (, 6 (if (= format "png") 6.6 6.4))
                    "sl-gpu-perf-032521-islet"
                    :format format)]
      (pl.plot (npy.log2 x) (npy.log2 sc20-y) "ko-" :label "SC20 data, Eulerian transport")
      (text1 (npy.log2 x) (npy.log2 sc20-y) -0.13 0.08 sc20-y)
      (pl.plot (npy.log2 x) (npy.log2 eul-y) "k.:" :label "SC20 config., Eulerian transport")
      (pl.plot (npy.log2 x) (npy.log2 sl-y) "rs-"
               :label "SC20 config., SL transport")
      (text1 (npy.log2 x) (npy.log2 sl-y) -0.13 0.08 sl-y)
      (unless (none? q40)
        (pl.plot (npy.log2 q40.x) (npy.log2 q40.eul-y) "ko--")
        (text2 (npy.log2 q40.x) (npy.log2 q40.eul-y) -0.13 0.08 q40.eul-y)
        (pl.plot (npy.log2 q40.x) (npy.log2 q40.sl-y) "rs--")
        (text2 (npy.log2 q40.x) (npy.log2 q40.sl-y) -0.03 0.03 q40.sl-y))
      (pl.plot (npy.log2 perf-x) (npy.log2 perf-y) "g:" :label "Perfect scaling")
      (pl.xticks (npy.log2 x) (lfor e x (int->str e)) :fontsize fs :rotation 45)
      (pl.yticks (npy.log2 yt) yt :fontsize fs)
      (pl.xlabel (+ "Number of Summit " (if (= xfac 6) "GPUs" "nodes"))
                 :fontsize fsl)
      (pl.ylabel "Simulated Years Per Day (SYPD)" :fontsize fsl)
      (pl.title (+ "Semi-Lagrangian tracer transport on GPU:\n"
                   "Dycore performance of SCREAM 3.25km configuration\n"
                   "solid line: 10 tracers, dashed line: 40 tracers")
                :fontsize fsl)
      (pl.legend :loc "upper left" :fontsize (dec fs) :framealpha 0)
      (my-grid)
      (sv d 0.18)
      (pl.xlim (, (- (npy.log2 (* xfac 1024)) d) (+ (npy.log2 (* xfac 4600)) d)))
      (sv d 0.27)
      ;; The y data and ticks are in log2 units, so the limits must be too;
      ;; the natural log used here before put them in a different scale from
      ;; everything else on the axis.
      (pl.ylim (, (- (npy.log2 (if (none? q40) 0.2 0.15)) d) (+ (npy.log2 1.5) d))))))
const int* n, const int* k, + double* a, const int* lda, + double* tau, double* c, const int* ldc, + double* wrk, const int* iwrk, int* info); +} + +namespace islet { +// C = alpha op(A) op(B) + beta C +void dgemm (char transa, char transb, int m, int nrhs, int n, double alpha, + const double* a, int lda, const double* b, int ldb, double beta, + const double* c, int ldc) { + dgemm_(&transa, &transb, &m, &nrhs, &n, &alpha, const_cast(a), &lda, + const_cast(b), &ldb, &beta, const_cast(c), &ldc); +} + +int dpotrf (char uplo, int n, double* a, int lda) { + int info; + dpotrf_(&uplo, &n, a, &lda, &info); + return info; +} + +int dpotrs (char uplo, int n, int nrhs, const double* a, int lda, double* bx, + int ldb) { + int info; + dpotrs_(&uplo, &n, &nrhs, const_cast(a), &lda, bx, &ldb, &info); + return info; +} + +void dtrsm (char side, char uplo, char transa, char diag, int n, int nrhs, + double alpha, const double* a, int lda, double* bx, int ldb) { + dtrsm_(&side, &uplo, &transa, &diag, &n, &nrhs, &alpha, + const_cast(a), &lda, bx, &ldb); +} + +int dtrtrs (char uplo, char trans, char diag, int n, int nrhs, + double* a, int lda, double* b, int ldb) { + int info; + dtrtrs_(&uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, &info); + return info; +} + +// tau[min(m,n)], wrk[>= n] +int dgeqrf (int m, int n, double* a, int lda, + double* tau, double* wrk, int iwrk) { + int info; + dgeqrf_(&m, &n, a, &lda, tau, wrk, &iwrk, &info); + return info; +} + +// tau[min(m,n)], wrk[>= max(m,n)] +int dormqr (char side, char trans, int m, int n, int k, double* a, int lda, + double* tau, double* c, int ldc, double* wrk, int iwrk) { + int info; + dormqr_(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, wrk, &iwrk, &info); + return info; +} + +struct GllNatural : public Operator { + virtual void eval (const Int& np, const Real& x, Real* const v) const override { + eval_lagrange_poly(get_xnodes(np), np, x, v); + } +}; + +struct GllOffsetNodalSubset : public Operator, public npxstab { + 
virtual void eval (const Int& np, const Real& x, Real* const v) const override { + npxstab::eval(np, x, v); + } +}; + +void eval_offset_nodal_subset ( + const Int np, const Int nreg, const Int* subnp, const Int* os, const Real* xnodes, + const Real& x, Real* const v) +{ + if (x > 0) { + eval_offset_nodal_subset(np, nreg, subnp, os, xnodes, -x, v); + for (int i = 0; i < np/2; ++i) + std::swap(v[i], v[np-i-1]); + return; + } + bool done = false; + for (Int i = 0; i < nreg; ++i) + if (x < xnodes[i+1]) { + std::fill(v, v + np, 0); + eval_lagrange_poly(xnodes + os[i], subnp[i], x, v + os[i]); + done = true; + break; + } + if ( ! done) + eval_lagrange_poly(xnodes, np, x, v); +} + +static void eval_offset (const Int& np, const Real* const xnodes, + const Int* const subnp, const Int* const offst, + const Real& x, Real* const v) { + if (x > 0) { + eval_offset(np, xnodes, subnp, offst, -x, v); + for (int i = 0; i < np/2; ++i) + std::swap(v[i], v[np-i-1]); + return; + } + bool done = false; + for (Int i = 0; i < np/2; ++i) + if (x < xnodes[i+1]) { + std::fill(v, v + np, 0); + eval_lagrange_poly(xnodes + offst[i], subnp[i], x, v + offst[i]); + done = true; + break; + } + if ( ! done) + eval_lagrange_poly(xnodes, np, x, v); +} + +struct GllBest : public Operator { + static void eval_np4 (const Real* const xnodes, const Real& x, Real* const y) { + static const Real c1 = 0.306; + if (x < xnodes[1] || x > xnodes[2]) { + y[0] = y[3] = 0; + const Int os = x < xnodes[1] ? 
0 : 1; + eval_lagrange_poly(xnodes + os, 3, x, y + os); + Real y4[4]; + eval_lagrange_poly(xnodes, 4, x, y4); + const Real x0 = 2*(1 - std::abs(x))/(1 - xnodes[2]) - 1; + const Real a = (c1 + (0.5 - c1)*x0)*(x0 + 1); + for (int i = 0; i < 4; ++i) + y[i] = a*y[i] + (1 - a)*y4[i]; + } else + eval_lagrange_poly(xnodes, 4, x, y); + } + + virtual void eval (const Int& np, const Real& x, Real* const v) const override { + const Real* xnodes = get_xnodes(np); + switch (np) { + case 4: eval_np4(xnodes, x, v); break; // 2 + case 5: { // 2 + const Int subnp[] = {3,4}; + const Int offst[] = {0,0}; + eval_offset(5, xnodes, subnp, offst, x, v); + } break; + case 6: { // 4 + const Int subnp[] = {5,5,6}; + const Int n0[] = { 0, 1, 2, 3, 4, }; + const Int n1[] = { 0, 1, 2, 3, 5}; + const Int n2[] = { 0, 1, 2, 3, 4, 5}; + const Int* nodes[] = {n0,n1,n2}; + ::eval(6, true, xnodes, subnp, nodes, x, v); + } break; + case 7: { // 4 + const Int subnp[] = {5,5,6}; + const Int offst[] = {0,0,0}; + eval_offset(7, xnodes, subnp, offst, x, v); + } break; + case 8: { // 5 + const Int subnp[] = {6,6,7,6}; + const Int offst[] = {0,0,0,1}; + eval_offset(8, xnodes, subnp, offst, x, v); + } break; + case 9: { // 6 + const Int subnp[] = {7,8,8,7}; + const Int n0[] = { 0, 1, 2, 3, 4, 5, 8}; + const Int n1[] = { 0, 1, 2, 3, 4, 5, 7, 8}; + const Int n2[] = { 0, 1, 2, 3, 4, 5, 6, 8}; + const Int n3[] = { 1, 2, 3, 4, 5, 6, 7 }; + const Int* nodes[] = {n0,n1,n2,n3}; + ::eval(9, true, xnodes, subnp, nodes, x, v); + } break; + case 10: { // 6 + const Int subnp[] = {7,7,7,8,8}; + const Int offst[] = {0,0,0,0,1}; + eval_offset(10, xnodes, subnp, offst, x, v); + } break; + case 11: { // 7 + const Int subnp[] = {8,9,8,9,8}; + const Int offst[] = {0,0,0,0,1}; + eval_offset(11, xnodes, subnp, offst, x, v); + } break; + case 12: { // 8 + const Int subnp[] = {9,9,10,10,9,10}; + const Int offst[] = {0,0,0,0,1,1}; + eval_offset(12, xnodes, subnp, offst, x, v); + } break; + case 13: { // 9 + const Int subnp[] = 
{10,10,10,10,11,10}; + const Int offst[] = {0,0,0,0,0,1}; + eval_offset(13, xnodes, subnp, offst, x, v); + } break; + default: throw_if(true, "not impl'ed"); + } + } + + std::string get_basis_string (const Int& np) const override { + switch (np) { + case 5: return "5 1 | 0 3: 0 1 2 | 1 4: 0 1 2 3"; + case 6: return "6 1 | 0 5: 0 1 2 3 4 | 1 5: 0 1 2 3 5 | 2 6: 0 1 2 3 4 5"; + case 7: return "7 1 | 0 5: 0 1 2 3 4 | 1 5: 0 1 2 3 4 | 2 6: 0 1 2 3 4 5"; + case 8: return "8 1 | 0 6: 0 1 2 3 4 5 | 1 6: 0 1 2 3 4 5 | 2 7: 0 1 2 3 4 5 6 | 3 6: 1 2 3 4 5 6"; + case 9: return "9 1 | 0 7: 0 1 2 3 4 5 8 | 1 8: 0 1 2 3 4 5 7 8 | 2 8: 0 1 2 3 4 5 6 8 | 3 7: 1 2 3 4 5 6 7"; + case 10: return "10 1 | 0 7: 0 1 2 3 4 5 6 | 1 7: 0 1 2 3 4 5 6 | 2 7: 0 1 2 3 4 5 6 | 3 8: 0 1 2 3 4 5 6 7 | 4 8: 1 2 3 4 5 6 7 8"; + case 11: return "11 1 | 0 8: 0 1 2 3 4 5 6 7 | 1 9: 0 1 2 3 4 5 6 7 8 | 2 8: 0 1 2 3 4 5 6 7 | 3 9: 0 1 2 3 4 5 6 7 8 | 4 8: 1 2 3 4 5 6 7 8"; + case 12: return "12 1 | 0 9: 0 1 2 3 4 5 6 7 8 | 1 9: 0 1 2 3 4 5 6 7 8 | 2 10: 0 1 2 3 4 5 6 7 8 9 | 3 10: 0 1 2 3 4 5 6 7 8 9 | 4 9: 1 2 3 4 5 6 7 8 9 | 5 10: 1 2 3 4 5 6 7 8 9 10"; + case 13: return "13 1 | 0 10: 0 1 2 3 4 5 6 7 8 9 | 1 10: 0 1 2 3 4 5 6 7 8 9 | 2 10: 0 1 2 3 4 5 6 7 8 9 | 3 10: 0 1 2 3 4 5 6 7 8 9 | 4 11: 0 1 2 3 4 5 6 7 8 9 10 | 5 10: 1 2 3 4 5 6 7 8 9 10"; + default: return ""; + } + } +}; + +struct UniformOffsetNodalSubset : public Operator { + virtual const Real* get_xnodes (const Int& np) const override { + if (np < 2 || np > np_max+1) return nullptr; + static Real xnode[np_max+1][np_max+1] = {0}; + if (xnode[np][0] == 0) { + for (Int i = 0; i < np; ++i) + xnode[np][i] = 2*(Real(i)/(np-1)) - 1; + } + return xnode[np]; + } + + virtual void eval (const Int& np, const Real& x, Real* const v) const override { + const Real* xnodes = get_xnodes(np); + switch (np) { + case 2: { + const Int subnp[] = {2}; + const Int offst[] = {0}; + eval_offset(2, xnodes, subnp, offst, x, v); + } break; + case 3: { + const Int 
subnp[] = {3}; + const Int offst[] = {0}; + eval_offset(3, xnodes, subnp, offst, x, v); + } break; + case 4: { + const Int subnp[] = {3,4}; + const Int offst[] = {0,0}; + eval_offset(4, xnodes, subnp, offst, x, v); + } break; + case 5: { + const Int subnp[] = {3,4}; + const Int offst[] = {0,0}; + eval_offset(5, xnodes, subnp, offst, x, v); + } break; + case 6: { + const Int subnp[] = {3,4,6}; + const Int offst[] = {0,0,0}; + eval_offset(6, xnodes, subnp, offst, x, v); + } break; + case 7: { + const Int subnp[] = {3,4,4}; + const Int offst[] = {0,0,1}; + eval_offset(7, xnodes, subnp, offst, x, v); + } break; + case 8: { + const Int subnp[] = {4,4,4,4}; + const Int offst[] = {0,0,1,2}; + eval_offset(8, xnodes, subnp, offst, x, v); + } break; + case 9: { + const Int subnp[] = {4,4,4,4}; + const Int offst[] = {0,0,1,2}; + eval_offset(9, xnodes, subnp, offst, x, v); + } break; + case 10: { + const Int subnp[] = {4,4,4,4,4}; + const Int offst[] = {0,0,1,2,3}; + eval_offset(10, xnodes, subnp, offst, x, v); + } break; + case 11: { + const Int subnp[] = {4,4,4,4,4}; + const Int offst[] = {0,0,1,2,3}; + eval_offset(11, xnodes, subnp, offst, x, v); + } break; + case 12: { + const Int subnp[] = {4,4,4,4,4,4}; + const Int offst[] = {0,0,1,2,3,4}; + eval_offset(12, xnodes, subnp, offst, x, v); + } break; + case 13: { + const Int subnp[] = {4,4,4,4,4,4}; + const Int offst[] = {0,0,1,2,3,4}; + eval_offset(13, xnodes, subnp, offst, x, v); + } break; + default: throw_if(true, "not impl'ed"); + } + } +}; + +Operator::ConstPtr Operator::create (Operator::Method m) { + switch (m) { + case gll_natural: return std::make_shared(); + case gll_offset_nodal_subset: return std::make_shared(); + case gll_best: return std::make_shared(); + case uniform_offset_nodal_subset: return std::make_shared(); + default: throw_if(true, "Operator::create: not a method: " << m); + } + return nullptr; +} + +Int unittest_eval () { + Int nerr = 0; + { + GllOffsetNodalSubset o1; + GllBest o2; + for (const Int 
np : {5,7,8,10,11,12,13}) { + const Int n = 100; + Int ne = 0; + for (Int i = 0; i <= n; ++i) { + const Real x = 2*(Real(i)/n) - 1; + Real v1[np_max], v2[np_max]; + o1.eval(np, x, v1); + o2.eval(np, x, v2); + for (Int j = 0; j < np; ++j) if (v1[j] != v2[j]) ++ne; + } + if (ne) printf("GllOffsetNodalSubset vs GllBest np %d failed\n", np); + nerr += ne; + } + } + return nerr; +} +} // namespace islet + +using namespace islet; +extern "C" { // For python ctypes. +void get_xnodes (const Int method, const Int np, Real* xnodes) { + const auto op = Operator::create(static_cast(method)); + const auto x = op->get_xnodes(np); + for (Int i = 0; i < np; ++i) xnodes[i] = x[i]; +} + +void eval_interpolant (const Int method, const Int np, const Int nx, + // y is np x nx, np the fast index. + const Real* const x, Real* const y) { + const auto op = Operator::create(static_cast(method)); + for (Int ix = 0; ix < nx; ++ix) + op->eval(np, x[ix], y + np*ix); +} +} // extern "C" diff --git a/methods/islet/islet_isl.hpp b/methods/islet/islet_isl.hpp new file mode 100644 index 0000000..1cde180 --- /dev/null +++ b/methods/islet/islet_isl.hpp @@ -0,0 +1,54 @@ +#ifndef INCLUDE_ISLET_ISL_HPP +#define INCLUDE_ISLET_ISL_HPP + +#include "islet_types.hpp" +#include "islet_interpmethod.hpp" + +#include + +namespace islet { +const Real* get_x_gll(const Int np); +const Real* get_w_gll(const Int np); + +template +void eval_lagrange_poly (const Scalar* x_gll, const Int& np, const Scalar& x, + Scalar* const y) { + for (int i = 0; i < np; ++i) { + Scalar f = 1; + for (int j = 0; j < np; ++j) + f *= (i == j) ? 
+ 1 : + (x - x_gll[j]) / (x_gll[i] - x_gll[j]); + y[i] = f; + } +} + +struct Operator { + typedef std::shared_ptr Ptr; + typedef std::shared_ptr ConstPtr; + + virtual void eval(const Int& np, const Real& x, Real* const v) const = 0; + virtual const Real* get_xnodes (const Int& np) const { return get_x_gll(np); } + virtual std::string get_basis_string (const Int& np) const { return ""; } + + enum Method { gll_natural = 0, gll_offset_nodal_subset, xnodal, gll_best, + uniform_offset_nodal_subset }; + static ConstPtr create(Method m); +}; + +struct OperatorInterpMethod : public UserInterpMethod { + typedef std::shared_ptr Ptr; + OperatorInterpMethod (const Int np_, const Operator::ConstPtr& op_) : np(np_), op(op_) {} + void eval (const Real& x, Real* const v) override { op->eval(np, x, v); } + const Real* get_xnodes () const override { return op->get_xnodes(np); } + Int get_np () const override { return np; } +private: + Int np; + Operator::ConstPtr op; +}; + +Int unittest_eval(); + +} // namespace islet + +#endif diff --git a/methods/islet/islet_maxeigcomp.cpp b/methods/islet/islet_maxeigcomp.cpp new file mode 100644 index 0000000..f9afc64 --- /dev/null +++ b/methods/islet/islet_maxeigcomp.cpp @@ -0,0 +1,616 @@ +#include + +#include "islet_maxeigcomp.hpp" +#include "islet_tables.hpp" +#include "islet_npx.hpp" +#include "islet_util.hpp" + +// LAPACK eigendecomp routine for real unsymmetric matrix. +typedef int fint; +extern "C" void dgeev_ (char* jobvl, char* jobvr, fint* n, double* a, int* lda, + double* wr, double* wi, + double* vl, int* ldvl, + double* vr, int* ldvr, + double* work, int* lwork, int* info); +extern "C" void zgeev_ (char* jobvl, char* jobvr, fint* n, Complex* a, int* lda, + Complex* w, + Complex* vl, int* ldvl, + Complex* vr, int* ldvr, + Complex* work, int* lwork, double* rwork, int* info); +// LAPACK SVD routine for real unsymmetric matrix. 
+extern "C" void dgesvd_ (char* jobu, char* jobvt, fint* m, fint* n, double* a, int* lda, + double* s, double* u, int* ldu, double* vt, int* ldvt, + double* work, int* lwork, int* info); +extern "C" void zgesvd_ (char* jobu, char* jobvt, fint* m, fint* n, Complex* a, int* lda, + double* s, Complex* u, int* ldu, Complex* vt, int* ldvt, + Complex* work, int* lwork, double* rwork, int* info); + +static +void dgeev (char jobvl, char jobvr, int n, double* a, int lda, + double* wr, double* wi, + double* vl, int ldvl, + double* vr, int ldvr, + double* work, int lwork, int& info) { + dgeev_(&jobvl, &jobvr, &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr, + work, &lwork, &info); +} + +static +void zgeev (char jobvl, char jobvr, int n, Complex* a, int lda, + Complex* w, + Complex* vl, int ldvl, + Complex* vr, int ldvr, + Complex* work, int lwork, double* rwork, int& info) { + zgeev_(&jobvl, &jobvr, &n, a, &lda, w, vl, &ldvl, vr, &ldvr, + work, &lwork, rwork, &info); +} + +static +void dgesvd (char jobu, char jobvt, fint m, fint n, double* a, int lda, + double* s, double* u, int ldu, double* vt, int ldvt, + double* work, int lwork, int& info) { + dgesvd_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, + work, &lwork, &info); +} + +static +void zgesvd (char jobu, char jobvt, fint m, fint n, Complex* a, int lda, + double* s, Complex* u, int ldu, Complex* vt, int ldvt, + Complex* work, int lwork, double* rwork, int& info) { + zgesvd_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, + work, &lwork, rwork, &info); +} + +class Mesh { + Int nc_; + Real dx_; +public: + Mesh (const Int nc) { init(nc); } + void init (const Int nc) { + nc_ = nc; + dx_ = 1.0/nc; + } + + Real toperiodic (const Real& x) const { return x - std::floor(x); } + Int ncell () const { return nc_; } + Real dx () const { return dx_; } + Int incell (const Real& x) const { return std::floor(toperiodic(x) / dx_); } + void toref (const Real& x, Int& ci, Real& a) const { + ci = incell(x); + a = 
2*(toperiodic(x)*nc_ - ci) - 1; + } + Real tophysical (const Int& ci, const Real& a) const { + return toperiodic((ci + 0.5*(a+1))*dx_); + } + + static Int unittest () { + Mesh m(42); + const Real dx = m.dx(); + const Int nc = m.ncell(); + const Real eps = std::numeric_limits::epsilon(); + using islet::reldif; + Int ne = 0; + if (reldif(m.toperiodic( 1.7), 0.7) > 10*eps) ++ne; + if (reldif(m.toperiodic(-0.8), 0.2) > 10*eps) ++ne; + if (m.incell(2) != 0) ++ne; + if (m.incell(2 - 0.5*dx) != nc-1) ++ne; + { Int ic; Real a; + const Real x = 4.4*dx; + m.toref(x, ic, a); + if (ic != 4) ++ne; + if (reldif(a, -0.2) > 1e2*eps) ++ne; + if (reldif(m.tophysical(ic, a), x) > 1e2*eps) ++ne; } + return ne; + } +}; + +void op_apply (const Mesh& m, const Int ne, const InterpMethod& im, + const Real dx_flow, const Real* const src, Real* const tgt) { + const auto xnodes = im.get_xnodes(); + for (Int ci = 0, k = 0; ci < ne; ++ci) { + for (Int i = 0; i < im.np-1; ++i, ++k) { + Int ci_src; + Real a_src; + m.toref(m.tophysical(ci, xnodes[i]) + dx_flow, ci_src, a_src); + Real v[32]; + op_eval(im, a_src, v); + Real val = 0; + const Real* src_cell = src + (im.np - 1)*ci_src; + for (Int i_src = 0; i_src < im.np; ++i_src) + val += v[i_src]*src_cell[i_src]; + tgt[k] = val; + } + } +} + +static void get_matrix ( + const Int& ne, const Int& np, const Real& dx, const InterpMethod& interp_method, + Array& A) +{ + // Get the CSL operator as a matrix. + const Int N = ne*(np-1), Np1 = N+1; + A.optclear_and_resize(N*N); + Mesh m(ne); + const Real dx_flow = -dx/ne; + Array u0(Np1, 0); + const Real* const src = u0.data(); + for (Int b = 0; b < N; ++b) { + // Get b'th column. + u0[b] = 1; + if (b == 0) u0[N] = 1; + else if (b == N) u0[0] = 1; + Real* const tgt = A.data() + b*N; + op_apply(m, ne, interp_method, dx_flow, src, tgt); + u0[b] = 0; + if (b == 0) u0[N] = 0; + else if (b == N) u0[0] = 0; + } +} + +// A is overwritten. 
+static Real cond_2norm (Complex* A, const Int& n, Real* work, const Int& nwork) { + throw_if(nwork < 12*n, "work should be >= 12 n"); + Real* rwork = work; + Real* s = work + 5*n; + Complex* cwork = reinterpret_cast(work + 6*n); + const Int lwork = (nwork - 6*n)/2; + int info; + zgesvd('n', 'n', n, n, A, n, s, + nullptr, 1, nullptr, 1, + cwork, lwork, rwork, + info); + return s[0]/s[n-1]; +} + +namespace bloch { +// K is the number of nodes still in the element after shifting right by the +// fraction dx of an element. +static Int get_K (const Int np, const Real* x_gll, const Real dx) { + assert(dx > 0 && dx < 1); + Int K; + for (K = 0; K < np; ++K) + if (x_gll[K] + 2*dx >= 1) + break; + assert(x_gll[0] > -1 || (K > 0 && K < np)); + return K; +} + +// K is the opposite of what it is in csl.hy. +struct Data { + const Int np; + const Real* const x_gll; + const Int K; + const Real dx; + + Data (const Int& inp, const Real& idx, const Real* const x_gll_ = nullptr) + : np(inp), + x_gll(x_gll_ ? x_gll_ : islet::get_x_gll(np)), + K(get_K(inp, x_gll, idx)), + dx(idx) + {} +}; + +// Get the (np-1)xnp block matrix that is repeated in A. +void form_kernel_block (const Data& d, const InterpMethod& im, Real* const A) { + const Int npm1 = d.np-1; + // Get the kernel block in an (np-1)xnp row-major matrix starting at A. + for (Int ip = 0; ip < npm1; ++ip) { + Real ref = d.x_gll[ip] + 2*d.dx; + if (ref >= 1) { + assert(ip >= d.K); + ref -= 2; + } else { + if (ip >= d.K) { + pr(puf(d.np) pu(d.K) pu(d.dx) pu(ip) pu(ref)); + islet::prarr("d.x_gll", d.x_gll, d.np); + } + assert(ip < d.K); + } + assert(ref >= -1 && ref < 1); + op_eval(im, ref, A + d.np*ip); + } +} + +// Get B(mu) from A. 
+void form_Bmu (const Data& d, const Complex& mu, const Real* const A, + Complex* const Bmu) { + const Int npm1 = d.np-1; + for (Int c = 0; c < npm1; ++c) { + Complex* const col = Bmu + npm1*c; + for (Int r = 0 ; r < d.K ; ++r) col[r] = A[d.np*r + c]; + for (Int r = d.K; r < npm1; ++r) col[r] = mu*A[d.np*r + c]; + } + { + Complex* const col = Bmu; + for (Int r = 0 ; r < d.K ; ++r) col[r] += mu*A[d.np*r + npm1]; + for (Int r = d.K; r < npm1; ++r) col[r] += mu*mu*A[d.np*r + npm1]; + } +} + +void form_Bmu (const Data& d, const Int& ne, const Int& ie, const Real* const A, + Complex* const Bmu) { + const Real arg = 2 * M_PI * (Real(ie)/ne); + const Complex mu(std::cos(arg), std::sin(arg)); + form_Bmu(d, mu, A, Bmu); +} + +void edecomp (const Data& d, Complex* const Bmu, + Real* const work, Int lwork, + // (w[2*i], ws(2*i+1)) contains the i'th eigenvalue. + Real* const w, Complex* const V = nullptr) { + const Int npm1 = d.np-1; + assert(lwork >= 22*npm1); + lwork -= 2*npm1; + Int eig_info; + zgeev('n', V ? 'v' : 'n', npm1, Bmu, npm1, + reinterpret_cast(w), + nullptr, 1, V, npm1, + reinterpret_cast(work), lwork/2, + work + lwork, eig_info); + assert(eig_info == 0); +} +} // namespace bloch + +void MaxEigComputer::setup_workspace (const Int max_ne_) { + max_ne = max_ne_; + const int nthr = threaded ? omp_get_max_threads() : 1; + wss.resize(nthr); +} + +struct IndexCover { + IndexCover (Int n_, Int P_) { + n = n_; + P = P_; + split = std::max(10, P); + N = std::max(1, n/split); + } + + Int nit () const { return (n+P-1)/P; } + + Int idx (const Int it, const Int tid) const { + const Int k = P*it + tid; + const Int i = (k >= split*N) ? k : (k % split)*N + (k / split); + return i >= n ? 
-1 : i; + } + +private: + Int P, n, N, split; +}; + +static Int test_index_cover () { + Int nerr = 0; + const auto check = [&] (const Int n, const Int P) -> Int { + Int ne = 0; + std::vector cnt(n, 0); + IndexCover ic(n, P); + const Int nit = ic.nit(); + for (Int it = 0; it < nit; ++it) + for (Int tid = 0; tid < P; ++tid) { + const Int i = ic.idx(it, tid); + if (i >= n) ++ne; + else if (i >= 0) ++cnt[i]; + } + for (Int i = 0; i < n; ++i) + if (cnt[i] != 1) + ++ne; + return ne; + }; + for (const Int n : {15, 33, 128, 1111, 3333, 4000, 7777}) + for (const Int P : {1, 2, 3, 8, 11, 48, 272}) + nerr += check(n, P); + return nerr; +} + +Real MaxEigComputer:: +run (const Int& ne_max, const Int& ndx_max, const Real& maxeigampm1, + const bool quiet, const InterpMethod& im) { + setup_workspace(ne_max); + // Search dx in (0, 0.5], in parallel, to see if there's a max |lambda| - 1 + // bigger than tol. + const auto cdxeig = [=] (Int ne, Int ndx, Real tol) { + Real mme = 0; + const int P = threaded ? omp_get_max_threads() : 1; + // Chunk up the search space so we explore widely as early as possible. + const bool both_dir = false; + const auto fac = both_dir ? 2 : 1; + const Int n = fac*ndx; + IndexCover ic(n, P); + const Int nit = ic.nit(); + assert(P <= max_nthread); + const auto run1 = [&] (const int it, const int tid) -> Real { + const auto i = ic.idx(it, tid) + 1; + if (i < 0) return 1; + // dx is in (-0.5, 0.5] or (0, 0.5]. + const Real dx = both_dir ? 
(Real(i)/n - 0.5) : (0.5*i)/n; + if (dx == 0) return 1; + Real me; + compute(im, dx, ne, &me); + return me; + }; + std::array mes; + for (int it = 0; it < nit; ++it) { + if (threaded) { +# pragma omp parallel + { + const int tid = omp_get_thread_num(); + mes[tid] = run1(it, tid); + } + for (int j = 0; j < std::min(P, n); ++j) + mme = std::max(mme, mes[j]); + } else { + mme = run1(it, 0); + } + if (mme - 1 >= tol) + break; + } + return mme; + }; + + Real maxeigamp = -1; + // Ramp up precision of search to encourage an early exit when the method + // (np, order, offset) is not stable. + for (int ne : {2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, ne_max}) { + if (ne > ne_max) break; + bool toobig = false, first = true; + for (int ndx : {32, 64, 128, 256, 512, 1024, ndx_max}) { + if ( ! first && ndx > ndx_max) break; + if (ne < ne_max && ndx == ndx_max) break; + first = false; + maxeigamp = std::max(maxeigamp, + cdxeig(ne, ndx, maxeigampm1)); + if ( ! quiet) + printf("ne %2d nsamp %3d %1.3e\n", ne, ndx, maxeigamp-1); + toobig = maxeigamp - 1 > maxeigampm1; + if (toobig) break; + } + if (toobig) break; + } + return maxeigamp-1; +} + +MaxEigComputer::Analysis MaxEigComputer:: +calc_max_vals (const Int& nmu, const Int& ndx, + const InterpMethod& im) { + const auto calc1 = [=] (const Int& idx) -> Analysis { + const Real dx = (0.5*idx)/ndx; + if (dx == 0) return {1,1,1}; + Analysis vals; + compute(im, dx, nmu, &vals.max_eig_amp, &vals.max_condv, + &vals.max_defect_ub); + return vals; + }; + Real max_eig_amp = 0, max_condv = 0; + // Use <= ndx below to get dx in [0, 0.5] instead of [0, 0.5). 
+ if (threaded) { + Real mea[max_nthread] = {0}, mcv[max_nthread] = {0}; +# pragma omp parallel for + for (Int ix = 0; ix <= ndx; ++ix) { + const int tid = omp_get_thread_num(); + const auto mv = calc1(ix); + mea[tid] = std::max(mea[tid], mv.max_eig_amp); + mcv[tid] = std::max(mcv[tid], mv.max_condv); + } + for (Int i = 0; i < omp_get_max_threads(); ++i) { + max_eig_amp = std::max(max_eig_amp, mea[i]); + max_condv = std::max(max_condv, mcv[i]); + } + } else { + for (Int ix = 0; ix <= ndx; ++ix) { + const auto mv = calc1(ix); + max_eig_amp = std::max(max_eig_amp, mv.max_eig_amp); + max_condv = std::max(max_condv, mv.max_condv); + } + } + return {max_eig_amp, max_condv, 1}; +} + +void MaxEigComputer:: +compute (const InterpMethod& im, const Real& dx, const Int& ne, + Real* max_amp_out, Real* max_condv, + Real* max_defect_ub, + Complex* lam, Complex* V) { + auto& ws = wss[threaded ? omp_get_thread_num() : 0]; + Real max_amp = 0; + const Int np = im.np, npm1 = np - 1; + if (bloch) { + const Int N = 3*npm1*np, edecomp_ws = 22*npm1, evecs_ws = 2*npm1*npm1, + cond_2norm_ws = 12*npm1; + if (static_cast(ws.A.size()) < N) { + ws.A.optclear_and_resize(N); + ws.wr.optclear_and_resize(2*npm1); + ws.work.optclear_and_resize(edecomp_ws + evecs_ws + cond_2norm_ws); + } + bloch::Data bd(np, dx, im.uim == nullptr ? nullptr : im.uim->get_xnodes()); + bloch::form_kernel_block(bd, im, ws.A.data()); + Complex* const Bmu = reinterpret_cast(ws.A.data() + npm1*np); + if (max_condv) *max_condv = 0; + Complex* evecs = reinterpret_cast(ws.work.data() + edecomp_ws); + for (int ie = 0; ie < ne; ++ie) { + bloch::form_Bmu(bd, ne, ie, ws.A.data(), Bmu); + bloch::edecomp(bd, Bmu, ws.work.data(), edecomp_ws, ws.wr.data(), + V || max_condv ? 
evecs : nullptr); + for (Int i = 0; i < npm1; ++i) { + const Real re = ws.wr[2*i], im = ws.wr[2*i+1]; + max_amp = std::max(max_amp, std::sqrt(re*re + im*im)); + } + if (V) + for (Int i = 0; i < npm1*npm1; ++i) + V[npm1*npm1*ie + i] = evecs[i]; + if (max_condv) { + const Real condv = cond_2norm(evecs, npm1, + ws.work.data() + edecomp_ws + evecs_ws, + cond_2norm_ws); + *max_condv = std::max(*max_condv, condv); + } + if (lam) + for (Int i = 0; i < npm1; ++i) + lam[ie*npm1 + i] = Complex(ws.wr[2*i], ws.wr[2*i+1]); + } + } else { + throw_if(max_condv || max_defect_ub, + "Only Bloch-wave-based edecomp supports" + " max cond(V) and max defect_ub(lam)."); + // Compute eigenvalues of discrete space-time operator with ne cells. + const Int N = ne*npm1; + if (static_cast(ws.wr.size()) < N) { + const int Nonce = std::max(ne, max_ne)*npm1; + ws.A.optclear_and_resize(Nonce*Nonce); + ws.wr.optclear_and_resize(Nonce); + ws.wi.optclear_and_resize(Nonce); + ws.work.optclear_and_resize(10*Nonce); + } + // -dx to match Bloch-wave computation. + get_matrix(ne, np, -dx, im, ws.A); + int eig_info; + Real* Vreal = V ? reinterpret_cast(V) : nullptr; + dgeev('n', V ? 
'v' : 'n', N, ws.A.data(), N, ws.wr.data(), ws.wi.data(), + nullptr, 1, Vreal, N, ws.work.data(), ws.work.size(), eig_info); + for (Int i = 0; i < N; ++i) + max_amp = std::max(max_amp, + std::sqrt(ws.wr[i]*ws.wr[i] + ws.wi[i]*ws.wi[i])); + if (lam) + for (Int i = 0; i < N; ++i) + lam[i] = Complex(ws.wr[i], ws.wi[i]); + } + assert(max_amp_out); + *max_amp_out = max_amp; +} + +static void normalize (Complex* v, const Int& n) { + Real norm2 = 0; + for (Int k = 0; k < n; ++k) { + Real d = std::abs(v[k]); + norm2 += d*d; + } + const Real scale = 1/std::sqrt(norm2); + for (Int k = 0; k < n; ++k) + v[k] *= scale; +} + +static void remove_phase (Complex* v, const Int& n) { + const Real arg = std::atan2(v[0].imag(), v[0].real()); + Complex phase(std::cos(arg), -std::sin(arg)); + for (Int k = 0; k < n; ++k) + v[k] *= phase; +} + +static void write_dgeev_evec (const Complex& lam, const Real* V, + const Int& vi, const Int& pair, const Int& n, + Complex* v) { + if (lam.imag() == 0) { + for (Int k = 0; k < n; ++k) + v[k] = V[vi*n + k]; + } else { + const Int sign = pair == 0 ? 1 : -1; + for (Int k = 0; k < n; ++k) + v[k] = Complex(V[vi*n + k], sign*V[(vi+1)*n + k]); + } + normalize(v, n); + remove_phase(v, n); +} + +static void write_bloch_evec (const Complex* v_bloch, const Int& vi, + const Int& ne, const Int& np, Complex* v) { + const Int npm1 = np-1, n = ne*npm1; + const Int vie = vi / npm1; + const Real arg = 2 * M_PI * (Real(vie)/ne); + const Complex* const phi = v_bloch + npm1*vi; + for (Int ie = 0; ie < ne; ++ie) { + const Complex mu(std::cos(ie*arg), std::sin(ie*arg)); + for (Int i = 0; i < npm1; ++i) + v[ie*npm1 + i] = mu*phi[i]; + } + normalize(v, n); + remove_phase(v, n); +} + +// Check that the eigenvalues and eigenvectors derived from Bloch-wave analysis +// -- the size-(np-1) eigenvalue problem -- match those derived from brute-force +// eigendecomp of the ne-mesh space-time operator. 
+static int +check_bloch_edecomp (const int np, const int ne, + const Complex* lam, const Complex* lam_bloch, + const Real* V, const Complex* v_bloch, + // >= 2 ne (np-1) + Complex* work) { + const Int N = ne*(np-1); + Complex* const v_bf = work; + Complex* const v_full_bloch = work + N; + int nerr = 0; + Array used(N, false); + for (Int i = 0, i_vec = 0, pair = 0; i < N; ++i) { + // Find corresponding eigenvalues. + const auto lam_brute_force = lam[i]; + Real min_diff = 2; + Int jmin = -1; + for (Int j = 0; j < N; ++j) { + const Real diff = std::abs(lam_bloch[j] - lam_brute_force); + if (diff < min_diff) { + min_diff = diff; + jmin = j; + } + } + used[jmin] = true; + if (min_diff > 1e-13) + ++nerr; + // Compare eigenvectors. For dgeev, some arithmetic is needed to account + // for how evecs are packed. + write_dgeev_evec(lam[i], V, i_vec, pair, N, v_bf); + if (pair == 1) { + assert(lam[i].imag() != 0); + i_vec += 2; + pair = 0; + } else if (lam[i].imag() == 0) { + i_vec++; + assert(pair == 0); + } else { + assert(pair == 0); + pair++; + } + write_bloch_evec(v_bloch, jmin, ne, np, v_full_bloch); + Real num = 0, den = 0; + for (Int k = 0; k < N; ++k) { + Real d = std::abs(v_full_bloch[k] - v_bf[k]); + num += d*d; + d = std::abs(v_bf[k]); + den += d*d; + } + if (std::sqrt(num/den) >= 3e-9) { + pr(puf(i) pu(jmin) pu(min_diff) pu(std::sqrt(num/den))); + ++nerr; + } + } + // Check that all eigenvalues were matched. + for (Int j = 0; j < N; ++j) + if ( ! 
used[j]) + ++nerr; + return nerr; +} + +int MaxEigComputer::unittest () { + int nerr = 0; + MaxEigComputer mec(false, false), mec_bloch(false, true); + const int np_max = std::min(13, islet::np_max), ne_max = np_max, + Nmax = (np_max-1)*ne_max; + Array lam(Nmax), lam_bloch(Nmax), + V((Nmax*Nmax)/2), v_bloch((np_max-1)*(np_max-1)*ne_max), work(2*Nmax); + for (int np = 4; np <= np_max; ++np) { + InterpMethod im(np, InterpMethod::npxstab); + for (int ne : {1, 3, ne_max}) { + const Int N = ne*(np-1); + assert(N <= Nmax); + for (Real dx : {0.05, 0.42, 0.7}) { + Real max_amp; + mec.compute(im, dx, ne, &max_amp, nullptr, nullptr, + lam.data(), V.data()); + mec_bloch.compute(im, dx, ne, &max_amp, nullptr, nullptr, + lam_bloch.data(), v_bloch.data()); + nerr += check_bloch_edecomp(np, ne, lam.data(), lam_bloch.data(), + reinterpret_cast(V.data()), + v_bloch.data(), work.data()); + } + } + } + nerr += test_index_cover(); + return nerr; +} diff --git a/methods/islet/islet_maxeigcomp.hpp b/methods/islet/islet_maxeigcomp.hpp new file mode 100644 index 0000000..a673998 --- /dev/null +++ b/methods/islet/islet_maxeigcomp.hpp @@ -0,0 +1,79 @@ +#ifndef INCLUDE_ISLET_MAXEIGCOMP_HPP +#define INCLUDE_ISLET_MAXEIGCOMP_HPP + +#include "islet_types.hpp" +#include "islet_util.hpp" +#include "islet_interpmethod.hpp" +#include "islet_npx.hpp" + +using islet::Array; + +#include + +class MaxEigComputer { + static constexpr int max_nthread = 136; + + struct Workspace { + Array A, wr, wi, work; + }; + std::vector wss; + Int max_ne; + bool threaded, bloch; + +public: + MaxEigComputer (const bool ithreaded = true, const bool ibloch = true) + : max_ne(0), threaded(ithreaded), bloch(ibloch) + { setup_workspace(); } + + bool is_threaded () const { return threaded; } + + void setup_workspace(const Int max_ne_ = 64); + + Real run(const Int& ne_max, const Int& ndx_max, const Real& maxeigampm1, + const bool quiet, const InterpMethod& im); + + Real run (const Int& np, const Int& ne_max, const Int& 
ndx_max, + const Real& maxeigampm1, const bool quiet, + UserInterpMethod* uim) { + std::shared_ptr suim(uim, [] (UserInterpMethod*) {}); + return run(ne_max, ndx_max, maxeigampm1, quiet, InterpMethod(suim)); + } + Real run (const Int& np, const Int& ne_max, const Int& ndx_max, + const Real& maxeigampm1, const bool quiet, + const UserInterpMethod::Ptr& uim) { + return run(ne_max, ndx_max, maxeigampm1, quiet, InterpMethod(uim)); + } + + struct Analysis { + static constexpr Real condv_switch = 1e2; + Real max_eig_amp, max_condv, max_defect_ub; + }; + + Analysis calc_max_vals(const Int& nmu, const Int& ndx, + const InterpMethod& im); + + Analysis calc_max_vals (const Int& nmu, const Int& ndx, const Int& np, + UserInterpMethod* uim) { + std::shared_ptr suim(uim, [] (UserInterpMethod*) {}); + return calc_max_vals(nmu, ndx, InterpMethod(suim)); + } + + // dx is fraction of an element, so [0,1], *not* reference-element cordinate. + void compute(const InterpMethod& im, const Real& dx, const Int& ne, + Real* max_amp_out, Real* max_condv = nullptr, + Real* max_defect_ub = nullptr, + Complex* lam = nullptr, Complex* V = nullptr); + + void compute(UserInterpMethod* uim, const Real& dx, const Int& ne, + Real* max_amp_out, Real* max_condv = nullptr, + Real* max_defect_ub = nullptr, + Complex* lam = nullptr, Complex* V = nullptr) { + std::shared_ptr suim(uim, [] (UserInterpMethod*) {}); + compute(InterpMethod(suim), dx, ne, max_amp_out, max_condv, + max_defect_ub, lam, V); + } + + static int unittest(); +}; + +#endif diff --git a/methods/islet/islet_nodalbasis.cpp b/methods/islet/islet_nodalbasis.cpp new file mode 100644 index 0000000..16ef25b --- /dev/null +++ b/methods/islet/islet_nodalbasis.cpp @@ -0,0 +1,195 @@ +#include "islet_nodalbasis.hpp" +#include "islet_npx.hpp" +#include "islet_util.hpp" + +Nodes::Nodes () : np(-1), include_bdy_val(true) {} + +Nodes::Nodes (const Nodes& s) + : np(s.np), nh(s.nh), include_bdy_val(s.include_bdy_val), + data(s.data), subnp(s.subnp) +{ 
set_ptrs(); } + +Nodes::Nodes (const Int np_, const bool include_bdy) { init(np_, include_bdy); } + +void Nodes::set_ptrs () { + nodes.resize(nh); + for (size_t i = 0; i < nodes.size(); ++i) + nodes[i] = data.data() + np*i; +} + +void Nodes::init (const Int np_, const bool include_bdy_) { + np = np_; + include_bdy_val = include_bdy_; + nh = np/2 + (include_bdy_val ? 0 : 1); + data.resize(np*nh); + set_ptrs(); + subnp.resize(nh, -1); +} + +void Nodes::set (const Int i, const std::initializer_list& il) { + set(i, std::vector(il)); +} + +void Nodes::set (const Int i, const std::vector& il) { + assert(i <= static_cast(nodes.size())); + Int j = 0; + for (const auto e : il) nodes[i][j++] = e; + subnp[i] = j; +} + +std::string Nodes::string (const bool newline) const { + std::stringstream ss; + ss << np << " " << int(include_bdy_val) << " | "; + for (size_t i = 0; i < nodes.size(); ++i) { + if (subnp[i] == -1) continue; + ss << i << " " << subnp[i] << ":"; + for (Int j = 0; j < subnp[i]; ++j) + ss << " " << nodes[i][j]; + if (i+1 != nodes.size()) ss << " | "; + } + if (newline) ss << "\n"; + return ss.str(); +} + +bool Nodes::init (const std::string& s) { + std::stringstream ss(s); + const auto read_int = [&] () -> int { + int i; + ss >> i; + return i; + }; + const auto eat_until_after_bar = [&] () { + while ( ! 
ss.eof() && ss.peek() != '|') ss.get(); + ss.get(); + }; + np = read_int(); + include_bdy_val = read_int(); + init(np, include_bdy_val); + for (Int ni = 0; ni < get_nh(); ++ni) { + eat_until_after_bar(); + const auto ni_check = read_int(); + if (ni_check != ni) return false; + subnp[ni] = read_int(); + if (subnp[ni] < 2 || subnp[ni] > np) return false; + ss.get(); // colon + for (Int i = 0; i < subnp[ni]; ++i) + nodes[ni][i] = read_int(); + } + return ok_to_eval(); +} + +bool Nodes::ok_to_eval () const { + for (Int ni = 0; ni < get_nh(); ++ni) { + if (subnp[ni] < 2) return false; + for (Int i = 1; i < subnp[ni]; ++i) + if (nodes[ni][i] <= nodes[ni][i-1]) + return false; + Int fnd = 0; + for (Int i = 0; i < subnp[ni]; ++i) + if (nodes[ni][i] == ni || nodes[ni][i] == ni+1) + ++fnd; + if (fnd != 2) return false; + } + return true; +} + +bool operator== (const Nodes& a, const Nodes& b) { + if (a.get_np() != b.get_np()) return false; + if (a.include_bdy() != b.include_bdy()) return false; + const auto an = a.get_nodes(); + const auto bn = b.get_nodes(); + const auto as = a.get_subnp(); + const auto bs = b.get_subnp(); + for (Int ni = 0; ni < a.get_nh(); ++ni) { + if (as[ni] != bs[ni]) return false; + for (Int i = 0; i < as[ni]; ++i) + if (an[ni][i] != bn[ni][i]) return false; + } + return true; +} + +bool operator!= (const Nodes& a, const Nodes& b) { return ! (a == b); } + +void eval (const Int& np, const bool bdy, const Real* const xnodes, + const Int* subnp, Int const* const* nodes, + const Real& x, Real* const v) { + if (x > 0) { + eval(np, bdy, xnodes, subnp, nodes, -x, v); + for (int i = 0; i < np/2; ++i) + std::swap(v[i], v[np-i-1]); + return; + } + Real xsub[islet::np_max], vsub[islet::np_max]; + const Int nreg = bdy ? np-1 : np+1; + const Int ios = bdy ? 
1 : 0; + for (Int i = 0; i < nreg; ++i) { + if (i < np-2 && x > xnodes[i+ios]) continue; + if (subnp[i] == np) { + eval_lagrange_poly(xnodes, np, x, v); + } else { + for (Int j = 0; j < subnp[i]; ++j) + xsub[j] = xnodes[nodes[i][j]]; + std::fill(v, v + np, 0); + eval_lagrange_poly(xsub, subnp[i], x, vsub); + for (Int j = 0; j < subnp[i]; ++j) { + const auto node = nodes[i][j]; + assert(node >= 0); + assert(node < np); + v[node] = vsub[j]; + } + } + break; + } +} + +void eval (const Nodes& nodes, const Real* const xnodes, const Real& x, + Real* const v) { + eval(nodes.get_np(), nodes.include_bdy(), xnodes, + nodes.get_subnp(), nodes.get_nodes(), + x, v); +} + +void eval (const Nodes& nodes, const Real& x, Real* const v) { + eval(nodes, islet::get_x_gll_special(nodes.get_np()), x, v); +} + +void unittest_Nodes () { + Nodes n(10); + require( ! n.ok_to_eval()); // Test all regions specified checking. + n.set(0, {0,1,9}); + n.set(1, {1,2,9}); + require( ! n.ok_to_eval()); // Test all regions specified checking. + n.set(2, {0,2,3,9}); + n.set(3, {0,2,3,4,9}); + n.set(4, {0,2,4,5,6}); + // Test order checking. + require(n.ok_to_eval()); n.set(1, {2,1,9}); require( ! n.ok_to_eval()); n.set(1, {1,2,9}); + // Test interpolatory checking. + require(n.ok_to_eval()); n.set(2, {4,5,9}); require( ! 
n.ok_to_eval()); n.set(2, {0,2,3,9}); + require(n.ok_to_eval()); + { Nodes n1(n); require(n1.ok_to_eval()); require(n1 == n); } + { + Nodes n1(n); + n1.set(2, {1,2,3,9} ); require(n1.ok_to_eval()); require(n1 != n); + n1.set(2, {0,1,2,3,9}); require(n1.ok_to_eval()); require(n1 != n); + } + { Nodes n1; require(n1.init(n.string())); require(n1.ok_to_eval()); require(n1 == n); } + { + Nodes n1; + n1.init("12 1 | 0 12: 0 1 2 3 4 5 6 7 8 9 10 11 | 1 9: 0 1 2 3 4 5 6 7 8 | " + "2 10: 0 1 2 3 4 5 6 7 8 9 | 3 10: 0 1 2 3 4 5 6 7 8 9 | " + "4 9: 1 2 3 4 5 6 7 8 9 | 5 10: 1 2 3 4 5 6 7 8 9 10"); + const Int nx = 71; + Int ne = 0; + for (Int ix = 0; ix <= nx; ++ix) { + const Real x = -1 + Real(ix)/nx; + Real v1[12], v2[12]; + npxstab::eval<12,6>({12, 9, 10, 10, 9, 10}, {0, 0, 0, 0, 1, 1}, x, v1); + require(n1.ok_to_eval()); + eval(n1, x, v2); + for (Int i = 0; i < 12; ++i) + if (islet::reldif(v1[i], v2[i]) > 4*std::numeric_limits::epsilon()) ++ne; + } + require(ne == 0); + } +} diff --git a/methods/islet/islet_nodalbasis.hpp b/methods/islet/islet_nodalbasis.hpp new file mode 100644 index 0000000..38a9ba7 --- /dev/null +++ b/methods/islet/islet_nodalbasis.hpp @@ -0,0 +1,59 @@ +#ifndef INCLUDE_ISLET_NODALBASIS_HPP +#define INCLUDE_ISLET_NODALBASIS_HPP + +#include "islet_types.hpp" +#include "islet_util.hpp" + +class Nodes { + Int np, nh; + bool include_bdy_val; + std::vector nodes; + std::vector data, subnp; + + void set_ptrs(); + +public: + typedef std::shared_ptr Ptr; + + Nodes(); + Nodes(const Nodes& s); + Nodes(const Int np_, const bool include_bdy = true); + + void init(const Int np, const bool include_bdy); + bool init(const std::string& s); + + Int get_np () const { return np; } + Int get_nh () const { return nh; } + bool include_bdy () const { return include_bdy_val; } + Int const* const* get_nodes () const { return nodes.data(); } + const Int* get_subnp () const { return subnp.data(); } + + void set(const Int i, const std::initializer_list& il); + void set(const Int 
i, const std::vector& il); + + template + void set(const Int ireg, const IntT* const inodes, const Int isubnp) { + assert(ireg <= static_cast(nodes.size())); + assert(isubnp <= np); + subnp[ireg] = isubnp; + for (Int j = 0; j < isubnp; ++j) nodes[ireg][j] = inodes[j]; + } + + bool ok_to_eval() const; + + std::string string(const bool newline = true) const; +}; + +bool operator==(const Nodes&, const Nodes&); +bool operator!=(const Nodes&, const Nodes&); + +void eval(const Int& np, const bool bdy, const Real* const xnodes, + const Int* subnp, Int const* const* nodes, + const Real& x, Real* const v); +void eval(const Nodes& nodes, const Real* const xnodes, + const Real& x, Real* const v); +void eval(const Nodes& nodes, const Real& x, Real* const v); + +void unittest_Nodes(); + +#endif diff --git a/methods/islet/islet_np4.cpp b/methods/islet/islet_np4.cpp new file mode 100644 index 0000000..b2f4894 --- /dev/null +++ b/methods/islet/islet_np4.cpp @@ -0,0 +1,46 @@ +#include "islet_np4.hpp" +#include "islet_isl.hpp" + +static const Real oosqrt5 = 0.44721359549995793928; + +static Real eval_lagrange_poly (const Int& n, const Real* xsup, const Real* ysup, + const Real& x) { + Real y = 0; + for (int i = 0; i < n; ++i) { + Real f = 1; + for (int j = 0; j < n; ++j) + f *= (i == j) ? + 1 : + (x - xsup[j]) / (xsup[i] - xsup[j]); + y += f*ysup[i]; + } + return y; +} + +Np4InterpMethod::Np4InterpMethod (Real c0, Real c1, Real c2) { + reset_c(c0, c1, c2); +} + +void Np4InterpMethod::reset_c (Real c0, Real c1, Real c2) { + c[0] = c0; c[1] = c1; c[2] = c2; +} + +Real Np4InterpMethod::eval_a (const Real& x) const { + return eval_lagrange_poly(3, islet::get_x_gll(3), c, + 2*(1 - std::abs(x))/(1 - oosqrt5) - 1); +} + +void Np4InterpMethod::eval (const Real& x, Real* const y) { + const auto* x_gll = get_xnodes(); + if (x < -oosqrt5 || x > oosqrt5) { + y[0] = y[3] = 0; + const Int os = x < -oosqrt5 ? 
0 : 1; + islet::eval_lagrange_poly(x_gll + os, 3, x, y + os); + Real y4[4]; + islet::eval_lagrange_poly(x_gll, 4, x, y4); + const Real a = eval_a(x); + for (int i = 0; i < 4; ++i) + y[i] = a*y[i] + (1 - a)*y4[i]; + } else + islet::eval_lagrange_poly(x_gll, 4, x, y); +} diff --git a/methods/islet/islet_np4.hpp b/methods/islet/islet_np4.hpp new file mode 100644 index 0000000..eb327cc --- /dev/null +++ b/methods/islet/islet_np4.hpp @@ -0,0 +1,22 @@ +#ifndef INCLUDE_ISLET_NP4_HPP +#define INCLUDE_ISLET_NP4_HPP + +#include "islet_types.hpp" +#include "islet_isl.hpp" +#include "islet_interpmethod.hpp" + +struct Np4InterpMethod : public UserInterpMethod { + typedef std::shared_ptr Ptr; + + Np4InterpMethod(Real c0, Real c1, Real c2); + void reset_c(Real c0, Real c1, Real c2); + Real eval_a(const Real& x) const; + void eval(const Real& x, Real* const y); + const Real* get_xnodes () const override { return islet::get_x_gll(4); } + Int get_np () const override { return 4; } + +private: + Real c[3]; +}; + +#endif diff --git a/methods/islet/islet_npx.cpp b/methods/islet/islet_npx.cpp new file mode 100644 index 0000000..8eb00d0 --- /dev/null +++ b/methods/islet/islet_npx.cpp @@ -0,0 +1,35 @@ +#include "islet_npx.hpp" +#include "islet_util.hpp" + +void op_eval (const InterpMethod& im, const Real a_src, Real* v) { + switch (im.type) { + case InterpMethod::npx: npx::eval(im.np, a_src, v); break; + case InterpMethod::npxstab: npxstab::eval(im.np, a_src, v); break; + case InterpMethod::user: im.uim->eval(a_src, v); break; + default: + throw_if(true, "op_eval: invalid im.type " << im.type); + } +} + +template +void npxstab::eval (const Int& np, const Scalar& x, Scalar* const v) { + switch (np) { // order of accuracy + case 2: return eval< 2,0>({ }, { }, x, v); // 1 + case 3: return eval< 3,0>({ }, { }, x, v); // 2 + case 4: return eval< 4,1>({ 3 }, {0, }, x, v); // 2 + case 5: return eval< 5,2>({ 3, 4 }, {0, 0 }, x, v); // 2 + case 6: return eval< 6,2>({ 6, 5 }, {0, 0 }, x, v); // 4 + 
case 7: return eval< 7,3>({ 5, 5, 6 }, {0, 0, 0 }, x, v); // 4 + case 8: return eval< 8,4>({ 6, 6, 7, 6 }, {0, 0, 0, 1 }, x, v); // 5 + case 9: return eval< 9,4>({ 7, 8, 7, 7 }, {0, 0, 0, 1 }, x, v); // 6 + case 10: return eval<10,5>({ 7, 7, 7, 8, 8 }, {0, 0, 0, 0, 1 }, x, v); // 6 + case 11: return eval<11,5>({ 8, 9, 8, 9, 8 }, {0, 0, 0, 0, 1 }, x, v); // 7 + case 12: return eval<12,6>({ 9, 9, 10, 10, 9, 10}, {0, 0, 0, 0, 1, 1}, x, v); // 8 + case 13: return eval<13,6>({10, 10, 10, 10, 11, 10}, {0, 0, 0, 0, 0, 1}, x, v); // 9 + case 16: return eval<16,8>({12, 13, 13, 13, 13, 14, 13, 12}, + { 0, 0, 0, 0, 0, 0, 1, 2}, x, v); // 11 + default: throw_if(true, "Only 2 <= np <= 13, np = 16 are supported."); + } +} + +template class npxstab; diff --git a/methods/islet/islet_npx.hpp b/methods/islet/islet_npx.hpp new file mode 100644 index 0000000..2c24b8a --- /dev/null +++ b/methods/islet/islet_npx.hpp @@ -0,0 +1,137 @@ +#ifndef INCLUDE_ISLET_NPX_HPP +#define INCLUDE_ISLET_NPX_HPP + +#include "islet_util.hpp" +#include "islet_types.hpp" +#include "islet_tables.hpp" +#include "islet_interpmethod.hpp" + +template +void eval_lagrange_poly (const Scalar* x_gll, const Int& np, const Scalar& x, + Scalar* const y) { + for (int i = 0; i < np; ++i) { + Scalar f = 1; + for (int j = 0; j < np; ++j) + f *= (i == j) ? 
+ 1 : + (x - x_gll[j]) / (x_gll[i] - x_gll[j]); + y[i] = f; + } +} + +template +struct npx { + static void eval (const Int np, const Scalar& x, Scalar* const v) { + eval_lagrange_poly(islet::get_x_gll(np), np, x, v); + } +}; + +template +struct npxstab : public npx { + template + static void eval (const std::array& order, + const std::array& os, + const Scalar& x, Scalar* const v) { + if (x > 0) { + eval(order, os, -x, v); + for (int i = 0; i < np/2; ++i) + std::swap(v[i], v[np-i-1]); + return; + } + const auto x_gll = islet::get_x_gll(np); + bool done = false; + for (Int i = 0; i < nreg; ++i) + if (x < x_gll[i+1]) { + std::fill(v, v + np, 0); + eval_lagrange_poly(x_gll + os[i], order[i], x, v + os[i]); + done = true; + break; + } + if ( ! done) + eval_lagrange_poly(x_gll, np, x, v); + } + + static void eval(const Int& np, const Scalar& x, Scalar* const v); + + static int ooa_vs_np (const int np) { + if (np == 5) return 2; + return np - 1 - ((np-1)/3); + } + + template + static void eval (const std::array& subnp, + const std::array& os, + const std::array& alphac, + const Scalar& x, Scalar* const v) { + if (x > 0) { + eval(subnp, os, alphac, -x, v); + for (int i = 0; i < np/2; ++i) + std::swap(v[i], v[np-i-1]); + return; + } + const auto x_gll = islet::get_x_gll(np); + bool done = false; + for (int i = 0; i < nreg; ++i) + if (x < x_gll[i+1]) { + eval_lagrange_poly(x_gll, np, x, v); + if (subnp[i] < np) { + Real w[12] = {0}; + eval_lagrange_poly(x_gll + os[i], subnp[i], x, w + os[i]); + Real alpha = 0; + if (alphanp == 1) + alpha = alphac[i]; + else { + assert(alphanp <= 3); + const auto alpha_r_gll = islet::get_x_gll(alphanp); + const auto r = (x - x_gll[i]) / (x_gll[i+1] - x_gll[i]); + Real a[3]; + eval_lagrange_poly(alpha_r_gll, alphanp, r, a); + for (int j = 0; j < alphanp; ++j) + alpha += alphac[alphanp*i + j] * a[j]; + } + for (int j = 0; j < np; ++j) + v[j] = alpha*v[j] + (1 - alpha)*w[j]; + } + done = true; + break; + } + if ( ! 
done) + eval_lagrange_poly(x_gll, np, x, v); + } +}; + +struct InterpMethod { + typedef std::shared_ptr Ptr; + + enum Type { notype, npx, npxstab, user }; + Int np; + Type type; + std::shared_ptr uim; + + static Type convert (const std::string& s) { + if (s == "npx") return npx; + if (s == "npxstab") return npxstab; + if (s == "user") return user; + throw std::runtime_error(std::string("Not an InterpMethod::Type: ") + s); + } + + static std::string convert (const Type& t) { + if (t == npx) return "npx"; + if (t == npxstab) return "npxstab"; + if (t == user) return "user"; + throw std::runtime_error("Not an InterpMethod::Type."); + } + + InterpMethod () : np(-1), type(notype) {} + InterpMethod (Int inp, Type itype) : np(inp), type(itype) {} + InterpMethod (const std::shared_ptr& iuim) + : np(iuim->get_np()), type(user), uim(iuim) {} + + const Real* get_xnodes() const { + return uim ? uim->get_xnodes() : islet::get_x_gll(np); + }; +}; + +void op_eval(const InterpMethod& im, const Real a_src, Real* v); + +#endif diff --git a/methods/islet/islet_pum.cpp b/methods/islet/islet_pum.cpp new file mode 100644 index 0000000..8162ce7 --- /dev/null +++ b/methods/islet/islet_pum.cpp @@ -0,0 +1,226 @@ +#include "islet_pum.hpp" + +#include + +#include "islet_util.hpp" +#include "islet_isl.hpp" + +namespace pum { + +static void init_xb (const Int ne, const Real& perturb, Real* const xb) { + for (Int i = 0; i <= ne; ++i) + xb[i] = 2*(Real(i)/ne) - 1; + for (Int i = 1; i < ne; ++i) + xb[i] += (perturb/ne)*(2*islet::urand() - 1); +} + +static void fill_xgrid (const Int ne, const Real* const xb, + const Int np, const Real* const xnodes, + Real* const x) { + for (Int i = 0; i < ne; ++i) + for (Int j = 0; j < np; ++j) { + const auto alpha = (1 + xnodes[j])/2; + x[i*(np - 1) + j] = (1 - alpha)*xb[i] + alpha*xb[i+1]; + } +} + +static void prep_for_eval (const Int ne, const Real* const xb, const Int np, + const Real& x, Int& ie, Real& xr, Real* const v) { + for (ie = 0; ie < ne; ++ie) + if 
(x >= xb[ie] && (ie == ne-1 || x < xb[ie+1])) + break; + assert(ie >= 0 && ie < ne); + xr = 2*(x - xb[ie]) / (xb[ie+1] - xb[ie]) - 1; + std::fill(v, v + ne*(np-1) + 1, 0); +} + +Options::Options () + : threaded(false), + ne(3), ntrial(111), mec_ne(11), + perturb(0.05) +{} + +PerturbedUniformMeshMetric +::PerturbedUniformMeshMetric (const InterpMethod::Ptr& im, const Options opts_) + : opts(opts_), base_im(im), mec(opts.threaded) +{ init(); } + +PerturbedUniformMeshMetric +::PerturbedUniformMeshMetric (const UserInterpMethod::Ptr& uim, const Options opts_) + : opts(opts_), mec(opts.threaded) +{ + base_im = std::make_shared(uim); + init(); +} + +void PerturbedUniformMeshMetric::reset_opts (const Options& o) { + opts = o; + init(); +} + +void PerturbedUniformMeshMetric::init () { + xbs.resize(opts.threaded ? omp_get_max_threads() : 1); + for (auto& xb : xbs) xb.resize(opts.ne+1); + xnodess.resize(opts.threaded ? omp_get_max_threads() : 1); + for (auto& xnodes : xnodess) xnodes.resize(get_np()); +} + +void PerturbedUniformMeshMetric::eval(const Real& x, Real* const v) { + Int ie; + Real xr; + const Int tid = opts.threaded ? omp_get_thread_num() : 0; + prep_for_eval(opts.ne, xbs[tid].data(), base_im->np, + x, ie, xr, v); + op_eval(*base_im, xr, v + ie*(base_im->np-1)); +} + +const Real* PerturbedUniformMeshMetric::get_xnodes() const { + const Int tid = opts.threaded ? omp_get_thread_num() : 0; + return xnodess[tid].data(); +} + +Int PerturbedUniformMeshMetric::get_np() const { + return (xbs[0].size() - 1)*(base_im->np - 1) + 1; +} + +Real PerturbedUniformMeshMetric::run (const Real stop_if_above, + const bool one_elem_hop_only) { + assert(base_im->np > 0); + std::vector dxs; { + dxs.push_back(1.0/opts.ne); + if ( ! 
one_elem_hop_only) { + for (const auto dx : {2.0/opts.ne, 0.5/opts.ne, 0.5}) + dxs.push_back(dx); + const Real* xnodes = base_im->get_xnodes(); + for (int i = 0; i < base_im->np; ++i) { + const Real xn = xnodes[i]; + if (xn == 1 || xn == -1) continue; + dxs.push_back((0.5*(1 - xn))/opts.ne); + } + } + } + const auto run1 = [&] () { + const int tid = opts.threaded ? omp_get_thread_num() : 0; + auto& xb = xbs[tid]; + auto& xnodes = xnodess[tid]; + init_xb(opts.ne, opts.perturb, xb.data()); + fill_xgrid(opts.ne, xb.data(), base_im->np, base_im->get_xnodes(), + xnodes.data()); + MaxEigComputer mec(false); + Real mea, max_mea = 0; + for (const auto dx : dxs) { + mec.compute(this, dx, opts.mec_ne, &mea); + max_mea = std::max(max_mea, mea); + if (max_mea > 1 + stop_if_above) break; + } + return max_mea; + }; + Real max_mea = 0; + if (opts.threaded) { +# pragma omp parallel for + for (Int trial = 0; trial < opts.ntrial; ++trial) { + if (max_mea > 1 + stop_if_above) continue; + const auto mea = run1(); + if (mea > max_mea) { +# pragma omp critical (PerturbedUniformMeshMetric_run) + max_mea = std::max(max_mea, mea); +# pragma omp flush + } + } + } else { + for (Int trial = 0; trial < opts.ntrial; ++trial) { + const auto mea = run1(); + max_mea = std::max(max_mea, mea); + if (max_mea > 1 + stop_if_above) break; + } + } + return max_mea - 1; +} + +void PerturbedUniformMeshMetric +::sweep_and_collect_amplitudes ( + const Int npts, const Real threshold, std::map& dx2meam1, + const bool verbose) +{ + std::vector dxs; { + for (const auto dx : {1.0/opts.ne, 0.5/opts.ne}) + dxs.push_back(dx); + const Real* xnodes = base_im->get_xnodes(); + for (Int i = 0; i < base_im->np; ++i) { + const Real xn = xnodes[i]; + if (xn == 1 || xn == -1) continue; + dxs.push_back((0.5*(1 - xn))/opts.ne); + } + for (Int i = 1; i < npts; ++i) + dxs.push_back(Real(i)/(opts.ne*npts)); + } + const Int ndx = dxs.size(); +# pragma omp parallel for schedule(static,1) + for (Int trial = 0; trial < opts.ntrial; 
++trial) { + const int tid = omp_get_thread_num(); + auto& xb = xbs[tid]; + auto& xnodes = xnodess[tid]; + init_xb(opts.ne, opts.perturb, xb.data()); + fill_xgrid(opts.ne, xb.data(), base_im->np, base_im->get_xnodes(), xnodes.data()); + for (Int i = 0; i < ndx; ++i) { + MaxEigComputer mec(false /* threaded */); + Real mea; + mec.compute(this, dxs[i], opts.mec_ne, &mea); + if (mea >= 1 + threshold) { +# pragma omp critical + { + const Real dx = dxs[i]*opts.ne; + bool insert = false; + if (dx2meam1.find(dx) != dx2meam1.end()) { + const Real prev = dx2meam1[dx]; + if (mea > 1 + prev) { + dx2meam1[dx] = mea-1; + insert = true; + } + } else { + dx2meam1[dx] = mea-1; + insert = true; + } + if (insert && verbose) printf("dx %1.16e meam1 %1.16e\n", dx, mea-1); + } + } + } + } +} + +void demo () { + for (const Int np : {4, 5, 6, 7, 8, 9, 10, 11, 12, 13}) { + Real pum_meam1_m0 = 0; + for (Int method = 0; method < 2; ++method) { + if (method == 1 && (np == 9 || np == 11)) continue; + const auto oim = std::make_shared( + np, islet::Operator::create(method == 0 ? + islet::Operator::gll_offset_nodal_subset : + islet::Operator::xnodal)); + const auto im = std::make_shared(oim); + Real pum_meam1; { + pum::Options o; + o.threaded = true; + o.ne = 4; + o.ntrial = 71; + o.mec_ne = 3; + o.perturb = 0.01; + pum::PerturbedUniformMeshMetric pum(im, o); + pum_meam1 = pum.run(); + } + if (method == 0) pum_meam1_m0 = pum_meam1; + Real meam1 = -1; { + // Check meam1 to be sure we transcribed the new methods correctly. 
+ MaxEigComputer mec; + const Int ns = 4111; + meam1 = mec.run(np, ns, ns, 1e-14, true, im->uim); + } + printf("np %2d method %d meam1 %10.3e pum_meam1 %10.3e", + np, method, meam1, pum_meam1); + if (method == 0) printf("\n"); + else printf(" better %6.2f\n", pum_meam1_m0/pum_meam1); + } + } +} + +} // namespace pum diff --git a/methods/islet/islet_pum.hpp b/methods/islet/islet_pum.hpp new file mode 100644 index 0000000..fb9f2f4 --- /dev/null +++ b/methods/islet/islet_pum.hpp @@ -0,0 +1,56 @@ +#ifndef INCLUDE_ISLET_PUM_HPP +#define INCLUDE_ISLET_PUM_HPP + +#include + +#include "islet_types.hpp" +#include "islet_maxeigcomp.hpp" + +namespace pum { + +struct Options { + bool threaded; + Int ne, ntrial, mec_ne; + Real perturb; + Options(); +}; + +struct PerturbedUniformMeshMetric : public UserInterpMethod { + typedef std::shared_ptr Ptr; + + PerturbedUniformMeshMetric(const InterpMethod::Ptr& im, + const Options opts = Options()); + PerturbedUniformMeshMetric(const UserInterpMethod::Ptr& im, + const Options opts = Options()); + Real run(Real stop_if_above = 1e3, const bool one_elem_hop_only = false); + + // Can't reset opts.threaded + void reset_opts(const Options& o); + + // UserInterpMethod interface + void eval(const Real& x, Real* const v) override; + const Real* get_xnodes() const override; + Int get_np() const override; + + // Illustrate why a 1-element hop is the key thing to study. + void sweep_and_collect_amplitudes( + const Int npts, const Real threshold, + // Report meam1 at dx in [0,1] if meam1 >= threshold. This routine does not + // clear what is already in dx2meam1. 
+ std::map& dx2meam1, + const bool verbose = true); + +private: + Options opts; + InterpMethod::Ptr base_im; + MaxEigComputer mec; + std::vector > xbs, xnodess; + + void init(); +}; + +void demo(); + +} // namespace pum + +#endif diff --git a/methods/islet/islet_studymetrics.cpp b/methods/islet/islet_studymetrics.cpp new file mode 100644 index 0000000..8662384 --- /dev/null +++ b/methods/islet/islet_studymetrics.cpp @@ -0,0 +1,134 @@ +#include "islet_pum.hpp" +#include "islet_xnodes_metrics.hpp" + +namespace { + +Nodes make_offset_nodal (int np, int n, const int* subnp, const int* offst) { + Nodes nodes(np); + const auto nh = nodes.get_nh(); + std::vector ns; + ns.reserve(np); + for (Int ireg = 0; ireg < nh; ++ireg) { + if (ireg < n) { + ns.resize(subnp[ireg]); + for (Int i = 0; i < subnp[ireg]; ++i) + ns[i] = offst[ireg] + i; + } else { + ns.resize(np); + for (Int i = 0; i < np; ++i) ns[i] = i; + } + nodes.set(ireg, ns); + } + return nodes; +} + +bool read_xnodes (const Int np, const std::string& s, Real* const xnodes) { + const auto p = s.find("x"); + if (p == std::string::npos) return false; + std::stringstream sx(s.substr(p+1)); + for (Int i = 0; i < np; ++i) { + if (sx.rdstate() & std::istream::eofbit) return false; + sx >> xnodes[i]; + if (sx.rdstate() & std::istream::failbit) return false; + } + return true; +} + +class Basis : public UserInterpMethod { + Nodes nodes; + bool ok, free_nodal; + Real xnodes[islet::np_max]; + +public: + Basis (const std::string& basis) { + ok = nodes.init(basis); + if ( ! ok) { + printf("Invalid basis string: %s\n", basis.c_str()); + return; + } + free_nodal = read_xnodes(nodes.get_np(), basis, xnodes); + } + bool is_ok () const { return ok; } + void eval(const Real& x, Real* const v) override { + ::eval(nodes, get_xnodes(), x, v); + } + const Real* get_xnodes() const override { + return free_nodal ? 
xnodes : islet::get_x_gll_special(nodes.get_np()); + } + Int get_np() const override { return nodes.get_np(); } +}; + +} // namespace anon + +extern "C" { + +void offset_nodal_calc_xnodes_metrics (int np, int n, const int* subnp, const int* offst, + double* metrics) { + const auto nodes = make_offset_nodal(np, n, subnp, offst); + calc_xnodes_metrics(nodes, islet::get_x_gll(np), metrics); +} + +void calc_xnodes_metrics_from_basis_string (const char* basis, double* metrics) { + Nodes nodes; + const auto ok = nodes.init(basis); + if ( ! ok) { + printf("Invalid basis string: %s\n", basis); + return; + } + Real xnodes[islet::np_max]; + const auto free_nodal = read_xnodes(nodes.get_np(), basis, xnodes); + calc_xnodes_metrics(nodes, + free_nodal ? xnodes : islet::get_x_gll_special(nodes.get_np()), + metrics); +} + +void calc_lebesgue_consts_from_basis_string (const char* basis, double* metrics) { + Nodes nodes; + const auto ok = nodes.init(basis); + if ( ! ok) { + printf("Invalid basis string: %s\n", basis); + return; + } + Real xnodes[islet::np_max]; + const auto free_nodal = read_xnodes(nodes.get_np(), basis, xnodes); + calc_lebesgue_consts(nodes, + free_nodal ? xnodes : islet::get_x_gll_special(nodes.get_np()), + metrics); +} + +void run_thorough_diagnostics_from_basis_string (const char* basis) { + { + Real m[3]; + calc_xnodes_metrics_from_basis_string(basis, m); + printf("npm %1.4e %1.4e %1.4e\n", m[0], m[1], m[2]); + } + static const int ne_max = 11111; + pum::Options po; + po.threaded = true; + po.ntrial = 33; + po.mec_ne = 333; + po.perturb = 0.01; + printf("ne,ndx_max %d po.ntrial %d po.mec_ne %d po.perturb %1.4f\n", + ne_max, po.ntrial, po.mec_ne, po.perturb); + auto b = std::make_shared(basis); + if ( ! 
b->is_ok()) return; + { + MaxEigComputer mec; + const auto meam1 = mec.run(b->get_np(), ne_max, ne_max, 1e-13, true, b); + printf("meam1 %1.4e\n", meam1); + } + { + Real pum_max = 0; + printf("pum:"); fflush(stdout); + for (Int ne = 3; ne <= 15; ++ne) { + po.ne = ne; + pum::PerturbedUniformMeshMetric pum(b, po); + const auto pum_val = pum.run(); + printf(" %1.1e", pum_val); fflush(stdout); + pum_max = std::max(pum_max, pum_val); + } + printf("\npum_max %1.4e\n", pum_max); + } +} + +} // extern "C" diff --git a/methods/islet/islet_studymetrics.hpp b/methods/islet/islet_studymetrics.hpp new file mode 100644 index 0000000..9029932 --- /dev/null +++ b/methods/islet/islet_studymetrics.hpp @@ -0,0 +1,6 @@ +#ifndef INCLUDE_ISLET_STUDYMETRICS_HPP +#define INCLUDE_ISLET_STUDYMETRICS_HPP + +extern "C" void run_thorough_diagnostics_from_basis_string(const char* basis); + +#endif diff --git a/methods/islet/islet_tables.cpp b/methods/islet/islet_tables.cpp new file mode 100644 index 0000000..c66366f --- /dev/null +++ b/methods/islet/islet_tables.cpp @@ -0,0 +1,268 @@ +#include "islet_types.hpp" +#include "islet_util.hpp" +#include "islet_tables.hpp" + +namespace islet { + +static const Real sqrt5 = std::sqrt(5.0); +static const Real oosqrt5 = 1.0/sqrt5; +static const Real sqrt3o7 = std::sqrt(3.0/7.0); +static const Real np6a = std::sqrt(1.0/3.0 + 2.0*std::sqrt(7.0)/21.0); +static const Real np6b = std::sqrt(1.0/3.0 - 2.0*std::sqrt(7.0)/21.0); +static const Real np7a = std::sqrt((5.0 + 2.0*std::sqrt(5.0/3.0))/11.0); +static const Real np7b = std::sqrt((5.0 - 2.0*std::sqrt(5.0/3.0))/11.0); + +Real x_gll_table[] = { + -1, 1, + -1, 0, 1, + -1, -oosqrt5, oosqrt5, 1, + -1, -sqrt3o7, 0, sqrt3o7, 1, + -1, -np6a, -np6b, np6b, np6a, 1, + -1, -np7a, -np7b, 0, np7b, np7a, 1, + // The rest are obtained from Michels, H. H. "Abscissas and weight + // coefficients for Lobatto quadrature." Mathematics of Computation 17.83 + // (1963): 237-244. 
+ // np 8 + -1, -0.8717401485096066153, -0.59170018143314230214, -0.20929921790247886877, + 0.20929921790247886877, 0.59170018143314230214, 0.87174014850960661534, 1, + // np 9 + -1, -0.89975799541146015731, -0.67718627951073775345, -0.36311746382617815871, + 0, 0.36311746382617815871, 0.67718627951073775345, 0.89975799541146015731, 1, + // np 10 + -1, -0.91953390816645881383, -0.73877386510550507500, -0.47792494981044449566, + -0.16527895766638702463, 0.16527895766638702463, 0.47792494981044449566, + 0.73877386510550507500, 0.91953390816645881383, 1, + // np 11 + -1, -0.93400143040805913433, -0.78448347366314441862, -0.56523532699620500647, + -0.29575813558693939143, 0, 0.29575813558693939143, 0.56523532699620500647, + 0.78448347366314441862, 0.93400143040805913433, 1, + // np 12 + -1, -0.94489927222288222341, -0.81927932164400667835, -0.63287615303186067766, + -0.39953094096534893226, -0.13655293285492755486, 0.13655293285492755486, + 0.39953094096534893226, 0.63287615303186067766, 0.81927932164400667835, + 0.94489927222288222341, 1, + // np 13 + -1, -0.95330984664216391190, -0.84634756465187231687, -0.68618846908175742607, + -0.48290982109133620175, -0.24928693010623999257, 0, 0.24928693010623999257, + 0.48290982109133620175, 0.68618846908175742607, 0.84634756465187231687, + 0.95330984664216391190, 1, + // np 16 + -1, -0.96956804627021793295, -0.89920053309347209299, -0.79200829186181506393, + -0.65238870288249308947, -0.48605942188713761178, -0.29983046890076320810, + -0.10132627352194944784, 0.10132627352194944784, 0.29983046890076320810, + 0.48605942188713761178, 0.65238870288249308947, 0.79200829186181506393, + 0.89920053309347209299, 0.96956804627021793295, 1 +}; + +Real w_gll_table[] = { + 1, 1, +#define v0 1.0/3.0 + v0, 4.0/3.0, v0, +#undef v0 +#define v0 1.0/6.0 +#define v1 5.0/6.0 + v0, v1, v1, v0, +#undef v0 +#undef v1 +#define v0 1.0/10.0 +#define v1 49.0/90.0 + v0, v1, 32.0/45.0, v1, v0, +#undef v0 +#undef v1 +#define v0 1.0/15.0 +#define v1 (14 - 
std::sqrt(7.0))/30.0 +#define v2 (14 + std::sqrt(7.0))/30.0 + v0, v1, v2, v2, v1, v0, +#undef v0 +#undef v1 +#undef v2 +#define v0 1.0/21.0 +#define v1 (124 - 7*std::sqrt(15.0))/350.0 +#define v2 (124 + 7*std::sqrt(15.0))/350.0 + v0, v1, v2, 256.0/525.0, v2, v1, v0, +#undef v0 +#undef v1 +#undef v2 + // The rest are obtained from the reference in x_gll_table. + // np 8 + 0.03571428571428571429, 0.21070422714350603938, 0.34112269248350436476, + 0.41245879465870388157, 0.41245879465870388157, 0.34112269248350436476, + 0.21070422714350603938, 0.03571428571428571429, + // np 9 + 0.02777777777777777778, 0.16549536156080552505, 0.27453871250016173528, + 0.34642851097304634512, 0.37151927437641723356, 0.34642851097304634512, + 0.27453871250016173528, 0.16549536156080552505, 0.02777777777777777778, + // np 10 + 0.02222222222222222222, 0.13330599085107011113, 0.22488934206312645212, + 0.29204268367968375788, 0.32753976118389745666, 0.32753976118389745666, + 0.29204268367968375788, 0.22488934206312645212, 0.13330599085107011113, + 0.02222222222222222222, + // np 11 + 0.01818181818181818182, 0.10961227326699486446, 0.18716988178030520411, + 0.24804810426402831404, 0.28687912477900808868, 0.30021759545569069379, + 0.28687912477900808868, 0.24804810426402831404, 0.18716988178030520411, + 0.10961227326699486446, 0.01818181818181818182, + // np 12 + 0.01515151515151515152, 0.09168451741319613067, 0.15797470556437011517, + 0.21250841776102114536, 0.25127560319920128029, 0.27140524091069617700, + 0.27140524091069617700, 0.25127560319920128029, 0.21250841776102114536, + 0.15797470556437011517, 0.09168451741319613067, 0.01515151515151515152, + // np 13 + 0.01282051282051282051, 0.07780168674681892779, 0.13498192668960834912, + 0.18364686520355009201, 0.22076779356611008609, 0.24401579030667635646, + 0.25193084933344673604, 0.24401579030667635646, 0.22076779356611008609, + 0.18364686520355009201, 0.13498192668960834912, 0.07780168674681892779, + 0.01282051282051282051, + // np 16 + 
0.00833333333333333333, 0.05085036100591990540, 0.08939369732593080099, + 0.12425538213251409835, 0.15402698080716428081, 0.17749191339170412530, + 0.19369002382520358432, 0.20195830817822987149, 0.20195830817822987149, + 0.19369002382520358432, 0.17749191339170412530, 0.15402698080716428081, + 0.12425538213251409835, 0.08939369732593080099, 0.05085036100591990540, + 0.00833333333333333333 +}; + +/* These Gauss-Legendre tables were obtained using + $ gsl-config --version + 1.15 + with calls to + gsl_integration_glfixed_table_alloc + gsl_integration_glfixed_point + gsl_integration_glfixed_table_free. + */ + +Real x_gl_table[] = { + // np 1 + 0.000000000000000000, + // np 2 + -0.577350269189625731, 0.577350269189625731, + // np 3 + -0.774596669241483404, 0.000000000000000000, 0.774596669241483404, + // np 4 + -0.861136311594052573, -0.339981043584856257, 0.339981043584856257, + 0.861136311594052573, + // np 5 + -0.906179845938663964, -0.538469310105683108, 0.000000000000000000, + 0.538469310105683108, 0.906179845938663964, + // np 6 + -0.932469514203152050, -0.661209386466264482, -0.238619186083196905, + 0.238619186083196905, 0.661209386466264482, 0.932469514203152050, + // np 7 + -0.949107912342758486, -0.741531185599394460, -0.405845151377397184, + 0.000000000000000000, 0.405845151377397184, 0.741531185599394460, + 0.949107912342758486, + // np 8 + -0.960289856497536287, -0.796666477413626728, -0.525532409916328991, + -0.183434642495649808, 0.183434642495649808, 0.525532409916328991, + 0.796666477413626728, 0.960289856497536287, + // np 9 + -0.968160239507626086, -0.836031107326635770, -0.613371432700590358, + -0.324253423403808916, 0.000000000000000000, 0.324253423403808916, + 0.613371432700590358, 0.836031107326635770, 0.968160239507626086, + // np 10 + -0.973906528517171743, -0.865063366688984536, -0.679409568299024436, + -0.433395394129247213, -0.148874338981631216, 0.148874338981631216, + 0.433395394129247213, 0.679409568299024436, 0.865063366688984536, + 
0.973906528517171743, + // np 11 + -0.978228658146056973, -0.887062599768095317, -0.730152005574049356, + -0.519096129206811807, -0.269543155952344959, 0.000000000000000000, + 0.269543155952344959, 0.519096129206811807, 0.730152005574049356, + 0.887062599768095317, 0.978228658146056973, + // np 12 + -0.981560634246719244, -0.904117256370474909, -0.769902674194304693, + -0.587317954286617483, -0.367831498998180184, -0.125233408511468913, + 0.125233408511468913, 0.367831498998180184, 0.587317954286617483, + 0.769902674194304693, 0.904117256370474909, 0.981560634246719244 +}; + +Real w_gl_table[] = { + // np 1 + 2.000000000000000000, + // np 2 + 1.000000000000000000, 1.000000000000000000, + // np 3 + 0.555555555555555580, 0.888888888888888840, 0.555555555555555580, + // np 4 + 0.347854845137453850, 0.652145154862546095, 0.652145154862546095, + 0.347854845137453850, + // np 5 + 0.236926885056189085, 0.478628670499366471, 0.568888888888888888, + 0.478628670499366471, 0.236926885056189085, + // np 6 + 0.171324492379170357, 0.360761573048138606, 0.467913934572691037, + 0.467913934572691037, 0.360761573048138606, 0.171324492379170357, + // np 7 + 0.129484966168869703, 0.279705391489276645, 0.381830050505118923, + 0.417959183673469403, 0.381830050505118923, 0.279705391489276645, + 0.129484966168869703, + // np 8 + 0.101228536290376259, 0.222381034453374482, 0.313706645877887269, + 0.362683783378361990, 0.362683783378361990, 0.313706645877887269, + 0.222381034453374482, 0.101228536290376259, + // np 9 + 0.081274388361574412, 0.180648160694857396, 0.260610696402935438, + 0.312347077040002863, 0.330239355001259782, 0.312347077040002863, + 0.260610696402935438, 0.180648160694857396, 0.081274388361574412, + // np 10 + 0.066671344308688138, 0.149451349150580587, 0.219086362515982042, + 0.269266719309996350, 0.295524224714752870, 0.295524224714752870, + 0.269266719309996350, 0.219086362515982042, 0.149451349150580587, + 0.066671344308688138, + // np 11 + 0.055668567116173663, 
0.125580369464904612, 0.186290210927734262, + 0.233193764591990482, 0.262804544510246652, 0.272925086777900616, + 0.262804544510246652, 0.233193764591990482, 0.186290210927734262, + 0.125580369464904612, 0.055668567116173663, + // np 12 + 0.047175336386511828, 0.106939325995318427, 0.160078328543346221, + 0.203167426723065925, 0.233492536538354806, 0.249147045813402773, + 0.249147045813402773, 0.233492536538354806, 0.203167426723065925, + 0.160078328543346221, 0.106939325995318427, 0.047175336386511828 +}; + +bool get_gll_supported (const Int np) { return np >= 2 && np <= 13 || np == 16; } + +const Real* get_x_gll (const Int np) { + throw_if(np < 2, "get_x_gll: np < 2 not supported."); + if (np <= 13) + return x_gll_table + (np*(np-1))/2 - 1; + else + return get_x_gll_special(np); +} + +const Real* get_w_gll (const Int np) { + throw_if(np < 2, "get_x_gll: np < 2 not supported."); + if (np <= 13) + return w_gll_table + (np*(np-1))/2 - 1; + else + return get_w_gll_special(np); +} + +const Real* get_x_gll_special (const Int np) { + if (np <= 13) return get_x_gll(np); + throw_if(np != 16, "np 16 only is supported"); + const auto end = x_gll_table + (14*(14-1))/2 - 1; + return end; +} + +const Real* get_w_gll_special (const Int np) { + if (np <= 13) return get_w_gll(np); + throw_if(np != 16, "np 16 only is supported"); + const auto end = w_gll_table + (14*(14-1))/2 - 1; + return end; +} + +const Real* get_x_gl (const Int np) { + throw_if(np < 1, "get_x_gll: np < 1 not supported."); + throw_if(np > 12, "get_x_gll: np > 12 not supported."); + return x_gl_table + (np*(np-1))/2; +} + +const Real* get_w_gl (const Int np) { + throw_if(np < 1, "get_x_gll: np < 1 not supported."); + throw_if(np > 12, "get_w_gll: np > 12 not supported."); + return w_gl_table + (np*(np-1))/2; +} + +} // namespace islet diff --git a/methods/islet/islet_tables.hpp b/methods/islet/islet_tables.hpp new file mode 100644 index 0000000..04a5c7d --- /dev/null +++ b/methods/islet/islet_tables.hpp @@ -0,0 
+1,27 @@ +#ifndef INCLUDE_ISLET_TABLES_HPP +#define INCLUDE_ISLET_TABLES_HPP + +#include "islet_types.hpp" + +namespace islet { +static const Int np_max = 16; + +// Gauss-Lobatto-Legendre +bool get_gll_supported(const Int np); +const Real* get_x_gll(const Int np); +const Real* get_w_gll(const Int np); +const Real* get_x_gll_special(const Int np); +const Real* get_w_gll_special(const Int np); +// Gauss-Legendre +const Real* get_x_gl (const Int np); +const Real* get_w_gl (const Int np); + +// Gauss-Lobatto-Legendre +extern Real x_gll_table[]; +extern Real w_gll_table[]; +// Gauss-Legendre +extern Real x_gl_table[]; +extern Real w_gl_table[]; +} + +#endif diff --git a/methods/islet/islet_types.hpp b/methods/islet/islet_types.hpp new file mode 100644 index 0000000..3137009 --- /dev/null +++ b/methods/islet/islet_types.hpp @@ -0,0 +1,12 @@ +#ifndef INCLUDE_ISLET_TYPES_HPP +#define INCLUDE_ISLET_TYPES_HPP + +#include + +typedef int Int; +typedef double Real; +typedef std::complex Complex; +typedef int fint; +typedef Int Size; + +#endif diff --git a/methods/islet/islet_util.hpp b/methods/islet/islet_util.hpp new file mode 100644 index 0000000..6c17e3b --- /dev/null +++ b/methods/islet/islet_util.hpp @@ -0,0 +1,264 @@ +#ifndef INCLUDE_ISLET_UTIL_HPP +#define INCLUDE_ISLET_UTIL_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "islet_types.hpp" + +namespace islet { +#define throw_if(condition, message) do { \ + if (condition) { \ + std::stringstream _ss_; \ + _ss_ << __FILE__ << ":" << __LINE__ << ": The condition:\n" \ + << #condition "\nled to the exception\n" << message << "\n"; \ + throw std::logic_error(_ss_.str()); \ + } \ + } while (0) + +#define require(condition) do { \ + if ( ! (condition)) { \ + std::stringstream _ss_; \ + _ss_ << __FILE__ << ":" << __LINE__ << ": FAIL:\n" << #condition \ + << "\n"; \ + throw std::logic_error(_ss_.str()); \ + } \ + } while (0) +#define require_msg(condition, message) do { \ + if ( ! 
// Write the raw bytes of one value of type T to a binary output stream.
//
// Returns true only if the stream is usable after the write. The check uses
// fail(), which covers both failbit and badbit. Testing bad() alone reported
// success for a stream that was already in a failed state (for example, one
// whose file could not be opened), in which case nothing is written.
template <typename T>
bool write (std::ofstream& os, const T s) {
  return ! os.write((const char*) &s, sizeof(T)).fail();
}
os.write((const char*) d, n*sizeof(T)).bad()); +} + +template +bool read (std::ifstream& os, T& s) { + return ! os.read((char*) &s, sizeof(T)).bad(); +} + +template +bool read (std::ifstream& os, Int& n, T* const d) { + return (read(os, n) && + ! os.read((char*) d, n*sizeof(T)).bad()); +} + +template class Array { + T* p_; + std::size_t n_, cap_; +public: + Array () { init(); } + Array(std::size_t n); + Array(std::size_t n, const T& init); + Array(const Array& a); + ~Array () { clear(); } + // Initialize the object with the assumption that all variables are uninit'ed + // prior to calling. + void init(); + void clear(); + // optclear means optionally clear. The function has the semantics of + // clearing, but it may not actually release the memory. + void optclear_and_resize(std::size_t n); + // _ft indicates first touch. + void optclear_and_resize_ft(std::size_t n); + void optclear_and_resize(std::size_t n, const T& i); + void optclear_and_reserve(std::size_t n); + void optclear_and_reserve_ft(std::size_t n); + T& operator[] (std::size_t i) { return p_[i]; } + const T& operator[] (std::size_t i) const { return p_[i]; } + T& back () { return p_[n_-1]; } + const T& back () const { return p_[n_-1]; } + std::size_t size () const { return n_; } + bool empty () const { return size() == 0; } + T* data () const { return p_; } + // This does not realloc; reserve must provide the necessary memory. It does + // not throw, either. It asserts. + void unsafe_push_back(const T& e); + T* begin () { return p_; } + T* end () { return p_ + n_; } + const T* begin () const { return p_; } + const T* end () const { return p_ + n_; } + void set (const T& v) { for (std::size_t i = 0; i < n_; ++i) p_[i] = v; } +}; + +template inline int len (const Array& v) +{ return static_cast(v.size()); } + +template inline void touch (T* const p, const size_t n, + const T& init = T()) { + // 1 KB should be a safe lower bound on page size. 
Touch enough to touch every + // page; I don't think there's any need to touch more memory than that. + for (size_t i = 0; i < n; i += 1024 / sizeof(T)) + p[i] = init; + // Make sure the last part is touched. + if (n) p[n-1] = init; +} +template inline T* +allocn (const size_t n, const bool first_touch = false) { + if ( ! n) return 0; + T* p = new T[n]; + if (first_touch) touch(p, n); + return p; +} +template inline void deln (T*& p) { + if (p) delete[] p; + p = 0; +} +template inline void deln_const (const T* p) { + if (p) delete[] p; +} +template inline void del (T*& p) { + if (p) delete p; + p = 0; +} + +template +inline void Array::init () { + n_ = cap_ = 0; + p_ = 0; +} + +template +inline Array::Array (std::size_t n) + : p_(0), n_(0), cap_(0) +{ optclear_and_resize(n); } + +template +inline Array::Array (std::size_t n, const T& init) + : p_(0), n_(0), cap_(0) +{ optclear_and_resize(n, init); } + +template +inline Array::Array (const Array& a) { + init(); + optclear_and_resize(a.size()); + std::copy(a.begin(), a.end(), begin()); +} + +template +inline void Array::clear () { + n_ = cap_ = 0; + deln(p_); +} + +template +inline void Array::optclear_and_reserve (std::size_t n) { + n_ = 0; + if (n <= cap_) return; + clear(); + p_ = allocn(n); + cap_ = n; +} + +template +inline void Array::optclear_and_reserve_ft (std::size_t n) { + n_ = 0; + if (n <= cap_) return; + clear(); + p_ = allocn(n, true); + cap_ = n; +} + +template +inline void Array::optclear_and_resize (std::size_t n) { + if (n <= cap_) { + n_ = n; + return; + } + optclear_and_reserve(n); + n_ = n; +} + +template +inline void Array::optclear_and_resize_ft (std::size_t n) { + if (n <= cap_) { + n_ = n; + return; + } + optclear_and_reserve_ft(n); + n_ = n; +} + +template +inline void Array::optclear_and_resize (std::size_t n, const T& init) { + optclear_and_resize(n); + for (std::size_t i = 0; i < n_; ++i) + memcpy(p_ + i, &init, sizeof(init)); +} + +template +inline void Array::unsafe_push_back (const T& 
e) { + assert(n_ < cap_); + p_[n_++] = e; +} + +} // namespace islet + +#endif diff --git a/methods/islet/islet_xnodes_metrics.cpp b/methods/islet/islet_xnodes_metrics.cpp new file mode 100644 index 0000000..9c516ab --- /dev/null +++ b/methods/islet/islet_xnodes_metrics.cpp @@ -0,0 +1,257 @@ +#include "islet_xnodes_metrics.hpp" +#include "islet_tables.hpp" +#include "islet_npx.hpp" +#include "islet_maxeigcomp.hpp" +#include "islet_pum.hpp" +#include "islet_isl.hpp" +#include "islet_util.hpp" + +static Real factorial (const Int n) { + Real f = 1; + for (Int i = 2; i <= n; ++i) f *= i; + return f; +} + +void calc_xnodes_metrics (const Nodes& nodes, const Real* const xnodes, Real* metrics) { + const Int np = nodes.get_np(), nph = np/2, nseg = 100; + Real npm1 = 0, npm2 = 0, npm_max = 0; + for (Int ireg = 0; ireg < nph; ++ireg) { + const bool center = np % 2 == 0 && ireg == nph-1; + const auto xs = xnodes[ireg], xe = xnodes[ireg+1]; + const auto subnp = nodes.get_subnp()[ireg]; + const auto active = nodes.get_nodes()[ireg]; + Real npm1_reg = 0, npm2_reg = 0, npm_max_reg = 0; + for (Int seg = 0; seg < nseg; ++seg) { + const auto x = xs + (seg + 0.5)*(xe - xs)/nseg; + Real f = 1; + for (Int i = 0; i < subnp; ++i) + f *= x - xnodes[active[i]]; + npm1_reg += std::abs(f); + npm2_reg += islet::square(f); + npm_max_reg = std::max(npm_max_reg, std::abs(f)); + } + const auto fac = factorial(subnp); + const auto f = (center ? 
1 : 2)*(xe - xs)/fac/nseg; + npm1 += f*npm1_reg; + npm2 += f*npm2_reg/fac; // need an extra fac b/c of square + npm_max = std::max(npm_max, npm_max_reg/fac); + } + metrics[0] = npm1; + metrics[1] = std::sqrt(npm2); + metrics[2] = npm_max; +} + +Real calc_xnodes_metric (const Nodes& nodes, const Real* const xnodes) { + Real metrics[3]; + calc_xnodes_metrics(nodes, xnodes, metrics); + return metrics[0]; +} + +void calc_lebesgue_consts (const Nodes& nodes, const Real* const xnodes, Real* metrics) { + const Int np = nodes.get_np(), nph = np/2, nseg = 100; + Real npm1 = 0, npm2 = 0, npm_max = 0; + for (Int ireg = 0; ireg < nph; ++ireg) { + const bool center = np % 2 == 0 && ireg == nph-1; + const auto xs = xnodes[ireg], xe = xnodes[ireg+1]; + const auto subnp = nodes.get_subnp()[ireg]; + const auto active = nodes.get_nodes()[ireg]; + Real npm1_reg = 0, npm2_reg = 0, npm_max_reg = 0; + for (Int seg = 0; seg < nseg; ++seg) { + const auto x = xs + (seg + 0.5)*(xe - xs)/nseg; + Real f = 0; + for (Int i = 0; i < subnp; ++i) { + Real g = 1; + for (Int j = 0; j < subnp; ++j) { + if (j == i) continue; + g *= (x - xnodes[active[j]])/(xnodes[active[i]] - xnodes[active[j]]); + } + f += std::abs(g); + } + npm1_reg += f; + npm2_reg += islet::square(f); + npm_max_reg = std::max(npm_max_reg, f); + } + const auto f = (center ? 1 : 2)*(xe - xs)/nseg; + npm1 += f*npm1_reg; + npm2 += f*npm2_reg; // need an extra fac b/c of square + npm_max = std::max(npm_max, npm_max_reg); + } + metrics[0] = npm1; + metrics[1] = std::sqrt(npm2); + metrics[2] = npm_max; +} + +MetricsTracker::MetricsTracker (const Int np, bool very_strict) { + pum_min = pum_max = 1; + const Real fac = std::pow(std::numeric_limits::epsilon(), 1.0/nbin); + pum_bins[0] = 1; + for (Int i = 0; i < nbin; ++i) pum_bins[i+1] = pum_bins[i]*fac; + //islet::prarr("pum_bins",pum_bins,nbin); + // From findbasic. 
+ Real iv[3]; + if (false) { + switch (np) { + case 4: iv[0] = 1.575830e-02; iv[1] = 1.278167e-02; iv[2] = 1.510916e-02; break; + case 5: iv[0] = 2.549179e-03; iv[1] = 2.582596e-03; iv[2] = 4.154765e-03; break; + case 6: iv[0] = 2.393393e-04; iv[1] = 2.104595e-04; iv[2] = 2.816403e-04; break; // use early findcombo results too + case 7: iv[0] = 5.557714e-05; iv[1] = 4.790768e-05; iv[2] = 6.934868e-05; break; + case 8: iv[0] = 7.265137e-06; iv[1] = 7.988089e-06; iv[2] = 1.618560e-05; break; + case 9: iv[0] = 7.860606e-07; iv[1] = 7.683143e-07; iv[2] = 1.179540e-06; break; + case 10: iv[0] = 1.075794e-07; iv[1] = 9.532486e-08; iv[2] = 1.540700e-07; break; + case 11: iv[0] = 1.589070e-08; iv[1] = 1.867321e-08; iv[2] = 3.345386e-08; break; + case 12: iv[0] = 6.963036e-10; iv[1] = 8.920290e-10; iv[2] = 1.715838e-09; break; + case 13: iv[0] = 4.127583e-11; iv[1] = 4.809655e-11; iv[2] = 9.223544e-11; break; + default: iv[0] = iv[1] = iv[2] = 1; + } + // 12 is taking too long, and incomplete search strongly suggests we can + // restrict our attention to pum < 1e-6. + if (np == 12) set_pum_max(1e-6); + } else { + iv[0] = iv[1] = iv[2] = 1; + } + if (very_strict) { + // Use GLL nodal search results to make this as small as possible. 
+ Real pum_max; + switch (np) { + case 4: pum_max = 2.6609e-15; break; + case 5: pum_max = 1.0509e-07; break; + case 6: pum_max = 1.0809e-09; break; + case 7: pum_max = 4.7909e-09; break; + case 8: pum_max = 8.8109e-09; break; + case 9: pum_max = 3.6409e-09; break; + case 10: pum_max = 1.4409e-08; break; + case 11: pum_max = 3.5009e-07; break; + case 12: pum_max = 1.4509e-07; break; + default: assert(0); + } + set_pum_max(pum_max); + } + for (Int i = 0; i < nmet*nbin; ++i) best_metrics[i] = iv[i % nmet]; +} + +void MetricsTracker::set_pum_max (const Real pum_max_) { + pum_max = pum_max_; + assert(pum_max <= 1 && pum_max > 0); +} + +bool MetricsTracker +::acceptable_metrics (const Nodes& nodes, const Real* xnodes, + const Real* metrics) const { + for (Int i = 0; i < nmet*nbin; ++i) + if (metrics[i % nmet] < best_metrics[i]) + return true; + return false; +} + +Real MetricsTracker +::pum_to_accept (const Nodes& nodes, const Real* xnodes, + const Real* metrics) const { + for (Int i = 0; i < nmet*nbin; ++i) + if (metrics[i % nmet] < best_metrics[i]) + return std::min(pum_max, pum_bins[i/nmet]); + return 0; +} + +bool MetricsTracker +::would_update (const Real* metrics, const Real& pum) const { + if (pum > pum_max) return false; + Int bin; + for (bin = 0; bin < nbin; ++bin) + if (bin == nbin-1 || pum >= pum_bins[bin+1]) + break; + for (Int i = 0; i < nmet; ++i) + if (metrics[i] < best_metrics[nmet*bin + i]) + return true; + return false; +} + +void MetricsTracker::update (const Real* metrics, const Real& pum) { + bool updated = false; + for (Int bin = 0; bin < nbin; ++bin) { + if (pum > pum_bins[bin]) break; + for (Int i = 0; i < nmet; ++i) + if (metrics[i] < best_metrics[nmet*bin + i]) { + best_metrics[nmet*bin + i] = metrics[i]; + updated = true; + } + } + if (updated) pum_min = std::min(pum_min, pum); +} + +void MetricsTracker::get_metrics (Real pum, Real* metrics) const { + Int bin; + for (bin = 0; bin < nbin; ++bin) + if (pum > pum_bins[bin]) break; + bin = 
std::max(0, bin-1); + for (Int i = 0; i < nmet; ++i) + metrics[i] = best_metrics[nmet*bin + i]; +} + +bool MetricsTracker::write (std::ofstream& os) { + using islet::write; + return (write(os, nmet) && + write(os, nbin) && + write(os, nmet*nbin, best_metrics) && + write(os, nbin+1, pum_bins) && + write(os, pum_max) && + write(os, pum_min)); +} + +bool MetricsTracker::read (std::ifstream& os) { + using islet::read; + Int lnmet, lnbin, n; + const bool ok = (read(os, lnmet) && lnmet == nmet && + read(os, lnbin) && lnbin == nbin && + read(os, n, best_metrics) && n == nmet*nbin && + read(os, n, pum_bins) && n == nbin+1 && + read(os, pum_max) && + read(os, pum_min)); + return ok; +} + +static void symmetrize (const Int n, Real* x) { + const Int nh = n/2; + for (Int i = 0; i < nh; ++i) x[n-1-i] = -x[i]; +} + +void calc_weights (const Nodes& nodes, const Real* const xnode, Real* const wt) { + // Quadrature coefficients. + const Int qn = 7; + const Real* const qx = islet::get_x_gll(qn); + const Real* const qw = islet::get_w_gll(qn); + const Int np = nodes.get_np(); + Real v[islet::np_max], integral[islet::np_max] = {0}; + for (Int ireg = 0; ireg < np-1; ++ireg) { + Real reg_integral[islet::np_max] = {0}; + for (Int qi = 0; qi < qn; ++qi) { + const auto alpha = 0.5*(qx[qi] + 1); + const auto x = (1 - alpha)*xnode[ireg] + alpha*xnode[ireg+1]; + eval(nodes.get_np(), nodes.include_bdy(), xnode, + nodes.get_subnp(), nodes.get_nodes(), x, v); + for (Int i = 0; i < np; ++i) + reg_integral[i] += qw[qi]*v[i]; + } + const auto fac = 0.5*(xnode[ireg+1] - xnode[ireg]); + for (Int i = 0; i < np; ++i) + integral[i] += fac*reg_integral[i]; + } + // Numerically symmetrize. + for (Int ireg = 0; ireg < np/2; ++ireg) { + // std::min is to prevent a spurious -Warray-bounds warning. 
+ const Int other = std::min(islet::np_max-1, np-ireg-1); + integral[ireg] = integral[other] = + 0.5*(integral[ireg] + integral[other]); + } + for (Int i = 0; i < np; ++i) wt[i] = integral[i]; +} + +static bool has_all_positive_weights (const Nodes& nodes, const Real* const xnode) { + Real wt[islet::np_max]; + calc_weights(nodes, xnode, wt); + bool pve = true; + const Int np = nodes.get_np(); + for (Int i = 0; i < np; ++i) + if (wt[i] <= 0) + pve = false; + return pve; +} diff --git a/methods/islet/islet_xnodes_metrics.hpp b/methods/islet/islet_xnodes_metrics.hpp new file mode 100644 index 0000000..14bcbe3 --- /dev/null +++ b/methods/islet/islet_xnodes_metrics.hpp @@ -0,0 +1,53 @@ +#ifndef INCLUDE_ISLET_XNODES_METRICS_HPP +#define INCLUDE_ISLET_XNODES_METRICS_HPP + +#include "islet_types.hpp" +#include "islet_nodalbasis.hpp" + +#include +#include + +// l1 only +Real calc_xnodes_metric(const Nodes& nodes, const Real* const xnodes); +// l1, l2, linf +void calc_xnodes_metrics(const Nodes& nodes, const Real* const xnodes, Real* metrics); + +void calc_lebesgue_consts(const Nodes& nodes, const Real* const xnodes, Real* metrics); + +void calc_weights(const Nodes& nodes, const Real* const xnode, Real* const wt); + +struct MetricsTracker { + typedef std::shared_ptr Ptr; + + MetricsTracker(Int np, bool very_strict = false); + + void set_pum_max(Real pum); // optional; default is 1 + Real get_pum_max () const { return pum_max; } + + // Min pum seen so far. If none, return 1. + Real get_pum_min () const { return pum_min; } + + // Compute metrics. Return whether these are provisionally acceptable. + bool acceptable_metrics(const Nodes& nodes, const Real* xnodes, + const Real* metrics) const; + // pum needs to be <= this value to update. + Real pum_to_accept(const Nodes& nodes, const Real* xnodes, + const Real* metrics) const; + // Would update based on metrics and pum? + bool would_update(const Real* metrics, const Real& pum) const; + // Do the update. 
+ void update(const Real* metrics, const Real& pum); + + void get_metrics(Real pum, Real* metrics) const; + + bool write(std::ofstream& os); + bool read(std::ifstream& os); + +private: + static const Int nmet = 3, nbin = 30; + Real best_metrics[nmet*nbin]; // l1, l2, linf + Real pum_bins[nbin+1]; + Real pum_max, pum_min; +}; + +#endif diff --git a/methods/islet/make-depends.sh b/methods/islet/make-depends.sh new file mode 100644 index 0000000..3f50949 --- /dev/null +++ b/methods/islet/make-depends.sh @@ -0,0 +1,3 @@ +for i in *.cpp; do + g++ -MM $i +done > make.depends diff --git a/methods/islet/make.depends b/methods/islet/make.depends new file mode 100644 index 0000000..c910667 --- /dev/null +++ b/methods/islet/make.depends @@ -0,0 +1,40 @@ +cslunstab.o: cslunstab.cpp +islet_isl.o: islet_isl.cpp islet_tables.hpp islet_types.hpp \ + islet_util.hpp islet_isl.hpp islet_interpmethod.hpp \ + islet_xnodes_metrics.hpp islet_nodalbasis.hpp islet_npx.hpp +islet_maxeigcomp.o: islet_maxeigcomp.cpp islet_maxeigcomp.hpp \ + islet_types.hpp islet_util.hpp islet_interpmethod.hpp islet_npx.hpp \ + islet_tables.hpp +islet_nodalbasis.o: islet_nodalbasis.cpp islet_nodalbasis.hpp \ + islet_types.hpp islet_util.hpp islet_npx.hpp islet_tables.hpp \ + islet_interpmethod.hpp +islet_np4.o: islet_np4.cpp islet_np4.hpp islet_types.hpp islet_isl.hpp \ + islet_interpmethod.hpp +islet_npx.o: islet_npx.cpp islet_npx.hpp islet_util.hpp islet_types.hpp \ + islet_tables.hpp islet_interpmethod.hpp +islet_pum.o: islet_pum.cpp islet_pum.hpp islet_types.hpp \ + islet_maxeigcomp.hpp islet_util.hpp islet_interpmethod.hpp islet_npx.hpp \ + islet_tables.hpp islet_isl.hpp +islet_studymetrics.o: islet_studymetrics.cpp islet_pum.hpp \ + islet_types.hpp islet_maxeigcomp.hpp islet_util.hpp \ + islet_interpmethod.hpp islet_npx.hpp islet_tables.hpp \ + islet_xnodes_metrics.hpp islet_nodalbasis.hpp +islet_tables.o: islet_tables.cpp islet_types.hpp islet_util.hpp \ + islet_tables.hpp +islet_xnodes_metrics.o: 
islet_xnodes_metrics.cpp islet_xnodes_metrics.hpp \ + islet_types.hpp islet_nodalbasis.hpp islet_util.hpp islet_tables.hpp \ + islet_npx.hpp islet_interpmethod.hpp islet_maxeigcomp.hpp islet_pum.hpp \ + islet_isl.hpp +pum_sweep.o: pum_sweep.cpp islet_isl.hpp islet_types.hpp \ + islet_interpmethod.hpp islet_pum.hpp islet_maxeigcomp.hpp islet_util.hpp \ + islet_npx.hpp islet_tables.hpp +run_meam1_sweep.o: run_meam1_sweep.cpp islet_isl.hpp islet_types.hpp \ + islet_interpmethod.hpp islet_maxeigcomp.hpp islet_util.hpp islet_npx.hpp \ + islet_tables.hpp islet_pum.hpp +run_np4.o: run_np4.cpp islet_np4.hpp islet_types.hpp islet_isl.hpp \ + islet_interpmethod.hpp islet_util.hpp islet_npx.hpp islet_tables.hpp \ + islet_xnodes_metrics.hpp islet_nodalbasis.hpp islet_maxeigcomp.hpp \ + islet_pum.hpp +search.o: search.cpp islet_tables.hpp islet_types.hpp islet_npx.hpp \ + islet_util.hpp islet_interpmethod.hpp islet_maxeigcomp.hpp \ + islet_xnodes_metrics.hpp islet_nodalbasis.hpp islet_pum.hpp diff --git a/methods/islet/make.inc.gnu b/methods/islet/make.inc.gnu new file mode 100644 index 0000000..acd2a46 --- /dev/null +++ b/methods/islet/make.inc.gnu @@ -0,0 +1,2 @@ +CXXFLAGS = -g -O3 -std=c++11 -fPIC -fopenmp +LINK_LAPACK_BLAS = -llapack -lblas diff --git a/methods/islet/pum_sweep.cpp b/methods/islet/pum_sweep.cpp new file mode 100644 index 0000000..f0c8ab9 --- /dev/null +++ b/methods/islet/pum_sweep.cpp @@ -0,0 +1,56 @@ +#include + +#include "islet_isl.hpp" +#include "islet_pum.hpp" + +// Illustrate why a 1-element hop is the key thing to study. Report meam1 at x +// in [-1,1] if meam1 >= tol. x, meam1 are 1-1. 
+static void run (const islet::Operator::ConstPtr& op, + const Int np, const Int nx, const Int ntrial, + std::map& dx2meam1) { + const auto oim = std::make_shared(np, op); + printf("%s\n", op->get_basis_string(np).c_str()); + const auto x = op->get_xnodes(np); + printf("x:"); + for (Int i = np/2; i < np; ++i) + printf(" %7.5f", x[i]); + printf("\n"); + pum::Options o; + o.threaded = true; + o.perturb = 0.01; + for (const Int mec_ne : {3, 33, 333}) + for (const Int ne : {4, 7, 15}) { + printf("ntrial %d mec_ne %d ne %d\n", ntrial, mec_ne, ne); + o.ntrial = ntrial; + o.mec_ne = mec_ne; + o.ne = ne; + pum::PerturbedUniformMeshMetric pum(oim, o); + pum.sweep_and_collect_amplitudes(nx, 1e-13, dx2meam1, false); + } + printf("final\n"); + for (const auto& e : dx2meam1) + printf("dx %1.16e meam1 %1.16e\n", e.first, e.second); +} + +int main (int argc, char** argv) { + if (argc < 5) { + printf("%s np nx ntrial (0 - natural, 1 - gll_best, 2 - uniform)\n", argv[0]); + return -1; + } + const Int np = std::atoi(argv[1]); + const Int nx = std::atoi(argv[2]); + const Int ntrial = std::atoi(argv[3]); + const Int opcode = std::atoi(argv[4]); + if (np < 4 || nx < 2 || ntrial < 1 || opcode < 0 || opcode > 2) { + printf("bad input"); + return -1; + } + const auto op = islet::Operator::create(opcode == 0 ? + islet::Operator::gll_natural : + opcode == 1 ? + islet::Operator::gll_best : + islet::Operator::uniform_offset_nodal_subset); + std::map dx2meam1; + run(op, np, nx, ntrial, dx2meam1); + return 0; +} diff --git a/methods/islet/readme.txt b/methods/islet/readme.txt new file mode 100644 index 0000000..63b8f67 --- /dev/null +++ b/methods/islet/readme.txt @@ -0,0 +1,77 @@ +This directory and the directory methods/slmm contain the code used to generate +the results in the Islet 2D paper, except the E3SM code for the GPU performance +results. + +The directory methods/islet/figures contains the scripts used to generate data +and figures. 
The file "figures/figs.tex" contains the LaTeX source for the figures. +Comments before each figure explain how to generate the data and then the figure +from these data. Bash and hy scripts are those in the "figures" directory. + +Programs need BLAS, LAPACK, and, for slmmir, Kokkos +(https://github.com/kokkos/kokkos). NetCDF is optional and was not used for the +results in this paper. We used Kokkos version 3.1 +(https://github.com/kokkos/kokkos/tree/3.3.01) in our build. + +For the methods/islet programs, on a standard Linux system with the GNU compiler suite, + ln -s make.inc.gnu make.inc + make + +The program "cslunstab" demonstrates the unstable classical cubic interpolation +semi-Lagrangian instances. Running it should produce no output, as in this case +all assertions pass. The program is self-contained and is meant to be read. See +the top of cslunstab.cpp for instructions. + +The program "search" is used to find the Islet bases. Run as follows, in this +example for np = 8: + OMP_NUM_THREADS=48 KMP_AFFINITY=balanced ./search findnodal_given_bestosn 8 +This produces output of the following form: + + np 8 + min_np 8 + min_np 7 + min_np 6 + meam1 4.9e-15 mcV 9.6e+01 mdef 1.0e+00 w>0 1 wtr 9.44e+00 npm 7.27e-06 7.99e-06 1.62e-05 pum 5.41e-08 | np 8 subnp 6 6 7 6 offst 0 0 0 1 + meam1 4.4e-15 mcV 1.3e+02 mdef 1.0e+00 w>0 1 wtr 7.48e+00 npm 1.08e-05 9.61e-06 1.62e-05 pum 1.60e-08 | np 8 subnp 6 6 6 6 offst 0 0 0 1 + min_np 5 + meam1 4.9e-15 mcV 1.3e+02 mdef 1.0e+00 w>0 1 wtr 8.31e+00 npm 7.21e-06 8.39e-06 1.62e-05 pum 1.10e-06 | np 8 subnp 5 7 7 6 offst 0 0 0 1 + meam1 4.7e-15 mcV 2.5e+02 mdef 1.0e+00 w>0 1 wtr 1.06e+01 npm 1.46e-05 1.42e-05 2.16e-05 pum 5.78e-09 | np 8 subnp 5 5 7 6 offst 0 0 0 1 + meam1 4.2e-15 mcV 4.1e+02 mdef 1.0e+00 w>0 1 wtr 1.89e+01 npm 5.08e-05 5.51e-05 8.70e-05 pum 3.04e-09 | np 8 subnp 5 6 5 6 offst 0 0 1 1 + np 8 + min_np 6 max_np 8 + min_np 6 max_np 7 + meam1 1.55e-15 w>0 1 wtr 9.30e+00 npm 7.51e-06 8.07e-06 1.62e-05 pum 2.71e-08 | np 8 subnp 6 6 
7 6 nodes | 0 1 2 3 5 6 | 0 1 2 3 4 5 | 0 1 2 3 4 5 6 | 1 2 3 4 5 6 + meam1 1.78e-15 w>0 1 wtr 9.60e+00 npm 7.71e-06 8.20e-06 1.62e-05 pum 4.29e-09 | np 8 subnp 6 6 7 6 nodes | 0 1 2 3 4 6 | 0 1 2 3 4 6 | 0 1 2 3 4 5 6 | 1 2 3 4 5 6 + meam1 1.78e-15 w>0 1 wtr 9.58e+00 npm 7.74e-06 8.21e-06 1.62e-05 pum 3.65e-08 | np 8 subnp 6 6 7 6 nodes | 0 1 2 3 4 7 | 0 1 2 3 4 6 | 0 1 2 3 4 5 6 | 1 2 3 4 5 6 + ... + meam1 8.88e-16 w>0 1 wtr 8.74e+00 npm 9.32e-06 1.05e-05 2.13e-05 pum 1.17e-09 | np 8 subnp 6 6 7 6 nodes | 0 1 2 3 5 7 | 0 1 2 3 4 6 | 0 1 2 3 4 5 6 | 0 2 3 4 5 7 + meam1 1.55e-15 w>0 1 wtr 8.83e+00 npm 9.45e-06 1.05e-05 2.13e-05 pum 7.66e-10 | np 8 subnp 6 6 7 6 nodes | 0 1 2 3 4 7 | 0 1 2 3 4 6 | 0 1 2 3 4 6 7 | 0 2 3 4 5 7 + min_np 6 max_np 6 + meam1 1.78e-15 w>0 1 wtr 7.15e+00 npm 1.24e-05 1.07e-05 1.62e-05 pum 1.02e-09 | np 8 subnp 6 6 6 6 nodes | 0 1 2 3 4 6 | 0 1 2 3 4 6 | 0 1 2 3 4 6 | 1 2 3 4 5 6 + min_np 7 max_np 8 + min_np 7 max_np 7 + min_np 8 max_np 8 + +In this output, each line beginning with "meam1" corresponds to a t.p.s. basis. +"meam1" means "maximum eigenvalue amplitude minus 1", and the following value +is log10 of this quantity. It should be near machine precision. Then come a few +unused entries. Next is "w>0", which reports whether all basis weights are > 0. +"wtr" is unused. "npm" lists the a_1, a_2, and a_infty values for the basis. "pum" gives +the lambda_max^PUM value. After the "|" is the encoding of the basis, either +o.n.s. ("offst" is in the encoding) or general n.s. + +For the methods/slmm/slmmir program, modify make.inc to point to your Kokkos +installation, then + make +Optionally run regression tests: + python2 slmm_runtests.py +Bash scripts in the methods/islet/figures directory call the slmmir program. + +We use the language hy to create the figures. hy is a Lisp that compiles to +Python AST. We used hy 0.18.0 ('pip install hy' for the latest version) with +CPython 3.7.6 provided by Anaconda 3. 
+
+The code used to obtain performance data on Summit will be part of main E3SM
+soon. The exact version used to generate the data is archived here:
+  https://github.com/ambrad/E3SM/releases/tag/islet-2d-paper-summit-sl-gpu-timings
+The data are here:
+  https://github.com/E3SM-Project/perf-data/tree/main/nhxx-sl-summit-mar2021
diff --git a/methods/islet/run_meam1_sweep.cpp b/methods/islet/run_meam1_sweep.cpp
new file mode 100644
index 0000000..5307a8c
--- /dev/null
+++ b/methods/islet/run_meam1_sweep.cpp
@@ -0,0 +1,48 @@
+#include "islet_isl.hpp"
+#include "islet_maxeigcomp.hpp"
+#include "islet_pum.hpp"
+
+#include
+
+static void
+run_sweep (const islet::Operator::ConstPtr& op, const Int np) { // Print nx = 192 lines of "dx meam1" for dx = 0.5*(ix+1)/nx, i.e. dx in (0, 1/2].
+  static const Int nx = 192;
+  std::vector dxs(nx), meam1s(nx); // NOTE(review): template arguments (and the include name above) look stripped in this listing; restore from upstream.
+  const auto oim = std::make_shared(np, op);
+  const auto im = std::make_shared(oim);
+# pragma omp parallel for schedule(static,1)
+  for (Int ix = 0; ix < nx; ++ix) { // Each iteration builds its own MaxEigComputer and writes only index ix of the two arrays.
+    MaxEigComputer mec(false);
+    const Real dx = 0.5*(Real(ix+1)/nx);
+    dxs[ix] = dx;
+    Real mea;
+    mec.compute(*im, dx, 1024, &mea); // 1024 is presumably a size/resolution argument - confirm in islet_maxeigcomp.hpp.
+    meam1s[ix] = mea - 1; // Stored as the computed value minus 1.
+  }
+  for (Int ix = 0; ix < nx; ++ix) // Output happens after the parallel loop, in index order.
+    printf("%23.15e %23.15e\n", dxs[ix], meam1s[ix]);
+}
+
+int main (int argc, char** argv) { // Takes exactly one argument, np; runs the sweep for three operator kinds in turn.
+  const int np = argc == 2 ? std::atoi(argv[1]) : -1; // NOTE(review): atoi returns 0 for non-numeric text, which the check below accepts.
+  if (argc != 2 || np == -1) {
+    printf("%s np\n", argv[0]);
+    return -1;
+  }
+  {
+    const auto gll_natural = islet::Operator::create(islet::Operator::gll_natural);
+    printf("gll_natural %d\n", np);
+    run_sweep(gll_natural, np);
+  }
+  {
+    const auto gll_best = islet::Operator::create(islet::Operator::gll_best);
+    printf("gll_best %d\n", np);
+    run_sweep(gll_best, np);
+  }
+  {
+    const auto uofs = islet::Operator::create(islet::Operator::uniform_offset_nodal_subset);
+    printf("uniform_offset_nodal_subset %d\n", np);
+    run_sweep(uofs, np);
+  }
+  return 0;
+}
diff --git a/methods/islet/run_np4.cpp b/methods/islet/run_np4.cpp
new file mode 100644
index 0000000..a9fcdd2
--- /dev/null
+++ b/methods/islet/run_np4.cpp
@@ -0,0 +1,270 @@
+#include "islet_np4.hpp"
+
+#include "islet_util.hpp"
+#include "islet_npx.hpp"
+#include "islet_xnodes_metrics.hpp"
+#include "islet_maxeigcomp.hpp"
+#include "islet_pum.hpp"
+
+static const Real oosqrt5 = 0.44721359549995793928; // 1/sqrt(5)
+
+static Real eval_lagrange_poly (const Int& n, const Real* xsup, const Real* ysup, // Value at x of the Lagrange interpolant through (xsup[i], ysup[i]), i = 0 .. n-1.
+                                const Real& x) {
+  Real y = 0;
+  for (int i = 0; i < n; ++i) {
+    Real f = 1; // Accumulates the i-th Lagrange basis polynomial evaluated at x.
+    for (int j = 0; j < n; ++j)
+      f *= (i == j) ?
+        1 :
+        (x - xsup[j]) / (xsup[i] - xsup[j]);
+    y += f*ysup[i];
+  }
+  return y;
+}
+
+static Real normalize_x (const Real* gll_x, const Real& x) { // Affine map sending gll_x[1] to 0 and 1 to 1.
+  const Real x0 = gll_x[1];
+  return (x - x0) / (1 - x0);
+}
+
+static void outer_eval (const Real* gll_x, const Real& x, Real v[4]) { // v[1..3]: quadratic Lagrange basis values at x for the nodes gll_x[1], gll_x[2], 1; v[0] = 0.
+  const Real
+    xbar = normalize_x(gll_x, gll_x[2]), // Middle node in normalized coordinates.
+    ooxbar = 1 / xbar,
+    ybar = 1 / (xbar - 1),
+    xn = normalize_x(gll_x, x); // Evaluation point in normalized coordinates.
+  v[0] = 0;
+  v[1] = 1 + ybar*xn*((1 - ooxbar)*xn + ooxbar - xbar);
+  v[2] = ybar*ooxbar*xn*(xn - 1);
+  v[3] = ybar*xn*(xbar - xn);
+}
+
+// Convex combination parameter for np=3 and np=4 combination that gives exactly
+// 1 for the interpolant at gll_x[2] - 1 for the antisymmetric function with GLL
+// point values (0, 1, -1, 0). 
+static Real calc_alpha () { + const auto x_gll = islet::get_x_gll(4); + const Real x0 = x_gll[1] + 1; + Real y[4]; + outer_eval(x_gll, x0, y); + const Real y3sum = y[2] - y[1]; + eval_lagrange_poly(x_gll, 4, x0, y); + const Real y4sum = y[2] - y[1]; + return (1 - y4sum) / (y3sum - y4sum); +} + +struct Options { + Int ne_max; + Real meam1_tol, pum_tol; + + Options () + : ne_max(1001), meam1_tol(1e-14), pum_tol(1e-7) + {} +}; + +static Real run_maxeigcomp (const Np4InterpMethod::Ptr& uim, const Options& o) { + MaxEigComputer mec; + const auto meam1 = mec.run(4, o.ne_max, o.ne_max, o.meam1_tol, true, uim); + printf("mec %1.3e\n", meam1); + return meam1; +} + +static Real run_pum (const UserInterpMethod::Ptr& uim, const Options& o) { + pum::Options po; + po.threaded = true; + po.ntrial = 31; + po.mec_ne = o.ne_max/10; + po.perturb = 0.01; + pum::PerturbedUniformMeshMetric pum(uim); + Real pum_max = 0; + printf("pum:"); fflush(stdout); + for (Int ne = 3; ne <= 15; ++ne) { + po.ne = ne; + pum.reset_opts(po); + const auto pum_val = pum.run(); + printf(" %1.1e", pum_val); fflush(stdout); + pum_max = std::max(pum_max, pum_val); + } + printf("\npum_max %1.4e\n", pum_max); + return pum_max; +} + +/* + |f(x) - p_i(x)| <= e_i(x) + sum_i a_i = 1 + |f(x) - sum_i a_i p_i(x)| + = |sum_i a_i f(x) - sum_i a_i p_i(x)| + = |sum_i a_i (f(x) - p_i(x))| + <= sum_i |a_i| |f(x) - p_i(x)| + = sum_i |a_i| e_i(x) + */ +static void calc_metrics (const Np4InterpMethod& uim, Real metrics[3]) { + const Int nseg = 100; + const auto* xnodes = islet::get_x_gll(4); + Real npm1 = 0, npm2 = 0, npm_max = 0; + for (Int ireg = 0; ireg < 2; ++ireg) { + const bool center = ireg == 1; + const auto xs = xnodes[ireg], xe = xnodes[ireg+1]; + Real npm1_reg = 0, npm2_reg = 0, npm_max_reg = 0; + for (Int seg = 0; seg < nseg; ++seg) { + const auto x = xs + (seg + 0.5)*(xe - xs)/nseg; + Real f = 1; + if (ireg == 0) { + Real f3 = 1, f4 = 1; + for (Int i = 0; i < 3; ++i) f3 *= x - xnodes[i]; + for (Int i = 0; i < 4; 
++i) f4 *= x - xnodes[i]; + const Real a = uim.eval_a(x); + // Divide by 3!, 4!. + f = std::abs(a*f3)/6 + std::abs((1 - a)*f4)/24; + } else { + for (Int i = 0; i < 4; ++i) f *= x - xnodes[i]; + f /= 24; + } + npm1_reg += std::abs(f); + npm2_reg += islet::square(f); + npm_max_reg = std::max(npm_max_reg, std::abs(f)); + } + const auto f = (center ? 1 : 2)*(xe - xs)/nseg; + npm1 += f*npm1_reg; + npm2 += f*npm2_reg; + npm_max = std::max(npm_max, npm_max_reg); + } + metrics[0] = npm1; + metrics[1] = std::sqrt(npm2); + metrics[2] = npm_max; +} + +static void optimize (Real best_metrics[3], const Options& o, + const bool c0_zero, const bool c2_one) { + const Real alpha = calc_alpha(); + auto uim = std::make_shared(0, alpha, alpha); + MaxEigComputer mec; + pum::Options po; + po.threaded = true; + po.ntrial = 31; + po.mec_ne = o.ne_max/10; + po.perturb = 0.01; + pum::PerturbedUniformMeshMetric pum(uim); + Int iteration = 0, expensive = 0; + for (;;) { + const Real + c0 = c0_zero ? 0 : islet::urand(), + c1 = islet::urand(), + c2 = c2_one ? 1 : islet::urand(); + uim->reset_c(c0, c1, c2); + + Real metrics[3], meam1 = 1, pum_max = 1; + calc_metrics(*uim, metrics); + + const auto print = [&] (const char* s) { + printf("%14s: %1.16e %1.16e %1.16e | " + "pum %1.3e meam1 %1.3e npm %1.3e %1.3e %1.3e %d %d\n", + s, c0, c1, c2, pum_max, meam1, metrics[0], metrics[1], metrics[2], + iteration, expensive); + }; + + bool fnd = false; + for (Int i = 0; i < 3; ++i) + if (metrics[i] < best_metrics[i]) + fnd = true; + ++iteration; + if ( ! 
fnd) continue; + ++expensive; + + meam1 = mec.run(4, o.ne_max, o.ne_max, o.meam1_tol, true, uim); + if (meam1 > o.meam1_tol) { + if (meam1 < 1e-12) print("reject meam1"); + continue; + } + + pum_max = 0; + for (Int ne = 3; ne <= 15; ++ne) { + po.ne = ne; + pum.reset_opts(po); + const auto pum_val = pum.run(o.pum_tol); + pum_max = std::max(pum_max, pum_val); + if (pum_max > o.pum_tol) break; + } + if (pum_max > o.pum_tol) { + print("reject pum"); + continue; + } + + bool all = true; + for (Int i = 0; i < 3; ++i) + if (metrics[i] > best_metrics[i]) + all = false; + if (all) + for (Int i = 0; i < 3; ++i) + best_metrics[i] = metrics[i]; + + print("accept"); + } +} + +// Test that the metrics specialization above matches the one we use everywhere +// else when alpha = 0. +static void test_metrics () { + Real m[3], m4[3]; + Nodes nodes; + nodes.init("4 1 | 0 4: 0 1 2 3 | 1 4: 0 1 2 3"); + calc_xnodes_metrics(nodes, islet::get_x_gll(4), m); + Np4InterpMethod op(0, 0, 0); + calc_metrics(op, m4); + for (Int i = 0; i < 3; ++i) + if (std::abs(m4[i] - m[i]) > 10*std::numeric_limits::epsilon()) + printf("FAIL test_metrics %d %1.16e %1.16e\n", i, m[i], m4[i]); +} + +int main (int argc, char** argv) { + printf("MaxEigComputer::unittest() %d\n", MaxEigComputer::unittest()); + test_metrics(); + Real best_metrics[3]; { + // We have a very good value; use this to make 2- and 3-dimensional the + // searches faster. 
+ const auto uim = std::make_shared(0, 0.306, 1); + calc_metrics(*uim, best_metrics); + printf("start: npm l1 %1.4e l2 %1.4e li %1.4e\n", + best_metrics[0], best_metrics[1], best_metrics[2]); + } + if (argc > 1 && std::string(argv[1]) == "opt1") { + Options o; + o.ne_max = 3333; + optimize(best_metrics, o, true, true); + } else if (argc > 1 && std::string(argv[1]) == "opt2") { + Options o; + o.ne_max = 3333; + optimize(best_metrics, o, true, false); + } else if (argc > 1 && std::string(argv[1]) == "opt3") { + Options o; + o.ne_max = 3333; + optimize(best_metrics, o, false, false); + } else { + const auto eval = [&] (const Real c[3]) { + printf("c %1.16e %1.16e %1.16e\n", c[0], c[1], c[2]); + const auto uim = std::make_shared(c[0], c[1], c[2]); + printf("eval_a(1/sqrt(5) - 1) %1.16e\n", uim->eval_a(oosqrt5-1)); + Real metrics[3]; + calc_metrics(*uim, metrics); + printf("npm l1 %1.4e l2 %1.4e li %1.4e\n", metrics[0], metrics[1], metrics[2]); + Options o; + o.ne_max = (argc > 1 && std::string(argv[1]) == "dense") ? 
1024 : 111; + const auto mec = run_maxeigcomp(uim, o); + run_pum(uim, o); + }; + + // (1, 1, 1) 1.332e-15 pum 1.2516e-08 npm l1 1.5758e-02 l2 1.2782e-02 li 1.5109e-02 + // (0, 0.306, 1) 1.110e-15 pum 9.9197e-09 npm l1 1.1611e-02 l2 9.0249e-03 li 9.0817e-03 + + { + Real c[3]; + c[0] = c[1] = c[2] = 1; + eval(c); + } + + { + const Real c[] = {0,0.306,1}; + eval(c); + } + } +} diff --git a/methods/islet/search.cpp b/methods/islet/search.cpp new file mode 100644 index 0000000..8c769e8 --- /dev/null +++ b/methods/islet/search.cpp @@ -0,0 +1,962 @@ +#ifndef SLMM_NP_GT_4 +# define SLMM_NP_GT_4 +#endif + +#include "islet_tables.hpp" +#include "islet_npx.hpp" +#include "islet_maxeigcomp.hpp" +#include "islet_xnodes_metrics.hpp" +#include "islet_pum.hpp" +#include "islet_util.hpp" + +#include +#include +#include + +class SearchAtom : public UserInterpMethod { +public: + struct Input { + enum Basis { gll, uniform, legendre, cheb }; + + static const int np_max = 12; + + Int np, nmodregions; + Basis basis; + Int stabnp[np_max], staboffset[np_max]; + // Looking for at least this max |lambda| - 1. + Real maxeigampm1; + // Conclude the search when it succeeds with these parameters. The second is + // the number of eigenvalues to sample in (0, 1/2]. + Int ne, neigdx; + bool quiet, unittest; + + Input () { init(); } + + private: + void init () { + quiet = unittest = false; + np = 6; + basis = gll; + nmodregions = 2; + stabnp[0] = 6; + stabnp[1] = 5; + staboffset[0] = staboffset[1] = 0; + maxeigampm1 = 1e-13; + ne = 1000; + neigdx = 1000; + } + }; + + SearchAtom (const Input& iin) { + reset(iin); + if (in.unittest) + std::cerr << (unittest() > 0 ? 
"FAIL" : "PASS") + << ": SearchAtom::unittest.\n"; + } + + explicit SearchAtom () {} + + void reset (const Input& iin) { + in = iin; + switch (in.basis) { + case Input::gll: x_gll = islet::get_x_gll_special(in.np); break; + case Input::uniform: { + static Real x[islet::np_max]; + x_gll = x; + for (Int i = 0; i < in.np; ++i) + x[i] = 2*(Real(i)/(in.np-1)) - 1; + } break; + case Input::legendre: x_gll = islet::get_x_gl(in.np); break; + case Input::cheb: { + static Real x[islet::np_max]; + x_gll = x; + for (Int i = 0; i < in.np; ++i) + x[i] = -std::cos(M_PI*Real(2*(i+1) - 1)/Real(2*in.np)); + } break; + } + } + + Real run () { + return max_eig_amp.run(in.np, in.ne, in.neigdx, in.maxeigampm1, + in.quiet, this); + } + + MaxEigComputer::Analysis calc_max_vals (const Int& nmu, const Int& ndx) { + return max_eig_amp.calc_max_vals(nmu, ndx, in.np, this); + } + + static int unittest () { + int nerr = 0; + Input in; + SearchAtom sa(in); + Real v0[32], v1[32]; + const Int np = 6; + for (Int ix = 0, nx = 11; ix < nx; ++ix) { + const auto x = -1 + (2.0*ix)/nx; + npxstab::eval(np, x, v0); + sa.eval(x, v1); + for (Int j = 0; j < np; ++j) + if (v0[j] != v1[j]) + ++nerr; + } + return nerr; + } + + void eval (const Real& x, Real* const v) override { + eval(in.np, in.nmodregions, x_gll, + in.stabnp, in.staboffset, + x, v); + } + + Int get_np () const override { return in.np; } + const Real* get_xnodes () const override { return x_gll; } + +private: + Input in; + MaxEigComputer max_eig_amp; + const Real* x_gll; + std::vector x_gll_buf; + + static void eval ( + const Int& np, const Int& nreg, const Real* x_gll, + const Int* const subnp, const Int* const os, + const Real& x, Real* const v) + { + if (x > 0) { + eval(np, nreg, x_gll, subnp, os, -x, v); + for (int i = 0; i < np/2; ++i) + std::swap(v[i], v[np-i-1]); + return; + } + bool done = false; + for (Int i = 0; i < nreg; ++i) { + if (x > x_gll[i+1]) continue; + assert(i == 0 || x >= x_gll[i]); + assert( ! 
done); + done = true; + if (subnp[i] == np) { + eval_lagrange_poly(x_gll, np, x, v); + } else { + std::fill(v, v + np, 0); + eval_lagrange_poly(x_gll + os[i], subnp[i], x, v + os[i]); + } + break; + } + if ( ! done) + eval_lagrange_poly(x_gll, np, x, v); + } +}; + +static void calc_wts_metrics (const Int np, const Real* wt, + bool& all_pve_wts, Real& ratio) { + all_pve_wts = true; + for (Int i = 0; i < np; ++i) if (wt[i] <= 0) all_pve_wts = false; + Real wtmin = 10, wtmax = -1; + for (Int i = 0; i < np; ++i) wtmin = std::min(wtmin, wt[i]); + for (Int i = 0; i < np; ++i) wtmax = std::max(wtmax, wt[i]); + ratio = wtmax/wtmin; +} + +static Real calc_pum_metric (UserInterpMethod& im, const bool threaded = true, + const Real stop_if_above = 1e3) { + std::shared_ptr uim(&im, [] (UserInterpMethod*) {}); + const auto wrapper = std::make_shared(uim); + pum::Options o; + o.threaded = threaded; + o.ntrial = 48; + o.perturb = 0.01; + Real pum_metric = 0; + for (const Int mec_ne: {2, 4, 8, 16, 32, 64}) + for (const Int ne: {3, 5, 10}) { + o.mec_ne = mec_ne; + o.ne = ne; + pum::PerturbedUniformMeshMetric pum(wrapper, o); + const auto pum_metric_ne = pum.run(stop_if_above); + // If we stopped, then we have no idea what the actual pum is, so return 1 + // to be safe. + if (pum_metric > stop_if_above) return 1; + pum_metric = std::max(pum_metric, pum_metric_ne); + } + return pum_metric; +} + +// Restriction of nodal subset bases to an offset followed by adjacent nodes. 
+namespace find_offset_nodal_subset_bases { +static const int nps[] = {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16}; + +struct Basis { + static const int M = islet::np_max; + std::array subnp, offst; + int n; +}; + +bool same (const Basis& a, const Basis& b) { + assert(a.n == b.n); + bool s = true; + for (int i = 0; i < a.n; ++i) + if (a.subnp[i] != b.subnp[i] || a.offst[i] != b.offst[i]) { + s = false; + break; + } + return s; +} + +static bool issymmetric (const int np, const int subnp, const int offst) { + const int d = np - subnp; + return ((d % 2 == 0) && offst == d/2); +} + +static bool nodes_on_bdy (const SearchAtom::Input::Basis b) { + return b == SearchAtom::Input::gll || b == SearchAtom::Input::uniform; +} + +void recur (const int np, const int min_np, const int min_np_pos, + std::vector& good_bases, Basis b, const int pos, + SearchAtom& sa, const SearchAtom::Input::Basis basis, + MetricsTracker& mt) { + if (pos == -1) { + // Don't check if we've seen this basis is already good. + bool already = false; + for (const auto& gb : good_bases) + if (same(gb, b)) { + already = true; + break; + } + if (already) return; + + SearchAtom::Input in; + in.quiet = true; + in.basis = basis; + in.np = np; + in.nmodregions = b.n; + for (int i = 0; i < b.n; ++i) in.stabnp[i] = b.subnp[i]; + for (int i = 0; i < b.n; ++i) in.staboffset[i] = b.offst[i]; + in.ne = 500; + in.neigdx = 500; + sa.reset(in); + + Nodes nodes(np, nodes_on_bdy(basis)); { + assert(nodes.get_nh() == b.n); + std::vector ns; + ns.reserve(np); + for (Int ireg = 0; ireg < nodes.get_nh(); ++ireg) { + ns.resize(in.stabnp[ireg]); + for (Int i = 0; i < in.stabnp[ireg]; ++i) + ns[i] = in.staboffset[ireg] + i; + nodes.set(ireg, ns); + } + } + Real xnodes_metric[3]; + calc_xnodes_metrics(nodes, sa.get_xnodes(), xnodes_metric); + if ( ! 
mt.acceptable_metrics(nodes, sa.get_xnodes(), xnodes_metric)) return; + const Real pum_to_accept = mt.pum_to_accept(nodes, sa.get_xnodes(), + xnodes_metric); + bool all_pve_wts = false; + Real wtr = 0; { + Real wt[islet::np_max]; + calc_weights(nodes, sa.get_xnodes(), wt); + calc_wts_metrics(np, wt, all_pve_wts, wtr); + } + if ( ! all_pve_wts) return; + + // Run the potentially expensive analysis. + Real maxeigampm1 = sa.run(); + Real maxcondV = 0, maxdefub = 0; + Real pum_metric = 0; + if (maxeigampm1 <= in.maxeigampm1) { + pum_metric = calc_pum_metric(sa, true, pum_to_accept); + if ( ! mt.would_update(xnodes_metric, pum_metric)) return; + const auto max_vals = sa.calc_max_vals(1111, 1111); + maxeigampm1 = max_vals.max_eig_amp - 1; + maxcondV = max_vals.max_condv; + maxdefub = max_vals.max_defect_ub; + } + if (maxeigampm1 <= in.maxeigampm1) { + printf("meam1 %1.1e mcV %1.1e mdef %1.1e w>0 %d wtr %9.2e " + "npm %9.2e %9.2e %9.2e pum %9.2e", + maxeigampm1, maxcondV, maxdefub, all_pve_wts, wtr, + xnodes_metric[0], xnodes_metric[1], xnodes_metric[2], + pum_metric); + printf(" | np %2d subnp", np); + for (int i = 0; i < b.n; ++i) printf(" %d", b.subnp[i]); + printf(" offst"); + for (int i = 0; i < b.n; ++i) printf(" %d", b.offst[i]); + printf("\n"); + fflush(stdout); + good_bases.push_back(b); + mt.update(xnodes_metric, pum_metric); + } + return; + } + // Set up a basis. Avoid some, but not all, redundant trials by making one + // slot have np = min_np. + const bool middle = np % 2 == 0 && pos == np/2 - 1; + for ( +#if 0 + int subnp = pos == min_np_pos ? min_np : np; + subnp >= min_np; + --subnp +#else + int subnp = min_np; + subnp <= (pos == min_np_pos ? min_np : np); + ++subnp +#endif + ) + for (int offst = std::max(0, pos - subnp + 2); + offst <= std::min(pos, np - subnp); + ++offst) { + if (middle && ! 
issymmetric(np, subnp, offst)) continue; + b.subnp[pos] = subnp; + b.offst[pos] = offst; + recur(np, min_np, min_np_pos, good_bases, b, pos-1, sa, basis, mt); + } +} + +static Int run (const int np, MetricsTracker::Ptr mt = nullptr, + const SearchAtom::Input::Basis basis = SearchAtom::Input::gll) { + if ( ! mt) mt = std::make_shared(np); + const int min_ooa = 2; //np/2; + const int min_np_lim = min_ooa; + SearchAtom sa; + std::vector good_bases; + printf("np %2d\n", np); + Int good = 0, max_good_np = -1; + // Search from highest- to lowest-OOA bases. + for (int min_np = np; min_np >= min_np_lim; --min_np) { + fprintf(stdout, "min_np %2d\n", min_np); fflush(stdout); + Basis b; + b.n = nodes_on_bdy(basis) ? np/2 : (np+2)/2; + for (int i = 0; i < b.n; ++i) b.subnp[i] = np; + for (int i = 0; i < b.n; ++i) b.offst[i] = 0; + for (int min_np_pos = 0; min_np_pos < b.n; ++min_np_pos) + recur(np, min_np, min_np_pos, good_bases, b, b.n-1, sa, basis, *mt); + if ( ! good_bases.empty()) { + max_good_np = std::max(max_good_np, min_np); + ++good; + } + // We don't want to reduce order to improve other heuristics; instead, we'll + // use the more general nodal subset basis if needed. So break at the + // highest min_np having a good basis. + if (good) break; + } + return max_good_np; +} + +static void runall (const Int np, + const SearchAtom::Input::Basis basis = SearchAtom::Input::gll, + const MetricsTracker::Ptr& mt = nullptr) { + if (np <= -1) + for (int np : nps) + run(np, mt, basis); + else + run(np, mt, basis); +} +} // namespace find_offset_nodal_subset_bases + +// List of region's valid node supports. Use a vector of ints, where in an int, +// the 0/1 pattern gives the nodes. 
+struct ValidNodesList { + typedef std::shared_ptr Ptr; + + struct Iterator { + Iterator (const std::vector& nodes, const int& ntot, const int& nsub, + const int& pin0) + : nodes_(nodes), ntot_(ntot), nsub_(nsub), pin0_(pin0), idx_(-1) + {} + + Iterator (const std::vector& nodes, bool) + : nodes_(nodes), ntot_(-1), nsub_(-1), pin0_(-1), idx_(nodes.size()) + {} + + Iterator operator++ () { + int idx; + for (idx = idx_+1; idx < static_cast(nodes_.size()); ++idx) + if (valid(nodes_[idx], pin0_)) + break; + idx_ = idx; + return *this; + } + + bool operator== (const Iterator& it) const { return idx_ == it.idx_; } + bool operator!= (const Iterator& it) const { return ! (*this == it); } + + // Convert a bit string to a list of element node indices. + void get_nodes (int* nodes) { + assert(idx_ < static_cast(nodes_.size())); + const auto nodemask = nodes_[idx_]; + int k = 0; + for (int i = 0; i < ntot_; ++i) + if (nodemask & (1 << i)) + nodes[k++] = i; + assert(k == nsub_); + } + + private: + const std::vector& nodes_; + const int ntot_, nsub_, pin0_; + int idx_; + + // Do the selected nodes include the two bounding the region? + static bool valid (const int nodemask, const int pin0) { + const int pinmask = 3 << pin0; + return (nodemask & pinmask) == pinmask; + } + }; + + ValidNodesList (const int ntot, const int nsub, + const bool symmetric = false) { + assert(ntot <= islet::np_max); + init(ntot, nsub, symmetric); + } + + void init (const int ntot, const int nsub, const bool symmetric) { + ntot_ = ntot; + nsub_ = nsub; + symmetric_ = symmetric; + for (int i = 0; i < (1 << ntot); ++i) + if (num1s(i) == nsub && ( ! symmetric_ || issymmetric(i, ntot))) + nodes_.push_back(i); + //pr(puf(ntot) pu(nsub) pu(nodes_.size())); + } + + Iterator begin (const int& pin0) const { + Iterator it(nodes_, ntot_, nsub_, pin0); + ++it; + return it; + } + + Iterator end () const { return Iterator(nodes_, true); } + + static Int test () { + if ( ! 
(issymmetric(1, 1) && issymmetric(3, 2) && !issymmetric(3, 3) && + issymmetric(0x65a6, 16) && !issymmetric(0x65a6, 20))) { + std::cerr << "ValidNodesList::test FAILed.\n"; + return 1; + } + return 0; + } + +private: + int ntot_, nsub_; + bool symmetric_; + std::vector nodes_; + + // Number of 1 bits in n. + static int num1s (int n) { + int cnt = 0; + while (n) { + if (1 & n) ++cnt; + n = n >> 1; + } + return cnt; + } + + static bool issymmetric (const int n, const int nslot) { + int nd = n, rev = 0; + for (int i = 0; i < nslot; ++i) { + rev = rev << 1; + if (1 & nd) rev = (rev | 1); + nd = nd >> 1; + } + return rev == n; + } +}; + +namespace find_nodal_subset_bases { +struct NsbSearchAtom : public UserInterpMethod { + struct Input { + static const int np_max = islet::np_max; + typedef char SInt; + + SInt np; + SInt nodes[np_max-1][np_max]; + SInt subnp[np_max-1]; + Real maxeigampm1; + Int ne, neigdx; + bool quiet; + + Input () { + np = -1; + maxeigampm1 = 1e-13; + ne = 1111; + neigdx = ne; + quiet = true; + } + }; + + NsbSearchAtom (const bool mea_threaded = false) + : max_eig_amp_(mea_threaded) + {} + + Real run (const Input& in, + bool& all_pve_wts, Real& wtr, + MetricsTracker& mt, Real* metrics, Real& pum_metric) { + in_ = in; + Nodes nodes(in.np); + for (Int i = 0; i < nodes.get_nh(); ++i) + nodes.set(i, in.nodes[i], in.subnp[i]); + assert(nodes.ok_to_eval()); + calc_xnodes_metrics(nodes, get_xnodes(), metrics); + if ( ! mt.acceptable_metrics(nodes, get_xnodes(), metrics)) return 2; + const Real pum_to_accept = mt.pum_to_accept(nodes, get_xnodes(), metrics); + { + Real wt[islet::np_max]; + calc_weights(nodes, get_xnodes(), wt); + calc_wts_metrics(in.np, wt, all_pve_wts, wtr); + if ( ! 
all_pve_wts) return 1; + } + Int ne, neigdx; + ne = neigdx = 11; + auto maxeigampm1 = max_eig_amp_.run(in.np, ne, neigdx, in.maxeigampm1, + in.quiet, this); + if (maxeigampm1 > in.maxeigampm1) return maxeigampm1; + pum_metric = calc_pum_metric(*this, max_eig_amp_.is_threaded(), + pum_to_accept); + if ( ! mt.would_update(metrics, pum_metric)) return 2; + return max_eig_amp_.run(in.np, in.ne, in.neigdx, in.maxeigampm1, + in.quiet, this); + } + + void eval (const Real& x, Real* const v) override { + eval(in_.np, in_.nodes, in_.subnp, x, v); + } + + Int get_np () const override { return in_.np; } + + const Real* get_xnodes () const override { + return islet::get_x_gll(in_.np); + } + + static void eval ( + const Int& np, const Input::SInt nodes[][Input::np_max], + const Input::SInt subnp[], const Real& x, Real* const v) + { + if (x > 0) { + eval(np, nodes, subnp, -x, v); + for (int i = 0; i < np/2; ++i) + std::swap(v[i], v[np-i-1]); + return; + } + const auto x_gll = islet::get_x_gll(np); + Real xsub[Input::np_max], vsub[Input::np_max]; + for (Int i = 0; i < np-1; ++i) { + if (i < np-2 && x > x_gll[i+1]) continue; + if (subnp[i] == np) { + eval_lagrange_poly(x_gll, np, x, v); + } else { +#ifndef NDEBUG + { // Subregion's nodes must be included for the basis to be + // interpolatory. + int fnd = 0; + for (Int j = 0; j < subnp[i]; ++j) { + const auto nij = nodes[i][j]; + if (nij == i || nij == i+1) ++fnd; + } + if (fnd != 2) { + pr(puf(np) pu(i) pu(x)); + islet::prarr("nodes[i]", nodes[i], subnp[i]); + } + assert(fnd == 2); + } +#endif + for (Int j = 0; j < subnp[i]; ++j) + xsub[j] = x_gll[nodes[i][j]]; + // Lagrange polynomial basis. 
+ std::fill(v, v + np, 0); + eval_lagrange_poly(xsub, subnp[i], x, vsub); + for (Int j = 0; j < subnp[i]; ++j) + v[nodes[i][j]] = vsub[j]; + } + break; + } + } + +private: + Input in_; + MaxEigComputer max_eig_amp_; +}; + +static const int nps[] = {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16}; + +struct Basis { + static const int np_max = NsbSearchAtom::Input::np_max; + int np; + std::array subnp; + std::array,np_max-1> nodes; +}; + +struct Count { + typedef std::shared_ptr Ptr; + bool just_count; + size_t value, total; + Count () : just_count(true), value(0), total(0) {} +}; + +void randperm (Int* const p, const Int n) { + using islet::urand; + for (size_t i = 0; i < n; ++i) p[i] = i; + for (size_t i = 0; i < 5*n; ++i) { + const int j = urand()*n, k = urand()*n; + std::swap(p[j], p[k]); + } +} + +void make_subnp_list_recur (const Int np, const Int np_min, + std::vector& subnp_list, + char* subnp, const Int pos) { + for (Int np_pos = np_min; np_pos <= np; ++np_pos) { + subnp[pos] = np_pos; + if (pos > 0) + make_subnp_list_recur(np, np_min, subnp_list, subnp, pos-1); + else + for (Int j = 0; j < np/2; ++j) + subnp_list.push_back(subnp[j]); + } +} + +void make_subnp_list (const Int np, const Int np_min, + std::vector& subnp_list) { + char subnp[islet::np_max]; + make_subnp_list_recur(np, np_min, subnp_list, subnp, np/2-1); +} + +struct Restarter { + typedef std::shared_ptr Ptr; + MetricsTracker::Ptr mt; + int np, min_ooa; + size_t dont_eval_if_below, eval_count; + + Restarter (const MetricsTracker::Ptr& mt_, const int np_, const int min_ooa_, + const int dont_eval_if_below_) + : mt(mt_), np(np_), min_ooa(min_ooa_), dont_eval_if_below(dont_eval_if_below_), + eval_count(0) + {} + + bool write(std::string filename = ""); +}; + +bool Restarter::write (std::string filename) { + using islet::write; + if (filename == "") { + std::stringstream ss; + ss << "NsbSearchAtomRestart_np" << np << ".dat"; + filename = ss.str(); + } + std::ofstream os(filename.c_str(), 
std::ofstream::binary); + assert(mt); + return (write(os, np) && + write(os, min_ooa) && + write(os, eval_count) && // new dont_eval_if_below value + write(os, eval_count) && + mt->write(os)); +} + +static Restarter::Ptr read_restart (const int np) { + using islet::read; + std::stringstream ss; + ss << "NsbSearchAtomRestart_np" << np << ".dat"; + std::ifstream is(ss.str().c_str(), std::ofstream::binary); + if ( ! is.is_open()) return nullptr; + const auto mt = std::make_shared(np); + const auto r = std::make_shared(mt, np, 0, 0); + int lnp; + const bool ok = (read(is, r->np) && r->np == np && + read(is, r->min_ooa) && + read(is, r->dont_eval_if_below) && + read(is, r->eval_count) && + r->mt->read(is)); + if ( ! ok) return nullptr; + return r; +} + +void eval (std::vector& esa, + const std::vector& input_list, + const Int ninput, MetricsTracker& mt, const bool show_progress, + const Count::Ptr& count) { + if (count) count->value += ninput; + if (count && count->just_count) return; + if (count) + printf("NsbSearchAtom::eval %ld/%ld (%5.1f%%)\n", + count->value, count->total, 100*Real(count->value)/count->total); + std::vector perm(ninput); + randperm(perm.data(), ninput); + Real progress = 0; + const auto run1 = [&] (const Int ili) { + { + const Real pdelta = 0.05; + const Real p = Real(ili)/ninput; + if (show_progress && p >= progress + pdelta) { +# pragma omp critical (NsbSearchAtom_progress) + { + const Real p = Real(ili)/ninput; + if (p >= progress + pdelta) { + printf("progress: %5.1f%% (%8d)\n", 100*p, ili); + progress = p; + } + } + } + } + const auto& in = input_list[perm[ili]]; + auto& my_esa = esa[omp_get_thread_num()]; + bool all_pve_wts; + Real wtr, xnodes[islet::np_max], metrics[3], pum_metric; + const auto maxeigampm1 = my_esa.run(in, all_pve_wts, wtr, mt, metrics, + pum_metric); + if (maxeigampm1 > in.maxeigampm1) return; + { + Basis b; + b.np = in.np; + for (int i = 0; i < b.np-1; ++i) + b.subnp[i] = in.subnp[i]; + for (int i = 0; i < b.np-1; ++i) + 
for (int j = 0; j < b.subnp[i]; ++j) + b.nodes[i][j] = in.nodes[i][j]; +# pragma omp critical (NsbSearchAtom_eval) + { + mt.update(metrics, pum_metric); + const int n = in.np/2; + printf("meam1 %9.2e w>0 %d wtr %8.2e npm %8.2e %8.2e %8.2e pum %9.2e | ", + maxeigampm1, all_pve_wts, wtr, metrics[0], metrics[1], metrics[2], + pum_metric); + printf("np %2d subnp", in.np); + for (int i = 0; i < n; ++i) printf(" %d", in.subnp[i]); + printf(" nodes"); + for (int i = 0; i < n; ++i) { + printf(" |"); + for (int j = 0; j < in.subnp[i]; ++j) + printf(" %d", in.nodes[i][j]); + } + printf("\n"); + } + } + }; + + if (esa.size() > 1) { +# pragma omp parallel for schedule(dynamic,1) + for (Int ili = 0; ili < ninput; ++ili) + run1(ili); + } else { + for (Int ili = 0; ili < ninput; ++ili) + run1(ili); + } +} + +void recur (const int np, const std::vector& vnls, + const std::vector& vnls_mid_reg, + Basis b, const int pos, std::vector& esa, + std::vector& input_list, + Int& input_list_pos, MetricsTracker& mt, const Count::Ptr& count, + Restarter& restarter) { + if (pos == -1) { + NsbSearchAtom::Input& in = input_list[input_list_pos]; + in.np = np; + in.maxeigampm1 = 1e-13; + for (int i = 0; i < b.np-1; ++i) + in.subnp[i] = b.subnp[i]; + for (int i = 0; i < b.np-1; ++i) + for (int j = 0; j < b.subnp[i]; ++j) + in.nodes[i][j] = b.nodes[i][j]; + ++input_list_pos; + if (input_list_pos == input_list.size()) { + if (restarter.eval_count >= restarter.dont_eval_if_below) { + // Run a bunch of analyses in parallel. + eval(esa, input_list, input_list_pos, mt, false, count); + if ( ! (count && count->just_count)) { + if ( ! count) + printf("restart eval_count %ld\n", restarter.eval_count); + ++restarter.eval_count; + restarter.write(); + } + } else { + if (count) count->value += input_list_pos; + if ( ! (count && count->just_count)) ++restarter.eval_count; + } + input_list_pos = 0; + } + return; + } + // Set up a basis. + const auto& vnl = (np % 2 == 0 && pos == np/2-1) ? 
+ vnls_mid_reg[b.subnp[pos]] : vnls[b.subnp[pos]]; + for (auto it = vnl->begin(pos); it != vnl->end(); ++it) { + it.get_nodes(b.nodes[pos].data()); + recur(np, vnls, vnls_mid_reg, b, pos-1, esa, input_list, input_list_pos, + mt, count, restarter); + } +} + +static Int run (const int np, int min_ooa = -1, + MetricsTracker::Ptr mt = nullptr, + const Count::Ptr count = nullptr, + const int dont_eval_if_below = 0) { + assert(np <= NsbSearchAtom::Input::np_max); + if ( ! mt) mt = std::make_shared(np); + const bool thread_toplevel = np >= 9; + std::vector esa; + if (thread_toplevel) + esa.resize(omp_get_max_threads()); + else + esa.emplace_back(true); + std::vector input_list(thread_toplevel ? + 1 << 22 : + 1 << 12); + Int input_list_pos = 0; + const bool just_count = count && count->just_count; + if ( ! just_count) printf("np %2d\n", np); + if (min_ooa <= 0) { + if (np == 5) min_ooa = 2; + else if (np <= 6) min_ooa = np-2; + else if (np <= 9) min_ooa = np-3; + else min_ooa = np-4; + } + std::vector vnls(np+1), vnls_mid_reg(np+1); + for (int min_np = np; min_np >= min_ooa+1; --min_np) + vnls[min_np] = std::make_shared(np, min_np); + for (int min_np = np; min_np >= min_ooa+1; --min_np) + vnls_mid_reg[min_np] = std::make_shared(np, min_np, true); + std::vector subnp_list; + make_subnp_list(np, min_ooa+1, subnp_list); + Restarter restarter(mt, np, min_ooa, dont_eval_if_below); + const Int sz = np/2; + const Int n = subnp_list.size()/sz; + for (Int isubnp = 0; isubnp < n; ++isubnp) { + const char* subnp = &subnp_list[isubnp*sz]; + Basis b; + b.np = np; + for (Int j = 0; j < sz; ++j) { + b.subnp[j] = subnp[j]; + b.subnp[np-2-j] = subnp[j]; + } + recur(np, vnls, vnls_mid_reg, b, b.np/2 - 1, esa, + input_list, input_list_pos, *mt, count, restarter); + } + if (input_list_pos > 0) + eval(esa, input_list, input_list_pos, *mt, true, count); +} + +static void run () { + for (int np : nps) run(np); +} +} // namespace find_nodal_subset_bases + +static void 
find_nodal_subset_bases_given_mt ( + const int np, const MetricsTracker::Ptr& mt, const int min_ooa = -1, + const int eval_count = 0) +{ + if (np > 10) { + find_nodal_subset_bases::run(np, min_ooa, mt, nullptr, eval_count); + return; + } + const auto count = std::make_shared(); + find_nodal_subset_bases::run(np, min_ooa, mt, count); + printf("count %ld\n", count->value); + count->total = count->value; + count->value = 0; + count->just_count = false; + find_nodal_subset_bases::run(np, min_ooa, mt, count, eval_count); +} + +static void find_nodal_given_best_offset_nodal ( + const int np, const bool restart_if_available = true) +{ + find_nodal_subset_bases::Restarter::Ptr restarter; + if (restart_if_available) + restarter = find_nodal_subset_bases::read_restart(np); + if (restarter) { + find_nodal_subset_bases_given_mt(np, restarter->mt, restarter->min_ooa, + restarter->eval_count); + } else { + const auto mt = std::make_shared(np); + const Int max_good_np = find_offset_nodal_subset_bases::run(np, mt); + const Int min_ooa = max_good_np-1; + mt->set_pum_max(mt->get_pum_min()); + find_nodal_subset_bases_given_mt(np, mt, min_ooa); + } +} + +static void run_general_unittests () { + int nerr = 0; + { + for (int np = 2; np <= 7; ++np) { + const auto x = islet::get_x_gll(np); + const auto w = islet::get_w_gll(np); + Real sum = 0; + for (int j = 0; j < np; ++j) sum += w[j]; + if (islet::reldif(2, sum) >= 1e-14) ++nerr; + for (int j = 0; j < np/2; ++j) + if (w[j] != w[np-j-1]) ++nerr; + for (int j = 0; j < np/2; ++j) + if (x[j] != -x[np-j-1]) ++nerr; + for (int j = 0; j < np-1; ++j) + if (x[j+1] < x[j]) ++nerr; + } + } + nerr += MaxEigComputer::unittest(); + nerr += SearchAtom::unittest(); + nerr += ValidNodesList::test(); + std::cout << (nerr ? 
"FAIL" : "PASS") << " unit test\n"; +} + +struct Command { + enum Enum { unittest, findoffsetnodal, findnodal, + finduniform, findlegendre, findcheb, + findnodal_given_bestosn}; + static Enum convert (const std::string& s) { + if (s == "unittest") return unittest; + if (s == "findoffsetnodal") return findoffsetnodal; + if (s == "findnodal") return findnodal; + if (s == "findnodal_given_bestosn") return findnodal_given_bestosn; + if (s == "finduniform") return finduniform; + if (s == "findlegendre") return findlegendre; + if (s == "findcheb") return findcheb; + throw std::logic_error("Not a command."); + } +}; + +int main (int argc, char** argv) { + if (argc < 2) { + std::cerr << argv[0] << " \n"; + return -1; + } + + using NosbBasis = SearchAtom::Input; + + const auto command = Command::convert(argv[1]); + bool unittest = false; + Int np = -1; + if (argc > 2) np = std::atoi(argv[2]); + switch (command) { + case Command::unittest: { + unittest = true; + } break; + case Command::findoffsetnodal: { + // Pretty efficient search for stable offset nodal bases. + find_offset_nodal_subset_bases::runall(np); + } break; + case Command::findnodal: { + // Search for stable nodal subset bases. 
+ const auto restarter = find_nodal_subset_bases::read_restart(np); + if (restarter) + find_nodal_subset_bases_given_mt(np, restarter->mt, restarter->min_ooa, + restarter->eval_count); + else + find_nodal_subset_bases_given_mt(np, nullptr); + } break; + case Command::findnodal_given_bestosn: { + if (np == -1) { + std::cerr << argv[0] << " findnodal_given_bestosn np\n"; + return -1; + } + find_nodal_given_best_offset_nodal(np); + } break; + case Command::finduniform: + find_offset_nodal_subset_bases::runall(np, NosbBasis::uniform); break; + case Command::findlegendre: + find_offset_nodal_subset_bases::runall(np, NosbBasis::legendre); break; + case Command::findcheb: + find_offset_nodal_subset_bases::runall(np, NosbBasis::cheb); break; + default: + throw std::logic_error("Not a command."); + } + if (unittest) run_general_unittests(); +} diff --git a/methods/slmm/Makefile b/methods/slmm/Makefile index 0a231de..28f0f15 100644 --- a/methods/slmm/Makefile +++ b/methods/slmm/Makefile @@ -59,30 +59,5 @@ libslmm: $(SLMM_OBJECTS) slmm_c_compat.o clean: rm -f *.o *.mod slmm_test slmmir libslmm.so *.gcov *.gcda *.gcno -slmm_test.o: slmm_defs.hpp slmm_mesh.hpp slmm_gll.hpp slmm_io.hpp slmm_time_int.hpp slmm_gallery.hpp slmm_islet.hpp $(SIQK)/siqk.hpp $(SIQK)/siqk_sqr.hpp -slmmir.o: slmm_defs.hpp slmm_util.hpp slmm_mesh.hpp slmm_gll.hpp slmm_io.hpp slmm_time_int.hpp slmm_gallery.hpp slmm_nla.hpp slmm_spf.hpp slmm_fit_extremum.hpp slmmir_time_int.hpp $(SIQK)/siqk.hpp $(SIQK)/siqk_quadrature.hpp $(SIQK)/siqk_sqr.hpp slmmir_remapper.hpp slmmir_remap_data.hpp slmmir_mono_data.hpp slmm_islet.hpp slmm_accum.hpp slmm_vis.hpp slmmir_time_int.hpp -slmmir_time_int.o: slmm_gallery.hpp slmm_time_int.hpp slmmir_p_refine.hpp slmmir_time_int.hpp -slmmir_time_int_exp.o: slmm_gallery.hpp slmm_time_int.hpp -slmm_test.o: slmm_defs.hpp slmm_util.hpp slmm_mesh.hpp slmm_gll.hpp slmm_io.hpp slmm_time_int.hpp slmm_gallery.hpp slmm_nla.hpp slmm_spf.hpp $(SIQK)/siqk.hpp $(SIQK)/siqk_sqr.hpp -slmm_mesh.o: 
slmm_mesh.hpp $(SIQK)/siqk.hpp slmm_array.hpp -slmm_spf.o: slmm_spf.hpp $(SIQK)/siqk.hpp -slmm_io.o: slmm_io.hpp -slmm_nla.o: slmm_nla.hpp -slmm_time_int.o: slmm_time_int.hpp -slmm_gallery.o: slmm_gallery.hpp -slmm_util.o: slmm_util.hpp -slmmir_remap_data.o: slmmir_remap_data.hpp -slmmir_p_refine.o: slmmir_p_refine.hpp -slmmir_mono_data.o: slmmir_mono_data.hpp -slmmir_remapper.o: slmmir_remapper.hpp slmmir_remap_data.hpp slmmir_mono_data.hpp slmmir_util.hpp slmmir.hpp slmmir_physgrid.hpp -slmmir_remapper_isl.o: slmmir_remapper.hpp slmmir_remap_data.hpp slmmir_mono_data.hpp slmmir_p_refine.hpp slmm_islet.hpp slmmir.hpp slmmir_physgrid.hpp slmm_accum.hpp -slmmir_util.o: slmmir_util.hpp -slmmir_p_refine.o: slmmir_p_refine.hpp slmm_islet.hpp -slmm_islet.o: slmm_islet.hpp -slmm_islet_string.o: slmm_islet.hpp -slmm_accum.o: slmm_accum.hpp -slmm_vis.o: slmm_vis.hpp -slmmir_physgrid.o: slmmir_physgrid.hpp -physgrid.o: slmmir_physgrid.hpp -slmm_basis.o: slmm_basis.hpp -slmm_basis_reduced.o: slmm_islet.hpp slmm_basis_reduced.hpp +# generate by running `bash make.depends` +include make.depends diff --git a/methods/slmm/make-depends.sh b/methods/slmm/make-depends.sh new file mode 100644 index 0000000..0bb5cf3 --- /dev/null +++ b/methods/slmm/make-depends.sh @@ -0,0 +1,3 @@ +for i in *.cpp; do + g++ -I../../siqk -MM $i +done > make.depends diff --git a/methods/slmm/make.depends b/methods/slmm/make.depends new file mode 100644 index 0000000..b938f11 --- /dev/null +++ b/methods/slmm/make.depends @@ -0,0 +1,220 @@ +physgrid.o: physgrid.cpp slmm_gll.hpp slmm_basis.hpp slmm_defs.hpp \ + ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_mesh.hpp slmm_array_tree.hpp \ + slmm_vis.hpp slmmir_util.hpp slmmir_mesh.hpp slmmir.hpp slmm_util.hpp \ + slmm_spf.hpp slmmir_physgrid.hpp slmm_nla.hpp slmm_gallery.hpp \ + 
slmm_time_int.hpp slmmir_d2c.hpp +slmm_accum.o: slmm_accum.cpp slmm_defs.hpp ../../siqk/siqk.hpp \ + ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \ + ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \ + ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \ + slmm_accum.hpp +slmm_array_tree.o: slmm_array_tree.cpp slmm_array_tree.hpp slmm_defs.hpp \ + ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp +slmm_basis.o: slmm_basis.cpp slmm_basis.hpp slmm_defs.hpp \ + ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_gll.hpp slmm_islet.hpp \ + slmm_basis_reduced.hpp slmm_util.hpp +slmm_basis_reduced.o: slmm_basis_reduced.cpp slmm_basis_reduced.hpp \ + slmm_islet.hpp slmm_gll.hpp slmm_basis.hpp slmm_defs.hpp \ + ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp +slmm_c_compat.o: slmm_c_compat.cpp slmm_c_compat.hpp slmm_spf.hpp \ + slmm_defs.hpp ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_gll.hpp slmm_basis.hpp \ + slmm_mesh.hpp slmm_array_tree.hpp slmm_util.hpp +slmm_fit_extremum.o: slmm_fit_extremum.cpp slmm_fit_extremum.hpp \ + slmm_defs.hpp ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_nla.hpp slmm_util.hpp \ + 
slmm_gll.hpp slmm_basis.hpp ../../siqk/siqk_geometry.hpp +slmm_gallery.o: slmm_gallery.cpp slmm_gallery.hpp slmm_defs.hpp \ + ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_time_int.hpp slmm_util.hpp +slmm_io.o: slmm_io.cpp slmm_io.hpp slmm_defs.hpp ../../siqk/siqk.hpp \ + ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \ + ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \ + ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \ + slmm_util.hpp +slmmir.o: slmmir.cpp slmm_defs.hpp ../../siqk/siqk.hpp \ + ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \ + ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \ + ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \ + slmm_mesh.hpp slmm_array_tree.hpp slmm_spf.hpp slmm_gll.hpp \ + slmm_basis.hpp slmm_io.hpp slmm_nla.hpp slmm_util.hpp slmm_gallery.hpp \ + slmm_time_int.hpp slmm_accum.hpp slmm_debug.hpp slmm_fit_extremum.hpp \ + slmm_vis.hpp slmmir_util.hpp slmmir_mesh.hpp slmmir.hpp \ + slmmir_remap_data.hpp slmmir_mono_data.hpp slmmir_remapper.hpp \ + slmmir_p_refine.hpp slmmir_physgrid.hpp slmmir_d2c.hpp \ + slmmir_time_int.hpp slmmir_lauritzen_diag.hpp slmmir_snapshot.hpp +slmmir_d2c.o: slmmir_d2c.cpp slmmir_d2c.hpp slmmir.hpp slmm_defs.hpp \ + ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_util.hpp slmm_spf.hpp +slmmir_lauritzen_diag.o: slmmir_lauritzen_diag.cpp slmmir_p_refine.hpp \ + slmmir_mono_data.hpp slmm_gll.hpp slmm_basis.hpp slmm_defs.hpp \ + ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ 
+ ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_gallery.hpp \ + slmm_time_int.hpp slmm_util.hpp slmmir_mesh.hpp slmmir_remap_data.hpp \ + slmm_nla.hpp slmmir.hpp slmm_spf.hpp slmmir_lauritzen_diag.hpp \ + slmmir_d2c.hpp +slmmir_mono_data.o: slmmir_mono_data.cpp slmmir_mono_data.hpp \ + slmm_gll.hpp slmm_basis.hpp slmm_defs.hpp ../../siqk/siqk.hpp \ + ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \ + ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \ + ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \ + slmm_gallery.hpp slmm_time_int.hpp slmm_util.hpp slmmir_mesh.hpp \ + slmmir_remap_data.hpp slmm_nla.hpp slmmir.hpp slmm_spf.hpp +slmmir_physgrid.o: slmmir_physgrid.cpp slmmir_physgrid.hpp slmm_nla.hpp \ + slmm_array.hpp slmm_util.hpp slmm_defs.hpp ../../siqk/siqk.hpp \ + ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \ + ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \ + ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_gallery.hpp \ + slmm_time_int.hpp slmm_vis.hpp slmmir.hpp slmm_spf.hpp slmmir_d2c.hpp \ + slmmir_mesh.hpp slmm_basis.hpp slmm_mesh.hpp slmm_array_tree.hpp \ + slmm_gll.hpp slmm_io.hpp slmm_debug.hpp slmm_basis_reduced.hpp \ + slmm_islet.hpp slmmir_util.hpp +slmmir_p_refine.o: slmmir_p_refine.cpp slmmir_p_refine.hpp \ + slmmir_mono_data.hpp slmm_gll.hpp slmm_basis.hpp slmm_defs.hpp \ + ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_gallery.hpp \ + slmm_time_int.hpp slmm_util.hpp slmmir_mesh.hpp slmmir_remap_data.hpp \ + slmm_nla.hpp slmmir.hpp slmm_spf.hpp slmmir_util.hpp +slmmir_remap_data.o: slmmir_remap_data.cpp slmmir_remap_data.hpp \ + slmm_nla.hpp slmm_array.hpp slmm_util.hpp slmm_defs.hpp \ + ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + 
../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmmir.hpp slmm_spf.hpp slmmir_mesh.hpp \ + slmm_basis.hpp slmmir_util.hpp slmm_gll.hpp +slmmir_remapper.o: slmmir_remapper.cpp slmm_mesh.hpp slmm_defs.hpp \ + ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_array_tree.hpp \ + slmmir_remapper.hpp slmm_fit_extremum.hpp slmmir_p_refine.hpp \ + slmmir_mono_data.hpp slmm_gll.hpp slmm_basis.hpp slmm_gallery.hpp \ + slmm_time_int.hpp slmm_util.hpp slmmir_mesh.hpp slmmir_remap_data.hpp \ + slmm_nla.hpp slmmir.hpp slmm_spf.hpp slmmir_physgrid.hpp slmm_vis.hpp \ + slmmir_d2c.hpp slmmir_util.hpp +slmmir_remapper_isl.o: slmmir_remapper_isl.cpp slmm_mesh.hpp \ + slmm_defs.hpp ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_array_tree.hpp \ + slmm_accum.hpp slmmir_remapper.hpp slmm_fit_extremum.hpp \ + slmmir_p_refine.hpp slmmir_mono_data.hpp slmm_gll.hpp slmm_basis.hpp \ + slmm_gallery.hpp slmm_time_int.hpp slmm_util.hpp slmmir_mesh.hpp \ + slmmir_remap_data.hpp slmm_nla.hpp slmmir.hpp slmm_spf.hpp \ + slmmir_physgrid.hpp slmm_vis.hpp slmmir_d2c.hpp slmmir_util.hpp +slmmir_snapshot.o: slmmir_snapshot.cpp slmmir_snapshot.hpp slmm_defs.hpp \ + ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_util.hpp slmm_gallery.hpp \ + slmm_time_int.hpp slmmir_mesh.hpp slmm_basis.hpp slmmir_p_refine.hpp \ + slmmir_mono_data.hpp slmm_gll.hpp slmmir_remap_data.hpp slmm_nla.hpp \ + slmmir.hpp slmm_spf.hpp slmm_accum.hpp +slmmir_test.o: slmmir_test.cpp 
slmm_defs.hpp ../../siqk/siqk.hpp \ + ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \ + ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \ + ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \ + slmm_mesh.hpp slmm_array_tree.hpp slmm_debug.hpp +slmmir_time_int.o: slmmir_time_int.cpp slmmir_time_int.hpp \ + slmm_time_int.hpp slmm_defs.hpp ../../siqk/siqk.hpp \ + ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \ + ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \ + ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \ + slmm_util.hpp slmm_gallery.hpp slmmir_mesh.hpp slmm_basis.hpp \ + slmmir_p_refine.hpp slmmir_mono_data.hpp slmm_gll.hpp \ + slmmir_remap_data.hpp slmm_nla.hpp slmmir.hpp slmm_spf.hpp +slmmir_time_int_exp.o: slmmir_time_int_exp.cpp slmmir_time_int.hpp \ + slmm_time_int.hpp slmm_defs.hpp ../../siqk/siqk.hpp \ + ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \ + ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \ + ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \ + slmm_util.hpp slmm_gallery.hpp slmmir_mesh.hpp slmm_basis.hpp \ + slmmir_p_refine.hpp slmmir_mono_data.hpp slmm_gll.hpp \ + slmmir_remap_data.hpp slmm_nla.hpp slmmir.hpp slmm_spf.hpp +slmmir_util.o: slmmir_util.cpp slmmir_util.hpp slmm_basis.hpp \ + slmm_defs.hpp ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp slmmir_mesh.hpp slmmir.hpp \ + slmm_util.hpp slmm_spf.hpp slmm_gll.hpp +slmm_islet.o: slmm_islet.cpp slmm_islet.hpp slmm_gll.hpp slmm_basis.hpp \ + slmm_defs.hpp ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp +slmm_islet_string.o: slmm_islet_string.cpp 
slmm_islet.hpp slmm_gll.hpp \ + slmm_basis.hpp slmm_defs.hpp ../../siqk/siqk.hpp \ + ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \ + ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \ + ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \ + slmm_util.hpp +slmm_mesh.o: slmm_mesh.cpp slmm_mesh.hpp slmm_defs.hpp \ + ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_array_tree.hpp slmm_gll.hpp \ + slmm_basis.hpp slmm_util.hpp slmm_islet.hpp +slmm_nla.o: slmm_nla.cpp slmm_util.hpp slmm_defs.hpp ../../siqk/siqk.hpp \ + ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \ + ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \ + ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \ + slmm_nla.hpp +slmm_spf.o: slmm_spf.cpp slmm_gll.hpp slmm_basis.hpp slmm_defs.hpp \ + ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_spf.hpp slmm_nla.hpp \ + slmm_util.hpp slmm_mesh.hpp slmm_array_tree.hpp slmm_accum.hpp +slmm_spf_lqlt.o: slmm_spf_lqlt.cpp slmm_spf.hpp slmm_defs.hpp \ + ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_array_tree.hpp +slmm_test.o: slmm_test.cpp slmm_defs.hpp ../../siqk/siqk.hpp \ + ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \ + ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \ + ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \ + slmm_mesh.hpp slmm_array_tree.hpp slmm_io.hpp slmm_nla.hpp slmm_util.hpp \ + slmm_spf.hpp slmm_time_int.hpp slmm_gallery.hpp 
slmm_debug.hpp \ + slmm_fit_extremum.hpp slmm_gll.hpp slmm_basis.hpp slmm_islet.hpp \ + slmm_basis_reduced.hpp +slmm_time_int.o: slmm_time_int.cpp slmm_time_int.hpp slmm_defs.hpp \ + ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp slmm_util.hpp +slmm_util.o: slmm_util.cpp slmm_util.hpp slmm_defs.hpp \ + ../../siqk/siqk.hpp ../../siqk/siqk_geometry.hpp \ + ../../siqk/siqk_defs.hpp ../../siqk/siqk_quadrature.hpp \ + ../../siqk/siqk_search.hpp ../../siqk/siqk_intersect.hpp \ + ../../siqk/siqk_sqr.hpp slmm_array.hpp +slmm_vis.o: slmm_vis.cpp slmm_vis.hpp slmm_defs.hpp ../../siqk/siqk.hpp \ + ../../siqk/siqk_geometry.hpp ../../siqk/siqk_defs.hpp \ + ../../siqk/siqk_quadrature.hpp ../../siqk/siqk_search.hpp \ + ../../siqk/siqk_intersect.hpp ../../siqk/siqk_sqr.hpp slmm_array.hpp \ + slmm_io.hpp slmm_util.hpp ../../siqk/siqk_search.hpp \ + ../../siqk/siqk_geometry.hpp diff --git a/methods/slmm/slmm_basis.cpp b/methods/slmm/slmm_basis.cpp index 5ba6ed9..dca8142 100644 --- a/methods/slmm/slmm_basis.cpp +++ b/methods/slmm/slmm_basis.cpp @@ -108,7 +108,7 @@ Int Basis::compute_and_print_weights (const Basis& basis, bool print_x, bool tes const Real* xnode; for (Int np = 2; np <= Basis::np_max; ++np) { // Basis need not support every np. - if ( ! basis.get_x(np, xnode)) continue; + if ( ! basis.get_w(np, xnode)) continue; Real integral[np_max] = {0}; compute_weights(basis, np, integral); if ( ! 
test) { diff --git a/methods/slmm/slmm_islet.cpp b/methods/slmm/slmm_islet.cpp index c24a10f..6584731 100644 --- a/methods/slmm/slmm_islet.cpp +++ b/methods/slmm/slmm_islet.cpp @@ -28,55 +28,6 @@ Int GllOffsetNodal::max_degree (const Int& np) const { return degrees[np]; } -static Real normalize_x (const Real* gll_x, const Real& x) { - const Real x0 = gll_x[1]; - return (x - x0) / (1 - x0); -} - -static void outer_eval (const Real* gll_x, const Real& x, Real v[4]) { - const Real - xbar = normalize_x(gll_x, gll_x[2]), - ooxbar = 1 / xbar, - ybar = 1 / (xbar - 1), - xn = normalize_x(gll_x, x); - v[0] = 0; - v[1] = 1 + ybar*xn*((1 - ooxbar)*xn + ooxbar - xbar); - v[2] = ybar*ooxbar*xn*(xn - 1); - v[3] = ybar*xn*(xbar - xn); -} - -#if 0 -static bool np4_subgrid_eval (const Real* const x_gll, const Real& x, - Real y[4]) { - static constexpr Real - alpha = 0.5527864045000416708, - v = 0.427*(1 + alpha), - x2 = 0.4472135954999579277, // 1/sqrt(5) - x3 = 1 - x2, - det = x2*x3*(x2 - x3), - y2 = alpha, - y3 = v, - c1 = (x3*y2 - x2*y3)/det, - c2 = (-x3*x3*y2 + x2*x2*y3)/det; - if (x < x_gll[1] || x > x_gll[2]) { - Real y4[4]; - GLL::eval_lagrange_poly(4, x_gll, x, y4); - if (x < x_gll[1]) { - outer_eval(x_gll, -x, y); - std::swap(y[0], y[3]); - std::swap(y[1], y[2]); - } else - outer_eval(x_gll, x, y); - const Real x0 = 1 - std::abs(x); - const Real a = (c1*x0 + c2)*x0; - for (int i = 0; i < 4; ++i) - y[i] = a*y[i] + (1 - a)*y4[i]; - } - else - GLL::eval_lagrange_poly(4, x_gll, x, y); - return true; -} -#else static bool np4_subgrid_eval (const Real* const x_gll, const Real& x, Real y[4]) { static const Real c1 = 0.306; @@ -94,7 +45,6 @@ static bool np4_subgrid_eval (const Real* const x_gll, const Real& x, GLL::eval_lagrange_poly(4, x_gll, x, y); return true; } -#endif bool GllOffsetNodal::eval (const Int& np, const Real& x, Real* const v) const { const Real* xnode; @@ -254,13 +204,13 @@ ::eval (const Int& np, const Real& x, Real* const v) const { case 4: return evalon< 
4,2>(xnode, {3,4 }, {0,0 }, x, v); case 5: return evalon< 5,2>(xnode, {3,4 }, {0,0 }, x, v); case 6: return evalon< 6,3>(xnode, {3,4,6 }, {0,0,0 }, x, v); - case 7: return evalon< 7,3>(xnode, {3,4,6 }, {0,0,0 }, x, v); + case 7: return evalon< 7,3>(xnode, {3,4,4 }, {0,0,1 }, x, v); case 8: return evalon< 8,4>(xnode, {4,4,4,4 }, {0,0,1,2 }, x, v); case 9: return evalon< 9,4>(xnode, {4,4,4,4 }, {0,0,1,2 }, x, v); - case 10: return evalon<10,5>(xnode, {4,5,4,4,4 }, {0,0,1,2,3 }, x, v); - case 11: return evalon<11,5>(xnode, {4,5,4,4,4 }, {0,0,1,2,3 }, x, v); - case 12: return evalon<12,6>(xnode, {4,5,5,4,4,4}, {0,0,1,2,3,4}, x, v); - case 13: return evalon<13,6>(xnode, {4,5,6,4,4,4}, {0,0,0,2,3,4}, x, v); + case 10: return evalon<10,5>(xnode, {4,4,4,4,4 }, {0,0,1,2,3 }, x, v); + case 11: return evalon<11,5>(xnode, {4,4,4,4,4 }, {0,0,1,2,3 }, x, v); + case 12: return evalon<12,6>(xnode, {4,4,4,4,4,4}, {0,0,1,2,3,4}, x, v); + case 13: return evalon<13,6>(xnode, {4,4,4,4,4,4}, {0,0,1,2,3,4}, x, v); } return false; } diff --git a/methods/slmm/slmm_runtests.py b/methods/slmm/slmm_runtests.py new file mode 100755 index 0000000..e210180 --- /dev/null +++ b/methods/slmm/slmm_runtests.py @@ -0,0 +1,292 @@ +#!/usr/bin/python + +import os, sys, re, optparse + +def readall (fn): + # Shorthand for reading in all the text in a file. + try: + with open(fn, 'r') as f: + text = f.read() + except: + text = '' + return text + +def writeall (text, fn, for_real): + if for_real: + with open(fn, 'w') as f: + f.write(text) + +def parse_one_liner (text): + class struct: + pass + hits = re.findall('
    .*', text) + hits = re.findall('l2 (?P[^ ]*) .* cv re (?P[^ ]*)' + + '.* cvgll re (?P[^ ]*)' + + '.* mo min [0-9.e\-+]+ (?P[^ ]*) .* ' + + 'max [0-9.e\-+]+ (?P[^ ]*)', hits[0]) + o = struct + o.l2 = float(hits[0][0]) + o.cv = float(hits[0][1]) + o.cv_gll = float(hits[0][2]) + o.mo_min = float(hits[0][3]) + o.mo_max = float(hits[0][4]) + return o + +def runtest (cmd): + outfn = 'runtests.tmp' + os.system(cmd + ' > ' + outfn + ' 2>&1') + return readall(outfn) + +long_output = False + +def print_test (cmd): + print_test.ctr += 1 + ll = 87; + if long_output: + ll = 240 + if len(cmd) > ll: + cmd = cmd[len(cmd)-ll+1:] + fmt = '{{0:.<{0:d}s}}'.format(ll+1) + print '{:3d} '.format(print_test.ctr) + fmt.format(cmd + ' '), + sys.stdout.flush() +print_test.ctr = 0; + +def print_result (passed): + if not passed: + print '***FAILED' + return 1 + else: + print ' PASSED' + return 0 + +def check_passed (cmd): + print_test(cmd) + out = runtest(cmd) + hits = re.findall('PASSED', out) + passed = len(hits) > 0 + return print_result(passed) + +def check_errs (cmd, l2_err, cv=10, cv_gll=10, min=-float('Inf'), max=float('Inf'), + l2_err_is_0=False): + print_test(cmd) + out = runtest(cmd) + o = parse_one_liner(out) + passed = ((o.l2 > 0 or l2_err_is_0) and o.l2 <= l2_err + and o.cv <= cv + and o.cv_gll <= cv_gll + and o.mo_min >= min and o.mo_max <= max) + result = print_result(passed) + if not passed: + print ' ' + cmd + print ((' l2 {:1.2e} cv {:1.2e} cv_gll {:1.2e} mo_min {:1.2e} mo_max {:1.2e}' + + ' but l2_err {:1.2e} cv {:1.2e} cv_gll {:1.2e} min {:1.2e} max {:1.2e}'). + format(o.l2, o.cv, o.cv_gll, o.mo_min, o.mo_max, + l2_err, cv, cv_gll, min, max)) + return result + +p = optparse.OptionParser() +p.add_option('-l', '--long', dest='long', action='store_true', default=False, + help='Long-line output.') +opts, args = p.parse_args() +long_output = opts.long + +try: os.mkdir('tmp') +except: pass + +nerr = 0 +# Unit tests. 
+nerr += check_passed('./slmm_test -q -c test_make_cubedsphere') +nerr += check_passed('./slmm_test -q -c test_gll') +nerr += check_passed('./slmm_test -q -c test_gll_2d') +nerr += check_passed('./slmm_test -q -c test_time_int') +nerr += check_passed('./slmm_test -q -c test_make_gll_mesh') +nerr += check_passed('./slmm_test -q -c test_make_gll_subcell_mesh') +nerr += check_passed('./slmm_test -q -c test_qp_limiter') +nerr += check_passed('./slmm_test -q -c test_face_tree') +nerr += check_passed('./slmm_test -q -c test_spf') +nerr += check_passed('./slmm_test -q -c test_nla') +nerr += check_passed('./slmm_test -q -c test_mass_matrix') +#nerr += check_passed('./slmm_test -q -c test_fit_extremum') + +# Test classical semi-Lagrangian with global filters QLT, CAAS, min-norm2. +base = ('./slmmir -method {method:s} -ode divergent -ic slottedcylinders ' + + '-ic cosinebells -ic gaussianhills -we 0 -np {np:d} -dmc f -mono {mono:s} ' + + '-nsteps 12 -ne {ne:d}') +nerr += check_errs(base.format(method='pcsl', np=4, ne=10, mono='qlt'), + 3.34e-1, cv_gll=5e-14, min=0.1, max=1) # rho is also done with CSL +nerr += check_errs(base.format(method='pcsl', np=6, ne=6, mono='qlt'), + 3.34e-1, cv_gll=5e-14, min=0.1, max=1) # rho is also done with CSL +nerr += check_errs(base.format(method='csl', np=4, ne=10, mono='qlt'), + 3.47e-1, cv_gll=5e-14, min=0.1, max=1) # rho is remapped +nerr += check_errs(base.format(method='pcsl', np=4, ne=10, mono='qlt-pve'), + 3.36e-1, cv_gll=5e-14, min=0, max=2) # >= 0 constraint only +nerr += check_errs(base.format(method='pcsl', np=4, ne=10, mono='caas'), + 3.47e-1, cv_gll=5e-14, min=0.1, max=1) # rho is also done with CSL +nerr += check_errs(base.format(method='csl', np=4, ne=10, mono='caas'), + 3.47e-1, cv_gll=5e-14, min=0.1, max=1) # rho is remapped +nerr += check_errs(base.format(method='csl', np=4, ne=10, mono='mn2'), + 3.47e-1, cv_gll=5e-14, min=0.1, max=1) # rho is remapped +# Tracer consistency test. Apply CSL to constant q but remap rho. 
+nerr += check_errs('./slmmir -method csl -ode divergent -ic constant -we 0 -np 4 ' + + '-dmc f -mono qlt -rit -nsteps 12 -ne 10', + 3e-15, cv_gll=1e-13, min=0.42, max=0.42, l2_err_is_0=True) + +# Test ISL with p-refinement. +base = ('./slmmir -method pcsl -ode divergent -ic gaussianhills ' + + '-we 0 -np {np:d} -dmc f -mono {mono:s} ' + + '-nsteps 12 -ne {ne:d} -timeint {timeint:s}') +nerr += check_errs(base.format(np=12, ne=3, mono='none', timeint='interp'), + 9.939e-3) +nerr += check_errs(base.format(np=12, ne=3, mono='none', timeint='exact'), + 8.793e-3) +base = ('./slmmir -method pcsl -ode divergent -ic slottedcylinders ' + '-we 0 -np {np:d} -dmc f -mono {mono:s} ' + + '-nsteps 12 -ne {ne:d} -timeint interp') +nerr += check_errs(base.format(np=12, ne=3, mono='caas', timeint='interp'), + 2.896e-1, cv_gll=5e-14, min=0.1, max=1) + +# ISL with p-refinement and separate t and v meshes. +base = ('./slmmir -method pcsl -ode divergent -ic gaussianhills ' + + '-we 0 -rit -dmc {dmc:s} -mono {mono:s} -lim {lim:s} -nsteps 13 -T 12 ' + + '-ne 6 -np 8 -timeint interp -prefine {prefine:d} -d2c') +nerr += check_errs(base.format(prefine=0, dmc='es', mono='caas', lim='caas'), 5.968e-03, cv=2e-14) +nerr += check_errs(base.format(prefine=5, dmc='es', mono='caas', lim='caas'), 5.885e-03, cv=4e-14) +nerr += check_errs(base.format(prefine=0, dmc='eh', mono='caas', lim='caas'), 5.968e-03, cv_gll=2e-14) +nerr += check_errs(base.format(prefine=5, dmc='eh', mono='caas', lim='caas'), 5.886e-03, cv_gll=2e-14) +# new global-only method +nerr += check_errs(base.format(prefine=0, dmc='es', mono='caas-node', lim='caas'), 5.968e-03, cv=2e-14) +nerr += check_errs(base.format(prefine=5, dmc='es', mono='caas-node', lim='caas'), 5.885e-03, cv=4e-14) +nerr += check_errs(base.format(prefine=0, dmc='eh', mono='caas-node', lim='caas'), 5.968e-03, cv_gll=2e-14) +nerr += check_errs(base.format(prefine=5, dmc='eh', mono='caas-node', lim='caas'), 5.886e-03, cv_gll=2e-14) +# don't break the no prop 
preserve case +nerr += check_errs(base.format(prefine=5, dmc='es', mono='none', lim='none'), 4.2e-03) +# GllOffsetNodal +base += ' -basis GllOffsetNodal' +nerr += check_errs(base.format(prefine=5, dmc='es', mono='caas', lim='caas'), 5.885e-03, cv=4e-14) +nerr += check_errs(base.format(prefine=5, dmc='eh', mono='caas', lim='caas'), 5.886e-03, cv_gll=2e-14) +nerr += check_errs(base.format(prefine=5, dmc='es', mono='caas-node', lim='caas'), 5.885e-03, cv=4e-14) +nerr += check_errs(base.format(prefine=5, dmc='eh', mono='caas-node', lim='caas'), 5.886e-03, cv_gll=2e-14) + +base = './slmmir -nsteps 12 -ne 10 -we 0 -ode divergent -ic gaussianhills ' + +# DSS for QOF rho, CSL tracer, with QLT. +nerr += check_errs(base + '-np 3 -d2c -method csl -dmc f -mono qlt', + 9.05e-2, cv_gll=2e-14) + +# Cell-integrated method basics. +nerr += check_errs(base + '-np 3', 2.43e-2, 1e-14) +nerr += check_errs(base + '-np 3 -xyz -mono qlt', + 3.18e-2, 4e-15, min=1.495e-08, max=9.518e-01) +nerr += check_errs(base + '-np 3 -xyz -mono caas', + 3.18e-2, 4e-15, min=1.495e-08, max=9.518e-01) +nerr += check_errs(base + '-np 3 -xyz -mono mn2', + 3.18e-2, 4e-15, min=1.495e-08, max=9.518e-01) +nerr += check_errs(base + '-np 3 -xyz -d2c', 3.64e-2, 3e-15) +nerr += check_errs(base + '-np 4 -xyz -d2c', 1.02e-2, 8e-15) +nerr += check_errs(base + '-np 4 -xyz -d2c -method cdg', 1.02e-2, 3e-15) + +# Limiter. +nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' + + '-ic slottedcylinders -np 4 -mono qlt -method ir', + 3.0e-1, cv=3e-14, min=0.1, max=1.0) +nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' + + '-ic slottedcylinders -np 4 -mono qlt -method ir -lim caas', + 3.0e-1, cv=3e-14, min=0.1, max=1.0) +nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' + + '-ic slottedcylinders -np 4 -mono qlt -method cdg', + 3.03e-1, cv=3e-14, min=0.1, max=1.0) +# Multiple tracers. 
+nerr += check_errs(base + '-np 4 -ic correlatedcosinebells 2', 1.02e-2, 2e-7) +# Local DMC with internal mass definition. +nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' + + '-ic gaussianhills -np 4 -dmc es -method ir', + 9.1e-3, cv=2e-13) +nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' + + '-ic gaussianhills -np 4 -dmc es -method cdg', + 9.1e-3, cv=2e-13) +# Local DMC with Homme mass definition. +nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' + + '-ic gaussianhills -np 4 -dmc eh', + 9.1e-3, cv_gll=4e-15) +# Global (weaker than local) DMC with Homme mass definition. +nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' + + '-ic gaussianhills -np 4 -dmc geh', + 9.1e-3, cv_gll=2e-14) +# Local DMC, limiter, internal mass def. +nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' + + '-ic slottedcylinders -np 4 -mono qlt -dmc es', + 3.1e-1, cv=2.3e-13, min=0.1, max=1.0) +# Local DMC, limiter, Homme mass def. +nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' + + '-ic slottedcylinders -np 4 -mono qlt -dmc eh', + 3e-1, cv_gll=5e-14, min=0.1, max=1.0) +# Local DMC, facet transport. +nerr += check_errs(base + '-np 4 -dmc f', 1.42e-2, cv_gll=6e-14) +nerr += check_errs('./slmmir -nsteps 12 -ne 30 -we 0 -ode divergent ' + + '-ic gaussianhills -np 2 -dmc f', + 6.49e-2, cv_gll=1.4e-13) +# With limiter. 
+nerr += check_errs('./slmmir -nsteps 96 -ne 5 -we 0 -ode divergent -method cdg ' + + '-ic slottedcylinders -np 4 -mono qlt -dmc f', + 4.6e-1, cv_gll=4e-14, min=0.1, max=1.0) +nerr += check_errs('./slmmir -nsteps 96 -ne 5 -we 0 -ode divergent -method cdg ' + + '-ic slottedcylinders -np 4 -mono qlt -dmc f -lim caas', + 4.6e-1, cv_gll=4e-14, min=0.1, max=1.0) +nerr += check_errs('./slmmir -nsteps 96 -ne 5 -we 0 -ode divergent -method cdg ' + + '-ic slottedcylinders -np 4 -mono qlt -dmc f -lim caags', + 4.6e-1, cv_gll=4e-14, min=0.1, max=1.0) +nerr += check_errs('./slmmir -nsteps 96 -ne 5 -we 0 -ode divergent -method ir ' + + '-ic slottedcylinders -np 4 -mono qlt -dmc f', + 4.6e-1, cv_gll=4e-14, min=0.1, max=1.0) +# Add an equality constraint to nail DMC even more. In addition, output scalar +# measurements by time step (this is just a test that it runs). +nerr += check_errs('./slmmir -nsteps 96 -ne 5 -we 0 -ode divergent -method cdg ' + + '-ic slottedcylinders -np 4 -mono qlt -dmc ef -o rittest -rit', + 4.6e-1, cv_gll=2e-14, min=0.1, max=1.0) +nerr += check_errs('./slmmir -nsteps 96 -ne 5 -we 0 -ode divergent -method ir ' + + '-ic slottedcylinders -np 4 -mono qlt -dmc ef -o rittest -rit', + 4.6e-1, cv_gll=2e-14, min=0.1, max=1.0) +nerr += check_errs('./slmmir -nsteps 96 -ne 15 -we 0 -ode divergent ' + + '-ic slottedcylinders -np 2 -mono qlt -dmc ef -o rittest -rit', + 4.5e-1, cv_gll=2.2e-14, min=0.1, max=1.0) +# Test the more complicated mono method. +nerr += check_errs('./slmmir -nsteps 12 -ne 10 -we 0 -ode divergent ' + + '-ic gaussianhills -ic slottedcylinders -np 4 -mono qlt -dmc f', + 1.5e-2, cv_gll=8e-14, min=0, max=0.957) +# 3-1 subcell mesh, with new vertices at GLL points. +nerr += check_errs('./slmmir -nsteps 96 -ne 5 -we 0 -ode divergent -tq 4 ' + + '-ic slottedcylinders -np 4 -mesh gllsubcell -mono qlt -dmc ef', + 4.6e-1, cv_gll=2e-14, min=0.1, max=1.0) +# 3-1 subcell mesh, with new vertices at non-GLL points. 
+nerr += check_errs('./slmmir -nsteps 96 -ne 5 -we 0 -ode divergent -tq 4 ' + + '-ic slottedcylinders -np 4 -mesh runisubcell -mono qlt -dmc ef', + 4.5e-1, cv_gll=2e-14, min=0.1, max=1.0) +# Same, but now looking for accuracy difference. +nerr += check_errs('./slmmir -nsteps 12 -ne 5 -we 0 -ode divergent -tq 4 ' + + '-ic gaussianhills -np 4 -mesh gllsubcell -mono qlt -dmc ef', + 7.40e-2, cv_gll=9e-15, min=0, max=0.96) +nerr += check_errs('./slmmir -nsteps 12 -ne 5 -we 0 -ode divergent -tq 4 ' + + '-ic gaussianhills -np 4 -mesh runisubcell -mono qlt -dmc ef', + 5.41e-2, cv_gll=5e-15, min=0, max=0.96) +# We can subdivide cells arbitrarily with runisubcell. +nerr += check_errs('./slmmir -nsteps 12 -ne 2 -we 0 -ode divergent -tq 4 ' + + '-ic gaussianhills -np 10 -mesh runisubcell -mono qlt -dmc ef', + 3.5e-2, cv_gll=3e-15, min=0, max=0.96) +# Tracer-decoupled CMBC tests. +base = ('./slmmir -nsteps 12 -ne 10 -np 4 -ode divergent ' + + '-ic gaussianhills -ic slottedcylinders -ic cosinebells ' + + '-ic correlatedcosinebells -ic xyztrig -dmc {0:s} -mono {mono:s} -we 0') +# This method also is intended to handle tracer consistency, but I haven't put +# together a test for that yet. So test just CMBC. +nerr += check_errs(base.format('f', mono='qlt'), 1.45e-2, cv_gll=6e-14, min=1.495e-8, max=0.956) +nerr += check_errs(base.format('es', mono='qlt'), 9.18e-3, cv=2e-13, min=1.495e-8, max=0.956) +nerr += check_errs(base.format('eh', mono='qlt'), 9.18e-3, cv_gll=1e-14, min=1.495e-8, max=0.956) +# Test that if rho is perturbed, a constant q stays a constant. 
+nerr += check_errs('./slmmir -nsteps 12 -ne 10 -np 4 -ode nondivergent ' + + '-ic constant -dmc ef -mono qlt -we 0 --perturb-rho 0.05', + 1e-14, cv_gll=5e-14, min=0.42, max=0.42, l2_err_is_0=True) +nerr += check_errs('./slmmir -nsteps 12 -ne 10 -np 4 -ode divergent ' + + '-ic constant -dmc ef -mono qlt -we 0 --perturb-rho 0.05', + 1e-14, cv_gll=5e-14, min=0.42, max=0.42, l2_err_is_0=True) + +print '{0:d} tests failed'.format(nerr) diff --git a/methods/slmm/slmmir_remapper_isl.cpp b/methods/slmm/slmmir_remapper_isl.cpp index ce1daeb..cf93492 100644 --- a/methods/slmm/slmmir_remapper_isl.cpp +++ b/methods/slmm/slmmir_remapper_isl.cpp @@ -1167,8 +1167,12 @@ interp (const Mesh& m, const C2DRelations& c2d, const AVec3s& advected_p, const Real ta = gll_x[ni % np], tb = gll_x[ni / np]; Real Jd; if (md_) { + // This is the case of interest and runs when property preservation is + // on. For the case of p-refinement, it runs with np=npv=4, not npt. Jd = calc_isoparametric_jacobian(advected_p, cell, np, ta, tb); } else { + // This happens only when property preservation is off, in which case + // density doesn't couple to the mixing ratios. const Int corners[] = {cell[0], cell[np-1], cell[np*np-1], cell[np*(np-1)]}; Jd = calc_jacobian(advected_p, corners, ta, tb); @@ -1640,7 +1644,7 @@ ::csl (const AVec3s& advected_p, Real* const src_tracer, tgt_rho_impl, tgt_tracer_impl, ntracers, positive_only, false /* don't apply cdr to rho */, cdr_method); } - // In the following two cases, continuity does not hold nore need hold on + // In the following two cases, continuity does not hold nor need hold on // any of the grids. // Map q on tgrid to q on vgrid. isl_impl_->transfer_q_to_v_mesh(run_cdr, ntracers,