From c230d93ab67556b42f255526dd016a87eabdc3ad Mon Sep 17 00:00:00 2001 From: GitHub Actions Bot <> Date: Tue, 15 Oct 2024 09:05:51 +0000 Subject: [PATCH] References for 'https://github.com/BlueBrain/nmodl/pull/1517'. --- nonlinear/coreneuron/art_nonlin.cpp | 704 +++++++++++++++++++++++++ nonlinear/coreneuron/nonlin.cpp | 697 +++++++++++++++++++++++++ nonlinear/neuron/art_nonlin.cpp | 761 +++++++++++++++++++++++++++ nonlinear/neuron/nonlin.cpp | 768 ++++++++++++++++++++++++++++ 4 files changed, 2930 insertions(+) create mode 100644 nonlinear/coreneuron/art_nonlin.cpp create mode 100644 nonlinear/coreneuron/nonlin.cpp create mode 100644 nonlinear/neuron/art_nonlin.cpp create mode 100644 nonlinear/neuron/nonlin.cpp diff --git a/nonlinear/coreneuron/art_nonlin.cpp b/nonlinear/coreneuron/art_nonlin.cpp new file mode 100644 index 00000000..afd20be5 --- /dev/null +++ b/nonlinear/coreneuron/art_nonlin.cpp @@ -0,0 +1,704 @@ +/********************************************************* +Model Name : art_nonlin +Filename : art_nonlin.mod +NMODL Version : 7.7.0 +Vectorized : false +Threadsafe : true +Created : DATE +Simulator : CoreNEURON +Backend : C++ (api-compatibility) +NMODL Compiler : VERSION +*********************************************************/ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * \dir + * \brief Solver for a system of linear equations : Crout matrix decomposition + * + * \file + * \brief Implementation of Crout matrix decomposition (LU decomposition) followed by + * Forward/Backward substitution: Implementation details : (Legacy code) nrn / scopmath / crout.c + */ + +#include +#include + +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +#include "coreneuron/utils/offload.hpp" +#endif + +namespace nmodl { +namespace crout { + +/** + * \brief Crout matrix decomposition : in-place LU Decomposition of matrix a. + * + * Implementation details : (Legacy code) nrn / scopmath / crout.c + * + * Returns: 0 if no error; -1 if matrix is singular or ill-conditioned + */ +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +nrn_pragma_acc(routine seq) +nrn_pragma_omp(declare target) +#endif +template +EIGEN_DEVICE_FUNC inline int Crout(int n, T* const a, int* const perm, double* const rowmax) { + // roundoff is the minimal value for a pivot element without its being considered too close to + // zero + double roundoff = 1.e-20; + int i, j, k, r, pivot, irow, save_i = 0, krow; + T sum, equil_1, equil_2; + + /* Initialize permutation and rowmax vectors */ + + for (i = 0; i < n; i++) { + perm[i] = i; + k = 0; + for (j = 1; j < n; j++) { + if (std::fabs(a[i * n + j]) > std::fabs(a[i * n + k])) { + k = j; + } + } + rowmax[i] = a[i * n + k]; + } + + /* Loop over rows and columns r */ + + for (r = 0; r < n; r++) { + /* + * Operate on rth column. This produces the lower triangular matrix + * of terms needed to transform the constant vector. + */ + + for (i = r; i < n; i++) { + sum = 0.0; + irow = perm[i]; + for (k = 0; k < r; k++) { + krow = perm[k]; + sum += a[irow * n + k] * a[krow * n + r]; + } + a[irow * n + r] -= sum; + } + + /* Find row containing the pivot in the rth column */ + + pivot = perm[r]; + equil_1 = std::fabs(a[pivot * n + r] / rowmax[pivot]); + for (i = r + 1; i < n; i++) { + irow = perm[i]; + equil_2 = std::fabs(a[irow * n + r] / rowmax[irow]); + if (equil_2 > equil_1) { + /* make irow the new pivot row */ + + pivot = irow; + save_i = i; + equil_1 = equil_2; + } + } + + /* Interchange entries in permutation vector if necessary */ + + if (pivot != perm[r]) { + perm[save_i] = perm[r]; + perm[r] = pivot; + } + + /* Check that pivot element is not too small */ + + if (std::fabs(a[pivot * n + r]) < roundoff) { + return -1; + } + + /* + * Operate on row in rth position. This produces the upper + * triangular matrix whose diagonal elements are assumed to be unity. + * This matrix is used in the back substitution algorithm. + */ + + for (j = r + 1; j < n; j++) { + sum = 0.0; + for (k = 0; k < r; k++) { + krow = perm[k]; + sum += a[pivot * n + k] * a[krow * n + j]; + } + a[pivot * n + j] = (a[pivot * n + j] - sum) / a[pivot * n + r]; + } + } + return 0; +} +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +nrn_pragma_omp(end declare target) +#endif + +/** + * \brief Crout matrix decomposition : Forward/Backward substitution. + * + * Implementation details : (Legacy code) nrn / scopmath / crout.c + * + * Returns: no return variable + */ +#define y_(arg) p[y[arg]] +#define b_(arg) b[arg] +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +nrn_pragma_acc(routine seq) +nrn_pragma_omp(declare target) +#endif +template +EIGEN_DEVICE_FUNC inline int solveCrout(int n, + T const* const a, + T const* const b, + T* const p, + int const* const perm, + int const* const y = nullptr) { + int i, j, pivot; + T sum; + + /* Perform forward substitution with pivoting */ + if (y) { + for (i = 0; i < n; i++) { + pivot = perm[i]; + sum = 0.0; + for (j = 0; j < i; j++) { + sum += a[pivot * n + j] * (y_(j)); + } + y_(i) = (b_(pivot) - sum) / a[pivot * n + i]; + } + + /* + * Note that the y vector is already in the correct order for back + * substitution. Perform back substitution, pivoting the matrix but not + * the y vector. There is no need to divide by the diagonal element as + * this is assumed to be unity. + */ + + for (i = n - 1; i >= 0; i--) { + pivot = perm[i]; + sum = 0.0; + for (j = i + 1; j < n; j++) { + sum += a[pivot * n + j] * (y_(j)); + } + y_(i) -= sum; + } + } else { + for (i = 0; i < n; i++) { + pivot = perm[i]; + sum = 0.0; + for (j = 0; j < i; j++) { + sum += a[pivot * n + j] * (p[j]); + } + p[i] = (b_(pivot) - sum) / a[pivot * n + i]; + } + + /* + * Note that the y vector is already in the correct order for back + * substitution. Perform back substitution, pivoting the matrix but not + * the y vector. There is no need to divide by the diagonal element as + * this is assumed to be unity. + */ + + for (i = n - 1; i >= 0; i--) { + pivot = perm[i]; + sum = 0.0; + for (j = i + 1; j < n; j++) { + sum += a[pivot * n + j] * (p[j]); + } + p[i] -= sum; + } + } + return 0; +} +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +nrn_pragma_omp(end declare target) +#endif + +#undef y_ +#undef b_ + +} // namespace crout +} // namespace nmodl + +/** + * \dir + * \brief Newton solver implementations + * + * \file + * \brief Implementation of Newton method for solving system of non-linear equations + */ + +#include +#include + +namespace nmodl { +/// newton solver implementations +namespace newton { + +/** + * @defgroup solver Solver Implementation + * @brief Solver implementation details + * + * Implementation of Newton method for solving system of non-linear equations using Eigen + * - newton::newton_solver with user, e.g. SymPy, provided Jacobian + * + * @{ + */ + +static constexpr int MAX_ITER = 50; +static constexpr double EPS = 1e-13; + +template +EIGEN_DEVICE_FUNC bool is_converged(const Eigen::Matrix& X, + const Eigen::Matrix& J, + const Eigen::Matrix& F, + double eps) { + bool converged = true; + double square_eps = eps * eps; + for (Eigen::Index i = 0; i < N; ++i) { + double square_error = 0.0; + for (Eigen::Index j = 0; j < N; ++j) { + double JX = J(i, j) * X(j); + square_error += JX * JX; + } + + if (F(i) * F(i) > square_eps * square_error) { + converged = false; +// The NVHPC is buggy and wont allow us to short-circuit. +#ifndef __NVCOMPILER + return converged; +#endif + } + } + return converged; +} + +/** + * \brief Newton method with user-provided Jacobian + * + * Newton method with user-provided Jacobian: given initial vector X and a + * functor that calculates `F(X)`, `J(X)` where `J(X)` is the Jacobian of `F(X)`, + * solves for \f$F(X) = 0\f$, starting with initial value of `X` by iterating: + * + * \f[ + * X_{n+1} = X_n - J(X_n)^{-1} F(X_n) + * \f] + * when \f$|F|^2 < eps^2\f$, solution has converged. + * + * @return number of iterations (-1 if failed to converge) + */ +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + // If finite differences are needed, this is stores the stepwidth. + Eigen::Matrix dX; + // Vector to store result of function F(X): + Eigen::Matrix F; + // Matrix to store Jacobian of F(X): + Eigen::Matrix J; + // Solver iteration count: + int iter = -1; + while (++iter < max_iter) { + // calculate F, J from X using user-supplied functor + functor(X, dX, F, J); + if (is_converged(X, J, F, eps)) { + return iter; + } + // In Eigen the default storage order is ColMajor. + // Crout's implementation requires matrices stored in RowMajor order (C-style arrays). + // Therefore, the transposeInPlace is critical such that the data() method to give the rows + // instead of the columns. + if (!J.IsRowMajor) { + J.transposeInPlace(); + } + Eigen::Matrix pivot; + Eigen::Matrix rowmax; + // Check if J is singular + if (nmodl::crout::Crout(N, J.data(), pivot.data(), rowmax.data()) < 0) { + return -1; + } + Eigen::Matrix X_solve; + nmodl::crout::solveCrout(N, J.data(), F.data(), X_solve.data(), pivot.data()); + X -= X_solve; + } + // If we fail to converge after max_iter iterations, return -1 + return -1; +} + +/** + * Newton method template specializations for \f$N <= 4\f$ Use explicit inverse + * of `F` instead of LU decomposition. This is faster, as there is no pivoting + * and therefore no branches, but it is not numerically safe for \f$N > 4\f$. + */ + +template +EIGEN_DEVICE_FUNC int newton_solver_small_N(Eigen::Matrix& X, + FUNC functor, + double eps, + int max_iter) { + bool invertible; + Eigen::Matrix F; + Eigen::Matrix dX; + Eigen::Matrix J, J_inv; + int iter = -1; + while (++iter < max_iter) { + functor(X, dX, F, J); + if (is_converged(X, J, F, eps)) { + return iter; + } + // The inverse can be called from within OpenACC regions without any issue, as opposed to + // Eigen::PartialPivLU. + J.computeInverseWithCheck(J_inv, invertible); + if (invertible) { + X -= J_inv * F; + } else { + return -1; + } + } + return -1; +} + +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + return newton_solver_small_N(X, functor, eps, max_iter); +} + +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + return newton_solver_small_N(X, functor, eps, max_iter); +} + +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + return newton_solver_small_N(X, functor, eps, max_iter); +} + +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + return newton_solver_small_N(X, functor, eps, max_iter); +} + +/** @} */ // end of solver + +} // namespace newton +} // namespace nmodl + + + +namespace coreneuron { + #ifndef NRN_PRCELLSTATE + #define NRN_PRCELLSTATE 0 + #endif + + + /** channel information */ + static const char *mechanism_info[] = { + "7.7.0", + "art_nonlin", + 0, + 0, + "x", + 0, + 0 + }; + + + /** all global variables */ + struct art_nonlin_Store { + int point_type{}; + double x0{}; + int reset{}; + int mech_type{}; + }; + static_assert(std::is_trivially_copy_constructible_v); + static_assert(std::is_trivially_move_constructible_v); + static_assert(std::is_trivially_copy_assignable_v); + static_assert(std::is_trivially_move_assignable_v); + static_assert(std::is_trivially_destructible_v); + art_nonlin_Store art_nonlin_global; + + + /** all mechanism instance variables and global variables */ + struct art_nonlin_Instance { + double* x{}; + double* Dx{}; + const double* node_area{}; + void** point_process{}; + art_nonlin_Store* global{&art_nonlin_global}; + }; + + + /** connect global (scalar) variables to hoc -- */ + static DoubScal hoc_scalar_double[] = { + {nullptr, nullptr} + }; + + + /** connect global (array) variables to hoc -- */ + static DoubVec hoc_vector_double[] = { + {nullptr, nullptr, 0} + }; + + + static inline int first_pointer_var_index() { + return -1; + } + + + static inline int first_random_var_index() { + return -1; + } + + + static inline int float_variables_size() { + return 2; + } + + + static inline int int_variables_size() { + return 2; + } + + + static inline int get_mech_type() { + return art_nonlin_global.mech_type; + } + + + static inline Memb_list* get_memb_list(NrnThread* nt) { + if (!nt->_ml_list) { + return nullptr; + } + return nt->_ml_list[get_mech_type()]; + } + + + static inline void* mem_alloc(size_t num, size_t size, size_t alignment = 64) { + size_t aligned_size = ((num*size + alignment - 1) / alignment) * alignment; + void* ptr = aligned_alloc(alignment, aligned_size); + memset(ptr, 0, aligned_size); + return ptr; + } + + + static inline void mem_free(void* ptr) { + free(ptr); + } + + + static inline void coreneuron_abort() { + abort(); + } + + // Allocate instance structure + static void nrn_private_constructor_art_nonlin(NrnThread* nt, Memb_list* ml, int type) { + assert(!ml->instance); + assert(!ml->global_variables); + assert(ml->global_variables_size == 0); + auto* const inst = new art_nonlin_Instance{}; + assert(inst->global == &art_nonlin_global); + ml->instance = inst; + ml->global_variables = inst->global; + ml->global_variables_size = sizeof(art_nonlin_Store); + } + + // Deallocate the instance structure + static void nrn_private_destructor_art_nonlin(NrnThread* nt, Memb_list* ml, int type) { + auto* const inst = static_cast(ml->instance); + assert(inst); + assert(inst->global); + assert(inst->global == &art_nonlin_global); + assert(inst->global == ml->global_variables); + assert(ml->global_variables_size == sizeof(art_nonlin_Store)); + delete inst; + ml->instance = nullptr; + ml->global_variables = nullptr; + ml->global_variables_size = 0; + } + + /** initialize mechanism instance variables */ + static inline void setup_instance(NrnThread* nt, Memb_list* ml) { + auto* const inst = static_cast(ml->instance); + assert(inst); + assert(inst->global); + assert(inst->global == &art_nonlin_global); + assert(inst->global == ml->global_variables); + assert(ml->global_variables_size == sizeof(art_nonlin_Store)); + int pnodecount = ml->_nodecount_padded; + Datum* indexes = ml->pdata; + inst->x = ml->data+0*pnodecount; + inst->Dx = ml->data+1*pnodecount; + inst->node_area = nt->_data; + inst->point_process = nt->_vdata; + } + + + + static void nrn_alloc_art_nonlin(double* data, Datum* indexes, int type) { + // do nothing + } + + + void nrn_constructor_art_nonlin(NrnThread* nt, Memb_list* ml, int type) { + #ifndef CORENEURON_BUILD + int nodecount = ml->nodecount; + int pnodecount = ml->_nodecount_padded; + const int* node_index = ml->nodeindices; + double* data = ml->data; + const double* voltage = nt->_actual_v; + Datum* indexes = ml->pdata; + ThreadDatum* thread = ml->_thread; + auto* const inst = static_cast(ml->instance); + + #endif + } + + + void nrn_destructor_art_nonlin(NrnThread* nt, Memb_list* ml, int type) { + #ifndef CORENEURON_BUILD + int nodecount = ml->nodecount; + int pnodecount = ml->_nodecount_padded; + const int* node_index = ml->nodeindices; + double* data = ml->data; + const double* voltage = nt->_actual_v; + Datum* indexes = ml->pdata; + ThreadDatum* thread = ml->_thread; + auto* const inst = static_cast(ml->instance); + + #endif + } + + + inline double solve_art_nonlin(int id, int pnodecount, art_nonlin_Instance* inst, double* data, const Datum* indexes, ThreadDatum* thread, NrnThread* nt, double v); + inline double residual_art_nonlin(int id, int pnodecount, art_nonlin_Instance* inst, double* data, const Datum* indexes, ThreadDatum* thread, NrnThread* nt, double v, double _lx); + + + struct functor_art_nonlin_0 { + NrnThread* nt; + art_nonlin_Instance* inst; + int id; + int pnodecount; + double v; + const Datum* indexes; + double* data; + ThreadDatum* thread; + + void initialize() { + } + + functor_art_nonlin_0(NrnThread* nt, art_nonlin_Instance* inst, int id, int pnodecount, double v, const Datum* indexes, double* data, ThreadDatum* thread) + : nt(nt), inst(inst), id(id), pnodecount(pnodecount), v(v), indexes(indexes), data(data), thread(thread) + {} + void operator()(const Eigen::Matrix& nmodl_eigen_xm, Eigen::Matrix& nmodl_eigen_dxm, Eigen::Matrix& nmodl_eigen_fm, Eigen::Matrix& nmodl_eigen_jm) const { + const double* nmodl_eigen_x = nmodl_eigen_xm.data(); + double* nmodl_eigen_dx = nmodl_eigen_dxm.data(); + double* nmodl_eigen_j = nmodl_eigen_jm.data(); + double* nmodl_eigen_f = nmodl_eigen_fm.data(); + nmodl_eigen_dx[0] = std::max(1e-6, 0.02*std::fabs(nmodl_eigen_x[0])); + nmodl_eigen_f[static_cast(0)] = 4.0 - pow(nmodl_eigen_x[static_cast(0)], 2.0); + nmodl_eigen_j[static_cast(0)] = -2.0 * nmodl_eigen_x[static_cast(0)]; + } + + void finalize() { + } + }; + + + inline double solve_art_nonlin(int id, int pnodecount, art_nonlin_Instance* inst, double* data, const Datum* indexes, ThreadDatum* thread, NrnThread* nt, double v) { + double ret_solve = 0.0; + inst->x[id] = 1.0; + + Eigen::Matrix nmodl_eigen_xm; + double* nmodl_eigen_x = nmodl_eigen_xm.data(); + nmodl_eigen_x[static_cast(0)] = inst->x[id]; + // call newton solver + functor_art_nonlin_0 newton_functor(nt, inst, id, pnodecount, v, indexes, data, thread); + newton_functor.initialize(); + int newton_iterations = nmodl::newton::newton_solver(nmodl_eigen_xm, newton_functor); + if (newton_iterations < 0) assert(false && "Newton solver did not converge!"); + inst->x[id] = nmodl_eigen_x[static_cast(0)]; + newton_functor.initialize(); // TODO mimic calling F again. + newton_functor.finalize(); + + + ret_solve = inst->x[id]; + return ret_solve; + } + + + inline double residual_art_nonlin(int id, int pnodecount, art_nonlin_Instance* inst, double* data, const Datum* indexes, ThreadDatum* thread, NrnThread* nt, double v, double _lx) { + double ret_residual = 0.0; + ret_residual = _lx - 2.0; + return ret_residual; + } + + + /** initialize channel */ + void nrn_init_art_nonlin(NrnThread* nt, Memb_list* ml, int type) { + int nodecount = ml->nodecount; + int pnodecount = ml->_nodecount_padded; + const int* node_index = ml->nodeindices; + double* data = ml->data; + const double* voltage = nt->_actual_v; + Datum* indexes = ml->pdata; + ThreadDatum* thread = ml->_thread; + + setup_instance(nt, ml); + auto* const inst = static_cast(ml->instance); + + if (_nrn_skip_initmodel == 0) { + #pragma omp simd + #pragma ivdep + for (int id = 0; id < nodecount; id++) { + double v = 0.0; + inst->x[id] = inst->global->x0; + } + } + } + + + /** register channel with the simulator */ + void _art_nonlin_reg() { + + int mech_type = nrn_get_mechtype("art_nonlin"); + art_nonlin_global.mech_type = mech_type; + if (mech_type == -1) { + return; + } + + _nrn_layout_reg(mech_type, 0); + point_register_mech(mechanism_info, nrn_alloc_art_nonlin, nullptr, nullptr, nullptr, nrn_init_art_nonlin, nrn_private_constructor_art_nonlin, nrn_private_destructor_art_nonlin, first_pointer_var_index(), nullptr, nullptr, 0); + + hoc_register_prop_size(mech_type, float_variables_size(), int_variables_size()); + hoc_register_dparam_semantics(mech_type, 0, "area"); + hoc_register_dparam_semantics(mech_type, 1, "pntproc"); + add_nrn_artcell(mech_type, 0); + hoc_register_var(hoc_scalar_double, hoc_vector_double, NULL); + } +} diff --git a/nonlinear/coreneuron/nonlin.cpp b/nonlinear/coreneuron/nonlin.cpp new file mode 100644 index 00000000..775e3a61 --- /dev/null +++ b/nonlinear/coreneuron/nonlin.cpp @@ -0,0 +1,697 @@ +/********************************************************* +Model Name : nonlin +Filename : nonlin.mod +NMODL Version : 7.7.0 +Vectorized : false +Threadsafe : true +Created : DATE +Simulator : CoreNEURON +Backend : C++ (api-compatibility) +NMODL Compiler : VERSION +*********************************************************/ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * \dir + * \brief Solver for a system of linear equations : Crout matrix decomposition + * + * \file + * \brief Implementation of Crout matrix decomposition (LU decomposition) followed by + * Forward/Backward substitution: Implementation details : (Legacy code) nrn / scopmath / crout.c + */ + +#include +#include + +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +#include "coreneuron/utils/offload.hpp" +#endif + +namespace nmodl { +namespace crout { + +/** + * \brief Crout matrix decomposition : in-place LU Decomposition of matrix a. + * + * Implementation details : (Legacy code) nrn / scopmath / crout.c + * + * Returns: 0 if no error; -1 if matrix is singular or ill-conditioned + */ +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +nrn_pragma_acc(routine seq) +nrn_pragma_omp(declare target) +#endif +template +EIGEN_DEVICE_FUNC inline int Crout(int n, T* const a, int* const perm, double* const rowmax) { + // roundoff is the minimal value for a pivot element without its being considered too close to + // zero + double roundoff = 1.e-20; + int i, j, k, r, pivot, irow, save_i = 0, krow; + T sum, equil_1, equil_2; + + /* Initialize permutation and rowmax vectors */ + + for (i = 0; i < n; i++) { + perm[i] = i; + k = 0; + for (j = 1; j < n; j++) { + if (std::fabs(a[i * n + j]) > std::fabs(a[i * n + k])) { + k = j; + } + } + rowmax[i] = a[i * n + k]; + } + + /* Loop over rows and columns r */ + + for (r = 0; r < n; r++) { + /* + * Operate on rth column. This produces the lower triangular matrix + * of terms needed to transform the constant vector. + */ + + for (i = r; i < n; i++) { + sum = 0.0; + irow = perm[i]; + for (k = 0; k < r; k++) { + krow = perm[k]; + sum += a[irow * n + k] * a[krow * n + r]; + } + a[irow * n + r] -= sum; + } + + /* Find row containing the pivot in the rth column */ + + pivot = perm[r]; + equil_1 = std::fabs(a[pivot * n + r] / rowmax[pivot]); + for (i = r + 1; i < n; i++) { + irow = perm[i]; + equil_2 = std::fabs(a[irow * n + r] / rowmax[irow]); + if (equil_2 > equil_1) { + /* make irow the new pivot row */ + + pivot = irow; + save_i = i; + equil_1 = equil_2; + } + } + + /* Interchange entries in permutation vector if necessary */ + + if (pivot != perm[r]) { + perm[save_i] = perm[r]; + perm[r] = pivot; + } + + /* Check that pivot element is not too small */ + + if (std::fabs(a[pivot * n + r]) < roundoff) { + return -1; + } + + /* + * Operate on row in rth position. This produces the upper + * triangular matrix whose diagonal elements are assumed to be unity. + * This matrix is used in the back substitution algorithm. + */ + + for (j = r + 1; j < n; j++) { + sum = 0.0; + for (k = 0; k < r; k++) { + krow = perm[k]; + sum += a[pivot * n + k] * a[krow * n + j]; + } + a[pivot * n + j] = (a[pivot * n + j] - sum) / a[pivot * n + r]; + } + } + return 0; +} +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +nrn_pragma_omp(end declare target) +#endif + +/** + * \brief Crout matrix decomposition : Forward/Backward substitution. + * + * Implementation details : (Legacy code) nrn / scopmath / crout.c + * + * Returns: no return variable + */ +#define y_(arg) p[y[arg]] +#define b_(arg) b[arg] +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +nrn_pragma_acc(routine seq) +nrn_pragma_omp(declare target) +#endif +template +EIGEN_DEVICE_FUNC inline int solveCrout(int n, + T const* const a, + T const* const b, + T* const p, + int const* const perm, + int const* const y = nullptr) { + int i, j, pivot; + T sum; + + /* Perform forward substitution with pivoting */ + if (y) { + for (i = 0; i < n; i++) { + pivot = perm[i]; + sum = 0.0; + for (j = 0; j < i; j++) { + sum += a[pivot * n + j] * (y_(j)); + } + y_(i) = (b_(pivot) - sum) / a[pivot * n + i]; + } + + /* + * Note that the y vector is already in the correct order for back + * substitution. Perform back substitution, pivoting the matrix but not + * the y vector. There is no need to divide by the diagonal element as + * this is assumed to be unity. + */ + + for (i = n - 1; i >= 0; i--) { + pivot = perm[i]; + sum = 0.0; + for (j = i + 1; j < n; j++) { + sum += a[pivot * n + j] * (y_(j)); + } + y_(i) -= sum; + } + } else { + for (i = 0; i < n; i++) { + pivot = perm[i]; + sum = 0.0; + for (j = 0; j < i; j++) { + sum += a[pivot * n + j] * (p[j]); + } + p[i] = (b_(pivot) - sum) / a[pivot * n + i]; + } + + /* + * Note that the y vector is already in the correct order for back + * substitution. Perform back substitution, pivoting the matrix but not + * the y vector. There is no need to divide by the diagonal element as + * this is assumed to be unity. + */ + + for (i = n - 1; i >= 0; i--) { + pivot = perm[i]; + sum = 0.0; + for (j = i + 1; j < n; j++) { + sum += a[pivot * n + j] * (p[j]); + } + p[i] -= sum; + } + } + return 0; +} +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +nrn_pragma_omp(end declare target) +#endif + +#undef y_ +#undef b_ + +} // namespace crout +} // namespace nmodl + +/** + * \dir + * \brief Newton solver implementations + * + * \file + * \brief Implementation of Newton method for solving system of non-linear equations + */ + +#include +#include + +namespace nmodl { +/// newton solver implementations +namespace newton { + +/** + * @defgroup solver Solver Implementation + * @brief Solver implementation details + * + * Implementation of Newton method for solving system of non-linear equations using Eigen + * - newton::newton_solver with user, e.g. SymPy, provided Jacobian + * + * @{ + */ + +static constexpr int MAX_ITER = 50; +static constexpr double EPS = 1e-13; + +template +EIGEN_DEVICE_FUNC bool is_converged(const Eigen::Matrix& X, + const Eigen::Matrix& J, + const Eigen::Matrix& F, + double eps) { + bool converged = true; + double square_eps = eps * eps; + for (Eigen::Index i = 0; i < N; ++i) { + double square_error = 0.0; + for (Eigen::Index j = 0; j < N; ++j) { + double JX = J(i, j) * X(j); + square_error += JX * JX; + } + + if (F(i) * F(i) > square_eps * square_error) { + converged = false; +// The NVHPC is buggy and wont allow us to short-circuit. +#ifndef __NVCOMPILER + return converged; +#endif + } + } + return converged; +} + +/** + * \brief Newton method with user-provided Jacobian + * + * Newton method with user-provided Jacobian: given initial vector X and a + * functor that calculates `F(X)`, `J(X)` where `J(X)` is the Jacobian of `F(X)`, + * solves for \f$F(X) = 0\f$, starting with initial value of `X` by iterating: + * + * \f[ + * X_{n+1} = X_n - J(X_n)^{-1} F(X_n) + * \f] + * when \f$|F|^2 < eps^2\f$, solution has converged. + * + * @return number of iterations (-1 if failed to converge) + */ +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + // If finite differences are needed, this is stores the stepwidth. + Eigen::Matrix dX; + // Vector to store result of function F(X): + Eigen::Matrix F; + // Matrix to store Jacobian of F(X): + Eigen::Matrix J; + // Solver iteration count: + int iter = -1; + while (++iter < max_iter) { + // calculate F, J from X using user-supplied functor + functor(X, dX, F, J); + if (is_converged(X, J, F, eps)) { + return iter; + } + // In Eigen the default storage order is ColMajor. + // Crout's implementation requires matrices stored in RowMajor order (C-style arrays). + // Therefore, the transposeInPlace is critical such that the data() method to give the rows + // instead of the columns. + if (!J.IsRowMajor) { + J.transposeInPlace(); + } + Eigen::Matrix pivot; + Eigen::Matrix rowmax; + // Check if J is singular + if (nmodl::crout::Crout(N, J.data(), pivot.data(), rowmax.data()) < 0) { + return -1; + } + Eigen::Matrix X_solve; + nmodl::crout::solveCrout(N, J.data(), F.data(), X_solve.data(), pivot.data()); + X -= X_solve; + } + // If we fail to converge after max_iter iterations, return -1 + return -1; +} + +/** + * Newton method template specializations for \f$N <= 4\f$ Use explicit inverse + * of `F` instead of LU decomposition. This is faster, as there is no pivoting + * and therefore no branches, but it is not numerically safe for \f$N > 4\f$. + */ + +template +EIGEN_DEVICE_FUNC int newton_solver_small_N(Eigen::Matrix& X, + FUNC functor, + double eps, + int max_iter) { + bool invertible; + Eigen::Matrix F; + Eigen::Matrix dX; + Eigen::Matrix J, J_inv; + int iter = -1; + while (++iter < max_iter) { + functor(X, dX, F, J); + if (is_converged(X, J, F, eps)) { + return iter; + } + // The inverse can be called from within OpenACC regions without any issue, as opposed to + // Eigen::PartialPivLU. + J.computeInverseWithCheck(J_inv, invertible); + if (invertible) { + X -= J_inv * F; + } else { + return -1; + } + } + return -1; +} + +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + return newton_solver_small_N(X, functor, eps, max_iter); +} + +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + return newton_solver_small_N(X, functor, eps, max_iter); +} + +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + return newton_solver_small_N(X, functor, eps, max_iter); +} + +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + return newton_solver_small_N(X, functor, eps, max_iter); +} + +/** @} */ // end of solver + +} // namespace newton +} // namespace nmodl + + + +namespace coreneuron { + #ifndef NRN_PRCELLSTATE + #define NRN_PRCELLSTATE 0 + #endif + + + /** channel information */ + static const char *mechanism_info[] = { + "7.7.0", + "nonlin", + 0, + 0, + "x_nonlin", + 0, + 0 + }; + + + /** all global variables */ + struct nonlin_Store { + double x0{}; + int reset{}; + int mech_type{}; + }; + static_assert(std::is_trivially_copy_constructible_v); + static_assert(std::is_trivially_move_constructible_v); + static_assert(std::is_trivially_copy_assignable_v); + static_assert(std::is_trivially_move_assignable_v); + static_assert(std::is_trivially_destructible_v); + nonlin_Store nonlin_global; + + + /** all mechanism instance variables and global variables */ + struct nonlin_Instance { + double* x{}; + double* Dx{}; + nonlin_Store* global{&nonlin_global}; + }; + + + /** connect global (scalar) variables to hoc -- */ + static DoubScal hoc_scalar_double[] = { + {nullptr, nullptr} + }; + + + /** connect global (array) variables to hoc -- */ + static DoubVec hoc_vector_double[] = { + {nullptr, nullptr, 0} + }; + + + static inline int first_pointer_var_index() { + return -1; + } + + + static inline int first_random_var_index() { + return -1; + } + + + static inline int float_variables_size() { + return 2; + } + + + static inline int int_variables_size() { + return 0; + } + + + static inline int get_mech_type() { + return nonlin_global.mech_type; + } + + + static inline Memb_list* get_memb_list(NrnThread* nt) { + if (!nt->_ml_list) { + return nullptr; + } + return nt->_ml_list[get_mech_type()]; + } + + + static inline void* mem_alloc(size_t num, size_t size, size_t alignment = 64) { + size_t aligned_size = ((num*size + alignment - 1) / alignment) * alignment; + void* ptr = aligned_alloc(alignment, aligned_size); + memset(ptr, 0, aligned_size); + return ptr; + } + + + static inline void mem_free(void* ptr) { + free(ptr); + } + + + static inline void coreneuron_abort() { + abort(); + } + + // Allocate instance structure + static void nrn_private_constructor_nonlin(NrnThread* nt, Memb_list* ml, int type) { + assert(!ml->instance); + assert(!ml->global_variables); + assert(ml->global_variables_size == 0); + auto* const inst = new nonlin_Instance{}; + assert(inst->global == &nonlin_global); + ml->instance = inst; + ml->global_variables = inst->global; + ml->global_variables_size = sizeof(nonlin_Store); + } + + // Deallocate the instance structure + static void nrn_private_destructor_nonlin(NrnThread* nt, Memb_list* ml, int type) { + auto* const inst = static_cast(ml->instance); + assert(inst); + assert(inst->global); + assert(inst->global == &nonlin_global); + assert(inst->global == ml->global_variables); + assert(ml->global_variables_size == sizeof(nonlin_Store)); + delete inst; + ml->instance = nullptr; + ml->global_variables = nullptr; + ml->global_variables_size = 0; + } + + /** initialize mechanism instance variables */ + static inline void setup_instance(NrnThread* nt, Memb_list* ml) { + auto* const inst = static_cast(ml->instance); + assert(inst); + assert(inst->global); + assert(inst->global == &nonlin_global); + assert(inst->global == ml->global_variables); + assert(ml->global_variables_size == sizeof(nonlin_Store)); + int pnodecount = ml->_nodecount_padded; + Datum* indexes = ml->pdata; + inst->x = ml->data+0*pnodecount; + inst->Dx = ml->data+1*pnodecount; + } + + + + static void nrn_alloc_nonlin(double* data, Datum* indexes, int type) { + // do nothing + } + + + void nrn_constructor_nonlin(NrnThread* nt, Memb_list* ml, int type) { + #ifndef CORENEURON_BUILD + int nodecount = ml->nodecount; + int pnodecount = ml->_nodecount_padded; + const int* node_index = ml->nodeindices; + double* data = ml->data; + const double* voltage = nt->_actual_v; + Datum* indexes = ml->pdata; + ThreadDatum* thread = ml->_thread; + auto* const inst = static_cast(ml->instance); + + #endif + } + + + void nrn_destructor_nonlin(NrnThread* nt, Memb_list* ml, int type) { + #ifndef CORENEURON_BUILD + int nodecount = ml->nodecount; + int pnodecount = ml->_nodecount_padded; + const int* node_index = ml->nodeindices; + double* data = ml->data; + const double* voltage = nt->_actual_v; + Datum* indexes = ml->pdata; + ThreadDatum* thread = ml->_thread; + auto* const inst = static_cast(ml->instance); + + #endif + } + + + inline double solve_nonlin(int id, int pnodecount, nonlin_Instance* inst, double* data, const Datum* indexes, ThreadDatum* thread, NrnThread* nt, double v); + inline double residual_nonlin(int id, int pnodecount, nonlin_Instance* inst, double* data, const Datum* indexes, ThreadDatum* thread, NrnThread* nt, double v, double _lx); + + + struct functor_nonlin_0 { + NrnThread* nt; + nonlin_Instance* inst; + int id; + int pnodecount; + double v; + const Datum* indexes; + double* data; + ThreadDatum* thread; + + void initialize() { + } + + functor_nonlin_0(NrnThread* nt, nonlin_Instance* inst, int id, int pnodecount, double v, const Datum* indexes, double* data, ThreadDatum* thread) + : nt(nt), inst(inst), id(id), pnodecount(pnodecount), v(v), indexes(indexes), data(data), thread(thread) + {} + void operator()(const Eigen::Matrix& nmodl_eigen_xm, Eigen::Matrix& nmodl_eigen_dxm, Eigen::Matrix& nmodl_eigen_fm, Eigen::Matrix& nmodl_eigen_jm) const { + const double* nmodl_eigen_x = nmodl_eigen_xm.data(); + double* nmodl_eigen_dx = nmodl_eigen_dxm.data(); + double* nmodl_eigen_j = nmodl_eigen_jm.data(); + double* nmodl_eigen_f = nmodl_eigen_fm.data(); + nmodl_eigen_dx[0] = std::max(1e-6, 0.02*std::fabs(nmodl_eigen_x[0])); + nmodl_eigen_f[static_cast(0)] = 4.0 - pow(nmodl_eigen_x[static_cast(0)], 2.0); + nmodl_eigen_j[static_cast(0)] = -2.0 * nmodl_eigen_x[static_cast(0)]; + } + + void finalize() { + } + }; + + + inline double solve_nonlin(int id, int pnodecount, nonlin_Instance* inst, double* data, const Datum* indexes, ThreadDatum* thread, NrnThread* nt, double v) { + double ret_solve = 0.0; + inst->x[id] = 1.0; + + Eigen::Matrix nmodl_eigen_xm; + double* nmodl_eigen_x = nmodl_eigen_xm.data(); + nmodl_eigen_x[static_cast(0)] = inst->x[id]; + // call newton solver + functor_nonlin_0 newton_functor(nt, inst, id, pnodecount, v, indexes, data, thread); + newton_functor.initialize(); + int newton_iterations = nmodl::newton::newton_solver(nmodl_eigen_xm, newton_functor); + if (newton_iterations < 0) assert(false && "Newton solver did not converge!"); + inst->x[id] = nmodl_eigen_x[static_cast(0)]; + newton_functor.initialize(); // TODO mimic calling F again. + newton_functor.finalize(); + + + ret_solve = inst->x[id]; + return ret_solve; + } + + + inline double residual_nonlin(int id, int pnodecount, nonlin_Instance* inst, double* data, const Datum* indexes, ThreadDatum* thread, NrnThread* nt, double v, double _lx) { + double ret_residual = 0.0; + ret_residual = _lx - 2.0; + return ret_residual; + } + + + /** initialize channel */ + void nrn_init_nonlin(NrnThread* nt, Memb_list* ml, int type) { + int nodecount = ml->nodecount; + int pnodecount = ml->_nodecount_padded; + const int* node_index = ml->nodeindices; + double* data = ml->data; + const double* voltage = nt->_actual_v; + Datum* indexes = ml->pdata; + ThreadDatum* thread = ml->_thread; + + setup_instance(nt, ml); + auto* const inst = static_cast(ml->instance); + + if (_nrn_skip_initmodel == 0) { + #pragma omp simd + #pragma ivdep + for (int id = 0; id < nodecount; id++) { + int node_id = node_index[id]; + double v = voltage[node_id]; + inst->x[id] = inst->global->x0; + } + } + } + + + /** register channel with the simulator */ + void _nonlin_reg() { + + int mech_type = nrn_get_mechtype("nonlin"); + nonlin_global.mech_type = mech_type; + if (mech_type == -1) { + return; + } + + _nrn_layout_reg(mech_type, 0); + register_mech(mechanism_info, nrn_alloc_nonlin, nullptr, nullptr, nullptr, nrn_init_nonlin, nrn_private_constructor_nonlin, nrn_private_destructor_nonlin, first_pointer_var_index(), 0); + + hoc_register_prop_size(mech_type, float_variables_size(), int_variables_size()); + hoc_register_var(hoc_scalar_double, hoc_vector_double, NULL); + } +} diff --git a/nonlinear/neuron/art_nonlin.cpp b/nonlinear/neuron/art_nonlin.cpp new file mode 100644 index 00000000..3ecd8f34 --- /dev/null +++ b/nonlinear/neuron/art_nonlin.cpp @@ -0,0 +1,761 @@ +/********************************************************* +Model Name : art_nonlin +Filename : art_nonlin.mod +NMODL Version : 7.7.0 +Vectorized : false +Threadsafe : true +Created : DATE +Simulator : NEURON +Backend : C++ (api-compatibility) +NMODL Compiler : VERSION +*********************************************************/ + +#include +#include +#include +#include +#include +#include + +/** + * \dir + * \brief Solver for a system of linear equations : Crout matrix decomposition + * + * \file + * \brief Implementation of Crout matrix decomposition (LU decomposition) followed by + * Forward/Backward substitution: Implementation details : (Legacy code) nrn / scopmath / crout.c + */ + +#include +#include + +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +#include "coreneuron/utils/offload.hpp" +#endif + +namespace nmodl { +namespace crout { + +/** + * \brief Crout matrix decomposition : in-place LU Decomposition of matrix a. + * + * Implementation details : (Legacy code) nrn / scopmath / crout.c + * + * Returns: 0 if no error; -1 if matrix is singular or ill-conditioned + */ +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +nrn_pragma_acc(routine seq) +nrn_pragma_omp(declare target) +#endif +template +EIGEN_DEVICE_FUNC inline int Crout(int n, T* const a, int* const perm, double* const rowmax) { + // roundoff is the minimal value for a pivot element without its being considered too close to + // zero + double roundoff = 1.e-20; + int i, j, k, r, pivot, irow, save_i = 0, krow; + T sum, equil_1, equil_2; + + /* Initialize permutation and rowmax vectors */ + + for (i = 0; i < n; i++) { + perm[i] = i; + k = 0; + for (j = 1; j < n; j++) { + if (std::fabs(a[i * n + j]) > std::fabs(a[i * n + k])) { + k = j; + } + } + rowmax[i] = a[i * n + k]; + } + + /* Loop over rows and columns r */ + + for (r = 0; r < n; r++) { + /* + * Operate on rth column. This produces the lower triangular matrix + * of terms needed to transform the constant vector. + */ + + for (i = r; i < n; i++) { + sum = 0.0; + irow = perm[i]; + for (k = 0; k < r; k++) { + krow = perm[k]; + sum += a[irow * n + k] * a[krow * n + r]; + } + a[irow * n + r] -= sum; + } + + /* Find row containing the pivot in the rth column */ + + pivot = perm[r]; + equil_1 = std::fabs(a[pivot * n + r] / rowmax[pivot]); + for (i = r + 1; i < n; i++) { + irow = perm[i]; + equil_2 = std::fabs(a[irow * n + r] / rowmax[irow]); + if (equil_2 > equil_1) { + /* make irow the new pivot row */ + + pivot = irow; + save_i = i; + equil_1 = equil_2; + } + } + + /* Interchange entries in permutation vector if necessary */ + + if (pivot != perm[r]) { + perm[save_i] = perm[r]; + perm[r] = pivot; + } + + /* Check that pivot element is not too small */ + + if (std::fabs(a[pivot * n + r]) < roundoff) { + return -1; + } + + /* + * Operate on row in rth position. This produces the upper + * triangular matrix whose diagonal elements are assumed to be unity. + * This matrix is used in the back substitution algorithm. + */ + + for (j = r + 1; j < n; j++) { + sum = 0.0; + for (k = 0; k < r; k++) { + krow = perm[k]; + sum += a[pivot * n + k] * a[krow * n + j]; + } + a[pivot * n + j] = (a[pivot * n + j] - sum) / a[pivot * n + r]; + } + } + return 0; +} +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +nrn_pragma_omp(end declare target) +#endif + +/** + * \brief Crout matrix decomposition : Forward/Backward substitution. + * + * Implementation details : (Legacy code) nrn / scopmath / crout.c + * + * Returns: no return variable + */ +#define y_(arg) p[y[arg]] +#define b_(arg) b[arg] +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +nrn_pragma_acc(routine seq) +nrn_pragma_omp(declare target) +#endif +template +EIGEN_DEVICE_FUNC inline int solveCrout(int n, + T const* const a, + T const* const b, + T* const p, + int const* const perm, + int const* const y = nullptr) { + int i, j, pivot; + T sum; + + /* Perform forward substitution with pivoting */ + if (y) { + for (i = 0; i < n; i++) { + pivot = perm[i]; + sum = 0.0; + for (j = 0; j < i; j++) { + sum += a[pivot * n + j] * (y_(j)); + } + y_(i) = (b_(pivot) - sum) / a[pivot * n + i]; + } + + /* + * Note that the y vector is already in the correct order for back + * substitution. Perform back substitution, pivoting the matrix but not + * the y vector. There is no need to divide by the diagonal element as + * this is assumed to be unity. + */ + + for (i = n - 1; i >= 0; i--) { + pivot = perm[i]; + sum = 0.0; + for (j = i + 1; j < n; j++) { + sum += a[pivot * n + j] * (y_(j)); + } + y_(i) -= sum; + } + } else { + for (i = 0; i < n; i++) { + pivot = perm[i]; + sum = 0.0; + for (j = 0; j < i; j++) { + sum += a[pivot * n + j] * (p[j]); + } + p[i] = (b_(pivot) - sum) / a[pivot * n + i]; + } + + /* + * Note that the y vector is already in the correct order for back + * substitution. Perform back substitution, pivoting the matrix but not + * the y vector. There is no need to divide by the diagonal element as + * this is assumed to be unity. + */ + + for (i = n - 1; i >= 0; i--) { + pivot = perm[i]; + sum = 0.0; + for (j = i + 1; j < n; j++) { + sum += a[pivot * n + j] * (p[j]); + } + p[i] -= sum; + } + } + return 0; +} +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +nrn_pragma_omp(end declare target) +#endif + +#undef y_ +#undef b_ + +} // namespace crout +} // namespace nmodl + +/** + * \dir + * \brief Newton solver implementations + * + * \file + * \brief Implementation of Newton method for solving system of non-linear equations + */ + +#include +#include + +namespace nmodl { +/// newton solver implementations +namespace newton { + +/** + * @defgroup solver Solver Implementation + * @brief Solver implementation details + * + * Implementation of Newton method for solving system of non-linear equations using Eigen + * - newton::newton_solver with user, e.g. SymPy, provided Jacobian + * + * @{ + */ + +static constexpr int MAX_ITER = 50; +static constexpr double EPS = 1e-13; + +template +EIGEN_DEVICE_FUNC bool is_converged(const Eigen::Matrix& X, + const Eigen::Matrix& J, + const Eigen::Matrix& F, + double eps) { + bool converged = true; + double square_eps = eps * eps; + for (Eigen::Index i = 0; i < N; ++i) { + double square_error = 0.0; + for (Eigen::Index j = 0; j < N; ++j) { + double JX = J(i, j) * X(j); + square_error += JX * JX; + } + + if (F(i) * F(i) > square_eps * square_error) { + converged = false; +// The NVHPC is buggy and wont allow us to short-circuit. +#ifndef __NVCOMPILER + return converged; +#endif + } + } + return converged; +} + +/** + * \brief Newton method with user-provided Jacobian + * + * Newton method with user-provided Jacobian: given initial vector X and a + * functor that calculates `F(X)`, `J(X)` where `J(X)` is the Jacobian of `F(X)`, + * solves for \f$F(X) = 0\f$, starting with initial value of `X` by iterating: + * + * \f[ + * X_{n+1} = X_n - J(X_n)^{-1} F(X_n) + * \f] + * when \f$|F|^2 < eps^2\f$, solution has converged. + * + * @return number of iterations (-1 if failed to converge) + */ +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + // If finite differences are needed, this is stores the stepwidth. + Eigen::Matrix dX; + // Vector to store result of function F(X): + Eigen::Matrix F; + // Matrix to store Jacobian of F(X): + Eigen::Matrix J; + // Solver iteration count: + int iter = -1; + while (++iter < max_iter) { + // calculate F, J from X using user-supplied functor + functor(X, dX, F, J); + if (is_converged(X, J, F, eps)) { + return iter; + } + // In Eigen the default storage order is ColMajor. + // Crout's implementation requires matrices stored in RowMajor order (C-style arrays). + // Therefore, the transposeInPlace is critical such that the data() method to give the rows + // instead of the columns. + if (!J.IsRowMajor) { + J.transposeInPlace(); + } + Eigen::Matrix pivot; + Eigen::Matrix rowmax; + // Check if J is singular + if (nmodl::crout::Crout(N, J.data(), pivot.data(), rowmax.data()) < 0) { + return -1; + } + Eigen::Matrix X_solve; + nmodl::crout::solveCrout(N, J.data(), F.data(), X_solve.data(), pivot.data()); + X -= X_solve; + } + // If we fail to converge after max_iter iterations, return -1 + return -1; +} + +/** + * Newton method template specializations for \f$N <= 4\f$ Use explicit inverse + * of `F` instead of LU decomposition. This is faster, as there is no pivoting + * and therefore no branches, but it is not numerically safe for \f$N > 4\f$. + */ + +template +EIGEN_DEVICE_FUNC int newton_solver_small_N(Eigen::Matrix& X, + FUNC functor, + double eps, + int max_iter) { + bool invertible; + Eigen::Matrix F; + Eigen::Matrix dX; + Eigen::Matrix J, J_inv; + int iter = -1; + while (++iter < max_iter) { + functor(X, dX, F, J); + if (is_converged(X, J, F, eps)) { + return iter; + } + // The inverse can be called from within OpenACC regions without any issue, as opposed to + // Eigen::PartialPivLU. + J.computeInverseWithCheck(J_inv, invertible); + if (invertible) { + X -= J_inv * F; + } else { + return -1; + } + } + return -1; +} + +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + return newton_solver_small_N(X, functor, eps, max_iter); +} + +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + return newton_solver_small_N(X, functor, eps, max_iter); +} + +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + return newton_solver_small_N(X, functor, eps, max_iter); +} + +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + return newton_solver_small_N(X, functor, eps, max_iter); +} + +/** @} */ // end of solver + +} // namespace newton +} // namespace nmodl + + +#include "mech_api.h" +#include "neuron/cache/mechanism_range.hpp" +#include "nrniv_mf.h" +#include "section_fwd.hpp" + +/* NEURON global macro definitions */ +/* NOT VECTORIZED */ +#define NRN_VECTORIZED 0 + +static constexpr auto number_of_datum_variables = 2; +static constexpr auto number_of_floating_point_variables = 2; + +namespace { +template +using _nrn_mechanism_std_vector = std::vector; +using _nrn_model_sorted_token = neuron::model_sorted_token; +using _nrn_mechanism_cache_range = neuron::cache::MechanismRange; +using _nrn_mechanism_cache_instance = neuron::cache::MechanismInstance; +using _nrn_non_owning_id_without_container = neuron::container::non_owning_identifier_without_container; +template +using _nrn_mechanism_field = neuron::mechanism::field; +template +void _nrn_mechanism_register_data_fields(Args&&... args) { + neuron::mechanism::register_data_fields(std::forward(args)...); +} +} // namespace + +extern Prop* nrn_point_prop_; +extern Node* nrn_alloc_node_; + + +namespace neuron { + #ifndef NRN_PRCELLSTATE + #define NRN_PRCELLSTATE 0 + #endif + + + /** channel information */ + static const char *mechanism_info[] = { + "7.7.0", + "art_nonlin", + 0, + 0, + "x", + 0, + 0 + }; + + + /* NEURON global variables */ + static int mech_type; + static int _pointtype; + static _nrn_mechanism_std_vector _extcall_thread; + + + /** all global variables */ + struct art_nonlin_Store { + double x0{0}; + }; + static_assert(std::is_trivially_copy_constructible_v); + static_assert(std::is_trivially_move_constructible_v); + static_assert(std::is_trivially_copy_assignable_v); + static_assert(std::is_trivially_move_assignable_v); + static_assert(std::is_trivially_destructible_v); + art_nonlin_Store art_nonlin_global; + auto x0_art_nonlin() -> std::decay::type { + return art_nonlin_global.x0; + } + + static std::vector _parameter_defaults = { + }; + + + /** all mechanism instance variables and global variables */ + struct art_nonlin_Instance { + double* x{}; + double* Dx{}; + const double* const* node_area{}; + art_nonlin_Store* global{&art_nonlin_global}; + }; + + + struct art_nonlin_NodeData { + int const * nodeindices; + double const * node_voltages; + double * node_diagonal; + double * node_rhs; + int nodecount; + }; + + + static art_nonlin_Instance make_instance_art_nonlin(_nrn_mechanism_cache_range& _lmc) { + return art_nonlin_Instance { + _lmc.template fpfield_ptr<0>(), + _lmc.template fpfield_ptr<1>(), + _lmc.template dptr_field_ptr<0>() + }; + } + + + static art_nonlin_NodeData make_node_data_art_nonlin(NrnThread& nt, Memb_list& _ml_arg) { + return art_nonlin_NodeData { + _ml_arg.nodeindices, + nt.node_voltage_storage(), + nt.node_d_storage(), + nt.node_rhs_storage(), + _ml_arg.nodecount + }; + } + static art_nonlin_NodeData make_node_data_art_nonlin(Prop * _prop) { + static std::vector node_index{0}; + Node* _node = _nrn_mechanism_access_node(_prop); + return art_nonlin_NodeData { + node_index.data(), + &_nrn_mechanism_access_voltage(_node), + &_nrn_mechanism_access_d(_node), + &_nrn_mechanism_access_rhs(_node), + 1 + }; + } + + void nrn_destructor_art_nonlin(Prop* prop); + + + static void nrn_alloc_art_nonlin(Prop* _prop) { + Datum *_ppvar = nullptr; + if (nrn_point_prop_) { + _nrn_mechanism_access_alloc_seq(_prop) = _nrn_mechanism_access_alloc_seq(nrn_point_prop_); + _ppvar = _nrn_mechanism_access_dparam(nrn_point_prop_); + } else { + _ppvar = nrn_prop_datum_alloc(mech_type, 2, _prop); + _nrn_mechanism_access_dparam(_prop) = _ppvar; + _nrn_mechanism_cache_instance _lmc{_prop}; + size_t const _iml = 0; + assert(_nrn_mechanism_get_num_vars(_prop) == 2); + /*initialize range parameters*/ + } + _nrn_mechanism_access_dparam(_prop) = _ppvar; + if(!nrn_point_prop_) { + } + } + + + /* Mechanism procedures and functions */ + inline double solve_art_nonlin(_nrn_mechanism_cache_range& _lmc, art_nonlin_Instance& inst, size_t id, Datum* _ppvar, Datum* _thread, NrnThread* nt); + inline double residual_art_nonlin(_nrn_mechanism_cache_range& _lmc, art_nonlin_Instance& inst, size_t id, Datum* _ppvar, Datum* _thread, NrnThread* nt, double _lx); + static void _apply_diffusion_function(ldifusfunc2_t _f, const _nrn_model_sorted_token& _sorted_token, NrnThread& _nt) { + } + + /* Point Process specific functions */ + static void* _hoc_create_pnt(Object* _ho) { + return create_point_process(_pointtype, _ho); + } + static void _hoc_destroy_pnt(void* _vptr) { + destroy_point_process(_vptr); + } + static double _hoc_loc_pnt(void* _vptr) { + return loc_point_process(_pointtype, _vptr); + } + static double _hoc_has_loc(void* _vptr) { + return has_loc_point(_vptr); + } + static double _hoc_get_loc_pnt(void* _vptr) { + return (get_loc_point_process(_vptr)); + } + /* Neuron setdata functions */ + extern void _nrn_setdata_reg(int, void(*)(Prop*)); + static void _setdata(Prop* _prop) { + } + static void _hoc_setdata(void* _vptr) { + Prop* _prop; + _prop = ((Point_process*)_vptr)->prop; + _setdata(_prop); + } + + + struct functor_art_nonlin_0 { + _nrn_mechanism_cache_range& _lmc; + art_nonlin_Instance& inst; + size_t id; + Datum* _ppvar; + Datum* _thread; + NrnThread* nt; + + void initialize() { + } + + functor_art_nonlin_0(_nrn_mechanism_cache_range& _lmc, art_nonlin_Instance& inst, size_t id, Datum* _ppvar, Datum* _thread, NrnThread* nt) + : _lmc(_lmc), inst(inst), id(id), _ppvar(_ppvar), _thread(_thread), nt(nt) + {} + void operator()(const Eigen::Matrix& nmodl_eigen_xm, Eigen::Matrix& nmodl_eigen_dxm, Eigen::Matrix& nmodl_eigen_fm, Eigen::Matrix& nmodl_eigen_jm) const { + const double* nmodl_eigen_x = nmodl_eigen_xm.data(); + double* nmodl_eigen_dx = nmodl_eigen_dxm.data(); + double* nmodl_eigen_j = nmodl_eigen_jm.data(); + double* nmodl_eigen_f = nmodl_eigen_fm.data(); + nmodl_eigen_dx[0] = std::max(1e-6, 0.02*std::fabs(nmodl_eigen_x[0])); + nmodl_eigen_f[static_cast(0)] = 4.0 - pow(nmodl_eigen_x[static_cast(0)], 2.0); + nmodl_eigen_j[static_cast(0)] = -2.0 * nmodl_eigen_x[static_cast(0)]; + } + + void finalize() { + } + }; + + + /** connect global (scalar) variables to hoc -- */ + static DoubScal hoc_scalar_double[] = { + {nullptr, nullptr} + }; + + + /** connect global (array) variables to hoc -- */ + static DoubVec hoc_vector_double[] = { + {nullptr, nullptr, 0} + }; + + + /* declaration of user functions */ + static double _hoc_solve(void*); + static double _hoc_residual(void*); + + + /* connect user functions to hoc names */ + static VoidFunc hoc_intfunc[] = { + {0, 0} + }; + static Member_func _member_func[] = { + {"loc", _hoc_loc_pnt}, + {"has_loc", _hoc_has_loc}, + {"get_loc", _hoc_get_loc_pnt}, + {"solve", _hoc_solve}, + {"residual", _hoc_residual}, + {nullptr, nullptr} + }; + static double _hoc_solve(void* _vptr) { + double _r{}; + Datum* _ppvar; + Datum* _thread; + NrnThread* nt; + auto* const _pnt = static_cast(_vptr); + auto* const _p = _pnt->prop; + if (!_p) { + hoc_execerror("POINT_PROCESS data instance not valid", nullptr); + } + _nrn_mechanism_cache_instance _lmc{_p}; + size_t const id{}; + _ppvar = _nrn_mechanism_access_dparam(_p); + _thread = _extcall_thread.data(); + nt = static_cast(_pnt->_vnt); + auto inst = make_instance_art_nonlin(_lmc); + _r = solve_art_nonlin(_lmc, inst, id, _ppvar, _thread, nt); + return(_r); + } + static double _hoc_residual(void* _vptr) { + double _r{}; + Datum* _ppvar; + Datum* _thread; + NrnThread* nt; + auto* const _pnt = static_cast(_vptr); + auto* const _p = _pnt->prop; + if (!_p) { + hoc_execerror("POINT_PROCESS data instance not valid", nullptr); + } + _nrn_mechanism_cache_instance _lmc{_p}; + size_t const id{}; + _ppvar = _nrn_mechanism_access_dparam(_p); + _thread = _extcall_thread.data(); + nt = static_cast(_pnt->_vnt); + auto inst = make_instance_art_nonlin(_lmc); + _r = residual_art_nonlin(_lmc, inst, id, _ppvar, _thread, nt, *getarg(1)); + return(_r); + } + + + inline double solve_art_nonlin(_nrn_mechanism_cache_range& _lmc, art_nonlin_Instance& inst, size_t id, Datum* _ppvar, Datum* _thread, NrnThread* nt) { + double ret_solve = 0.0; + inst.x[id] = 1.0; + + Eigen::Matrix nmodl_eigen_xm; + double* nmodl_eigen_x = nmodl_eigen_xm.data(); + nmodl_eigen_x[static_cast(0)] = inst.x[id]; + // call newton solver + functor_art_nonlin_0 newton_functor(_lmc, inst, id, _ppvar, _thread, nt); + newton_functor.initialize(); + int newton_iterations = nmodl::newton::newton_solver(nmodl_eigen_xm, newton_functor); + if (newton_iterations < 0) assert(false && "Newton solver did not converge!"); + inst.x[id] = nmodl_eigen_x[static_cast(0)]; + newton_functor.initialize(); // TODO mimic calling F again. + newton_functor.finalize(); + + + ret_solve = inst.x[id]; + return ret_solve; + } + + + inline double residual_art_nonlin(_nrn_mechanism_cache_range& _lmc, art_nonlin_Instance& inst, size_t id, Datum* _ppvar, Datum* _thread, NrnThread* nt, double _lx) { + double ret_residual = 0.0; + ret_residual = _lx - 2.0; + return ret_residual; + } + + + void nrn_init_art_nonlin(const _nrn_model_sorted_token& _sorted_token, NrnThread* nt, Memb_list* _ml_arg, int _type) { + _nrn_mechanism_cache_range _lmc{_sorted_token, *nt, *_ml_arg, _ml_arg->type()}; + auto inst = make_instance_art_nonlin(_lmc); + auto* _thread = _ml_arg->_thread; + auto nodecount = _ml_arg->nodecount; + for (int id = 0; id < nodecount; id++) { + auto* _ppvar = _ml_arg->pdata[id]; + inst.x[id] = inst.global->x0; + } + } + + + static void nrn_jacob_art_nonlin(const _nrn_model_sorted_token& _sorted_token, NrnThread* nt, Memb_list* _ml_arg, int _type) { + _nrn_mechanism_cache_range _lmc{_sorted_token, *nt, *_ml_arg, _ml_arg->type()}; + auto inst = make_instance_art_nonlin(_lmc); + auto* _thread = _ml_arg->_thread; + auto nodecount = _ml_arg->nodecount; + for (int id = 0; id < nodecount; id++) { + } + } + void nrn_destructor_art_nonlin(Prop* prop) { + Datum* _ppvar = _nrn_mechanism_access_dparam(prop); + _nrn_mechanism_cache_instance _lmc{prop}; + const size_t id = 0; + auto inst = make_instance_art_nonlin(_lmc); + + } + + + static void _initlists() { + } + + + /** register channel with the simulator */ + extern "C" void _art_nonlin_reg() { + _initlists(); + + _pointtype = point_register_mech(mechanism_info, nrn_alloc_art_nonlin, nullptr, nullptr, nullptr, nrn_init_art_nonlin, -1, 1, _hoc_create_pnt, _hoc_destroy_pnt, _member_func); + + mech_type = nrn_get_mechtype(mechanism_info[1]); + hoc_register_parm_default(mech_type, &_parameter_defaults); + _nrn_mechanism_register_data_fields(mech_type, + _nrn_mechanism_field{"x"} /* 0 */, + _nrn_mechanism_field{"Dx"} /* 1 */, + _nrn_mechanism_field{"node_area", "area"} /* 0 */, + _nrn_mechanism_field{"point_process", "pntproc"} /* 1 */ + ); + + hoc_register_prop_size(mech_type, 2, 2); + hoc_register_dparam_semantics(mech_type, 0, "area"); + hoc_register_dparam_semantics(mech_type, 1, "pntproc"); + add_nrn_artcell(mech_type, 0); + hoc_register_var(hoc_scalar_double, hoc_vector_double, hoc_intfunc); + } +} diff --git a/nonlinear/neuron/nonlin.cpp b/nonlinear/neuron/nonlin.cpp new file mode 100644 index 00000000..cd4661a8 --- /dev/null +++ b/nonlinear/neuron/nonlin.cpp @@ -0,0 +1,768 @@ +/********************************************************* +Model Name : nonlin +Filename : nonlin.mod +NMODL Version : 7.7.0 +Vectorized : false +Threadsafe : true +Created : DATE +Simulator : NEURON +Backend : C++ (api-compatibility) +NMODL Compiler : VERSION +*********************************************************/ + +#include +#include +#include +#include +#include +#include + +/** + * \dir + * \brief Solver for a system of linear equations : Crout matrix decomposition + * + * \file + * \brief Implementation of Crout matrix decomposition (LU decomposition) followed by + * Forward/Backward substitution: Implementation details : (Legacy code) nrn / scopmath / crout.c + */ + +#include +#include + +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +#include "coreneuron/utils/offload.hpp" +#endif + +namespace nmodl { +namespace crout { + +/** + * \brief Crout matrix decomposition : in-place LU Decomposition of matrix a. + * + * Implementation details : (Legacy code) nrn / scopmath / crout.c + * + * Returns: 0 if no error; -1 if matrix is singular or ill-conditioned + */ +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +nrn_pragma_acc(routine seq) +nrn_pragma_omp(declare target) +#endif +template +EIGEN_DEVICE_FUNC inline int Crout(int n, T* const a, int* const perm, double* const rowmax) { + // roundoff is the minimal value for a pivot element without its being considered too close to + // zero + double roundoff = 1.e-20; + int i, j, k, r, pivot, irow, save_i = 0, krow; + T sum, equil_1, equil_2; + + /* Initialize permutation and rowmax vectors */ + + for (i = 0; i < n; i++) { + perm[i] = i; + k = 0; + for (j = 1; j < n; j++) { + if (std::fabs(a[i * n + j]) > std::fabs(a[i * n + k])) { + k = j; + } + } + rowmax[i] = a[i * n + k]; + } + + /* Loop over rows and columns r */ + + for (r = 0; r < n; r++) { + /* + * Operate on rth column. This produces the lower triangular matrix + * of terms needed to transform the constant vector. + */ + + for (i = r; i < n; i++) { + sum = 0.0; + irow = perm[i]; + for (k = 0; k < r; k++) { + krow = perm[k]; + sum += a[irow * n + k] * a[krow * n + r]; + } + a[irow * n + r] -= sum; + } + + /* Find row containing the pivot in the rth column */ + + pivot = perm[r]; + equil_1 = std::fabs(a[pivot * n + r] / rowmax[pivot]); + for (i = r + 1; i < n; i++) { + irow = perm[i]; + equil_2 = std::fabs(a[irow * n + r] / rowmax[irow]); + if (equil_2 > equil_1) { + /* make irow the new pivot row */ + + pivot = irow; + save_i = i; + equil_1 = equil_2; + } + } + + /* Interchange entries in permutation vector if necessary */ + + if (pivot != perm[r]) { + perm[save_i] = perm[r]; + perm[r] = pivot; + } + + /* Check that pivot element is not too small */ + + if (std::fabs(a[pivot * n + r]) < roundoff) { + return -1; + } + + /* + * Operate on row in rth position. This produces the upper + * triangular matrix whose diagonal elements are assumed to be unity. + * This matrix is used in the back substitution algorithm. + */ + + for (j = r + 1; j < n; j++) { + sum = 0.0; + for (k = 0; k < r; k++) { + krow = perm[k]; + sum += a[pivot * n + k] * a[krow * n + j]; + } + a[pivot * n + j] = (a[pivot * n + j] - sum) / a[pivot * n + r]; + } + } + return 0; +} +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +nrn_pragma_omp(end declare target) +#endif + +/** + * \brief Crout matrix decomposition : Forward/Backward substitution. + * + * Implementation details : (Legacy code) nrn / scopmath / crout.c + * + * Returns: no return variable + */ +#define y_(arg) p[y[arg]] +#define b_(arg) b[arg] +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +nrn_pragma_acc(routine seq) +nrn_pragma_omp(declare target) +#endif +template +EIGEN_DEVICE_FUNC inline int solveCrout(int n, + T const* const a, + T const* const b, + T* const p, + int const* const perm, + int const* const y = nullptr) { + int i, j, pivot; + T sum; + + /* Perform forward substitution with pivoting */ + if (y) { + for (i = 0; i < n; i++) { + pivot = perm[i]; + sum = 0.0; + for (j = 0; j < i; j++) { + sum += a[pivot * n + j] * (y_(j)); + } + y_(i) = (b_(pivot) - sum) / a[pivot * n + i]; + } + + /* + * Note that the y vector is already in the correct order for back + * substitution. Perform back substitution, pivoting the matrix but not + * the y vector. There is no need to divide by the diagonal element as + * this is assumed to be unity. + */ + + for (i = n - 1; i >= 0; i--) { + pivot = perm[i]; + sum = 0.0; + for (j = i + 1; j < n; j++) { + sum += a[pivot * n + j] * (y_(j)); + } + y_(i) -= sum; + } + } else { + for (i = 0; i < n; i++) { + pivot = perm[i]; + sum = 0.0; + for (j = 0; j < i; j++) { + sum += a[pivot * n + j] * (p[j]); + } + p[i] = (b_(pivot) - sum) / a[pivot * n + i]; + } + + /* + * Note that the y vector is already in the correct order for back + * substitution. Perform back substitution, pivoting the matrix but not + * the y vector. There is no need to divide by the diagonal element as + * this is assumed to be unity. + */ + + for (i = n - 1; i >= 0; i--) { + pivot = perm[i]; + sum = 0.0; + for (j = i + 1; j < n; j++) { + sum += a[pivot * n + j] * (p[j]); + } + p[i] -= sum; + } + } + return 0; +} +#if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) +nrn_pragma_omp(end declare target) +#endif + +#undef y_ +#undef b_ + +} // namespace crout +} // namespace nmodl + +/** + * \dir + * \brief Newton solver implementations + * + * \file + * \brief Implementation of Newton method for solving system of non-linear equations + */ + +#include +#include + +namespace nmodl { +/// newton solver implementations +namespace newton { + +/** + * @defgroup solver Solver Implementation + * @brief Solver implementation details + * + * Implementation of Newton method for solving system of non-linear equations using Eigen + * - newton::newton_solver with user, e.g. SymPy, provided Jacobian + * + * @{ + */ + +static constexpr int MAX_ITER = 50; +static constexpr double EPS = 1e-13; + +template +EIGEN_DEVICE_FUNC bool is_converged(const Eigen::Matrix& X, + const Eigen::Matrix& J, + const Eigen::Matrix& F, + double eps) { + bool converged = true; + double square_eps = eps * eps; + for (Eigen::Index i = 0; i < N; ++i) { + double square_error = 0.0; + for (Eigen::Index j = 0; j < N; ++j) { + double JX = J(i, j) * X(j); + square_error += JX * JX; + } + + if (F(i) * F(i) > square_eps * square_error) { + converged = false; +// The NVHPC is buggy and wont allow us to short-circuit. +#ifndef __NVCOMPILER + return converged; +#endif + } + } + return converged; +} + +/** + * \brief Newton method with user-provided Jacobian + * + * Newton method with user-provided Jacobian: given initial vector X and a + * functor that calculates `F(X)`, `J(X)` where `J(X)` is the Jacobian of `F(X)`, + * solves for \f$F(X) = 0\f$, starting with initial value of `X` by iterating: + * + * \f[ + * X_{n+1} = X_n - J(X_n)^{-1} F(X_n) + * \f] + * when \f$|F|^2 < eps^2\f$, solution has converged. + * + * @return number of iterations (-1 if failed to converge) + */ +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + // If finite differences are needed, this is stores the stepwidth. + Eigen::Matrix dX; + // Vector to store result of function F(X): + Eigen::Matrix F; + // Matrix to store Jacobian of F(X): + Eigen::Matrix J; + // Solver iteration count: + int iter = -1; + while (++iter < max_iter) { + // calculate F, J from X using user-supplied functor + functor(X, dX, F, J); + if (is_converged(X, J, F, eps)) { + return iter; + } + // In Eigen the default storage order is ColMajor. + // Crout's implementation requires matrices stored in RowMajor order (C-style arrays). + // Therefore, the transposeInPlace is critical such that the data() method to give the rows + // instead of the columns. + if (!J.IsRowMajor) { + J.transposeInPlace(); + } + Eigen::Matrix pivot; + Eigen::Matrix rowmax; + // Check if J is singular + if (nmodl::crout::Crout(N, J.data(), pivot.data(), rowmax.data()) < 0) { + return -1; + } + Eigen::Matrix X_solve; + nmodl::crout::solveCrout(N, J.data(), F.data(), X_solve.data(), pivot.data()); + X -= X_solve; + } + // If we fail to converge after max_iter iterations, return -1 + return -1; +} + +/** + * Newton method template specializations for \f$N <= 4\f$ Use explicit inverse + * of `F` instead of LU decomposition. This is faster, as there is no pivoting + * and therefore no branches, but it is not numerically safe for \f$N > 4\f$. + */ + +template +EIGEN_DEVICE_FUNC int newton_solver_small_N(Eigen::Matrix& X, + FUNC functor, + double eps, + int max_iter) { + bool invertible; + Eigen::Matrix F; + Eigen::Matrix dX; + Eigen::Matrix J, J_inv; + int iter = -1; + while (++iter < max_iter) { + functor(X, dX, F, J); + if (is_converged(X, J, F, eps)) { + return iter; + } + // The inverse can be called from within OpenACC regions without any issue, as opposed to + // Eigen::PartialPivLU. + J.computeInverseWithCheck(J_inv, invertible); + if (invertible) { + X -= J_inv * F; + } else { + return -1; + } + } + return -1; +} + +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + return newton_solver_small_N(X, functor, eps, max_iter); +} + +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + return newton_solver_small_N(X, functor, eps, max_iter); +} + +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + return newton_solver_small_N(X, functor, eps, max_iter); +} + +template +EIGEN_DEVICE_FUNC int newton_solver(Eigen::Matrix& X, + FUNC functor, + double eps = EPS, + int max_iter = MAX_ITER) { + return newton_solver_small_N(X, functor, eps, max_iter); +} + +/** @} */ // end of solver + +} // namespace newton +} // namespace nmodl + + +#include "mech_api.h" +#include "neuron/cache/mechanism_range.hpp" +#include "nrniv_mf.h" +#include "section_fwd.hpp" + +/* NEURON global macro definitions */ +/* NOT VECTORIZED */ +#define NRN_VECTORIZED 0 + +static constexpr auto number_of_datum_variables = 0; +static constexpr auto number_of_floating_point_variables = 2; + +namespace { +template +using _nrn_mechanism_std_vector = std::vector; +using _nrn_model_sorted_token = neuron::model_sorted_token; +using _nrn_mechanism_cache_range = neuron::cache::MechanismRange; +using _nrn_mechanism_cache_instance = neuron::cache::MechanismInstance; +using _nrn_non_owning_id_without_container = neuron::container::non_owning_identifier_without_container; +template +using _nrn_mechanism_field = neuron::mechanism::field; +template +void _nrn_mechanism_register_data_fields(Args&&... args) { + neuron::mechanism::register_data_fields(std::forward(args)...); +} +} // namespace + +Prop* hoc_getdata_range(int type); +extern Node* nrn_alloc_node_; + + +namespace neuron { + #ifndef NRN_PRCELLSTATE + #define NRN_PRCELLSTATE 0 + #endif + + + /** channel information */ + static const char *mechanism_info[] = { + "7.7.0", + "nonlin", + 0, + 0, + "x_nonlin", + 0, + 0 + }; + + + /* NEURON global variables */ + static int mech_type; + static Prop* _extcall_prop; + /* _prop_id kind of shadows _extcall_prop to allow validity checking. */ + static _nrn_non_owning_id_without_container _prop_id{}; + static _nrn_mechanism_std_vector _extcall_thread; + + + /** all global variables */ + struct nonlin_Store { + double x0{0}; + }; + static_assert(std::is_trivially_copy_constructible_v); + static_assert(std::is_trivially_move_constructible_v); + static_assert(std::is_trivially_copy_assignable_v); + static_assert(std::is_trivially_move_assignable_v); + static_assert(std::is_trivially_destructible_v); + nonlin_Store nonlin_global; + auto x0_nonlin() -> std::decay::type { + return nonlin_global.x0; + } + + static std::vector _parameter_defaults = { + }; + + + /** all mechanism instance variables and global variables */ + struct nonlin_Instance { + double* x{}; + double* Dx{}; + nonlin_Store* global{&nonlin_global}; + }; + + + struct nonlin_NodeData { + int const * nodeindices; + double const * node_voltages; + double * node_diagonal; + double * node_rhs; + int nodecount; + }; + + + static nonlin_Instance make_instance_nonlin(_nrn_mechanism_cache_range& _lmc) { + return nonlin_Instance { + _lmc.template fpfield_ptr<0>(), + _lmc.template fpfield_ptr<1>() + }; + } + + + static nonlin_NodeData make_node_data_nonlin(NrnThread& nt, Memb_list& _ml_arg) { + return nonlin_NodeData { + _ml_arg.nodeindices, + nt.node_voltage_storage(), + nt.node_d_storage(), + nt.node_rhs_storage(), + _ml_arg.nodecount + }; + } + static nonlin_NodeData make_node_data_nonlin(Prop * _prop) { + static std::vector node_index{0}; + Node* _node = _nrn_mechanism_access_node(_prop); + return nonlin_NodeData { + node_index.data(), + &_nrn_mechanism_access_voltage(_node), + &_nrn_mechanism_access_d(_node), + &_nrn_mechanism_access_rhs(_node), + 1 + }; + } + + void nrn_destructor_nonlin(Prop* prop); + + + static void nrn_alloc_nonlin(Prop* _prop) { + Datum *_ppvar = nullptr; + _nrn_mechanism_cache_instance _lmc{_prop}; + size_t const _iml = 0; + assert(_nrn_mechanism_get_num_vars(_prop) == 2); + /*initialize range parameters*/ + } + + + /* Mechanism procedures and functions */ + inline double solve_nonlin(_nrn_mechanism_cache_range& _lmc, nonlin_Instance& inst, nonlin_NodeData& node_data, size_t id, Datum* _ppvar, Datum* _thread, NrnThread* nt); + inline double residual_nonlin(_nrn_mechanism_cache_range& _lmc, nonlin_Instance& inst, nonlin_NodeData& node_data, size_t id, Datum* _ppvar, Datum* _thread, NrnThread* nt, double _lx); + static void _apply_diffusion_function(ldifusfunc2_t _f, const _nrn_model_sorted_token& _sorted_token, NrnThread& _nt) { + } + + /* Neuron setdata functions */ + extern void _nrn_setdata_reg(int, void(*)(Prop*)); + static void _setdata(Prop* _prop) { + _extcall_prop = _prop; + _prop_id = _nrn_get_prop_id(_prop); + } + static void _hoc_setdata() { + Prop *_prop = hoc_getdata_range(mech_type); + _setdata(_prop); + hoc_retpushx(1.); + } + + + struct functor_nonlin_0 { + _nrn_mechanism_cache_range& _lmc; + nonlin_Instance& inst; + nonlin_NodeData& node_data; + size_t id; + Datum* _ppvar; + Datum* _thread; + NrnThread* nt; + double v; + + void initialize() { + } + + functor_nonlin_0(_nrn_mechanism_cache_range& _lmc, nonlin_Instance& inst, nonlin_NodeData& node_data, size_t id, Datum* _ppvar, Datum* _thread, NrnThread* nt, double v) + : _lmc(_lmc), inst(inst), node_data(node_data), id(id), _ppvar(_ppvar), _thread(_thread), nt(nt), v(v) + {} + void operator()(const Eigen::Matrix& nmodl_eigen_xm, Eigen::Matrix& nmodl_eigen_dxm, Eigen::Matrix& nmodl_eigen_fm, Eigen::Matrix& nmodl_eigen_jm) const { + const double* nmodl_eigen_x = nmodl_eigen_xm.data(); + double* nmodl_eigen_dx = nmodl_eigen_dxm.data(); + double* nmodl_eigen_j = nmodl_eigen_jm.data(); + double* nmodl_eigen_f = nmodl_eigen_fm.data(); + nmodl_eigen_dx[0] = std::max(1e-6, 0.02*std::fabs(nmodl_eigen_x[0])); + nmodl_eigen_f[static_cast(0)] = 4.0 - pow(nmodl_eigen_x[static_cast(0)], 2.0); + nmodl_eigen_j[static_cast(0)] = -2.0 * nmodl_eigen_x[static_cast(0)]; + } + + void finalize() { + } + }; + + + /** connect global (scalar) variables to hoc -- */ + static DoubScal hoc_scalar_double[] = { + {nullptr, nullptr} + }; + + + /** connect global (array) variables to hoc -- */ + static DoubVec hoc_vector_double[] = { + {nullptr, nullptr, 0} + }; + + + /* declaration of user functions */ + static void _hoc_solve(void); + static double _npy_solve(Prop*); + static void _hoc_residual(void); + static double _npy_residual(Prop*); + + + /* connect user functions to hoc names */ + static VoidFunc hoc_intfunc[] = { + {"setdata_nonlin", _hoc_setdata}, + {"solve_nonlin", _hoc_solve}, + {"residual_nonlin", _hoc_residual}, + {nullptr, nullptr} + }; + static NPyDirectMechFunc npy_direct_func_proc[] = { + {"solve", _npy_solve}, + {"residual", _npy_residual}, + {nullptr, nullptr} + }; + static void _hoc_solve(void) { + double _r{}; + Datum* _ppvar; + Datum* _thread; + NrnThread* nt; + Prop* _local_prop = _prop_id ? _extcall_prop : nullptr; + _nrn_mechanism_cache_instance _lmc{_local_prop}; + size_t const id{}; + _ppvar = _local_prop ? _nrn_mechanism_access_dparam(_local_prop) : nullptr; + _thread = _extcall_thread.data(); + nt = nrn_threads; + auto inst = make_instance_nonlin(_lmc); + auto node_data = make_node_data_nonlin(_local_prop); + _r = solve_nonlin(_lmc, inst, node_data, id, _ppvar, _thread, nt); + hoc_retpushx(_r); + } + static double _npy_solve(Prop* _prop) { + double _r{}; + Datum* _ppvar; + Datum* _thread; + NrnThread* nt; + _nrn_mechanism_cache_instance _lmc{_prop}; + size_t const id = 0; + _ppvar = _nrn_mechanism_access_dparam(_prop); + _thread = _extcall_thread.data(); + nt = nrn_threads; + auto inst = make_instance_nonlin(_lmc); + auto node_data = make_node_data_nonlin(_prop); + _r = solve_nonlin(_lmc, inst, node_data, id, _ppvar, _thread, nt); + return(_r); + } + static void _hoc_residual(void) { + double _r{}; + Datum* _ppvar; + Datum* _thread; + NrnThread* nt; + Prop* _local_prop = _prop_id ? _extcall_prop : nullptr; + _nrn_mechanism_cache_instance _lmc{_local_prop}; + size_t const id{}; + _ppvar = _local_prop ? _nrn_mechanism_access_dparam(_local_prop) : nullptr; + _thread = _extcall_thread.data(); + nt = nrn_threads; + auto inst = make_instance_nonlin(_lmc); + auto node_data = make_node_data_nonlin(_local_prop); + _r = residual_nonlin(_lmc, inst, node_data, id, _ppvar, _thread, nt, *getarg(1)); + hoc_retpushx(_r); + } + static double _npy_residual(Prop* _prop) { + double _r{}; + Datum* _ppvar; + Datum* _thread; + NrnThread* nt; + _nrn_mechanism_cache_instance _lmc{_prop}; + size_t const id = 0; + _ppvar = _nrn_mechanism_access_dparam(_prop); + _thread = _extcall_thread.data(); + nt = nrn_threads; + auto inst = make_instance_nonlin(_lmc); + auto node_data = make_node_data_nonlin(_prop); + _r = residual_nonlin(_lmc, inst, node_data, id, _ppvar, _thread, nt, *getarg(1)); + return(_r); + } + + + inline double solve_nonlin(_nrn_mechanism_cache_range& _lmc, nonlin_Instance& inst, nonlin_NodeData& node_data, size_t id, Datum* _ppvar, Datum* _thread, NrnThread* nt) { + double ret_solve = 0.0; + auto v = node_data.node_voltages[node_data.nodeindices[id]]; + inst.x[id] = 1.0; + + Eigen::Matrix nmodl_eigen_xm; + double* nmodl_eigen_x = nmodl_eigen_xm.data(); + nmodl_eigen_x[static_cast(0)] = inst.x[id]; + // call newton solver + functor_nonlin_0 newton_functor(_lmc, inst, node_data, id, _ppvar, _thread, nt, v); + newton_functor.initialize(); + int newton_iterations = nmodl::newton::newton_solver(nmodl_eigen_xm, newton_functor); + if (newton_iterations < 0) assert(false && "Newton solver did not converge!"); + inst.x[id] = nmodl_eigen_x[static_cast(0)]; + newton_functor.initialize(); // TODO mimic calling F again. + newton_functor.finalize(); + + + ret_solve = inst.x[id]; + return ret_solve; + } + + + inline double residual_nonlin(_nrn_mechanism_cache_range& _lmc, nonlin_Instance& inst, nonlin_NodeData& node_data, size_t id, Datum* _ppvar, Datum* _thread, NrnThread* nt, double _lx) { + double ret_residual = 0.0; + auto v = node_data.node_voltages[node_data.nodeindices[id]]; + ret_residual = _lx - 2.0; + return ret_residual; + } + + + void nrn_init_nonlin(const _nrn_model_sorted_token& _sorted_token, NrnThread* nt, Memb_list* _ml_arg, int _type) { + _nrn_mechanism_cache_range _lmc{_sorted_token, *nt, *_ml_arg, _ml_arg->type()}; + auto inst = make_instance_nonlin(_lmc); + auto node_data = make_node_data_nonlin(*nt, *_ml_arg); + auto* _thread = _ml_arg->_thread; + auto nodecount = _ml_arg->nodecount; + for (int id = 0; id < nodecount; id++) { + auto* _ppvar = _ml_arg->pdata[id]; + int node_id = node_data.nodeindices[id]; + auto v = node_data.node_voltages[node_id]; + inst.x[id] = inst.global->x0; + } + } + + + static void nrn_jacob_nonlin(const _nrn_model_sorted_token& _sorted_token, NrnThread* nt, Memb_list* _ml_arg, int _type) { + _nrn_mechanism_cache_range _lmc{_sorted_token, *nt, *_ml_arg, _ml_arg->type()}; + auto inst = make_instance_nonlin(_lmc); + auto node_data = make_node_data_nonlin(*nt, *_ml_arg); + auto* _thread = _ml_arg->_thread; + auto nodecount = _ml_arg->nodecount; + for (int id = 0; id < nodecount; id++) { + } + } + void nrn_destructor_nonlin(Prop* prop) { + Datum* _ppvar = _nrn_mechanism_access_dparam(prop); + _nrn_mechanism_cache_instance _lmc{prop}; + const size_t id = 0; + auto inst = make_instance_nonlin(_lmc); + auto node_data = make_node_data_nonlin(prop); + + } + + + static void _initlists() { + } + + + /** register channel with the simulator */ + extern "C" void _nonlin_reg() { + _initlists(); + + register_mech(mechanism_info, nrn_alloc_nonlin, nullptr, nullptr, nullptr, nrn_init_nonlin, -1, 1); + + mech_type = nrn_get_mechtype(mechanism_info[1]); + hoc_register_parm_default(mech_type, &_parameter_defaults); + _nrn_mechanism_register_data_fields(mech_type, + _nrn_mechanism_field{"x"} /* 0 */, + _nrn_mechanism_field{"Dx"} /* 1 */ + ); + + hoc_register_prop_size(mech_type, 2, 0); + hoc_register_var(hoc_scalar_double, hoc_vector_double, hoc_intfunc); + hoc_register_npy_direct(mech_type, npy_direct_func_proc); + } +}