From 01ae7d7f7ccc15fcb89fc8709b3a9474a95d1086 Mon Sep 17 00:00:00 2001 From: "John W. Parent" <45471568+johnwparent@users.noreply.github.com> Date: Tue, 12 Sep 2023 23:23:57 -0400 Subject: [PATCH 01/35] Examples: add missing include (#331) example_utilities.hpp requires `stoi` and similar methods. These are declared in ``, but `` is not included. This means this file will break any TU it's included in. Resolves #330 Co-authored-by: Cody Balos --- examples/utilities/example_utilities.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/utilities/example_utilities.hpp b/examples/utilities/example_utilities.hpp index 96a158b841..1d436eb234 100644 --- a/examples/utilities/example_utilities.hpp +++ b/examples/utilities/example_utilities.hpp @@ -17,6 +17,7 @@ #include #include +#include #include // Check for an unrecoverable (negative) return value from a SUNDIALS function From 75ffe46d14e571256860732dd7a9992b57dccb90 Mon Sep 17 00:00:00 2001 From: Martin Robinson Date: Wed, 13 Sep 2023 08:24:00 +0100 Subject: [PATCH 02/35] append to CMAKE_MODULE_PATH instead of overwrite (#336) Fixes #335 Co-authored-by: David Gardner --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7c937443af..9612b9de19 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,7 @@ project(SUNDIALS C) # Specify the location of additional CMAKE modules set(CMAKE_MODULE_PATH + ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/cmake ${PROJECT_SOURCE_DIR}/cmake/macros ${PROJECT_SOURCE_DIR}/cmake/tpl From 235a5069b1d028e93c1f8b2c30df3accfa4fe0e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20MOTTELET?= Date: Thu, 14 Sep 2023 22:38:03 +0200 Subject: [PATCH 03/35] Change typedefs names to allow simulateous use of idas and cvodes (#337) Fixes #333. Some other typedefs already had specialized (towards idas or cvodes) prefixed names, it won't harm to generalize this. --------- Co-authored-by: Cody Balos --- CHANGELOG.md | 4 + doc/cvodes/guide/source/Introduction.rst | 5 + doc/idas/guide/source/Introduction.rst | 5 + src/cvodes/cvodea.c | 112 ++++++++++----------- src/cvodes/cvodea_io.c | 14 +-- src/cvodes/cvodes_impl.h | 32 +++--- src/idas/idaa.c | 122 +++++++++++------------ src/idas/idaa_io.c | 14 +-- src/idas/idas_impl.h | 32 +++--- 9 files changed, 177 insertions(+), 163 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f246a840d5..dd786dde05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # SUNDIALS Changelog +## Changes to SUNDIALS in release X.X.X + +Renamed some internal types in CVODES and IDAS to allow both packages to be built together in the same binary. + ## Changes to SUNDIALS in release 6.6.1 Updated the Tpetra NVector interface to support Trilinos 14. diff --git a/doc/cvodes/guide/source/Introduction.rst b/doc/cvodes/guide/source/Introduction.rst index 1fde2952fb..6ad961ebd3 100644 --- a/doc/cvodes/guide/source/Introduction.rst +++ b/doc/cvodes/guide/source/Introduction.rst @@ -111,6 +111,11 @@ Fortran. Changes from previous versions ============================== +Changes in vX.X.X +----------------- + +Renamed some internal types in CVODES and IDAS to allow both packages to be built together in the same binary. + Changes in v6.6.1 ----------------- diff --git a/doc/idas/guide/source/Introduction.rst b/doc/idas/guide/source/Introduction.rst index ba71498036..676dd88777 100644 --- a/doc/idas/guide/source/Introduction.rst +++ b/doc/idas/guide/source/Introduction.rst @@ -86,6 +86,11 @@ integrate any final-condition ODE dependent on the solution of the original IVP Changes from previous versions ============================== +Changes in vX.X.X +----------------- + +Renamed some internal types in CVODES and IDAS to allow both packages to be built together in the same binary. + Changes in v5.6.1 ----------------- diff --git a/src/cvodes/cvodea.c b/src/cvodes/cvodea.c index 5f53830241..c382e80a2f 100644 --- a/src/cvodes/cvodea.c +++ b/src/cvodes/cvodea.c @@ -54,14 +54,14 @@ * ================================================================= */ -static CkpntMem CVAckpntInit(CVodeMem cv_mem); -static CkpntMem CVAckpntNew(CVodeMem cv_mem); -static void CVAckpntDelete(CkpntMem *ck_memPtr); +static CVckpntMem CVAckpntInit(CVodeMem cv_mem); +static CVckpntMem CVAckpntNew(CVodeMem cv_mem); +static void CVAckpntDelete(CVckpntMem *ck_memPtr); static void CVAbckpbDelete(CVodeBMem *cvB_memPtr); -static int CVAdataStore(CVodeMem cv_mem, CkpntMem ck_mem); -static int CVAckpntGet(CVodeMem cv_mem, CkpntMem ck_mem); +static int CVAdataStore(CVodeMem cv_mem, CVckpntMem ck_mem); +static int CVAckpntGet(CVodeMem cv_mem, CVckpntMem ck_mem); static int CVAfindIndex(CVodeMem cv_mem, realtype t, long int *indx, booleantype *newpoint); @@ -69,12 +69,12 @@ static int CVAfindIndex(CVodeMem cv_mem, realtype t, static booleantype CVAhermiteMalloc(CVodeMem cv_mem); static void CVAhermiteFree(CVodeMem cv_mem); static int CVAhermiteGetY(CVodeMem cv_mem, realtype t, N_Vector y, N_Vector *yS); -static int CVAhermiteStorePnt(CVodeMem cv_mem, DtpntMem d); +static int CVAhermiteStorePnt(CVodeMem cv_mem, CVdtpntMem d); static booleantype CVApolynomialMalloc(CVodeMem cv_mem); static void CVApolynomialFree(CVodeMem cv_mem); static int CVApolynomialGetY(CVodeMem cv_mem, realtype t, N_Vector y, N_Vector *yS); -static int CVApolynomialStorePnt(CVodeMem cv_mem, DtpntMem d); +static int CVApolynomialStorePnt(CVodeMem cv_mem, CVdtpntMem d); /* Wrappers */ @@ -174,7 +174,7 @@ int CVodeAdjInit(void *cvode_mem, long int steps, int interp) /* Allocate space for the array of Data Point structures */ ca_mem->dt_mem = NULL; - ca_mem->dt_mem = (DtpntMem *) malloc((steps+1)*sizeof(struct DtpntMemRec *)); + ca_mem->dt_mem = (CVdtpntMem *) malloc((steps+1)*sizeof(struct CVdtpntMemRec *)); if (ca_mem->dt_mem == NULL) { free(ca_mem); ca_mem = NULL; cvProcessError(cv_mem, CV_MEM_FAIL, "CVODEA", "CVodeAdjInit", MSGCV_MEM_FAIL); @@ -184,7 +184,7 @@ int CVodeAdjInit(void *cvode_mem, long int steps, int interp) for (i=0; i<=steps; i++) { ca_mem->dt_mem[i] = NULL; - ca_mem->dt_mem[i] = (DtpntMem) malloc(sizeof(struct DtpntMemRec)); + ca_mem->dt_mem[i] = (CVdtpntMem) malloc(sizeof(struct CVdtpntMemRec)); if (ca_mem->dt_mem[i] == NULL) { for(ii=0; iidt_mem[ii]); ca_mem->dt_mem[ii] = NULL;} free(ca_mem->dt_mem); ca_mem->dt_mem = NULL; @@ -380,8 +380,8 @@ int CVodeF(void *cvode_mem, realtype tout, N_Vector yout, { CVadjMem ca_mem; CVodeMem cv_mem; - CkpntMem tmp; - DtpntMem *dt_mem; + CVckpntMem tmp; + CVdtpntMem *dt_mem; long int nstloc; int flag, i; booleantype allocOK, earlyret; @@ -1281,7 +1281,7 @@ int CVodeB(void *cvode_mem, realtype tBout, int itaskB) CVodeMem cv_mem; CVadjMem ca_mem; CVodeBMem cvB_mem, tmp_cvB_mem; - CkpntMem ck_mem; + CVckpntMem ck_mem; int sign, flag=0; realtype tfuzz, tBret, tBn; booleantype gotCheckpoint, isActive, reachedTBout; @@ -1630,14 +1630,14 @@ int CVodeGetQuadB(void *cvode_mem, int which, realtype *tret, N_Vector qB) * information from the initial time. */ -static CkpntMem CVAckpntInit(CVodeMem cv_mem) +static CVckpntMem CVAckpntInit(CVodeMem cv_mem) { - CkpntMem ck_mem; + CVckpntMem ck_mem; int is; /* Allocate space for ckdata */ ck_mem = NULL; - ck_mem = (CkpntMem) malloc(sizeof(struct CkpntMemRec)); + ck_mem = (CVckpntMem) malloc(sizeof(struct CVckpntMemRec)); if (ck_mem == NULL) return(NULL); ck_mem->ck_zn[0] = N_VClone(cv_mem->cv_tempv); @@ -1737,14 +1737,14 @@ static CkpntMem CVAckpntInit(CVodeMem cv_mem) * its data from current values in cv_mem. */ -static CkpntMem CVAckpntNew(CVodeMem cv_mem) +static CVckpntMem CVAckpntNew(CVodeMem cv_mem) { - CkpntMem ck_mem; + CVckpntMem ck_mem; int j, jj, is, qmax; /* Allocate space for ckdata */ ck_mem = NULL; - ck_mem = (CkpntMem) malloc(sizeof(struct CkpntMemRec)); + ck_mem = (CVckpntMem) malloc(sizeof(struct CVckpntMemRec)); if (ck_mem == NULL) return(NULL); /* Set cv_next to NULL */ @@ -1976,9 +1976,9 @@ static CkpntMem CVAckpntNew(CVodeMem cv_mem) * the new list head */ -static void CVAckpntDelete(CkpntMem *ck_memPtr) +static void CVAckpntDelete(CVckpntMem *ck_memPtr) { - CkpntMem tmp; + CVckpntMem tmp; int j; if (*ck_memPtr == NULL) return; @@ -2095,10 +2095,10 @@ static void CVAbckpbDelete(CVodeBMem *cvB_memPtr) * CV_FWD_FAIL */ -static int CVAdataStore(CVodeMem cv_mem, CkpntMem ck_mem) +static int CVAdataStore(CVodeMem cv_mem, CVckpntMem ck_mem) { CVadjMem ca_mem; - DtpntMem *dt_mem; + CVdtpntMem *dt_mem; realtype t; long int i; int flag, sign; @@ -2151,7 +2151,7 @@ static int CVAdataStore(CVodeMem cv_mem, CkpntMem ck_mem) * the check point ck_mem */ -static int CVAckpntGet(CVodeMem cv_mem, CkpntMem ck_mem) +static int CVAckpntGet(CVodeMem cv_mem, CVckpntMem ck_mem) { int flag, j, is, qmax, retval; @@ -2310,7 +2310,7 @@ static int CVAfindIndex(CVodeMem cv_mem, realtype t, long int *indx, booleantype *newpoint) { CVadjMem ca_mem; - DtpntMem *dt_mem; + CVdtpntMem *dt_mem; int sign; booleantype to_left, to_right; @@ -2427,8 +2427,8 @@ int CVodeGetAdjY(void *cvode_mem, realtype t, N_Vector y) static booleantype CVAhermiteMalloc(CVodeMem cv_mem) { CVadjMem ca_mem; - DtpntMem *dt_mem; - HermiteDataMem content; + CVdtpntMem *dt_mem; + CVhermiteDataMem content; long int i, ii=0; booleantype allocOK; @@ -2458,7 +2458,7 @@ static booleantype CVAhermiteMalloc(CVodeMem cv_mem) for (i=0; i<=ca_mem->ca_nsteps; i++) { content = NULL; - content = (HermiteDataMem) malloc(sizeof(struct HermiteDataMemRec)); + content = (CVhermiteDataMem) malloc(sizeof(struct CVhermiteDataMemRec)); if (content == NULL) { ii = i; allocOK = SUNFALSE; @@ -2522,7 +2522,7 @@ static booleantype CVAhermiteMalloc(CVodeMem cv_mem) } for (i=0; icontent); + content = (CVhermiteDataMem) (dt_mem[i]->content); N_VDestroy(content->y); N_VDestroy(content->yd); if (ca_mem->ca_IMstoreSensi) { @@ -2546,8 +2546,8 @@ static booleantype CVAhermiteMalloc(CVodeMem cv_mem) static void CVAhermiteFree(CVodeMem cv_mem) { CVadjMem ca_mem; - DtpntMem *dt_mem; - HermiteDataMem content; + CVdtpntMem *dt_mem; + CVhermiteDataMem content; long int i; ca_mem = cv_mem->cv_adj_mem; @@ -2561,7 +2561,7 @@ static void CVAhermiteFree(CVodeMem cv_mem) dt_mem = ca_mem->dt_mem; for (i=0; i<=ca_mem->ca_nsteps; i++) { - content = (HermiteDataMem) (dt_mem[i]->content); + content = (CVhermiteDataMem) (dt_mem[i]->content); N_VDestroy(content->y); N_VDestroy(content->yd); if (ca_mem->ca_IMstoreSensi) { @@ -2580,15 +2580,15 @@ static void CVAhermiteFree(CVodeMem cv_mem) * Note that the time is already stored. */ -static int CVAhermiteStorePnt(CVodeMem cv_mem, DtpntMem d) +static int CVAhermiteStorePnt(CVodeMem cv_mem, CVdtpntMem d) { CVadjMem ca_mem; - HermiteDataMem content; + CVhermiteDataMem content; int is, retval; ca_mem = cv_mem->cv_adj_mem; - content = (HermiteDataMem) d->content; + content = (CVhermiteDataMem) d->content; /* Load solution */ @@ -2647,8 +2647,8 @@ static int CVAhermiteGetY(CVodeMem cv_mem, realtype t, N_Vector y, N_Vector *yS) { CVadjMem ca_mem; - DtpntMem *dt_mem; - HermiteDataMem content0, content1; + CVdtpntMem *dt_mem; + CVhermiteDataMem content0, content1; realtype t0, t1, delta; realtype factor1, factor2, factor3; @@ -2682,7 +2682,7 @@ static int CVAhermiteGetY(CVodeMem cv_mem, realtype t, then return y at the left limit. */ if (indx == 0) { - content0 = (HermiteDataMem) (dt_mem[0]->content); + content0 = (CVhermiteDataMem) (dt_mem[0]->content); N_VScale(ONE, content0->y, y); if (NS > 0) { @@ -2703,7 +2703,7 @@ static int CVAhermiteGetY(CVodeMem cv_mem, realtype t, t1 = dt_mem[indx]->t; delta = t1 - t0; - content0 = (HermiteDataMem) (dt_mem[indx-1]->content); + content0 = (CVhermiteDataMem) (dt_mem[indx-1]->content); y0 = content0->y; yd0 = content0->yd; if (ca_mem->ca_IMinterpSensi) { @@ -2715,7 +2715,7 @@ static int CVAhermiteGetY(CVodeMem cv_mem, realtype t, /* Recompute Y0 and Y1 */ - content1 = (HermiteDataMem) (dt_mem[indx]->content); + content1 = (CVhermiteDataMem) (dt_mem[indx]->content); y1 = content1->y; yd1 = content1->yd; @@ -2822,8 +2822,8 @@ static int CVAhermiteGetY(CVodeMem cv_mem, realtype t, static booleantype CVApolynomialMalloc(CVodeMem cv_mem) { CVadjMem ca_mem; - DtpntMem *dt_mem; - PolynomialDataMem content; + CVdtpntMem *dt_mem; + CVpolynomialDataMem content; long int i, ii=0; booleantype allocOK; @@ -2853,7 +2853,7 @@ static booleantype CVApolynomialMalloc(CVodeMem cv_mem) for (i=0; i<=ca_mem->ca_nsteps; i++) { content = NULL; - content = (PolynomialDataMem) malloc(sizeof(struct PolynomialDataMemRec)); + content = (CVpolynomialDataMem) malloc(sizeof(struct CVpolynomialDataMemRec)); if (content == NULL) { ii = i; allocOK = SUNFALSE; @@ -2896,7 +2896,7 @@ static booleantype CVApolynomialMalloc(CVodeMem cv_mem) } for (i=0; icontent); + content = (CVpolynomialDataMem) (dt_mem[i]->content); N_VDestroy(content->y); if (ca_mem->ca_IMstoreSensi) { N_VDestroyVectorArray(content->yS, cv_mem->cv_Ns); @@ -2919,8 +2919,8 @@ static booleantype CVApolynomialMalloc(CVodeMem cv_mem) static void CVApolynomialFree(CVodeMem cv_mem) { CVadjMem ca_mem; - DtpntMem *dt_mem; - PolynomialDataMem content; + CVdtpntMem *dt_mem; + CVpolynomialDataMem content; long int i; ca_mem = cv_mem->cv_adj_mem; @@ -2934,7 +2934,7 @@ static void CVApolynomialFree(CVodeMem cv_mem) dt_mem = ca_mem->dt_mem; for (i=0; i<=ca_mem->ca_nsteps; i++) { - content = (PolynomialDataMem) (dt_mem[i]->content); + content = (CVpolynomialDataMem) (dt_mem[i]->content); N_VDestroy(content->y); if (ca_mem->ca_IMstoreSensi) { N_VDestroyVectorArray(content->yS, cv_mem->cv_Ns); @@ -2951,15 +2951,15 @@ static void CVApolynomialFree(CVodeMem cv_mem) * Note that the time is already stored. */ -static int CVApolynomialStorePnt(CVodeMem cv_mem, DtpntMem d) +static int CVApolynomialStorePnt(CVodeMem cv_mem, CVdtpntMem d) { CVadjMem ca_mem; - PolynomialDataMem content; + CVpolynomialDataMem content; int is, retval; ca_mem = cv_mem->cv_adj_mem; - content = (PolynomialDataMem) d->content; + content = (CVpolynomialDataMem) d->content; N_VScale(ONE, cv_mem->cv_zn[0], content->y); @@ -2989,8 +2989,8 @@ static int CVApolynomialGetY(CVodeMem cv_mem, realtype t, N_Vector y, N_Vector *yS) { CVadjMem ca_mem; - DtpntMem *dt_mem; - PolynomialDataMem content; + CVdtpntMem *dt_mem; + CVpolynomialDataMem content; int flag, dir, order, i, j, is, NS, retval; long int indx, base; @@ -3013,7 +3013,7 @@ static int CVApolynomialGetY(CVodeMem cv_mem, realtype t, then return y at the left limit. */ if (indx == 0) { - content = (PolynomialDataMem) (dt_mem[0]->content); + content = (CVpolynomialDataMem) (dt_mem[0]->content); N_VScale(ONE, content->y, y); if (NS > 0) { @@ -3039,12 +3039,12 @@ static int CVApolynomialGetY(CVodeMem cv_mem, realtype t, if (dir == 1) { base = indx; - content = (PolynomialDataMem) (dt_mem[base]->content); + content = (CVpolynomialDataMem) (dt_mem[base]->content); order = content->order; if(indx < order) base += order-indx; } else { base = indx-1; - content = (PolynomialDataMem) (dt_mem[base]->content); + content = (CVpolynomialDataMem) (dt_mem[base]->content); order = content->order; if (ca_mem->ca_np-indx > order) base -= indx+order-ca_mem->ca_np; } @@ -3057,7 +3057,7 @@ static int CVApolynomialGetY(CVodeMem cv_mem, realtype t, if (dir == 1) { for(j=0;j<=order;j++) { ca_mem->ca_T[j] = dt_mem[base-j]->t; - content = (PolynomialDataMem) (dt_mem[base-j]->content); + content = (CVpolynomialDataMem) (dt_mem[base-j]->content); N_VScale(ONE, content->y, ca_mem->ca_Y[j]); if (NS > 0) { @@ -3071,7 +3071,7 @@ static int CVApolynomialGetY(CVodeMem cv_mem, realtype t, } else { for(j=0;j<=order;j++) { ca_mem->ca_T[j] = dt_mem[base-1+j]->t; - content = (PolynomialDataMem) (dt_mem[base-1+j]->content); + content = (CVpolynomialDataMem) (dt_mem[base-1+j]->content); N_VScale(ONE, content->y, ca_mem->ca_Y[j]); if (NS > 0) { for (is=0; ist; - content = (HermiteDataMem) (dt_mem[which]->content); + content = (CVhermiteDataMem) (dt_mem[which]->content); if (y != NULL) N_VScale(ONE, content->y, y); @@ -679,8 +679,8 @@ int CVodeGetAdjDataPointPolynomial(void *cvode_mem, int which, { CVodeMem cv_mem; CVadjMem ca_mem; - DtpntMem *dt_mem; - PolynomialDataMem content; + CVdtpntMem *dt_mem; + CVpolynomialDataMem content; /* Check if cvode_mem exists */ if (cvode_mem == NULL) { @@ -705,7 +705,7 @@ int CVodeGetAdjDataPointPolynomial(void *cvode_mem, int which, *t = dt_mem[which]->t; - content = (PolynomialDataMem) (dt_mem[which]->content); + content = (CVpolynomialDataMem) (dt_mem[which]->content); if (y != NULL) N_VScale(ONE, content->y, y); diff --git a/src/cvodes/cvodes_impl.h b/src/cvodes/cvodes_impl.h index b4a18676a8..c1f9721427 100644 --- a/src/cvodes/cvodes_impl.h +++ b/src/cvodes/cvodes_impl.h @@ -207,8 +207,8 @@ extern "C" { */ typedef struct CVadjMemRec *CVadjMem; -typedef struct CkpntMemRec *CkpntMem; -typedef struct DtpntMemRec *DtpntMem; +typedef struct CVckpntMemRec *CVckpntMem; +typedef struct CVdtpntMemRec *CVdtpntMem; typedef struct CVodeBMemRec *CVodeBMem; /* @@ -704,15 +704,15 @@ typedef struct CVodeMemRec { /* * ----------------------------------------------------------------- - * Types : struct CkpntMemRec, CkpntMem + * Types : struct CVckpntMemRec, CVckpntMem * ----------------------------------------------------------------- - * The type CkpntMem is type pointer to struct CkpntMemRec. + * The type CVckpntMem is type pointer to struct CVckpntMemRec. * This structure contains fields to store all information at a * check point that is needed to 'hot' start cvodes. * ----------------------------------------------------------------- */ -struct CkpntMemRec { +struct CVckpntMemRec { /* Integration limits */ realtype ck_t0; @@ -768,7 +768,7 @@ struct CkpntMemRec { realtype ck_saved_tq5; /* Pointer to next structure in list */ - struct CkpntMemRec *ck_next; + struct CVckpntMemRec *ck_next; }; @@ -790,11 +790,11 @@ struct CkpntMemRec { typedef booleantype (*cvaIMMallocFn)(CVodeMem cv_mem); typedef void (*cvaIMFreeFn)(CVodeMem cv_mem); typedef int (*cvaIMGetYFn)(CVodeMem cv_mem, realtype t, N_Vector y, N_Vector *yS); -typedef int (*cvaIMStorePntFn)(CVodeMem cv_mem, DtpntMem d); +typedef int (*cvaIMStorePntFn)(CVodeMem cv_mem, CVdtpntMem d); /* * ----------------------------------------------------------------- - * Type : struct DtpntMemRec + * Type : struct CVdtpntMemRec * ----------------------------------------------------------------- * This structure contains fields to store all information at a * data point that is needed to interpolate solution of forward @@ -802,25 +802,25 @@ typedef int (*cvaIMStorePntFn)(CVodeMem cv_mem, DtpntMem d); * ----------------------------------------------------------------- */ -struct DtpntMemRec { +struct CVdtpntMemRec { realtype t; /* time */ void *content; /* IMtype-dependent content */ }; /* Data for cubic Hermite interpolation */ -typedef struct HermiteDataMemRec { +typedef struct CVhermiteDataMemRec { N_Vector y; N_Vector yd; N_Vector *yS; N_Vector *ySd; -} *HermiteDataMem; +} *CVhermiteDataMem; /* Data for polynomial interpolation */ -typedef struct PolynomialDataMemRec { +typedef struct CVpolynomialDataMemRec { N_Vector y; N_Vector *yS; int order; -} *PolynomialDataMem; +} *CVpolynomialDataMem; /* @@ -935,13 +935,13 @@ struct CVadjMemRec { * ---------------- */ /* Storage for check point information */ - struct CkpntMemRec *ck_mem; + struct CVckpntMemRec *ck_mem; /* Number of check points */ int ca_nckpnts; /* address of the check point structure for which data is available */ - struct CkpntMemRec *ca_ckpntData; + struct CVckpntMemRec *ca_ckpntData; /* ------------------ * Interpolation data @@ -954,7 +954,7 @@ struct CVadjMemRec { long int ca_ilast; /* Storage for data from forward runs */ - struct DtpntMemRec **dt_mem; + struct CVdtpntMemRec **dt_mem; /* Actual number of data points in dt_mem (typically np=nsteps+1) */ long int ca_np; diff --git a/src/idas/idaa.c b/src/idas/idaa.c index 2b09738d78..64ccdd45b1 100644 --- a/src/idas/idaa.c +++ b/src/idas/idaa.c @@ -46,30 +46,30 @@ /* Private Functions Prototypes */ /*=================================================================*/ -static CkpntMem IDAAckpntInit(IDAMem IDA_mem); -static CkpntMem IDAAckpntNew(IDAMem IDA_mem); -static void IDAAckpntCopyVectors(IDAMem IDA_mem, CkpntMem ck_mem); -static booleantype IDAAckpntAllocVectors(IDAMem IDA_mem, CkpntMem ck_mem); -static void IDAAckpntDelete(CkpntMem *ck_memPtr); +static IDAckpntMem IDAAckpntInit(IDAMem IDA_mem); +static IDAckpntMem IDAAckpntNew(IDAMem IDA_mem); +static void IDAAckpntCopyVectors(IDAMem IDA_mem, IDAckpntMem ck_mem); +static booleantype IDAAckpntAllocVectors(IDAMem IDA_mem, IDAckpntMem ck_mem); +static void IDAAckpntDelete(IDAckpntMem *ck_memPtr); static void IDAAbckpbDelete(IDABMem *IDAB_memPtr); static booleantype IDAAdataMalloc(IDAMem IDA_mem); static void IDAAdataFree(IDAMem IDA_mem); -static int IDAAdataStore(IDAMem IDA_mem, CkpntMem ck_mem); +static int IDAAdataStore(IDAMem IDA_mem, IDAckpntMem ck_mem); -static int IDAAckpntGet(IDAMem IDA_mem, CkpntMem ck_mem); +static int IDAAckpntGet(IDAMem IDA_mem, IDAckpntMem ck_mem); static booleantype IDAAhermiteMalloc(IDAMem IDA_mem); static void IDAAhermiteFree(IDAMem IDA_mem); -static int IDAAhermiteStorePnt(IDAMem IDA_mem, DtpntMem d); +static int IDAAhermiteStorePnt(IDAMem IDA_mem, IDAdtpntMem d); static int IDAAhermiteGetY(IDAMem IDA_mem, realtype t, N_Vector yy, N_Vector yp, N_Vector *yyS, N_Vector *ypS); static booleantype IDAApolynomialMalloc(IDAMem IDA_mem); static void IDAApolynomialFree(IDAMem IDA_mem); -static int IDAApolynomialStorePnt(IDAMem IDA_mem, DtpntMem d); +static int IDAApolynomialStorePnt(IDAMem IDA_mem, IDAdtpntMem d); static int IDAApolynomialGetY(IDAMem IDA_mem, realtype t, N_Vector yy, N_Vector yp, N_Vector *yyS, N_Vector *ypS); @@ -360,8 +360,8 @@ int IDASolveF(void *ida_mem, realtype tout, realtype *tret, { IDAadjMem IDAADJ_mem; IDAMem IDA_mem; - CkpntMem tmp; - DtpntMem *dt_mem; + IDAckpntMem tmp; + IDAdtpntMem *dt_mem; long int nstloc; int flag, i; booleantype allocOK, earlyret; @@ -1451,7 +1451,7 @@ int IDASolveB(void *ida_mem, realtype tBout, int itaskB) { IDAMem IDA_mem; IDAadjMem IDAADJ_mem; - CkpntMem ck_mem; + IDAckpntMem ck_mem; IDABMem IDAB_mem, tmp_IDAB_mem; int flag=0, sign; realtype tfuzz, tBret, tBn; @@ -1798,12 +1798,12 @@ int IDAGetQuadB(void *ida_mem, int which, realtype *tret, N_Vector qB) * information from the initial time. */ -static CkpntMem IDAAckpntInit(IDAMem IDA_mem) +static IDAckpntMem IDAAckpntInit(IDAMem IDA_mem) { - CkpntMem ck_mem; + IDAckpntMem ck_mem; /* Allocate space for ckdata */ - ck_mem = (CkpntMem) malloc(sizeof(struct CkpntMemRec)); + ck_mem = (IDAckpntMem) malloc(sizeof(struct IDAckpntMemRec)); if (NULL==ck_mem) return(NULL); ck_mem->ck_t0 = IDA_mem->ida_tn; @@ -1844,13 +1844,13 @@ static CkpntMem IDAAckpntInit(IDAMem IDA_mem) * its data from current values in IDA_mem. */ -static CkpntMem IDAAckpntNew(IDAMem IDA_mem) +static IDAckpntMem IDAAckpntNew(IDAMem IDA_mem) { - CkpntMem ck_mem; + IDAckpntMem ck_mem; int j; /* Allocate space for ckdata */ - ck_mem = (CkpntMem) malloc(sizeof(struct CkpntMemRec)); + ck_mem = (IDAckpntMem) malloc(sizeof(struct IDAckpntMemRec)); if (ck_mem == NULL) return(NULL); ck_mem->ck_nst = IDA_mem->ida_nst; @@ -1908,9 +1908,9 @@ static CkpntMem IDAAckpntNew(IDAMem IDA_mem) * This routine deletes the first check point in list. */ -static void IDAAckpntDelete(CkpntMem *ck_memPtr) +static void IDAAckpntDelete(IDAckpntMem *ck_memPtr) { - CkpntMem tmp; + IDAckpntMem tmp; int j; if (*ck_memPtr != NULL) { @@ -1951,7 +1951,7 @@ static void IDAAckpntDelete(CkpntMem *ck_memPtr) * current state of IDAMem. * */ -static booleantype IDAAckpntAllocVectors(IDAMem IDA_mem, CkpntMem ck_mem) +static booleantype IDAAckpntAllocVectors(IDAMem IDA_mem, IDAckpntMem ck_mem) { int j, jj; @@ -2032,7 +2032,7 @@ static booleantype IDAAckpntAllocVectors(IDAMem IDA_mem, CkpntMem ck_mem) * Copy phi* vectors from IDAMem in the corresponding vectors from checkpoint * */ -static void IDAAckpntCopyVectors(IDAMem IDA_mem, CkpntMem ck_mem) +static void IDAAckpntCopyVectors(IDAMem IDA_mem, IDAckpntMem ck_mem) { int j, is; @@ -2096,18 +2096,18 @@ static void IDAAckpntCopyVectors(IDAMem IDA_mem, CkpntMem ck_mem) static booleantype IDAAdataMalloc(IDAMem IDA_mem) { IDAadjMem IDAADJ_mem; - DtpntMem *dt_mem; + IDAdtpntMem *dt_mem; long int i, j; IDAADJ_mem = IDA_mem->ida_adj_mem; IDAADJ_mem->dt_mem = NULL; - dt_mem = (DtpntMem *)malloc((IDAADJ_mem->ia_nsteps+1)*sizeof(struct DtpntMemRec *)); + dt_mem = (IDAdtpntMem *)malloc((IDAADJ_mem->ia_nsteps+1)*sizeof(struct IDAdtpntMemRec *)); if (dt_mem==NULL) return(SUNFALSE); for (i=0; i<=IDAADJ_mem->ia_nsteps; i++) { - dt_mem[i] = (DtpntMem)malloc(sizeof(struct DtpntMemRec)); + dt_mem[i] = (IDAdtpntMem)malloc(sizeof(struct IDAdtpntMemRec)); /* On failure, free any allocated memory and return NULL. */ if (dt_mem[i] == NULL) { @@ -2166,10 +2166,10 @@ static void IDAAdataFree(IDAMem IDA_mem) * - IDA_SUCCESS */ -static int IDAAdataStore(IDAMem IDA_mem, CkpntMem ck_mem) +static int IDAAdataStore(IDAMem IDA_mem, IDAckpntMem ck_mem) { IDAadjMem IDAADJ_mem; - DtpntMem *dt_mem; + IDAdtpntMem *dt_mem; realtype t; long int i; int flag, sign; @@ -2222,7 +2222,7 @@ static int IDAAdataStore(IDAMem IDA_mem, CkpntMem ck_mem) * the check point ck_mem */ -static int IDAAckpntGet(IDAMem IDA_mem, CkpntMem ck_mem) +static int IDAAckpntGet(IDAMem IDA_mem, IDAckpntMem ck_mem) { int flag, j, is; @@ -2331,8 +2331,8 @@ static int IDAAckpntGet(IDAMem IDA_mem, CkpntMem ck_mem) static booleantype IDAAhermiteMalloc(IDAMem IDA_mem) { IDAadjMem IDAADJ_mem; - DtpntMem *dt_mem; - HermiteDataMem content; + IDAdtpntMem *dt_mem; + IDAhermiteDataMem content; long int i, ii=0; booleantype allocOK; @@ -2377,7 +2377,7 @@ static booleantype IDAAhermiteMalloc(IDAMem IDA_mem) for (i=0; i<=IDAADJ_mem->ia_nsteps; i++) { content = NULL; - content = (HermiteDataMem) malloc(sizeof(struct HermiteDataMemRec)); + content = (IDAhermiteDataMem) malloc(sizeof(struct IDAhermiteDataMemRec)); if (content == NULL) { ii = i; allocOK = SUNFALSE; @@ -2442,7 +2442,7 @@ static booleantype IDAAhermiteMalloc(IDAMem IDA_mem) } for (i=0; icontent); + content = (IDAhermiteDataMem) (dt_mem[i]->content); N_VDestroy(content->y); N_VDestroy(content->yd); @@ -2468,8 +2468,8 @@ static booleantype IDAAhermiteMalloc(IDAMem IDA_mem) static void IDAAhermiteFree(IDAMem IDA_mem) { IDAadjMem IDAADJ_mem; - DtpntMem *dt_mem; - HermiteDataMem content; + IDAdtpntMem *dt_mem; + IDAhermiteDataMem content; long int i; IDAADJ_mem = IDA_mem->ida_adj_mem; @@ -2486,7 +2486,7 @@ static void IDAAhermiteFree(IDAMem IDA_mem) for (i=0; i<=IDAADJ_mem->ia_nsteps; i++) { - content = (HermiteDataMem) (dt_mem[i]->content); + content = (IDAhermiteDataMem) (dt_mem[i]->content); /* content might be NULL, if IDAAdjInit was called but IDASolveF was not. */ if(content) { @@ -2511,15 +2511,15 @@ static void IDAAhermiteFree(IDAMem IDA_mem) * Note that the time is already stored. */ -static int IDAAhermiteStorePnt(IDAMem IDA_mem, DtpntMem d) +static int IDAAhermiteStorePnt(IDAMem IDA_mem, IDAdtpntMem d) { IDAadjMem IDAADJ_mem; - HermiteDataMem content; + IDAhermiteDataMem content; int is, retval; IDAADJ_mem = IDA_mem->ida_adj_mem; - content = (HermiteDataMem) d->content; + content = (IDAhermiteDataMem) d->content; /* Load solution(s) */ N_VScale(ONE, IDA_mem->ida_phi[0], content->y); @@ -2559,8 +2559,8 @@ static int IDAAhermiteGetY(IDAMem IDA_mem, realtype t, N_Vector *yyS, N_Vector *ypS) { IDAadjMem IDAADJ_mem; - DtpntMem *dt_mem; - HermiteDataMem content0, content1; + IDAdtpntMem *dt_mem; + IDAhermiteDataMem content0, content1; realtype t0, t1, delta; realtype factor1, factor2, factor3; @@ -2592,7 +2592,7 @@ static int IDAAhermiteGetY(IDAMem IDA_mem, realtype t, then return y at the left limit. */ if (indx == 0) { - content0 = (HermiteDataMem) (dt_mem[0]->content); + content0 = (IDAhermiteDataMem) (dt_mem[0]->content); N_VScale(ONE, content0->y, yy); N_VScale(ONE, content0->yd, yp); @@ -2615,7 +2615,7 @@ static int IDAAhermiteGetY(IDAMem IDA_mem, realtype t, t1 = dt_mem[indx]->t; delta = t1 - t0; - content0 = (HermiteDataMem) (dt_mem[indx-1]->content); + content0 = (IDAhermiteDataMem) (dt_mem[indx-1]->content); y0 = content0->y; yd0 = content0->yd; if (IDAADJ_mem->ia_interpSensi) { @@ -2626,7 +2626,7 @@ static int IDAAhermiteGetY(IDAMem IDA_mem, realtype t, if (newpoint) { /* Recompute Y0 and Y1 */ - content1 = (HermiteDataMem) (dt_mem[indx]->content); + content1 = (IDAhermiteDataMem) (dt_mem[indx]->content); y1 = content1->y; yd1 = content1->yd; @@ -2770,8 +2770,8 @@ static int IDAAhermiteGetY(IDAMem IDA_mem, realtype t, static booleantype IDAApolynomialMalloc(IDAMem IDA_mem) { IDAadjMem IDAADJ_mem; - DtpntMem *dt_mem; - PolynomialDataMem content; + IDAdtpntMem *dt_mem; + IDApolynomialDataMem content; long int i, ii=0; booleantype allocOK; @@ -2814,7 +2814,7 @@ static booleantype IDAApolynomialMalloc(IDAMem IDA_mem) for (i=0; i<=IDAADJ_mem->ia_nsteps; i++) { content = NULL; - content = (PolynomialDataMem) malloc(sizeof(struct PolynomialDataMemRec)); + content = (IDApolynomialDataMem) malloc(sizeof(struct IDApolynomialDataMemRec)); if (content == NULL) { ii = i; allocOK = SUNFALSE; @@ -2887,7 +2887,7 @@ static booleantype IDAApolynomialMalloc(IDAMem IDA_mem) } for (i=0; icontent); + content = (IDApolynomialDataMem) (dt_mem[i]->content); N_VDestroy(content->y); if (content->yd) N_VDestroy(content->yd); @@ -2915,8 +2915,8 @@ static booleantype IDAApolynomialMalloc(IDAMem IDA_mem) static void IDAApolynomialFree(IDAMem IDA_mem) { IDAadjMem IDAADJ_mem; - DtpntMem *dt_mem; - PolynomialDataMem content; + IDAdtpntMem *dt_mem; + IDApolynomialDataMem content; long int i; IDAADJ_mem = IDA_mem->ida_adj_mem; @@ -2933,7 +2933,7 @@ static void IDAApolynomialFree(IDAMem IDA_mem) for (i=0; i<=IDAADJ_mem->ia_nsteps; i++) { - content = (PolynomialDataMem) (dt_mem[i]->content); + content = (IDApolynomialDataMem) (dt_mem[i]->content); /* content might be NULL, if IDAAdjInit was called but IDASolveF was not. */ if(content) { @@ -2964,14 +2964,14 @@ static void IDAApolynomialFree(IDAMem IDA_mem) * in which case content->yp is non-null. */ -static int IDAApolynomialStorePnt(IDAMem IDA_mem, DtpntMem d) +static int IDAApolynomialStorePnt(IDAMem IDA_mem, IDAdtpntMem d) { IDAadjMem IDAADJ_mem; - PolynomialDataMem content; + IDApolynomialDataMem content; int is, retval; IDAADJ_mem = IDA_mem->ida_adj_mem; - content = (PolynomialDataMem) d->content; + content = (IDApolynomialDataMem) d->content; N_VScale(ONE, IDA_mem->ida_phi[0], content->y); @@ -3013,8 +3013,8 @@ static int IDAApolynomialGetY(IDAMem IDA_mem, realtype t, N_Vector *yyS, N_Vector *ypS) { IDAadjMem IDAADJ_mem; - DtpntMem *dt_mem; - PolynomialDataMem content; + IDAdtpntMem *dt_mem; + IDApolynomialDataMem content; int flag, dir, order, i, j, is, NS, retval; long int indx, base; @@ -3035,7 +3035,7 @@ static int IDAApolynomialGetY(IDAMem IDA_mem, realtype t, then return y at the left limit. */ if (indx == 0) { - content = (PolynomialDataMem) (dt_mem[0]->content); + content = (IDApolynomialDataMem) (dt_mem[0]->content); N_VScale(ONE, content->y, yy); N_VScale(ONE, content->yd, yp); @@ -3064,12 +3064,12 @@ static int IDAApolynomialGetY(IDAMem IDA_mem, realtype t, if (dir == 1) { base = indx; - content = (PolynomialDataMem) (dt_mem[base]->content); + content = (IDApolynomialDataMem) (dt_mem[base]->content); order = content->order; if(indx < order) base += order-indx; } else { base = indx-1; - content = (PolynomialDataMem) (dt_mem[base]->content); + content = (IDApolynomialDataMem) (dt_mem[base]->content); order = content->order; if (IDAADJ_mem->ia_np-indx > order) base -= indx+order-IDAADJ_mem->ia_np; } @@ -3082,7 +3082,7 @@ static int IDAApolynomialGetY(IDAMem IDA_mem, realtype t, if (dir == 1) { for(j=0;j<=order;j++) { IDAADJ_mem->ia_T[j] = dt_mem[base-j]->t; - content = (PolynomialDataMem) (dt_mem[base-j]->content); + content = (IDApolynomialDataMem) (dt_mem[base-j]->content); N_VScale(ONE, content->y, IDAADJ_mem->ia_Y[j]); if (NS > 0) { @@ -3096,7 +3096,7 @@ static int IDAApolynomialGetY(IDAMem IDA_mem, realtype t, } else { for(j=0;j<=order;j++) { IDAADJ_mem->ia_T[j] = dt_mem[base-1+j]->t; - content = (PolynomialDataMem) (dt_mem[base-1+j]->content); + content = (IDApolynomialDataMem) (dt_mem[base-1+j]->content); N_VScale(ONE, content->y, IDAADJ_mem->ia_Y[j]); if (NS > 0) { @@ -3289,7 +3289,7 @@ static int IDAAfindIndex(IDAMem ida_mem, realtype t, { IDAadjMem IDAADJ_mem; IDAMem IDA_mem; - DtpntMem *dt_mem; + IDAdtpntMem *dt_mem; int sign; booleantype to_left, to_right; diff --git a/src/idas/idaa_io.c b/src/idas/idaa_io.c index 76923817c2..4af2ec0e05 100644 --- a/src/idas/idaa_io.c +++ b/src/idas/idaa_io.c @@ -553,7 +553,7 @@ int IDAGetAdjCheckPointsInfo(void *ida_mem, IDAadjCheckPointRec *ckpnt) { IDAMem IDA_mem; IDAadjMem IDAADJ_mem; - CkpntMem ck_mem; + IDAckpntMem ck_mem; int i; /* Is ida_mem valid? */ @@ -666,8 +666,8 @@ int IDAGetAdjDataPointHermite(void *ida_mem, int which, { IDAMem IDA_mem; IDAadjMem IDAADJ_mem; - DtpntMem *dt_mem; - HermiteDataMem content; + IDAdtpntMem *dt_mem; + IDAhermiteDataMem content; /* Is ida_mem valid? */ if (ida_mem == NULL) { @@ -691,7 +691,7 @@ int IDAGetAdjDataPointHermite(void *ida_mem, int which, } *t = dt_mem[which]->t; - content = (HermiteDataMem) dt_mem[which]->content; + content = (IDAhermiteDataMem) dt_mem[which]->content; if (yy != NULL) N_VScale(ONE, content->y, yy); if (yd != NULL) N_VScale(ONE, content->yd, yd); @@ -716,8 +716,8 @@ int IDAGetAdjDataPointPolynomial(void *ida_mem, int which, { IDAMem IDA_mem; IDAadjMem IDAADJ_mem; - DtpntMem *dt_mem; - PolynomialDataMem content; + IDAdtpntMem *dt_mem; + IDApolynomialDataMem content; /* Is ida_mem valid? */ if (ida_mem == NULL) { IDAProcessError(NULL, IDA_MEM_NULL, "IDAA", "IDAGetAdjDataPointPolynomial", MSGAM_NULL_IDAMEM); @@ -740,7 +740,7 @@ int IDAGetAdjDataPointPolynomial(void *ida_mem, int which, } *t = dt_mem[which]->t; - content = (PolynomialDataMem) dt_mem[which]->content; + content = (IDApolynomialDataMem) dt_mem[which]->content; if (y != NULL) N_VScale(ONE, content->y, y); diff --git a/src/idas/idas_impl.h b/src/idas/idas_impl.h index ed1cf325f4..ee29def3ea 100644 --- a/src/idas/idas_impl.h +++ b/src/idas/idas_impl.h @@ -550,8 +550,8 @@ typedef struct IDAMemRec { */ typedef struct IDAadjMemRec *IDAadjMem; -typedef struct CkpntMemRec *CkpntMem; -typedef struct DtpntMemRec *DtpntMem; +typedef struct IDAckpntMemRec *IDAckpntMem; +typedef struct IDAdtpntMemRec *IDAdtpntMem; typedef struct IDABMemRec *IDABMem; /* @@ -574,19 +574,19 @@ typedef void (*IDAAMFreeFn)(IDAMem IDA_mem); typedef int (*IDAAGetYFn)(IDAMem IDA_mem, realtype t, N_Vector yy, N_Vector yp, N_Vector *yyS, N_Vector *ypS); -typedef int (*IDAAStorePntFn)(IDAMem IDA_mem, DtpntMem d); +typedef int (*IDAAStorePntFn)(IDAMem IDA_mem, IDAdtpntMem d); /* * ----------------------------------------------------------------- - * Types : struct CkpntMemRec, CkpntMem + * Types : struct IDAckpntMemRec, IDAckpntMem * ----------------------------------------------------------------- - * The type CkpntMem is type pointer to struct CkpntMemRec. + * The type IDAckpntMem is type pointer to struct IDAckpntMemRec. * This structure contains fields to store all information at a * check point that is needed to 'hot' start IDAS. * ----------------------------------------------------------------- */ -struct CkpntMemRec { +struct IDAckpntMemRec { /* Integration limits */ realtype ck_t0; @@ -646,12 +646,12 @@ struct CkpntMemRec { int ck_phi_alloc; /* Pointer to next structure in list */ - struct CkpntMemRec *ck_next; + struct IDAckpntMemRec *ck_next; }; /* * ----------------------------------------------------------------- - * Type : struct DtpntMemRec + * Type : struct IDAdtpntMemRec * ----------------------------------------------------------------- * This structure contains fields to store all information at a * data point that is needed to interpolate solution of forward @@ -659,21 +659,21 @@ struct CkpntMemRec { * ----------------------------------------------------------------- */ -struct DtpntMemRec { +struct IDAdtpntMemRec { realtype t; /* time */ void *content; /* interpType-dependent content */ }; /* Data for cubic Hermite interpolation */ -typedef struct HermiteDataMemRec { +typedef struct IDAhermiteDataMemRec { N_Vector y; N_Vector yd; N_Vector *yS; N_Vector *ySd; -} *HermiteDataMem; +} *IDAhermiteDataMem; /* Data for polynomial interpolation */ -typedef struct PolynomialDataMemRec { +typedef struct IDApolynomialDataMemRec { N_Vector y; N_Vector *yS; @@ -682,7 +682,7 @@ typedef struct PolynomialDataMemRec { N_Vector yd; N_Vector *ySd; int order; -} *PolynomialDataMem; +} *IDApolynomialDataMem; /* * ----------------------------------------------------------------- @@ -798,10 +798,10 @@ struct IDAadjMemRec { * ---------------- */ /* Storage for check point information */ - struct CkpntMemRec *ck_mem; + struct IDAckpntMemRec *ck_mem; /* address of the check point structure for which data is available */ - struct CkpntMemRec *ia_ckpntData; + struct IDAckpntMemRec *ia_ckpntData; /* Number of checkpoints. */ int ia_nckpnts; @@ -817,7 +817,7 @@ struct IDAadjMemRec { long int ia_ilast; /* Storage for data from forward runs */ - struct DtpntMemRec **dt_mem; + struct IDAdtpntMemRec **dt_mem; /* Actual number of data points saved in current dt_mem */ /* Commonly, np = nsteps+1 */ From 5aebf4434a77ee8eea482ae355cf111cca7f8e25 Mon Sep 17 00:00:00 2001 From: Yu Pan <70486009+yu-nix@users.noreply.github.com> Date: Fri, 15 Sep 2023 11:56:40 -0400 Subject: [PATCH 04/35] Feature/compare califiles (#318) Add scripts for comparing benchmark and example timings against release timings and a notebook using Thicket to visualize data --------- Co-authored-by: David J. Gardner Co-authored-by: Cody J. Balos --- .gitlab/build_and_test.sh | 22 ++++-- .gitlab/lassen-jobs.yml | 3 +- src/sundials/sundials_context.c | 3 +- test/benchmark_analysis.ipynb | 87 ++++++++++++++++++++++++ test/compare_benchmarks.py | 109 ++++++++++++++++++++++++++++++ test/compare_examples.py | 115 ++++++++++++++++++++++++++++++++ test/testRunner | 2 + 7 files changed, 334 insertions(+), 7 deletions(-) create mode 100644 test/benchmark_analysis.ipynb create mode 100644 test/compare_benchmarks.py create mode 100644 test/compare_examples.py diff --git a/.gitlab/build_and_test.sh b/.gitlab/build_and_test.sh index 0ef3a2e9ce..cf7ab193fa 100755 --- a/.gitlab/build_and_test.sh +++ b/.gitlab/build_and_test.sh @@ -188,11 +188,23 @@ then $cmake_exe --version # configure - $cmake_exe \ - -C "${hostconfig_path}" \ - -DCMAKE_INSTALL_PREFIX=${install_dir} \ - "${project_dir}" - + if [[ "${CI_COMMIT_BRANCH}" == "main" ]] + then + # redirect caliper files to release directory + sundials_version=$(cd ${project_dir}; git describe --abbrev=0) + $cmake_exe \ + -C "${hostconfig_path}" \ + -DCMAKE_INSTALL_PREFIX=${install_dir} \ + -DSUNDIALS_CALIPER_OUTPUT_DIR="${CALIPER_DIR}/Release/${hostname}/${sundials_version}" \ + "${project_dir}" + + else + $cmake_exe \ + -C "${hostconfig_path}" \ + -DCMAKE_INSTALL_PREFIX=${install_dir} \ + "${project_dir}" + fi + # build VERBOSE_BUILD=${VERBOSE_BUILD:-"OFF"} if [[ "${VERBOSE_BUILD}" == "ON" ]]; then diff --git a/.gitlab/lassen-jobs.yml b/.gitlab/lassen-jobs.yml index 92362a3d2a..1ffca71d46 100644 --- a/.gitlab/lassen-jobs.yml +++ b/.gitlab/lassen-jobs.yml @@ -38,7 +38,8 @@ lassen_gcc_cuda_bench: matrix: - COMPILER_SPEC: gcc@8.3.1 CUDA_SPEC: [cuda@11.8.0] + CALIPER_DIR: /usr/workspace/sundials/califiles variables: - SPEC: "%${COMPILER_SPEC} cstd=99 cxxstd=14 build_type=Release precision=double scheduler=lsf caliper-dir=/usr/workspace/sundials/califiles ~int64 +benchmarks+profiling+caliper+adiak+mpi+openmp+cuda+raja cuda_arch=70 ^raja+cuda~openmp~examples~exercises cuda_arch=70 ^caliper+adiak+cuda cuda_arch=70 ^${CUDA_SPEC}+allow-unsupported-compilers" + SPEC: "%${COMPILER_SPEC} cstd=99 cxxstd=14 build_type=Release precision=double scheduler=lsf caliper-dir=${CALIPER_DIR} ~int64 +benchmarks+profiling+caliper+adiak+mpi+openmp+cuda+raja cuda_arch=70 ^raja+cuda~openmp~examples~exercises cuda_arch=70 ^caliper+adiak+cuda cuda_arch=70 ^${CUDA_SPEC}+allow-unsupported-compilers" extends: .lassen_build_and_bench diff --git a/src/sundials/sundials_context.c b/src/sundials/sundials_context.c index 26ea56f944..94f90301b8 100644 --- a/src/sundials/sundials_context.c +++ b/src/sundials/sundials_context.c @@ -239,7 +239,8 @@ void sunAdiakCollectMetadata() { adiak_namevalue("fortran_compiler_version", 2, NULL, "%s", SUN_FORTRAN_COMPILER_VERSION); adiak_namevalue("fortran_compiler_flags", 2, NULL, "%s", SUN_FORTRAN_COMPILER_FLAGS); - adiak_namevalue("sundials_version", 2, NULL, "%s", SUNDIALS_GIT_VERSION); + adiak_namevalue("sundials_version", 2, NULL, "%s", SUNDIALS_VERSION); + adiak_namevalue("sundials_git_version", 2, NULL, "%s", SUNDIALS_GIT_VERSION); adiak_namevalue("build_type", 2, NULL, "%s", SUN_BUILD_TYPE); adiak_namevalue("third_party_libraries", 2, NULL, "%s", SUN_TPL_LIST); #ifdef SUN_JOB_ID diff --git a/test/benchmark_analysis.ipynb b/test/benchmark_analysis.ipynb new file mode 100644 index 0000000000..9dccfc010d --- /dev/null +++ b/test/benchmark_analysis.ipynb @@ -0,0 +1,87 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import subprocess\n", + "import sys\n", + "import re\n", + "import glob\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from IPython.display import display, HTML\n", + "display(HTML(\"\"))\n", + "\n", + "import hatchet as ht\n", + "import thicket as tt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set desired pandas options\n", + "pd.set_option('display.max_rows', None)\n", + "pd.set_option('display.max_columns', None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "caliDir = \"/usr/workspace/pan13/shrimp/Benchmarking/diffusion_2D/arkode_diffusion_2D_mpi_d2d_arkode_serial\"\n", + "caliFiles = glob.glob(\"%s/*.cali\" % caliDir)\n", + "\n", + "th_bench = tt.Thicket.from_caliperreader(caliFiles)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_merge = th_bench.dataframe.merge(th_bench.metadata, on='profile')\n", + "df_merge['launchdate'] = pd.to_datetime(df_merge['launchdate'], unit='s')\n", + "\n", + "# display only the top-most node\n", + "df_main = df_merge[df_merge['name'] == 'main']\n", + "df_main.set_index('launchdate')\n", + "\n", + "# adjust y-axis to be 2 degrees of precision\n", + "df_main.plot(x='launchdate', y=['Max time/rank', 'Avg time/rank', 'Min time/rank'], kind='line', use_index=True, xticks=df_main['launchdate'], rot=80, x_compat=True, figsize=(18,1))\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/test/compare_benchmarks.py b/test/compare_benchmarks.py new file mode 100644 index 0000000000..663f406262 --- /dev/null +++ b/test/compare_benchmarks.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +# ----------------------------------------------------------------------------- +# Programmer(s): Yu Pan @ LLNL +# ----------------------------------------------------------------------------- +# SUNDIALS Copyright Start +# Copyright (c) 2002-2023, Lawrence Livermore National Security +# and Southern Methodist University. +# All rights reserved. +# +# See the top-level LICENSE and NOTICE files for details. +# +# SPDX-License-Identifier: BSD-3-Clause +# SUNDIALS Copyright End +# ----------------------------------------------------------------------------- + +import os +import glob +import argparse +import multiprocessing as mp + +import thicket as tt + + +def main(): + parser = argparse.ArgumentParser(description='Compare Sundials performance results against previous results') + + parser.add_argument('--release', dest='release', action='store_true', help='indicate if the current run to process is a release') + + parser.add_argument('--calidir', dest='caliDir', type=str, help='path to directory containing caliper files', default="/usr/workspace/sundials/califiles") + + parser.add_argument('--releasedir', dest='releaseDir', type=str, help='path to directory containing release caliper files', default="/usr/workspace/sundials/califiles/Release") + + parser.add_argument('--outpath', dest='outPath', type=str, help='path to directory to write results to', default="/dev/null") + + parser.add_argument('--jobid', dest='jobID', type=int, help='job id of the current run to identify .cali files') + + parser.add_argument('--threshold', dest="threshold", type=float, help='the percentage threshold in performance difference that indicates a regression', default=2.0) + + args = parser.parse_args() + + release = args.release + releaseDir = args.releaseDir + caliDir = args.caliDir + outPath = args.outPath + jobID = args.jobID + threshold = args.threshold + + # Get available benchmarks + benchFiles = glob.glob("%s/Benchmarking/*/*" % caliDir) + + if not os.path.exists(outPath): + os.makedirs(outPath) + outFile = open("%s/benchmark_output.out" % outPath, 'w') + + # thread per file + with mp.Pool() as pool: + for res in pool.starmap(process_benchmark, [(jobID, release, releaseDir, i, threshold) for i in benchFiles]): + if res: + outFile.write(res + "\n") + outFile.close() + + outFile = open("%s/benchmark_output.out" % outPath, 'r') + try: + outLines = outFile.readlines() + finally: + outFile.close() + + if (len(outLines) == 0): + return -1 + return 0 + +def process_benchmark(jobID, isRelease, releaseDir, benchmarkDir, threshold): + # Get the current benchmark run + benchmarkFiles = glob.glob("%s/*.cali" % benchmarkDir) + # Don't compare if the run didn't include this benchmark + if (len(benchmarkFiles) == 0): + return + + th_files = tt.Thicket.from_caliperreader(benchmarkFiles) + curFilter = lambda x: x['job_id'] == jobID + th_current = th_files.filter_metadata(curFilter) + + # Get the release caliper file + cluster = th_current.metadata['cluster'].values[0] + if isRelease: + # Get the last release + versionDirs = glob.glob("%s/%s/*" % (releaseDir, cluster)) + versionDirs.sort(key=os.path.getmtime, reverse=True) + versionDir = versionDirs[1] + else: + # Get the release the run is a part of + version = th_current.metadata['sundials_version'].values[0] + versionDir = "%s/%s/%s" % (releaseDir, cluster, version) + benchmarkName = th_current.metadata['env.TEST_NAME'].values[0] + releaseFile = glob.glob("%s/Benchmarking/*/%s/*.cali" % (versionDir, benchmarkName), recursive=True) + th_compare = tt.Thicket.from_caliperreader(releaseFile) + metrics = ['Max time/rank'] + tt.mean(th_current, columns=metrics) + tt.mean(th_compare, columns=metrics) + + ratio = th_current.statsframe.dataframe['Max time/rank_mean'] / th_compare.statsframe.dataframe['Max time/rank_mean'] + + tolerance = threshold/100 + if 1 - ratio[0] < tolerance: + return benchmarkName + + +if __name__ == "__main__": + main() diff --git a/test/compare_examples.py b/test/compare_examples.py new file mode 100644 index 0000000000..4b792ba03a --- /dev/null +++ b/test/compare_examples.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python3 +# ----------------------------------------------------------------------------- +# Programmer(s): Yu Pan @ LLNL +# ----------------------------------------------------------------------------- +# SUNDIALS Copyright Start +# Copyright (c) 2002-2023, Lawrence Livermore National Security +# and Southern Methodist University. +# All rights reserved. +# +# See the top-level LICENSE and NOTICE files for details. +# +# SPDX-License-Identifier: BSD-3-Clause +# SUNDIALS Copyright End +# ----------------------------------------------------------------------------- + +import os +import subprocess +import sys +import re +import glob +import argparse +import multiprocessing as mp +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +import hatchet as ht +import thicket as tt + +def main(): + parser = argparse.ArgumentParser(description='Compare Sundials performance results against previous results') + + parser.add_argument('--release', dest='release', action='store_true', help='indicate if the current run to process is a release') + + parser.add_argument('--calidir', dest='caliDir', type=str, help='path to directory containing caliper files', default="/usr/workspace/sundials/califiles") + + parser.add_argument('--releasedir', dest='releaseDir', type=str, help='path to directory containing release caliper files', default="/usr/workspace/sundials/califiles/Release") + + parser.add_argument('--outpath', dest='outPath', type=str, help='path to directory to write results to', default="/dev/null") + + parser.add_argument('--threshold', dest="threshold", type=float, help='the percentage threshold in performance difference that indicates a regression', default=2.0) + + args = parser.parse_args() + + release = args.release + releaseDir = args.releaseDir + caliDir = args.caliDir + outPath = args.outPath + threshold = args.threshold + + # Get the latest test run + runDirs = glob.glob("%s/Testing/*" % caliDir, recursive = True) + runDirs.sort(key=os.path.getmtime, reverse=True) + runDir = runDirs[0] + + runFile = glob.glob(runDir)[0] + th_temp = tt.Thicket.from_caliperreader(runFile) + cluster = th_temp.metadata['cluster'] + # get machine from the file + if release: + # Compare against the last release + versionDirs = glob.glob("%s/%s/*" % (releaseDir, cluster)) + versionDirs.sort(key=os.path.getmtime, reverse=True) + versionDir = versionDirs[1] + else: + # Compare against the release the run is a part of + version = th_temp.metadata['sundials_version'].values[0] + versionDir = "%s/%s/%s" % (releaseDir, cluster, version) + + # Gather files to process + runFiles = glob.glob("%s/*.cali" % (runDir)) + + if not os.path.exists(outPath): + os.makedirs(outPath) + outFile = open("%s/output.out" % outPath, 'w') + + # Compare test results against past runs. If a test performs below a threshold, output test name to outFile. + with mp.Pool() as pool: + for res in pool.starmap(compare_against_release, [(versionDir, i, threshold) for i in runFiles]): + if res: + outFile.write(res + "\n") + outFile.close() + + outFile = open("%s/example_output.out" % outPath, 'r') + try: + outLines = outFile.readlines() + finally: + outFile.close() + + if (len(outLines) == 0): + return -1 + return 0 + + +def compare_against_release(releaseDir, file, threshold): + th = tt.Thicket.from_caliperreader(file) + + testName = th.metadata['env.TEST_NAME'].values[0] + + # Gather release run + releaseFile = glob.glob("%s/Testing/*/%s.*.cali" % (releaseDir, testName), recursive=True) + th_release = tt.Thicket.from_caliperreader(releaseFile) + + metrics = ['Max time/rank'] + tt.mean(th_release, columns=metrics) + tt.mean(th, columns=metrics) + + ratio = th.statsframe.dataframe['Max time/rank_mean'] / th_release.statsframe.dataframe['Max time/rank_mean'] + print(ratio[0]) + tolerance = threshold/100 + if 1 - ratio[0] < tolerance: + return testName + +if __name__ == "__main__": + main() diff --git a/test/testRunner b/test/testRunner index 630ec67261..7f854785e7 100755 --- a/test/testRunner +++ b/test/testRunner @@ -201,6 +201,8 @@ def main(): dateTime = datetime.datetime.now().strftime("%m%d%Y_%H%M%S") profilePath = os.path.join(caliDir, args.testName+".%s.cali" % dateTime) os.environ['CALI_SERVICES_ENABLE'] = 'env' + os.environ['TEST_NAME'] = testName + os.environ['CALI_ENV_EXTRA'] = 'TEST_NAME' os.environ['CALI_CONFIG'] = 'spot(output=%s)' % profilePath # if user supplies precision info overide the default choices From 0ece42cce58e3654362a4497032fa9d48a8e7cb5 Mon Sep 17 00:00:00 2001 From: phannebohm Date: Mon, 18 Sep 2023 02:49:32 +0200 Subject: [PATCH 05/35] Fix time complexity issue for sparse matrix (#257) Fixes part of #253 by avoiding nested loops that take `O(M*N)` operations. Also fixes #256. --------- Signed-off-by: phannebohm Co-authored-by: Cody Balos --- CHANGELOG.md | 4 ++ doc/arkode/guide/source/Introduction.rst | 11 +++- doc/cvode/guide/source/Introduction.rst | 7 +++ doc/cvodes/guide/source/Introduction.rst | 4 ++ doc/ida/guide/source/Introduction.rst | 7 +++ doc/idas/guide/source/Introduction.rst | 4 ++ doc/kinsol/guide/source/Introduction.rst | 7 +++ src/sunmatrix/sparse/sunmatrix_sparse.c | 71 +++++++++++++----------- 8 files changed, 82 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd786dde05..e040f9abdf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ Renamed some internal types in CVODES and IDAS to allow both packages to be built together in the same binary. +Improved computational complexity of `SUNMatScaleAddI_Sparse` from `O(M*N)` to +`O(NNZ)`. +Fixed scaling bug in `SUNMatScaleAddI_Sparse` for non-square matrices. + ## Changes to SUNDIALS in release 6.6.1 Updated the Tpetra NVector interface to support Trilinos 14. diff --git a/doc/arkode/guide/source/Introduction.rst b/doc/arkode/guide/source/Introduction.rst index 9bc832af85..df64ba2729 100644 --- a/doc/arkode/guide/source/Introduction.rst +++ b/doc/arkode/guide/source/Introduction.rst @@ -85,8 +85,8 @@ with adaptive explicit methods of orders 2-8. H(t, p, q) = T(t, p) + V(t, q) .. math:: - \dot{p} = f_1(t,q) = \frac{\partial V(t,q)}{\partial q}, \quad - \dot{q} = f_2(t,p) = \frac{\partial T(t,p)}{\partial p}, + \dot{p} = f_1(t,q) = \frac{\partial V(t,q)}{\partial q}, \quad + \dot{q} = f_2(t,p) = \frac{\partial T(t,p)}{\partial p}, :label: ARKODE_ODE_hamiltonian allowing for conservation of quadratic invariants. @@ -130,6 +130,13 @@ provided with SUNDIALS, or again may utilize a user-supplied module. Changes from previous versions ============================== +Changes in vX.X.X +----------------- + +Improved computational complexity of `SUNMatScaleAddI_Sparse` from `O(M*N)` to +`O(NNZ)`. +Fixed scaling bug in `SUNMatScaleAddI_Sparse` for non-square matrices. + Changes in v5.6.1 ----------------- diff --git a/doc/cvode/guide/source/Introduction.rst b/doc/cvode/guide/source/Introduction.rst index ca4c41b3de..d67e63dee2 100644 --- a/doc/cvode/guide/source/Introduction.rst +++ b/doc/cvode/guide/source/Introduction.rst @@ -111,6 +111,13 @@ implementations. Changes from previous versions ============================== +Changes in vX.X.X +----------------- + +Improved computational complexity of `SUNMatScaleAddI_Sparse` from `O(M*N)` to +`O(NNZ)`. +Fixed scaling bug in `SUNMatScaleAddI_Sparse` for non-square matrices. + Changes in v6.6.1 ----------------- diff --git a/doc/cvodes/guide/source/Introduction.rst b/doc/cvodes/guide/source/Introduction.rst index 6ad961ebd3..71ef9989a2 100644 --- a/doc/cvodes/guide/source/Introduction.rst +++ b/doc/cvodes/guide/source/Introduction.rst @@ -116,6 +116,10 @@ Changes in vX.X.X Renamed some internal types in CVODES and IDAS to allow both packages to be built together in the same binary. +Improved computational complexity of `SUNMatScaleAddI_Sparse` from `O(M*N)` to +`O(NNZ)`. +Fixed scaling bug in `SUNMatScaleAddI_Sparse` for non-square matrices. + Changes in v6.6.1 ----------------- diff --git a/doc/ida/guide/source/Introduction.rst b/doc/ida/guide/source/Introduction.rst index 9d18bd826a..9e81c930c4 100644 --- a/doc/ida/guide/source/Introduction.rst +++ b/doc/ida/guide/source/Introduction.rst @@ -72,6 +72,13 @@ systems. Changes from previous versions ============================== +Changes in vX.X.X +----------------- + +Improved computational complexity of `SUNMatScaleAddI_Sparse` from `O(M*N)` to +`O(NNZ)`. +Fixed scaling bug in `SUNMatScaleAddI_Sparse` for non-square matrices. + Changes in v6.6.1 ----------------- diff --git a/doc/idas/guide/source/Introduction.rst b/doc/idas/guide/source/Introduction.rst index 676dd88777..b9893a4fd0 100644 --- a/doc/idas/guide/source/Introduction.rst +++ b/doc/idas/guide/source/Introduction.rst @@ -91,6 +91,10 @@ Changes in vX.X.X Renamed some internal types in CVODES and IDAS to allow both packages to be built together in the same binary. +Improved computational complexity of `SUNMatScaleAddI_Sparse` from `O(M*N)` to +`O(NNZ)`. +Fixed scaling bug in `SUNMatScaleAddI_Sparse` for non-square matrices. + Changes in v5.6.1 ----------------- diff --git a/doc/kinsol/guide/source/Introduction.rst b/doc/kinsol/guide/source/Introduction.rst index 039eed6918..4269af1a5c 100644 --- a/doc/kinsol/guide/source/Introduction.rst +++ b/doc/kinsol/guide/source/Introduction.rst @@ -88,6 +88,13 @@ applications written in Fortran. Changes from previous versions ============================== +Changes in vX.X.X +----------------- + +Improved computational complexity of `SUNMatScaleAddI_Sparse` from `O(M*N)` to +`O(NNZ)`. +Fixed scaling bug in `SUNMatScaleAddI_Sparse` for non-square matrices. + Changes in v6.6.1 ----------------- diff --git a/src/sunmatrix/sparse/sunmatrix_sparse.c b/src/sunmatrix/sparse/sunmatrix_sparse.c index 0afa22bac4..efa956f84f 100644 --- a/src/sunmatrix/sparse/sunmatrix_sparse.c +++ b/src/sunmatrix/sparse/sunmatrix_sparse.c @@ -572,7 +572,7 @@ int SUNMatCopy_Sparse(SUNMatrix A, SUNMatrix B) int SUNMatScaleAddI_Sparse(realtype c, SUNMatrix A) { - sunindextype j, i, p, nz, newvals, M, N, cend; + sunindextype j, i, p, nz, newvals, M, N, cend, nw; booleantype newmat, found; sunindextype *w, *Ap, *Ai, *Cp, *Ci; realtype *x, *Ax, *Cx; @@ -640,8 +640,7 @@ int SUNMatScaleAddI_Sparse(realtype c, SUNMatrix A) /* case 2: A has sufficient storage, but does not already contain a diagonal */ } else if (!newmat) { - - /* create work arrays for nonzero indices and values in a single column (row) */ + /* create work arrays for nonzero row (column) indices and values in a single column (row) */ w = (sunindextype *) malloc(M * sizeof(sunindextype)); x = (realtype *) malloc(M * sizeof(realtype)); @@ -656,30 +655,38 @@ int SUNMatScaleAddI_Sparse(realtype c, SUNMatrix A) /* iterate through columns (rows) backwards */ for (j=N-1; j>=0; j--) { - /* clear out temporary arrays for this column (row) */ - for (i=0; i=0; i--) { - if ( w[i] > 0 ) { - Ai[--nz] = i; - Ax[nz] = x[i]; - } + /* fill entries past diagonal */ + for (i=nw-1; i>=0 && w[i]>j; i--) { + Ai[--nz] = w[i]; + Ax[nz] = x[w[i]]; + } + /* fill diagonal if applicable */ + if (i < 0 /* empty or insert at front */ || w[i] != j /* insert behind front */) { + Ai[--nz] = j; + Ax[nz] = x[j]; + } + /* fill entries before diagonal */ + for (; i>=0; i--) { + Ai[--nz] = w[i]; + Ax[nz] = x[w[i]]; } /* store ptr past this col (row) from orig A, update value for new A */ @@ -696,8 +703,7 @@ int SUNMatScaleAddI_Sparse(realtype c, SUNMatrix A) /* case 3: A must be reallocated with sufficient storage */ } else { - /* create work arrays for nonzero indices and values */ - w = (sunindextype *) malloc(M * sizeof(sunindextype)); + /* create work array for nonzero values in a single column (row) */ x = (realtype *) malloc(M * sizeof(realtype)); /* create new matrix for sum */ @@ -724,30 +730,34 @@ int SUNMatScaleAddI_Sparse(realtype c, SUNMatrix A) /* set current column (row) pointer to current # nonzeros */ Cp[j] = nz; - /* clear out temporary arrays for this column (row) */ - for (i=0; i 0 ) { - Ci[nz] = i; - Cx[nz++] = x[i]; - } + /* fill entries before diagonal */ + for (p=Ap[j]; p= Ap[j+1] /* empty or insert at end */ || Ai[p] != j /* insert before end */) { + Ci[nz] = j; + Cx[nz++] = x[j]; + } + /* fill entries past diagonal */ + for (; p Date: Wed, 27 Sep 2023 22:06:13 +0200 Subject: [PATCH 06/35] Fix missing soversion (#343) Fix missing soversion on some SUNLinSol and SUNNonlinSol targets --------- Signed-off-by: Julien Schueller Co-authored-by: Cody Balos --- CHANGELOG.md | 4 ++++ doc/arkode/guide/source/Introduction.rst | 9 ++++++--- doc/cvode/guide/source/Introduction.rst | 9 ++++++--- doc/cvodes/guide/source/Introduction.rst | 9 ++++++--- doc/ida/guide/source/Introduction.rst | 9 ++++++--- doc/idas/guide/source/Introduction.rst | 9 ++++++--- doc/kinsol/guide/source/Introduction.rst | 9 ++++++--- src/sunlinsol/band/CMakeLists.txt | 2 +- src/sunlinsol/cusolversp/CMakeLists.txt | 2 +- src/sunlinsol/dense/CMakeLists.txt | 2 +- src/sunlinsol/klu/CMakeLists.txt | 2 +- src/sunlinsol/lapackband/CMakeLists.txt | 2 +- src/sunlinsol/lapackdense/CMakeLists.txt | 2 +- src/sunlinsol/pcg/CMakeLists.txt | 2 +- src/sunlinsol/spbcgs/CMakeLists.txt | 2 +- src/sunlinsol/spfgmr/CMakeLists.txt | 2 +- src/sunlinsol/spgmr/CMakeLists.txt | 2 +- src/sunlinsol/sptfqmr/CMakeLists.txt | 2 +- src/sunlinsol/superludist/CMakeLists.txt | 2 +- src/sunlinsol/superlumt/CMakeLists.txt | 2 +- src/sunnonlinsol/fixedpoint/CMakeLists.txt | 2 +- src/sunnonlinsol/newton/CMakeLists.txt | 2 +- 22 files changed, 55 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e040f9abdf..998379450f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,17 @@ # SUNDIALS Changelog + ## Changes to SUNDIALS in release X.X.X Renamed some internal types in CVODES and IDAS to allow both packages to be built together in the same binary. Improved computational complexity of `SUNMatScaleAddI_Sparse` from `O(M*N)` to `O(NNZ)`. + Fixed scaling bug in `SUNMatScaleAddI_Sparse` for non-square matrices. +Fixed missing soversions in some `SUNLinearSolver` CMake targets. + ## Changes to SUNDIALS in release 6.6.1 Updated the Tpetra NVector interface to support Trilinos 14. diff --git a/doc/arkode/guide/source/Introduction.rst b/doc/arkode/guide/source/Introduction.rst index df64ba2729..3b31d1a83d 100644 --- a/doc/arkode/guide/source/Introduction.rst +++ b/doc/arkode/guide/source/Introduction.rst @@ -133,9 +133,12 @@ Changes from previous versions Changes in vX.X.X ----------------- -Improved computational complexity of `SUNMatScaleAddI_Sparse` from `O(M*N)` to -`O(NNZ)`. -Fixed scaling bug in `SUNMatScaleAddI_Sparse` for non-square matrices. +Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` to +``O(NNZ)``. + +Fixed scaling bug in ``SUNMatScaleAddI_Sparse`` for non-square matrices. + +Fixed missing soversions in some ``SUNLinearSolver`` CMake targets. Changes in v5.6.1 ----------------- diff --git a/doc/cvode/guide/source/Introduction.rst b/doc/cvode/guide/source/Introduction.rst index d67e63dee2..81e7226f58 100644 --- a/doc/cvode/guide/source/Introduction.rst +++ b/doc/cvode/guide/source/Introduction.rst @@ -114,9 +114,12 @@ Changes from previous versions Changes in vX.X.X ----------------- -Improved computational complexity of `SUNMatScaleAddI_Sparse` from `O(M*N)` to -`O(NNZ)`. -Fixed scaling bug in `SUNMatScaleAddI_Sparse` for non-square matrices. +Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` to +``O(NNZ)``. + +Fixed scaling bug in ``SUNMatScaleAddI_Sparse`` for non-square matrices. + +Fixed missing soversions in some ``SUNLinearSolver`` CMake targets. Changes in v6.6.1 ----------------- diff --git a/doc/cvodes/guide/source/Introduction.rst b/doc/cvodes/guide/source/Introduction.rst index 71ef9989a2..71f1e416c2 100644 --- a/doc/cvodes/guide/source/Introduction.rst +++ b/doc/cvodes/guide/source/Introduction.rst @@ -116,9 +116,12 @@ Changes in vX.X.X Renamed some internal types in CVODES and IDAS to allow both packages to be built together in the same binary. -Improved computational complexity of `SUNMatScaleAddI_Sparse` from `O(M*N)` to -`O(NNZ)`. -Fixed scaling bug in `SUNMatScaleAddI_Sparse` for non-square matrices. +Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` to +``O(NNZ)``. + +Fixed scaling bug in ``SUNMatScaleAddI_Sparse`` for non-square matrices. + +Fixed missing soversions in some ``SUNLinearSolver`` CMake targets. Changes in v6.6.1 ----------------- diff --git a/doc/ida/guide/source/Introduction.rst b/doc/ida/guide/source/Introduction.rst index 9e81c930c4..cee6ce1b53 100644 --- a/doc/ida/guide/source/Introduction.rst +++ b/doc/ida/guide/source/Introduction.rst @@ -75,9 +75,12 @@ Changes from previous versions Changes in vX.X.X ----------------- -Improved computational complexity of `SUNMatScaleAddI_Sparse` from `O(M*N)` to -`O(NNZ)`. -Fixed scaling bug in `SUNMatScaleAddI_Sparse` for non-square matrices. +Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` to +``O(NNZ)``. + +Fixed scaling bug in ``SUNMatScaleAddI_Sparse`` for non-square matrices. + +Fixed missing soversions in some ``SUNLinearSolver`` CMake targets. Changes in v6.6.1 ----------------- diff --git a/doc/idas/guide/source/Introduction.rst b/doc/idas/guide/source/Introduction.rst index b9893a4fd0..db661cf71f 100644 --- a/doc/idas/guide/source/Introduction.rst +++ b/doc/idas/guide/source/Introduction.rst @@ -91,9 +91,12 @@ Changes in vX.X.X Renamed some internal types in CVODES and IDAS to allow both packages to be built together in the same binary. -Improved computational complexity of `SUNMatScaleAddI_Sparse` from `O(M*N)` to -`O(NNZ)`. -Fixed scaling bug in `SUNMatScaleAddI_Sparse` for non-square matrices. +Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` to +``O(NNZ)``. + +Fixed scaling bug in ``SUNMatScaleAddI_Sparse`` for non-square matrices. + +Fixed missing soversions in some ``SUNLinearSolver`` CMake targets. Changes in v5.6.1 ----------------- diff --git a/doc/kinsol/guide/source/Introduction.rst b/doc/kinsol/guide/source/Introduction.rst index 4269af1a5c..4e0a7531bf 100644 --- a/doc/kinsol/guide/source/Introduction.rst +++ b/doc/kinsol/guide/source/Introduction.rst @@ -91,9 +91,12 @@ Changes from previous versions Changes in vX.X.X ----------------- -Improved computational complexity of `SUNMatScaleAddI_Sparse` from `O(M*N)` to -`O(NNZ)`. -Fixed scaling bug in `SUNMatScaleAddI_Sparse` for non-square matrices. +Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` to +``O(NNZ)``. + +Fixed scaling bug in ``SUNMatScaleAddI_Sparse`` for non-square matrices. + +Fixed missing soversions in some ``SUNLinearSolver`` CMake targets. Changes in v6.6.1 ----------------- diff --git a/src/sunlinsol/band/CMakeLists.txt b/src/sunlinsol/band/CMakeLists.txt index 6d912ecf13..3041c51c38 100644 --- a/src/sunlinsol/band/CMakeLists.txt +++ b/src/sunlinsol/band/CMakeLists.txt @@ -34,7 +34,7 @@ sundials_add_library(sundials_sunlinsolband VERSION ${sunlinsollib_VERSION} SOVERSION - ${sunlinsollib_VERSION} + ${sunlinsollib_SOVERSION} ) message(STATUS "Added SUNLINSOL_BAND module") diff --git a/src/sunlinsol/cusolversp/CMakeLists.txt b/src/sunlinsol/cusolversp/CMakeLists.txt index 7d1539624c..e2e7f95589 100644 --- a/src/sunlinsol/cusolversp/CMakeLists.txt +++ b/src/sunlinsol/cusolversp/CMakeLists.txt @@ -33,7 +33,7 @@ sundials_add_library(sundials_sunlinsolcusolversp VERSION ${sunlinsollib_VERSION} SOVERSION - ${sunlinsollib_VERSION} + ${sunlinsollib_SOVERSION} ) message(STATUS "Added SUNLINSOL_CUSOLVERSP module") diff --git a/src/sunlinsol/dense/CMakeLists.txt b/src/sunlinsol/dense/CMakeLists.txt index ad8e0e4e07..40e10cd3bd 100644 --- a/src/sunlinsol/dense/CMakeLists.txt +++ b/src/sunlinsol/dense/CMakeLists.txt @@ -34,7 +34,7 @@ sundials_add_library(sundials_sunlinsoldense VERSION ${sunlinsollib_VERSION} SOVERSION - ${sunlinsollib_VERSION} + ${sunlinsollib_SOVERSION} ) message(STATUS "Added SUNLINSOL_DENSE module") diff --git a/src/sunlinsol/klu/CMakeLists.txt b/src/sunlinsol/klu/CMakeLists.txt index ee39c58795..69c4eb0ddc 100644 --- a/src/sunlinsol/klu/CMakeLists.txt +++ b/src/sunlinsol/klu/CMakeLists.txt @@ -33,7 +33,7 @@ sundials_add_library(sundials_sunlinsolklu VERSION ${sunlinsollib_VERSION} SOVERSION - ${sunlinsollib_VERSION} + ${sunlinsollib_SOVERSION} ) message(STATUS "Added SUNLINSOL_KLU module") diff --git a/src/sunlinsol/lapackband/CMakeLists.txt b/src/sunlinsol/lapackband/CMakeLists.txt index 48ffb2b399..a89e132998 100644 --- a/src/sunlinsol/lapackband/CMakeLists.txt +++ b/src/sunlinsol/lapackband/CMakeLists.txt @@ -33,7 +33,7 @@ sundials_add_library(sundials_sunlinsollapackband VERSION ${sunlinsollib_VERSION} SOVERSION - ${sunlinsollib_VERSION} + ${sunlinsollib_SOVERSION} ) message(STATUS "Added SUNLINSOL_LAPACKBAND module") diff --git a/src/sunlinsol/lapackdense/CMakeLists.txt b/src/sunlinsol/lapackdense/CMakeLists.txt index 947085e320..e52a4b6825 100644 --- a/src/sunlinsol/lapackdense/CMakeLists.txt +++ b/src/sunlinsol/lapackdense/CMakeLists.txt @@ -33,7 +33,7 @@ sundials_add_library(sundials_sunlinsollapackdense VERSION ${sunlinsollib_VERSION} SOVERSION - ${sunlinsollib_VERSION} + ${sunlinsollib_SOVERSION} ) message(STATUS "Added SUNLINSOL_LAPACKDENSE module") diff --git a/src/sunlinsol/pcg/CMakeLists.txt b/src/sunlinsol/pcg/CMakeLists.txt index 21c0e7f70e..9094ba0e45 100644 --- a/src/sunlinsol/pcg/CMakeLists.txt +++ b/src/sunlinsol/pcg/CMakeLists.txt @@ -32,7 +32,7 @@ sundials_add_library(sundials_sunlinsolpcg VERSION ${sunlinsollib_VERSION} SOVERSION - ${sunlinsollib_VERSION} + ${sunlinsollib_SOVERSION} ) message(STATUS "Added SUNLINSOL_PCG module") diff --git a/src/sunlinsol/spbcgs/CMakeLists.txt b/src/sunlinsol/spbcgs/CMakeLists.txt index a1c678328b..e7446be084 100644 --- a/src/sunlinsol/spbcgs/CMakeLists.txt +++ b/src/sunlinsol/spbcgs/CMakeLists.txt @@ -32,7 +32,7 @@ sundials_add_library(sundials_sunlinsolspbcgs VERSION ${sunlinsollib_VERSION} SOVERSION - ${sunlinsollib_VERSION} + ${sunlinsollib_SOVERSION} ) message(STATUS "Added SUNLINSOL_SPBCGS module") diff --git a/src/sunlinsol/spfgmr/CMakeLists.txt b/src/sunlinsol/spfgmr/CMakeLists.txt index 660f8e7576..817fbda37c 100644 --- a/src/sunlinsol/spfgmr/CMakeLists.txt +++ b/src/sunlinsol/spfgmr/CMakeLists.txt @@ -31,7 +31,7 @@ sundials_add_library(sundials_sunlinsolspfgmr VERSION ${sunlinsollib_VERSION} SOVERSION - ${sunlinsollib_VERSION} + ${sunlinsollib_SOVERSION} ) message(STATUS "Added SUNLINSOL_SPFGMR module") diff --git a/src/sunlinsol/spgmr/CMakeLists.txt b/src/sunlinsol/spgmr/CMakeLists.txt index 5e4bbd8356..df03282a92 100644 --- a/src/sunlinsol/spgmr/CMakeLists.txt +++ b/src/sunlinsol/spgmr/CMakeLists.txt @@ -31,7 +31,7 @@ sundials_add_library(sundials_sunlinsolspgmr VERSION ${sunlinsollib_VERSION} SOVERSION - ${sunlinsollib_VERSION} + ${sunlinsollib_SOVERSION} ) message(STATUS "Added SUNLINSOL_SPGMR module") diff --git a/src/sunlinsol/sptfqmr/CMakeLists.txt b/src/sunlinsol/sptfqmr/CMakeLists.txt index d5f4c35e7f..86f06b8b1b 100644 --- a/src/sunlinsol/sptfqmr/CMakeLists.txt +++ b/src/sunlinsol/sptfqmr/CMakeLists.txt @@ -31,7 +31,7 @@ sundials_add_library(sundials_sunlinsolsptfqmr VERSION ${sunlinsollib_VERSION} SOVERSION - ${sunlinsollib_VERSION} + ${sunlinsollib_SOVERSION} ) message(STATUS "Added SUNLINSOL_SPTFQMR module") diff --git a/src/sunlinsol/superludist/CMakeLists.txt b/src/sunlinsol/superludist/CMakeLists.txt index 189a844ee8..0d7ad0803f 100644 --- a/src/sunlinsol/superludist/CMakeLists.txt +++ b/src/sunlinsol/superludist/CMakeLists.txt @@ -39,7 +39,7 @@ sundials_add_library(sundials_sunlinsolsuperludist VERSION ${sunlinsollib_VERSION} SOVERSION - ${sunlinsollib_VERSION} + ${sunlinsollib_SOVERSION} ) message(STATUS "Added SUNLINSOL_SUPERLUDIST module") diff --git a/src/sunlinsol/superlumt/CMakeLists.txt b/src/sunlinsol/superlumt/CMakeLists.txt index 6d1a075fad..f2a1e9ff66 100644 --- a/src/sunlinsol/superlumt/CMakeLists.txt +++ b/src/sunlinsol/superlumt/CMakeLists.txt @@ -43,7 +43,7 @@ sundials_add_library(sundials_sunlinsolsuperlumt VERSION ${sunlinsollib_VERSION} SOVERSION - ${sunlinsollib_VERSION} + ${sunlinsollib_SOVERSION} ) message(STATUS "Added SUNLINSOL_SUPERLUMT module") diff --git a/src/sunnonlinsol/fixedpoint/CMakeLists.txt b/src/sunnonlinsol/fixedpoint/CMakeLists.txt index 2b19f025c5..50d520be5e 100644 --- a/src/sunnonlinsol/fixedpoint/CMakeLists.txt +++ b/src/sunnonlinsol/fixedpoint/CMakeLists.txt @@ -31,7 +31,7 @@ sundials_add_library(sundials_sunnonlinsolfixedpoint VERSION ${sunnonlinsollib_VERSION} SOVERSION - ${sunnonlinsollib_VERSION} + ${sunnonlinsollib_SOVERSION} ) message(STATUS "Added SUNNONLINSOL_FIXEDPOINT module") diff --git a/src/sunnonlinsol/newton/CMakeLists.txt b/src/sunnonlinsol/newton/CMakeLists.txt index 64d2dcba56..54856d2d55 100644 --- a/src/sunnonlinsol/newton/CMakeLists.txt +++ b/src/sunnonlinsol/newton/CMakeLists.txt @@ -31,7 +31,7 @@ sundials_add_library(sundials_sunnonlinsolnewton VERSION ${sunnonlinsollib_VERSION} SOVERSION - ${sunnonlinsollib_VERSION} + ${sunnonlinsollib_SOVERSION} ) message(STATUS "Added SUNNONLINSOL_NEWTON module") From a4e18e58df39ccb0c9ce67de1fe5f3168fda33c2 Mon Sep 17 00:00:00 2001 From: Steven Roberts Date: Wed, 27 Sep 2023 17:16:30 -0700 Subject: [PATCH 07/35] Update ARKODE methods docs (#344) 1. Adds missing implicit tables to the ARKODE constants docs 2. Makes formatting of constants consistent in ARKODE docs 3. Groups implicit tables by order --------- Co-authored-by: David Gardner --- doc/arkode/guide/source/Butcher.rst | 319 +++++++++++++------------- doc/arkode/guide/source/Constants.rst | 44 ++-- 2 files changed, 186 insertions(+), 177 deletions(-) diff --git a/doc/arkode/guide/source/Butcher.rst b/doc/arkode/guide/source/Butcher.rst index 73f4028e04..ecafdab263 100644 --- a/doc/arkode/guide/source/Butcher.rst +++ b/doc/arkode/guide/source/Butcher.rst @@ -967,8 +967,75 @@ lower-order method (from :cite:p:`Bank:85`). region is outlined in blue; the embedding's region is in red. +.. _Butcher.ESDIRK324L2SA: + +ESDIRK324L2SA-4-2-3 +^^^^^^^^^^^^^^^^^^^^^ + +.. index:: ESDIRK324L2SA-4-2-3 method + +Accessible via the constant ``ARKODE_ESDIRK324L2SA_4_2_3`` to +:c:func:`ARKStepSetTableNum` or :c:func:`ARKodeButcherTable_LoadDIRK`. +Accessible via the string ``"ARKODE_ESDIRK324L2SA_4_2_3"`` to +:c:func:`ARKStepSetTableName` or +:c:func:`ARKodeButcherTable_LoadDIRKByName`. +This is the ESDIRK3(2)4L[2]SA method from :cite:p:`KenCarp:19b`. +Both the method and embedding are A- and L-stable. + +.. figure:: /figs/arkode/stab_region_25.png + :scale: 50 % + :align: center + + Linear stability region for the ESDIRK324L2SA-4-2-3 method method. The method's + region is outlined in blue; the embedding's region is in red. + + + +.. _Butcher.ESDIRK325L2SA: + +ESDIRK325L2SA-5-2-3 +^^^^^^^^^^^^^^^^^^^^^ + +.. index:: ESDIRK325L2SA-5-2-3 method + +Accessible via the constant ``ARKODE_ESDIRK325L2SA_5_2_3`` to +:c:func:`ARKStepSetTableNum` or :c:func:`ARKodeButcherTable_LoadDIRK`. +Accessible via the string ``"ARKODE_ESDIRK325L2SA_5_2_3"`` to +:c:func:`ARKStepSetTableName` or +:c:func:`ARKodeButcherTable_LoadDIRKByName`. +This is the ESDIRK3(2)5L[2]SA method from :cite:p:`KenCarp:16`. +Both the method and embedding are A- and L-stable. + +.. figure:: /figs/arkode/stab_region_26.png + :scale: 50 % + :align: center + + Linear stability region for the ESDIRK325L2SA-5-2-3 method method. The method's + region is outlined in blue; the embedding's region is in red. + + + +.. _Butcher.ESDIRK32I5L2SA: + +ESDIRK32I5L2SA-5-2-3 +^^^^^^^^^^^^^^^^^^^^^^^ + +.. index:: ESDIRK32I5L2SA-5-2-3 method + +Accessible via the constant ``ARKODE_ESDIRK32I5L2SA_5_2_3`` to +:c:func:`ARKStepSetTableNum` or :c:func:`ARKodeButcherTable_LoadDIRK`. +Accessible via the string ``"ARKODE_ESDIRK32I5L2SA_5_2_3"`` to +:c:func:`ARKStepSetTableName` or +:c:func:`ARKodeButcherTable_LoadDIRKByName`. +This is the ESDIRK3(2I)5L[2]SA method from :cite:p:`KenCarp:16`. +Both the method and embedding are A- and L-stable. +.. figure:: /figs/arkode/stab_region_27.png + :scale: 50 % + :align: center + Linear stability region for the ESDIRK32I5L2SA-5-2-3 method method. The method's + region is outlined in blue; the embedding's region is in red. .. _Butcher.Kvaerno_4_2_3: @@ -1309,8 +1376,96 @@ This is the implicit portion of the 4th order ARK4(3)7L[2]SA method from region is outlined in blue; the embedding's region is in red. +.. _Butcher.ESDIRK436L2SA: + +ESDIRK436L2SA-6-3-4 +^^^^^^^^^^^^^^^^^^^^ + +.. index:: ESDIRK436L2SA-6-3-4 method + +Accessible via the constant ``ARKODE_ESDIRK436L2SA_6_3_4`` to +:c:func:`ARKStepSetTableNum` or :c:func:`ARKodeButcherTable_LoadDIRK`. +Accessible via the string ``"ARKODE_ESDIRK436L2SA_6_3_4"`` to +:c:func:`ARKStepSetTableName` or +:c:func:`ARKodeButcherTable_LoadDIRKByName`. +This is the ESDIRK4(3)6L[2]SA method from :cite:p:`KenCarp:16`. +Both the method and embedding are A- and L-stable. + +.. figure:: /figs/arkode/stab_region_28.png + :scale: 50 % + :align: center + + Linear stability region for the ESDIRK436L2SA-6-3-4 method method. The method's + region is outlined in blue; the embedding's region is in red. + + +.. _Butcher.ESDIRK43I6L2SA: + +ESDIRK43I6L2SA-6-3-4 +^^^^^^^^^^^^^^^^^^^^ + +.. index:: ESDIRK43I6L2SA-6-3-4 method + +Accessible via the constant ``ARKODE_ESDIRK43I6L2SA_6_3_4`` to +:c:func:`ARKStepSetTableNum` or :c:func:`ARKodeButcherTable_LoadDIRK`. +Accessible via the string ``"ARKODE_ESDIRK43I6L2SA_6_3_4"`` to +:c:func:`ARKStepSetTableName` or +:c:func:`ARKodeButcherTable_LoadDIRKByName`. +This is the ESDIRK4(3I)6L[2]SA method from :cite:p:`KenCarp:16`. +Both the method and embedding are A- and L-stable. + +.. figure:: /figs/arkode/stab_region_29.png + :scale: 50 % + :align: center + + Linear stability region for the ESDIRK43I6L2SA-6-3-4 method method. The method's + region is outlined in blue; the embedding's region is in red. + + +.. _Butcher.QESDIRK436L2SA: + +QESDIRK436L2SA-6-3-4 +^^^^^^^^^^^^^^^^^^^^ + +.. index:: QESDIRK436L2SA-6-3-4 method + +Accessible via the constant ``ARKODE_QESDIRK436L2SA_6_3_4`` to +:c:func:`ARKStepSetTableNum` or :c:func:`ARKodeButcherTable_LoadDIRK`. +Accessible via the string ``"ARKODE_QESDIRK436L2SA_6_3_4"`` to +:c:func:`ARKStepSetTableName` or +:c:func:`ARKodeButcherTable_LoadDIRKByName`. +This is the QESDIRK4(3)6L[2]SA method from :cite:p:`KenCarp:16`. +Both the method and embedding are A- and L-stable. + +.. figure:: /figs/arkode/stab_region_30.png + :scale: 50 % + :align: center + + Linear stability region for the QESDIRK436L2SA-6-3-4 method method. The method's + region is outlined in blue; the embedding's region is in red. + + +.. _Butcher.ESDIRK437L2SA: + +ESDIRK437L2SA-7-3-4 +^^^^^^^^^^^^^^^^^^^ + +.. index:: ESDIRK437L2SA-7-3-4 method + +Accessible via the constant ``ARKODE_ESDIRK437L2SA_7_3_4`` to +:c:func:`ARKStepSetTableNum` or :c:func:`ARKodeButcherTable_LoadDIRK`. +Accessible via the string ``"ARKODE_ESDIRK437L2SA_7_3_4"`` to +:c:func:`ARKStepSetTableName` or +:c:func:`ARKodeButcherTable_LoadDIRKByName`. +This is the ESDIRK4(3)7L[2]SA method from :cite:p:`KenCarp:19b`. +Both the method and embedding are A- and L-stable. +.. figure:: /figs/arkode/stab_region_31.png + :scale: 50 % + :align: center + Linear stability region for the ESDIRK437L2SA-7-3-4 method method. The method's + region is outlined in blue; the embedding's region is in red. .. _Butcher.Kvaerno_7_4_5: @@ -1446,170 +1601,6 @@ Both the method and embedding are A-stable; additionally the method is L-stable region is outlined in blue; the embedding's region is in red. - -.. _Butcher.ESDIRK324L2SA: - -ESDIRK324L2SA-4-2-3 -^^^^^^^^^^^^^^^^^^^^^ - -.. index:: ESDIRK324L2SA-4-2-3 method - -Accessible via the constant ``ARKODE_ESDIRK324L2SA_4_2_3`` to -:c:func:`ARKStepSetTableNum` or :c:func:`ARKodeButcherTable_LoadDIRK`. -Accessible via the string ``"ARKODE_ESDIRK324L2SA_4_2_3"`` to -:c:func:`ARKStepSetTableName` or -:c:func:`ARKodeButcherTable_LoadDIRKByName`. -This is the ESDIRK3(2)4L[2]SA method from :cite:p:`KenCarp:19b`. -Both the method and embedding are A- and L-stable. - -.. figure:: /figs/arkode/stab_region_25.png - :scale: 50 % - :align: center - - Linear stability region for the ESDIRK324L2SA-4-2-3 method method. The method's - region is outlined in blue; the embedding's region is in red. - - - -.. _Butcher.ESDIRK325L2SA: - -ESDIRK325L2SA-5-2-3 -^^^^^^^^^^^^^^^^^^^^^ - -.. index:: ESDIRK325L2SA-5-2-3 method - -Accessible via the constant ``ARKODE_ESDIRK325L2SA_5_2_3`` to -:c:func:`ARKStepSetTableNum` or :c:func:`ARKodeButcherTable_LoadDIRK`. -Accessible via the string ``"ARKODE_ESDIRK325L2SA_5_2_3"`` to -:c:func:`ARKStepSetTableName` or -:c:func:`ARKodeButcherTable_LoadDIRKByName`. -This is the ESDIRK3(2)5L[2]SA method from :cite:p:`KenCarp:16`. -Both the method and embedding are A- and L-stable. - -.. figure:: /figs/arkode/stab_region_26.png - :scale: 50 % - :align: center - - Linear stability region for the ESDIRK325L2SA-5-2-3 method method. The method's - region is outlined in blue; the embedding's region is in red. - - - -.. _Butcher.ESDIRK32I5L2SA: - -ESDIRK32I5L2SA-5-2-3 -^^^^^^^^^^^^^^^^^^^^^^^ - -.. index:: ESDIRK32I5L2SA-5-2-3 method - -Accessible via the constant ``ARKODE_ESDIRK32I5L2SA_5_2_3`` to -:c:func:`ARKStepSetTableNum` or :c:func:`ARKodeButcherTable_LoadDIRK`. -Accessible via the string ``"ARKODE_ESDIRK32I5L2SA_5_2_3"`` to -:c:func:`ARKStepSetTableName` or -:c:func:`ARKodeButcherTable_LoadDIRKByName`. -This is the ESDIRK3(2I)5L[2]SA method from :cite:p:`KenCarp:16`. -Both the method and embedding are A- and L-stable. - -.. figure:: /figs/arkode/stab_region_27.png - :scale: 50 % - :align: center - - Linear stability region for the ESDIRK32I5L2SA-5-2-3 method method. The method's - region is outlined in blue; the embedding's region is in red. - - -.. _Butcher.ESDIRK436L2SA: - -ESDIRK436L2SA-6-3-4 -^^^^^^^^^^^^^^^^^^^^ - -.. index:: ESDIRK436L2SA-6-3-4 method - -Accessible via the constant ``ARKODE_ESDIRK436L2SA_6_3_4`` to -:c:func:`ARKStepSetTableNum` or :c:func:`ARKodeButcherTable_LoadDIRK`. -Accessible via the string ``"ARKODE_ESDIRK436L2SA_6_3_4"`` to -:c:func:`ARKStepSetTableName` or -:c:func:`ARKodeButcherTable_LoadDIRKByName`. -This is the ESDIRK4(3)6L[2]SA method from :cite:p:`KenCarp:16`. -Both the method and embedding are A- and L-stable. - -.. figure:: /figs/arkode/stab_region_28.png - :scale: 50 % - :align: center - - Linear stability region for the ESDIRK436L2SA-6-3-4 method method. The method's - region is outlined in blue; the embedding's region is in red. - - -.. _Butcher.ESDIRK43I6L2SA: - -ESDIRK43I6L2SA-6-3-4 -^^^^^^^^^^^^^^^^^^^^ - -.. index:: ESDIRK43I6L2SA-6-3-4 method - -Accessible via the constant ``ARKODE_ESDIRK43I6L2SA_6_3_4`` to -:c:func:`ARKStepSetTableNum` or :c:func:`ARKodeButcherTable_LoadDIRK`. -Accessible via the string ``"ARKODE_ESDIRK43I6L2SA_6_3_4"`` to -:c:func:`ARKStepSetTableName` or -:c:func:`ARKodeButcherTable_LoadDIRKByName`. -This is the ESDIRK4(3I)6L[2]SA method from :cite:p:`KenCarp:16`. -Both the method and embedding are A- and L-stable. - -.. figure:: /figs/arkode/stab_region_29.png - :scale: 50 % - :align: center - - Linear stability region for the ESDIRK43I6L2SA-6-3-4 method method. The method's - region is outlined in blue; the embedding's region is in red. - - -.. _Butcher.QESDIRK436L2SA: - -QESDIRK436L2SA-6-3-4 -^^^^^^^^^^^^^^^^^^^^ - -.. index:: QESDIRK436L2SA-6-3-4 method - -Accessible via the constant ``ARKODE_QESDIRK436L2SA_6_3_4`` to -:c:func:`ARKStepSetTableNum` or :c:func:`ARKodeButcherTable_LoadDIRK`. -Accessible via the string ``"ARKODE_QESDIRK436L2SA_6_3_4"`` to -:c:func:`ARKStepSetTableName` or -:c:func:`ARKodeButcherTable_LoadDIRKByName`. -This is the QESDIRK4(3)6L[2]SA method from :cite:p:`KenCarp:16`. -Both the method and embedding are A- and L-stable. - -.. figure:: /figs/arkode/stab_region_30.png - :scale: 50 % - :align: center - - Linear stability region for the QESDIRK436L2SA-6-3-4 method method. The method's - region is outlined in blue; the embedding's region is in red. - - -.. _Butcher.ESDIRK437L2SA: - -ESDIRK437L2SA-7-3-4 -^^^^^^^^^^^^^^^^^^^ - -.. index:: ESDIRK437L2SA-7-3-4 method - -Accessible via the constant ``ARKODE_ESDIRK437L2SA_7_3_4`` to -:c:func:`ARKStepSetTableNum` or :c:func:`ARKodeButcherTable_LoadDIRK`. -Accessible via the string ``"ARKODE_ESDIRK437L2SA_7_3_4"`` to -:c:func:`ARKStepSetTableName` or -:c:func:`ARKodeButcherTable_LoadDIRKByName`. -This is the ESDIRK4(3)7L[2]SA method from :cite:p:`KenCarp:19b`. -Both the method and embedding are A- and L-stable. - -.. figure:: /figs/arkode/stab_region_31.png - :scale: 50 % - :align: center - - Linear stability region for the ESDIRK437L2SA-7-3-4 method method. The method's - region is outlined in blue; the embedding's region is in red. - - .. _Butcher.ESDIRK547L2SA: ESDIRK547L2SA-7-4-5 diff --git a/doc/arkode/guide/source/Constants.rst b/doc/arkode/guide/source/Constants.rst index 968b68b432..9a3e6600ac 100644 --- a/doc/arkode/guide/source/Constants.rst +++ b/doc/arkode/guide/source/Constants.rst @@ -148,6 +148,12 @@ contains the ARKODE output constants. +-----------------------------------------------+------------------------------------------------------------+ | :index:`ARKODE_BILLINGTON_3_3_2` | Use the Billington-3-3-2 SDIRK method. | +-----------------------------------------------+------------------------------------------------------------+ + | :index:`ARKODE_ESDIRK324L2SA_4_2_3` | Use the ESDIRK324L2SA-4-2-3 ESDIRK method. | + +-----------------------------------------------+------------------------------------------------------------+ + | :index:`ARKODE_ESDIRK325L2SA_5_2_3` | Use the ESDIRK325L2SA-5-2-3 ESDIRK method. | + +-----------------------------------------------+------------------------------------------------------------+ + | :index:`ARKODE_ESDIRK32I5L2SA_5_2_3` | Use the ESDIRK32I5L2SA-5-2-3 ESDIRK method. | + +-----------------------------------------------+------------------------------------------------------------+ | :index:`ARKODE_TRBDF2_3_3_2` | Use the TRBDF2-3-3-2 ESDIRK method. | +-----------------------------------------------+------------------------------------------------------------+ | :index:`ARKODE_KVAERNO_4_2_3` | Use the Kvaerno-4-2-3 ESDIRK method. | @@ -166,12 +172,24 @@ contains the ARKODE output constants. +-----------------------------------------------+------------------------------------------------------------+ | :index:`ARKODE_KVAERNO_7_4_5` | Use the Kvaerno-7-4-5 ESDIRK method. | +-----------------------------------------------+------------------------------------------------------------+ + | :index:`ARKODE_ESDIRK436L2SA_6_3_4` | Use the ESDIRK436L2SA-6-3-4 method. | + +-----------------------------------------------+------------------------------------------------------------+ + | :index:`ARKODE_ESDIRK43I6L2SA_6_3_4` | Use the ESDIRK43I6L2SA-6-3-4 method. | + +-----------------------------------------------+------------------------------------------------------------+ + | :index:`ARKODE_QESDIRK436L2SA_6_3_4` | Use the QESDIRK436L2SA-6-3-4 method. | + +-----------------------------------------------+------------------------------------------------------------+ + | :index:`ARKODE_ESDIRK437L2SA_7_3_4` | Use the ESDIRK437L2SA-7-3-4 method. | + +-----------------------------------------------+------------------------------------------------------------+ | :index:`ARKODE_ARK548L2SA_DIRK_8_4_5` | Use the ARK-8-4-5 ESDIRK method. | +-----------------------------------------------+------------------------------------------------------------+ | :index:`ARKODE_ARK437L2SA_DIRK_7_3_4` | Use the ARK-7-3-4 ESDIRK method. | +-----------------------------------------------+------------------------------------------------------------+ | :index:`ARKODE_ARK548L2SAb_DIRK_8_4_5` | Use the ARK-8-4-5b ESDIRK method. | +-----------------------------------------------+------------------------------------------------------------+ + | :index:`ARKODE_ESDIRK547L2SA_7_4_5` | Use the ESDIRK547L2SA-7-4-5 ESDIRK method. | + +-----------------------------------------------+------------------------------------------------------------+ + | :index:`ARKODE_ESDIRK547L2SA2_7_4_5` | Use the ESDIRK547L2SA2-7-4-5 ESDIRK method. | + +-----------------------------------------------+------------------------------------------------------------+ | :index:`ARKSTEP_DEFAULT_DIRK_2` | Use ARKStep's default second-order DIRK method | | | (ARKODE_SDIRK_2_1_2). | +-----------------------------------------------+------------------------------------------------------------+ @@ -215,33 +233,33 @@ contains the ARKODE output constants. | :index:`ARKSTEP_DEFAULT_ARK_ITABLE_5` | (ARKODE_ARK548L2SA_ERK_8_4_5 and | | | ARKODE_ARK548L2SA_DIRK_8_4_5). | +-----------------------------------------------+------------------------------------------------------------+ - | | + | + | | | +-----------------------------------------------+------------------------------------------------------------+ | **Symplectic Method storage specification** | | +-----------------------------------------------+------------------------------------------------------------+ - | :c:macro:`ARKODE_SPRK_EULER_1_1` | Symplectic Euler 1st order method with 1 stage. | + | :index:`ARKODE_SPRK_EULER_1_1` | Symplectic Euler 1st order method with 1 stage. | +-----------------------------------------------+------------------------------------------------------------+ - | :c:macro:`ARKODE_SPRK_LEAPFROG_2_2` | Symplectic Leapfrog 2nd order method with 2 stages. | + | :index:`ARKODE_SPRK_LEAPFROG_2_2` | Symplectic Leapfrog 2nd order method with 2 stages. | +-----------------------------------------------+------------------------------------------------------------+ - | :c:macro:`ARKODE_SPRK_PSEUDO_LEAPFROG_2_2` | Symplectic Pseudo Leapfrog 2nd order method with 2 stages. | + | :index:`ARKODE_SPRK_PSEUDO_LEAPFROG_2_2` | Symplectic Pseudo Leapfrog 2nd order method with 2 stages. | +-----------------------------------------------+------------------------------------------------------------+ - | :c:macro:`ARKODE_SPRK_RUTH_3_3` | Symplectic Ruth 3rd order method with 3 stages. | + | :index:`ARKODE_SPRK_RUTH_3_3` | Symplectic Ruth 3rd order method with 3 stages. | +-----------------------------------------------+------------------------------------------------------------+ - | :c:macro:`ARKODE_SPRK_MCLACHLAN_2_2` | Symplectic McLachlan 2nd order method with 2 stages. | + | :index:`ARKODE_SPRK_MCLACHLAN_2_2` | Symplectic McLachlan 2nd order method with 2 stages. | +-----------------------------------------------+------------------------------------------------------------+ - | :c:macro:`ARKODE_SPRK_MCLACHLAN_3_3` | Symplectic McLachlan 3rd order method with 3 stages. | + | :index:`ARKODE_SPRK_MCLACHLAN_3_3` | Symplectic McLachlan 3rd order method with 3 stages. | +-----------------------------------------------+------------------------------------------------------------+ - | :c:macro:`ARKODE_SPRK_CANDY_ROZMUS_4_4` | Symplectic Candy-Rozmus 4th order method with 4 stages. | + | :index:`ARKODE_SPRK_CANDY_ROZMUS_4_4` | Symplectic Candy-Rozmus 4th order method with 4 stages. | +-----------------------------------------------+------------------------------------------------------------+ - | :c:macro:`ARKODE_SPRK_MCLACHLAN_4_4` | Symplectic McLachlan 4th order method with 4 stages. | + | :index:`ARKODE_SPRK_MCLACHLAN_4_4` | Symplectic McLachlan 4th order method with 4 stages. | +-----------------------------------------------+------------------------------------------------------------+ - | :c:macro:`ARKODE_SPRK_MCLACHLAN_5_6` | Symplectic McLachlan 5th order method with 6 stages. | + | :index:`ARKODE_SPRK_MCLACHLAN_5_6` | Symplectic McLachlan 5th order method with 6 stages. | +-----------------------------------------------+------------------------------------------------------------+ - | :c:macro:`ARKODE_SPRK_YOSHIDA_6_8` | Symplectic Yoshida 6th order method with 8 stages. | + | :index:`ARKODE_SPRK_YOSHIDA_6_8` | Symplectic Yoshida 6th order method with 8 stages. | +-----------------------------------------------+------------------------------------------------------------+ - | :c:macro:`ARKODE_SPRK_SUZUKI_UMENO_8_16` | Symplectic McLachlan 8th order method with 16 stages. | + | :index:`ARKODE_SPRK_SUZUKI_UMENO_8_16` | Symplectic McLachlan 8th order method with 16 stages. | +-----------------------------------------------+------------------------------------------------------------+ - | :c:macro:`ARKODE_SPRK_SOFRONIOU_10_36` | Symplectic Sofroniou 10th order method with 36 stages. | + | :index:`ARKODE_SPRK_SOFRONIOU_10_36` | Symplectic Sofroniou 10th order method with 36 stages. | +-----------------------------------------------+------------------------------------------------------------+ | | | +-----------------------------------------------+------------------------------------------------------------+ From b557eea56e84e315816e456fb3b0a86f99dc70f5 Mon Sep 17 00:00:00 2001 From: "Daniel R. Reynolds" Date: Fri, 29 Sep 2023 18:21:21 -0500 Subject: [PATCH 08/35] Bugfix: Support ARK2-3-1-2 in ARKStepSetTableNum (#346) Update ARKStepSetTableNum to recognize ARK2-3-1-2 as a valid ARK pair. --- CHANGELOG.md | 3 +++ doc/arkode/guide/source/Introduction.rst | 4 ++++ src/arkode/arkode_arkstep_io.c | 3 ++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 998379450f..0854c78ebe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,9 @@ ## Changes to SUNDIALS in release X.X.X +Fixed a bug in `ARKStepSetTableNum` wherein it did not recognize `ARKODE_ARK2_ERK_3_1_2` and +`ARKODE_ARK2_DIRK_3_1_2` as a valid additive Runge--Kutta Butcher table pair. + Renamed some internal types in CVODES and IDAS to allow both packages to be built together in the same binary. Improved computational complexity of `SUNMatScaleAddI_Sparse` from `O(M*N)` to diff --git a/doc/arkode/guide/source/Introduction.rst b/doc/arkode/guide/source/Introduction.rst index 3b31d1a83d..18bccb9935 100644 --- a/doc/arkode/guide/source/Introduction.rst +++ b/doc/arkode/guide/source/Introduction.rst @@ -133,6 +133,10 @@ Changes from previous versions Changes in vX.X.X ----------------- +Fixed a bug in :c:func:`ARKStepSetTableNum` wherein it did not recognize +`ARKODE_ARK2_ERK_3_1_2` and `ARKODE_ARK2_DIRK_3_1_2` as a valid additive +Runge--Kutta Butcher table pair. + Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` to ``O(NNZ)``. diff --git a/src/arkode/arkode_arkstep_io.c b/src/arkode/arkode_arkstep_io.c index 50f9182a75..fc2df26f98 100644 --- a/src/arkode/arkode_arkstep_io.c +++ b/src/arkode/arkode_arkstep_io.c @@ -1130,7 +1130,8 @@ int ARKStepSetTableNum(void *arkode_mem, ARKODE_DIRKTableID itable, ARKODE_ERKTa !((etable == ARKODE_ARK436L2SA_ERK_6_3_4) && (itable == ARKODE_ARK436L2SA_DIRK_6_3_4)) && !((etable == ARKODE_ARK437L2SA_ERK_7_3_4) && (itable == ARKODE_ARK437L2SA_DIRK_7_3_4)) && !((etable == ARKODE_ARK548L2SA_ERK_8_4_5) && (itable == ARKODE_ARK548L2SA_DIRK_8_4_5)) && - !((etable == ARKODE_ARK548L2SAb_ERK_8_4_5) && (itable == ARKODE_ARK548L2SAb_DIRK_8_4_5)) ) { + !((etable == ARKODE_ARK548L2SAb_ERK_8_4_5) && (itable == ARKODE_ARK548L2SAb_DIRK_8_4_5)) && + !((etable == ARKODE_ARK2_ERK_3_1_2) && (itable == ARKODE_ARK2_DIRK_3_1_2)) ) { arkProcessError(ark_mem, ARK_ILL_INPUT, "ARKODE::ARKStep", "ARKStepSetTableNum", "Incompatible Butcher tables for ARK method"); From e800b1538eb51d83edf05ee8b490a83d39f2c6ee Mon Sep 17 00:00:00 2001 From: David Gardner Date: Wed, 4 Oct 2023 12:28:41 -0700 Subject: [PATCH 09/35] Bugfix: stop time and output time reached in same step (#349) Fixes #339: A regression was introduced by the stop time bug fix in v6.6.1 causing integrators to return at the stop time rather than the requested output time if the stop time was reached in the same step in which the output time was passed. Also fixes a missing check in ARKODE to interpolate (or not) the solution at the stop time. --- CHANGELOG.md | 25 +- doc/arkode/guide/source/Introduction.rst | 22 +- doc/cvode/guide/source/Introduction.rst | 18 +- doc/cvodes/guide/source/Introduction.rst | 21 +- doc/ida/guide/source/Introduction.rst | 11 +- doc/idas/guide/source/Introduction.rst | 14 +- doc/kinsol/guide/source/Introduction.rst | 3 +- scripts/arkode | 23 +- scripts/cvode | 20 +- scripts/cvodes | 20 +- scripts/ida | 17 +- scripts/idas | 16 +- scripts/kinsol | 7 +- scripts/shared | 13 +- src/arkode/arkode.c | 70 +++-- src/cvode/cvode.c | 80 +++--- src/cvodes/cvodes.c | 80 +++--- src/ida/ida.c | 67 +++-- src/idas/idas.c | 69 +++-- test/answers | 2 +- .../unit_tests/arkode/C_serial/CMakeLists.txt | 1 + .../arkode/C_serial/ark_test_tstop.c | 231 +++++++++++++++++ test/unit_tests/cvode/C_serial/CMakeLists.txt | 1 + .../unit_tests/cvode/C_serial/cv_test_tstop.c | 234 +++++++++++++++++ .../unit_tests/cvodes/C_serial/CMakeLists.txt | 1 + .../cvodes/C_serial/cvs_test_tstop.c | 234 +++++++++++++++++ test/unit_tests/ida/C_serial/CMakeLists.txt | 1 + test/unit_tests/ida/C_serial/ida_test_tstop.c | 243 ++++++++++++++++++ test/unit_tests/idas/C_serial/CMakeLists.txt | 1 + .../idas/C_serial/idas_test_tstop.c | 243 ++++++++++++++++++ 30 files changed, 1511 insertions(+), 277 deletions(-) create mode 100644 test/unit_tests/arkode/C_serial/ark_test_tstop.c create mode 100644 test/unit_tests/cvode/C_serial/cv_test_tstop.c create mode 100644 test/unit_tests/cvodes/C_serial/cvs_test_tstop.c create mode 100644 test/unit_tests/ida/C_serial/ida_test_tstop.c create mode 100644 test/unit_tests/idas/C_serial/idas_test_tstop.c diff --git a/CHANGELOG.md b/CHANGELOG.md index 0854c78ebe..4182e059f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,17 +3,29 @@ ## Changes to SUNDIALS in release X.X.X -Fixed a bug in `ARKStepSetTableNum` wherein it did not recognize `ARKODE_ARK2_ERK_3_1_2` and -`ARKODE_ARK2_DIRK_3_1_2` as a valid additive Runge--Kutta Butcher table pair. +Fixed a regression introduced by the stop time bug fix in v6.6.1 where ARKODE, +CVODE, CVODES, IDA, and IDAS would return at the stop time rather than the +requested output time if the stop time was reached in the same step in which the +output time was passed. -Renamed some internal types in CVODES and IDAS to allow both packages to be built together in the same binary. +Fixed a bug in ARKODE where `ARKStepSetInterpolateStopTime` would return an +interpolated solution at the stop time in some cases when interpolation was +disabled. + +Fixed a bug in `ARKStepSetTableNum` wherein it did not recognize +`ARKODE_ARK2_ERK_3_1_2` and `ARKODE_ARK2_DIRK_3_1_2` as a valid additive +Runge--Kutta Butcher table pair. + +Renamed some internal types in CVODES and IDAS to allow both packages to be +built together in the same binary. Improved computational complexity of `SUNMatScaleAddI_Sparse` from `O(M*N)` to `O(NNZ)`. Fixed scaling bug in `SUNMatScaleAddI_Sparse` for non-square matrices. -Fixed missing soversions in some `SUNLinearSolver` CMake targets. +Fixed missing soversions in some `SUNLinearSolver` and `SUNNonlinearSolver` +CMake targets. ## Changes to SUNDIALS in release 6.6.1 @@ -24,8 +36,9 @@ object. Fixed a bug in ARKODE, CVODE, CVODES, IDA, and IDAS where the stop time may not be cleared when using normal mode if the requested output time is the same as -the stop time. Additionally, with ARKODE, CVODE, and CVODES an unnecessary -interpolation of the solution at the stop time may occur in this case. +the stop time. Additionally, with ARKODE, CVODE, and CVODES this fix removes an +unnecessary interpolation of the solution at the stop time that could occur in +this case. ## Changes to SUNDIALS in release 6.6.0 diff --git a/doc/arkode/guide/source/Introduction.rst b/doc/arkode/guide/source/Introduction.rst index 18bccb9935..3b5d5ac711 100644 --- a/doc/arkode/guide/source/Introduction.rst +++ b/doc/arkode/guide/source/Introduction.rst @@ -133,16 +133,25 @@ Changes from previous versions Changes in vX.X.X ----------------- +Fixed a regression introduced by the stop time bug fix in v6.6.1 where ARKODE +steppers would return at the stop time rather than the requested output time if +the stop time was reached in the same step in which the output time was passed. + +Fixed a bug in ARKODE where :c:func:`ARKStepSetInterpolateStopTime` would return +an interpolated solution at the stop time in some cases when interpolation was +disabled. + Fixed a bug in :c:func:`ARKStepSetTableNum` wherein it did not recognize `ARKODE_ARK2_ERK_3_1_2` and `ARKODE_ARK2_DIRK_3_1_2` as a valid additive Runge--Kutta Butcher table pair. -Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` to -``O(NNZ)``. +Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` +to ``O(NNZ)``. Fixed scaling bug in ``SUNMatScaleAddI_Sparse`` for non-square matrices. -Fixed missing soversions in some ``SUNLinearSolver`` CMake targets. +Fixed missing soversions in some ``SUNLinearSolver`` and ``SUNNonlinearSolver`` +CMake targets. Changes in v5.6.1 ----------------- @@ -152,9 +161,10 @@ Updated the Tpetra NVector interface to support Trilinos 14. Fixed a memory leak when destroying a CUDA, HIP, SYCL, or system SUNMemoryHelper object. -Fixed a bug where the stop time may not be cleared and an unnecessary -interpolation may occur when using normal mode if the requested output time is -the same as the stop time. +Fixed a bug where the stop time may not be cleared when using normal mode if the +requested output time is the same as the stop time. Additionally, this fix +removes an unnecessary interpolation of the solution at the stop time that could +occur in this case. Changes in v5.6.0 ----------------- diff --git a/doc/cvode/guide/source/Introduction.rst b/doc/cvode/guide/source/Introduction.rst index 81e7226f58..085aae8f34 100644 --- a/doc/cvode/guide/source/Introduction.rst +++ b/doc/cvode/guide/source/Introduction.rst @@ -114,12 +114,17 @@ Changes from previous versions Changes in vX.X.X ----------------- -Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` to -``O(NNZ)``. +Fixed a regression introduced by the stop time bug fix in v6.6.1 where CVODE +would return at the stop time rather than the requested output time if the stop +time was reached in the same step in which the output time was passed. + +Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` +to ``O(NNZ)``. Fixed scaling bug in ``SUNMatScaleAddI_Sparse`` for non-square matrices. -Fixed missing soversions in some ``SUNLinearSolver`` CMake targets. +Fixed missing soversions in some ``SUNLinearSolver`` and ``SUNNonlinearSolver`` +CMake targets. Changes in v6.6.1 ----------------- @@ -129,9 +134,10 @@ Updated the Tpetra NVector interface to support Trilinos 14. Fixed a memory leak when destroying a CUDA, HIP, SYCL, or system SUNMemoryHelper object. -Fixed a bug where the stop time may not be cleared and an unnecessary -interpolation may occur when using normal mode if the requested output time is -the same as the stop time. +Fixed a bug where the stop time may not be cleared when using normal mode if the +requested output time is the same as the stop time. Additionally, this fix +removes an unnecessary interpolation of the solution at the stop time that could +occur in this case. Changes in v6.6.0 ----------------- diff --git a/doc/cvodes/guide/source/Introduction.rst b/doc/cvodes/guide/source/Introduction.rst index 71f1e416c2..4382d3c2cb 100644 --- a/doc/cvodes/guide/source/Introduction.rst +++ b/doc/cvodes/guide/source/Introduction.rst @@ -114,14 +114,20 @@ Changes from previous versions Changes in vX.X.X ----------------- -Renamed some internal types in CVODES and IDAS to allow both packages to be built together in the same binary. +Fixed a regression introduced by the stop time bug fix in v6.6.1 where CVODES +would return at the stop time rather than the requested output time if the stop +time was reached in the same step in which the output time was passed. -Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` to -``O(NNZ)``. +Renamed some internal types in CVODES and IDAS to allow both packages to be +built together in the same binary. + +Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` +to ``O(NNZ)``. Fixed scaling bug in ``SUNMatScaleAddI_Sparse`` for non-square matrices. -Fixed missing soversions in some ``SUNLinearSolver`` CMake targets. +Fixed missing soversions in some ``SUNLinearSolver`` and ``SUNNonlinearSolver`` +CMake targets. Changes in v6.6.1 ----------------- @@ -131,9 +137,10 @@ Updated the Tpetra NVector interface to support Trilinos 14. Fixed a memory leak when destroying a CUDA, HIP, SYCL, or system SUNMemoryHelper object. -Fixed a bug where the stop time may not be cleared and an unnecessary -interpolation may occur when using normal mode if the requested output time is -the same as the stop time. +Fixed a bug where the stop time may not be cleared when using normal mode if the +requested output time is the same as the stop time. Additionally, this fix +removes an unnecessary interpolation of the solution at the stop time that could +occur in this case. Changes in v6.6.0 ----------------- diff --git a/doc/ida/guide/source/Introduction.rst b/doc/ida/guide/source/Introduction.rst index cee6ce1b53..1fa001bde2 100644 --- a/doc/ida/guide/source/Introduction.rst +++ b/doc/ida/guide/source/Introduction.rst @@ -75,12 +75,17 @@ Changes from previous versions Changes in vX.X.X ----------------- -Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` to -``O(NNZ)``. +Fixed a regression introduced by the stop time bug fix in v6.6.1 where IDA would +return at the stop time rather than the requested output time if the stop time +was reached in the same step in which the output time was passed. + +Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` +to ``O(NNZ)``. Fixed scaling bug in ``SUNMatScaleAddI_Sparse`` for non-square matrices. -Fixed missing soversions in some ``SUNLinearSolver`` CMake targets. +Fixed missing soversions in some ``SUNLinearSolver`` and ``SUNNonlinearSolver`` +CMake targets. Changes in v6.6.1 ----------------- diff --git a/doc/idas/guide/source/Introduction.rst b/doc/idas/guide/source/Introduction.rst index db661cf71f..16db98882a 100644 --- a/doc/idas/guide/source/Introduction.rst +++ b/doc/idas/guide/source/Introduction.rst @@ -89,14 +89,20 @@ Changes from previous versions Changes in vX.X.X ----------------- -Renamed some internal types in CVODES and IDAS to allow both packages to be built together in the same binary. +Fixed a regression introduced by the stop time bug fix in v6.6.1 where IDAS +would return at the stop time rather than the requested output time if the stop +time was reached in the same step in which the output time was passed. -Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` to -``O(NNZ)``. +Renamed some internal types in CVODES and IDAS to allow both packages to be +built together in the same binary. + +Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` +to ``O(NNZ)``. Fixed scaling bug in ``SUNMatScaleAddI_Sparse`` for non-square matrices. -Fixed missing soversions in some ``SUNLinearSolver`` CMake targets. +Fixed missing soversions in some ``SUNLinearSolver`` and ``SUNNonlinearSolver`` +CMake targets. Changes in v5.6.1 ----------------- diff --git a/doc/kinsol/guide/source/Introduction.rst b/doc/kinsol/guide/source/Introduction.rst index 4e0a7531bf..0500e6d0df 100644 --- a/doc/kinsol/guide/source/Introduction.rst +++ b/doc/kinsol/guide/source/Introduction.rst @@ -96,7 +96,8 @@ Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` Fixed scaling bug in ``SUNMatScaleAddI_Sparse`` for non-square matrices. -Fixed missing soversions in some ``SUNLinearSolver`` CMake targets. +Fixed missing soversions in some ``SUNLinearSolver`` and ``SUNNonlinearSolver`` +CMake targets. Changes in v6.6.1 ----------------- diff --git a/scripts/arkode b/scripts/arkode index bb216f7248..6df8c34b75 100755 --- a/scripts/arkode +++ b/scripts/arkode @@ -387,25 +387,4 @@ $tar $tarfile $distrobase/examples/arkode/F2003_serial/test_ark_butcher_f2003.f9 echo " --- Add arkode unit tests to $tarfile" -$tar $tarfile $distrobase/test/unit_tests/arkode/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/arkode/C_serial/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/arkode/C_serial/ark_test_arkstepsetforcing.c -$tar $tarfile $distrobase/test/unit_tests/arkode/C_serial/ark_test_getuserdata.c -$tar $tarfile $distrobase/test/unit_tests/arkode/C_serial/ark_test_interp.c -$tar $tarfile $distrobase/test/unit_tests/arkode/C_serial/ark_test_reset.c -$tar $tarfile $distrobase/test/unit_tests/arkode/CXX_serial/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/arkode/CXX_serial/ark_test_analytic_sys_mri.cpp -$tar $tarfile $distrobase/test/unit_tests/arkode/CXX_serial/ark_test_analytic_sys_mri_0.out -$tar $tarfile $distrobase/test/unit_tests/arkode/CXX_serial/ark_test_analytic_sys_mri_1.out -$tar $tarfile $distrobase/test/unit_tests/arkode/CXX_serial/ark_test_butcher.cpp -$tar $tarfile $distrobase/test/unit_tests/arkode/CXX_serial/ark_test_butcher.out -$tar $tarfile $distrobase/test/unit_tests/arkode/CXX_serial/ark_test_dahlquist_mri.cpp -$tar $tarfile $distrobase/test/unit_tests/arkode/CXX_serial/ark_test_dahlquist_mri.out -$tar $tarfile $distrobase/test/unit_tests/arkode/CXX_serial/ark_test_getjac.cpp -$tar $tarfile $distrobase/test/unit_tests/arkode/CXX_serial/ark_test_getjac.out -$tar $tarfile $distrobase/test/unit_tests/arkode/CXX_serial/ark_test_getjac_mri.cpp -$tar $tarfile $distrobase/test/unit_tests/arkode/CXX_serial/ark_test_getjac_mri.out -$tar $tarfile $distrobase/test/unit_tests/arkode/CXX_parallel/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/arkode/CXX_parallel/ark_test_heat2D_mri.cpp -$tar $tarfile $distrobase/test/unit_tests/arkode/F2003_serial/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/arkode/F2003_serial/ark_test_table_f2003.f90 +$tar $tarfile $distrobase/test/unit_tests/arkode diff --git a/scripts/cvode b/scripts/cvode index af97c77f74..2f83eb3ebf 100755 --- a/scripts/cvode +++ b/scripts/cvode @@ -272,22 +272,4 @@ $tar $tarfile $distrobase/examples/cvode/CXX_sycl/cvAdvDiff_kry_sycl.out echo " --- Add cvode unit tests to $tarfile" -$tar $tarfile $distrobase/test/unit_tests/cvode/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/cvode/C_serial/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/cvode/C_serial/cv_test_getuserdata.c -$tar $tarfile $distrobase/test/unit_tests/cvode/CXX_serial/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/cvode/CXX_serial/cv_test_getjac.cpp -$tar $tarfile $distrobase/test/unit_tests/cvode/CXX_serial/cv_test_getjac.out -$tar $tarfile $distrobase/test/unit_tests/cvode/CXX_serial/cv_test_kpr.cpp -$tar $tarfile $distrobase/test/unit_tests/cvode/CXX_serial/cv_test_kpr.hpp -$tar $tarfile $distrobase/test/unit_tests/cvode/CXX_serial/cv_test_kpr.out -$tar $tarfile $distrobase/test/unit_tests/cvode/CXX_serial/cv_test_kpr_--dgmax_jbad_1.0.out -$tar $tarfile $distrobase/test/unit_tests/cvode/CXX_serial/cv_test_kpr_--dgmax_lsetup_0.0.out -$tar $tarfile $distrobase/test/unit_tests/cvode/CXX_serial/cv_test_kpr_--eta_cf_0.5.out -$tar $tarfile $distrobase/test/unit_tests/cvode/CXX_serial/cv_test_kpr_--eta_max_ef_0.1_--small_nef_1.out -$tar $tarfile $distrobase/test/unit_tests/cvode/CXX_serial/cv_test_kpr_--eta_max_fs_2.out -$tar $tarfile $distrobase/test/unit_tests/cvode/CXX_serial/cv_test_kpr_--eta_min_ef_0.5.out -$tar $tarfile $distrobase/test/unit_tests/cvode/CXX_serial/cv_test_kpr_--eta_min_es_2_--small_nst_5.out -$tar $tarfile $distrobase/test/unit_tests/cvode/CXX_serial/cv_test_kpr_--eta_min_fx_1.0_--eta_max_fx_2.0.out -$tar $tarfile $distrobase/test/unit_tests/cvode/CXX_serial/cv_test_kpr_--eta_min_fx_1.0_--eta_min_0.5.out -$tar $tarfile $distrobase/test/unit_tests/cvode/CXX_serial/cv_test_kpr_--eta_min_gs_2.out +$tar $tarfile $distrobase/test/unit_tests/cvode diff --git a/scripts/cvodes b/scripts/cvodes index ae262ac1d0..cab37658ee 100755 --- a/scripts/cvodes +++ b/scripts/cvodes @@ -190,22 +190,4 @@ $tar $tarfile $distrobase/examples/cvodes/F2003_serial/cvsAdvDiff_FSA_non_f2003_ echo " --- Add cvodes unit tests to $tarfile" -$tar $tarfile $distrobase/test/unit_tests/cvodes/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/cvodes/C_serial/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/cvodes/C_serial/cvs_test_getuserdata.c -$tar $tarfile $distrobase/test/unit_tests/cvodes/CXX_serial/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/cvodes/CXX_serial/cvs_test_getjac.cpp -$tar $tarfile $distrobase/test/unit_tests/cvodes/CXX_serial/cvs_test_getjac.out -$tar $tarfile $distrobase/test/unit_tests/cvodes/CXX_serial/cvs_test_kpr.cpp -$tar $tarfile $distrobase/test/unit_tests/cvodes/CXX_serial/cvs_test_kpr.hpp -$tar $tarfile $distrobase/test/unit_tests/cvodes/CXX_serial/cvs_test_kpr.out -$tar $tarfile $distrobase/test/unit_tests/cvodes/CXX_serial/cvs_test_kpr_--dgmax_jbad_1.0.out -$tar $tarfile $distrobase/test/unit_tests/cvodes/CXX_serial/cvs_test_kpr_--dgmax_lsetup_0.0.out -$tar $tarfile $distrobase/test/unit_tests/cvodes/CXX_serial/cvs_test_kpr_--eta_cf_0.5.out -$tar $tarfile $distrobase/test/unit_tests/cvodes/CXX_serial/cvs_test_kpr_--eta_max_ef_0.1_--small_nef_1.out -$tar $tarfile $distrobase/test/unit_tests/cvodes/CXX_serial/cvs_test_kpr_--eta_max_fs_2.out -$tar $tarfile $distrobase/test/unit_tests/cvodes/CXX_serial/cvs_test_kpr_--eta_min_ef_0.5.out -$tar $tarfile $distrobase/test/unit_tests/cvodes/CXX_serial/cvs_test_kpr_--eta_min_es_2_--small_nst_5.out -$tar $tarfile $distrobase/test/unit_tests/cvodes/CXX_serial/cvs_test_kpr_--eta_min_fx_1.0_--eta_max_fx_2.0.out -$tar $tarfile $distrobase/test/unit_tests/cvodes/CXX_serial/cvs_test_kpr_--eta_min_fx_1.0_--eta_min_0.5.out -$tar $tarfile $distrobase/test/unit_tests/cvodes/CXX_serial/cvs_test_kpr_--eta_min_gs_2.out +$tar $tarfile $distrobase/test/unit_tests/cvodes diff --git a/scripts/ida b/scripts/ida index e616bf7a19..cc320c286b 100755 --- a/scripts/ida +++ b/scripts/ida @@ -162,19 +162,4 @@ $tar $tarfile $distrobase/examples/ida/F2003_serial/idaRoberts_dns_f2003.out echo " --- Add ida unit tests to $tarfile" -$tar $tarfile $distrobase/test/unit_tests/ida/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/ida/C_serial/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/ida/C_serial/ida_test_getuserdata.c -$tar $tarfile $distrobase/test/unit_tests/ida/CXX_serial/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/ida/CXX_serial/ida_test_getjac.cpp -$tar $tarfile $distrobase/test/unit_tests/ida/CXX_serial/ida_test_getjac.out -$tar $tarfile $distrobase/test/unit_tests/ida/CXX_serial/ida_test_kpr.cpp -$tar $tarfile $distrobase/test/unit_tests/ida/CXX_serial/ida_test_kpr.hpp -$tar $tarfile $distrobase/test/unit_tests/ida/CXX_serial/ida_test_kpr.out -$tar $tarfile $distrobase/test/unit_tests/ida/CXX_serial/ida_test_kpr_--dcj_0.9.out -$tar $tarfile $distrobase/test/unit_tests/ida/CXX_serial/ida_test_kpr_--eta_cf_0.5.out -$tar $tarfile $distrobase/test/unit_tests/ida/CXX_serial/ida_test_kpr_--eta_max_fs_2.out -$tar $tarfile $distrobase/test/unit_tests/ida/CXX_serial/ida_test_kpr_--eta_min_2.out -$tar $tarfile $distrobase/test/unit_tests/ida/CXX_serial/ida_test_kpr_--eta_min_ef_0.5.out -$tar $tarfile $distrobase/test/unit_tests/ida/CXX_serial/ida_test_kpr_--eta_min_fx_1.0_--eta_max_fx_2.0.out -$tar $tarfile $distrobase/test/unit_tests/ida/CXX_serial/ida_test_kpr_--eta_min_fx_1.0_--eta_min_0.5.out +$tar $tarfile $distrobase/test/unit_tests/ida diff --git a/scripts/idas b/scripts/idas index a373b1eeda..7604bb5983 100755 --- a/scripts/idas +++ b/scripts/idas @@ -151,18 +151,4 @@ $tar $tarfile $distrobase/examples/idas/F2003_serial/idasHeat2D_kry_f2003.out echo " --- Add idas unit tests to $tarfile" -$tar $tarfile $distrobase/test/unit_tests/idas/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/idas/C_serial/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/idas/C_serial/idas_test_getuserdata.c -$tar $tarfile $distrobase/test/unit_tests/idas/CXX_serial/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/idas/CXX_serial/idas_test_getjac.cpp -$tar $tarfile $distrobase/test/unit_tests/idas/CXX_serial/idas_test_getjac.out -$tar $tarfile $distrobase/test/unit_tests/idas/CXX_serial/idas_test_kpr.cpp -$tar $tarfile $distrobase/test/unit_tests/idas/CXX_serial/idas_test_kpr.hpp -$tar $tarfile $distrobase/test/unit_tests/idas/CXX_serial/idas_test_kpr.out -$tar $tarfile $distrobase/test/unit_tests/idas/CXX_serial/idas_test_kpr_--dcj_0.9.out -$tar $tarfile $distrobase/test/unit_tests/idas/CXX_serial/idas_test_kpr_--eta_cf_0.5.out -$tar $tarfile $distrobase/test/unit_tests/idas/CXX_serial/idas_test_kpr_--eta_min_2.out -$tar $tarfile $distrobase/test/unit_tests/idas/CXX_serial/idas_test_kpr_--eta_min_ef_0.5.out -$tar $tarfile $distrobase/test/unit_tests/idas/CXX_serial/idas_test_kpr_--eta_min_fx_1.0_--eta_max_fx_2.0.out -$tar $tarfile $distrobase/test/unit_tests/idas/CXX_serial/idas_test_kpr_--eta_min_fx_1.0_--eta_min_0.5.out +$tar $tarfile $distrobase/test/unit_tests/idas diff --git a/scripts/kinsol b/scripts/kinsol index cc081f8c2d..c7bf7dc752 100755 --- a/scripts/kinsol +++ b/scripts/kinsol @@ -145,9 +145,4 @@ $tar $tarfile $distrobase/examples/kinsol/F2003_serial/kinLaplace_picard_kry_f20 echo " --- Add kinsol unit tests to $tarfile" -$tar $tarfile $distrobase/test/unit_tests/kinsol/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/kinsol/C_serial/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/kinsol/C_serial/kin_test_getuserdata.c -$tar $tarfile $distrobase/test/unit_tests/kinsol/CXX_serial/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/kinsol/CXX_serial/kin_test_getjac.cpp -$tar $tarfile $distrobase/test/unit_tests/kinsol/CXX_serial/kin_test_getjac.out +$tar $tarfile $distrobase/test/unit_tests/kinsol diff --git a/scripts/shared b/scripts/shared index 2a9cef1731..9e42969552 100755 --- a/scripts/shared +++ b/scripts/shared @@ -835,15 +835,4 @@ echo " --- Add unit tests files to $tarfile" $tar $tarfile $distrobase/test/unit_tests/CMakeLists.txt $tar $tarfile $distrobase/test/unit_tests/reductions -$tar $tarfile $distrobase/test/unit_tests/reductions/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/reductions/test_reduction_operators.cpp -$tar $tarfile $distrobase/test/unit_tests/sunmemory/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/sunmemory/sys -$tar $tarfile $distrobase/test/unit_tests/sunmemory/sys/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/sunmemory/sys/test_sunmemory_sys.cpp -$tar $tarfile $distrobase/test/unit_tests/sunmemory/cuda/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/sunmemory/cuda/test_sunmemory_cuda.cu -$tar $tarfile $distrobase/test/unit_tests/sunmemory/hip/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/sunmemory/hip/test_sunmemory_hip.cpp -$tar $tarfile $distrobase/test/unit_tests/sunmemory/sycl/CMakeLists.txt -$tar $tarfile $distrobase/test/unit_tests/sunmemory/sycl/test_sunmemory_sycl.cpp +$tar $tarfile $distrobase/test/unit_tests/sunmemory diff --git a/src/arkode/arkode.c b/src/arkode/arkode.c index 43a3920a18..601abca7ae 100644 --- a/src/arkode/arkode.c +++ b/src/arkode/arkode.c @@ -967,22 +967,31 @@ int arkEvolve(ARKodeMem ark_mem, realtype tout, N_Vector yout, } /* Check if tn is at tstop or near tstop */ - if ( ark_mem->tstopset ) { + if ( ark_mem->tstopset ) + { troundoff = FUZZ_FACTOR*ark_mem->uround * (SUNRabs(ark_mem->tcur) + SUNRabs(ark_mem->h)); - if ( SUNRabs(ark_mem->tcur - ark_mem->tstop) <= troundoff) { - if (ark_mem->tstopinterp) { - (void) arkGetDky(ark_mem, ark_mem->tstop, 0, yout); - } else { - N_VScale(ONE, ark_mem->yn, yout); + + if ( SUNRabs(ark_mem->tcur - ark_mem->tstop) <= troundoff) + { + /* Ensure tout >= tstop, otherwise check for tout return below */ + if ((tout - ark_mem->tstop) * ark_mem->h >= ZERO || + SUNRabs(tout - ark_mem->tstop) <= troundoff) + { + if (ark_mem->tstopinterp) { + (void) arkGetDky(ark_mem, ark_mem->tstop, 0, yout); + } else { + N_VScale(ONE, ark_mem->yn, yout); + } + ark_mem->tretlast = *tret = ark_mem->tstop; + ark_mem->tstopset = SUNFALSE; + istate = ARK_TSTOP_RETURN; + break; } - ark_mem->tretlast = *tret = ark_mem->tstop; - ark_mem->tstopset = SUNFALSE; - istate = ARK_TSTOP_RETURN; - break; } /* limit upcoming step if it will overcome tstop */ - if ( (ark_mem->tcur + ark_mem->hprime - ark_mem->tstop)*ark_mem->h > ZERO ) { + else if ( (ark_mem->tcur + ark_mem->hprime - ark_mem->tstop)*ark_mem->h > ZERO ) + { ark_mem->hprime = (ark_mem->tstop - ark_mem->tcur) * (ONE-FOUR*ark_mem->uround); ark_mem->eta = ark_mem->hprime/ark_mem->h; @@ -2105,24 +2114,35 @@ int arkStopTests(ARKodeMem ark_mem, realtype tout, N_Vector yout, } /* end of root stop check */ /* Test for tn at tstop or near tstop */ - if ( ark_mem->tstopset ) { - - if ( SUNRabs(ark_mem->tcur - ark_mem->tstop) <= troundoff) { - *ier = arkGetDky(ark_mem, ark_mem->tstop, 0, yout); - if (*ier != ARK_SUCCESS) { - arkProcessError(ark_mem, ARK_ILL_INPUT, "ARKODE", "arkStopTests", - MSG_ARK_BAD_TSTOP, ark_mem->tstop, ark_mem->tcur); - *ier = ARK_ILL_INPUT; + if ( ark_mem->tstopset ) + { + if ( SUNRabs(ark_mem->tcur - ark_mem->tstop) <= troundoff) + { + /* Ensure tout >= tstop, otherwise check for tout return below */ + if ((tout - ark_mem->tstop) * ark_mem->h >= ZERO || + SUNRabs(tout - ark_mem->tstop) <= troundoff) + { + if (ark_mem->tstopinterp) + { + *ier = arkGetDky(ark_mem, ark_mem->tstop, 0, yout); + if (*ier != ARK_SUCCESS) { + arkProcessError(ark_mem, ARK_ILL_INPUT, "ARKODE", "arkStopTests", + MSG_ARK_BAD_TSTOP, ark_mem->tstop, ark_mem->tcur); + *ier = ARK_ILL_INPUT; + return(1); + } + } else { + N_VScale(ONE, ark_mem->yn, yout); + } + ark_mem->tretlast = *tret = ark_mem->tstop; + ark_mem->tstopset = SUNFALSE; + *ier = ARK_TSTOP_RETURN; return(1); } - ark_mem->tretlast = *tret = ark_mem->tstop; - ark_mem->tstopset = SUNFALSE; - *ier = ARK_TSTOP_RETURN; - return(1); } - /* If next step would overtake tstop, adjust stepsize */ - if ( (ark_mem->tcur + ark_mem->hprime - ark_mem->tstop)*ark_mem->h > ZERO ) { + else if ( (ark_mem->tcur + ark_mem->hprime - ark_mem->tstop)*ark_mem->h > ZERO ) + { ark_mem->hprime = (ark_mem->tstop - ark_mem->tcur)*(ONE-FOUR*ark_mem->uround); ark_mem->eta = ark_mem->hprime/ark_mem->h; } diff --git a/src/cvode/cvode.c b/src/cvode/cvode.c index 2fd769b299..dddf19e171 100644 --- a/src/cvode/cvode.c +++ b/src/cvode/cvode.c @@ -1233,32 +1233,38 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, } /* end of root stop check */ /* Test for tn at tstop or near tstop */ - if ( cv_mem->cv_tstopset ) { - - if ( SUNRabs(cv_mem->cv_tn - cv_mem->cv_tstop) <= troundoff ) { - if (cv_mem->cv_tstopinterp) { - ier = CVodeGetDky(cv_mem, cv_mem->cv_tstop, 0, yout); - if (ier != CV_SUCCESS) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODE", "CVode", - MSGCV_BAD_TSTOP, cv_mem->cv_tstop, cv_mem->cv_tn); - SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); - return(CV_ILL_INPUT); + if ( cv_mem->cv_tstopset ) + { + /* Test for tn at tstop */ + if ( SUNRabs(cv_mem->cv_tn - cv_mem->cv_tstop) <= troundoff ) + { + /* Ensure tout >= tstop, otherwise check for tout return below */ + if ((tout - cv_mem->cv_tstop) * cv_mem->cv_h >= ZERO || + SUNRabs(tout - cv_mem->cv_tstop) <= troundoff) + { + if (cv_mem->cv_tstopinterp) { + ier = CVodeGetDky(cv_mem, cv_mem->cv_tstop, 0, yout); + if (ier != CV_SUCCESS) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODE", "CVode", + MSGCV_BAD_TSTOP, cv_mem->cv_tstop, cv_mem->cv_tn); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_ILL_INPUT); + } + } else { + N_VScale(ONE, cv_mem->cv_zn[0], yout); } - } else { - N_VScale(ONE, cv_mem->cv_zn[0], yout); + cv_mem->cv_tretlast = *tret = cv_mem->cv_tstop; + cv_mem->cv_tstopset = SUNFALSE; + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_TSTOP_RETURN); } - cv_mem->cv_tretlast = *tret = cv_mem->cv_tstop; - cv_mem->cv_tstopset = SUNFALSE; - SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); - return(CV_TSTOP_RETURN); } - /* If next step would overtake tstop, adjust stepsize */ - if ( (cv_mem->cv_tn + cv_mem->cv_hprime - cv_mem->cv_tstop)*cv_mem->cv_h > ZERO ) { + else if ( (cv_mem->cv_tn + cv_mem->cv_hprime - cv_mem->cv_tstop)*cv_mem->cv_h > ZERO ) + { cv_mem->cv_hprime = (cv_mem->cv_tstop - cv_mem->cv_tn)*(ONE-FOUR*cv_mem->cv_uround); cv_mem->cv_eta = cv_mem->cv_hprime / cv_mem->cv_h; } - } /* In CV_NORMAL mode, test if tout was reached */ @@ -1424,27 +1430,35 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, } /* Check if tn is at tstop or near tstop */ - if ( cv_mem->cv_tstopset ) { - + if ( cv_mem->cv_tstopset ) + { troundoff = FUZZ_FACTOR * cv_mem->cv_uround * (SUNRabs(cv_mem->cv_tn) + SUNRabs(cv_mem->cv_h)); - if ( SUNRabs(cv_mem->cv_tn - cv_mem->cv_tstop) <= troundoff) { - if (cv_mem->cv_tstopinterp) { - (void) CVodeGetDky(cv_mem, cv_mem->cv_tstop, 0, yout); - } else { - N_VScale(ONE, cv_mem->cv_zn[0], yout); + + /* Test for tn at tstop */ + if ( SUNRabs(cv_mem->cv_tn - cv_mem->cv_tstop) <= troundoff) + { + /* Ensure tout >= tstop, otherwise check for tout return below */ + if ((tout - cv_mem->cv_tstop) * cv_mem->cv_h >= ZERO || + SUNRabs(tout - cv_mem->cv_tstop) <= troundoff) + { + if (cv_mem->cv_tstopinterp) { + (void) CVodeGetDky(cv_mem, cv_mem->cv_tstop, 0, yout); + } else { + N_VScale(ONE, cv_mem->cv_zn[0], yout); + } + cv_mem->cv_tretlast = *tret = cv_mem->cv_tstop; + cv_mem->cv_tstopset = SUNFALSE; + istate = CV_TSTOP_RETURN; + break; } - cv_mem->cv_tretlast = *tret = cv_mem->cv_tstop; - cv_mem->cv_tstopset = SUNFALSE; - istate = CV_TSTOP_RETURN; - break; } - - if ( (cv_mem->cv_tn + cv_mem->cv_hprime - cv_mem->cv_tstop)*cv_mem->cv_h > ZERO ) { + /* If next step would overtake tstop, adjust stepsize */ + else if ( (cv_mem->cv_tn + cv_mem->cv_hprime - cv_mem->cv_tstop)*cv_mem->cv_h > ZERO ) + { cv_mem->cv_hprime = (cv_mem->cv_tstop - cv_mem->cv_tn)*(ONE-FOUR*cv_mem->cv_uround); cv_mem->cv_eta = cv_mem->cv_hprime / cv_mem->cv_h; } - } /* In NORMAL mode, check if tout reached */ diff --git a/src/cvodes/cvodes.c b/src/cvodes/cvodes.c index 204d207fb0..cd1fabfd3d 100644 --- a/src/cvodes/cvodes.c +++ b/src/cvodes/cvodes.c @@ -3059,32 +3059,38 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, } /* end of root stop check */ /* Test for tn at tstop or near tstop */ - if ( cv_mem->cv_tstopset ) { - - if ( SUNRabs(cv_mem->cv_tn - cv_mem->cv_tstop) <= troundoff ) { - if (cv_mem->cv_tstopinterp) { - ier = CVodeGetDky(cv_mem, cv_mem->cv_tstop, 0, yout); - if (ier != CV_SUCCESS) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVode", - MSGCV_BAD_TSTOP, cv_mem->cv_tstop, cv_mem->cv_tn); - SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); - return(CV_ILL_INPUT); + if ( cv_mem->cv_tstopset ) + { + /* Test for tn at tstop */ + if ( SUNRabs(cv_mem->cv_tn - cv_mem->cv_tstop) <= troundoff ) + { + /* Ensure tout >= tstop, otherwise check for tout return below */ + if ((tout - cv_mem->cv_tstop) * cv_mem->cv_h >= ZERO || + SUNRabs(tout - cv_mem->cv_tstop) <= troundoff) + { + if (cv_mem->cv_tstopinterp) { + ier = CVodeGetDky(cv_mem, cv_mem->cv_tstop, 0, yout); + if (ier != CV_SUCCESS) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVode", + MSGCV_BAD_TSTOP, cv_mem->cv_tstop, cv_mem->cv_tn); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_ILL_INPUT); + } + } else { + N_VScale(ONE, cv_mem->cv_zn[0], yout); } - } else { - N_VScale(ONE, cv_mem->cv_zn[0], yout); + cv_mem->cv_tretlast = *tret = cv_mem->cv_tstop; + cv_mem->cv_tstopset = SUNFALSE; + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_TSTOP_RETURN); } - cv_mem->cv_tretlast = *tret = cv_mem->cv_tstop; - cv_mem->cv_tstopset = SUNFALSE; - SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); - return(CV_TSTOP_RETURN); } - /* If next step would overtake tstop, adjust stepsize */ - if ( (cv_mem->cv_tn + cv_mem->cv_hprime - cv_mem->cv_tstop)*cv_mem->cv_h > ZERO ) { + else if ( (cv_mem->cv_tn + cv_mem->cv_hprime - cv_mem->cv_tstop)*cv_mem->cv_h > ZERO ) + { cv_mem->cv_hprime = (cv_mem->cv_tstop - cv_mem->cv_tn)*(ONE-FOUR*cv_mem->cv_uround); cv_mem->cv_eta = cv_mem->cv_hprime / cv_mem->cv_h; } - } /* In CV_NORMAL mode, test if tout was reached */ @@ -3294,27 +3300,35 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, } /* Check if tn is at tstop or near tstop */ - if ( cv_mem->cv_tstopset ) { - + if ( cv_mem->cv_tstopset ) + { troundoff = FUZZ_FACTOR * cv_mem->cv_uround * (SUNRabs(cv_mem->cv_tn) + SUNRabs(cv_mem->cv_h)); - if ( SUNRabs(cv_mem->cv_tn - cv_mem->cv_tstop) <= troundoff) { - if (cv_mem->cv_tstopinterp) { - (void) CVodeGetDky(cv_mem, cv_mem->cv_tstop, 0, yout); - } else { - N_VScale(ONE, cv_mem->cv_zn[0], yout); + + /* Test for tn at tstop */ + if ( SUNRabs(cv_mem->cv_tn - cv_mem->cv_tstop) <= troundoff) + { + /* Ensure tout >= tstop, otherwise check for tout return below */ + if ((tout - cv_mem->cv_tstop) * cv_mem->cv_h >= ZERO || + SUNRabs(tout - cv_mem->cv_tstop) <= troundoff) + { + if (cv_mem->cv_tstopinterp) { + (void) CVodeGetDky(cv_mem, cv_mem->cv_tstop, 0, yout); + } else { + N_VScale(ONE, cv_mem->cv_zn[0], yout); + } + cv_mem->cv_tretlast = *tret = cv_mem->cv_tstop; + cv_mem->cv_tstopset = SUNFALSE; + istate = CV_TSTOP_RETURN; + break; } - cv_mem->cv_tretlast = *tret = cv_mem->cv_tstop; - cv_mem->cv_tstopset = SUNFALSE; - istate = CV_TSTOP_RETURN; - break; } - - if ( (cv_mem->cv_tn + cv_mem->cv_hprime - cv_mem->cv_tstop)*cv_mem->cv_h > ZERO ) { + /* If next step would overtake tstop, adjust stepsize */ + else if ( (cv_mem->cv_tn + cv_mem->cv_hprime - cv_mem->cv_tstop)*cv_mem->cv_h > ZERO ) + { cv_mem->cv_hprime = (cv_mem->cv_tstop - cv_mem->cv_tn)*(ONE-FOUR*cv_mem->cv_uround); cv_mem->cv_eta = cv_mem->cv_hprime / cv_mem->cv_h; } - } /* In NORMAL mode, check if tout reached */ diff --git a/src/ida/ida.c b/src/ida/ida.c index 9e9b7c4b1d..2250bd6937 100644 --- a/src/ida/ida.c +++ b/src/ida/ida.c @@ -1977,28 +1977,42 @@ static int IDAStopTest1(IDAMem IDA_mem, realtype tout, realtype *tret, int ier; realtype troundoff; - if (IDA_mem->ida_tstopset) { - /* Test for tn past tstop, tn = tretlast, tn past tout, tn near tstop. */ - if ( (IDA_mem->ida_tn - IDA_mem->ida_tstop)*IDA_mem->ida_hh > ZERO) { + if (IDA_mem->ida_tstopset) + { + /* Test for tn past tstop */ + if ( (IDA_mem->ida_tn - IDA_mem->ida_tstop)*IDA_mem->ida_hh > ZERO) + { IDAProcessError(IDA_mem, IDA_ILL_INPUT, "IDA", "IDASolve", MSG_BAD_TSTOP, IDA_mem->ida_tstop, IDA_mem->ida_tn); return(IDA_ILL_INPUT); } troundoff = HUNDRED * IDA_mem->ida_uround * (SUNRabs(IDA_mem->ida_tn) + SUNRabs(IDA_mem->ida_hh)); - if (SUNRabs(IDA_mem->ida_tn - IDA_mem->ida_tstop) <= troundoff) { - ier = IDAGetSolution(IDA_mem, IDA_mem->ida_tstop, yret, ypret); - if (ier != IDA_SUCCESS) { - IDAProcessError(IDA_mem, IDA_ILL_INPUT, "IDA", "IDASolve", - MSG_BAD_TSTOP, IDA_mem->ida_tstop, IDA_mem->ida_tn); - return(IDA_ILL_INPUT); + + /* Test for tn at tstop */ + if (SUNRabs(IDA_mem->ida_tn - IDA_mem->ida_tstop) <= troundoff) + { + /* Ensure tout >= tstop, otherwise check for tout return below */ + if ((tout - IDA_mem->ida_tstop) * IDA_mem->ida_hh >= ZERO || + SUNRabs(tout - IDA_mem->ida_tstop) <= troundoff) + { + ier = IDAGetSolution(IDA_mem, IDA_mem->ida_tstop, yret, ypret); + if (ier != IDA_SUCCESS) + { + IDAProcessError(IDA_mem, IDA_ILL_INPUT, "IDA", "IDASolve", + MSG_BAD_TSTOP, IDA_mem->ida_tstop, IDA_mem->ida_tn); + return(IDA_ILL_INPUT); + } + *tret = IDA_mem->ida_tretlast = IDA_mem->ida_tstop; + IDA_mem->ida_tstopset = SUNFALSE; + return(IDA_TSTOP_RETURN); } - *tret = IDA_mem->ida_tretlast = IDA_mem->ida_tstop; - IDA_mem->ida_tstopset = SUNFALSE; - return(IDA_TSTOP_RETURN); } - if ((IDA_mem->ida_tn + IDA_mem->ida_hh - IDA_mem->ida_tstop)*IDA_mem->ida_hh > ZERO) + /* Test for tn approaching tstop */ + else if ((IDA_mem->ida_tn + IDA_mem->ida_hh - IDA_mem->ida_tstop)*IDA_mem->ida_hh > ZERO) + { IDA_mem->ida_hh = (IDA_mem->ida_tstop - IDA_mem->ida_tn)*(ONE - FOUR * IDA_mem->ida_uround); + } } switch (itask) { @@ -2065,17 +2079,28 @@ static int IDAStopTest2(IDAMem IDA_mem, realtype tout, realtype *tret, /* int ier; */ realtype troundoff; - if (IDA_mem->ida_tstopset) { - /* Test for tn at tstop and for tn near tstop */ + if (IDA_mem->ida_tstopset) + { troundoff = HUNDRED * IDA_mem->ida_uround * (SUNRabs(IDA_mem->ida_tn) + SUNRabs(IDA_mem->ida_hh)); - if (SUNRabs(IDA_mem->ida_tn - IDA_mem->ida_tstop) <= troundoff) { - /* ier = */ IDAGetSolution(IDA_mem, IDA_mem->ida_tstop, yret, ypret); - *tret = IDA_mem->ida_tretlast = IDA_mem->ida_tstop; - IDA_mem->ida_tstopset = SUNFALSE; - return(IDA_TSTOP_RETURN); + + /* Test for tn at tstop */ + if (SUNRabs(IDA_mem->ida_tn - IDA_mem->ida_tstop) <= troundoff) + { + /* Ensure tout >= tstop, otherwise check for tout return below */ + if ((tout - IDA_mem->ida_tstop) * IDA_mem->ida_hh >= ZERO || + SUNRabs(tout - IDA_mem->ida_tstop) <= troundoff) + { + /* ier = */ IDAGetSolution(IDA_mem, IDA_mem->ida_tstop, yret, ypret); + *tret = IDA_mem->ida_tretlast = IDA_mem->ida_tstop; + IDA_mem->ida_tstopset = SUNFALSE; + return(IDA_TSTOP_RETURN); + } } - if ((IDA_mem->ida_tn + IDA_mem->ida_hh - IDA_mem->ida_tstop)*IDA_mem->ida_hh > ZERO) + /* Test for tn approaching tstop */ + else if ((IDA_mem->ida_tn + IDA_mem->ida_hh - IDA_mem->ida_tstop)*IDA_mem->ida_hh > ZERO) + { IDA_mem->ida_hh = (IDA_mem->ida_tstop - IDA_mem->ida_tn)*(ONE - FOUR * IDA_mem->ida_uround); + } } switch (itask) { diff --git a/src/idas/idas.c b/src/idas/idas.c index a2b0874d03..0008a13375 100644 --- a/src/idas/idas.c +++ b/src/idas/idas.c @@ -4985,28 +4985,42 @@ static int IDAStopTest1(IDAMem IDA_mem, realtype tout, realtype *tret, int ier; realtype troundoff; - if (IDA_mem->ida_tstopset) { - /* Test for tn past tstop, tn past tretlast, and tn near tstop. */ - if ((IDA_mem->ida_tn - IDA_mem->ida_tstop)*IDA_mem->ida_hh > ZERO) { - IDAProcessError(IDA_mem, IDA_ILL_INPUT, "IDA", "IDASolve", + if (IDA_mem->ida_tstopset) + { + /* Test for tn past tstop */ + if ( (IDA_mem->ida_tn - IDA_mem->ida_tstop)*IDA_mem->ida_hh > ZERO) + { + IDAProcessError(IDA_mem, IDA_ILL_INPUT, "IDAS", "IDASolve", MSG_BAD_TSTOP, IDA_mem->ida_tstop, IDA_mem->ida_tn); return(IDA_ILL_INPUT); } troundoff = HUNDRED * IDA_mem->ida_uround * (SUNRabs(IDA_mem->ida_tn) + SUNRabs(IDA_mem->ida_hh)); - if (SUNRabs(IDA_mem->ida_tn - IDA_mem->ida_tstop) <= troundoff) { - ier = IDAGetSolution(IDA_mem, IDA_mem->ida_tstop, yret, ypret); - if (ier != IDA_SUCCESS) { - IDAProcessError(IDA_mem, IDA_ILL_INPUT, "IDA", "IDASolve", - MSG_BAD_TSTOP, IDA_mem->ida_tstop, IDA_mem->ida_tn); - return(IDA_ILL_INPUT); + + /* Test for tn at tstop */ + if (SUNRabs(IDA_mem->ida_tn - IDA_mem->ida_tstop) <= troundoff) + { + /* Ensure tout >= tstop, otherwise check for tout return below */ + if ((tout - IDA_mem->ida_tstop) * IDA_mem->ida_hh >= ZERO || + SUNRabs(tout - IDA_mem->ida_tstop) <= troundoff) + { + ier = IDAGetSolution(IDA_mem, IDA_mem->ida_tstop, yret, ypret); + if (ier != IDA_SUCCESS) + { + IDAProcessError(IDA_mem, IDA_ILL_INPUT, "IDAS", "IDASolve", + MSG_BAD_TSTOP, IDA_mem->ida_tstop, IDA_mem->ida_tn); + return(IDA_ILL_INPUT); + } + *tret = IDA_mem->ida_tretlast = IDA_mem->ida_tstop; + IDA_mem->ida_tstopset = SUNFALSE; + return(IDA_TSTOP_RETURN); } - *tret = IDA_mem->ida_tretlast = IDA_mem->ida_tstop; - IDA_mem->ida_tstopset = SUNFALSE; - return(IDA_TSTOP_RETURN); } - if ((IDA_mem->ida_tn + IDA_mem->ida_hh - IDA_mem->ida_tstop)*IDA_mem->ida_hh > ZERO) + /* Test for tn approaching tstop */ + else if ((IDA_mem->ida_tn + IDA_mem->ida_hh - IDA_mem->ida_tstop)*IDA_mem->ida_hh > ZERO) + { IDA_mem->ida_hh = (IDA_mem->ida_tstop - IDA_mem->ida_tn)*(ONE - FOUR * IDA_mem->ida_uround); + } } switch (itask) { @@ -5073,17 +5087,28 @@ static int IDAStopTest2(IDAMem IDA_mem, realtype tout, realtype *tret, /* int ier; */ realtype troundoff; - if (IDA_mem->ida_tstopset) { - /* Test for tn at tstop and for tn near tstop */ + if (IDA_mem->ida_tstopset) + { troundoff = HUNDRED * IDA_mem->ida_uround * (SUNRabs(IDA_mem->ida_tn) + SUNRabs(IDA_mem->ida_hh)); - if (SUNRabs(IDA_mem->ida_tn - IDA_mem->ida_tstop) <= troundoff) { - /* ier = */ IDAGetSolution(IDA_mem, IDA_mem->ida_tstop, yret, ypret); - *tret = IDA_mem->ida_tretlast = IDA_mem->ida_tstop; - IDA_mem->ida_tstopset = SUNFALSE; - return(IDA_TSTOP_RETURN); + + /* Test for tn at tstop */ + if (SUNRabs(IDA_mem->ida_tn - IDA_mem->ida_tstop) <= troundoff) + { + /* Ensure tout >= tstop, otherwise check for tout return below */ + if ((tout - IDA_mem->ida_tstop) * IDA_mem->ida_hh >= ZERO || + SUNRabs(tout - IDA_mem->ida_tstop) <= troundoff) + { + /* ier = */ IDAGetSolution(IDA_mem, IDA_mem->ida_tstop, yret, ypret); + *tret = IDA_mem->ida_tretlast = IDA_mem->ida_tstop; + IDA_mem->ida_tstopset = SUNFALSE; + return(IDA_TSTOP_RETURN); + } } - if ((IDA_mem->ida_tn + IDA_mem->ida_hh - IDA_mem->ida_tstop)*IDA_mem->ida_hh > ZERO) + /* Test for tn approaching tstop */ + else if ((IDA_mem->ida_tn + IDA_mem->ida_hh - IDA_mem->ida_tstop)*IDA_mem->ida_hh > ZERO) + { IDA_mem->ida_hh = (IDA_mem->ida_tstop - IDA_mem->ida_tn)*(ONE - FOUR * IDA_mem->ida_uround); + } } switch (itask) { diff --git a/test/answers b/test/answers index b47ad4f1f5..8f1d469a42 160000 --- a/test/answers +++ b/test/answers @@ -1 +1 @@ -Subproject commit b47ad4f1f5da5548b62f1933dad38294d9943fc5 +Subproject commit 8f1d469a426ffb562361ab2aa1702ad15194cb2c diff --git a/test/unit_tests/arkode/C_serial/CMakeLists.txt b/test/unit_tests/arkode/C_serial/CMakeLists.txt index 597196c228..2c4daa7d74 100644 --- a/test/unit_tests/arkode/C_serial/CMakeLists.txt +++ b/test/unit_tests/arkode/C_serial/CMakeLists.txt @@ -30,6 +30,7 @@ set(ARKODE_unit_tests "ark_test_interp\;-10000" "ark_test_interp\;-1000000" "ark_test_reset\;" + "ark_test_tstop\;" ) # Add the build and install targets for each test diff --git a/test/unit_tests/arkode/C_serial/ark_test_tstop.c b/test/unit_tests/arkode/C_serial/ark_test_tstop.c new file mode 100644 index 0000000000..66a17b5a46 --- /dev/null +++ b/test/unit_tests/arkode/C_serial/ark_test_tstop.c @@ -0,0 +1,231 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2023, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * Unit test for setting stop time + * ---------------------------------------------------------------------------*/ + +#include +#include + +#include "nvector/nvector_serial.h" +#include "sundials/sundials_matrix.h" +#include "sundials/sundials_nvector.h" +#include "sunmatrix/sunmatrix_dense.h" +#include "sunlinsol/sunlinsol_dense.h" +#include "arkode/arkode_arkstep.h" + +#if defined(SUNDIALS_EXTENDED_PRECISION) +#define GSYM "Lg" +#else +#define GSYM "g" +#endif + +#define ZERO SUN_RCONST(0.0) +#define ONE SUN_RCONST(1.0) + + +int ode_rhs(sunrealtype t, N_Vector y, N_Vector ydot, void *user_data) +{ + sunrealtype* ydot_data = N_VGetArrayPointer(ydot); + ydot_data[0] = ONE; + return 0; +} + + +int ode_jac(sunrealtype t, N_Vector y, N_Vector f, SUNMatrix J, void *user_data, + N_Vector tempv1, N_Vector tempv2, N_Vector tempv3) +{ + sunrealtype* J_data = SUNDenseMatrix_Data(J); + J_data[0] = ZERO; + return 0; +} + + +int main(int argc, char *argv[]) +{ + SUNContext sunctx = NULL; + N_Vector y = NULL; + SUNMatrix A = NULL; + SUNLinearSolver LS = NULL; + void* arkode_mem = NULL; + + int flag = 0; + int arkode_flag = 0; + int i = 0; + sunrealtype tout = SUN_RCONST(0.10); + sunrealtype dt_tout = SUN_RCONST(0.25); + sunrealtype tstop = SUN_RCONST(0.30); + sunrealtype dt_tstop = SUN_RCONST(0.30); + sunrealtype tret = ZERO; + sunrealtype tcur = ZERO; + + /* -------------- + * Create context + * -------------- */ + + flag = SUNContext_Create(NULL, &sunctx); + if (flag) + { + fprintf(stderr, "SUNContext_Create returned %i\n", flag); + return 1; + } + + /* ----------------------- + * Setup initial condition + * ----------------------- */ + + y = N_VNew_Serial(1, sunctx); + if (!y) { return 1; } + N_VConst(ZERO, y); + + /* ----------- + * Setup ARKODE + * ----------- */ + + arkode_mem = ARKStepCreate(NULL, ode_rhs, ZERO, y, sunctx); + if (!arkode_mem) { return 1; } + + flag = ARKStepSStolerances(arkode_mem, SUN_RCONST(1.0e-4), SUN_RCONST(1.0e-8)); + if (flag) { return 1; } + + A = SUNDenseMatrix(1, 1, sunctx); + if (!A) { return 1; } + + LS = SUNLinSol_Dense(y, A, sunctx); + if (!LS) { return 1; } + + flag = ARKStepSetLinearSolver(arkode_mem, LS, A); + if (flag) { return 1; } + + flag = ARKStepSetJacFn(arkode_mem, ode_jac); + if (flag) { return 1; } + + flag = ARKStepSetOrder(arkode_mem, 2); + if (flag) { return 1; } + + flag = ARKStepSetStopTime(arkode_mem, tstop); + if (flag) { return 1; } + + /* --------------- + * Advance in time + * --------------- */ + + printf("0: tout = %" GSYM ", tstop = %" GSYM ", tret = %" GSYM ", tcur = %" GSYM "\n", + tout, tstop, tret, tcur); + + for (i = 1; i <= 6; i++) + { + arkode_flag = ARKStepEvolve(arkode_mem, tout, y, &tret, ARK_NORMAL); + if (arkode_flag < 0) { flag = 1; break; } + + flag = ARKStepGetCurrentTime(arkode_mem, &tcur); + if (flag) { break; } + + printf("%i: tout = %" GSYM ", tstop = %" GSYM ", tret = %" GSYM ", tcur = %" GSYM ", return = %i\n", + i, tout, tstop, tret, tcur, arkode_flag); + + /* First return: output time < stop time */ + if (i == 1 && arkode_flag != ARK_SUCCESS) + { + printf("ERROR: Expected output return!\n"); + flag = 1; + break; + } + + /* Second return: output time > stop time */ + if (i == 2) + { + if (arkode_flag != ARK_TSTOP_RETURN) + { + printf("ERROR: Expected stop return!\n"); + flag = 1; + break; + } + + /* Update stop time */ + tstop += dt_tstop; + flag = ARKStepSetStopTime(arkode_mem, tstop); + if (flag) { break; } + } + + /* Third return: output time = stop time */ + if (i == 3) + { + if (arkode_flag != ARK_TSTOP_RETURN) + { + printf("ERROR: Expected stop return!\n"); + flag = 1; + break; + } + + /* Update stop time */ + tstop += dt_tstop; + flag = ARKStepSetStopTime(arkode_mem, tstop); + if (flag) { break; } + } + + /* Fourth return: output time < stop time but both output time and the stop + time are overtaken in the same step */ + if (i == 4) + { + if (arkode_flag != ARK_SUCCESS) + { + printf("ERROR: Expected output return!\n"); + flag = 1; + break; + } + } + + /* Fifth return: output time > stop time after step where both output time + and the stop time were overtaken in the same step */ + if (i == 5) + { + if (arkode_flag != ARK_TSTOP_RETURN) + { + printf("ERROR: Expected stop return!\n"); + flag = 1; + break; + } + } + + /* Sixth return: output time < stop time (not updated) */ + if (i == 6 && arkode_flag != ARK_SUCCESS) + { + printf("ERROR: Expected output return!\n"); + flag = 1; + break; + } + + /* update output time */ + tout += dt_tout; + } + + /* -------- + * Clean up + * -------- */ + + ARKStepFree(&arkode_mem); + N_VDestroy(y); + SUNMatDestroy(A); + SUNLinSolFree(LS); + SUNContext_Free(&sunctx); + + if (!flag) + { + printf("SUCCESS\n"); + } + + return flag; +} + +/*---- end of file ----*/ diff --git a/test/unit_tests/cvode/C_serial/CMakeLists.txt b/test/unit_tests/cvode/C_serial/CMakeLists.txt index e576d75ddd..f396478a16 100644 --- a/test/unit_tests/cvode/C_serial/CMakeLists.txt +++ b/test/unit_tests/cvode/C_serial/CMakeLists.txt @@ -17,6 +17,7 @@ # List of test tuples of the form "name\;args" set(unit_tests "cv_test_getuserdata\;" + "cv_test_tstop\;" ) # Add the build and install targets for each test diff --git a/test/unit_tests/cvode/C_serial/cv_test_tstop.c b/test/unit_tests/cvode/C_serial/cv_test_tstop.c new file mode 100644 index 0000000000..50c7e053d6 --- /dev/null +++ b/test/unit_tests/cvode/C_serial/cv_test_tstop.c @@ -0,0 +1,234 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2023, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * Unit test for setting stop time + * ---------------------------------------------------------------------------*/ + +#include +#include + +#include "nvector/nvector_serial.h" +#include "sundials/sundials_matrix.h" +#include "sundials/sundials_nvector.h" +#include "sunmatrix/sunmatrix_dense.h" +#include "sunlinsol/sunlinsol_dense.h" +#include "cvode/cvode.h" + +#if defined(SUNDIALS_EXTENDED_PRECISION) +#define GSYM "Lg" +#else +#define GSYM "g" +#endif + +#define ZERO SUN_RCONST(0.0) +#define ONE SUN_RCONST(1.0) + + +int ode_rhs(sunrealtype t, N_Vector y, N_Vector ydot, void *user_data) +{ + sunrealtype* ydot_data = N_VGetArrayPointer(ydot); + ydot_data[0] = ONE; + return 0; +} + + +int ode_jac(sunrealtype t, N_Vector y, N_Vector f, SUNMatrix J, void *user_data, + N_Vector tempv1, N_Vector tempv2, N_Vector tempv3) +{ + sunrealtype* J_data = SUNDenseMatrix_Data(J); + J_data[0] = ZERO; + return 0; +} + + +int main(int argc, char *argv[]) +{ + SUNContext sunctx = NULL; + N_Vector y = NULL; + SUNMatrix A = NULL; + SUNLinearSolver LS = NULL; + void* cvode_mem = NULL; + + int flag = 0; + int cvode_flag = 0; + int i = 0; + sunrealtype tout = SUN_RCONST(0.10); + sunrealtype dt_tout = SUN_RCONST(0.25); + sunrealtype tstop = SUN_RCONST(0.30); + sunrealtype dt_tstop = SUN_RCONST(0.30); + sunrealtype tret = ZERO; + sunrealtype tcur = ZERO; + + /* -------------- + * Create context + * -------------- */ + + flag = SUNContext_Create(NULL, &sunctx); + if (flag) + { + fprintf(stderr, "SUNContext_Create returned %i\n", flag); + return 1; + } + + /* ----------------------- + * Setup initial condition + * ----------------------- */ + + y = N_VNew_Serial(1, sunctx); + if (!y) { return 1; } + N_VConst(ZERO, y); + + /* ----------- + * Setup CVODE + * ----------- */ + + cvode_mem = CVodeCreate(CV_BDF, sunctx); + if (!cvode_mem) { return 1; } + + flag = CVodeInit(cvode_mem, ode_rhs, ZERO, y); + if (flag) { return 1; } + + flag = CVodeSStolerances(cvode_mem, SUN_RCONST(1.0e-4), SUN_RCONST(1.0e-8)); + if (flag) { return 1; } + + A = SUNDenseMatrix(1, 1, sunctx); + if (!A) { return 1; } + + LS = SUNLinSol_Dense(y, A, sunctx); + if (!LS) { return 1; } + + flag = CVodeSetLinearSolver(cvode_mem, LS, A); + if (flag) { return 1; } + + flag = CVodeSetJacFn(cvode_mem, ode_jac); + if (flag) { return 1; } + + flag = CVodeSetMaxOrd(cvode_mem, 1); + if (flag) { return 1; } + + flag = CVodeSetStopTime(cvode_mem, tstop); + if (flag) { return 1; } + + /* --------------- + * Advance in time + * --------------- */ + + printf("0: tout = %" GSYM ", tstop = %" GSYM ", tret = %" GSYM ", tcur = %" GSYM "\n", + tout, tstop, tret, tcur); + + for (i = 1; i <= 6; i++) + { + cvode_flag = CVode(cvode_mem, tout, y, &tret, CV_NORMAL); + if (cvode_flag < 0) { flag = 1; break; } + + flag = CVodeGetCurrentTime(cvode_mem, &tcur); + if (flag) { break; } + + printf("%i: tout = %" GSYM ", tstop = %" GSYM ", tret = %" GSYM ", tcur = %" GSYM ", return = %i\n", + i, tout, tstop, tret, tcur, cvode_flag); + + /* First return: output time < stop time */ + if (i == 1 && cvode_flag != CV_SUCCESS) + { + printf("ERROR: Expected output return!\n"); + flag = 1; + break; + } + + /* Second return: output time > stop time */ + if (i == 2) + { + if (cvode_flag != CV_TSTOP_RETURN) + { + printf("ERROR: Expected stop return!\n"); + flag = 1; + break; + } + + /* Update stop time */ + tstop += dt_tstop; + flag = CVodeSetStopTime(cvode_mem, tstop); + if (flag) { break; } + } + + /* Third return: output time = stop time */ + if (i == 3) + { + if (cvode_flag != CV_TSTOP_RETURN) + { + printf("ERROR: Expected stop return!\n"); + flag = 1; + break; + } + + /* Update stop time */ + tstop += dt_tstop; + flag = CVodeSetStopTime(cvode_mem, tstop); + if (flag) { break; } + } + + /* Fourth return: output time < stop time but both output time and the stop + time are overtaken in the same step */ + if (i == 4) + { + if (cvode_flag != CV_SUCCESS) + { + printf("ERROR: Expected output return!\n"); + flag = 1; + break; + } + } + + /* Fifth return: output time > stop time after step where both output time + and the stop time were overtaken in the same step */ + if (i == 5) + { + if (cvode_flag != CV_TSTOP_RETURN) + { + printf("ERROR: Expected stop return!\n"); + flag = 1; + break; + } + } + + /* Sixth return: output time < stop time (not updated) */ + if (i == 6 && cvode_flag != CV_SUCCESS) + { + printf("ERROR: Expected output return!\n"); + flag = 1; + break; + } + + /* update output time */ + tout += dt_tout; + } + + /* -------- + * Clean up + * -------- */ + + CVodeFree(&cvode_mem); + N_VDestroy(y); + SUNMatDestroy(A); + SUNLinSolFree(LS); + SUNContext_Free(&sunctx); + + if (!flag) + { + printf("SUCCESS\n"); + } + + return flag; +} + +/*---- end of file ----*/ diff --git a/test/unit_tests/cvodes/C_serial/CMakeLists.txt b/test/unit_tests/cvodes/C_serial/CMakeLists.txt index 0775ba2ab5..96c747becb 100644 --- a/test/unit_tests/cvodes/C_serial/CMakeLists.txt +++ b/test/unit_tests/cvodes/C_serial/CMakeLists.txt @@ -17,6 +17,7 @@ # List of test tuples of the form "name\;args" set(unit_tests "cvs_test_getuserdata\;" + "cvs_test_tstop\;" ) # Add the build and install targets for each test diff --git a/test/unit_tests/cvodes/C_serial/cvs_test_tstop.c b/test/unit_tests/cvodes/C_serial/cvs_test_tstop.c new file mode 100644 index 0000000000..5ce9c1c611 --- /dev/null +++ b/test/unit_tests/cvodes/C_serial/cvs_test_tstop.c @@ -0,0 +1,234 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2023, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * Unit test for setting stop time + * ---------------------------------------------------------------------------*/ + +#include +#include + +#include "nvector/nvector_serial.h" +#include "sundials/sundials_matrix.h" +#include "sundials/sundials_nvector.h" +#include "sunmatrix/sunmatrix_dense.h" +#include "sunlinsol/sunlinsol_dense.h" +#include "cvodes/cvodes.h" + +#if defined(SUNDIALS_EXTENDED_PRECISION) +#define GSYM "Lg" +#else +#define GSYM "g" +#endif + +#define ZERO SUN_RCONST(0.0) +#define ONE SUN_RCONST(1.0) + + +int ode_rhs(sunrealtype t, N_Vector y, N_Vector ydot, void *user_data) +{ + sunrealtype* ydot_data = N_VGetArrayPointer(ydot); + ydot_data[0] = ONE; + return 0; +} + + +int ode_jac(sunrealtype t, N_Vector y, N_Vector f, SUNMatrix J, void *user_data, + N_Vector tempv1, N_Vector tempv2, N_Vector tempv3) +{ + sunrealtype* J_data = SUNDenseMatrix_Data(J); + J_data[0] = ZERO; + return 0; +} + + +int main(int argc, char *argv[]) +{ + SUNContext sunctx = NULL; + N_Vector y = NULL; + SUNMatrix A = NULL; + SUNLinearSolver LS = NULL; + void* cvode_mem = NULL; + + int flag = 0; + int cvode_flag = 0; + int i = 0; + sunrealtype tout = SUN_RCONST(0.10); + sunrealtype dt_tout = SUN_RCONST(0.25); + sunrealtype tstop = SUN_RCONST(0.30); + sunrealtype dt_tstop = SUN_RCONST(0.30); + sunrealtype tret = ZERO; + sunrealtype tcur = ZERO; + + /* -------------- + * Create context + * -------------- */ + + flag = SUNContext_Create(NULL, &sunctx); + if (flag) + { + fprintf(stderr, "SUNContext_Create returned %i\n", flag); + return 1; + } + + /* ----------------------- + * Setup initial condition + * ----------------------- */ + + y = N_VNew_Serial(1, sunctx); + if (!y) { return 1; } + N_VConst(ZERO, y); + + /* ----------- + * Setup CVODE + * ----------- */ + + cvode_mem = CVodeCreate(CV_BDF, sunctx); + if (!cvode_mem) { return 1; } + + flag = CVodeInit(cvode_mem, ode_rhs, ZERO, y); + if (flag) { return 1; } + + flag = CVodeSStolerances(cvode_mem, SUN_RCONST(1.0e-4), SUN_RCONST(1.0e-8)); + if (flag) { return 1; } + + A = SUNDenseMatrix(1, 1, sunctx); + if (!A) { return 1; } + + LS = SUNLinSol_Dense(y, A, sunctx); + if (!LS) { return 1; } + + flag = CVodeSetLinearSolver(cvode_mem, LS, A); + if (flag) { return 1; } + + flag = CVodeSetJacFn(cvode_mem, ode_jac); + if (flag) { return 1; } + + flag = CVodeSetMaxOrd(cvode_mem, 1); + if (flag) { return 1; } + + flag = CVodeSetStopTime(cvode_mem, tstop); + if (flag) { return 1; } + + /* --------------- + * Advance in time + * --------------- */ + + printf("0: tout = %" GSYM ", tstop = %" GSYM ", tret = %" GSYM ", tcur = %" GSYM "\n", + tout, tstop, tret, tcur); + + for (i = 1; i <= 6; i++) + { + cvode_flag = CVode(cvode_mem, tout, y, &tret, CV_NORMAL); + if (cvode_flag < 0) { flag = 1; break; } + + flag = CVodeGetCurrentTime(cvode_mem, &tcur); + if (flag) { break; } + + printf("%i: tout = %" GSYM ", tstop = %" GSYM ", tret = %" GSYM ", tcur = %" GSYM ", return = %i\n", + i, tout, tstop, tret, tcur, cvode_flag); + + /* First return: output time < stop time */ + if (i == 1 && cvode_flag != CV_SUCCESS) + { + printf("ERROR: Expected output return!\n"); + flag = 1; + break; + } + + /* Second return: output time > stop time */ + if (i == 2) + { + if (cvode_flag != CV_TSTOP_RETURN) + { + printf("ERROR: Expected stop return!\n"); + flag = 1; + break; + } + + /* Update stop time */ + tstop += dt_tstop; + flag = CVodeSetStopTime(cvode_mem, tstop); + if (flag) { break; } + } + + /* Third return: output time = stop time */ + if (i == 3) + { + if (cvode_flag != CV_TSTOP_RETURN) + { + printf("ERROR: Expected stop return!\n"); + flag = 1; + break; + } + + /* Update stop time */ + tstop += dt_tstop; + flag = CVodeSetStopTime(cvode_mem, tstop); + if (flag) { break; } + } + + /* Fourth return: output time < stop time but both output time and the stop + time are overtaken in the same step */ + if (i == 4) + { + if (cvode_flag != CV_SUCCESS) + { + printf("ERROR: Expected output return!\n"); + flag = 1; + break; + } + } + + /* Fifth return: output time > stop time after step where both output time + and the stop time were overtaken in the same step */ + if (i == 5) + { + if (cvode_flag != CV_TSTOP_RETURN) + { + printf("ERROR: Expected stop return!\n"); + flag = 1; + break; + } + } + + /* Sixth return: output time < stop time (not updated) */ + if (i == 6 && cvode_flag != CV_SUCCESS) + { + printf("ERROR: Expected output return!\n"); + flag = 1; + break; + } + + /* update output time */ + tout += dt_tout; + } + + /* -------- + * Clean up + * -------- */ + + CVodeFree(&cvode_mem); + N_VDestroy(y); + SUNMatDestroy(A); + SUNLinSolFree(LS); + SUNContext_Free(&sunctx); + + if (!flag) + { + printf("SUCCESS\n"); + } + + return flag; +} + +/*---- end of file ----*/ diff --git a/test/unit_tests/ida/C_serial/CMakeLists.txt b/test/unit_tests/ida/C_serial/CMakeLists.txt index f2060ee3ee..543b11c4f9 100644 --- a/test/unit_tests/ida/C_serial/CMakeLists.txt +++ b/test/unit_tests/ida/C_serial/CMakeLists.txt @@ -17,6 +17,7 @@ # List of test tuples of the form "name\;args" set(unit_tests "ida_test_getuserdata\;" + "ida_test_tstop\;" ) # Add the build and install targets for each test diff --git a/test/unit_tests/ida/C_serial/ida_test_tstop.c b/test/unit_tests/ida/C_serial/ida_test_tstop.c new file mode 100644 index 0000000000..491910ae1b --- /dev/null +++ b/test/unit_tests/ida/C_serial/ida_test_tstop.c @@ -0,0 +1,243 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2023, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * Unit test for setting stop time + * ---------------------------------------------------------------------------*/ + +#include +#include + +#include "nvector/nvector_serial.h" +#include "sundials/sundials_matrix.h" +#include "sundials/sundials_nvector.h" +#include "sunmatrix/sunmatrix_dense.h" +#include "sunlinsol/sunlinsol_dense.h" +#include "ida/ida.h" + +#if defined(SUNDIALS_EXTENDED_PRECISION) +#define GSYM "Lg" +#else +#define GSYM "g" +#endif + +#define ZERO SUN_RCONST(0.0) +#define ONE SUN_RCONST(1.0) + + +int dae_res(sunrealtype t, N_Vector y, N_Vector ydot, N_Vector res, + void *user_data) +{ + sunrealtype* ydot_data = N_VGetArrayPointer(ydot); + sunrealtype* res_data = N_VGetArrayPointer(res); + res_data[0] = ydot_data[0] - ONE; + return 0; +} + + +int dae_jac(sunrealtype t, sunrealtype cj, N_Vector y, N_Vector yp, N_Vector rr, + SUNMatrix J, void *user_data, N_Vector tempv1, N_Vector tempv2, + N_Vector tempv3) +{ + sunrealtype* J_data = SUNDenseMatrix_Data(J); + J_data[0] = ONE; + return 0; +} + + +int main(int argc, char *argv[]) +{ + SUNContext sunctx = NULL; + N_Vector y = NULL; + N_Vector yp = NULL; + SUNMatrix A = NULL; + SUNLinearSolver LS = NULL; + void* ida_mem = NULL; + + int flag = 0; + int ida_flag = 0; + int i = 0; + sunrealtype tout = SUN_RCONST(0.10); + sunrealtype dt_tout = SUN_RCONST(0.25); + sunrealtype tstop = SUN_RCONST(0.30); + sunrealtype dt_tstop = SUN_RCONST(0.30); + sunrealtype tret = ZERO; + sunrealtype tcur = ZERO; + + /* -------------- + * Create context + * -------------- */ + + flag = SUNContext_Create(NULL, &sunctx); + if (flag) + { + fprintf(stderr, "SUNContext_Create returned %i\n", flag); + return 1; + } + + /* ----------------------- + * Setup initial condition + * ------------------------ */ + + y = N_VNew_Serial(1, sunctx); + if (!y) { return 1; } + N_VConst(ZERO, y); + + yp = N_VClone(y); + if (!yp) { return 1; } + N_VConst(ONE, yp); + + /* --------- + * Setup IDA + * --------- */ + + ida_mem = IDACreate(sunctx); + if (!ida_mem) { return 1; } + + flag = IDAInit(ida_mem, dae_res, ZERO, y, yp); + if (flag) { return 1; } + + flag = IDASStolerances(ida_mem, SUN_RCONST(1.0e-4), SUN_RCONST(1.0e-8)); + if (flag) { return 1; } + + A = SUNDenseMatrix(1, 1, sunctx); + if (!A) { return 1; } + + LS = SUNLinSol_Dense(y, A, sunctx); + if (!LS) { return 1; } + + flag = IDASetLinearSolver(ida_mem, LS, A); + if (flag) { return 1; } + + flag = IDASetJacFn(ida_mem, dae_jac); + if (flag) { return 1; } + + flag = IDASetMaxOrd(ida_mem, 1); + if (flag) { return 1; } + + flag = IDASetStopTime(ida_mem, tstop); + if (flag) { return 1; } + + /* --------------- + * Advance in time + * --------------- */ + + printf("0: tout = %" GSYM ", tstop = %" GSYM ", tret = %" GSYM ", tcur = %" GSYM "\n", + tout, tstop, tret, tcur); + + for (i = 1; i <= 6; i++) + { + ida_flag = IDASolve(ida_mem, tout, &tret, y, yp, IDA_NORMAL); + if (ida_flag < 0) { flag = 1; break; } + + flag = IDAGetCurrentTime(ida_mem, &tcur); + if (flag) { break; } + + printf("%i: tout = %" GSYM ", tstop = %" GSYM ", tret = %" GSYM ", tcur = %" GSYM ", return = %i\n", + i, tout, tstop, tret, tcur, ida_flag); + + /* First return: output time < stop time */ + if (i == 1 && ida_flag != IDA_SUCCESS) + { + printf("ERROR: Expected output return!\n"); + flag = 1; + break; + } + + /* Second return: output time > stop time */ + if (i == 2) + { + if (ida_flag != IDA_TSTOP_RETURN) + { + printf("ERROR: Expected stop return!\n"); + flag = 1; + break; + } + + /* Update stop time */ + tstop += dt_tstop; + flag = IDASetStopTime(ida_mem, tstop); + if (flag) { break; } + } + + /* Third return: output time = stop time */ + if (i == 3) + { + if (ida_flag != IDA_TSTOP_RETURN) + { + printf("ERROR: Expected stop return!\n"); + flag = 1; + break; + } + + /* Update stop time */ + tstop += dt_tstop; + flag = IDASetStopTime(ida_mem, tstop); + if (flag) { break; } + } + + /* Fourth return: output time < stop time but both output time and the stop + time are overtaken in the same step */ + if (i == 4) + { + if (ida_flag != IDA_SUCCESS) + { + printf("ERROR: Expected output return!\n"); + flag = 1; + break; + } + } + + /* Fifth return: output time > stop time after step where both output time + and the stop time were overtaken in the same step */ + if (i == 5) + { + if (ida_flag != IDA_TSTOP_RETURN) + { + printf("ERROR: Expected stop return!\n"); + flag = 1; + break; + } + } + + /* Sixth return: output time < stop time (not updated) */ + if (i == 6 && ida_flag != IDA_SUCCESS) + { + printf("ERROR: Expected output return!\n"); + flag = 1; + break; + } + + /* update output time */ + tout += dt_tout; + } + + /* -------- + * Clean up + * -------- */ + + IDAFree(&ida_mem); + N_VDestroy(y); + N_VDestroy(yp); + SUNMatDestroy(A); + SUNLinSolFree(LS); + SUNContext_Free(&sunctx); + + if (!flag) + { + printf("SUCCESS\n"); + } + + return flag; +} + +/*---- end of file ----*/ diff --git a/test/unit_tests/idas/C_serial/CMakeLists.txt b/test/unit_tests/idas/C_serial/CMakeLists.txt index 2514c73c1e..f928ff11ba 100644 --- a/test/unit_tests/idas/C_serial/CMakeLists.txt +++ b/test/unit_tests/idas/C_serial/CMakeLists.txt @@ -17,6 +17,7 @@ # List of test tuples of the form "name\;args" set(unit_tests "idas_test_getuserdata\;" + "idas_test_tstop\;" ) # Add the build and install targets for each test diff --git a/test/unit_tests/idas/C_serial/idas_test_tstop.c b/test/unit_tests/idas/C_serial/idas_test_tstop.c new file mode 100644 index 0000000000..6875fa4d79 --- /dev/null +++ b/test/unit_tests/idas/C_serial/idas_test_tstop.c @@ -0,0 +1,243 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): David J. Gardner @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2023, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * Unit test for setting stop time + * ---------------------------------------------------------------------------*/ + +#include +#include + +#include "nvector/nvector_serial.h" +#include "sundials/sundials_matrix.h" +#include "sundials/sundials_nvector.h" +#include "sunmatrix/sunmatrix_dense.h" +#include "sunlinsol/sunlinsol_dense.h" +#include "idas/idas.h" + +#if defined(SUNDIALS_EXTENDED_PRECISION) +#define GSYM "Lg" +#else +#define GSYM "g" +#endif + +#define ZERO SUN_RCONST(0.0) +#define ONE SUN_RCONST(1.0) + + +int dae_res(sunrealtype t, N_Vector y, N_Vector ydot, N_Vector res, + void *user_data) +{ + sunrealtype* ydot_data = N_VGetArrayPointer(ydot); + sunrealtype* res_data = N_VGetArrayPointer(res); + res_data[0] = ydot_data[0] - ONE; + return 0; +} + + +int dae_jac(sunrealtype t, sunrealtype cj, N_Vector y, N_Vector yp, N_Vector rr, + SUNMatrix J, void *user_data, N_Vector tempv1, N_Vector tempv2, + N_Vector tempv3) +{ + sunrealtype* J_data = SUNDenseMatrix_Data(J); + J_data[0] = ONE; + return 0; +} + + +int main(int argc, char *argv[]) +{ + SUNContext sunctx = NULL; + N_Vector y = NULL; + N_Vector yp = NULL; + SUNMatrix A = NULL; + SUNLinearSolver LS = NULL; + void* ida_mem = NULL; + + int flag = 0; + int ida_flag = 0; + int i = 0; + sunrealtype tout = SUN_RCONST(0.10); + sunrealtype dt_tout = SUN_RCONST(0.25); + sunrealtype tstop = SUN_RCONST(0.30); + sunrealtype dt_tstop = SUN_RCONST(0.30); + sunrealtype tret = ZERO; + sunrealtype tcur = ZERO; + + /* -------------- + * Create context + * -------------- */ + + flag = SUNContext_Create(NULL, &sunctx); + if (flag) + { + fprintf(stderr, "SUNContext_Create returned %i\n", flag); + return 1; + } + + /* ----------------------- + * Setup initial condition + * ------------------------ */ + + y = N_VNew_Serial(1, sunctx); + if (!y) { return 1; } + N_VConst(ZERO, y); + + yp = N_VClone(y); + if (!yp) { return 1; } + N_VConst(ONE, yp); + + /* --------- + * Setup IDA + * --------- */ + + ida_mem = IDACreate(sunctx); + if (!ida_mem) { return 1; } + + flag = IDAInit(ida_mem, dae_res, ZERO, y, yp); + if (flag) { return 1; } + + flag = IDASStolerances(ida_mem, SUN_RCONST(1.0e-4), SUN_RCONST(1.0e-8)); + if (flag) { return 1; } + + A = SUNDenseMatrix(1, 1, sunctx); + if (!A) { return 1; } + + LS = SUNLinSol_Dense(y, A, sunctx); + if (!LS) { return 1; } + + flag = IDASetLinearSolver(ida_mem, LS, A); + if (flag) { return 1; } + + flag = IDASetJacFn(ida_mem, dae_jac); + if (flag) { return 1; } + + flag = IDASetMaxOrd(ida_mem, 1); + if (flag) { return 1; } + + flag = IDASetStopTime(ida_mem, tstop); + if (flag) { return 1; } + + /* --------------- + * Advance in time + * --------------- */ + + printf("0: tout = %" GSYM ", tstop = %" GSYM ", tret = %" GSYM ", tcur = %" GSYM "\n", + tout, tstop, tret, tcur); + + for (i = 1; i <= 6; i++) + { + ida_flag = IDASolve(ida_mem, tout, &tret, y, yp, IDA_NORMAL); + if (ida_flag < 0) { flag = 1; break; } + + flag = IDAGetCurrentTime(ida_mem, &tcur); + if (flag) { break; } + + printf("%i: tout = %" GSYM ", tstop = %" GSYM ", tret = %" GSYM ", tcur = %" GSYM ", return = %i\n", + i, tout, tstop, tret, tcur, ida_flag); + + /* First return: output time < stop time */ + if (i == 1 && ida_flag != IDA_SUCCESS) + { + printf("ERROR: Expected output return!\n"); + flag = 1; + break; + } + + /* Second return: output time > stop time */ + if (i == 2) + { + if (ida_flag != IDA_TSTOP_RETURN) + { + printf("ERROR: Expected stop return!\n"); + flag = 1; + break; + } + + /* Update stop time */ + tstop += dt_tstop; + flag = IDASetStopTime(ida_mem, tstop); + if (flag) { break; } + } + + /* Third return: output time = stop time */ + if (i == 3) + { + if (ida_flag != IDA_TSTOP_RETURN) + { + printf("ERROR: Expected stop return!\n"); + flag = 1; + break; + } + + /* Update stop time */ + tstop += dt_tstop; + flag = IDASetStopTime(ida_mem, tstop); + if (flag) { break; } + } + + /* Fourth return: output time < stop time but both output time and the stop + time are overtaken in the same step */ + if (i == 4) + { + if (ida_flag != IDA_SUCCESS) + { + printf("ERROR: Expected output return!\n"); + flag = 1; + break; + } + } + + /* Fifth return: output time > stop time after step where both output time + and the stop time were overtaken in the same step */ + if (i == 5) + { + if (ida_flag != IDA_TSTOP_RETURN) + { + printf("ERROR: Expected stop return!\n"); + flag = 1; + break; + } + } + + /* Sixth return: output time < stop time (not updated) */ + if (i == 6 && ida_flag != IDA_SUCCESS) + { + printf("ERROR: Expected output return!\n"); + flag = 1; + break; + } + + /* update output time */ + tout += dt_tout; + } + + /* -------- + * Clean up + * -------- */ + + IDAFree(&ida_mem); + N_VDestroy(y); + N_VDestroy(yp); + SUNMatDestroy(A); + SUNLinSolFree(LS); + SUNContext_Free(&sunctx); + + if (!flag) + { + printf("SUCCESS\n"); + } + + return flag; +} + +/*---- end of file ----*/ From df0b21109ace56aac92f8200e53d5888b049ae2c Mon Sep 17 00:00:00 2001 From: Cody Balos Date: Fri, 20 Oct 2023 14:03:50 -0700 Subject: [PATCH 10/35] Bugfix: docs missing cvodequadsstolerances (#352) Fix CVodeQuadSStolerances docs, add CVodeQuadSVtolerances docs (Fixes #340) --- doc/cvodes/guide/source/Usage/ADJ.rst | 2 +- doc/cvodes/guide/source/Usage/SIM.rst | 22 +++++++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/doc/cvodes/guide/source/Usage/ADJ.rst b/doc/cvodes/guide/source/Usage/ADJ.rst index db4d6b9655..80e567f024 100644 --- a/doc/cvodes/guide/source/Usage/ADJ.rst +++ b/doc/cvodes/guide/source/Usage/ADJ.rst @@ -24,7 +24,7 @@ several supporting user-callable functions. For this reason, in the following sections we refer to the *backward problem* and not to the *adjoint problem* when discussing details relevant to the ODEs that are integrated backward in time. The backward problem can be the adjoint problem :eq:`CVODES_adj_eqns` or -:eq:`CVODES_adj_eqns`, and can be augmented with some quadrature differential +:eq:`CVODES_adj1_eqns`, and can be augmented with some quadrature differential equations. CVODES uses various constants for both input and output. These are defined as diff --git a/doc/cvodes/guide/source/Usage/SIM.rst b/doc/cvodes/guide/source/Usage/SIM.rst index dbc6506dd1..539f521ea7 100644 --- a/doc/cvodes/guide/source/Usage/SIM.rst +++ b/doc/cvodes/guide/source/Usage/SIM.rst @@ -4122,13 +4122,13 @@ If the quadrature variables are part of the step size control mechanism, one of the following functions must be called to specify the integration tolerances for quadrature variables. -.. c:function:: int CVodeQuadSVtolerances(void * cvode_mem, realtype reltolQ, N_Vector abstolQ) +.. c:function:: int CVodeQuadSStolerances(void *cvode_mem, realtype reltolQ, realtype abstolQ) - The function ``CVodeQuadSStolerances`` specifies scalar relative and absolute tolerances. + The function ``CVodeQuadSStolerances`` specifies scalar relative and absolute tolerances. **Arguments:** * ``cvode_mem`` -- pointer to the CVODES memory block. - * ``reltolQ`` -- tolerances is the scalar relative error tolerance. + * ``reltolQ`` -- is the scalar relative error tolerance. * ``abstolQ`` -- is the scalar absolute error tolerance. **Return value:** @@ -4138,6 +4138,22 @@ integration tolerances for quadrature variables. * ``CV_ILL_INPUT`` -- One of the input tolerances was negative. +.. c:function:: int CVodeQuadSVtolerances(void * cvode_mem, realtype reltolQ, N_Vector abstolQ) + + The function ``CVodeQuadSVtolerances`` specifies scalar relative and vector absolute tolerances. + + **Arguments:** + * ``cvode_mem`` -- pointer to the CVODES memory block. + * ``reltolQ`` -- is the scalar relative error tolerance. + * ``abstolQ`` -- the vector of absolute error tolerances. + + **Return value:** + * ``CV_SUCCESS`` -- The optional value has been successfully set. + * ``CV_NO_QUAD`` -- Quadrature integration was not initialized. + * ``CV_MEM_NULL`` -- The ``cvode_mem`` pointer is ``NULL``. + * ``CV_ILL_INPUT`` -- One of the input tolerances was negative. + + .. _CVODES.Usage.purequad.optional_output: Optional outputs for quadrature integration From 6ae281ef1a6c366fc3ed91322c1626907a5507a0 Mon Sep 17 00:00:00 2001 From: Steven Roberts Date: Thu, 26 Oct 2023 07:18:42 -0700 Subject: [PATCH 11/35] Add Sofroniou-Spaletta-5-3-4 method only (#355) Only adding the `Sofroniou-Spaletta-5-3-4` method but not changing the defaults like #345 --------- Co-authored-by: Daniel R. Reynolds Co-authored-by: David Gardner --- CHANGELOG.md | 2 + doc/arkode/guide/source/Butcher.rst | 37 ++++++++++++++++++ doc/arkode/guide/source/Constants.rst | 2 + doc/arkode/guide/source/Introduction.rst | 2 + .../sofroniou_spaletta_erk_stab_region.png | Bin 0 -> 25026 bytes doc/shared/sundials.bib | 12 ++++++ include/arkode/arkode_butcher_erk.h | 3 +- src/arkode/arkode_butcher_erk.def | 34 ++++++++++++++++ 8 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 doc/shared/figs/arkode/sofroniou_spaletta_erk_stab_region.png diff --git a/CHANGELOG.md b/CHANGELOG.md index 4182e059f3..4b1c49a4a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,8 @@ Fixed scaling bug in `SUNMatScaleAddI_Sparse` for non-square matrices. Fixed missing soversions in some `SUNLinearSolver` and `SUNNonlinearSolver` CMake targets. +Added the fourth order ERK method `ARKODE_SOFRONIOU_SPALETTA_5_3_4`. + ## Changes to SUNDIALS in release 6.6.1 Updated the Tpetra NVector interface to support Trilinos 14. diff --git a/doc/arkode/guide/source/Butcher.rst b/doc/arkode/guide/source/Butcher.rst index ecafdab263..c9166aa55a 100644 --- a/doc/arkode/guide/source/Butcher.rst +++ b/doc/arkode/guide/source/Butcher.rst @@ -331,6 +331,43 @@ This is the default 3th order slow and fast MRIStep method (from Linear stability region for the Knoth-Wolke method +.. _Butcher.Sofroniou_Spaletta: + +Sofroniou-Spaletta-5-3-4 +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. index:: Sofroniou-Spaletta-5-3-4 ERK method + +Accessible via the constant ``ARKODE_SOFRONIOU_SPALETTA_5_3_4`` to +:c:func:`ARKStepSetTableNum`, :c:func:`ERKStepSetTableNum` +or :c:func:`ARKodeButcherTable_LoadERK`. +Accessible via the string ``"ARKODE_SOFRONIOU_SPALETTA_5_3_4"`` to +:c:func:`ARKStepSetTableName`, :c:func:`ERKStepSetTableName` or +:c:func:`ARKodeButcherTable_LoadERKByName`. +(from :cite:p:`Sof:04`). + +.. math:: + + \renewcommand{\arraystretch}{1.5} + \begin{array}{r|ccccc} + 0 & 0 & 0 & 0 & 0 & 0 \\ + \frac{2}{5} & \frac{2}{5} & 0 & 0 & 0 & 0 \\ + \frac{3}{5} & -\frac{3}{20} & \frac{3}{4} & 0 & 0 & 0 \\ + 1 & \frac{19}{44} & -\frac{15}{44} & \frac{10}{11} & 0 & 0 \\ + 1 & \frac{11}{72} & \frac{25}{72} & \frac{25}{72} & \frac{11}{72} & 0 \\ + 4 & \frac{11}{72} & \frac{25}{72} & \frac{25}{72} & \frac{11}{72} & 0 \\ + 3 & \frac{1251515}{8970912} & \frac{3710105}{8970912} & \frac{2519695}{8970912} & \frac{61105}{8970912} & \frac{119041}{747576} \\ + \end{array} + +.. figure:: /figs/arkode/sofroniou_spaletta_erk_stab_region.png + :scale: 50 % + :align: center + + Linear stability region for the Sofroniou-Spaletta method. The method's + region is outlined in blue; the embedding's region is in red. + + + .. _Butcher.Zonneveld: diff --git a/doc/arkode/guide/source/Constants.rst b/doc/arkode/guide/source/Constants.rst index 9a3e6600ac..95b6a43125 100644 --- a/doc/arkode/guide/source/Constants.rst +++ b/doc/arkode/guide/source/Constants.rst @@ -80,6 +80,8 @@ contains the ARKODE output constants. +-----------------------------------------------+------------------------------------------------------------+ | :index:`ARKODE_ARK324L2SA_ERK_4_2_3` | Use the ARK-4-2-3 ERK method. | +-----------------------------------------------+------------------------------------------------------------+ + | :index:`ARKODE_SOFRONIOU_SPALETTA_5_3_4` | Use the Sofroniou-Spaletta-5-3-4 ERK method. | + +-----------------------------------------------+------------------------------------------------------------+ | :index:`ARKODE_ZONNEVELD_5_3_4` | Use the Zonneveld-5-3-4 ERK method. | +-----------------------------------------------+------------------------------------------------------------+ | :index:`ARKODE_ARK436L2SA_ERK_6_3_4` | Use the ARK-6-3-4 ERK method. | diff --git a/doc/arkode/guide/source/Introduction.rst b/doc/arkode/guide/source/Introduction.rst index 3b5d5ac711..4fafa4d0bb 100644 --- a/doc/arkode/guide/source/Introduction.rst +++ b/doc/arkode/guide/source/Introduction.rst @@ -153,6 +153,8 @@ Fixed scaling bug in ``SUNMatScaleAddI_Sparse`` for non-square matrices. Fixed missing soversions in some ``SUNLinearSolver`` and ``SUNNonlinearSolver`` CMake targets. +Added the fourth order ERK method ``ARKODE_SOFRONIOU_SPALETTA_5_3_4``. + Changes in v5.6.1 ----------------- diff --git a/doc/shared/figs/arkode/sofroniou_spaletta_erk_stab_region.png b/doc/shared/figs/arkode/sofroniou_spaletta_erk_stab_region.png new file mode 100644 index 0000000000000000000000000000000000000000..78bd107329d9d7efff80a6e3c3b3b301617e2389 GIT binary patch literal 25026 zcmc$`by$^Mw=cXvR8SBEl~OuH8c8Wx)B+>~36T^K=>`b_5fr2wNhtwoq>&V)LApb_ zq^08<%V)oPzvrC2zw7*Wysqbgb+0?-ocEk#{9;V}loX`#u8>_pAP{&mC<$c*;({3h zff<602}gqdIAp;O9Bb55I|PE55d9y9aZ)dwg}7`lBQJS*5r_B!>$Qd?NjEq|XD_K? zuVVGW-bvrq2q7=_;CKg5rE-qFTBWqSJ4zBxDTpSO$c^+_b)4s8-Lm;RSG7@4>Ud69XxVqgKs6XHO zT0tQ08}s24MsEu(mPDg3>Pg;*h6|XI?b(_fG|Ep1oC{Je<|<{ElGS^)HEGIA63AXJ zo@Wxd+jd7Xz*Ee?(<>~KYEwKU#Cz@RjO63BD#5S!tL_VybssnR53R2mE_QTujcgvndy*gL!Tdra4^lV>kVIwPD>F;A;vd#NY2 zcTo$wr`F>2hV36PnMSF?*xr<%Rdh?eZ&EzBv9K#k=!h_e*wOn6ZfaNjY~g0ga$w)#Fqt zKbFe8JT_WdahLeeP@Cc0AQae1rUW;W5|%-OX)wb|?9 z)%D8NJWBTiF#(%_FRHJP_vAH6aqpOI*)*6+%@2R4#u1bh{aUKram0#q^V8bkGb>@o zwU{5c+EtDT4;;SioiH&mJ$?H0_gEPvmtl)hwD#vhU)}Sgq1Q*dk82({tn}wTD{E7v z^f=jSaVkjIqbS_kv@x)t3?>K=w4Q#Lp^(5`x>EP%LgWtqod-|cO-720aM-j(^t=g3 zNl9(Gt?Vn8nV)ACQ*c<0^E(gx2qZUlvU#4JnF*s2c=9S=v*ezM5r<1oQ$0?0){l?x z#R(lhP`|K9kNNYA@ZQ6xUte@`ms-zB1qqxN=zljgHSIr}8&KC(t+DSG-oJZZT3p=U z8yOh)Pph;oC@H(1H_vtnZj2Wiyq`$_$xKggG+p8w?m-)K zzT1o+V;bddf}oRvCTW;;j(|ST0RN7jz4}G$)-Zax?VX(@_oL+)yGe&3ZT6ajl#|_( zAsuoNxAI8Ix6cI9Wo<+AYtN1-&kx@3@-s7={Rkixe}BzlwAe&jdt$W2%xQhR!hTuV zRgQ##mGyVh`ALSu&iCxL_V)Q^r%7kyZN+7VMUAhod3g9Y_3X7TE@3n^HBD4HT#buK zO3LRwhd1o9yF~4MaI({F+!gOU*K*zDXuQJCfT3l~s`eDdATo(ft9%n)XJG#DrVwG8 z{8#jwf1Or3Na%R&Yl(lc-C~z< z(rd@Q&+?)to8pfj_q^Vp$XN5XU+HH$jF50~Ia(X}S~A7Da>A)@xG`C6Hk22c7(~W% zFNe5)d%k`C<9j@E$M1N2FQdiIM_WtHM~d|7yICvvEHB6cgJbG=d(Qd zym^wx+2QVTUwX5EfI!=k{n^Q3Xf{Cw4=kTQ8KS2qzeh`;xyysOW0{XrFX569sKsp5 zo}Y~vCuvu^{8>!$m@F|HBDeaswO+L`s3Cny>DjW48e4peFT($7bNZQ8^`FDVB*q%r zH2Ffyi3iSYu$Iyk-5nkG0!Z)UTI|Fat;37#e@^mXkmj8C#mMu#&u3xH)l9{r3Y_>l z{vvy1Gs8@#agS#|DHO=!eub6S+pBECoZv9{)W^qXqRNR<*roVHgmbm*j-Vr5GJ|?w znbp*oWtIH-3B2j!CU$pf;jZ>}avsx&-Gkvm0|y5OvI#z`s^6tUIf$JhxHHV+S{rT|W0~r|^_Y1YXUfz;9$)+?*1P`a*h&Ua+1<%a2Yc6O@PaJ%fjJfyEsSJH5vbld6d=;$a& z7lI(zwm(wkWM#PXaH{6@yo;8^Za~jD1~zjy%c$+Fh|z{qotv7H+rR= zy*6TM6aD|iwaMzLy#=01$7H3KKR=0)84)2L#qr7~3WQ>1sU^alFI>Kc+Tr8o&cx#i zk-m5(5-V%*0)bj$e0+RMXV^iIFKI5Wg`KcbP}|SKTg{90N?uQxooV7|7Mf*f$)6q- z4iPjf_|1L_s*>#|If;&q)zr|?P*VC7fA}3=6!&8SmBX!Paa@yur)QW<95{ zBs`T?b1SKi4xyUufiROv84K+TrIn6~jBK@Y-|dlRR?AW1JrUY&p=DMGubp62h%a~E zc`;9OjZ$PBX2N0qLT4OfZJN+MZ-UM^KHU;iRdI1~NlD55^@;gq$WtHUMc1*ij3xh` z_T?m5O<9>n(c^GFx}+?(r$oiEQdVC6^>TVN%xTCH5u%RpiXqaaA4ck?A08gINj5(U zLHfe{TTFCa|Jp6W%*v{I7O!1yoy>g>Gd;3B;l8ngg@pyI!QtT1f(Mf4m#K1ukiID? zDZRbDK39p)j)rQZRt6iL8iVc#s^w_(97`zoLYxnqte$6estr#FX9&Z+?)Og#~vxb1EmZwWR6x zvpa)QneiSC-Z<@mF(5RCD~e>r*SzdWiSb7%Dh?ey&S0=5uH73o8!NTAY9`;&))vcW zH3_krK|VGu{%&osUlmKy6HffHka}9?n$bx)RLE9lUfv)@hJZ;g-TcXx_J!v?DU#c| zH;IYc`@RQVF-kw5kJi2CaS|CF{r-_`h>KcUO^r@om?kt&aicfgpPTdk{ja5^YOc9~ zc?m@*{W$GI;r*r5U|Av}3!CZ%V=HtwAV+}rr_BiHrGB`l3_NT(KD+Hx2|4dfL^xYbk?TZzVRaEdCJUQQjL zJjNG2v6jTD$ojFQ!0Dot-J?SP#UdRY5)akj|G6i<&}W)OzAaM`)26}67C*WcRIX;p z75`vUk6u{H1-t^n4UUCwf09;KOy}(@@{y48iz3(GQ@V$Ij;m@9ps=KVD;?!mQY~kr z{{5Afb^S#&x{+hA?uY1^+4+Z_o?b4BrRV(J`)hZOPB%QJQl*0dxLj6H*^})lFV3<3 zq_sMjJ0C&Sb=xo&%f!UQ7JmkoPQF%!GIpVHH^pI5aoL1pT#wo253MRl_R@(dC%Nq2 zN`n_)XuBECwS;aP%tyZj{IjbEd30N1oB43T|M-_pK}xdLanh_` z$x@qR)05t}gfW(vL(JoW+2pZbHH8;I6`U+nfZ4BNM6XQW7V&KP&ya4ds{T-(_O={@ z&c_y|!VWUzjk+b#ok_8zFm6&x$)0(-{N%r+XquZ_ndEcL(=#ZFJjn2 zuq-AkD?9rmaDKU7Psv4ZnXS*Rob*A_O!n_;@4{glSs9rMNHrUfZ&WfA02AiruXF%*K$ZllX_#Sm4r~w&u0pH``t-V7vC0N14p#3M|*!+S()JLqkK8Z_AHBI%{!`NL7kdH8k8ZwDCan_Vuw> z%!OsLf2^WypeyR+Y0LTXv+%{lBDHBN(bM?E;P6;O_qEgyRha#DN{q_%_n6AUgk<@` zo<3{Mc`ohp%ZC^@u!-&7y|(UdQS0ftm4WQP8NqI$gNd2>S(#!Wdx^V8bHPOC(h6(*C)?IFL?%am?PdG=SXT=@}nXY7k=HqZ_TaP1qPrOuWa!Z%ic z7s$vslH0+I0&K*PQ1TQos;X+_xJsHqbC8sk{b%Ff(D&8V)pREPMV6J7mAU4}tDOZ? zBcI(u)6&yho3NQZ3^N|cj8h^4PtDeYu)|w;73c0}Eo#W6nUAJKjaN3A*mX5ezPy;Z z_(>s6_DNeyO9)>VfZ6Q5k^8Ibi!W{p+Q6eC2^V8|%<`(%A1p`DL6}W?HG?xBNDwI% zOd)JOT(C9UET_nP?;asesOfVGY?(=;P~1CDZTj~p#J#x9A&v9!@BoJhcFO|1Xe*yw zD=FsOG<|+Lbsovi=Kvr)#^af;ZY@9+fA$&;7c$&Sk;Ha5l(TH}h+IpjU|1%74v_k}P&UK@j*pXpW8G8Nr>ZkLT z52>a5Bu8RoYEPezK*TvaJ!aP^cvv%7OE3c~tF0=7S@9Lq``40=(@JMp(acAcm6b8k zXHn?%w>=A(<_;PjFpCqf4~n}=(h0llhUAY@O^s3U@C^KXO7gU$L`4VR@o0CsIhZn< zVq>h#YHOxxTW%YN$0#Ekuy~91g(R1ithTl`epwQ!5r@xdX%c7N@RGzHq~2hv$M+<& zbMg-Bv7jj94=PEE?2Kw^s@5VB-LqMX;NZkJ{ThT_F_b_+W6C_8k>u_I9c@!I7_^ES zoFjw5*{khYIvfKK+3Iun>pkV!Zic8yhui*y6LbEY5&(Xf=spzgO_g~N~KEN4FZ9@!|zO|1l1 zR|qyOi6x*SGsk78Uhiw~EOgFC=Y895Bi4DP50URGFSp%OEC;?sZhsj~Rzf)ssXtXv2-@y_Zo>$`b9daXTe8OE#fybV zbsZ*A)j=j0iFdskG=el~e<9>oLc=He&9Oa2Ci$Ne!{SPqaOqq4)%t!mPmU)%-$h3b z+%ehJq5Prsj?@5J&Q(4;Gg}J=h89S&MxR6TuV-&D>Whl(Y;PBfr#;2tGF)NvPu;sl zDpV9O_%}x6l$JXFYVau=xAou72+Oe;D+M%dKBxP8xNtV=nI+D#R@?i|Z_Yar>^xSJ zCsxq)BBS;>Cb9t|dHK#5PJ;+M1Q2XhouBQESfqB6hQT67nJ1>h!W_>nb;QjWN2*=- z**y;13v}H_Mn;Gj9`U;!SkEa#PK&6$lMipLtxZgE}1SZeDa6gu{&tzANUkYYP1;~trG+x&uu?49I$X$|As@$k@ojxW19J68)^eFq`>jLiVF;gzuV(xtCgm4L z;Y*OSP!nGO%Q*lnHZ}hNQdZ6R*)afI2-E611`RCrde*>}VBMGj`z|7m{c+;whUkx} zqNO1nUGVdr)sWL1F@LJ%*3PqUkZ+gKKQPeR$^+6A+3B5~PrUoS*q7d-C6Z`eccz;q z5ZG_w5fm;x+)GaT%@v3VloMWjqL`OAeZKHawIu zCLo)ju`UQ}M}Hv0eSe2XTS7-Eh>6f+8l}?zjOM>1%Zng_oX>?TY8JoLHwZoPyMwX# z36F4-4t+*(G52yd4I`s5C>?;NXiJ8@;A_aKr2N*Oc87Pzb`S+uEkZzk0!R!gM6YxF+cZ(rFK;2Bzu%a38dYDj_Y3%9Qz91wGKn{Pd}Aou*FS z&DF;`6@5)GEzv_pcdMo5r{?fIi8%U(o}6^3fBO)S*0T!4Lg3ZrCxV%y)%>z57WmeY zMr@@B^7eIFss5p%t3P%z5NwMCIi3>nz;CeD3r?JU?n?0@%7$F{oBj%(24aT zXemGDDSYwU$|LUMM**X@8-K68q3dz_2c#PR4>$pCC2x6K=Pm0fOFQC}LoTs$j2b zxoqbeqz;I41Sx50bfTi7%*@%&I}1o(K=J!i9v)2}Zi2K5FQv1!^*R#rn>!@uZA!So z9;_2$BtYlS1qB>{#{>nBPxq&yby!(hL3+7?gc$WAiGke`kj0$`_AMP9=9Ax?U0f;& zW`aFVQj4BIMuRM)C@;^-!V(dMrdenPB?O5t{DaHylA%)5fiKXKVBx(Og6AOg4qBBi z1-NSpoiF!3^m>8@c?}H>ke=lf0~uOSYPW772oe^vb(wmNjEsQdpj1STmQsNh0E1qb zpU>5)){N!1TX;m>2%#%21I7VlK0prh^YgtCAfJ^%M*u8I2UA3T&e1H9#ohu;AgMhL z9R!+>O#TVu*9VP+aRK>8ZYLr-I^*-_g~dg6H8oik9hYT#YKg~>Lr+8m9oIl^MkgA$ zf>z#nyOj~-frNOfLtAwku`fph`R)UNCJw{p$>|W~kNdAY#V$MZr<1I#d3}73@ftbr zBh#2C3LPyiOA}RPy9dedDF~6~S9zJp%H>K(&pmALzaR;+D<_TzoyAg;qe()&`_ z)bG_>SCGARvorYHGp|=)mf2iB5aNj#G08x^n}=rMF_#-u-=#u|vbcY#}BTIMbY8 z36k^*SpR687)pMJ9<+sU26;jxDBa@NHo)yhRk#ocLJ&#I;5qq_JJsS9S4T@$fzbp} zin8fmLj7gt)QvntTC~Eyd(+!L%f~4sJcx|iOS%g<<5u6*3;sXxxhesA;S2S7obP*p zB&ztmix`2Jrh!>w2mFLcQRw<%Sii?Uyz)`GvXgOnC`8O zWMySRpo#jtyD?Q;Q{xUcz_t(o)~DZI74{g_vlWYBBBlkM08oO-@L4fQM^%-_?O+p{ zGuYn`bjG;e>msCT3JyI_f)oh3Xf+I!U`3B36(;#f$1w|+NjK1l0O{y{e)N$L_Y!Cp zIxw4+m6dgL&On@jY^oe<451A&^6Sm|%Wv!TK!^vRat>een)J@W6dTOtc_abZp)MGd zEKY!?u*_IBi&3#*v9Y6|cKEYZqFiFb!nXHo&tJE}>IBB0n|rg8XzrCE9}s zm8J;&3{z70>|hoon*t$4cqcI7s=@471OOKD`35oZl=XK!TB!gIvy1v!4^gwD2w!zE zF)?(eO-CSCg}glrjWt^j}IxCU%teOdYnPTe&n%;9mk;*?*Rt@Os(+(K2h7bmXpIB zNMc1_zrM$(gnY{~Ke971Tw~5Wwg^0iy}RIf(-mq7h!5`W?rLgkkQ{snKp;jNFQD6VadE}+ zTEvEj)1nv2DHtyi9)AG3LF!n6?1bjB|Cw--)4SvN6g9^|ia;wPpgD-{4I3bRjoPD@ zlZ-)BMfyS`nc8{5iUisN*^Ad|Qpj`!Rt;OVMXmek_V#uPn`Y}$MiYJ#8xI{y?%{@aYF-< zTYn?|Vz1NMFo^o25N^Rok&6A*%9!*>US9PFzwnNPdE9VIu+tJs>wk+iOhM)C;~bfgRC#dRv{tK9YQVsyoPmGj}8w}=K*HBoL(&Wh7dgsjg6&io~Wy5 zxRLQnNSRhbDpXSFXl)%Svr4j+UF=HuK3NJ~g+%or8m&MASSg5xITWGzr-Nt!@>z!4 zz1+|>h;4Xs=}6zJY~@r7W5~a2oGl6@zMQ9D(J^J4-o#VR%l+Wj`vNAAO36x7jF-Vf z2eJ5y>vDgVN?V4@f}eP*R3O-Nl)RiQsZ+=Elp^1tK>#H|8hu)zS3lPp zei@e}WcqkT*U>S9iBAtVJ+Yhk?CAh|pc^1>H&%3~-R@I7o2!)+b!g&rX4_ zzk~#F?KVFZJF;8IrW|H3F^dKVEN-yZXF&Ow1Q-mgY8VWpvMG1LWL8XSEHvjSdsg_o z310t;&RBZg8b%(T62KG?F(1!Zi2(h806%Rkdi)ereu5N`2w){CUu)~parxiUkq#>x zTg|U`-f`0;Sp38vM zP&xB~8N4^R5vhYaVVZmK3=|nz-D6`DaaHx#;P+zpUl4VyGiZQb!_VaQHj5_lHktP_5X&TRz9TCbGkmQ^f$}iIi*VeZ&GzC zC0#$d1n8mt?ptp%bm~L|{K|XxWo31h&vLvRI3BD{P_o9y$3eUMmYLbIZVS~z!vR=X zUotb74z_0Xi3A;1xFI}3*y)a^^*x^fWSFMLqSs=FCFTFSG}H*Q93~iFtEP|+iiqVnaPxCY|4)D0(LJP z9RB!Tr)B%LiL8>tLQq7$GE8=ga$Z`w#6#CY0&)k0VzT8e@L_}vegwR^aC!6@9l{#R zUo(R+n8oHsKs*+SgjYZ^5JHHn^?C&vb_$IuA???Q?H}c@b|kgfl-vP6PHzk#;49Rz z+$PWAl=2UWZepJb;_7c4%${Ajh$$+*x8=BoKw(jl zE#P0IFBEO`e741~=J+j9H&m0G)3KRxzSf~8V;1G`!#3yDj;~PE77ydpAAjer>Y_89!Ix`e-{Gh;sbcAN8Zf zHpU-|bOIK5joYDZl^jN$3!P-NwJxHFNBYZ6>DJeAHx!UwSkvU*aMW`4`R&_Lw2?HI zqnXQ2mc9D^wj1MBXa<)ryZ3L-{NLFHY+*rK!jaiGeQs+R4*Ox@)Xii2ug`6@WHQ{a zec@_ISJJL4RfFY0BZg^xb34WCkef@&cGKAcuTAYqFCBzP&%U0qsghv_%?Q7a`Z4+< z%Czl|W6wD{0OC*6d0&f8P43}X{#I(e^rM$TipcrmqhCM&cpk#?HkUNC7gaV$ncEp{ z$c(=v#a_dEQ$;emEZUCgPVsd|GMv4jiAWxQU5F z_;B2pM5ob2`m&9(K^jv_s1A&l9#&S5|8RDfk<&uaOwF)kK2e957Z$$L!@DysTSH#DIkwQ*V&*Ggur15j3bRo3F|9DBFnEEz#we?N=v#Ot| z+4HaIP<8t+!?xM>axc^Gv-zs+z71#_;d7F>0bk_97u=5d(ezsZjZMr0JaUhisCtD0+;rbniQ!!YE3l4+|NF7vJT&v2g~ROKhaA+#1~ z@X6?z!LI&BP9t25OoUMw_nThQUVGNq>P288C)(o6WlhC@9nK`zOc(kvjZGADhsGi} zIKJJHg^+HV5KEe!hNr=`rsn9pv%Rq>w9u65`?g*v2gftMtX>_gRHOQk5BltAMq2wLlIu7?L8&e z6{OF+MqpHOTY~Z=O})+NI11jmhbV!%XpVHWr&gvNzc;zn(I}k>eV1t-aMtqa#}2l( z(?#Yj%s!+;QTeq5Sw1xMHm(qmsQlWmBjtq^=C!t@Cn0slP{eJ5)X&40C%KE=H*K)_qkkGiUu}#Uzh=3OUZKsJa4H_n8xO;K7vC}45j0K zHe!>c=8Mz)lXYUe?FMBeh-meKG^l&`4we!>D^I}_UQmf>2DD(ubki>u2@EIrVP&O% z`{o8YA4<+{Ad9W0YK*%RK|d(1QnDr~O1;?P|1gaI_)6$CHZoMmJrO8_@*`qYQ&)d4 zPbpwK2RO)riSYLOwU=R;fmG(|RL=e3WPhJes;#Oz0Ac+dE~)K=!_fTV;#F#1^WjY( zKhaT9FN}-;V7+!ECMbPg{^;&`Mmq8qp7Zrh z>_}=J$3Jh=7;Hoc(=gCHLsrrC2yte}8QpbP!7 z)!BtTkUI9l?}rR;P5XRC`f4lR07RGsXW~0RD0D4Lpl_yK zhHeD8IUH>MYGu8a^J{I?|7$NosAJ?N$Mdg?mXe8!i_dba-tCyOYm#E{`$jGr$E#yB zw?aNI7Hf#UZMP8nP70y$LR%E|MX-!ktigEgY+lX~YntYBvvk#}UJNXH(ZZ}1<?zfB2ctq^iRa?azA{jQp4n-7@bBLsnzw%G&44 zx{jEUT1_p3uLb;6HQ@q|t0kH5;UnwT4`~5@|4dfLi!xop-mpB)zVF+Eu`ZBK#24t;QZ?xQk*(4#p$rGgjEM~5o!`{;19 z{yHM((^pEmRAV<^ES$2(_Qlk2(R>;~mQHu?X-^U=#5(ffL+tAW)M7rMHi%iQiNlrB zR0ME9^&+B`5E}~_>%cZa`Pv@~hmU_^7bOfwZ`>DYD1%G$D31r@MGWL!un-x^rhBY} zbn#1~;)Y9`*oYvB3u)cy!`g^x+&6U|@BUn;#HiE3=!{4GSu=)r%G6a(Awflj`9uMs zcfaJC1nwsc970}yTY2d`)=LP_uc1`>aZ6&M2iS-%sS9XWP^u*F_e2gcO&2=!))0Y5 zM~9+Q4Om0DVIdg(M)AYGka=PGrS&`z>{QeQdg_CP(=JKMOmxU4YJyR>bx@5@ibHto zL+G80cc2|TG{<7f#^NxF^!7&5xzQMiJ(VBQWK_u#Ufze#-;2-CBFYH{cn#@LF*530 za~(Vg1e>&%x4`a;=HdGoh;PevxVosYO5mY?^Mh3ec8GXg<@ z^pZHiZIpJe1c8*A4%PTwqmOp$I%1q@+a<)4u4x^!?zL|)Vw%X-Esaf@WR%JP{6~ zaA7Ja9zc-62-~9-!SuwxSRQui$}#)shI#fy%_%?d6a?%VfoBEHez zxZ?NF^UIx3U>D@gT2ZZkjUX(=hKm!Ki%|p+76pwo@`Q6=}T#SW> zC;pgkOn%3U%9e0UwQq0ov~NOoX4$$I5?RS{5xv-=`6NyZUS54*EV+7{y{ol6NIm zVbnP;TZBwH!OVv_gP;q*TD~gE`|xiQNKqja`v(7f5%V5v`8Bl|x{oht#9Xvc);7acQZ3H3yeoH*iT#NfhqgVR7r#<=~4) zFN&||mvrk#bq_i)KOWT$`7w-A{ld9D2QQFmJetbv??0lC^=}UXVT8k4UgK<+LY+L6 zr33DOO0m=~>ppotZO0KK8MerTw4EVMBG3*x=4!B*4uQi9+AkHj?psz}ox#117gCIZ zS+rWtBX*6HO9Gn{c0d5&8d@(J{;(&7K!n&^%7@gRpKegPEKyT80tf)JLUc^~_OqTz z^t0P@ag?z+q5c94+sD{j9rwE*B?^tK&kVo01R9>+WnxAYwFH!lspV-gdBZ^Qr5z6* zT*CvUglc&_fy##fii3W2B?v-o%NnTtDJe817^Bcw4*yl6Gy)nP9tW5eusZ;~Z{cV1 zcaU1GoczMP%pO9c<6cpV$$~XZhxbzi%AF>RK>XO`+NW>%_U)TmuBPPcWsutvIrNcX z?H9n7;(J42jAYe2%pDw1)VH%GN9d8&JmG$N=CGl+ZuVEzsqS54^XvmiSq*v#%#y`; zI|VQs^2=wgptT7|BgDmGP@)YR36?=DkUHN474Po7d$NW-V4+omR+i|t-3k`@3249+ zFjc{xP=T4=;tH)TPq<2$Mlh3GHhGVQQ}YA9ZS^6s>Z4921ysO_Jhn*+Q2slX|ev5K3REPLY+q2=4HMUcEai8s?zTDx(2 zPh}NfVQ--Fr~CAzHyCCJU##!VsoPeyRgJ!n*dtc75- zYevP+9fO4GO(1%@toZ%2+yrIt9l;mkAK}~hH&~?FXmNCA?Q3_?ktqjB|Je?DN7Hoi zLn63&$ zD)i@-wAskr5{?q>meIHs?A&2Mu`@gytmu)&pDt7?5_Iw+I`;iZYZFeMfEH@CMdED3PO4_0RWM8gFQ9zq5E@JPR07O7 z$zm+$&1o^}MoD7rh?Y2*`sK05goaRhc>!OD${StA0JR8GQc`n5OiX@z8{ZaU&A<%> z(q%o<=m%iCy>H5S@eZ|w7aw}kTl4JGqnJd{^P<5UXOI;=EBPSifgs8Qp%X1&$7(!% zdWiObL4t#?QXTv+!FBww62axt!PYl%i+BVq=Je;9((;*5yLr{?9;jhZvFXSSUpV0@WVccXVNb`>xN_Fnv~1=x)&Tf zh&UdF2n01*Ca}uEOyGjjG_(9_-j$Zs^P~K8S@(K4>!dCc2O<*}J^ef=s`ud(P6Meq zTt8j>-sS!(WUmY;?i!sCg24xYDv`|Ha!3Y`^Di&&JX3{buhI4z2mHxUq%TyI3;*d? z0l(Xk_%h^lr*Xcp1yI6~*fXh0eHa>P$^Y+ZJk`k?jKtfmz;NPHdqu4-aSS4?eNT@x9AN- z;kvx8)?4t55G}YE4s?UF>rHUHzd-Va~{(idiojF{K%l}a?k zGqjNwZGjs>NL@}6CBSCUD5$I4=8v#v6TICCvC9`05Ca z^<3gow*$~azyc7^Pvlx0`kD6dnCs}f(RpFvTx7e6K13yU6rrYMXpVgn>przo0w*1)Uf0O5SSNF0njGt$B2vrm8 zDTXLHp8-M|gbjAWw@z=c^`S-6sm9_g@6;>h)(nh7DMbH`*xv zzEhZ9QBd{a2qmL!Oxt}J7cLDyrV+djy|==A^YHEsQ1+2Z>9e@-9$?GbZK%GSNVg`a zs7m#9E8Hn*2QU=w^s>R%pr9she^=*b6g*Ti4G1q#S5#U(|K z(7O9@0PiMHR=OL#(fB5Bu@J;5fBT}3sT~+vEP?46_t*mnB#zyZX+VTACZha`MJTqo zm-olr1r)rkIy`hYfsVp1s$9(->tsrV_1gmXkSAjyK$LN^IF(<{p}JmP@XWTRAqVcI zfl?2egXvm+`6e$gWetUow5hc@M`VaP1^rVhlXu<$SblUpWQO7p5u|5a(E6u@a@GQ> zQWDcsPDxcfgi8Nv?rj-(oz@&=^wdCfdE0IuCjD$7wLn-0K-8KGxQ^!J;kI+6fx+Hz z+9jwBm5+se^6vI8EOC918A(bW7@przqQ_Z@rvbbu{&v#nkQ>}mn4Wfzg^z)ZE9@rS zmVw6~zZ@ryyM#&tQK!PaBQ#@-&My=ZV#o=U`u_1P970QiD}G9M=`gicE+gXKzCel6 zis8Lx@P&2#_wNEU2qTk2uv-Me?&2UyXWK+-Kt0L!Mk0MfuqA4v=n!>J9vtI8#y=)d z-+43EB3>eXzMMp>t`o0}fgtj+*D94h6IR4qV+PG5+-*j}G?Gph)AKcd^lx$Czh;tG z{fxB~!idHZAuQgg+GIRJOUD;-X|_dB1aA_Fg-FGG@y{V@@OG4|Kq6D$kUp$^{uxv4Qs-Y0~E;6kSD zTdRv7NZD?b-Td?JgO(Aorx_NSPC5_V+85dd{o)rR@9C8VLq3E$m%X~V_J*`vniI%d z_Z@Y0kh6wPbJ>V#PRF5o4v@lY3|D;I30Ztuwz$7oe$xU6#!~s z@`kM~uR)}Q8UqWbWW7s^3G#mCiH7{w5U%lgq5k)G=xtn3e9g?fT64Vq zwaSyj&!Fq2S)XNcKs*LwCZf?y4I0@VHfUl6C0anJde8+H@#PTJ0L~h}c;SAs)y%~7 zWeE8nthXkBBbXeS#*0ahu%qMatEk-j11_vZ$d~_7)6Uj#UX7ib&f$aR_Twx2MWLc; z#K?$nFTJgY;%1Pw3#(WPJhj4chA!vD7+2FSVt5wZjgBXZv^AzF#|D=Yh+$bxO%3R~ zAeVy#J{UqyTm8qM-&S==2B%ISjX;KVjs}EsC_dtb@*(O*0GxVGPxUw{9J}ST2>lf5-^abYN2}2^iUMSEuGXPP;1u%nK(V9O&)xBCFI`_Q)>9UWDzSDFDt1t7`H z#3bW_q0e{DBlRWuK0P9v^UDv`e`Sodd%!fOVioj0 zBOd~&Xn>lgtFC|0)r6n|{Ju+kyDoJo;1OA#<28O?m2fh3UweMSw0q8;dr&n-t9u97 zovr_cMSbr;8In&Ih666({uudLTaSO@mhF&4Bdr-AC0j>WsCj6=+arc**)IIvK zSz7cRGy-lf3ysd&4$Ezdh|Hr%a0pRXY}4MF>?a#K)vNGR;)aJ%y`iI#uEcmlm}n#< z@&F18yq8wF!ADyzUk}cHY5&P^s9t{NgWfg2V6DF9H(<=k!xvXW8GfnFvBv~r;S7Jc zy{wrgQ+{dSDhpJE+mY9ncS9msH=W8QzlZam4O zs6QZ&sk39uH}SMbJDCcNcpD!^Ia+HZ%@h5D3A=Mcq`_c~rj>OeW9|Cc9((Z8N)#;V zS&*0Om}ye!RP)t;-t!}25uE!S7;Mc2cMu3Qu*9wiKG=UHZG(M2k? zDf+#AFC%xkf)`kV^P-HQvs1J0I?*@0>|8jTy18cR3@N$JWV~Rc@1?z?u;A!Y7J4LX(MQxB!F>*}jP;`Ea)9&+B&ooKc=@-&|T?Z`+`_y3k zGy0uuCx`Z`E@7C(#91v$^GO4&z_c!n35*EcsUuy&$Zjh6<*$1&1O~5MwqHt?E1lfB zDTA`&o?zQ4mCQLnGpuRb$F-d__tQt?3uFvG;rfi<%$u2+W((F=*zgZ={Sc4_L$^tD zTBvOCf-bt4DD7N$IA`tM%e5&r{YJ1S>iGJf)8j>5OTB+yWk4*zFIedi-T<>o^Z&Pi zh$DE^i2;3;ag2Nai1}>68i}qB_kPXrCLthND9r^W>An0K{Miwx-hm=}p=TeksAK{d zgDY%-hb;yNu#KSP zZ$ZfAx@%;*nPk5tkMZ>>0_XCpJ-E-*@(nLax2t3Qd^2cbpb*awzGptD3GT47Gg{m6 z!drpPC!|ujNag@0ZB? zKk-TQqKqzfW>36w2)9c?n+z^8BQ_ukL#aPhZZ^<9g);?sBKwX66X8qUG@ZG z1-9|PP6w#UrmJRo_luZ!eaeXL`X>E+pLY+ zp%8BOv&sxSWftwqV)zpW?xUHHX#=sGPyY-BlW@*}Vtoj$IIFvWPnYhAm}Z<4t;73Q zZltIGaWm+FVq-ZW`1&V0^qp>YWft~zf}~_-hcnQTN1<^r|XNGSJ2gWzCr(#ae!>`@0-MWptK5R0MLcMyVuYa znE!c;{U%_Vp&cu4W9mM>T=&NVf`ve=eZF-|UdD< z_uzBc-o%a&fr=4C9VT0MP^EWn?EUVZ?_}twKAL~OMiOcJ+9YxJ0lGIYc4YNFoV$eR zoAj1tjaH#M5F^nhLvclBK?|R2MdLQU1wFci3ZZwYyGeQ{D`DY9Y6Xj2>_kHJ|F_&v*p6~Phe)jvm2jDT1yO6(iuMs`iEGHrbM|-m!yxHE4Z; zK0{s#BT%>ULMMiQ1U4;xDpt7e8EuS2gUYHJ($^)A^RH;&XV+~f|1U9;@BfAH+yXmaj6FQ{A`WqQ0b5P~i67{z5}%S{~|9J*as5)AL~JDah;`{Pmcfu2~=xS#3?tgP^R0EuT*e z@=5Czj2(ZEC?EWQo*}VJ&5eXJr>3T6WvvX5z*J6sb*n#HcR&Y7jhT&Q*V_61d<_D> zjlow3)KVmR=CCtCgvgK3NA7Sd7f-Z@phFY^%Z;raL{Cz>ao`seb4bz{oE}fP)Q9JF zbd!~id2b<6n1Xgz*?PC~@=T&$pt+B3si0h%ntKiWrLuj|O(--a-%(WKC8D@5R=zEH zZ}K8tN-<^NLsZKj3Un5f!+iN0k;hvK+P#}$W@fVYK2v$CsO$fmed*m-r_ePnOka-u zKQPYPB=~mZ55Sz(y^*n=4)#2MV?rHhbaZ7+6G*&#7@@%Yqsha=(E?I@b~v;Y6YZRgv zKhD$=%L`-*r(Ox1&iVBjYrNvLW4}2G2h0hsXm2FX?{~h7BC9M}BrlfOzv7p7Q}y^% zvhi<{tU%-s-Ki#MmBW8Ms$0zib<^lP>(Ap;uf_NYkPa$Q6&zETN z!jR*>1i1<#iTwWm-3z7`d*Pr7_6Zs-5Lx;NhNx+tEg}ezt8NYsK8Gt)um86gxY@a} zzDO(k5i0+u_*mN*%ZIhyvO02h*>R^iw5Iv1M73?x>ebT{D9s+MQDJIR2v2syXwlvlF$;ru@`dj6T4iOP?zCA#A98{)AN?zTg=0mYyk4}&b zRm^3aoSs9sDfamBqsNXtd+Ym3xzM#OobO^mM$gkLHj51SYn*}b=5P{B3*zG&;cui? zt`yd|?A+DSk;-ItzkGSEO`1xbA;)tSk*21m&dwJP%%J{ULZPT@+G~aA@txZUeLdTq zJG5nKE|cLUJ}sI< zE4fcTPfR!igiVqQ)!eMipo2kPM!i?)?%lfqD$n!VNX2jyMLG%49vmFRm{oK;@Yf{! zX1rIIO3ehR&NkZwqpYByAg;^3>guoRF7jIPJlq(h2Z{Awetv!;B4;7V5U74!*)L_h zeK@9}p#hWUCtY1rfUH9baCCeeqhw=q^LogXESAp?>#S%}L0_h;~Role-iZ>B4 zjH|rFWDed2Tpyxlf2ozz(?=&7P>i269lEf_$!8tpyFd^#&HL19H0RLw+{d1to;$&( zVIdWsx<`Abiwl$=PbKj2@d+EG_Y8jcP;#^yGuE%XF&3`uet(w<+u}FX-9Yob861?Q2+ltas&xw8 zr>R}Od7i+DwHm?b#^wEK7z!#gW&75xr}bi)yM&tQZEy5^k~3^g-fca4^r)kw_?_Uu zz{h{Z-_X)nzy3z_ToesL6?T5nk&%)6_U*&ogFyq$HxMXE&?soQ(W|v68GNDw&OVc~ zvorf>N5C%;e^D9Dt*fhRU0FNdp8hR&g3z$oRxJDVH3spwSS(NH+c>hc6sVy9uM&p4 z8<&&Lla!piN^d;O-y0HA+WJURejB+e z-uy?8ENSJpZe>q8j^~JqTWOyBU0Bjc_F(4;A-hP#n%y~}Bt0tN>8hTyp`oGWV<0;| zeE9Ihi4(70d4+^7>$jDfPm`39832tK3VS|2T+k8oTbM#urWf$m;$*q(+NB_7X>Put zb%%ME1u>H4$7iyz=)C!KLVuQ~ujtx!>k{b^u8twC5ubA=KDPdDN6$$%8Ro}5wW3kK zfFpQ?>DZOyZfaFExqSOTUuH&q)(P9E9`5C z)m|?5&$^*-IBBGjbd~?5Q|WF?GXn#I>FlQ^_LTX5I;e;x0_X|9f=VxMI0g{j1plK) zcQ=M6HJ2=CV+MS=R=Ch@PkV`ZD|6e5`Qt9e7v2*g1|^{Q?G2T6D>|otY<_Ty3C^N; z;gr^{!0T*wW&#?5y2qLujEfdN;BpJ;3Rg=@7Yhh5Q{Kh9EDnGuK$VEE!=81FhM=yr zbzI+N7r*D9%~FmX{P?kTy%qe*=xFh+TenKGXBu>&DX?zw!^cZgubj0mfRbKz6=!^Vz{NlK%m%#8Q`Gp1iVDa(Jn!sO2&{~r zt2sbuYG^$4VgX%}sDG+3iikO9zJ5_);CsBABxivSSKa^v!ko3fp<&BxnYtZ)%g@{i z2^>`&e%(3%T}j=9bLT{H)ZqfZeEoXs&K)NQhZ}3zI88YQkBqWWqfbsApk*+bbUNKO z{1BRs2ot?wz$F7~jRFcLbvF&J5ACHGBT~fAG%qa!&VW_T8bMA8uuk8CX$d7;0WB^2Y}Cr!VJBnWNn%+i1Og6 z$-Z2qtYFieJ3)mHE0*=^epP=OJ_)E4)qznicPW+14t}YH+!_FpjW1MPRaI3%qwnwUuR1%>*?AKX6I}h?7p%*d6*q3Ylbh>79&ej@gt5)1>Tbg0 z)T`m)Ygexp9aYxX2S{L8xZtT`288ym6ciKy=-J-By&xIZC?_X}b;1!>p=viS+b>Qf zte&Hz{Je{Ngu(xTX$B;QXgjo%Y_jz(KfZxv$&00KhUg0!p-}r<|w&5OM(ZkilrRnwm|zXGM8=faOCShk*y4(H?3fdO|{SHy_- zBO^)!Ip@zmZEsia3l&+Z;}H_l5Dj{-&mL`ty~M9>QtoNlgVH(P#|SwNRDT%CK#+*M^FNzH)v^zroi+|a)u!a0by&zPeX`9UcWwun2+R* zmI9~iPjYPQ>dFl;o=)V{)ou3VE>W_+sXU~*Q_Z!?VQBvvr&5J{eDu$ITM5FIyqmH# zNhzu3mX@0RL%bApIkwGSMo_|BDb5opy5$lQoDEPcg)YL#7+9IW!ox-SY&v}C5G4f| zu?OGFBUOFLNfLGvI9_O7tl6!8_~60wixvW+?_;d5FFJ;5I|w5uYn&dw3vKY7L&pMxsl-<&D1@)L{M zY!~*o1w`&X-0%Cps|$Mv1X!A_eSAQii+ltpP&_O!I9M?s{sj6=C)43j5{fG;ox3y) z4Xw>Szxi|e{fqwo95^#G1o>}YzBu&9WQuQGN-!v<3j~RxHF|n_ii#&+)_@O5f)HI@ zrQjMN)e-IeYIJlp1>Ge)w&}JY2n1z%N~HO|c3Jx6+qcHX# ziyn>F-hlYl_ zySq{R-UAc@eNASFMsJS=GTgpM9-?id1FEe#zu@3(CX=LWmXwq*nG!v_l6p^_jg5^3 zUMupAEG!wZ2FBB*##+n9$a{nDzHiJRX_67#pcaK3(y7z?vNTe)Cn;P)5 zc3PRu&dyd}%CnA7Mn;DGFzmDV z>3XnjE@Z2H`TW`CrvG+W?EU-qKermGQKv8;GvkhqhC_1Ub*%KJFsm?4-{@%Mcx>dA zddr)62w`4v1%iR|b`y6JG-R-lvaj(X3PZ^Vu2a>u>h~X$zC9|ba48D~1t%saQ@{F3 ztOBk@UVa#yaH03ViEX#AX#8U!nUn;7q{2a0SuABT-xMvoI;$UScOJq+Z|!644I4=A zzNZ8<5e}Ob?(~LdA0U4t;PBy-W#{GcvHsj#S-9Fu(X-B{H{Y9RpyCn{((2%2hun2SAss)T9(gUW@u?bBU#QHrtpr)_?yv4!-rvYm9 zCjo&yqqoJ(OnA{EXaMPd4xr+96UdW}M@H>lLq = 4; + B->p = 3; + B->A[1][0] = RCONST(2.0) / RCONST(5.0); + B->A[2][0] = RCONST(-3.0) / RCONST(20.0); + B->A[2][1] = RCONST(3.0) / RCONST(4.0); + B->A[3][0] = RCONST(19.0)/RCONST(44.0); + B->A[3][1] = RCONST(-15.0)/RCONST(44.0); + B->A[3][2] = RCONST(10.0)/RCONST(11.0); + B->A[4][0] = RCONST(11.0)/RCONST(72.0); + B->A[4][1] = RCONST(25.0)/RCONST(72.0); + B->A[4][2] = RCONST(25.0)/RCONST(72.0); + B->A[4][3] = RCONST(11.0)/RCONST(72.0); + + B->b[0] = RCONST(11.0)/RCONST(72.0); + B->b[1] = RCONST(25.0)/RCONST(72.0); + B->b[2] = RCONST(25.0)/RCONST(72.0); + B->b[3] = RCONST(11.0)/RCONST(72.0); + + B->d[0] = RCONST(1251515.0)/RCONST(8970912.0); + B->d[1] = RCONST(3710105.0)/RCONST(8970912.0); + B->d[2] = RCONST(2519695.0)/RCONST(8970912.0); + B->d[3] = RCONST(61105.0)/RCONST(8970912.0); + B->d[4] = RCONST(119041.0)/RCONST(747576.0); + + B->c[1] = RCONST(2.0) / RCONST(5.0); + B->c[2] = RCONST(3.0) / RCONST(5.0); + B->c[3] = RCONST(1.0); + B->c[4] = RCONST(1.0); + return B; + }) + ARK_BUTCHER_TABLE(ARKODE_ZONNEVELD_5_3_4, { /* Zonneveld */ ARKodeButcherTable B = ARKodeButcherTable_Alloc(5, SUNTRUE); B->q = 4; From 747da2ef60accfa78a71077abe2786bc3a532b59 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" <19565938+yhmtsai@users.noreply.github.com> Date: Thu, 26 Oct 2023 23:05:37 +0200 Subject: [PATCH 12/35] Add Ginkgo dpcpp into example (#351) This PR adds the ginkgo dpcpp support in the examples: sunmatrix, sunlinsol, and cvode/cv_heat2D I currently only know `queue->wait_and_throw()` to synchronize which requires queue unlike `cudaDeviceSynchronize` or `hipDeviceSynchronize` In sunlinsol and cv_heat2D, some function signatures are also used in other files such that I can not pass an additional parameter. I use global variable to store ginkgo executor and then get the queue when SYCL needs sync or submit the kernel in those functions. --------- Signed-off-by: Yu-Hsiang M. Tsai Co-authored-by: Cody Balos --- .clang-format | 1 - cmake/macros/SundialsAddExamplesGinkgo.cmake | 2 + examples/cvode/ginkgo/CMakeLists.txt | 5 +- .../cvode/ginkgo/cv_heat2D_ginkgo.DPCPP.out | 77 +++ .../cvode/ginkgo/cv_heat2D_ginkgo.HIP.out | 77 +++ examples/cvode/ginkgo/cv_heat2D_ginkgo.cpp | 339 ++++++++--- examples/cvode/ginkgo/cv_heat2D_ginkgo.hpp | 224 +++++--- examples/sunlinsol/ginkgo/CMakeLists.txt | 5 +- .../ginkgo/test_sunlinsol_ginkgo.cpp | 543 ++++++++++++------ examples/sunmatrix/ginkgo/CMakeLists.txt | 5 +- .../ginkgo/test_sunmatrix_ginkgo.cpp | 349 ++++++----- 11 files changed, 1163 insertions(+), 464 deletions(-) create mode 100644 examples/cvode/ginkgo/cv_heat2D_ginkgo.DPCPP.out create mode 100644 examples/cvode/ginkgo/cv_heat2D_ginkgo.HIP.out diff --git a/.clang-format b/.clang-format index 35fa8341a5..c7692ae54e 100644 --- a/.clang-format +++ b/.clang-format @@ -140,7 +140,6 @@ SpacesInConditionalStatement : false SpacesInContainerLiterals : true SpacesInParentheses : false SpacesInSquareBrackets : false -SpaceBeforeSquareBrackets : false Standard : c++14 TabWidth: 2 UseCRLF : false diff --git a/cmake/macros/SundialsAddExamplesGinkgo.cmake b/cmake/macros/SundialsAddExamplesGinkgo.cmake index 1e56b34230..7fc5ef2d5f 100644 --- a/cmake/macros/SundialsAddExamplesGinkgo.cmake +++ b/cmake/macros/SundialsAddExamplesGinkgo.cmake @@ -63,6 +63,8 @@ macro(sundials_add_examples_ginkgo EXAMPLES_VAR) elseif(backend MATCHES "HIP") set_source_files_properties(${example} PROPERTIES LANGUAGE CXX) set(vector nvechip) + elseif(backend MATCHES "DPCPP") + set(vector nvecsycl) elseif(backend MATCHES "OMP") set(vector nvecopenmp) elseif(backend MATCHES "REF") diff --git a/examples/cvode/ginkgo/CMakeLists.txt b/examples/cvode/ginkgo/CMakeLists.txt index c993e377c4..6c876d691c 100644 --- a/examples/cvode/ginkgo/CMakeLists.txt +++ b/examples/cvode/ginkgo/CMakeLists.txt @@ -20,7 +20,7 @@ set(cpu_gpu_examples sundials_add_examples_ginkgo(cpu_gpu_examples TARGETS sundials_cvode - BACKENDS REF OMP CUDA HIP) + BACKENDS REF OMP CUDA HIP DPCPP) # Examples that only support CPU Ginkgo backends set(cpu_examples @@ -39,6 +39,9 @@ if(EXAMPLES_INSTALL) if(SUNDIALS_GINKGO_BACKENDS MATCHES "HIP") list(APPEND vectors nvechip) endif() + if(SUNDIALS_GINKGO_BACKENDS MATCHES "DPCPP") + list(APPEND vectors nvecsycl) + endif() if((SUNDIALS_GINKGO_BACKENDS MATCHES "OMP") OR (SUNDIALS_GINKGO_BACKENDS MATCHES "REF")) list(APPEND vectors nvecserial) diff --git a/examples/cvode/ginkgo/cv_heat2D_ginkgo.DPCPP.out b/examples/cvode/ginkgo/cv_heat2D_ginkgo.DPCPP.out new file mode 100644 index 0000000000..a843e62f28 --- /dev/null +++ b/examples/cvode/ginkgo/cv_heat2D_ginkgo.DPCPP.out @@ -0,0 +1,77 @@ + +2D Heat problem: + ---------------------------- + kx = 1 + ky = 1 + tf = 1 + xu = 1 + yu = 1 + nx = 64 + ny = 64 + dx = 0.015873 + dy = 0.015873 + ---------------------------- + rtol = 0.0001 + atol = 1e-08 + ---------------------------- + lin iters = 20 + eps lin = 0 + ---------------------------- + output = 0 + ---------------------------- + + t ||u||_rms max error + ----------------------------------------------------------------------- + 0.000000000000000e+00 1.273091462283009e+00 0.000000000000000e+00 + 5.000000000000000e-02 1.265953031236337e+00 5.779434661301597e-04 + 1.000000000000000e-01 1.245126467995815e+00 8.596410825743028e-04 + 1.500000000000000e-01 1.212971698816507e+00 1.027071183737238e-03 + 2.000000000000000e-01 1.173149707911348e+00 1.049506292939650e-03 + 2.500000000000000e-01 1.129970993609124e+00 7.767258516966358e-04 + 3.000000000000000e-01 1.088067923761304e+00 3.857233565973672e-04 + 3.500000000000000e-01 1.051569245238569e+00 2.296842605538085e-04 + 4.000000000000000e-01 1.023519508142414e+00 1.160865021105906e-04 + 4.500000000000000e-01 1.005965995289331e+00 3.382124480899584e-05 + 4.999999999999999e-01 9.999934385586851e-01 6.776957530241212e-05 + 5.499999999999999e-01 1.005920028619227e+00 1.074298825753939e-04 + 6.000000000000000e-01 1.023439646225216e+00 1.256532195708093e-04 + 6.500000000000000e-01 1.051474380012092e+00 4.493207354094864e-05 + 7.000000000000001e-01 1.087965937316374e+00 8.048432853913212e-05 + 7.500000000000001e-01 1.129792873621271e+00 2.390181284921411e-04 + 8.000000000000002e-01 1.172918427971992e+00 4.322892993839922e-04 + 8.500000000000002e-01 1.212840417005807e+00 6.887143222911174e-04 + 9.000000000000002e-01 1.245177171528506e+00 9.911108446238881e-04 + 9.500000000000003e-01 1.266240637720725e+00 1.309517693130591e-03 + 1.000000000000000e+00 1.273471195699113e+00 1.189685923645323e-03 + ----------------------------------------------------------------------- + +Final integrator statistics: +Current time = 1 +Steps = 41 +Error test fails = 0 +NLS step fails = 0 +Initial step size = 0.002110117778857815 +Last step size = 0.02437232616233551 +Current step size = 0.02437232616233551 +Last method order = 3 +Current method order = 3 +Stab. lim. order reductions = 0 +RHS fn evals = 52 +NLS iters = 49 +NLS fails = 0 +NLS iters per step = 1.195121951219512 +LS setups = 7 +Jac fn evals = 1 +LS RHS fn evals = 0 +Prec setup evals = 0 +Prec solves = 0 +LS iters = 873 +LS fails = 0 +Jac-times setups = 0 +Jac-times evals = 0 +LS iters per NLS iter = 17.81632653061224 +Jac evals per NLS iter = 0.02040816326530612 +Prec evals per NLS iter = 0 +Root fn evals = 0 + +Max error = 1.189685923645323e-03 diff --git a/examples/cvode/ginkgo/cv_heat2D_ginkgo.HIP.out b/examples/cvode/ginkgo/cv_heat2D_ginkgo.HIP.out new file mode 100644 index 0000000000..4022dba817 --- /dev/null +++ b/examples/cvode/ginkgo/cv_heat2D_ginkgo.HIP.out @@ -0,0 +1,77 @@ + +2D Heat problem: + ---------------------------- + kx = 1 + ky = 1 + tf = 1 + xu = 1 + yu = 1 + nx = 64 + ny = 64 + dx = 0.015873 + dy = 0.015873 + ---------------------------- + rtol = 0.0001 + atol = 1e-08 + ---------------------------- + lin iters = 20 + eps lin = 0 + ---------------------------- + output = 0 + ---------------------------- + + t ||u||_rms max error + ----------------------------------------------------------------------- + 0.000000000000000e+00 1.273091462283009e+00 0.000000000000000e+00 + 5.000000000000000e-02 1.265953031236678e+00 5.779434664550109e-04 + 1.000000000000000e-01 1.245126468025294e+00 8.596397006188639e-04 + 1.500000000000000e-01 1.212971692633635e+00 1.027175433592653e-03 + 2.000000000000000e-01 1.173149607363054e+00 1.048511182224932e-03 + 2.500000000000000e-01 1.129971118809724e+00 7.777031646749588e-04 + 3.000000000000000e-01 1.088068652479702e+00 3.867786296469777e-04 + 3.500000000000000e-01 1.051569453796381e+00 2.291655197173004e-04 + 4.000000000000000e-01 1.023519691519509e+00 1.152507676536185e-04 + 4.500000000000000e-01 1.005966466128100e+00 3.464712602863074e-05 + 4.999999999999999e-01 9.999941074735683e-01 6.549023902890916e-05 + 5.499999999999999e-01 1.005920139252285e+00 1.085862394125670e-04 + 6.000000000000000e-01 1.023440066617863e+00 1.253667245226797e-04 + 6.500000000000000e-01 1.051474489311814e+00 4.368064039983466e-05 + 7.000000000000001e-01 1.087966430721224e+00 8.251704806849780e-05 + 7.500000000000001e-01 1.129793211633907e+00 2.403035068856418e-04 + 8.000000000000002e-01 1.172918720617323e+00 4.322501964297842e-04 + 8.500000000000002e-01 1.212839862652567e+00 6.883283219258907e-04 + 9.000000000000002e-01 1.245175128903723e+00 9.857170108882318e-04 + 9.500000000000003e-01 1.266235911062742e+00 1.301833676780495e-03 + 1.000000000000000e+00 1.273469281873646e+00 1.183015268845011e-03 + ----------------------------------------------------------------------- + +Final integrator statistics: +Current time = 1 +Steps = 41 +Error test fails = 0 +NLS step fails = 0 +Initial step size = 0.002110117764420172 +Last step size = 0.02782878040979117 +Current step size = 0.02782878040979117 +Last method order = 3 +Current method order = 3 +Stab. lim. order reductions = 0 +RHS fn evals = 52 +NLS iters = 49 +NLS fails = 0 +NLS iters per step = 1.195121951219512 +LS setups = 7 +Jac fn evals = 1 +LS RHS fn evals = 0 +Prec setup evals = 0 +Prec solves = 0 +LS iters = 875 +LS fails = 0 +Jac-times setups = 0 +Jac-times evals = 0 +LS iters per NLS iter = 17.85714285714286 +Jac evals per NLS iter = 0.02040816326530612 +Prec evals per NLS iter = 0 +Root fn evals = 0 + +Max error = 1.183015268845011e-03 diff --git a/examples/cvode/ginkgo/cv_heat2D_ginkgo.cpp b/examples/cvode/ginkgo/cv_heat2D_ginkgo.cpp index 2671c77934..e979d60459 100644 --- a/examples/cvode/ginkgo/cv_heat2D_ginkgo.cpp +++ b/examples/cvode/ginkgo/cv_heat2D_ginkgo.cpp @@ -38,8 +38,9 @@ * The spatial derivatives are computed using second-order centered differences, * with the data distributed over nx * ny points on a uniform spatial grid. The * problem is advanced in time with BDF methods using an inexact Newton method - * paired with the CG linear solver from Ginkgo. Several command line options are - * available to change the problem parameters and CVODE settings. Use the flag + * paired with the CG linear solver from Ginkgo. Several command line options + * are available to change the problem parameters and CVODE settings. Use the + * flag * --help for more information. * ---------------------------------------------------------------------------*/ @@ -53,19 +54,23 @@ #if defined(USE_CUDA) #include -#define HIP_OR_CUDA(a, b) b +#define HIP_OR_CUDA_OR_SYCL(a, b, c) b constexpr auto N_VNew = N_VNew_Cuda; #elif defined(USE_HIP) #include -#define HIP_OR_CUDA(a, b) a +#define HIP_OR_CUDA_OR_SYCL(a, b, c) a constexpr auto N_VNew = N_VNew_Hip; +#elif defined(USE_DPCPP) +#include +#define HIP_OR_CUDA_OR_SYCL(a, b, c) c +constexpr auto N_VNew = N_VNew_Sycl; #elif defined(USE_OMP) #include -#define HIP_OR_CUDA(a, b) +#define HIP_OR_CUDA_OR_SYCL(a, b, c) constexpr auto N_VNew = N_VNew_Serial; #else #include -#define HIP_OR_CUDA(a, b) +#define HIP_OR_CUDA_OR_SYCL(a, b, c) constexpr auto N_VNew = N_VNew_Serial; #endif @@ -73,7 +78,8 @@ using GkoMatrixType = gko::matrix::Csr; using GkoSolverType = gko::solver::Cg; using SUNGkoMatrixType = sundials::ginkgo::Matrix; -using SUNGkoSolverType = sundials::ginkgo::LinearSolver; +using SUNGkoSolverType = + sundials::ginkgo::LinearSolver; // ----------------------------------------------------------------------------- // Functions provided to the SUNDIALS integrator @@ -83,7 +89,8 @@ using SUNGkoSolverType = sundials::ginkgo::LinearSolverget_queue(), sunctx); +#else N_Vector u = N_VNew(udata.nodes, sunctx); +#endif if (check_ptr(u, "N_VNew")) return 1; // Set initial condition @@ -120,34 +151,26 @@ int main(int argc, char* argv[]) N_Vector e = N_VClone(u); if (check_ptr(e, "N_VClone")) return 1; - // --------------------------------------- - // Create Ginkgo matrix and linear solver - // --------------------------------------- - -#if defined(USE_CUDA) - auto gko_exec{gko::CudaExecutor::create(0, gko::OmpExecutor::create(), false, gko::allocation_mode::device)}; -#elif defined(USE_HIP) - auto gko_exec{gko::HipExecutor::create(0, gko::OmpExecutor::create(), false, gko::allocation_mode::device)}; -#elif defined(USE_OMP) - auto gko_exec{gko::OmpExecutor::create()}; -#else - auto gko_exec{gko::ReferenceExecutor::create()}; -#endif - auto gko_matrix_dim = gko::dim<2>(udata.nodes, udata.nodes); auto gko_matrix_nnz{(5 * (udata.nx - 2) + 2) * (udata.ny - 2) + 2 * udata.nx}; - auto gko_matrix = gko::share(GkoMatrixType::create(gko_exec, gko_matrix_dim, gko_matrix_nnz)); + auto gko_matrix = + gko::share(GkoMatrixType::create(gko_exec, gko_matrix_dim, gko_matrix_nnz)); SUNGkoMatrixType A{gko_matrix, sunctx}; // Use default stopping criteria - auto crit{sundials::ginkgo::DefaultStop::build().with_max_iters(static_cast(udata.liniters)).on(gko_exec)}; + auto crit{sundials::ginkgo::DefaultStop::build() + .with_max_iters(static_cast(udata.liniters)) + .on(gko_exec)}; // Use Jacobi preconditioner - auto precon{gko::preconditioner::Jacobi::build().on(gko_exec)}; + auto precon{ + gko::preconditioner::Jacobi::build().on(gko_exec)}; - auto gko_solver_factory = gko::share( - GkoSolverType::build().with_criteria(std::move(crit)).with_preconditioner(std::move(precon)).on(gko_exec)); + auto gko_solver_factory = gko::share(GkoSolverType::build() + .with_criteria(std::move(crit)) + .with_preconditioner(std::move(precon)) + .on(gko_exec)); SUNGkoSolverType LS{gko_solver_factory, sunctx}; @@ -206,7 +229,8 @@ int main(int argc, char* argv[]) flag = WriteOutput(t, u, e, udata); if (check_flag(flag, "WriteOutput")) return 1; - for (int iout = 0; iout < udata.nout; iout++) { + for (int iout = 0; iout < udata.nout; iout++) + { // Evolve in time flag = CVode(cvode_mem, tout, u, &t, CV_NORMAL); if (check_flag(flag, "CVode")) break; @@ -239,13 +263,15 @@ int main(int argc, char* argv[]) sunrealtype maxerr = N_VMaxNorm(e); - std::cout << std::scientific << std::setprecision(std::numeric_limits::digits10) + std::cout << std::scientific + << std::setprecision(std::numeric_limits::digits10) << "\nMax error = " << maxerr << std::endl; // -------------------- // Clean up and return // -------------------- + udata.exec = nullptr; CVodeFree(&cvode_mem); // Free integrator memory N_VDestroy(u); // Free vectors N_VDestroy(e); @@ -259,15 +285,19 @@ int main(int argc, char* argv[]) #if defined(USE_CUDA) || defined(USE_HIP) // GPU kernel to compute the ODE RHS function f(t,y). -__global__ void f_kernel(const sunindextype nx, const sunindextype ny, const sunrealtype dx, const sunrealtype dy, - const sunrealtype cx, const sunrealtype cy, const sunrealtype cc, const sunrealtype bx, - const sunrealtype by, const sunrealtype sin_t_cos_t, const sunrealtype cos_sqr_t, - sunrealtype* uarray, sunrealtype* farray) +__global__ void f_kernel(const sunindextype nx, const sunindextype ny, + const sunrealtype dx, const sunrealtype dy, + const sunrealtype cx, const sunrealtype cy, + const sunrealtype cc, const sunrealtype bx, + const sunrealtype by, const sunrealtype sin_t_cos_t, + const sunrealtype cos_sqr_t, sunrealtype* uarray, + sunrealtype* farray) { const sunindextype i = blockIdx.x * blockDim.x + threadIdx.x; const sunindextype j = blockIdx.y * blockDim.y + threadIdx.y; - if (i > 0 && i < nx - 1 && j > 0 && j < ny - 1) { + if (i > 0 && i < nx - 1 && j > 0 && j < ny - 1) + { auto x = i * dx; auto y = j * dy; @@ -284,7 +314,8 @@ __global__ void f_kernel(const sunindextype nx, const sunindextype ny, const sun auto idx_e = (i + 1) + j * nx; auto idx_w = (i - 1) + j * nx; - farray[idx_c] = cc * uarray[idx_c] + cx * (uarray[idx_w] + uarray[idx_e]) + cy * (uarray[idx_s] + uarray[idx_n]) - + farray[idx_c] = cc * uarray[idx_c] + cx * (uarray[idx_w] + uarray[idx_e]) + + cy * (uarray[idx_s] + uarray[idx_n]) - TWO * PI * sin_sqr_x * sin_sqr_y * sin_t_cos_t - bx * (cos_sqr_x - sin_sqr_x) * sin_sqr_y * cos_sqr_t - by * (cos_sqr_y - sin_sqr_y) * sin_sqr_x * cos_sqr_t; @@ -328,14 +359,67 @@ int f(sunrealtype t, N_Vector u, N_Vector f, void* user_data) if (check_ptr(farray, "N_VGetDeviceArrayPointer")) return -1; dim3 threads_per_block{16, 16}; - const auto nbx{(static_cast(nx) + threads_per_block.x - 1) / threads_per_block.x}; - const auto nby{(static_cast(ny) + threads_per_block.y - 1) / threads_per_block.y}; + const auto nbx{(static_cast(nx) + threads_per_block.x - 1) / + threads_per_block.x}; + const auto nby{(static_cast(ny) + threads_per_block.y - 1) / + threads_per_block.y}; dim3 num_blocks{nbx, nby}; - f_kernel<<>>(nx, ny, dx, dy, cx, cy, cc, bx, by, sin_t_cos_t, cos_sqr_t, uarray, farray); + f_kernel<<>>(nx, ny, dx, dy, cx, cy, cc, bx, + by, sin_t_cos_t, cos_sqr_t, + uarray, farray); - HIP_OR_CUDA(hipDeviceSynchronize();, cudaDeviceSynchronize();); + HIP_OR_CUDA_OR_SYCL(hipDeviceSynchronize(), cudaDeviceSynchronize(), ); +#elif defined(USE_DPCPP) + // Access device data arrays + sunrealtype* uarray = N_VGetDeviceArrayPointer(u); + if (check_ptr(uarray, "N_VGetDeviceArrayPointer")) return -1; + + sunrealtype* farray = N_VGetDeviceArrayPointer(f); + if (check_ptr(farray, "N_VGetDeviceArrayPointer")) return -1; + + std::dynamic_pointer_cast(udata->exec) + ->get_queue() + ->submit( + [&](sycl::handler& cgh) + { + cgh.parallel_for(sycl::range<2>(ny, nx), + [=](sycl::id<2> id) + { + const sunindextype i = id[1]; + const sunindextype j = id[0]; + if (i > 0 && i < nx - 1 && j > 0 && j < ny - 1) + { + auto x = i * dx; + auto y = j * dy; + + auto sin_sqr_x = sin(PI * x) * sin(PI * x); + auto sin_sqr_y = sin(PI * y) * sin(PI * y); + + auto cos_sqr_x = cos(PI * x) * cos(PI * x); + auto cos_sqr_y = cos(PI * y) * cos(PI * y); + + // center, north, south, east, and west indices + auto idx_c = i + j * nx; + auto idx_n = i + (j + 1) * nx; + auto idx_s = i + (j - 1) * nx; + auto idx_e = (i + 1) + j * nx; + auto idx_w = (i - 1) + j * nx; + + farray[idx_c] = + cc * uarray[idx_c] + + cx * (uarray[idx_w] + uarray[idx_e]) + + cy * (uarray[idx_s] + uarray[idx_n]) - + TWO * PI * sin_sqr_x * sin_sqr_y * sin_t_cos_t - + bx * (cos_sqr_x - sin_sqr_x) * sin_sqr_y * + cos_sqr_t - + by * (cos_sqr_y - sin_sqr_y) * sin_sqr_x * + cos_sqr_t; + } + }); + }); + udata->exec->synchronize(); #else // Access host data arrays @@ -346,8 +430,10 @@ int f(sunrealtype t, N_Vector u, N_Vector f, void* user_data) if (check_ptr(farray, "N_VGetArrayPointer")) return -1; // Iterate over domain interior and fill the RHS vector - for (sunindextype j = 1; j < ny - 1; j++) { - for (sunindextype i = 1; i < nx - 1; i++) { + for (sunindextype j = 1; j < ny - 1; j++) + { + for (sunindextype i = 1; i < nx - 1; i++) + { auto x = i * dx; auto y = j * dy; @@ -364,7 +450,8 @@ int f(sunrealtype t, N_Vector u, N_Vector f, void* user_data) auto idx_e = (i + 1) + j * nx; auto idx_w = (i - 1) + j * nx; - farray[idx_c] = cc * uarray[idx_c] + cx * (uarray[idx_w] + uarray[idx_e]) + cy * (uarray[idx_s] + uarray[idx_n]) - + farray[idx_c] = cc * uarray[idx_c] + cx * (uarray[idx_w] + uarray[idx_e]) + + cy * (uarray[idx_s] + uarray[idx_n]) - TWO * PI * sin_sqr_x * sin_sqr_y * sin_t_cos_t - bx * (cos_sqr_x - sin_sqr_x) * sin_sqr_y * cos_sqr_t - by * (cos_sqr_y - sin_sqr_y) * sin_sqr_x * cos_sqr_t; @@ -380,12 +467,14 @@ int f(sunrealtype t, N_Vector u, N_Vector f, void* user_data) #if defined(USE_CUDA) || defined(USE_HIP) // GPU kernel to fill southern (j = 0) and northern (j = nx - 1) boundary // entries including the corners. -__global__ void J_sn_kernel(const sunindextype nx, const sunindextype ny, sunindextype* row_ptrs, - sunindextype* col_idxs, sunrealtype* mat_data) +__global__ void J_sn_kernel(const sunindextype nx, const sunindextype ny, + sunindextype* row_ptrs, sunindextype* col_idxs, + sunrealtype* mat_data) { const sunindextype i = blockIdx.x * blockDim.x + threadIdx.x; - if (i >= 0 && i < nx) { + if (i >= 0 && i < nx) + { // Southern face mat_data[i] = ZERO; col_idxs[i] = i; @@ -404,12 +493,14 @@ __global__ void J_sn_kernel(const sunindextype nx, const sunindextype ny, sunind // GPU kernel to fill western (i = 0) and eastern (i = nx - 1) boundary entries // excluding the corners (set by J_sn_kernel). -__global__ void J_we_kernel(const sunindextype nx, const sunrealtype ny, sunindextype* row_ptrs, sunindextype* col_idxs, +__global__ void J_we_kernel(const sunindextype nx, const sunrealtype ny, + sunindextype* row_ptrs, sunindextype* col_idxs, sunrealtype* mat_data) { const sunindextype j = blockIdx.x * blockDim.x + threadIdx.x; - if (j > 0 && j < ny - 1) { + if (j > 0 && j < ny - 1) + { // Western face auto col = j * nx; auto idx = (5 * (nx - 2) + 2) * (j - 1) + nx; @@ -427,13 +518,16 @@ __global__ void J_we_kernel(const sunindextype nx, const sunrealtype ny, suninde } // GPU kernel to compute the ODE RHS Jacobian function df/dy(t,y). -__global__ void J_kernel(const sunindextype nx, const sunindextype ny, const sunrealtype cx, const sunrealtype cy, - const sunrealtype cc, sunindextype* row_ptrs, sunindextype* col_idxs, sunrealtype* mat_data) +__global__ void J_kernel(const sunindextype nx, const sunindextype ny, + const sunrealtype cx, const sunrealtype cy, + const sunrealtype cc, sunindextype* row_ptrs, + sunindextype* col_idxs, sunrealtype* mat_data) { const sunindextype i = blockIdx.x * blockDim.x + threadIdx.x; const sunindextype j = blockIdx.y * blockDim.y + threadIdx.y; - if (i > 0 && i < nx - 1 && j > 0 && j < ny - 1) { + if (i > 0 && i < nx - 1 && j > 0 && j < ny - 1) + { auto row = i + j * nx; auto col_s = row - nx; auto col_w = row - 1; @@ -467,7 +561,8 @@ __global__ void J_kernel(const sunindextype nx, const sunindextype ny, const sun // J routine to compute the ODE RHS Jacobian function df/dy(t,y). This // explicitly set boundary entries to zero so J(t,y) has the same sparsity // pattern as A = I - gamma * J(t,y). -int J(sunrealtype t, N_Vector y, N_Vector fy, SUNMatrix J, void* user_data, N_Vector tmp1, N_Vector tmp2, N_Vector tmp3) +int J(sunrealtype t, N_Vector y, N_Vector fy, SUNMatrix J, void* user_data, + N_Vector tmp1, N_Vector tmp2, N_Vector tmp3) { // Access problem data auto udata = static_cast(user_data); @@ -492,35 +587,141 @@ int J(sunrealtype t, N_Vector y, N_Vector fy, SUNMatrix J, void* user_data, N_Ve #if defined(USE_CUDA) || defined(USE_HIP) unsigned threads_per_block_bx = 16; - unsigned num_blocks_bx = ((nx + threads_per_block_bx - 1) / threads_per_block_bx); + unsigned num_blocks_bx = + ((nx + threads_per_block_bx - 1) / threads_per_block_bx); - J_sn_kernel<<>>(nx, ny, row_ptrs, col_idxs, mat_data); + J_sn_kernel<<>>(nx, ny, row_ptrs, + col_idxs, mat_data); unsigned threads_per_block_by = 16; - unsigned num_blocks_by = ((ny + threads_per_block_by - 1) / threads_per_block_by); + unsigned num_blocks_by = + ((ny + threads_per_block_by - 1) / threads_per_block_by); - J_we_kernel<<>>(nx, ny, row_ptrs, col_idxs, mat_data); + J_we_kernel<<>>(nx, ny, row_ptrs, + col_idxs, mat_data); dim3 threads_per_block_i{16, 16}; - const auto nbx{(static_cast(nx) + threads_per_block_i.x - 1) / threads_per_block_i.x}; - const auto nby{(static_cast(ny) + threads_per_block_i.y - 1) / threads_per_block_i.y}; + const auto nbx{(static_cast(nx) + threads_per_block_i.x - 1) / + threads_per_block_i.x}; + const auto nby{(static_cast(ny) + threads_per_block_i.y - 1) / + threads_per_block_i.y}; dim3 num_blocks_i{nbx, nby}; - J_kernel<<>>(nx, ny, cx, cy, cc, row_ptrs, col_idxs, mat_data); - - HIP_OR_CUDA(hipDeviceSynchronize();, cudaDeviceSynchronize();); - + J_kernel<<>>(nx, ny, cx, cy, cc, row_ptrs, + col_idxs, mat_data); + + HIP_OR_CUDA_OR_SYCL(hipDeviceSynchronize(), cudaDeviceSynchronize(), ); +#elif defined(USE_DPCPP) + auto queue = + std::dynamic_pointer_cast(udata->exec)->get_queue(); + // J_sn_kernel + queue->submit( + [&](sycl::handler& cgh) + { + cgh.parallel_for(nx, + [=](sycl::id<1> id) + { + const sunindextype i = id[0]; + + // Southern face + mat_data[i] = ZERO; + col_idxs[i] = i; + row_ptrs[i] = i; + + // Northern face + auto col = i + (ny - 1) * nx; + auto idx = (5 * (nx - 2) + 2) * (ny - 2) + nx + i; + mat_data[idx] = ZERO; + col_idxs[idx] = col; + row_ptrs[col] = idx; + + if (i == nx - 1) + row_ptrs[nx * ny] = (5 * (nx - 2) + 2) * (ny - 2) + + 2 * nx; + }); + }); + // J_we_kernel + queue->submit( + [&](sycl::handler& cgh) + { + cgh.parallel_for(ny, + [=](sycl::id<1> id) + { + const sunindextype j = id[0]; + if (j > 0 && j < ny - 1) + { + // Western face + auto col = j * nx; + auto idx = (5 * (nx - 2) + 2) * (j - 1) + nx; + mat_data[idx] = ZERO; + col_idxs[idx] = col; + row_ptrs[col] = idx; + + // Eastern face + col = (nx - 1) + j * nx; + idx = (5 * (nx - 2) + 2) * (j - 1) + nx + 1 + + 5 * (nx - 2); + mat_data[idx] = ZERO; + col_idxs[idx] = col; + row_ptrs[col] = idx; + } + }); + }); + // J_kernel + queue->submit( + [&](sycl::handler& cgh) + { + cgh.parallel_for(sycl::range<2>(ny, nx), + [=](sycl::id<2> id) + { + const sunindextype i = id[1]; + const sunindextype j = id[0]; + + if (i > 0 && i < nx - 1 && j > 0 && j < ny - 1) + { + auto row = i + j * nx; + auto col_s = row - nx; + auto col_w = row - 1; + auto col_c = row; + auto col_e = row + 1; + auto col_n = row + nx; + + // Number of non-zero entries from preceding rows + auto prior_nnz = (5 * (nx - 2) + 2) * (j - 1) + nx; + + // Starting index for this row + auto idx = prior_nnz + 1 + 5 * (i - 1); + + mat_data[idx] = cy; + mat_data[idx + 1] = cx; + mat_data[idx + 2] = cc; + mat_data[idx + 3] = cx; + mat_data[idx + 4] = cy; + + col_idxs[idx] = col_s; + col_idxs[idx + 1] = col_w; + col_idxs[idx + 2] = col_c; + col_idxs[idx + 3] = col_e; + col_idxs[idx + 4] = col_n; + + row_ptrs[row] = idx; + } + }); + }); + udata->exec->synchronize(); #else // Fill southern boundary entries (j = 0) - for (sunindextype i = 0; i < nx; i++) { + for (sunindextype i = 0; i < nx; i++) + { mat_data[i] = ZERO; col_idxs[i] = i; row_ptrs[i] = i; } // Fill western boundary entries (i = 0) - for (sunindextype j = 1; j < ny - 1; j++) { + for (sunindextype j = 1; j < ny - 1; j++) + { auto col = j * nx; auto idx = (5 * (nx - 2) + 2) * (j - 1) + nx; mat_data[idx] = ZERO; @@ -529,7 +730,8 @@ int J(sunrealtype t, N_Vector y, N_Vector fy, SUNMatrix J, void* user_data, N_Ve } // Fill eastern boundary entries (i = nx - 1) - for (sunindextype j = 1; j < ny - 1; j++) { + for (sunindextype j = 1; j < ny - 1; j++) + { auto col = (nx - 1) + j * nx; auto idx = (5 * (nx - 2) + 2) * (j - 1) + nx + 1 + 5 * (nx - 2); mat_data[idx] = ZERO; @@ -538,7 +740,8 @@ int J(sunrealtype t, N_Vector y, N_Vector fy, SUNMatrix J, void* user_data, N_Ve } // Fill northern boundary entries (j = ny - 1) - for (sunindextype i = 0; i < nx; i++) { + for (sunindextype i = 0; i < nx; i++) + { auto col = i + (ny - 1) * nx; auto idx = (5 * (nx - 2) + 2) * (ny - 2) + nx + i; mat_data[idx] = ZERO; @@ -548,8 +751,10 @@ int J(sunrealtype t, N_Vector y, N_Vector fy, SUNMatrix J, void* user_data, N_Ve row_ptrs[nx * ny] = (5 * (nx - 2) + 2) * (ny - 2) + 2 * nx; // Fill interior entries - for (sunindextype j = 1; j < ny - 1; j++) { - for (sunindextype i = 1; i < nx - 1; i++) { + for (sunindextype j = 1; j < ny - 1; j++) + { + for (sunindextype i = 1; i < nx - 1; i++) + { auto row = i + j * nx; auto col_s = row - nx; auto col_w = row - 1; diff --git a/examples/cvode/ginkgo/cv_heat2D_ginkgo.hpp b/examples/cvode/ginkgo/cv_heat2D_ginkgo.hpp index bc459c68d1..a363985c7b 100644 --- a/examples/cvode/ginkgo/cv_heat2D_ginkgo.hpp +++ b/examples/cvode/ginkgo/cv_heat2D_ginkgo.hpp @@ -15,24 +15,30 @@ * See cv_heat2D_ginkgo.cpp for more information. * ---------------------------------------------------------------------------*/ +#include #include -#include -#include #include +#include +#include #include -#include +#include #include // SUNDIALS types -#include #include +#include #if defined(USE_CUDA) #include #elif defined(USE_HIP) #include +#elif defined(USE_DPCPP) +#include #endif +// Ginkgo Type +#include + // Common utility functions #include @@ -72,19 +78,22 @@ struct UserData sunrealtype dy = yu / (ny - 1); // Integrator settings - sunrealtype rtol = SUN_RCONST(1.0e-4); // relative tolerance - sunrealtype atol = SUN_RCONST(1.0e-8); // absolute tolerance - int maxsteps = 0; // max number of steps between outputs + sunrealtype rtol = SUN_RCONST(1.0e-4); // relative tolerance + sunrealtype atol = SUN_RCONST(1.0e-8); // absolute tolerance + int maxsteps = 0; // max number of steps between outputs // Linear solver settings - int liniters = 20; // number of linear iterations - sunrealtype epslin = ZERO; // linear solver tolerance factor + int liniters = 20; // number of linear iterations + sunrealtype epslin = ZERO; // linear solver tolerance factor // Ouput variables - bool output = false; // write solution to disk - int nout = 20; // number of output times - std::ofstream uout; // output file stream - std::ofstream eout; // error file stream + bool output = false; // write solution to disk + int nout = 20; // number of output times + std::ofstream uout; // output file stream + std::ofstream eout; // error file stream + + // Ginkgo executor for synchronization on sycl + std::shared_ptr exec; }; // ----------------------------------------------------------------------------- @@ -93,11 +102,9 @@ struct UserData #if defined(USE_CUDA) || defined(USE_HIP) // GPU kernel to compute the ODE RHS function f(t,y). -__global__ -void solution_kernel(const sunindextype nx, const sunindextype ny, - const sunrealtype dx, const sunrealtype dy, - const sunrealtype cos_sqr_t, - sunrealtype* uarray) +__global__ void solution_kernel(const sunindextype nx, const sunindextype ny, + const sunrealtype dx, const sunrealtype dy, + const sunrealtype cos_sqr_t, sunrealtype* uarray) { const sunindextype i = blockIdx.x * blockDim.x + threadIdx.x; const sunindextype j = blockIdx.y * blockDim.y + threadIdx.y; @@ -110,14 +117,14 @@ void solution_kernel(const sunindextype nx, const sunindextype ny, auto sin_sqr_x = sin(PI * x) * sin(PI * x); auto sin_sqr_y = sin(PI * y) * sin(PI * y); - auto idx = i + j * nx; + auto idx = i + j * nx; uarray[idx] = sin_sqr_x * sin_sqr_y * cos_sqr_t + ONE; } } #endif // Compute the exact solution -int Solution(sunrealtype t, N_Vector u, UserData &udata) +int Solution(sunrealtype t, N_Vector u, UserData& udata) { // Access problem data and set shortcuts const auto nx = udata.nx; @@ -133,22 +140,49 @@ int Solution(sunrealtype t, N_Vector u, UserData &udata) #if defined(USE_CUDA) || defined(USE_HIP) - sunrealtype *uarray = N_VGetDeviceArrayPointer(u); + sunrealtype* uarray = N_VGetDeviceArrayPointer(u); if (check_ptr(uarray, "N_VGetDeviceArrayPointer")) return -1; dim3 threads_per_block{16, 16}; - const auto nbx{(static_cast(nx) + threads_per_block.x - 1) - / threads_per_block.x}; - const auto nby{(static_cast(ny) + threads_per_block.y - 1) - / threads_per_block.y}; + const auto nbx{(static_cast(nx) + threads_per_block.x - 1) / + threads_per_block.x}; + const auto nby{(static_cast(ny) + threads_per_block.y - 1) / + threads_per_block.y}; dim3 num_blocks{nbx, nby}; - solution_kernel<<>> - (nx, ny, dx, dy, cos_sqr_t, uarray); - + solution_kernel<<>>(nx, ny, dx, dy, cos_sqr_t, + uarray); +#elif defined(USE_DPCPP) + sunrealtype* uarray = N_VGetDeviceArrayPointer(u); + if (check_ptr(uarray, "N_VGetDeviceArrayPointer")) return -1; + std::dynamic_pointer_cast(udata.exec) + ->get_queue() + ->submit( + [&](sycl::handler& cgh) + { + cgh.parallel_for(sycl::range<2>(ny, nx), + [=](sycl::id<2> id) + { + const sunindextype i = id[1]; + const sunindextype j = id[0]; + + if (i > 0 && i < nx - 1 && j > 0 && j < ny - 1) + { + auto x = i * dx; + auto y = j * dy; + + auto sin_sqr_x = sin(PI * x) * sin(PI * x); + auto sin_sqr_y = sin(PI * y) * sin(PI * y); + + auto idx = i + j * nx; + uarray[idx] = sin_sqr_x * sin_sqr_y * cos_sqr_t + + ONE; + } + }); + }); #else - sunrealtype *uarray = N_VGetArrayPointer(u); + sunrealtype* uarray = N_VGetArrayPointer(u); if (check_ptr(uarray, "N_VGetArrayPointer")) return -1; for (sunindextype j = 1; j < ny - 1; j++) @@ -161,18 +195,18 @@ int Solution(sunrealtype t, N_Vector u, UserData &udata) auto sin_sqr_x = sin(PI * x) * sin(PI * x); auto sin_sqr_y = sin(PI * y) * sin(PI * y); - auto idx = i + j * nx; + auto idx = i + j * nx; uarray[idx] = sin_sqr_x * sin_sqr_y * cos_sqr_t + ONE; } } #endif - + udata.exec->synchronize(); return 0; } // Compute the solution error -int SolutionError(sunrealtype t, N_Vector u, N_Vector e, UserData &udata) +int SolutionError(sunrealtype t, N_Vector u, N_Vector e, UserData& udata) { // Compute true solution int flag = Solution(t, e, udata); @@ -188,29 +222,28 @@ int SolutionError(sunrealtype t, N_Vector u, N_Vector e, UserData &udata) // Print command line options void InputHelp() { - std::cout - << std::endl - << "Command line options:\n" - << " --nx : number of x mesh points\n" - << " --nx : number of y mesh points\n" - << " --xu : x upper bound\n" - << " --yu : y upper bound\n" - << " --kx : x diffusion coefficient\n" - << " --kx : y diffusion coefficient\n" - << " --tf