From c2f51ef3e254e47088543be0ce6be257d0c818d6 Mon Sep 17 00:00:00 2001 From: Stefan K Date: Sun, 3 Mar 2019 15:32:05 +0100 Subject: [PATCH] Update to gslib.h and allow to set a communicator on input (#603) --- core/comm_mpi.f | 91 ++++++++---------- core/crs_amg.c | 13 +-- core/crs_xxt.c | 174 ++++++++++++++++++++++++++++++--- core/drive.f | 8 +- core/drive1.f | 11 ++- core/drive2.f | 12 ++- core/experimental/crs_hypre.c | 14 +-- core/fcrs.c | 9 +- core/hpf.f | 16 ---- core/makefile.template | 13 ++- core/makenek.inc | 12 ++- core/multimesh.f | 25 +++-- core/partitioner.c | 2 +- core/sparse_cholesky.c | 175 ---------------------------------- core/sparse_cholesky.h | 34 ------- examples | 2 +- 16 files changed, 254 insertions(+), 357 deletions(-) delete mode 100644 core/sparse_cholesky.c delete mode 100644 core/sparse_cholesky.h diff --git a/core/comm_mpi.f b/core/comm_mpi.f index 61d59692d..303f78edd 100644 --- a/core/comm_mpi.f +++ b/core/comm_mpi.f @@ -1,11 +1,13 @@ c----------------------------------------------------------------------- - subroutine setupcomm() + subroutine setupcomm(comm,newcomm,newcommg,path_in,session_in) include 'mpif.h' include 'SIZE' include 'PARALLEL' include 'TSTEP' include 'INPUT' + integer comm, newcomm, newcommg + character session_in*(*), path_in*(*) logical flag common /nekmpi/ mid,mp,nekcomm,nekgroup,nekreal @@ -18,14 +20,18 @@ subroutine setupcomm() integer*8 ntags - ! Init MPI call mpi_initialized(mpi_is_initialized, ierr) if (.not.mpi_is_initialized) call mpi_init(ierr) - call mpi_comm_size(MPI_COMM_WORLD,np_global,ierr) - call mpi_comm_rank(MPI_COMM_WORLD,nid_global,ierr) + + call mpi_comm_dup(comm,newcommg,ierr) + newcomm = newcommg + nekcomm = newcommg + + call mpi_comm_size(nekcomm,np_global,ierr) + call mpi_comm_rank(nekcomm,nid_global,ierr) ! check upper tag size limit - call mpi_comm_get_attr(MPI_COMM_WORLD,MPI_TAG_UB,ntags,flag,ierr) + call mpi_comm_get_attr(nekcomm,MPI_TAG_UB,ntags,flag,ierr) if (ntags .lt. np_global) then if(nid_global.eq.0) write(6,*) 'ABORT: MPI_TAG_UB too small!' call exitt @@ -33,33 +39,39 @@ subroutine setupcomm() ! set defaults nid = nid_global - nekcomm = MPI_COMM_WORLD - iglobalcomm = MPI_COMM_WORLD ifneknek = .false. ifneknekc = .false. ! session are uncoupled - ifneknekm = .false. ! not moving nsessions = 1 ierr = 0 nlin = 0 if (nid .eq. 0) then - write(6,*) 'Reading session file ...' - open (unit=8,file='SESSION.NAME',status='old',err=24) - 21 read (8,*,END=22) - nlin = nlin + 1 - goto 21 - 22 rewind(8) - if (nlin.gt.2) read(8,*,err=24) nsessions - if (nsessions.gt.1) read(8,*,err=24) ifneknekc - do n=0,nsessions-1 - call blank(session_mult(n),132) - call blank(path_mult(n) ,132) - read(8,11,err=24) session_mult(n) - read(8,11,err=24) path_mult(n) - if (nsessions.gt.1) read(8,*,err=24) npsess(n) - enddo - 11 format(a132) - close(8) + l = ltrunc(session_in,len(session_in)) + if (l .gt. 0) then + call blank(session_mult(0),132) + call chcopy(session_mult(0), session_in, l) + l = ltrunc(path_in,len(path_in)) + call blank(path_mult(0) ,132) + call chcopy(path_mult(0), path_in, l) + else + write(6,*) 'Reading session file ...' + open (unit=8,file='SESSION.NAME',status='old',err=24) + 21 read (8,*,END=22) + nlin = nlin + 1 + goto 21 + 22 rewind(8) + if (nlin.gt.2) read(8,*,err=24) nsessions + if (nsessions.gt.1) read(8,*,err=24) ifneknekc + do n=0,nsessions-1 + call blank(session_mult(n),132) + call blank(path_mult(n) ,132) + read(8,11,err=24) session_mult(n) + read(8,11,err=24) path_mult(n) + if (nsessions.gt.1) read(8,*,err=24) npsess(n) + enddo + 11 format(a132) + close(8) + endif write(6,*) 'Number of sessions:',nsessions goto 23 24 ierr = 1 @@ -68,6 +80,10 @@ subroutine setupcomm() call err_chk(ierr,' Error while reading SESSION.NAME!$') call bcast(nsessions,ISIZE) + if (nsessions .gt. nsessmax) + & call exitti('nsessmax in SIZE too low!$',nsessmax) + if (nsessions .gt. 1) ifneknek = .true. + call bcast(ifneknekc,LSIZE) do n = 0,nsessions-1 call bcast(npsess(n),ISIZE) @@ -75,18 +91,11 @@ subroutine setupcomm() call bcast(path_mult(n),132*CSIZE) enddo - if (nsessions .gt. 1) ifneknek = .true. - - if (nsessions .gt. nsessmax) - & call exitti('nsessmax in SIZE too low!$',nsessmax) - ! single session run if (.not.ifneknek) then ifneknekc = .false. session = session_mult(0) path = path_mult(0) - call mpi_comm_dup(mpi_comm_world,iglobalcomm,ierr) - intracomm = iglobalcomm return endif @@ -107,31 +116,15 @@ subroutine setupcomm() if (nid_global.ge.nid_global_root(n).and. & nid_global.lt.nid_global_root_next) idsess = n enddo - - call mpi_comm_split(mpi_comm_world,idsess,nid,intracomm,ierr) + call mpi_comm_split(comm,idsess,nid,newcomm,ierr) session = session_mult(idsess) path = path_mult (idsess) - ! setup intercommunication if (ifneknekc) then if (nsessions.gt.2) call exitti( & 'More than 2 coupled sessions are currently not supported!$', $ nsessions) - - if (idsess.eq.0) idsess_neighbor=1 - if (idsess.eq.1) idsess_neighbor=0 - - call mpi_intercomm_create(intracomm,0,mpi_comm_world, - & nid_global_root(idsess_neighbor), 10,intercomm,ierr) - - np_neighbor=npsess(idsess_neighbor) - - ifhigh=.true. - call mpi_intercomm_merge(intercomm, ifhigh, iglobalcomm, ierr) - - ngeom = 2 ! Initialize NEKNEK interface subiterations to 2. - ninter = 1 ! Initialize NEKNEK interface extrapolation order to 1. endif return diff --git a/core/crs_amg.c b/core/crs_amg.c index dde28e743..114bf6515 100644 --- a/core/crs_amg.c +++ b/core/crs_amg.c @@ -5,18 +5,7 @@ #include #include #include -#include "c99.h" -#include "name.h" -#include "types.h" -#include "fail.h" -#include "mem.h" -#include "sort.h" -#include "sarray_sort.h" -#include "gs_defs.h" -#include "comm.h" -#include "crystal.h" -#include "sarray_transfer.h" -#include "gs.h" +#include "gslib.h" #define crs_setup PREFIXED_NAME(crs_amg_setup) #define crs_solve PREFIXED_NAME(crs_amg_solve) diff --git a/core/crs_xxt.c b/core/crs_xxt.c index b8c14c68e..a27d83361 100644 --- a/core/crs_xxt.c +++ b/core/crs_xxt.c @@ -5,18 +5,7 @@ #include #include #include -#include "c99.h" -#include "name.h" -#include "fail.h" -#include "types.h" -#include "tensor.h" -#include "gs_defs.h" -#include "comm.h" -#include "mem.h" -#include "sort.h" -#include "sarray_sort.h" -#include "sparse_cholesky.h" -#include "gs.h" +#include "gslib.h" #define crs_setup PREFIXED_NAME(crs_xxt_setup) #define crs_solve PREFIXED_NAME(crs_xxt_solve) @@ -51,6 +40,166 @@ static unsigned lg(uint v) #undef CHECK } +/* factors: L is in CSR format + D is a diagonal matrix stored as a vector + actual factorization is: + + -1 T + A = (I-L) D (I-L) + + -1 -T -1 + A = (I-L) D (I-L) + + (triangular factor is unit diagonal; the diagonal is not stored) +*/ +struct sparse_cholesky { + uint n, *Lrp, *Lj; + double *L, *D; +}; + +/* + symbolic factorization: finds the sparsity structure of L + + uses the concept of elimination tree: + the parent of node j is node i when L(i,j) is the first + non-zero in column j below the diagonal (i>j) + L's structure is discovered row-by-row; the first time + an entry in column j is set, it must be the parent + + the nonzeros in L are the nonzeros in A + paths up the elimination tree + + linear in the number of nonzeros of L +*/ +static void factor_symbolic(uint n, const uint *Arp, const uint *Aj, + struct sparse_cholesky *out, buffer *buf) +{ + uint *visit = tmalloc(uint,2*n), *parent = visit+n; + uint *Lrp, *Lj; + uint i,nz=0; + + out->n=n; + + for(i=0;i=i) break; + for(;visit[j]!=i;j=parent[j]) { + ++nz, visit[j]=i; + if(parent[j]==n) { parent[j]=i; break; } + } + } + } + + Lrp=out->Lrp=tmalloc(uint,n+1+nz); + Lj =out->Lj =Lrp+n+1; + + Lrp[0]=0; + for(i=0;i=i) break; + for(;visit[j]!=i;j=parent[j]) Ljr[count++]=j, visit[j]=i; + } + sortv(Ljr, Ljr,count,sizeof(uint), buf); + Lrp[i+1]=Lrp[i]+count; + } + free(visit); +} + +/* + numeric factorization: + + L is built row-by-row, using: ( ' indicates transpose ) + + + [ A r ] = [ (I-L) ] [ D^(-1) ] [ (I-L)' -s ] + [ r' a ] [ -s' 1 ] [ 1/d ] [ 1 ] + + = [ A (I-L) D^(-1) (-s) ] + [ r' s' D^(-1) s + 1/d ] + + so, if r' is the next row of A, up to but excluding the diagonal, + then the next row of L, s', obeys + + r = - (I-L) D^(-1) s + + let y = (I-L)^(-1) (-r) + then s = D y, and d = 1/(s' y) + +*/ +static void factor_numeric(uint n, const uint *Arp, const uint *Aj, + const double *A, + struct sparse_cholesky *out, + uint *visit, double *y) +{ + const uint *Lrp=out->Lrp, *Lj=out->Lj; + double *D, *L; + uint i; + + D=out->D=tmalloc(double,n+Lrp[n]); + L=out->L=D+n; + + for(i=0;i=i) { if(j==i) a=A[p]; break; } + y[j]=-A[p]; + } + for(p=Lrp[i],pe=Lrp[i+1];p!=pe;++p) { + uint q,qe,j=Lj[p]; double lij,yj=y[j]; + for(q=Lrp[j],qe=Lrp[j+1];q!=qe;++q) { + uint k=Lj[q]; if(visit[k]==i) yj+=L[q]*y[k]; + } + y[j]=yj; + L[p]=lij=D[j]*yj; + a-=yj*lij; + } + D[i]=1/a; + } +} + +/* x = A^(-1) b; works when x and b alias */ +void sparse_cholesky_solve( + double *x, const struct sparse_cholesky *fac, double *b) +{ + const uint n=fac->n, *Lrp=fac->Lrp, *Lj=fac->Lj; + const double *L=fac->L, *D=fac->D; + uint i, p,pe; + for(i=0;iptr,n_uints_as_dbls+(double*)buf->ptr); +} + +void sparse_cholesky_free(struct sparse_cholesky *fac) +{ + free(fac->Lrp); fac->Lj=fac->Lrp=0; + free(fac->D); fac->L =fac->D =0; +} + struct csr_mat { uint n, *Arp, *Aj; double *A; }; @@ -848,4 +997,3 @@ void crs_free(struct xxt *data) free(data->vl); free(data); } - diff --git a/core/drive.f b/core/drive.f index bdbc7edd3..ccd4e5e68 100644 --- a/core/drive.f +++ b/core/drive.f @@ -1,6 +1,10 @@ program NEKTON -c - call nek_init(intracomm) + + include 'mpif.h' + integer comm + comm = MPI_COMM_WORLD + + call nek_init(comm) call nek_solve() call nek_end() diff --git a/core/drive1.f b/core/drive1.f index 28552317f..2e31f844f 100644 --- a/core/drive1.f +++ b/core/drive1.f @@ -1,5 +1,5 @@ c----------------------------------------------------------------------- - subroutine nek_init(comm_out) + subroutine nek_init(comm) c include 'SIZE' include 'TOTAL' @@ -24,7 +24,7 @@ subroutine nek_init(comm_out) c COMMON /SCRSF/ DUMMY8(LX1,LY1,LZ1,LELT,3) c COMMON /SCRCG/ DUMM10(LX1,LY1,LZ1,LELT,1) - integer comm_out + integer comm common /nekmpi/ mid,mp,nekcomm,nekgroup,nekreal common /rdump/ ntdump @@ -54,9 +54,10 @@ subroutine nek_init(comm_out) ! set word size for CHARACTER csize = sizeof(ctest) - call setupcomm() - nekcomm = intracomm - comm_out = nekcomm + call setupcomm(comm,newcomm,newcommg,'','') + intracomm = newcomm ! within a session + nekcomm = newcomm + iglobalcomm = newcommg ! across all sessions call iniproc() etimes = dnekclock() diff --git a/core/drive2.f b/core/drive2.f index ae440c482..10511bee1 100644 --- a/core/drive2.f +++ b/core/drive2.f @@ -154,6 +154,7 @@ subroutine setvar include 'GEOM' include 'DEALIAS' include 'TSTEP' + include 'NEKNEK' C C Geometry on Mesh 3 or 1? C @@ -274,12 +275,15 @@ subroutine setvar C NBD = 0 CALL RZERO (DTLAG,10) -C -C Useful constants -C + + ! neknek + ifneknekm = .false. + ninter = 1 + nfld_neknek = ndim + nfield + one = 1. PI = 4.*ATAN(one) -C + RETURN END C diff --git a/core/experimental/crs_hypre.c b/core/experimental/crs_hypre.c index f6cfec291..c75e6484a 100644 --- a/core/experimental/crs_hypre.c +++ b/core/experimental/crs_hypre.c @@ -2,19 +2,7 @@ #include #include #include -#include "c99.h" -#include "name.h" -#include "fail.h" -#include "types.h" -#include "mem.h" -#include "gs_defs.h" -#include "comm.h" -#include "gs.h" -#include "crystal.h" -#include "sort.h" -#include "sarray_sort.h" -#include "sarray_transfer.h" - +#include "gslib.h" #include "crs_hypre.h" #ifdef HYPRE diff --git a/core/fcrs.c b/core/fcrs.c index bc39c6118..66d118b22 100644 --- a/core/fcrs.c +++ b/core/fcrs.c @@ -2,16 +2,11 @@ #include #include #include -#include "c99.h" -#include "name.h" -#include "fail.h" -#include "types.h" -#include "mem.h" -#include "comm.h" +#include "gslib.h" #include "crs.h" /*-------------------------------------------------------------------------- - FORTRAN Interface to coarse solver + FORTRAN wrapper interface to coarse solver --------------------------------------------------------------------------*/ #undef crs_xxt_setup diff --git a/core/hpf.f b/core/hpf.f index bc468df9f..e343506a2 100644 --- a/core/hpf.f +++ b/core/hpf.f @@ -1,20 +1,5 @@ -cc%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -cc -cc Author: Prabal Negi -cc Email : negi@mech.kth.se -cc Description: High pass filtering for stabilization of SEM -cc Last Modified: 27/09/2017 -cc -cc Note: 'implicit none' have been commented out from all routines -cc to maintain backward compatibility. -cc -cc%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -cc---------------------------------------------------------------------- -c Main interface for high-pass filter subroutine MAKE_HPF -c implicit none - include 'SIZE' include 'SOLN' include 'INPUT' ! param(110),(111) @@ -55,7 +40,6 @@ subroutine MAKE_HPF hpf_chi = -1.0*abs(param(103)) c Boyd transform to preserve element boundary values is c linearly unstable when used as forcing. -c keep parameter as false unless you know what you are doing. hpf_ifboyd = .false. nel = nelv diff --git a/core/makefile.template b/core/makefile.template index ff1fcf168..ce41f9f2c 100644 --- a/core/makefile.template +++ b/core/makefile.template @@ -52,7 +52,7 @@ papi.o nek_in_situ.o \ reader_rea.o reader_par.o reader_re2.o \ finiparser.o iniparser.o dictionary.o \ hpf.o \ -sparse_cholesky.o fcrs.o crs_xxt.o crs_amg.o \ +fcrs.o crs_xxt.o crs_amg.o \ fem_amg_preco.o crs_hypre.o \ partitioner.o ################################################################################ @@ -128,7 +128,7 @@ usrfile: @env CASENAME=$(CASENAME) PPS="$(PPS)" $S/mkuserfile nek5000: lib usrfile - $(FC) $(FL2) -o ${BINNAME} $S/drive.f $(CASEDIR)/${CASENAME}.f $(VISITNEK_INCLUDES) obj/${LIBNAME} $(USR_LFLAGS) $(LDFLAGS) + $(FC) $(FL2) -o ${BINNAME} $S/drive.f $(CASEDIR)/${CASENAME}.f $(VISITNEK_INCLUDES) ${LIBNAME} $(USR_LFLAGS) $(LDFLAGS) @if test -f ${BINNAME}; then \ echo ""; \ echo "$(NEK_WARN)"; \ @@ -142,18 +142,18 @@ nek5000: lib usrfile echo -e "\033[1;31;38m" "ERROR: Compilation failed!"; \ echo -e "\033[0m"; \ fi - @rm -f *.i + @rm -f ${LIBNAME} *.i ifeq ($(MPI),0) @rm -rf $S/mpif.h endif lib: objdir $(NOBJS) - @$(AR) cru obj/${LIBNAME} $(NOBJS) - @ranlib obj/${LIBNAME} + @$(AR) cru ${LIBNAME} $(NOBJS) + @ranlib ${LIBNAME} clean: @echo "cleaning Nek5000 ..." - @rm -rf ${CASENAME}.f obj ${BINNAME} + @rm -rf ${LIBNAME} ${CASENAME}.f obj ${BINNAME} ifeq ($(MPI),0) @rm -rf $S/mpif.h endif @@ -255,7 +255,6 @@ $(OBJDIR)/byte.o :$S/byte.c; $(CC) -c $(cFL2) $< -o $@ $(OBJDIR)/chelpers.o :$S/chelpers.c; $(CC) -c $(cFL2) $< -o $@ $(OBJDIR)/fcrs.o :$S/fcrs.c; $(CC) -c $(cFL2) $(GSLIB_IFLAGS) $(HYPRE_IFLAGS) $< -o $@ $(OBJDIR)/crs_xxt.o :$S/crs_xxt.c; $(CC) -c $(cFL2) $(GSLIB_IFLAGS) $< -o $@ -$(OBJDIR)/sparse_cholesky.o :$S/sparse_cholesky.c; $(CC) -c $(cFL2) $(GSLIB_IFLAGS) $< -o $@ $(OBJDIR)/crs_amg.o :$S/crs_amg.c; $(CC) -c $(cFL2) $(GSLIB_IFLAGS) $< -o $@ $(OBJDIR)/fem_amg_preco.o :$S/experimental/fem_amg_preco.c; $(CC) -c $(cFL2) $(GSLIB_IFLAGS) $(HYPRE_IFLAGS) $< -o $@ $(OBJDIR)/crs_hypre.o :$S/experimental/crs_hypre.c; $(CC) -c $(cFL2) $(GSLIB_IFLAGS) $(HYPRE_IFLAGS) $< -o $@ diff --git a/core/makenek.inc b/core/makenek.inc index 38c6ac5f4..74064c8b3 100644 --- a/core/makenek.inc +++ b/core/makenek.inc @@ -246,6 +246,8 @@ esac rm -f $CASENAME.f nek5000 2>/dev/null rm -f ./obj/subuser.o 2>/dev/null +LONGLONGINT=1 + # Check if the compiler adds an underscore to external functions UNDERSCORE=0 cat > test_underscore.f << _ACEOF @@ -283,6 +285,10 @@ if [ $UNDERSCORE -ne 0 ]; then PPLIST="${PPLIST} UNDERSCORE" fi +if [ $LONGLONGINT -ne 0 ]; then + PPLIST="${PPLIST} GLOBAL_LONG_LONG" +fi + if [ $PROFILING -ne 0 ]; then if [ $MPI -ne 0 ]; then PPLIST="${PPLIST} TIMER" @@ -351,14 +357,10 @@ do done # gslib build options -GSLIB_PREFIX="gslib_" -GSLIB_FPREFIX="fgslib_" GSLIB_OPT+=" DESTDIR=.." GSLIB_OPT+=" MPI=$MPI MPIIO=$MPIIO" GSLIB_OPT+=" ADDUS=$UNDERSCORE USREXIT=0 BLAS=2" -GSLIB_OPT+=" PREFIX=$GSLIB_PREFIX FPREFIX=$GSLIB_FPREFIX" -GSLIB_IFLAGS="-DPREFIX=$GSLIB_PREFIX -DFPREFIX=$GSLIB_FPREFIX -DGLOBAL_LONG_LONG" -GSLIB_IFLAGS+=" -I$SOURCE_ROOT_GSLIB/include" +GSLIB_IFLAGS=" -I$SOURCE_ROOT_GSLIB/include" USR_LFLAGS+=" -L$SOURCE_ROOT_GSLIB/lib -lgs" # tweak makefile template diff --git a/core/multimesh.f b/core/multimesh.f index ffa4b577b..56791f013 100644 --- a/core/multimesh.f +++ b/core/multimesh.f @@ -38,9 +38,6 @@ subroutine neknek_setup integer nfld_neknek common /inbc/ nfld_neknek - call fix_geom - call geom_reset(1) ! recompute Jacobians, etc. - if (icalld.eq.0.and.nid.eq.0) write(6,*) 'setup neknek' if (nsessmax.eq.1) @@ -49,23 +46,25 @@ subroutine neknek_setup call setup_neknek_wts if (icalld.eq.0) then - nfld_neknek = ldim+nfield - call nekneksanchk + ! just in case we call setup from usrdat2 + call fix_geom + call geom_reset(1) + call set_intflag call neknekmv if (nid.eq.0) write(6,*) 'session id:', idsess if (nid.eq.0) write(6,*) 'extrapolation order:', ninter if (nid.eq.0) write(6,*) 'nfld_neknek:', nfld_neknek - endif - nfld_min = iglmin_ms(nfld_neknek,1) - nfld_max = iglmax_ms(nfld_neknek,1) - if (nfld_min .ne. nfld_max) then - nfld_neknek = nfld_min - if (nid.eq.0) write(6,*) - $ 'WARNING: reset nfld_neknek to ', nfld_neknek + nfld_min = iglmin_ms(nfld_neknek,1) + nfld_max = iglmax_ms(nfld_neknek,1) + if (nfld_min .ne. nfld_max) then + nfld_neknek = nfld_min + if (nid.eq.0) write(6,*) + $ 'WARNING: reset nfld_neknek to ', nfld_neknek + endif endif - + c Figure out the displacement for the first mesh call setup_int_neknek(dxf,dyf,dzf) !sets up interpolation for 2 meshes diff --git a/core/partitioner.c b/core/partitioner.c index 7f5e6ac9a..2948b1d0f 100644 --- a/core/partitioner.c +++ b/core/partitioner.c @@ -157,7 +157,7 @@ int parMETIS_partMesh(int *part, long long *vl, int nel, int nv, int *opt, comm_ #define fpartMesh FORTRAN_UNPREFIXED(fpartmesh,FPARTMESH) void fpartMesh(long long *el, long long *vl, const int *lelt, int *nell, - const int *nve, comm_ext *fcomm, int *rtval) + const int *nve, int *fcomm, int *rtval) { struct comm comm; struct crystal cr; diff --git a/core/sparse_cholesky.c b/core/sparse_cholesky.c deleted file mode 100644 index 4098b020a..000000000 --- a/core/sparse_cholesky.c +++ /dev/null @@ -1,175 +0,0 @@ -#include -#include -#include -#include -#include "c99.h" -#include "name.h" -#include "fail.h" -#include "types.h" -#include "mem.h" -#include "sort.h" - -#define sparse_cholesky_factor PREFIXED_NAME(sparse_cholesky_factor) -#define sparse_cholesky_solve PREFIXED_NAME(sparse_cholesky_solve ) -#define sparse_cholesky_free PREFIXED_NAME(sparse_cholesky_free ) - -/* factors: L is in CSR format - D is a diagonal matrix stored as a vector - actual factorization is: - - -1 T - A = (I-L) D (I-L) - - -1 -T -1 - A = (I-L) D (I-L) - - (triangular factor is unit diagonal; the diagonal is not stored) -*/ -struct sparse_cholesky { - uint n, *Lrp, *Lj; - double *L, *D; -}; - -/* - symbolic factorization: finds the sparsity structure of L - - uses the concept of elimination tree: - the parent of node j is node i when L(i,j) is the first - non-zero in column j below the diagonal (i>j) - L's structure is discovered row-by-row; the first time - an entry in column j is set, it must be the parent - - the nonzeros in L are the nonzeros in A + paths up the elimination tree - - linear in the number of nonzeros of L -*/ -static void factor_symbolic(uint n, const uint *Arp, const uint *Aj, - struct sparse_cholesky *out, buffer *buf) -{ - uint *visit = tmalloc(uint,2*n), *parent = visit+n; - uint *Lrp, *Lj; - uint i,nz=0; - - out->n=n; - - for(i=0;i=i) break; - for(;visit[j]!=i;j=parent[j]) { - ++nz, visit[j]=i; - if(parent[j]==n) { parent[j]=i; break; } - } - } - } - - Lrp=out->Lrp=tmalloc(uint,n+1+nz); - Lj =out->Lj =Lrp+n+1; - - Lrp[0]=0; - for(i=0;i=i) break; - for(;visit[j]!=i;j=parent[j]) Ljr[count++]=j, visit[j]=i; - } - sortv(Ljr, Ljr,count,sizeof(uint), buf); - Lrp[i+1]=Lrp[i]+count; - } - free(visit); -} - -/* - numeric factorization: - - L is built row-by-row, using: ( ' indicates transpose ) - - - [ A r ] = [ (I-L) ] [ D^(-1) ] [ (I-L)' -s ] - [ r' a ] [ -s' 1 ] [ 1/d ] [ 1 ] - - = [ A (I-L) D^(-1) (-s) ] - [ r' s' D^(-1) s + 1/d ] - - so, if r' is the next row of A, up to but excluding the diagonal, - then the next row of L, s', obeys - - r = - (I-L) D^(-1) s - - let y = (I-L)^(-1) (-r) - then s = D y, and d = 1/(s' y) - -*/ -static void factor_numeric(uint n, const uint *Arp, const uint *Aj, - const double *A, - struct sparse_cholesky *out, - uint *visit, double *y) -{ - const uint *Lrp=out->Lrp, *Lj=out->Lj; - double *D, *L; - uint i; - - D=out->D=tmalloc(double,n+Lrp[n]); - L=out->L=D+n; - - for(i=0;i=i) { if(j==i) a=A[p]; break; } - y[j]=-A[p]; - } - for(p=Lrp[i],pe=Lrp[i+1];p!=pe;++p) { - uint q,qe,j=Lj[p]; double lij,yj=y[j]; - for(q=Lrp[j],qe=Lrp[j+1];q!=qe;++q) { - uint k=Lj[q]; if(visit[k]==i) yj+=L[q]*y[k]; - } - y[j]=yj; - L[p]=lij=D[j]*yj; - a-=yj*lij; - } - D[i]=1/a; - } -} - -/* x = A^(-1) b; works when x and b alias */ -void sparse_cholesky_solve( - double *x, const struct sparse_cholesky *fac, double *b) -{ - const uint n=fac->n, *Lrp=fac->Lrp, *Lj=fac->Lj; - const double *L=fac->L, *D=fac->D; - uint i, p,pe; - for(i=0;iptr,n_uints_as_dbls+(double*)buf->ptr); -} - -void sparse_cholesky_free(struct sparse_cholesky *fac) -{ - free(fac->Lrp); fac->Lj=fac->Lrp=0; - free(fac->D); fac->L =fac->D =0; -} - diff --git a/core/sparse_cholesky.h b/core/sparse_cholesky.h deleted file mode 100644 index f31cd8a7e..000000000 --- a/core/sparse_cholesky.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef SPARSE_CHOLESKY_H -#define SPARSE_CHOLESKY_H - -#if !defined(TYPES_H) || !defined(MEM_H) -#warning "sparse_cholesky.h" requires "types.h" and "mem.h" -#endif - -#define sparse_cholesky_factor PREFIXED_NAME(sparse_cholesky_factor) -#define sparse_cholesky_solve PREFIXED_NAME(sparse_cholesky_solve ) -#define sparse_cholesky_free PREFIXED_NAME(sparse_cholesky_free ) - -struct sparse_cholesky { - uint n, *Lrp, *Lj; - double *L, *D; -}; - -/* input data is the usual CSR - matrix is n by n - Arp has n+1 elements - elements of row i are A [Arp[i]], ..., A [Arp[i+1]-1] - in columns Aj[Arp[i]], ..., Aj[Arp[i+1]-1] -*/ -void sparse_cholesky_factor(uint n, const uint *Arp, const uint *Aj, - const double *A, - struct sparse_cholesky *out, buffer *buf); - -/* x = A^(-1) b; works when x and b alias */ -void sparse_cholesky_solve( - double *x, const struct sparse_cholesky *fac, double *b); - -void sparse_cholesky_free(struct sparse_cholesky *fac); - -#endif - diff --git a/examples b/examples index 1d78a5e05..de98d7cb5 160000 --- a/examples +++ b/examples @@ -1 +1 @@ -Subproject commit 1d78a5e0549e9825ded46e2273c067efe8d5b319 +Subproject commit de98d7cb5e2c88affcb376ba6d3baacf55d633cd