Skip to content

Commit

Permalink
NAG Fortran compiler (#33)
Browse files Browse the repository at this point in the history
* add makefile for NAG compiler and tweak some for the compilation

* remove added files by mistake

* fixing make.incs and include path for nag compiler

---------

Co-authored-by: KosukeSugita <[email protected]>
Co-authored-by: Manas Rachh <[email protected]>
Co-authored-by: Manas Rachh <[email protected]>
  • Loading branch information
4 people authored Feb 26, 2024
1 parent 6fcc6da commit 71932b4
Show file tree
Hide file tree
Showing 30 changed files with 188 additions and 91 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ lib/*
*.log
*.sh
sbatch*
make.inc
2 changes: 1 addition & 1 deletion docs/fortran-c.rst
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ and the associated pressure
Here $x_{j}$ are the source locations,
$\sigma_{j}$ are the Stokeslet densities,
$\nu_{j}$ are the stresslet orientation vectors, $\mu_{j}$
are the stresslet densities, and rhw xollwxrion of $x$
are the stresslet densities, and the locations $x$
at which the velocity and its gradient are evaluated are referred to
as the evaluation points.

Expand Down
2 changes: 1 addition & 1 deletion make.inc.icc
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
CC=icc
CXX=icpc
FC=ifort
FFLAGS= -fPIC -O3 -march=native -funroll-loops -mkl
FFLAGS= -fPIC -O3 -march=native -funroll-loops -mkl -w
LIBS=-lm
CLIBS = -lm -ldl -lifcore

Expand Down
6 changes: 2 additions & 4 deletions make.inc.macos.gnu
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# makefile overrides
# OS: macOS
# Compiler: gfortran X.X
# Compiler: gfortran X.X/Clang
# OpenMP: enabled
#

Expand All @@ -22,8 +22,6 @@ OMPLIBS = -lgomp
# MATLAB interface:
FDIR=$$(dirname `gfortran --print-file-name libgfortran.dylib`)
MFLAGS +=-L${FDIR}
MEX = $(shell ls -d /Applications/MATLAB_R20**.app)/bin/mex
#LIBS = -lm -lstdc++.6
#MEXLIBS= -lm -lstdc++.6 -lgfortran -ldl
MEX = $(shell ls -d /Applications/MATLAB_R* | sort | tail -1)/bin/mex


8 changes: 7 additions & 1 deletion make.inc.macos.intel
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
CC=icc
CXX=icpc
FC=ifort
FFLAGS= -fPIC -O3 -march=native -funroll-loops -qmkl

FFLAGS= -fPIC -O3 -march=native -funroll-loops -mkl -std=legacy -w
LIBS=
#CLIBS = -lm -ldl -lifcore
CLIBS = -lm -ldl
Expand All @@ -28,4 +29,9 @@ endif
OMPFLAGS = -qopenmp
OMPLIBS = -qopenmp

# MATLAB interface:
FDIR=$$(dirname `gfortran --print-file-name libgfortran.dylib`)
MFLAGS +=-L${FDIR}
MEX = $(shell ls -d /Applications/MATLAB_R* | sort | tail -1)/bin/mex


30 changes: 30 additions & 0 deletions make.inc.macos.nag
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# make.inc for NAG Fortran compiler
# Online documentation: https://www.nag.com/nagware/np/r71_doc/manual/compiler_2_4.html#OPTIONS

FC=nagfor

# Path to the NAG compiler's runtime libraries
LIB_NAG = /usr/local/lib/NAG_Fortran

# Brief descriptions of specified options below:
# -PIC: produce position-independent code
# -O2: optimization at a normal level
# -Ounroll=1: the depth of loop-unrolling
# -f90_sign: use the Fortran 77/90 version of the SIGN intrinsic instead of the Fortran 95 one
# -dcfuns: enable recognition of non-standard double precision complex intrinsic functions.
# -dusty: allows the compilation and execution of legacy software.
# -w=obs: suppresses warning messages about the use of obsolescent features.
# -w=x77: suppresses extension warnings for obsolete but common extensions to Fortran 77.
# -w=unreffed: suppresses warning messages about variables set but never referenced.
# -w=unused: suppresses warning messages about unused entities.
# -ieee=full: enables all IEEE arithmetic facilities, including non-stop arithmetic.

# Main compile command for NAG Fortran compiler
FFLAGS = -PIC -O2 -Ounroll=1 -f90_sign -dcfuns -dusty -w=obs -w=x77 -w=unreffed -w=unused -ieee=full

# Flags overwritten in makefile
OMPFLAGS = -openmp
# OMPLIBS = -lf71omp64 -L$(LIB_NAG)
OMPLIBS = -lf71omp64 -lf71rts -L$(LIB_NAG)
LIBS = -lf71rts -L$(LIB_NAG)
CLIBS = -lm -ldl -L$(LIB_NAG)
FFLAGS_DYN = -PIC
2 changes: 1 addition & 1 deletion make.inc.windows.mingw
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# OpenMP: default enabled unless specified
#

FFLAGS= -fPIC -O3 -funroll-loops -std=legacy
FFLAGS= -fPIC -O3 -funroll-loops -std=legacy -w

DYNAMICLIB = $(LIBNAME).dll
LIMPLIB = $(LIBNAME)_dll.lib
Expand Down
12 changes: 7 additions & 5 deletions makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ FC=gfortran


# set compiler flags for c and fortran
FFLAGS= -fPIC -O3 -march=native -funroll-loops -std=legacy
FFLAGS= -fPIC -O3 -march=native -funroll-loops -std=legacy -w
FFLAGS_DYN= -shared -fPIC
CFLAGS= -fPIC -O3 -march=native -funroll-loops -std=c99
CXXFLAGS= -std=c++11 -DSCTL_PROFILE=-1 -fPIC -O3 -march=native -funroll-loops

Expand Down Expand Up @@ -86,6 +87,7 @@ endif
# vectorized kernel directory
SRCDIR = ./vec-kernels/src
INCDIR = ./vec-kernels/include
FINCDIR = ./src/Helmholtz
LIBDIR = lib-static

# objects to compile
Expand Down Expand Up @@ -192,10 +194,10 @@ usage:
$(CXX) -c $(CXXFLAGS) $< -o $@
%.o: %.c %.h
$(CC) -c $(CFLAGS) $< -o $@
%.o: %.f %.h
$(FC) -c $(FFLAGS) $< -o $@
%.o: %.f
$(FC) -c $(FFLAGS) -I$(FINCDIR) $< -o $@
%.o: %.f90
$(FC) -c $(FFLAGS) $< -o $@
$(FC) -c $(FFLAGS) -I$(FINCDIR) $< -o $@

# build the library...
lib: $(STATICLIB) $(DYNAMICLIB)
Expand Down Expand Up @@ -224,7 +226,7 @@ $(STATICLIB): $(OBJS)
ar rcs $(STATICLIB) $(OBJS)
mv $(STATICLIB) lib-static/
$(DYNAMICLIB): $(OBJS)
$(FC) -shared -fPIC $(OBJS) -o $(DYNAMICLIB) $(DYLIBS)
$(FC) $(FFLAGS_DYN) $(OBJS) -o $(DYNAMICLIB) $(DYLIBS)
mv $(DYNAMICLIB) lib/
[ ! -f $(LIMPLIB) ] || mv $(LIMPLIB) lib/

Expand Down
2 changes: 1 addition & 1 deletion src/Common/fmmcommon.f
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,8 @@ subroutine ireorderi(ndim,n,arr,arrsort,iarr)
c
subroutine drescale(n,a,r)
implicit none
real *8 a(n),r
integer i,n
real *8 a(n),r

C$OMP PARALLEL DO DEFAULT(SHARED)
do i=1,n
Expand Down
2 changes: 1 addition & 1 deletion src/Common/tree_routs3d.f
Original file line number Diff line number Diff line change
Expand Up @@ -1125,11 +1125,11 @@ subroutine getlist4pwdirtest(dir,censrc,centrg,boxsize)
subroutine subdividebox(pos,npts,center,boxsize,
1 isorted,iboxfl,subcenters)
implicit none
integer npts
double precision pos(3,npts)
double precision center(3)
double precision subcenters(3,8)
double precision boxsize
integer npts
integer isorted(*)
integer iboxfl(2,8)

Expand Down
9 changes: 6 additions & 3 deletions src/Common/yrecursion.f
Original file line number Diff line number Diff line change
Expand Up @@ -1430,7 +1430,8 @@ subroutine zylgndrbr(nmax, z, y)
c branch cut at (0,+i), select the lower branch
c of complex square root
c
if( imag(1-z*z) .gt. 0 .and. real(1-z*z) .lt. 0) u=+sqrt(1-z*z)
c if( imag(1-z*z) .gt. 0 .and. real(1-z*z) .lt. 0) u=+sqrt(1-z*z)
if( dimag(1-z*z) .gt. 0 .and. real(1-z*z) .lt. 0) u=+sqrt(1-z*z)
ccc call prin2('in zylgndrbr, u=*', -u, 2)
ccc call prin2('in zylgndrbr, 1-z^2=*', 1-z*z, 2)
c
Expand Down Expand Up @@ -1498,8 +1499,10 @@ subroutine zylgndrsc(nmax, z,scale, ysc)
c
ztmp = 1-z*z
u=-sqrt(ztmp)
if(abs(imag(z)).le.1.0d-16.and.abs(real(z)).gt.1) then
if(imag(u).lt.0) u = dconjg(u)
c if(abs(imag(z)).le.1.0d-16.and.abs(real(z)).gt.1) then
c if(imag(u).lt.0) u = dconjg(u)
if(abs(dimag(z)).le.1.0d-16.and.abs(real(z)).gt.1) then
if(dimag(u).lt.0) u = dconjg(u)
endif
ysc(0,0)=1
do m=0, nmax
Expand Down
14 changes: 7 additions & 7 deletions src/Helmholtz/hfmm3d.f
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,7 @@ subroutine hfmm3dmain(nd,eps,zk,
double complex jsort(nd,0:ntj,-ntj:ntj,nexpc)


integer nboxes
integer *8 iaddr(2,nboxes), lmptot
double precision rmlexp(lmptot)

Expand All @@ -575,7 +576,6 @@ subroutine hfmm3dmain(nd,eps,zk,
integer nterms(0:nlevels)
integer *8 ipointer(8),ltree
integer itree(ltree)
integer nboxes
double precision rscales(0:nlevels)
double precision boxsize(0:nlevels)
integer isrcse(2,nboxes),itargse(2,nboxes),iexpcse(2,nboxes)
Expand Down Expand Up @@ -752,7 +752,7 @@ subroutine hfmm3dmain(nd,eps,zk,

zkiupbound = 12*pi
zkrupbound = 16*pi
zi = imag(zk)
zi = dimag(zk)

ilevcutoff = -1

Expand Down Expand Up @@ -1054,7 +1054,7 @@ subroutine hfmm3dmain(nd,eps,zk,
allocate(iboxlexp(nd*(nterms(ilev)+1)*
1 (2*nterms(ilev)+1),8,nthd))
zk2 = zk*boxsize(ilev)
if(real(zk2).le.zkrupbound.and.imag(zk2).lt.zkiupbound.and.
if(real(zk2).le.zkrupbound.and.dimag(zk2).lt.zkiupbound.and.
1 ilev.gt.ilevcutoff) then
c get new pw quadrature

Expand Down Expand Up @@ -1562,7 +1562,7 @@ subroutine hfmm3dmain(nd,eps,zk,
deallocate(pgboxwexp)


else if((real(zk2).gt.zkrupbound.or.imag(zk2).gt.zkiupbound).
else if((real(zk2).gt.zkrupbound.or.dimag(zk2).gt.zkiupbound).
1 and.ilev.gt.ilevcutoff) then
nquad2 = nterms(ilev)*2.2
if(ifprint.ge.1) print *, "In point and shoot regime"
Expand Down Expand Up @@ -1719,7 +1719,7 @@ subroutine hfmm3dmain(nd,eps,zk,
if(ifcharge.eq.1.and.ifdipole.eq.0) then
do ilev=1,nlevels
zk2 = zk*boxsize(ilev)
if((real(zk2).gt.zkrupbound.or.imag(zk2).gt.zkiupbound).
if((real(zk2).gt.zkrupbound.or.dimag(zk2).gt.zkiupbound).
1 and.ilev.gt.ilevcutoff) then

C$OMP PARALLEL DO DEFAULT(SHARED)
Expand Down Expand Up @@ -1754,7 +1754,7 @@ subroutine hfmm3dmain(nd,eps,zk,
if(ifcharge.eq.0.and.ifdipole.eq.1) then
do ilev=1,nlevels
zk2 = zk*boxsize(ilev)
if((real(zk2).gt.zkrupbound.or.imag(zk2).gt.zkiupbound).
if((real(zk2).gt.zkrupbound.or.dimag(zk2).gt.zkiupbound).
1 and.ilev.gt.ilevcutoff) then

C$OMP PARALLEL DO DEFAULT(SHARED)
Expand Down Expand Up @@ -1789,7 +1789,7 @@ subroutine hfmm3dmain(nd,eps,zk,
if(ifcharge.eq.1.and.ifdipole.eq.1) then
do ilev=1,nlevels
zk2 = zk*boxsize(ilev)
if((real(zk2).gt.zkrupbound.or.imag(zk2).gt.zkiupbound).
if((real(zk2).gt.zkrupbound.or.dimag(zk2).gt.zkiupbound).
1 and.ilev.gt.ilevcutoff) then

C$OMP PARALLEL DO DEFAULT(SHARED)
Expand Down
4 changes: 2 additions & 2 deletions src/Helmholtz/hfmm3d_memest.f
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ subroutine hfmm3d_memest(nd,eps,zk,nsource,source,ifcharge,

zkiupbound = 12*pi
zkrupbound = 16*pi
zi = imag(zkfmm)
zi = dimag(zkfmm)

ilevcutoff = -1

Expand All @@ -391,7 +391,7 @@ subroutine hfmm3d_memest(nd,eps,zk,nsource,source,ifcharge,

do ilev=2,nlevels
zk2 = zkfmm*boxsize(ilev)
if(real(zk2).le.zkrupbound.and.imag(zk2).lt.zkiupbound.and.
if(real(zk2).le.zkrupbound.and.dimag(zk2).lt.zkiupbound.and.
1 ilev.gt.ilevcutoff) then

ier = 0
Expand Down
5 changes: 2 additions & 3 deletions src/Helmholtz/hfmm3d_mps.f90
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ subroutine hfmm3dmain_mps(nd, eps, zk, &
integer :: impolesort(nmpole)

! storage stuff for tree and multipole expansions
integer :: lmptemp
integer :: lmptemp,nboxes
integer *8 :: iaddr(2,nboxes), lmptot
double precision :: rmlexp(lmptot)
double precision :: mptemp(lmptemp)
Expand All @@ -406,7 +406,6 @@ subroutine hfmm3dmain_mps(nd, eps, zk, &
integer :: nterms(0:nlevels)
integer *8 :: ipointer(8)
integer :: itree(ltree)
integer :: nboxes
integer :: mnbors,mnlist1, mnlist2,mnlist3,mnlist4
integer :: isrcse(2,nmpole)
integer, allocatable :: nlist1(:),list1(:,:)
Expand Down Expand Up @@ -819,7 +818,7 @@ subroutine hfmm3dmain_mps(nd, eps, zk, &

! load the necessary quadrature for plane waves
zk2 = zk*boxsize(ilev)
if ( (real(zk2).le.16*pi) .and. (imag(zk2).le.12*pi) &
if ( (real(zk2).le.16*pi) .and. (dimag(zk2).le.12*pi) &
.and. (ifmp .eq. 0) ) then
ier = 0

Expand Down
2 changes: 1 addition & 1 deletion src/Helmholtz/hfmm3dwrap_legacy.f
Original file line number Diff line number Diff line change
Expand Up @@ -195,12 +195,12 @@ subroutine hfmm3dparttarg(ier,iprec,zk,nsource,source,
double complex charge(nsource),dipstr(nsource)
double precision dipvec(3,nsource)

integer ntarg
integer ifpot,iffld,ifpottarg,iffldtarg
double complex pot(nsource),fld(3,nsource)
double complex pottarg(ntarg),fldtarg(3,ntarg)

integer nd,ifpgh,ifpghtarg
integer ntarg
double precision targ(3,ntarg)
double complex, allocatable :: dipvec_in(:,:)
double complex, allocatable :: pottmp(:),gradtmp(:,:)
Expand Down
Loading

0 comments on commit 71932b4

Please sign in to comment.