From b189b6c15816d94b98283bea8efe93140ed8fc69 Mon Sep 17 00:00:00 2001 From: Alex Richert Date: Wed, 31 Jan 2024 14:02:40 -0800 Subject: [PATCH] Replace Numerial Recipes LU-decomp routines with LAPACK ones --- .github/workflows/Intel.yml | 2 +- .github/workflows/Linux.yml | 1 + .github/workflows/Spack.yml | 4 +- .github/workflows/developer.yml | 2 +- CMakeLists.txt | 5 ++ spack/package.py | 1 + src/CMakeLists.txt | 3 +- src/lapack_gen.F | 116 -------------------------------- src/splat.F | 22 ++++-- 9 files changed, 31 insertions(+), 125 deletions(-) delete mode 100644 src/lapack_gen.F diff --git a/.github/workflows/Intel.yml b/.github/workflows/Intel.yml index 22cec949..2c08edeb 100644 --- a/.github/workflows/Intel.yml +++ b/.github/workflows/Intel.yml @@ -37,7 +37,7 @@ jobs: rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB echo "deb https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list sudo apt-get update - sudo apt-get install intel-oneapi-openmp intel-oneapi-compiler-fortran-2023.2.1 intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic-2023.2.1 + sudo apt-get install intel-oneapi-openmp intel-oneapi-compiler-fortran-2023.2.1 intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic-2023.2.1 intel-oneapi-mkl-devel-2023.2.0 echo "source /opt/intel/oneapi/setvars.sh" >> ~/.bash_profile - name: checkout diff --git a/.github/workflows/Linux.yml b/.github/workflows/Linux.yml index 54a38060..e3a3f46a 100644 --- a/.github/workflows/Linux.yml +++ b/.github/workflows/Linux.yml @@ -33,6 +33,7 @@ jobs: - name: build run: | + sudo apt install libopenblas-dev cd ip mkdir build cd build diff --git a/.github/workflows/Spack.yml b/.github/workflows/Spack.yml index 6f3558d5..912246d7 100644 --- a/.github/workflows/Spack.yml +++ b/.github/workflows/Spack.yml @@ -33,6 +33,7 @@ jobs: - name: spack-build-and-test run: | + sudo apt install libopenblas-dev git clone -c feature.manyFiles=true https://github.com/spack/spack . spack/share/spack/setup-env.sh spack env create ip-env @@ -42,7 +43,8 @@ jobs: spack add ip@develop%gcc@11 ${{ matrix.variants }} target=x86_64 precision=$(echo ${{ matrix.variants }} | grep -oP " precision=\K[4d8]") if [ "$precision" == "d" ]; then spack add grib-util@develop ; fi - spack external find cmake gmake + spack external find cmake gmake openblas + spack config add "packages:openblas:buildable:false" spack concretize # Run installation and run CTest suite spack install --verbose --fail-fast --test root diff --git a/.github/workflows/developer.yml b/.github/workflows/developer.yml index 2c1ff20e..cd802787 100644 --- a/.github/workflows/developer.yml +++ b/.github/workflows/developer.yml @@ -25,7 +25,7 @@ jobs: - name: Install Dependencies run: | sudo apt-get update - sudo apt-get install doxygen + sudo apt-get install doxygen libopenblas-dev python3 -m pip install gcovr - name: checkout diff --git a/CMakeLists.txt b/CMakeLists.txt index b7803f43..ce09db9c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,11 @@ if(OPENMP) find_package(OpenMP REQUIRED COMPONENTS Fortran) endif() +if(CMAKE_Fortran_COMPILER_ID MATCHES "^(Intel|IntelLLVM)$") + set(BLA_VENDOR Intel10_64lp_seq) +endif() +find_package(LAPACK) + # Set compiler flags. if(CMAKE_Fortran_COMPILER_ID MATCHES "^(Intel|IntelLLVM)$") set(CMAKE_Fortran_FLAGS "-g -traceback -assume byterecl -fp-model strict -fpp -auto ${CMAKE_Fortran_FLAGS}") diff --git a/spack/package.py b/spack/package.py index b0b9c844..4854c5c3 100644 --- a/spack/package.py +++ b/spack/package.py @@ -65,6 +65,7 @@ class Ip(CMakePackage): depends_on("sp precision=4", when="@4.1:4 precision=4") depends_on("sp precision=d", when="@4.1:4 precision=d") depends_on("sp precision=8", when="@4.1:4 precision=8") + depends_on("lapack", when="@develop") def cmake_args(self): args = [ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b979c148..a3b0fff9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -14,7 +14,7 @@ ip_mercator_grid_mod.F90 ip_polar_stereo_grid_mod.F90 ip_rot_equid_cylind_egrid_mod.F90 ip_rot_equid_cylind_grid_mod.F90 ip_constants_mod.F90 ip_grids_mod.F90 ip_grid_factory_mod.F90 ip_interpolators_mod.F90 earth_radius_mod.F90 polfix_mod.F90 -fftpack.F lapack_gen.F ncpus.F spanaly.f spdz2uv.f speps.f spfft1.f spffte.f +fftpack.F ncpus.F spanaly.f spdz2uv.f speps.f spfft1.f spffte.f spfftpt.f splaplac.f splat.F splegend.f sppad.f spsynth.f sptezd.f sptez.f sptezmd.f sptezm.f sptezmv.f sptezv.f sptgpm.f sptgpmv.f sptgps.f sptgpsv.f sptgpt.f sptgptv.f sptrand.f sptran.f sptranf0.f sptranf1.f sptranf.f sptranfv.f @@ -77,6 +77,7 @@ foreach(kind ${kinds}) if(OpenMP_Fortran_FOUND) target_link_libraries(${lib_name} PUBLIC OpenMP::OpenMP_Fortran) endif() + target_link_libraries(${lib_name} PUBLIC LAPACK::LAPACK) list(APPEND LIB_TARGETS ${lib_name}) diff --git a/src/lapack_gen.F b/src/lapack_gen.F deleted file mode 100644 index 36db1e46..00000000 --- a/src/lapack_gen.F +++ /dev/null @@ -1,116 +0,0 @@ -C> @file -C> @brief Two Numerical Recipes routines for matrix inversion From Numerical Recipes. -C> -C> ### Program History Log -C> Date | Programmer | Comments -C> -----|------------|--------- -C> 2012-11-05 | E.Mirvis | separated this generic LU from the splat.F - -C> Solves a system of linear equations, follows call to ludcmp(). -C> -C> @param A -C> @param N -C> @param NP -C> @param INDX -C> @param B - SUBROUTINE LUBKSB(A,N,NP,INDX,B) - REAL A(NP,NP),B(N) - INTEGER INDX(N) - II=0 - DO 12 I=1,N - LL=INDX(I) - SUM=B(LL) - B(LL)=B(I) - IF (II.NE.0)THEN - DO 11 J=II,I-1 - SUM=SUM-A(I,J)*B(J) - 11 CONTINUE - ELSE IF (SUM.NE.0.) THEN - II=I - ENDIF - B(I)=SUM - 12 CONTINUE - DO 14 I=N,1,-1 - SUM=B(I) - IF(I.LT.N)THEN - DO 13 J=I+1,N - SUM=SUM-A(I,J)*B(J) - 13 CONTINUE - ENDIF - B(I)=SUM/A(I,I) - 14 CONTINUE - RETURN - END - -C> Replaces an NxN matrix a with the LU decomposition. -C> -C> @param A -C> @param N -C> @param NP -C> @param INDX - SUBROUTINE LUDCMP(A,N,NP,INDX) -C PARAMETER (NMAX=400,TINY=1.0E-20) - PARAMETER (TINY=1.0E-20) -C==EM==^^^ -C - REAL A(NP,NP),VV(N),D -C REAL A(NP,NP),VV(NMAX),D -C==EM==^^^ - INTEGER INDX(N) - D=1. - DO 12 I=1,N - AAMAX=0. - DO 11 J=1,N - IF (ABS(A(I,J)).GT.AAMAX) AAMAX=ABS(A(I,J)) - 11 CONTINUE - IF (AAMAX.EQ.0.) print *, 'SINGULAR MATRIX.' - VV(I)=1./AAMAX - 12 CONTINUE - DO 19 J=1,N - IF (J.GT.1) THEN - DO 14 I=1,J-1 - SUM=A(I,J) - IF (I.GT.1)THEN - DO 13 K=1,I-1 - SUM=SUM-A(I,K)*A(K,J) - 13 CONTINUE - A(I,J)=SUM - ENDIF - 14 CONTINUE - ENDIF - AAMAX=0. - DO 16 I=J,N - SUM=A(I,J) - IF (J.GT.1)THEN - DO 15 K=1,J-1 - SUM=SUM-A(I,K)*A(K,J) - 15 CONTINUE - A(I,J)=SUM - ENDIF - DUM=VV(I)*ABS(SUM) - IF (DUM.GE.AAMAX) THEN - IMAX=I - AAMAX=DUM - ENDIF - 16 CONTINUE - IF (J.NE.IMAX)THEN - DO 17 K=1,N - DUM=A(IMAX,K) - A(IMAX,K)=A(J,K) - A(J,K)=DUM - 17 CONTINUE - D=-D - VV(IMAX)=VV(J) - ENDIF - INDX(J)=IMAX - IF(J.NE.N)THEN - IF(A(J,J).EQ.0.)A(J,J)=TINY - DUM=1./A(J,J) - DO 18 I=J+1,N - A(I,J)=A(I,J)*DUM - 18 CONTINUE - ENDIF - 19 CONTINUE - IF(A(N,N).EQ.0.)A(N,N)=TINY - RETURN - END diff --git a/src/splat.F b/src/splat.F index 3b27073e..e6269640 100644 --- a/src/splat.F +++ b/src/splat.F @@ -64,7 +64,7 @@ SUBROUTINE SPLAT(IDRT,JMAX,SLAT,WLAT) $ 146.870307625, 150.011882457, 153.153458019, 156.295034268 / REAL:: DLT,D1=1. REAL AWORK((JMAX+1)/2,((JMAX+1)/2)),BWORK(((JMAX+1)/2)) - INTEGER:: JHE,JHO,J0=0 + INTEGER:: JHE,JHO,J0=0, INFO INTEGER IPVT((JMAX+1)/2) PARAMETER(PI=3.14159265358979,C=(1.-(2./PI)**2)*0.25) C - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -133,8 +133,14 @@ SUBROUTINE SPLAT(IDRT,JMAX,SLAT,WLAT) BWORK(JS)=-D1/(4*(JS-1)**2-1) ENDDO - call ludcmp(awork,jho,jhe,ipvt) - call lubksb(awork,jho,jhe,ipvt,bwork) + ! Call LAPACK routines +#if (LSIZE==4) + CALL SGETRF(JHO, JHO, AWORK, JHE, IPVT, INFO) + CALL SGETRS('N', JHO, 1, AWORK, JHE, IPVT, BWORK, JHO, INFO) +#else + CALL DGETRF(JHO, JHO, AWORK, JHE, IPVT, INFO) + CALL DGETRS('N', JHO, 1, AWORK, JHE, IPVT, BWORK, JHO, INFO) +#endif WLAT(1)=0. DO J=1,JHO @@ -170,8 +176,14 @@ SUBROUTINE SPLAT(IDRT,JMAX,SLAT,WLAT) BWORK(JS)=-D1/(4*(JS-1)**2-1) ENDDO - call ludcmp(awork,jho,jhe,ipvt,d) - call lubksb(awork,jho,jhe,ipvt,bwork) + ! Call LAPACK routines +#if (LSIZE==4) + CALL SGETRF(JHO, JHO, AWORK, JHE, IPVT, INFO) + CALL SGETRS('N', JHO, 1, AWORK, JHE, IPVT, BWORK, JHO, INFO) +#else + CALL DGETRF(JHO, JHO, AWORK, JHE, IPVT, INFO) + CALL DGETRS('N', JHO, 1, AWORK, JHE, IPVT, BWORK, JHO, INFO) +#endif WLAT(1)=0. DO J=1,JHO