diff --git a/Makefile.in b/Makefile.in index 9b05554f4..0140a468c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -111,7 +111,6 @@ SCOTCH_FLAGS = @FC_DEFINE@USE_SCOTCH $(SCOTCH_INC) @COND_CUDA_TRUE@CUDA = yes @COND_CUDA_FALSE@CUDA = no -# CUDA version 5x & 6x @COND_CUDA5_TRUE@CUDA5 = yes @COND_CUDA5_FALSE@CUDA5 = no @@ -121,12 +120,18 @@ SCOTCH_FLAGS = @FC_DEFINE@USE_SCOTCH $(SCOTCH_INC) @COND_CUDA7_TRUE@CUDA7 = yes @COND_CUDA7_FALSE@CUDA7 = no +@COND_CUDA8_TRUE@CUDA8 = yes +@COND_CUDA8_FALSE@CUDA8 = no + +@COND_CUDA9_TRUE@CUDA9 = yes +@COND_CUDA9_FALSE@CUDA9 = no + # CUDA compilation with linking @COND_CUDA_PLUS_TRUE@CUDA_PLUS = yes @COND_CUDA_PLUS_FALSE@CUDA_PLUS = no # default cuda libraries -# runtime library -lcudart -lcuda and -lcublas needed +# runtime library -lcudart needed, others are optional -lcuda -lcublas CUDA_FLAGS = @CUDA_FLAGS@ CUDA_INC = @CUDA_CPPFLAGS@ -I${SETUP} @@ -144,18 +149,27 @@ CUDA_LINK = @CUDA_LDFLAGS@ @CUDA_LIBS@ -lstdc++ # Kepler (cuda5) : -gencode=arch=compute_35,code=sm_35 # Kepler (cuda6.5,K80): -gencode=arch=compute_37,code=sm_37 # Maxwell (cuda6.5+/cuda7,Quadro K2200): -gencode=arch=compute_50,code=sm_50 +# Pascal (cuda8,P100): -gencode=arch=compute_60,code=sm_60 +# Volta (cuda9,V100): -gencode=arch=compute_70,code=sm_70 GENCODE_20 = -gencode=arch=compute_20,code=\"sm_20,compute_20\" GENCODE_30 = -gencode=arch=compute_30,code=\"sm_30,compute_30\" GENCODE_35 = -gencode=arch=compute_35,code=\"sm_35,compute_35\" GENCODE_37 = -gencode=arch=compute_37,code=\"sm_37\" GENCODE_50 = -gencode=arch=compute_50,code=\"sm_50\" - +GENCODE_60 = -gencode=arch=compute_60,code=\"sm_60,compute_60\" +GENCODE_70 = -gencode=arch=compute_70,code=\"sm_70,compute_70\" + +# cuda preprocessor flag +# CUDA version 9.0 (--with-cuda=cuda9) +@COND_CUDA_TRUE@@COND_CUDA9_TRUE@GENCODE = $(GENCODE_70) $(FC_DEFINE)GPU_DEVICE_Volta +# CUDA version 8.0 (--with-cuda=cuda8) +@COND_CUDA_TRUE@@COND_CUDA8_TRUE@GENCODE = $(GENCODE_60) $(FC_DEFINE)GPU_DEVICE_Pascal # CUDA version 7.x 
-@COND_CUDA_TRUE@@COND_CUDA7_TRUE@GENCODE = $(GENCODE_50) +@COND_CUDA_TRUE@@COND_CUDA7_TRUE@GENCODE = $(GENCODE_50) $(FC_DEFINE)GPU_DEVICE_Maxwell # CUDA version 6.5 -@COND_CUDA_TRUE@@COND_CUDA6_TRUE@GENCODE = $(GENCODE_37) +@COND_CUDA_TRUE@@COND_CUDA6_TRUE@GENCODE = $(GENCODE_37) $(FC_DEFINE)GPU_DEVICE_K80 # CUDA version 5.x -@COND_CUDA_TRUE@@COND_CUDA5_TRUE@GENCODE = $(GENCODE_35) +@COND_CUDA_TRUE@@COND_CUDA5_TRUE@GENCODE = $(GENCODE_35) $(FC_DEFINE)GPU_DEVICE_K20 # CUDA version 4.x @COND_CUDA_TRUE@@COND_CUDA_PLUS_FALSE@GENCODE = $(GENCODE_20) @@ -292,10 +306,18 @@ help: ####################################### +${SETUP}/version.fh: @GIT_VERSION_DEPS@ + @echo "GEN $@" + @echo "! This file is generated by Make. Do not edit this file!" > $@ + @echo "character(len=*), parameter :: git_version = \"$$(cd ${S_TOP} && git describe --tags)\"" >> $@ + +####################################### + # Get dependencies and rules for building stuff include $(patsubst %, ${S_TOP}/src/%/rules.mk, $(SUBDIRS)) + ####################################### ## @@ -323,6 +345,6 @@ $(foreach dir, $(SUBDIRS), $(eval $(call shortcut,$(dir)))) test : tests # Other old shortcuts -mesh : $E/xmeshfem2D -spec : $E/xspecfem2D +mesh: $E/xmeshfem2D +spec: $E/xspecfem2D .PHONY: mesh spec diff --git a/configure b/configure index c1945dd87..a507ad4a5 100755 --- a/configure +++ b/configure @@ -623,6 +623,7 @@ ac_includes_default="\ ac_subst_vars='LTLIBOBJS LIBOBJS +GIT_VERSION_DEPS CUDA_LIBS CUDA_LDFLAGS CUDA_CPPFLAGS @@ -674,6 +675,7 @@ CPPFLAGS CFLAGS CC FCLIBS +srcdir FCENV OBJEXT EXEEXT @@ -683,6 +685,8 @@ FCFLAGS FC COND_CUDA_PLUS_FALSE COND_CUDA_PLUS_TRUE +COND_CUDA9_FALSE +COND_CUDA9_TRUE COND_CUDA8_FALSE COND_CUDA8_TRUE COND_CUDA7_FALSE @@ -2430,6 +2434,140 @@ ac_config_headers="$ac_config_headers setup/config.h" +# -*- Autoconf -*- + +## --------------------------------------------------------- ## +## Autoconf macros for functions missing in older versions. 
## +## --------------------------------------------------------- ## + +# Missing in autoconf < 2.60 + + + +# +# The following two macros are from autoconf 2.68 (which is still new). +# + +# AC_FC_PP_SRCEXT(EXT, [ACTION-IF-SUCCESS], [ACTION-IF-FAILURE]) +# -------------------------------------------------------------- +# Like AC_FC_SRCEXT, set the source-code extension used in Fortran (FC) tests +# to EXT (which defaults to f). Also, look for any necessary additional +# FCFLAGS needed to allow this extension for preprocessed Fortran, and store +# them in the output variable FCFLAGS_<EXT> (e.g. FCFLAGS_f90 for EXT=f90). +# If successful, call ACTION-IF-SUCCESS. If unable to compile preprocessed +# source code with EXT, call ACTION-IF-FAILURE, which defaults to failing with +# an error message. +# +# Some compilers allow preprocessing with either a Fortran preprocessor or +# with the C preprocessor (cpp). Prefer the Fortran preprocessor, to deal +# correctly with continuation lines, `//' (not a comment), and preserve white +# space (for fixed form). +# +# (The flags for the current source-code extension, if any, are stored in +# $ac_fcflags_srcext and used automatically in subsequent autoconf tests.) +# +# For ordinary extensions like f90, etcetera, the modified FCFLAGS +# are needed for IBM's xlf*. Also, for Intel's ifort compiler, the +# $FCFLAGS_<EXT> variable *must* go immediately before the source file on the +# command line, unlike other $FCFLAGS. Ugh. 
+# +# Known extensions that enable preprocessing by default, and flags to force it: +# GNU: .F .F90 .F95 .F03 .F08, -cpp for most others, +# -x f77-cpp-input for .f77 .F77; -x f95-cpp-input for gfortran < 4.4 +# SGI: .F .F90, -ftpp or -cpp for .f .f90, -E write preproc to stdout +# -macro_expand enable macro expansion everywhere (with -ftpp) +# -P preproc only, save in .i, no #line's +# SUN: .F .F95, -fpp for others; -xpp={fpp,cpp} for preprocessor selection +# -F preprocess only (save in lowercase extension) +# IBM: .F .F77 .F90 .F95 .F03, -qsuffix=cpp=EXT for extension .EXT to invoke cpp +# -WF,-qnofpp -WF,-qfpp=comment:linecont:nocomment:nolinecont +# -WF,-qlanglvl=classic or not -qnoescape (trigraph problems) +# -d no #line in output, -qnoobject for preprocessing only (output in .f) +# -q{no,}ppsuborigarg substitute original macro args before expansion +# HP: .F, +cpp={yes|no|default} use cpp, -cpp, +cpp_keep save in .i/.i90 +# PGI: -Mpreprocess +# Absoft: .F .FOR .F90 .F95, -cpp for others +# Cray: .F .F90 .FTN, -e Z for others; -F enable macro expansion everywhere +# Intel: .F .F90, -fpp for others, but except for .f and .f90, -Tf may also be +# needed right before the source file name +# PathScale: .F .F90 .F95, -ftpp or -cpp for .f .f90 .f95 +# -macro_expand for expansion everywhere, -P for no #line in output +# Lahey: .F .FOR .F90 .F95, -Cpp +# NAGWare: .F .F90 .F95, .ff .ff90 .ff95 (new), -fpp for others +# Compaq/Tru64: .F .F90, -cpp, -P keep .i file, -P keep .i file +# f2c: .F, -cpp +# g95: .F .FOR .F90 .F95 .F03, -cpp -no-cpp, -E for stdout + + +# AC_FC_PP_DEFINE([ACTION-IF-SUCCESS], [ACTION-IF-FAILURE = FAILURE]) +# ------------------------------------------------------------------- +# Find a flag to specify defines for preprocessed Fortran. Not all +# Fortran compilers use -D. 
Substitute FC_DEFINE with the result and +# call ACTION-IF-SUCCESS (defaults to nothing) if successful, and +# ACTION-IF-FAILURE (defaults to failing with an error message) if not. +# +# Known flags: +# IBM: -WF,-D +# Lahey/Fujitsu: -Wp,-D older versions??? +# f2c: -D or -Wc,-D +# others: -D + + + +# +# The following three macros are from autoconf 2.69 (which is quite new). +# + +# AC_FC_MODULE_EXTENSION +# ---------------------- +# Find the Fortran 90 module file extension. The module extension is stored +# in the variable FC_MODEXT and empty if it cannot be determined. The result +# or "unknown" is cached in the cache variable ac_cv_fc_module_ext. + + + +# AC_FC_MODULE_FLAG([ACTION-IF-SUCCESS], [ACTION-IF-FAILURE = FAILURE]) +# --------------------------------------------------------------------- +# Find a flag to include Fortran 90 modules from another directory. +# If successful, run ACTION-IF-SUCCESS (defaults to nothing), otherwise +# run ACTION-IF-FAILURE (defaults to failing with an error message). +# The module flag is cached in the ac_cv_fc_module_flag variable. +# It may contain significant trailing whitespace. 
+# +# Known flags: +# gfortran: -Idir, -I dir (-M dir, -Mdir (deprecated), -Jdir for writing) +# g95: -I dir (-fmod=dir for writing) +# SUN: -Mdir, -M dir (-moddir=dir for writing; +# -Idir for includes is also searched) +# HP: -Idir, -I dir (+moddir=dir for writing) +# IBM: -Idir (-qmoddir=dir for writing) +# Intel: -Idir -I dir (-mod dir for writing) +# Absoft: -pdir +# Lahey: -mod dir +# Cray: -module dir, -p dir (-J dir for writing) +# -e m is needed to enable writing .mod files at all +# Compaq: -Idir +# NAGWare: -I dir +# PathScale: -I dir (but -module dir is looked at first) +# Portland: -module dir (first -module also names dir for writing) +# Fujitsu: -Am -Idir (-Mdir for writing is searched first, then '.', then -I) +# (-Am indicates how module information is saved) + + + +# AC_FC_MODULE_OUTPUT_FLAG([ACTION-IF-SUCCESS], [ACTION-IF-FAILURE = FAILURE]) +# ---------------------------------------------------------------------------- +# Find a flag to write Fortran 90 module information to another directory. +# If successful, run ACTION-IF-SUCCESS (defaults to nothing), otherwise +# run ACTION-IF-FAILURE (defaults to failing with an error message). +# The module flag is cached in the ac_cv_fc_module_output_flag variable. +# It may contain significant trailing whitespace. +# +# For known flags, see the documentation of AC_FC_MODULE_FLAG above. 
+ + + + ############################################################ @@ -2550,7 +2688,7 @@ else want_double_precision=no fi -if test "$want_double_precision" = no; then +if test x"$want_double_precision" = xno; then CUSTOM_REAL=SIZE_REAL CUSTOM_MPI_TYPE=MPI_REAL else @@ -2582,7 +2720,6 @@ fi export COND_DEBUG_FALSE export COND_DEBUG_TRUE - ### ### MPI ### @@ -2616,7 +2753,7 @@ else want_cuda=no fi - if test "$want_cuda" != no; then + if test x"$want_cuda" != xno; then COND_CUDA_TRUE= COND_CUDA_FALSE='#' else @@ -2624,7 +2761,7 @@ else COND_CUDA_FALSE= fi - if test "$want_cuda" = cuda5; then + if test x"$want_cuda" = xcuda5; then COND_CUDA5_TRUE= COND_CUDA5_FALSE='#' else @@ -2632,7 +2769,7 @@ else COND_CUDA5_FALSE= fi - if test "$want_cuda" = cuda6; then + if test x"$want_cuda" = xcuda6; then COND_CUDA6_TRUE= COND_CUDA6_FALSE='#' else @@ -2640,7 +2777,7 @@ else COND_CUDA6_FALSE= fi - if test "$want_cuda" = cuda7; then + if test x"$want_cuda" = xcuda7; then COND_CUDA7_TRUE= COND_CUDA7_FALSE='#' else @@ -2648,7 +2785,7 @@ else COND_CUDA7_FALSE= fi - if test "$want_cuda" = cuda8; then + if test x"$want_cuda" = xcuda8; then COND_CUDA8_TRUE= COND_CUDA8_FALSE='#' else @@ -2656,8 +2793,21 @@ else COND_CUDA8_FALSE= fi -# cuda linking for cuda 5x and 6x and 7x and 8x - if test "$want_cuda" = cuda5 -o "$want_cuda" = cuda6 -o "$want_cuda" = cuda7 -o "$want_cuda" = cuda8; then + if test x"$want_cuda" = xcuda9; then + COND_CUDA9_TRUE= + COND_CUDA9_FALSE='#' +else + COND_CUDA9_TRUE='#' + COND_CUDA9_FALSE= +fi + +# cuda linking for cuda 5x and 6x and 7x and 8x and .. 
+ if test "$want_cuda" = cuda5 \ + -o "$want_cuda" = cuda6 \ + -o "$want_cuda" = cuda7 \ + -o "$want_cuda" = cuda8 \ + -o "$want_cuda" = cuda9 \ +; then COND_CUDA_PLUS_TRUE= COND_CUDA_PLUS_FALSE='#' else @@ -3133,6 +3283,7 @@ FFLAGS="$FCFLAGS" + flags_guess="$SHELL $srcdir/flags.guess" { $as_echo "$as_me:${as_lineno-$LINENO}: running $flags_guess" >&5 $as_echo "$as_me: running $flags_guess" >&6;} @@ -7147,6 +7298,7 @@ if test "$want_mpi" = yes; then : $as_echo "## --- ## ## MPI ## ## --- ##" + # checks MPI include directory ac_ext=cpp @@ -7468,7 +7620,6 @@ ac_compiler_gnu=$ac_cv_fc_compiler_gnu fi - ############################################################ $as_echo "## ----------------------------------- ## @@ -7479,11 +7630,30 @@ $as_echo "## ----------------------------------- ## ac_config_files="$ac_config_files Makefile setup/constants.h setup/constants_tomography.h setup/precision.h setup/config.fh" +if test -d $srcdir/.git; then : + + GIT_VERSION_DEPS="$srcdir/.git/logs/HEAD" + { $as_echo "$as_me:${as_lineno-$LINENO}: building from git repository" >&5 +$as_echo "$as_me: building from git repository" >&6;} + + +else + + { $as_echo "$as_me:${as_lineno-$LINENO}: not a git repository" >&5 +$as_echo "$as_me: not a git repository" >&6;} + ac_config_files="$ac_config_files setup/version.fh" + + +fi + if test "$USE_BUNDLED_SCOTCH" = 1; then ac_config_files="$ac_config_files ${SCOTCH_DIR}/src/Makefile.inc" fi +ac_config_files="$ac_config_files DATA/Par_file:DATA/Par_file DATA/SOURCE:DATA/SOURCE" + + ac_config_commands="$ac_config_commands bin" ac_config_commands="$ac_config_commands obj" @@ -7630,6 +7800,10 @@ if test -z "${COND_CUDA8_TRUE}" && test -z "${COND_CUDA8_FALSE}"; then as_fn_error $? "conditional \"COND_CUDA8\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${COND_CUDA9_TRUE}" && test -z "${COND_CUDA9_FALSE}"; then + as_fn_error $? "conditional \"COND_CUDA9\" was never defined. 
+Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${COND_CUDA_PLUS_TRUE}" && test -z "${COND_CUDA_PLUS_FALSE}"; then as_fn_error $? "conditional \"COND_CUDA_PLUS\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 @@ -8234,7 +8408,10 @@ do "setup/constants_tomography.h") CONFIG_FILES="$CONFIG_FILES setup/constants_tomography.h" ;; "setup/precision.h") CONFIG_FILES="$CONFIG_FILES setup/precision.h" ;; "setup/config.fh") CONFIG_FILES="$CONFIG_FILES setup/config.fh" ;; + "setup/version.fh") CONFIG_FILES="$CONFIG_FILES setup/version.fh" ;; "${SCOTCH_DIR}/src/Makefile.inc") CONFIG_FILES="$CONFIG_FILES ${SCOTCH_DIR}/src/Makefile.inc" ;; + "DATA/Par_file") CONFIG_FILES="$CONFIG_FILES DATA/Par_file:DATA/Par_file" ;; + "DATA/SOURCE") CONFIG_FILES="$CONFIG_FILES DATA/SOURCE:DATA/SOURCE" ;; "bin") CONFIG_COMMANDS="$CONFIG_COMMANDS bin" ;; "obj") CONFIG_COMMANDS="$CONFIG_COMMANDS obj" ;; "DATA") CONFIG_COMMANDS="$CONFIG_COMMANDS DATA" ;; diff --git a/configure.ac b/configure.ac index a28473d9b..3623a92c7 100644 --- a/configure.ac +++ b/configure.ac @@ -15,6 +15,7 @@ AC_CONFIG_SRCDIR([README.md]) AC_CONFIG_HEADER([setup/config.h]) AC_CONFIG_MACRO_DIR([m4]) +m4_include(m4/cit_backports.m4) ############################################################ @@ -28,11 +29,11 @@ AC_CANONICAL_HOST ### AC_ARG_ENABLE([double-precision], - [AC_HELP_STRING([--enable-double-precision], + [AS_HELP_STRING([--enable-double-precision], [solver in double precision @<:@default=no@:>@])], [want_double_precision="$enableval"], [want_double_precision=no]) -if test "$want_double_precision" = no; then +if test x"$want_double_precision" = xno; then CUSTOM_REAL=SIZE_REAL CUSTOM_MPI_TYPE=MPI_REAL else @@ -47,7 +48,7 @@ AC_SUBST([CUSTOM_MPI_TYPE]) ### AC_ARG_ENABLE([debug], - [AC_HELP_STRING([--enable-debug], + [AS_HELP_STRING([--enable-debug], [build with debugging options enabled @<:@default=no@:>@])], 
[want_debug="$enableval"], [want_debug=no]) @@ -60,7 +61,7 @@ export COND_DEBUG_TRUE ### AC_ARG_WITH([mpi], - [AC_HELP_STRING([--with-mpi], + [AS_HELP_STRING([--with-mpi], [build parallel version @<:@default=no@:>@])], [want_mpi="$withval"], [want_mpi=no]) @@ -75,13 +76,20 @@ AC_ARG_WITH([cuda], [build CUDA GPU enabled version @<:@default=no@:>@])], [want_cuda="$withval"], [want_cuda=no]) -AM_CONDITIONAL([COND_CUDA], [test "$want_cuda" != no]) -AM_CONDITIONAL([COND_CUDA5], [test "$want_cuda" = cuda5]) -AM_CONDITIONAL([COND_CUDA6], [test "$want_cuda" = cuda6]) -AM_CONDITIONAL([COND_CUDA7], [test "$want_cuda" = cuda7]) -AM_CONDITIONAL([COND_CUDA8], [test "$want_cuda" = cuda8]) -# cuda linking for cuda 5x and 6x and 7x and 8x -AM_CONDITIONAL([COND_CUDA_PLUS], [test "$want_cuda" = cuda5 -o "$want_cuda" = cuda6 -o "$want_cuda" = cuda7 -o "$want_cuda" = cuda8]) +AM_CONDITIONAL([COND_CUDA], [test x"$want_cuda" != xno]) +AM_CONDITIONAL([COND_CUDA5], [test x"$want_cuda" = xcuda5]) +AM_CONDITIONAL([COND_CUDA6], [test x"$want_cuda" = xcuda6]) +AM_CONDITIONAL([COND_CUDA7], [test x"$want_cuda" = xcuda7]) +AM_CONDITIONAL([COND_CUDA8], [test x"$want_cuda" = xcuda8]) +AM_CONDITIONAL([COND_CUDA9], [test x"$want_cuda" = xcuda9]) +# cuda linking for cuda 5x and 6x and 7x and 8x and .. 
+AM_CONDITIONAL([COND_CUDA_PLUS], + [test "$want_cuda" = cuda5 \ + -o "$want_cuda" = cuda6 \ + -o "$want_cuda" = cuda7 \ + -o "$want_cuda" = cuda8 \ + -o "$want_cuda" = cuda9 \] +) ############################################################ @@ -107,6 +115,7 @@ FFLAGS="$FCFLAGS" AC_PROVIDE([AC_PROG_F77]) AC_SUBST([FCENV]) +AC_SUBST(srcdir) flags_guess="$SHELL $srcdir/flags.guess" AC_MSG_NOTICE([running $flags_guess]) @@ -307,6 +316,7 @@ AC_SUBST([FC_MODDIR]) AS_IF([test "$want_mpi" = yes], [ AS_BOX([MPI]) + # checks MPI include directory CIT_MPI_INCDIR([$MPIFC]) ]) @@ -332,12 +342,27 @@ AC_CONFIG_FILES([ setup/config.fh ]) +AS_IF([test -d $srcdir/.git], [ + GIT_VERSION_DEPS="$srcdir/.git/logs/HEAD" + AC_MSG_NOTICE([building from git repository]) + AC_SUBST(GIT_VERSION_DEPS) +], [ + dnl Only substitute if not in a git repository. + AC_MSG_NOTICE([not a git repository]) + AC_CONFIG_FILES([setup/version.fh]) +]) + if test "$USE_BUNDLED_SCOTCH" = 1; then AC_CONFIG_FILES([ ${SCOTCH_DIR}/src/Makefile.inc ]) fi +AC_CONFIG_FILES([ + DATA/Par_file:DATA/Par_file + DATA/SOURCE:DATA/SOURCE +]) + AC_CONFIG_COMMANDS([bin], [AS_MKDIR_P(bin)]) AC_CONFIG_COMMANDS([obj], [AS_MKDIR_P(obj)]) AC_CONFIG_COMMANDS([DATA], [AS_MKDIR_P(DATA)]) diff --git a/m4 b/m4 index abc1b026d..1b6f2ed72 160000 --- a/m4 +++ b/m4 @@ -1 +1 @@ -Subproject commit abc1b026dc99cdbe76ef8e23394a5b41ffad942f +Subproject commit 1b6f2ed72d2365e0e9c3919321acabe57020e74c diff --git a/setup/config.h.in b/setup/config.h.in index 88026de2a..185d1a5e7 100644 --- a/setup/config.h.in +++ b/setup/config.h.in @@ -20,37 +20,37 @@ /* Define if err.h */ #undef HAVE_ERR -/* Define to 1 if you have the < inttypes.h > header file. */ +/* Define to 1 if you have the <inttypes.h> header file. */ #undef HAVE_INTTYPES_H -/* Define to 1 if you have the < memory.h > header file. */ +/* Define to 1 if you have the <memory.h> header file. */ #undef HAVE_MEMORY_H -/* Have PTHREAD_PRIO_INHERIT. 
*/ -#undef HAVE_PTHREAD_PRIO_INHERIT +/* Define if you have POSIX threads libraries and header files. */ +#undef HAVE_PTHREAD /* defined if Scotch is installed */ #undef HAVE_SCOTCH -/* Define to 1 if you have the < stdint.h > header file. */ +/* Define to 1 if you have the <stdint.h> header file. */ #undef HAVE_STDINT_H -/* Define to 1 if you have the < stdlib.h > header file. */ +/* Define to 1 if you have the <stdlib.h> header file. */ #undef HAVE_STDLIB_H -/* Define to 1 if you have the < strings.h > header file. */ +/* Define to 1 if you have the <strings.h> header file. */ #undef HAVE_STRINGS_H -/* Define to 1 if you have the < string.h > header file. */ +/* Define to 1 if you have the <string.h> header file. */ #undef HAVE_STRING_H -/* Define to 1 if you have the < sys/stat.h > header file. */ +/* Define to 1 if you have the <sys/stat.h> header file. */ #undef HAVE_SYS_STAT_H -/* Define to 1 if you have the < sys/types.h > header file. */ +/* Define to 1 if you have the <sys/types.h> header file. */ #undef HAVE_SYS_TYPES_H -/* Define to 1 if you have the < unistd.h > header file. */ +/* Define to 1 if you have the <unistd.h> header file. */ #undef HAVE_UNISTD_H /* Define if xmmintrin.h */ diff --git a/setup/version.fh.in b/setup/version.fh.in new file mode 100644 index 000000000..107d87b11 --- /dev/null +++ b/setup/version.fh.in @@ -0,0 +1,2 @@ +! This file is only used if building from tarballs. 
+character(len=*), parameter :: git_version = "@PACKAGE_VERSION@" diff --git a/src/cuda/mesh_constants_cuda.h b/src/cuda/mesh_constants_cuda.h index 0af6d8a57..5bd864651 100644 --- a/src/cuda/mesh_constants_cuda.h +++ b/src/cuda/mesh_constants_cuda.h @@ -181,12 +181,14 @@ // leads up to ~1% performance increase //#define MANUALLY_UNROLLED_LOOPS -// compiler specifications +// CUDA compiler specifications // (optional) use launch_bounds specification to increase compiler optimization +// +// Kepler architecture +#ifdef GPU_DEVICE_K20 // (depending on GPU type, register spilling might slow down the performance) -// (uncomment if desired) +// (uncomment if not desired) #define USE_LAUNCH_BOUNDS - // elastic kernel // note: main kernel is Kernel_2_***_impl() which is limited by shared memory usage to 8 active blocks // while register usage might use up to 9 blocks @@ -201,7 +203,6 @@ // registers per thread = 59 // registers per block = 8192 total = 65536 -> limits active blocks to 8 #define LAUNCH_MIN_BLOCKS 10 - // acoustic kernel // performance statistics: kernel Kernel_2_acoustic_impl(): // shared memory per block = 2200 for Kepler: -> limits active blocks to 16 (maximum possible) @@ -210,6 +211,20 @@ // note: for K20x, using a minimum of 16 blocks leads to register spilling. // this slows down the kernel by ~ 4% #define LAUNCH_MIN_BLOCKS_ACOUSTIC 16 +#endif +// +// Pascal architecture +#ifdef GPU_DEVICE_Pascal +// Pascal P100: Pascal: total of 65536 register size +// careful, as using launch bounds to increase the number of blocks might lead to register spilling. +#undef USE_LAUNCH_BOUNDS +#define LAUNCH_MIN_BLOCKS 10 +#define LAUNCH_MIN_BLOCKS_ACOUSTIC 16 +#endif + +#ifdef USE_LAUNCH_BOUNDS +#pragma message ("\nCompiling with: USE_LAUNCH_BOUNDS enabled\n") +#endif /* ----------------------------------------------------------------------------------------------- */