From 0e94c8e25892bbd48b94603a9a204d3d73eec85e Mon Sep 17 00:00:00 2001 From: Michael Wall Date: Fri, 28 Jun 2024 13:12:43 -0600 Subject: [PATCH] Require BML_USE_POSIX_MEMALIGN to use posix_memalign() Using the HAVE_POSIX_MEMALIGN code path results in a ~1.7x slowdown of an MD code which frequently allocates arrays. Require a new macro, BML_USE_POSIX_MEMALIGN, to be defined in order to use that code path. The macro is defined by default (CMake option BML_POSIX_MEMALIGN=ON) when posix_memalign() is available. --- CMakeLists.txt | 11 ++++++++--- build.sh | 3 +++ src/C-interface/bml_allocate.c | 4 ++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 843d9b77..c7547be3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -671,9 +671,14 @@ endif() add_definitions(-D_POSIX_C_SOURCE=200112L) -check_function_exists(posix_memalign HAVE_POSIX_MEMALIGN) -if(HAVE_POSIX_MEMALIGN) - add_definitions(-DHAVE_POSIX_MEMALIGN) +option(BML_POSIX_MEMALIGN "Use posix_memalign() for allocations if available" ON) + +if(BML_POSIX_MEMALIGN) + check_function_exists(posix_memalign HAVE_POSIX_MEMALIGN) + if(HAVE_POSIX_MEMALIGN) + add_definitions(-DBML_USE_POSIX_MEMALIGN) + message(STATUS "Will use posix_memalign() for allocations") + endif() endif() set(INTEL_OPT FALSE diff --git a/build.sh b/build.sh index fa3c16af..f592aa7c 100755 --- a/build.sh +++ b/build.sh @@ -83,6 +83,7 @@ EOF echo "EXTRA_FFLAGS Extra fortran flags (default is '${EXTRA_FFLAGS}')" echo "EXTRA_LINK_FLAGS Add extra link flags (default is '${EXTRA_LINK_FLAGS}')" echo "BML_OMP_OFFLOAD {yes,no} (default is ${BML_OMP_OFFLOAD})" + echo "BML_POSIX_MEMALIGN Use if available {yes,no} (default is ${BML_POSIX_MEMALIGN})" echo "BML_OFFLOAD_ARCH {NVIDIA, AMD} (default is ${BML_OFFLOAD_ARCH})" echo "GPU_ARCH GPU architecture (default is '${GPU_ARCH}')" echo "BML_CUDA Build with CUDA (default is ${BML_CUDA})" @@ -139,6 +140,7 @@ set_defaults() { : ${FORTRAN_FLAGS:=} : ${EXTRA_LINK_FLAGS:=} : ${BML_OMP_OFFLOAD:=no} + : ${BML_POSIX_MEMALIGN:=yes} : ${BML_OFFLOAD_ARCH:=NVIDIA} : 
${GPU_ARCH:=} : ${BML_CUDA:=no} @@ -232,6 +234,7 @@ configure() { -DSCALAPACK_LIBRARIES="${SCALAPACK_LIBRARIES}" \ -DBML_ELPA="${BML_ELPA}" \ -DBML_OPENMP="${BML_OPENMP}" \ + -DBML_POSIX_MEMALIGN="${BML_POSIX_MEMALIGN}" \ -DMKL_GPU="${MKL_GPU}" \ -DBML_MPI="${BML_MPI}" \ -DBML_MPIEXEC_EXECUTABLE="${BML_MPIEXEC_EXECUTABLE}" \ diff --git a/src/C-interface/bml_allocate.c b/src/C-interface/bml_allocate.c index 98921c18..7372cbc6 100644 --- a/src/C-interface/bml_allocate.c +++ b/src/C-interface/bml_allocate.c @@ -53,7 +53,7 @@ bml_allocate_memory( __assume_aligned(ptr, MALLOC_ALIGNMENT); ptr[i] = 0; } -#elif defined(HAVE_POSIX_MEMALIGN) +#elif defined(BML_USE_POSIX_MEMALIGN) char *ptr; posix_memalign((void **) &ptr, MALLOC_ALIGNMENT, size); #pragma omp simd @@ -86,7 +86,7 @@ bml_noinit_allocate_memory( { #if defined(INTEL_OPT) void *ptr = _mm_malloc(size, MALLOC_ALIGNMENT); -#elif defined(HAVE_POSIX_MEMALIGN) +#elif defined(BML_USE_POSIX_MEMALIGN) void *ptr; posix_memalign(&ptr, MALLOC_ALIGNMENT, size); #else