From 121faaf3737ced55ab0d7344c4c2a2edaa970e09 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Sat, 27 Jun 2020 09:08:45 -0700 Subject: [PATCH 1/4] whitespace changes Signed-off-by: Jeff Hammond --- src/AtomicMacro.hh | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/AtomicMacro.hh b/src/AtomicMacro.hh index 4c31c853..2785c64a 100644 --- a/src/AtomicMacro.hh +++ b/src/AtomicMacro.hh @@ -16,18 +16,20 @@ //Currently not atomic here. But its only used when it does not necissarially need to be atomic. #define ATOMIC_WRITE( x, v ) \ - x = v; + x = v; #define ATOMIC_ADD( x, v ) \ atomicAdd( &x, v ); - + #define ATOMIC_UPDATE( x ) \ atomicAdd( &x, 1 ); #define ATOMIC_CAPTURE( x, v, p ) \ p = atomicAdd( &x, v ); + //If in a CPU OpenMP section use the OpenMP atomics #elif defined (USE_OPENMP_ATOMICS) + #define ATOMIC_WRITE( x, v ) \ _Pragma("omp atomic write") \ x = v; @@ -46,6 +48,7 @@ //If in a serial section, no need to use atomics #else + #define ATOMIC_WRITE( x, v ) \ x = v; @@ -62,6 +65,7 @@ //If in a OpenMP section use the OpenMP atomics #elif defined (USE_OPENMP_ATOMICS) + #define ATOMIC_WRITE( x, v ) \ _Pragma("omp atomic write") \ x = v; @@ -74,12 +78,13 @@ _Pragma("omp atomic update") \ x++; - #define ATOMIC_CAPTURE( x, v, p ) \ - _Pragma("omp atomic capture") \ - {p = x; x = x + v;} + #define ATOMIC_CAPTURE( x, v, p ) \ + _Pragma("omp atomic capture") \ + {p = x; x = x + v;} //If in a serial section, no need to use atomics #else + #define ATOMIC_WRITE( x, v ) \ x = v; @@ -91,4 +96,5 @@ #define ATOMIC_CAPTURE( x, v, p ) \ {p = x; x = x + v;} + #endif From 5bf8b84d5403f853eefef4c63ff932aad2be2eae Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Sat, 27 Jun 2020 09:12:01 -0700 Subject: [PATCH 2/4] new implementation of atomics New version uses functions not macros. The use of template functions allows for enforcement of type-safety, which is implemented using static_assert. The old implementation is preserved for posterity. A header guard was added. I found the old macro names confusing, so I used new names, but I map the old names in the source onto them so the application source does not change. Signed-off-by: Jeff Hammond --- src/AtomicMacro.hh | 158 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 157 insertions(+), 1 deletion(-) diff --git a/src/AtomicMacro.hh b/src/AtomicMacro.hh index 2785c64a..b438665e 100644 --- a/src/AtomicMacro.hh +++ b/src/AtomicMacro.hh @@ -1,3 +1,8 @@ +#ifndef AtomicMacro_HH_ +#define AtomicMacro_HH_ + +#define USE_MACRO_FUNCTIONS 1 + //Determine which atomics to use based on platform being compiled for // //If compiling with CUDA @@ -8,6 +13,153 @@ #define USE_OPENMP_ATOMICS #endif +// -------------------------------------------------- +// Original Names -> Inline function names +// -------------------------------------------------- +// ATOMIC_WRITE( x, v ) -> ATOMIC_WRITE +// ATOMIC_UPDATE( x ) -> ATOMIC_INCREMENT +// ATOMIC_ADD( x, v ) -> ATOMIC_ADD +// ATOMIC_CAPTURE( x, v, p ) -> ATOMIC_FETCH_ADD +// -------------------------------------------------- + +#if defined (USE_MACRO_FUNCTIONS) + +#define ATOMIC_CAPTURE( x, v, p ) ATOMIC_FETCH_ADD((x),(v),(p)) +#define ATOMIC_UPDATE( x ) ATOMIC_INCREMENT((x)) + +#if defined(HAVE_CUDA) && defined(__CUDA_ARCH__) + +template +inline void ATOMIC_WRITE(T & x, T v) { + x = v; +} + +template +inline void ATOMIC_INCREMENT(T& x) { + atomicAdd( &x, 1 ); +} + +template +inline void ATOMIC_ADD(T& x, T v) { + atomicAdd( &x, v ); +} + +template +inline void ATOMIC_ADD(T1& x, T2 v) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + atomicAdd( &x, v ); +} + +template +inline void ATOMIC_FETCH_ADD(T& x, T v, T& p) { + p = atomicAdd( &x, v ); +} + +template +inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + p = atomicAdd( &x, v ); +} + +template +inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large"); + p = atomicAdd( &x, v ); +} + +#elif defined(USE_OPENMP_ATOMICS) + +template +inline void ATOMIC_WRITE(T & x, T v) { + _Pragma("omp atomic write") + x = v; +} + +template +inline void ATOMIC_INCREMENT(T& x) { + _Pragma("omp atomic update") + x++; +} + +template +inline void ATOMIC_ADD(T& x, T v) { + _Pragma("omp atomic") + x += v; +} + +template +inline void ATOMIC_ADD(T1& x, T2 v) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + _Pragma("omp atomic") + x += v; +} + +template +inline void ATOMIC_FETCH_ADD(T& x, T v, T& p) { + _Pragma("omp atomic capture") + {p = x; x = x + v;} +} + +template +inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + _Pragma("omp atomic capture") + {p = x; x = x + v;} +} + +template +inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large"); + _Pragma("omp atomic capture") + {p = x; x = x + v;} +} + +#else // SEQUENTIAL + +template +inline void ATOMIC_WRITE(T & x, T v) { + x = v; +} + +template +inline void ATOMIC_INCREMENT(T& x) { + x++; +} + +template +inline void ATOMIC_ADD(T& x, T v) { + x += v; +} + +template +inline void ATOMIC_ADD(T1& x, T2 v) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + x += v; +} + +template +inline void ATOMIC_FETCH_ADD(T& x, T v, T& p) { + {p = x; x = x + v;} +} + +template +inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + {p = x; x = x + v;} +} + +template +inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large"); + {p = x; x = x + v;} +} + +#endif // BACKENDS + +#else // ! USE_MACRO_FUNCTIONS #if defined (HAVE_CUDA) @@ -97,4 +249,8 @@ #define ATOMIC_CAPTURE( x, v, p ) \ {p = x; x = x + v;} -#endif +#endif // BACKENDS + +#endif // USE_MACRO_FUNCTIONS + +#endif // AtomicMacro_HH_ From 9040882ecfe88e4a395aa993358931b6a1b05332 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 4 Feb 2022 00:45:49 -0800 Subject: [PATCH 3/4] remove unnecessary comments --- src/AtomicMacro.hh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/AtomicMacro.hh b/src/AtomicMacro.hh index b438665e..0b0ceb89 100644 --- a/src/AtomicMacro.hh +++ b/src/AtomicMacro.hh @@ -3,10 +3,6 @@ #define USE_MACRO_FUNCTIONS 1 -//Determine which atomics to use based on platform being compiled for -// -//If compiling with CUDA - #ifdef HAVE_OPENMP #define USE_OPENMP_ATOMICS #elif HAVE_OPENMP_TARGET From 01337a49323f4f3e8a6f26b7a601b1dc1d588045 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 4 Feb 2022 01:03:34 -0800 Subject: [PATCH 4/4] C++20 atomics Signed-off-by: Jeff Hammond --- src/AtomicMacro.hh | 73 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/src/AtomicMacro.hh b/src/AtomicMacro.hh index 0b0ceb89..0bb9094c 100644 --- a/src/AtomicMacro.hh +++ b/src/AtomicMacro.hh @@ -23,7 +23,76 @@ #define ATOMIC_CAPTURE( x, v, p ) ATOMIC_FETCH_ADD((x),(v),(p)) #define ATOMIC_UPDATE( x ) ATOMIC_INCREMENT((x)) -#if defined(HAVE_CUDA) && defined(__CUDA_ARCH__) +#if defined(USE_CXX20_ATOMICS) + + #if (__cplusplus > 201703L) + + #include + + #if defined(__cpp_lib_atomic_float) && defined(__cpp_lib_atomic_ref) + + template + inline void ATOMIC_WRITE(T & x, T v) { + //x = v; + std::atomic_ref r{x}; + r = v; + } + + template + inline void ATOMIC_INCREMENT(T& x) { + //atomicAdd( &x, 1 ); + std::atomic_ref r{x}; + r++; + } + + template + inline void ATOMIC_ADD(T& x, T v) { + //atomicAdd( &x, v ); + std::atomic_ref r{x}; + r+=v; + } + + template + inline void ATOMIC_ADD(T1& x, T2 v) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + //atomicAdd( &x, v ); + std::atomic_ref r{x}; + r+=v; + } + + template + inline void ATOMIC_FETCH_ADD(T& x, T v, T& p) { + //p = atomicAdd( &x, v ); + std::atomic_ref r{x}; + p = r.fetch_add(v); + } + + template + inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + //p = atomicAdd( &x, v ); + std::atomic_ref r{x}; + p = r.fetch_add(v); + } + + template + inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large"); + //p = atomicAdd( &x, v ); + std::atomic_ref r{x}; + p = r.fetch_add(v); + } + + #else + #error Your supposedly C++20 compiler doesn't support atomic_ref. + #endif + + #else + #error Sorry, you need C++20. + #endif + +#elif defined(HAVE_CUDA) && defined(__CUDA_ARCH__) template inline void ATOMIC_WRITE(T & x, T v) { @@ -66,6 +135,8 @@ inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p) { #elif defined(USE_OPENMP_ATOMICS) +#warning Should not be here + template inline void ATOMIC_WRITE(T & x, T v) { _Pragma("omp atomic write")