Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

C++ standard parallelism - CPU version #54

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
251 changes: 242 additions & 9 deletions src/AtomicMacro.hh
Original file line number Diff line number Diff line change
@@ -1,13 +1,236 @@
// Selects which atomic implementation to use (C++20 std::atomic_ref, CUDA,
// OpenMP, or plain sequential code) based on the platform being compiled for.
//
#ifndef AtomicMacro_HH_
#define AtomicMacro_HH_

#define USE_MACRO_FUNCTIONS 1

// Enable the OpenMP atomic backend when either HAVE_OPENMP or
// HAVE_OPENMP_TARGET is defined. Both macros are tested with defined() so the
// two checks are consistent and a HAVE_OPENMP_TARGET that is defined without a
// value cannot produce an invalid "#elif" expression.
#if defined(HAVE_OPENMP) || defined(HAVE_OPENMP_TARGET)
#define USE_OPENMP_ATOMICS
#endif

#ifdef HAVE_STDPAR
#define USE_CXX20_ATOMICS
#endif

// --------------------------------------------------
// Original Names -> Inline function names
// --------------------------------------------------
// ATOMIC_WRITE( x, v ) -> ATOMIC_WRITE
// ATOMIC_UPDATE( x ) -> ATOMIC_INCREMENT
// ATOMIC_ADD( x, v ) -> ATOMIC_ADD
// ATOMIC_CAPTURE( x, v, p ) -> ATOMIC_FETCH_ADD
// --------------------------------------------------

#if defined (USE_MACRO_FUNCTIONS)

#define ATOMIC_CAPTURE( x, v, p ) ATOMIC_FETCH_ADD((x),(v),(p))
#define ATOMIC_UPDATE( x ) ATOMIC_INCREMENT((x))

#if defined(USE_CXX20_ATOMICS)

#if (__cplusplus > 201703L)

#include <atomic>

#if defined(__cpp_lib_atomic_float) && defined(__cpp_lib_atomic_ref)

template <typename T>
inline void ATOMIC_WRITE(T & x, T v) {
//x = v;
std::atomic_ref<T> r{x};
r = v;
}

// Atomically adds one to x through a std::atomic_ref bound to it.
//
// x : object to increment in place.
//
// fetch_add(1) is used rather than operator++ because the floating-point
// specialization of std::atomic_ref offers fetch_add but no increment
// operators; this keeps the template usable for double as well as for
// integral types. The previous value returned by fetch_add is discarded.
template <typename T>
inline void ATOMIC_INCREMENT(T& x) {
    std::atomic_ref<T> r{x};
    r.fetch_add(1);
}

template <typename T>
inline void ATOMIC_ADD(T& x, T v) {
//atomicAdd( &x, v );
std::atomic_ref<T> r{x};
r+=v;
}

template <typename T1, typename T2>
inline void ATOMIC_ADD(T1& x, T2 v) {
static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
//atomicAdd( &x, v );
std::atomic_ref<T1> r{x};
r+=v;
}

template <typename T>
inline void ATOMIC_FETCH_ADD(T& x, T v, T& p) {
//p = atomicAdd( &x, v );
std::atomic_ref<T> r{x};
p = r.fetch_add(v);
}

template <typename T1, typename T2>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p) {
static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
//p = atomicAdd( &x, v );
std::atomic_ref<T1> r{x};
p = r.fetch_add(v);
}

template <typename T1, typename T2, typename T3>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p) {
static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large");
//p = atomicAdd( &x, v );
std::atomic_ref<T1> r{x};
p = r.fetch_add(v);
}

#else
#error Your supposedly C++20 compiler does not support atomic_ref<double>.
#endif

#else
#error Sorry, you need C++20.
#endif

#elif defined(HAVE_CUDA) && defined(__CUDA_ARCH__)

// Stores v into x.
//
// x : object to overwrite in place.
// v : value to store.
//
// Note: in this backend the write is a plain assignment; no atomic
// operation is issued.
template <typename T>
inline void ATOMIC_WRITE(T & x, T v)
{
    x = v;
}

// Atomically adds one to x using atomicAdd.
//
// x : object to increment in place.
template <typename T>
inline void ATOMIC_INCREMENT(T& x)
{
    T* const target = &x;
    atomicAdd( target, 1 );
}

// Atomically adds v to x using atomicAdd (same-type overload).
//
// x : accumulator updated in place.
// v : amount to add.
//
// The previous value returned by atomicAdd is intentionally discarded.
template <typename T>
inline void ATOMIC_ADD(T& x, T v)
{
    static_cast<void>( atomicAdd( &x, v ) );
}

// Atomically adds v to x using atomicAdd when the argument types differ.
//
// x : accumulator of type T1, updated in place.
// v : amount to add, of type T2.
//
// Compilation is rejected when T2 is wider than T1. The previous value
// returned by atomicAdd is intentionally discarded.
template <typename T1, typename T2>
inline void ATOMIC_ADD(T1& x, T2 v)
{
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    static_cast<void>( atomicAdd( &x, v ) );
}

// Atomically adds v to x using atomicAdd and hands back its result.
//
// x : accumulator updated in place.
// v : amount to add.
// p : receives the value returned by atomicAdd.
template <typename T>
inline void ATOMIC_FETCH_ADD(T& x, T v, T& p)
{
    const auto previous = atomicAdd( &x, v );
    p = previous;
}

// Atomically adds v to x using atomicAdd and hands back its result, for an
// addend whose type differs from the accumulator's.
//
// x : accumulator of type T1, updated in place.
// v : amount to add, of type T2 (must not be wider than T1).
// p : receives the value returned by atomicAdd.
template <typename T1, typename T2>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p)
{
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    const auto previous = atomicAdd( &x, v );
    p = previous;
}

// Atomically adds v to x using atomicAdd and hands back its result, with
// accumulator, addend and output all allowed to have different types.
//
// x : accumulator of type T1, updated in place.
// v : amount to add, of type T2 (must not be wider than T1).
// p : receives the value returned by atomicAdd; its type T3 must be at least
//     as wide as T1.
template <typename T1, typename T2, typename T3>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p)
{
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large");
    const auto previous = atomicAdd( &x, v );
    p = previous;
}

#elif defined(USE_OPENMP_ATOMICS)

#warning Should not be here

// Stores v into x under an OpenMP "atomic write" directive.
//
// x : object to overwrite in place.
// v : value to store.
template <typename T>
inline void ATOMIC_WRITE(T & x, T v)
{
    _Pragma("omp atomic write")
    x = v;
}

// Adds one to x under an OpenMP "atomic update" directive.
//
// x : object to increment in place.
template <typename T>
inline void ATOMIC_INCREMENT(T& x)
{
    _Pragma("omp atomic update")
    ++x;
}

// Adds v to x under an OpenMP atomic update directive (same-type overload).
//
// x : accumulator updated in place.
// v : amount to add.
template <typename T>
inline void ATOMIC_ADD(T& x, T v)
{
    _Pragma("omp atomic update")
    x += v;
}

// Adds v to x under an OpenMP atomic update directive when the argument
// types differ.
//
// x : accumulator of type T1, updated in place.
// v : amount to add, of type T2.
//
// Compilation is rejected when T2 is wider than T1.
template <typename T1, typename T2>
inline void ATOMIC_ADD(T1& x, T2 v)
{
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    _Pragma("omp atomic update")
    x += v;
}

// Captures the current value of x into p and then adds v to x, as a single
// OpenMP "atomic capture" block.
//
// x : accumulator updated in place.
// v : amount to add.
// p : receives the value x held before the addition.
template <typename T>
inline void ATOMIC_FETCH_ADD(T& x, T v, T& p)
{
    _Pragma("omp atomic capture")
    {
        p = x;
        x += v;
    }
}

// Captures the current value of x into p and then adds v to x, as a single
// OpenMP "atomic capture" block, for an addend whose type differs from the
// accumulator's.
//
// x : accumulator of type T1, updated in place.
// v : amount to add, of type T2 (must not be wider than T1).
// p : receives the value x held before the addition.
template <typename T1, typename T2>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p)
{
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    _Pragma("omp atomic capture")
    {
        p = x;
        x += v;
    }
}

// Captures the current value of x into p and then adds v to x, as a single
// OpenMP "atomic capture" block, with accumulator, addend and output all
// allowed to have different types.
//
// x : accumulator of type T1, updated in place.
// v : amount to add, of type T2 (must not be wider than T1).
// p : receives the value x held before the addition; its type T3 must be at
//     least as wide as T1.
template <typename T1, typename T2, typename T3>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p)
{
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large");
    _Pragma("omp atomic capture")
    {
        p = x;
        x += v;
    }
}

#else // SEQUENTIAL

// Sequential build: stores v into x with a plain assignment.
//
// x : object to overwrite in place.
// v : value to store.
template <typename T>
inline void ATOMIC_WRITE(T & x, T v)
{
    x = v;
}

// Sequential build: adds one to x with an ordinary increment.
//
// x : object to increment in place.
template <typename T>
inline void ATOMIC_INCREMENT(T& x)
{
    ++x;
}

// Sequential build: adds v to x with ordinary arithmetic (same-type overload).
//
// x : accumulator updated in place.
// v : amount to add.
template <typename T>
inline void ATOMIC_ADD(T& x, T v)
{
    x = x + v;
}

// Sequential build: adds v to x with ordinary arithmetic when the argument
// types differ.
//
// x : accumulator of type T1, updated in place.
// v : amount to add, of type T2.
//
// Compilation is rejected when T2 is wider than T1.
template <typename T1, typename T2>
inline void ATOMIC_ADD(T1& x, T2 v)
{
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    x = x + v;
}

// Sequential build: copies the current value of x into p, then adds v to x.
//
// x : accumulator updated in place.
// v : amount to add.
// p : receives the value x held before the addition.
template <typename T>
inline void ATOMIC_FETCH_ADD(T& x, T v, T& p)
{
    p = x;
    x += v;
}

// Sequential build: copies the current value of x into p, then adds v to x,
// for an addend whose type differs from the accumulator's.
//
// x : accumulator of type T1, updated in place.
// v : amount to add, of type T2 (must not be wider than T1).
// p : receives the value x held before the addition.
template <typename T1, typename T2>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p)
{
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    p = x;
    x += v;
}

// Sequential build: copies the current value of x into p, then adds v to x,
// with accumulator, addend and output all allowed to have different types.
//
// x : accumulator of type T1, updated in place.
// v : amount to add, of type T2 (must not be wider than T1).
// p : receives the value x held before the addition; its type T3 must be at
//     least as wide as T1.
template <typename T1, typename T2, typename T3>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p)
{
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large");
    p = x;
    x += v;
}

#endif // BACKENDS

#else // ! USE_MACRO_FUNCTIONS

#if defined (HAVE_CUDA)

Expand All @@ -16,18 +239,20 @@

//Currently not atomic here, but it is only used where it does not necessarily need to be atomic.
#define ATOMIC_WRITE( x, v ) \
x = v;
x = v;

#define ATOMIC_ADD( x, v ) \
atomicAdd( &x, v );

#define ATOMIC_UPDATE( x ) \
atomicAdd( &x, 1 );

#define ATOMIC_CAPTURE( x, v, p ) \
p = atomicAdd( &x, v );

//If in a CPU OpenMP section use the OpenMP atomics
#elif defined (USE_OPENMP_ATOMICS)

#define ATOMIC_WRITE( x, v ) \
_Pragma("omp atomic write") \
x = v;
Expand All @@ -46,6 +271,7 @@

//If in a serial section, no need to use atomics
#else

#define ATOMIC_WRITE( x, v ) \
x = v;

Expand All @@ -62,6 +288,7 @@

//If in an OpenMP section use the OpenMP atomics
#elif defined (USE_OPENMP_ATOMICS)

#define ATOMIC_WRITE( x, v ) \
_Pragma("omp atomic write") \
x = v;
Expand All @@ -74,12 +301,13 @@
_Pragma("omp atomic update") \
x++;

#define ATOMIC_CAPTURE( x, v, p ) \
_Pragma("omp atomic capture") \
{p = x; x = x + v;}
#define ATOMIC_CAPTURE( x, v, p ) \
_Pragma("omp atomic capture") \
{p = x; x = x + v;}

//If in a serial section, no need to use atomics
#else

#define ATOMIC_WRITE( x, v ) \
x = v;

Expand All @@ -91,4 +319,9 @@

#define ATOMIC_CAPTURE( x, v, p ) \
{p = x; x = x + v;}
#endif

#endif // BACKENDS

#endif // USE_MACRO_FUNCTIONS

#endif // AtomicMacro_HH_
12 changes: 12 additions & 0 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,18 @@ LDFLAGS =
#LDFLAGS = $(OPENMP_LDFLAGS)


###############################################################################
### C++ parallelism
###############################################################################

#OPTFLAGS = -g -O3

#CXX=clang++-12
#CXXFLAGS = -std=c++20 $(OPTFLAGS) -Wpedantic
#CPPFLAGS = -DHAVE_STDPAR
## Sometimes GCC and Clang do not link this automatically.
#LDFLAGS = -ltbb

###############################################################################
### GCC -- with MPI and OpenMP
###############################################################################
Expand Down
Loading