-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
307 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,200 @@ | ||
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// | ||
// Copyright (c) 2016-18, Lawrence Livermore National Security, LLC. | ||
// | ||
// Produced at the Lawrence Livermore National Laboratory | ||
// | ||
// LLNL-CODE-689114 | ||
// | ||
// All rights reserved. | ||
// | ||
// This file is part of RAJA. | ||
// | ||
// For details about use and distribution, please read RAJA/LICENSE. | ||
// | ||
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// | ||
|
||
#include <cstdlib> | ||
#include <cstring> | ||
#include <iostream> | ||
|
||
#include "memoryManager.hpp" | ||
|
||
#include "RAJA/RAJA.hpp" | ||
|
||
/* | ||
* Vector Addition Example | ||
* | ||
* Computes c = a + b, where a, b, c are vectors of ints. | ||
* It illustrates similarities between a C-style for-loop and a RAJA | ||
* forall loop. | ||
* | ||
* RAJA features shown: | ||
* - `forall` loop iteration template method | ||
* - Index range segment | ||
* - Execution policies | ||
* | ||
* If CUDA is enabled, CUDA unified memory is used. | ||
*/ | ||
|
||
/*
  CUDA_BLOCK_SIZE - specifies the number of threads in a CUDA thread block
  (used by the RAJA::cuda_exec policy in main; only defined when RAJA was
  configured with CUDA support).
*/
#if defined(RAJA_ENABLE_CUDA)
const int CUDA_BLOCK_SIZE = 256;
#endif

//
// Forward declarations of the helper functions (defined after main) used
// to check and print the result vector.
//
void checkResult(int* res, int len);
void printResult(int* res, int len);
|
||
|
||
//
// Entry point: computes c = a + b with a C-style loop and then with several
// RAJA::forall execution policies, checking the result after each variant.
// The OpenMP and CUDA variants are compiled in only when the corresponding
// RAJA feature macros are defined.
//
int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
{

  std::cout << "\n\nRAJA vector addition example...\n";

  //
  // Define vector length
  //
  const int N = 1000000;

  //
  // Allocate and initialize vector data. memoryManager::allocate returns
  // CUDA unified memory when RAJA was configured with CUDA, so the same
  // pointers work in the host loops and in the CUDA kernel below.
  //
  int *a = memoryManager::allocate<int>(N);
  int *b = memoryManager::allocate<int>(N);
  int *c = memoryManager::allocate<int>(N);

  // a[i] = -i and b[i] = i, so a correct sum vector c is all zeros
  // (this is exactly what checkResult verifies).
  for (int i = 0; i < N; ++i) {
    a[i] = -i;
    b[i] = i;
  }

  //----------------------------------------------------------------------------//

  std::cout << "\n Running C-version of vector addition...\n";

  for (int i = 0; i < N; ++i) {
    c[i] = a[i] + b[i];
  }

  checkResult(c, N);
  //printResult(c, N);

  //----------------------------------------------------------------------------//
  // RAJA::seq_exec policy enforces strictly sequential execution....
  //----------------------------------------------------------------------------//

  std::cout << "\n Running RAJA sequential vector addition...\n";

  RAJA::forall<RAJA::seq_exec>(RAJA::RangeSegment(0, N), [=] (int i) {
    c[i] = a[i] + b[i];
  });

  checkResult(c, N);
  //printResult(c, N);

  //----------------------------------------------------------------------------//
  // RAJA::simd_exec policy should force the compiler to generate SIMD
  // vectorization optimizations....
  //----------------------------------------------------------------------------//

  std::cout << "\n Running RAJA SIMD vector addition...\n";

  RAJA::forall<RAJA::simd_exec>(RAJA::RangeSegment(0, N), [=] (int i) {
    c[i] = a[i] + b[i];
  });

  checkResult(c, N);
  //printResult(c, N);

  //----------------------------------------------------------------------------//
  // RAJA::loop_exec policy means that the compiler is allowed to generate
  // optimizations (e.g., SIMD) if it thinks it is safe to do so...
  //----------------------------------------------------------------------------//

  std::cout << "\n Running RAJA loop-exec vector addition...\n";

  RAJA::forall<RAJA::loop_exec>(RAJA::RangeSegment(0, N), [=] (int i) {
    c[i] = a[i] + b[i];
  });

  checkResult(c, N);
  //printResult(c, N);

  //----------------------------------------------------------------------------//

#if defined(RAJA_ENABLE_OPENMP)
  std::cout << "\n Running RAJA OpenMP vector addition...\n";

  // Each iteration is independent, so the range can be split across
  // OpenMP threads without synchronization.
  RAJA::forall<RAJA::omp_parallel_for_exec>(RAJA::RangeSegment(0, N), [=] (int i) {
    c[i] = a[i] + b[i];
  });

  checkResult(c, N);
  //printResult(c, N);
#endif

  //----------------------------------------------------------------------------//

#if defined(RAJA_ENABLE_CUDA)
  std::cout << "\n Running RAJA CUDA vector addition...\n";

  // The lambda must be annotated RAJA_DEVICE so it can run on the GPU;
  // CUDA_BLOCK_SIZE fixes the number of threads per CUDA thread block.
  RAJA::forall<RAJA::cuda_exec<CUDA_BLOCK_SIZE>>(RAJA::RangeSegment(0, N),
    [=] RAJA_DEVICE (int i) {
    c[i] = a[i] + b[i];
  });

  checkResult(c, N);
  //printResult(c, N);
#endif

  //----------------------------------------------------------------------------//

  //
  // Clean up.
  //
  memoryManager::deallocate(a);
  memoryManager::deallocate(b);
  memoryManager::deallocate(c);

  std::cout << "\n DONE!...\n";

  return 0;
}
|
||
//
// Function to check result and report P/F.
//
// The vectors are initialized so that a[i] + b[i] == 0 for every i, hence a
// correct result vector is all zeros; any nonzero entry means the addition
// failed. Scans res[0..len-1] and prints PASS/FAIL to stdout.
//
void checkResult(int* res, int len)
{
  bool correct = true;
  for (int i = 0; i < len; i++) {
    if ( res[i] != 0 ) {
      correct = false;
      break;  // one bad entry is enough to fail; no need to scan the rest
    }
  }
  if ( correct ) {
    std::cout << "\n\t result -- PASS\n";
  } else {
    std::cout << "\n\t result -- FAIL\n";
  }
}
|
||
//
// Function to print result.
//
// Writes each element of the result vector to stdout, one
// "result[i] = value" line per element, framed by blank lines.
//
void printResult(int* res, int len)
{
  std::cout << std::endl;
  int idx = 0;
  while (idx < len) {
    std::cout << "result[" << idx << "] = " << res[idx] << std::endl;
    ++idx;
  }
  std::cout << std::endl;
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#-----[Build-type]------
Build-type = CUDA
#Build-type = CPU


#-----[RAJA and CUDA directories]----
RAJA_DIR ?= /home/arturo/git-repo/RAJA/develop/build
CUDA_DIR ?= /usr/local/cuda-9.0

rajaInc = -I$(RAJA_DIR)/include
rajaLib = $(RAJA_DIR)/lib/libRAJA.a
cudaLib = -Wl,-rpath -Wl,$(CUDA_DIR)/lib64 -L$(CUDA_DIR)/lib64 -lcuda -lcudart -lcudadevrt -lnvToolsExt
#===================================

#---[Host compiler]-----
host-compiler = g++-6
host-compilerFlags = '-O3 -g -std=c++11 -m64 -fopenmp'
compilerFlags = -O3 -g -std=c++11 -m64 -fopenmp
paths = -I ./$(iPath)
paths += $(rajaInc)
linker = $(host-compiler)
#======================

#----[device compiler]----
device-compiler=nvcc
device-flags = -g -std=c++11 -Xptxas=-v -lineinfo --expt-extended-lambda --restrict
device-flags += -ccbin=$(linker) -Xcompiler $(host-compilerFlags) -x=cu -arch=sm_50
#======================

# 'clean' produces no file of that name; declare it phony so a stray file
# called 'clean' can never mask the recipe.
.PHONY: clean

#----[Cuda - Compilation]---------
ifeq ($(Build-type),CUDA)
main: main.cpp
	@echo Compiling for CUDA - start
	$(device-compiler) $(device-flags) $(paths) -g -c -o main.o main.cpp
	$(linker) -o main main.o $(cudaLib) -fopenmp $(rajaLib)
	@echo Compiling for CUDA - end
else
#----[CPU - Compilation]---------
# BUG FIX: the original recipe passed -c together with -o main, which emitted
# an unlinked object file named 'main' instead of an executable. Compile and
# link in one step instead.
main: main.cpp
	@echo Compiling for CPU - start
	$(host-compiler) $(compilerFlags) $(paths) -g -o main main.cpp
	@echo Compiling for CPU - end
endif
#======================


#-----[Clean up]-------
# -f keeps clean from failing when the artifacts do not exist (the original
# ran a bare 'rm main' that errored on a fresh checkout, followed by a
# redundant second rm).
clean:
	rm -f main main.o
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// | ||
// Copyright (c) 2016-18, Lawrence Livermore National Security, LLC. | ||
// | ||
// Produced at the Lawrence Livermore National Laboratory | ||
// | ||
// LLNL-CODE-689114 | ||
// | ||
// All rights reserved. | ||
// | ||
// This file is part of RAJA. | ||
// | ||
// For details about use and distribution, please read RAJA/LICENSE. | ||
// | ||
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// | ||
|
||
#ifndef EXAMPLES_MEMORYMANAGER_HPP | ||
#define EXAMPLES_MEMORYMANAGER_HPP | ||
|
||
#include "RAJA/RAJA.hpp" | ||
#include "RAJA/util/defines.hpp" | ||
|
||
/* | ||
As RAJA does not manage memory we include a general purpose memory | ||
manager which may be used to perform c++ style allocation/deallocation | ||
or allocate/deallocate CUDA unified memory. The type of memory allocated | ||
is dependent on how RAJA was configured. | ||
*/ | ||
namespace memoryManager{ | ||
|
||
template <typename T> | ||
T *allocate(RAJA::Index_type size) | ||
{ | ||
T *ptr; | ||
#if defined(RAJA_ENABLE_CUDA) | ||
cudaMallocManaged((void **)&ptr, sizeof(T) * size, cudaMemAttachGlobal); | ||
#else | ||
ptr = new T[size]; | ||
#endif | ||
return ptr; | ||
} | ||
|
||
template <typename T> | ||
void deallocate(T *&ptr) | ||
{ | ||
if (ptr) { | ||
#if defined(RAJA_ENABLE_CUDA) | ||
cudaFree(ptr); | ||
#else | ||
delete[] ptr; | ||
#endif | ||
ptr = nullptr; | ||
} | ||
} | ||
|
||
}; | ||
#endif |