Skip to content

Commit

Permalink
Merge pull request #317 from ptycho/gpu-hackathon
Browse files Browse the repository at this point in the history
New GPU-hackathon merge - this time with history
  • Loading branch information
daurer authored Apr 9, 2021
2 parents 5409b17 + e70bae1 commit 9b458e7
Show file tree
Hide file tree
Showing 113 changed files with 7,561 additions and 2,011 deletions.
102 changes: 102 additions & 0 deletions archive/cuda_extension/extensions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
'''
These are the optional extensions for ptypy
'''


from distutils.version import LooseVersion
from distutils.extension import Extension
import os
import multiprocessing
import subprocess
import re
import numpy as np


# this is a hacky version, but is the desired behaviour
class AccelerationExtension(object):
    """Base class for optional ptypy acceleration extensions.

    Subclasses populate ``self._options`` with a mapping of
    ``name -> {'default': ..., 'doc': ...}`` and implement
    :meth:`build` and :meth:`getExtension`.
    """

    def __init__(self, debug=False):
        # debug toggles Debug vs Release builds in subclasses.
        self.debug = debug
        # Filled in by subclasses; None in the abstract base.
        self._options = None

    def get_full_options(self):
        """Return the raw option mapping declared by the subclass."""
        return self._options

    def get_reflection_options(self):
        """Convert ``self._options`` into distutils-style option lists.

        Returns a ``(user_options, boolean_options)`` pair: string-valued
        options get a trailing ``=`` (they take an argument), bool-valued
        options are additionally listed as boolean flags.
        """
        user_opts = []
        bool_opts = []
        for key, meta in self._options.items():
            default = meta['default']
            if isinstance(default, str):
                user_opts.append((key + '=', None, meta['doc']))
            elif isinstance(default, bool):
                user_opts.append((key, None, meta['doc']))
                bool_opts.append(key)
            else:
                # Only str and bool defaults are supported by the
                # distutils reflection mechanism.
                raise NotImplementedError("Don't know what to do with parameter:%s of type: %s" % (key, type(default)))
        return user_opts, bool_opts

    def build(self, options):
        raise NotImplementedError('You need to implement the build method!')

    def getExtension(self):
        raise NotImplementedError('You need to return cython extension object.')


class CudaExtension(AccelerationExtension):
    """Builds the CUDA acceleration library via CMake and exposes it as a
    Cython extension linked against the resulting static library."""

    def __init__(self, *args, **kwargs):
        super(CudaExtension, self).__init__(*args, **kwargs)
        # Option registry consumed by get_reflection_options() in the base
        # class; defaults cover common compute capabilities (3.5 - 7.0).
        self._options = {'cudadir': {'default': '',
                                     'doc': 'CUDA directory'},
                         'cudaflags': {'default': '-gencode arch=compute_35,\\"code=sm_35\\" ' +
                                                  '-gencode arch=compute_37,\\"code=sm_37\\" ' +
                                                  '-gencode arch=compute_52,\\"code=sm_52\\" ' +
                                                  '-gencode arch=compute_60,\\"code=sm_60\\" ' +
                                                  '-gencode arch=compute_70,\\"code=sm_70\\" ',
                                       'doc': 'Flags to the CUDA compiler'},
                         'gputiming': {'default': False,
                                       'doc': 'Do GPU timing'}}

    def build(self, options):
        """Configure and compile the CUDA sources with CMake.

        Parameters
        ----------
        options : dict
            Must provide 'cudadir', 'cudaflags' and 'gputiming'.

        Raises
        ------
        RuntimeError
            If cmake is not installed or is older than 3.8.0 (the first
            release with first-class CUDA language support).
        """
        cudadir = options['cudadir']
        cudaflags = options['cudaflags']
        gputiming = options['gputiming']
        try:
            out = subprocess.check_output(['cmake', '--version'])
        except OSError:
            raise RuntimeError(
                "CMake must be installed to build the CUDA extensions.")

        cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)',
                                               out.decode()).group(1))
        if cmake_version < '3.8.0':
            raise RuntimeError("CMake >= 3.8.0 is required")

        srcdir = os.path.abspath('cuda')
        buildtmp = os.path.abspath(os.path.join('build', 'cuda'))
        cmake_args = [
            "-DCMAKE_BUILD_TYPE=" + ("Debug" if self.debug else "Release"),
            '-DCMAKE_CUDA_FLAGS={}'.format(cudaflags),
            '-DGPU_TIMING={}'.format("ON" if gputiming else "OFF")
        ]
        if cudadir:
            # BUG FIX: the original used `cmake_args += <str>`, which extends
            # the list with the string's individual CHARACTERS, producing a
            # garbage cmake command line. Append the argument as one item.
            cmake_args.append(
                '-DCMAKE_CUDA_COMPILER="{}/bin/nvcc"'.format(cudadir))
        build_args = ["--config", "Debug" if self.debug else "Release",
                      "--", "-j{}".format(multiprocessing.cpu_count() + 1)]
        if not os.path.exists(buildtmp):
            os.makedirs(buildtmp)
        env = os.environ.copy()
        # Two-step CMake flow: configure, then build in the temp dir.
        subprocess.check_call(['cmake', srcdir] + cmake_args,
                              cwd=buildtmp, env=env)
        subprocess.check_call(['cmake', '--build', '.'] + build_args,
                              cwd=buildtmp)
        print("Complete.")

    def getExtension(self):
        """Return the distutils Extension for the Cython wrapper, linking
        against the static library produced by :meth:`build`."""
        libdirs = ['build/cuda']
        # Let the linker also search LD_LIBRARY_PATH entries (e.g. for
        # cudart / cufft shared libraries).
        if 'LD_LIBRARY_PATH' in os.environ:
            libdirs += os.environ['LD_LIBRARY_PATH'].split(':')
        return Extension('*',
                         sources=['ptypy/accelerate/cuda/gpu_extension.pyx'],
                         include_dirs=[np.get_include()],
                         libraries=['gpu_extension', 'cudart', 'cufft'],
                         library_dirs=libdirs,
                         depends=['build/cuda/libgpu_extension.a', ],
                         language="c++")
47 changes: 47 additions & 0 deletions archive/misc/mpitest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/** This is a simple C++ test to check if cuda-aware MPI works as
* expected.
* It allocates a GPU array and puts 1s into it, then sends it
* across MPI to the receiving rank, which transfers back to
* host and outputs the values.
* The expected output is:
*
* Received 1, 1
*
* Compile with:
* mpic++ -o test mpitest.cpp -L/path/to/cuda/libs -lcudart
*
* Run with:
* mpirun -np 2 test
*/

#include <cstdio>
#include <string>
#include <mpi.h>
#include <cuda_runtime_api.h>
#include <iostream>

int main(int argc, char** argv)
{
MPI_Init(&argc, &argv);

int rank;
MPI_Status status;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);

if (rank == 0) {
int* d_send;
cudaMalloc((void**)&d_send, 2*sizeof(int));
int h_send[] = {1, 1};
cudaMemcpy(d_send, h_send, 2*sizeof(int), cudaMemcpyHostToDevice);
MPI_Send(d_send, 2, MPI_INT, 1, 99, MPI_COMM_WORLD);
std::cout << "Data has been sent...\n";
} else if (rank == 1) {
int* d_recv;
cudaMalloc((void**)&d_recv, 2*sizeof(int));
MPI_Recv(d_recv, 2, MPI_INT, 0, 99, MPI_COMM_WORLD, &status);
int h_recv[2];
cudaMemcpy(h_recv, d_recv, 2*sizeof(int), cudaMemcpyDeviceToHost);
std::cout << "Received " << h_recv[0] << ", " << h_recv[1] << "\n";
}

}
Loading

0 comments on commit 9b458e7

Please sign in to comment.