Skip to content

Commit

Permalink
bump version, merge pull request #1 from AMYPAD/devel
Browse files Browse the repository at this point in the history
  • Loading branch information
casperdcl authored Jan 23, 2021
2 parents 0c8778b + 576e790 commit dac6aca
Show file tree
Hide file tree
Showing 12 changed files with 206 additions and 50 deletions.
2 changes: 2 additions & 0 deletions cuvec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ install(TARGETS ${PROJECT_NAME} EXPORT ${PROJECT_NAME}Targets
install(EXPORT ${PROJECT_NAME}Targets FILE AMYPAD${PROJECT_NAME}Targets.cmake
NAMESPACE AMYPAD:: DESTINATION ${CMAKE_PROJECT_NAME}/cmake)

add_subdirectory(src/example_mod)

# install project

include(CMakePackageConfigHelpers)
Expand Down
4 changes: 4 additions & 0 deletions cuvec/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,8 @@ def asarray(arr, dtype=None, order=None):
Returns a `cuvec.CuVec` view of `arr`, avoiding memory copies if possible.
(`cuvec` equivalent of `numpy.asarray`).
"""
if not isinstance(arr, np.ndarray) and is_raw_cuvec(arr):
res = CuVec(arr)
if dtype is None or res.dtype == np.dtype(dtype):
return CuVec(np.asanyarray(res, order=order))
return CuVec(np.asanyarray(arr, dtype=dtype, order=order))
10 changes: 0 additions & 10 deletions cuvec/include/cuhelpers.h

This file was deleted.

23 changes: 15 additions & 8 deletions cuvec/include/cuvec.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,18 @@
#ifndef _CUVEC_H_
#define _CUVEC_H_

#include "cuhelpers.h" // HANDLE_ERROR
#include <cstdio> // fprintf
#include <cstdlib> // std::size_t
#include <limits> // std::numeric_limits
#include <new> // std::bad_alloc
#include <vector> // std::vector
#include <cstdio> // fprintf
#include <cstdlib> // std::size_t
#include <limits> // std::numeric_limits
#include <new> // std::bad_alloc
#include <vector> // std::vector

/**
 * Terminate the process with a diagnostic if a CUDA runtime call failed.
 *
 * Declared `inline` because this definition lives in a header: without it, every
 * translation unit including this file emits its own external definition, and linking
 * two such units into one binary fails with a multiple-definition error.
 *
 * @param err   status returned by a CUDA runtime call.
 * @param file  source file of the call site (typically `__FILE__`).
 * @param line  source line of the call site (typically `__LINE__`).
 */
inline void HandleError(cudaError_t err, const char *file, int line) {
  if (err != cudaSuccess) {
    fprintf(stderr, "%s in %s at line %d\n", cudaGetErrorString(err), file, line);
    exit(EXIT_FAILURE);
  }
}

template <class T> struct CuAlloc {
typedef T value_type;
Expand All @@ -26,7 +32,8 @@ template <class T> struct CuAlloc {
if (n > std::numeric_limits<std::size_t>::max() / sizeof(T)) throw std::bad_alloc();

T *p;
HANDLE_ERROR(cudaMallocManaged(&p, n * sizeof(T))); // p = (T *)malloc(n * sizeof(T));
// p = (T *)malloc(n * sizeof(T));
HandleError(cudaMallocManaged(&p, n * sizeof(T)), __FILE__, __LINE__);
if (p) {
report(p, n);
return p;
Expand All @@ -37,7 +44,7 @@ template <class T> struct CuAlloc {

void deallocate(T *p, std::size_t n) noexcept {
report(p, n, 0);
HANDLE_ERROR(cudaFree(p)); // free(p);
HandleError(cudaFree(p), __FILE__, __LINE__); // free(p);
}

private:
Expand Down
14 changes: 9 additions & 5 deletions cuvec/include/pycuvec.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@
#define _PYCUVEC_H_

#include "Python.h"
#include "cuvec.cuh" // CuVec
#include <cstdlib> // malloc, free
#include <sstream> // std::stringstream
#include <typeinfo> // typeid
#include <vector> // std::vector
#include "cuda_fp16.h" // __half
#include "cuvec.cuh" // CuVec
#include <cstdlib> // malloc, free
#include <sstream> // std::stringstream
#include <typeinfo> // typeid
#include <vector> // std::vector

template <typename T> struct PyType {
static const char *format() { return typeid(T).name(); }
Expand Down Expand Up @@ -51,6 +52,9 @@ template <> struct PyType<long long> {
template <> struct PyType<unsigned long long> {
static const char *format() { return "Q"; }
};
/// Format code for CUDA half-precision floats ("e" is the Python `struct` code for a
/// 16-bit float; presumably consumed as a buffer-protocol format string -- confirm at use site).
template <> struct PyType<__half> {
static const char *format() { return "e"; }
};
template <> struct PyType<float> {
static const char *format() { return "f"; }
};
Expand Down
5 changes: 4 additions & 1 deletion cuvec/pycuvec.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
Vector_B,
Vector_c,
Vector_d,
Vector_e,
Vector_f,
Vector_h,
Vector_H,
Expand All @@ -18,7 +19,8 @@
Vector_Q,
)

typecodes = [i for i in array.typecodes if i not in "ulL"]
# u: non-standard np.dtype('S2'); l/L: inconsistent between `array` and `numpy`
typecodes = ''.join(i for i in array.typecodes if i not in "ulL") + "e"
vec_types = {
np.dtype('int8'): Vector_b,
np.dtype('uint8'): Vector_B,
Expand All @@ -29,6 +31,7 @@
np.dtype('uint32'): Vector_I,
np.dtype('int64'): Vector_q,
np.dtype('uint64'): Vector_Q,
np.dtype('float16'): Vector_e,
np.dtype('float32'): Vector_f,
np.dtype('float64'): Vector_d}

Expand Down
22 changes: 0 additions & 22 deletions cuvec/src/cuhelpers.cu

This file was deleted.

7 changes: 6 additions & 1 deletion cuvec/src/pycuvec.cu → cuvec/src/cuvec.cu
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ static PyCuVec_tp<int> Vector_i;
static PyCuVec_tp<unsigned int> Vector_I;
static PyCuVec_tp<long long> Vector_q; // _l
static PyCuVec_tp<unsigned long long> Vector_Q; // _L
static PyCuVec_tp<__half> Vector_e;
static PyCuVec_tp<float> Vector_f;
static PyCuVec_tp<double> Vector_d;

Expand Down Expand Up @@ -92,6 +93,10 @@ PyMODINIT_FUNC PyInit_cuvec(void) {
Py_INCREF(&Vector_Q.tp_obj);
PyModule_AddObject(m, "Vector_L", (PyObject *)&Vector_Q.tp_obj);

if (PyType_Ready(&Vector_e.tp_obj) < 0) return NULL;
Py_INCREF(&Vector_e.tp_obj);
PyModule_AddObject(m, Vector_e.name.c_str(), (PyObject *)&Vector_e.tp_obj);

if (PyType_Ready(&Vector_f.tp_obj) < 0) return NULL;
Py_INCREF(&Vector_f.tp_obj);
PyModule_AddObject(m, Vector_f.name.c_str(), (PyObject *)&Vector_f.tp_obj);
Expand All @@ -108,7 +113,7 @@ PyMODINIT_FUNC PyInit_cuvec(void) {
if (date == NULL) return NULL;
PyModule_AddObject(m, "__date__", date);

PyObject *version = Py_BuildValue("s", "0.2.0");
PyObject *version = Py_BuildValue("s", "0.3.0");
if (version == NULL) return NULL;
PyModule_AddObject(m, "__version__", version);

Expand Down
23 changes: 23 additions & 0 deletions cuvec/src/example_mod/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Build script for `example_mod`: an example extension module using CuVec.
project(example_mod)
# every `*.cu` file in this directory is a source of the module
file(GLOB SRC LIST_DIRECTORIES false "*.cu")

include_directories(${Python3_INCLUDE_DIRS})
#include_directories(${Python3_NumPy_INCLUDE_DIRS})

# MODULE: a plugin loaded at runtime rather than linked against
add_library(${PROJECT_NAME} MODULE ${SRC})
# headers come from the parent project's build tree, or from its install prefix
target_include_directories(${PROJECT_NAME} PUBLIC
"$<BUILD_INTERFACE:${${CMAKE_PROJECT_NAME}_INCLUDE_DIRS}>"
"$<INSTALL_INTERFACE:${CMAKE_PROJECT_NAME}/include>")
target_link_libraries(${PROJECT_NAME} ${Python3_LIBRARIES} ${CUDA_LIBRARIES})

# NOTE(review): `python_extension_module` is presumably the scikit-build helper -- confirm
if(SKBUILD)
python_extension_module(${PROJECT_NAME})
endif()
# version properties mirror those of the top-level project
set_target_properties(${PROJECT_NAME} PROPERTIES
CXX_STANDARD 11
VERSION ${CMAKE_PROJECT_VERSION} SOVERSION ${CMAKE_PROJECT_VERSION_MAJOR}
INTERFACE_${PROJECT_NAME}_MAJOR_VERSION ${CMAKE_PROJECT_VERSION_MAJOR})
set_property(TARGET ${PROJECT_NAME} APPEND PROPERTY COMPATIBLE_INTERFACE_STRING ${PROJECT_NAME}_MAJOR_VERSION)
# install the built module into the top-level project's directory
install(TARGETS ${PROJECT_NAME}
INCLUDES DESTINATION ${CMAKE_PROJECT_NAME}/include
LIBRARY DESTINATION ${CMAKE_PROJECT_NAME})
55 changes: 55 additions & 0 deletions cuvec/src/example_mod/example_mod.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/**
* Example external extension module using CuVec.
*
* Copyright (2021) Casper da Costa-Luis
*/
#include "Python.h"
#include "pycuvec.cuh" // PyCuVec
/** functions */
/**
 * dst = src + 1 (element-wise) over a row-major array of `Y` rows by `X` columns.
 *
 * Expects a 2D launch with one thread per element; threads whose (x, y) falls outside
 * the array return without touching memory, so the grid may overshoot the extents.
 *
 * @param dst  output, at least `X * Y` elements.
 * @param src  input, at least `X * Y` elements (read-only).
 * @param X    number of columns (fastest-varying dimension).
 * @param Y    number of rows.
 */
__global__ void _d_incr(float *dst, const float *src, int X, int Y) {
  int x = threadIdx.x + blockDim.x * blockIdx.x;
  if (x >= X) return;
  int y = threadIdx.y + blockDim.y * blockIdx.y;
  if (y >= Y) return;
  // flatten in 64 bits: `y * X + x` overflows `int` once the array holds >= 2^31 elements
  const size_t i = size_t(y) * size_t(X) + size_t(x);
  dst[i] = src[i] + 1;
}
/**
 * Python binding `increment_f(src) -> (alloc_ms, kernel_ms, dst)` where `dst = src + 1`.
 *
 * `alloc_ms` is the time between the CUDA events recorded around the creation of `dst`;
 * `kernel_ms` is the time between the events recorded around the kernel launch.
 *
 * @param self  unused.
 * @param args  tuple holding a single object, reinterpreted as `PyCuVec<float> *`.
 * @return new reference to the result tuple, or NULL with a Python exception set
 *   (ValueError: `src` is not 2D; MemoryError: `dst` could not be created;
 *   RuntimeError: CUDA reported an error).
 */
static PyObject *increment_f(PyObject *self, PyObject *args) {
  PyCuVec<float> *src;
  // NOTE(review): "O" performs no type check, so passing anything other than a float32
  // PyCuVec is undefined behaviour; validate against its type object where available.
  if (!PyArg_ParseTuple(args, "O", (PyObject **)&src)) return NULL;
  std::vector<Py_ssize_t> &N = src->shape;
  // N[0] and N[1] are read below: refuse anything that is not exactly 2D
  if (N.size() != 2) {
    PyErr_SetString(PyExc_ValueError, "increment_f: `src` must be 2D");
    return NULL;
  }

  cudaEvent_t eStart, eAlloc, eKern;
  cudaEventCreate(&eStart);
  cudaEventCreate(&eAlloc);
  cudaEventCreate(&eKern);
  cudaEventRecord(eStart);
  // assumes PyCuVec_zeros_like returns a new (owned) reference, or NULL on failure
  PyCuVec<float> *dst = PyCuVec_zeros_like(src);
  if (!dst) {
    cudaEventDestroy(eStart);
    cudaEventDestroy(eAlloc);
    cudaEventDestroy(eKern);
    if (!PyErr_Occurred()) PyErr_SetString(PyExc_MemoryError, "increment_f: could not create output");
    return NULL;
  }
  cudaEventRecord(eAlloc);

  cudaError_t err = cudaSuccess;
  // a zero-sized grid is an invalid launch configuration: nothing to do for empty input
  if (N[0] > 0 && N[1] > 0) {
    dim3 block(32, 32);                             // threads per block
    dim3 grid((N[1] + 31) / 32, (N[0] + 31) / 32);  // ceil-div: blocks covering (X, Y)
    _d_incr<<<grid, block>>>(dst->vec.data(), src->vec.data(), N[1], N[0]);
    err = cudaGetLastError(); // launch errors are not reported by the `<<<>>>` syntax itself
  }
  cudaEventRecord(eKern);
  // wait for the kernel: also surfaces errors raised while it was running
  cudaError_t syncErr = cudaEventSynchronize(eKern);
  if (err == cudaSuccess) err = syncErr;

  float alloc_ms = 0, kernel_ms = 0;
  if (err == cudaSuccess) {
    cudaEventElapsedTime(&alloc_ms, eStart, eAlloc);
    cudaEventElapsedTime(&kernel_ms, eAlloc, eKern);
  }
  // events are per-call resources: release them on every path
  cudaEventDestroy(eStart);
  cudaEventDestroy(eAlloc);
  cudaEventDestroy(eKern);

  if (err != cudaSuccess) {
    Py_DECREF((PyObject *)dst);
    PyErr_SetString(PyExc_RuntimeError, cudaGetErrorString(err));
    return NULL;
  }
  // fprintf(stderr, "%.3f ms, %.3f ms\n", alloc_ms, kernel_ms);
  // "N" hands our reference to the tuple; "O" would add a second one which is never released
  return Py_BuildValue("ddN", double(alloc_ms), double(kernel_ms), (PyObject *)dst);
}
/// Method table of the module: maps Python-visible names to their C implementations.
static PyMethodDef example_methods[] = {
// positional-arguments-only call convention (METH_VARARGS)
{"increment_f", increment_f, METH_VARARGS, "Returns (alloc_ms, kernel_ms, input + 1)."},
{NULL, NULL, 0, NULL} // Sentinel
};

/** module */
/// Module definition passed to `PyModule_Create` in `PyInit_example_mod`.
static struct PyModuleDef example_mod = {PyModuleDef_HEAD_INIT,
"example_mod", // module name
"Example external module.", // module docstring
-1, // module keeps state in global variables
example_methods}; // method table
/**
 * Module initialisation entry point, invoked by the interpreter on `import example_mod`.
 *
 * No `Py_Initialize()` here: the interpreter is necessarily running when an extension
 * module is imported (the call is documented as a no-op in that situation).
 *
 * @return new module object, or NULL with an exception set on failure.
 */
PyMODINIT_FUNC PyInit_example_mod(void) {
  return PyModule_Create(&example_mod);
}
19 changes: 16 additions & 3 deletions tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@ def test_CuVec_creation(caplog):
assert not caplog.record_tuples
w = cu.CuVec(v)
assert [i[1:] for i in caplog.record_tuples] == [(10, "new view")]
nested = cu.asarray(w.cuvec).cuvec
assert nested != w.cuvec, "expected different object"
assert np.asarray(nested).data == np.asarray(w.cuvec).data, "expected same data"

caplog.clear()
assert w[0, 0, 0] == 1
Expand All @@ -60,3 +57,19 @@ def test_CuVec_creation(caplog):
assert v.cuvec is w.cuvec
assert v.data == w.data
assert not caplog.record_tuples


def test_asarray():
    """Check which inputs `cu.asarray`/`cu.CuVec` wrap without creating a new cuvec."""
    def raw_data(arr):
        return np.asarray(arr.cuvec).data

    original = cu.asarray(np.random.random(shape))

    # `CuVec` of an existing `CuVec`: same underlying cuvec
    wrapped = cu.CuVec(original)
    assert wrapped.cuvec == original.cuvec
    assert (wrapped == original).all()
    assert raw_data(wrapped) == raw_data(original)

    # `asarray` of a raw cuvec: same underlying cuvec
    from_raw = cu.asarray(wrapped.cuvec)
    assert from_raw.cuvec == original.cuvec
    assert (from_raw == original).all()
    assert raw_data(from_raw) == raw_data(original)

    # `asarray` of a plain list: a different cuvec whose contents compare equal
    from_list = cu.asarray(from_raw.tolist())
    assert from_list.cuvec != original.cuvec
    assert (from_list == original).all()
    assert raw_data(from_list) == raw_data(original)
72 changes: 72 additions & 0 deletions tests/test_perf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from functools import wraps
from time import time

import numpy as np

import cuvec as cu


def _time_overhead():
tic = time()
pass
res = time() - tic
return res


def timer(func):
    """
    Decorator: the wrapped callable returns `(elapsed_ms, result)`.

    `elapsed_ms` is the wall-clock duration of `func` in milliseconds, less the mean
    of 100 `_time_overhead()` samples taken just before the call.
    """
    @wraps(func)
    def inner(*args, **kwargs):
        baseline = np.mean([_time_overhead() for _ in range(100)])
        start = time()
        result = func(*args, **kwargs)
        elapsed_ms = (time() - start - baseline) * 1000
        return elapsed_ms, result

    return inner


def test_perf(shape=(1337, 42), quiet=False):
    """
    Time each stage of calling the example extension and check the API overhead.

    Args:
      shape: shape of the float32 input array.
      quiet: if true, skip the warm-up call and do not print the timings.
    Returns:
      dict mapping stage name to duration in milliseconds.
    """
    # `example_mod` is defined in ../cuvec/src/example_mod/
    from cuvec.example_mod import increment_f

    overhead = np.mean([_time_overhead() for _ in range(100)])
    timings = {}
    timings['create src'], src = timer(cu.zeros)(shape, "float32")

    rnd = np.random.random(shape)
    start = time()
    src[:] = rnd
    timings['assign'] = (time() - start - overhead) * 1000

    timed_increment = timer(increment_f)
    if not quiet:
        # first call: its timings are recorded separately from the measured call
        warmup_ms, (alloc_ms, kernel_ms, _) = timed_increment(src.cuvec)
        timings['warmup'] = warmup_ms
        timings['> create dst'] = alloc_ms
        timings['> kernel'] = kernel_ms
    call_ms, (alloc_ms, kernel_ms, res) = timed_increment(src.cuvec)
    timings['call ext'] = call_ms
    timings['- create dst'] = alloc_ms
    timings['- kernel'] = kernel_ms
    timings['view'], dst = timer(cu.asarray)(res)

    if not quiet:
        print("\n".join(f"{k.ljust(14)} | {v:.3f}" for k, v in timings.items()))
    assert (src + 1 == dst).all()
    # time spent in the call excluding creation of the output
    call_sans_alloc = timings['call ext'] - timings['- create dst']
    # even a fast kernel takes longer than API overhead
    assert timings['- kernel'] / call_sans_alloc > 0.5
    # API call should be <0.1 ms... but set a higher threshold of 2 ms
    assert call_sans_alloc - timings['- kernel'] < 2
    return timings


if __name__ == "__main__":
    # progress bar if `tqdm` is installed, plain `range` otherwise
    try:
        from tqdm import trange
    except ImportError:
        trange = range
    nruns = 1000

    print("# One run:")
    test_perf((1000, 1000))

    # f-string: without the `f` prefix the literal text "{nruns}" was printed
    print(f"Repeating & averaging performance test metrics over {nruns} runs.")
    runs = [test_perf((1000, 1000), True) for _ in trange(nruns)]
    # human-readable labels for the stages recorded by quiet runs
    pretty = {
        'create src': 'Create input', 'assign': 'Assign', 'call ext': 'Call extension',
        '- create dst': '-- Create output', '- kernel': '-- Launch kernel', 'view': 'View'}
    # transpose: list of per-run dicts -> dict of per-stage lists
    runs = {pretty[k]: [i[k] for i in runs] for k in runs[0]}
    # mean ± standard error of the mean
    print("\n".join(f"{k.ljust(16)} | {np.mean(v):.3f} ± {np.std(v, ddof=1)/np.sqrt(len(v)):.3f}"
                    for k, v in runs.items()))

0 comments on commit dac6aca

Please sign in to comment.