Skip to content

Commit

Permalink
REVIEW: fix review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Sergei-Lebedev committed Dec 15, 2023
1 parent 1d6fe10 commit b3f8e2a
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 4 deletions.
43 changes: 39 additions & 4 deletions tools/perf/ucc_pt_coll.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,54 @@

#include "ucc_pt_coll.h"
#include "ucc_pt_cuda.h"
#include "utils/ucc_malloc.h"

ucc_status_t ucc_pt_alloc(ucc_mc_buffer_header_t **h_ptr, size_t len,
ucc_memory_type_t mem_type)
{
ucc_status_t status;
int cuda_st;

if (mem_type == UCC_MEMORY_TYPE_CUDA) {
switch (mem_type) {
case UCC_MEMORY_TYPE_CUDA:
*h_ptr = new ucc_mc_buffer_header_t;
(*h_ptr)->mt = UCC_MEMORY_TYPE_CUDA;
cuda_st = ucc_pt_cudaMalloc(&((*h_ptr)->addr), len);
if (cuda_st != 0) {
return UCC_ERR_NO_RESOURCE;
return UCC_ERR_NO_MEMORY;
}
cuda_st = ucc_pt_cudaMemset((*h_ptr)->addr, 0, len);
if (cuda_st != 0) {
ucc_pt_cudaFree((*h_ptr)->addr);
delete *h_ptr;
return UCC_ERR_NO_RESOURCE;
return UCC_ERR_NO_MEMORY;
}
return UCC_OK;
case UCC_MEMORY_TYPE_CUDA_MANAGED:
*h_ptr = new ucc_mc_buffer_header_t;
(*h_ptr)->mt = UCC_MEMORY_TYPE_CUDA_MANAGED;
cuda_st = ucc_pt_cudaMallocManaged(&((*h_ptr)->addr), len);
if (cuda_st != 0) {
return UCC_ERR_NO_MEMORY;
}
cuda_st = ucc_pt_cudaMemset((*h_ptr)->addr, 0, len);
if (cuda_st != 0) {
ucc_pt_cudaFree((*h_ptr)->addr);
delete *h_ptr;
return UCC_ERR_NO_MEMORY;
}
return UCC_OK;
case UCC_MEMORY_TYPE_HOST:
*h_ptr = new ucc_mc_buffer_header_t;
(*h_ptr)->mt = UCC_MEMORY_TYPE_HOST;
(*h_ptr)->addr = ucc_malloc(len, "perftest data");
if (!((*h_ptr)->addr)) {
return UCC_ERR_NO_MEMORY;
}
memset((*h_ptr)->addr, 0, len);
return UCC_OK;
default:
break;
}

status = ucc_mc_alloc(h_ptr, len, mem_type);
Expand All @@ -44,10 +71,18 @@ ucc_status_t ucc_pt_alloc(ucc_mc_buffer_header_t **h_ptr, size_t len,

ucc_status_t ucc_pt_free(ucc_mc_buffer_header_t *h_ptr)
{
if (h_ptr->mt == UCC_MEMORY_TYPE_CUDA) {
switch (h_ptr->mt) {
case UCC_MEMORY_TYPE_CUDA:
case UCC_MEMORY_TYPE_CUDA_MANAGED:
ucc_pt_cudaFree(h_ptr->addr);
delete h_ptr;
return UCC_OK;
case UCC_MEMORY_TYPE_HOST:
ucc_free(h_ptr->addr);
delete h_ptr;
return UCC_OK;
default:
break;
}

return ucc_mc_free(h_ptr);
Expand Down
1 change: 1 addition & 0 deletions tools/perf/ucc_pt_cuda.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ void ucc_pt_cuda_init(void)
LOAD_CUDA_SYM("cudaMalloc", cudaMalloc);
LOAD_CUDA_SYM("cudaFree", cudaFree);
LOAD_CUDA_SYM("cudaMemset", cudaMemset);
LOAD_CUDA_SYM("cudaMallocManaged", cudaMallocManaged);

ucc_pt_cuda_iface.available = 1;
}
12 changes: 12 additions & 0 deletions tools/perf/ucc_pt_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#define cudaSuccess 0
#define cudaStreamNonBlocking 0x01 /**< Stream does not synchronize with stream 0 (the NULL stream) */
#define cudaMemAttachGlobal 0x01 /**< Memory can be accessed by any stream on any device*/
typedef struct CUStream_st *cudaStream_t;

#define STR(x) #x
Expand All @@ -32,6 +33,7 @@ typedef struct ucc_pt_cuda_iface {
int (*streamDestroy)(cudaStream_t stream);
char* (*getErrorString)(int err);
int (*cudaMalloc)(void **devptr, size_t size);
int (*cudaMallocManaged)(void **ptr, size_t size, unsigned int flags);
int (*cudaFree)(void *devptr);
int (*cudaMemset)(void *devptr, int value, size_t count);
} ucc_pt_cuda_iface_t;
Expand Down Expand Up @@ -86,6 +88,16 @@ static inline int ucc_pt_cudaMalloc(void **devptr, size_t size)
return 0;
}

static inline int ucc_pt_cudaMallocManaged(void **ptr, size_t size)
{
if (!ucc_pt_cuda_iface.available) {
return 1;
}
CUDA_CHECK(ucc_pt_cuda_iface.cudaMallocManaged(ptr, size,
cudaMemAttachGlobal));
return 0;
}

static inline int ucc_pt_cudaFree(void *devptr)
{
if (!ucc_pt_cuda_iface.available) {
Expand Down

0 comments on commit b3f8e2a

Please sign in to comment.