
Commit

fix bwd bugs
goliaro committed Nov 10, 2023
1 parent 9095f2b commit 9769604
Showing 3 changed files with 37 additions and 4 deletions.
7 changes: 6 additions & 1 deletion src/ops/inc_multihead_self_attention.cu
@@ -469,8 +469,13 @@ void peft_bwd_kernel(IncMultiHeadSelfAttentionMeta const *m,
 #if defined(CUDA_VERSION) && (CUDA_VERSION < 11000)
   cudaDataType_t compute_type = cublas_data_type;
 #else
-  // TODO: currently set the default to CUBLAS_COMPUTE_16F for best performance
+  // For best performance, set the default cublas compute type to
+  // CUBLAS_COMPUTE16F for half precision and to
+  // CUBLAS_COMPUTE_32F_FAST_16F for full precision
   cublasComputeType_t compute_type = CUBLAS_COMPUTE_16F;
+  if (m->output_type[0] == DT_FLOAT) {
+    compute_type = CUBLAS_COMPUTE_32F_FAST_16F;
+  }
 #endif
   for (int i = 0; i < bc->max_requests_per_batch(); i++) {
     if (bc->request_completed[i]) {
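For context, here is a minimal standalone sketch (not FlexFlow code; OutputType and pick_compute_type are illustrative names) of what this hunk does: default to CUBLAS_COMPUTE_16F for half-precision outputs, switch to CUBLAS_COMPUTE_32F_FAST_16F when the output is FP32, and hand the result to the GEMM call.

// Sketch only: mirrors the compute-type selection above, assuming
// output_type plays the role of m->output_type[0] in the kernel.
#include <cublas_v2.h>

enum class OutputType { Half, Float };

cublasComputeType_t pick_compute_type(OutputType output_type) {
  // Default to 16-bit compute for half precision; for float outputs,
  // CUBLAS_COMPUTE_32F_FAST_16F accumulates in FP32 while still allowing
  // FP16 tensor-core math internally for speed.
  cublasComputeType_t compute_type = CUBLAS_COMPUTE_16F;
  if (output_type == OutputType::Float) {
    compute_type = CUBLAS_COMPUTE_32F_FAST_16F;
  }
  return compute_type;
}

// The chosen value is then passed as the computeType argument of
// cublasGemmEx(..., Ctype, ldc, computeType, algo).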
12 changes: 9 additions & 3 deletions src/ops/kernels/linear_kernels.cu
@@ -457,14 +457,20 @@ void peft_bwd_kernel(LinearMeta const *m,
   cudaDataType_t weight_type = ff_to_cuda_datatype(m->weight_type[0]);
   cudaDataType_t output_type = ff_to_cuda_datatype(m->output_type[0]);
   // update input_grad_ptr and output_grad_ptr offset
-  input_grad_ptr = static_cast<DT *>(input_grad_ptr) + num_infr_tokens * in_dim;
+  int num_infr_only_tokens = num_infr_tokens - num_peft_tokens;
+  input_grad_ptr = static_cast<DT *>(input_grad_ptr) + num_infr_only_tokens * in_dim;
   output_grad_ptr =
-      static_cast<DT *>(output_grad_ptr) + num_infr_tokens * out_dim;
+      static_cast<DT *>(output_grad_ptr) + num_infr_only_tokens * out_dim;
 #if defined(CUDA_VERSION) && (CUDA_VERSION < 11000)
   cudaDataType_t compute_type = output_type;
 #else
-  // TODO: currently set the default to CUBLAS_COMPUTE_16F for best performance
+  // For best performance, set the default cublas compute type to
+  // CUBLAS_COMPUTE_16F for half precision and to
+  // CUBLAS_COMPUTE_32F_FAST_16F for full precision
   cublasComputeType_t compute_type = CUBLAS_COMPUTE_16F;
+  if (m->output_type[0] == DT_FLOAT) {
+    compute_type = CUBLAS_COMPUTE_32F_FAST_16F;
+  }
 #endif
   int output_size = out_dim * num_peft_tokens;
   if (m->activation == AC_MODE_RELU) {
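The substantive backward-pass fix here is the pointer offset: the gradients should skip only the inference-only tokens, not all inference tokens, under the reading that the PEFT tokens' slots sit immediately after the inference-only region in the same buffer. A tiny host-side sketch of the arithmetic (hypothetical numbers, not FlexFlow code):

#include <cstdio>

int main() {
  // Hypothetical batch layout: inference-only tokens first, PEFT tokens after.
  int num_infr_tokens = 12; // all tokens handled by the forward pass
  int num_peft_tokens = 4;  // tokens that also get a PEFT backward pass
  int in_dim = 8;

  int num_infr_only_tokens = num_infr_tokens - num_peft_tokens;

  // Old offset skipped past the PEFT tokens' region entirely.
  int old_offset = num_infr_tokens * in_dim;      // 96 elements
  // New offset stops at the start of the PEFT tokens' region.
  int new_offset = num_infr_only_tokens * in_dim; // 64 elements

  printf("old offset: %d, new offset: %d\n", old_offset, new_offset);
  return 0;
}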
22 changes: 22 additions & 0 deletions src/runtime/cuda_helper.cu
@@ -608,6 +608,28 @@ cudnnDataType_t cuda_to_cudnn_datatype(cudaDataType_t type) {
   return CUDNN_DATA_FLOAT;
 }
 
+void check_device_vs_host_ptr(void const *maybe_devicePtr) {
+  cudaPointerAttributes attributes;
+  cudaError_t cudaStatus = cudaPointerGetAttributes(&attributes, maybe_devicePtr);
+
+  if (cudaStatus == cudaSuccess) {
+    // Check attributes and perform actions accordingly
+    if (attributes.type == cudaMemoryTypeDevice) {
+      printf("Pointer is allocated in device memory.\n");
+    } else if (attributes.type == cudaMemoryTypeHost) {
+      printf("Pointer is allocated in host memory.\n");
+    } else if (attributes.type == cudaMemoryTypeUnregistered) {
+      printf("Pointer is unregistered.\n");
+    } else if (attributes.type == cudaMemoryTypeManaged) {
+      printf("Pointer is managed.\n");
+    } else {
+      printf("Pointer is not allocated in recognized memory type.\n");
+    }
+  } else {
+    fprintf(stderr, "cudaPointerGetAttributes failed: %s\n", cudaGetErrorString(cudaStatus));
+  }
+}
+
 template __global__ void
     assign_kernel<half>(half *ptr, coord_t size, half value);
 template __global__ void
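A hypothetical usage sketch for the new debugging helper (not part of the commit; it assumes you link against the cuda_helper.cu definition added above): call it on a pointer before handing it to a kernel or cuBLAS routine to confirm which memory space it lives in.

#include <cuda_runtime.h>

// Defined in src/runtime/cuda_helper.cu (see the hunk above).
void check_device_vs_host_ptr(void const *maybe_devicePtr);

int main() {
  float host_buf[4] = {0.f, 0.f, 0.f, 0.f};
  float *dev_buf = nullptr;
  cudaMalloc(&dev_buf, 4 * sizeof(float));

  check_device_vs_host_ptr(host_buf); // plain host memory -> "unregistered"
  check_device_vs_host_ptr(dev_buf);  // cudaMalloc'd -> "device memory"

  cudaFree(dev_buf);
  return 0;
}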
