Skip to content

Commit

Permalink
Fix abusive cudaMemcpy
Browse files Browse the repository at this point in the history
  • Loading branch information
jacquev6 committed Oct 16, 2023
1 parent 0834268 commit 5031f49
Showing 1 changed file with 11 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -391,8 +391,17 @@ void ImproveProfilesWithAccuracyHeuristicOnGpu::improve_model_profile(
std::uniform_real_distribution<float>(0, 1)(host_learning_data.urbgs[model_index]));
check_last_cuda_error_sync_stream(cudaStreamDefault);

// @todo(Project management, soon) Double-check and document why we don't need [model_index] here
copy(gpu_learning_data.profiles[criterion_index][profile_index], host_learning_data.profiles[criterion_index][profile_index]);
// @todo(Performance, later) Can we group this copying somehow?
// Currently we copy just one float from device memory to host memory
// (because just one float is potentially modified by 'apply_best_move__kernel',
// and we need it back on the host for the next iteration)

// Lov-e-CUDA doesn't provide a way to copy scalars, so we're back to the basics, using cudaMemcpy directly and doing pointer arithmetic.
check_cuda_error(cudaMemcpy(
host_learning_data.profiles[criterion_index][profile_index].data() + model_index,
gpu_learning_data.profiles[criterion_index][profile_index].data() + model_index,
1 * sizeof(float),
cudaMemcpyDeviceToHost));
}

} // namespace lincs

0 comments on commit 5031f49

Please sign in to comment.