diff --git a/runtime/nvqir/custatevec/CuStateVecCircuitSimulator.cpp b/runtime/nvqir/custatevec/CuStateVecCircuitSimulator.cpp index 03044c436b..5c2f3a01c3 100644 --- a/runtime/nvqir/custatevec/CuStateVecCircuitSimulator.cpp +++ b/runtime/nvqir/custatevec/CuStateVecCircuitSimulator.cpp @@ -190,16 +190,43 @@ class CuStateVecCircuitSimulator } // User state provided... - - // FIXME handle case where pointer is a device pointer + // Check if the pointer is a device pointer + cudaPointerAttributes attributes; + HANDLE_CUDA_ERROR(cudaPointerGetAttributes(&attributes, state)); // First allocation, so just set the user provided data here ScopedTraceWithContext( "CuStateVecCircuitSimulator::addQubitsToState cudaMemcpy", stateDimension * sizeof(CudaDataType)); - HANDLE_CUDA_ERROR(cudaMemcpy(deviceStateVector, state, - stateDimension * sizeof(CudaDataType), - cudaMemcpyHostToDevice)); + + if (attributes.type == cudaMemoryTypeDevice) { + int currentDevice; + HANDLE_CUDA_ERROR(cudaGetDevice(¤tDevice)); + + if (attributes.device != currentDevice) { + // Memory is on a different GPU + // Set the device to the device where the memory is located + HANDLE_CUDA_ERROR(cudaSetDevice(attributes.device)); + + // Perform device to device copy + HANDLE_CUDA_ERROR(cudaMemcpy(deviceStateVector, state, + stateDimension * sizeof(CudaDataType), + cudaMemcpyDeviceToDevice)); + + // Restore the current device + HANDLE_CUDA_ERROR(cudaSetDevice(currentDevice)); + } else { + // Memory is on the same GPU + HANDLE_CUDA_ERROR(cudaMemcpy(deviceStateVector, state, + stateDimension * sizeof(CudaDataType), + cudaMemcpyDeviceToDevice)); + } + } else { + // Else, copy from host to device + HANDLE_CUDA_ERROR(cudaMemcpy(deviceStateVector, state, + stateDimension * sizeof(CudaDataType), + cudaMemcpyHostToDevice)); + } return; } @@ -221,11 +248,33 @@ class CuStateVecCircuitSimulator n_blocks, threads_per_block, otherState, (1UL << count)); HANDLE_CUDA_ERROR(cudaGetLastError()); } else { - - // FIXME Handle case where data is already on GPU - HANDLE_CUDA_ERROR(cudaMemcpy(otherState, state, - (1UL << count) * sizeof(CudaDataType), - cudaMemcpyHostToDevice)); + // Check if the pointer is a device pointer + cudaPointerAttributes attributes; + HANDLE_CUDA_ERROR(cudaPointerGetAttributes(&attributes, state)); + + if (attributes.type == cudaMemoryTypeDevice) { + int currentDevice; + HANDLE_CUDA_ERROR(cudaGetDevice(¤tDevice)); + + if (attributes.device != currentDevice) { + // Memory is on a different GPU + // Set the device to the device where the memory is located + HANDLE_CUDA_ERROR(cudaSetDevice(attributes.device)); + + // Perform device to device copy + HANDLE_CUDA_ERROR(cudaMemcpy(otherState, state, + stateDimension * sizeof(CudaDataType), + cudaMemcpyDeviceToDevice)); + + // Restore the current device + HANDLE_CUDA_ERROR(cudaSetDevice(currentDevice)); + } + } else { + // Else, copy from host to device + HANDLE_CUDA_ERROR(cudaMemcpy(otherState, state, + (1UL << count) * sizeof(CudaDataType), + cudaMemcpyHostToDevice)); + } } {