From c3ecffd9f170447aca3e6399da5e03fcb556c974 Mon Sep 17 00:00:00 2001
From: Jacky
Date: Sun, 8 Sep 2024 16:39:51 -0400
Subject: [PATCH] TEST: GPU map

Implement the CUDA map kernel (Task 3.3): each thread handles one output
position, converting its flat index to a multi-dimensional out index,
broadcasting it back to the input shape, and applying fn elementwise.

---
 minitorch/cuda_ops.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/minitorch/cuda_ops.py b/minitorch/cuda_ops.py
index 22c2124..cfbbbbb 100644
--- a/minitorch/cuda_ops.py
+++ b/minitorch/cuda_ops.py
@@ -152,7 +152,12 @@ def _map(
         out_index = cuda.local.array(MAX_DIMS, numba.int32)
         in_index = cuda.local.array(MAX_DIMS, numba.int32)
         i = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
-        # TODO: Implement for Task 3.3.
-        raise NotImplementedError("Need to implement for Task 3.3")
+        # Guard: the launch grid may cover more threads than output elements.
+        if i < out.size:
+            # Flat out index -> multi-dim index, then broadcast to input shape.
+            to_index(i, out_shape, out_index)
+            broadcast_index(out_index, out_shape, in_shape, in_index)
+            out[index_to_position(out_index, out_strides)] = fn(
+                in_storage[index_to_position(in_index, in_strides)]
+            )
 
     return cuda.jit()(_map)  # type: ignore