Merge pull request #188 from leofang/cuda_core_11
Support JIT compilation for CUDA driver & bindings 11.x
leofang authored Oct 28, 2024
2 parents 5066c9f + 74de685 commit e426810
Showing 2 changed files with 88 additions and 23 deletions.
55 changes: 44 additions & 11 deletions cuda_core/cuda/core/experimental/_launcher.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
 from dataclasses import dataclass
+import importlib.metadata
 from typing import Optional, Union
 
 import numpy as np
@@ -15,10 +16,30 @@
 from cuda.core.experimental._utils import CUDAError, check_or_create_options, handle_return
 
 
+# TODO: revisit this treatment for py313t builds
+_inited = False
+_use_ex = None
+
+
+def _lazy_init():
+    global _inited
+    if _inited:
+        return
+
+    global _use_ex
+    # binding availability depends on cuda-python version
+    _py_major_minor = tuple(int(v) for v in (
+        importlib.metadata.version("cuda-python").split(".")[:2]))
+    _driver_ver = handle_return(cuda.cuDriverGetVersion())
+    _use_ex = (_driver_ver >= 11080) and (_py_major_minor >= (11, 8))
+    _inited = True
+
+
 @dataclass
 class LaunchConfig:
     """
     """
+    # TODO: expand LaunchConfig to include other attributes
     grid: Union[tuple, int] = None
     block: Union[tuple, int] = None
     stream: Stream = None
@@ -39,6 +60,8 @@ def __post_init__(self):
         if self.shmem_size is None:
             self.shmem_size = 0
 
+        _lazy_init()
+
     def _cast_to_3_tuple(self, cfg):
         if isinstance(cfg, int):
             if cfg < 1:
@@ -67,24 +90,34 @@ def launch(kernel, config, *kernel_args):
     if not isinstance(kernel, Kernel):
         raise ValueError
     config = check_or_create_options(LaunchConfig, config, "launch config")
+    if config.stream is None:
+        raise CUDAError("stream cannot be None")
+
+    # TODO: can we ensure kernel_args is valid/safe to use here?
+    # TODO: merge with HelperKernelParams?
+    kernel_args = ParamHolder(kernel_args)
+    args_ptr = kernel_args.ptr
 
-    driver_ver = handle_return(cuda.cuDriverGetVersion())
-    if driver_ver >= 12000:
+    # Note: CUkernel can still be launched via the old cuLaunchKernel and we do not care
+    # about the CUfunction/CUkernel difference (which depends on whether the "old" or
+    # "new" module loading APIs are in use). We check both binding & driver versions here
+    # mainly to see if the "Ex" API is available and if so we use it, as it's more feature
+    # rich.
+    if _use_ex:
         drv_cfg = cuda.CUlaunchConfig()
         drv_cfg.gridDimX, drv_cfg.gridDimY, drv_cfg.gridDimZ = config.grid
         drv_cfg.blockDimX, drv_cfg.blockDimY, drv_cfg.blockDimZ = config.block
-        if config.stream is None:
-            raise CUDAError("stream cannot be None")
         drv_cfg.hStream = config.stream._handle
         drv_cfg.sharedMemBytes = config.shmem_size
-        drv_cfg.numAttrs = 0 # FIXME
-
-        # TODO: merge with HelperKernelParams?
-        kernel_args = ParamHolder(kernel_args)
-        args_ptr = kernel_args.ptr
-
+        drv_cfg.numAttrs = 0 # TODO
         handle_return(cuda.cuLaunchKernelEx(
             drv_cfg, int(kernel._handle), args_ptr, 0))
     else:
-        raise NotImplementedError("TODO")
+        # TODO: check if config has any unsupported attrs
+        handle_return(cuda.cuLaunchKernel(
+            int(kernel._handle),
+            *config.grid,
+            *config.block,
+            config.shmem_size,
+            config.stream._handle,
+            args_ptr, 0))
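
With both pieces in place, launch() goes through cuLaunchKernelEx only when the installed cuda-python bindings and the driver are both at least 11.8, and otherwise takes the cuLaunchKernel fallback added above. A minimal end-to-end sketch of the API this touches — the kernel source, the names, and the Device/Program/Stream calls are illustrative assumptions, not part of this diff:

from cuda.core.experimental import Device, LaunchConfig, Program, launch

dev = Device()
dev.set_current()
stream = dev.create_stream()

# Hypothetical no-op kernel, for illustration only.
code = 'extern "C" __global__ void noop() {}'
mod = Program(code, code_type="c++").compile("cubin")
ker = mod.get_kernel("noop")

# On bindings & driver >= 11.8 this dispatches to cuLaunchKernelEx;
# otherwise it uses the cuLaunchKernel path added in this commit.
launch(ker, LaunchConfig(grid=1, block=1, stream=stream))
stream.sync()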
56 changes: 44 additions & 12 deletions cuda_core/cuda/core/experimental/_module.py
@@ -2,16 +2,13 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
+import importlib.metadata
+
 from cuda import cuda, cudart
 from cuda.core.experimental._utils import handle_return
 
 
 _backend = {
-    "new": {
-        "file": cuda.cuLibraryLoadFromFile,
-        "data": cuda.cuLibraryLoadData,
-        "kernel": cuda.cuLibraryGetKernel,
-    },
     "old": {
         "file": cuda.cuModuleLoad,
         "data": cuda.cuModuleLoadDataEx,
@@ -20,6 +17,34 @@
 }
 
 
+# TODO: revisit this treatment for py313t builds
+_inited = False
+_py_major_ver = None
+_driver_ver = None
+_kernel_ctypes = None
+
+
+def _lazy_init():
+    global _inited
+    if _inited:
+        return
+
+    global _py_major_ver, _driver_ver, _kernel_ctypes
+    # binding availability depends on cuda-python version
+    _py_major_ver = int(importlib.metadata.version("cuda-python").split(".")[0])
+    if _py_major_ver >= 12:
+        _backend["new"] = {
+            "file": cuda.cuLibraryLoadFromFile,
+            "data": cuda.cuLibraryLoadData,
+            "kernel": cuda.cuLibraryGetKernel,
+        }
+        _kernel_ctypes = (cuda.CUfunction, cuda.CUkernel)
+    else:
+        _kernel_ctypes = (cuda.CUfunction,)
+    _driver_ver = handle_return(cuda.cuDriverGetVersion())
+    _inited = True
+
+
 class Kernel:
 
     __slots__ = ("_handle", "_module",)
@@ -29,13 +54,15 @@ def __init__(self):
 
     @staticmethod
     def _from_obj(obj, mod):
-        assert isinstance(obj, (cuda.CUkernel, cuda.CUfunction))
+        assert isinstance(obj, _kernel_ctypes)
         assert isinstance(mod, ObjectCode)
         ker = Kernel.__new__(Kernel)
         ker._handle = obj
         ker._module = mod
         return ker
 
+    # TODO: implement from_handle()
+
 
 class ObjectCode:
 
@@ -46,26 +73,29 @@ def __init__(self, module, code_type, jit_options=None, *,
                  symbol_mapping=None):
         if code_type not in self._supported_code_type:
             raise ValueError
+        _lazy_init()
         self._handle = None
 
-        driver_ver = handle_return(cuda.cuDriverGetVersion())
-        self._loader = _backend["new"] if driver_ver >= 12000 else _backend["old"]
+        backend = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old"
+        self._loader = _backend[backend]
 
         if isinstance(module, str):
-            if driver_ver < 12000 and jit_options is not None:
+            # TODO: this option is only taken by the new library APIs, but we have
+            # a bug that we can't easily support it just yet (NVIDIA/cuda-python#73).
+            if jit_options is not None:
                 raise ValueError
             module = module.encode()
             self._handle = handle_return(self._loader["file"](module))
         else:
             assert isinstance(module, bytes)
             if jit_options is None:
                 jit_options = {}
-            if driver_ver >= 12000:
+            if backend == "new":
                 args = (module, list(jit_options.keys()), list(jit_options.values()), len(jit_options),
                         # TODO: support library options
                         [], [], 0)
-            else:
-                args = (module, len(jit_options), jit_options.keys(), jit_options.values())
+            else: # "old" backend
+                args = (module, len(jit_options), list(jit_options.keys()), list(jit_options.values()))
             self._handle = handle_return(self._loader["data"](*args))
 
         self._code_type = code_type
@@ -83,3 +113,5 @@ def get_kernel(self, name):
             name = name.encode()
         data = handle_return(self._loader["kernel"](self._handle, name))
         return Kernel._from_obj(data, self)
+
+    # TODO: implement from_handle()
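
On the module side, the backend selection means that under CUDA 11.x (bindings or driver), ObjectCode JIT-loads in-memory code through cuModuleLoadDataEx rather than the cuLibrary* APIs. A hedged sketch of that flow, assuming a current context has already been set up (e.g. via Device().set_current()) and that "ptx" is among ObjectCode._supported_code_type; the PTX body is a made-up placeholder, not output from this commit:

from cuda.core.experimental._module import ObjectCode

# Placeholder PTX for illustration; in practice it would come from
# Program.compile("ptx") or an offline build.
ptx = b"""
.version 7.8
.target sm_80
.address_size 64
.visible .entry noop() { ret; }
"""

# With 11.x bindings or driver this resolves to the "old" backend, so the
# driver JIT-compiles the PTX for the current device via cuModuleLoadDataEx.
mod = ObjectCode(ptx, "ptx")
ker = mod.get_kernel("noop")  # resolved through the "old" backend's kernel loader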
