diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000..e77d446ba6 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# do not show up detailed difference on GitHub +source/3rdparty/* linguist-generated=true diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index 98360e41e4..df4a109841 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -76,6 +76,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: actions/setup-python@v4 name: Install Python with: diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index adc20c27a9..7b95e6d37b 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -9,13 +9,19 @@ jobs: test_cuda: name: Test Python and C++ on CUDA runs-on: nvidia + # https://github.com/deepmodeling/deepmd-kit/pull/2884#issuecomment-1744216845 + container: + image: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 + options: --gpus all if: github.repository_owner == 'deepmodeling' && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch' steps: + - name: Make sudo and git work + run: apt-get update && apt-get install -y sudo git - uses: actions/checkout@v4 - uses: actions/setup-python@v4 with: python-version: '3.11' - cache: 'pip' + # cache: 'pip' - name: Setup MPI uses: mpi4py/setup-mpi@v1 with: @@ -26,14 +32,17 @@ jobs: && sudo dpkg -i cuda-keyring_1.0-1_all.deb \ && sudo apt-get update \ && sudo apt-get -y install cuda-11-8 libcudnn8=8.9.5.*-1+cuda11.8 + if: false # skip as we use nvidia image + - name: Set PyPI mirror for Aliyun cloud machine + run: python -m pip config --user set global.index-url https://mirrors.aliyun.com/pypi/simple/ - run: python -m pip install -U "pip>=21.3.1,!=23.0.0" - - run: pip install -v -e .[gpu,test,lmp,cu11] "ase @ https://github.com/rosswhitfield/ase/archive/edd03571aff6944b77b4a4b055239f3c3e4eeb66.zip" + - 
run: python -m pip install -v -e .[gpu,test,lmp,cu11] "ase @ https://github.com/rosswhitfield/ase/archive/edd03571aff6944b77b4a4b055239f3c3e4eeb66.zip" env: DP_BUILD_TESTING: 1 DP_VARIANT: cuda CUDA_PATH: /usr/local/cuda-11.8 - run: dp --version - - run: pytest -s --cov=deepmd --cov=deepmd_cli source/tests --durations=0 + - run: python -m pytest -s --cov=deepmd --cov=deepmd_cli source/tests --durations=0 - run: source/install/test_cc_local.sh env: OMP_NUM_THREADS: 1 @@ -45,10 +54,10 @@ jobs: DP_USE_MPICH2: 1 CUDA_PATH: /usr/local/cuda-11.8 - run: | - export LD_LIBRARY_PATH=${{ github.workspace }}/dp_test/lib:$CUDA_PATH/lib64:$LD_LIBRARY_PATH - export PATH=${{ github.workspace }}/dp_test/bin:$PATH - pytest -s --cov=deepmd source/lmp/tests - pytest -s --cov=deepmd source/ipi/tests + export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/dp_test/lib:$CUDA_PATH/lib64:$LD_LIBRARY_PATH + export PATH=$GITHUB_WORKSPACE/dp_test/bin:$PATH + python -m pytest -s --cov=deepmd source/lmp/tests + python -m pytest -s --cov=deepmd source/ipi/tests env: OMP_NUM_THREADS: 1 TF_INTRA_OP_PARALLELISM_THREADS: 1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dc1d5e99eb..50a47499f1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: trailing-whitespace exclude: "^.+\\.pbtxt$" @@ -27,22 +27,26 @@ repos: hooks: - id: isort files: \.py$ + exclude: ^source/3rdparty - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: v0.0.291 + rev: v0.0.292 hooks: - id: ruff args: ["--fix"] + exclude: ^source/3rdparty - repo: https://github.com/psf/black-pre-commit-mirror rev: 23.9.1 hooks: - id: black-jupyter + exclude: ^source/3rdparty # numpydoc - repo: https://github.com/Carreau/velin rev: 0.0.12 hooks: - id: velin args: ["--write"] + exclude: ^source/3rdparty # Python inside docs - repo: https://github.com/asottile/blacken-docs rev: 1.16.0 @@ -102,6 +106,7 @@ repos: - --comment-style - "#" - --no-extra-eol + exclude: ^source/3rdparty # HTML - id: insert-license files: \.(html|vue|xml)$ diff --git a/README.md b/README.md index 9b9d0ff27d..5914abe607 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,7 @@ A full [document](doc/train/train-input-auto.rst) on options in the training inp - [Install GROMACS](doc/install/install-gromacs.md) - [Building conda packages](doc/install/build-conda.md) - [Install Node.js interface](doc/install/install-nodejs.md) + - [Easy install the latest development version](doc/install/easy-install-dev.md) - [Data](doc/data/index.md) - [System](doc/data/system.md) - [Formats of a system](doc/data/data-conv.md) diff --git a/backend/dynamic_metadata.py b/backend/dynamic_metadata.py index fa40d332cf..0502684f47 100644 --- a/backend/dynamic_metadata.py +++ b/backend/dynamic_metadata.py @@ -38,6 +38,7 @@ def dynamic_metadata( "pytest", "pytest-cov", "pytest-sugar", + "dpgui", ], "docs": [ "sphinx>=3.1.1", @@ -62,6 +63,9 @@ def dynamic_metadata( "i-PI", *find_libpython_requires, ], + "gui": [ + "dpgui", + ], **get_tf_requirement(tf_version), "cu11": [ "nvidia-cuda-runtime-cu11", diff --git a/deepmd/descriptor/__init__.py b/deepmd/descriptor/__init__.py index ab726d95c8..6ef6c50da5 100644 --- a/deepmd/descriptor/__init__.py +++ b/deepmd/descriptor/__init__.py @@ -14,6 +14,9 @@ from .se_a_ebd import ( DescrptSeAEbd, ) +from .se_a_ebd_v2 import ( + DescrptSeAEbdV2, +) from .se_a_ef import ( DescrptSeAEf, DescrptSeAEfLower, @@ -39,6 +42,7 @@ "DescrptHybrid", 
"DescrptLocFrame", "DescrptSeA", + "DescrptSeAEbdV2", "DescrptSeAEbd", "DescrptSeAEf", "DescrptSeAEfLower", diff --git a/deepmd/descriptor/descriptor.py b/deepmd/descriptor/descriptor.py index c885e73145..bd731004cb 100644 --- a/deepmd/descriptor/descriptor.py +++ b/deepmd/descriptor/descriptor.py @@ -66,16 +66,20 @@ class SomeDescript(Descriptor): """ return Descriptor.__plugins.register(key) + @classmethod + def get_class_by_input(cls, input: dict): + try: + descrpt_type = input["type"] + except KeyError: + raise KeyError("the type of descriptor should be set by `type`") + if descrpt_type in Descriptor.__plugins.plugins: + return Descriptor.__plugins.plugins[descrpt_type] + else: + raise RuntimeError("Unknown descriptor type: " + descrpt_type) + def __new__(cls, *args, **kwargs): if cls is Descriptor: - try: - descrpt_type = kwargs["type"] - except KeyError: - raise KeyError("the type of descriptor should be set by `type`") - if descrpt_type in Descriptor.__plugins.plugins: - cls = Descriptor.__plugins.plugins[descrpt_type] - else: - raise RuntimeError("Unknown descriptor type: " + descrpt_type) + cls = cls.get_class_by_input(kwargs) return super().__new__(cls) @abstractmethod @@ -489,3 +493,19 @@ def build_type_exclude_mask( def explicit_ntypes(self) -> bool: """Explicit ntypes with type embedding.""" return False + + @classmethod + @abstractmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + # dispatch to the subclass registered for this descriptor type + cls = cls.get_class_by_input(local_jdata) + return cls.update_sel(global_jdata, local_jdata) diff --git a/deepmd/descriptor/hybrid.py b/deepmd/descriptor/hybrid.py index 26736cd653..5ee5ec884b 100644 --- a/deepmd/descriptor/hybrid.py +++ b/deepmd/descriptor/hybrid.py @@ -416,3 +416,21 @@ def pass_tensors_from_frz_model( def explicit_ntypes(self) -> bool: """Explicit ntypes with type embedding.""" return any(ii.explicit_ntypes for ii in self.descrpt_list) + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + return local_jdata diff --git a/deepmd/descriptor/se.py b/deepmd/descriptor/se.py index 3a1ec41ddb..598f6f9ff8 100644 --- a/deepmd/descriptor/se.py +++ b/deepmd/descriptor/se.py @@ -141,3 +141,22 @@ def init_variables( def precision(self) -> tf.DType: """Precision of filter network.""" return self.filter_precision + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + from deepmd.entrypoints.train import ( + update_one_sel, + ) + + # default behavior is to update sel which is a list + local_jdata_cpy = local_jdata.copy() + return update_one_sel(global_jdata, local_jdata_cpy, False) diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py index cceb72d4fb..8f0051cd4e 100644 --- a/deepmd/descriptor/se_a.py +++ b/deepmd/descriptor/se_a.py @@ -10,6 +10,7 @@ from deepmd.common import ( cast_precision, get_activation_func, + get_np_precision, get_precision, ) from deepmd.env import ( @@ -30,10 +31,17 @@ from deepmd.nvnmd.utils.config import ( nvnmd_cfg, ) +from deepmd.utils.compress import ( + get_extra_side_embedding_net_variable, + get_two_side_type_embedding, + get_type_embedding, + make_data, +) from deepmd.utils.errors import ( GraphWithoutTensorError, ) from deepmd.utils.graph import ( + get_pattern_nodes_from_graph_def, get_tensor_by_name_from_graph, ) from deepmd.utils.network import ( @@ -165,6 +173,7 @@ def __init__( uniform_seed: bool = False, multi_task: bool = False, spin: Optional[Spin] = None, + stripped_type_embedding: bool = False, **kwargs, ) -> None: """Constructor.""" @@ -185,6 +194,7 @@ def __init__( 
self.compress_activation_fn = get_activation_func(activation_function) self.filter_activation_fn = get_activation_func(activation_function) self.filter_precision = get_precision(precision) + self.filter_np_precision = get_np_precision(precision) self.exclude_types = set() for tt in exclude_types: assert len(tt) == 2 @@ -193,6 +203,9 @@ def __init__( self.set_davg_zero = set_davg_zero self.type_one_side = type_one_side self.spin = spin + self.stripped_type_embedding = stripped_type_embedding + self.extra_embeeding_net_variables = None + self.layer_size = len(neuron) # extend sel_a for spin system if self.spin is not None: @@ -463,6 +476,39 @@ def enable_compression( "The size of the next layer of the neural network must be twice the size of the previous layer." % ",".join([str(item) for item in self.filter_neuron]) ) + if self.stripped_type_embedding: + ret_two_side = get_pattern_nodes_from_graph_def( + graph_def, f"filter_type_all{suffix}/.+_two_side_ebd" + ) + ret_one_side = get_pattern_nodes_from_graph_def( + graph_def, f"filter_type_all{suffix}/.+_one_side_ebd" + ) + if len(ret_two_side) == 0 and len(ret_one_side) == 0: + raise RuntimeError( + "can not find variables of embedding net from graph_def, maybe it is not a compressible model." + ) + elif len(ret_one_side) != 0 and len(ret_two_side) != 0: + raise RuntimeError( + "both one side and two side embedding net varaibles are detected, it is a wrong model." 
+ ) + elif len(ret_two_side) != 0: + self.final_type_embedding = get_two_side_type_embedding(self, graph) + self.matrix = get_extra_side_embedding_net_variable( + self, graph_def, "two_side", "matrix", suffix + ) + self.bias = get_extra_side_embedding_net_variable( + self, graph_def, "two_side", "bias", suffix + ) + self.extra_embedding = make_data(self, self.final_type_embedding) + else: + self.final_type_embedding = get_type_embedding(self, graph) + self.matrix = get_extra_side_embedding_net_variable( + self, graph_def, "one_side", "matrix", suffix + ) + self.bias = get_extra_side_embedding_net_variable( + self, graph_def, "one_side", "bias", suffix + ) + self.extra_embedding = make_data(self, self.final_type_embedding) self.compress = True self.table = DPTabulate( @@ -588,6 +634,7 @@ def build( coord = tf.reshape(coord_, [-1, natoms[1] * 3]) box = tf.reshape(box_, [-1, 9]) atype = tf.reshape(atype_, [-1, natoms[1]]) + self.atype = atype op_descriptor = ( build_op_descriptor() if nvnmd_cfg.enable else op_module.prod_env_mat_a @@ -606,6 +653,10 @@ def build( sel_a=self.sel_a, sel_r=self.sel_r, ) + nlist_t = tf.reshape(self.nlist + 1, [-1]) + atype_t = tf.concat([[self.ntypes], tf.reshape(self.atype, [-1])], axis=0) + self.nei_type_vec = tf.nn.embedding_lookup(atype_t, nlist_t) + # only used when tensorboard was set as true tf.summary.histogram("descrpt", self.descrpt) tf.summary.histogram("rij", self.rij) @@ -692,6 +743,8 @@ def _pass_filter( type_embedding = input_dict.get("type_embedding", None) else: type_embedding = None + if self.stripped_type_embedding and type_embedding is None: + raise RuntimeError("type_embedding is required for se_a_tebd_v2 model.") start_index = 0 inputs = tf.reshape(inputs, [-1, natoms[0], self.ndescrpt]) output = [] @@ -901,13 +954,89 @@ def _filter_lower( # with (natom x nei_type_i) x 1 xyz_scatter = tf.reshape(tf.slice(inputs_reshape, [0, 0], [-1, 1]), [-1, 1]) if type_embedding is not None: - xyz_scatter = 
self._concat_type_embedding( - xyz_scatter, nframes, natoms, type_embedding - ) - if self.compress: - raise RuntimeError( - "compression of type embedded descriptor is not supported at the moment" + if self.stripped_type_embedding: + if self.type_one_side: + extra_embedding_index = self.nei_type_vec + else: + padding_ntypes = type_embedding.shape[0] + atype_expand = tf.reshape(self.atype, [-1, 1]) + idx_i = tf.tile(atype_expand * padding_ntypes, [1, self.nnei]) + idx_j = tf.reshape(self.nei_type_vec, [-1, self.nnei]) + idx = idx_i + idx_j + index_of_two_side = tf.reshape(idx, [-1]) + extra_embedding_index = index_of_two_side + + if not self.compress: + if self.type_one_side: + one_side_type_embedding_suffix = "_one_side_ebd" + net_output = embedding_net( + type_embedding, + self.filter_neuron, + self.filter_precision, + activation_fn=activation_fn, + resnet_dt=self.filter_resnet_dt, + name_suffix=one_side_type_embedding_suffix, + stddev=stddev, + bavg=bavg, + seed=self.seed, + trainable=trainable, + uniform_seed=self.uniform_seed, + initial_variables=self.extra_embeeding_net_variables, + mixed_prec=self.mixed_prec, + ) + net_output = tf.nn.embedding_lookup( + net_output, self.nei_type_vec + ) + else: + type_embedding_nei = tf.tile( + tf.reshape(type_embedding, [1, padding_ntypes, -1]), + [padding_ntypes, 1, 1], + ) # (ntypes) * ntypes * Y + type_embedding_center = tf.tile( + tf.reshape(type_embedding, [padding_ntypes, 1, -1]), + [1, padding_ntypes, 1], + ) # ntypes * (ntypes) * Y + two_side_type_embedding = tf.concat( + [type_embedding_nei, type_embedding_center], -1 + ) # ntypes * ntypes * (Y+Y) + two_side_type_embedding = tf.reshape( + two_side_type_embedding, + [-1, two_side_type_embedding.shape[-1]], + ) + + atype_expand = tf.reshape(self.atype, [-1, 1]) + idx_i = tf.tile(atype_expand * padding_ntypes, [1, self.nnei]) + idx_j = tf.reshape(self.nei_type_vec, [-1, self.nnei]) + idx = idx_i + idx_j + index_of_two_side = tf.reshape(idx, [-1]) + 
self.extra_embedding_index = index_of_two_side + + two_side_type_embedding_suffix = "_two_side_ebd" + net_output = embedding_net( + two_side_type_embedding, + self.filter_neuron, + self.filter_precision, + activation_fn=activation_fn, + resnet_dt=self.filter_resnet_dt, + name_suffix=two_side_type_embedding_suffix, + stddev=stddev, + bavg=bavg, + seed=self.seed, + trainable=trainable, + uniform_seed=self.uniform_seed, + initial_variables=self.extra_embeeding_net_variables, + mixed_prec=self.mixed_prec, + ) + net_output = tf.nn.embedding_lookup(net_output, idx) + net_output = tf.reshape(net_output, [-1, self.filter_neuron[-1]]) + else: + xyz_scatter = self._concat_type_embedding( + xyz_scatter, nframes, natoms, type_embedding ) + if self.compress: + raise RuntimeError( + "compression of type embedded descriptor is not supported when stripped_type_embedding == False" + ) # natom x 4 x outputs_size if nvnmd_cfg.enable: return filter_lower_R42GR( @@ -929,25 +1058,48 @@ def _filter_lower( self.embedding_net_variables, ) if self.compress and (not is_exclude): - if self.type_one_side: - net = "filter_-1_net_" + str(type_i) + if self.stripped_type_embedding: + net_output = tf.nn.embedding_lookup( + self.extra_embedding, extra_embedding_index + ) + net = "filter_net" + info = [ + self.lower[net], + self.upper[net], + self.upper[net] * self.table_config[0], + self.table_config[1], + self.table_config[2], + self.table_config[3], + ] + return op_module.tabulate_fusion_se_atten( + tf.cast(self.table.data[net], self.filter_precision), + info, + xyz_scatter, + tf.reshape(inputs_i, [natom, shape_i[1] // 4, 4]), + net_output, + last_layer_size=outputs_size[-1], + is_sorted=False, + ) else: - net = "filter_" + str(type_input) + "_net_" + str(type_i) - info = [ - self.lower[net], - self.upper[net], - self.upper[net] * self.table_config[0], - self.table_config[1], - self.table_config[2], - self.table_config[3], - ] - return op_module.tabulate_fusion_se_a( - 
tf.cast(self.table.data[net], self.filter_precision), - info, - xyz_scatter, - tf.reshape(inputs_i, [natom, shape_i[1] // 4, 4]), - last_layer_size=outputs_size[-1], - ) + if self.type_one_side: + net = "filter_-1_net_" + str(type_i) + else: + net = "filter_" + str(type_input) + "_net_" + str(type_i) + info = [ + self.lower[net], + self.upper[net], + self.upper[net] * self.table_config[0], + self.table_config[1], + self.table_config[2], + self.table_config[3], + ] + return op_module.tabulate_fusion_se_a( + tf.cast(self.table.data[net], self.filter_precision), + info, + xyz_scatter, + tf.reshape(inputs_i, [natom, shape_i[1] // 4, 4]), + last_layer_size=outputs_size[-1], + ) else: if not is_exclude: # with (natom x nei_type_i) x out_size @@ -966,6 +1118,9 @@ def _filter_lower( initial_variables=self.embedding_net_variables, mixed_prec=self.mixed_prec, ) + + if self.stripped_type_embedding: + xyz_scatter = xyz_scatter * net_output + xyz_scatter if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift else: @@ -1179,3 +1334,10 @@ def init_variables( self.dstd = new_dstd if self.original_sel is None: self.original_sel = sel + + @property + def explicit_ntypes(self) -> bool: + """Explicit ntypes with type embedding.""" + if self.stripped_type_embedding: + return True + return False diff --git a/deepmd/descriptor/se_a_ebd_v2.py b/deepmd/descriptor/se_a_ebd_v2.py new file mode 100644 index 0000000000..c6e3cebc71 --- /dev/null +++ b/deepmd/descriptor/se_a_ebd_v2.py @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +from typing import ( + List, + Optional, +) + +from deepmd.utils.spin import ( + Spin, +) + +from .descriptor import ( + Descriptor, +) +from .se_a import ( + DescrptSeA, +) + +log = logging.getLogger(__name__) + + +@Descriptor.register("se_a_tpe_v2") +@Descriptor.register("se_a_ebd_v2") +class DescrptSeAEbdV2(DescrptSeA): + r"""A compressible se_a_ebd model. 
+ + This model is a wrapper for DescrptSeA, which sets stripped_type_embedding=True. + """ + + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: List[str], + neuron: List[int] = [24, 48, 96], + axis_neuron: int = 8, + resnet_dt: bool = False, + trainable: bool = True, + seed: Optional[int] = None, + type_one_side: bool = True, + exclude_types: List[List[int]] = [], + set_davg_zero: bool = False, + activation_function: str = "tanh", + precision: str = "default", + uniform_seed: bool = False, + multi_task: bool = False, + spin: Optional[Spin] = None, + **kwargs, + ) -> None: + DescrptSeA.__init__( + self, + rcut, + rcut_smth, + sel, + neuron=neuron, + axis_neuron=axis_neuron, + resnet_dt=resnet_dt, + trainable=trainable, + seed=seed, + type_one_side=type_one_side, + exclude_types=exclude_types, + set_davg_zero=set_davg_zero, + activation_function=activation_function, + precision=precision, + uniform_seed=uniform_seed, + multi_task=multi_task, + spin=spin, + stripped_type_embedding=True, + **kwargs, + ) diff --git a/deepmd/descriptor/se_a_ef.py b/deepmd/descriptor/se_a_ef.py index fb886483f6..32a62b48f3 100644 --- a/deepmd/descriptor/se_a_ef.py +++ b/deepmd/descriptor/se_a_ef.py @@ -24,13 +24,16 @@ from .descriptor import ( Descriptor, ) +from .se import ( + DescrptSe, +) from .se_a import ( DescrptSeA, ) @Descriptor.register("se_a_ef") -class DescrptSeAEf(Descriptor): +class DescrptSeAEf(DescrptSe): r"""Smooth edition descriptor with Ef. 
Parameters diff --git a/deepmd/descriptor/se_a_mask.py b/deepmd/descriptor/se_a_mask.py index b9181fd6b0..780b34d294 100644 --- a/deepmd/descriptor/se_a_mask.py +++ b/deepmd/descriptor/se_a_mask.py @@ -128,6 +128,7 @@ def __init__( activation_function: str = "tanh", precision: str = "default", uniform_seed: bool = False, + stripped_type_embedding: bool = False, **kwargs, ) -> None: """Constructor.""" @@ -159,6 +160,7 @@ def __init__( # numb of neighbors and numb of descrptors self.nnei_a = np.cumsum(self.sel_a)[-1] self.nnei = self.nnei_a + self.stripped_type_embedding = stripped_type_embedding self.ndescrpt_a = self.nnei_a * 4 self.ndescrpt = self.ndescrpt_a diff --git a/deepmd/descriptor/se_atten.py b/deepmd/descriptor/se_atten.py index c962952ec0..8f3be40596 100644 --- a/deepmd/descriptor/se_atten.py +++ b/deepmd/descriptor/se_atten.py @@ -35,6 +35,11 @@ from deepmd.nvnmd.utils.config import ( nvnmd_cfg, ) +from deepmd.utils.compress import ( + get_extra_side_embedding_net_variable, + get_two_side_type_embedding, + make_data, +) from deepmd.utils.graph import ( get_attention_layer_variables_from_graph_def, get_pattern_nodes_from_graph_def, @@ -115,6 +120,11 @@ class DescrptSeAtten(DescrptSeA): When using stripped type embedding, whether to dot smooth factor on the network output of type embedding to keep the network smooth, instead of setting `set_davg_zero` to be True. Default value will be True in `se_atten_v2` descriptor. + + Raises + ------ + ValueError + if ntypes is 0. """ def __init__( @@ -173,6 +183,8 @@ def __init__( assert Version(TF_VERSION) > Version( "2" ), "se_atten only support tensorflow version 2.0 or higher." 
+ if ntypes == 0: + raise ValueError("`model/type_map` is not set or empty!") self.stripped_type_embedding = stripped_type_embedding self.smooth = smooth_type_embdding self.ntypes = ntypes @@ -415,12 +427,14 @@ def enable_compression( min_nbor_dist, table_extrapolate, table_stride_1, table_stride_2 ) - self.final_type_embedding = self._get_two_side_type_embedding(graph) - self.matrix = self._get_two_side_embedding_net_variable( - graph_def, "matrix", suffix + self.final_type_embedding = get_two_side_type_embedding(self, graph) + self.matrix = get_extra_side_embedding_net_variable( + self, graph_def, "two_side", "matrix", suffix ) - self.bias = self._get_two_side_embedding_net_variable(graph_def, "bias", suffix) - self.two_embd = self._make_data(self.final_type_embedding) + self.bias = get_extra_side_embedding_net_variable( + self, graph_def, "two_side", "bias", suffix + ) + self.two_embd = make_data(self, self.final_type_embedding) self.davg = get_tensor_by_name_from_graph( graph, "descrpt_attr%s/t_avg" % suffix @@ -429,79 +443,6 @@ def enable_compression( graph, "descrpt_attr%s/t_std" % suffix ) - def _get_two_side_type_embedding(self, graph): - type_embedding = get_tensor_by_name_from_graph(graph, "t_typeebd") - type_embedding = type_embedding.astype(self.filter_np_precision) - type_embedding_shape = type_embedding.shape - type_embedding_nei = np.tile( - np.reshape(type_embedding, [1, type_embedding_shape[0], -1]), - [type_embedding_shape[0], 1, 1], - ) # (ntypes) * ntypes * Y - type_embedding_center = np.tile( - np.reshape(type_embedding, [type_embedding_shape[0], 1, -1]), - [1, type_embedding_shape[0], 1], - ) # ntypes * (ntypes) * Y - two_side_type_embedding = np.concatenate( - [type_embedding_nei, type_embedding_center], -1 - ) # ntypes * ntypes * (Y+Y) - two_side_type_embedding = np.reshape( - two_side_type_embedding, [-1, two_side_type_embedding.shape[-1]] - ) - return two_side_type_embedding - - def _get_two_side_embedding_net_variable(self, graph_def, 
varialbe_name, suffix): - ret = {} - for i in range(1, self.layer_size + 1): - target = get_pattern_nodes_from_graph_def( - graph_def, - f"filter_type_all{suffix}/{varialbe_name}_{i}_two_side_ebd", - ) - node = target[f"filter_type_all{suffix}/{varialbe_name}_{i}_two_side_ebd"] - ret["layer_" + str(i)] = node - return ret - - def _layer_0(self, x, w, b): - return self.filter_activation_fn(tf.matmul(x, w) + b) - - def _layer_1(self, x, w, b): - t = tf.concat([x, x], axis=1) - return t, self.filter_activation_fn(tf.matmul(x, w) + b) + t - - def _make_data(self, xx): - with tf.Session() as sess: - for layer in range(self.layer_size): - if layer == 0: - if self.filter_neuron[0] == 1: - yy = ( - self._layer_0( - xx, - self.matrix["layer_" + str(layer + 1)], - self.bias["layer_" + str(layer + 1)], - ) - + xx - ) - elif self.filter_neuron[0] == 2: - tt, yy = self._layer_1( - xx, - self.matrix["layer_" + str(layer + 1)], - self.bias["layer_" + str(layer + 1)], - ) - else: - yy = self._layer_0( - xx, - self.matrix["layer_" + str(layer + 1)], - self.bias["layer_" + str(layer + 1)], - ) - else: - tt, zz = self._layer_1( - yy, - self.matrix["layer_" + str(layer + 1)], - self.bias["layer_" + str(layer + 1)], - ) - yy = zz - vv = sess.run(zz) - return vv - def build( self, coord_: tf.Tensor, @@ -623,6 +564,8 @@ def build( self.filter_precision, ) self.negative_mask = -(2 << 32) * (1.0 - self.nmask) + # hard coding the magnitude of attention weight shift + self.smth_attn_w_shift = 20.0 # only used when tensorboard was set as true tf.summary.histogram("descrpt", self.descrpt) tf.summary.histogram("rij", self.rij) @@ -658,7 +601,9 @@ def build( ) self.recovered_r = ( tf.reshape( - tf.slice(tf.reshape(self.descrpt, [-1, 4]), [0, 0], [-1, 1]), + tf.slice( + tf.reshape(self.descrpt_reshape, [-1, 4]), [0, 0], [-1, 1] + ), [-1, natoms[0], self.sel_all_a[0]], ) * self.std_looked_up @@ -924,10 +869,26 @@ def _scaled_dot_attn( save_weights=True, ): attn = tf.matmul(Q / temperature, K, 
transpose_b=True) - attn *= self.nmask - attn += self.negative_mask + if self.smooth: + # (nb x nloc) x nsel + nsel = self.sel_all_a[0] + attn = (attn + self.smth_attn_w_shift) * tf.reshape( + self.recovered_switch, [-1, 1, nsel] + ) * tf.reshape( + self.recovered_switch, [-1, nsel, 1] + ) - self.smth_attn_w_shift + else: + attn *= self.nmask + attn += self.negative_mask attn = tf.nn.softmax(attn, axis=-1) - attn *= tf.reshape(self.nmask, [-1, attn.shape[-1], 1]) + if self.smooth: + attn = ( + attn + * tf.reshape(self.recovered_switch, [-1, 1, nsel]) + * tf.reshape(self.recovered_switch, [-1, nsel, 1]) + ) + else: + attn *= tf.reshape(self.nmask, [-1, attn.shape[-1], 1]) if save_weights: self.attn_weight[layer] = attn[0] # atom 0 if dotr: @@ -1470,3 +1431,21 @@ def build_type_exclude_mask( def explicit_ntypes(self) -> bool: """Explicit ntypes with type embedding.""" return True + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + from deepmd.entrypoints.train import ( + update_one_sel, + ) + + local_jdata_cpy = local_jdata.copy() + return update_one_sel(global_jdata, local_jdata_cpy, True) diff --git a/deepmd/entrypoints/__init__.py b/deepmd/entrypoints/__init__.py index c46a90fec3..9c3a8b31e1 100644 --- a/deepmd/entrypoints/__init__.py +++ b/deepmd/entrypoints/__init__.py @@ -16,6 +16,9 @@ from .freeze import ( freeze, ) +from .gui import ( + start_dpgui, +) from .neighbor_stat import ( neighbor_stat, ) @@ -41,4 +44,5 @@ "make_model_devi", "convert", "neighbor_stat", + "start_dpgui", ] diff --git a/deepmd/entrypoints/gui.py b/deepmd/entrypoints/gui.py new file mode 100644 index 0000000000..8b6b9e0a09 --- /dev/null +++ b/deepmd/entrypoints/gui.py @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""DP-GUI entrypoint.""" + + +def start_dpgui(*, port: int, bind_all: bool, **kwargs): + """Host DP-GUI server. + + Parameters + ---------- + port : int + The port to serve DP-GUI on. + bind_all : bool + Serve on all public interfaces. This will expose your DP-GUI instance + to the network on both IPv4 and IPv6 (where available). 
+ **kwargs + additional arguments + + Raises + ------ + ModuleNotFoundError + The dpgui package is not installed + """ + try: + from dpgui import ( + start_dpgui, + ) + except ModuleNotFoundError as e: + raise ModuleNotFoundError( + "To use DP-GUI, please install the dpgui package:\npip install dpgui" + ) from e + start_dpgui(port=port, bind_all=bind_all) diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py index 2b6f4859d1..782136b542 100644 --- a/deepmd/entrypoints/main.py +++ b/deepmd/entrypoints/main.py @@ -21,6 +21,7 @@ freeze, make_model_devi, neighbor_stat, + start_dpgui, test, train_dp, transfer, @@ -89,6 +90,8 @@ def main(args: Optional[Union[List[str], argparse.Namespace]] = None): neighbor_stat(**dict_args) elif args.command == "train-nvnmd": # nvnmd train_nvnmd(**dict_args) + elif args.command == "gui": + start_dpgui(**dict_args) elif args.command is None: pass else: diff --git a/deepmd/entrypoints/train.py b/deepmd/entrypoints/train.py index 716ff482a3..bd7a2ac7ec 100755 --- a/deepmd/entrypoints/train.py +++ b/deepmd/entrypoints/train.py @@ -27,6 +27,9 @@ from deepmd.infer.data_modifier import ( DipoleChargeModifier, ) +from deepmd.model.model import ( + Model, +) from deepmd.train.run_options import ( BUILD, CITATION, @@ -374,7 +377,10 @@ def get_type_map(jdata): def get_nbor_stat(jdata, rcut, one_type: bool = False): - max_rcut = get_rcut(jdata) + # it seems that DeepmdDataSystem does not need rcut + # it's not clear why there is an argument... 
+ # max_rcut = get_rcut(jdata) + max_rcut = rcut type_map = get_type_map(jdata) if type_map and len(type_map) == 0: @@ -472,18 +478,12 @@ def wrap_up_4(xx): return 4 * ((int(xx) + 3) // 4) -def update_one_sel(jdata, descriptor): - if descriptor["type"] == "loc_frame": - return descriptor +def update_one_sel(jdata, descriptor, one_type: bool = False): rcut = descriptor["rcut"] tmp_sel = get_sel( jdata, rcut, - one_type=descriptor["type"] - in ( - "se_atten", - "se_atten_v2", - ), + one_type=one_type, ) sel = descriptor["sel"] if isinstance(sel, int): @@ -503,10 +503,7 @@ def update_one_sel(jdata, descriptor): "not less than %d, but you set it to %d. The accuracy" " of your model may get worse." % (ii, tt, dd) ) - if descriptor["type"] in ( - "se_atten", - "se_atten_v2", - ): + if one_type: descriptor["sel"] = sel = sum(sel) return descriptor @@ -515,18 +512,6 @@ def update_sel(jdata): log.info( "Calculate neighbor statistics... (add --skip-neighbor-stat to skip this step)" ) - if jdata["model"].get("type") == "pairwise_dprc": - # do not update sel; only find min distance - rcut = get_rcut(jdata) - get_min_nbor_dist(jdata, rcut) - return jdata - elif jdata["model"].get("type") in ("linear_ener", "frozen"): - return jdata - descrpt_data = jdata["model"]["descriptor"] - if descrpt_data["type"] == "hybrid": - for ii in range(len(descrpt_data["list"])): - descrpt_data["list"][ii] = update_one_sel(jdata, descrpt_data["list"][ii]) - else: - descrpt_data = update_one_sel(jdata, descrpt_data) - jdata["model"]["descriptor"] = descrpt_data - return jdata + jdata_cpy = jdata.copy() + jdata_cpy["model"] = Model.update_sel(jdata, jdata["model"]) + return jdata_cpy diff --git a/deepmd/lmp.py b/deepmd/lmp.py index a955844758..fe08cc0a3b 100644 --- a/deepmd/lmp.py +++ b/deepmd/lmp.py @@ -35,13 +35,15 @@ def get_env(paths: List[Optional[str]]) -> str: return ":".join(p for p in paths if p is not None) -def get_library_path(module: str) -> List[str]: +def get_library_path(module: str, 
filename: str) -> List[str]: """Get library path from a module. Parameters ---------- module : str The module name. + filename : str + The library filename pattern. Returns ------- @@ -53,7 +55,8 @@ def get_library_path(module: str) -> List[str]: except ModuleNotFoundError: return [] else: - return [str(Path(m.__file__).parent)] + libs = sorted(Path(m.__path__[0]).glob(filename)) + return [str(lib) for lib in libs] if platform.system() == "Linux": @@ -63,6 +66,13 @@ def get_library_path(module: str) -> List[str]: else: raise RuntimeError("Unsupported platform") +if platform.system() == "Linux": + preload_env = "LD_PRELOAD" +elif platform.system() == "Darwin": + preload_env = "DYLD_INSERT_LIBRARIES" +else: + raise RuntimeError("Unsupported platform") + tf_dir = tf.sysconfig.get_lib() op_dir = str((Path(__file__).parent / "lib").absolute()) @@ -71,17 +81,24 @@ def get_library_path(module: str) -> List[str]: if platform.system() == "Linux": cuda_library_paths.extend( [ - *get_library_path("nvidia.cuda_runtime.lib"), - *get_library_path("nvidia.cublas.lib"), - *get_library_path("nvidia.cublas.lib"), - *get_library_path("nvidia.cufft.lib"), - *get_library_path("nvidia.curand.lib"), - *get_library_path("nvidia.cusolver.lib"), - *get_library_path("nvidia.cusparse.lib"), - *get_library_path("nvidia.cudnn.lib"), + *get_library_path("nvidia.cuda_runtime.lib", "libcudart.so*"), + *get_library_path("nvidia.cublas.lib", "libcublasLt.so*"), + *get_library_path("nvidia.cublas.lib", "libcublas.so*"), + *get_library_path("nvidia.cufft.lib", "libcufft.so*"), + *get_library_path("nvidia.curand.lib", "libcurand.so*"), + *get_library_path("nvidia.cusolver.lib", "libcusolver.so*"), + *get_library_path("nvidia.cusparse.lib", "libcusparse.so*"), + *get_library_path("nvidia.cudnn.lib", "libcudnn.so*"), ] ) +os.environ[preload_env] = get_env( + [ + os.environ.get(preload_env), + *cuda_library_paths, + ] +) + # set LD_LIBRARY_PATH os.environ[lib_env] = get_env( [ @@ -89,19 +106,12 @@ def 
get_library_path(module: str) -> List[str]: tf_dir, os.path.join(tf_dir, "python"), op_dir, - *cuda_library_paths, ] ) # preload python library, only for TF<2.12 if find_libpython is not None: libpython = find_libpython() - if platform.system() == "Linux": - preload_env = "LD_PRELOAD" - elif platform.system() == "Darwin": - preload_env = "DYLD_INSERT_LIBRARIES" - else: - raise RuntimeError("Unsupported platform") os.environ[preload_env] = get_env( [ os.environ.get(preload_env), diff --git a/deepmd/model/frozen.py b/deepmd/model/frozen.py index 972acb9185..38f342ebec 100644 --- a/deepmd/model/frozen.py +++ b/deepmd/model/frozen.py @@ -193,3 +193,17 @@ def enable_compression(self, suffix: str = "") -> None: def get_type_map(self) -> list: """Get the type map.""" return self.model.get_type_map() + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + # we don't know how to compress it, so no neighbor statistics here + return local_jdata diff --git a/deepmd/model/linear.py b/deepmd/model/linear.py index 799642ce33..7c527fe9dc 100644 --- a/deepmd/model/linear.py +++ b/deepmd/model/linear.py @@ -128,6 +128,24 @@ def get_type_map(self) -> list: """Get the type map.""" return self.models[0].get_type_map() + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + local_jdata_cpy = local_jdata.copy() + local_jdata_cpy["models"] = [ + Model.update_sel(global_jdata, sub_jdata) + for sub_jdata in local_jdata["models"] + ] + return local_jdata_cpy + class LinearEnergyModel(LinearModel): """Linear energy model make linear combinations of several existing energy models.""" diff --git a/deepmd/model/model.py b/deepmd/model/model.py index 9ae5eacf4f..3f24e42aec 100644 --- a/deepmd/model/model.py +++ b/deepmd/model/model.py @@ -78,36 +78,47 @@ class Model(ABC): Compression information for internal use """ + @classmethod + def get_class_by_input(cls, input: dict): + """Get the class by input data. + + Parameters + ---------- + input : dict + The input data + """ + # infer model type by fitting_type + from deepmd.model.frozen import ( + FrozenModel, + ) + from deepmd.model.linear import ( + LinearEnergyModel, + ) + from deepmd.model.multi import ( + MultiModel, + ) + from deepmd.model.pairwise_dprc import ( + PairwiseDPRc, + ) + + model_type = input.get("type", "standard") + if model_type == "standard": + return StandardModel + elif model_type == "multi": + return MultiModel + elif model_type == "pairwise_dprc": + return PairwiseDPRc + elif model_type == "frozen": + return FrozenModel + elif model_type == "linear_ener": + return LinearEnergyModel + else: + raise ValueError(f"unknown model type: {model_type}") + def __new__(cls, *args, **kwargs): if cls is Model: # init model - # infer model type by fitting_type - from deepmd.model.frozen import ( - FrozenModel, - ) - from deepmd.model.linear import ( - LinearEnergyModel, - ) - from deepmd.model.multi import ( - MultiModel, - ) - from deepmd.model.pairwise_dprc import ( - PairwiseDPRc, - ) - - model_type = kwargs.get("type", "standard") - if model_type == "standard": - cls = StandardModel - elif model_type == 
"multi": - cls = MultiModel - elif model_type == "pairwise_dprc": - cls = PairwiseDPRc - elif model_type == "frozen": - cls = FrozenModel - elif model_type == "linear_ener": - cls = LinearEnergyModel - else: - raise ValueError(f"unknown model type: {model_type}") + cls = cls.get_class_by_input(kwargs) return cls.__new__(cls, *args, **kwargs) return super().__new__(cls) @@ -471,6 +482,30 @@ def get_feed_dict( feed_dict["t_aparam:0"] = kwargs["aparam"] return feed_dict + @classmethod + @abstractmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict) -> dict: + """Update the selection and perform neighbor statistics. + + Notes + ----- + Do not modify the input data without copying it. + + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + + Returns + ------- + dict + The updated local data + """ + cls = cls.get_class_by_input(local_jdata) + return cls.update_sel(global_jdata, local_jdata) + class StandardModel(Model): """Standard model, which must contain a descriptor and a fitting. @@ -531,7 +566,7 @@ def __init__( self.descrpt = descriptor else: self.descrpt = Descriptor( - **descriptor, ntypes=len(type_map), spin=self.spin + **descriptor, ntypes=len(self.get_type_map()), spin=self.spin ) if isinstance(fitting_net, Fitting): @@ -613,3 +648,20 @@ def get_rcut(self) -> float: def get_ntypes(self) -> int: """Get the number of types.""" return self.ntypes + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + local_jdata_cpy = local_jdata.copy() + local_jdata_cpy["descriptor"] = Descriptor.update_sel( + global_jdata, local_jdata["descriptor"] + ) + return local_jdata_cpy diff --git a/deepmd/model/multi.py b/deepmd/model/multi.py index b0aa11a109..bfc67b9792 100644 --- a/deepmd/model/multi.py +++ b/deepmd/model/multi.py @@ -122,7 +122,7 @@ def __init__( else: self.descrpt = Descriptor( **descriptor, - ntypes=len(type_map), + ntypes=len(self.get_type_map()), multi_task=True, spin=self.spin, ) @@ -645,3 +645,20 @@ def get_loss(self, loss: dict, lr: dict) -> Dict[str, Loss]: loss_param, lr[fitting_key] ) return loss_dict + + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + local_jdata_cpy = local_jdata.copy() + local_jdata_cpy["descriptor"] = Descriptor.update_sel( + global_jdata, local_jdata["descriptor"] + ) + return local_jdata_cpy diff --git a/deepmd/model/pairwise_dprc.py b/deepmd/model/pairwise_dprc.py index 8f46ec239d..6983a31cfd 100644 --- a/deepmd/model/pairwise_dprc.py +++ b/deepmd/model/pairwise_dprc.py @@ -32,10 +32,6 @@ TypeEmbedNet, ) -from .ener import ( - EnerModel, -) - class PairwiseDPRc(Model): """Pairwise Deep Potential - Range Correction.""" @@ -87,13 +83,13 @@ def __init__( padding=True, ) - self.qm_model = EnerModel( + self.qm_model = Model( **qm_model, type_map=type_map, type_embedding=self.typeebd, compress=compress, ) - self.qmmm_model = EnerModel( + self.qmmm_model = Model( **qmmm_model, type_map=type_map, type_embedding=self.typeebd, @@ -187,6 +183,14 @@ def build( mesh_mixed_type = make_default_mesh(False, True) + # 
allow loading a frozen QM model that has only QM types + # Note: here we don't map the type between models, so + # the type of the frozen model must be the same as + # the first Ntypes of the current model + if self.get_ntypes() > self.qm_model.get_ntypes(): + natoms_qm = tf.slice(natoms_qm, [0], [self.qm_model.get_ntypes() + 2]) + assert self.get_ntypes() == self.qmmm_model.get_ntypes() + qm_dict = self.qm_model.build( coord_qm, atype_qm, @@ -301,7 +305,7 @@ def get_rcut(self): return max(self.qm_model.get_rcut(), self.qmmm_model.get_rcut()) def get_ntypes(self) -> int: - return self.qm_model.get_ntypes() + return self.ntypes def data_stat(self, data): self.qm_model.data_stat(data) @@ -395,6 +399,26 @@ def get_feed_dict( } return feed_dict + @classmethod + def update_sel(cls, global_jdata: dict, local_jdata: dict): + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + global_jdata : dict + The global data, containing the training section + local_jdata : dict + The local data refer to the current class + """ + from deepmd.entrypoints.train import ( + get_min_nbor_dist, + ) + + # do not update sel; only find min distance + # rcut is not important here + get_min_nbor_dist(global_jdata, 6.0) + return local_jdata + def gather_placeholder( params: tf.Tensor, indices: tf.Tensor, placeholder: float = 0.0, **kwargs diff --git a/deepmd/op/_tabulate_grad.py b/deepmd/op/_tabulate_grad.py index e91aa5fd2f..8ad8908d7e 100644 --- a/deepmd/op/_tabulate_grad.py +++ b/deepmd/op/_tabulate_grad.py @@ -60,13 +60,15 @@ def _tabulate_fusion_se_atten_grad_cc(op, dy): @ops.RegisterGradient("TabulateFusionSeAttenGrad") def _tabulate_fusion_se_atten_grad_grad_cc(op, dy, dy_, dy_dtwo): - dz_dy = op_module.tabulate_fusion_se_a_grad_grad( + dz_dy = op_module.tabulate_fusion_se_atten_grad_grad( op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[3], + op.inputs[4], dy, dy_, + dy_dtwo, op.inputs[6], is_sorted=op.get_attr("is_sorted"), ) diff --git 
a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 7bd373b492..ae446ef348 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -432,6 +432,11 @@ def descrpt_se_atten_v2_args(): ] +@descrpt_args_plugin.register("se_a_ebd_v2", alias=["se_a_tpe_v2"]) +def descrpt_se_a_ebd_v2_args(): + return descrpt_se_a_args() + + @descrpt_args_plugin.register("se_a_mask") def descrpt_se_a_mask_args(): doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\ diff --git a/deepmd/utils/compress.py b/deepmd/utils/compress.py new file mode 100644 index 0000000000..c6e68dfe19 --- /dev/null +++ b/deepmd/utils/compress.py @@ -0,0 +1,105 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging + +import numpy as np + +from deepmd.env import ( + tf, +) +from deepmd.utils.graph import ( + get_pattern_nodes_from_graph_def, + get_tensor_by_name_from_graph, +) + +log = logging.getLogger(__name__) + + +def get_type_embedding(self, graph): + type_embedding = get_tensor_by_name_from_graph(graph, "t_typeebd") + type_embedding = type_embedding.astype(self.filter_np_precision) + return type_embedding + + +def get_two_side_type_embedding(self, graph): + type_embedding = get_tensor_by_name_from_graph(graph, "t_typeebd") + type_embedding = type_embedding.astype(self.filter_np_precision) + type_embedding_shape = type_embedding.shape + + type_embedding_nei = np.tile( + np.reshape(type_embedding, [1, type_embedding_shape[0], -1]), + [type_embedding_shape[0], 1, 1], + ) # (ntypes) * ntypes * Y + type_embedding_center = np.tile( + np.reshape(type_embedding, [type_embedding_shape[0], 1, -1]), + [1, type_embedding_shape[0], 1], + ) # ntypes * (ntypes) * Y + two_side_type_embedding = np.concatenate( + [type_embedding_nei, type_embedding_center], -1 + ) # ntypes * ntypes * (Y+Y) + two_side_type_embedding = np.reshape( + two_side_type_embedding, [-1, two_side_type_embedding.shape[-1]] + ) + return two_side_type_embedding + + +def 
get_extra_side_embedding_net_variable( + self, graph_def, type_side, varialbe_name, suffix +): + ret = {} + for i in range(1, self.layer_size + 1): + target = get_pattern_nodes_from_graph_def( + graph_def, + f"filter_type_all{suffix}/{varialbe_name}_{i}_{type_side}_ebd", + ) + node = target[f"filter_type_all{suffix}/{varialbe_name}_{i}_{type_side}_ebd"] + ret["layer_" + str(i)] = node + return ret + + +def _layer_0(self, x, w, b): + return self.filter_activation_fn(tf.matmul(x, w) + b) + + +def _layer_1(self, x, w, b): + t = tf.concat([x, x], axis=1) + return t, self.filter_activation_fn(tf.matmul(x, w) + b) + t + + +def make_data(self, xx): + with tf.Session() as sess: + for layer in range(self.layer_size): + if layer == 0: + if self.filter_neuron[0] == 1: + yy = ( + _layer_0( + self, + xx, + self.matrix["layer_" + str(layer + 1)], + self.bias["layer_" + str(layer + 1)], + ) + + xx + ) + elif self.filter_neuron[0] == 2: + tt, yy = _layer_1( + self, + xx, + self.matrix["layer_" + str(layer + 1)], + self.bias["layer_" + str(layer + 1)], + ) + else: + yy = _layer_0( + self, + xx, + self.matrix["layer_" + str(layer + 1)], + self.bias["layer_" + str(layer + 1)], + ) + else: + tt, zz = _layer_1( + self, + yy, + self.matrix["layer_" + str(layer + 1)], + self.bias["layer_" + str(layer + 1)], + ) + yy = zz + vv = sess.run(zz) + return vv diff --git a/deepmd/utils/neighbor_stat.py b/deepmd/utils/neighbor_stat.py index a4ac190946..fa9325937e 100644 --- a/deepmd/utils/neighbor_stat.py +++ b/deepmd/utils/neighbor_stat.py @@ -82,6 +82,8 @@ def builder(): rcut=self.rcut, ) place_holders["dir"] = tf.placeholder(tf.string) + _min_nbor_dist = tf.reduce_min(_min_nbor_dist) + _max_nbor_size = tf.reduce_max(_max_nbor_size, axis=0) return place_holders, (_max_nbor_size, _min_nbor_dist, place_holders["dir"]) with sub_graph.as_default(): @@ -128,10 +130,7 @@ def feed(): } for mn, dt, jj in self.p.generate(self.sub_sess, feed()): - if dt.size != 0: - dt = np.min(dt) - else: - dt = 
self.rcut + if np.isinf(dt): log.warning( "Atoms with no neighbors found in %s. Please make sure it's what you expected." % jj @@ -145,9 +144,10 @@ def feed(): " training data to remove duplicated atoms." % jj ) self.min_nbor_dist = dt - var = np.max(mn, axis=0) - self.max_nbor_size = np.maximum(var, self.max_nbor_size) + self.max_nbor_size = np.maximum(mn, self.max_nbor_size) + # do sqrt in the final + self.min_nbor_dist = math.sqrt(self.min_nbor_dist) log.info("training data with min nbor dist: " + str(self.min_nbor_dist)) log.info("training data with max nbor size: " + str(self.max_nbor_size)) return self.min_nbor_dist, self.max_nbor_size diff --git a/deepmd/utils/tabulate.py b/deepmd/utils/tabulate.py index 883730b9d9..427887089a 100644 --- a/deepmd/utils/tabulate.py +++ b/deepmd/utils/tabulate.py @@ -176,7 +176,9 @@ def build( """ # tabulate range [lower, upper] with stride0 'stride0' lower, upper = self._get_env_mat_range(min_nbor_dist) - if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten): + if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten) or isinstance( + self.descrpt, deepmd.descriptor.DescrptSeAEbdV2 + ): uu = np.max(upper) ll = np.min(lower) xx = np.arange(ll, uu, stride0, dtype=self.data_type) @@ -419,7 +421,9 @@ def _get_bias(self): bias = {} for layer in range(1, self.layer_size + 1): bias["layer_" + str(layer)] = [] - if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten): + if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten) or isinstance( + self.descrpt, deepmd.descriptor.DescrptSeAEbdV2 + ): node = self.embedding_net_nodes[ f"filter_type_all{self.suffix}/bias_{layer}" ] @@ -483,7 +487,9 @@ def _get_matrix(self): matrix = {} for layer in range(1, self.layer_size + 1): matrix["layer_" + str(layer)] = [] - if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten): + if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten) or isinstance( + self.descrpt, deepmd.descriptor.DescrptSeAEbdV2 + ): node = 
self.embedding_net_nodes[ f"filter_type_all{self.suffix}/matrix_{layer}" ] @@ -687,7 +693,9 @@ def _spline5_switch(self, xx, rmin, rmax): def _get_layer_size(self): layer_size = 0 - if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten): + if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten) or isinstance( + self.descrpt, deepmd.descriptor.DescrptSeAEbdV2 + ): layer_size = len(self.embedding_net_nodes) // 2 elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeA): layer_size = len(self.embedding_net_nodes) // ( @@ -737,7 +745,9 @@ def _all_excluded(self, ii: int) -> bool: def _get_table_size(self): table_size = 0 - if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten): + if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten) or isinstance( + self.descrpt, deepmd.descriptor.DescrptSeAEbdV2 + ): table_size = 1 elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeA): table_size = self.ntypes * self.ntypes diff --git a/deepmd_cli/main.py b/deepmd_cli/main.py index 8aa6785681..5a0670d8dc 100644 --- a/deepmd_cli/main.py +++ b/deepmd_cli/main.py @@ -571,6 +571,29 @@ def main_parser() -> argparse.ArgumentParser: action="store_true", help="Skip calculating neighbor statistics. Sel checking, automatic sel, and model compression will be disabled.", ) + + # gui + parser_gui = subparsers.add_parser( + "gui", + parents=[parser_log], + help="Serve DP-GUI.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser_gui.add_argument( + "-p", + "--port", + type=int, + default=6042, + help="The port to serve DP-GUI on.", + ) + parser_gui.add_argument( + "--bind_all", + action="store_true", + help=( + "Serve on all public interfaces. This will expose your DP-GUI instance " + "to the network on both IPv4 and IPv6 (where available)." + ), + ) return parser diff --git a/doc/cli.rst b/doc/cli.rst index 4c52a9ede8..668a2df2e3 100644 --- a/doc/cli.rst +++ b/doc/cli.rst @@ -1,3 +1,5 @@ +.. 
_cli: + Command line interface ====================== diff --git a/doc/development/cicd.md b/doc/development/cicd.md index b323a62385..ad5cd49d84 100644 --- a/doc/development/cicd.md +++ b/doc/development/cicd.md @@ -10,6 +10,6 @@ `Test CUDA` action runs tests on a self-hosted runner with the NVIDIA card. It is not triggered by every PR. The developer who has the permission to manage the label can apply the label `Test CUDA` to a PR to trigger this action. - +## CD - +GitHub Actions is used to build pre-compiled packages for each commit. See the [Easy install the latest development version](../install/easy-install-dev.md) section to learn how to install the latest development version. diff --git a/doc/install/easy-install-dev.md b/doc/install/easy-install-dev.md new file mode 100644 index 0000000000..855c2f1839 --- /dev/null +++ b/doc/install/easy-install-dev.md @@ -0,0 +1,31 @@ +# Easy install the latest development version + +DeePMD-kit is actively developed in the `devel` branch. The documentation of the [`latest`](https://docs.deepmodeling.com/projects/deepmd/en/latest/) version matches the `devel` branch. + +The following is the way to install the pre-compiled packages without [building from source](./install-from-source.md). All of them are built with [GitHub Actions](../development/cicd.md). 
+ +## Install with docker + +The [`devel` tag](https://github.com/deepmodeling/deepmd-kit/pkgs/container/deepmd-kit/131827568?tag=devel) is used to mark the latest development version with CUDA support: + +```bash +docker pull ghcr.io/deepmodeling/deepmd-kit:devel +``` + +## Install with pip + +Below is a one-line shell command to download the [artifact](https://nightly.link/deepmodeling/deepmd-kit/workflows/build_wheel/devel/artifact.zip) containing wheels and install it with `pip`: + +```sh +bash -c 'wget -O /tmp/z.$$ https://nightly.link/deepmodeling/deepmd-kit/workflows/build_wheel/devel/artifact.zip && unzip /tmp/z.$$ -d /tmp/dist.$$ && pip install -U --pre deepmd-kit[gpu,cu11,lmp] --find-links /tmp/dist.$$ && rm -r /tmp/z.$$ /tmp/dist.$$' +``` + +`cu11` and `lmp` are optional, which is the same as the stable version. + +## Download pre-compiled C Library + +The [pre-compiled C library](./install-from-c-library.md) can be downloaded from [here](https://nightly.link/deepmodeling/deepmd-kit/workflows/package_c/devel/libdeepmd_c.zip), or via a shell command: + +```sh +wget https://nightly.link/deepmodeling/deepmd-kit/workflows/package_c/devel/libdeepmd_c.zip && unzip libdeepmd_c.zip +``` diff --git a/doc/install/index.md b/doc/install/index.md index 2746add48c..8428255f5a 100644 --- a/doc/install/index.md +++ b/doc/install/index.md @@ -8,3 +8,4 @@ - [Install GROMACS](install-gromacs.md) - [Building conda packages](build-conda.md) - [Install Node.js interface](install-nodejs.md) +- [Easy install the latest development version](easy-install-dev.md) diff --git a/doc/install/index.rst b/doc/install/index.rst index 38c0f2544e..5723e6571f 100644 --- a/doc/install/index.rst +++ b/doc/install/index.rst @@ -12,3 +12,4 @@ Installation install-gromacs build-conda install-nodejs + easy-install-dev diff --git a/doc/install/install-lammps.md b/doc/install/install-lammps.md index d9d6a28bc6..e643660cd1 100644 --- a/doc/install/install-lammps.md +++
b/doc/install/install-lammps.md @@ -17,7 +17,13 @@ cd /some/workspace wget https://github.com/lammps/lammps/archive/stable_2Aug2023_update1.tar.gz tar xf stable_2Aug2023_update1.tar.gz ``` -The source code of LAMMPS is stored in the directory `lammps-stable_2Aug2023_update1`. Now go into the LAMMPS code and copy the DeePMD-kit module like this +The source code of LAMMPS is stored in the directory `lammps-stable_2Aug2023_update1`. + +Then, you can [build LAMMPS](https://docs.lammps.org/Build.html) with either make or CMake. + +### With make + +Now go into the LAMMPS code and copy the DeePMD-kit module like this ```bash cd lammps-stable_2Aug2023_update1/src/ cp -r $deepmd_source_dir/source/build/USER-DEEPMD . @@ -40,6 +46,35 @@ The DeePMD-kit module can be removed from the LAMMPS source code by make no-user-deepmd ``` +### With CMake + +Now go into the LAMMPS directory and create a directory called `build`: + +```bash +mkdir -p lammps-stable_2Aug2023_update1/build/ +cd lammps-stable_2Aug2023_update1/build/ +``` + +Patch the LAMMPS `CMakeLists.txt` file: + +```bash +echo "include(${deepmd_source_dir}/source/lmp/builtin.cmake)" >> ../cmake/CMakeLists.txt +``` + +It's expected to see one extra line in the end of `CMakeLists.txt`. + +Now build LAMMPS. You can install any other package you want. +```bash +cmake -D LAMMPS_INSTALL_RPATH=ON -D BUILD_SHARED_LIBS=yes -D CMAKE_INSTALL_PREFIX=${deepmd_root} -DCMAKE_PREFIX_PATH=${deepmd_root} ../cmake +make -j4 +make install +``` + +If everything works fine, you will end up with an executable `${deepmd_root}/bin/lmp`. +```bash +${deepmd_root}/bin/lmp -h +``` + ## Install LAMMPS (plugin mode) Starting from `8Apr2021`, LAMMPS also provides a plugin mode, allowing one to build LAMMPS and a plugin separately. @@ -56,9 +91,9 @@ The source code of LAMMPS is stored in the directory `lammps-stable_2Aug2023_upd mkdir -p lammps-stable_2Aug2023_update1/build/ cd lammps-stable_2Aug2023_update1/build/ ``` -Now build LAMMPS. 
Note that `PLUGIN` and `KSPACE` packages must be enabled, and `BUILD_SHARED_LIBS` must be set to `yes`. You can install any other package you want. +Now build LAMMPS. Note that `PLUGIN` must be enabled, and `BUILD_SHARED_LIBS` must be set to `yes`. You can install any other package you want. ```bash -cmake -D PKG_PLUGIN=ON -D PKG_KSPACE=ON -D LAMMPS_INSTALL_RPATH=ON -D BUILD_SHARED_LIBS=yes -D CMAKE_INSTALL_PREFIX=${deepmd_root} -D CMAKE_INSTALL_LIBDIR=lib -D CMAKE_INSTALL_FULL_LIBDIR=${deepmd_root}/lib ../cmake +cmake -D PKG_PLUGIN=ON -D LAMMPS_INSTALL_RPATH=ON -D BUILD_SHARED_LIBS=yes -D CMAKE_INSTALL_PREFIX=${deepmd_root} -D CMAKE_INSTALL_LIBDIR=lib -D CMAKE_INSTALL_FULL_LIBDIR=${deepmd_root}/lib ../cmake make -j4 make install ``` diff --git a/doc/model/dprc.md b/doc/model/dprc.md index 9984ed9b1d..719421108a 100644 --- a/doc/model/dprc.md +++ b/doc/model/dprc.md @@ -29,18 +29,17 @@ As described in the paper, the DPRc model only corrects $E_\text{QM}$ and $E_\te "type": "hybrid", "list" : [ { - "type": "se_e2_a", + "type": "se_a_ebd_v2", "sel": [6, 11, 0, 6, 0, 1], "rcut_smth": 1.00, "rcut": 9.00, "neuron": [12, 25, 50], "exclude_types": [[2, 2], [2, 4], [4, 4], [0, 2], [0, 4], [1, 2], [1, 4], [3, 2], [3, 4], [5, 2], [5, 4]], "axis_neuron": 12, - "set_davg_zero": true, "_comment": " QM/QM interaction" }, { - "type": "se_e2_a", + "type": "se_a_ebd_v2", "sel": [6, 11, 100, 6, 50, 1], "rcut_smth": 0.50, "rcut": 6.00, @@ -54,7 +53,7 @@ As described in the paper, the DPRc model only corrects $E_\text{QM}$ and $E_\te } ``` -{ref}`exclude_types ` can be generated by the following Python script: +{ref}`exclude_types ` can be generated by the following Python script: ```py from itertools import combinations_with_replacement, product @@ -86,7 +85,7 @@ Also, DPRc assumes MM atom energies ({ref}`atom_ener ` only works when {ref}`descriptor/set_davg_zero ` is `true`. +Note that {ref}`atom_ener ` only works when {ref}`descriptor/set_davg_zero ` of the QM/MM part is `true`. 
## Run MD simulations diff --git a/doc/nvnmd/nvnmd.md b/doc/nvnmd/nvnmd.md index 0596ba5dc8..d89afd09e5 100644 --- a/doc/nvnmd/nvnmd.md +++ b/doc/nvnmd/nvnmd.md @@ -239,10 +239,10 @@ Then you need prepare the configuration file `job.json`, the configuration file "job_name": "test", "command": "/usr/bin/lmp_mpi < in.lmp;", "log_file": "OUTCAR", - "machine_type": "c8_m32_cpu", + "machine_type": "c4_m16_cpu", "job_type": "container", "image_name": "lammps_dp:29Sep2021", - "platform": "hnu", + "platform": "hnugba", "region": "default", "project_id": 0000 } @@ -255,24 +255,24 @@ where items are defined as: | job_name | the name of computing job, which can be named freely | a string | | command | the command to be executed on the computing node | a string | | log_file | the log file that can be viewed at any time during the calculation process, which can be viewed on the Bohrium "Jobs" page | a string | -| machine_type | the machine type used for the job | "c8_m32_cpu" | +| machine_type | the machine type used for the job | "c1_m4_cpu", "c4_m16_cpu", "c8_m32_cpu" | | job_type | the job type | "container" | | image_name | the image name used for the job | "lammps_dp:29Sep2021"| -| platform | resource provider | "hnu" | +| platform | resource provider | "hnugba" | | project_id | the project ID to which the job belongs, which can be viewed on the "Projects" page | a integer | -Notice:The task will use 8 CPU cores for computation, so do not repeatedly use the `mpirun` command, otherwise an error will be reported. All 0000 after "project_id" need to be replaced with your own project ID, which can be viewed on the "Projects" page. Also, the JSON file format requires that no commas be added after the last field within the {}, otherwise, there will be a syntax error. +Notice:The task will use 4 CPU cores for computation, so do not repeatedly use the `mpirun` command, otherwise an error will be reported. 
All 0000 after "project_id" need to be replaced with your own project ID, which can be viewed on the "Projects" page. Also, the JSON file format requires that no commas be added after the last field within the {}, otherwise, there will be a syntax error. Please check the [documentation](https://github.com/LiuGroupHNU/md-data/blob/master/code/doc/mdpu/hardware.md) for the latest hardware configuration information. In addition, it is necessary to prepare input script of the MD simulation, the ML model named `model.pb` obtained by QNN training and data files containing information required for running an MD simulation (e.g., `coord.lmp` containing initial atom coordinates). In the input script, one needs to specify the pair style as follows ```lammps -pair_style nvnmd model.pb 6 2 +pair_style nvnmd model.pb pair_coeff * * ``` -where `model.pb` is the path to model, `6` is the cutoff radius, `2` is the number of FPGA cards used with the maximum of 2. +where `model.pb` is the path to model. After preparing the configuration file and the required files for calculation, using Lebesgue Utility to submit the job diff --git a/doc/train/train-input.rst b/doc/train/train-input.rst index 893dd0980e..2a32aeb930 100644 --- a/doc/train/train-input.rst +++ b/doc/train/train-input.rst @@ -1,7 +1,7 @@ Training Parameters ====================================== .. note:: - One can load, modify, and export the input file by using our effective web-based tool `DP-GUI `_. All training parameters below can be set in DP-GUI. By clicking "SAVE JSON", one can download the input file for furthur training. + One can load, modify, and export the input file by using our effective web-based tool `DP-GUI `_ online or hosted using the :ref:`command line interface ` :code:`dp gui`. All training parameters below can be set in DP-GUI. By clicking "SAVE JSON", one can download the input file for further training. ..
dargs:: :module: deepmd.utils.argcheck diff --git a/doc/troubleshooting/howtoset_num_nodes.md b/doc/troubleshooting/howtoset_num_nodes.md index 1415f50c50..8a9beab857 100644 --- a/doc/troubleshooting/howtoset_num_nodes.md +++ b/doc/troubleshooting/howtoset_num_nodes.md @@ -16,7 +16,7 @@ Set the number of processes with: ```bash mpirun -np $num_nodes dp ``` -Note that `mpirun` here should be the same as the MPI used to build software. For example, one can use `mpirun -h` and `lmp -h` to see if `mpirun` and LAMMPS has the same MPI version. +Note that `mpirun` here should be the same as the MPI used to build software. For example, one can use `mpirun --version` and `lmp -h` to see if `mpirun` and LAMMPS has the same MPI version. Sometimes, `$num_nodes` and the nodes information can be directly given by the HPC scheduler system, if the MPI used here is the same as the MPI used to build the scheduler system. Otherwise, one have to manually assign these information. diff --git a/examples/dprc/generalized_force/input.json b/examples/dprc/generalized_force/input.json index 80a9ebeb1b..1bb0f687eb 100644 --- a/examples/dprc/generalized_force/input.json +++ b/examples/dprc/generalized_force/input.json @@ -19,7 +19,7 @@ "type": "hybrid", "list": [ { - "type": "se_e2_a", + "type": "se_a_ebd_v2", "sel": [ 6, 1, diff --git a/examples/dprc/normal/input.json b/examples/dprc/normal/input.json index eba044fd43..f85607f855 100644 --- a/examples/dprc/normal/input.json +++ b/examples/dprc/normal/input.json @@ -19,7 +19,7 @@ "type": "hybrid", "list": [ { - "type": "se_e2_a", + "type": "se_a_ebd_v2", "sel": [ 6, 1, diff --git a/examples/water/se_e2_a_tebd/input.json b/examples/water/se_e2_a_tebd/input.json index 101c1a7a4f..1ec85a968c 100644 --- a/examples/water/se_e2_a_tebd/input.json +++ b/examples/water/se_e2_a_tebd/input.json @@ -16,7 +16,7 @@ "seed": 1 }, "descriptor": { - "type": "se_e2_a", + "type": "se_a_ebd_v2", "sel": [ 46, 92 diff --git a/pyproject.toml b/pyproject.toml index 
6c2d5d0601..8c5267567b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ backend-path = ["."] [project] name = "deepmd-kit" -dynamic = ["version", "optional-dependencies", "scripts"] +dynamic = ["version", "optional-dependencies", "scripts", "readme"] description = "A deep learning package for many-body potential energy representation and molecular dynamics" authors = [ {name = "DeepModeling"}, @@ -47,12 +47,14 @@ dependencies = [ 'packaging', ] requires-python = ">=3.7" -readme = "README.md" keywords = ["deepmd"] [project.entry-points."lammps.plugins"] deepmd = "deepmd.lmp:get_op_dir" +[project.entry-points."dpgui"] +"DeePMD-kit" = "deepmd.utils.argcheck:gen_args" + [project.urls] Homepage = "https://github.com/deepmodeling/deepmd-kit" documentation = "https://docs.deepmodeling.com/projects/deepmd" @@ -96,12 +98,31 @@ provider-path = "backend" provider = "backend.dynamic_metadata" provider-path = "backend" +[tool.scikit-build.metadata.readme] +provider = "scikit_build_core.metadata.fancy_pypi_readme" + [[tool.scikit-build.generate]] path = "deepmd_cli/_version.py" template = ''' version = "${version}" ''' +[tool.hatch.metadata.hooks.fancy-pypi-readme] +content-type = "text/markdown" + +[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]] +path = "README.md" + +[[tool.hatch.metadata.hooks.fancy-pypi-readme.substitutions]] +# links +pattern = '\[(.+?)\]\(((?!https?://)\S+?)\)' +replacement = '[\1](https://github.com/deepmodeling/deepmd-kit/tree/master/\g<2>)' + +[[tool.hatch.metadata.hooks.fancy-pypi-readme.substitutions]] +# image +pattern = '(srcset|src)="((?!https?://)\S+?)"' +replacement = '\1="https://github.com/deepmodeling/deepmd-kit/raw/master/\g<2>"' + [tool.cibuildwheel] test-command = [ "python -m deepmd -h", @@ -129,9 +150,9 @@ repair-wheel-command = """if [[ "$CIBW_BUILD" == *macosx_arm64* ]]; then rm -rf [tool.cibuildwheel.linux] repair-wheel-command = "auditwheel repair --exclude libtensorflow_framework.so.2 --exclude 
libtensorflow_framework.so.1 --exclude libtensorflow_framework.so --exclude _pywrap_tensorflow_internal.so --exclude libtensorflow_cc.so.2 -w {dest_dir} {wheel}" environment-pass = ["CIBW_BUILD", "DP_VARIANT"] -environment = { PIP_PREFER_BINARY="1", DP_VARIANT="cuda", DP_LAMMPS_VERSION="stable_2Aug2023_update1", DP_ENABLE_IPI="1", MPI_HOME="/usr/lib64/mpich", PATH="/usr/lib64/mpich/bin:$PATH" } +environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update1", DP_ENABLE_IPI="1", MPI_HOME="/usr/lib64/mpich", PATH="/usr/lib64/mpich/bin:$PATH" } before-all = [ - """{ if [ "$(uname -m)" = "x86_64" ] ; then curl https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run -O && bash cuda_11.8.0_520.61.05_linux.run --silent --toolkit; fi }""", + """{ if [ "$(uname -m)" = "x86_64" ] ; then yum config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && yum install -y cuda-nvcc-11-8 cuda-cudart-devel-11-8; fi }""", "yum install -y mpich-devel", ] @@ -205,6 +226,7 @@ select = [ "RUF", # ruff "NPY", # numpy ] + ignore = [ "E501", # line too long "F841", # local variable is assigned to but never used @@ -224,3 +246,6 @@ ignore-init-module-imports = true [tool.ruff.pydocstyle] convention = "numpy" + +[tool.pytest.ini_options] +markers = "run" diff --git a/source/3rdparty/implib/arch/aarch64/config.ini b/source/3rdparty/implib/arch/aarch64/config.ini new file mode 100644 index 0000000000..0c2ea99ee5 --- /dev/null +++ b/source/3rdparty/implib/arch/aarch64/config.ini @@ -0,0 +1,3 @@ +[Arch] +PointerSize = 8 +SymbolReloc = R_AARCH64_ABS64 diff --git a/source/3rdparty/implib/arch/aarch64/table.S.tpl b/source/3rdparty/implib/arch/aarch64/table.S.tpl new file mode 100644 index 0000000000..936855427c --- /dev/null +++ b/source/3rdparty/implib/arch/aarch64/table.S.tpl @@ -0,0 +1,79 @@ +/* + * Copyright 2018-2023 Yury Gribov + * + * The MIT License (MIT) + * + * Use of 
this source code is governed by MIT license that can be + * found in the LICENSE.txt file. + */ + +#define lr x30 +#define ip0 x16 + + .section .note.GNU-stack,"",@progbits + + .data + + .globl _${lib_suffix}_tramp_table + .hidden _${lib_suffix}_tramp_table + .align 8 +_${lib_suffix}_tramp_table: + .zero $table_size + + .text + + .globl _${lib_suffix}_tramp_resolve + .hidden _${lib_suffix}_tramp_resolve + + .globl _${lib_suffix}_save_regs_and_resolve + .hidden _${lib_suffix}_save_regs_and_resolve + .type _${lib_suffix}_save_regs_and_resolve, %function +_${lib_suffix}_save_regs_and_resolve: + .cfi_startproc + + // Slow path which calls dlsym, taken only on first call. + // Registers are saved according to "Procedure Call Standard for the Arm® 64-bit Architecture". + // For DWARF directives, read https://www.imperialviolet.org/2017/01/18/cfi.html. + + // Stack is aligned at 16 bytes + +#define PUSH_PAIR(reg1, reg2) stp reg1, reg2, [sp, #-16]!; .cfi_adjust_cfa_offset 16; .cfi_rel_offset reg1, 0; .cfi_rel_offset reg2, 8 +#define POP_PAIR(reg1, reg2) ldp reg1, reg2, [sp], #16; .cfi_adjust_cfa_offset -16; .cfi_restore reg2; .cfi_restore reg1 + +#define PUSH_WIDE_PAIR(reg1, reg2) stp reg1, reg2, [sp, #-32]!; .cfi_adjust_cfa_offset 32; .cfi_rel_offset reg1, 0; .cfi_rel_offset reg2, 16 +#define POP_WIDE_PAIR(reg1, reg2) ldp reg1, reg2, [sp], #32; .cfi_adjust_cfa_offset -32; .cfi_restore reg2; .cfi_restore reg1 + + // Save only arguments (and lr) + PUSH_PAIR(x0, x1) + PUSH_PAIR(x2, x3) + PUSH_PAIR(x4, x5) + PUSH_PAIR(x6, x7) + PUSH_PAIR(x8, lr) + + ldr x0, [sp, #80] // 16*5 + + PUSH_WIDE_PAIR(q0, q1) + PUSH_WIDE_PAIR(q2, q3) + PUSH_WIDE_PAIR(q4, q5) + PUSH_WIDE_PAIR(q6, q7) + + // Stack is aligned at 16 bytes + + bl _${lib_suffix}_tramp_resolve + + // TODO: pop pc? 
+ + POP_WIDE_PAIR(q6, q7) + POP_WIDE_PAIR(q4, q5) + POP_WIDE_PAIR(q2, q3) + POP_WIDE_PAIR(q0, q1) + + POP_PAIR(x8, lr) + POP_PAIR(x6, x7) + POP_PAIR(x4, x5) + POP_PAIR(x2, x3) + POP_PAIR(x0, x1) + + br lr + + .cfi_endproc diff --git a/source/3rdparty/implib/arch/aarch64/trampoline.S.tpl b/source/3rdparty/implib/arch/aarch64/trampoline.S.tpl new file mode 100644 index 0000000000..18b9e4f030 --- /dev/null +++ b/source/3rdparty/implib/arch/aarch64/trampoline.S.tpl @@ -0,0 +1,40 @@ +/* + * Copyright 2018-2023 Yury Gribov + * + * The MIT License (MIT) + * + * Use of this source code is governed by MIT license that can be + * found in the LICENSE.txt file. + */ + + .globl $sym + .p2align 4 + .type $sym, %function +#ifndef IMPLIB_EXPORT_SHIMS + .hidden $sym +#endif +$sym: + .cfi_startproc + +1: + // Load address + // TODO: can we do this faster on newer ARMs? + adrp ip0, _${lib_suffix}_tramp_table+$offset + ldr ip0, [ip0, #:lo12:_${lib_suffix}_tramp_table+$offset] + + cbz ip0, 2f + + // Fast path + br ip0 + +2: + // Slow path + mov ip0, $number & 0xffff +#if $number > 0xffff + movk ip0, $number >> 16, lsl #16 +#endif + stp ip0, lr, [sp, #-16]!; .cfi_adjust_cfa_offset 16; .cfi_rel_offset ip0, 0; .cfi_rel_offset lr, 8; + bl _${lib_suffix}_save_regs_and_resolve + ldp ip0, lr, [sp], #16; .cfi_adjust_cfa_offset -16; .cfi_restore lr; .cfi_restore ip0 + b 1b + .cfi_endproc diff --git a/source/3rdparty/implib/arch/arm/config.ini b/source/3rdparty/implib/arch/arm/config.ini new file mode 100644 index 0000000000..4f0870a2b2 --- /dev/null +++ b/source/3rdparty/implib/arch/arm/config.ini @@ -0,0 +1,3 @@ +[Arch] +PointerSize = 4 +SymbolReloc = R_ARM_ABS32 diff --git a/source/3rdparty/implib/arch/arm/table.S.tpl b/source/3rdparty/implib/arch/arm/table.S.tpl new file mode 100644 index 0000000000..7b99071368 --- /dev/null +++ b/source/3rdparty/implib/arch/arm/table.S.tpl @@ -0,0 +1,90 @@ +/* + * Copyright 2018-2023 Yury Gribov + * + * The MIT License (MIT) + * + * Use of this source code 
is governed by MIT license that can be + * found in the LICENSE.txt file. + */ + + .section .note.GNU-stack,"",%progbits + + .data + + .globl _${lib_suffix}_tramp_table + .hidden _${lib_suffix}_tramp_table + .align 4 +_${lib_suffix}_tramp_table: + .zero $table_size + + .text + + .globl _${lib_suffix}_tramp_resolve + .hidden _${lib_suffix}_tramp_resolve + + .globl _${lib_suffix}_save_regs_and_resolve + .hidden _${lib_suffix}_save_regs_and_resolve + .type _${lib_suffix}_save_regs_and_resolve, %function +_${lib_suffix}_save_regs_and_resolve: + .cfi_startproc + +#define PUSH_REG(reg) push {reg}; .cfi_adjust_cfa_offset 4; .cfi_rel_offset reg, 0 +#define POP_REG(reg) pop {reg} ; .cfi_adjust_cfa_offset -4; .cfi_restore reg + +// Binutils 2.30 does not like q0 in .cfi_rel_offset +#define PUSH_DREG_PAIR(reg1, reg2) vpush {reg1, reg2}; .cfi_adjust_cfa_offset 16; .cfi_rel_offset reg1, 0; .cfi_rel_offset reg2, 8 +#define POP_DREG_PAIR(reg1, reg2) vpop {reg1, reg2}; .cfi_adjust_cfa_offset -16; .cfi_restore reg1; .cfi_restore reg2 + + // Slow path which calls dlsym, taken only on first call. + // Registers are saved acc. to "Procedure Call Standard for the ARM Architecture". + // For DWARF directives, read https://www.imperialviolet.org/2017/01/18/cfi.html. + + // Stack is aligned at 16 bytes at this point + + // Save only arguments (and lr) + PUSH_REG(r0) + ldr r0, [sp, #8] + PUSH_REG(r1) + PUSH_REG(r2) + PUSH_REG(r3) + PUSH_REG(lr) + PUSH_REG(lr) // Align to 8 bytes + + // Arguments can be passed in VFP registers only when hard-float ABI is used + // for arm-gnueabihf target // (http://android-doc.github.io/ndk/guides/abis.html#v7a). + // Use compiler macro to detect this case. 
+#ifdef __ARM_PCS_VFP + PUSH_DREG_PAIR(d0, d1) + PUSH_DREG_PAIR(d2, d3) + PUSH_DREG_PAIR(d4, d5) + PUSH_DREG_PAIR(d6, d7) + PUSH_DREG_PAIR(d8, d9) + PUSH_DREG_PAIR(d10, d11) + PUSH_DREG_PAIR(d12, d13) + PUSH_DREG_PAIR(d14, d15) + // FIXME: NEON actually supports 32 D-registers but it's unclear how to detect this +#endif + + bl _${lib_suffix}_tramp_resolve(PLT) + +#ifdef __ARM_PCS_VFP + POP_DREG_PAIR(d14, d15) + POP_DREG_PAIR(d12, d13) + POP_DREG_PAIR(d10, d11) + POP_DREG_PAIR(d8, d9) + POP_DREG_PAIR(d6, d7) + POP_DREG_PAIR(d4, d5) + POP_DREG_PAIR(d2, d3) + POP_DREG_PAIR(d0, d1) +#endif + + POP_REG(lr) // TODO: pop pc? + POP_REG(lr) + POP_REG(r3) + POP_REG(r2) + POP_REG(r1) + POP_REG(r0) + + bx lr + + .cfi_endproc diff --git a/source/3rdparty/implib/arch/arm/trampoline.S.tpl b/source/3rdparty/implib/arch/arm/trampoline.S.tpl new file mode 100644 index 0000000000..12a51df017 --- /dev/null +++ b/source/3rdparty/implib/arch/arm/trampoline.S.tpl @@ -0,0 +1,49 @@ +/* + * Copyright 2018-2023 Yury Gribov + * + * The MIT License (MIT) + * + * Use of this source code is governed by MIT license that can be + * found in the LICENSE.txt file. + */ + + .globl $sym + .p2align 4 + .type $sym, %function +#ifndef IMPLIB_EXPORT_SHIMS + .hidden $sym +#endif +$sym: + .cfi_startproc + +1: + // Load address + // TODO: can we do this faster on newer ARMs? 
+ ldr ip, 3f +2: + add ip, pc, ip + ldr ip, [ip] + + cmp ip, #0 + + // Fast path + bxne ip + + // Slow path + ldr ip, =$number + push {ip} + .cfi_adjust_cfa_offset 4 + PUSH_REG(lr) + bl _${lib_suffix}_save_regs_and_resolve + POP_REG(lr) + add sp, #4 + .cfi_adjust_cfa_offset -4 + b 1b + + // Force constant pool for ldr above + .ltorg + + .cfi_endproc + +3: + .word _${lib_suffix}_tramp_table - (2b + 8) + $offset diff --git a/source/3rdparty/implib/arch/common/init.c.tpl b/source/3rdparty/implib/arch/common/init.c.tpl new file mode 100644 index 0000000000..ad8b19b3e1 --- /dev/null +++ b/source/3rdparty/implib/arch/common/init.c.tpl @@ -0,0 +1,152 @@ +/* + * Copyright 2018-2022 Yury Gribov + * + * The MIT License (MIT) + * + * Use of this source code is governed by MIT license that can be + * found in the LICENSE.txt file. + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE // For RTLD_DEFAULT +#endif + +#include +#include +#include +#include +#include + +// Sanity check for ARM to avoid puzzling runtime crashes +#ifdef __arm__ +# if defined __thumb__ && ! defined __THUMB_INTERWORK__ +# error "ARM trampolines need -mthumb-interwork to work in Thumb mode" +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define CHECK(cond, fmt, ...) do { \ + if(!(cond)) { \ + fprintf(stderr, "implib-gen: $load_name: " fmt "\n", ##__VA_ARGS__); \ + } \ + } while(0) + +#define HAS_DLOPEN_CALLBACK $has_dlopen_callback +#define HAS_DLSYM_CALLBACK $has_dlsym_callback +#define NO_DLOPEN $no_dlopen +#define LAZY_LOAD $lazy_load + +static void *lib_handle; +static int do_dlclose; +static int is_lib_loading; + +#if ! 
NO_DLOPEN +static void *load_library() { + if(lib_handle) + return lib_handle; + + is_lib_loading = 1; + +#if HAS_DLOPEN_CALLBACK + extern void *$dlopen_callback(const char *lib_name); + lib_handle = $dlopen_callback("$load_name"); + CHECK(lib_handle, "failed to load library '$load_name' via callback '$dlopen_callback'"); +#else + lib_handle = dlopen("$load_name", RTLD_LAZY | RTLD_GLOBAL); + CHECK(lib_handle, "failed to load library '$load_name' via dlopen: %s", dlerror()); +#endif + + do_dlclose = 1; + is_lib_loading = 0; + + return lib_handle; +} + +static void __attribute__((destructor)) unload_lib() { + if(do_dlclose && lib_handle) + dlclose(lib_handle); +} +#endif + +#if ! NO_DLOPEN && ! LAZY_LOAD +static void __attribute__((constructor)) load_lib() { + load_library(); +} +#endif + +// TODO: convert to single 0-separated string +static const char *const sym_names[] = { + $sym_names + 0 +}; + +#define SYM_COUNT (sizeof(sym_names)/sizeof(sym_names[0]) - 1) + +extern void *_${lib_suffix}_tramp_table[]; + +// Can be sped up by manually parsing library symtab... +void _${lib_suffix}_tramp_resolve(int i) { + assert((unsigned)i < SYM_COUNT); + + CHECK(!is_lib_loading, "library function '%s' called during library load", sym_names[i]); + + void *h = 0; +#if NO_DLOPEN + // Library with implementations must have already been loaded. + if (lib_handle) { + // User has specified loaded library + h = lib_handle; + } else { + // User hasn't provided us the loaded library so search the global namespace. 
+# ifndef IMPLIB_EXPORT_SHIMS + // If shim symbols are hidden we should search + // for first available definition of symbol in library list + h = RTLD_DEFAULT; +# else + // Otherwise look for next available definition + h = RTLD_NEXT; +# endif + } +#else + h = load_library(); + CHECK(h, "failed to resolve symbol '%s', library failed to load", sym_names[i]); +#endif + +#if HAS_DLSYM_CALLBACK + extern void *$dlsym_callback(void *handle, const char *sym_name); + _${lib_suffix}_tramp_table[i] = $dlsym_callback(h, sym_names[i]); + CHECK(_${lib_suffix}_tramp_table[i], "failed to resolve symbol '%s' via callback $dlsym_callback", sym_names[i]); +#else + // Dlsym is thread-safe so don't need to protect it. + _${lib_suffix}_tramp_table[i] = dlsym(h, sym_names[i]); + CHECK(_${lib_suffix}_tramp_table[i], "failed to resolve symbol '%s' via dlsym: %s", sym_names[i], dlerror()); +#endif +} + +// Helper for user to resolve all symbols +void _${lib_suffix}_tramp_resolve_all(void) { + size_t i; + for(i = 0; i < SYM_COUNT; ++i) + _${lib_suffix}_tramp_resolve(i); +} + +// Allows user to specify manually loaded implementation library. +void _${lib_suffix}_tramp_set_handle(void *handle) { + lib_handle = handle; + do_dlclose = 0; +} + +// Resets all resolved symbols. This is needed in case +// client code wants to reload interposed library multiple times. 
+void _${lib_suffix}_tramp_reset(void) { + memset(_${lib_suffix}_tramp_table, 0, SYM_COUNT * sizeof(_${lib_suffix}_tramp_table[0])); + lib_handle = 0; + do_dlclose = 0; +} + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/source/3rdparty/implib/arch/e2k/README.md b/source/3rdparty/implib/arch/e2k/README.md new file mode 100644 index 0000000000..eb87f54f85 --- /dev/null +++ b/source/3rdparty/implib/arch/e2k/README.md @@ -0,0 +1,4 @@ +Reference materials: + * Руководство по эффективному программированию на платформе «Эльбрус» (http://www.mcst.ru/files/5ed39a/dd0cd8/50506b/000000/elbrus_prog_2020-05-30.pdf) + * Микропроцессоры и вычислительные комплексы семейства Эльбрус (http://www.mcst.ru/doc/book_121130.pdf) + * https://github.com/OpenE2K diff --git a/source/3rdparty/implib/arch/e2k/config.ini b/source/3rdparty/implib/arch/e2k/config.ini new file mode 100644 index 0000000000..e34cbe2713 --- /dev/null +++ b/source/3rdparty/implib/arch/e2k/config.ini @@ -0,0 +1,3 @@ +[Arch] +PointerSize = 8 +SymbolReloc = R_E2K_DISP, R_E2K_64_ABS_LIT, R_E2K_64_ABS diff --git a/source/3rdparty/implib/arch/e2k/table.S.tpl b/source/3rdparty/implib/arch/e2k/table.S.tpl new file mode 100644 index 0000000000..9f28f51791 --- /dev/null +++ b/source/3rdparty/implib/arch/e2k/table.S.tpl @@ -0,0 +1,40 @@ +/* + * Copyright 2022 Yury Gribov + * + * The MIT License (MIT) + * + * Use of this source code is governed by MIT license that can be + * found in the LICENSE.txt file. 
+ */ + + .data + + .globl _${lib_suffix}_tramp_table + .hidden _${lib_suffix}_tramp_table + .ignore strict_delay + .p2align 3 +_${lib_suffix}_tramp_table: + .zero $table_size + + .text + + .globl _${lib_suffix}_tramp_resolve + .hidden _${lib_suffix}_tramp_resolve + + .globl _${lib_suffix}_save_regs_and_resolve + .hidden _${lib_suffix}_save_regs_and_resolve + .type _${lib_suffix}_save_regs_and_resolve, %function +_${lib_suffix}_save_regs_and_resolve: + .cfi_startproc + + setwd wsz = 0x1, nfx = 1 + + addd 0x0, %g0, %r0 + + disp %ctpr1, _${lib_suffix}_tramp_resolve + call %ctpr1, wbs = 0 + + return %ctpr3 + ct %ctpr3 + + .cfi_endproc diff --git a/source/3rdparty/implib/arch/e2k/trampoline.S.tpl b/source/3rdparty/implib/arch/e2k/trampoline.S.tpl new file mode 100644 index 0000000000..dcd385713b --- /dev/null +++ b/source/3rdparty/implib/arch/e2k/trampoline.S.tpl @@ -0,0 +1,56 @@ +/* + * Copyright 2022 Yury Gribov + * + * The MIT License (MIT) + * + * Use of this source code is governed by MIT license that can be + * found in the LICENSE.txt file. + */ + + .globl $sym + .p2align 3 + .ignore strict_delay + .type $sym, %function +#ifndef IMPLIB_EXPORT_SHIMS + .hidden $sym +#endif +$sym: + .cfi_startproc + + setwd wsz = 0x8, nfx = 0x1 + +1: + // Read table address + { + rrd %ip, %g0 + addd 0x0, [ _f64 _GLOBAL_OFFSET_TABLE_ ], %g1 + } + addd %g0, %g1, %g0 + addd %g0, [ _f64 _${lib_suffix}_tramp_table@GOTOFF ], %g0 + + // Read current function address + ldd [%g0 + $offset], %g0 + + // NULL? + { + cmpesb %g0, 0x0, %pred0 + movtd %g0, %ctpr2 + } + + // Jump to fast path + ct %ctpr2 ? 
~%pred0 + + // Or fall through to slow path + +2: + // Initialize parameter + addd 0x0, _f16s $number, %g0 + + // Call resolver + disp %ctpr1, _${lib_suffix}_save_regs_and_resolve + call %ctpr1, wbs = 0x8 + + // Return to fast path + ibranch 1b + + .cfi_endproc diff --git a/source/3rdparty/implib/arch/i386/config.ini b/source/3rdparty/implib/arch/i386/config.ini new file mode 100644 index 0000000000..f93835df74 --- /dev/null +++ b/source/3rdparty/implib/arch/i386/config.ini @@ -0,0 +1,3 @@ +[Arch] +PointerSize = 4 +SymbolReloc = R_386_32 diff --git a/source/3rdparty/implib/arch/i386/table.S.tpl b/source/3rdparty/implib/arch/i386/table.S.tpl new file mode 100644 index 0000000000..ed7944a1e3 --- /dev/null +++ b/source/3rdparty/implib/arch/i386/table.S.tpl @@ -0,0 +1,82 @@ +/* + * Copyright 2019-2023 Yury Gribov + * + * The MIT License (MIT) + * + * Use of this source code is governed by MIT license that can be + * found in the LICENSE.txt file. + */ + + .section .note.GNU-stack,"",@progbits + + .data + + .globl _${lib_suffix}_tramp_table + .hidden _${lib_suffix}_tramp_table + .align 4 +_${lib_suffix}_tramp_table: + .zero $table_size + + .text + + .globl _${lib_suffix}_tramp_resolve + .hidden _${lib_suffix}_tramp_resolve + + .globl _${lib_suffix}_save_regs_and_resolve + .hidden _${lib_suffix}_save_regs_and_resolve + .type _${lib_suffix}_save_regs_and_resolve, %function +_${lib_suffix}_save_regs_and_resolve: + .cfi_startproc + +#define PUSH_REG(reg) pushl %reg ; .cfi_adjust_cfa_offset 4; .cfi_rel_offset reg, 0 +#define POP_REG(reg) popl %reg ; .cfi_adjust_cfa_offset -4; .cfi_restore reg + + // Slow path which calls dlsym, taken only on first call. + // All registers are stored to handle arbitrary calling conventions + // (except XMM/x87 regs in hope they are not used in resolving code). + // For Dwarf directives, read https://www.imperialviolet.org/2017/01/18/cfi.html. 
+ + .cfi_def_cfa_offset 4 // Return address + + PUSH_REG(eax) + PUSH_REG(ebx) + PUSH_REG(ecx) + PUSH_REG(edx) // 16 + + PUSH_REG(ebp) + PUSH_REG(edi) + PUSH_REG(esi) + pushfl; .cfi_adjust_cfa_offset 4 // 16 + + subl $$8, %esp + .cfi_adjust_cfa_offset 8 + PUSH_REG(eax) + + call _${lib_suffix}_tramp_resolve@PLT // Stack will be aligned at 16 in call + + addl $$12, %esp + .cfi_adjust_cfa_offset -12 + + popfl; .cfi_adjust_cfa_offset -4 + POP_REG(esi) + POP_REG(edi) + POP_REG(ebp) + + POP_REG(edx) + POP_REG(ecx) + POP_REG(ebx) + POP_REG(eax) + + ret + + .cfi_endproc + + .section .text.__implib.x86.get_pc_thunk.ax,"axG",@progbits,__implib.x86.get_pc_thunk.ax,comdat + .globl __implib.x86.get_pc_thunk.ax + .hidden __implib.x86.get_pc_thunk.ax + .type __implib.x86.get_pc_thunk.ax, %function +__implib.x86.get_pc_thunk.ax: + .cfi_startproc + movl (%esp), %eax + ret + .cfi_endproc diff --git a/source/3rdparty/implib/arch/i386/trampoline.S.tpl b/source/3rdparty/implib/arch/i386/trampoline.S.tpl new file mode 100644 index 0000000000..98b07c4a5c --- /dev/null +++ b/source/3rdparty/implib/arch/i386/trampoline.S.tpl @@ -0,0 +1,33 @@ +/* + * Copyright 2019-2022 Yury Gribov + * + * The MIT License (MIT) + * + * Use of this source code is governed by MIT license that can be + * found in the LICENSE.txt file. + */ + + .globl $sym + .p2align 4 + .type $sym, %function +#ifndef IMPLIB_EXPORT_SHIMS + .hidden $sym +#endif +$sym: + .cfi_startproc + .cfi_def_cfa_offset 4 // Return address + // add $$0, %rsp Why GDB fails to step over call without this?! + // x86 has no support for PC-relative addressing so code is not very efficient. + // We also trash EAX here (it's call-clobbered in cdecl). 
+ call __implib.x86.get_pc_thunk.ax + addl $$_GLOBAL_OFFSET_TABLE_, %eax + movl $offset+_${lib_suffix}_tramp_table@GOTOFF(%eax), %eax + cmp $$0, %eax + je 2f +1: + jmp *%eax +2: + mov $$$number, %eax + call _${lib_suffix}_save_regs_and_resolve + jmp $sym + .cfi_endproc diff --git a/source/3rdparty/implib/arch/mips/config.ini b/source/3rdparty/implib/arch/mips/config.ini new file mode 100644 index 0000000000..941fcd17a6 --- /dev/null +++ b/source/3rdparty/implib/arch/mips/config.ini @@ -0,0 +1,3 @@ +[Arch] +PointerSize = 4 +SymbolReloc = R_MIPS_REL32 diff --git a/source/3rdparty/implib/arch/mips/table.S.tpl b/source/3rdparty/implib/arch/mips/table.S.tpl new file mode 100644 index 0000000000..8a0cbb4dd0 --- /dev/null +++ b/source/3rdparty/implib/arch/mips/table.S.tpl @@ -0,0 +1,91 @@ +/* + * Copyright 2022-2023 Yury Gribov + * + * The MIT License (MIT) + * + * Use of this source code is governed by MIT license that can be + * found in the LICENSE.txt file. + */ + + .section .note.GNU-stack,"",@progbits + + .data + + .globl _${lib_suffix}_tramp_table + .hidden _${lib_suffix}_tramp_table + .align 4 +_${lib_suffix}_tramp_table: + .zero $table_size + + .text + + .globl _${lib_suffix}_tramp_resolve + .hidden _${lib_suffix}_tramp_resolve + + .globl _${lib_suffix}_save_regs_and_resolve + .hidden _${lib_suffix}_save_regs_and_resolve + .type _${lib_suffix}_save_regs_and_resolve, %function +_${lib_suffix}_save_regs_and_resolve: + .cfi_startproc + + .set noreorder + .cpload $$25 + .set nomacro + .set noat + + // Slow path which calls dlsym, taken only on first call. + // Registers are saved acc. to "Procedure Call Standard for the MIPS Architecture". + // For DWARF directives, read https://www.imperialviolet.org/2017/01/18/cfi.html. 
+ + // TODO: push two regs at once here and in trampoline to avoid temporarily unaligned stack + +#define PUSH_REG(reg) addiu $$sp, $$sp, -4; .cfi_adjust_cfa_offset 4; sw reg, 4($$sp); .cfi_rel_offset reg, 0 +#define POP_REG(reg) addiu $$sp, $$sp, 4; .cfi_adjust_cfa_offset -4; lw reg, 0($$sp); .cfi_restore reg + +// dwarf_num = 32 + reg_num +#define PUSH_FREG(reg, dwarf_num) addiu $$sp, $$sp, -4; .cfi_adjust_cfa_offset 4; swc1 reg, 4($$sp); .cfi_rel_offset dwarf_num, 0 +#define POP_FREG(reg, dwarf_num) addiu $$sp, $$sp, 4; .cfi_adjust_cfa_offset -4; lwc1 reg, 0($$sp); .cfi_restore dwarf_num + + PUSH_REG($$ra) + PUSH_REG($$a0) + PUSH_REG($$a1) + PUSH_REG($$a2) + PUSH_REG($$a3) + PUSH_REG($$a3) // For alignment + +#if 0 + // FIXME: GCC complains about odd FP regs without -modd-spreg + PUSH_FREG($$f12, 44) + PUSH_FREG($$f13, 45) + PUSH_FREG($$f14, 46) + PUSH_FREG($$f15, 47) +#endif + + move $$a0, $$AT + + lw $$25, %call16(_${lib_suffix}_tramp_resolve)($$gp) + .reloc 1f, R_MIPS_JALR, _${lib_suffix}_tramp_resolve +1: jalr $$25 + nop + +#if 0 + POP_FREG($$f15, 47) + POP_FREG($$f14, 46) + POP_FREG($$f13, 45) + POP_FREG($$f12, 44) +#endif + + POP_REG($$a3) + POP_REG($$a3) + POP_REG($$a2) + POP_REG($$a1) + POP_REG($$a0) + POP_REG($$ra) + + jr $$ra + nop + + .set macro + .set reorder + + .cfi_endproc diff --git a/source/3rdparty/implib/arch/mips/trampoline.S.tpl b/source/3rdparty/implib/arch/mips/trampoline.S.tpl new file mode 100644 index 0000000000..a37ec5636f --- /dev/null +++ b/source/3rdparty/implib/arch/mips/trampoline.S.tpl @@ -0,0 +1,75 @@ +/* + * Copyright 2022-2023 Yury Gribov + * + * The MIT License (MIT) + * + * Use of this source code is governed by MIT license that can be + * found in the LICENSE.txt file. 
+ */ + + .globl $sym + .p2align 4 + .type $sym, %function +#ifndef IMPLIB_EXPORT_SHIMS + .hidden $sym +#endif +$sym: + .cfi_startproc + + .set noreorder + .cpload $$25 + + .set nomacro + .set noat + +1: + // Load address +#if $offset < 32768 + lw $$AT, %got(_${lib_suffix}_tramp_table)($$gp) + lw $$AT, $offset($$AT) +#else + PUSH_REG($$2) + lw $$AT, %got(_${lib_suffix}_tramp_table)($$gp) + .set macro + .set at=$$2 + lw $$AT, $offset($$AT) + .set nomacro + .set noat + POP_REG($$2) +#endif + + beqz $$AT, 3f + nop + +2: + // Fast path + j $$AT + move $$25, $$AT + +3: + // Slow path + + PUSH_REG($$25) + PUSH_REG($$ra) + + // Reserve space for 4 operands according to ABI + addiu $$sp, $$sp, -16; .cfi_adjust_cfa_offset 16 + + li $$AT, $number + lw $$25, %call16(_${lib_suffix}_save_regs_and_resolve)($$gp) + .reloc 4f, R_MIPS_JALR, _${lib_suffix}_save_regs_and_resolve +4: jalr $$25 + nop + + addiu $$sp, $$sp, 16; .cfi_adjust_cfa_offset -16 + + POP_REG($$ra) + POP_REG($$25) + + j 1b + nop + + .set macro + .set reorder + + .cfi_endproc diff --git a/source/3rdparty/implib/arch/mips64/config.ini b/source/3rdparty/implib/arch/mips64/config.ini new file mode 100644 index 0000000000..1714eb58fd --- /dev/null +++ b/source/3rdparty/implib/arch/mips64/config.ini @@ -0,0 +1,3 @@ +[Arch] +PointerSize = 8 +SymbolReloc = R_MIPS_REL32 diff --git a/source/3rdparty/implib/arch/mips64/table.S.tpl b/source/3rdparty/implib/arch/mips64/table.S.tpl new file mode 100644 index 0000000000..aa2fd35cce --- /dev/null +++ b/source/3rdparty/implib/arch/mips64/table.S.tpl @@ -0,0 +1,90 @@ +/* + * Copyright 2022-2023 Yury Gribov + * + * The MIT License (MIT) + * + * Use of this source code is governed by MIT license that can be + * found in the LICENSE.txt file. 
+ */ + + .section .note.GNU-stack,"",@progbits + + .data + + .globl _${lib_suffix}_tramp_table + .hidden _${lib_suffix}_tramp_table + .align 8 +_${lib_suffix}_tramp_table: + .zero $table_size + + .text + + .globl _${lib_suffix}_tramp_resolve + .hidden _${lib_suffix}_tramp_resolve + + .globl _${lib_suffix}_save_regs_and_resolve + .hidden _${lib_suffix}_save_regs_and_resolve + .type _${lib_suffix}_save_regs_and_resolve, %function +_${lib_suffix}_save_regs_and_resolve: + .cfi_startproc + + .set noreorder + .cpload $$25 + .set nomacro + .set noat + + // Slow path which calls dlsym, taken only on first call. + // Registers are saved acc. to "Procedure Call Standard for the MIPS Architecture". + // For DWARF directives, read https://www.imperialviolet.org/2017/01/18/cfi.html. + + // TODO: push two regs at once here and in trampoline to avoid temporarily unaligned stack + +#define PUSH_REG(reg) daddiu $$sp, $$sp, -8; .cfi_adjust_cfa_offset 8; sd reg, 0($$sp); .cfi_rel_offset reg, 0 +#define POP_REG(reg) ld reg, 0($$sp); .cfi_restore reg; daddiu $$sp, $$sp, 8; .cfi_adjust_cfa_offset -8 + +// dwarf_num = 32 + reg_num +#define PUSH_FREG(reg, dwarf_num) daddiu $$sp, $$sp, -8; .cfi_adjust_cfa_offset 8; sdc1 reg, 0($$sp); .cfi_rel_offset dwarf_num, 0 +#define POP_FREG(reg, dwarf_num) ldc1 reg, 0($$sp); .cfi_restore dwarf_num; daddiu $$sp, $$sp, 8; .cfi_adjust_cfa_offset -8 + + PUSH_REG($$ra) + PUSH_REG($$gp) + PUSH_REG($$a0) + PUSH_REG($$a1) + PUSH_REG($$a2) + PUSH_REG($$a3) + + PUSH_FREG($$f12, 44) + PUSH_FREG($$f13, 45) + PUSH_FREG($$f14, 46) + PUSH_FREG($$f15, 47) + + lui $$gp, %hi(%neg(%gp_rel(_${lib_suffix}_save_regs_and_resolve))) + daddu $$gp, $$gp, $$25 + daddiu $$gp, $$gp, %lo(%neg(%gp_rel(_${lib_suffix}_save_regs_and_resolve))) + + move $$a0, $$AT + + ld $$25, %call16(_${lib_suffix}_tramp_resolve)($$gp) + .reloc 1f, R_MIPS_JALR, _${lib_suffix}_tramp_resolve +1: jalr $$25 + nop + + POP_FREG($$f15, 47) + POP_FREG($$f14, 46) + POP_FREG($$f13, 45) + POP_FREG($$f12, 44) + 
+ POP_REG($$a3) + POP_REG($$a2) + POP_REG($$a1) + POP_REG($$a0) + POP_REG($$gp) + POP_REG($$ra) + + jr $$ra + nop + + .set macro + .set reorder + + .cfi_endproc diff --git a/source/3rdparty/implib/arch/mips64/trampoline.S.tpl b/source/3rdparty/implib/arch/mips64/trampoline.S.tpl new file mode 100644 index 0000000000..057e87d6ff --- /dev/null +++ b/source/3rdparty/implib/arch/mips64/trampoline.S.tpl @@ -0,0 +1,80 @@ +/* + * Copyright 2022-2023 Yury Gribov + * + * The MIT License (MIT) + * + * Use of this source code is governed by MIT license that can be + * found in the LICENSE.txt file. + */ + + .globl $sym + .p2align 4 + .type $sym, %function +#ifndef IMPLIB_EXPORT_SHIMS + .hidden $sym +#endif +$sym: + .cfi_startproc + + .set noreorder + .cpload $$25 + .set nomacro + .set noat + +1: + // Load address +#if $offset < 32768 + lui $$AT, %hi(%neg(%gp_rel($sym))) + daddu $$AT, $$AT, $$25 + daddiu $$AT, $$AT, %lo(%neg(%gp_rel($sym))) + ld $$AT, %got_disp(_${lib_suffix}_tramp_table)($$AT) + ld $$AT, $offset($$AT) +#else + PUSH_REG($$2) + lui $$AT, %hi(%neg(%gp_rel($sym))) + daddu $$AT, $$AT, $$25 + daddiu $$AT, $$AT, %lo(%neg(%gp_rel($sym))) + ld $$AT, %got_disp(_${lib_suffix}_tramp_table)($$AT) + .set macro + .set at=$$2 + ld $$AT, $offset($$AT) + .set nomacro + .set noat + POP_REG($$2) +#endif + + beqz $$AT, 3f + nop + +2: + // Fast path + j $$AT + move $$25, $$AT + +3: + // Slow path + + PUSH_REG($$25) + PUSH_REG($$ra) + PUSH_REG($$gp) + + lui $$gp, %hi(%neg(%gp_rel($sym))) + daddu $$gp, $$gp, $$25 + daddiu $$gp, $$gp, %lo(%neg(%gp_rel($sym))) + + ld $$25, %call16(_${lib_suffix}_save_regs_and_resolve)($$gp) + .reloc 4f, R_MIPS_JALR, _${lib_suffix}_save_regs_and_resolve +4: jalr $$25 + li $$AT, $number + + POP_REG($$gp) + POP_REG($$ra) + POP_REG($$25) + + j 1b + nop + + .set macro + .set reorder + + .cfi_endproc diff --git a/source/3rdparty/implib/arch/x86_64/config.ini b/source/3rdparty/implib/arch/x86_64/config.ini new file mode 100644 index 0000000000..6c9087a89c 
  // Helper shared by every trampoline of this library: saves the integer
  // registers listed below plus xmm0-xmm7, calls the tramp_resolve routine
  // and restores everything in exact reverse order before returning.
  // (A doubled dollar sign in this template is the string.Template escape for
  // a literal one; keep comments free of single dollar signs.)
  .globl _${lib_suffix}_save_regs_and_resolve
  .hidden _${lib_suffix}_save_regs_and_resolve
  .type _${lib_suffix}_save_regs_and_resolve, %function
_${lib_suffix}_save_regs_and_resolve:
  .cfi_startproc

// Push/pop one 8-byte register and keep the CFI description in step.
#define PUSH_REG(reg) pushq %reg ; .cfi_adjust_cfa_offset 8; .cfi_rel_offset reg, 0
#define POP_REG(reg) popq %reg ; .cfi_adjust_cfa_offset -8; .cfi_restore reg

// Grow/shrink the stack by d bytes and record the CFA change.
#define DEC_STACK(d) subq $$d, %rsp; .cfi_adjust_cfa_offset d
#define INC_STACK(d) addq $$d, %rsp; .cfi_adjust_cfa_offset -d

// Spill/reload one XMM register through a fresh 16-byte stack slot.
// movdqa is the aligned form, so rsp must be 16-byte aligned at these points.
#define PUSH_XMM_REG(reg) DEC_STACK(16); movdqa %reg, (%rsp); .cfi_rel_offset reg, 0
#define POP_XMM_REG(reg) movdqa (%rsp), %reg; .cfi_restore reg; INC_STACK(16)

  // Slow path which calls dlsym, taken only on first call.
  // All registers are stored to handle arbitrary calling conventions
  // (except x87 FPU registers which do not have to be preserved).
  // For Dwarf directives, read https://www.imperialviolet.org/2017/01/18/cfi.html.

  .cfi_def_cfa_offset 8 // Return address

  // FIXME: AVX (YMM, ZMM) registers are NOT saved to simplify code.

  PUSH_REG(rdi) // 16
  // After the push above the stack holds: saved rdi at 0, our return address
  // at 8, and at 0x10 whatever the caller pushed right before its call
  // instruction. That value is loaded into rdi for the resolver call below.
  // NOTE(review): presumably this is the function number pushed by the
  // trampoline template - confirm against trampoline.S.tpl.
  mov 0x10(%rsp), %rdi
  PUSH_REG(rax)
  PUSH_REG(rbx) // 16
  PUSH_REG(rcx)
  PUSH_REG(rdx) // 16
  PUSH_REG(rbp)
  PUSH_REG(rsi) // 16
  PUSH_REG(r8)
  PUSH_REG(r9) // 16
  PUSH_REG(r10)
  PUSH_REG(r11) // 16
  PUSH_REG(r12)
  PUSH_REG(r13) // 16
  PUSH_REG(r14)
  PUSH_REG(r15) // 16
  PUSH_XMM_REG(xmm0)
  PUSH_XMM_REG(xmm1)
  PUSH_XMM_REG(xmm2)
  PUSH_XMM_REG(xmm3)
  PUSH_XMM_REG(xmm4)
  PUSH_XMM_REG(xmm5)
  PUSH_XMM_REG(xmm6)
  PUSH_XMM_REG(xmm7)

  // Stack is just 8-byte aligned but callee will re-align to 16
  call _${lib_suffix}_tramp_resolve

  // Restore in the exact reverse order of the saves above.
  POP_XMM_REG(xmm7)
  POP_XMM_REG(xmm6)
  POP_XMM_REG(xmm5)
  POP_XMM_REG(xmm4)
  POP_XMM_REG(xmm3)
  POP_XMM_REG(xmm2)
  POP_XMM_REG(xmm1)
  POP_XMM_REG(xmm0) // 16
  POP_REG(r15)
  POP_REG(r14) // 16
  POP_REG(r13)
  POP_REG(r12) // 16
  POP_REG(r11)
  POP_REG(r10) // 16
  POP_REG(r9)
  POP_REG(r8) // 16
  POP_REG(rsi)
  POP_REG(rbp) // 16
  POP_REG(rdx)
  POP_REG(rcx) // 16
  POP_REG(rbx)
  POP_REG(rax) // 16
  POP_REG(rdi)

  // The value pushed by the caller is still on its stack; the caller removes it.
  ret

  .cfi_endproc
def run(args, stdin=''):
  """Run an external program and return its decoded ``(stdout, stderr)``.

  Args:
    args: Command line as a list; ``args[0]`` is the program name.
    stdin: Text fed to the program's standard input (encoded as UTF-8).

  Returns:
    Tuple ``(out, err)`` of the program's stdout and stderr decoded as UTF-8.

  The program is considered failed, and ``error`` is called (which
  terminates the process), when it exits with a non-zero status or writes
  anything to stderr.
  """
  env = os.environ.copy()
  # Force untranslated (English) tool output so that callers can parse it.
  # The portable locale name is upper-case "C"; the previous lower-case 'c'
  # is not a defined locale name and only worked because an unknown locale
  # makes programs fall back to their startup default.
  env['LC_ALL'] = 'C'
  env.pop('LANG', None)
  with subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE, env=env) as p:
    out, err = p.communicate(input=stdin.encode('utf-8'))
  out = out.decode('utf-8')
  err = err.decode('utf-8')
  if p.returncode != 0 or err:
    error(f"{args[0]} failed with retcode {p.returncode}:\n{err}")
  return out, err
def collect_syms(f):
  """Return the symbols that ``readelf -sW`` reports for ELF file *f*.

  Each symbol is a dict keyed by readelf's column titles (``Value`` parsed
  as hex, ``Size`` as int) plus ``Default``, ``Version`` and, once c++filt
  has been consulted, ``Demangled Name``. A symbol name (including any
  ``@version`` suffix) is reported only the first time it is seen.
  """

  # All symtabs are dumped: --dyn-syms does not always work for some reason
  readelf_out, _ = run(['readelf', '-sW', f])

  header = None
  seen = set()
  result = []
  for raw in readelf_out.splitlines():
    row = raw.strip()
    if not row:
      # A blank line terminates the current symtab
      header = None
      continue

    fields = re.split(r' +', row)

    if row.startswith('Num'):  # Column titles
      if header is not None:
        error("multiple headers in output of readelf")
      # readelf versions disagree about colons in titles, so strip them
      header = make_toc([title.replace(':', '') for title in fields])
      continue

    if header is None:
      # Text outside of any table
      continue

    entry = parse_row(fields, header, ['Value'])
    full_name = entry['Name']
    if not full_name or full_name in seen:
      continue
    seen.add(full_name)

    # Base 0 because readelf is inconsistent about the Size format
    entry['Size'] = int(entry['Size'], 0)
    if '@' in full_name:
      # "name@@ver" marks the default version, "name@ver" a non-default one
      entry['Default'] = '@@' in full_name
      entry['Name'], entry['Version'] = re.split(r'@+', full_name)
    else:
      entry['Default'] = True
      entry['Version'] = None
    result.append(entry)

  if header is None:
    error(f"failed to analyze symbols in {f}")

  # Attach demangled names, one c++filt output line per symbol
  if result:
    mangled = '\n'.join(entry['Name'] for entry in result)
    filt_out, _ = run(['c++filt'], mangled)
    # Some c++filts append newlines at the end
    demangled = filt_out.rstrip("\n").split("\n")
    for idx, pretty in enumerate(demangled):
      result[idx]['Demangled Name'] = pretty

  return result
+ if toc is not None: + error("multiple headers in output of readelf") + words = re.split(r'\s\s+', line) # "Symbol's Name + Addend" + toc = make_toc(words) + elif toc is not None: + line = re.sub(r' \+ ', '+', line) + words = re.split(r'\s+', line) + rel = parse_row(words, toc, ['Offset', 'Info']) + rels.append(rel) + # Split symbolic representation + sym_name = 'Symbol\'s Name + Addend' + if sym_name not in rel and 'Symbol\'s Name' in rel: + # Adapt to different versions of readelf + rel[sym_name] = rel['Symbol\'s Name'] + '+0' + if rel[sym_name]: + p = rel[sym_name].split('+') + if len(p) == 1: + p = ['', p[0]] + rel[sym_name] = (p[0], int(p[1], 16)) + + if toc is None: + error(f"failed to analyze relocations in {f}") + + return rels + +def collect_sections(f): + """Collect section info from ELF.""" + + out, _ = run(['readelf', '-SW', f]) + + toc = None + sections = [] + for line in out.splitlines(): + line = line.strip() + if not line: + continue + line = re.sub(r'\[\s+', '[', line) + words = re.split(r' +', line) + if line.startswith('[Nr]'): # Header? + if toc is not None: + error("multiple headers in output of readelf") + toc = make_toc(words, {'Addr' : 'Address'}) + elif line.startswith('[') and toc is not None: + sec = parse_row(words, toc, ['Address', 'Off', 'Size']) + if 'A' in sec['Flg']: # Allocatable section? 
def read_unrelocated_data(input_name, syms, secs):
  """Read the raw (unrelocated) bytes of every symbol from the ELF file.

  Args:
    input_name: Path of the ELF file.
    syms: Mapping of symbol name to a dict with integer ``Value`` (address)
      and ``Size``.
    secs: List of section dicts with integer ``Address``, ``Size`` and
      ``Off`` (file offset).

  Returns:
    Dict mapping each symbol name to the ``Size`` bytes stored for it.

  ``error`` is called (terminating the process) when a symbol is not fully
  contained in exactly one section.
  """
  def is_symbol_in_section(sym, sec):
    sec_end = sec['Address'] + sec['Size']
    is_start_in_section = sec['Address'] <= sym['Value'] < sec_end
    is_end_in_section = sym['Value'] + sym['Size'] <= sec_end
    return is_start_in_section and is_end_in_section

  data = {}
  with open(input_name, 'rb') as f:
    for name, s in sorted(syms.items(), key=lambda item: item[1]['Value']):
      # TODO: binary search (bisect)
      matches = [sec for sec in secs if is_symbol_in_section(s, sec)]
      if len(matches) != 1:
        error(f"failed to locate section for interval [{s['Value']:x}, {s['Value'] + s['Size']:x})")
      sec = matches[0]
      # The symbol lives (Value - Address) bytes into its section; seeking to
      # the section start alone would return the section's leading bytes for
      # every symbol.
      f.seek(sec['Off'] + (s['Value'] - sec['Address']))
      data[name] = f.read(s['Size'])
  return data

def collect_relocated_data(syms, bites, rels, ptr_size, reloc_types):
  """Split each symbol's bytes into fields and mark the relocated ones.

  Args:
    syms: Mapping of symbol name to a dict with ``Value``, ``Size`` and
      ``Demangled Name``.
    bites: Mapping of symbol name to its raw bytes.
    rels: List of relocation dicts with ``Type`` and integer ``Offset``.
    ptr_size: Size of one pointer-sized field in bytes.
    reloc_types: Relocation type names that denote symbol references.

  Returns:
    Dict mapping each symbol name to a list of ``(kind, value)`` pairs:
    ``('byte', int)`` per byte for "typeinfo name" symbols, otherwise
    ``('offset', int)`` per little-endian pointer-sized word, replaced by
    ``('reloc', rel)`` where a matching relocation targets that word.
  """
  data = {}
  for name, s in sorted(syms.items()):
    b = bites.get(name)
    assert b is not None
    if s['Demangled Name'].startswith('typeinfo name'):
      data[name] = [('byte', int(x)) for x in b]
      continue
    # i is already a byte offset (the range steps by ptr_size), so it must
    # not be scaled by ptr_size again when slicing.
    data[name] = [
      ('offset', int.from_bytes(b[i:i + ptr_size], byteorder='little'))
      for i in range(0, len(b), ptr_size)]
    start = s['Value']
    finish = start + s['Size']
    # TODO: binary search (bisect)
    for rel in rels:
      if rel['Type'] in reloc_types and start <= rel['Offset'] < finish:
        i = (rel['Offset'] - start) // ptr_size
        assert i < len(data[name])
        data[name][i] = 'reloc', rel
  return data

def generate_vtables(cls_tables, cls_syms, cls_data):
  """Generate C code that defines copies of the collected vtables/typeinfos.

  Args:
    cls_tables: Unused; kept for interface compatibility.
    cls_syms: Mapping of symbol name to symbol dict (``Demangled Name`` is
      read).
    cls_data: Mapping of symbol name to the field list produced by
      ``collect_relocated_data``.

  Returns:
    The generated C source as one string.
  """
  c_types = {
    'reloc'  : 'const void *',
    'byte'   : 'unsigned char',
    'offset' : 'size_t'
  }
  sym_key = 'Symbol\'s Name + Addend'

  ss = []
  ss.append('''\
#ifdef __cplusplus
extern "C" {
#endif

''')

  # Print externs

  printed = set()
  for _, data in sorted(cls_data.items()):
    for typ, val in data:
      if typ != 'reloc':
        continue
      sym_name, _ = val[sym_key]
      sym_name = re.sub(r'@.*', '', sym_name)  # Can we pin version in C?
      if sym_name not in cls_syms and sym_name not in printed:
        # Remember the name so each external symbol is declared only once
        printed.add(sym_name)
        ss.append(f'''\
extern const char {sym_name}[];

''')

  # Collect variable infos

  code_info = {}

  for name, s in sorted(cls_syms.items()):
    data = cls_data[name]
    if s['Demangled Name'].startswith('typeinfo name'):
      declarator = 'const unsigned char %s[]'
    else:
      field_types = (f'{c_types[typ]} field_{i};' for i, (typ, _) in enumerate(data))
      # %%s survives this formatting step as the slot for the type name
      declarator = 'const struct { %s } %%s' % ' '.join(field_types)  # pylint: disable=C0209  # consider-using-f-string
    vals = []
    for typ, val in data:
      if typ != 'reloc':
        vals.append(str(val) + 'UL')
      else:
        sym_name, addend = val[sym_key]
        sym_name = re.sub(r'@.*', '', sym_name)  # Can we pin version in C?
        vals.append(f'(const char *)&{sym_name} + {addend}')
    code_info[name] = (declarator, '{ %s }' % ', '.join(vals))  # pylint: disable= C0209  # consider-using-f-string

  # Print declarations

  for name, (decl, _) in sorted(code_info.items()):
    type_name = name + '_type'
    type_decl = decl % type_name
    ss.append(f'''\
typedef {type_decl};
extern __attribute__((weak)) {type_name} {name};
''')

  # Print definitions

  for name, (_, init) in sorted(code_info.items()):
    type_name = name + '_type'
    ss.append(f'''\
const {type_name} {name} = {init};
''')

  ss.append('''\
#ifdef __cplusplus
} // extern "C"
#endif
''')

  return ''.join(ss)
"""Driver function""" + parser = argparse.ArgumentParser(description="Generate wrappers for shared library functions.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=f"""\ +Examples: + $ python3 {me} /usr/lib/x86_64-linux-gnu/libaccountsservice.so.0 + Generating libaccountsservice.so.0.tramp.S... + Generating libaccountsservice.so.0.init.c... +""") + + parser.add_argument('library', + metavar='LIB', + help="Library to be wrapped.") + parser.add_argument('--verbose', '-v', + help="Print diagnostic info", + action='count', + default=0) + parser.add_argument('--dlopen', + help="Emit dlopen call (default)", + dest='dlopen', action='store_true', default=True) + parser.add_argument('--no-dlopen', + help="Do not emit dlopen call (user must load/unload library himself)", + dest='dlopen', action='store_false') + parser.add_argument('--dlopen-callback', + help="Call user-provided custom callback to load library instead of dlopen", + default='') + parser.add_argument('--dlsym-callback', + help="Call user-provided custom callback to resolve a symbol, " + "instead of dlsym", + default='') + parser.add_argument('--library-load-name', + help="Use custom name for dlopened library (default is SONAME)") + parser.add_argument('--lazy-load', + help="Load library on first call to any of it's functions (default)", + dest='lazy_load', action='store_true', default=True) + parser.add_argument('--no-lazy-load', + help="Load library at program start", + dest='lazy_load', action='store_false') + parser.add_argument('--vtables', + help="Intercept virtual tables (EXPERIMENTAL)", + dest='vtables', action='store_true', default=False) + parser.add_argument('--no-vtables', + help="Do not intercept virtual tables (default)", + dest='vtables', action='store_false') + parser.add_argument('--no-weak-symbols', + help="Don't bind weak symbols", dest='no_weak_symbols', + action='store_true', default=False) + parser.add_argument('--target', + help="Target platform triple e.g. 
x86_64-unknown-linux-gnu or arm-none-eabi " + "(atm x86_64, i[0-9]86, arm/armhf/armeabi, aarch64/armv8, " + "mips/mipsel, mips64/mip64el and e2k are supported)", + default=os.uname()[-1]) + parser.add_argument('--symbol-list', + help="Path to file with symbols that should be present in wrapper " + "(all by default)") + parser.add_argument('--symbol-prefix', + metavar='PFX', + help="Prefix wrapper symbols with PFX", + default='') + parser.add_argument('-q', '--quiet', + help="Do not print progress info", + action='store_true') + parser.add_argument('--outdir', '-o', + help="Path to create wrapper at", + default='./') + + args = parser.parse_args() + + input_name = args.library + verbose = args.verbose + dlopen_callback = args.dlopen_callback + dlsym_callback = args.dlsym_callback + dlopen = args.dlopen + lazy_load = args.lazy_load + if args.target.startswith('arm'): + target = 'arm' # Handle armhf-..., armel-... + elif re.match(r'^i[0-9]86', args.target): + target = 'i386' + elif args.target.startswith('mips64'): + target = 'mips64' # Handle mips64-..., mips64el-..., mips64le-... + elif args.target.startswith('mips'): + target = 'mips' # Handle mips-..., mipsel-..., mipsle-... 
+ else: + target = args.target.split('-')[0] + quiet = args.quiet + outdir = args.outdir + + if args.symbol_list is None: + funs = None + else: + with open(args.symbol_list, 'r') as f: + funs = [] + for line in re.split(r'\r?\n', f.read()): + line = re.sub(r'#.*', '', line) + line = line.strip() + if line: + funs.append(line) + + if args.library_load_name is not None: + load_name = args.library_load_name + else: + load_name = read_soname(input_name) + if load_name is None: + load_name = os.path.basename(input_name) + + # Collect target info + + target_dir = os.path.join(root, 'arch', target) + + if not os.path.exists(target_dir): + error(f"unknown architecture '{target}'") + + cfg = configparser.ConfigParser(inline_comment_prefixes=';') + cfg.read(target_dir + '/config.ini') + + ptr_size = int(cfg['Arch']['PointerSize']) + symbol_reloc_types = set(re.split(r'\s*,\s*', cfg['Arch']['SymbolReloc'])) + + def is_exported(s): + conditions = [ + s['Bind'] != 'LOCAL', + s['Type'] != 'NOTYPE', + s['Ndx'] != 'UND', + s['Name'] not in ['', '_init', '_fini']] + if args.no_weak_symbols: + conditions.append(s['Bind'] != 'WEAK') + return all(conditions) + + syms = list(filter(is_exported, collect_syms(input_name))) + + def is_data_symbol(s): + return (s['Type'] == 'OBJECT' + # Allow vtables if --vtables is on + and not (' for ' in s['Demangled Name'] and args.vtables)) + + exported_data = [s['Name'] for s in syms if is_data_symbol(s)] + if exported_data: + # TODO: we can generate wrappers for const data without relocations (or only code relocations) + warn(f"library '{input_name}' contains data symbols which won't be intercepted: " + + ', '.join(exported_data)) + + # Collect functions + # TODO: warn if user-specified functions are missing + + orig_funs = filter(lambda s: s['Type'] == 'FUNC', syms) + + all_funs = set() + warn_versioned = False + for s in orig_funs: + if not s['Default']: + # TODO: support versions + if not warn_versioned: + warn(f"library {input_name} contains 
versioned symbols which are NYI") + warn_versioned = True + if verbose: + print(f"Skipping versioned symbol {s['Name']}") + continue + all_funs.add(s['Name']) + + if funs is None: + funs = sorted(list(all_funs)) + if not funs and not quiet: + warn(f"no public functions were found in {input_name}") + else: + missing_funs = [name for name in funs if name not in all_funs] + if missing_funs: + warn("some user-specified functions are not present in library: " + ', '.join(missing_funs)) + funs = [name for name in funs if name in all_funs] + + if verbose: + print("Exported functions:") + for i, fun in enumerate(funs): + print(f" {i}: {fun}") + + # Collect vtables + + if args.vtables: + cls_tables = {} + cls_syms = {} + + for s in syms: + m = re.match(r'^(vtable|typeinfo|typeinfo name) for (.*)', s['Demangled Name']) + if m is not None and is_exported(s): + typ, cls = m.groups() + name = s['Name'] + cls_tables.setdefault(cls, {})[typ] = name + cls_syms[name] = s + + if verbose: + print("Exported classes:") + for cls, _ in sorted(cls_tables.items()): + print(f" {cls}") + + secs = collect_sections(input_name) + if verbose: + print("Sections:") + for sec in secs: + print(f" {sec['Name']}: [{sec['Address']:x}, {sec['Address'] + sec['Size']:x}), " + f"at {sec['Off']:x}") + + bites = read_unrelocated_data(input_name, cls_syms, secs) + + rels = collect_relocs(input_name) + if verbose: + print("Relocs:") + for rel in rels: + sym_add = rel['Symbol\'s Name + Addend'] + print(f" {rel['Offset']}: {sym_add}") + + cls_data = collect_relocated_data(cls_syms, bites, rels, ptr_size, symbol_reloc_types) + if verbose: + print("Class data:") + for name, data in sorted(cls_data.items()): + demangled_name = cls_syms[name]['Demangled Name'] + print(f" {name} ({demangled_name}):") + for typ, val in data: + print(" " + str(val if typ != 'reloc' else val['Symbol\'s Name + Addend'])) + + # Generate assembly code + + suffix = os.path.basename(input_name) + lib_suffix = re.sub(r'[^a-zA-Z_0-9]+', '_', 
suffix) + + tramp_file = f'{suffix}.tramp.S' + with open(os.path.join(outdir, tramp_file), 'w') as f: + if not quiet: + print(f"Generating {tramp_file}...") + with open(target_dir + '/table.S.tpl', 'r') as t: + table_text = string.Template(t.read()).substitute( + lib_suffix=lib_suffix, + table_size=ptr_size*(len(funs) + 1)) + f.write(table_text) + + with open(target_dir + '/trampoline.S.tpl', 'r') as t: + tramp_tpl = string.Template(t.read()) + + for i, name in enumerate(funs): + tramp_text = tramp_tpl.substitute( + lib_suffix=lib_suffix, + sym=args.symbol_prefix + name, + offset=i*ptr_size, + number=i) + f.write(tramp_text) + + # Generate C code + + init_file = f'{suffix}.init.c' + with open(os.path.join(outdir, init_file), 'w') as f: + if not quiet: + print(f"Generating {init_file}...") + with open(os.path.join(root, 'arch/common/init.c.tpl'), 'r') as t: + if funs: + sym_names = ',\n '.join(f'"{name}"' for name in funs) + ',' + else: + sym_names = '' + init_text = string.Template(t.read()).substitute( + lib_suffix=lib_suffix, + load_name=load_name, + dlopen_callback=dlopen_callback, + dlsym_callback=dlsym_callback, + has_dlopen_callback=int(bool(dlopen_callback)), + has_dlsym_callback=int(bool(dlsym_callback)), + no_dlopen=int(not dlopen), + lazy_load=int(lazy_load), + sym_names=sym_names) + f.write(init_text) + if args.vtables: + vtable_text = generate_vtables(cls_tables, cls_syms, cls_data) + f.write(vtable_text) + +if __name__ == '__main__': + main() diff --git a/source/api_cc/src/DataModifier.cc b/source/api_cc/src/DataModifier.cc index fe65947b87..658ec68442 100644 --- a/source/api_cc/src/DataModifier.cc +++ b/source/api_cc/src/DataModifier.cc @@ -31,6 +31,20 @@ void DipoleChargeModifier::init(const std::string& model, options.config.set_inter_op_parallelism_threads(num_inter_nthreads); options.config.set_intra_op_parallelism_threads(num_intra_nthreads); deepmd::load_op_library(); + int gpu_num = -1; +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM + 
DPGetDeviceCount(gpu_num); // check current device environment + if (gpu_num > 0) { + options.config.set_allow_soft_placement(true); + options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction( + 0.9); + options.config.mutable_gpu_options()->set_allow_growth(true); + DPErrcheck(DPSetDevice(gpu_rank % gpu_num)); + std::string str = "/gpu:"; + str += std::to_string(gpu_rank % gpu_num); + graph::SetDefaultDevice(str, graph_def); + } +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM deepmd::check_status(NewSession(options, &session)); deepmd::check_status(ReadBinaryProto(Env::Default(), model, graph_def)); deepmd::check_status(session->Create(*graph_def)); @@ -87,18 +101,18 @@ void DipoleChargeModifier::run_model( Tensor output_f = output_tensors[cc++]; Tensor output_v = output_tensors[cc++]; Tensor output_av = output_tensors[cc++]; - assert(output_f.dims() == 2), "dim of output tensor should be 2"; - assert(output_v.dims() == 2), "dim of output tensor should be 2"; - assert(output_av.dims() == 2), "dim of output tensor should be 2"; + assert(output_f.dims() == 2 && "dim of output tensor should be 2"); + assert(output_v.dims() == 2 && "dim of output tensor should be 2"); + assert(output_av.dims() == 2 && "dim of output tensor should be 2"); int nframes = output_f.dim_size(0); int natoms = output_f.dim_size(1) / 3; - assert(output_f.dim_size(0) == 1), "nframes should match"; - assert(natoms == nall), "natoms should be nall"; - assert(output_v.dim_size(0) == nframes), "nframes should match"; - assert(output_v.dim_size(1) == 9), "dof of virial should be 9"; - assert(output_av.dim_size(0) == nframes), "nframes should match"; - assert(output_av.dim_size(1) == natoms * 9), - "dof of atom virial should be 9 * natoms"; + assert(output_f.dim_size(0) == 1 && "nframes should match"); + assert(natoms == nall && "natoms should be nall"); + assert(output_v.dim_size(0) == nframes && "nframes should match"); + assert(output_v.dim_size(1) == 9 && "dof of virial should be 
9"); + assert(output_av.dim_size(0) == nframes && "nframes should match"); + assert(output_av.dim_size(1) == natoms * 9 && + "dof of atom virial should be 9 * natoms"); auto of = output_f.flat(); auto ov = output_v.flat(); diff --git a/source/api_cc/src/DeepTensor.cc b/source/api_cc/src/DeepTensor.cc index a4b7ddb90f..30ff99497c 100644 --- a/source/api_cc/src/DeepTensor.cc +++ b/source/api_cc/src/DeepTensor.cc @@ -30,6 +30,20 @@ void DeepTensor::init(const std::string &model, options.config.set_inter_op_parallelism_threads(num_inter_nthreads); options.config.set_intra_op_parallelism_threads(num_intra_nthreads); deepmd::load_op_library(); + int gpu_num = -1; +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM + DPGetDeviceCount(gpu_num); // check current device environment + if (gpu_num > 0) { + options.config.set_allow_soft_placement(true); + options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction( + 0.9); + options.config.mutable_gpu_options()->set_allow_growth(true); + DPErrcheck(DPSetDevice(gpu_rank % gpu_num)); + std::string str = "/gpu:"; + str += std::to_string(gpu_rank % gpu_num); + graph::SetDefaultDevice(str, graph_def); + } +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM deepmd::check_status(NewSession(options, &session)); deepmd::check_status(ReadBinaryProto(Env::Default(), model, graph_def)); deepmd::check_status(session->Create(*graph_def)); @@ -187,25 +201,27 @@ void DeepTensor::run_model( Tensor output_at = output_tensors[3]; Tensor output_av = output_tensors[4]; // this is the new model, output has to be rank 2 tensor - assert(output_gt.dims() == 2), "dim of output tensor should be 2"; - assert(output_f.dims() == 2), "dim of output tensor should be 2"; - assert(output_v.dims() == 2), "dim of output tensor should be 2"; - assert(output_at.dims() == 2), "dim of output tensor should be 2"; - assert(output_av.dims() == 2), "dim of output tensor should be 2"; + assert(output_gt.dims() == 2 && "dim of output tensor should be 2"); + 
assert(output_f.dims() == 2 && "dim of output tensor should be 2"); + assert(output_v.dims() == 2 && "dim of output tensor should be 2"); + assert(output_at.dims() == 2 && "dim of output tensor should be 2"); + assert(output_av.dims() == 2 && "dim of output tensor should be 2"); // also check the tensor shapes - assert(output_gt.dim_size(0) == 1), "nframes should match"; - assert(output_gt.dim_size(1) == odim), "dof of global tensor should be odim"; - assert(output_f.dim_size(0) == 1), "nframes should match"; - assert(output_f.dim_size(1) == odim * nall * 3), - "dof of force should be odim * nall * 3"; - assert(output_v.dim_size(0) == 1), "nframes should match"; - assert(output_v.dim_size(1) == odim * 9), "dof of virial should be odim * 9"; - assert(output_at.dim_size(0) == 1), "nframes should match"; - assert(output_at.dim_size(1) == nsel * odim), - "dof of atomic tensor should be nsel * odim"; - assert(output_av.dim_size(0) == 1), "nframes should match"; - assert(output_av.dim_size(1) == odim * nall * 9), - "dof of atomic virial should be odim * nall * 9"; + assert(output_gt.dim_size(0) == 1 && "nframes should match"); + assert(output_gt.dim_size(1) == odim && + "dof of global tensor should be odim"); + assert(output_f.dim_size(0) == 1 && "nframes should match"); + assert(output_f.dim_size(1) == odim * nall * 3 && + "dof of force should be odim * nall * 3"); + assert(output_v.dim_size(0) == 1 && "nframes should match"); + assert(output_v.dim_size(1) == odim * 9 && + "dof of virial should be odim * 9"); + assert(output_at.dim_size(0) == 1 && "nframes should match"); + assert(output_at.dim_size(1) == nsel * odim && + "dof of atomic tensor should be nsel * odim"); + assert(output_av.dim_size(0) == 1 && "nframes should match"); + assert(output_av.dim_size(1) == odim * nall * 9 && + "dof of atomic virial should be odim * nall * 9"); auto ogt = output_gt.flat(); auto of = output_f.flat(); diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc index 
0e2526414d..fad7e374ab 100644 --- a/source/api_cc/src/common.cc +++ b/source/api_cc/src/common.cc @@ -849,13 +849,13 @@ void deepmd::select_map(std::vector& out, const int& nall2) { for (int kk = 0; kk < nframes; ++kk) { #ifdef DEBUG - assert(in.size() / stride * stride == in.size()), - "in size should be multiples of stride" + assert(in.size() / stride * stride == in.size() && + "in size should be multiples of stride") #endif for (int ii = 0; ii < in.size() / stride / nframes; ++ii) { #ifdef DEBUG - assert(ii < idx_map.size()), "idx goes over the idx map size"; - assert(idx_map[ii] < out.size()), "mappped idx goes over the out size"; + assert(ii < idx_map.size() && "idx goes over the idx map size"); + assert(idx_map[ii] < out.size() && "mappped idx goes over the out size"); #endif if (idx_map[ii] >= 0) { int to_ii = idx_map[ii]; @@ -896,13 +896,13 @@ void deepmd::select_map_inv(std::vector& out, const std::vector& idx_map, const int& stride) { #ifdef DEBUG - assert(in.size() / stride * stride == in.size()), - "in size should be multiples of stride" + assert(in.size() / stride * stride == in.size() && + "in size should be multiples of stride"); #endif - for (int ii = 0; ii < out.size() / stride; ++ii) { + for (int ii = 0; ii < out.size() / stride; ++ii) { #ifdef DEBUG - assert(ii < idx_map.size()), "idx goes over the idx map size"; - assert(idx_map[ii] < in.size()), "from idx goes over the in size"; + assert(ii < idx_map.size() && "idx goes over the idx map size"); + assert(idx_map[ii] < in.size() && "from idx goes over the in size"); #endif if (idx_map[ii] >= 0) { int from_ii = idx_map[ii]; diff --git a/source/cmake/Findtensorflow.cmake b/source/cmake/Findtensorflow.cmake index 3ebbd4ea62..6f288f2d2b 100644 --- a/source/cmake/Findtensorflow.cmake +++ b/source/cmake/Findtensorflow.cmake @@ -41,6 +41,7 @@ endif() if(BUILD_CPP_IF AND USE_TF_PYTHON_LIBS + AND NOT CMAKE_CROSSCOMPILING AND NOT SKBUILD AND NOT INSTALL_TENSORFLOW) # Here we try to install 
libtensorflow_cc.so as well as diff --git a/source/install/build_lammps.sh b/source/install/build_lammps.sh index 6798212086..c8cfa6ea54 100755 --- a/source/install/build_lammps.sh +++ b/source/install/build_lammps.sh @@ -23,7 +23,7 @@ fi cd ${BUILD_TMP_DIR}/lammps-${LAMMPS_VERSION} mkdir -p ${BUILD_TMP_DIR}/lammps-${LAMMPS_VERSION}/build cd ${BUILD_TMP_DIR}/lammps-${LAMMPS_VERSION}/build -cmake -C ../cmake/presets/all_off.cmake -D PKG_PLUGIN=ON -D PKG_KSPACE=ON -D PKG_MOLECULE=ON -DLAMMPS_EXCEPTIONS=yes -D BUILD_SHARED_LIBS=yes -D LAMMPS_INSTALL_RPATH=ON -D CMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -D CMAKE_INSTALL_LIBDIR=lib -D CMAKE_INSTALL_FULL_LIBDIR=${INSTALL_PREFIX}/lib ../cmake +cmake -C ../cmake/presets/all_off.cmake -D PKG_PLUGIN=ON -D PKG_MOLECULE=ON -DLAMMPS_EXCEPTIONS=yes -D BUILD_SHARED_LIBS=yes -D LAMMPS_INSTALL_RPATH=ON -D CMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -D CMAKE_INSTALL_LIBDIR=lib -D CMAKE_INSTALL_FULL_LIBDIR=${INSTALL_PREFIX}/lib ../cmake make -j${NPROC} make install diff --git a/source/lib/CMakeLists.txt b/source/lib/CMakeLists.txt index 0f5bdb73fb..1631eb8c35 100644 --- a/source/lib/CMakeLists.txt +++ b/source/lib/CMakeLists.txt @@ -13,7 +13,8 @@ if(USE_CUDA_TOOLKIT) add_definitions("-DGOOGLE_CUDA") add_subdirectory(src/gpu) set(EXTRA_LIBS ${EXTRA_LIBS} deepmd_op_cuda) - target_link_libraries(${libname} INTERFACE deepmd_dyn_cudart ${EXTRA_LIBS}) + target_link_libraries(${libname} PUBLIC deepmd_dyn_cudart) + target_link_libraries(${libname} INTERFACE ${EXTRA_LIBS}) # gpu_cuda.h target_include_directories( ${libname} PUBLIC $ diff --git a/source/lib/include/ComputeDescriptor.h b/source/lib/include/ComputeDescriptor.h index 6a0153f17c..7c3eaf4cd2 100644 --- a/source/lib/include/ComputeDescriptor.h +++ b/source/lib/include/ComputeDescriptor.h @@ -829,8 +829,8 @@ void compute_descriptor_se_a_extf(std::vector &descrpt_a, ef[ii] = ef_[ii]; } } - assert(fabs(deepmd::dot3(ef, ef) - 1.0) < 1e-12), - "ef should be a normalized std::vector"; + 
assert(fabs(deepmd::dot3(ef, ef) - 1.0) < 1e-12 && + "ef should be a normalized std::vector"); // compute the diff of the neighbors std::vector > sel_a_diff(sec_a.back()); @@ -970,8 +970,8 @@ void compute_descriptor_se_a_ef_para(std::vector &descrpt_a, ef[ii] = ef_[ii]; } } - assert(fabs(deepmd::dot3(ef, ef) - 1.0) < 1e-12), - "ef should be a normalized vector"; + assert(fabs(deepmd::dot3(ef, ef) - 1.0) < 1e-12 && + "ef should be a normalized vector"); // compute the diff of the neighbors std::vector > sel_a_diff(sec_a.back()); @@ -1107,8 +1107,8 @@ void compute_descriptor_se_a_ef_vert(std::vector &descrpt_a, ef[ii] = ef_[ii]; } } - assert(fabs(deepmd::dot3(ef, ef) - 1.0) < 1e-12), - "ef should be a normalized vector"; + assert(fabs(deepmd::dot3(ef, ef) - 1.0) < 1e-12 && + "ef should be a normalized vector"); // compute the diff of the neighbors std::vector > sel_a_diff(sec_a.back()); diff --git a/source/lib/include/gpu_cuda.h b/source/lib/include/gpu_cuda.h index 1e750e0ea0..fb467674cb 100644 --- a/source/lib/include/gpu_cuda.h +++ b/source/lib/include/gpu_cuda.h @@ -4,6 +4,7 @@ #include #include +#include #include #include "errors.h" @@ -24,27 +25,31 @@ inline void DPAssert(cudaError_t code, int line, bool abort = true) { if (code != cudaSuccess) { - fprintf(stderr, "cuda assert: %s %s %d\n", cudaGetErrorString(code), file, - line); + std::string error_msg = "CUDA Runtime library throws an error: " + + std::string(cudaGetErrorString(code)) + + ", in file " + std::string(file) + ": " + + std::to_string(line); if (code == 2) { // out of memory - fprintf(stderr, - "Your memory is not enough, thus an error has been raised " - "above. You need to take the following actions:\n" - "1. Check if the network size of the model is too large.\n" - "2. Check if the batch size of training or testing is too large. " - "You can set the training batch size to `auto`.\n" - "3. Check if the number of atoms is too large.\n" - "4. 
Check if another program is using the same GPU by execuating " - "`nvidia-smi`. " - "The usage of GPUs is controlled by `CUDA_VISIBLE_DEVICES` " - "environment variable.\n"); + error_msg += + "\nYour memory is not enough, thus an error has been raised " + "above. You need to take the following actions:\n" + "1. Check if the network size of the model is too large.\n" + "2. Check if the batch size of training or testing is too large. " + "You can set the training batch size to `auto`.\n" + "3. Check if the number of atoms is too large.\n" + "4. Check if another program is using the same GPU by execuating " + "`nvidia-smi`. " + "The usage of GPUs is controlled by `CUDA_VISIBLE_DEVICES` " + "environment variable."; if (abort) { - throw deepmd::deepmd_exception_oom("CUDA Assert"); + throw deepmd::deepmd_exception_oom(error_msg); } } if (abort) { - throw deepmd::deepmd_exception("CUDA Assert"); + throw deepmd::deepmd_exception(error_msg); + } else { + fprintf(stderr, "%s\n", error_msg.c_str()); } } } @@ -56,27 +61,23 @@ inline void nborAssert(cudaError_t code, int line, bool abort = true) { if (code != cudaSuccess) { - fprintf(stderr, "cuda assert: %s %s %d\n", - "DeePMD-kit:\tillegal nbor list sorting", file, line); - if (code == 2) { - // out of memory - fprintf(stderr, - "Your memory is not enough, thus an error has been raised " - "above. You need to take the following actions:\n" - "1. Check if the network size of the model is too large.\n" - "2. Check if the batch size of training or testing is too large. " - "You can set the training batch size to `auto`.\n" - "3. Check if the number of atoms is too large.\n" - "4. Check if another program is using the same GPU by execuating " - "`nvidia-smi`. 
" - "The usage of GPUs is controlled by `CUDA_VISIBLE_DEVICES` " - "environment variable.\n"); + std::string error_msg = "DeePMD-kit: Illegal nbor list sorting: "; + try { + DPAssert(code, file, line, true); + } catch (deepmd::deepmd_exception_oom &e) { + error_msg += e.what(); if (abort) { - throw deepmd::deepmd_exception_oom("CUDA Assert"); + throw deepmd::deepmd_exception_oom(error_msg); + } else { + fprintf(stderr, "%s\n", error_msg.c_str()); + } + } catch (deepmd::deepmd_exception &e) { + error_msg += e.what(); + if (abort) { + throw deepmd::deepmd_exception(error_msg); + } else { + fprintf(stderr, "%s\n", error_msg.c_str()); } - } - if (abort) { - throw deepmd::deepmd_exception("CUDA Assert"); } } } diff --git a/source/lib/include/gpu_rocm.h b/source/lib/include/gpu_rocm.h index bb404720bc..fbd5e1ce3f 100644 --- a/source/lib/include/gpu_rocm.h +++ b/source/lib/include/gpu_rocm.h @@ -4,6 +4,7 @@ #include #include +#include #include // #include // #include @@ -26,10 +27,14 @@ inline void DPAssert(hipError_t code, int line, bool abort = true) { if (code != hipSuccess) { - fprintf(stderr, "hip assert: %s %s %d\n", hipGetErrorString(code), file, - line); + std::string error_msg = "HIP runtime library throws an error: " + + std::string(hipGetErrorString(code)) + + ", in file " + std::string(file) + ": " + + std::to_string(line); if (abort) { - throw deepmd::deepmd_exception("HIP Assert"); + throw deepmd::deepmd_exception(error_msg); + } else { + fprintf(stderr, "%s\n", error_msg.c_str()); } } } @@ -41,10 +46,16 @@ inline void nborAssert(hipError_t code, int line, bool abort = true) { if (code != hipSuccess) { - fprintf(stderr, "hip assert: %s %s %d\n", - "DeePMD-kit:\tillegal nbor list sorting", file, line); - if (abort) { - throw deepmd::deepmd_exception("HIP Assert: illegal nbor list sorting"); + std::string error_msg = "DeePMD-kit: Illegal nbor list sorting: "; + try { + DPAssert(code, file, line, true); + } catch (deepmd::deepmd_exception &e) { + error_msg += 
e.what(); + if (abort) { + throw deepmd::deepmd_exception(error_msg); + } else { + fprintf(stderr, "%s\n", error_msg.c_str()); + } } } } diff --git a/source/lib/include/neighbor_stat.h b/source/lib/include/neighbor_stat.h new file mode 100644 index 0000000000..79e241f783 --- /dev/null +++ b/source/lib/include/neighbor_stat.h @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +#include "neighbor_list.h" + +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM + +namespace deepmd { +template +void neighbor_stat_gpu(const FPTYPE* coord, + const int* type, + const int nloc, + const deepmd::InputNlist& gpu_nlist, + int* max_nbor_size, + FPTYPE* min_nbor_dist, + const int ntypes, + const int MAX_NNEI); +} // namespace deepmd + +#endif diff --git a/source/lib/include/tabulate.h b/source/lib/include/tabulate.h index 76a46bbe6c..47c3062449 100644 --- a/source/lib/include/tabulate.h +++ b/source/lib/include/tabulate.h @@ -18,6 +18,7 @@ void tabulate_fusion_se_a_cpu(FPTYPE* out, template void tabulate_fusion_se_a_grad_cpu(FPTYPE* dy_dem_x, FPTYPE* dy_dem, + FPTYPE* dy_dtwo, const FPTYPE* table, const FPTYPE* table_info, const FPTYPE* em_x, @@ -35,8 +36,10 @@ void tabulate_fusion_se_a_grad_grad_cpu(FPTYPE* dz_dy, const FPTYPE* table_info, const FPTYPE* em_x, const FPTYPE* em, + const FPTYPE* two_embed, const FPTYPE* dz_dy_dem_x, const FPTYPE* dz_dy_dem, + const FPTYPE* dz_dy_dtwo, const int nloc, const int nnei, const int last_layer_size, @@ -124,6 +127,7 @@ void tabulate_fusion_se_a_gpu(FPTYPE* out, template void tabulate_fusion_se_a_grad_gpu(FPTYPE* dy_dem_x, FPTYPE* dy_dem, + FPTYPE* dy_dtwo, const FPTYPE* table, const FPTYPE* table_info, const FPTYPE* em_x, @@ -141,8 +145,10 @@ void tabulate_fusion_se_a_grad_grad_gpu(FPTYPE* dz_dy, const FPTYPE* table_info, const FPTYPE* em_x, const FPTYPE* em, + const FPTYPE* two_embed, const FPTYPE* dz_dy_dem_x, const FPTYPE* dz_dy_dem, + const FPTYPE* dz_dy_dtwo, const int nloc, const int nnei, const int last_layer_size, diff --git 
a/source/lib/src/gpu/cudart/CMakeLists.txt b/source/lib/src/gpu/cudart/CMakeLists.txt index e612ad63ed..0df3f6bda8 100644 --- a/source/lib/src/gpu/cudart/CMakeLists.txt +++ b/source/lib/src/gpu/cudart/CMakeLists.txt @@ -1,7 +1,30 @@ -add_library(deepmd_dyn_cudart SHARED cudart_stub.cc) +enable_language(C ASM) +# Python is usually installed in every Linux distribution... +find_package( + Python3 + COMPONENTS Interpreter + REQUIRED) +get_property( + CUDART_LOCATION + TARGET CUDA::cudart + PROPERTY IMPORTED_LOCATION) +execute_process( + COMMAND + ${Python3_EXECUTABLE} + ${CMAKE_CURRENT_SOURCE_DIR}/../../../../3rdparty/implib/implib-gen.py + ${CUDART_LOCATION} --target ${CMAKE_SYSTEM_PROCESSOR} --dlopen-callback + DP_cudart_dlopen --dlsym-callback DP_cudart_dlsym COMMAND_ERROR_IS_FATAL ANY + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + +file(GLOB CUDA_STUB_SRC ${CMAKE_CURRENT_BINARY_DIR}/*.tramp.S + ${CMAKE_CURRENT_BINARY_DIR}/*.init.c) + +add_library(deepmd_dyn_cudart SHARED cudart_stub.cc ${CUDA_STUB_SRC}) target_include_directories(deepmd_dyn_cudart PUBLIC ${CUDAToolkit_INCLUDE_DIRS}) -set_target_properties(deepmd_dyn_cudart PROPERTIES INSTALL_RPATH - "${CUDAToolkit_LIBRARY_DIR}") +set_target_properties( + deepmd_dyn_cudart PROPERTIES INSTALL_RPATH "${CUDAToolkit_LIBRARY_DIR}" + BUILD_RPATH "${CUDAToolkit_LIBRARY_DIR}") +target_compile_definitions(deepmd_dyn_cudart PRIVATE IMPLIB_EXPORT_SHIMS) if(BUILD_CPP_IF AND NOT BUILD_PY_IF) install( TARGETS deepmd_dyn_cudart diff --git a/source/lib/src/gpu/cudart/cuda_runtime_10_0.inc b/source/lib/src/gpu/cudart/cuda_runtime_10_0.inc deleted file mode 100644 index 6810c05d67..0000000000 --- a/source/lib/src/gpu/cudart/cuda_runtime_10_0.inc +++ /dev/null @@ -1,1846 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetLimit(enum cudaLimit limit, - size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaDeviceGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaDeviceGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int *); - static auto func_ptr = - LoadSymbol("cudaDeviceGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig) { - 
using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaDeviceSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig *); - static auto func_ptr = LoadSymbol("cudaDeviceGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaDeviceSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const char *); - static auto func_ptr = LoadSymbol("cudaDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, pciBusId); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, - int len, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(char *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcEventHandle_t *, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle) { - 
using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, cudaIpcEventHandle_t); - static auto func_ptr = LoadSymbol("cudaIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, handle); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcMemHandle_t *, void *); - static auto func_ptr = LoadSymbol("cudaIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle( - void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaIpcMemHandle_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaIpcOpenMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, handle, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadExit(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadExit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetLimit(enum cudaLimit limit, size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaThreadSetLimit"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaThreadGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaThreadGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaThreadSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaPeekAtLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaPeekAtLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorName(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorName"); - if (!func_ptr) return "cudaGetErrorName symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI 
-cudaGetErrorString(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorString"); - if (!func_ptr) return "cudaGetErrorString symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceCount(int *count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaDeviceProp *, int); - static auto func_ptr = LoadSymbol("cudaGetDeviceProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceAttr, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, - int srcDevice, int dstDevice) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceP2PAttr, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, srcDevice, dstDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaChooseDevice(int *device, const struct cudaDeviceProp *prop) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const struct cudaDeviceProp *); - static auto func_ptr = LoadSymbol("cudaChooseDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(device, prop); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaSetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDevice(int *device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, - int len) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int); - static auto func_ptr = LoadSymbol("cudaSetValidDevices"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device_arr, len); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags(unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cudaSetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDeviceFlags(unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *); - static auto func_ptr = LoadSymbol("cudaStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithFlags"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(pStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags, - int priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int, int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetPriority(cudaStream_t hStream, int *priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, int *); - static auto func_ptr = LoadSymbol("cudaStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned int *); - static auto func_ptr = LoadSymbol("cudaStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamDestroy(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent( - cudaStream_t stream, cudaEvent_t event, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, cudaEvent_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, event, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, - void *userData, unsigned int flags) { - using 
FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamCallback_t, - void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, callback, userData, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamSynchronize(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr, - size_t length __dv(0), - unsigned int flags __dv(cudaMemAttachSingle)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, devPtr, length, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamBeginCapture(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamBeginCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamIsCapturing( - cudaStream_t stream, enum 
cudaStreamCaptureStatus *pCaptureStatus) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cudaStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventCreate(cudaEvent_t *event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecord(cudaEvent_t event, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventQuery(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventSynchronize(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventDestroy(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = 
LoadSymbol("cudaEventDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, - cudaEvent_t start, - cudaEvent_t end) { - using FuncPtr = cudaError_t(CUDARTAPI *)(float *, cudaEvent_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ms, start, end); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalMemory( - cudaExternalMemory_t *extMem_out, - const struct cudaExternalMemoryHandleDesc *memHandleDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaExternalMemory_t *, const struct cudaExternalMemoryHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedBuffer( - void **devPtr, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryBufferDesc *bufferDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaExternalMemory_t, - const struct cudaExternalMemoryBufferDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmap, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, cudaExternalMemory_t, - const struct cudaExternalMemoryMipmappedArrayDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -extern __host__ cudaError_t CUDARTAPI 
-cudaDestroyExternalMemory(cudaExternalMemory_t extMem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalMemory_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalSemaphore( - cudaExternalSemaphore_t *extSem_out, - const struct cudaExternalSemaphoreHandleDesc *semHandleDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaSignalExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreSignalParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreSignalParams *, - unsigned int, cudaStream_t); - static auto func_ptr = - LoadSymbol("cudaSignalExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaWaitExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreWaitParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreWaitParams *, - unsigned int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaWaitExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI 
-cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -extern __host__ cudaError_t CUDARTAPI -cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel( - const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernelMultiDevice( - struct cudaLaunchParams *launchParamsList, unsigned int numDevices, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaLaunchParams *, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, 
cacheConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, config); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaFuncAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaFuncGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncAttribute, int); - static auto func_ptr = LoadSymbol("cudaFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, attr, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForDevice(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForHost(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchHostFunc(cudaStream_t stream, - cudaHostFn_t fn, - void *userData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaHostFn_t, void *); - static auto func_ptr = LoadSymbol("cudaLaunchHostFunc"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(stream, fn, userData); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, - int blockSize, - size_t dynamicSMemSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t); - static auto func_ptr = - LoadSymbol("cudaOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, - const void *func, - int blockSize, - size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dv(0), - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(dim3, dim3, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaConfigureCall"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(gridDim, blockDim, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetupArgument(const void *arg, - size_t size, - size_t offset) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaSetupArgument"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(arg, size, offset); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunch(const void *func) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *); - static auto func_ptr = LoadSymbol("cudaLaunch"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMallocManaged( - void **devPtr, size_t size, unsigned int flags __dv(cudaMemAttachGlobal)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMalloc(void **devPtr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMalloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocHost(void **ptr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMallocHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocPitch(void **devPtr, - size_t *pitch, - size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMallocPitch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocArray( - cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width, - size_t height __dv(0), unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - size_t, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, width, height, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFree(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI 
*)(void *); - static auto func_ptr = LoadSymbol("cudaFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t); - static auto func_ptr = LoadSymbol("cudaFreeArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t); - static auto func_ptr = LoadSymbol("cudaFreeMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHost, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostRegister(void *ptr, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostRegister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostUnregister(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned 
int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostGetDevicePointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevice, pHost, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostGetFlags(unsigned int *pFlags, - void *pHost) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cudaHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, pHost); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3D(struct cudaPitchedPtr *pitchedDevPtr, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr *, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMalloc3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, extent); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, - struct cudaExtent extent, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int); - static auto func_ptr = LoadSymbol("cudaMalloc3DArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, extent, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, - const struct cudaChannelFormatDesc *desc, struct cudaExtent extent, - unsigned int numLevels, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, desc, extent, numLevels, flags); -} - -extern __host__ cudaError_t 
CUDARTAPI cudaGetMipmappedArrayLevel( - cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, - unsigned int level) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaMipmappedArray_const_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGetMipmappedArrayLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(levelArray, mipmappedArray, level); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync( - const struct cudaMemcpy3DParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy3DPeerAsync( - const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, - size_t *total) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaMemGetInfo"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -extern __host__ cudaError_t CUDARTAPI -cudaArrayGetInfo(struct cudaChannelFormatDesc *desc, struct cudaExtent *extent, - unsigned int *flags, cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - struct cudaExtent *, unsigned int *, - cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, extent, flags, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, - size_t count, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyPeer(void *dst, int dstDevice, - const void *src, - int srcDevice, - size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, int, const void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t, size_t, size_t, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, 
- size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, cudaArray_const_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2D(void *dst, size_t dpitch, - const void *src, - size_t spitch, size_t width, - size_t height, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArray( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArray( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, 
enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, size_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, - size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - cudaArray_const_t, size_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol( - const void *symbol, const void *src, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol( - void *dst, const void *symbol, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI 
-cudaMemcpyAsync(void *dst, const void *src, size_t count, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, - size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, const void *, int, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromArrayAsync( - void *dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync( - void *dst, size_t dpitch, const 
void *src, size_t spitch, size_t width, - size_t height, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - const void *, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArrayAsync( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, - size_t, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbolAsync( - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = 
LoadSymbol("cudaMemcpyToSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbolAsync( - void *dst, const void *symbol, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, - size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset2D(void *devPtr, size_t pitch, - int value, size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemset2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset3D( - struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMemset3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync( - void *devPtr, int value, size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemsetAsync"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, - size_t height, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, - struct cudaExtent extent, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, - struct cudaExtent, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolAddress(void **devPtr, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolAddress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolSize(size_t *size, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(size, symbol); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, size_t, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, 
dstDevice, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, - enum cudaMemoryAdvise, int); - static auto func_ptr = LoadSymbol("cudaMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttribute( - void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, - const void *devPtr, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, size_t, enum cudaMemRangeAttribute, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttributes( - void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, - size_t numAttributes, const void *devPtr, size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, enum cudaMemRangeAttribute *, - size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes( - struct cudaPointerAttributes *attributes, const void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPointerAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attributes, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int, int); - static auto func_ptr = 
LoadSymbol("cudaDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, device, peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int); - static auto func_ptr = LoadSymbol("cudaDeviceEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceDisablePeerAccess(int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceSetMapFlags( - cudaGraphicsResource_t resource, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphicsResourceSetMapFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsMapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsUnmapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t 
stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedPointer( - void **devPtr, size_t *size, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsSubResourceGetMappedArray( - cudaArray_t *array, cudaGraphicsResource_t resource, - unsigned int arrayIndex, unsigned int mipLevel) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaGraphicsResource_t, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, resource, arrayIndex, mipLevel); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsResourceGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetChannelDesc( - struct cudaChannelFormatDesc *desc, cudaArray_const_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - cudaArray_const_t); - static auto func_ptr = LoadSymbol("cudaGetChannelDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, array); -} - -extern 
__host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( - int x, int y, int z, int w, enum cudaChannelFormatKind f) { - using FuncPtr = struct cudaChannelFormatDesc(CUDARTAPI *)( - int, int, int, int, enum cudaChannelFormatKind); - static auto func_ptr = LoadSymbol("cudaCreateChannelDesc"); - if (!func_ptr) { - return cudaChannelFormatDesc{cudaChannelFormatKind(-1), 0, 0, 0}; - } - return func_ptr(x, y, z, w, f); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindTexture( - size_t *offset, const struct textureReference *texref, const void *devPtr, - const struct cudaChannelFormatDesc *desc, size_t size __dv(UINT_MAX)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, size); -} - -extern __host__ cudaError_t CUDARTAPI -cudaBindTexture2D(size_t *offset, const struct textureReference *texref, - const void *devPtr, const struct cudaChannelFormatDesc *desc, - size_t width, size_t height, size_t pitch) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, width, height, pitch); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindTextureToArray( - const struct textureReference *texref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, array, 
desc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaBindTextureToMipmappedArray(const struct textureReference *texref, - cudaMipmappedArray_const_t mipmappedArray, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaMipmappedArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, mipmappedArray, desc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUnbindTexture(const struct textureReference *texref) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaUnbindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureAlignmentOffset( - size_t *offset, const struct textureReference *texref) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(size_t *, const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaGetTextureAlignmentOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureReference( - const struct textureReference **texref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct textureReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetTextureReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindSurfaceToArray( - const struct surfaceReference *surfref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct surfaceReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindSurfaceToArray"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, array, desc); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceReference( - const struct surfaceReference **surfref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct surfaceReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSurfaceReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject( - cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, - const struct cudaTextureDesc *pTexDesc, - const struct cudaResourceViewDesc *pResViewDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaTextureObject_t *, const struct cudaResourceDesc *, - const struct cudaTextureDesc *, const struct cudaResourceViewDesc *); - static auto func_ptr = LoadSymbol("cudaCreateTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyTextureObject(cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaDestroyTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc( - struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaTextureDesc *, 
cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaGetTextureObjectTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc( - struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaResourceViewDesc *, - cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateSurfaceObject( - cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t *, - const struct cudaResourceDesc *); - static auto func_ptr = LoadSymbol("cudaCreateSurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t); - static auto func_ptr = LoadSymbol("cudaDestroySurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaSurfaceObject_t); - static auto func_ptr = - LoadSymbol("cudaGetSurfaceObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(driverVersion); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaRuntimeGetVersion(int *runtimeVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaRuntimeGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(runtimeVersion); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraph, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddKernelNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - cudaGraphNode_t *, size_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddKernelNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams( - cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams( - cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddMemcpyNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemcpy3DParms *pCopyParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - cudaGraphNode_t *, size_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pCopyParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams( - cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams( - cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddMemsetNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemsetParams *pMemsetParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - cudaGraphNode_t *, size_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemsetNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pMemsetParams); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaGraphMemsetNodeGetParams( - cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams( - cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddHostNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - cudaGraphNode_t *, size_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddHostNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeGetParams( - cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams( - cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeSetParams"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaGraph_t childGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, cudaGraphNode_t *, size_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - childGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddEmptyNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - cudaGraphNode_t *pDependencies, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphClone, originalGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, - cudaGraph_t clonedGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraphNode_t, 
cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pNode, originalNode, clonedGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaGraphNodeType *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pType); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t graph, - cudaGraphNode_t *nodes, - size_t *numNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, nodes, numNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes( - cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetRootNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, pRootNodes, pNumRootNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph, - cudaGraphNode_t *from, - cudaGraphNode_t *to, - size_t *numEdges) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, - cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numEdges); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies( - cudaGraphNode_t node, cudaGraphNode_t *pDependencies, - size_t *pNumDependencies) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = 
LoadSymbol("cudaGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependencies, pNumDependencies); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes( - cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, - size_t *pNumDependentNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependentNodes, pNumDependentNodes); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddDependencies(cudaGraph_t graph, cudaGraphNode_t *from, - cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, - cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphRemoveDependencies(cudaGraph_t graph, cudaGraphNode_t *from, - cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, - cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphDestroyNode(cudaGraphNode_t node) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate( - cudaGraphExec_t *pGraphExec, cudaGraph_t graph, cudaGraphNode_t *pErrorNode, - char *pLogBuffer, size_t bufferSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t 
*, cudaGraph_t, - cudaGraphNode_t *, char *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphInstantiate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecDestroy(cudaGraphExec_t graphExec) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t); - static auto func_ptr = LoadSymbol("cudaGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetExportTable( - const void **ppExportTable, const cudaUUID_t *pExportTableId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *); - static auto func_ptr = LoadSymbol("cudaGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -} // extern "C" diff --git a/source/lib/src/gpu/cudart/cuda_runtime_10_1.inc b/source/lib/src/gpu/cudart/cuda_runtime_10_1.inc deleted file mode 100644 index d076cc4ac3..0000000000 --- a/source/lib/src/gpu/cudart/cuda_runtime_10_1.inc +++ /dev/null @@ -1,1854 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetLimit(enum cudaLimit limit, - size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaDeviceGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaDeviceGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int *); - static auto func_ptr = - LoadSymbol("cudaDeviceGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig) { - 
using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaDeviceSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig *); - static auto func_ptr = LoadSymbol("cudaDeviceGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaDeviceSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const char *); - static auto func_ptr = LoadSymbol("cudaDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, pciBusId); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, - int len, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(char *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcEventHandle_t *, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle) { - 
using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, cudaIpcEventHandle_t); - static auto func_ptr = LoadSymbol("cudaIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, handle); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcMemHandle_t *, void *); - static auto func_ptr = LoadSymbol("cudaIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle( - void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaIpcMemHandle_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaIpcOpenMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, handle, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadExit(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadExit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetLimit(enum cudaLimit limit, size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaThreadSetLimit"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaThreadGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaThreadGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaThreadSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaPeekAtLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaPeekAtLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorName(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorName"); - if (!func_ptr) return "cudaGetErrorName symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI 
-cudaGetErrorString(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorString"); - if (!func_ptr) return "cudaGetErrorString symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceCount(int *count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaDeviceProp *, int); - static auto func_ptr = LoadSymbol("cudaGetDeviceProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceAttr, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, - int srcDevice, int dstDevice) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceP2PAttr, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, srcDevice, dstDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaChooseDevice(int *device, const struct cudaDeviceProp *prop) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const struct cudaDeviceProp *); - static auto func_ptr = LoadSymbol("cudaChooseDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(device, prop); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaSetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDevice(int *device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, - int len) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int); - static auto func_ptr = LoadSymbol("cudaSetValidDevices"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device_arr, len); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags(unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cudaSetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDeviceFlags(unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *); - static auto func_ptr = LoadSymbol("cudaStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithFlags"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(pStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags, - int priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int, int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetPriority(cudaStream_t hStream, int *priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, int *); - static auto func_ptr = LoadSymbol("cudaStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned int *); - static auto func_ptr = LoadSymbol("cudaStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamDestroy(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent( - cudaStream_t stream, cudaEvent_t event, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, cudaEvent_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, event, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, - void *userData, unsigned int flags) { - using 
FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamCallback_t, - void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, callback, userData, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamSynchronize(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr, - size_t length __dv(0), - unsigned int flags __dv(cudaMemAttachSingle)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, devPtr, length, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamBeginCapture(cudaStream_t stream, enum cudaStreamCaptureMode mode) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureMode); - static auto func_ptr = LoadSymbol("cudaStreamBeginCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaThreadExchangeStreamCaptureMode(enum cudaStreamCaptureMode *mode) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaStreamCaptureMode *); - static auto func_ptr = - LoadSymbol("cudaThreadExchangeStreamCaptureMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mode); -} - -extern 
__host__ cudaError_t CUDARTAPI -cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamIsCapturing( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cudaStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamGetCaptureInfo( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus, - unsigned long long *pId) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *); - static auto func_ptr = LoadSymbol("cudaStreamGetCaptureInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus, pId); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventCreate(cudaEvent_t *event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecord(cudaEvent_t event, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t); - static auto func_ptr = 
LoadSymbol("cudaEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventQuery(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventSynchronize(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventDestroy(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, - cudaEvent_t start, - cudaEvent_t end) { - using FuncPtr = cudaError_t(CUDARTAPI *)(float *, cudaEvent_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ms, start, end); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalMemory( - cudaExternalMemory_t *extMem_out, - const struct cudaExternalMemoryHandleDesc *memHandleDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaExternalMemory_t *, const struct cudaExternalMemoryHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedBuffer( - void **devPtr, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryBufferDesc *bufferDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, 
cudaExternalMemory_t, - const struct cudaExternalMemoryBufferDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmap, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, cudaExternalMemory_t, - const struct cudaExternalMemoryMipmappedArrayDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalMemory(cudaExternalMemory_t extMem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalMemory_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalSemaphore( - cudaExternalSemaphore_t *extSem_out, - const struct cudaExternalSemaphoreHandleDesc *semHandleDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaSignalExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreSignalParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreSignalParams *, - unsigned int, cudaStream_t); - static auto func_ptr = - 
LoadSymbol("cudaSignalExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaWaitExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreWaitParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreWaitParams *, - unsigned int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaWaitExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -extern __host__ cudaError_t CUDARTAPI -cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel( - const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern 
__host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernelMultiDevice( - struct cudaLaunchParams *launchParamsList, unsigned int numDevices, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaLaunchParams *, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, cacheConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, config); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaFuncAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaFuncGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncAttribute, int); - static auto func_ptr = LoadSymbol("cudaFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, attr, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI 
-cudaSetDoubleForDevice(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForHost(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchHostFunc(cudaStream_t stream, - cudaHostFn_t fn, - void *userData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaHostFn_t, void *); - static auto func_ptr = LoadSymbol("cudaLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, fn, userData); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, - int blockSize, - size_t dynamicSMemSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t); - static auto func_ptr = - LoadSymbol("cudaOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, - const void *func, - int blockSize, - size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMallocManaged( - void **devPtr, size_t size, unsigned int flags 
__dv(cudaMemAttachGlobal)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMalloc(void **devPtr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMalloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocHost(void **ptr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMallocHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocPitch(void **devPtr, - size_t *pitch, - size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMallocPitch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocArray( - cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width, - size_t height __dv(0), unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - size_t, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, width, height, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFree(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaFreeHost(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t); - static auto func_ptr = LoadSymbol("cudaFreeArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t); - static auto func_ptr = LoadSymbol("cudaFreeMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHost, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostRegister(void *ptr, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostRegister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostUnregister(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostGetDevicePointer"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(pDevice, pHost, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostGetFlags(unsigned int *pFlags, - void *pHost) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cudaHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, pHost); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3D(struct cudaPitchedPtr *pitchedDevPtr, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr *, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMalloc3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, extent); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, - struct cudaExtent extent, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int); - static auto func_ptr = LoadSymbol("cudaMalloc3DArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, extent, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, - const struct cudaChannelFormatDesc *desc, struct cudaExtent extent, - unsigned int numLevels, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, desc, extent, numLevels, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetMipmappedArrayLevel( - cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, - unsigned int level) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - 
cudaArray_t *, cudaMipmappedArray_const_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGetMipmappedArrayLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(levelArray, mipmappedArray, level); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync( - const struct cudaMemcpy3DParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy3DPeerAsync( - const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, - size_t *total) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaMemGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -extern __host__ cudaError_t CUDARTAPI -cudaArrayGetInfo(struct cudaChannelFormatDesc *desc, 
struct cudaExtent *extent, - unsigned int *flags, cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - struct cudaExtent *, unsigned int *, - cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, extent, flags, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, - size_t count, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyPeer(void *dst, int dstDevice, - const void *src, - int srcDevice, - size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, int, const void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2D(void *dst, size_t dpitch, - const void *src, - size_t spitch, size_t width, - size_t height, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArray( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArray"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArray( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, size_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, - size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - cudaArray_const_t, size_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol( - const void *symbol, const void *src, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol( - void *dst, const void *symbol, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost)) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemcpyAsync(void *dst, const void *src, size_t count, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, - size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, const void *, int, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync( - void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, - size_t height, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, 
size_t, size_t, - const void *, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArrayAsync( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, - size_t, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbolAsync( - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbolAsync( - void *dst, const void *symbol, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, - size_t count) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset2D(void *devPtr, size_t pitch, - int value, size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemset2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset3D( - struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMemset3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync( - void *devPtr, int value, size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemsetAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, - size_t height, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, - struct cudaExtent extent, cudaStream_t stream __dv(0)) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, - struct cudaExtent, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolAddress(void **devPtr, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolAddress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolSize(size_t *size, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(size, symbol); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, size_t, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, - enum cudaMemoryAdvise, int); - static auto func_ptr = LoadSymbol("cudaMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttribute( - void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, - const void *devPtr, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, size_t, enum cudaMemRangeAttribute, const void *, size_t); - static auto func_ptr = 
LoadSymbol("cudaMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttributes( - void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, - size_t numAttributes, const void *devPtr, size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, enum cudaMemRangeAttribute *, - size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t, size_t, size_t, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, 
cudaArray_const_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind, stream); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArrayAsync(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes( - struct cudaPointerAttributes *attributes, const void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPointerAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attributes, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceCanAccessPeer"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, device, peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int); - static auto func_ptr = LoadSymbol("cudaDeviceEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceDisablePeerAccess(int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceSetMapFlags( - cudaGraphicsResource_t resource, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphicsResourceSetMapFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsMapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsUnmapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - 
cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedPointer( - void **devPtr, size_t *size, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsSubResourceGetMappedArray( - cudaArray_t *array, cudaGraphicsResource_t resource, - unsigned int arrayIndex, unsigned int mipLevel) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaGraphicsResource_t, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, resource, arrayIndex, mipLevel); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsResourceGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindTexture( - size_t *offset, const struct textureReference *texref, const void *devPtr, - const struct cudaChannelFormatDesc *desc, size_t size __dv(UINT_MAX)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, size); -} - -extern __host__ cudaError_t CUDARTAPI -cudaBindTexture2D(size_t *offset, const struct textureReference *texref, - const void *devPtr, const struct cudaChannelFormatDesc *desc, - size_t width, size_t height, size_t pitch) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, width, height, pitch); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindTextureToArray( - const struct textureReference *texref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, array, desc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaBindTextureToMipmappedArray(const struct textureReference *texref, - cudaMipmappedArray_const_t mipmappedArray, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaMipmappedArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, mipmappedArray, desc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUnbindTexture(const struct textureReference *texref) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaUnbindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref); -} - 
-extern __host__ cudaError_t CUDARTAPI cudaGetTextureAlignmentOffset( - size_t *offset, const struct textureReference *texref) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(size_t *, const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaGetTextureAlignmentOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureReference( - const struct textureReference **texref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct textureReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetTextureReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindSurfaceToArray( - const struct surfaceReference *surfref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct surfaceReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindSurfaceToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, array, desc); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceReference( - const struct surfaceReference **surfref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct surfaceReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSurfaceReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetChannelDesc( - struct cudaChannelFormatDesc *desc, cudaArray_const_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - cudaArray_const_t); - static auto func_ptr = LoadSymbol("cudaGetChannelDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, array); -} - -extern __host__ struct 
cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( - int x, int y, int z, int w, enum cudaChannelFormatKind f) { - using FuncPtr = struct cudaChannelFormatDesc(CUDARTAPI *)( - int, int, int, int, enum cudaChannelFormatKind); - static auto func_ptr = LoadSymbol("cudaCreateChannelDesc"); - if (!func_ptr) { - return cudaChannelFormatDesc{cudaChannelFormatKind(-1), 0, 0, 0}; - } - return func_ptr(x, y, z, w, f); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject( - cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, - const struct cudaTextureDesc *pTexDesc, - const struct cudaResourceViewDesc *pResViewDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaTextureObject_t *, const struct cudaResourceDesc *, - const struct cudaTextureDesc *, const struct cudaResourceViewDesc *); - static auto func_ptr = LoadSymbol("cudaCreateTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyTextureObject(cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaDestroyTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc( - struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaTextureDesc *, cudaTextureObject_t); - static auto func_ptr = 
LoadSymbol("cudaGetTextureObjectTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc( - struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaResourceViewDesc *, - cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateSurfaceObject( - cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t *, - const struct cudaResourceDesc *); - static auto func_ptr = LoadSymbol("cudaCreateSurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t); - static auto func_ptr = LoadSymbol("cudaDestroySurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaSurfaceObject_t); - static auto func_ptr = - LoadSymbol("cudaGetSurfaceObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -extern 
__host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaRuntimeGetVersion(int *runtimeVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaRuntimeGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(runtimeVersion); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraph, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddKernelNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddKernelNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams( - cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams( - cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - 
-extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemcpy3DParms *pCopyParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pCopyParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams( - cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams( - cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemsetNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemsetParams *pMemsetParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemsetNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pMemsetParams); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaGraphMemsetNodeGetParams( - cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams( - cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddHostNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddHostNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeGetParams( - cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams( - cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = 
LoadSymbol("cudaGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaGraph_t childGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - childGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddEmptyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphClone, originalGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, - cudaGraph_t clonedGraph) { - using 
FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pNode, originalNode, clonedGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaGraphNodeType *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pType); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t graph, - cudaGraphNode_t *nodes, - size_t *numNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, nodes, numNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes( - cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetRootNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, pRootNodes, pNumRootNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph, - cudaGraphNode_t *from, - cudaGraphNode_t *to, - size_t *numEdges) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, - cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numEdges); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies( - cudaGraphNode_t node, cudaGraphNode_t *pDependencies, - size_t *pNumDependencies) { - using FuncPtr = - cudaError_t(CUDARTAPI 
*)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependencies, pNumDependencies); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes( - cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, - size_t *pNumDependentNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependentNodes, pNumDependentNodes); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphDestroyNode(cudaGraphNode_t node) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate( - cudaGraphExec_t *pGraphExec, cudaGraph_t graph, 
cudaGraphNode_t *pErrorNode, - char *pLogBuffer, size_t bufferSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - cudaGraphNode_t *, char *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphInstantiate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecKernelNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecDestroy(cudaGraphExec_t graphExec) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t); - static auto func_ptr = LoadSymbol("cudaGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetExportTable( - const void **ppExportTable, const cudaUUID_t *pExportTableId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *); - static auto func_ptr = 
LoadSymbol("cudaGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -} // extern "C" diff --git a/source/lib/src/gpu/cudart/cuda_runtime_10_2.inc b/source/lib/src/gpu/cudart/cuda_runtime_10_2.inc deleted file mode 100644 index a5a5438b0e..0000000000 --- a/source/lib/src/gpu/cudart/cuda_runtime_10_2.inc +++ /dev/null @@ -1,1907 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetLimit(enum cudaLimit limit, - size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaDeviceGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaDeviceGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __host__ __cudart_builtin__ 
cudaError_t CUDARTAPI -cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int *); - static auto func_ptr = - LoadSymbol("cudaDeviceGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaDeviceSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig *); - static auto func_ptr = LoadSymbol("cudaDeviceGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaDeviceSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const char *); - static auto func_ptr = LoadSymbol("cudaDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, pciBusId); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, - int len, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(char *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, device); -} - -extern __host__ 
cudaError_t CUDARTAPI -cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcEventHandle_t *, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, cudaIpcEventHandle_t); - static auto func_ptr = LoadSymbol("cudaIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, handle); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcMemHandle_t *, void *); - static auto func_ptr = LoadSymbol("cudaIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle( - void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaIpcMemHandle_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaIpcOpenMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, handle, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadExit(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadExit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSynchronize(void) { - 
using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetLimit(enum cudaLimit limit, size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaThreadSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaThreadGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaThreadGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaThreadSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaPeekAtLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaPeekAtLastError"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorName(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorName"); - if (!func_ptr) return "cudaGetErrorName symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorString(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorString"); - if (!func_ptr) return "cudaGetErrorString symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceCount(int *count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaDeviceProp *, int); - static auto func_ptr = LoadSymbol("cudaGetDeviceProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceAttr, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetNvSciSyncAttributes( - void *nvSciSyncAttrList, int device, int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, int); - static auto func_ptr = - LoadSymbol("cudaDeviceGetNvSciSyncAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(nvSciSyncAttrList, device, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, - int srcDevice, int dstDevice) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceP2PAttr, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, srcDevice, dstDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaChooseDevice(int *device, const struct cudaDeviceProp *prop) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const struct cudaDeviceProp *); - static auto func_ptr = LoadSymbol("cudaChooseDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, prop); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaSetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDevice(int *device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, - int len) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int); - static auto func_ptr = LoadSymbol("cudaSetValidDevices"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device_arr, len); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags(unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cudaSetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDeviceFlags(unsigned int *flags) { - using 
FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *); - static auto func_ptr = LoadSymbol("cudaStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags, - int priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int, int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetPriority(cudaStream_t hStream, int *priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, int *); - static auto func_ptr = LoadSymbol("cudaStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned int *); - static auto func_ptr = LoadSymbol("cudaStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI 
-cudaStreamDestroy(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent( - cudaStream_t stream, cudaEvent_t event, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, cudaEvent_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, event, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, - void *userData, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamCallback_t, - void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, callback, userData, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamSynchronize(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr, - size_t length __dv(0), - unsigned int flags __dv(cudaMemAttachSingle)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAttachMemAsync"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(stream, devPtr, length, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamBeginCapture(cudaStream_t stream, enum cudaStreamCaptureMode mode) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureMode); - static auto func_ptr = LoadSymbol("cudaStreamBeginCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaThreadExchangeStreamCaptureMode(enum cudaStreamCaptureMode *mode) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaStreamCaptureMode *); - static auto func_ptr = - LoadSymbol("cudaThreadExchangeStreamCaptureMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamIsCapturing( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cudaStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamGetCaptureInfo( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus, - unsigned long long *pId) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *); - static auto func_ptr = LoadSymbol("cudaStreamGetCaptureInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus, pId); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaEventCreate(cudaEvent_t *event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecord(cudaEvent_t event, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventQuery(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventSynchronize(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventDestroy(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, - cudaEvent_t start, - cudaEvent_t end) { - using FuncPtr = cudaError_t(CUDARTAPI *)(float *, cudaEvent_t, cudaEvent_t); - static auto func_ptr = 
LoadSymbol("cudaEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ms, start, end); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalMemory( - cudaExternalMemory_t *extMem_out, - const struct cudaExternalMemoryHandleDesc *memHandleDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaExternalMemory_t *, const struct cudaExternalMemoryHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedBuffer( - void **devPtr, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryBufferDesc *bufferDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaExternalMemory_t, - const struct cudaExternalMemoryBufferDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmap, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, cudaExternalMemory_t, - const struct cudaExternalMemoryMipmappedArrayDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalMemory(cudaExternalMemory_t extMem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalMemory_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalSemaphore( - 
cudaExternalSemaphore_t *extSem_out, - const struct cudaExternalSemaphoreHandleDesc *semHandleDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaSignalExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreSignalParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreSignalParams *, - unsigned int, cudaStream_t); - static auto func_ptr = - LoadSymbol("cudaSignalExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaWaitExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreWaitParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreWaitParams *, - unsigned int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaWaitExternalSemaphoresAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -extern __host__ cudaError_t CUDARTAPI -cudaLaunchKernel(const void *func, dim3 gridDim, dim3 
blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel( - const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernelMultiDevice( - struct cudaLaunchParams *launchParamsList, unsigned int numDevices, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaLaunchParams *, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, cacheConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, config); -} 
- -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaFuncAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaFuncGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncAttribute, int); - static auto func_ptr = LoadSymbol("cudaFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, attr, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForDevice(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForHost(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchHostFunc(cudaStream_t stream, - cudaHostFn_t fn, - void *userData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaHostFn_t, void *); - static auto func_ptr = LoadSymbol("cudaLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, fn, userData); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, - int blockSize, - size_t dynamicSMemSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t); - static auto func_ptr = - 
LoadSymbol("cudaOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, - const void *func, - int blockSize, - size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMallocManaged( - void **devPtr, size_t size, unsigned int flags __dv(cudaMemAttachGlobal)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMalloc(void **devPtr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMalloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocHost(void **ptr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMallocHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocPitch(void **devPtr, - size_t *pitch, - size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMallocPitch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(devPtr, pitch, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocArray( - cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width, - size_t height __dv(0), unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - size_t, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, width, height, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFree(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t); - static auto func_ptr = LoadSymbol("cudaFreeArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t); - static auto func_ptr = LoadSymbol("cudaFreeMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHost, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaHostRegister(void *ptr, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostRegister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostUnregister(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostGetDevicePointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevice, pHost, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostGetFlags(unsigned int *pFlags, - void *pHost) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cudaHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, pHost); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3D(struct cudaPitchedPtr *pitchedDevPtr, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr *, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMalloc3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, extent); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, - struct cudaExtent extent, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int); - static auto func_ptr = LoadSymbol("cudaMalloc3DArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(array, desc, extent, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, - const struct cudaChannelFormatDesc *desc, struct cudaExtent extent, - unsigned int numLevels, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, desc, extent, numLevels, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetMipmappedArrayLevel( - cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, - unsigned int level) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaMipmappedArray_const_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGetMipmappedArrayLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(levelArray, mipmappedArray, level); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync( - const struct cudaMemcpy3DParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DAsync"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy3DPeerAsync( - const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, - size_t *total) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaMemGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -extern __host__ cudaError_t CUDARTAPI -cudaArrayGetInfo(struct cudaChannelFormatDesc *desc, struct cudaExtent *extent, - unsigned int *flags, cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - struct cudaExtent *, unsigned int *, - cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, extent, flags, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, - size_t count, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyPeer(void *dst, int dstDevice, - const void *src, - int srcDevice, - size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, int, const void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2D(void 
*dst, size_t dpitch, - const void *src, - size_t spitch, size_t width, - size_t height, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArray( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArray( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, size_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, - size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - cudaArray_const_t, size_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DArrayToArray"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol( - const void *symbol, const void *src, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol( - void *dst, const void *symbol, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemcpyAsync(void *dst, const void *src, size_t count, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, - size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, const void *, int, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count, stream); -} - -extern __host__ __cudart_builtin__ 
cudaError_t CUDARTAPI cudaMemcpy2DAsync( - void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, - size_t height, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - const void *, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArrayAsync( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, - size_t, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbolAsync( - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, size_t, - enum 
cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbolAsync( - void *dst, const void *symbol, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, - size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset2D(void *devPtr, size_t pitch, - int value, size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemset2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset3D( - struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMemset3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync( - void *devPtr, int value, size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t, cudaStream_t); - static auto 
func_ptr = LoadSymbol("cudaMemsetAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, - size_t height, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, - struct cudaExtent extent, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, - struct cudaExtent, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolAddress(void **devPtr, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolAddress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolSize(size_t *size, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(size, symbol); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, size_t, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemPrefetchAsync"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, - enum cudaMemoryAdvise, int); - static auto func_ptr = LoadSymbol("cudaMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttribute( - void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, - const void *devPtr, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, size_t, enum cudaMemRangeAttribute, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttributes( - void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, - size_t numAttributes, const void *devPtr, size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, enum cudaMemRangeAttribute *, - size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t, size_t, size_t, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI 
-cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, cudaArray_const_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind, stream); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArrayAsync(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = 
LoadSymbol("cudaMemcpyFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes( - struct cudaPointerAttributes *attributes, const void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPointerAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attributes, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, device, peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int); - static auto func_ptr = LoadSymbol("cudaDeviceEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceDisablePeerAccess(int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceSetMapFlags( - cudaGraphicsResource_t resource, unsigned int flags) { - using FuncPtr = - 
cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphicsResourceSetMapFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsMapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsUnmapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedPointer( - void **devPtr, size_t *size, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsSubResourceGetMappedArray( - cudaArray_t *array, cudaGraphicsResource_t resource, - unsigned int arrayIndex, unsigned int mipLevel) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaGraphicsResource_t, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, resource, arrayIndex, mipLevel); -} - -extern __host__ cudaError_t CUDARTAPI 
-cudaGraphicsResourceGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindTexture( - size_t *offset, const struct textureReference *texref, const void *devPtr, - const struct cudaChannelFormatDesc *desc, size_t size __dv(UINT_MAX)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, size); -} - -extern __host__ cudaError_t CUDARTAPI -cudaBindTexture2D(size_t *offset, const struct textureReference *texref, - const void *devPtr, const struct cudaChannelFormatDesc *desc, - size_t width, size_t height, size_t pitch) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, width, height, pitch); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindTextureToArray( - const struct textureReference *texref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, array, desc); -} - -extern __host__ 
cudaError_t CUDARTAPI -cudaBindTextureToMipmappedArray(const struct textureReference *texref, - cudaMipmappedArray_const_t mipmappedArray, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaMipmappedArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, mipmappedArray, desc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUnbindTexture(const struct textureReference *texref) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaUnbindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureAlignmentOffset( - size_t *offset, const struct textureReference *texref) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(size_t *, const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaGetTextureAlignmentOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureReference( - const struct textureReference **texref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct textureReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetTextureReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindSurfaceToArray( - const struct surfaceReference *surfref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct surfaceReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindSurfaceToArray"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(surfref, array, desc); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceReference( - const struct surfaceReference **surfref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct surfaceReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSurfaceReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetChannelDesc( - struct cudaChannelFormatDesc *desc, cudaArray_const_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - cudaArray_const_t); - static auto func_ptr = LoadSymbol("cudaGetChannelDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, array); -} - -extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( - int x, int y, int z, int w, enum cudaChannelFormatKind f) { - using FuncPtr = struct cudaChannelFormatDesc(CUDARTAPI *)( - int, int, int, int, enum cudaChannelFormatKind); - static auto func_ptr = LoadSymbol("cudaCreateChannelDesc"); - if (!func_ptr) { - return cudaChannelFormatDesc{cudaChannelFormatKind(-1), 0, 0, 0}; - } - return func_ptr(x, y, z, w, f); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject( - cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, - const struct cudaTextureDesc *pTexDesc, - const struct cudaResourceViewDesc *pResViewDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaTextureObject_t *, const struct cudaResourceDesc *, - const struct cudaTextureDesc *, const struct cudaResourceViewDesc *); - static auto func_ptr = LoadSymbol("cudaCreateTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyTextureObject(cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI 
*)(cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaDestroyTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc( - struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaTextureDesc *, cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaGetTextureObjectTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc( - struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaResourceViewDesc *, - cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateSurfaceObject( - cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t *, - const struct cudaResourceDesc *); - static auto func_ptr = LoadSymbol("cudaCreateSurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t); - static auto 
func_ptr = LoadSymbol("cudaDestroySurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaSurfaceObject_t); - static auto func_ptr = - LoadSymbol("cudaGetSurfaceObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaRuntimeGetVersion(int *runtimeVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaRuntimeGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(runtimeVersion); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraph, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddKernelNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddKernelNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, 
numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams( - cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams( - cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemcpy3DParms *pCopyParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pCopyParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams( - cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams( - cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, 
- const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemsetNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemsetParams *pMemsetParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemsetNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pMemsetParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeGetParams( - cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams( - cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddHostNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddHostNode"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeGetParams( - cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams( - cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaGraph_t childGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - childGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddEmptyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies) { - using 
FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphClone, originalGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, - cudaGraph_t clonedGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pNode, originalNode, clonedGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaGraphNodeType *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pType); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t graph, - cudaGraphNode_t *nodes, - size_t *numNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, nodes, numNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes( - cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static 
auto func_ptr = LoadSymbol("cudaGraphGetRootNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, pRootNodes, pNumRootNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph, - cudaGraphNode_t *from, - cudaGraphNode_t *to, - size_t *numEdges) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, - cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numEdges); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies( - cudaGraphNode_t node, cudaGraphNode_t *pDependencies, - size_t *pNumDependencies) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependencies, pNumDependencies); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes( - cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, - size_t *pNumDependentNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependentNodes, pNumDependentNodes); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphRemoveDependencies(cudaGraph_t graph, const 
cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphDestroyNode(cudaGraphNode_t node) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate( - cudaGraphExec_t *pGraphExec, cudaGraph_t graph, cudaGraphNode_t *pErrorNode, - char *pLogBuffer, size_t bufferSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - cudaGraphNode_t *, char *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphInstantiate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecKernelNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParams"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemsetNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphExecHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, - cudaGraphNode_t *hErrorNode_out, - enum cudaGraphExecUpdateResult *updateResult_out) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraph_t, cudaGraphNode_t *, - enum cudaGraphExecUpdateResult *); - static auto func_ptr = LoadSymbol("cudaGraphExecUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hGraph, hErrorNode_out, updateResult_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecDestroy(cudaGraphExec_t graphExec) { - using FuncPtr = cudaError_t(CUDARTAPI 
*)(cudaGraphExec_t); - static auto func_ptr = LoadSymbol("cudaGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetExportTable( - const void **ppExportTable, const cudaUUID_t *pExportTableId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *); - static auto func_ptr = LoadSymbol("cudaGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -} // extern "C" diff --git a/source/lib/src/gpu/cudart/cuda_runtime_11_0.inc b/source/lib/src/gpu/cudart/cuda_runtime_11_0.inc deleted file mode 100644 index c9cd0a3769..0000000000 --- a/source/lib/src/gpu/cudart/cuda_runtime_11_0.inc +++ /dev/null @@ -1,2639 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { - -extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetLimit(enum cudaLimit limit, - size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaDeviceGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetTexture1DLinearMaxWidth( - size_t *maxWidthInElements, const struct cudaChannelFormatDesc *fmtDesc, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct cudaChannelFormatDesc *, int); - static auto func_ptr = - LoadSymbol("cudaDeviceGetTexture1DLinearMaxWidth"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(maxWidthInElements, fmtDesc, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaDeviceGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - 
-extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int *); - static auto func_ptr = - LoadSymbol("cudaDeviceGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaDeviceSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig *); - static auto func_ptr = LoadSymbol("cudaDeviceGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaDeviceSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const char *); - static auto func_ptr = LoadSymbol("cudaDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, pciBusId); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, - int len, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(char *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, 
device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcEventHandle_t *, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, cudaIpcEventHandle_t); - static auto func_ptr = LoadSymbol("cudaIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, handle); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcMemHandle_t *, void *); - static auto func_ptr = LoadSymbol("cudaIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle( - void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaIpcMemHandle_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaIpcOpenMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, handle, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -#if CUDA_VERSION >= 11030 - -extern __host__ cudaError_t CUDARTAPI cudaDeviceFlushGPUDirectRDMAWrites( - enum cudaFlushGPUDirectRDMAWritesTarget target, - enum cudaFlushGPUDirectRDMAWritesScope scope) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(enum cudaFlushGPUDirectRDMAWritesTarget, - enum 
cudaFlushGPUDirectRDMAWritesScope); - static auto func_ptr = - LoadSymbol("cudaDeviceFlushGPUDirectRDMAWrites"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(target, scope); -} - -#endif // CUDA_VERSION >= 11030 - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadExit(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadExit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetLimit(enum cudaLimit limit, size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaThreadSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaThreadGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaThreadGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = 
LoadSymbol("cudaThreadSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaPeekAtLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaPeekAtLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorName(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorName"); - if (!func_ptr) return "cudaGetErrorName symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorString(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorString"); - if (!func_ptr) return "cudaGetErrorString symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceCount(int *count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaDeviceProp *, int); - static auto func_ptr = LoadSymbol("cudaGetDeviceProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetAttribute(int 
*value, enum cudaDeviceAttr attr, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceAttr, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, device); -} - -#if CUDA_VERSION >= 11020 - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetDefaultMemPool(cudaMemPool_t *memPool, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetDefaultMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetMemPool(int device, cudaMemPool_t memPool) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, cudaMemPool_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, memPool); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetMemPool(cudaMemPool_t *memPool, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, device); -} - -#endif // CUDA_VERSION >= 11020 - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetNvSciSyncAttributes( - void *nvSciSyncAttrList, int device, int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, int); - static auto func_ptr = - LoadSymbol("cudaDeviceGetNvSciSyncAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(nvSciSyncAttrList, device, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, - int srcDevice, int dstDevice) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceP2PAttr, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetP2PAttribute"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, srcDevice, dstDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaChooseDevice(int *device, const struct cudaDeviceProp *prop) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const struct cudaDeviceProp *); - static auto func_ptr = LoadSymbol("cudaChooseDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, prop); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaSetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDevice(int *device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, - int len) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int); - static auto func_ptr = LoadSymbol("cudaSetValidDevices"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device_arr, len); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags(unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cudaSetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDeviceFlags(unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *); - static auto func_ptr = 
LoadSymbol("cudaStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags, - int priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int, int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetPriority(cudaStream_t hStream, int *priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, int *); - static auto func_ptr = LoadSymbol("cudaStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned int *); - static auto func_ptr = LoadSymbol("cudaStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaCtxResetPersistingL2Cache(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaCtxResetPersistingL2Cache"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) { - using FuncPtr = cudaError_t(CUDARTAPI 
*)(cudaStream_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetAttribute(cudaStream_t hStream, enum cudaStreamAttrID attr, - union cudaStreamAttrValue *value_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamAttrID, - union cudaStreamAttrValue *); - static auto func_ptr = LoadSymbol("cudaStreamGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value_out); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamSetAttribute(cudaStream_t hStream, enum cudaStreamAttrID attr, - const union cudaStreamAttrValue *value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamAttrID, - const union cudaStreamAttrValue *); - static auto func_ptr = LoadSymbol("cudaStreamSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamDestroy(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent( - cudaStream_t stream, cudaEvent_t event, unsigned int flags __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, cudaEvent_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, event, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, - void *userData, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, 
cudaStreamCallback_t, - void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, callback, userData, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamSynchronize(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr, - size_t length __dv(0), unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, devPtr, length, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamBeginCapture(cudaStream_t stream, enum cudaStreamCaptureMode mode) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureMode); - static auto func_ptr = LoadSymbol("cudaStreamBeginCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaThreadExchangeStreamCaptureMode(enum cudaStreamCaptureMode *mode) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaStreamCaptureMode *); - static auto func_ptr = - LoadSymbol("cudaThreadExchangeStreamCaptureMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamEndCapture(cudaStream_t stream, 
cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamIsCapturing( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cudaStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamGetCaptureInfo( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus, - unsigned long long *pId) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *); - static auto func_ptr = LoadSymbol("cudaStreamGetCaptureInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus, pId); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamGetCaptureInfo_v2( - cudaStream_t stream, enum cudaStreamCaptureStatus *captureStatus_out, - unsigned long long *id_out __dv(0), cudaGraph_t *graph_out __dv(0), - const cudaGraphNode_t **dependencies_out __dv(0), - size_t *numDependencies_out __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *, - cudaGraph_t *, const cudaGraphNode_t **, size_t *); - static auto func_ptr = LoadSymbol("cudaStreamGetCaptureInfo_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, captureStatus_out, id_out, graph_out, - dependencies_out, numDependencies_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamUpdateCaptureDependencies( - cudaStream_t stream, cudaGraphNode_t *dependencies, size_t numDependencies, - unsigned int flags __dv(0)) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraphNode_t *, - size_t, unsigned int); - static auto func_ptr = - LoadSymbol("cudaStreamUpdateCaptureDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, dependencies, numDependencies, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventCreate(cudaEvent_t *event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecord(cudaEvent_t event, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecordWithFlags(cudaEvent_t event, cudaStream_t stream __dv(0), - unsigned int flags __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventRecordWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventQuery(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventSynchronize(cudaEvent_t 
event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventDestroy(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, - cudaEvent_t start, - cudaEvent_t end) { - using FuncPtr = cudaError_t(CUDARTAPI *)(float *, cudaEvent_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ms, start, end); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalMemory( - cudaExternalMemory_t *extMem_out, - const struct cudaExternalMemoryHandleDesc *memHandleDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaExternalMemory_t *, const struct cudaExternalMemoryHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedBuffer( - void **devPtr, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryBufferDesc *bufferDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaExternalMemory_t, - const struct cudaExternalMemoryBufferDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmap, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, cudaExternalMemory_t, - const struct cudaExternalMemoryMipmappedArrayDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalMemory(cudaExternalMemory_t extMem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalMemory_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalSemaphore( - cudaExternalSemaphore_t *extSem_out, - const struct cudaExternalSemaphoreHandleDesc *semHandleDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaSignalExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreSignalParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreSignalParams *, - unsigned int, cudaStream_t); - static auto func_ptr = - LoadSymbol("cudaSignalExternalSemaphoresAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaWaitExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreWaitParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const 
cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreWaitParams *, - unsigned int, cudaStream_t); - static auto func_ptr = - LoadSymbol("cudaWaitExternalSemaphoresAsync_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -extern __host__ cudaError_t CUDARTAPI -cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel( - const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaLaunchCooperativeKernelMultiDevice( - struct cudaLaunchParams *launchParamsList, unsigned int numDevices, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaLaunchParams *, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(launchParamsList, numDevices, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, cacheConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, config); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaFuncAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaFuncGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncAttribute, int); - static auto func_ptr = LoadSymbol("cudaFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, attr, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForDevice(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForHost(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = 
LoadSymbol("cudaSetDoubleForHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchHostFunc(cudaStream_t stream, - cudaHostFn_t fn, - void *userData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaHostFn_t, void *); - static auto func_ptr = LoadSymbol("cudaLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, fn, userData); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, - int blockSize, - size_t dynamicSMemSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t); - static auto func_ptr = - LoadSymbol("cudaOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyAvailableDynamicSMemPerBlock(size_t *dynamicSmemSize, - const void *func, int numBlocks, - int blockSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *, int, int); - static auto func_ptr = - LoadSymbol("cudaOccupancyAvailableDynamicSMemPerBlock"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dynamicSmemSize, func, numBlocks, blockSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, - const void *func, - int blockSize, - size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI 
-cudaMallocManaged(void **devPtr, size_t size, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMalloc(void **devPtr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMalloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocHost(void **ptr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMallocHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocPitch(void **devPtr, - size_t *pitch, - size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMallocPitch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocArray( - cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width, - size_t height __dv(0), unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - size_t, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, width, height, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFree(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern 
__host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t); - static auto func_ptr = LoadSymbol("cudaFreeArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t); - static auto func_ptr = LoadSymbol("cudaFreeMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHost, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostRegister(void *ptr, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostRegister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostUnregister(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostGetDevicePointer"); 
- if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevice, pHost, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostGetFlags(unsigned int *pFlags, - void *pHost) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cudaHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, pHost); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3D(struct cudaPitchedPtr *pitchedDevPtr, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr *, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMalloc3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, extent); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, - struct cudaExtent extent, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int); - static auto func_ptr = LoadSymbol("cudaMalloc3DArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, extent, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, - const struct cudaChannelFormatDesc *desc, struct cudaExtent extent, - unsigned int numLevels, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, desc, extent, numLevels, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetMipmappedArrayLevel( - cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, - unsigned int level) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaMipmappedArray_const_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGetMipmappedArrayLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(levelArray, mipmappedArray, level); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync( - const struct cudaMemcpy3DParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy3DPeerAsync( - const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, - size_t *total) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaMemGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -extern __host__ cudaError_t CUDARTAPI -cudaArrayGetInfo(struct 
cudaChannelFormatDesc *desc, struct cudaExtent *extent, - unsigned int *flags, cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - struct cudaExtent *, unsigned int *, - cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, extent, flags, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaArrayGetPlane( - cudaArray_t *pPlaneArray, cudaArray_t hArray, unsigned int planeIdx) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t *, cudaArray_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaArrayGetPlane"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pPlaneArray, hArray, planeIdx); -} - -#if CUDA_VERSION >= 11010 - -extern __host__ cudaError_t CUDARTAPI cudaArrayGetSparseProperties( - struct cudaArraySparseProperties *sparseProperties, cudaArray_t array) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaArraySparseProperties *, cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetSparseProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(sparseProperties, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaMipmappedArrayGetSparseProperties( - struct cudaArraySparseProperties *sparseProperties, - cudaMipmappedArray_t mipmap) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaArraySparseProperties *, - cudaMipmappedArray_t); - static auto func_ptr = - LoadSymbol("cudaMipmappedArrayGetSparseProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(sparseProperties, mipmap); -} - -#endif // CUDA_VERSION >= 11010 - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, - size_t count, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyPeer(void *dst, int dstDevice, - const void *src, - int srcDevice, - size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, int, const void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2D(void *dst, size_t dpitch, - const void *src, - size_t spitch, size_t width, - size_t height, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArray( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArray( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, size_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind); -} - 
-extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, - size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - cudaArray_const_t, size_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol( - const void *symbol, const void *src, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol( - void *dst, const void *symbol, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemcpyAsync(void *dst, const void *src, size_t count, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, 
src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, - size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, const void *, int, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync( - void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, - size_t height, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - const void *, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArrayAsync( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, - size_t, size_t, size_t, size_t, - enum cudaMemcpyKind, 
cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbolAsync( - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbolAsync( - void *dst, const void *symbol, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, - size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset2D(void *devPtr, size_t pitch, - int value, size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemset2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset3D( - struct cudaPitchedPtr pitchedDevPtr, int value, struct 
cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMemset3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync( - void *devPtr, int value, size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemsetAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, - size_t height, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, - struct cudaExtent extent, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, - struct cudaExtent, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolAddress(void **devPtr, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolAddress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolSize(size_t *size, - const void *symbol) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(size_t *, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(size, symbol); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, size_t, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, - enum cudaMemoryAdvise, int); - static auto func_ptr = LoadSymbol("cudaMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttribute( - void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, - const void *devPtr, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, size_t, enum cudaMemRangeAttribute, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttributes( - void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, - size_t numAttributes, const void *devPtr, size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, enum cudaMemRangeAttribute *, - size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -extern 
__CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t, size_t, size_t, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, cudaArray_const_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToArrayAsync"); - 
if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind, stream); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArrayAsync(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocAsync(void **devPtr, - size_t size, - cudaStream_t hStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMallocAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, hStream); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeAsync(void *devPtr, - cudaStream_t hStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaFreeAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, hStream); -} - -#if CUDA_VERSION >= 11020 - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolTrimTo(cudaMemPool_t memPool, - size_t minBytesToKeep) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemPoolTrimTo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, minBytesToKeep); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolSetAttribute( - cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMemPool_t, enum cudaMemPoolAttr, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolSetAttribute"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(memPool, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolGetAttribute( - cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMemPool_t, enum cudaMemPoolAttr, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolSetAccess(cudaMemPool_t memPool, - const struct cudaMemAccessDesc *descList, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMemPool_t, const struct cudaMemAccessDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaMemPoolSetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, descList, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolGetAccess(enum cudaMemAccessFlags *flags, cudaMemPool_t memPool, - struct cudaMemLocation *location) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - enum cudaMemAccessFlags *, cudaMemPool_t, struct cudaMemLocation *); - static auto func_ptr = LoadSymbol("cudaMemPoolGetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags, memPool, location); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolCreate( - cudaMemPool_t *memPool, const struct cudaMemPoolProps *poolProps) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, - const struct cudaMemPoolProps *); - static auto func_ptr = LoadSymbol("cudaMemPoolCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, poolProps); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolDestroy(cudaMemPool_t memPool) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t); - static auto func_ptr = LoadSymbol("cudaMemPoolDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaMallocFromPoolAsync( - void **ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t, cudaMemPool_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMallocFromPoolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, memPool, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolExportToShareableHandle( - void *shareableHandle, cudaMemPool_t memPool, - enum cudaMemAllocationHandleType handleType, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, cudaMemPool_t, enum cudaMemAllocationHandleType, unsigned int); - static auto func_ptr = - LoadSymbol("cudaMemPoolExportToShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(shareableHandle, memPool, handleType, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolImportFromShareableHandle( - cudaMemPool_t *memPool, void *shareableHandle, - enum cudaMemAllocationHandleType handleType, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMemPool_t *, void *, enum cudaMemAllocationHandleType, unsigned int); - static auto func_ptr = - LoadSymbol("cudaMemPoolImportFromShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, shareableHandle, handleType, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolExportPointer( - struct cudaMemPoolPtrExportData *exportData, void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaMemPoolPtrExportData *, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolExportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(exportData, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolImportPointer(void **ptr, cudaMemPool_t memPool, - struct cudaMemPoolPtrExportData *exportData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, cudaMemPool_t, - struct cudaMemPoolPtrExportData *); 
- static auto func_ptr = LoadSymbol("cudaMemPoolImportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, memPool, exportData); -} - -#endif // CUDA_VERSION >= 11020 - -extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes( - struct cudaPointerAttributes *attributes, const void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPointerAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attributes, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, device, peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int); - static auto func_ptr = LoadSymbol("cudaDeviceEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceDisablePeerAccess(int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceSetMapFlags( - cudaGraphicsResource_t resource, unsigned int flags) { 
- using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphicsResourceSetMapFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsMapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsUnmapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedPointer( - void **devPtr, size_t *size, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsSubResourceGetMappedArray( - cudaArray_t *array, cudaGraphicsResource_t resource, - unsigned int arrayIndex, unsigned int mipLevel) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaGraphicsResource_t, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, resource, arrayIndex, mipLevel); -} - -extern __host__ cudaError_t CUDARTAPI 
-cudaGraphicsResourceGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, resource); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindTexture( - size_t *offset, const struct textureReference *texref, const void *devPtr, - const struct cudaChannelFormatDesc *desc, size_t size __dv(UINT_MAX)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, size); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaBindTexture2D(size_t *offset, const struct textureReference *texref, - const void *devPtr, const struct cudaChannelFormatDesc *desc, - size_t width, size_t height, size_t pitch) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, width, height, pitch); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindTextureToArray( - const struct textureReference *texref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(texref, array, desc); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaBindTextureToMipmappedArray(const struct textureReference *texref, - cudaMipmappedArray_const_t mipmappedArray, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaMipmappedArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, mipmappedArray, desc); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaUnbindTexture(const struct textureReference *texref) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaUnbindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaGetTextureAlignmentOffset(size_t *offset, - const struct textureReference *texref) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(size_t *, const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaGetTextureAlignmentOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaGetTextureReference( - const struct textureReference **texref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct textureReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetTextureReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, symbol); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindSurfaceToArray( - const struct surfaceReference *surfref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct surfaceReference *, cudaArray_const_t, - 
const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindSurfaceToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, array, desc); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaGetSurfaceReference( - const struct surfaceReference **surfref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct surfaceReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSurfaceReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetChannelDesc( - struct cudaChannelFormatDesc *desc, cudaArray_const_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - cudaArray_const_t); - static auto func_ptr = LoadSymbol("cudaGetChannelDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, array); -} - -extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( - int x, int y, int z, int w, enum cudaChannelFormatKind f) { - using FuncPtr = struct cudaChannelFormatDesc(CUDARTAPI *)( - int, int, int, int, enum cudaChannelFormatKind); - static auto func_ptr = LoadSymbol("cudaCreateChannelDesc"); - if (!func_ptr) { - return cudaChannelFormatDesc{cudaChannelFormatKind(-1), 0, 0, 0}; - } - return func_ptr(x, y, z, w, f); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject( - cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, - const struct cudaTextureDesc *pTexDesc, - const struct cudaResourceViewDesc *pResViewDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaTextureObject_t *, const struct cudaResourceDesc *, - const struct cudaTextureDesc *, const struct cudaResourceViewDesc *); - static auto func_ptr = LoadSymbol("cudaCreateTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - 
-extern __host__ cudaError_t CUDARTAPI -cudaDestroyTextureObject(cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaDestroyTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc( - struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaTextureDesc *, cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaGetTextureObjectTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc( - struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaResourceViewDesc *, - cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateSurfaceObject( - cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t *, - const struct cudaResourceDesc *); - static auto func_ptr = LoadSymbol("cudaCreateSurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -extern __host__ cudaError_t CUDARTAPI 
-cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t); - static auto func_ptr = LoadSymbol("cudaDestroySurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaSurfaceObject_t); - static auto func_ptr = - LoadSymbol("cudaGetSurfaceObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaRuntimeGetVersion(int *runtimeVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaRuntimeGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(runtimeVersion); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraph, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddKernelNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = 
LoadSymbol("cudaGraphAddKernelNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams( - cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams( - cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphKernelNodeCopyAttributes(cudaGraphNode_t hSrc, cudaGraphNode_t hDst) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t); - static auto func_ptr = - LoadSymbol("cudaGraphKernelNodeCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hSrc, hDst); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetAttribute( - cudaGraphNode_t hNode, enum cudaKernelNodeAttrID attr, - union cudaKernelNodeAttrValue *value_out) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaKernelNodeAttrID, - union cudaKernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetAttribute( - cudaGraphNode_t hNode, enum cudaKernelNodeAttrID attr, - const union cudaKernelNodeAttrValue *value) { - using FuncPtr 
= - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaKernelNodeAttrID, - const union cudaKernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemcpy3DParms *pCopyParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pCopyParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNodeToSymbol( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, - const void *, const void *, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNodeToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, symbol, - src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNodeFromSymbol( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dst, - const void *symbol, size_t count, size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, void *, - const void *, size_t, size_t, enum 
cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphAddMemcpyNodeFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, dst, - symbol, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode1D( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dst, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, void *, - const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode1D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, dst, src, - count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams( - cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams( - cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParamsToSymbol( - cudaGraphNode_t node, const void *symbol, const void *src, size_t count, - size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, const void *, const void *, - size_t, size_t, enum cudaMemcpyKind); - 
static auto func_ptr = - LoadSymbol("cudaGraphMemcpyNodeSetParamsToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParamsFromSymbol( - cudaGraphNode_t node, void *dst, const void *symbol, size_t count, - size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphMemcpyNodeSetParamsFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, dst, symbol, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphMemcpyNodeSetParams1D(cudaGraphNode_t node, void *dst, const void *src, - size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, void *, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams1D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemsetNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemsetParams *pMemsetParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemsetNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pMemsetParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeGetParams( - cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemsetParams *); - static auto func_ptr = 
LoadSymbol("cudaGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams( - cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddHostNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddHostNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeGetParams( - cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams( - cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t 
graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaGraph_t childGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - childGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddEmptyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddEventRecordNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEventRecordNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventRecordNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t *); - static auto 
func_ptr = - LoadSymbol("cudaGraphEventRecordNodeGetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventRecordNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = - LoadSymbol("cudaGraphEventRecordNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddEventWaitNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEventWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventWaitNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaGraphEventWaitNodeGetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventWaitNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaGraphEventWaitNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event); -} - -#if CUDA_VERSION >= 11020 - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddExternalSemaphoresSignalNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct 
cudaExternalSemaphoreSignalNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, - const struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphAddExternalSemaphoresSignalNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresSignalNodeGetParams( - cudaGraphNode_t hNode, - struct cudaExternalSemaphoreSignalNodeParams *params_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresSignalNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresSignalNodeSetParams( - cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, const struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresSignalNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddExternalSemaphoresWaitNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, - const struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphAddExternalSemaphoresWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, 
pDependencies, numDependencies, - nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresWaitNodeGetParams( - cudaGraphNode_t hNode, - struct cudaExternalSemaphoreWaitNodeParams *params_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresWaitNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresWaitNodeSetParams( - cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, const struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresWaitNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -#endif // CUDA_VERSION >= 11020 - -extern __host__ cudaError_t CUDARTAPI -cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphClone, originalGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, - cudaGraph_t clonedGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pNode, originalNode, clonedGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaGraphNodeType *); - static auto 
func_ptr = LoadSymbol("cudaGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pType); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t graph, - cudaGraphNode_t *nodes, - size_t *numNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, nodes, numNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes( - cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetRootNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, pRootNodes, pNumRootNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph, - cudaGraphNode_t *from, - cudaGraphNode_t *to, - size_t *numEdges) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, - cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numEdges); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies( - cudaGraphNode_t node, cudaGraphNode_t *pDependencies, - size_t *pNumDependencies) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependencies, pNumDependencies); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes( - cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, - size_t *pNumDependentNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static 
auto func_ptr = LoadSymbol("cudaGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependentNodes, pNumDependentNodes); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphDestroyNode(cudaGraphNode_t node) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate( - cudaGraphExec_t *pGraphExec, cudaGraph_t graph, cudaGraphNode_t *pErrorNode, - char *pLogBuffer, size_t bufferSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - cudaGraphNode_t *, char *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphInstantiate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecKernelNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - 
const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParamsToSymbol( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const void *symbol, - const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParamsToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecMemcpyNodeSetParamsFromSymbol(cudaGraphExec_t hGraphExec, - cudaGraphNode_t node, void *dst, - const void *symbol, size_t count, - size_t offset, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParamsFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, dst, symbol, count, offset, kind); -} - -extern __host__ 
cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams1D( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, void *dst, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, void *, - const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParams1D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemsetNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphExecHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecChildGraphNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, cudaGraph_t childGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = - LoadSymbol("cudaGraphExecChildGraphNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, childGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecEventRecordNodeSetEvent( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, 
cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = - LoadSymbol("cudaGraphExecEventRecordNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, event); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecEventWaitNodeSetEvent( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = - LoadSymbol("cudaGraphExecEventWaitNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecExternalSemaphoresSignalNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphExec_t, cudaGraphNode_t, - const struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecExternalSemaphoresSignalNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecExternalSemaphoresWaitNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphExec_t, cudaGraphNode_t, - const struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecExternalSemaphoresWaitNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, - cudaGraphNode_t *hErrorNode_out, - enum cudaGraphExecUpdateResult *updateResult_out) { - 
using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraph_t, cudaGraphNode_t *, - enum cudaGraphExecUpdateResult *); - static auto func_ptr = LoadSymbol("cudaGraphExecUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hGraph, hErrorNode_out, updateResult_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphUpload(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphUpload"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecDestroy(cudaGraphExec_t graphExec) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t); - static auto func_ptr = LoadSymbol("cudaGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDebugDotPrint( - cudaGraph_t graph, const char *path, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, const char *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphDebugDotPrint"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, path, flags); -} - -#if CUDA_VERSION >= 11030 - -extern __host__ cudaError_t CUDARTAPI 
cudaUserObjectCreate( - cudaUserObject_t *object_out, void *ptr, cudaHostFn_t destroy, - unsigned int initialRefcount, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaUserObject_t *, void *, cudaHostFn_t, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object_out, ptr, destroy, initialRefcount, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUserObjectRetain(cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectRetain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUserObjectRelease(cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectRelease"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphRetainUserObject( - cudaGraph_t graph, cudaUserObject_t object, unsigned int count __dv(1), - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaUserObject_t, - unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphRetainUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphReleaseUserObject( - cudaGraph_t graph, cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphReleaseUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count); -} - -#endif // CUDA_VERSION >= 
11030 - -extern __host__ cudaError_t CUDARTAPI cudaGetDriverEntryPoint( - const char *symbol, void **funcPtr, unsigned long long flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const char *, void **, unsigned long long); - static auto func_ptr = LoadSymbol("cudaGetDriverEntryPoint"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, funcPtr, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetExportTable( - const void **ppExportTable, const cudaUUID_t *pExportTableId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *); - static auto func_ptr = LoadSymbol("cudaGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -#if CUDA_VERSION >= 11020 - -extern __host__ cudaError_t CUDARTAPI_CDECL -cudaGetFuncBySymbol(cudaFunction_t *functionPtr, const void *symbolPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaFunction_t *, const void *); - static auto func_ptr = LoadSymbol("cudaGetFuncBySymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(functionPtr, symbolPtr); -} - -#endif // CUDA_VERSION >= 11020 - -} // extern "C" diff --git a/source/lib/src/gpu/cudart/cuda_runtime_11_2.inc b/source/lib/src/gpu/cudart/cuda_runtime_11_2.inc deleted file mode 100644 index 5c0ba7fe6a..0000000000 --- a/source/lib/src/gpu/cudart/cuda_runtime_11_2.inc +++ /dev/null @@ -1,2259 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { -extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetLimit(enum cudaLimit limit, - size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaDeviceGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaDeviceGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int *); - static auto func_ptr = - LoadSymbol("cudaDeviceGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig) { - 
using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaDeviceSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig *); - static auto func_ptr = LoadSymbol("cudaDeviceGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaDeviceSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const char *); - static auto func_ptr = LoadSymbol("cudaDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, pciBusId); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, - int len, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(char *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcEventHandle_t *, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle) { - 
using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, cudaIpcEventHandle_t); - static auto func_ptr = LoadSymbol("cudaIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, handle); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcMemHandle_t *, void *); - static auto func_ptr = LoadSymbol("cudaIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle( - void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaIpcMemHandle_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaIpcOpenMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, handle, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadExit(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadExit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetLimit(enum cudaLimit limit, size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaThreadSetLimit"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaThreadGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaThreadGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaThreadSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaPeekAtLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaPeekAtLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorName(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorName"); - if (!func_ptr) return "cudaGetErrorName symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI 
-cudaGetErrorString(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorString"); - if (!func_ptr) return "cudaGetErrorString symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceCount(int *count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaDeviceProp *, int); - static auto func_ptr = LoadSymbol("cudaGetDeviceProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceAttr, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetDefaultMemPool(cudaMemPool_t *memPool, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetDefaultMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetMemPool(int device, cudaMemPool_t memPool) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, cudaMemPool_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, memPool); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetMemPool(cudaMemPool_t *memPool, int device) 
{ - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetNvSciSyncAttributes( - void *nvSciSyncAttrList, int device, int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, int); - static auto func_ptr = - LoadSymbol("cudaDeviceGetNvSciSyncAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(nvSciSyncAttrList, device, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, - int srcDevice, int dstDevice) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceP2PAttr, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, srcDevice, dstDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaChooseDevice(int *device, const struct cudaDeviceProp *prop) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const struct cudaDeviceProp *); - static auto func_ptr = LoadSymbol("cudaChooseDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, prop); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaSetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDevice(int *device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, - int len) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(int *, int); - static auto func_ptr = LoadSymbol("cudaSetValidDevices"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device_arr, len); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags(unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cudaSetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDeviceFlags(unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *); - static auto func_ptr = LoadSymbol("cudaStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags, - int priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int, int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetPriority(cudaStream_t hStream, int *priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, int *); - static auto func_ptr = 
LoadSymbol("cudaStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned int *); - static auto func_ptr = LoadSymbol("cudaStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaCtxResetPersistingL2Cache(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaCtxResetPersistingL2Cache"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetAttribute(cudaStream_t hStream, enum cudaStreamAttrID attr, - union cudaStreamAttrValue *value_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamAttrID, - union cudaStreamAttrValue *); - static auto func_ptr = LoadSymbol("cudaStreamGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value_out); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamSetAttribute(cudaStream_t hStream, enum cudaStreamAttrID attr, - const union cudaStreamAttrValue *value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamAttrID, - const union cudaStreamAttrValue *); - static auto func_ptr = LoadSymbol("cudaStreamSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value); -} - -extern 
__host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamDestroy(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent( - cudaStream_t stream, cudaEvent_t event, unsigned int flags __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, cudaEvent_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, event, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, - void *userData, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamCallback_t, - void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, callback, userData, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamSynchronize(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr, - size_t length __dv(0), unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAttachMemAsync"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, devPtr, length, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamBeginCapture(cudaStream_t stream, enum cudaStreamCaptureMode mode) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureMode); - static auto func_ptr = LoadSymbol("cudaStreamBeginCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaThreadExchangeStreamCaptureMode(enum cudaStreamCaptureMode *mode) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaStreamCaptureMode *); - static auto func_ptr = - LoadSymbol("cudaThreadExchangeStreamCaptureMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamIsCapturing( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cudaStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamGetCaptureInfo( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus, - unsigned long long *pId) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *); - static auto func_ptr = LoadSymbol("cudaStreamGetCaptureInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus, pId); -} - -extern __host__ cudaError_t 
CUDARTAPI cudaStreamGetCaptureInfo_v2( - cudaStream_t stream, enum cudaStreamCaptureStatus *captureStatus_out, - unsigned long long *id_out __dv(0), cudaGraph_t *graph_out __dv(0), - const cudaGraphNode_t **dependencies_out __dv(0), - size_t *numDependencies_out __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *, - cudaGraph_t *, const cudaGraphNode_t **, size_t *); - static auto func_ptr = LoadSymbol("cudaStreamGetCaptureInfo_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, captureStatus_out, id_out, graph_out, - dependencies_out, numDependencies_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamUpdateCaptureDependencies( - cudaStream_t stream, cudaGraphNode_t *dependencies, size_t numDependencies, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraphNode_t *, - size_t, unsigned int); - static auto func_ptr = - LoadSymbol("cudaStreamUpdateCaptureDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, dependencies, numDependencies, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventCreate(cudaEvent_t *event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecord(cudaEvent_t event, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t); - static auto func_ptr 
= LoadSymbol("cudaEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventQuery(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventSynchronize(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventDestroy(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, - cudaEvent_t start, - cudaEvent_t end) { - using FuncPtr = cudaError_t(CUDARTAPI *)(float *, cudaEvent_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ms, start, end); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalMemory( - cudaExternalMemory_t *extMem_out, - const struct cudaExternalMemoryHandleDesc *memHandleDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaExternalMemory_t *, const struct cudaExternalMemoryHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedBuffer( - void **devPtr, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryBufferDesc *bufferDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, 
cudaExternalMemory_t, - const struct cudaExternalMemoryBufferDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmap, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, cudaExternalMemory_t, - const struct cudaExternalMemoryMipmappedArrayDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalMemory(cudaExternalMemory_t extMem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalMemory_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalSemaphore( - cudaExternalSemaphore_t *extSem_out, - const struct cudaExternalSemaphoreHandleDesc *semHandleDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaSignalExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreSignalParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreSignalParams *, - unsigned int, cudaStream_t); - static auto func_ptr = 
LoadSymbol( - "__CUDART_API_PTSZ(cudaSignalExternalSemaphoresAsync_v2)"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaWaitExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreWaitParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreWaitParams *, - unsigned int, cudaStream_t); - static auto func_ptr = LoadSymbol( - "__CUDART_API_PTSZ(cudaWaitExternalSemaphoresAsync_v2)"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -extern __host__ cudaError_t CUDARTAPI -cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel( - const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, 
gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernelMultiDevice( - struct cudaLaunchParams *launchParamsList, unsigned int numDevices, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaLaunchParams *, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, cacheConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, config); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaFuncAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaFuncGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncAttribute, int); - static auto func_ptr = LoadSymbol("cudaFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, attr, value); -} - -extern 
__CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForDevice(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForHost(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchHostFunc(cudaStream_t stream, - cudaHostFn_t fn, - void *userData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaHostFn_t, void *); - static auto func_ptr = LoadSymbol("cudaLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, fn, userData); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, - int blockSize, - size_t dynamicSMemSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t); - static auto func_ptr = - LoadSymbol("cudaOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyAvailableDynamicSMemPerBlock(size_t *dynamicSmemSize, - const void *func, int numBlocks, - int blockSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *, int, int); - static auto func_ptr = - LoadSymbol("cudaOccupancyAvailableDynamicSMemPerBlock"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dynamicSmemSize, func, numBlocks, blockSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, - const void *func, - 
int blockSize, - size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMallocManaged(void **devPtr, size_t size, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMalloc(void **devPtr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMalloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocHost(void **ptr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMallocHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocPitch(void **devPtr, - size_t *pitch, - size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMallocPitch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocArray( - cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width, - size_t height __dv(0), unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - size_t, size_t, unsigned 
int); - static auto func_ptr = LoadSymbol("cudaMallocArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, width, height, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFree(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t); - static auto func_ptr = LoadSymbol("cudaFreeArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t); - static auto func_ptr = LoadSymbol("cudaFreeMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHost, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostRegister(void *ptr, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostRegister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostUnregister(void *ptr) { 
- using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostGetDevicePointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevice, pHost, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostGetFlags(unsigned int *pFlags, - void *pHost) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cudaHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, pHost); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3D(struct cudaPitchedPtr *pitchedDevPtr, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr *, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMalloc3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, extent); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, - struct cudaExtent extent, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int); - static auto func_ptr = LoadSymbol("cudaMalloc3DArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, extent, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, - const struct cudaChannelFormatDesc *desc, struct cudaExtent extent, - unsigned int numLevels, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, const struct 
cudaChannelFormatDesc *, - struct cudaExtent, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, desc, extent, numLevels, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetMipmappedArrayLevel( - cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, - unsigned int level) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaMipmappedArray_const_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGetMipmappedArrayLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(levelArray, mipmappedArray, level); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync( - const struct cudaMemcpy3DParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy3DPeerAsync( - const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeerAsync"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, - size_t *total) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaMemGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -extern __host__ cudaError_t CUDARTAPI -cudaArrayGetInfo(struct cudaChannelFormatDesc *desc, struct cudaExtent *extent, - unsigned int *flags, cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - struct cudaExtent *, unsigned int *, - cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, extent, flags, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaArrayGetPlane( - cudaArray_t *pPlaneArray, cudaArray_t hArray, unsigned int planeIdx) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t *, cudaArray_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaArrayGetPlane"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pPlaneArray, hArray, planeIdx); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, - size_t count, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyPeer(void *dst, int dstDevice, - const void *src, - int srcDevice, - size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, int, const void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count); -} - -extern __host__ cudaError_t 
CUDARTAPI cudaMemcpy2D(void *dst, size_t dpitch, - const void *src, - size_t spitch, size_t width, - size_t height, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArray( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArray( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, size_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, - size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - cudaArray_const_t, size_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DArrayToArray"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol( - const void *symbol, const void *src, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol( - void *dst, const void *symbol, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemcpyAsync(void *dst, const void *src, size_t count, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, - size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, const void *, int, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count, stream); -} - -extern __host__ 
__cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync( - void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, - size_t height, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - const void *, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArrayAsync( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, - size_t, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbolAsync( - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, 
size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbolAsync( - void *dst, const void *symbol, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, - size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset2D(void *devPtr, size_t pitch, - int value, size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemset2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset3D( - struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMemset3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync( - void *devPtr, int value, size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t, cudaStream_t); - 
static auto func_ptr = LoadSymbol("cudaMemsetAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, - size_t height, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, - struct cudaExtent extent, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, - struct cudaExtent, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolAddress(void **devPtr, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolAddress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolSize(size_t *size, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(size, symbol); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, size_t, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemPrefetchAsync"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, - enum cudaMemoryAdvise, int); - static auto func_ptr = LoadSymbol("cudaMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttribute( - void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, - const void *devPtr, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, size_t, enum cudaMemRangeAttribute, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttributes( - void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, - size_t numAttributes, const void *devPtr, size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, enum cudaMemRangeAttribute *, - size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t, size_t, size_t, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind); -} - -extern __CUDA_DEPRECATED __host__ 
cudaError_t CUDARTAPI -cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, cudaArray_const_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind, stream); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArrayAsync(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = 
LoadSymbol("cudaMemcpyFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocAsync(void **devPtr, - size_t size, - cudaStream_t hStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMallocAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, hStream); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeAsync(void *devPtr, - cudaStream_t hStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaFreeAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, hStream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolTrimTo(cudaMemPool_t memPool, - size_t minBytesToKeep) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemPoolTrimTo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, minBytesToKeep); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolSetAttribute( - cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMemPool_t, enum cudaMemPoolAttr, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolGetAttribute( - cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMemPool_t, enum cudaMemPoolAttr, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolSetAccess(cudaMemPool_t memPool, - const struct 
cudaMemAccessDesc *descList, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMemPool_t, const struct cudaMemAccessDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaMemPoolSetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, descList, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolGetAccess(enum cudaMemAccessFlags *flags, cudaMemPool_t memPool, - struct cudaMemLocation *location) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - enum cudaMemAccessFlags *, cudaMemPool_t, struct cudaMemLocation *); - static auto func_ptr = LoadSymbol("cudaMemPoolGetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags, memPool, location); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolCreate( - cudaMemPool_t *memPool, const struct cudaMemPoolProps *poolProps) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, - const struct cudaMemPoolProps *); - static auto func_ptr = LoadSymbol("cudaMemPoolCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, poolProps); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolDestroy(cudaMemPool_t memPool) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t); - static auto func_ptr = LoadSymbol("cudaMemPoolDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocFromPoolAsync( - void **ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t, cudaMemPool_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMallocFromPoolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, memPool, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolExportToShareableHandle( - void *shareableHandle, cudaMemPool_t memPool, - enum cudaMemAllocationHandleType handleType, unsigned int flags) { - using 
FuncPtr = cudaError_t(CUDARTAPI *)( - void *, cudaMemPool_t, enum cudaMemAllocationHandleType, unsigned int); - static auto func_ptr = - LoadSymbol("cudaMemPoolExportToShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(shareableHandle, memPool, handleType, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolImportFromShareableHandle( - cudaMemPool_t *memPool, void *shareableHandle, - enum cudaMemAllocationHandleType handleType, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMemPool_t *, void *, enum cudaMemAllocationHandleType, unsigned int); - static auto func_ptr = - LoadSymbol("cudaMemPoolImportFromShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, shareableHandle, handleType, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolExportPointer( - struct cudaMemPoolPtrExportData *exportData, void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaMemPoolPtrExportData *, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolExportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(exportData, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolImportPointer(void **ptr, cudaMemPool_t memPool, - struct cudaMemPoolPtrExportData *exportData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, cudaMemPool_t, - struct cudaMemPoolPtrExportData *); - static auto func_ptr = LoadSymbol("cudaMemPoolImportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, memPool, exportData); -} - -extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes( - struct cudaPointerAttributes *attributes, const void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPointerAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attributes, ptr); -} - -extern 
__host__ cudaError_t CUDARTAPI -cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, device, peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int); - static auto func_ptr = LoadSymbol("cudaDeviceEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceDisablePeerAccess(int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceSetMapFlags( - cudaGraphicsResource_t resource, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphicsResourceSetMapFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsMapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsMapResources"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsUnmapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedPointer( - void **devPtr, size_t *size, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsSubResourceGetMappedArray( - cudaArray_t *array, cudaGraphicsResource_t resource, - unsigned int arrayIndex, unsigned int mipLevel) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaGraphicsResource_t, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, resource, arrayIndex, mipLevel); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsResourceGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, resource); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindTexture( - size_t *offset, const struct textureReference *texref, const void *devPtr, - const struct 
cudaChannelFormatDesc *desc, size_t size __dv(UINT_MAX)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, size); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaBindTexture2D(size_t *offset, const struct textureReference *texref, - const void *devPtr, const struct cudaChannelFormatDesc *desc, - size_t width, size_t height, size_t pitch) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, width, height, pitch); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindTextureToArray( - const struct textureReference *texref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, array, desc); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaBindTextureToMipmappedArray(const struct textureReference *texref, - cudaMipmappedArray_const_t mipmappedArray, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaMipmappedArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, mipmappedArray, desc); 
-} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaUnbindTexture(const struct textureReference *texref) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaUnbindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaGetTextureAlignmentOffset(size_t *offset, - const struct textureReference *texref) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(size_t *, const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaGetTextureAlignmentOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaGetTextureReference( - const struct textureReference **texref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct textureReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetTextureReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, symbol); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindSurfaceToArray( - const struct surfaceReference *surfref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct surfaceReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindSurfaceToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, array, desc); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaGetSurfaceReference( - const struct surfaceReference **surfref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct surfaceReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSurfaceReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(surfref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetChannelDesc( - struct cudaChannelFormatDesc *desc, cudaArray_const_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - cudaArray_const_t); - static auto func_ptr = LoadSymbol("cudaGetChannelDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, array); -} - -extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( - int x, int y, int z, int w, enum cudaChannelFormatKind f) { - using FuncPtr = struct cudaChannelFormatDesc(CUDARTAPI *)( - int, int, int, int, enum cudaChannelFormatKind); - static auto func_ptr = LoadSymbol("cudaCreateChannelDesc"); - return func_ptr(x, y, z, w, f); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject( - cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, - const struct cudaTextureDesc *pTexDesc, - const struct cudaResourceViewDesc *pResViewDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaTextureObject_t *, const struct cudaResourceDesc *, - const struct cudaTextureDesc *, const struct cudaResourceViewDesc *); - static auto func_ptr = LoadSymbol("cudaCreateTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyTextureObject(cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaDestroyTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceDesc"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc( - struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaTextureDesc *, cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaGetTextureObjectTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc( - struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaResourceViewDesc *, - cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateSurfaceObject( - cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t *, - const struct cudaResourceDesc *); - static auto func_ptr = LoadSymbol("cudaCreateSurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t); - static auto func_ptr = LoadSymbol("cudaDestroySurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaSurfaceObject_t); - static auto func_ptr = - LoadSymbol("cudaGetSurfaceObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(pResDesc, surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaRuntimeGetVersion(int *runtimeVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaRuntimeGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(runtimeVersion); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraph, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddKernelNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddKernelNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams( - cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams( - 
cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphKernelNodeCopyAttributes(cudaGraphNode_t hSrc, cudaGraphNode_t hDst) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t); - static auto func_ptr = - LoadSymbol("cudaGraphKernelNodeCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hSrc, hDst); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetAttribute( - cudaGraphNode_t hNode, enum cudaKernelNodeAttrID attr, - union cudaKernelNodeAttrValue *value_out) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaKernelNodeAttrID, - union cudaKernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetAttribute( - cudaGraphNode_t hNode, enum cudaKernelNodeAttrID attr, - const union cudaKernelNodeAttrValue *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaKernelNodeAttrID, - const union cudaKernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemcpy3DParms *pCopyParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct 
cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pCopyParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams( - cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams( - cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemsetNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemsetParams *pMemsetParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemsetNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pMemsetParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeGetParams( - cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ 
cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams( - cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddHostNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddHostNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeGetParams( - cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams( - cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaGraph_t childGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI 
*)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - childGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddEmptyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphClone, originalGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, - cudaGraph_t clonedGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pNode, originalNode, clonedGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType) { - using 
FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaGraphNodeType *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pType); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t graph, - cudaGraphNode_t *nodes, - size_t *numNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, nodes, numNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes( - cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetRootNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, pRootNodes, pNumRootNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph, - cudaGraphNode_t *from, - cudaGraphNode_t *to, - size_t *numEdges) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, - cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numEdges); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies( - cudaGraphNode_t node, cudaGraphNode_t *pDependencies, - size_t *pNumDependencies) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependencies, pNumDependencies); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes( - cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, - size_t *pNumDependentNodes) { - 
using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependentNodes, pNumDependentNodes); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphDestroyNode(cudaGraphNode_t node) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate( - cudaGraphExec_t *pGraphExec, cudaGraph_t graph, cudaGraphNode_t *pErrorNode, - char *pLogBuffer, size_t bufferSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - cudaGraphNode_t *, char *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphInstantiate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize); -} - -extern __host__ cudaError_t 
CUDARTAPI cudaGraphExecKernelNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemsetNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphExecHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, - cudaGraphNode_t *hErrorNode_out, - enum 
cudaGraphExecUpdateResult *updateResult_out) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraph_t, cudaGraphNode_t *, - enum cudaGraphExecUpdateResult *); - static auto func_ptr = LoadSymbol("cudaGraphExecUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hGraph, hErrorNode_out, updateResult_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecDestroy(cudaGraphExec_t graphExec) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t); - static auto func_ptr = LoadSymbol("cudaGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetExportTable( - const void **ppExportTable, const cudaUUID_t *pExportTableId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *); - static auto func_ptr = LoadSymbol("cudaGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -extern __host__ cudaError_t CUDARTAPI_CDECL -cudaGetFuncBySymbol(cudaFunction_t *functionPtr, const void *symbolPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaFunction_t *, const void *); - static auto func_ptr = LoadSymbol("_CDECL cudaGetFuncBySymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(functionPtr, symbolPtr); -} - 
-extern __host__ cudaError_t CUDARTAPI -cudaGraphAddEventRecordNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEventRecordNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddEventWaitNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEventWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, event); -} - -#if CUDA_VERSION >= 11030 - -extern __host__ cudaError_t CUDARTAPI cudaUserObjectCreate( - cudaUserObject_t *object_out, void *ptr, cudaHostFn_t destroy, - unsigned int initialRefcount, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaUserObject_t *, void *, cudaHostFn_t, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object_out, ptr, destroy, initialRefcount, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUserObjectRetain(cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectRetain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUserObjectRelease(cudaUserObject_t object, 
unsigned int count __dv(1)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectRelease"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphRetainUserObject( - cudaGraph_t graph, cudaUserObject_t object, unsigned int count __dv(1), - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaUserObject_t, - unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphRetainUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphReleaseUserObject( - cudaGraph_t graph, cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphReleaseUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count); -} - -#endif // CUDA_VERSION >= 11030 - -extern __host__ cudaError_t CUDARTAPI cudaGetDriverEntryPoint( - const char *symbol, void **funcPtr, unsigned long long flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const char *, void **, unsigned long long); - static auto func_ptr = LoadSymbol("cudaGetDriverEntryPoint"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, funcPtr, flags); -} - -} // extern "C" diff --git a/source/lib/src/gpu/cudart/cuda_runtime_11_8.inc b/source/lib/src/gpu/cudart/cuda_runtime_11_8.inc deleted file mode 100644 index 8000ce1f92..0000000000 --- a/source/lib/src/gpu/cudart/cuda_runtime_11_8.inc +++ /dev/null @@ -1,2771 +0,0 @@ -// Auto-generated, do not edit. 
- -extern "C" { -extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetLimit(enum cudaLimit limit, - size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaDeviceGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetTexture1DLinearMaxWidth( - size_t *maxWidthInElements, const struct cudaChannelFormatDesc *fmtDesc, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct cudaChannelFormatDesc *, int); - static auto func_ptr = - LoadSymbol("cudaDeviceGetTexture1DLinearMaxWidth"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(maxWidthInElements, fmtDesc, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaDeviceGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - 
-extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int *); - static auto func_ptr = - LoadSymbol("cudaDeviceGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaDeviceSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig *); - static auto func_ptr = LoadSymbol("cudaDeviceGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaDeviceSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const char *); - static auto func_ptr = LoadSymbol("cudaDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, pciBusId); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, - int len, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(char *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, 
device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcEventHandle_t *, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, cudaIpcEventHandle_t); - static auto func_ptr = LoadSymbol("cudaIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, handle); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcMemHandle_t *, void *); - static auto func_ptr = LoadSymbol("cudaIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle( - void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaIpcMemHandle_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaIpcOpenMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, handle, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceFlushGPUDirectRDMAWrites( - enum cudaFlushGPUDirectRDMAWritesTarget target, - enum cudaFlushGPUDirectRDMAWritesScope scope) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(enum cudaFlushGPUDirectRDMAWritesTarget, - enum cudaFlushGPUDirectRDMAWritesScope); - static auto 
func_ptr = - LoadSymbol("cudaDeviceFlushGPUDirectRDMAWrites"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(target, scope); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadExit(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadExit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetLimit(enum cudaLimit limit, size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaThreadSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaThreadGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaThreadGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaThreadSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaPeekAtLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaPeekAtLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorName(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorName"); - if (!func_ptr) return "cudaGetErrorName symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorString(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorString"); - if (!func_ptr) return "cudaGetErrorString symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceCount(int *count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaDeviceProp *, int); - static auto func_ptr = LoadSymbol("cudaGetDeviceProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, enum 
cudaDeviceAttr, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetDefaultMemPool(cudaMemPool_t *memPool, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetDefaultMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetMemPool(int device, cudaMemPool_t memPool) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, cudaMemPool_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, memPool); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetMemPool(cudaMemPool_t *memPool, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetNvSciSyncAttributes( - void *nvSciSyncAttrList, int device, int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, int); - static auto func_ptr = - LoadSymbol("cudaDeviceGetNvSciSyncAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(nvSciSyncAttrList, device, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, - int srcDevice, int dstDevice) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceP2PAttr, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, srcDevice, dstDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaChooseDevice(int 
*device, const struct cudaDeviceProp *prop) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const struct cudaDeviceProp *); - static auto func_ptr = LoadSymbol("cudaChooseDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, prop); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaSetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDevice(int *device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, - int len) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int); - static auto func_ptr = LoadSymbol("cudaSetValidDevices"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device_arr, len); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags(unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cudaSetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDeviceFlags(unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *); - static auto func_ptr = LoadSymbol("cudaStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI 
-cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags, - int priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int, int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetPriority(cudaStream_t hStream, int *priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, int *); - static auto func_ptr = LoadSymbol("cudaStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned int *); - static auto func_ptr = LoadSymbol("cudaStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaCtxResetPersistingL2Cache(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaCtxResetPersistingL2Cache"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src); -} - 
-extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, - cudaStreamAttrValue *value_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamAttrID, - cudaStreamAttrValue *); - static auto func_ptr = LoadSymbol("cudaStreamGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value_out); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamSetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, - const cudaStreamAttrValue *value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamAttrID, - const cudaStreamAttrValue *); - static auto func_ptr = LoadSymbol("cudaStreamSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamDestroy(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent( - cudaStream_t stream, cudaEvent_t event, unsigned int flags __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, cudaEvent_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, event, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, - void *userData, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamCallback_t, - void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, callback, userData, flags); -} - -extern __host__ 
cudaError_t CUDARTAPI -cudaStreamSynchronize(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr, - size_t length __dv(0), unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, devPtr, length, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamBeginCapture(cudaStream_t stream, enum cudaStreamCaptureMode mode) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureMode); - static auto func_ptr = LoadSymbol("cudaStreamBeginCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaThreadExchangeStreamCaptureMode(enum cudaStreamCaptureMode *mode) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaStreamCaptureMode *); - static auto func_ptr = - LoadSymbol("cudaThreadExchangeStreamCaptureMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pGraph); -} 
- -extern __host__ cudaError_t CUDARTAPI cudaStreamIsCapturing( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cudaStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamGetCaptureInfo( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus, - unsigned long long *pId) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *); - static auto func_ptr = LoadSymbol("cudaStreamGetCaptureInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus, pId); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamGetCaptureInfo_v2( - cudaStream_t stream, enum cudaStreamCaptureStatus *captureStatus_out, - unsigned long long *id_out __dv(0), cudaGraph_t *graph_out __dv(0), - const cudaGraphNode_t **dependencies_out __dv(0), - size_t *numDependencies_out __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *, - cudaGraph_t *, const cudaGraphNode_t **, size_t *); - static auto func_ptr = LoadSymbol("cudaStreamGetCaptureInfo_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, captureStatus_out, id_out, graph_out, - dependencies_out, numDependencies_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamUpdateCaptureDependencies( - cudaStream_t stream, cudaGraphNode_t *dependencies, size_t numDependencies, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraphNode_t *, - size_t, unsigned int); - static auto func_ptr = - LoadSymbol("cudaStreamUpdateCaptureDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, dependencies, 
numDependencies, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventCreate(cudaEvent_t *event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecord(cudaEvent_t event, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecordWithFlags(cudaEvent_t event, cudaStream_t stream __dv(0), - unsigned int flags __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventRecordWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventQuery(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventSynchronize(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ 
cudaError_t CUDARTAPI -cudaEventDestroy(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, - cudaEvent_t start, - cudaEvent_t end) { - using FuncPtr = cudaError_t(CUDARTAPI *)(float *, cudaEvent_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ms, start, end); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalMemory( - cudaExternalMemory_t *extMem_out, - const struct cudaExternalMemoryHandleDesc *memHandleDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaExternalMemory_t *, const struct cudaExternalMemoryHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedBuffer( - void **devPtr, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryBufferDesc *bufferDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaExternalMemory_t, - const struct cudaExternalMemoryBufferDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmap, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, cudaExternalMemory_t, - const struct cudaExternalMemoryMipmappedArrayDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalMemory(cudaExternalMemory_t extMem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalMemory_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalSemaphore( - cudaExternalSemaphore_t *extSem_out, - const struct cudaExternalSemaphoreHandleDesc *semHandleDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaSignalExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreSignalParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreSignalParams *, - unsigned int, cudaStream_t); - static auto func_ptr = LoadSymbol( - "__CUDART_API_PTSZ(cudaSignalExternalSemaphoresAsync_v2)"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaWaitExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreWaitParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreWaitParams *, - unsigned int, cudaStream_t); - static auto func_ptr = LoadSymbol( - "__CUDART_API_PTSZ(cudaWaitExternalSemaphoresAsync_v2)"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -extern __host__ cudaError_t CUDARTAPI -cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchKernelExC( - const cudaLaunchConfig_t *config, const void *func, void **args) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const cudaLaunchConfig_t *, - const void *, void **); - static auto func_ptr = LoadSymbol("cudaLaunchKernelExC"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config, func, args); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel( - const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaLaunchCooperativeKernelMultiDevice( - struct cudaLaunchParams *launchParamsList, unsigned int numDevices, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaLaunchParams *, - unsigned int, 
unsigned int); - static auto func_ptr = - LoadSymbol("cudaLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, cacheConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, config); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaFuncAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaFuncGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncAttribute, int); - static auto func_ptr = LoadSymbol("cudaFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, attr, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForDevice(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __CUDA_DEPRECATED __host__ 
cudaError_t CUDARTAPI -cudaSetDoubleForHost(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchHostFunc(cudaStream_t stream, - cudaHostFn_t fn, - void *userData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaHostFn_t, void *); - static auto func_ptr = LoadSymbol("cudaLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, fn, userData); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, - int blockSize, - size_t dynamicSMemSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t); - static auto func_ptr = - LoadSymbol("cudaOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyAvailableDynamicSMemPerBlock(size_t *dynamicSmemSize, - const void *func, int numBlocks, - int blockSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *, int, int); - static auto func_ptr = - LoadSymbol("cudaOccupancyAvailableDynamicSMemPerBlock"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dynamicSmemSize, func, numBlocks, blockSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, - const void *func, - int blockSize, - size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxPotentialClusterSize(int *clusterSize, const void *func, - const cudaLaunchConfig_t *launchConfig) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, const cudaLaunchConfig_t *); - static auto func_ptr = - LoadSymbol("cudaOccupancyMaxPotentialClusterSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(clusterSize, func, launchConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveClusters(int *numClusters, const void *func, - const cudaLaunchConfig_t *launchConfig) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, const cudaLaunchConfig_t *); - static auto func_ptr = LoadSymbol("cudaOccupancyMaxActiveClusters"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numClusters, func, launchConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMallocManaged(void **devPtr, size_t size, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMalloc(void **devPtr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMalloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocHost(void **ptr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMallocHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocPitch(void **devPtr, - size_t *pitch, - size_t 
width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMallocPitch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocArray( - cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width, - size_t height __dv(0), unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - size_t, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, width, height, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFree(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t); - static auto func_ptr = LoadSymbol("cudaFreeArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t); - static auto func_ptr = LoadSymbol("cudaFreeMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI 
*)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHost, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostRegister(void *ptr, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostRegister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostUnregister(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostGetDevicePointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevice, pHost, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostGetFlags(unsigned int *pFlags, - void *pHost) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cudaHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, pHost); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3D(struct cudaPitchedPtr *pitchedDevPtr, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr *, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMalloc3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, extent); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, - struct cudaExtent extent, unsigned int flags __dv(0)) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int); - static auto func_ptr = LoadSymbol("cudaMalloc3DArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, extent, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, - const struct cudaChannelFormatDesc *desc, struct cudaExtent extent, - unsigned int numLevels, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, desc, extent, numLevels, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetMipmappedArrayLevel( - cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, - unsigned int level) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaMipmappedArray_const_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGetMipmappedArrayLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(levelArray, mipmappedArray, level); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync( - const struct 
cudaMemcpy3DParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy3DPeerAsync( - const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, - size_t *total) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaMemGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -extern __host__ cudaError_t CUDARTAPI -cudaArrayGetInfo(struct cudaChannelFormatDesc *desc, struct cudaExtent *extent, - unsigned int *flags, cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - struct cudaExtent *, unsigned int *, - cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, extent, flags, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaArrayGetPlane( - cudaArray_t *pPlaneArray, cudaArray_t hArray, unsigned int planeIdx) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t *, cudaArray_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaArrayGetPlane"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pPlaneArray, hArray, planeIdx); -} - -extern __host__ cudaError_t CUDARTAPI cudaArrayGetMemoryRequirements( - struct cudaArrayMemoryRequirements *memoryRequirements, cudaArray_t array, - int device) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(struct cudaArrayMemoryRequirements *, - cudaArray_t, int); - static auto func_ptr = LoadSymbol("cudaArrayGetMemoryRequirements"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memoryRequirements, array, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMipmappedArrayGetMemoryRequirements( - struct cudaArrayMemoryRequirements *memoryRequirements, - cudaMipmappedArray_t mipmap, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaArrayMemoryRequirements *, - cudaMipmappedArray_t, int); - static auto func_ptr = - LoadSymbol("cudaMipmappedArrayGetMemoryRequirements"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memoryRequirements, mipmap, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaArrayGetSparseProperties( - struct cudaArraySparseProperties *sparseProperties, cudaArray_t array) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaArraySparseProperties *, cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetSparseProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(sparseProperties, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaMipmappedArrayGetSparseProperties( - struct cudaArraySparseProperties *sparseProperties, - cudaMipmappedArray_t mipmap) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaArraySparseProperties *, - cudaMipmappedArray_t); - static auto func_ptr = - LoadSymbol("cudaMipmappedArrayGetSparseProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(sparseProperties, mipmap); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, - size_t count, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind); -} - -extern __host__ 
cudaError_t CUDARTAPI cudaMemcpyPeer(void *dst, int dstDevice, - const void *src, - int srcDevice, - size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, int, const void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2D(void *dst, size_t dpitch, - const void *src, - size_t spitch, size_t width, - size_t height, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArray( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArray( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, size_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray( - cudaArray_t dst, size_t 
wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, - size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - cudaArray_const_t, size_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol( - const void *symbol, const void *src, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol( - void *dst, const void *symbol, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemcpyAsync(void *dst, const void *src, size_t count, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI 
-cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, - size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, const void *, int, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync( - void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, - size_t height, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - const void *, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArrayAsync( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, - size_t, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = 
LoadSymbol("cudaMemcpy2DFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbolAsync( - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbolAsync( - void *dst, const void *symbol, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, - size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset2D(void *devPtr, size_t pitch, - int value, size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemset2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset3D( - struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent) { - using FuncPtr = - 
cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMemset3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync( - void *devPtr, int value, size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemsetAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, - size_t height, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, - struct cudaExtent extent, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, - struct cudaExtent, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolAddress(void **devPtr, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolAddress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolSize(size_t *size, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const 
void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(size, symbol); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, size_t, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, - enum cudaMemoryAdvise, int); - static auto func_ptr = LoadSymbol("cudaMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttribute( - void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, - const void *devPtr, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, size_t, enum cudaMemRangeAttribute, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttributes( - void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, - size_t numAttributes, const void *devPtr, size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, enum cudaMemRangeAttribute *, - size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI 
-cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t, size_t, size_t, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, cudaArray_const_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(dst, wOffset, hOffset, src, count, kind, stream); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArrayAsync(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocAsync(void **devPtr, - size_t size, - cudaStream_t hStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMallocAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, hStream); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeAsync(void *devPtr, - cudaStream_t hStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaFreeAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, hStream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolTrimTo(cudaMemPool_t memPool, - size_t minBytesToKeep) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemPoolTrimTo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, minBytesToKeep); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolSetAttribute( - cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMemPool_t, enum cudaMemPoolAttr, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, attr, value); -} - -extern __host__ 
cudaError_t CUDARTAPI cudaMemPoolGetAttribute( - cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMemPool_t, enum cudaMemPoolAttr, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolSetAccess(cudaMemPool_t memPool, - const struct cudaMemAccessDesc *descList, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMemPool_t, const struct cudaMemAccessDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaMemPoolSetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, descList, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolGetAccess(enum cudaMemAccessFlags *flags, cudaMemPool_t memPool, - struct cudaMemLocation *location) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - enum cudaMemAccessFlags *, cudaMemPool_t, struct cudaMemLocation *); - static auto func_ptr = LoadSymbol("cudaMemPoolGetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags, memPool, location); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolCreate( - cudaMemPool_t *memPool, const struct cudaMemPoolProps *poolProps) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, - const struct cudaMemPoolProps *); - static auto func_ptr = LoadSymbol("cudaMemPoolCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, poolProps); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolDestroy(cudaMemPool_t memPool) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t); - static auto func_ptr = LoadSymbol("cudaMemPoolDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocFromPoolAsync( - void **ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream) 
{ - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t, cudaMemPool_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMallocFromPoolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, memPool, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolExportToShareableHandle( - void *shareableHandle, cudaMemPool_t memPool, - enum cudaMemAllocationHandleType handleType, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, cudaMemPool_t, enum cudaMemAllocationHandleType, unsigned int); - static auto func_ptr = - LoadSymbol("cudaMemPoolExportToShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(shareableHandle, memPool, handleType, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolImportFromShareableHandle( - cudaMemPool_t *memPool, void *shareableHandle, - enum cudaMemAllocationHandleType handleType, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMemPool_t *, void *, enum cudaMemAllocationHandleType, unsigned int); - static auto func_ptr = - LoadSymbol("cudaMemPoolImportFromShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, shareableHandle, handleType, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolExportPointer( - struct cudaMemPoolPtrExportData *exportData, void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaMemPoolPtrExportData *, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolExportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(exportData, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolImportPointer(void **ptr, cudaMemPool_t memPool, - struct cudaMemPoolPtrExportData *exportData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, cudaMemPool_t, - struct cudaMemPoolPtrExportData *); - static auto func_ptr = LoadSymbol("cudaMemPoolImportPointer"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(ptr, memPool, exportData); -} - -extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes( - struct cudaPointerAttributes *attributes, const void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPointerAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attributes, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, device, peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int); - static auto func_ptr = LoadSymbol("cudaDeviceEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceDisablePeerAccess(int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceSetMapFlags( - cudaGraphicsResource_t resource, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t, unsigned int); - static auto func_ptr = 
LoadSymbol("cudaGraphicsResourceSetMapFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsMapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsUnmapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedPointer( - void **devPtr, size_t *size, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsSubResourceGetMappedArray( - cudaArray_t *array, cudaGraphicsResource_t resource, - unsigned int arrayIndex, unsigned int mipLevel) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaGraphicsResource_t, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, resource, arrayIndex, mipLevel); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsResourceGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t 
resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, resource); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindTexture( - size_t *offset, const struct textureReference *texref, const void *devPtr, - const struct cudaChannelFormatDesc *desc, size_t size __dv(UINT_MAX)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, size); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaBindTexture2D(size_t *offset, const struct textureReference *texref, - const void *devPtr, const struct cudaChannelFormatDesc *desc, - size_t width, size_t height, size_t pitch) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, width, height, pitch); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindTextureToArray( - const struct textureReference *texref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, array, desc); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI 
-cudaBindTextureToMipmappedArray(const struct textureReference *texref, - cudaMipmappedArray_const_t mipmappedArray, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaMipmappedArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, mipmappedArray, desc); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaUnbindTexture(const struct textureReference *texref) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaUnbindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaGetTextureAlignmentOffset(size_t *offset, - const struct textureReference *texref) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(size_t *, const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaGetTextureAlignmentOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaGetTextureReference( - const struct textureReference **texref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct textureReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetTextureReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, symbol); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindSurfaceToArray( - const struct surfaceReference *surfref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct surfaceReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = 
LoadSymbol("cudaBindSurfaceToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, array, desc); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaGetSurfaceReference( - const struct surfaceReference **surfref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct surfaceReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSurfaceReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetChannelDesc( - struct cudaChannelFormatDesc *desc, cudaArray_const_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - cudaArray_const_t); - static auto func_ptr = LoadSymbol("cudaGetChannelDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, array); -} - -extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( - int x, int y, int z, int w, enum cudaChannelFormatKind f) { - using FuncPtr = struct cudaChannelFormatDesc(CUDARTAPI *)( - int, int, int, int, enum cudaChannelFormatKind); - static auto func_ptr = LoadSymbol("cudaCreateChannelDesc"); - return func_ptr(x, y, z, w, f); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject( - cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, - const struct cudaTextureDesc *pTexDesc, - const struct cudaResourceViewDesc *pResViewDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaTextureObject_t *, const struct cudaResourceDesc *, - const struct cudaTextureDesc *, const struct cudaResourceViewDesc *); - static auto func_ptr = LoadSymbol("cudaCreateTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject_v2( - cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, - const 
struct cudaTextureDesc_v2 *pTexDesc, - const struct cudaResourceViewDesc *pResViewDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaTextureObject_t *, const struct cudaResourceDesc *, - const struct cudaTextureDesc_v2 *, const struct cudaResourceViewDesc *); - static auto func_ptr = LoadSymbol("cudaCreateTextureObject_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyTextureObject(cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaDestroyTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc( - struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaTextureDesc *, cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaGetTextureObjectTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc_v2( - struct cudaTextureDesc_v2 *pTexDesc, cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaTextureDesc_v2 *, - cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectTextureDesc_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -extern 
__host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc( - struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaResourceViewDesc *, - cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateSurfaceObject( - cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t *, - const struct cudaResourceDesc *); - static auto func_ptr = LoadSymbol("cudaCreateSurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t); - static auto func_ptr = LoadSymbol("cudaDestroySurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaSurfaceObject_t); - static auto func_ptr = - LoadSymbol("cudaGetSurfaceObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaRuntimeGetVersion(int *runtimeVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static 
auto func_ptr = LoadSymbol("cudaRuntimeGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(runtimeVersion); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraph, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddKernelNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddKernelNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams( - cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams( - cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphKernelNodeCopyAttributes(cudaGraphNode_t hSrc, cudaGraphNode_t hDst) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t); - static auto func_ptr = - LoadSymbol("cudaGraphKernelNodeCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hSrc, hDst); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetAttribute( - cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, - cudaKernelNodeAttrValue *value_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, cudaKernelNodeAttrID, cudaKernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetAttribute( - cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, - const cudaKernelNodeAttrValue *value) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, cudaKernelNodeAttrID, const cudaKernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemcpy3DParms *pCopyParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pCopyParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNodeToSymbol( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, - const void *, const void *, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNodeToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, symbol, - src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNodeFromSymbol( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dst, - const void *symbol, size_t count, size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, void *, - const void *, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphAddMemcpyNodeFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, dst, - symbol, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode1D( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dst, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, void *, - const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode1D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, dst, src, - count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams( - cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemcpy3DParms *); - static auto func_ptr = 
LoadSymbol("cudaGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams( - cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParamsToSymbol( - cudaGraphNode_t node, const void *symbol, const void *src, size_t count, - size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, const void *, const void *, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphMemcpyNodeSetParamsToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParamsFromSymbol( - cudaGraphNode_t node, void *dst, const void *symbol, size_t count, - size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphMemcpyNodeSetParamsFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, dst, symbol, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphMemcpyNodeSetParams1D(cudaGraphNode_t node, void *dst, const void *src, - size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, void *, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams1D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, dst, 
src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemsetNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemsetParams *pMemsetParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemsetNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pMemsetParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeGetParams( - cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams( - cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddHostNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddHostNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaGraphHostNodeGetParams( - cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams( - cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaGraph_t childGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - childGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddEmptyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEmptyNode"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddEventRecordNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEventRecordNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventRecordNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t *); - static auto func_ptr = - LoadSymbol("cudaGraphEventRecordNodeGetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventRecordNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = - LoadSymbol("cudaGraphEventRecordNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddEventWaitNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEventWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventWaitNodeGetEvent(cudaGraphNode_t node, cudaEvent_t 
*event_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaGraphEventWaitNodeGetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventWaitNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaGraphEventWaitNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddExternalSemaphoresSignalNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, - const struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphAddExternalSemaphoresSignalNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresSignalNodeGetParams( - cudaGraphNode_t hNode, - struct cudaExternalSemaphoreSignalNodeParams *params_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresSignalNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresSignalNodeSetParams( - cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, const struct 
cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresSignalNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddExternalSemaphoresWaitNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, - const struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphAddExternalSemaphoresWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresWaitNodeGetParams( - cudaGraphNode_t hNode, - struct cudaExternalSemaphoreWaitNodeParams *params_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresWaitNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresWaitNodeSetParams( - cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, const struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresWaitNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemAllocNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - struct 
cudaMemAllocNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - struct cudaMemAllocNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemAllocNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemAllocNodeGetParams( - cudaGraphNode_t node, struct cudaMemAllocNodeParams *params_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - struct cudaMemAllocNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemAllocNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, params_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemFreeNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, void *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemFreeNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, dptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphMemFreeNodeGetParams(cudaGraphNode_t node, void *dptr_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, void *); - static auto func_ptr = LoadSymbol("cudaGraphMemFreeNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, dptr_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGraphMemTrim(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceGraphMemTrim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetGraphMemAttribute( - int device, enum 
cudaGraphMemAttributeType attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, enum cudaGraphMemAttributeType, void *); - static auto func_ptr = LoadSymbol("cudaDeviceGetGraphMemAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetGraphMemAttribute( - int device, enum cudaGraphMemAttributeType attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, enum cudaGraphMemAttributeType, void *); - static auto func_ptr = LoadSymbol("cudaDeviceSetGraphMemAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphClone, originalGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, - cudaGraph_t clonedGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pNode, originalNode, clonedGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaGraphNodeType *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pType); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t graph, - cudaGraphNode_t *nodes, - size_t *numNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, 
cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, nodes, numNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes( - cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetRootNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, pRootNodes, pNumRootNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph, - cudaGraphNode_t *from, - cudaGraphNode_t *to, - size_t *numEdges) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, - cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numEdges); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies( - cudaGraphNode_t node, cudaGraphNode_t *pDependencies, - size_t *pNumDependencies) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependencies, pNumDependencies); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes( - cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, - size_t *pNumDependentNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependentNodes, pNumDependentNodes); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t 
*to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphDestroyNode(cudaGraphNode_t node) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate( - cudaGraphExec_t *pGraphExec, cudaGraph_t graph, cudaGraphNode_t *pErrorNode, - char *pLogBuffer, size_t bufferSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - cudaGraphNode_t *, char *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphInstantiate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiateWithFlags( - cudaGraphExec_t *pGraphExec, cudaGraph_t graph, unsigned long long flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - unsigned long long); - static auto func_ptr = LoadSymbol("cudaGraphInstantiateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, flags); -} - -extern __host__ 
cudaError_t CUDARTAPI cudaGraphExecKernelNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParamsToSymbol( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const void *symbol, - const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParamsToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecMemcpyNodeSetParamsFromSymbol(cudaGraphExec_t hGraphExec, - cudaGraphNode_t node, void *dst, - const void *symbol, size_t count, - size_t offset, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParamsFromSymbol"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, dst, symbol, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams1D( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, void *dst, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, void *, - const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParams1D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemsetNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphExecHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecChildGraphNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, cudaGraph_t childGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = - LoadSymbol("cudaGraphExecChildGraphNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, childGraph); -} - -extern 
__host__ cudaError_t CUDARTAPI cudaGraphExecEventRecordNodeSetEvent( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = - LoadSymbol("cudaGraphExecEventRecordNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, event); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecEventWaitNodeSetEvent( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = - LoadSymbol("cudaGraphExecEventWaitNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecExternalSemaphoresSignalNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphExec_t, cudaGraphNode_t, - const struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecExternalSemaphoresSignalNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecExternalSemaphoresWaitNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphExec_t, cudaGraphNode_t, - const struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecExternalSemaphoresWaitNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeSetEnabled( - 
cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int isEnabled) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphNodeSetEnabled"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, isEnabled); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeGetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, - unsigned int *isEnabled) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - unsigned int *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetEnabled"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, isEnabled); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, - cudaGraphNode_t *hErrorNode_out, - enum cudaGraphExecUpdateResult *updateResult_out) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraph_t, cudaGraphNode_t *, - enum cudaGraphExecUpdateResult *); - static auto func_ptr = LoadSymbol("cudaGraphExecUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hGraph, hErrorNode_out, updateResult_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphUpload(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphUpload"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI 
-cudaGraphExecDestroy(cudaGraphExec_t graphExec) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t); - static auto func_ptr = LoadSymbol("cudaGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDebugDotPrint( - cudaGraph_t graph, const char *path, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, const char *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphDebugDotPrint"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, path, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaUserObjectCreate( - cudaUserObject_t *object_out, void *ptr, cudaHostFn_t destroy, - unsigned int initialRefcount, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaUserObject_t *, void *, cudaHostFn_t, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object_out, ptr, destroy, initialRefcount, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUserObjectRetain(cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectRetain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUserObjectRelease(cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectRelease"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphRetainUserObject( - cudaGraph_t graph, cudaUserObject_t object, unsigned int count __dv(1), - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaUserObject_t, - unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphRetainUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphReleaseUserObject( - cudaGraph_t graph, cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphReleaseUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDriverEntryPoint( - const char *symbol, void **funcPtr, unsigned long long flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const char *, void **, unsigned long long); - static auto func_ptr = LoadSymbol("cudaGetDriverEntryPoint"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, funcPtr, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetExportTable( - const void **ppExportTable, const cudaUUID_t *pExportTableId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *); - static auto func_ptr = LoadSymbol("cudaGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -extern __host__ cudaError_t CUDARTAPI_CDECL -cudaGetFuncBySymbol(cudaFunction_t *functionPtr, const void *symbolPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaFunction_t *, const void *); - static auto func_ptr = LoadSymbol("_CDECL cudaGetFuncBySymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(functionPtr, symbolPtr); -} - -} // extern "C" diff --git a/source/lib/src/gpu/cudart/cuda_runtime_12_0.inc b/source/lib/src/gpu/cudart/cuda_runtime_12_0.inc deleted file mode 100644 index 343db23132..0000000000 --- a/source/lib/src/gpu/cudart/cuda_runtime_12_0.inc +++ /dev/null @@ -1,2676 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { -extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetLimit(enum cudaLimit limit, - size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaDeviceGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetTexture1DLinearMaxWidth( - size_t *maxWidthInElements, const struct cudaChannelFormatDesc *fmtDesc, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct cudaChannelFormatDesc *, int); - static auto func_ptr = - LoadSymbol("cudaDeviceGetTexture1DLinearMaxWidth"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(maxWidthInElements, fmtDesc, device); -} - 
-extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaDeviceGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int *); - static auto func_ptr = - LoadSymbol("cudaDeviceGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaDeviceSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig *); - static auto func_ptr = LoadSymbol("cudaDeviceGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaDeviceSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const char *); - static auto func_ptr = LoadSymbol("cudaDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(device, pciBusId); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, - int len, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(char *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcEventHandle_t *, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, cudaIpcEventHandle_t); - static auto func_ptr = LoadSymbol("cudaIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, handle); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcMemHandle_t *, void *); - static auto func_ptr = LoadSymbol("cudaIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle( - void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaIpcMemHandle_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaIpcOpenMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, handle, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceFlushGPUDirectRDMAWrites( - enum cudaFlushGPUDirectRDMAWritesTarget target, - enum cudaFlushGPUDirectRDMAWritesScope scope) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(enum cudaFlushGPUDirectRDMAWritesTarget, - enum cudaFlushGPUDirectRDMAWritesScope); - static auto func_ptr = - LoadSymbol("cudaDeviceFlushGPUDirectRDMAWrites"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(target, scope); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadExit(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadExit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetLimit(enum cudaLimit limit, size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaThreadSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaThreadGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaThreadGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(pCacheConfig); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaThreadSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaPeekAtLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaPeekAtLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorName(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorName"); - if (!func_ptr) return "cudaGetErrorName symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorString(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorString"); - if (!func_ptr) return "cudaGetErrorString symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceCount(int *count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaDeviceProp *, int); 
- static auto func_ptr = LoadSymbol("cudaGetDeviceProperties_v2"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceAttr, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetDefaultMemPool(cudaMemPool_t *memPool, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetDefaultMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetMemPool(int device, cudaMemPool_t memPool) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, cudaMemPool_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, memPool); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetMemPool(cudaMemPool_t *memPool, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetMemPool"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetNvSciSyncAttributes( - void *nvSciSyncAttrList, int device, int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, int); - static auto func_ptr = - LoadSymbol("cudaDeviceGetNvSciSyncAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(nvSciSyncAttrList, device, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, - int srcDevice, 
int dstDevice) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceP2PAttr, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, srcDevice, dstDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaChooseDevice(int *device, const struct cudaDeviceProp *prop) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const struct cudaDeviceProp *); - static auto func_ptr = LoadSymbol("cudaChooseDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, prop); -} - -extern __host__ cudaError_t CUDARTAPI cudaInitDevice(int device, - unsigned int deviceFlags, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaInitDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, deviceFlags, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaSetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDevice(int *device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, - int len) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int); - static auto func_ptr = LoadSymbol("cudaSetValidDevices"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device_arr, len); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags(unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cudaSetDeviceFlags"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDeviceFlags(unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *); - static auto func_ptr = LoadSymbol("cudaStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags, - int priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int, int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetPriority(cudaStream_t hStream, int *priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, int *); - static auto func_ptr = LoadSymbol("cudaStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned int *); - static auto func_ptr = LoadSymbol("cudaStreamGetFlags"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetId(cudaStream_t hStream, unsigned long long *streamId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned long long *); - static auto func_ptr = LoadSymbol("cudaStreamGetId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, streamId); -} - -extern __host__ cudaError_t CUDARTAPI cudaCtxResetPersistingL2Cache(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaCtxResetPersistingL2Cache"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, - cudaStreamAttrValue *value_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamAttrID, - cudaStreamAttrValue *); - static auto func_ptr = LoadSymbol("cudaStreamGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value_out); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamSetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, - const cudaStreamAttrValue *value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamAttrID, - const cudaStreamAttrValue *); - static auto func_ptr = LoadSymbol("cudaStreamSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, attr, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI 
-cudaStreamDestroy(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent( - cudaStream_t stream, cudaEvent_t event, unsigned int flags __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, cudaEvent_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, event, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, - void *userData, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamCallback_t, - void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, callback, userData, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamSynchronize(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr, - size_t length __dv(0), unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(stream, devPtr, length, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamBeginCapture(cudaStream_t stream, enum cudaStreamCaptureMode mode) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureMode); - static auto func_ptr = LoadSymbol("cudaStreamBeginCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaThreadExchangeStreamCaptureMode(enum cudaStreamCaptureMode *mode) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaStreamCaptureMode *); - static auto func_ptr = - LoadSymbol("cudaThreadExchangeStreamCaptureMode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mode); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaStreamEndCapture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamIsCapturing( - cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureStatus *); - static auto func_ptr = LoadSymbol("cudaStreamIsCapturing"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, pCaptureStatus); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamGetCaptureInfo( - cudaStream_t stream, enum cudaStreamCaptureStatus *captureStatus_out, - unsigned long long *id_out __dv(0), cudaGraph_t *graph_out __dv(0), - const cudaGraphNode_t **dependencies_out __dv(0), - size_t *numDependencies_out __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *, - cudaGraph_t *, const cudaGraphNode_t **, size_t *); - static auto func_ptr = - 
LoadSymbol("__CUDART_API_PTSZ(cudaStreamGetCaptureInfo_v2)"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, captureStatus_out, id_out, graph_out, - dependencies_out, numDependencies_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamUpdateCaptureDependencies( - cudaStream_t stream, cudaGraphNode_t *dependencies, size_t numDependencies, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraphNode_t *, - size_t, unsigned int); - static auto func_ptr = - LoadSymbol("cudaStreamUpdateCaptureDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, dependencies, numDependencies, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventCreate(cudaEvent_t *event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecord(cudaEvent_t event, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecordWithFlags(cudaEvent_t event, cudaStream_t stream __dv(0), - unsigned int flags __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventRecordWithFlags"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventQuery(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventSynchronize(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventDestroy(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, - cudaEvent_t start, - cudaEvent_t end) { - using FuncPtr = cudaError_t(CUDARTAPI *)(float *, cudaEvent_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ms, start, end); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalMemory( - cudaExternalMemory_t *extMem_out, - const struct cudaExternalMemoryHandleDesc *memHandleDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaExternalMemory_t *, const struct cudaExternalMemoryHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem_out, memHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedBuffer( - void **devPtr, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryBufferDesc *bufferDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaExternalMemory_t, - const struct 
cudaExternalMemoryBufferDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedBuffer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, extMem, bufferDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmap, cudaExternalMemory_t extMem, - const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, cudaExternalMemory_t, - const struct cudaExternalMemoryMipmappedArrayDesc *); - static auto func_ptr = - LoadSymbol("cudaExternalMemoryGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmap, extMem, mipmapDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalMemory(cudaExternalMemory_t extMem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalMemory_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalMemory"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extMem); -} - -extern __host__ cudaError_t CUDARTAPI cudaImportExternalSemaphore( - cudaExternalSemaphore_t *extSem_out, - const struct cudaExternalSemaphoreHandleDesc *semHandleDesc) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreHandleDesc *); - static auto func_ptr = LoadSymbol("cudaImportExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem_out, semHandleDesc); -} - -extern __host__ cudaError_t CUDARTAPI cudaSignalExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreSignalParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreSignalParams *, - unsigned int, cudaStream_t); - static auto func_ptr = LoadSymbol( - 
"__CUDART_API_PTSZ(cudaSignalExternalSemaphoresAsync_v2)"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaWaitExternalSemaphoresAsync( - const cudaExternalSemaphore_t *extSemArray, - const struct cudaExternalSemaphoreWaitParams *paramsArray, - unsigned int numExtSems, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *, - const struct cudaExternalSemaphoreWaitParams *, - unsigned int, cudaStream_t); - static auto func_ptr = LoadSymbol( - "__CUDART_API_PTSZ(cudaWaitExternalSemaphoresAsync_v2)"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSemArray, paramsArray, numExtSems, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t); - static auto func_ptr = LoadSymbol("cudaDestroyExternalSemaphore"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(extSem); -} - -extern __host__ cudaError_t CUDARTAPI -cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchKernelExC( - const cudaLaunchConfig_t *config, const void *func, void **args) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const cudaLaunchConfig_t *, - const void *, void **); - static auto func_ptr = LoadSymbol("cudaLaunchKernelExC"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config, func, args); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaLaunchCooperativeKernel( - const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaLaunchCooperativeKernelMultiDevice( - struct cudaLaunchParams *launchParamsList, unsigned int numDevices, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaLaunchParams *, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, cacheConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, config); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaFuncAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaFuncGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attr, func); 
-} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncAttribute, int); - static auto func_ptr = LoadSymbol("cudaFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, attr, value); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForDevice(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaSetDoubleForHost(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchHostFunc(cudaStream_t stream, - cudaHostFn_t fn, - void *userData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaHostFn_t, void *); - static auto func_ptr = LoadSymbol("cudaLaunchHostFunc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, fn, userData); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, - int blockSize, - size_t dynamicSMemSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t); - static auto func_ptr = - LoadSymbol("cudaOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyAvailableDynamicSMemPerBlock(size_t *dynamicSmemSize, - const void *func, int numBlocks, - int blockSize) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(size_t *, const void *, int, int); - static auto func_ptr = - LoadSymbol("cudaOccupancyAvailableDynamicSMemPerBlock"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dynamicSmemSize, func, numBlocks, blockSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, - const void *func, - int blockSize, - size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxPotentialClusterSize(int *clusterSize, const void *func, - const cudaLaunchConfig_t *launchConfig) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, const cudaLaunchConfig_t *); - static auto func_ptr = - LoadSymbol("cudaOccupancyMaxPotentialClusterSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(clusterSize, func, launchConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveClusters(int *numClusters, const void *func, - const cudaLaunchConfig_t *launchConfig) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, const cudaLaunchConfig_t *); - static auto func_ptr = LoadSymbol("cudaOccupancyMaxActiveClusters"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numClusters, func, launchConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMallocManaged(void **devPtr, size_t size, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocManaged"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(devPtr, size, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMalloc(void **devPtr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMalloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocHost(void **ptr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMallocHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocPitch(void **devPtr, - size_t *pitch, - size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMallocPitch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocArray( - cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width, - size_t height __dv(0), unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - size_t, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, width, height, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFree(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t); - static auto func_ptr = LoadSymbol("cudaFreeArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t); - static auto func_ptr = LoadSymbol("cudaFreeMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHost, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostRegister(void *ptr, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostRegister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostUnregister(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostGetDevicePointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevice, pHost, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostGetFlags(unsigned int *pFlags, - void *pHost) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cudaHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, pHost); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3D(struct cudaPitchedPtr *pitchedDevPtr, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr *, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMalloc3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, extent); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, - struct cudaExtent extent, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int); - static auto func_ptr = LoadSymbol("cudaMalloc3DArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, extent, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, - const struct cudaChannelFormatDesc *desc, struct cudaExtent extent, - unsigned int numLevels, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, desc, extent, numLevels, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetMipmappedArrayLevel( - cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, - unsigned int level) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaMipmappedArray_const_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGetMipmappedArrayLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(levelArray, mipmappedArray, level); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync( - const struct cudaMemcpy3DParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy3DPeerAsync( - const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, - size_t *total) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaMemGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -extern __host__ cudaError_t CUDARTAPI -cudaArrayGetInfo(struct cudaChannelFormatDesc *desc, struct cudaExtent *extent, - unsigned int *flags, cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - struct cudaExtent *, unsigned int *, - 
cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, extent, flags, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaArrayGetPlane( - cudaArray_t *pPlaneArray, cudaArray_t hArray, unsigned int planeIdx) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t *, cudaArray_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaArrayGetPlane"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pPlaneArray, hArray, planeIdx); -} - -extern __host__ cudaError_t CUDARTAPI cudaArrayGetMemoryRequirements( - struct cudaArrayMemoryRequirements *memoryRequirements, cudaArray_t array, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaArrayMemoryRequirements *, - cudaArray_t, int); - static auto func_ptr = LoadSymbol("cudaArrayGetMemoryRequirements"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memoryRequirements, array, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMipmappedArrayGetMemoryRequirements( - struct cudaArrayMemoryRequirements *memoryRequirements, - cudaMipmappedArray_t mipmap, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaArrayMemoryRequirements *, - cudaMipmappedArray_t, int); - static auto func_ptr = - LoadSymbol("cudaMipmappedArrayGetMemoryRequirements"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memoryRequirements, mipmap, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaArrayGetSparseProperties( - struct cudaArraySparseProperties *sparseProperties, cudaArray_t array) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaArraySparseProperties *, cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetSparseProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(sparseProperties, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaMipmappedArrayGetSparseProperties( - struct 
cudaArraySparseProperties *sparseProperties, - cudaMipmappedArray_t mipmap) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaArraySparseProperties *, - cudaMipmappedArray_t); - static auto func_ptr = - LoadSymbol("cudaMipmappedArrayGetSparseProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(sparseProperties, mipmap); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, - size_t count, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyPeer(void *dst, int dstDevice, - const void *src, - int srcDevice, - size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, int, const void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2D(void *dst, size_t dpitch, - const void *src, - size_t spitch, size_t width, - size_t height, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArray( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArray"); - if 
(!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArray( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, size_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, - size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - cudaArray_const_t, size_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol( - const void *symbol, const void *src, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol( - void *dst, const void *symbol, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost)) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemcpyAsync(void *dst, const void *src, size_t count, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, - size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, const void *, int, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync( - void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, - size_t height, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, 
size_t, size_t, - const void *, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArrayAsync( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, - size_t, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbolAsync( - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbolAsync( - void *dst, const void *symbol, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, - size_t count) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset2D(void *devPtr, size_t pitch, - int value, size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemset2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset3D( - struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMemset3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync( - void *devPtr, int value, size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemsetAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, - size_t height, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, - struct cudaExtent extent, cudaStream_t stream __dv(0)) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, - struct cudaExtent, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolAddress(void **devPtr, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolAddress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolSize(size_t *size, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(size, symbol); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, size_t, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, - enum cudaMemoryAdvise, int); - static auto func_ptr = LoadSymbol("cudaMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttribute( - void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, - const void *devPtr, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, size_t, enum cudaMemRangeAttribute, const void *, size_t); - static auto func_ptr = 
LoadSymbol("cudaMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttributes( - void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, - size_t numAttributes, const void *devPtr, size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, enum cudaMemRangeAttribute *, - size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t, size_t, size_t, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, 
cudaArray_const_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - count, kind); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind, stream); -} - -extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArrayAsync(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocAsync(void **devPtr, - size_t size, - cudaStream_t hStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMallocAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, hStream); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeAsync(void *devPtr, - cudaStream_t hStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaFreeAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, 
hStream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolTrimTo(cudaMemPool_t memPool, - size_t minBytesToKeep) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemPoolTrimTo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, minBytesToKeep); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolSetAttribute( - cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMemPool_t, enum cudaMemPoolAttr, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolGetAttribute( - cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMemPool_t, enum cudaMemPoolAttr, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolSetAccess(cudaMemPool_t memPool, - const struct cudaMemAccessDesc *descList, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMemPool_t, const struct cudaMemAccessDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaMemPoolSetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, descList, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolGetAccess(enum cudaMemAccessFlags *flags, cudaMemPool_t memPool, - struct cudaMemLocation *location) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - enum cudaMemAccessFlags *, cudaMemPool_t, struct cudaMemLocation *); - static auto func_ptr = LoadSymbol("cudaMemPoolGetAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags, memPool, location); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaMemPoolCreate( - cudaMemPool_t *memPool, const struct cudaMemPoolProps *poolProps) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, - const struct cudaMemPoolProps *); - static auto func_ptr = LoadSymbol("cudaMemPoolCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, poolProps); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolDestroy(cudaMemPool_t memPool) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t); - static auto func_ptr = LoadSymbol("cudaMemPoolDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocFromPoolAsync( - void **ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t, cudaMemPool_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMallocFromPoolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, memPool, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolExportToShareableHandle( - void *shareableHandle, cudaMemPool_t memPool, - enum cudaMemAllocationHandleType handleType, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, cudaMemPool_t, enum cudaMemAllocationHandleType, unsigned int); - static auto func_ptr = - LoadSymbol("cudaMemPoolExportToShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(shareableHandle, memPool, handleType, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolImportFromShareableHandle( - cudaMemPool_t *memPool, void *shareableHandle, - enum cudaMemAllocationHandleType handleType, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMemPool_t *, void *, enum cudaMemAllocationHandleType, unsigned int); - static auto func_ptr = - LoadSymbol("cudaMemPoolImportFromShareableHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(memPool, 
shareableHandle, handleType, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemPoolExportPointer( - struct cudaMemPoolPtrExportData *exportData, void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaMemPoolPtrExportData *, void *); - static auto func_ptr = LoadSymbol("cudaMemPoolExportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(exportData, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPoolImportPointer(void **ptr, cudaMemPool_t memPool, - struct cudaMemPoolPtrExportData *exportData) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, cudaMemPool_t, - struct cudaMemPoolPtrExportData *); - static auto func_ptr = LoadSymbol("cudaMemPoolImportPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, memPool, exportData); -} - -extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes( - struct cudaPointerAttributes *attributes, const void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPointerAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attributes, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, device, peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int); - static auto func_ptr = LoadSymbol("cudaDeviceEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceDisablePeerAccess(int peerDevice) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceSetMapFlags( - cudaGraphicsResource_t resource, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphicsResourceSetMapFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsMapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsUnmapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedPointer( - void **devPtr, size_t *size, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, cudaGraphicsResource_t); - static auto func_ptr = - 
LoadSymbol("cudaGraphicsResourceGetMappedPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsSubResourceGetMappedArray( - cudaArray_t *array, cudaGraphicsResource_t resource, - unsigned int arrayIndex, unsigned int mipLevel) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaGraphicsResource_t, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, resource, arrayIndex, mipLevel); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsResourceGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetChannelDesc( - struct cudaChannelFormatDesc *desc, cudaArray_const_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - cudaArray_const_t); - static auto func_ptr = LoadSymbol("cudaGetChannelDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, array); -} - -extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( - int x, int y, int z, int w, enum cudaChannelFormatKind f) { - using FuncPtr = struct cudaChannelFormatDesc(CUDARTAPI *)( - int, int, int, int, enum cudaChannelFormatKind); - static auto func_ptr = LoadSymbol("cudaCreateChannelDesc"); - return func_ptr(x, y, z, w, f); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject( - cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, - const struct cudaTextureDesc *pTexDesc, - const struct 
cudaResourceViewDesc *pResViewDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaTextureObject_t *, const struct cudaResourceDesc *, - const struct cudaTextureDesc *, const struct cudaResourceViewDesc *); - static auto func_ptr = LoadSymbol("cudaCreateTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroyTextureObject(cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaDestroyTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc( - struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaTextureDesc *, cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaGetTextureObjectTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc( - struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaResourceViewDesc *, - cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaCreateSurfaceObject( - cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t *, - const struct cudaResourceDesc *); - static auto func_ptr = LoadSymbol("cudaCreateSurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t); - static auto func_ptr = LoadSymbol("cudaDestroySurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaSurfaceObject_t); - static auto func_ptr = - LoadSymbol("cudaGetSurfaceObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaRuntimeGetVersion(int *runtimeVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaRuntimeGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(runtimeVersion); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraph, flags); -} - 
-extern __host__ cudaError_t CUDARTAPI cudaGraphAddKernelNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddKernelNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams( - cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams( - cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphKernelNodeCopyAttributes(cudaGraphNode_t hSrc, cudaGraphNode_t hDst) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t); - static auto func_ptr = - LoadSymbol("cudaGraphKernelNodeCopyAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hSrc, hDst); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetAttribute( - cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, - cudaKernelNodeAttrValue *value_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, cudaKernelNodeAttrID, 
cudaKernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetAttribute( - cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, - const cudaKernelNodeAttrValue *value) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, cudaKernelNodeAttrID, const cudaKernelNodeAttrValue *); - static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemcpy3DParms *pCopyParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pCopyParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNodeToSymbol( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, - const void *, const void *, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNodeToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, symbol, - src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNodeFromSymbol( - 
cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dst, - const void *symbol, size_t count, size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, void *, - const void *, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphAddMemcpyNodeFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, dst, - symbol, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode1D( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dst, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, void *, - const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode1D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, dst, src, - count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams( - cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams( - cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return 
func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParamsToSymbol( - cudaGraphNode_t node, const void *symbol, const void *src, size_t count, - size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, const void *, const void *, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphMemcpyNodeSetParamsToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParamsFromSymbol( - cudaGraphNode_t node, void *dst, const void *symbol, size_t count, - size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphMemcpyNodeSetParamsFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, dst, symbol, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphMemcpyNodeSetParams1D(cudaGraphNode_t node, void *dst, const void *src, - size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, void *, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams1D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemsetNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaMemsetParams *pMemsetParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemsetNode"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pMemsetParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeGetParams( - cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams( - cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddHostNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddHostNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeGetParams( - cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams( - cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams) { - 
using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaGraph_t childGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphAddChildGraphNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - childGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraph_t *); - static auto func_ptr = LoadSymbol("cudaGraphChildGraphNodeGetGraph"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddEmptyNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEmptyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddEventRecordNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaEvent_t); - static auto func_ptr = 
LoadSymbol("cudaGraphAddEventRecordNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventRecordNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t *); - static auto func_ptr = - LoadSymbol("cudaGraphEventRecordNodeGetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventRecordNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = - LoadSymbol("cudaGraphEventRecordNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddEventWaitNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, - size_t numDependencies, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaGraphAddEventWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventWaitNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaGraphEventWaitNodeGetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphEventWaitNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = 
LoadSymbol("cudaGraphEventWaitNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, event); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddExternalSemaphoresSignalNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, - const struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphAddExternalSemaphoresSignalNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresSignalNodeGetParams( - cudaGraphNode_t hNode, - struct cudaExternalSemaphoreSignalNodeParams *params_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresSignalNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresSignalNodeSetParams( - cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, const struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresSignalNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddExternalSemaphoresWaitNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { - 
using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, - const struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphAddExternalSemaphoresWaitNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresWaitNodeGetParams( - cudaGraphNode_t hNode, - struct cudaExternalSemaphoreWaitNodeParams *params_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresWaitNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, params_out); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExternalSemaphoresWaitNodeSetParams( - cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t, const struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExternalSemaphoresWaitNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemAllocNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, - struct cudaMemAllocNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, - const cudaGraphNode_t *, size_t, - struct cudaMemAllocNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemAllocNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, - nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphMemAllocNodeGetParams( - cudaGraphNode_t node, struct 
cudaMemAllocNodeParams *params_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, - struct cudaMemAllocNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphMemAllocNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, params_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemFreeNode( - cudaGraphNode_t *pGraphNode, cudaGraph_t graph, - const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, void *); - static auto func_ptr = LoadSymbol("cudaGraphAddMemFreeNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphNode, graph, pDependencies, numDependencies, dptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphMemFreeNodeGetParams(cudaGraphNode_t node, void *dptr_out) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, void *); - static auto func_ptr = LoadSymbol("cudaGraphMemFreeNodeGetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, dptr_out); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGraphMemTrim(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceGraphMemTrim"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetGraphMemAttribute( - int device, enum cudaGraphMemAttributeType attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, enum cudaGraphMemAttributeType, void *); - static auto func_ptr = LoadSymbol("cudaDeviceGetGraphMemAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetGraphMemAttribute( - int device, enum cudaGraphMemAttributeType attr, void *value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, enum 
cudaGraphMemAttributeType, void *); - static auto func_ptr = LoadSymbol("cudaDeviceSetGraphMemAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphClone, originalGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, - cudaGraph_t clonedGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphNodeFindInClone"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pNode, originalNode, clonedGraph); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaGraphNodeType *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetType"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pType); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t graph, - cudaGraphNode_t *nodes, - size_t *numNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, nodes, numNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes( - cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetRootNodes"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(graph, pRootNodes, pNumRootNodes); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph, - cudaGraphNode_t *from, - cudaGraphNode_t *to, - size_t *numEdges) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, - cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphGetEdges"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numEdges); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies( - cudaGraphNode_t node, cudaGraphNode_t *pDependencies, - size_t *pNumDependencies) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependencies, pNumDependencies); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes( - cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, - size_t *pNumDependentNodes) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependentNodes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node, pDependentNodes, pNumDependentNodes); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphAddDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, - const cudaGraphNode_t *to, size_t numDependencies) { - 
using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, - const cudaGraphNode_t *, size_t); - static auto func_ptr = LoadSymbol("cudaGraphRemoveDependencies"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, from, to, numDependencies); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphDestroyNode(cudaGraphNode_t node) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroyNode"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(node); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphInstantiate(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, - unsigned long long flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - unsigned long long); - static auto func_ptr = LoadSymbol("cudaGraphInstantiate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphInstantiateWithFlags(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, - unsigned long long flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - unsigned long long); - static auto func_ptr = LoadSymbol("cudaGraphInstantiateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphInstantiateWithParams(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, - cudaGraphInstantiateParams *instantiateParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, - cudaGraphInstantiateParams *); - static auto func_ptr = LoadSymbol("cudaGraphInstantiateWithParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pGraphExec, graph, instantiateParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecGetFlags(cudaGraphExec_t graphExec, unsigned long long *flags) { - using 
FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, unsigned long long *); - static auto func_ptr = LoadSymbol("cudaGraphExecGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecKernelNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaKernelNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaKernelNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecKernelNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemcpy3DParms *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemcpy3DParms *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParamsToSymbol( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const void *symbol, - const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParamsToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecMemcpyNodeSetParamsFromSymbol(cudaGraphExec_t hGraphExec, - cudaGraphNode_t node, void *dst, - const void *symbol, size_t count, - size_t offset, - enum cudaMemcpyKind kind) { - using 
FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParamsFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, dst, symbol, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams1D( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, void *dst, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, void *, - const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemcpyNodeSetParams1D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemsetNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaMemsetParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaMemsetParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecMemsetNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, - const struct cudaHostNodeParams *pNodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - const struct cudaHostNodeParams *); - static auto func_ptr = LoadSymbol("cudaGraphExecHostNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, pNodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecChildGraphNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t node, cudaGraph_t childGraph) { - using FuncPtr = - cudaError_t(CUDARTAPI 
*)(cudaGraphExec_t, cudaGraphNode_t, cudaGraph_t); - static auto func_ptr = - LoadSymbol("cudaGraphExecChildGraphNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, node, childGraph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecEventRecordNodeSetEvent( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = - LoadSymbol("cudaGraphExecEventRecordNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, event); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphExecEventWaitNodeSetEvent( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaEvent_t); - static auto func_ptr = - LoadSymbol("cudaGraphExecEventWaitNodeSetEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecExternalSemaphoresSignalNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphExec_t, cudaGraphNode_t, - const struct cudaExternalSemaphoreSignalNodeParams *); - static auto func_ptr = - LoadSymbol("cudaGraphExecExternalSemaphoresSignalNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecExternalSemaphoresWaitNodeSetParams( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, - const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaGraphExec_t, cudaGraphNode_t, - const struct cudaExternalSemaphoreWaitNodeParams *); - static auto func_ptr = - 
LoadSymbol("cudaGraphExecExternalSemaphoresWaitNodeSetParams"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, nodeParams); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphNodeSetEnabled( - cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int isEnabled) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphNodeSetEnabled"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, isEnabled); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphNodeGetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, - unsigned int *isEnabled) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, - unsigned int *); - static auto func_ptr = LoadSymbol("cudaGraphNodeGetEnabled"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hNode, isEnabled); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, - cudaGraphExecUpdateResultInfo *resultInfo) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraph_t, - cudaGraphExecUpdateResultInfo *); - static auto func_ptr = LoadSymbol("cudaGraphExecUpdate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hGraphExec, hGraph, resultInfo); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphUpload(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphUpload"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec, - cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphLaunch"); - if (!func_ptr) 
return GetSymbolNotFoundError(); - return func_ptr(graphExec, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphExecDestroy(cudaGraphExec_t graphExec) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t); - static auto func_ptr = LoadSymbol("cudaGraphExecDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graphExec); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t); - static auto func_ptr = LoadSymbol("cudaGraphDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphDebugDotPrint( - cudaGraph_t graph, const char *path, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, const char *, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphDebugDotPrint"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, path, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaUserObjectCreate( - cudaUserObject_t *object_out, void *ptr, cudaHostFn_t destroy, - unsigned int initialRefcount, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaUserObject_t *, void *, cudaHostFn_t, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object_out, ptr, destroy, initialRefcount, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUserObjectRetain(cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectRetain"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUserObjectRelease(cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = cudaError_t(CUDARTAPI 
*)(cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaUserObjectRelease"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(object, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphRetainUserObject( - cudaGraph_t graph, cudaUserObject_t object, unsigned int count __dv(1), - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaUserObject_t, - unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphRetainUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphReleaseUserObject( - cudaGraph_t graph, cudaUserObject_t object, unsigned int count __dv(1)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaUserObject_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphReleaseUserObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(graph, object, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDriverEntryPoint( - const char *symbol, void **funcPtr, unsigned long long flags, - enum cudaDriverEntryPointQueryResult *driverStatus) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const char *, void **, unsigned long long, - enum cudaDriverEntryPointQueryResult *); - static auto func_ptr = LoadSymbol("cudaGetDriverEntryPoint"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, funcPtr, flags, driverStatus); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetExportTable( - const void **ppExportTable, const cudaUUID_t *pExportTableId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *); - static auto func_ptr = LoadSymbol("cudaGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -} // extern "C" diff --git a/source/lib/src/gpu/cudart/cuda_runtime_9_0.inc 
b/source/lib/src/gpu/cudart/cuda_runtime_9_0.inc deleted file mode 100644 index 6753ddcf78..0000000000 --- a/source/lib/src/gpu/cudart/cuda_runtime_9_0.inc +++ /dev/null @@ -1,1421 +0,0 @@ -// Auto-generated, do not edit. - -extern "C" { - -extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceReset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaDeviceSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceSetLimit(enum cudaLimit limit, - size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaDeviceSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaDeviceGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaDeviceGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int *); - static auto func_ptr = - 
LoadSymbol("cudaDeviceGetStreamPriorityRange"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(leastPriority, greatestPriority); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaDeviceSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig *); - static auto func_ptr = LoadSymbol("cudaDeviceGetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaDeviceSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(config); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const char *); - static auto func_ptr = LoadSymbol("cudaDeviceGetByPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, pciBusId); -} - -extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, - int len, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(char *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetPCIBusId"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pciBusId, len, device); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcEventHandle_t *, cudaEvent_t); - static auto 
func_ptr = LoadSymbol("cudaIpcGetEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, event); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, cudaIpcEventHandle_t); - static auto func_ptr = LoadSymbol("cudaIpcOpenEventHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, handle); -} - -extern __host__ cudaError_t CUDARTAPI -cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcMemHandle_t *, void *); - static auto func_ptr = LoadSymbol("cudaIpcGetMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(handle, devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle( - void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, cudaIpcMemHandle_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaIpcOpenMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, handle, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaIpcCloseMemHandle"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaThreadExit(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadExit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI cudaThreadSynchronize(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaThreadSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ cudaError_t CUDARTAPI 
cudaThreadSetLimit(enum cudaLimit limit, - size_t value) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); - static auto func_ptr = LoadSymbol("cudaThreadSetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(limit, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaThreadGetLimit(size_t *pValue, - enum cudaLimit limit) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); - static auto func_ptr = LoadSymbol("cudaThreadGetLimit"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pValue, limit); -} - -extern __host__ cudaError_t CUDARTAPI -cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); - static auto func_ptr = LoadSymbol("cudaThreadGetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pCacheConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaThreadSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(cacheConfig); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaGetLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaPeekAtLastError(void) { - using FuncPtr = cudaError_t(CUDARTAPI *)(); - static auto func_ptr = LoadSymbol("cudaPeekAtLastError"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorName(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorName"); - if (!func_ptr) return 
"cudaGetErrorName symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ const char *CUDARTAPI -cudaGetErrorString(cudaError_t error) { - using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); - static auto func_ptr = LoadSymbol("cudaGetErrorString"); - if (!func_ptr) return "cudaGetErrorString symbol not found."; - return func_ptr(error); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceCount(int *count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaDeviceProp *, int); - static auto func_ptr = LoadSymbol("cudaGetDeviceProperties"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(prop, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceAttr, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, - int srcDevice, int dstDevice) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceP2PAttr, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceGetP2PAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(value, attr, srcDevice, dstDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaChooseDevice(int *device, const struct cudaDeviceProp *prop) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const struct cudaDeviceProp *); - 
static auto func_ptr = LoadSymbol("cudaChooseDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device, prop); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaSetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaGetDevice(int *device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaGetDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, - int len) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int); - static auto func_ptr = LoadSymbol("cudaSetValidDevices"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(device_arr, len); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags(unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int); - static auto func_ptr = LoadSymbol("cudaSetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetDeviceFlags(unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *); - static auto func_ptr = LoadSymbol("cudaGetDeviceFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *); - static auto func_ptr = LoadSymbol("cudaStreamCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, 
unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags, - int priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int, int); - static auto func_ptr = LoadSymbol("cudaStreamCreateWithPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pStream, flags, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetPriority(cudaStream_t hStream, int *priority) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, int *); - static auto func_ptr = LoadSymbol("cudaStreamGetPriority"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, priority); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned int *); - static auto func_ptr = LoadSymbol("cudaStreamGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(hStream, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamDestroy(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent( - cudaStream_t stream, cudaEvent_t event, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, cudaEvent_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamWaitEvent"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, event, flags); -} - -extern __host__ cudaError_t CUDARTAPI 
-cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, - void *userData, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamCallback_t, - void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAddCallback"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, callback, userData, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaStreamSynchronize(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); - static auto func_ptr = LoadSymbol("cudaStreamQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr, - size_t length __dv(0), - unsigned int flags __dv(cudaMemAttachSingle)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaStream_t, void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaStreamAttachMemAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(stream, devPtr, length, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventCreate(cudaEvent_t *event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *); - static auto func_ptr = LoadSymbol("cudaEventCreate"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, unsigned int); - static auto func_ptr = LoadSymbol("cudaEventCreateWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(event, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventRecord(cudaEvent_t event, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaEventRecord"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventQuery(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventQuery"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventSynchronize(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventSynchronize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaEventDestroy(cudaEvent_t event) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventDestroy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(event); -} - -extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, - cudaEvent_t start, - cudaEvent_t end) { - using FuncPtr = cudaError_t(CUDARTAPI *)(float *, cudaEvent_t, cudaEvent_t); - static auto func_ptr = LoadSymbol("cudaEventElapsedTime"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ms, start, end); -} - -extern __host__ cudaError_t CUDARTAPI -cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); 
-} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel( - const void *func, dim3 gridDim, dim3 blockDim, void **args, - size_t sharedMem, cudaStream_t stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaLaunchCooperativeKernel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernelMultiDevice( - struct cudaLaunchParams *launchParamsList, unsigned int numDevices, - unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaLaunchParams *, - unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaLaunchCooperativeKernelMultiDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(launchParamsList, numDevices, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncCache); - static auto func_ptr = LoadSymbol("cudaFuncSetCacheConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, cacheConfig); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaSharedMemConfig); - static auto func_ptr = LoadSymbol("cudaFuncSetSharedMemConfig"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, config); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaFuncAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaFuncGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - 
return func_ptr(attr, func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncAttribute, int); - static auto func_ptr = LoadSymbol("cudaFuncSetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func, attr, value); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDoubleForDevice(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForDevice"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetDoubleForHost(double *d) { - using FuncPtr = cudaError_t(CUDARTAPI *)(double *); - static auto func_ptr = LoadSymbol("cudaSetDoubleForHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(d); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, - int blockSize, - size_t dynamicSMemSize) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t); - static auto func_ptr = - LoadSymbol("cudaOccupancyMaxActiveBlocksPerMultiprocessor"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, - const void *func, - int blockSize, - size_t dynamicSMemSize, - unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t, unsigned int); - static auto func_ptr = LoadSymbol( - "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); -} - -extern __host__ cudaError_t CUDARTAPI 
-cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dv(0), - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(dim3, dim3, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaConfigureCall"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(gridDim, blockDim, sharedMem, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaSetupArgument(const void *arg, - size_t size, - size_t offset) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaSetupArgument"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(arg, size, offset); -} - -extern __host__ cudaError_t CUDARTAPI cudaLaunch(const void *func) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *); - static auto func_ptr = LoadSymbol("cudaLaunch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(func); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMallocManaged( - void **devPtr, size_t size, unsigned int flags __dv(cudaMemAttachGlobal)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocManaged"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMalloc(void **devPtr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMalloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocHost(void **ptr, size_t size) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); - static auto func_ptr = LoadSymbol("cudaMallocHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocPitch(void **devPtr, - size_t *pitch, 
- size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t *, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMallocPitch"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocArray( - cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width, - size_t height __dv(0), unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - size_t, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, width, height, flags); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaFree(void *devPtr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFree"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaFreeHost"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t); - static auto func_ptr = LoadSymbol("cudaFreeArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array); -} - -extern __host__ cudaError_t CUDARTAPI -cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t); - static auto func_ptr = LoadSymbol("cudaFreeMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, - unsigned int flags) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostAlloc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pHost, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostRegister(void *ptr, size_t size, - unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostRegister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr, size, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostUnregister(void *ptr) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *); - static auto func_ptr = LoadSymbol("cudaHostUnregister"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void **, void *, unsigned int); - static auto func_ptr = LoadSymbol("cudaHostGetDevicePointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pDevice, pHost, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaHostGetFlags(unsigned int *pFlags, - void *pHost) { - using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *, void *); - static auto func_ptr = LoadSymbol("cudaHostGetFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pFlags, pHost); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3D(struct cudaPitchedPtr *pitchedDevPtr, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr *, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMalloc3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, extent); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, - struct cudaExtent extent, unsigned int flags __dv(0)) { - using 
FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, - const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int); - static auto func_ptr = LoadSymbol("cudaMalloc3DArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(array, desc, extent, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaMallocMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, - const struct cudaChannelFormatDesc *desc, struct cudaExtent extent, - unsigned int numLevels, unsigned int flags __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaMipmappedArray_t *, const struct cudaChannelFormatDesc *, - struct cudaExtent, unsigned int, unsigned int); - static auto func_ptr = LoadSymbol("cudaMallocMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, desc, extent, numLevels, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetMipmappedArrayLevel( - cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, - unsigned int level) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaMipmappedArray_const_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGetMipmappedArrayLevel"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(levelArray, mipmappedArray, level); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync( - 
const struct cudaMemcpy3DParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy3DPeerAsync( - const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy3DPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(p, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, - size_t *total) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, size_t *); - static auto func_ptr = LoadSymbol("cudaMemGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(free, total); -} - -extern __host__ cudaError_t CUDARTAPI -cudaArrayGetInfo(struct cudaChannelFormatDesc *desc, struct cudaExtent *extent, - unsigned int *flags, cudaArray_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - struct cudaExtent *, unsigned int *, - cudaArray_t); - static auto func_ptr = LoadSymbol("cudaArrayGetInfo"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, extent, flags, array); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, - size_t count, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyPeer(void *dst, int dstDevice, - const void *src, - int srcDevice, - size_t count) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, int, const void 
*, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, - const void *src, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t, size_t, size_t, const void *, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t count, enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, cudaArray_const_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - count, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2D(void *dst, size_t dpitch, - const void *src, - size_t spitch, size_t width, - size_t height, - enum cudaMemcpyKind kind) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, - size_t, size_t, enum 
cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArray( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArray( - void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, size_t, - size_t, size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray( - cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, - cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, - size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - cudaArray_const_t, size_t, size_t, - size_t, size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpy2DArrayToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, - width, height, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol( - const void *symbol, const void *src, 
size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, - size_t, enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol( - void *dst, const void *symbol, size_t count, size_t offset __dv(0), - enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbol"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemcpyAsync(void *dst, const void *src, size_t count, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, - size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, const void *, int, - size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyPeerAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dstDevice, src, srcDevice, count, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaArray_t, 
size_t, size_t, const void *, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, count, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromArrayAsync( - void *dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, - size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, src, wOffset, hOffset, count, kind, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync( - void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, - size_t height, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, size_t, - size_t, enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, spitch, width, height, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArrayAsync( - cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, - size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, - const void *, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DToArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArrayAsync( 
- void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, - size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, - cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, - size_t, size_t, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArrayAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind, - stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbolAsync( - const void *symbol, const void *src, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyToSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(symbol, src, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbolAsync( - void *dst, const void *symbol, size_t count, size_t offset, - enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, - enum cudaMemcpyKind, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbolAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(dst, symbol, count, offset, kind, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, - size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t); - static auto func_ptr = LoadSymbol("cudaMemset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset2D(void *devPtr, size_t pitch, - int value, size_t width, - size_t height) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, 
size_t, int, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaMemset2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemset3D( - struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, struct cudaExtent); - static auto func_ptr = LoadSymbol("cudaMemset3D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync( - void *devPtr, int value, size_t count, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemsetAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, value, count, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, - size_t height, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t, - cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset2DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, pitch, value, width, height, stream); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, - struct cudaExtent extent, cudaStream_t stream __dv(0)) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, - struct cudaExtent, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemset3DAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pitchedDevPtr, value, extent, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolAddress(void **devPtr, - const void *symbol) { - using FuncPtr = 
cudaError_t(CUDARTAPI *)(void **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolAddress"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSymbolSize(size_t *size, - const void *symbol) { - using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *); - static auto func_ptr = LoadSymbol("cudaGetSymbolSize"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(size, symbol); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice, - cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const void *, size_t, int, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaMemPrefetchAsync"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, dstDevice, stream); -} - -extern __host__ cudaError_t CUDARTAPI -cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, - int device) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, - enum cudaMemoryAdvise, int); - static auto func_ptr = LoadSymbol("cudaMemAdvise"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, count, advice, device); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttribute( - void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, - const void *devPtr, size_t count) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - void *, size_t, enum cudaMemRangeAttribute, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttribute"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSize, attribute, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttributes( - void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, - size_t numAttributes, const void *devPtr, size_t count) { - using FuncPtr = - 
cudaError_t(CUDARTAPI *)(void **, size_t *, enum cudaMemRangeAttribute *, - size_t, const void *, size_t); - static auto func_ptr = LoadSymbol("cudaMemRangeGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); -} - -extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes( - struct cudaPointerAttributes *attributes, const void *ptr) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaPointerAttributes *, const void *); - static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(attributes, ptr); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int, int); - static auto func_ptr = LoadSymbol("cudaDeviceCanAccessPeer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(canAccessPeer, device, peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int); - static auto func_ptr = LoadSymbol("cudaDeviceEnablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice, flags); -} - -extern __host__ cudaError_t CUDARTAPI -cudaDeviceDisablePeerAccess(int peerDevice) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int); - static auto func_ptr = LoadSymbol("cudaDeviceDisablePeerAccess"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(peerDevice); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnregisterResource"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource); -} - -extern __host__ 
cudaError_t CUDARTAPI cudaGraphicsResourceSetMapFlags( - cudaGraphicsResource_t resource, unsigned int flags) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t, unsigned int); - static auto func_ptr = LoadSymbol("cudaGraphicsResourceSetMapFlags"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(resource, flags); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsMapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsMapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsUnmapResources( - int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); - static auto func_ptr = LoadSymbol("cudaGraphicsUnmapResources"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(count, resources, stream); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedPointer( - void **devPtr, size_t *size, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(void **, size_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedPointer"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(devPtr, size, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGraphicsSubResourceGetMappedArray( - cudaArray_t *array, cudaGraphicsResource_t resource, - unsigned int arrayIndex, unsigned int mipLevel) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaArray_t *, cudaGraphicsResource_t, unsigned int, unsigned int); - static auto func_ptr = - LoadSymbol("cudaGraphicsSubResourceGetMappedArray"); - if (!func_ptr) return 
GetSymbolNotFoundError(); - return func_ptr(array, resource, arrayIndex, mipLevel); -} - -extern __host__ cudaError_t CUDARTAPI -cudaGraphicsResourceGetMappedMipmappedArray( - cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t *, cudaGraphicsResource_t); - static auto func_ptr = - LoadSymbol("cudaGraphicsResourceGetMappedMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(mipmappedArray, resource); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetChannelDesc( - struct cudaChannelFormatDesc *desc, cudaArray_const_t array) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, - cudaArray_const_t); - static auto func_ptr = LoadSymbol("cudaGetChannelDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(desc, array); -} - -extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( - int x, int y, int z, int w, enum cudaChannelFormatKind f) { - using FuncPtr = struct cudaChannelFormatDesc(CUDARTAPI *)( - int, int, int, int, enum cudaChannelFormatKind); - static auto func_ptr = LoadSymbol("cudaCreateChannelDesc"); - if (!func_ptr) { - return cudaChannelFormatDesc{cudaChannelFormatKind(-1), 0, 0, 0}; - } - return func_ptr(x, y, z, w, f); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindTexture( - size_t *offset, const struct textureReference *texref, const void *devPtr, - const struct cudaChannelFormatDesc *desc, size_t size __dv(UINT_MAX)) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, size); -} - -extern __host__ cudaError_t CUDARTAPI -cudaBindTexture2D(size_t *offset, const struct textureReference *texref, - const void 
*devPtr, const struct cudaChannelFormatDesc *desc, - size_t width, size_t height, size_t pitch) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - size_t *, const struct textureReference *, const void *, - const struct cudaChannelFormatDesc *, size_t, size_t, size_t); - static auto func_ptr = LoadSymbol("cudaBindTexture2D"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref, devPtr, desc, width, height, pitch); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindTextureToArray( - const struct textureReference *texref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, array, desc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaBindTextureToMipmappedArray(const struct textureReference *texref, - cudaMipmappedArray_const_t mipmappedArray, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct textureReference *, cudaMipmappedArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindTextureToMipmappedArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, mipmappedArray, desc); -} - -extern __host__ cudaError_t CUDARTAPI -cudaUnbindTexture(const struct textureReference *texref) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const struct textureReference *); - static auto func_ptr = LoadSymbol("cudaUnbindTexture"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureAlignmentOffset( - size_t *offset, const struct textureReference *texref) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(size_t *, const struct textureReference *); - static auto 
func_ptr = LoadSymbol("cudaGetTextureAlignmentOffset"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(offset, texref); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureReference( - const struct textureReference **texref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct textureReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetTextureReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaBindSurfaceToArray( - const struct surfaceReference *surfref, cudaArray_const_t array, - const struct cudaChannelFormatDesc *desc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - const struct surfaceReference *, cudaArray_const_t, - const struct cudaChannelFormatDesc *); - static auto func_ptr = LoadSymbol("cudaBindSurfaceToArray"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, array, desc); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceReference( - const struct surfaceReference **surfref, const void *symbol) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(const struct surfaceReference **, const void *); - static auto func_ptr = LoadSymbol("cudaGetSurfaceReference"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfref, symbol); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject( - cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, - const struct cudaTextureDesc *pTexDesc, - const struct cudaResourceViewDesc *pResViewDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)( - cudaTextureObject_t *, const struct cudaResourceDesc *, - const struct cudaTextureDesc *, const struct cudaResourceViewDesc *); - static auto func_ptr = LoadSymbol("cudaCreateTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); -} - -extern __host__ cudaError_t 
CUDARTAPI -cudaDestroyTextureObject(cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaDestroyTextureObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc( - struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaTextureDesc *, cudaTextureObject_t); - static auto func_ptr = LoadSymbol("cudaGetTextureObjectTextureDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pTexDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc( - struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaResourceViewDesc *, - cudaTextureObject_t); - static auto func_ptr = - LoadSymbol("cudaGetTextureObjectResourceViewDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResViewDesc, texObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaCreateSurfaceObject( - cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t *, - const struct cudaResourceDesc *); - static auto func_ptr = LoadSymbol("cudaCreateSurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pSurfObject, pResDesc); -} - -extern __host__ cudaError_t CUDARTAPI 
-cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) { - using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t); - static auto func_ptr = LoadSymbol("cudaDestroySurfaceObject"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceObjectResourceDesc( - struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject) { - using FuncPtr = - cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaSurfaceObject_t); - static auto func_ptr = - LoadSymbol("cudaGetSurfaceObjectResourceDesc"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(pResDesc, surfObject); -} - -extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaDriverGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(driverVersion); -} - -extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI -cudaRuntimeGetVersion(int *runtimeVersion) { - using FuncPtr = cudaError_t(CUDARTAPI *)(int *); - static auto func_ptr = LoadSymbol("cudaRuntimeGetVersion"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(runtimeVersion); -} - -extern __host__ cudaError_t CUDARTAPI cudaGetExportTable( - const void **ppExportTable, const cudaUUID_t *pExportTableId) { - using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *); - static auto func_ptr = LoadSymbol("cudaGetExportTable"); - if (!func_ptr) return GetSymbolNotFoundError(); - return func_ptr(ppExportTable, pExportTableId); -} - -} // extern "C" diff --git a/source/lib/src/gpu/cudart/cudart_stub.cc b/source/lib/src/gpu/cudart/cudart_stub.cc index 7ad8529189..8083a0a89d 100644 --- a/source/lib/src/gpu/cudart/cudart_stub.cc +++ b/source/lib/src/gpu/cudart/cudart_stub.cc @@ -10,18 +10,14 @@ #include "cuda_runtime_api.h" -// wraps cuda runtime with dso loader +extern "C" { 
-namespace { -void *GetDsoHandle() { - static auto handle = []() -> void * { -#if defined(__gnu_linux__) - std::string libname = "libcudart.so"; -#elif defined(__APPLE__) - std::string libname = "libcudart.dylib"; -#elif defined(_WIN32) - std::string libname = "cudart.dll"; -#endif +static cudaError_t DP_CudartGetSymbolNotFoundError() { + return cudaErrorSharedObjectSymbolNotFound; +} + +void *DP_cudart_dlopen(char *libname) { + static auto handle = [](std::string libname) -> void * { #if defined(_WIN32) void *dso_handle = LoadLibrary(libname.c_str()); #else @@ -33,164 +29,20 @@ void *GetDsoHandle() { } std::cerr << "DeePMD-kit: Successfully load " << libname << std::endl; return dso_handle; - }(); + }(std::string(libname)); return handle; } -template -T LoadSymbol(const char *symbol_name) { - void *symbol = nullptr; - void *handle = GetDsoHandle(); - if (handle) { - symbol = dlsym(handle, symbol_name); - } - return reinterpret_cast(symbol); -} - -// the following is copied from TensorFlow -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -cudaError_t GetSymbolNotFoundError() { - return cudaErrorSharedObjectSymbolNotFound; -} -} // namespace - -#define __dv(v) -#define __CUDA_DEPRECATED -// CUDART_VERSION is defined in cuda_runtime_api.h -#if CUDART_VERSION < 10000 -#include "cuda_runtime_9_0.inc" -#elif CUDART_VERSION < 10010 -#include "cuda_runtime_10_0.inc" -#elif CUDART_VERSION < 10020 -#include "cuda_runtime_10_1.inc" -#elif CUDART_VERSION < 11000 -#include "cuda_runtime_10_2.inc" -#elif CUDART_VERSION < 11020 -#include "cuda_runtime_11_0.inc" -#elif CUDART_VERSION < 11080 -#include "cuda_runtime_11_2.inc" -#elif CUDART_VERSION < 12000 -#include "cuda_runtime_11_8.inc" -#else -#include "cuda_runtime_12_0.inc" -#endif -#undef __dv -#undef __CUDA_DEPRECATED - -extern "C" { - -// Following are private symbols in libcudart that got inserted by nvcc. -extern void CUDARTAPI __cudaRegisterFunction(void **fatCubinHandle, - const char *hostFun, - char *deviceFun, - const char *deviceName, - int thread_limit, - uint3 *tid, - uint3 *bid, - dim3 *bDim, - dim3 *gDim, - int *wSize) { - using FuncPtr = void(CUDARTAPI *)(void **fatCubinHandle, const char *hostFun, - char *deviceFun, const char *deviceName, - int thread_limit, uint3 *tid, uint3 *bid, - dim3 *bDim, dim3 *gDim, int *wSize); - static auto func_ptr = LoadSymbol("__cudaRegisterFunction"); - if (!func_ptr) { - return; +void *DP_cudart_dlsym(void *handle, const char *sym_name) { + // check if the handle is nullptr, if so, return a function that + // returns cudaErrorSharedObjectSymbolNotFound + if (!handle) { + return reinterpret_cast(&DP_CudartGetSymbolNotFoundError); } - func_ptr(fatCubinHandle, hostFun, deviceFun, deviceName, thread_limit, tid, - bid, bDim, gDim, wSize); -} - -extern void CUDARTAPI __cudaUnregisterFatBinary(void **fatCubinHandle) { - using FuncPtr = void(CUDARTAPI *)(void **fatCubinHandle); - static auto func_ptr = 
LoadSymbol("__cudaUnregisterFatBinary"); - if (!func_ptr) { - return; + void *symbol = dlsym(handle, sym_name); + if (!symbol) { + return reinterpret_cast(&DP_CudartGetSymbolNotFoundError); } - func_ptr(fatCubinHandle); -} - -extern void CUDARTAPI __cudaRegisterVar(void **fatCubinHandle, - char *hostVar, - char *deviceAddress, - const char *deviceName, - int ext, - size_t size, - int constant, - int global) { - using FuncPtr = void(CUDARTAPI *)( - void **fatCubinHandle, char *hostVar, char *deviceAddress, - const char *deviceName, int ext, size_t size, int constant, int global); - static auto func_ptr = LoadSymbol("__cudaRegisterVar"); - if (!func_ptr) { - return; - } - func_ptr(fatCubinHandle, hostVar, deviceAddress, deviceName, ext, size, - constant, global); -} - -extern void **CUDARTAPI __cudaRegisterFatBinary(void *fatCubin) { - using FuncPtr = void **(CUDARTAPI *)(void *fatCubin); - static auto func_ptr = LoadSymbol("__cudaRegisterFatBinary"); - if (!func_ptr) { - return nullptr; - } - return (void **)func_ptr(fatCubin); -} - -extern cudaError_t CUDARTAPI __cudaPopCallConfiguration(dim3 *gridDim, - dim3 *blockDim, - size_t *sharedMem, - void *stream) { - using FuncPtr = cudaError_t(CUDARTAPI *)(dim3 * gridDim, dim3 * blockDim, - size_t * sharedMem, void *stream); - static auto func_ptr = LoadSymbol("__cudaPopCallConfiguration"); - if (!func_ptr) { - return GetSymbolNotFoundError(); - } - return func_ptr(gridDim, blockDim, sharedMem, stream); -} - -extern __host__ __device__ unsigned CUDARTAPI __cudaPushCallConfiguration( - dim3 gridDim, dim3 blockDim, size_t sharedMem = 0, void *stream = 0) { - using FuncPtr = unsigned(CUDARTAPI *)(dim3 gridDim, dim3 blockDim, - size_t sharedMem, void *stream); - static auto func_ptr = LoadSymbol("__cudaPushCallConfiguration"); - if (!func_ptr) { - return 0; - } - return func_ptr(gridDim, blockDim, sharedMem, stream); -} - -extern char CUDARTAPI __cudaInitModule(void **fatCubinHandle) { - using FuncPtr = char(CUDARTAPI 
*)(void **fatCubinHandle); - static auto func_ptr = LoadSymbol("__cudaInitModule"); - if (!func_ptr) { - return 0; - } - return func_ptr(fatCubinHandle); -} - -#if CUDART_VERSION >= 10010 -extern void CUDARTAPI __cudaRegisterFatBinaryEnd(void **fatCubinHandle) { - using FuncPtr = void(CUDARTAPI *)(void **fatCubinHandle); - static auto func_ptr = LoadSymbol("__cudaRegisterFatBinaryEnd"); - if (!func_ptr) { - return; - } - func_ptr(fatCubinHandle); -} -#endif + return symbol; +}; } diff --git a/source/lib/src/gpu/neighbor_stat.cu b/source/lib/src/gpu/neighbor_stat.cu new file mode 100644 index 0000000000..ef7d3c5f8a --- /dev/null +++ b/source/lib/src/gpu/neighbor_stat.cu @@ -0,0 +1,103 @@ +#include + +#include "device.h" +#include "neighbor_list.h" + +template +__global__ void neighbor_stat_g(const FPTYPE* coord, + const int* type, + const int nloc, + const int* ilist, + int** firstneigh, + const int* numneigh, + int* max_nbor_size, + FPTYPE* min_nbor_dist, + const int ntypes, + const int MAX_NNEI) { + int ithread = blockIdx.x * blockDim.x + threadIdx.x; + int ii = ithread / MAX_NNEI; + int jj = ithread % MAX_NNEI; + // assume the same block has the same ii + __shared__ int cache[TPB]; + cache[threadIdx.x] = 0; + if (ii >= nloc) { + return; + } + int idx_i = ilist[ii]; + if (type[idx_i] < 0) { + // set all to 10000 + min_nbor_dist[ii * MAX_NNEI + jj] = INFINITY; + return; // virtual atom + } + if (jj < numneigh[ii]) { + int idx_j = firstneigh[ii][jj]; + int type_j = type[idx_j]; + if (type_j < 0) { + min_nbor_dist[ii * MAX_NNEI + jj] = INFINITY; + return; // virtual atom + } + __syncthreads(); + FPTYPE rij[3] = {coord[idx_j * 3 + 0] - coord[idx_i * 3 + 0], + coord[idx_j * 3 + 1] - coord[idx_i * 3 + 1], + coord[idx_j * 3 + 2] - coord[idx_i * 3 + 2]}; + // we do not need to use the real index + // we do not need to do slow sqrt for every dist; instead do sqrt in the + // final + min_nbor_dist[ii * MAX_NNEI + jj] = + rij[0] * rij[0] + rij[1] * rij[1] + rij[2] * rij[2]; 
+ + // atomicAdd(max_nbor_size + ii * ntypes + type_j, 1); + // See https://www.cnblogs.com/neopenx/p/4705320.html + atomicAdd(&cache[type_j], 1); + __syncthreads(); + if (threadIdx.x < ntypes) { + atomicAdd(&max_nbor_size[ii * ntypes + threadIdx.x], cache[threadIdx.x]); + } + } else { + // set others to 10000 + min_nbor_dist[ii * MAX_NNEI + jj] = INFINITY; + } +} + +namespace deepmd { + +template +void neighbor_stat_gpu(const FPTYPE* coord, + const int* type, + const int nloc, + const deepmd::InputNlist& gpu_nlist, + int* max_nbor_size, + FPTYPE* min_nbor_dist, + const int ntypes, + const int MAX_NNEI) { + DPErrcheck(gpuGetLastError()); + DPErrcheck(gpuDeviceSynchronize()); + + DPErrcheck(gpuMemset(max_nbor_size, 0, sizeof(int) * int_64(nloc) * ntypes)); + const int nblock_loc = (nloc * MAX_NNEI + TPB - 1) / TPB; + neighbor_stat_g<<>>( + coord, type, nloc, gpu_nlist.ilist, gpu_nlist.firstneigh, + gpu_nlist.numneigh, max_nbor_size, min_nbor_dist, ntypes, MAX_NNEI); + + DPErrcheck(gpuGetLastError()); + DPErrcheck(gpuDeviceSynchronize()); +} + +template void neighbor_stat_gpu(const float* coord, + const int* type, + const int nloc, + const deepmd::InputNlist& gpu_nlist, + int* max_nbor_size, + float* min_nbor_dist, + const int ntypes, + const int MAX_NNEI); + +template void neighbor_stat_gpu(const double* coord, + const int* type, + const int nloc, + const deepmd::InputNlist& gpu_nlist, + int* max_nbor_size, + double* min_nbor_dist, + const int ntypes, + const int MAX_NNEI); +} // namespace deepmd diff --git a/source/lib/src/gpu/tabulate.cu b/source/lib/src/gpu/tabulate.cu index f424006940..a22742ae19 100644 --- a/source/lib/src/gpu/tabulate.cu +++ b/source/lib/src/gpu/tabulate.cu @@ -253,6 +253,7 @@ template __global__ void tabulate_fusion_se_a_grad_fifth_order_polynomial( FPTYPE* dy_dem_x, FPTYPE* dy_dem, + FPTYPE* dy_dtwo, const FPTYPE* table, const FPTYPE* em_x, const FPTYPE* em, @@ -307,6 +308,7 @@ __global__ void 
tabulate_fusion_se_a_grad_fifth_order_polynomial( (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) * xx; + FPTYPE oldres = res; FPTYPE t; if (enable_se_atten) { t = two_embed[block_idx * nnei * last_layer_size + @@ -330,6 +332,13 @@ __global__ void tabulate_fusion_se_a_grad_fifth_order_polynomial( xx) * xx) * (enable_se_atten ? res * t + res : res); + if (enable_se_atten) { + // from ii to ii + (nnei - breakpoint) + for (int ii2 = ii; ii2 < ii + nnei - breakpoint; ii2++) { + dy_dtwo[block_idx * nnei * last_layer_size + ii2 * last_layer_size + + jj] = oldres * res; + } + } } GpuSyncThreads(); for (int kk = 0; kk < MTILE; kk++) { @@ -354,8 +363,10 @@ __global__ void tabulate_fusion_se_a_grad_grad_fifth_order_polynomial( const FPTYPE* table, const FPTYPE* em_x, const FPTYPE* em, + const FPTYPE* two_embed, const FPTYPE* dz_dy_dem_x, const FPTYPE* dz_dy_dem, + const FPTYPE* dz_dy_dtwo, const FPTYPE lower, const FPTYPE upper, const FPTYPE max, @@ -364,6 +375,7 @@ __global__ void tabulate_fusion_se_a_grad_grad_fifth_order_polynomial( const int nnei, const int last_layer_size, const bool is_sorted) { + bool enable_se_atten = two_embed != nullptr; GPU_DYNAMIC_SHARED_MEM_DECL(int, _data); const int_64 block_idx = blockIdx.x; // nloc const int thread_idx = threadIdx.x; // last_layer_size @@ -402,12 +414,44 @@ __global__ void tabulate_fusion_se_a_grad_grad_fifth_order_polynomial( ((FPTYPE)4. * var[4] + (FPTYPE)5. 
* var[5] * xx) * xx) * xx) * xx; + FPTYPE two_grad = 0.; + if (enable_se_atten) { + FPTYPE t = two_embed[block_idx * nnei * last_layer_size + + ii * last_layer_size + thread_idx]; + // dz_dy_dtwo * res * em + // res above should be used instead of res + res * t below + two_grad = dz_dy_dtwo[block_idx * nnei * last_layer_size + + ii * last_layer_size + thread_idx] * + res; + res += res * t; + res_grad += res_grad * t; + } + /* + * `dz_dy`(or `iteratorC`) represents the derivative of the variable `out` + * in the function `tabulate_fusion_se_a_fifth_order_polynomial`. + * + * The expression `em[em_index] * res_grad * dz_xx + dz_dy_dem[em_index] * + * res` utilizes the product rule of derivatives: `(f * g)' = f' * g + f * + * g'`. + * + * This expression can be alternatively expressed as: + * `dz_dy_dem[em_index] * res + em[em_index] * (res_grad * dz_xx)`. + * Note that we can refer to `dz_dy_dem` as `em'` + * + * Therefore, we can rewrite this expression as: `em' * res + em * res'`, + * where `em'` is the derivative of `em` and `res'` is the derivative of + * `res`. Additionally, `res'` can be further represented as: `res_grad * + * dz_xx`. + * + * If `enable_se_atten` is true, `res` will be `res * t + res`, and `res'` + * will become `(res_grad * t + res_grad) * dz_xx`. 
+ */ for (int kk = 0; kk < MTILE; kk++) { int em_index = block_idx * nnei * MTILE + ii * MTILE + kk; iteratorC[kk * last_layer_size + thread_idx] += - (nnei - breakpoint) * - (em[em_index] * res_grad * dz_xx + dz_dy_dem[em_index] * res); + (nnei - breakpoint) * (em[em_index] * (res_grad * dz_xx + two_grad) + + dz_dy_dem[em_index] * res); } mark_table_idx = table_idx; if (unloop) { @@ -736,6 +780,7 @@ void tabulate_fusion_se_a_gpu(FPTYPE* out, template void tabulate_fusion_se_a_grad_gpu(FPTYPE* dy_dem_x, FPTYPE* dy_dem, + FPTYPE* dy_dtwo, const FPTYPE* table, const FPTYPE* table_info, const FPTYPE* em_x, @@ -756,9 +801,9 @@ void tabulate_fusion_se_a_grad_gpu(FPTYPE* dy_dem_x, tabulate_fusion_se_a_grad_fifth_order_polynomial <<>>( - dy_dem_x, dy_dem, table, em_x, em, two_embed, dy, table_info[0], - table_info[1], table_info[2], table_info[3], table_info[4], nnei, - last_layer_size, is_sorted); + dy_dem_x, dy_dem, dy_dtwo, table, em_x, em, two_embed, dy, + table_info[0], table_info[1], table_info[2], table_info[3], + table_info[4], nnei, last_layer_size, is_sorted); DPErrcheck(gpuGetLastError()); DPErrcheck(gpuDeviceSynchronize()); } @@ -769,8 +814,10 @@ void tabulate_fusion_se_a_grad_grad_gpu(FPTYPE* dz_dy, const FPTYPE* table_info, const FPTYPE* em_x, const FPTYPE* em, + const FPTYPE* two_embed, const FPTYPE* dz_dy_dem_x, const FPTYPE* dz_dy_dem, + const FPTYPE* dz_dy_dtwo, const int nloc, const int nnei, const int last_layer_size, @@ -783,9 +830,9 @@ void tabulate_fusion_se_a_grad_grad_gpu(FPTYPE* dz_dy, DPErrcheck(gpuMemset(dz_dy, 0, sizeof(FPTYPE) * nloc * 4 * last_layer_size)); tabulate_fusion_se_a_grad_grad_fifth_order_polynomial <<>>( - dz_dy, table, em_x, em, dz_dy_dem_x, dz_dy_dem, table_info[0], - table_info[1], table_info[2], table_info[3], table_info[4], nnei, - last_layer_size, is_sorted); + dz_dy, table, em_x, em, two_embed, dz_dy_dem_x, dz_dy_dem, dz_dy_dtwo, + table_info[0], table_info[1], table_info[2], table_info[3], + table_info[4], nnei, 
last_layer_size, is_sorted); DPErrcheck(gpuGetLastError()); DPErrcheck(gpuDeviceSynchronize()); } @@ -961,6 +1008,7 @@ template void tabulate_fusion_se_a_gpu(double* out, const bool is_sorted); template void tabulate_fusion_se_a_grad_gpu(float* dy_dem_x, float* dy_dem, + float* dy_dtwo, const float* table, const float* table_info, const float* em_x, @@ -973,6 +1021,7 @@ template void tabulate_fusion_se_a_grad_gpu(float* dy_dem_x, const bool is_sorted); template void tabulate_fusion_se_a_grad_gpu(double* dy_dem_x, double* dy_dem, + double* dy_dtwo, const double* table, const double* table_info, const double* em_x, @@ -989,8 +1038,10 @@ template void tabulate_fusion_se_a_grad_grad_gpu( const float* table_info, const float* em_x, const float* em, + const float* two_embed, const float* dz_dy_dem_x, const float* dz_dy_dem, + const float* dz_dy_dtwo, const int nloc, const int nnei, const int last_layer_size, @@ -1001,8 +1052,10 @@ template void tabulate_fusion_se_a_grad_grad_gpu( const double* table_info, const double* em_x, const double* em, + const double* two_embed, const double* dz_dy_dem_x, const double* dz_dy_dem, + const double* dz_dy_dtwo, const int nloc, const int nnei, const int last_layer_size, diff --git a/source/lib/src/tabulate.cc b/source/lib/src/tabulate.cc index 1cafd36ee2..3e2a1bec62 100644 --- a/source/lib/src/tabulate.cc +++ b/source/lib/src/tabulate.cc @@ -158,6 +158,7 @@ void deepmd::tabulate_fusion_se_a_cpu(FPTYPE* out, template void deepmd::tabulate_fusion_se_a_grad_cpu(FPTYPE* dy_dem_x, FPTYPE* dy_dem, + FPTYPE* dy_dtwo, const FPTYPE* table, const FPTYPE* table_info, const FPTYPE* em_x, @@ -171,6 +172,9 @@ void deepmd::tabulate_fusion_se_a_grad_cpu(FPTYPE* dy_dem_x, bool enable_se_atten = two_embed != nullptr; memset(dy_dem_x, 0, sizeof(FPTYPE) * nloc * nnei); memset(dy_dem, 0, sizeof(FPTYPE) * nloc * nnei * 4); + if (enable_se_atten) { + memset(dy_dtwo, 0, sizeof(FPTYPE) * nloc * nnei * last_layer_size); + } FPTYPE const lower = table_info[0]; 
FPTYPE const upper = table_info[1]; FPTYPE const _max = table_info[2]; @@ -212,6 +216,7 @@ void deepmd::tabulate_fusion_se_a_grad_cpu(FPTYPE* dy_dem_x, a0 + (a1 + (a2 + (a3 + (a4 + a5 * xx) * xx) * xx) * xx) * xx; FPTYPE g = (a1 + (2 * a2 + (3 * a3 + (4 * a4 + 5 * a5 * xx) * xx) * xx) * xx); + FPTYPE resold = res; if (enable_se_atten) { FPTYPE t = two_embed[ii * nnei * last_layer_size + jj * last_layer_size + kk]; @@ -219,18 +224,30 @@ void deepmd::tabulate_fusion_se_a_grad_cpu(FPTYPE* dy_dem_x, g += t * g; } + FPTYPE dotllrr = dot(ll, rr); if (unloop) { - grad += g * dot(ll, rr) * (nnei - jj); + grad += g * dotllrr * (nnei - jj); dy_dem[ii * nnei * 4 + jj * 4 + 0] += res * rr[0] * (nnei - jj); dy_dem[ii * nnei * 4 + jj * 4 + 1] += res * rr[1] * (nnei - jj); dy_dem[ii * nnei * 4 + jj * 4 + 2] += res * rr[2] * (nnei - jj); dy_dem[ii * nnei * 4 + jj * 4 + 3] += res * rr[3] * (nnei - jj); + if (enable_se_atten) { + // fill from jj to nnei + for (int jj2 = jj; jj2 < nnei; jj2++) { + dy_dtwo[ii * nnei * last_layer_size + jj2 * last_layer_size + + kk] += resold * dotllrr; + } + } } else { - grad += g * dot(ll, rr); + grad += g * dotllrr; dy_dem[ii * nnei * 4 + jj * 4 + 0] += res * rr[0]; dy_dem[ii * nnei * 4 + jj * 4 + 1] += res * rr[1]; dy_dem[ii * nnei * 4 + jj * 4 + 2] += res * rr[2]; dy_dem[ii * nnei * 4 + jj * 4 + 3] += res * rr[3]; + if (enable_se_atten) { + dy_dtwo[ii * nnei * last_layer_size + jj * last_layer_size + kk] += + resold * dotllrr; + } } } dy_dem_x[ii * nnei + jj] = grad; @@ -247,12 +264,15 @@ void deepmd::tabulate_fusion_se_a_grad_grad_cpu(FPTYPE* dz_dy, const FPTYPE* table_info, const FPTYPE* em_x, const FPTYPE* em, + const FPTYPE* two_embed, const FPTYPE* dz_dy_dem_x, const FPTYPE* dz_dy_dem, + const FPTYPE* dz_dy_dtwo, const int nloc, const int nnei, const int last_layer_size, const bool is_sorted) { + bool enable_se_atten = two_embed != nullptr; memset(dz_dy, 0, sizeof(FPTYPE) * nloc * 4 * last_layer_size); const FPTYPE lower = table_info[0]; 
const FPTYPE upper = table_info[1]; @@ -298,24 +318,61 @@ void deepmd::tabulate_fusion_se_a_grad_grad_cpu(FPTYPE* dz_dy, ((FPTYPE)3. * a3 + ((FPTYPE)4. * a4 + (FPTYPE)5. * a5 * xx) * xx) * xx) * xx; + FPTYPE two_grad = 0.; + if (enable_se_atten) { + FPTYPE t = two_embed[ii * nnei * last_layer_size + + jj * last_layer_size + kk]; + // dz_dy_dtwo * var * ll + // var above should be used instead of var + var * t below + two_grad = dz_dy_dtwo[ii * nnei * last_layer_size + + jj * last_layer_size + kk] * + var; + var += var * t; + var_grad += var_grad * t; + } + + /* + * `dz_dy` represents the derivative of the variable `out` in the + * function `deepmd::tabulate_fusion_se_a_cpu`. + * + * The expression `var * hh[0] + dz_xx * var_grad * ll[0]` utilizes the + * product rule of derivatives: `(f * g)' = f' * g + f * g'`. + * + * This expression can be alternatively expressed as: + * `hh[0] * var + ll[0] * (dz_xx * var_grad)`. + * Note that `hh[0]` is one element of `em`, and `ll[0]` is one element + * of `dz_dy_dem` which is `em'`. + * + * Therefore, we can rewrite this expression as: `em' * var + em * + * var'`, where `em'` is the derivative of `em` and `var'` is the + * derivative of `var`. Additionally, `var'` can be further represented + * as: `var_grad * dz_xx`. + * + * If `enable_se_atten` is true, `var` will be `var * t + var`, and + * `var'` will be `(var_grad * t + var_grad) * dz_xx`. 
+ */ if (unloop) { dz_dy[ii * last_layer_size * 4 + 0 * last_layer_size + kk] += - (nnei - jj) * (var * hh[0] + dz_xx * var_grad * ll[0]); + (nnei - jj) * + (var * hh[0] + (dz_xx * var_grad + two_grad) * ll[0]); dz_dy[ii * last_layer_size * 4 + 1 * last_layer_size + kk] += - (nnei - jj) * (var * hh[1] + dz_xx * var_grad * ll[1]); + (nnei - jj) * + (var * hh[1] + (dz_xx * var_grad + two_grad) * ll[1]); dz_dy[ii * last_layer_size * 4 + 2 * last_layer_size + kk] += - (nnei - jj) * (var * hh[2] + dz_xx * var_grad * ll[2]); + (nnei - jj) * + (var * hh[2] + (dz_xx * var_grad + two_grad) * ll[2]); dz_dy[ii * last_layer_size * 4 + 3 * last_layer_size + kk] += - (nnei - jj) * (var * hh[3] + dz_xx * var_grad * ll[3]); + (nnei - jj) * + (var * hh[3] + (dz_xx * var_grad + two_grad) * ll[3]); } else { dz_dy[ii * last_layer_size * 4 + 0 * last_layer_size + kk] += - var * hh[0] + dz_xx * var_grad * ll[0]; + var * hh[0] + (dz_xx * var_grad + two_grad) * ll[0]; dz_dy[ii * last_layer_size * 4 + 1 * last_layer_size + kk] += - var * hh[1] + dz_xx * var_grad * ll[1]; + var * hh[1] + (dz_xx * var_grad + two_grad) * ll[1]; dz_dy[ii * last_layer_size * 4 + 2 * last_layer_size + kk] += - var * hh[2] + dz_xx * var_grad * ll[2]; + var * hh[2] + (dz_xx * var_grad + two_grad) * ll[2]; dz_dy[ii * last_layer_size * 4 + 3 * last_layer_size + kk] += - var * hh[3] + dz_xx * var_grad * ll[3]; + var * hh[3] + (dz_xx * var_grad + two_grad) * ll[3]; } } if (unloop) { @@ -631,6 +688,7 @@ template void deepmd::tabulate_fusion_se_a_cpu( template void deepmd::tabulate_fusion_se_a_grad_cpu( float* dy_dem_x, float* dy_dem, + float* dy_dtwo, const float* table, const float* table_info, const float* em_x, @@ -644,6 +702,7 @@ template void deepmd::tabulate_fusion_se_a_grad_cpu( template void deepmd::tabulate_fusion_se_a_grad_cpu( double* dy_dem_x, double* dy_dem, + double* dy_dtwo, const double* table, const double* table_info, const double* em_x, @@ -660,8 +719,10 @@ template void 
deepmd::tabulate_fusion_se_a_grad_grad_cpu( const float* table_info, const float* em_x, const float* em, + const float* two_embed, const float* dz_dy_dem_x, const float* dz_dy_dem, + const float* dz_dy_dtwo, const int nloc, const int nnei, const int last_layer_size, @@ -672,8 +733,10 @@ template void deepmd::tabulate_fusion_se_a_grad_grad_cpu( const double* table_info, const double* em_x, const double* em, + const double* two_embed, const double* dz_dy_dem_x, const double* dz_dy_dem, + const double* dz_dy_dtwo, const int nloc, const int nnei, const int last_layer_size, diff --git a/source/lib/tests/test_tabulate_se_a.cc b/source/lib/tests/test_tabulate_se_a.cc index fc0fd04980..ce2defb22c 100644 --- a/source/lib/tests/test_tabulate_se_a.cc +++ b/source/lib/tests/test_tabulate_se_a.cc @@ -726,9 +726,10 @@ TEST_F(TestTabulateSeA, tabulate_fusion_se_a_grad_cpu) { std::vector dy_dem_x(em_x.size()); std::vector dy_dem(em.size()); std::vector dy(nloc * nnei * last_layer_size, 1.0); + std::vector dy_dtwo(nloc * nnei * last_layer_size); deepmd::tabulate_fusion_se_a_grad_cpu( - &dy_dem_x[0], &dy_dem[0], &table[0], &info[0], &em_x[0], &em[0], nullptr, - &dy[0], nloc, nnei, last_layer_size); + &dy_dem_x[0], &dy_dem[0], &dy_dtwo[0], &table[0], &info[0], &em_x[0], + &em[0], nullptr, &dy[0], nloc, nnei, last_layer_size); EXPECT_EQ(dy_dem_x.size(), nloc * nnei); EXPECT_EQ(dy_dem.size(), nloc * nnei * 4); EXPECT_EQ(dy_dem_x.size(), expected_dy_dem_x.size()); @@ -741,8 +742,8 @@ TEST_F(TestTabulateSeA, tabulate_fusion_se_a_grad_cpu) { } deepmd::tabulate_fusion_se_a_grad_cpu( - &dy_dem_x[0], &dy_dem[0], &table[0], &info[0], &em_x[0], &em[0], - &two_embed[0], &dy[0], nloc, nnei, last_layer_size); + &dy_dem_x[0], &dy_dem[0], &dy_dtwo[0], &table[0], &info[0], &em_x[0], + &em[0], &two_embed[0], &dy[0], nloc, nnei, last_layer_size); EXPECT_EQ(dy_dem_x.size(), nloc * nnei); EXPECT_EQ(dy_dem.size(), nloc * nnei * 4); EXPECT_EQ(dy_dem_x.size(), expected_dy_dem_x.size()); @@ -802,9 +803,11 
@@ TEST_F(TestTabulateSeA, tabulate_fusion_se_a_grad_gpu) { std::vector dy_dem_x(em_x.size(), 0.0); std::vector dy_dem(em.size(), 0.0); std::vector dy(nloc * nnei * last_layer_size, 1.0); + std::vector dy_dtwo(nloc * nnei * last_layer_size, 0.0); double *dy_dem_x_dev = NULL, *dy_dem_dev = NULL, *table_dev = NULL, - *em_x_dev = NULL, *em_dev = NULL, *dy_dev = NULL; + *em_x_dev = NULL, *em_dev = NULL, *dy_dev = NULL, + *dy_dtwo_dev = nullptr; deepmd::malloc_device_memory_sync(dy_dem_x_dev, dy_dem_x); deepmd::malloc_device_memory_sync(dy_dem_dev, dy_dem); deepmd::malloc_device_memory_sync(table_dev, table); @@ -812,8 +815,8 @@ TEST_F(TestTabulateSeA, tabulate_fusion_se_a_grad_gpu) { deepmd::malloc_device_memory_sync(em_dev, em); deepmd::malloc_device_memory_sync(dy_dev, dy); deepmd::tabulate_fusion_se_a_grad_gpu( - dy_dem_x_dev, dy_dem_dev, table_dev, &info[0], em_x_dev, em_dev, nullptr, - dy_dev, nloc, nnei, last_layer_size); + dy_dem_x_dev, dy_dem_dev, dy_dtwo_dev, table_dev, &info[0], em_x_dev, + em_dev, nullptr, dy_dev, nloc, nnei, last_layer_size); deepmd::memcpy_device_to_host(dy_dem_x_dev, dy_dem_x); deepmd::memcpy_device_to_host(dy_dem_dev, dy_dem); @@ -832,9 +835,10 @@ TEST_F(TestTabulateSeA, tabulate_fusion_se_a_grad_gpu) { deepmd::malloc_device_memory_sync(two_embed_dev, two_embed); deepmd::malloc_device_memory_sync(dy_dem_x_dev, dy_dem_x); deepmd::malloc_device_memory_sync(dy_dem_dev, dy_dem); + deepmd::malloc_device_memory_sync(dy_dtwo_dev, dy_dtwo); deepmd::tabulate_fusion_se_a_grad_gpu( - dy_dem_x_dev, dy_dem_dev, table_dev, &info[0], em_x_dev, em_dev, - two_embed_dev, dy_dev, nloc, nnei, last_layer_size); + dy_dem_x_dev, dy_dem_dev, dy_dtwo_dev, table_dev, &info[0], em_x_dev, + em_dev, two_embed_dev, dy_dev, nloc, nnei, last_layer_size); deepmd::memcpy_device_to_host(dy_dem_x_dev, dy_dem_x); deepmd::memcpy_device_to_host(dy_dem_dev, dy_dem); for (int jj = 0; jj < dy_dem_x.size(); ++jj) { diff --git a/source/lmp/builtin.cmake b/source/lmp/builtin.cmake 
new file mode 100644 index 0000000000..507fe7bf1a --- /dev/null +++ b/source/lmp/builtin.cmake @@ -0,0 +1,31 @@ +# This file should be included in the end of +# ${LAMMPS_SOURCE_DIR}/cmake/CMakeLists.txt +# include(/path/to/deepmd_source/source/lmp/builtin.cmake) + +# assume LAMMPS CMake file has been executed, so these target/variables exist: +# lammps LAMMPS_SOURCE_DIR get_lammps_version + +get_lammps_version(${LAMMPS_SOURCE_DIR}/version.h LAMMPS_VERSION_NUMBER) + +configure_file("${CMAKE_CURRENT_LIST_DIR}/deepmd_version.h.in" + "${CMAKE_CURRENT_BINARY_DIR}/deepmd_version.h" @ONLY) + +file(GLOB DEEPMD_LMP_SRC ${CMAKE_CURRENT_LIST_DIR}/*.cpp) + +find_package(DeePMD REQUIRED) +target_sources( + lammps + PRIVATE ${DEEPMD_LMP_SRC} + ${LAMMPS_SOURCE_DIR}/KSPACE/pppm.cpp # for pppm_dplr + ${LAMMPS_SOURCE_DIR}/KSPACE/fft3d.cpp + ${LAMMPS_SOURCE_DIR}/KSPACE/fft3d_wrap.cpp + ${LAMMPS_SOURCE_DIR}/KSPACE/remap.cpp + ${LAMMPS_SOURCE_DIR}/KSPACE/remap_wrap.cpp + ${LAMMPS_SOURCE_DIR}/EXTRA-FIX/fix_ttm.cpp # for ttm +) +target_link_libraries(lammps PUBLIC DeePMD::deepmd_c) +target_include_directories( + lammps PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_LIST_DIR} + ${LAMMPS_SOURCE_DIR}/KSPACE ${LAMMPS_SOURCE_DIR}/EXTRA-FIX) +target_compile_definitions( + lammps PRIVATE "LAMMPS_VERSION_NUMBER=${LAMMPS_VERSION_NUMBER}") diff --git a/source/lmp/fix_dplr.cpp b/source/lmp/fix_dplr.cpp index 77bf0d56c0..628f435bb7 100644 --- a/source/lmp/fix_dplr.cpp +++ b/source/lmp/fix_dplr.cpp @@ -127,8 +127,8 @@ FixDPLR::FixDPLR(LAMMPS *lmp, int narg, char **arg) break; } } - assert(map_vec.size() % 2 == 0), - "number of ints provided by type_associate should be even"; + assert(map_vec.size() % 2 == 0 && + "number of ints provided by type_associate should be even"); // dpt.init(model); // dtm.init("frozen_model.pb"); diff --git a/source/lmp/plugin/CMakeLists.txt b/source/lmp/plugin/CMakeLists.txt index 9b5f68b574..bfc2253412 100644 --- a/source/lmp/plugin/CMakeLists.txt +++ 
b/source/lmp/plugin/CMakeLists.txt @@ -49,9 +49,16 @@ if(DEFINED LAMMPS_SOURCE_ROOT OR DEFINED LAMMPS_VERSION) configure_file("../deepmd_version.h.in" "${CMAKE_CURRENT_BINARY_DIR}/deepmd_version.h" @ONLY) - file(GLOB LMP_SRC deepmdplugin.cpp ../*.cpp - ${LAMMPS_SOURCE_ROOT}/src/kspace.cpp # for pppm_dplr - ${LAMMPS_SOURCE_ROOT}/src/KSPACE/pppm.cpp) + file( + GLOB + LMP_SRC + deepmdplugin.cpp + ../*.cpp + ${LAMMPS_SOURCE_ROOT}/src/KSPACE/pppm.cpp # for pppm_dplr + ${LAMMPS_SOURCE_ROOT}/src/KSPACE/fft3d.cpp + ${LAMMPS_SOURCE_ROOT}/src/KSPACE/fft3d_wrap.cpp + ${LAMMPS_SOURCE_ROOT}/src/KSPACE/remap.cpp + ${LAMMPS_SOURCE_ROOT}/src/KSPACE/remap_wrap.cpp) if(LAMMPS_VERSION_NUMBER GREATER 20210831) list(APPEND LMP_SRC ${LAMMPS_SOURCE_ROOT}/src/EXTRA-FIX/fix_ttm.cpp )# for ttm diff --git a/source/op/custom_op.h b/source/op/custom_op.h index b2fb290f63..baf95f3fa3 100644 --- a/source/op/custom_op.h +++ b/source/op/custom_op.h @@ -5,6 +5,7 @@ #include #include "device.h" +#include "neighbor_list.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/shape_inference.h" @@ -25,3 +26,30 @@ namespace deepmd { void safe_compute(OpKernelContext* context, std::function ff); }; + +template +void _prepare_coord_nlist_gpu(OpKernelContext* context, + Tensor* tensor_list, + FPTYPE const** coord, + FPTYPE*& coord_cpy, + int const** type, + int*& type_cpy, + int*& idx_mapping, + deepmd::InputNlist& inlist, + int*& ilist, + int*& numneigh, + int**& firstneigh, + int*& jlist, + int*& nbor_list_dev, + int& new_nall, + int& mem_cpy, + int& mem_nnei, + int& max_nbor_size, + const FPTYPE* box, + const int* mesh_tensor_data, + const int mesh_tensor_size, + const int& nloc, + const int& nei_mode, + const float& rcut_r, + const int& max_cpy_trial, + const int& max_nnei_trial); diff --git a/source/op/map_flt_nvnmd.cc b/source/op/map_flt_nvnmd.cc index 5ea63e3624..b23deac9c8 100644 --- a/source/op/map_flt_nvnmd.cc +++ 
b/source/op/map_flt_nvnmd.cc @@ -114,7 +114,7 @@ class MapFltNvnmdOp : public OpKernel { for (ii = 0; ii < N * D; ii++) { // cal idx and xx xi = x[ii]; - if ((xi < x0) || (xi >= x1)) { + if ((xi < x0) || (xi > x1)) { continue; } // diff --git a/source/op/neighbor_stat.cc b/source/op/neighbor_stat.cc index bb368cf65e..d917c60a5f 100644 --- a/source/op/neighbor_stat.cc +++ b/source/op/neighbor_stat.cc @@ -1,4 +1,6 @@ // SPDX-License-Identifier: LGPL-3.0-or-later +#include "neighbor_stat.h" + #include "custom_op.h" #include "errors.h" #include "neighbor_list.h" @@ -22,6 +24,11 @@ class NeighborStatOp : public OpKernel { public: explicit NeighborStatOp(OpKernelConstruction* context) : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr("rcut", &rcut)); + max_nbor_size_nlist = 1024; + max_cpy_trial = 100; + mem_cpy = 256; + max_nnei_trial = 100; + mem_nnei = 256; } void Compute(OpKernelContext* context) override { @@ -66,7 +73,7 @@ class NeighborStatOp : public OpKernel { errors::InvalidArgument("number of atoms should match")); OP_REQUIRES(context, (9 == box_tensor.shape().dim_size(1)), errors::InvalidArgument("number of box should be 9")); - + DeviceFunctor()(device, context->eigen_device()); int nei_mode = 0; if (mesh_tensor.shape().dim_size(0) == 6 || mesh_tensor.shape().dim_size(0) == 7) { @@ -99,108 +106,184 @@ class NeighborStatOp : public OpKernel { const FPTYPE* box = box_tensor.flat().data(); const int* mesh = mesh_tensor.flat().data(); int* max_nbor_size = max_nbor_size_tensor->flat().data(); + if (device == "GPU") { +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM + std::vector tensor_list(7); + if (nei_mode == 1) { + // Tensor FPTYPE_temp; + TensorShape FPTYPE_shape; + FPTYPE_shape.AddDim(nall * 3); + OP_REQUIRES_OK(context, + context->allocate_temp(DataTypeToEnum::value, + FPTYPE_shape, &tensor_list[0])); - for (int ii = 0; ii < static_cast(max_nbor_size_tensor->NumElements()); - ii++) { - max_nbor_size[ii] = 0; - } + // Tensor double_temp; + TensorShape 
double_shape; + double_shape.AddDim(18); + OP_REQUIRES_OK(context, + context->allocate_temp(DataTypeToEnum::value, + double_shape, &tensor_list[1])); + // Tensor cpy_temp; + TensorShape cpy_shape; + cpy_shape.AddDim(mem_cpy * 3); + OP_REQUIRES_OK(context, + context->allocate_temp(DataTypeToEnum::value, + cpy_shape, &tensor_list[3])); + // Tensor t_temp; + TensorShape t_shape; + t_shape.AddDim(mem_cpy * 2); + OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, t_shape, + &tensor_list[4])); + } - // set region - boxtensor_t boxt[9] = {0}; - for (int dd = 0; dd < 9; ++dd) { - boxt[dd] = box[dd]; - } - SimulationRegion region; - region.reinitBox(boxt); - // set & normalize coord - std::vector d_coord3(nall * 3); - for (int ii = 0; ii < nall; ++ii) { - for (int dd = 0; dd < 3; ++dd) { - d_coord3[ii * 3 + dd] = coord[ii * 3 + dd]; + // Tensor nlist_temp; + TensorShape nlist_shape; + nlist_shape.AddDim(nloc * 2); + OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, nlist_shape, + &tensor_list[5])); + + TensorShape jlist_shape; + jlist_shape.AddDim(3 * int_64(nloc) * mem_nnei); + OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, jlist_shape, + &tensor_list[6])); + + int* idx_mapping = NULL; + int *ilist = NULL, *numneigh = NULL; + int** firstneigh = NULL; + deepmd::malloc_device_memory(firstneigh, nloc); + int* jlist = NULL; + FPTYPE* coord_cpy; + int* type_cpy; + int frame_nall = nall; + int mesh_tensor_size = static_cast(mesh_tensor.NumElements()); + deepmd::InputNlist gpu_inlist; + int* nbor_list_dev = NULL; + // prepare coord and nlist + _prepare_coord_nlist_gpu( + context, &tensor_list[0], &coord, coord_cpy, &type, type_cpy, + idx_mapping, gpu_inlist, ilist, numneigh, firstneigh, jlist, + nbor_list_dev, frame_nall, mem_cpy, mem_nnei, max_nbor_size_nlist, + box, mesh_tensor.flat().data(), mesh_tensor_size, nloc, nei_mode, + rcut, max_cpy_trial, max_nnei_trial); + + TensorShape min_nbor_dist_shape; + min_nbor_dist_shape.AddDim(nloc * mem_nnei); + 
Tensor* min_nbor_dist_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, + min_nbor_dist_shape, + &min_nbor_dist_tensor)); + FPTYPE* min_nbor_dist = min_nbor_dist_tensor->flat().data(); + + deepmd::neighbor_stat_gpu(coord, type, nloc, gpu_inlist, + max_nbor_size, min_nbor_dist, ntypes, + mem_nnei); + deepmd::delete_device_memory(firstneigh); +#endif + } else { + for (int ii = 0; + ii < static_cast(max_nbor_size_tensor->NumElements()); ii++) { + max_nbor_size[ii] = 0; + } + + // set region + boxtensor_t boxt[9] = {0}; + for (int dd = 0; dd < 9; ++dd) { + boxt[dd] = box[dd]; } - if (b_norm_atom) { - compute_t inter[3]; - region.phys2Inter(inter, &d_coord3[3 * ii]); + SimulationRegion region; + region.reinitBox(boxt); + // set & normalize coord + std::vector d_coord3(nall * 3); + for (int ii = 0; ii < nall; ++ii) { for (int dd = 0; dd < 3; ++dd) { - if (inter[dd] < 0) { - inter[dd] += 1.; - } else if (inter[dd] >= 1) { - inter[dd] -= 1.; + d_coord3[ii * 3 + dd] = coord[ii * 3 + dd]; + } + if (b_norm_atom) { + compute_t inter[3]; + region.phys2Inter(inter, &d_coord3[3 * ii]); + for (int dd = 0; dd < 3; ++dd) { + if (inter[dd] < 0) { + inter[dd] += 1.; + } else if (inter[dd] >= 1) { + inter[dd] -= 1.; + } } + region.inter2Phys(&d_coord3[3 * ii], inter); } - region.inter2Phys(&d_coord3[3 * ii], inter); } - } - // set type - std::vector d_type(nall); - for (int ii = 0; ii < nall; ++ii) { - d_type[ii] = type[ii]; - } + // set type + std::vector d_type(nall); + for (int ii = 0; ii < nall; ++ii) { + d_type[ii] = type[ii]; + } + + // build nlist + std::vector > d_nlist_a; + std::vector > d_nlist_r; + std::vector nlist_map; + bool b_nlist_map = false; - // build nlist - std::vector > d_nlist_a; - std::vector > d_nlist_r; - std::vector nlist_map; - bool b_nlist_map = false; - - if (nei_mode == 1) { - // std::cout << "I'm in nei_mode 1" << std::endl; - std::vector bk_d_coord3 = d_coord3; - std::vector bk_d_type = d_type; - std::vector 
ncell, ngcell; - copy_coord(d_coord3, d_type, nlist_map, ncell, ngcell, bk_d_coord3, - bk_d_type, rcut, region); - b_nlist_map = true; - std::vector nat_stt(3, 0); - std::vector ext_stt(3), ext_end(3); - for (int dd = 0; dd < 3; ++dd) { - ext_stt[dd] = -ngcell[dd]; - ext_end[dd] = ncell[dd] + ngcell[dd]; + if (nei_mode == 1) { + // std::cout << "I'm in nei_mode 1" << std::endl; + std::vector bk_d_coord3 = d_coord3; + std::vector bk_d_type = d_type; + std::vector ncell, ngcell; + copy_coord(d_coord3, d_type, nlist_map, ncell, ngcell, bk_d_coord3, + bk_d_type, rcut, region); + b_nlist_map = true; + std::vector nat_stt(3, 0); + std::vector ext_stt(3), ext_end(3); + for (int dd = 0; dd < 3; ++dd) { + ext_stt[dd] = -ngcell[dd]; + ext_end[dd] = ncell[dd] + ngcell[dd]; + } + ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, nloc, -1, rcut, nat_stt, + ncell, ext_stt, ext_end, region, ncell); + } else if (nei_mode == -1) { + ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, -1, rcut, NULL); + } else { + throw deepmd::deepmd_exception("unknow neighbor mode"); } - ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, nloc, -1, rcut, nat_stt, - ncell, ext_stt, ext_end, region, ncell); - } else if (nei_mode == -1) { - ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, -1, rcut, NULL); - } else { - throw deepmd::deepmd_exception("unknow neighbor mode"); - } - int MAX_NNEI = 0; - for (int ii = 0; ii < nloc; ii++) { - MAX_NNEI = - MAX_NNEI < d_nlist_r[ii].size() ? 
d_nlist_r[ii].size() : MAX_NNEI; - } - // allocate output tensor for deepmd-kit - TensorShape min_nbor_dist_shape; - min_nbor_dist_shape.AddDim(nloc * MAX_NNEI); - Tensor* min_nbor_dist_tensor = NULL; - OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, - min_nbor_dist_shape, - &min_nbor_dist_tensor)); - FPTYPE* min_nbor_dist = min_nbor_dist_tensor->flat().data(); - for (int ii = 0; ii < static_cast(min_nbor_dist_tensor->NumElements()); - ii++) { - min_nbor_dist[ii] = 10000.0; - } + int MAX_NNEI = 0; + for (int ii = 0; ii < nloc; ii++) { + MAX_NNEI = + MAX_NNEI < d_nlist_r[ii].size() ? d_nlist_r[ii].size() : MAX_NNEI; + } + // allocate output tensor for deepmd-kit + TensorShape min_nbor_dist_shape; + min_nbor_dist_shape.AddDim(nloc * MAX_NNEI); + Tensor* min_nbor_dist_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, + min_nbor_dist_shape, + &min_nbor_dist_tensor)); + FPTYPE* min_nbor_dist = min_nbor_dist_tensor->flat().data(); + for (int ii = 0; + ii < static_cast(min_nbor_dist_tensor->NumElements()); ii++) { + min_nbor_dist[ii] = 10000.0; + } #pragma omp parallel for - for (int ii = 0; ii < nloc; ii++) { - if (d_type[ii] < 0) { - continue; // virtual atom - } - for (int jj = 0; jj < d_nlist_r[ii].size(); jj++) { - int type = d_type[d_nlist_r[ii][jj]]; - if (type < 0) { + for (int ii = 0; ii < nloc; ii++) { + if (d_type[ii] < 0) { continue; // virtual atom } - max_nbor_size[ii * ntypes + type] += 1; - compute_t rij[3] = { - d_coord3[d_nlist_r[ii][jj] * 3 + 0] - d_coord3[ii * 3 + 0], - d_coord3[d_nlist_r[ii][jj] * 3 + 1] - d_coord3[ii * 3 + 1], - d_coord3[d_nlist_r[ii][jj] * 3 + 2] - d_coord3[ii * 3 + 2]}; - min_nbor_dist[ii * MAX_NNEI + jj] = - sqrt(rij[0] * rij[0] + rij[1] * rij[1] + rij[2] * rij[2]); + for (int jj = 0; jj < d_nlist_r[ii].size(); jj++) { + int type = d_type[d_nlist_r[ii][jj]]; + if (type < 0) { + continue; // virtual atom + } + max_nbor_size[ii * ntypes + type] += 1; + compute_t rij[3] 
= { + d_coord3[d_nlist_r[ii][jj] * 3 + 0] - d_coord3[ii * 3 + 0], + d_coord3[d_nlist_r[ii][jj] * 3 + 1] - d_coord3[ii * 3 + 1], + d_coord3[d_nlist_r[ii][jj] * 3 + 2] - d_coord3[ii * 3 + 2]}; + // we do not need to do slow sqrt for every dist; instead do sqrt in + // the final step + min_nbor_dist[ii * MAX_NNEI + jj] = + rij[0] * rij[0] + rij[1] * rij[1] + rij[2] * rij[2]; + } } } } @@ -208,6 +291,8 @@ class NeighborStatOp : public OpKernel { private: int nnei; float rcut; + std::string device; + int max_nbor_size_nlist, max_cpy_trial, mem_cpy, max_nnei_trial, mem_nnei; }; #define REGISTER_CPU(T) \ @@ -216,3 +301,14 @@ class NeighborStatOp : public OpKernel { NeighborStatOp); REGISTER_CPU(float); REGISTER_CPU(double); +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#define REGISTER_GPU(T) \ + REGISTER_KERNEL_BUILDER(Name("NeighborStat") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("natoms") \ + .HostMemory("box"), \ + NeighborStatOp); +REGISTER_GPU(float); +REGISTER_GPU(double); +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/source/op/prod_env_mat_multi_device.cc b/source/op/prod_env_mat_multi_device.cc index 47541bc69f..048237e042 100644 --- a/source/op/prod_env_mat_multi_device.cc +++ b/source/op/prod_env_mat_multi_device.cc @@ -291,31 +291,31 @@ static void _map_nei_info_gpu(int* nlist, const bool& b_nlist_map); template -static void _prepare_coord_nlist_gpu(OpKernelContext* context, - Tensor* tensor_list, - FPTYPE const** coord, - FPTYPE*& coord_cpy, - int const** type, - int*& type_cpy, - int*& idx_mapping, - deepmd::InputNlist& inlist, - int*& ilist, - int*& numneigh, - int**& firstneigh, - int*& jlist, - int*& nbor_list_dev, - int& new_nall, - int& mem_cpy, - int& mem_nnei, - int& max_nbor_size, - const FPTYPE* box, - const int* mesh_tensor_data, - const int mesh_tensor_size, - const int& nloc, - const int& nei_mode, - const float& rcut_r, - const int& max_cpy_trial, - const int& max_nnei_trial); +void 
_prepare_coord_nlist_gpu(OpKernelContext* context, + Tensor* tensor_list, + FPTYPE const** coord, + FPTYPE*& coord_cpy, + int const** type, + int*& type_cpy, + int*& idx_mapping, + deepmd::InputNlist& inlist, + int*& ilist, + int*& numneigh, + int**& firstneigh, + int*& jlist, + int*& nbor_list_dev, + int& new_nall, + int& mem_cpy, + int& mem_nnei, + int& max_nbor_size, + const FPTYPE* box, + const int* mesh_tensor_data, + const int mesh_tensor_size, + const int& nloc, + const int& nei_mode, + const float& rcut_r, + const int& max_cpy_trial, + const int& max_nnei_trial); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -1604,31 +1604,31 @@ static void _map_nei_info_gpu(int* nlist, } template -static void _prepare_coord_nlist_gpu(OpKernelContext* context, - Tensor* tensor_list, - FPTYPE const** coord, - FPTYPE*& coord_cpy, - int const** type, - int*& type_cpy, - int*& idx_mapping, - deepmd::InputNlist& inlist, - int*& ilist, - int*& numneigh, - int**& firstneigh, - int*& jlist, - int*& nbor_list_dev, - int& new_nall, - int& mem_cpy, - int& mem_nnei, - int& max_nbor_size, - const FPTYPE* box, - const int* mesh_tensor_data, - const int mesh_tensor_size, - const int& nloc, - const int& nei_mode, - const float& rcut_r, - const int& max_cpy_trial, - const int& max_nnei_trial) { +void _prepare_coord_nlist_gpu(OpKernelContext* context, + Tensor* tensor_list, + FPTYPE const** coord, + FPTYPE*& coord_cpy, + int const** type, + int*& type_cpy, + int*& idx_mapping, + deepmd::InputNlist& inlist, + int*& ilist, + int*& numneigh, + int**& firstneigh, + int*& jlist, + int*& nbor_list_dev, + int& new_nall, + int& mem_cpy, + int& mem_nnei, + int& max_nbor_size, + const FPTYPE* box, + const int* mesh_tensor_data, + const int mesh_tensor_size, + const int& nloc, + const int& nei_mode, + const float& rcut_r, + const int& max_cpy_trial, + const int& max_nnei_trial) { if (nei_mode != 3 && nei_mode != 4) { inlist.inum = nloc; // build nlist by myself diff --git 
a/source/op/tabulate_multi_device.cc b/source/op/tabulate_multi_device.cc index 85ea82803a..6a70f60a96 100644 --- a/source/op/tabulate_multi_device.cc +++ b/source/op/tabulate_multi_device.cc @@ -91,6 +91,20 @@ REGISTER_OP("TabulateFusionSeAttenGrad") .Output("dy_dtwo: T") .Attr("is_sorted: bool = true"); +REGISTER_OP("TabulateFusionSeAttenGradGrad") + .Attr("T: {float, double}") + .Input("table: T") + .Input("table_info: T") + .Input("em_x: T") + .Input("em: T") + .Input("two_embed: T") + .Input("dz_dy_dem_x: T") + .Input("dz_dy_dem: T") + .Input("dz_dy_dtwo: T") + .Input("descriptor: T") + .Output("dz_dy: T") + .Attr("is_sorted: bool = true"); + REGISTER_OP("TabulateFusionSeT") .Attr("T: {float, double} = DT_DOUBLE") .Input("table: T") @@ -248,6 +262,7 @@ class TabulateFusionSeAGradOp : public OpKernel { // flat the tensors FPTYPE* dy_dem_x = dy_dem_x_tensor->flat().data(); FPTYPE* dy_dem = dy_dem_tensor->flat().data(); + FPTYPE* dy_dtwo = nullptr; const FPTYPE* descriptor = descriptor_tensor.flat().data(); const FPTYPE* table = table_tensor.flat().data(); @@ -262,14 +277,14 @@ class TabulateFusionSeAGradOp : public OpKernel { if (device == "GPU") { #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM - deepmd::tabulate_fusion_se_a_grad_gpu(dy_dem_x, dy_dem, table, table_info, - em_x, em, two_embed, dy, nloc, nnei, - last_layer_size); + deepmd::tabulate_fusion_se_a_grad_gpu(dy_dem_x, dy_dem, dy_dtwo, table, + table_info, em_x, em, two_embed, dy, + nloc, nnei, last_layer_size); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } else if (device == "CPU") { - deepmd::tabulate_fusion_se_a_grad_cpu(dy_dem_x, dy_dem, table, table_info, - em_x, em, two_embed, dy, nloc, nnei, - last_layer_size); + deepmd::tabulate_fusion_se_a_grad_cpu(dy_dem_x, dy_dem, dy_dtwo, table, + table_info, em_x, em, two_embed, dy, + nloc, nnei, last_layer_size); } } @@ -312,8 +327,10 @@ class TabulateFusionSeAGradGradOp : public OpKernel { const FPTYPE* table_info = table_info_tensor.flat().data(); const FPTYPE* 
em_x = em_x_tensor.flat().data(); const FPTYPE* em = em_tensor.flat().data(); + const FPTYPE* two_embed = nullptr; const FPTYPE* dz_dy_dem_x = dz_dy_dem_x_tensor.flat().data(); const FPTYPE* dz_dy_dem = dz_dy_dem_tensor.flat().data(); + const FPTYPE* dz_dy_dtwo = nullptr; const int nloc = em_tensor.shape().dim_size(0); const int nnei = em_tensor.shape().dim_size(1); const int last_layer_size = descriptor_tensor.shape().dim_size(2); @@ -321,8 +338,8 @@ class TabulateFusionSeAGradGradOp : public OpKernel { if (device == "GPU") { #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM deepmd::tabulate_fusion_se_a_grad_grad_gpu( - dz_dy, table, table_info, em_x, em, dz_dy_dem_x, dz_dy_dem, nloc, - nnei, last_layer_size, is_sorted); + dz_dy, table, table_info, em_x, em, two_embed, dz_dy_dem_x, dz_dy_dem, + dz_dy_dtwo, nloc, nnei, last_layer_size, is_sorted); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM OP_REQUIRES(context, (last_layer_size <= 1024), errors::InvalidArgument( @@ -330,8 +347,8 @@ class TabulateFusionSeAGradGradOp : public OpKernel { "last layer of embedding net must be less than 1024!")); } else if (device == "CPU") { deepmd::tabulate_fusion_se_a_grad_grad_cpu( - dz_dy, table, table_info, em_x, em, dz_dy_dem_x, dz_dy_dem, nloc, - nnei, last_layer_size, is_sorted); + dz_dy, table, table_info, em_x, em, two_embed, dz_dy_dem_x, dz_dy_dem, + dz_dy_dtwo, nloc, nnei, last_layer_size, is_sorted); } } @@ -454,6 +471,7 @@ class TabulateFusionSeAttenGradOp : public OpKernel { // flat the tensors FPTYPE* dy_dem_x = dy_dem_x_tensor->flat().data(); FPTYPE* dy_dem = dy_dem_tensor->flat().data(); + FPTYPE* dy_dtwo = dy_dtwo_tensor->flat().data(); const FPTYPE* descriptor = descriptor_tensor.flat().data(); const FPTYPE* table = table_tensor.flat().data(); @@ -468,14 +486,86 @@ class TabulateFusionSeAttenGradOp : public OpKernel { if (device == "GPU") { #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM - deepmd::tabulate_fusion_se_a_grad_gpu(dy_dem_x, dy_dem, table, table_info, - em_x, em, two_embed, 
dy, nloc, nnei, - last_layer_size, is_sorted); + deepmd::tabulate_fusion_se_a_grad_gpu( + dy_dem_x, dy_dem, dy_dtwo, table, table_info, em_x, em, two_embed, dy, + nloc, nnei, last_layer_size, is_sorted); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } else if (device == "CPU") { - deepmd::tabulate_fusion_se_a_grad_cpu(dy_dem_x, dy_dem, table, table_info, - em_x, em, two_embed, dy, nloc, nnei, - last_layer_size, is_sorted); + deepmd::tabulate_fusion_se_a_grad_cpu( + dy_dem_x, dy_dem, dy_dtwo, table, table_info, em_x, em, two_embed, dy, + nloc, nnei, last_layer_size, is_sorted); + } + } + + private: + bool is_sorted; + std::string device; +}; + +template +class TabulateFusionSeAttenGradGradOp : public OpKernel { + public: + explicit TabulateFusionSeAttenGradGradOp(OpKernelConstruction* context) + : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("is_sorted", &is_sorted)); + } + void Compute(OpKernelContext* context) override { + deepmd::safe_compute( + context, [this](OpKernelContext* context) { this->_Compute(context); }); + } + + void _Compute(OpKernelContext* context) { + // Grab the input tensor + int context_input_index = 0; + const Tensor& table_tensor = context->input(context_input_index++); + const Tensor& table_info_tensor = context->input(context_input_index++); + const Tensor& em_x_tensor = context->input(context_input_index++); + const Tensor& em_tensor = context->input(context_input_index++); + const Tensor& two_embed_tensor = context->input(context_input_index++); + const Tensor& dz_dy_dem_x_tensor = context->input(context_input_index++); + const Tensor& dz_dy_dem_tensor = context->input(context_input_index++); + const Tensor& dz_dy_dtwo_tensor = context->input(context_input_index++); + const Tensor& descriptor_tensor = context->input(context_input_index++); + // set size of the sample + OP_REQUIRES(context, (dz_dy_dem_x_tensor.shape().dims() == 2), + errors::InvalidArgument("Dim of input should be 2")); + OP_REQUIRES(context, 
(dz_dy_dem_tensor.shape().dims() == 3), + errors::InvalidArgument("Dim of input should be 3")); + int context_output_index = 0; + Tensor* dz_dy_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, + descriptor_tensor.shape(), + &dz_dy_tensor)); + DeviceFunctor()(device, context->eigen_device()); + + // flat the tensors + FPTYPE* dz_dy = dz_dy_tensor->flat().data(); + const FPTYPE* table = table_tensor.flat().data(); + const FPTYPE* table_info = table_info_tensor.flat().data(); + const FPTYPE* em_x = em_x_tensor.flat().data(); + const FPTYPE* em = em_tensor.flat().data(); + const FPTYPE* two_embed = two_embed_tensor.flat().data(); + const FPTYPE* dz_dy_dem_x = dz_dy_dem_x_tensor.flat().data(); + const FPTYPE* dz_dy_dem = dz_dy_dem_tensor.flat().data(); + const FPTYPE* dz_dy_dtwo = dz_dy_dtwo_tensor.flat().data(); + const int nloc = em_tensor.shape().dim_size(0); + const int nnei = em_tensor.shape().dim_size(1); + const int last_layer_size = descriptor_tensor.shape().dim_size(2); + + if (device == "GPU") { +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM + deepmd::tabulate_fusion_se_a_grad_grad_gpu( + dz_dy, table, table_info, em_x, em, two_embed, dz_dy_dem_x, dz_dy_dem, + dz_dy_dtwo, nloc, nnei, last_layer_size, is_sorted); +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM + OP_REQUIRES(context, (last_layer_size <= 1024), + errors::InvalidArgument( + "In the process of model compression, the size of the " + "last layer of embedding net must be less than 1024!")); + } else if (device == "CPU") { + deepmd::tabulate_fusion_se_a_grad_grad_cpu( + dz_dy, table, table_info, em_x, em, two_embed, dz_dy_dem_x, dz_dy_dem, + dz_dy_dtwo, nloc, nnei, last_layer_size, is_sorted); } } @@ -863,6 +953,10 @@ class TabulateFusionSeRGradGradOp : public OpKernel { .Device(DEVICE_CPU) \ .TypeConstraint("T"), \ TabulateFusionSeAttenGradOp); \ + REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeAttenGradGrad") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T"), \ + 
TabulateFusionSeAttenGradGradOp); \ REGISTER_KERNEL_BUILDER( \ Name("TabulateFusionSeT").Device(DEVICE_CPU).TypeConstraint("T"), \ TabulateFusionSeTOp); \ @@ -887,76 +981,81 @@ REGISTER_CPU(float); REGISTER_CPU(double); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#define REGISTER_GPU(T) \ - REGISTER_KERNEL_BUILDER(Name("TabulateFusion") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("table_info"), \ - TabulateFusionSeAOp); \ - REGISTER_KERNEL_BUILDER(Name("TabulateFusionGrad") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("table_info"), \ - TabulateFusionSeAGradOp); \ - REGISTER_KERNEL_BUILDER(Name("TabulateFusionGradGrad") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("table_info"), \ - TabulateFusionSeAGradGradOp); \ - REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeA") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("table_info"), \ - TabulateFusionSeAOp); \ - REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeAGrad") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("table_info"), \ - TabulateFusionSeAGradOp); \ - REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeAGradGrad") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("table_info"), \ - TabulateFusionSeAGradGradOp); \ - REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeAtten") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("table_info"), \ - TabulateFusionSeAttenOp); \ - REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeAttenGrad") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("table_info"), \ - TabulateFusionSeAttenGradOp); \ - REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeT") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("table_info"), \ - TabulateFusionSeTOp); \ - REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeTGrad") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("table_info"), \ - TabulateFusionSeTGradOp); \ - 
REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeTGradGrad") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("table_info"), \ - TabulateFusionSeTGradGradOp); \ - REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeR") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("table_info"), \ - TabulateFusionSeROp); \ - REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeRGrad") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("table_info"), \ - TabulateFusionSeRGradOp); \ - REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeRGradGrad") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("table_info"), \ +#define REGISTER_GPU(T) \ + REGISTER_KERNEL_BUILDER(Name("TabulateFusion") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("table_info"), \ + TabulateFusionSeAOp); \ + REGISTER_KERNEL_BUILDER(Name("TabulateFusionGrad") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("table_info"), \ + TabulateFusionSeAGradOp); \ + REGISTER_KERNEL_BUILDER(Name("TabulateFusionGradGrad") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("table_info"), \ + TabulateFusionSeAGradGradOp); \ + REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeA") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("table_info"), \ + TabulateFusionSeAOp); \ + REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeAGrad") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("table_info"), \ + TabulateFusionSeAGradOp); \ + REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeAGradGrad") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("table_info"), \ + TabulateFusionSeAGradGradOp); \ + REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeAtten") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("table_info"), \ + TabulateFusionSeAttenOp); \ + REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeAttenGrad") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("table_info"), \ + 
TabulateFusionSeAttenGradOp); \ + REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeAttenGradGrad") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("table_info"), \ + TabulateFusionSeAttenGradGradOp); \ + REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeT") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("table_info"), \ + TabulateFusionSeTOp); \ + REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeTGrad") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("table_info"), \ + TabulateFusionSeTGradOp); \ + REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeTGradGrad") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("table_info"), \ + TabulateFusionSeTGradGradOp); \ + REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeR") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("table_info"), \ + TabulateFusionSeROp); \ + REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeRGrad") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("table_info"), \ + TabulateFusionSeRGradOp); \ + REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeRGradGrad") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("table_info"), \ TabulateFusionSeRGradGradOp); REGISTER_GPU(float); REGISTER_GPU(double); diff --git a/source/tests/common.py b/source/tests/common.py index f8ed23df03..9af324896f 100644 --- a/source/tests/common.py +++ b/source/tests/common.py @@ -530,6 +530,85 @@ def strerch_box(old_coord, old_box, new_box): return ncoord.reshape(old_coord.shape) +def finite_difference_fv(sess, energy, feed_dict, t_coord, t_box, delta=1e-6): + """For energy models, compute f, v by finite difference.""" + base_dict = feed_dict.copy() + coord0 = base_dict.pop(t_coord) + box0 = base_dict.pop(t_box) + fdf = -finite_difference( + lambda coord: sess.run( + energy, feed_dict={**base_dict, t_coord: coord, t_box: box0} + ).reshape(-1), + coord0, + delta=delta, + ).reshape(-1) + fdv = -( + finite_difference( + lambda box: sess.run( + 
energy, + feed_dict={ + **base_dict, + t_coord: strerch_box(coord0, box0, box), + t_box: box, + }, + ).reshape(-1), + box0, + delta=delta, + ) + .reshape([-1, 3, 3]) + .transpose(0, 2, 1) + @ box0.reshape(3, 3) + ).reshape(-1) + return fdf, fdv + + +def check_continuity(f, cc, rcut, delta): + """coord[0:2] to [[0, 0, 0], [rcut+-.5*delta, 0, 0]].""" + cc = cc.reshape([-1, 3]) + cc0 = np.copy(cc) + cc1 = np.copy(cc) + cc0[:2, :] = np.array( + [ + 0.0, + 0.0, + 0.0, + rcut - 0.5 * delta, + 0.0, + 0.0, + ] + ).reshape([-1, 3]) + cc1[:2, :] = np.array( + [ + 0.0, + 0.0, + 0.0, + rcut + 0.5 * delta, + 0.0, + 0.0, + ] + ).reshape([-1, 3]) + return f(cc0.reshape(-1)), f(cc1.reshape(-1)) + + +def check_smooth_efv(sess, energy, force, virial, feed_dict, t_coord, rcut, delta=1e-5): + """Check the smoothness of e, f and v + the returned values are de, df, dv + de[0] are supposed to be closed to de[1] + df[0] are supposed to be closed to df[1] + dv[0] are supposed to be closed to dv[1]. + """ + base_dict = feed_dict.copy() + coord0 = base_dict.pop(t_coord) + [fe, ff, fv] = [ + lambda coord: sess.run(ii, feed_dict={**base_dict, t_coord: coord}).reshape(-1) + for ii in [energy, force, virial] + ] + [de, df, dv] = [ + check_continuity(ii, coord0, rcut, delta=delta) for ii in [fe, ff, fv] + ] + return de, df, dv + + def run_dp(cmd: str) -> int: """Run DP directly from the entry point instead of the subprocess. 
diff --git a/source/tests/test_gui.py b/source/tests/test_gui.py new file mode 100644 index 0000000000..25fd7e6651 --- /dev/null +++ b/source/tests/test_gui.py @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +from dpgui import ( + generate_dpgui_templates, +) + + +class TestDPGUI(unittest.TestCase): + def test_dpgui_entrypoints(self): + self.assertTrue(len(generate_dpgui_templates()) > 0) diff --git a/source/tests/test_model_compression_se_a_ebd.py b/source/tests/test_model_compression_se_a_ebd.py new file mode 100644 index 0000000000..2a3163b062 --- /dev/null +++ b/source/tests/test_model_compression_se_a_ebd.py @@ -0,0 +1,587 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import subprocess as sp +import unittest + +import numpy as np + +# from deepmd.entrypoints.compress import compress +from common import ( + j_loader, + run_dp, + tests_path, +) + +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, +) +from deepmd.infer import ( + DeepPot, +) + +if GLOBAL_NP_FLOAT_PRECISION == np.float32: + default_places = 4 +else: + default_places = 10 + + +def _file_delete(file): + if os.path.isdir(file): + os.rmdir(file) + elif os.path.isfile(file): + os.remove(file) + + +def _subprocess_run(command): + popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT) + for line in iter(popen.stdout.readline, b""): + if hasattr(line, "decode"): + line = line.decode("utf-8") + line = line.rstrip() + print(line) + popen.wait() + return popen.returncode + + +def _init_models(): + data_file = str(tests_path / os.path.join("model_compression", "data")) + frozen_model = str(tests_path / "dp-original-se-e2-a-v2.pb") + compressed_model = str(tests_path / "dp-compressed-se-e2-a-v2.pb") + INPUT = str(tests_path / "input_se_a_ebd_v2.json") + jdata = j_loader(str(tests_path / os.path.join("model_compression", "input.json"))) + jdata["training"]["training_data"]["systems"] = data_file + 
jdata["training"]["validation_data"]["systems"] = data_file + jdata["model"]["descriptor"] = {} + jdata["model"]["descriptor"]["type"] = "se_a_ebd_v2" + jdata["model"]["descriptor"]["type_one_side"] = False + jdata["model"]["type_embedding"] = {} + jdata["model"]["type_embedding"]["neuron"] = [1] + with open(INPUT, "w") as fp: + json.dump(jdata, fp, indent=4) + with open("input_v2_compat.json.tempfile", "w") as fp: + json.dump(jdata, fp, indent=4) + + ret = run_dp("dp train " + INPUT) + np.testing.assert_equal(ret, 0, "DP train failed!") + ret = run_dp("dp freeze -o " + frozen_model) + np.testing.assert_equal(ret, 0, "DP freeze failed!") + ret = run_dp("dp compress " + " -i " + frozen_model + " -o " + compressed_model) + np.testing.assert_equal(ret, 0, "DP model compression failed!") + return INPUT, frozen_model, compressed_model + + +def _init_models_exclude_types(): + data_file = str(tests_path / os.path.join("model_compression", "data")) + frozen_model = str(tests_path / "dp-original-exclude-types-se-e2-a-ebd-v2.pb") + compressed_model = str(tests_path / "dp-compressed-exclude-types-se-e2-a-ebd-v2.pb") + INPUT = str(tests_path / "input_se_a_ebd_v2.json") + jdata = j_loader(str(tests_path / os.path.join("model_compression", "input.json"))) + jdata["model"]["descriptor"] = {} + jdata["model"]["descriptor"]["type"] = "se_a_ebd_v2" + jdata["model"]["descriptor"]["exclude_types"] = [[0, 1]] + jdata["model"]["descriptor"]["type_one_side"] = False + jdata["model"]["type_embedding"] = {} + jdata["model"]["type_embedding"]["neuron"] = [1] + jdata["training"]["training_data"]["systems"] = data_file + jdata["training"]["validation_data"]["systems"] = data_file + with open(INPUT, "w") as fp: + json.dump(jdata, fp, indent=4) + + ret = run_dp("dp train " + INPUT) + np.testing.assert_equal(ret, 0, "DP train failed!") + ret = run_dp("dp freeze -o " + frozen_model) + np.testing.assert_equal(ret, 0, "DP freeze failed!") + ret = run_dp("dp compress " + " -i " + frozen_model + " -o 
" + compressed_model) + np.testing.assert_equal(ret, 0, "DP model compression failed!") + return INPUT, frozen_model, compressed_model + + +INPUT, FROZEN_MODEL, COMPRESSED_MODEL = _init_models() +INPUT_ET, FROZEN_MODEL_ET, COMPRESSED_MODEL_ET = _init_models_exclude_types() + + +class TestDeepPotAPBC(unittest.TestCase): + @classmethod + def setUpClass(self): + self.dp_original = DeepPot(FROZEN_MODEL) + self.dp_compressed = DeepPot(COMPRESSED_MODEL) + self.coords = np.array( + [ + 12.83, + 2.56, + 2.18, + 12.09, + 2.87, + 2.74, + 00.25, + 3.32, + 1.68, + 3.36, + 3.00, + 1.81, + 3.51, + 2.51, + 2.60, + 4.27, + 3.22, + 1.56, + ] + ) + self.atype = [0, 1, 1, 0, 1, 1] + self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0]) + + def test_attrs(self): + self.assertEqual(self.dp_original.get_ntypes(), 2) + self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places=default_places) + self.assertEqual(self.dp_original.get_type_map(), ["O", "H"]) + self.assertEqual(self.dp_original.get_dim_fparam(), 0) + self.assertEqual(self.dp_original.get_dim_aparam(), 0) + + self.assertEqual(self.dp_compressed.get_ntypes(), 2) + self.assertAlmostEqual( + self.dp_compressed.get_rcut(), 6.0, places=default_places + ) + self.assertEqual(self.dp_compressed.get_type_map(), ["O", "H"]) + self.assertEqual(self.dp_compressed.get_dim_fparam(), 0) + self.assertEqual(self.dp_compressed.get_dim_aparam(), 0) + + def test_1frame(self): + ee0, ff0, vv0 = self.dp_original.eval( + self.coords, self.box, self.atype, atomic=False + ) + ee1, ff1, vv1 = self.dp_compressed.eval( + self.coords, self.box, self.atype, atomic=False + ) + # check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + # check 
values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + def test_1frame_atm(self): + ee0, ff0, vv0, ae0, av0 = self.dp_original.eval( + self.coords, self.box, self.atype, atomic=True + ) + ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval( + self.coords, self.box, self.atype, atomic=True + ) + # check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ae0.shape, (nframes, natoms, 1)) + self.assertEqual(av0.shape, (nframes, natoms, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + self.assertEqual(ae1.shape, (nframes, natoms, 1)) + self.assertEqual(av1.shape, (nframes, natoms, 9)) + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ae0, ae1, default_places) + np.testing.assert_almost_equal(av0, av1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + def test_2frame_atm(self): + coords2 = np.concatenate((self.coords, self.coords)) + box2 = np.concatenate((self.box, self.box)) + ee0, ff0, vv0, ae0, av0 = self.dp_original.eval( + coords2, box2, self.atype, atomic=True + ) + ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval( + coords2, box2, self.atype, atomic=True + ) + # check shape of the returns + nframes = 2 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ae0.shape, (nframes, natoms, 1)) + self.assertEqual(av0.shape, (nframes, natoms, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + 
self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + self.assertEqual(ae1.shape, (nframes, natoms, 1)) + self.assertEqual(av1.shape, (nframes, natoms, 9)) + + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ae0, ae1, default_places) + np.testing.assert_almost_equal(av0, av1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + +class TestDeepPotANoPBC(unittest.TestCase): + @classmethod + def setUpClass(self): + self.dp_original = DeepPot(FROZEN_MODEL) + self.dp_compressed = DeepPot(COMPRESSED_MODEL) + self.coords = np.array( + [ + 12.83, + 2.56, + 2.18, + 12.09, + 2.87, + 2.74, + 00.25, + 3.32, + 1.68, + 3.36, + 3.00, + 1.81, + 3.51, + 2.51, + 2.60, + 4.27, + 3.22, + 1.56, + ] + ) + self.atype = [0, 1, 1, 0, 1, 1] + self.box = None + + def test_1frame(self): + ee0, ff0, vv0 = self.dp_original.eval( + self.coords, self.box, self.atype, atomic=False + ) + ee1, ff1, vv1 = self.dp_compressed.eval( + self.coords, self.box, self.atype, atomic=False + ) + # check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + def test_1frame_atm(self): + ee0, ff0, vv0, ae0, av0 = self.dp_original.eval( + self.coords, self.box, self.atype, atomic=True + ) + ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval( + self.coords, self.box, self.atype, atomic=True + ) + # check shape of the returns + nframes = 1 + natoms = 
len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ae0.shape, (nframes, natoms, 1)) + self.assertEqual(av0.shape, (nframes, natoms, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + self.assertEqual(ae1.shape, (nframes, natoms, 1)) + self.assertEqual(av1.shape, (nframes, natoms, 9)) + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ae0, ae1, default_places) + np.testing.assert_almost_equal(av0, av1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + def test_2frame_atm(self): + coords2 = np.concatenate((self.coords, self.coords)) + ee0, ff0, vv0, ae0, av0 = self.dp_original.eval( + coords2, self.box, self.atype, atomic=True + ) + ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval( + coords2, self.box, self.atype, atomic=True + ) + # check shape of the returns + nframes = 2 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ae0.shape, (nframes, natoms, 1)) + self.assertEqual(av0.shape, (nframes, natoms, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + self.assertEqual(ae1.shape, (nframes, natoms, 1)) + self.assertEqual(av1.shape, (nframes, natoms, 9)) + + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ae0, ae1, default_places) + np.testing.assert_almost_equal(av0, av1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + +class 
TestDeepPotALargeBoxNoPBC(unittest.TestCase): + @classmethod + def setUpClass(self): + self.dp_original = DeepPot(FROZEN_MODEL) + self.dp_compressed = DeepPot(COMPRESSED_MODEL) + self.coords = np.array( + [ + 12.83, + 2.56, + 2.18, + 12.09, + 2.87, + 2.74, + 00.25, + 3.32, + 1.68, + 3.36, + 3.00, + 1.81, + 3.51, + 2.51, + 2.60, + 4.27, + 3.22, + 1.56, + ] + ) + self.atype = [0, 1, 1, 0, 1, 1] + self.box = np.array([19.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0]) + + def test_1frame(self): + ee0, ff0, vv0 = self.dp_original.eval( + self.coords, self.box, self.atype, atomic=False + ) + ee1, ff1, vv1 = self.dp_compressed.eval( + self.coords, self.box, self.atype, atomic=False + ) + # check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + def test_1frame_atm(self): + ee0, ff0, vv0, ae0, av0 = self.dp_original.eval( + self.coords, self.box, self.atype, atomic=True + ) + ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval( + self.coords, self.box, self.atype, atomic=True + ) + # check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ae0.shape, (nframes, natoms, 1)) + self.assertEqual(av0.shape, (nframes, natoms, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + self.assertEqual(ae1.shape, (nframes, natoms, 1)) + 
self.assertEqual(av1.shape, (nframes, natoms, 9)) + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ae0, ae1, default_places) + np.testing.assert_almost_equal(av0, av1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + def test_ase(self): + from ase import ( + Atoms, + ) + + from deepmd.calculator import ( + DP, + ) + + water0 = Atoms( + "OHHOHH", + positions=self.coords.reshape((-1, 3)), + cell=self.box.reshape((3, 3)), + calculator=DP(FROZEN_MODEL), + ) + water1 = Atoms( + "OHHOHH", + positions=self.coords.reshape((-1, 3)), + cell=self.box.reshape((3, 3)), + calculator=DP(COMPRESSED_MODEL), + ) + ee0 = water0.get_potential_energy() + ff0 = water0.get_forces() + ee1 = water1.get_potential_energy() + ff1 = water1.get_forces() + nframes = 1 + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + + +class TestDeepPotAPBCExcludeTypes(unittest.TestCase): + @classmethod + def setUpClass(self): + self.dp_original = DeepPot(FROZEN_MODEL_ET) + self.dp_compressed = DeepPot(COMPRESSED_MODEL_ET) + self.coords = np.array( + [ + 12.83, + 2.56, + 2.18, + 12.09, + 2.87, + 2.74, + 00.25, + 3.32, + 1.68, + 3.36, + 3.00, + 1.81, + 3.51, + 2.51, + 2.60, + 4.27, + 3.22, + 1.56, + ] + ) + self.atype = [0, 1, 1, 0, 1, 1] + self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0]) + + @classmethod + def tearDownClass(self): + _file_delete(INPUT_ET) + _file_delete(FROZEN_MODEL_ET) + _file_delete(COMPRESSED_MODEL_ET) + _file_delete("out.json") + _file_delete("compress.json") + _file_delete("checkpoint") + _file_delete("model.ckpt.meta") + _file_delete("model.ckpt.index") + _file_delete("model.ckpt.data-00000-of-00001") + _file_delete("model.ckpt-100.meta") + _file_delete("model.ckpt-100.index") + _file_delete("model.ckpt-100.data-00000-of-00001") + 
_file_delete("model-compression/checkpoint") + _file_delete("model-compression/model.ckpt.meta") + _file_delete("model-compression/model.ckpt.index") + _file_delete("model-compression/model.ckpt.data-00000-of-00001") + _file_delete("model-compression") + _file_delete("input_v2_compat.json") + _file_delete("lcurve.out") + + def test_attrs(self): + self.assertEqual(self.dp_original.get_ntypes(), 2) + self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places=default_places) + self.assertEqual(self.dp_original.get_type_map(), ["O", "H"]) + self.assertEqual(self.dp_original.get_dim_fparam(), 0) + self.assertEqual(self.dp_original.get_dim_aparam(), 0) + + self.assertEqual(self.dp_compressed.get_ntypes(), 2) + self.assertAlmostEqual( + self.dp_compressed.get_rcut(), 6.0, places=default_places + ) + self.assertEqual(self.dp_compressed.get_type_map(), ["O", "H"]) + self.assertEqual(self.dp_compressed.get_dim_fparam(), 0) + self.assertEqual(self.dp_compressed.get_dim_aparam(), 0) + + def test_1frame(self): + ee0, ff0, vv0 = self.dp_original.eval( + self.coords, self.box, self.atype, atomic=False + ) + ee1, ff1, vv1 = self.dp_compressed.eval( + self.coords, self.box, self.atype, atomic=False + ) + # check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + def test_1frame_atm(self): + ee0, ff0, vv0, ae0, av0 = self.dp_original.eval( + self.coords, self.box, self.atype, atomic=True + ) + ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval( + self.coords, self.box, self.atype, atomic=True + ) + 
# check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ae0.shape, (nframes, natoms, 1)) + self.assertEqual(av0.shape, (nframes, natoms, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + self.assertEqual(ae1.shape, (nframes, natoms, 1)) + self.assertEqual(av1.shape, (nframes, natoms, 9)) + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ae0, ae1, default_places) + np.testing.assert_almost_equal(av0, av1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + def test_2frame_atm(self): + coords2 = np.concatenate((self.coords, self.coords)) + box2 = np.concatenate((self.box, self.box)) + ee0, ff0, vv0, ae0, av0 = self.dp_original.eval( + coords2, box2, self.atype, atomic=True + ) + ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval( + coords2, box2, self.atype, atomic=True + ) + # check shape of the returns + nframes = 2 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ae0.shape, (nframes, natoms, 1)) + self.assertEqual(av0.shape, (nframes, natoms, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + self.assertEqual(ae1.shape, (nframes, natoms, 1)) + self.assertEqual(av1.shape, (nframes, natoms, 9)) + + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ae0, ae1, default_places) + np.testing.assert_almost_equal(av0, av1, default_places) + np.testing.assert_almost_equal(ee0, ee1, 
default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) diff --git a/source/tests/test_model_compression_se_a_ebd_type_one_side.py b/source/tests/test_model_compression_se_a_ebd_type_one_side.py new file mode 100644 index 0000000000..2f3d16b05f --- /dev/null +++ b/source/tests/test_model_compression_se_a_ebd_type_one_side.py @@ -0,0 +1,587 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import subprocess as sp +import unittest + +import numpy as np + +# from deepmd.entrypoints.compress import compress +from common import ( + j_loader, + run_dp, + tests_path, +) + +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, +) +from deepmd.infer import ( + DeepPot, +) + +if GLOBAL_NP_FLOAT_PRECISION == np.float32: + default_places = 4 +else: + default_places = 10 + + +def _file_delete(file): + if os.path.isdir(file): + os.rmdir(file) + elif os.path.isfile(file): + os.remove(file) + + +def _subprocess_run(command): + popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT) + for line in iter(popen.stdout.readline, b""): + if hasattr(line, "decode"): + line = line.decode("utf-8") + line = line.rstrip() + print(line) + popen.wait() + return popen.returncode + + +def _init_models(): + data_file = str(tests_path / os.path.join("model_compression", "data")) + frozen_model = str(tests_path / "dp-original-se-e2-a-v2-one-side.pb") + compressed_model = str(tests_path / "dp-compressed-se-e2-a-v2-one-side.pb") + INPUT = str(tests_path / "input.json") + jdata = j_loader(str(tests_path / os.path.join("model_compression", "input.json"))) + jdata["training"]["training_data"]["systems"] = data_file + jdata["training"]["validation_data"]["systems"] = data_file + jdata["model"]["descriptor"] = {} + jdata["model"]["descriptor"]["type"] = "se_a_ebd_v2" + jdata["model"]["descriptor"]["type_one_side"] = True + jdata["model"]["type_embedding"] = {} + jdata["model"]["type_embedding"]["neuron"] = [1] + with open(INPUT, "w") as 
fp: + json.dump(jdata, fp, indent=4) + + ret = run_dp("dp train " + INPUT) + np.testing.assert_equal(ret, 0, "DP train failed!") + ret = run_dp("dp freeze -o " + frozen_model) + np.testing.assert_equal(ret, 0, "DP freeze failed!") + ret = run_dp("dp compress " + " -i " + frozen_model + " -o " + compressed_model) + np.testing.assert_equal(ret, 0, "DP model compression failed!") + return INPUT, frozen_model, compressed_model + + +def _init_models_exclude_types(): + data_file = str(tests_path / os.path.join("model_compression", "data")) + frozen_model = str(tests_path / "dp-original-exclude-types-se-e2-a-v2-one-side.pb") + compressed_model = str( + tests_path / "dp-compressed-exclude-types-se-e2-a-v2-one-side.pb" + ) + INPUT = str(tests_path / "input.json") + jdata = j_loader(str(tests_path / os.path.join("model_compression", "input.json"))) + jdata["model"]["descriptor"] = {} + jdata["model"]["descriptor"]["type"] = "se_a_ebd_v2" + jdata["model"]["descriptor"]["exclude_types"] = [[0, 1]] + jdata["model"]["descriptor"]["type_one_side"] = True + jdata["model"]["type_embedding"] = {} + jdata["model"]["type_embedding"]["neuron"] = [1] + jdata["training"]["training_data"]["systems"] = data_file + jdata["training"]["validation_data"]["systems"] = data_file + with open(INPUT, "w") as fp: + json.dump(jdata, fp, indent=4) + + ret = run_dp("dp train " + INPUT) + np.testing.assert_equal(ret, 0, "DP train failed!") + ret = run_dp("dp freeze -o " + frozen_model) + np.testing.assert_equal(ret, 0, "DP freeze failed!") + ret = run_dp("dp compress " + " -i " + frozen_model + " -o " + compressed_model) + np.testing.assert_equal(ret, 0, "DP model compression failed!") + return INPUT, frozen_model, compressed_model + + +INPUT, FROZEN_MODEL, COMPRESSED_MODEL = _init_models() +INPUT_ET, FROZEN_MODEL_ET, COMPRESSED_MODEL_ET = _init_models_exclude_types() + + +class TestDeepPotAPBC(unittest.TestCase): + @classmethod + def setUpClass(self): + self.dp_original = DeepPot(FROZEN_MODEL) + 
self.dp_compressed = DeepPot(COMPRESSED_MODEL) + self.coords = np.array( + [ + 12.83, + 2.56, + 2.18, + 12.09, + 2.87, + 2.74, + 00.25, + 3.32, + 1.68, + 3.36, + 3.00, + 1.81, + 3.51, + 2.51, + 2.60, + 4.27, + 3.22, + 1.56, + ] + ) + self.atype = [0, 1, 1, 0, 1, 1] + self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0]) + + def test_attrs(self): + self.assertEqual(self.dp_original.get_ntypes(), 2) + self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places=default_places) + self.assertEqual(self.dp_original.get_type_map(), ["O", "H"]) + self.assertEqual(self.dp_original.get_dim_fparam(), 0) + self.assertEqual(self.dp_original.get_dim_aparam(), 0) + + self.assertEqual(self.dp_compressed.get_ntypes(), 2) + self.assertAlmostEqual( + self.dp_compressed.get_rcut(), 6.0, places=default_places + ) + self.assertEqual(self.dp_compressed.get_type_map(), ["O", "H"]) + self.assertEqual(self.dp_compressed.get_dim_fparam(), 0) + self.assertEqual(self.dp_compressed.get_dim_aparam(), 0) + + def test_1frame(self): + ee0, ff0, vv0 = self.dp_original.eval( + self.coords, self.box, self.atype, atomic=False + ) + ee1, ff1, vv1 = self.dp_compressed.eval( + self.coords, self.box, self.atype, atomic=False + ) + # check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + def test_1frame_atm(self): + ee0, ff0, vv0, ae0, av0 = self.dp_original.eval( + self.coords, self.box, self.atype, atomic=True + ) + ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval( + self.coords, self.box, self.atype, 
atomic=True + ) + # check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ae0.shape, (nframes, natoms, 1)) + self.assertEqual(av0.shape, (nframes, natoms, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + self.assertEqual(ae1.shape, (nframes, natoms, 1)) + self.assertEqual(av1.shape, (nframes, natoms, 9)) + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ae0, ae1, default_places) + np.testing.assert_almost_equal(av0, av1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + def test_2frame_atm(self): + coords2 = np.concatenate((self.coords, self.coords)) + box2 = np.concatenate((self.box, self.box)) + ee0, ff0, vv0, ae0, av0 = self.dp_original.eval( + coords2, box2, self.atype, atomic=True + ) + ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval( + coords2, box2, self.atype, atomic=True + ) + # check shape of the returns + nframes = 2 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ae0.shape, (nframes, natoms, 1)) + self.assertEqual(av0.shape, (nframes, natoms, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + self.assertEqual(ae1.shape, (nframes, natoms, 1)) + self.assertEqual(av1.shape, (nframes, natoms, 9)) + + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ae0, ae1, default_places) + np.testing.assert_almost_equal(av0, av1, default_places) + 
np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + +class TestDeepPotANoPBC(unittest.TestCase): + @classmethod + def setUpClass(self): + self.dp_original = DeepPot(FROZEN_MODEL) + self.dp_compressed = DeepPot(COMPRESSED_MODEL) + self.coords = np.array( + [ + 12.83, + 2.56, + 2.18, + 12.09, + 2.87, + 2.74, + 00.25, + 3.32, + 1.68, + 3.36, + 3.00, + 1.81, + 3.51, + 2.51, + 2.60, + 4.27, + 3.22, + 1.56, + ] + ) + self.atype = [0, 1, 1, 0, 1, 1] + self.box = None + + def test_1frame(self): + ee0, ff0, vv0 = self.dp_original.eval( + self.coords, self.box, self.atype, atomic=False + ) + ee1, ff1, vv1 = self.dp_compressed.eval( + self.coords, self.box, self.atype, atomic=False + ) + # check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + def test_1frame_atm(self): + ee0, ff0, vv0, ae0, av0 = self.dp_original.eval( + self.coords, self.box, self.atype, atomic=True + ) + ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval( + self.coords, self.box, self.atype, atomic=True + ) + # check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ae0.shape, (nframes, natoms, 1)) + self.assertEqual(av0.shape, (nframes, natoms, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + 
self.assertEqual(ae1.shape, (nframes, natoms, 1)) + self.assertEqual(av1.shape, (nframes, natoms, 9)) + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ae0, ae1, default_places) + np.testing.assert_almost_equal(av0, av1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + def test_2frame_atm(self): + coords2 = np.concatenate((self.coords, self.coords)) + ee0, ff0, vv0, ae0, av0 = self.dp_original.eval( + coords2, self.box, self.atype, atomic=True + ) + ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval( + coords2, self.box, self.atype, atomic=True + ) + # check shape of the returns + nframes = 2 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ae0.shape, (nframes, natoms, 1)) + self.assertEqual(av0.shape, (nframes, natoms, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + self.assertEqual(ae1.shape, (nframes, natoms, 1)) + self.assertEqual(av1.shape, (nframes, natoms, 9)) + + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ae0, ae1, default_places) + np.testing.assert_almost_equal(av0, av1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + +class TestDeepPotALargeBoxNoPBC(unittest.TestCase): + @classmethod + def setUpClass(self): + self.dp_original = DeepPot(FROZEN_MODEL) + self.dp_compressed = DeepPot(COMPRESSED_MODEL) + self.coords = np.array( + [ + 12.83, + 2.56, + 2.18, + 12.09, + 2.87, + 2.74, + 00.25, + 3.32, + 1.68, + 3.36, + 3.00, + 1.81, + 3.51, + 2.51, + 2.60, + 4.27, + 3.22, + 1.56, + ] + ) + self.atype = [0, 1, 1, 0, 1, 1] + self.box = 
np.array([19.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0]) + + def test_1frame(self): + ee0, ff0, vv0 = self.dp_original.eval( + self.coords, self.box, self.atype, atomic=False + ) + ee1, ff1, vv1 = self.dp_compressed.eval( + self.coords, self.box, self.atype, atomic=False + ) + # check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + def test_1frame_atm(self): + ee0, ff0, vv0, ae0, av0 = self.dp_original.eval( + self.coords, self.box, self.atype, atomic=True + ) + ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval( + self.coords, self.box, self.atype, atomic=True + ) + # check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ae0.shape, (nframes, natoms, 1)) + self.assertEqual(av0.shape, (nframes, natoms, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + self.assertEqual(ae1.shape, (nframes, natoms, 1)) + self.assertEqual(av1.shape, (nframes, natoms, 9)) + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ae0, ae1, default_places) + np.testing.assert_almost_equal(av0, av1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + def test_ase(self): + from ase import ( + Atoms, + ) + + 
from deepmd.calculator import ( + DP, + ) + + water0 = Atoms( + "OHHOHH", + positions=self.coords.reshape((-1, 3)), + cell=self.box.reshape((3, 3)), + calculator=DP(FROZEN_MODEL), + ) + water1 = Atoms( + "OHHOHH", + positions=self.coords.reshape((-1, 3)), + cell=self.box.reshape((3, 3)), + calculator=DP(COMPRESSED_MODEL), + ) + ee0 = water0.get_potential_energy() + ff0 = water0.get_forces() + ee1 = water1.get_potential_energy() + ff1 = water1.get_forces() + nframes = 1 + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + + +class TestDeepPotAPBCExcludeTypes(unittest.TestCase): + @classmethod + def setUpClass(self): + self.dp_original = DeepPot(FROZEN_MODEL_ET) + self.dp_compressed = DeepPot(COMPRESSED_MODEL_ET) + self.coords = np.array( + [ + 12.83, + 2.56, + 2.18, + 12.09, + 2.87, + 2.74, + 00.25, + 3.32, + 1.68, + 3.36, + 3.00, + 1.81, + 3.51, + 2.51, + 2.60, + 4.27, + 3.22, + 1.56, + ] + ) + self.atype = [0, 1, 1, 0, 1, 1] + self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0]) + + @classmethod + def tearDownClass(self): + _file_delete(INPUT_ET) + _file_delete(FROZEN_MODEL_ET) + _file_delete(COMPRESSED_MODEL_ET) + _file_delete("out.json") + _file_delete("compress.json") + _file_delete("checkpoint") + _file_delete("model.ckpt.meta") + _file_delete("model.ckpt.index") + _file_delete("model.ckpt.data-00000-of-00001") + _file_delete("model.ckpt-100.meta") + _file_delete("model.ckpt-100.index") + _file_delete("model.ckpt-100.data-00000-of-00001") + _file_delete("model-compression/checkpoint") + _file_delete("model-compression/model.ckpt.meta") + _file_delete("model-compression/model.ckpt.index") + _file_delete("model-compression/model.ckpt.data-00000-of-00001") + _file_delete("model-compression") + _file_delete("input_v2_compat.json") + _file_delete("lcurve.out") + + def test_attrs(self): + self.assertEqual(self.dp_original.get_ntypes(), 2) + 
self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places=default_places) + self.assertEqual(self.dp_original.get_type_map(), ["O", "H"]) + self.assertEqual(self.dp_original.get_dim_fparam(), 0) + self.assertEqual(self.dp_original.get_dim_aparam(), 0) + + self.assertEqual(self.dp_compressed.get_ntypes(), 2) + self.assertAlmostEqual( + self.dp_compressed.get_rcut(), 6.0, places=default_places + ) + self.assertEqual(self.dp_compressed.get_type_map(), ["O", "H"]) + self.assertEqual(self.dp_compressed.get_dim_fparam(), 0) + self.assertEqual(self.dp_compressed.get_dim_aparam(), 0) + + def test_1frame(self): + ee0, ff0, vv0 = self.dp_original.eval( + self.coords, self.box, self.atype, atomic=False + ) + ee1, ff1, vv1 = self.dp_compressed.eval( + self.coords, self.box, self.atype, atomic=False + ) + # check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + def test_1frame_atm(self): + ee0, ff0, vv0, ae0, av0 = self.dp_original.eval( + self.coords, self.box, self.atype, atomic=True + ) + ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval( + self.coords, self.box, self.atype, atomic=True + ) + # check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ae0.shape, (nframes, natoms, 1)) + self.assertEqual(av0.shape, (nframes, natoms, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, 
natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + self.assertEqual(ae1.shape, (nframes, natoms, 1)) + self.assertEqual(av1.shape, (nframes, natoms, 9)) + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ae0, ae1, default_places) + np.testing.assert_almost_equal(av0, av1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + def test_2frame_atm(self): + coords2 = np.concatenate((self.coords, self.coords)) + box2 = np.concatenate((self.box, self.box)) + ee0, ff0, vv0, ae0, av0 = self.dp_original.eval( + coords2, box2, self.atype, atomic=True + ) + ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval( + coords2, box2, self.atype, atomic=True + ) + # check shape of the returns + nframes = 2 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ae0.shape, (nframes, natoms, 1)) + self.assertEqual(av0.shape, (nframes, natoms, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + self.assertEqual(ae1.shape, (nframes, natoms, 1)) + self.assertEqual(av1.shape, (nframes, natoms, 9)) + + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ae0, ae1, default_places) + np.testing.assert_almost_equal(av0, av1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) diff --git a/source/tests/test_model_compression_se_atten.py b/source/tests/test_model_compression_se_atten.py index 30d78669ce..6bab1a3881 100644 --- a/source/tests/test_model_compression_se_atten.py +++ b/source/tests/test_model_compression_se_atten.py @@ -46,10 +46,31 @@ def _subprocess_run(command): # - type embedding 
FP32, se_atten FP64 # - type embedding FP32, se_atten FP32 tests = [ - {"se_atten precision": "float64", "type embedding precision": "float64"}, - {"se_atten precision": "float64", "type embedding precision": "float32"}, - {"se_atten precision": "float32", "type embedding precision": "float64"}, - {"se_atten precision": "float32", "type embedding precision": "float32"}, + { + "se_atten precision": "float64", + "type embedding precision": "float64", + "smooth_type_embdding": True, + }, + { + "se_atten precision": "float64", + "type embedding precision": "float64", + "smooth_type_embdding": False, + }, + { + "se_atten precision": "float64", + "type embedding precision": "float32", + "smooth_type_embdding": True, + }, + { + "se_atten precision": "float32", + "type embedding precision": "float64", + "smooth_type_embdding": True, + }, + { + "se_atten precision": "float32", + "type embedding precision": "float32", + "smooth_type_embdding": True, + }, ] @@ -73,6 +94,9 @@ def _init_models(): jdata["model"]["descriptor"]["stripped_type_embedding"] = True jdata["model"]["descriptor"]["sel"] = 120 jdata["model"]["descriptor"]["attn_layer"] = 0 + jdata["model"]["descriptor"]["smooth_type_embdding"] = tests[i][ + "smooth_type_embdding" + ] jdata["model"]["type_embedding"] = {} jdata["model"]["type_embedding"]["precision"] = tests[i][ "type embedding precision" @@ -479,9 +503,15 @@ def test_1frame(self): self.assertEqual(ff1.shape, (nframes, natoms, 3)) self.assertEqual(vv1.shape, (nframes, 9)) # check values - np.testing.assert_almost_equal(ff0, ff1, default_places) - np.testing.assert_almost_equal(ee0, ee1, default_places) - np.testing.assert_almost_equal(vv0, vv1, default_places) + np.testing.assert_almost_equal( + ff0, ff1, default_places, err_msg=str(tests[i]) + ) + np.testing.assert_almost_equal( + ee0, ee1, default_places, err_msg=str(tests[i]) + ) + np.testing.assert_almost_equal( + vv0, vv1, default_places, err_msg=str(tests[i]) + ) def test_1frame_atm(self): for i in 
range(len(tests)): diff --git a/source/tests/test_model_se_a_ebd_v2.py b/source/tests/test_model_se_a_ebd_v2.py new file mode 100644 index 0000000000..71860890ce --- /dev/null +++ b/source/tests/test_model_se_a_ebd_v2.py @@ -0,0 +1,184 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import numpy as np +from common import ( + DataSystem, + gen_data, + j_loader, +) + +from deepmd.common import ( + j_must_have, +) +from deepmd.descriptor.se_a_ebd_v2 import ( + DescrptSeAEbdV2, +) +from deepmd.env import ( + tf, +) +from deepmd.fit import ( + EnerFitting, +) +from deepmd.model import ( + EnerModel, +) +from deepmd.utils.type_embed import ( + TypeEmbedNet, +) + +GLOBAL_ENER_FLOAT_PRECISION = tf.float64 +GLOBAL_TF_FLOAT_PRECISION = tf.float64 +GLOBAL_NP_FLOAT_PRECISION = np.float64 + + +class TestModel(tf.test.TestCase): + def setUp(self): + gen_data() + + def test_model(self): + jfile = "water_se_a_ebd.json" + jdata = j_loader(jfile) + + systems = j_must_have(jdata, "systems") + set_pfx = j_must_have(jdata, "set_prefix") + batch_size = j_must_have(jdata, "batch_size") + test_size = j_must_have(jdata, "numb_test") + batch_size = 1 + test_size = 1 + stop_batch = j_must_have(jdata, "stop_batch") + rcut = j_must_have(jdata["model"]["descriptor"], "rcut") + + data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None) + + test_data = data.get_test() + numb_test = 1 + + jdata["model"]["descriptor"].pop("type", None) + jdata["model"]["type_embedding"] = {} + jdata["model"]["type_embedding"]["neuron"] = [1] + jdata["model"]["type_embedding"]["resnet_dt"] = False + jdata["model"]["type_embedding"]["seed"] = 1 + typeebd_param = jdata["model"]["type_embedding"] + typeebd = TypeEmbedNet( + neuron=typeebd_param["neuron"], + activation_function=None, + resnet_dt=typeebd_param["resnet_dt"], + seed=typeebd_param["seed"], + uniform_seed=True, + padding=True, + ) + descrpt = DescrptSeAEbdV2( + **jdata["model"]["descriptor"], + ) + 
jdata["model"]["fitting_net"]["descrpt"] = descrpt + fitting = EnerFitting( + **jdata["model"]["fitting_net"], + ) + # fitting = EnerFitting(jdata['model']['fitting_net'], descrpt) + model = EnerModel(descrpt, fitting, typeebd) + + # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']]) + input_data = { + "coord": [test_data["coord"]], + "box": [test_data["box"]], + "type": [test_data["type"]], + "natoms_vec": [test_data["natoms_vec"]], + "default_mesh": [test_data["default_mesh"]], + } + model._compute_input_stat(input_data) + model.descrpt.bias_atom_e = data.compute_energy_shift() + + t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c") + t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy") + t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force") + t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial") + t_atom_ener = tf.placeholder( + GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener" + ) + t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord") + t_type = tf.placeholder(tf.int32, [None], name="i_type") + t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms") + t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box") + t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh") + is_training = tf.placeholder(tf.bool) + t_fparam = None + + model_pred = model.build( + t_coord, + t_type, + t_natoms, + t_box, + t_mesh, + t_fparam, + suffix="se_a_ebd_v2", + reuse=False, + ) + energy = model_pred["energy"] + force = model_pred["force"] + virial = model_pred["virial"] + atom_ener = model_pred["atom_ener"] + + feed_dict_test = { + t_prop_c: test_data["prop_c"], + t_energy: test_data["energy"][:numb_test], + t_force: np.reshape(test_data["force"][:numb_test, :], [-1]), + t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]), + t_atom_ener: 
np.reshape(test_data["atom_ener"][:numb_test, :], [-1]), + t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]), + t_box: test_data["box"][:numb_test, :], + t_type: np.reshape(test_data["type"][:numb_test, :], [-1]), + t_natoms: test_data["natoms_vec"], + t_mesh: test_data["default_mesh"], + is_training: False, + } + + sess = self.cached_session().__enter__() + sess.run(tf.global_variables_initializer()) + [e, f, v] = sess.run([energy, force, virial], feed_dict=feed_dict_test) + + e = e.reshape([-1]) + f = f.reshape([-1]) + v = v.reshape([-1]) + + refe = [5.435394596262052014e-01] + reff = [ + 6.583728125594628944e-02, + 7.228993116083935744e-02, + 1.971543579114074483e-03, + 6.567474563776359853e-02, + 7.809421727465599983e-02, + -4.866958849094786890e-03, + -8.670511901715304004e-02, + 3.525374157021862048e-02, + 1.415748959800727487e-03, + 6.375813001810648473e-02, + -1.139053242798149790e-01, + -4.178593754384440744e-03, + -1.471737787218250215e-01, + 4.189712704724830872e-02, + 7.011731363309440038e-03, + 3.860874082716164030e-02, + -1.136296927731473005e-01, + -1.353471298745012206e-03, + ] + refv = [ + -4.243979601186427253e-01, + 1.097173849143971286e-01, + 1.227299373463585502e-02, + 1.097173849143970314e-01, + -2.462891443164323124e-01, + -5.711664180530139426e-03, + 1.227299373463585502e-02, + -5.711664180530143763e-03, + -6.217348853341628408e-04, + ] + refe = np.reshape(refe, [-1]) + reff = np.reshape(reff, [-1]) + refv = np.reshape(refv, [-1]) + + places = 6 + for ii in range(e.size): + self.assertAlmostEqual(e[ii], refe[ii], places=places) + for ii in range(f.size): + self.assertAlmostEqual(f[ii], reff[ii], places=places) + for ii in range(v.size): + self.assertAlmostEqual(v[ii], refv[ii], places=places) diff --git a/source/tests/test_model_se_atten.py b/source/tests/test_model_se_atten.py index 445959ceb2..5417201a9f 100644 --- a/source/tests/test_model_se_atten.py +++ b/source/tests/test_model_se_atten.py @@ -5,6 +5,8 @@ import numpy as np 
from common import ( DataSystem, + check_smooth_efv, + finite_difference_fv, gen_data, j_loader, ) @@ -726,3 +728,149 @@ def test_stripped_type_embedding_exclude_types(self): np.testing.assert_almost_equal(des[:, 0:2], 0.0, 10) with self.assertRaises(AssertionError): np.testing.assert_almost_equal(des[:, 2:6], 0.0, 10) + + def test_smoothness_of_stripped_type_embedding_smooth_model(self): + """test: auto-diff, continuity of e,f,v.""" + jfile = "water_se_atten.json" + jdata = j_loader(jfile) + + systems = j_must_have(jdata, "systems") + set_pfx = j_must_have(jdata, "set_prefix") + batch_size = j_must_have(jdata, "batch_size") + test_size = j_must_have(jdata, "numb_test") + batch_size = 1 + test_size = 1 + stop_batch = j_must_have(jdata, "stop_batch") + rcut = j_must_have(jdata["model"]["descriptor"], "rcut") + + data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None) + + test_data = data.get_test() + numb_test = 1 + + jdata["model"]["descriptor"].pop("type", None) + jdata["model"]["descriptor"]["ntypes"] = 2 + jdata["model"]["descriptor"]["stripped_type_embedding"] = True + jdata["model"]["descriptor"]["smooth_type_embdding"] = True + jdata["model"]["descriptor"]["attn_layer"] = 1 + jdata["model"]["descriptor"]["rcut"] = 6.0 + jdata["model"]["descriptor"]["rcut_smth"] = 4.0 + descrpt = DescrptSeAtten(**jdata["model"]["descriptor"], uniform_seed=True) + jdata["model"]["fitting_net"]["descrpt"] = descrpt + fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True) + typeebd_param = jdata["model"]["type_embedding"] + typeebd = TypeEmbedNet( + neuron=typeebd_param["neuron"], + activation_function=None, + resnet_dt=typeebd_param["resnet_dt"], + seed=typeebd_param["seed"], + uniform_seed=True, + padding=True, + ) + model = EnerModel(descrpt, fitting, typeebd) + + # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']]) + input_data = { + "coord": 
[test_data["coord"]], + "box": [test_data["box"]], + "type": [test_data["type"]], + "natoms_vec": [test_data["natoms_vec"]], + "default_mesh": [test_data["default_mesh"]], + } + model._compute_input_stat(input_data) + model.descrpt.bias_atom_e = data.compute_energy_shift() + + t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c") + t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy") + t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force") + t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial") + t_atom_ener = tf.placeholder( + GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener" + ) + t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord") + t_type = tf.placeholder(tf.int32, [None], name="i_type") + t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms") + t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box") + t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh") + is_training = tf.placeholder(tf.bool) + t_fparam = None + inputs_dict = {} + + model_pred = model.build( + t_coord, + t_type, + t_natoms, + t_box, + t_mesh, + inputs_dict, + suffix=self.filename + + "-" + + inspect.stack()[0][3] + + "test_model_se_atten_model_compressible", + reuse=False, + ) + energy = model_pred["energy"] + force = model_pred["force"] + virial = model_pred["virial"] + atom_ener = model_pred["atom_ener"] + + feed_dict_test = { + t_prop_c: test_data["prop_c"], + t_energy: test_data["energy"][:numb_test], + t_force: np.reshape(test_data["force"][:numb_test, :], [-1]), + t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]), + t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]), + t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]), + t_box: test_data["box"][:numb_test, :], + t_type: np.reshape(test_data["type"][:numb_test, :], [-1]), + t_natoms: test_data["natoms_vec"], + t_mesh: test_data["default_mesh"], + 
is_training: False, + } + sess = self.cached_session().__enter__() + sess.run(tf.global_variables_initializer()) + [pe, pf, pv] = sess.run([energy, force, virial], feed_dict=feed_dict_test) + pf, pv = pf.reshape(-1), pv.reshape(-1) + + eps = 1e-4 + delta = 1e-5 + fdf, fdv = finite_difference_fv( + sess, energy, feed_dict_test, t_coord, t_box, delta=eps + ) + np.testing.assert_allclose(pf, fdf, delta) + np.testing.assert_allclose(pv, fdv, delta) + + tested_eps = [1e-3, 1e-4, 1e-5, 1e-6, 1e-7] + for eps in tested_eps: + deltae = eps + deltad = eps + de, df, dv = check_smooth_efv( + sess, + energy, + force, + virial, + feed_dict_test, + t_coord, + jdata["model"]["descriptor"]["rcut"], + delta=eps, + ) + np.testing.assert_allclose(de[0], de[1], rtol=0, atol=deltae) + np.testing.assert_allclose(df[0], df[1], rtol=0, atol=deltad) + np.testing.assert_allclose(dv[0], dv[1], rtol=0, atol=deltad) + + for eps in tested_eps: + deltae = 5.0 * eps + deltad = 5.0 * eps + de, df, dv = check_smooth_efv( + sess, + energy, + force, + virial, + feed_dict_test, + t_coord, + jdata["model"]["descriptor"]["rcut_smth"], + delta=eps, + ) + np.testing.assert_allclose(de[0], de[1], rtol=0, atol=deltae) + np.testing.assert_allclose(df[0], df[1], rtol=0, atol=deltad) + np.testing.assert_allclose(dv[0], dv[1], rtol=0, atol=deltad) diff --git a/source/tests/test_train.py b/source/tests/test_train.py index 3d190ba716..145457260f 100644 --- a/source/tests/test_train.py +++ b/source/tests/test_train.py @@ -174,6 +174,50 @@ def test_skip_loc_frame(self): jdata = update_sel(jdata) self.assertEqual(jdata, expected_out) + def test_skip_frozen(self): + jdata = { + "model": { + "type": "frozen", + } + } + expected_out = jdata.copy() + jdata = update_sel(jdata) + self.assertEqual(jdata, expected_out) + + def test_skip_linear_frozen(self): + jdata = { + "model": { + "type": "linear_ener", + "models": [ + {"type": "frozen"}, + {"type": "frozen"}, + {"type": "frozen"}, + {"type": "frozen"}, + ], + } + } + 
expected_out = jdata.copy() + jdata = update_sel(jdata) + self.assertEqual(jdata, expected_out) + + @patch("deepmd.entrypoints.train.get_min_nbor_dist") + def test_pairwise_dprc(self, sel_mock): + sel_mock.return_value = 0.5 + jdata = { + "model": { + "type": "pairwise_dprc", + "models": [ + {"type": "frozen"}, + {"type": "frozen"}, + {"type": "frozen"}, + {"type": "frozen"}, + ], + } + } + expected_out = jdata.copy() + jdata = update_sel(jdata) + self.assertEqual(jdata, expected_out) + def test_wrap_up_4(self): self.assertEqual(wrap_up_4(12), 3 * 4) self.assertEqual(wrap_up_4(13), 4 * 4)