diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 2f8eada12b..8c3647a4f4 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -225,7 +225,7 @@ jobs: shell: bash -l {0} run: | cd ${{ env.EXAMPLES_PATH }} - for script in $(find . \( -not -name "_*" -not -name "side-by-side*" -not -name "vectorize.py" -not -name "scan.py" -and -name "*.py" \)) + for script in $(find . \( -not -name "_*" -not -name "side-by-side*" -not -name "scan.py" -and -name "*.py" \)) do echo "Executing ${script}" python ${script} || exit 1 diff --git a/numba_dpex/__init__.py b/numba_dpex/__init__.py index d754de09b9..6ed2e55e34 100644 --- a/numba_dpex/__init__.py +++ b/numba_dpex/__init__.py @@ -15,10 +15,8 @@ import dpctl import llvmlite.binding as ll from numba import __version__ as numba_version -from numba.np.ufunc.decorators import Vectorize from numba_dpex.core.kernel_interface.launcher import call_kernel -from numba_dpex.vectorizers import Vectorize as DpexVectorize from .numba_patches import patch_arrayexpr_tree_to_ir, patch_is_ufunc @@ -135,8 +133,6 @@ def parse_sem_version(version_string: str) -> Tuple[int, int, int]: del load_dpctl_sycl_interface -Vectorize.target_registry.ondemand["dpex"] = lambda: DpexVectorize - from numba_dpex._version import get_versions # noqa E402 __version__ = get_versions()["version"] diff --git a/numba_dpex/examples/vectorize.py b/numba_dpex/examples/vectorize.py deleted file mode 100644 index 6cc3aa8db9..0000000000 --- a/numba_dpex/examples/vectorize.py +++ /dev/null @@ -1,66 +0,0 @@ -# SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -import dpctl -import numpy as np -from numba import float64, vectorize - - -@vectorize(nopython=True) -def ufunc_kernel(x, y): - return x + y - - -def get_device(): - device = None - try: - device = dpctl.select_gpu_device() - except: - try: - device = dpctl.select_cpu_device() - except: - raise RuntimeError("No device found") - return device - - -def test_njit(): - N = 10 - dtype = np.float64 - - A = np.arange(N, dtype=dtype) - B = np.arange(N, dtype=dtype) * 10 - - # Use the environment variable SYCL_DEVICE_FILTER to change the default device. - # See https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter. - device = dpctl.select_default_device() - print("Using device ...") - device.print_device_info() - - with dpctl.device_context(device): - C = ufunc_kernel(A, B) - - print(C) - - print("Done...") - - -@vectorize([float64(float64, float64)], target="dpex") -def vector_add(a, b): - return a + b - - -def test_vectorize(): - A = np.arange(10, dtype=np.float64).reshape((5, 2)) - B = np.arange(10, dtype=np.float64).reshape((5, 2)) - - device = dpctl.select_default_device() - with dpctl.device_context(device): - C = vector_add(A, B) - - print(C) - - -if __name__ == "__main__": - test_njit() - test_vectorize() diff --git a/numba_dpex/tests/test_vectorize.py b/numba_dpex/tests/test_vectorize.py deleted file mode 100644 index ec25ce9864..0000000000 --- a/numba_dpex/tests/test_vectorize.py +++ /dev/null @@ -1,65 +0,0 @@ -#! /usr/bin/env python - -# SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -import numpy as np -import pytest -from numba import float32, float64, int32, int64, vectorize - -list_of_shape = [ - (100, 100), - (100, (10, 10)), - (100, (2, 5, 10)), -] - - -@pytest.fixture(params=list_of_shape) -def shape(request): - return request.param - - -list_of_dtype = [ - (np.int32, int32), - (np.float32, float32), - (np.int64, int64), - (np.float64, float64), -] - - -@pytest.fixture(params=list_of_dtype) -def dtypes(request): - return request.param - - -list_of_input_type = ["array", "scalar"] - - -@pytest.fixture(params=list_of_input_type) -def input_type(request): - return request.param - - -@pytest.mark.xfail -def test_vectorize(shape, dtypes, input_type): - def vector_add(a, b): - return a + b - - dtype, sig_dtype = dtypes - sig = [sig_dtype(sig_dtype, sig_dtype)] - size, shape = shape - - if input_type == "array": - A = np.arange(size, dtype=dtype).reshape(shape) - B = np.arange(size, dtype=dtype).reshape(shape) - elif input_type == "scalar": - A = dtype(1.2) - B = dtype(2.3) - - f = vectorize(sig, target="dpex")(vector_add) - expected = f(A, B) - actual = vector_add(A, B) - - max_abs_err = np.sum(expected) - np.sum(actual) - assert max_abs_err < 1e-5 diff --git a/numba_dpex/vectorizers.py b/numba_dpex/vectorizers.py deleted file mode 100644 index 819adeef3d..0000000000 --- a/numba_dpex/vectorizers.py +++ /dev/null @@ -1,207 +0,0 @@ -# SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -"""Provide a Dpex target for Numba's ``vectorize`` decorator.""" - -import warnings - -import dpctl -import numpy as np -from numba.np.ufunc import deviceufunc - -import numba_dpex as dpex -from numba_dpex.utils import ( - as_usm_obj, - copy_to_numpy_from_usm_obj, - has_usm_memory, -) - -vectorizer_stager_source = """ -def __vectorized_{name}({args}, __out__): - __tid__ = __dpex__.get_global_id(0) - if __tid__ < __out__.shape[0]: - __out__[__tid__] = __core__({argitems}) -""" - - -class Vectorize(deviceufunc.DeviceVectorize): - def _compile_core(self, sig): - devfn = dpex.func(sig)(self.pyfunc) - return devfn, devfn.cres.signature.return_type - - def _get_globals(self, corefn): - glbl = self.pyfunc.__globals__.copy() - glbl.update({"__dpex__": dpex, "__core__": corefn}) - return glbl - - def _compile_kernel(self, fnobj, sig): - return dpex.kernel(sig)(fnobj) - - def build_ufunc(self): - return UFuncDispatcher(self.kernelmap) - - @property - def _kernel_template(self): - return vectorizer_stager_source - - -class UFuncDispatcher(object): - """ - Invoke the Dpex ufunc specialization for the given inputs. - """ - - def __init__(self, types_to_retty_kernels): - self.functions = types_to_retty_kernels - - def __call__(self, *args, **kws): - """ - Call the kernel launching mechanism - Args: - *args (np.ndarray): NumPy arrays - **kws (optional): - queue (dpctl._sycl_queue.SyclQueue): SYCL queue. - out (np.ndarray): Output array. - """ - return UFuncMechanism.call(self.functions, args, kws) - - def reduce(self, arg, queue=0): - raise NotImplementedError - - -class UFuncMechanism(deviceufunc.UFuncMechanism): - """ - Mechanism to process Input to a SYCL kernel and launch that kernel - """ - - @classmethod - def call(cls, typemap, args, kws): - """ - Perform the entire ufunc call mechanism. - - Args: - typemap (dict): Signature mapped to kernel. - args: Arguments to the @vectorize function. - kws (optional): Optional keywords. Not supported. - - """ - # Handle keywords - queue = dpctl.get_current_queue() - out = kws.pop("out", None) - - if kws: - warnings.warn("unrecognized keywords: %s" % ", ".join(kws)) - - # Begin call resolution - cr = cls(typemap, args) - args = cr.get_arguments() - resty, func = cr.get_function() - - outshape = args[0].shape - - # Adjust output value - if out is not None and cr.is_device_array(out): - out = cr.as_device_array(out) - - def attempt_ravel(a): - if cr.SUPPORT_DEVICE_SLICING: - raise NotImplementedError - - try: - # Call the `.ravel()` method - return a.ravel() - except NotImplementedError: - # If it is not a device array - if not cr.is_device_array(a): - raise - # For device array, retry ravel on the host by first - # copying it back. - else: - hostary = cr.to_host(a, queue).ravel() - return cr.to_device(hostary, queue) - - if args[0].ndim > 1: - args = [attempt_ravel(a) for a in args] - - # Prepare argument on the device - devarys = [] - any_device = True - for a in args: - if cr.is_device_array(a): - devarys.append(a) - else: - dev_a = cr.to_device(a, queue=queue) - devarys.append(dev_a) - - # Launch - shape = args[0].shape - if out is None: - # No output is provided - devout = cr.device_array(shape, resty, queue=queue) - - devarys.extend([devout]) - cr.launch(func, shape[0], queue, devarys) - - if any_device: - # If any of the arguments are on device, - # Keep output on the device - return devout.reshape(outshape) - else: - # Otherwise, transfer output back to host - raise ValueError("copy_to_host() is not yet supported") - - elif cr.is_device_array(out): - # If output is provided and it is a device array, - # Return device array - if out.ndim > 1: - out = attempt_ravel(out) - devout = out - devarys.extend([devout]) - cr.launch(func, shape[0], queue, devarys) - return devout.reshape(outshape) - - else: - # If output is provided and it is a host array, - # Return host array - assert out.shape == shape - assert out.dtype == resty - devout = cr.device_array(shape, resty, queue=queue) - devarys.extend([devout]) - cr.launch(func, shape[0], queue, devarys) - return devout.reshape(outshape) - - def as_device_array(self, obj): - return obj - - def is_device_array(self, obj): - ret = has_usm_memory(obj) - return ret is not None - - def is_host_array(self, obj): - return not self.is_device_array(obj) - - def to_device(self, hostary, queue): - usm_mem = as_usm_obj(hostary, queue=queue, usm_type="shared") - usm_backed_ndary = np.ndarray( - hostary.shape, buffer=usm_mem, dtype=hostary.dtype - ) - return usm_backed_ndary - - def to_host(self, devary, queue): - hostary = np.empty(devary.shape, dtype=devary.dtype) - devary_memview = memoryview(devary) - devary_memview = devary_memview.cast("B") - copy_to_numpy_from_usm_obj(devary_memview, hostary) - - def launch(self, func, count, queue, args): - func[dpex.Range(count)](*args) - - def device_array(self, shape, dtype, queue): - size = np.prod(shape) - itemsize = dtype.itemsize - usm_mem = dpctl.memory.MemoryUSMShared(size * itemsize, queue=queue) - usm_backed_ndary = np.ndarray(shape, buffer=usm_mem, dtype=dtype) - return usm_backed_ndary - - def broadcast_device(self, ary, shape): - raise NotImplementedError("device broadcast_device NIY") diff --git a/scripts/run_examples.sh b/scripts/run_examples.sh index b4dff888f4..9255628863 100755 --- a/scripts/run_examples.sh +++ b/scripts/run_examples.sh @@ -25,7 +25,6 @@ run_checks() { check numba_dpex/examples/sum_reduction.py check numba_dpex/examples/sum_reduction_recursive_ocl.py # check numba_dpex/examples/usm_ndarray.py # See https://github.com/IntelPython/numba-dpex/issues/436 - check numba_dpex/examples/vectorize.py check numba_dpex/examples/auto_offload_examples/sum-1d.py check numba_dpex/examples/auto_offload_examples/sum-2d.py