From 28d779ffc83a565f2590a737d95b178606f424b5 Mon Sep 17 00:00:00 2001 From: Adarsh Yoga Date: Wed, 24 Apr 2024 05:18:56 +0000 Subject: [PATCH 1/4] change private arrays to use new dpex api --- dpbench/benchmarks/default/gpairs/gpairs_numba_dpex_k.py | 8 ++++---- dpbench/benchmarks/default/kmeans/kmeans_numba_dpex_k.py | 4 ++-- dpbench/benchmarks/default/knn/knn_numba_dpex_k.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dpbench/benchmarks/default/gpairs/gpairs_numba_dpex_k.py b/dpbench/benchmarks/default/gpairs/gpairs_numba_dpex_k.py index 52034cef..f5a72b00 100644 --- a/dpbench/benchmarks/default/gpairs/gpairs_numba_dpex_k.py +++ b/dpbench/benchmarks/default/gpairs/gpairs_numba_dpex_k.py @@ -38,9 +38,9 @@ def count_weighted_pairs_3d_intel_no_slm_ker( n_wi = 32 - dsq_mat = dpex.private.array(shape=(32 * 32), dtype=dtype) - w0_vec = dpex.private.array(shape=(32), dtype=dtype) - w1_vec = dpex.private.array(shape=(32), dtype=dtype) + dsq_mat = kapi.PrivateArray(shape=(32 * 32), dtype=dtype) + w0_vec = kapi.PrivateArray(shape=(32), dtype=dtype) + w1_vec = kapi.PrivateArray(shape=(32), dtype=dtype) offset0 = gr0 * n_wi * lws0 + lid0 offset1 = gr1 * n_wi * lws1 + lid1 @@ -80,7 +80,7 @@ def count_weighted_pairs_3d_intel_no_slm_ker( # update slm_hist. Use work-item private buffer of 16 tfloat elements for k in range(0, slm_hist_size, private_hist_size): - private_hist = dpex.private.array(shape=(32), dtype=dtype) + private_hist = kapi.PrivateArray(shape=(32), dtype=dtype) for p in range(private_hist_size): private_hist[p] = 0.0 diff --git a/dpbench/benchmarks/default/kmeans/kmeans_numba_dpex_k.py b/dpbench/benchmarks/default/kmeans/kmeans_numba_dpex_k.py index 6bac670c..c89b6b8d 100644 --- a/dpbench/benchmarks/default/kmeans/kmeans_numba_dpex_k.py +++ b/dpbench/benchmarks/default/kmeans/kmeans_numba_dpex_k.py @@ -56,7 +56,7 @@ def groupByCluster( for i in range(WorkPI): point_id = grid * WorkPI * local_size + i * local_size + lid if point_id < numpoints: - localP = dpex.private.array(dims, dtyp) + localP = kapi.PrivateArray(dims, dtyp) for d in range(dims): localP[d] = arrayP[point_id, d] @@ -179,7 +179,7 @@ def updateLabels( for i in range(WorkPI): point_id = grid * WorkPI * local_size + i * local_size + lid if point_id < numpoints: - localP = dpex.private.array(dims, dtyp) + localP = kapi.PrivateArray(dims, dtyp) for d in range(dims): localP[d] = arrayP[point_id, d] diff --git a/dpbench/benchmarks/default/knn/knn_numba_dpex_k.py b/dpbench/benchmarks/default/knn/knn_numba_dpex_k.py index 8771efa6..4ec82875 100644 --- a/dpbench/benchmarks/default/knn/knn_numba_dpex_k.py +++ b/dpbench/benchmarks/default/knn/knn_numba_dpex_k.py @@ -25,7 +25,7 @@ def _knn_kernel( # noqa: C901: TODO: can we simplify logic? dtype = train.dtype i = item.get_id(0) # here k has to be 5 in order to match with numpy - queue_neighbors = dpex.private.array(shape=(5, 2), dtype=dtype) + queue_neighbors = kapi.PrivateArray(shape=(5, 2), dtype=dtype) for j in range(k): x1 = train[j] From 36e3c36df3973a9515b5ed6686cae55dcdd44a59 Mon Sep 17 00:00:00 2001 From: Adarsh Yoga Date: Wed, 24 Apr 2024 05:19:40 +0000 Subject: [PATCH 2/4] change dpctl.asarray calls in dpcpp framework --- dpbench/infrastructure/frameworks/dpcpp_framework.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpbench/infrastructure/frameworks/dpcpp_framework.py b/dpbench/infrastructure/frameworks/dpcpp_framework.py index 21b4c336..7c47fd12 100644 --- a/dpbench/infrastructure/frameworks/dpcpp_framework.py +++ b/dpbench/infrastructure/frameworks/dpcpp_framework.py @@ -59,7 +59,7 @@ def _copy_to_func_impl(ref_array): else: order = "K" return dpt.asarray( - obj=ref_array, + ref_array, dtype=ref_array.dtype, device=self.sycl_device, copy=None, From 45cb63fe9705c1885d166ef9fe1b30fc26a36729 Mon Sep 17 00:00:00 2001 From: Adarsh Yoga Date: Wed, 24 Apr 2024 05:20:50 +0000 Subject: [PATCH 3/4] fix version pinning in yml file --- conda-recipe/meta.yaml | 2 +- dpbench/configs/framework_info/dpcpp.toml | 2 +- environments/conda-linux-sycl.yml | 5 ++--- environments/conda-win-sycl.yml | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index cb2ff422..b1e19f30 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -17,7 +17,7 @@ build: requirements: build: - {{ compiler('cxx') }} - - {{ compiler('dpcpp') }} ==2024.0.0 # [not osx] + - {{ compiler('dpcpp') }} - sysroot_linux-64 >=2.28 # [linux] host: - python diff --git a/dpbench/configs/framework_info/dpcpp.toml b/dpbench/configs/framework_info/dpcpp.toml index 77ce097d..c1655e91 100644 --- a/dpbench/configs/framework_info/dpcpp.toml +++ b/dpbench/configs/framework_info/dpcpp.toml @@ -10,7 +10,7 @@ postfix = "dpcpp" class = "DpcppFramework" arch = "cpu" sycl_device = "cpu" -dpcpp_version = "IntelLLVM 2024.0.0" +dpcpp_version = "IntelLLVM 2024.1.0" [[framework.postfixes]] impl_postfix = "sycl" diff --git a/environments/conda-linux-sycl.yml b/environments/conda-linux-sycl.yml index e51648a3..d0b4113a 100644 --- a/environments/conda-linux-sycl.yml +++ b/environments/conda-linux-sycl.yml @@ -23,9 +23,8 @@ dependencies: - dpctl - dpnp - numba-dpex - # TODO: fix issues on conda-forge build - - intel::dpcpp_linux-64==2024.0.0 - - intel::dpcpp-cpp-rt==2024.0.0 + - intel::dpcpp_linux-64 + - intel::dpcpp-cpp-rt - cython - cmake - ninja diff --git a/environments/conda-win-sycl.yml b/environments/conda-win-sycl.yml index 89ad4c9b..d3160f22 100644 --- a/environments/conda-win-sycl.yml +++ b/environments/conda-win-sycl.yml @@ -25,7 +25,7 @@ dependencies: - numba-dpex # TODO: switch to conda-forge, but it results in broken OpenCL rt (see below) # - conda-forge::dpcpp_win-64 - - intel::dpcpp_win-64==2024.0.0 + - intel::dpcpp_win-64 # fixing cmake version here, because we need to apply patch for IntelLLVM - cmake==3.26* - cython From 35ee147deb8a7ce7cb5651f5cffb93e2738d280b Mon Sep 17 00:00:00 2001 From: Adarsh Yoga Date: Wed, 24 Apr 2024 05:21:19 +0000 Subject: [PATCH 4/4] fix knn failure on windows due to mismatch in types and precision --- dpbench/benchmarks/default/knn/knn_initialize.py | 4 +++- .../knn/knn_sycl_native_ext/knn_sycl/_knn_kernel.hpp | 12 ++++++------ .../knn/knn_sycl_native_ext/knn_sycl/_knn_sycl.cpp | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/dpbench/benchmarks/default/knn/knn_initialize.py b/dpbench/benchmarks/default/knn/knn_initialize.py index 720b4348..bcab0280 100644 --- a/dpbench/benchmarks/default/knn/knn_initialize.py +++ b/dpbench/benchmarks/default/knn/knn_initialize.py @@ -24,7 +24,9 @@ def _gen_data_x(ip_size, data_dim, seed, dtype): def _gen_data_y(ip_size, classes_num, seed): default_rng.seed(seed) - data = default_rng.randint(classes_num, size=ip_size) + data = default_rng.randint( + classes_num, size=ip_size, dtype=types_dict["int"] + ) return data def _gen_train_data(train_size, data_dim, classes_num, seed_train, dtype): diff --git a/dpbench/benchmarks/default/knn/knn_sycl_native_ext/knn_sycl/_knn_kernel.hpp b/dpbench/benchmarks/default/knn/knn_sycl_native_ext/knn_sycl/_knn_kernel.hpp index ac1bcbf3..cd9e2623 100644 --- a/dpbench/benchmarks/default/knn/knn_sycl_native_ext/knn_sycl/_knn_kernel.hpp +++ b/dpbench/benchmarks/default/knn/knn_sycl_native_ext/knn_sycl/_knn_kernel.hpp @@ -7,16 +7,16 @@ template class theKernel; -template struct neighbors +template struct neighbors { FpTy dist; - size_t label; + IntTy label; }; template sycl::event knn_impl(sycl::queue q, FpTy *d_train, - size_t *d_train_labels, + IntTy *d_train_labels, FpTy *d_test, size_t k, size_t classes_num, @@ -33,7 +33,7 @@ sycl::event knn_impl(sycl::queue q, // here k has to be 5 in order to match with numpy no. of // neighbors - struct neighbors queue_neighbors[5]; + struct neighbors queue_neighbors[5]; // count distances for (size_t j = 0; j < k; ++j) { @@ -54,7 +54,7 @@ sycl::event knn_impl(sycl::queue q, for (size_t j = 0; j < k; ++j) { // push queue FpTy new_distance = queue_neighbors[j].dist; - FpTy new_neighbor_label = queue_neighbors[j].label; + IntTy new_neighbor_label = queue_neighbors[j].label; size_t index = j; while (index > 0 && new_distance < queue_neighbors[index - 1].dist) @@ -83,7 +83,7 @@ sycl::event knn_impl(sycl::queue q, // push queue FpTy new_distance = queue_neighbors[k - 1].dist; - FpTy new_neighbor_label = queue_neighbors[k - 1].label; + IntTy new_neighbor_label = queue_neighbors[k - 1].label; size_t index = k - 1; while (index > 0 && diff --git a/dpbench/benchmarks/default/knn/knn_sycl_native_ext/knn_sycl/_knn_sycl.cpp b/dpbench/benchmarks/default/knn/knn_sycl_native_ext/knn_sycl/_knn_sycl.cpp index 97938bb0..43f2b1b9 100644 --- a/dpbench/benchmarks/default/knn/knn_sycl_native_ext/knn_sycl/_knn_sycl.cpp +++ b/dpbench/benchmarks/default/knn/knn_sycl_native_ext/knn_sycl/_knn_sycl.cpp @@ -43,7 +43,7 @@ void knn_sync(dpctl::tensor::usm_ndarray x_train, if (typenum == UAR_FLOAT) { sycl::event res_ev = knn_impl( x_train.get_queue(), x_train.get_data(), - y_train.get_data(), x_test.get_data(), k, + y_train.get_data(), x_test.get_data(), k, classes_num, train_size, test_size, predictions.get_data(), votes_to_classes.get_data(), data_dim);