Merge pull request #348 from IntelPython/fix/private_arrays

Changes to private array declarations to use new API in dpex
IntelPython · Apr 25, 2024 · 4501644
2 parents 2fb19c0 + 35ee147
commit 4501644
Show file tree

Hide file tree

Showing 11 changed files with 23 additions and 22 deletions.
diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
@@ -17,7 +17,7 @@ build:
 requirements:
   build:
     - {{ compiler('cxx') }}
-    - {{ compiler('dpcpp') }} ==2024.0.0  # [not osx]
+    - {{ compiler('dpcpp') }}
     - sysroot_linux-64 >=2.28  # [linux]
   host:
     - python

diff --git a/dpbench/benchmarks/default/gpairs/gpairs_numba_dpex_k.py b/dpbench/benchmarks/default/gpairs/gpairs_numba_dpex_k.py
@@ -38,9 +38,9 @@ def count_weighted_pairs_3d_intel_no_slm_ker(
 
     n_wi = 32
 
-    dsq_mat = dpex.private.array(shape=(32 * 32), dtype=dtype)
-    w0_vec = dpex.private.array(shape=(32), dtype=dtype)
-    w1_vec = dpex.private.array(shape=(32), dtype=dtype)
+    dsq_mat = kapi.PrivateArray(shape=(32 * 32), dtype=dtype)
+    w0_vec = kapi.PrivateArray(shape=(32), dtype=dtype)
+    w1_vec = kapi.PrivateArray(shape=(32), dtype=dtype)
 
     offset0 = gr0 * n_wi * lws0 + lid0
     offset1 = gr1 * n_wi * lws1 + lid1
@@ -80,7 +80,7 @@ def count_weighted_pairs_3d_intel_no_slm_ker(
 
     # update slm_hist. Use work-item private buffer of 16 tfloat elements
     for k in range(0, slm_hist_size, private_hist_size):
-        private_hist = dpex.private.array(shape=(32), dtype=dtype)
+        private_hist = kapi.PrivateArray(shape=(32), dtype=dtype)
         for p in range(private_hist_size):
             private_hist[p] = 0.0
 

diff --git a/dpbench/benchmarks/default/kmeans/kmeans_numba_dpex_k.py b/dpbench/benchmarks/default/kmeans/kmeans_numba_dpex_k.py
@@ -56,7 +56,7 @@ def groupByCluster(
         for i in range(WorkPI):
             point_id = grid * WorkPI * local_size + i * local_size + lid
             if point_id < numpoints:
-                localP = dpex.private.array(dims, dtyp)
+                localP = kapi.PrivateArray(dims, dtyp)
                 for d in range(dims):
                     localP[d] = arrayP[point_id, d]
 
@@ -179,7 +179,7 @@ def updateLabels(
         for i in range(WorkPI):
             point_id = grid * WorkPI * local_size + i * local_size + lid
             if point_id < numpoints:
-                localP = dpex.private.array(dims, dtyp)
+                localP = kapi.PrivateArray(dims, dtyp)
                 for d in range(dims):
                     localP[d] = arrayP[point_id, d]
 

diff --git a/dpbench/benchmarks/default/knn/knn_initialize.py b/dpbench/benchmarks/default/knn/knn_initialize.py
@@ -24,7 +24,9 @@ def _gen_data_x(ip_size, data_dim, seed, dtype):
 
     def _gen_data_y(ip_size, classes_num, seed):
         default_rng.seed(seed)
-        data = default_rng.randint(classes_num, size=ip_size)
+        data = default_rng.randint(
+            classes_num, size=ip_size, dtype=types_dict["int"]
+        )
         return data
 
     def _gen_train_data(train_size, data_dim, classes_num, seed_train, dtype):

diff --git a/dpbench/benchmarks/default/knn/knn_numba_dpex_k.py b/dpbench/benchmarks/default/knn/knn_numba_dpex_k.py
@@ -25,7 +25,7 @@ def _knn_kernel(  # noqa: C901: TODO: can we simplify logic?
     dtype = train.dtype
     i = item.get_id(0)
     # here k has to be 5 in order to match with numpy
-    queue_neighbors = dpex.private.array(shape=(5, 2), dtype=dtype)
+    queue_neighbors = kapi.PrivateArray(shape=(5, 2), dtype=dtype)
 
     for j in range(k):
         x1 = train[j]

diff --git a/dpbench/benchmarks/default/knn/knn_sycl_native_ext/knn_sycl/_knn_kernel.hpp b/dpbench/benchmarks/default/knn/knn_sycl_native_ext/knn_sycl/_knn_kernel.hpp
@@ -7,16 +7,16 @@
 
 template <typename FpTy, typename IntTy> class theKernel;
 
-template <typename FpTy> struct neighbors
+template <typename FpTy, typename IntTy> struct neighbors
 {
     FpTy dist;
-    size_t label;
+    IntTy label;
 };
 
 template <typename FpTy, typename IntTy>
 sycl::event knn_impl(sycl::queue q,
                      FpTy *d_train,
-                     size_t *d_train_labels,
+                     IntTy *d_train_labels,
                      FpTy *d_test,
                      size_t k,
                      size_t classes_num,
@@ -33,7 +33,7 @@ sycl::event knn_impl(sycl::queue q,
 
                 // here k has to be 5 in order to match with numpy no. of
                 // neighbors
-                struct neighbors<FpTy> queue_neighbors[5];
+                struct neighbors<FpTy, IntTy> queue_neighbors[5];
 
                 // count distances
                 for (size_t j = 0; j < k; ++j) {
@@ -54,7 +54,7 @@ sycl::event knn_impl(sycl::queue q,
                 for (size_t j = 0; j < k; ++j) {
                     // push queue
                     FpTy new_distance = queue_neighbors[j].dist;
-                    FpTy new_neighbor_label = queue_neighbors[j].label;
+                    IntTy new_neighbor_label = queue_neighbors[j].label;
                     size_t index = j;
                     while (index > 0 &&
                            new_distance < queue_neighbors[index - 1].dist)
@@ -83,7 +83,7 @@ sycl::event knn_impl(sycl::queue q,
 
                         // push queue
                         FpTy new_distance = queue_neighbors[k - 1].dist;
-                        FpTy new_neighbor_label = queue_neighbors[k - 1].label;
+                        IntTy new_neighbor_label = queue_neighbors[k - 1].label;
                         size_t index = k - 1;
 
                         while (index > 0 &&

diff --git a/dpbench/benchmarks/default/knn/knn_sycl_native_ext/knn_sycl/_knn_sycl.cpp b/dpbench/benchmarks/default/knn/knn_sycl_native_ext/knn_sycl/_knn_sycl.cpp
@@ -43,7 +43,7 @@ void knn_sync(dpctl::tensor::usm_ndarray x_train,
     if (typenum == UAR_FLOAT) {
         sycl::event res_ev = knn_impl<float, unsigned int>(
             x_train.get_queue(), x_train.get_data<float>(),
-            y_train.get_data<size_t>(), x_test.get_data<float>(), k,
+            y_train.get_data<unsigned int>(), x_test.get_data<float>(), k,
             classes_num, train_size, test_size,
             predictions.get_data<unsigned int>(),
             votes_to_classes.get_data<float>(), data_dim);

diff --git a/dpbench/configs/framework_info/dpcpp.toml b/dpbench/configs/framework_info/dpcpp.toml
@@ -10,7 +10,7 @@ postfix = "dpcpp"
 class = "DpcppFramework"
 arch = "cpu"
 sycl_device = "cpu"
-dpcpp_version = "IntelLLVM 2024.0.0"
+dpcpp_version = "IntelLLVM 2024.1.0"
 
 [[framework.postfixes]]
 impl_postfix = "sycl"

diff --git a/dpbench/infrastructure/frameworks/dpcpp_framework.py b/dpbench/infrastructure/frameworks/dpcpp_framework.py
@@ -59,7 +59,7 @@ def _copy_to_func_impl(ref_array):
             else:
                 order = "K"
             return dpt.asarray(
-                obj=ref_array,
+                ref_array,
                 dtype=ref_array.dtype,
                 device=self.sycl_device,
                 copy=None,

diff --git a/environments/conda-linux-sycl.yml b/environments/conda-linux-sycl.yml
@@ -23,9 +23,8 @@ dependencies:
   - dpctl
   - dpnp
   - numba-dpex
-  # TODO: fix issues on conda-forge build
-  - intel::dpcpp_linux-64==2024.0.0
-  - intel::dpcpp-cpp-rt==2024.0.0
+  - intel::dpcpp_linux-64
+  - intel::dpcpp-cpp-rt
   - cython
   - cmake
   - ninja

diff --git a/environments/conda-win-sycl.yml b/environments/conda-win-sycl.yml
@@ -25,7 +25,7 @@ dependencies:
   - numba-dpex
   # TODO: switch to conda-forge, but it results in broken OpenCL rt (see below)
   # - conda-forge::dpcpp_win-64
-  - intel::dpcpp_win-64==2024.0.0
+  - intel::dpcpp_win-64
   # fixing cmake version here, because we need to apply patch for IntelLLVM
   - cmake==3.26*
   - cython