Skip to content

Commit

Permalink
Switch to experimental dpex kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
adarshyoga committed Feb 21, 2024
1 parent cb51d73 commit 54d5aab
Show file tree
Hide file tree
Showing 8 changed files with 144 additions and 75 deletions.
22 changes: 17 additions & 5 deletions dpbench/benchmarks/black_scholes/black_scholes_numba_dpex_k.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,19 @@

from math import erf, exp, log, sqrt

import numba_dpex as dpex
import numba_dpex.experimental as dpex
from numba_dpex import kernel_api as kapi


@dpex.kernel
def _black_scholes_kernel(nopt, price, strike, t, rate, volatility, call, put):
def _black_scholes_kernel(
item: kapi.Item, nopt, price, strike, t, rate, volatility, call, put
):
dtype = price.dtype
mr = -rate
sig_sig_two = volatility * volatility * dtype.type(2)

i = dpex.get_global_id(0)
i = item.get_id(0)

P = price[i]
S = strike[i]
Expand All @@ -40,6 +43,15 @@ def _black_scholes_kernel(nopt, price, strike, t, rate, volatility, call, put):


def black_scholes(nopt, price, strike, t, rate, volatility, call, put):
_black_scholes_kernel[dpex.Range(nopt)](
nopt, price, strike, t, rate, volatility, call, put
dpex.call_kernel(
_black_scholes_kernel,
kapi.Range(nopt),
nopt,
price,
strike,
t,
rate,
volatility,
call,
put,
)
13 changes: 9 additions & 4 deletions dpbench/benchmarks/dbscan/dbscan_numba_dpex_k.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

import dpnp as np
import numba as nb
import numba_dpex as dpex
import numba_dpex.experimental as dpex
import numpy
from numba_dpex import kernel_api as kapi

NOISE = -1
UNDEFINED = -2
Expand Down Expand Up @@ -50,8 +51,10 @@ def _queue_empty(head, tail):


@dpex.kernel
def get_neighborhood(n, dim, data, eps, ind_lst, sz_lst, block_size, nblocks):
i = dpex.get_global_id(0)
def get_neighborhood(
item: kapi.Item, n, dim, data, eps, ind_lst, sz_lst, block_size, nblocks
):
i = item.get_id(0)

start = i * block_size
stop = n if i + 1 == nblocks else start + block_size
Expand Down Expand Up @@ -130,7 +133,9 @@ def dbscan(n_samples, n_features, data, eps, min_pts):
)
sizes = np.zeros_like(data, shape=n_samples, dtype=np.int64)

get_neighborhood[dpex.Range(n_samples)](
dpex.call_kernel(
get_neighborhood,
kapi.Range(n_samples),
n_samples,
n_features,
data,
Expand Down
24 changes: 15 additions & 9 deletions dpbench/benchmarks/gpairs/gpairs_numba_dpex_k.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
# SPDX-License-Identifier: Apache-2.0

import numba_dpex as dpex
import numba_dpex.experimental as dpexexp
from numba_dpex import kernel_api as kapi

# This implementation is numba dpex kernel version with atomics.


@dpex.kernel
@dpexexp.kernel
def count_weighted_pairs_3d_intel_no_slm_ker(
nd_item: kapi.NdItem,
n,
nbins,
slm_hist_size,
Expand All @@ -25,14 +28,14 @@ def count_weighted_pairs_3d_intel_no_slm_ker(
result,
):
dtype = x0.dtype
lid0 = dpex.get_local_id(0)
gr0 = dpex.get_group_id(0)
lid0 = nd_item.get_local_id(0)
gr0 = nd_item.get_group().get_group_id(0)

lid1 = dpex.get_local_id(1)
gr1 = dpex.get_group_id(1)
lid1 = nd_item.get_local_id(1)
gr1 = nd_item.get_group().get_group_id(1)

lws0 = dpex.get_local_size(0)
lws1 = dpex.get_local_size(1)
lws0 = nd_item.get_local_range(0)
lws1 = nd_item.get_local_range(1)

n_wi = 20

Expand Down Expand Up @@ -107,7 +110,8 @@ def count_weighted_pairs_3d_intel_no_slm_ker(

pk = k
for p in range(private_hist_size):
dpex.atomic.add(result, pk, private_hist[p])
result_aref = kapi.AtomicRef(result, index=pk)
result_aref.fetch_add(private_hist[p])
pk += 1


Expand Down Expand Up @@ -147,7 +151,9 @@ def gpairs(
ceiling_quotient(nbins, private_hist_size) * private_hist_size
)

count_weighted_pairs_3d_intel_no_slm_ker[dpex.NdRange(gwsRange, lwsRange)](
dpexexp.call_kernel(
count_weighted_pairs_3d_intel_no_slm_ker,
kapi.NdRange(dpex.Range(*gwsRange), dpex.Range(*lwsRange)),
nopt,
nbins,
slm_hist_size,
Expand Down
108 changes: 73 additions & 35 deletions dpbench/benchmarks/kmeans/kmeans_numba_dpex_k.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
from math import sqrt

import numba_dpex as dpex
import numpy
import numba_dpex.experimental as dpexexp
from dpctl import tensor as dpt
from numba_dpex import NdRange
from numba_dpex import kernel_api as kapi


def DivUp(numerator, denominator):
Expand All @@ -25,9 +25,15 @@ def getGroupByCluster( # noqa: C901
):
local_copies = min(4, max(1, DivUp(local_size_, num_centroids)))

@dpex.kernel
@dpexexp.kernel
def groupByCluster(
arrayP, arrayPcluster, arrayC, NewCentroids, NewCount, last
nd_item: kapi.NdItem,
arrayP,
arrayPcluster,
arrayC,
NewCentroids,
NewCount,
last,
):
numpoints = arrayP.shape[0]
localCentroids = dpex.local.array((dims, num_centroids), dtype=dtyp)
Expand All @@ -38,9 +44,9 @@ def groupByCluster(
(local_copies, num_centroids), dtype=dpt.int32
)

grid = dpex.get_group_id(0)
lid = dpex.get_local_id(0)
local_size = dpex.get_local_size(0)
grid = nd_item.get_group().get_group_id(0)
lid = nd_item.get_local_id(0)
local_size = nd_item.get_local_range(0)

for i in range(lid, num_centroids * dims, local_size):
localCentroids[i % dims, i // dims] = arrayC[i // dims, i % dims]
Expand All @@ -51,7 +57,7 @@ def groupByCluster(
for lc in range(local_copies):
localNewCount[lc, c] = 0

dpex.barrier(dpex.LOCAL_MEM_FENCE)
kapi.group_barrier(nd_item.get_group())

for i in range(WorkPI):
point_id = grid * WorkPI * local_size + i * local_size + lid
Expand All @@ -73,44 +79,59 @@ def groupByCluster(

lc = lid % local_copies
for d in range(dims):
dpex.atomic.add(
localNewCentroids, (lc, d, nearest_centroid), localP[d]
localNewCentroids_aref = kapi.AtomicRef(
localNewCentroids,
index=(lc, d, nearest_centroid),
address_space=kapi.AddressSpace.LOCAL,
)
localNewCentroids_aref.fetch_add(localP[d])

dpex.atomic.add(localNewCount, (lc, nearest_centroid), 1)
localNewCount_aref = kapi.AtomicRef(
localNewCount,
index=(lc, nearest_centroid),
address_space=kapi.AddressSpace.LOCAL,
)
localNewCount_aref.fetch_add(1)

if last:
arrayPcluster[point_id] = nearest_centroid

dpex.barrier(dpex.LOCAL_MEM_FENCE)
kapi.group_barrier(nd_item.get_group())

for i in range(lid, num_centroids * dims, local_size):
local_centroid_d = dtyp.type(0)
for lc in range(local_copies):
local_centroid_d += localNewCentroids[lc, i % dims, i // dims]

dpex.atomic.add(
NewCentroids,
(i // dims, i % dims),
local_centroid_d,
NewCentroids_aref = kapi.AtomicRef(
NewCentroids, index=(i // dims, i % dims)
)
NewCentroids_aref.fetch_add(local_centroid_d)

for c in range(lid, num_centroids, local_size):
local_centroid_npoints = dpt.int32.type(0)
for lc in range(local_copies):
local_centroid_npoints += localNewCount[lc, c]

dpex.atomic.add(NewCount, c, local_centroid_npoints)
NewCount_aref = kapi.AtomicRef(NewCount, index=c)
NewCount_aref.fetch_add(local_centroid_npoints)

return groupByCluster


@lru_cache(maxsize=1)
def getUpdateCentroids(dims, num_centroids, dtyp, local_size_):
@dpex.kernel
def updateCentroids(diff, arrayC, arrayCnumpoint, NewCentroids, NewCount):
lid = dpex.get_local_id(0)
local_size = dpex.get_local_size(0)
@dpexexp.kernel
def updateCentroids(
nd_item: kapi.NdItem,
diff,
arrayC,
arrayCnumpoint,
NewCentroids,
NewCount,
):
lid = nd_item.get_local_id(0)
local_size = nd_item.get_local_range(0)

local_distance = dpex.local.array(local_size_, dtype=dtyp)

Expand All @@ -134,7 +155,7 @@ def updateCentroids(diff, arrayC, arrayCnumpoint, NewCentroids, NewCount):
max_distance = max(max_distance, distance)
local_distance[c] = max_distance

dpex.barrier(dpex.LOCAL_MEM_FENCE)
kapi.group_barrier(nd_item.get_group())

if lid == 0:
for c in range(local_size):
Expand All @@ -147,19 +168,19 @@ def updateCentroids(diff, arrayC, arrayCnumpoint, NewCentroids, NewCount):

@lru_cache(maxsize=1)
def getUpdateLabels(dims, num_centroids, dtyp, WorkPI):
@dpex.kernel
def updateLabels(arrayP, arrayPcluster, arrayC):
@dpexexp.kernel
def updateLabels(nd_item: kapi.NdItem, arrayP, arrayPcluster, arrayC):
numpoints = arrayP.shape[0]
localCentroids = dpex.local.array((dims, num_centroids), dtype=dtyp)

grid = dpex.get_group_id(0)
lid = dpex.get_local_id(0)
local_size = dpex.get_local_size(0)
grid = nd_item.get_group().get_group_id(0)
lid = nd_item.get_local_id(0)
local_size = nd_item.get_local_range(0)

for i in range(lid, num_centroids * dims, local_size):
localCentroids[i % dims, i // dims] = arrayC[i // dims, i % dims]

dpex.barrier(dpex.LOCAL_MEM_FENCE)
kapi.group_barrier(nd_item.get_group())

for i in range(WorkPI):
point_id = grid * WorkPI * local_size + i * local_size + lid
Expand Down Expand Up @@ -224,19 +245,36 @@ def kmeans_kernel(
for i in range(niters):
last = i == (niters - 1)
if diff_host < tolerance:
updateLabels[NdRange((global_size,), (local_size,))](
arrayP, arrayPcluster, arrayC
dpexexp.call_kernel(
updateLabels,
kapi.NdRange((global_size,), (local_size,)),
arrayP,
arrayPcluster,
arrayC,
)
break

groupByCluster[NdRange((global_size,), (local_size,))](
arrayP, arrayPcluster, arrayC, NewCentroids, NewCount, last
dpexexp.call_kernel(
groupByCluster,
kapi.NdRange((global_size,), (local_size,)),
arrayP,
arrayPcluster,
arrayC,
NewCentroids,
NewCount,
last,
)

update_centroid_size = min(num_centroids, local_size)
updateCentroids[
NdRange((update_centroid_size,), (update_centroid_size,))
](diff, arrayC, arrayCnumpoint, NewCentroids, NewCount)
dpexexp.call_kernel(
updateCentroids,
kapi.NdRange((update_centroid_size,), (update_centroid_size,)),
diff,
arrayC,
arrayCnumpoint,
NewCentroids,
NewCount,
)
diff_host = dpt.asnumpy(diff)[0]


Expand Down
11 changes: 8 additions & 3 deletions dpbench/benchmarks/knn/knn_numba_dpex_k.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@
from math import sqrt

import numba_dpex as dpex
import numba_dpex.experimental as dpexexp
import numpy as np
from numba_dpex import kernel_api as kapi


@dpex.kernel
@dpexexp.kernel
def _knn_kernel( # noqa: C901: TODO: can we simplify logic?
item: kapi.Item,
train,
train_labels,
test,
Expand All @@ -21,7 +24,7 @@ def _knn_kernel( # noqa: C901: TODO: can we simplify logic?
data_dim,
):
dtype = train.dtype
i = dpex.get_global_id(0)
i = item.get_id(0)
# here k has to be 5 in order to match with numpy
queue_neighbors = dpex.private.array(shape=(5, 2), dtype=dtype)

Expand Down Expand Up @@ -106,7 +109,9 @@ def knn(
votes_to_classes,
data_dim,
):
_knn_kernel[dpex.Range(test_size)](
dpexexp.call_kernel(
_knn_kernel,
kapi.Range(test_size),
x_train,
y_train,
x_test,
Expand Down
9 changes: 5 additions & 4 deletions dpbench/benchmarks/l2_norm/l2_norm_numba_dpex_k.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@

import math

import numba_dpex as dpex
import numba_dpex.experimental as dpex
from numba_dpex import kernel_api as kapi


@dpex.kernel
def l2_norm_kernel(a, d):
i = dpex.get_global_id(0)
def l2_norm_kernel(item: kapi.Item, a, d):
i = item.get_id(0)
a_rows = a.shape[1]
d[i] = 0.0
for k in range(a_rows):
Expand All @@ -18,4 +19,4 @@ def l2_norm_kernel(a, d):


def l2_norm(a, d):
l2_norm_kernel[dpex.Range(a.shape[0])](a, d)
dpex.call_kernel(l2_norm_kernel, kapi.Range(a.shape[0]), a, d)
Loading

0 comments on commit 54d5aab

Please sign in to comment.