-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
rodinia/gaussian sycl and ndpx implementation
- Loading branch information
roxx30198
committed
Oct 25, 2023
1 parent
94c2d62
commit 640d447
Showing
20 changed files
with
500 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# Pull the gaussian benchmark's build rules into the build.
add_subdirectory(gaussian)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# Pull the SYCL native-extension build rules into the build.
add_subdirectory(gaussian_sycl_native_ext)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
""" | ||
Gaussian elimination implementation

SYCL and numba-dpex implementations of Gaussian elimination.

Input
---------
size<int_64> : Forms an input matrix of dimensions (size x size)

Output
--------
result<array<float>> : Solution of the given system of linear equations,
                       computed using Gaussian elimination.

Method:
    Gaussian transformations are applied to the input matrix to reduce it to
    upper-triangular form during forward elimination; the equations are then
    solved by back substitution to obtain the result.
""" |
34 changes: 34 additions & 0 deletions
34
dpbench/benchmarks/rodinia/gaussian/gaussian_initialize.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# Decay rate of the exponential used to generate the matrix coefficients.
LAMBDA = -0.01


def initialize(size, types_dict):
    """Create the input and output buffers for the Gaussian elimination benchmark.

    The coefficient matrix is stored flattened in row-major order. Entry
    ``(i, j)`` equals ``10 * exp(LAMBDA * abs(i - j))``, so the matrix is
    symmetric with a constant value along every diagonal.

    Args:
        size: Number of rows (and columns) of the square coefficient matrix.
        types_dict: Mapping that provides the floating-point dtype under the
            key ``"float"``.

    Returns:
        A 4-tuple ``(a, b, m, result)`` of NumPy arrays of the requested dtype:
        ``a`` is the flattened ``size * size`` coefficient matrix, ``b`` is a
        vector of ones of length ``size``, ``m`` is a zeroed ``size * size``
        buffer, and ``result`` is a zeroed vector of length ``size``.
    """
    import math

    import numpy as np

    dtype = types_dict["float"]

    # coe[size - 1 + k] == coe[size - 1 - k] == 10 * exp(LAMBDA * k): a
    # symmetric table indexed by the (shifted) diagonal offset j - i.
    coe = np.empty((2 * size - 1), dtype=dtype)
    for i in range(size):
        coe_i = 10 * math.exp(LAMBDA * i)
        coe[size - 1 + i] = coe_i
        coe[size - 1 - i] = coe_i

    # a[i * size + j] = coe[size - 1 - i + j], gathered for all (i, j) at once
    # instead of with a size**2 Python-level loop.
    rows = np.arange(size).reshape(-1, 1)
    cols = np.arange(size).reshape(1, -1)
    a = coe[size - 1 - rows + cols].reshape(size * size)

    return (
        a,
        np.ones(size, dtype=dtype),
        np.zeros((size * size), dtype=dtype),
        np.zeros(size, dtype=dtype),
    )
107 changes: 107 additions & 0 deletions
107
dpbench/benchmarks/rodinia/gaussian/gaussian_numba_dpex_k.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
import dpctl | ||
import numba_dpex | ||
|
||
# Edge length of the square 2-D work-group used to launch gaussian_kernel_2.
BLOCK_size_XY = 4
|
||
|
||
@numba_dpex.kernel()
def gaussian_kernel_1(m, a, size, t):
    """Compute the elimination multipliers for pivot column ``t``.

    ``a`` and ``m`` are indexed as flattened matrices with row stride ``size``.
    The work-item with global index ``gid`` along dimension 2 handles row
    ``gid + t + 1`` and stores ``a[row, t] / a[t, t]`` into ``m[row, t]``.

    Args:
        m: Flattened multiplier matrix (written).
        a: Flattened coefficient matrix (read).
        size: Row stride of ``a`` and ``m``.
        t: Index of the current pivot row/column.
    """
    # Global index of this work-item along dimension 2.
    gid = (
        numba_dpex.get_local_id(2)
        + numba_dpex.get_group_id(2) * numba_dpex.get_local_size(2)
    )

    # Only the size - 1 - t rows below the pivot row need a multiplier.
    if gid >= size - 1 - t:
        return

    row = gid + t + 1
    m[size * row + t] = a[size * row + t] / a[size * t + t]
|
||
|
||
@numba_dpex.kernel()
def gaussian_kernel_2(m, a, b, size, t):
    """Apply the elimination step for pivot ``t`` to ``a`` and ``b``.

    ``a`` and ``m`` are indexed as flattened matrices with row stride ``size``.
    The work-item with global indices ``(xidx, yidx)`` along dimensions 2 and 1
    updates element ``(xidx + 1 + t, yidx + t)`` of ``a`` by subtracting the
    row's multiplier times the matching pivot-row element. Work-items with
    ``yidx == 0`` also update the right-hand side entry of their row.

    Args:
        m: Flattened multiplier matrix (read).
        a: Flattened coefficient matrix (updated in place).
        b: Right-hand side vector (updated in place).
        size: Row stride of ``a`` and ``m``.
        t: Index of the current pivot row/column.
    """
    xidx = numba_dpex.get_group_id(2) * numba_dpex.get_local_size(
        2
    ) + numba_dpex.get_local_id(2)
    yidx = numba_dpex.get_group_id(1) * numba_dpex.get_local_size(
        1
    ) + numba_dpex.get_local_id(1)

    # Rows below the pivot: size - 1 - t of them.
    if xidx >= size - 1 - t:
        return

    # Columns from the pivot column onwards: size - t of them.
    if yidx >= size - t:
        return

    row = xidx + 1 + t
    col = yidx + t

    a[size * row + col] -= m[size * row + t] * a[size * t + col]
    if yidx == 0:
        # col == t here, so this reads the same multiplier as above.
        b[row] -= m[size * row + col] * b[t]
|
||
|
||
def gaussian(a, b, m, size, result):
    """Solve a linear system by Gaussian elimination using numba-dpex kernels.

    Forward elimination launches ``gaussian_kernel_1`` and
    ``gaussian_kernel_2`` once per pivot; back substitution is then done with
    a plain Python loop over the eliminated system.

    Args:
        a: Flattened ``size * size`` coefficient matrix (modified in place).
        b: Right-hand side vector of length ``size`` (modified in place).
        m: Flattened ``size * size`` multiplier buffer (written by the kernels).
        size: Number of equations / unknowns.
        result: Output vector of length ``size``; receives the solution.
    """
    # NOTE(review): the work-group size is queried from the default-selected
    # device; presumably that is the device the arrays live on - confirm.
    device = dpctl.SyclDevice()
    block_size = device.max_work_group_size

    # Ceiling division: number of work-groups needed to cover `size` items.
    # The previous expression `int(x + 0 if cond else 1)` parsed as
    # `(x + 0) if cond else 1`, giving a single work-group whenever `size`
    # was not a multiple of `block_size`.
    grid_size = (size + block_size - 1) // block_size

    blocksize2d = BLOCK_size_XY
    gridsize2d = (size + blocksize2d - 1) // blocksize2d

    global_range = numba_dpex.Range(1, 1, grid_size * block_size)
    local_range = numba_dpex.Range(1, 1, block_size)

    dim_blockXY = numba_dpex.Range(1, blocksize2d, blocksize2d)
    dim_gridXY = numba_dpex.Range(
        1, gridsize2d * blocksize2d, gridsize2d * blocksize2d
    )

    # Forward elimination: one multiplier pass and one update pass per pivot.
    for t in range(size - 1):
        gaussian_kernel_1[numba_dpex.NdRange(global_range, local_range)](
            m, a, size, t
        )

        gaussian_kernel_2[numba_dpex.NdRange(dim_gridXY, dim_blockXY)](
            m, a, b, size, t
        )

    # Back substitution, from the last row up to the first.
    for i in range(size):
        row = size - i - 1
        result[row] = b[row]
        for j in range(i):
            col = size - j - 1
            result[row] -= a[size * row + col] * result[col]
        result[row] = result[row] / a[size * row + row]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
|
||
def gaussian(a, b, m, size, result):
    """Solve a linear system by Gaussian elimination (pure Python reference).

    Args:
        a: Flattened row-major ``size * size`` coefficient matrix; reduced in
            place during forward elimination.
        b: Right-hand side vector of length ``size``; modified in place.
        m: Multiplier buffer. Accepted for signature parity with the other
            implementations; this implementation does not read or write it.
        size: Number of equations / unknowns.
        result: Output vector of length ``size``; receives the solution.
    """
    # Forward elimination
    for t in range(size - 1):
        pivot_row = t * size
        for i in range(t + 1, size):
            row = i * size
            # Named `factor` rather than `m` so the parameter is not shadowed.
            factor = a[row + t] / a[pivot_row + t]
            for j in range(t, size):
                a[row + j] = a[row + j] - factor * a[pivot_row + j]
            b[i] = b[i] - factor * b[t]

    # Back substitution, from the last row up to the first.
    for row in range(size - 1, -1, -1):
        base = size * row
        result[row] = b[row]
        # Subtract the already-solved unknowns, last column first.
        for col in range(size - 1, row, -1):
            result[row] -= a[base + col] * result[col]
        result[row] = result[row] / a[base + row]
14 changes: 14 additions & 0 deletions
14
dpbench/benchmarks/rodinia/gaussian/gaussian_sycl_native_ext/CMakeLists.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# Directory / source stem of the native module, and the name of the Python
# extension target built from it (same name with a leading underscore).
set(module_name gaussian_sycl)
set(py_module_name _${module_name})

# Build the extension from <module_name>/_<module_name>.cpp with SYCL enabled.
python_add_library(${py_module_name} MODULE ${module_name}/${py_module_name}.cpp)
add_sycl_to_target(TARGET ${py_module_name} SOURCES ${module_name}/${py_module_name}.cpp)
target_include_directories(${py_module_name} PRIVATE ${Dpctl_INCLUDE_DIRS})

# Install to the same path, relative to the source root, that the module's
# sources occupy.
file(RELATIVE_PATH py_module_dest ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
install(TARGETS ${py_module_name}
  DESTINATION ${py_module_dest}/${module_name}
)
7 changes: 7 additions & 0 deletions
7
dpbench/benchmarks/rodinia/gaussian/gaussian_sycl_native_ext/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# Re-export the extension module's `gaussian` entry point as `gaussian_sycl`.
from .gaussian_sycl._gaussian_sycl import gaussian as gaussian_sycl

__all__ = ["gaussian_sycl"]
57 changes: 57 additions & 0 deletions
57
...h/benchmarks/rodinia/gaussian/gaussian_sycl_native_ext/gaussian_sycl/_gaussian_kernel.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
// SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
#include <CL/sycl.hpp> | ||
|
||
using namespace sycl; | ||
|
||
template <typename FpTy> | ||
void gaussian_kernel_1(FpTy *m_device, | ||
const FpTy *a_device, | ||
int size, | ||
int t, | ||
sycl::nd_item<3> item_ct1) | ||
{ | ||
if (item_ct1.get_local_id(2) + | ||
item_ct1.get_group(2) * item_ct1.get_local_range().get(2) >= | ||
size - 1 - t) | ||
return; | ||
m_device[size * (item_ct1.get_local_range().get(2) * item_ct1.get_group(2) + | ||
item_ct1.get_local_id(2) + t + 1) + | ||
t] = a_device[size * (item_ct1.get_local_range().get(2) * | ||
item_ct1.get_group(2) + | ||
item_ct1.get_local_id(2) + t + 1) + | ||
t] / | ||
a_device[size * t + t]; | ||
} | ||
|
||
template <typename FpTy> | ||
void gaussian_kernel_2(FpTy *m_device, | ||
FpTy *a_device, | ||
FpTy *b_device, | ||
int size, | ||
int j1, | ||
int t, | ||
sycl::nd_item<3> item_ct1) | ||
{ | ||
if (item_ct1.get_local_id(2) + | ||
item_ct1.get_group(2) * item_ct1.get_local_range().get(2) >= | ||
size - 1 - t) | ||
return; | ||
if (item_ct1.get_local_id(1) + | ||
item_ct1.get_group(1) * item_ct1.get_local_range().get(1) >= | ||
size - t) | ||
return; | ||
|
||
int xidx = item_ct1.get_group(2) * item_ct1.get_local_range().get(2) + | ||
item_ct1.get_local_id(2); | ||
int yidx = item_ct1.get_group(1) * item_ct1.get_local_range().get(1) + | ||
item_ct1.get_local_id(1); | ||
|
||
a_device[size * (xidx + 1 + t) + (yidx + t)] -= | ||
m_device[size * (xidx + 1 + t) + t] * a_device[size * t + (yidx + t)]; | ||
if (yidx == 0) { | ||
b_device[xidx + 1 + t] -= | ||
m_device[size * (xidx + 1 + t) + (yidx + t)] * b_device[t]; | ||
} | ||
} |
Oops, something went wrong.