Skip to content

Commit

Permalink
Add support of fp32 for sycl implementations
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexanderKalistratov committed Oct 26, 2023
1 parent 94c2d62 commit 7160c2a
Show file tree
Hide file tree
Showing 8 changed files with 86 additions and 76 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,30 +6,11 @@
#include <stdlib.h>
#include <type_traits>

// Precision-dependent aliases for the math functions and literals.
//
// With __DO_FLOAT__ defined, the aliases expand to the C single-precision
// functions (expf, logf, sqrtf, erff) and to float literals. Otherwise they
// expand to the sycl:: functions and to double literals.
#ifdef __DO_FLOAT__
#define EXP(x) expf(x)
#define LOG(x) logf(x)
#define SQRT(x) sqrtf(x)
#define ERF(x) erff(x)
// Parenthesised so the reciprocal stays one operand when the macro is used
// inside a larger expression (e.g. `a / INVSQRT(z)`).
#define INVSQRT(x) (1.0f / sqrtf(x))

#define QUARTER 0.25f
#define HALF 0.5f
#define TWO 2.0f
#else
#define EXP(x) sycl::exp(x)
#define LOG(x) sycl::log(x)
#define SQRT(x) sycl::sqrt(x)
#define ERF(x) sycl::erf(x)
// Parenthesised for the same reason as the single-precision variant.
#define INVSQRT(x) (1.0 / sycl::sqrt(x))

#define QUARTER 0.25
#define HALF 0.5
#define TWO 2.0
#endif

using namespace sycl;

// Kernel-name tag: declared here and passed as the template argument of
// parallel_for in black_scholes_impl. Being a template on FpTy, each
// black_scholes_impl<FpTy> instantiation names a different kernel type.
template <typename FpTy> class BlackScholesKernel;

template <typename FpTy>
void black_scholes_impl(queue Queue,
size_t nopt,
Expand All @@ -41,27 +22,30 @@ void black_scholes_impl(queue Queue,
FpTy *call,
FpTy *put)
{
constexpr FpTy _0_25 = 0.25;
constexpr FpTy _0_5 = 0.5;

auto e = Queue.submit([&](handler &h) {
h.parallel_for<class BlackScholesKernel>(
h.parallel_for<BlackScholesKernel<FpTy>>(
range<1>{nopt}, [=](id<1> myID) {
FpTy mr = -rate;
FpTy sig_sig_two = volatility * volatility * TWO;
FpTy sig_sig_two = volatility * volatility * 2;
int i = myID[0];
FpTy a, b, c, y, z, e;
FpTy d1, d2, w1, w2;

a = LOG(price[i] / strike[i]);
a = sycl::log(price[i] / strike[i]);
b = t[i] * mr;
z = t[i] * sig_sig_two;
c = QUARTER * z;
y = INVSQRT(z);
c = _0_25 * z;
y = sycl::rsqrt(z);
w1 = (a - b + c) * y;
w2 = (a - b - c) * y;
d1 = ERF(w1);
d2 = ERF(w2);
d1 = HALF + HALF * d1;
d2 = HALF + HALF * d2;
e = EXP(b);
d1 = sycl::erf(w1);
d2 = sycl::erf(w2);
d1 = _0_5 + _0_5 * d1;
d2 = _0_5 + _0_5 * d2;
e = sycl::exp(b);
call[i] = price[i] * d1 - strike[i] * e * d2;
put[i] = call[i] - price[i] + strike[i] * e;
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,19 @@ void black_scholes_sync(size_t /**/,
if (!ensure_compatibility(price, strike, t, call, put))
throw std::runtime_error("Input arrays are not acceptable.");

if (typenum != UAR_DOUBLE) {
throw std::runtime_error("Expected a double precision FP array.");
if (typenum == UAR_FLOAT) {
black_scholes_impl<float>(Queue, nopt, price.get_data<float>(),
strike.get_data<float>(), t.get_data<float>(), rate,
volatility, call.get_data<float>(),
put.get_data<float>());
} else if (typenum == UAR_DOUBLE) {
black_scholes_impl<double>(Queue, nopt, price.get_data<double>(),
strike.get_data<double>(), t.get_data<double>(), rate,
volatility, call.get_data<double>(),
put.get_data<double>());
} else {
throw std::runtime_error("Expected a double or single precision FP array.");
}

black_scholes_impl(Queue, nopt, price.get_data<double>(),
strike.get_data<double>(), t.get_data<double>(), rate,
volatility, call.get_data<double>(),
put.get_data<double>());
}

PYBIND11_MODULE(_black_scholes_sycl, m)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ void getNeighborhood(size_t n,
}
}

// Kernel-name tag: declared here and passed as the template argument of
// parallel_for in dbscan_impl. Being a template on FpTy, each
// dbscan_impl<FpTy> instantiation names a different kernel type.
template <typename FpTy> class DBScanKernel;

template <typename FpTy>
size_t dbscan_impl(queue q,
size_t n_samples,
Expand All @@ -126,14 +129,14 @@ size_t dbscan_impl(queue q,
q.wait();

auto e = q.submit([&](handler &h) {
h.parallel_for<class DBScanKernel>(
h.parallel_for<DBScanKernel<FpTy>>(
range<1>{n_samples}, [=](id<1> myID) {
size_t i1 = myID[0];
size_t i2 = (i1 + 1 == n_samples ? n_samples : i1 + 1);
getNeighborhood<double>(n_samples, n_features, data, i2 - i1,
data + i1 * n_features, eps,
d_indices + i1 * n_samples,
d_sizes + i1);
getNeighborhood<FpTy>(n_samples, n_features, data, i2 - i1,
data + i1 * n_features, eps,
d_indices + i1 * n_samples,
d_sizes + i1);
});
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,20 @@ size_t dbscan_sync(size_t n_samples,
size_t min_pts)
{
auto queue = data.get_queue();
auto typenum = data.get_typenum();

if (!ensure_compatibility(data))
throw std::runtime_error("Input arrays are not acceptable.");

if (data.get_typenum() != UAR_DOUBLE) {
throw std::runtime_error("Expected a double precision FP array.");
if (typenum == UAR_FLOAT) {
return dbscan_impl<float>(queue, n_samples, n_features,
data.get_data<float>(), eps, min_pts);
} else if (typenum == UAR_DOUBLE) {
return dbscan_impl<double>(queue, n_samples, n_features,
data.get_data<double>(), eps, min_pts);
}

return dbscan_impl<double>(queue, n_samples, n_features,
data.get_data<double>(), eps, min_pts);
throw std::runtime_error("Expected a double or single precision FP array.");
}

PYBIND11_MODULE(_dbscan_sycl, m)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@

using namespace sycl;

// Kernel-name tag: declared here and passed as the template argument of
// parallel_for in l2_norm_impl. Being a template on FpTy, each
// l2_norm_impl<FpTy> instantiation names a different kernel type.
template <typename FpTy> class theKernel;

template <typename FpTy>
void l2_norm_impl(queue Queue,
size_t npoints,
Expand All @@ -18,7 +21,7 @@ void l2_norm_impl(queue Queue,
{
Queue
.submit([&](handler &h) {
h.parallel_for<class theKernel>(range<1>{npoints}, [=](id<1> myID) {
h.parallel_for<theKernel<FpTy>>(range<1>{npoints}, [=](id<1> myID) {
size_t i = myID[0];
for (size_t k = 0; k < dims; k++) {
d[i] += a[i * dims + k] * a[i * dims + k];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,17 @@ void l2_norm_sync(dpctl::tensor::usm_ndarray a, dpctl::tensor::usm_ndarray d)

auto dims = 3;
auto npoints = a.get_size() / dims;

if (a.get_typenum() != UAR_DOUBLE) {
throw std::runtime_error("Expected a double precision FP array.");
auto typenum = a.get_typenum();

if (typenum == UAR_FLOAT) {
l2_norm_impl(Queue, npoints, dims, a.get_data<float>(),
d.get_data<float>());
} else if (typenum == UAR_DOUBLE) {
l2_norm_impl(Queue, npoints, dims, a.get_data<double>(),
d.get_data<double>());
} else {
throw std::runtime_error("Expected a double or single precision FP array.");
}

l2_norm_impl(Queue, npoints, dims, a.get_data<double>(),
d.get_data<double>());
}

PYBIND11_MODULE(_l2_norm_sycl, m)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,11 @@
#include <stdlib.h>
#include <type_traits>

#define SIN(x) sycl::sin(x)
#define COS(x) sycl::cos(x)
#define SQRT(x) sycl::sqrt(x)
#define LOG(x) sycl::log(x)

using namespace sycl;

// Kernel-name tag: declared here and passed as the template argument of
// parallel_for in rambo_impl. Being a template on FpTy, each
// rambo_impl<FpTy> instantiation names a different kernel type.
template <typename FpTy> class RamboKernel;

template <typename FpTy>
event rambo_impl(queue Queue,
size_t nevts,
Expand All @@ -26,20 +24,21 @@ event rambo_impl(queue Queue,
const FpTy *usmQ1,
FpTy *usmOutput)
{
constexpr FpTy pi_v = M_PI;
return Queue.submit([&](handler &h) {
h.parallel_for<class RamboKernel>(range<1>{nevts}, [=](id<1> myID) {
h.parallel_for<RamboKernel<FpTy>>(range<1>{nevts}, [=](id<1> myID) {
for (size_t j = 0; j < nout; j++) {
int i = myID[0];
size_t idx = i * nout + j;

FpTy C = 2.0 * usmC1[idx] - 1.0;
FpTy S = SQRT(1 - C * C);
FpTy F = 2.0 * M_PI * usmF1[idx];
FpTy Q = -LOG(usmQ1[idx]);
FpTy C = 2 * usmC1[idx] - 1;
FpTy S = sycl::sqrt(1 - C * C);
FpTy F = 2 * pi_v * usmF1[idx];
FpTy Q = -sycl::log(usmQ1[idx]);

usmOutput[idx * 4] = Q;
usmOutput[idx * 4 + 1] = Q * S * SIN(F);
usmOutput[idx * 4 + 2] = Q * S * COS(F);
usmOutput[idx * 4 + 1] = Q * S * sycl::sin(F);
usmOutput[idx * 4 + 2] = Q * S * sycl::cos(F);
usmOutput[idx * 4 + 3] = Q * C;
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,24 @@ void rambo_sync(size_t nevts,
if (!ensure_compatibility(C1, F1, Q1))
throw std::runtime_error("Input arrays are not acceptable.");

if (C1.get_typenum() != UAR_DOUBLE || F1.get_typenum() != UAR_DOUBLE ||
Q1.get_typenum() != UAR_DOUBLE || output.get_typenum() != UAR_DOUBLE)
{
throw std::runtime_error("Expected a double precision FP array.");
}
if (output.get_typenum() != C1.get_typenum())
throw std::runtime_error("Input arrays are not acceptable.");

auto e = rambo_impl(Queue, nevts, nout, C1.get_data<double>(),
F1.get_data<double>(), Q1.get_data<double>(),
output.get_data<double>());
e.wait();
auto typenum = C1.get_typenum();

if (typenum == UAR_FLOAT) {
auto e = rambo_impl(Queue, nevts, nout, C1.get_data<float>(),
F1.get_data<float>(), Q1.get_data<float>(),
output.get_data<float>());
e.wait();
} else if (typenum == UAR_DOUBLE) {
auto e = rambo_impl(Queue, nevts, nout, C1.get_data<double>(),
F1.get_data<double>(), Q1.get_data<double>(),
output.get_data<double>());
e.wait();
} else {
throw std::runtime_error("Expected a double or single precision FP array.");
}
}

PYBIND11_MODULE(_rambo_sycl, m)
Expand Down

0 comments on commit 7160c2a

Please sign in to comment.