Skip to content

Commit

Permalink
Add tests for sycl implemenation for GHistIndexMatrix (#32)
Browse files Browse the repository at this point in the history
* initial

* move helper functions to a separate file

* lintint

---------

Co-authored-by: Dmitry Razdoburdin <>
  • Loading branch information
razdoburdin authored Feb 15, 2024
1 parent 9c81341 commit cc34883
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 42 deletions.
1 change: 0 additions & 1 deletion plugin/sycl/common/partition_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ inline ::sycl::event PartitionSparseKernel(::sycl::queue* qu,
const size_t* rid = rid_span.begin;
const size_t range_size = rid_span.Size();
const uint32_t* cut_ptrs = gmat.cut_device.Ptrs().DataConst();
const bst_float* cut_vals = gmat.cut_device.Values().DataConst();

size_t* p_rid_buf = rid_buf->data();
return qu->submit([&](::sycl::handler& cgh) {
Expand Down
63 changes: 30 additions & 33 deletions plugin/sycl/data/gradient_index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ namespace common {
uint32_t SearchBin(const bst_float* cut_values, const uint32_t* cut_ptrs, Entry const& e) {
auto beg = cut_ptrs[e.index];
auto end = cut_ptrs[e.index + 1];
const auto &values = cut_values;
auto it = std::upper_bound(cut_values + beg, cut_values + end, e.fvalue);
uint32_t idx = it - cut_values;
if (idx == end) {
Expand Down Expand Up @@ -51,25 +50,27 @@ void mergeSort(BinIdxType* begin, BinIdxType* end, BinIdxType* buf) {

template <typename BinIdxType>
void GHistIndexMatrix::SetIndexData(::sycl::queue qu,
xgboost::common::Span<BinIdxType> index_data_span,
const DeviceMatrix &dmat_device,
BinIdxType* index_data,
const DeviceMatrix &dmat,
size_t nbins,
size_t row_stride,
uint32_t* offsets) {
if (hit_count.size() == 0) return;
const xgboost::Entry *data_ptr = dmat_device.data.DataConst();
const bst_row_t *offset_vec = dmat_device.row_ptr.DataConst();
const size_t num_rows = dmat_device.row_ptr.Size() - 1;
BinIdxType* index_data = index_data_span.data();
if (nbins == 0) return;
const xgboost::Entry *data_ptr = dmat.data.DataConst();
const bst_row_t *offset_vec = dmat.row_ptr.DataConst();
const size_t num_rows = dmat.row_ptr.Size() - 1;
const bst_float* cut_values = cut_device.Values().DataConst();
const uint32_t* cut_ptrs = cut_device.Ptrs().DataConst();
::sycl::buffer<size_t, 1> hit_count_buf(hit_count.data(), hit_count.size());
size_t* hit_count_ptr = hit_count_buff.Data();

USMVector<BinIdxType> sort_buf(&qu, num_rows * row_stride);
BinIdxType* sort_data = sort_buf.Data();
// Sparse case only
if (!offsets) {
// sort_buff has type uint8_t
sort_buff.Resize(&qu, num_rows * row_stride * sizeof(BinIdxType));
}
BinIdxType* sort_data = reinterpret_cast<BinIdxType*>(sort_buff.Data());

qu.submit([&](::sycl::handler& cgh) {
auto hit_count_acc = hit_count_buf.template get_access<::sycl::access::mode::atomic>(cgh);
auto event = qu.submit([&](::sycl::handler& cgh) {
cgh.parallel_for<>(::sycl::range<1>(num_rows), [=](::sycl::item<1> pid) {
const size_t i = pid.get_id(0);
const size_t ibegin = offset_vec[i];
Expand All @@ -79,7 +80,8 @@ void GHistIndexMatrix::SetIndexData(::sycl::queue qu,
for (bst_uint j = 0; j < size; ++j) {
uint32_t idx = SearchBin(cut_values, cut_ptrs, data_ptr[ibegin + j]);
index_data[start + j] = offsets ? idx - offsets[j] : idx;
::sycl::atomic_fetch_add<size_t>(hit_count_acc[idx], 1);
AtomicRef<size_t> hit_count_ref(hit_count_ptr[idx]);
hit_count_ref.fetch_add(1);
}
if (!offsets) {
// Sparse case only
Expand All @@ -89,12 +91,12 @@ void GHistIndexMatrix::SetIndexData(::sycl::queue qu,
}
}
});
}).wait();
});
qu.memcpy(hit_count.data(), hit_count_ptr, nbins * sizeof(size_t), event);
qu.wait();
}

void GHistIndexMatrix::ResizeIndex(const size_t n_offsets,
const size_t n_index,
const bool isDense) {
void GHistIndexMatrix::ResizeIndex(size_t n_index, bool isDense) {
if ((max_num_bins - 1 <= static_cast<int>(std::numeric_limits<uint8_t>::max())) && isDense) {
index.SetBinTypeSize(BinTypeSize::kUint8BinsTypeSize);
index.Resize((sizeof(uint8_t)) * n_index);
Expand All @@ -121,6 +123,7 @@ void GHistIndexMatrix::Init(::sycl::queue qu,
const uint32_t nbins = cut.Ptrs().back();
this->nbins = nbins;
hit_count.resize(nbins, 0);
hit_count_buff.Resize(&qu, nbins, 0);

this->p_fmat = p_fmat_device.p_mat;
const bool isDense = p_fmat_device.p_mat->IsDense();
Expand All @@ -136,42 +139,36 @@ void GHistIndexMatrix::Init(::sycl::queue qu,
}
}

const size_t n_offsets = cut.Ptrs().size() - 1;
const size_t n_offsets = cut_device.Ptrs().Size() - 1;
const size_t n_rows = p_fmat_device.row_ptr.Size() - 1;
const size_t n_index = n_rows * row_stride;
ResizeIndex(n_offsets, n_index, isDense);
ResizeIndex(n_index, isDense);

CHECK_GT(cut.Values().size(), 0U);
CHECK_GT(cut_device.Values().Size(), 0U);

uint32_t* offsets = nullptr;
if (isDense) {
index.ResizeOffset(n_offsets);
offsets = index.Offset();
qu.memcpy(offsets, cut.Ptrs().data(), sizeof(uint32_t) * n_offsets).wait_and_throw();
qu.memcpy(offsets, cut_device.Ptrs().DataConst(),
sizeof(uint32_t) * n_offsets).wait_and_throw();
}

if (isDense) {
BinTypeSize curent_bin_size = index.GetBinTypeSize();
if (curent_bin_size == BinTypeSize::kUint8BinsTypeSize) {
xgboost::common::Span<uint8_t> index_data_span = {index.data<uint8_t>(),
n_index};
SetIndexData(qu, index_data_span, p_fmat_device, nbins, row_stride, offsets);
SetIndexData(qu, index.data<uint8_t>(), p_fmat_device, nbins, row_stride, offsets);

} else if (curent_bin_size == BinTypeSize::kUint16BinsTypeSize) {
xgboost::common::Span<uint16_t> index_data_span = {index.data<uint16_t>(),
n_index};
SetIndexData(qu, index_data_span, p_fmat_device, nbins, row_stride, offsets);
SetIndexData(qu, index.data<uint16_t>(), p_fmat_device, nbins, row_stride, offsets);
} else {
CHECK_EQ(curent_bin_size, BinTypeSize::kUint32BinsTypeSize);
xgboost::common::Span<uint32_t> index_data_span = {index.data<uint32_t>(),
n_index};
SetIndexData(qu, index_data_span, p_fmat_device, nbins, row_stride, offsets);
SetIndexData(qu, index.data<uint32_t>(), p_fmat_device, nbins, row_stride, offsets);
}
/* For sparse DMatrix we have to store index of feature for each bin
in index field to chose right offset. So offset is nullptr and index is not reduced */
} else {
xgboost::common::Span<uint32_t> index_data_span = {index.data<uint32_t>(), n_index};
SetIndexData(qu, index_data_span, p_fmat_device, nbins, row_stride, offsets);
SetIndexData(qu, index.data<uint32_t>(), p_fmat_device, nbins, row_stride, offsets);
}
}

Expand Down
16 changes: 8 additions & 8 deletions plugin/sycl/data/gradient_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,7 @@ class HistogramCuts {
public:
HistogramCuts() {}

explicit HistogramCuts(::sycl::queue qu) {
cut_ptrs_.Resize(&qu, 1, 0u);
}
explicit HistogramCuts(::sycl::queue qu) {}

~HistogramCuts() {
}
Expand Down Expand Up @@ -172,6 +170,9 @@ struct GHistIndexMatrix {
Index index;
/*! \brief hit count of each index */
std::vector<size_t> hit_count;
/*! \brief buffers for calculations */
USMVector<size_t, MemoryType::on_device> hit_count_buff;
USMVector<uint8_t, MemoryType::on_device> sort_buff;
/*! \brief The corresponding cuts */
xgboost::common::HistogramCuts cut;
HistogramCuts cut_device;
Expand All @@ -186,20 +187,19 @@ struct GHistIndexMatrix {
const sycl::DeviceMatrix& p_fmat_device, int max_num_bins);

template <typename BinIdxType>
void SetIndexData(::sycl::queue qu, xgboost::common::Span<BinIdxType> index_data_span,
void SetIndexData(::sycl::queue qu, BinIdxType* index_data,
const sycl::DeviceMatrix &dmat_device,
size_t nbins, size_t row_stride, uint32_t* offsets);

void ResizeIndex(const size_t n_offsets, const size_t n_index,
const bool isDense);
void ResizeIndex(size_t n_index, bool isDense);

inline void GetFeatureCounts(std::vector<size_t>* counts) const {
inline void GetFeatureCounts(size_t* counts) const {
auto nfeature = cut_device.Ptrs().Size() - 1;
for (unsigned fid = 0; fid < nfeature; ++fid) {
auto ibegin = cut_device.Ptrs()[fid];
auto iend = cut_device.Ptrs()[fid + 1];
for (auto i = ibegin; i < iend; ++i) {
(*counts)[fid] += hit_count[i];
*(counts + fid) += hit_count[i];
}
}
}
Expand Down
30 changes: 30 additions & 0 deletions tests/cpp/plugin/sycl_helpers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*!
* Copyright 2022-2024 XGBoost contributors
*/
#pragma once

#include "../helpers.h"

namespace xgboost::sycl {
template<typename T, typename Container>
void VerifySyclVector(const USMVector<T, MemoryType::shared>& sycl_vector,
const Container& host_vector) {
ASSERT_EQ(sycl_vector.Size(), host_vector.size());

size_t size = sycl_vector.Size();
for (size_t i = 0; i < size; ++i) {
ASSERT_EQ(sycl_vector[i], host_vector[i]);
}
}

template<typename T, typename Container>
void VerifySyclVector(const std::vector<T>& sycl_vector, const Container& host_vector) {
ASSERT_EQ(sycl_vector.size(), host_vector.size());

size_t size = sycl_vector.size();
for (size_t i = 0; i < size; ++i) {
ASSERT_EQ(sycl_vector[i], host_vector[i]);
}
}

} // namespace xgboost::sycl
80 changes: 80 additions & 0 deletions tests/cpp/plugin/test_sycl_gradient_index.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/**
* Copyright 2021-2024 by XGBoost contributors
*/

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#pragma GCC diagnostic ignored "-W#pragma-messages"
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
#pragma GCC diagnostic pop

#include "../../../plugin/sycl/data/gradient_index.h"
#include "../../../plugin/sycl/device_manager.h"
#include "sycl_helpers.h"
#include "../helpers.h"

namespace xgboost::sycl::data {

TEST(SyclGradientIndex, HistogramCuts) {
size_t max_bins = 8;

Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});

DeviceManager device_manager;
auto qu = device_manager.GetQueue(ctx.Device());

auto p_fmat = RandomDataGenerator{512, 16, 0.5}.GenerateDMatrix(true);

xgboost::common::HistogramCuts cut =
xgboost::common::SketchOnDMatrix(&ctx, p_fmat.get(), max_bins);

common::HistogramCuts cut_sycl;
cut_sycl.Init(qu, cut);

VerifySyclVector(cut_sycl.Ptrs(), cut.cut_ptrs_.HostVector());
VerifySyclVector(cut_sycl.Values(), cut.cut_values_.HostVector());
VerifySyclVector(cut_sycl.MinValues(), cut.min_vals_.HostVector());
}

TEST(SyclGradientIndex, Init) {
size_t n_rows = 128;
size_t n_columns = 7;

Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});

DeviceManager device_manager;
auto qu = device_manager.GetQueue(ctx.Device());

auto p_fmat = RandomDataGenerator{n_rows, n_columns, 0.3}.GenerateDMatrix();

sycl::DeviceMatrix dmat;
dmat.Init(qu, p_fmat.get());

int max_bins = 256;
common::GHistIndexMatrix gmat_sycl;
gmat_sycl.Init(qu, &ctx, dmat, max_bins);

xgboost::GHistIndexMatrix gmat{&ctx, p_fmat.get(), max_bins, 0.3, false};

{
ASSERT_EQ(gmat_sycl.max_num_bins, max_bins);
ASSERT_EQ(gmat_sycl.nfeatures, n_columns);
}

{
VerifySyclVector(gmat_sycl.hit_count, gmat.hit_count);
}

{
std::vector<size_t> feature_count_sycl(n_columns, 0);
gmat_sycl.GetFeatureCounts(feature_count_sycl.data());

std::vector<size_t> feature_count(n_columns, 0);
gmat.GetFeatureCounts(feature_count.data());
VerifySyclVector(feature_count_sycl, feature_count);
}
}

} // namespace xgboost::sycl::data

0 comments on commit cc34883

Please sign in to comment.