fix the errors. tests passed

IntelPython · Oct 9, 2023 · 04c2bf1 · 04c2bf1
1 parent ad6cbd9
commit 04c2bf1
Show file tree

Hide file tree

Showing 23 changed files with 164 additions and 151 deletions.
diff --git a/include/xgboost/context.h b/include/xgboost/context.h
@@ -184,6 +184,12 @@ struct Context : public XGBoostParameter<Context> {
    * @brief Is XGBoost running on a SYCL GPU?
    */
   [[nodiscard]] bool IsSyclGPU() const { return Device().IsSyclGPU(); }
+  /**
+   * @brief Is XGBoost running on any SYCL device?
+   */
+  [[nodiscard]] bool IsSycl() const { return IsSyclDefault()
+                                             || IsSyclCPU() 
+                                             || IsSyclGPU(); }
 
   /**
    * @brief Get the current device and ordinal.

diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h
@@ -596,13 +596,13 @@ auto MakeTensorView(Context const *ctx, common::Span<T> data, S &&...shape) {
 
 template <typename T, typename... S>
 auto MakeTensorView(Context const *ctx, HostDeviceVector<T> *data, S &&...shape) {
-  auto span = ctx->IsCPU() ? data->HostSpan() : data->DeviceSpan();
+  auto span = ctx->IsCUDA() ? data->DeviceSpan() : data->HostSpan();
   return MakeTensorView(ctx->gpu_id, span, std::forward<S>(shape)...);
 }
 
 template <typename T, typename... S>
 auto MakeTensorView(Context const *ctx, HostDeviceVector<T> const *data, S &&...shape) {
-  auto span = ctx->IsCPU() ? data->ConstHostSpan() : data->ConstDeviceSpan();
+  auto span = ctx->IsCUDA() ? data->ConstDeviceSpan() : data->ConstHostSpan();
   return MakeTensorView(ctx->gpu_id, span, std::forward<S>(shape)...);
 }
 

diff --git a/plugin/updater_oneapi/predictor_oneapi.cc b/plugin/updater_oneapi/predictor_oneapi.cc
@@ -30,8 +30,10 @@ DMLC_REGISTRY_FILE_TAG(predictor_oneapi);
 class PredictorOneAPI : public Predictor {
  public:
   explicit PredictorOneAPI(Context const* context) :
-      Predictor::Predictor{context} {
-    const DeviceOrd device_spec = context->Device();
+      Predictor::Predictor{context} {}
+
+  void Configure(const std::vector<std::pair<std::string, std::string>>& args) override {
+    const DeviceOrd device_spec = ctx_->Device();
 
     bool is_cpu;
     if (device_spec.IsSycl()) {
@@ -44,16 +46,11 @@ class PredictorOneAPI : public Predictor {
     LOG(INFO) << "device = " << device_spec.Name() << ", is_cpu = " << int(is_cpu);
 
     if (is_cpu) {
-      predictor_backend_.reset(Predictor::Create("cpu_predictor", context));
+      predictor_backend_.reset(Predictor::Create("cpu_predictor", ctx_));
     } else{
-      predictor_backend_.reset(Predictor::Create("oneapi_predictor_backend", context));
-    }
-  }
-
-  void Configure(const std::vector<std::pair<std::string, std::string>>& args) override {
-    if (predictor_backend_) {
-      predictor_backend_->Configure(args);
+      predictor_backend_.reset(Predictor::Create("oneapi_predictor_backend", ctx_));
     }
+    predictor_backend_->Configure(args);
   }
 
   void PredictBatch(DMatrix *dmat, PredictionCacheEntry *predts,

diff --git a/plugin/updater_oneapi/updater_quantile_hist_oneapi.cc b/plugin/updater_oneapi/updater_quantile_hist_oneapi.cc
@@ -650,7 +650,7 @@ bool QuantileHistMakerOneAPIBackend::Builder<GradientSumT>::UpdatePredictionCach
     return false;
   }
   builder_monitor_.Start("UpdatePredictionCache");
-  // CHECK_GT(out_preds.Size(), 0U);
+  CHECK_GT(out_preds.Size(), 0U);
 
   const size_t stride = out_preds.Stride(0);
   const int buffer_size = out_preds.Size()*stride - stride + 1;
@@ -671,7 +671,7 @@ bool QuantileHistMakerOneAPIBackend::Builder<GradientSumT>::UpdatePredictionCach
         CHECK((*p_last_tree_)[nid].IsLeaf());
       }
       leaf_value = (*p_last_tree_)[nid].LeafValue();
-
+ 
       const size_t* rid = rowset.begin;
       const size_t num_rows = rowset.Size();
 

diff --git a/src/common/linalg_op.cuh b/src/common/linalg_op.cuh
@@ -42,7 +42,7 @@ void ElementWiseTransformDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_
 
 template <typename T, int32_t D, typename Fn>
 void ElementWiseKernel(Context const* ctx, linalg::TensorView<T, D> t, Fn&& fn) {
-  ctx->IsCPU() ? ElementWiseKernelHost(t, ctx->Threads(), fn) : ElementWiseKernelDevice(t, fn);
+  ctx->IsCUDA() ? ElementWiseKernelDevice(t, fn) : ElementWiseKernelHost(t, ctx->Threads(), fn);
 }
 }  // namespace linalg
 }  // namespace xgboost

diff --git a/src/common/linalg_op.h b/src/common/linalg_op.h
@@ -55,7 +55,7 @@ void ElementWiseTransformDevice(linalg::TensorView<T, D>, Fn&&, void* = nullptr)
 
 template <typename T, int32_t D, typename Fn>
 void ElementWiseKernel(Context const* ctx, linalg::TensorView<T, D> t, Fn&& fn) {
-  if (!ctx->IsCPU()) {
+  if (ctx->IsCUDA()) {
     common::AssertGPUSupport();
   }
   ElementWiseKernelHost(t, ctx->Threads(), fn);

diff --git a/src/common/numeric.cc b/src/common/numeric.cc
@@ -11,13 +11,14 @@
 namespace xgboost {
 namespace common {
 double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
-  if (ctx->IsCPU()) {
+  if (ctx->IsCUDA()) {
+      return cuda_impl::Reduce(ctx, values);
+  } else {
     auto const& h_values = values.ConstHostVector();
     auto result = cpu_impl::Reduce(ctx, h_values.cbegin(), h_values.cend(), 0.0);
     static_assert(std::is_same<decltype(result), double>::value);
     return result;
   }
-  return cuda_impl::Reduce(ctx, values);
 }
 }  // namespace common
 }  // namespace xgboost
diff --git a/src/common/optional_weight.h b/src/common/optional_weight.h
@@ -26,7 +26,7 @@ inline OptionalWeights MakeOptionalWeights(Context const* ctx,
   if (ctx->IsCUDA()) {
     weights.SetDevice(ctx->gpu_id);
   }
-  return OptionalWeights{ctx->IsCPU() ? weights.ConstHostSpan() : weights.ConstDeviceSpan()};
+  return OptionalWeights{ctx->IsCUDA() ? weights.ConstDeviceSpan() : weights.ConstHostSpan()};
 }
 }  // namespace xgboost::common
 #endif  // XGBOOST_COMMON_OPTIONAL_WEIGHT_H_
diff --git a/src/common/ranking_utils.h b/src/common/ranking_utils.h
@@ -197,10 +197,10 @@ class RankingCache {
       CHECK_EQ(info.group_ptr_.back(), info.labels.Size())
           << error::GroupSize() << "the size of label.";
     }
-    if (ctx->IsCPU()) {
-      this->InitOnCPU(ctx, info);
-    } else {
+    if (ctx->IsCUDA()) {
       this->InitOnCUDA(ctx, info);
+    } else {
+      this->InitOnCPU(ctx, info);
     }
     if (!info.weights_.Empty()) {
       CHECK_EQ(Groups(), info.weights_.Size()) << error::GroupWeight();
@@ -218,7 +218,7 @@ class RankingCache {
   // Constructed as [1, n_samples] if group ptr is not supplied by the user
   common::Span<bst_group_t const> DataGroupPtr(Context const* ctx) const {
     group_ptr_.SetDevice(ctx->gpu_id);
-    return ctx->IsCPU() ? group_ptr_.ConstHostSpan() : group_ptr_.ConstDeviceSpan();
+    return ctx->IsCUDA() ? group_ptr_.ConstDeviceSpan() : group_ptr_.ConstHostSpan();
   }
 
   [[nodiscard]] auto const& Param() const { return param_; }
@@ -231,10 +231,10 @@ class RankingCache {
       sorted_idx_cache_.SetDevice(ctx->gpu_id);
       sorted_idx_cache_.Resize(predt.size());
     }
-    if (ctx->IsCPU()) {
-      return this->MakeRankOnCPU(ctx, predt);
-    } else {
+    if (ctx->IsCUDA()) {
       return this->MakeRankOnCUDA(ctx, predt);
+    } else {
+      return this->MakeRankOnCPU(ctx, predt);
     }
   }
   // The function simply returns a uninitialized buffer as this is only used by the
@@ -307,18 +307,18 @@ class NDCGCache : public RankingCache {
  public:
   NDCGCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p)
       : RankingCache{ctx, info, p} {
-    if (ctx->IsCPU()) {
-      this->InitOnCPU(ctx, info);
-    } else {
+    if (ctx->IsCUDA()) {
       this->InitOnCUDA(ctx, info);
+    } else {
+      this->InitOnCPU(ctx, info);
     }
   }
 
   linalg::VectorView<double const> InvIDCG(Context const* ctx) const {
     return inv_idcg_.View(ctx->gpu_id);
   }
   common::Span<double const> Discount(Context const* ctx) const {
-    return ctx->IsCPU() ? discounts_.ConstHostSpan() : discounts_.ConstDeviceSpan();
+    return ctx->IsCUDA() ? discounts_.ConstDeviceSpan() : discounts_.ConstHostSpan();
   }
   linalg::VectorView<double> Dcg(Context const* ctx) {
     if (dcg_.Size() == 0) {
@@ -387,10 +387,10 @@ class PreCache : public RankingCache {
  public:
   PreCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p)
       : RankingCache{ctx, info, p} {
-    if (ctx->IsCPU()) {
-      this->InitOnCPU(ctx, info);
-    } else {
+    if (ctx->IsCUDA()) {
       this->InitOnCUDA(ctx, info);
+    } else {
+      this->InitOnCPU(ctx, info);
     }
   }
 
@@ -399,7 +399,7 @@ class PreCache : public RankingCache {
       pre_.SetDevice(ctx->gpu_id);
       pre_.Resize(this->Groups());
     }
-    return ctx->IsCPU() ? pre_.HostSpan() : pre_.DeviceSpan();
+    return ctx->IsCUDA() ? pre_.DeviceSpan() : pre_.HostSpan();
   }
 };
 
@@ -418,10 +418,10 @@ class MAPCache : public RankingCache {
  public:
   MAPCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p)
       : RankingCache{ctx, info, p}, n_samples_{static_cast<std::size_t>(info.num_row_)} {
-    if (ctx->IsCPU()) {
-      this->InitOnCPU(ctx, info);
-    } else {
+    if (ctx->IsCUDA()) {
       this->InitOnCUDA(ctx, info);
+    } else {
+      this->InitOnCPU(ctx, info);
     }
   }
 
@@ -430,21 +430,21 @@ class MAPCache : public RankingCache {
       n_rel_.SetDevice(ctx->gpu_id);
       n_rel_.Resize(n_samples_);
     }
-    return ctx->IsCPU() ? n_rel_.HostSpan() : n_rel_.DeviceSpan();
+    return ctx->IsCUDA() ? n_rel_.DeviceSpan() : n_rel_.HostSpan();
   }
   common::Span<double> Acc(Context const* ctx) {
     if (acc_.Empty()) {
       acc_.SetDevice(ctx->gpu_id);
       acc_.Resize(n_samples_);
     }
-    return ctx->IsCPU() ? acc_.HostSpan() : acc_.DeviceSpan();
+    return ctx->IsCUDA() ? acc_.DeviceSpan() : acc_.HostSpan();
   }
   common::Span<double> Map(Context const* ctx) {
     if (map_.Empty()) {
       map_.SetDevice(ctx->gpu_id);
       map_.Resize(this->Groups());
     }
-    return ctx->IsCPU() ? map_.HostSpan() : map_.DeviceSpan();
+    return ctx->IsCUDA() ? map_.DeviceSpan() : map_.HostSpan();
   }
 };
 

diff --git a/src/common/stats.cc b/src/common/stats.cc
@@ -19,7 +19,7 @@ namespace xgboost {
 namespace common {
 void Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
             HostDeviceVector<float> const& weights, linalg::Tensor<float, 1>* out) {
-  if (!ctx->IsCPU()) {
+  if (ctx->IsCUDA()) {
     weights.SetDevice(ctx->gpu_id);
     auto opt_weights = OptionalWeights(weights.ConstDeviceSpan());
     auto t_v = t.View(ctx->gpu_id);
@@ -50,16 +50,16 @@ void Mean(Context const* ctx, linalg::Vector<float> const& v, linalg::Vector<flo
   out->SetDevice(ctx->gpu_id);
   out->Reshape(1);
 
-  if (ctx->IsCPU()) {
+  if (ctx->IsCUDA()) {
+      cuda_impl::Mean(ctx, v.View(ctx->gpu_id), out->View(ctx->gpu_id));
+  } else {
     auto h_v = v.HostView();
     float n = v.Size();
     MemStackAllocator<float, DefaultMaxThreads()> tloc(ctx->Threads(), 0.0f);
     ParallelFor(v.Size(), ctx->Threads(),
                 [&](auto i) { tloc[omp_get_thread_num()] += h_v(i) / n; });
     auto ret = std::accumulate(tloc.cbegin(), tloc.cend(), .0f);
     out->HostView()(0) = ret;
-  } else {
-    cuda_impl::Mean(ctx, v.View(ctx->gpu_id), out->View(ctx->gpu_id));
   }
 }
 }  // namespace common

diff --git a/src/context.cc b/src/context.cc
@@ -121,7 +121,7 @@ DeviceOrd CUDAOrdinal(DeviceOrd device, bool) {
 
   // handle alias
   std::string s_device = input;
-  if (!std::regex_match(s_device, std::regex("sycl:gpu(:[0-9]+)?")))
+  if (!std::regex_match(s_device, std::regex("sycl(:cpu|:gpu)?(:-1|:[0-9]+)?")))
     s_device = std::regex_replace(s_device, std::regex{"gpu"}, DeviceSym::CUDA());
 
   auto split_it = std::find(s_device.cbegin(), s_device.cend(), ':');

diff --git a/src/learner.cc b/src/learner.cc
@@ -278,7 +278,7 @@ LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy
   std::swap(base_score_, base_margin);
   // Make sure read access everywhere for thread-safe prediction.
   std::as_const(base_score_).HostView();
-  if (!ctx->IsCPU()) {
+  if (ctx->IsCUDA()) {
     std::as_const(base_score_).View(ctx->gpu_id);
   }
   CHECK(std::as_const(base_score_).Data()->HostCanRead());

diff --git a/src/metric/elementwise_metric.cu b/src/metric/elementwise_metric.cu
@@ -46,7 +46,26 @@ template <typename Fn>
 PackedReduceResult Reduce(Context const* ctx, MetaInfo const& info, Fn&& loss) {
   PackedReduceResult result;
   auto labels = info.labels.View(ctx->gpu_id);
-  if (ctx->IsCPU()) {
+  if (ctx->IsCUDA()) {
+  #if defined(XGBOOST_USE_CUDA)
+      dh::XGBCachingDeviceAllocator<char> alloc;
+      thrust::counting_iterator<size_t> begin(0);
+      thrust::counting_iterator<size_t> end = begin + labels.Size();
+      result = thrust::transform_reduce(
+          thrust::cuda::par(alloc), begin, end,
+          [=] XGBOOST_DEVICE(size_t i) {
+            auto idx = linalg::UnravelIndex(i, labels.Shape());
+            auto sample_id = std::get<0>(idx);
+            auto target_id = std::get<1>(idx);
+            auto res = loss(i, sample_id, target_id);
+            float v{std::get<0>(res)}, wt{std::get<1>(res)};
+            return PackedReduceResult{v, wt};
+          },
+          PackedReduceResult{}, thrust::plus<PackedReduceResult>());
+  #else
+      common::AssertGPUSupport();
+  #endif  //  defined(XGBOOST_USE_CUDA)
+  } else {
     auto n_threads = ctx->Threads();
     std::vector<double> score_tloc(n_threads, 0.0);
     std::vector<double> weight_tloc(n_threads, 0.0);
@@ -69,25 +88,6 @@ PackedReduceResult Reduce(Context const* ctx, MetaInfo const& info, Fn&& loss) {
     double residue_sum = std::accumulate(score_tloc.cbegin(), score_tloc.cend(), 0.0);
     double weights_sum = std::accumulate(weight_tloc.cbegin(), weight_tloc.cend(), 0.0);
     result = PackedReduceResult{residue_sum, weights_sum};
-  } else {
-#if defined(XGBOOST_USE_CUDA)
-    dh::XGBCachingDeviceAllocator<char> alloc;
-    thrust::counting_iterator<size_t> begin(0);
-    thrust::counting_iterator<size_t> end = begin + labels.Size();
-    result = thrust::transform_reduce(
-        thrust::cuda::par(alloc), begin, end,
-        [=] XGBOOST_DEVICE(size_t i) {
-          auto idx = linalg::UnravelIndex(i, labels.Shape());
-          auto sample_id = std::get<0>(idx);
-          auto target_id = std::get<1>(idx);
-          auto res = loss(i, sample_id, target_id);
-          float v{std::get<0>(res)}, wt{std::get<1>(res)};
-          return PackedReduceResult{v, wt};
-        },
-        PackedReduceResult{}, thrust::plus<PackedReduceResult>());
-#else
-    common::AssertGPUSupport();
-#endif  //  defined(XGBOOST_USE_CUDA)
   }
   return result;
 }
@@ -185,10 +185,10 @@ class PseudoErrorLoss : public MetricNoCache {
     CHECK_EQ(info.labels.Shape(0), info.num_row_);
     auto labels = info.labels.View(ctx_->gpu_id);
     preds.SetDevice(ctx_->gpu_id);
-    auto predts = ctx_->IsCPU() ? preds.ConstHostSpan() : preds.ConstDeviceSpan();
+    auto predts = ctx_->IsCUDA() ? preds.ConstDeviceSpan() : preds.ConstHostSpan();
     info.weights_.SetDevice(ctx_->gpu_id);
-    common::OptionalWeights weights(ctx_->IsCPU() ? info.weights_.ConstHostSpan()
-                                                     : info.weights_.ConstDeviceSpan());
+    common::OptionalWeights weights(ctx_->IsCUDA() ? info.weights_.ConstDeviceSpan() 
+                                                   : info.weights_.ConstHostSpan());
     float slope = this->param_.huber_slope;
     CHECK_NE(slope, 0.0) << "slope for pseudo huber cannot be 0.";
     PackedReduceResult result =
@@ -349,12 +349,13 @@ struct EvalEWiseBase : public MetricNoCache {
     if (info.labels.Size() != 0) {
       CHECK_NE(info.labels.Shape(1), 0);
     }
+    LOG(INFO) << "EvalEWiseBase::Eval 0";
     auto labels = info.labels.View(ctx_->gpu_id);
     info.weights_.SetDevice(ctx_->gpu_id);
-    common::OptionalWeights weights(ctx_->IsCPU() ? info.weights_.ConstHostSpan()
-                                                     : info.weights_.ConstDeviceSpan());
+    common::OptionalWeights weights(ctx_->IsCUDA() ? info.weights_.ConstDeviceSpan()
+                                                   : info.weights_.ConstHostSpan());
     preds.SetDevice(ctx_->gpu_id);
-    auto predts = ctx_->IsCPU() ? preds.ConstHostSpan() : preds.ConstDeviceSpan();
+    auto predts = ctx_->IsCUDA() ? preds.ConstDeviceSpan() : preds.ConstHostSpan();
 
     auto d_policy = policy_;
     auto result =

diff --git a/src/objective/adaptive.h b/src/objective/adaptive.h
@@ -96,13 +96,13 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
 inline void UpdateTreeLeaf(Context const* ctx, HostDeviceVector<bst_node_t> const& position,
                            std::int32_t group_idx, MetaInfo const& info, float learning_rate,
                            HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree) {
-  if (ctx->IsCPU()) {
-    detail::UpdateTreeLeafHost(ctx, position.ConstHostVector(), group_idx, info, learning_rate,
-                               predt, alpha, p_tree);
-  } else {
+  if (ctx->IsCUDA()) {
     position.SetDevice(ctx->gpu_id);
     detail::UpdateTreeLeafDevice(ctx, position.ConstDeviceSpan(), group_idx, info, learning_rate,
                                  predt, alpha, p_tree);
+  } else {
+    detail::UpdateTreeLeafHost(ctx, position.ConstHostVector(), group_idx, info, learning_rate,
+                               predt, alpha, p_tree);
   }
 }
 }  // namespace obj