Skip to content

Commit

Permalink
Deduplicate global implementation (#139)
Browse files Browse the repository at this point in the history
* deduplicate

* inline

* format
  • Loading branch information
t4c1 authored Feb 6, 2024
1 parent 5963edf commit d5fe215
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 267 deletions.
15 changes: 8 additions & 7 deletions src/portfft/committed_descriptor_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,12 @@ class committed_descriptor_impl;
template <typename Scalar, domain Domain, detail::layout LayoutIn, detail::layout LayoutOut, Idx SubgroupSize,
typename TIn>
std::vector<sycl::event> compute_level(
const typename committed_descriptor_impl<Scalar, Domain>::kernel_data_struct& kd_struct, TIn input, Scalar* output,
TIn input_imag, Scalar* output_imag, const Scalar* twiddles_ptr, const IdxGlobal* factors_triple,
IdxGlobal intermediate_twiddle_offset, IdxGlobal subimpl_twiddle_offset, IdxGlobal input_global_offset,
IdxGlobal committed_size, Idx num_batches_in_l2, IdxGlobal n_transforms, IdxGlobal batch_start, Idx factor_id,
Idx total_factors, complex_storage storage, const std::vector<sycl::event>& dependencies, sycl::queue& queue);
const typename committed_descriptor_impl<Scalar, Domain>::kernel_data_struct& kd_struct, const TIn& input,
Scalar* output, const TIn& input_imag, Scalar* output_imag, const Scalar* twiddles_ptr,
const IdxGlobal* factors_triple, IdxGlobal intermediate_twiddle_offset, IdxGlobal subimpl_twiddle_offset,
IdxGlobal input_global_offset, IdxGlobal committed_size, Idx num_batches_in_l2, IdxGlobal n_transforms,
IdxGlobal batch_start, Idx factor_id, Idx total_factors, complex_storage storage,
const std::vector<sycl::event>& dependencies, sycl::queue& queue);

template <typename Scalar, domain Domain, typename TOut>
sycl::event transpose_level(const typename committed_descriptor_impl<Scalar, Domain>::kernel_data_struct& kd_struct,
Expand Down Expand Up @@ -150,8 +151,8 @@ class committed_descriptor_impl {
template <typename Scalar1, domain Domain1, detail::layout LayoutIn, detail::layout LayoutOut, Idx SubgroupSize,
typename TIn>
friend std::vector<sycl::event> detail::compute_level(
const typename committed_descriptor_impl<Scalar1, Domain1>::kernel_data_struct& kd_struct, TIn input,
Scalar1* output, TIn input_imag, Scalar1* output_imag, const Scalar1* twiddles_ptr,
const typename committed_descriptor_impl<Scalar1, Domain1>::kernel_data_struct& kd_struct, const TIn& input,
Scalar1* output, const TIn& input_imag, Scalar1* output_imag, const Scalar1* twiddles_ptr,
const IdxGlobal* factors_triple, IdxGlobal intermediate_twiddle_offset, IdxGlobal subimpl_twiddle_offset,
IdxGlobal input_global_offset, IdxGlobal committed_size, Idx num_batches_in_l2, IdxGlobal n_transforms,
IdxGlobal batch_start, Idx factor_id, Idx total_factors, complex_storage storage,
Expand Down
Loading

0 comments on commit d5fe215

Please sign in to comment.