-
Notifications
You must be signed in to change notification settings - Fork 4
/
p3a_simd_view.hpp
116 lines (111 loc) · 4.42 KB
/
p3a_simd_view.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#pragma once
#include "Kokkos_Core.hpp"
#include "p3a_for_each.hpp"
namespace p3a {
template <class T>
class simd_view {
private:
using layout = Kokkos::LayoutLeft;
using value_t = typename Kokkos::View<T, layout>::value_type;
using traits_t = typename Kokkos::View<T, layout>::traits;
using specialize_t = typename Kokkos::View<T, layout>::specialize;
using map_t = Kokkos::Impl::ViewMapping<traits_t, specialize_t>;
template <class Abi> using simd_t = simd<value_t, Abi>;
template <class Abi> using mask_t = simd_mask<value_t, Abi>;
private:
Kokkos::View<T, layout> m_view;
map_t m_map;
value_t* m_data = nullptr;
public:
simd_view() = default;
simd_view(Kokkos::View<T, layout> view)
: m_view(view), m_map(view.impl_map()), m_data(view.data())
{}
template <class Abi>
[[nodiscard]] P3A_ALWAYS_INLINE P3A_HOST_DEVICE inline
typename std::enable_if<1 == Kokkos::View<T, layout>::Rank, simd_t<Abi>>::type
load(int i, mask_t<Abi> const& mask) const {
return p3a::load(m_data, i, mask);
}
template <class Abi>
[[nodiscard]] P3A_ALWAYS_INLINE P3A_HOST_DEVICE inline
typename std::enable_if<2 == Kokkos::View<T, layout>::Rank, simd_t<Abi>>::type
load(int i, int j, mask_t<Abi> const& mask) const {
int const idx = m_map.m_impl_offset(i,j);
return p3a::load(m_data, idx, mask);
}
template <class Abi>
[[nodiscard]] P3A_ALWAYS_INLINE P3A_HOST_DEVICE inline
typename std::enable_if<3 == Kokkos::View<T, layout>::Rank, simd_t<Abi>>::type
load(int i, int j, int k, mask_t<Abi> const& mask) const {
int const idx = m_map.m_impl_offset(i,j,k);
return p3a::load(m_data, idx, mask);
}
template <class Abi>
[[nodiscard]] P3A_ALWAYS_INLINE P3A_HOST_DEVICE inline
typename std::enable_if<4 == Kokkos::View<T, layout>::Rank, simd_t<Abi>>::type
load(int i, int j, int k, int l, mask_t<Abi> const& mask) const {
int const idx = m_map.m_impl_offset(i,j,k,l);
return p3a::load(m_data, idx, mask);
}
template <class Abi, class U = T>
P3A_ALWAYS_INLINE P3A_HOST_DEVICE inline
typename std::enable_if<1 == Kokkos::View<U, layout>::Rank>::type
store(simd_t<Abi> const& val, int i, mask_t<Abi> const& mask) const {
p3a::store(val, m_data, i, mask);
}
template <class Abi, class U = T>
P3A_ALWAYS_INLINE P3A_HOST_DEVICE inline
typename std::enable_if<2 == Kokkos::View<U, layout>::Rank>::type
store(simd_t<Abi> const& val, int i, int j, mask_t<Abi> const& mask) const {
int const idx = m_map.m_impl_offset(i,j);
p3a::store(val, m_data, idx, mask);
}
template <class Abi, class U = T>
P3A_ALWAYS_INLINE P3A_HOST_DEVICE inline
typename std::enable_if<3 == Kokkos::View<U, layout>::Rank>::type
store(simd_t<Abi> const& val, int i, int j, int k, mask_t<Abi> const& mask) const {
int const idx = m_map.m_impl_offset(i,j,k);
p3a::store(val, m_data, idx, mask);
}
template <class Abi, class U = T>
P3A_ALWAYS_INLINE P3A_HOST_DEVICE inline
typename std::enable_if<4 == Kokkos::View<U, layout>::Rank>::type
store(simd_t<Abi> const& val, int i, int j, int k, int l, mask_t<Abi> const& mask) const {
int const idx = m_map.m_impl_offset(i,j,k,l);
p3a::store(val, m_data, idx, mask);
}
template <class Abi, class U = T>
P3A_ALWAYS_INLINE P3A_HOST_DEVICE inline
typename std::enable_if<1 == Kokkos::View<U, layout>::Rank>::type
sum_store(simd_t<Abi> const& val, int i, mask_t<Abi> const& mask) const {
simd_t<Abi> sum = load(i, mask);
sum += val;
store(sum, i, mask);
}
template <class Abi, class U = T>
P3A_ALWAYS_INLINE P3A_HOST_DEVICE inline
typename std::enable_if<2 == Kokkos::View<U, layout>::Rank>::type
sum_store(simd_t<Abi> const& val, int i, int j, mask_t<Abi> const& mask) const {
simd_t<Abi> sum = load(i, j, mask);
sum += val;
store(sum, i, j, mask);
}
template <class Abi, class U = T>
P3A_ALWAYS_INLINE P3A_HOST_DEVICE inline
typename std::enable_if<3 == Kokkos::View<U, layout>::Rank>::type
sum_store(simd_t<Abi> const& val, int i, int j, int k, mask_t<Abi> const& mask) const {
simd_t<Abi> sum = load(i, j, k, mask);
sum += val;
store(sum, i, j, k, mask);
}
template <class Abi, class U = T>
P3A_ALWAYS_INLINE P3A_HOST_DEVICE inline
typename std::enable_if<4 == Kokkos::View<U, layout>::Rank>::type
sum_store(simd_t<Abi> const& val, int i, int j, int k, int l, mask_t<Abi> const& mask) const {
simd_t<Abi> sum = load(i, j, k, l, mask);
sum += val;
store(sum, i, j, k, l, mask);
}
};
}