forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGatedLinearUnit.cpp
73 lines (62 loc) · 2.65 KB
/
GatedLinearUnit.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#include <ATen/ATen.h>
#include <ATen/TensorIterator.h>
#include <ATen/native/Activation.h>
namespace at {
namespace meta {
// Meta function for glu: validates `self` and `dim`, then registers a binary
// operation over the two halves of `self` along the dimension returned by
// maybe_wrap_dim(dim, self.dim()).
//
// Fails via TORCH_CHECK when `self` is 0-dimensional or when the size of that
// dimension is odd.
TORCH_META_FUNC(glu) (
    const Tensor& self, int64_t dim
) {
  // A 0-dimensional tensor reports "size" 1, which is odd and would be
  // rejected by the evenness check anyway; checking up front only serves to
  // produce a clearer error message.
  TORCH_CHECK(self.dim() > 0, "glu does not support 0-dimensional tensors");

  const auto axis = maybe_wrap_dim(dim, self.dim());
  const int64_t extent = self.size(axis);
  TORCH_CHECK(extent % 2 == 0, "Halving dimension must be even, but dimension ",
              axis, " is size ", extent);

  // The output is sized to half of the input along `axis`.
  const int64_t half = extent / 2;
  Tensor lower = self.narrow(axis, 0, half);
  Tensor upper = self.narrow(axis, half, half);

  // NOTE(review): the builder's name suggests it borrows its operands, while
  // `lower` and `upper` are locals of this function — confirm nothing reads
  // them through the iterator after this function returns.
  build_borrowing_binary_op(maybe_get_output(), lower, upper);
}
} // namespace meta
namespace native {
// Definitions of the two dispatch stubs used in this file: glu_stub is invoked
// by the glu_out implementation and glu_backward_stub by glu_backward_cpu_out.
// NOTE(review): the matching declarations are presumably in
// ATen/native/Activation.h — confirm.
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DEFINE_DISPATCH(glu_stub);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DEFINE_DISPATCH(glu_backward_stub);
// Implementation function for glu.out: forwards this object to glu_stub,
// selecting the kernel by this object's device type.
//
// `self`, `dim` and `out` are not read here.
// NOTE(review): presumably `*this` already carries the iterator state set up
// by TORCH_META_FUNC(glu) — confirm against the structured-kernel codegen.
TORCH_IMPL_FUNC(glu_out) (const Tensor& self, int64_t dim, const Tensor& out) {
  auto& iter = *this;
  glu_stub(iter.device_type(), iter);
}
// Computes the glu backward pass on CPU, writing into `grad_input`.
//
// grad_output: gradient flowing in from the glu output. Its shape is not
//              checked in this function.
// input:       the tensor glu was applied to; must have at least one
//              dimension and an even size along `dim`.
// dim:         dimension that was halved; passed through maybe_wrap_dim.
// grad_input:  destination tensor; resized to match `input` and returned.
//
// Fails via TORCH_CHECK when `input` is 0-dimensional or the size of the
// selected dimension is odd.
Tensor& glu_backward_cpu_out(const Tensor& grad_output, const Tensor& input,
                             int64_t dim, Tensor& grad_input) {
  TORCH_CHECK(input.dim() > 0, "glu does not support 0-dimensional tensors");

  const auto axis = maybe_wrap_dim(dim, input.dim());
  const int64_t extent = input.size(axis);
  TORCH_CHECK(extent % 2 == 0, "Halving dimension must be even, but dimension ",
              axis, " is size ", extent);

  grad_input.resize_as_(input);
  const int64_t half = extent / 2;

  // Split both the input and the gradient destination into their two halves
  // along `axis`.
  Tensor input_lower = input.narrow(axis, 0, half);
  Tensor input_upper = input.narrow(axis, half, half);
  Tensor grad_lower = grad_input.narrow(axis, 0, half);
  Tensor grad_upper = grad_input.narrow(axis, half, half);

  // Step 1: the first gradient half temporarily holds sigmoid(second input
  // half); it is consumed as an input by the stub below.
  at::sigmoid_out(grad_lower, input_upper);

  // Step 2: the second gradient half is produced by a single fused kernel
  // (better performance than separate elementwise ops).
  at::TensorIteratorConfig config;
  config.add_output(grad_upper);
  config.add_input(grad_lower);
  config.add_input(input_lower);
  config.add_input(grad_output);
  auto iter = config.build();
  glu_backward_stub(iter.device_type(), iter);

  // Step 3: finish the first gradient half. This must come after the stub
  // call, because the stub takes the un-multiplied `grad_lower` as an input.
  grad_lower.mul_(grad_output);
  return grad_input;
}
// Allocating variant of glu_backward_cpu_out: creates an empty ({0}-sized)
// tensor with `input`'s options, lets glu_backward_cpu_out fill it, and
// returns it.
Tensor glu_backward_cpu(const Tensor& grad_output, const Tensor& input, int64_t dim) {
  Tensor result = at::empty({0}, input.options());
  // The out variant returns a reference to the tensor passed as its last
  // argument, so `result` is the value to hand back.
  glu_backward_cpu_out(grad_output, input, dim, result);
  return result;
}
} // at::native
} // at