Add to_grad option to F.assign #577

Open · wants to merge 1 commit into base: master
4 changes: 3 additions & 1 deletion build-tools/code_generator/api_levels.yaml
@@ -196,4 +196,6 @@
BatchDet_Empty: 276
WarpByFlow_Empty: 277
6:
MinMaxQuantize_fBBBf: 274
7:
Assign_B: 278
18 changes: 18 additions & 0 deletions build-tools/code_generator/functions.yaml
@@ -3575,6 +3575,18 @@ Array Manipulation:

Unlike TensorFlow, the returned Variable has a backward path to `dst`:

If `to_grad` is `True`, the value of `src` is assigned to the `grad` of `dst`
instead of its `data`, and that value is kept unchanged during backward.

.. code-block:: python

    dst = nn.Variable((2, 3, 4))
    src = nn.Variable((2, 3, 4))
    assign = F.assign(dst, src, to_grad=True)

    assign.forward()
    assert np.allclose(dst.g, src.d)     # dst.grad is updated instead of dst.data
    assert np.allclose(assign.g, src.d)  # the output grad holds the same value

.. math::

g_{dst} = g_{y}
@@ -3583,12 +3595,18 @@ Array Manipulation:
doc: A destination N-D array
src:
doc: A source N-D array
arguments:
to_grad:
doc: If `True`, the source is assigned to the gradient of the destination instead of its data.
type: bool
default: 'False'
outputs:
y:
doc: An assigned array
c_runtime: not support
function_ids:
Empty: 248
B: 278
GatherNd:
snake_name: gather_nd
doc: |2
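The functions.yaml entry above documents the new `to_grad` option for `F.assign`. As a minimal side-by-side sketch of the default path versus `to_grad=True` (not part of the PR; the imports, shapes, and variable names are illustrative):

```python
import numpy as np
import nnabla as nn
import nnabla.functions as F

dst = nn.Variable((2, 3, 4), need_grad=True)
src = nn.Variable((2, 3, 4), need_grad=True)
src.d = np.random.rand(2, 3, 4)

# Default (to_grad=False): src is copied into dst.data.
y_data = F.assign(dst, src)
y_data.forward()
assert np.allclose(dst.d, src.d)

# to_grad=True: src is copied into dst.grad instead; dst.data is untouched.
y_grad = F.assign(dst, src, to_grad=True)
y_grad.forward()
assert np.allclose(dst.g, src.d)
```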
14 changes: 10 additions & 4 deletions include/nbla/function/assign.hpp
@@ -21,7 +21,7 @@

namespace nbla {

NBLA_REGISTER_FUNCTION_HEADER(Assign);
NBLA_REGISTER_FUNCTION_HEADER(Assign, bool);

/** Assign source array to destination array
The function is defined as
@@ -32,18 +32,24 @@ y_i = x_i
Inputs:
- destination N-D array
- source N-D array

Arguments:
- to_grad: If true, assign to the gradient of the destination instead of its data.

Outputs:
- N-D array identical to source array

\ingroup FunctionImplGrp
*/
template <typename T> class Assign : public BaseFunction<> {
template <typename T> class Assign : public BaseFunction<bool> {
protected:
bool to_grad_;

public:
Assign(const Context &ctx) : BaseFunction(ctx) {}
Assign(const Context &ctx, bool to_grad)
: BaseFunction<bool>(ctx, to_grad), to_grad_(to_grad) {}
virtual ~Assign() {}
virtual shared_ptr<Function> copy() const { return create_Assign(ctx_); }
virtual shared_ptr<Function> copy() const {
return create_Assign(ctx_, to_grad_);
}
virtual int min_inputs() { return 2; }
virtual int min_outputs() { return 1; }
virtual vector<dtypes> in_types() {
32 changes: 28 additions & 4 deletions python/test/function/test_assign.py
@@ -52,14 +52,38 @@ def test_assign_forward_backward(seed, ctx, func_name):

# gradients at destination are identical to gradients at assign operation
assert not np.all(dst.g == np.zeros((2, 3, 4)))
assert np.all(dst.g == assign.g)
assert np.all(src.g == np.zeros((2, 3, 4)))
assert_allclose(dst.g, assign.g)
assert_allclose(src.g, np.zeros((2, 3, 4)))

# check accum=False
assign.grad.zero()
dst.g = rng.rand(2, 3, 4)
f = assign.parent
f.forward([dst, src], [assign])
f.backward([dst, src], [assign], accum=[False])
assert np.all(dst.g == assign.g)
assert np.all(src.g == np.zeros((2, 3, 4)))
assert_allclose(dst.g, assign.g)
assert_allclose(src.g, np.zeros((2, 3, 4)))


@pytest.mark.parametrize("ctx, func_name", ctxs)
@pytest.mark.parametrize("seed", [314])
def test_assign_with_to_grad_true(seed, ctx, func_name):
rng = np.random.RandomState(seed)
dst = nn.Variable((2, 3, 4), need_grad=True)
src = nn.Variable((2, 3, 4), need_grad=True)

assign = F.assign(dst, src, to_grad=True)

src.d = rng.rand(2, 3, 4)
assign.forward()

# destination gradient should be equal to the source data
assert_allclose(dst.g, src.d)
# output gradient of the assign function should be equal to the source data
assert_allclose(assign.g, src.d)

assign.backward()

# gradients written in forward remain unchanged after backward
assert_allclose(dst.g, assign.g)
assert_allclose(dst.g, src.d)
15 changes: 11 additions & 4 deletions src/nbla/function/generic/assign.cpp
@@ -20,7 +20,7 @@

namespace nbla {

NBLA_REGISTER_FUNCTION_SOURCE(Assign);
NBLA_REGISTER_FUNCTION_SOURCE(Assign, bool);

template <typename T>
void Assign<T>::setup_impl(const Variables &inputs, const Variables &outputs) {
@@ -35,9 +35,16 @@ void Assign<T>::setup_impl(const Variables &inputs, const Variables &outputs) {
template <typename T>
void Assign<T>::forward_impl(const Variables &inputs,
const Variables &outputs) {
Array *dst = inputs[0]->data()->cast(get_dtype<T>(), this->ctx_, true);

Array *dst, *y;
if (to_grad_) {
dst = inputs[0]->grad()->cast(get_dtype<T>(), this->ctx_, true);
y = outputs[0]->grad()->cast(get_dtype<T>(), this->ctx_, true);
} else {
dst = inputs[0]->data()->cast(get_dtype<T>(), this->ctx_, true);
y = outputs[0]->data()->cast(get_dtype<T>(), this->ctx_, true);
}
const Array *src = inputs[1]->data()->get(get_dtype<T>(), this->ctx_);
Array *y = outputs[0]->data()->cast(get_dtype<T>(), this->ctx_, true);
dst->copy_from(src);
y->copy_from(src);
}
@@ -46,7 +53,7 @@ template <typename T>
void Assign<T>::backward_impl(const Variables &inputs, const Variables &outputs,
const vector<bool> &propagate_down,
const vector<bool> &accum) {
if (!propagate_down[0])
if (!propagate_down[0] || to_grad_)
return;

Variable gy(outputs[0]->grad());
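With `to_grad=True`, `backward_impl` returns early, so nothing is propagated through the assign node and the values written during forward are left untouched. A hedged sketch of that behavior, driving the parent function directly in the same `accum` pattern as the existing test (illustrative only; not part of the PR):

```python
import numpy as np
import nnabla as nn
import nnabla.functions as F

rng = np.random.RandomState(313)
dst = nn.Variable((2, 3, 4), need_grad=True)
src = nn.Variable((2, 3, 4), need_grad=True)
src.d = rng.rand(2, 3, 4)

y = F.assign(dst, src, to_grad=True)
f = y.parent
f.forward([dst, src], [y])                  # copies src.d into dst.g and y.g
f.backward([dst, src], [y], accum=[False])  # early return because to_grad_ is set

assert np.allclose(dst.g, src.d)  # gradient written in forward is preserved
assert np.allclose(y.g, src.d)    # output gradient is also left untouched
```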