derivative of softmax is independent of max (tinygrad#7009)
* derivative of softmax is independent of max

* update test
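For reference (not part of the commit), the reasoning behind the first point: for any constant m,

softmax(x)_i = exp(x_i - m) / sum_k exp(x_k - m) = exp(x_i) / sum_k exp(x_k)

since the factor exp(-m) cancels between numerator and denominator. The output, and therefore its derivative with respect to x, is identical for every m, including m = max(x). The max is subtracted only to keep exp() numerically stable, so there is no need to backpropagate through it; that is what the .detach() in the diff below expresses, and it is why the expected number of scheduled kernels in the backward test drops from 6 to 4.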
chenyuxyz authored Oct 12, 2024
1 parent cae1c41 commit 04d9b46
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion test/test_schedule.py
@@ -882,7 +882,7 @@ def test_softmax_backward(self):
Tensor.manual_seed(0)
x = Tensor.randn(4, 12, 64, 64, requires_grad=True).realize()
x.softmax().sum().backward()
-    run_schedule(check_schedule(x.grad, 6))
+    run_schedule(check_schedule(x.grad, 4))

# changed by: multireduce spec
def test_layernorm_onelayer_fusion(self):
2 changes: 1 addition & 1 deletion tinygrad/tensor.py
@@ -1681,7 +1681,7 @@ def std_mean(self, axis:Optional[Union[int, Sequence[int]]]=None, keepdim=False,

def _softmax(self, axis, dtype:Optional[DTypeLike]=None):
x = self.cast(dtype) if dtype is not None else self
-    m = x - x.max(axis=axis, keepdim=True)
+    m = x - x.max(axis=axis, keepdim=True).detach()
e = m.exp()
return m, e, e.sum(axis=axis, keepdim=True)

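A minimal sketch (not part of the commit) that checks this numerically, assuming tinygrad and numpy are installed; the helper name naive_softmax is hypothetical and the shapes are arbitrary:

import numpy as np
from tinygrad import Tensor

def naive_softmax(x: Tensor, axis=-1) -> Tensor:
  # subtract the max WITHOUT detaching, so autograd also traces through the max
  m = x - x.max(axis=axis, keepdim=True)
  e = m.exp()
  return e / e.sum(axis=axis, keepdim=True)

np.random.seed(0)
x_np = np.random.randn(4, 8).astype(np.float32)
w_np = np.random.randn(4, 8).astype(np.float32)  # weights make the gradient non-trivial

a = Tensor(x_np, requires_grad=True)
(naive_softmax(a) * Tensor(w_np)).sum().backward()

b = Tensor(x_np, requires_grad=True)
(b.softmax() * Tensor(w_np)).sum().backward()  # library softmax (max detached after this commit)

# the two gradients agree: the max path contributes exactly zero to the derivative
print(np.allclose(a.grad.numpy(), b.grad.numpy(), atol=1e-5))

Detaching the max does not change the result; it only removes the zero-contribution backward kernels through the max reduction from the schedule.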
