diff --git a/test/test_ops.py b/test/test_ops.py
index e21a702a0f88..87f5da56875d 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -810,6 +810,10 @@ def test_sigmoid(self):
   def test_sigmoid_extreme(self):
     helper_test_op([(45,65)], torch.sigmoid, Tensor.sigmoid, low=300, high=400)
     helper_test_op([(45,65)], torch.sigmoid, Tensor.sigmoid, low=-400, high=-300)
+    x = Tensor([300.0])
+    self.assertAlmostEqual(x.sigmoid()[0].gradient(x)[0].item(), 0.0)
+    x = Tensor([-300.0])
+    self.assertAlmostEqual(x.sigmoid()[0].gradient(x)[0].item(), 0.0)
   def test_hardsigmoid(self):
     helper_test_op([(45,65)], torch.nn.functional.hardsigmoid, Tensor.hardsigmoid)
     helper_test_op([()], torch.nn.functional.hardsigmoid, Tensor.hardsigmoid)
diff --git a/tinygrad/codegen/uopgraph.py b/tinygrad/codegen/uopgraph.py
index 11ff014ac7bd..6c2178123de2 100644
--- a/tinygrad/codegen/uopgraph.py
+++ b/tinygrad/codegen/uopgraph.py
@@ -313,6 +313,7 @@ def reduce_collapse(acc:UOp, ret:UOp, alu:UOp):
     lambda root: UOp(Ops.SINK, root.dtype, tuple(flatten(x.src if x.op in {Ops.SINK, Ops.UNROLL} else (x,) for x in root.src)), root.arg)
       if any(x.op in {Ops.SINK, Ops.UNROLL} for x in root.src) else None),
   # stable sigmoid
+  (UPat.var("x")*(((UPat.var("x")+1)*(UPat.var("x")+1)).reciprocal()), lambda x: sigmoid_like(x, x.const_like(1))),
   (UPat.var("x")*(((UPat.var("x")+1)*(UPat.var("x")+1)).reciprocal()*UPat.var("y")), sigmoid_like),
   (UPat.var("x")*(((UPat.var("x")+1)*(UPat.var("x")+1)*(UPat.var("x")+1)).reciprocal()), lambda x: sigmoid_like(x, (x+1).reciprocal())),
 ])
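Note (not part of the patch): the added rewrite rule matches the sigmoid-backward shape u * ((u+1)*(u+1)).reciprocal() when it appears with no trailing multiplier, passing x.const_like(1) as the multiplier to the existing sigmoid_like helper, alongside the rules that already handle the multiplied and cubed variants. The sketch below is only an illustration in plain numpy, not tinygrad UOps, and it assumes the backward is effectively u/(1+u)^2 with u = exp(-x) (tinygrad lowers through exp2, but the algebraic shape is the same): once u overflows in float32 the raw expression evaluates to inf * 0 = nan, whereas the saturated sigmoid form stays at 0.0, which is the value the new assertAlmostEqual checks expect.

# Illustrative sketch only; function names here are hypothetical, not tinygrad APIs.
import numpy as np

def naive_sigmoid_grad(x):
  # shape of the un-rewritten backward: u * ((u+1)*(u+1)).reciprocal(), with u = exp(-x)
  u = np.exp(np.float32(-x))                                           # overflows to inf in float32 for x = -300
  return u * (np.float32(1) / ((u + np.float32(1)) * (u + np.float32(1))))  # inf * 0.0 -> nan after overflow

def stable_sigmoid_grad(x):
  # algebraically equal form sigmoid(x) * (1 - sigmoid(x)); sigmoid saturates instead of overflowing
  s = np.float32(1) / (np.float32(1) + np.exp(np.float32(-x)))
  return s * (np.float32(1) - s)

print(naive_sigmoid_grad(-300.0))   # nan
print(stable_sigmoid_grad(-300.0))  # 0.0, the value the new gradient tests assert

Without the new rule, the plain (unmultiplied) pattern fell through to the raw expression, which is presumably why the gradient at the extreme test points came back nan rather than 0.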