diff --git a/src/rulesets/Base/mapreduce.jl b/src/rulesets/Base/mapreduce.jl
index a83f72cc7..3e013df12 100644
--- a/src/rulesets/Base/mapreduce.jl
+++ b/src/rulesets/Base/mapreduce.jl
@@ -417,85 +417,133 @@ end
 end
 
 #####
-##### `foldl`
+##### `mapfoldl(f, g, ::Tuple)`
+#####
+
+using Base: mapfoldl_impl
+
+# For tuples there should be no harm in handling `map` first.
+# This will also catch `mapreduce`.
+
+function rrule(
+        cfg::RuleConfig{>:HasReverseMode}, ::typeof(mapfoldl_impl), f::F, op::G, init, x::Tuple;
+    ) where {F,G}
+    fx, map_back = rrule(cfg, map, f, x)  # `map` first,
+    out, fold_back = rrule(cfg, Base.mapfoldl_impl, identity, op, init, fx)  # then the fold
+    function mapfoldl_pullback_tuple(dout)
+        (_, _, dop, dinit, dfx) = fold_back(dout)
+        (_, df, dx) = map_back(dfx)
+        return (NoTangent(), df, dop, dinit, dx)
+    end
+    return (out, mapfoldl_pullback_tuple)
+end
+
+#####
+##### `foldl(f, ::Tuple)`
 #####
 
 # `foldl` guarantees to execute `f` in order, left to right. So it makes sense even when
-# this `f` is stateful, in which case the gradient must be calculated in the reverse order. 
+# this `f` is stateful, in which case the gradient must be calculated in the reverse order.
 
-# The implementation aims to be efficient for both tuples and arrays, although using accumulate
-# to carry intermediate results along creates arrays of tuples which could be avoided; using a
-# loop can be a few times faster. Note also that it does not return a gradient for `init`.
+# The rule is attached to `Base.mapfoldl_impl` because this gets the `init` keyword as an argument,
+# which is handled below. For tuples, `reduce` also comes here.
 
 function rrule(
-        config::RuleConfig{>:HasReverseMode}, ::typeof(foldl), op::G, x::Union{AbstractArray, Tuple};
-        init=_InitialValue()
+        config::RuleConfig{>:HasReverseMode},
+        ::typeof(Base.mapfoldl_impl),
+        ::typeof(identity),
+        op::G, 
+        init::Base._InitialValue, 
+        x::Tuple;  # NOTE(review): a 1-element `x` leaves `hobbits` empty; `last` may throw
     ) where {G}
-    list, start = if init === _InitialValue()
-        _drop1(x), first(x)
-    else
-        # Case with init keyword is simpler to understand first!
-        _reshape1(x, :), init  # (vec is for Julia 1.0, accumulate is fussy)
-    end
-    hobbits = accumulate(list; init=(start, nothing)) do (a,_), b
+    hobbits = accumulate(Base.tail(x); init=(first(x), nothing)) do (a, _), b
         # Here `a` is what we would normally cary forward, and `_` ignores
         # the previous iteration's pullback function (needed later),
         # while `b` is the fresh input from `list` as usual.
-        c, back = rrule_via_ad(config, op, a, b)  # LHS is just documentation here!
+        c, back = rrule_via_ad(config, op, a, b)
         # We don't really need to store every `c`, last one is `foldl` output.
         # (The name, BTW, is because "there and back again" is the subtitle of Tolkien's book.)
     end
     y = first(last(hobbits))
-    axe = axes(x)
     project = ProjectTo(x)
-    function unfoldl(dy)
-        trio = accumulate(_reverse1(hobbits); init=(0, dy, 0)) do (_, dc, _), (_, back)
+    function foldl_pullback_tuple(dy)
+        trio = accumulate(reverse(hobbits); init=(0, dy, 0)) do (_, dc, _), (_, back)
             ds, da, db = back(dc)
-            # Don't need to store every `da`, need one for the next iteration + maybe last
+            # Only the latest `da` is needed: it feeds the next step, and the final one goes to `first(x)`.
         end
         dop = sum(first, trio)
-        dx = map(last, _reverse1(trio))
-        if init === _InitialValue()
-            # `hobbits` is one short
-            dx = _vcat1(trio[end][2], dx)
-        end
-        return (NoTangent(), dop, project(_reshape1(dx, axe)))
+        dx = (trio[end][2], reverse(map(last, trio))...)  # `hobbits` is one short
+        return (NoTangent(), NoTangent(), ProjectTo(op)(dop), NoTangent(), project(dx))
     end
-    return y, unfoldl
+    return y, foldl_pullback_tuple
 end
 
+function rrule(
+        config::RuleConfig{>:HasReverseMode},
+        ::typeof(Base.mapfoldl_impl),
+        ::typeof(identity),
+        op::G,
+        init,
+        x::Tuple;
+    ) where {G}
+    # An empty tuple is settled here, which avoids ambiguities: the result is just `init`.
+    if isempty(x)
+        foldl_pullback_empty(dy) = (NoTangent(), NoTangent(), NoTangent(), dy, NoTangent())
+        return init, foldl_pullback_empty
+    end
+    # Otherwise put `init` in front of `x`, and re-use the rule for the case without `init`:
+    y, back = rrule(config, Base.mapfoldl_impl, identity, op, Base._InitialValue(), (init, x...))
+    proj_x, proj_init = ProjectTo(x), ProjectTo(init)
+    function foldl_pullback_tuple_init(dy)
+        (_, _, dop, _, dall) = back(dy)
+        return (NoTangent(), NoTangent(), dop, proj_init(first(dall)), proj_x(Base.tail(dall)))
+    end
+    return y, foldl_pullback_tuple_init
+end
 
 #####
-##### Iterator-or-Tuple functions
+##### `foldl(f, ::Array)`
 #####
 
-# This zoo of underscore functions helps `foldl` & `accumulate` handle both tuples and arrays,
-# and also provides some alternatives for versions of Julia where iterators weren't supported.
-# Inspired by `Base._reverse`, used in defn of `foldr`.
+# The implementation was originally for both tuples and arrays, although using accumulate
+# to carry intermediate results along creates arrays of tuples which could be avoided.
+# Using a loop can be a few times faster, this should be replaced:
+# https://github.com/FluxML/Zygote.jl/issues/644#issuecomment-628762305
 
-# To support 2nd derivatives, some may need their own gradient rules. And _drop1 should perhaps
-# be replaced by _peel1 like Iterators.peel
+# Note also that it does not return a gradient for `init`, now marked `@not_implemented`.
 
-_reverse1(x) = Iterators.reverse(x)
-_drop1(x) = Iterators.drop(x, 1)
-_zip2(x, y) = zip(x, y)  # for `accumulate`, below
-
-_reverse1(x::Tuple) = reverse(x)
-_drop1(x::Tuple) = Base.tail(x)
-_zip2(x::Tuple{Vararg{Any,N}}, y::Tuple{Vararg{Any,N}}) where N = ntuple(i -> (x[i],y[i]), N)
-
-struct _InitialValue end  # Old versions don't have `Base._InitialValue`
+function rrule(
+        config::RuleConfig{>:HasReverseMode}, ::typeof(Base.mapfoldl_impl), ::typeof(identity), op::G, init, x::Union{AbstractArray, Tuple};
+    ) where {G}
+    start, list = if init === Base._InitialValue()
+        Iterators.peel(x)  # no `init`: the first element of `x` seeds the fold
+    else
+        init, x  # explicit `init`: every element of `x` is folded in
+    end
+    hobbits = accumulate(list; init=(start, nothing)) do (a, _), b
+        c, back = rrule_via_ad(config, op, a, b)  # store each result with its pullback
+    end
+    y = first(last(hobbits))
+    axe, project = axes(x), ProjectTo(x)
+    function unfoldl(dy)
+        trio = accumulate(Iterators.reverse(hobbits); init=(0, dy, 0)) do (_, dc, _), (_, back)
+            ds, da, db = back(dc)  # this `da` is the `dc` of the next step
+        end
+        dop = sum(first, trio)
+        dx = map(last, Iterators.reverse(trio))
+        if init === Base._InitialValue()  # `hobbits` is one short
+            dx = _vcat1(trio[end][2], dx)
+        end
+        # The no-`init` sentinel has no tangent; the gradient of a real `init` is not implemented.
+        d_init = init === Base._InitialValue() ? NoTangent() :
+            @not_implemented "gradient for foldl does not at present include init, sorry"
+        return (NoTangent(), NoTangent(), dop, d_init, project(reshape(dx, axe)))
+    end
+    return y, unfoldl
+end
 
 _vcat1(x, ys::AbstractVector) = vcat(x, ys)
 _vcat1(x::AbstractArray, ys::AbstractVector) = vcat([x], ys)
-_vcat1(x, ys::Tuple) = (x, ys...)
-
-_reshape1(x::AbstractArray, axe) = reshape(x, axe)
-_reshape1(x::Tuple, axe) = x
-
-_no_tuple_tangent(dx::Tangent) = ChainRulesCore.backing(dx)
-_no_tuple_tangent(dx) = dx
-
 
 #####
 ##### `accumulate`
@@ -503,51 +551,54 @@ _no_tuple_tangent(dx) = dx
 
 # Like `foldl` this by definition works in order, so it makes sense to allow stateful `f`.
 
+# Also like `foldl`, the version with a keyword `init` can't easily be given a gradient.
+# Hence the rule is attached lower down: `_accumulate!(op, B, A::AbstractVector, dims::Nothing, init)`.
+
 function rrule(
-        config::RuleConfig{>:HasReverseMode}, ::typeof(accumulate), op::G, x::Union{AbstractArray, Tuple}; 
-        init=_InitialValue(), dims=nothing
+        config::RuleConfig{>:HasReverseMode},
+        ::typeof(Base._accumulate!),
+        op::G, y::AbstractVector,
+        x::AbstractVector,
+        dims::Nothing,
+        init,
     ) where {G}
-    isnothing(dims) || dims == 1 && x isa Base.AbstractVecOrTuple || throw(
-        "accumulate(op, x; dims) is not currently supported by ChainRules, sorry"
-        # It's not supported by AD either, so no point calling back, and no regression:
-        # gradient(x -> sum(accumulate(/, x, dims=1)), rand(3,4)) 
-        # ERROR: Mutating arrays is not supported
-    )
-    list, start = if init === _InitialValue()
-        _drop1(x), first(x)
+    # `init` is either `nothing` or a wrapper that `something` unwraps; `y` is filled in place.
+    start, list = if init === nothing
+        Iterators.peel(x)
     else
-        x, init
+        something(init), x
     end
     hobbits = accumulate(list; init = (start, nothing)) do (a, _), b
         c, back = rrule_via_ad(config, op, a, b)
     end
-    y = map(first, hobbits)
-    if init === _InitialValue()
+    if init === nothing
         # `hobbits` is one short, and first one doesn't invoke `op`
-        y = _vcat1(first(x), y)
+        y[1] = first(x)
+        map!(first, @view(y[2:end]), hobbits)
+    else
+        map!(first, y, hobbits)
     end
     axe = axes(x)
     project = ProjectTo(x)
     function decumulate(dy)
-        dy_plain = _no_tuple_tangent(unthunk(dy))
-        rev_list = if init === _InitialValue()
-            # Here we rely on `zip` to stop early. Begin explicit with _reverse1(_drop1(...))
-            # gets "no method matching iterate(::Base.Iterators.Reverse{Base.Iterators.Drop{Array{"
-            _zip2(_reverse1(hobbits), _reverse1(dy_plain))
-        else
-            _zip2(_reverse1(hobbits), _reverse1(dy_plain))
-        end
+        dy_plain = unthunk(dy)
+        rev_list = zip(Iterators.reverse(hobbits), Iterators.reverse(dy_plain))
+        # Here we rely on `zip` to stop early when `init === nothing`. Being explicit with
+        # `Iterators.reverse(Iterators.drop(..., 1))` gets "no method matching iterate(::Base.Iterators.Reverse{Base.Iterators.Drop{Array{"
         trio = accumulate(rev_list; init=(0, ZeroTangent(), 0)) do (_, dc, _), ((_, back), dz)
             ds, da, db = back(dc + dz)
             # Don't need to store every 'da', but need for next iteration, and the last one.
         end
         dop = sum(first, trio)
-        dx = map(last, _reverse1(trio))
-        if init == _InitialValue()
+        dx = map(last, Iterators.reverse(trio))
+        if init === nothing
             # `hobbits` is one short, and the first one is weird
             dx = _vcat1(trio[end][2] + dy_plain[1], dx)
         end
-        return (NoTangent(), dop, project(_reshape1(dx, axe)))
+        d_buffer = @not_implemented "no gradient for `B` in `accumulate!(f, B, A)`, the rule intends to support `accumulate` only"
+        d_init_not = @not_implemented "gradient for accumulate does not at present include init, sorry"
+        d_init = init === nothing ? NoTangent() : Tangent{typeof(init)}(; value = d_init_not)
+        return (NoTangent(), dop, d_buffer, project(reshape(dx, axe)), NoTangent(), d_init)
     end
-    return _reshape1(y, axe), decumulate
+    return reshape(y, axe), decumulate
 end
diff --git a/test/rulesets/Base/mapreduce.jl b/test/rulesets/Base/mapreduce.jl
index 89f41c933..dc350bc2f 100644
--- a/test/rulesets/Base/mapreduce.jl
+++ b/test/rulesets/Base/mapreduce.jl
@@ -2,6 +2,11 @@
 Base.sum(xs::AbstractArray, weights::AbstractArray) = dot(xs, weights)
 struct SumRuleConfig <: RuleConfig{Union{HasReverseMode}} end
 
+const CFG = ChainRulesTestUtils.ADviaRuleConfig()
+
+using Base: mapfoldl_impl, _accumulate!  # for foldl & accumulate rules
+const _INIT = Base._InitialValue()
+
 @testset "Reductions" begin
     @testset "sum(::Tuple)" begin
         test_frule(sum, Tuple(rand(5)))
@@ -213,60 +218,99 @@ struct SumRuleConfig <: RuleConfig{Union{HasReverseMode}} end
     end  # prod
 
     @testset "foldl(f, ::Array)" begin
+        # `foldl(op, itr; init)` goes to `mapfoldl_impl(identity, op, init, itr)`. The rule is
+        # now attached there, as this is the simplest way to handle the `init` keyword.
+
         # Simple
-        y1, b1 = rrule(CFG, foldl, *, [1, 2, 3]; init=1)
+        y1, b1 = rrule(CFG, mapfoldl_impl, identity, *, 1, [1, 2, 3])
         @test y1 == 6
-        b1(7) == (NoTangent(), NoTangent(), [42, 21, 14])
+        @test b1(7)[1:3] == (NoTangent(), NoTangent(), NoTangent())
+        @test b1(7)[4] isa ChainRulesCore.NotImplemented
+        @test b1(7)[5] == [42, 21, 14]
 
-        y2, b2 = rrule(CFG, foldl, *, [1 2; 0 4])  # without init, needs vcat
+        y2, b2 = rrule(CFG, mapfoldl_impl, identity, *, _INIT, [1 2; 0 4])  # without init, needs vcat
         @test y2 == 0
-        b2(8) == (NoTangent(), NoTangent(), [0 0; 64 0])  # matrix, needs reshape
+        @test b2(8)[5] == [0 0; 64 0]  # matrix, needs reshape
 
         # Test execution order
         c5 = Counter()
-        y5, b5 = rrule(CFG, foldl, c5, [5, 7, 11])
+        y5, b5 = rrule(CFG, mapfoldl_impl, identity, c5, _INIT, [5, 7, 11])
         @test c5 == Counter(2)
         @test y5 == ((5 + 7)*1 + 11)*2 == foldl(Counter(), [5, 7, 11])
-        @test b5(1) == (NoTangent(), NoTangent(), [12*32, 12*42, 22])
+        @test b5(1)[5] == [12*32, 12*42, 22]
         @test c5 == Counter(42)
 
         c6 = Counter()
-        y6, b6 = rrule(CFG, foldl, c6, [5, 7, 11], init=3)
+        y6, b6 = rrule(CFG, mapfoldl_impl, identity, c6, 3, [5, 7, 11])
         @test c6 == Counter(3)
         @test y6 == (((3 + 5)*1 + 7)*2 + 11)*3 == foldl(Counter(), [5, 7, 11], init=3)
-        @test b6(1) == (NoTangent(), NoTangent(), [63*33*13, 43*13, 23])
+        @test b6(1)[5] == [63*33*13, 43*13, 23]
         @test c6 == Counter(63)
 
         # Test gradient of function
-        y7, b7 = rrule(CFG, foldl, Multiplier(3), [5, 7, 11])
+        y7, b7 = rrule(CFG, mapfoldl_impl, identity, Multiplier(3), _INIT, [5, 7, 11])
         @test y7 == foldl((x,y)->x*y*3, [5, 7, 11])
-        @test b7(1) == (NoTangent(), Tangent{Multiplier{Int}}(x = 2310,), [693, 495, 315])
+        b7_1 = b7(1)
+        @test b7_1[3] == Tangent{Multiplier{Int}}(x = 2310,)
+        @test b7_1[5] == [693, 495, 315]
 
-        y8, b8 = rrule(CFG, foldl, Multiplier(13), [5, 7, 11], init=3)
+        y8, b8 = rrule(CFG, mapfoldl_impl, identity, Multiplier(13), 3, [5, 7, 11])
         @test y8 == 2_537_535 == foldl((x,y)->x*y*13, [5, 7, 11], init=3)
-        @test b8(1) == (NoTangent(), Tangent{Multiplier{Int}}(x = 585585,), [507507, 362505, 230685])
+        b8_1 = b8(1)
+        @test b8_1[3] == Tangent{Multiplier{Int}}(x = 585585,)
+        @test b8_1[5] == [507507, 362505, 230685]
         # To find these numbers:
         # ForwardDiff.derivative(z -> foldl((x,y)->x*y*z, [5,7,11], init=3), 13)
         # ForwardDiff.gradient(z -> foldl((x,y)->x*y*13, z, init=3), [5,7,11]) |> string
 
         # Finite differencing
-        test_rrule(foldl, /, 1 .+ rand(3,4))
-        test_rrule(foldl, *, rand(ComplexF64,3,4); fkwargs=(; init=rand(ComplexF64)))
-        test_rrule(foldl, +, rand(ComplexF64,7); fkwargs=(; init=rand(ComplexF64)))
-        test_rrule(foldl, max, rand(3); fkwargs=(; init=999))
+        test_rrule(mapfoldl_impl, identity, /, _INIT, 1 .+ rand(3,4))
+        test_rrule(mapfoldl_impl, identity, *, rand(ComplexF64), rand(ComplexF64,3,4))
+        test_rrule(mapfoldl_impl, identity, +, rand(ComplexF64), rand(ComplexF64,7))
+        test_rrule(mapfoldl_impl, identity, max, 999, rand(3))
     end
     @testset "foldl(f, ::Tuple)" begin
-        y1, b1 = rrule(CFG, foldl, *, (1,2,3); init=1)
+        y1, b1 = rrule(CFG, mapfoldl_impl, identity, *, 1, (1,2,3))
         @test y1 == 6
-        b1(7) == (NoTangent(), NoTangent(), Tangent{NTuple{3,Int}}(42, 21, 14))
+        @test b1(7)[5] == Tangent{NTuple{3,Int}}(42, 21, 14)
 
-        y2, b2 = rrule(CFG, foldl, *, (1, 2, 0, 4))
+        y2, b2 = rrule(CFG, mapfoldl_impl, identity, *, _INIT, (1, 2, 0, 4))
         @test y2 == 0
-        b2(8) == (NoTangent(), NoTangent(), Tangent{NTuple{4,Int}}(0, 0, 64, 0))
+        @test b2(8)[5] == Tangent{NTuple{4,Int}}(0, 0, 64, 0)
+        
+        # Test execution order, via a `Counter` whose state is compared before and after the pullback
+        c5 = Counter()
+        y5, b5 = rrule(CFG, mapfoldl_impl, identity, c5, _INIT, (5, 7, 11))
+        @test c5 == Counter(2)
+        @test y5 == ((5 + 7)*1 + 11)*2 == foldl(Counter(), (5, 7, 11))
+        @test collect(b5(1)[5]) == [12*32, 12*42, 22]
+        @test c5 == Counter(42)
+
+        c6 = Counter()
+        y6, b6 = rrule(CFG, mapfoldl_impl, identity, c6, 3, (5, 7, 11))
+        @test c6 == Counter(3)
+        @test y6 == (((3 + 5)*1 + 7)*2 + 11)*3 == foldl(Counter(), (5, 7, 11), init=3)
+        @test collect(b6(1)[5]) == [63*33*13, 43*13, 23]
+        @test c6 == Counter(63)
+
+        # Test gradient of function: the tangent for `op` itself, here a `Multiplier`
+        y7, b7 = rrule(CFG, mapfoldl_impl, identity, Multiplier(3), _INIT, (5, 7, 11))
+        @test y7 == foldl((x,y)->x*y*3, (5, 7, 11))
+        b7_1 = b7(1)
+        @test b7_1[3] == Tangent{Multiplier{Int}}(x = 2310,)
+        @test collect(b7_1[5]) == [693, 495, 315]
 
         # Finite differencing
-        test_rrule(foldl, /, Tuple(1 .+ rand(5)))
-        test_rrule(foldl, *, Tuple(rand(ComplexF64, 5)))
+        test_rrule(mapfoldl_impl, identity, /, _INIT, Tuple(1 .+ rand(5)))
+        test_rrule(mapfoldl_impl, identity, *, 1+rand(), Tuple(rand(ComplexF64, 5)))
+        
+        # Trivial case: an empty tuple with an explicit `init`
+        test_rrule(mapfoldl_impl, identity, /, 2pi, ())
+    end
+    @testset "mapfoldl(f, g, ::Tuple)" begin
+        test_rrule(mapfoldl_impl, cbrt, /, _INIT, Tuple(1 .+ rand(5)), check_inferred=false)
+        test_rrule(mapfoldl_impl, abs2, *, 1+rand(), Tuple(rand(ComplexF64, 5)), check_inferred=false)
+        # TODO make the `map(f, ::Tuple)` rule infer better! (presumably why `check_inferred=false`)
     end
 end
 
@@ -323,54 +367,55 @@ end
         end
     end  # cumprod
 
-    @testset "accumulate(f, ::Array)" begin
+    @testset "accumulate(f, ::Vector)" begin
+        # `accumulate(f, A; init)` goes to `_accumulate!(op, B, A, dims::Nothing, init)`, called below
+        # with `init` as `nothing` or `Some(value)`. The rule is now attached there, to handle `init`.
+
         # Simple
-        y1, b1 = rrule(CFG, accumulate, *, [1, 2, 3, 4]; init=1)
+        y1, b1 = rrule(CFG, _accumulate!, *, [0, 0, 0, 0], [1, 2, 3, 4], nothing, Some(1))
         @test y1 == [1, 2, 6, 24]
-        @test b1([1, 1, 1, 1]) == (NoTangent(), NoTangent(), [33, 16, 10, 6])
+        @test b1([1, 1, 1, 1])[3] isa ChainRulesCore.NotImplemented
+        @test b1([1, 1, 1, 1])[4] == [33, 16, 10, 6]
+        @test b1([1, 1, 1, 1])[6] isa Tangent{Some{Int}}
+        @test b1([1, 1, 1, 1])[6].value isa ChainRulesCore.NotImplemented
 
-        y2, b2 = rrule(CFG, accumulate, /, [1 2; 3 4])
-        @test y2 ≈ accumulate(/, [1 2; 3 4])
-        @test b2(ones(2, 2))[3] ≈ [1.5416666 -0.104166664; -0.18055555 -0.010416667]  atol=1e-6
+        # y2, b2 = rrule(CFG, _accumulate!, /, [0 0; 0 0], [1 2; 3 4], :, nothing)
+        # @test y2 ≈ accumulate(/, [1 2; 3 4.0])
+        # @test b2(ones(2, 2))[3] ≈ [1.5416666 -0.104166664; -0.18055555 -0.010416667]  atol=1e-6
 
         # Test execution order
         c3 = Counter()
-        y3, b3 = rrule(CFG, accumulate, c3, [5, 7, 11]; init=3)
+        y3, b3 = rrule(CFG, _accumulate!, c3, [0, 0, 0], [5, 7, 11], nothing, Some(3))
         @test c3 == Counter(3)
         @test y3 == [8, 30, 123] == accumulate(Counter(), [5, 7, 11]; init=3)
-        @test b3([1, 1, 1]) == (NoTangent(), NoTangent(), [29169, 602, 23]) # the 23 is clear!
+        @test b3([1, 1, 1])[4] == [29169, 602, 23] # the 23 is clear!
 
         c4 = Counter()
-        y4, b4 = rrule(CFG, accumulate, c4, [5, 7, 11])
+        y4, b4 = rrule(CFG, _accumulate!, c4, [0, 0, 0], [5, 7, 11], nothing, nothing)
         @test c4 == Counter(2)
         @test y4 == [5, (5+7)*1, ((5+7)*1 + 11)*2] == accumulate(Counter(), [5, 7, 11])
-        @test b4([1, 1, 1]) == (NoTangent(), NoTangent(), [417, 42*(1 + 12), 22])
+        @test b4([1, 1, 1])[4] == [417, 42*(1 + 12), 22]
 
         # Test gradient of function
-        y7, b7 = rrule(CFG, accumulate, Multiplier(3), [5, 7, 11])
+        y7, b7 = rrule(CFG, _accumulate!, Multiplier(3), [0, 0, 0], [5, 7, 11], nothing, nothing)
         @test y7 == accumulate((x,y)->x*y*3, [5, 7, 11])
-        @test b7([1, 1, 1]) == (NoTangent(), Tangent{Multiplier{Int}}(x = 2345,), [715, 510, 315])
+        @test b7([1, 1, 1])[2] == Tangent{Multiplier{Int}}(; x = 2345,)
+        @test b7([1, 1, 1])[4] == [715, 510, 315]
 
-        y8, b8 = rrule(CFG, accumulate, Multiplier(13), [5, 7, 11], init=3)
+        y8, b8 = rrule(CFG, _accumulate!, Multiplier(13), [0, 0, 0], [5, 7, 11], nothing, Some(3))
         @test y8 == [195, 17745, 2537535] == accumulate((x,y)->x*y*13, [5, 7, 11], init=3)
-        @test b8([1, 1, 1]) == (NoTangent(), Tangent{Multiplier{Int}}(x = 588330,), [511095, 365040, 230685])
+        @test b8([1, 1, 1])[2] == Tangent{Multiplier{Int}}(; x = 588330,)
+        @test b8([1, 1, 1])[4] == [511095, 365040, 230685]
         # To find these numbers:
         # ForwardDiff.derivative(z -> sum(accumulate((x,y)->x*y*z, [5,7,11], init=3)), 13)
         # ForwardDiff.gradient(z -> sum(accumulate((x,y)->x*y*13, z, init=3)), [5,7,11]) |> string
 
         # Finite differencing
-        test_rrule(accumulate, *, randn(5); fkwargs=(; init=rand()))
-        test_rrule(accumulate, /, 1 .+ rand(3, 4))
-        test_rrule(accumulate, ^, 1 .+ rand(2, 3); fkwargs=(; init=rand()))
-    end
-    @testset "accumulate(f, ::Tuple)" begin
-        # Simple
-        y1, b1 = rrule(CFG, accumulate, *, (1, 2, 3, 4); init=1)
-        @test y1 == (1, 2, 6, 24)
-        @test b1((1, 1, 1, 1)) == (NoTangent(), NoTangent(), Tangent{NTuple{4,Int}}(33, 16, 10, 6))
-
-        # Finite differencing
-        test_rrule(accumulate, *, Tuple(randn(5)); fkwargs=(; init=rand()))
-        test_rrule(accumulate, /, Tuple(1 .+ rand(5)); check_inferred=false)
+        # was: test_rrule(accumulate, *, randn(5); fkwargs=(; init=rand()))
+        test_rrule(_accumulate!, *, randn(5) ⊢ NoTangent(), randn(5), nothing, Some(rand()))
+        # was: test_rrule(accumulate, /, 1 .+ rand(3, 4))
+        test_rrule(_accumulate!, /, randn(4) ⊢ NoTangent(), 1 .+ rand(4), nothing, nothing)
+        # was: test_rrule(accumulate, ^, 1 .+ rand(2, 3); fkwargs=(; init=rand()))
+        test_rrule(_accumulate!, ^, randn(6) ⊢ NoTangent(), 1 .+ rand(6), nothing, Some(rand()))
     end
 end