tidy up

JuliaDiff · Jul 19, 2022 · 413b017 · 413b017
1 parent 9af7a64
commit 413b017
Showing 1 changed file with 31 additions and 65 deletions.
diff --git a/src/rulesets/Base/mapreduce.jl b/src/rulesets/Base/mapreduce.jl
@@ -465,7 +465,7 @@ function rrule(
     y = first(last(hobbits))
     project = ProjectTo(x)
     function foldl_pullback_tuple(dy)
-        trio = accumulate(_reverse1(hobbits); init=(0, dy, 0)) do (_, dc, _), (_, back)
+        trio = accumulate(reverse(hobbits); init=(0, dy, 0)) do (_, dc, _), (_, back)
             ds, da, db = back(dc)
             # Don't need to store every `da`, need one for the next iteration + the last.
         end
@@ -501,78 +501,43 @@ end
 
 # The implementation was originally for both tuples and arrays, although using accumulate
 # to carry intermediate results along creates arrays of tuples which could be avoided.
-# Using a loop can be a few times faster, this should be replaced.
-# Note also that it does not return a gradient for `init`.
+# Using a loop can be a few times faster, this should be replaced:
+# https://github.com/FluxML/Zygote.jl/issues/644#issuecomment-628762305
+
+# Note also that it does not return a gradient for `init`, now marked `@not_implemented`.
 
 function rrule(
-        config::RuleConfig{>:HasReverseMode}, ::typeof(Base.mapfoldl_impl), ::typeof(identity), op::G, init, x::Union{AbstractArray, Tuple};        
+        config::RuleConfig{>:HasReverseMode}, ::typeof(Base.mapfoldl_impl), ::typeof(identity), op::G, init, x::Union{AbstractArray, Tuple};
     ) where {G}
-    list, start = if init === _INIT
-        _drop1(x), first(x)
+    start, list = if init === Base._InitialValue()
+        Iterators.peel(x)
     else
         # Case with init keyword is simpler to understand first!
-        _reshape1(x, :), init  # (vec is for Julia 1.0, accumulate is fussy)
+        init, x
     end
-    hobbits = accumulate(list; init=(start, nothing)) do (a,_), b
-        # Here `a` is what we would normally cary forward, and `_` ignores
-        # the previous iteration's pullback function (needed later),
-        # while `b` is the fresh input from `list` as usual.
-        c, back = rrule_via_ad(config, op, a, b)  # LHS is just documentation here!
-        # We don't really need to store every `c`, last one is `foldl` output.
-        # (The name, BTW, is because "there and back again" is the subtitle of Tolkien's book.)
+    hobbits = accumulate(list; init=(start, nothing)) do (a, _), b
+        c, back = rrule_via_ad(config, op, a, b)
     end
     y = first(last(hobbits))
     axe = axes(x)
     project = ProjectTo(x)
     function unfoldl(dy)
-        trio = accumulate(_reverse1(hobbits); init=(0, dy, 0)) do (_, dc, _), (_, back)
+        trio = accumulate(Iterators.reverse(hobbits); init=(0, dy, 0)) do (_, dc, _), (_, back)
             ds, da, db = back(dc)
-            # Don't need to store every `da`, need one for the next iteration + maybe last
         end
         dop = sum(first, trio)
-        dx = map(last, _reverse1(trio))
-        if init === _INIT
-            # `hobbits` is one short
+        dx = map(last, Iterators.reverse(trio))
+        if init === Base._InitialValue()  # `hobbits` is one short
             dx = _vcat1(trio[end][2], dx)
         end
         d_init = @not_implemented "gradient for foldl does not at present include init, sorry"
-        return (NoTangent(), NoTangent(), dop, d_init, project(_reshape1(dx, axe)))
+        return (NoTangent(), NoTangent(), dop, d_init, project(reshape(dx, axe)))
     end
     return y, unfoldl
 end
 
-
-#####
-##### Iterator-or-Tuple functions
-#####
-
-# This zoo of underscore functions helps `foldl` & `accumulate` handle both tuples and arrays,
-# and also provides some alternatives for versions of Julia where iterators weren't supported.
-# Inspired by `Base._reverse`, used in defn of `foldr`.
-
-# To support 2nd derivatives, some may need their own gradient rules. And _drop1 should perhaps
-# be replaced by _peel1 like Iterators.peel
-
-_reverse1(x) = Iterators.reverse(x)
-_drop1(x) = Iterators.drop(x, 1)
-_zip2(x, y) = zip(x, y)  # for `accumulate`, below
-
-_reverse1(x::Tuple) = reverse(x)
-_drop1(x::Tuple) = Base.tail(x)
-_zip2(x::Tuple{Vararg{Any,N}}, y::Tuple{Vararg{Any,N}}) where N = ntuple(i -> (x[i],y[i]), N)
-
-const _INIT = Base._InitialValue()
-
 _vcat1(x, ys::AbstractVector) = vcat(x, ys)
 _vcat1(x::AbstractArray, ys::AbstractVector) = vcat([x], ys)
-_vcat1(x, ys::Tuple) = (x, ys...)
-
-_reshape1(x::AbstractArray, axe) = reshape(x, axe)
-_reshape1(x::Tuple, axe) = x
-
-_no_tuple_tangent(dx::Tangent) = ChainRulesCore.backing(dx)
-_no_tuple_tangent(dx) = dx
-
 
 #####
 ##### `accumulate`
@@ -584,13 +549,18 @@ _no_tuple_tangent(dx) = dx
 # Move it down to: `_accumulate!(op, B, A::AbstractVector, dims::Nothing, init::Nothing)`
 
 function rrule(
-        config::RuleConfig{>:HasReverseMode}, ::typeof(Base._accumulate!), op::G, y, x::AbstractVector, dims::Nothing, init,
+        config::RuleConfig{>:HasReverseMode},
+        ::typeof(Base._accumulate!), 
+        op::G, y::AbstractVector, 
+        x::AbstractVector, 
+        dims::Nothing, 
+        init,
     ) where {G}
 
-    list, start = if init === nothing
-        _drop1(x), first(x)
+    start, list = if init === nothing
+        Iterators.peel(x)
     else
-        x, something(init)
+        something(init), x
     end
     hobbits = accumulate(list; init = (start, nothing)) do (a, _), b
         c, back = rrule_via_ad(config, op, a, b)
@@ -607,28 +577,24 @@ function rrule(
     axe = axes(x)
     project = ProjectTo(x)
     function decumulate(dy)
-        dy_plain = _no_tuple_tangent(unthunk(dy))
-        rev_list = if init === nothing
-            # Here we rely on `zip` to stop early. Begin explicit with _reverse1(_drop1(...))
-            # gets "no method matching iterate(::Base.Iterators.Reverse{Base.Iterators.Drop{Array{"
-            _zip2(_reverse1(hobbits), _reverse1(dy_plain))
-        else
-            _zip2(_reverse1(hobbits), _reverse1(dy_plain))
-        end
+        dy_plain = unthunk(dy)
+        rev_list = zip(Iterators.reverse(hobbits), Iterators.reverse(dy_plain))
+        # Here we rely on `zip` to stop early when init === nothing. Begin explicit with Iterators.reverse(Iterators.drop(..., 1))
+        # gets "no method matching iterate(::Base.Iterators.Reverse{Base.Iterators.Drop{Array{"
         trio = accumulate(rev_list; init=(0, ZeroTangent(), 0)) do (_, dc, _), ((_, back), dz)
             ds, da, db = back(dc + dz)
             # Don't need to store every 'da', but need for next iteration, and the last one.
         end
         dop = sum(first, trio)
-        dx = map(last, _reverse1(trio))
+        dx = map(last, Iterators.reverse(trio))
         if init == nothing
             # `hobbits` is one short, and the first one is weird
             dx = _vcat1(trio[end][2] + dy_plain[1], dx)
         end
         dy = @not_implemented "no gradient for `B` in `accumulate!(f, B, A)`, the rule intends to support `accumulate` only"
         d_init_not = @not_implemented "gradient for accumulate does not at present include init, sorry"
         d_init = init === nothing ? NoTangent() : Tangent{typeof(init)}(; value = d_init_not)
-        return (NoTangent(), dop, dy, project(_reshape1(dx, axe)), NoTangent(), d_init)
+        return (NoTangent(), dop, dy, project(reshape(dx, axe)), NoTangent(), d_init)
     end
-    return _reshape1(y, axe), decumulate
+    return reshape(y, axe), decumulate
 end