fix: handle function contexts differently from constant contexts (#660)

* fix: handle function contexts differently from constant contexts * Typos * Typo * Fix Enzyme translation * Typo * Force annot * Coverage * Pass mode object to translator in Enzyme * Typo * Cleaner error
JuliaDiff · Dec 6, 2024 · 0f3d2c1 · 0f3d2c1 · gdalle · Dec 6, 2024
1 parent 6806fef
commit 0f3d2c1
Show file tree

Hide file tree

Showing 17 changed files with 496 additions and 279 deletions.
diff --git a/DifferentiationInterface/Project.toml b/DifferentiationInterface/Project.toml
@@ -1,7 +1,7 @@
 name = "DifferentiationInterface"
 uuid = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 authors = ["Guillaume Dalle", "Adrian Hill"]
-version = "0.6.26"
+version = "0.6.27"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"

diff --git a/DifferentiationInterface/ext/DifferentiationInterfaceChainRulesCoreExt/reverse_onearg.jl b/DifferentiationInterface/ext/DifferentiationInterfaceChainRulesCoreExt/reverse_onearg.jl
@@ -6,7 +6,11 @@ struct ChainRulesPullbackPrepSamePoint{Y,PB} <: DI.PullbackPrep
 end
 
 function DI.prepare_pullback(
-    f, ::AutoReverseChainRules, x, ty::NTuple, contexts::Vararg{DI.Constant,C}
+    f,
+    ::AutoReverseChainRules,
+    x,
+    ty::NTuple,
+    contexts::Vararg{DI.ConstantOrFunctionOrBackend,C},
 ) where {C}
     return DI.NoPullbackPrep()
 end
@@ -17,7 +21,7 @@ function DI.prepare_pullback_same_point(
     backend::AutoReverseChainRules,
     x,
     ty::NTuple,
-    contexts::Vararg{DI.Constant,C},
+    contexts::Vararg{DI.ConstantOrFunctionOrBackend,C},
 ) where {C}
     rc = ruleconfig(backend)
     y, pb = rrule_via_ad(rc, f, x, map(DI.unwrap, contexts)...)
@@ -30,7 +34,7 @@ function DI.value_and_pullback(
     backend::AutoReverseChainRules,
     x,
     ty::NTuple,
-    contexts::Vararg{DI.Constant,C},
+    contexts::Vararg{DI.ConstantOrFunctionOrBackend,C},
 ) where {C}
     rc = ruleconfig(backend)
     y, pb = rrule_via_ad(rc, f, x, map(DI.unwrap, contexts)...)
@@ -46,7 +50,7 @@ function DI.value_and_pullback(
     ::AutoReverseChainRules,
     x,
     ty::NTuple,
-    contexts::Vararg{DI.Constant,C},
+    contexts::Vararg{DI.ConstantOrFunctionOrBackend,C},
 ) where {C}
     (; y, pb) = prep
     tx = map(ty) do dy
@@ -61,7 +65,7 @@ function DI.pullback(
     ::AutoReverseChainRules,
     x,
     ty::NTuple,
-    contexts::Vararg{DI.Constant,C},
+    contexts::Vararg{DI.ConstantOrFunctionOrBackend,C},
 ) where {C}
     (; pb) = prep
     tx = map(ty) do dy

diff --git a/DifferentiationInterface/ext/DifferentiationInterfaceEnzymeExt/DifferentiationInterfaceEnzymeExt.jl b/DifferentiationInterface/ext/DifferentiationInterfaceEnzymeExt/DifferentiationInterfaceEnzymeExt.jl
@@ -7,6 +7,7 @@ using EnzymeCore:
     Active,
     Annotation,
     BatchDuplicated,
+    BatchDuplicatedNoNeed,
     BatchMixedDuplicated,
     Combined,
     Const,

diff --git a/DifferentiationInterface/ext/DifferentiationInterfaceEnzymeExt/forward_onearg.jl b/DifferentiationInterface/ext/DifferentiationInterfaceEnzymeExt/forward_onearg.jl
@@ -18,12 +18,12 @@ function DI.value_and_pushforward(
     tx::NTuple{1},
     contexts::Vararg{DI.Context,C},
 ) where {F,C}
-    f_and_df = get_f_and_df(f, backend)
+    mode = forward_withprimal(backend)
+    f_and_df = get_f_and_df(f, backend, mode)
     dx_sametype = convert(typeof(x), only(tx))
     x_and_dx = Duplicated(x, dx_sametype)
-    dy, y = autodiff(
-        forward_withprimal(backend), f_and_df, x_and_dx, map(translate, contexts)...
-    )
+    annotated_contexts = translate(backend, mode, Val(1), contexts...)
+    dy, y = autodiff(mode, f_and_df, x_and_dx, annotated_contexts...)
     return y, (dy,)
 end
 
@@ -35,12 +35,12 @@ function DI.value_and_pushforward(
     tx::NTuple{B},
     contexts::Vararg{DI.Context,C},
 ) where {F,B,C}
-    f_and_df = get_f_and_df(f, backend, Val(B))
+    mode = forward_withprimal(backend)
+    f_and_df = get_f_and_df(f, backend, mode, Val(B))
     tx_sametype = map(Fix1(convert, typeof(x)), tx)
     x_and_tx = BatchDuplicated(x, tx_sametype)
-    ty, y = autodiff(
-        forward_withprimal(backend), f_and_df, x_and_tx, map(translate, contexts)...
-    )
+    annotated_contexts = translate(backend, mode, Val(B), contexts...)
+    ty, y = autodiff(mode, f_and_df, x_and_tx, annotated_contexts...)
     return y, values(ty)
 end
 
@@ -52,12 +52,12 @@ function DI.pushforward(
     tx::NTuple{1},
     contexts::Vararg{DI.Context,C},
 ) where {F,C}
-    f_and_df = get_f_and_df(f, backend)
+    mode = forward_noprimal(backend)
+    f_and_df = get_f_and_df(f, backend, mode)
     dx_sametype = convert(typeof(x), only(tx))
     x_and_dx = Duplicated(x, dx_sametype)
-    dy = only(
-        autodiff(forward_noprimal(backend), f_and_df, x_and_dx, map(translate, contexts)...)
-    )
+    annotated_contexts = translate(backend, mode, Val(1), contexts...)
+    dy = only(autodiff(mode, f_and_df, x_and_dx, annotated_contexts...))
     return (dy,)
 end
 
@@ -69,12 +69,12 @@ function DI.pushforward(
     tx::NTuple{B},
     contexts::Vararg{DI.Context,C},
 ) where {F,B,C}
-    f_and_df = get_f_and_df(f, backend, Val(B))
+    mode = forward_noprimal(backend)
+    f_and_df = get_f_and_df(f, backend, mode, Val(B))
     tx_sametype = map(Fix1(convert, typeof(x)), tx)
     x_and_tx = BatchDuplicated(x, tx_sametype)
-    ty = only(
-        autodiff(forward_noprimal(backend), f_and_df, x_and_tx, map(translate, contexts)...)
-    )
+    annotated_contexts = translate(backend, mode, Val(B), contexts...)
+    ty = only(autodiff(mode, f_and_df, x_and_tx, annotated_contexts...))
     return values(ty)
 end
 
@@ -132,10 +132,9 @@ function DI.gradient(
     backend::AutoEnzyme{<:ForwardMode,<:Union{Nothing,Const}},
     x,
 ) where {F,B}
-    f_and_df = get_f_and_df(f, backend)
-    derivs = gradient(
-        forward_noprimal(backend), f_and_df, x; chunk=Val(B), shadows=prep.shadows
-    )
+    mode = forward_noprimal(backend)
+    f_and_df = get_f_and_df(f, backend, mode)
+    derivs = gradient(mode, f_and_df, x; chunk=Val(B), shadows=prep.shadows)
     return only(derivs)
 end
 
@@ -145,10 +144,9 @@ function DI.value_and_gradient(
     backend::AutoEnzyme{<:ForwardMode,<:Union{Nothing,Const}},
     x,
 ) where {F,B}
-    f_and_df = get_f_and_df(f, backend)
-    (; derivs, val) = gradient(
-        forward_withprimal(backend), f_and_df, x; chunk=Val(B), shadows=prep.shadows
-    )
+    mode = forward_withprimal(backend)
+    f_and_df = get_f_and_df(f, backend, mode)
+    (; derivs, val) = gradient(mode, f_and_df, x; chunk=Val(B), shadows=prep.shadows)
     return val, only(derivs)
 end
 
@@ -201,10 +199,9 @@ function DI.jacobian(
     backend::AutoEnzyme{<:Union{ForwardMode,Nothing},<:Union{Nothing,Const}},
     x,
 ) where {F,B}
-    f_and_df = get_f_and_df(f, backend)
-    derivs = jacobian(
-        forward_noprimal(backend), f_and_df, x; chunk=Val(B), shadows=prep.shadows
-    )
+    mode = forward_noprimal(backend)
+    f_and_df = get_f_and_df(f, backend, mode)
+    derivs = jacobian(mode, f_and_df, x; chunk=Val(B), shadows=prep.shadows)
     jac_tensor = only(derivs)
     return maybe_reshape(jac_tensor, prep.output_length, length(x))
 end
@@ -215,10 +212,9 @@ function DI.value_and_jacobian(
     backend::AutoEnzyme{<:Union{ForwardMode,Nothing},<:Union{Nothing,Const}},
     x,
 ) where {F,B}
-    f_and_df = get_f_and_df(f, backend)
-    (; derivs, val) = jacobian(
-        forward_withprimal(backend), f_and_df, x; chunk=Val(B), shadows=prep.shadows
-    )
+    mode = forward_withprimal(backend)
+    f_and_df = get_f_and_df(f, backend, mode)
+    (; derivs, val) = jacobian(mode, f_and_df, x; chunk=Val(B), shadows=prep.shadows)
     jac_tensor = only(derivs)
     return val, maybe_reshape(jac_tensor, prep.output_length, length(x))
 end

diff --git a/DifferentiationInterface/ext/DifferentiationInterfaceEnzymeExt/forward_twoarg.jl b/DifferentiationInterface/ext/DifferentiationInterfaceEnzymeExt/forward_twoarg.jl
@@ -20,19 +20,14 @@ function DI.value_and_pushforward(
     tx::NTuple{1},
     contexts::Vararg{DI.Context,C},
 ) where {F,C}
-    f!_and_df! = get_f_and_df(f!, backend)
+    mode = forward_noprimal(backend)
+    f!_and_df! = get_f_and_df(f!, backend, mode)
     dx_sametype = convert(typeof(x), only(tx))
     dy_sametype = make_zero(y)
     x_and_dx = Duplicated(x, dx_sametype)
     y_and_dy = Duplicated(y, dy_sametype)
-    autodiff(
-        forward_noprimal(backend),
-        f!_and_df!,
-        Const,
-        y_and_dy,
-        x_and_dx,
-        map(translate, contexts)...,
-    )
+    annotated_contexts = translate(backend, mode, Val(1), contexts...)
+    autodiff(mode, f!_and_df!, Const, y_and_dy, x_and_dx, annotated_contexts...)
     return y, (dy_sametype,)
 end
 
@@ -45,19 +40,14 @@ function DI.value_and_pushforward(
     tx::NTuple{B},
     contexts::Vararg{DI.Context,C},
 ) where {F,B,C}
-    f!_and_df! = get_f_and_df(f!, backend, Val(B))
+    mode = forward_noprimal(backend)
+    f!_and_df! = get_f_and_df(f!, backend, mode, Val(B))
     tx_sametype = map(Fix1(convert, typeof(x)), tx)
     ty_sametype = ntuple(_ -> make_zero(y), Val(B))
     x_and_tx = BatchDuplicated(x, tx_sametype)
     y_and_ty = BatchDuplicated(y, ty_sametype)
-    autodiff(
-        forward_noprimal(backend),
-        f!_and_df!,
-        Const,
-        y_and_ty,
-        x_and_tx,
-        map(translate, contexts)...,
-    )
+    annotated_contexts = translate(backend, mode, Val(B), contexts...)
+    autodiff(mode, f!_and_df!, Const, y_and_ty, x_and_tx, annotated_contexts...)
     return y, ty_sametype
 end
 

diff --git a/DifferentiationInterface/ext/DifferentiationInterfaceEnzymeExt/reverse_onearg.jl b/DifferentiationInterface/ext/DifferentiationInterfaceEnzymeExt/reverse_onearg.jl
@@ -69,13 +69,14 @@ function DI.value_and_pullback(
     ty::NTuple{1},
     contexts::Vararg{DI.Context,C},
 ) where {F,C}
-    f_and_df = force_annotation(get_f_and_df(f, backend))
     mode = reverse_split_withprimal(backend)
+    f_and_df = force_annotation(get_f_and_df(f, backend, mode))
     IA = guess_activity(typeof(x), mode)
     RA = guess_activity(eltype(ty), mode)
     dx = make_zero(x)
+    annotated_contexts = translate(backend, mode, Val(1), contexts...)
     dinputs, result = seeded_autodiff_thunk(
-        mode, only(ty), f_and_df, RA, annotate(IA, x, dx), map(translate, contexts)...
+        mode, only(ty), f_and_df, RA, annotate(IA, x, dx), annotated_contexts...
     )
     new_dx = first(dinputs)
     if isnothing(new_dx)
@@ -93,13 +94,14 @@ function DI.value_and_pullback(
     ty::NTuple{B},
     contexts::Vararg{DI.Context,C},
 ) where {F,B,C}
-    f_and_df = force_annotation(get_f_and_df(f, backend, Val(B)))
     mode = reverse_split_withprimal(backend)
+    f_and_df = force_annotation(get_f_and_df(f, backend, mode, Val(B)))
     IA = batchify_activity(guess_activity(typeof(x), mode), Val(B))
     RA = batchify_activity(guess_activity(eltype(ty), mode), Val(B))
     tx = ntuple(_ -> make_zero(x), Val(B))
+    annotated_contexts = translate(backend, mode, Val(B), contexts...)
     dinputs, result = batch_seeded_autodiff_thunk(
-        mode, ty, f_and_df, RA, annotate(IA, x, tx), map(translate, contexts)...
+        mode, ty, f_and_df, RA, annotate(IA, x, tx), annotated_contexts...
     )
     new_tx = values(first(dinputs))
     if isnothing(new_tx)
@@ -131,18 +133,14 @@ function DI.value_and_pullback!(
     ty::NTuple{1},
     contexts::Vararg{DI.Context,C},
 ) where {F,C}
-    f_and_df = force_annotation(get_f_and_df(f, backend))
     mode = reverse_split_withprimal(backend)
+    f_and_df = force_annotation(get_f_and_df(f, backend, mode))
     RA = guess_activity(eltype(ty), mode)
     dx_righttype = convert(typeof(x), only(tx))
     make_zero!(dx_righttype)
+    annotated_contexts = translate(backend, mode, Val(1), contexts...)
     _, result = seeded_autodiff_thunk(
-        mode,
-        only(ty),
-        f_and_df,
-        RA,
-        Duplicated(x, dx_righttype),
-        map(translate, contexts)...,
+        mode, only(ty), f_and_df, RA, Duplicated(x, dx_righttype), annotated_contexts...
     )
     only(tx) === dx_righttype || copyto!(only(tx), dx_righttype)
     return result, tx
@@ -157,18 +155,14 @@ function DI.value_and_pullback!(
     ty::NTuple{B},
     contexts::Vararg{DI.Context,C},
 ) where {F,B,C}
-    f_and_df = force_annotation(get_f_and_df(f, backend, Val(B)))
     mode = reverse_split_withprimal(backend)
+    f_and_df = force_annotation(get_f_and_df(f, backend, mode, Val(B)))
     RA = batchify_activity(guess_activity(eltype(ty), mode), Val(B))
     tx_righttype = map(Fix1(convert, typeof(x)), tx)
     make_zero!(tx_righttype)
+    annotated_contexts = translate(backend, mode, Val(B), contexts...)
     _, result = batch_seeded_autodiff_thunk(
-        mode,
-        ty,
-        f_and_df,
-        RA,
-        BatchDuplicated(x, tx_righttype),
-        map(translate, contexts)...,
+        mode, ty, f_and_df, RA, BatchDuplicated(x, tx_righttype), annotated_contexts...
     )
     foreach(copyto!, tx, tx_righttype)
     return result, tx
@@ -196,12 +190,13 @@ function DI.gradient(
     x,
     contexts::Vararg{DI.Context,C},
 ) where {F,C}
-    f_and_df = get_f_and_df(f, backend)
     mode = reverse_noprimal(backend)
+    f_and_df = get_f_and_df(f, backend, mode)
     IA = guess_activity(typeof(x), mode)
     grad = make_zero(x)
+    annotated_contexts = translate(backend, mode, Val(1), contexts...)
     dinputs = only(
-        autodiff(mode, f_and_df, Active, annotate(IA, x, grad), map(translate, contexts)...)
+        autodiff(mode, f_and_df, Active, annotate(IA, x, grad), annotated_contexts...)
     )
     new_grad = first(dinputs)
     if isnothing(new_grad)
@@ -217,12 +212,13 @@ function DI.value_and_gradient(
     x,
     contexts::Vararg{DI.Context,C},
 ) where {F,C}
-    f_and_df = get_f_and_df(f, backend)
     mode = reverse_withprimal(backend)
+    f_and_df = get_f_and_df(f, backend, mode)
     IA = guess_activity(typeof(x), mode)
     grad = make_zero(x)
+    annotated_contexts = translate(backend, mode, Val(1), contexts...)
     dinputs, result = autodiff(
-        mode, f_and_df, Active, annotate(IA, x, grad), map(translate, contexts)...
+        mode, f_and_df, Active, annotate(IA, x, grad), annotated_contexts...
     )
     new_grad = first(dinputs)
     if isnothing(new_grad)
@@ -263,16 +259,12 @@ function DI.gradient!(
     x,
     contexts::Vararg{DI.Context,C},
 ) where {F,C}
-    f_and_df = get_f_and_df(f, backend)
+    mode = reverse_noprimal(backend)
+    f_and_df = get_f_and_df(f, backend, mode)
     grad_righttype = grad isa typeof(x) ? grad : prep.grad_righttype
     make_zero!(grad_righttype)
-    autodiff(
-        reverse_noprimal(backend),
-        f_and_df,
-        Active,
-        Duplicated(x, grad_righttype),
-        map(translate, contexts)...,
-    )
+    annotated_contexts = translate(backend, mode, Val(1), contexts...)
+    autodiff(mode, f_and_df, Active, Duplicated(x, grad_righttype), annotated_contexts...)
     grad === grad_righttype || copyto!(grad, grad_righttype)
     return grad
 end
@@ -295,15 +287,13 @@ function DI.value_and_gradient!(
     x,
     contexts::Vararg{DI.Context,C},
 ) where {F,C}
-    f_and_df = get_f_and_df(f, backend)
+    mode = reverse_withprimal(backend)
+    f_and_df = get_f_and_df(f, backend, mode)
     grad_righttype = grad isa typeof(x) ? grad : prep.grad_righttype
     make_zero!(grad_righttype)
+    annotated_contexts = translate(backend, mode, Val(1), contexts...)
     _, y = autodiff(
-        reverse_withprimal(backend),
-        f_and_df,
-        Active,
-        Duplicated(x, grad_righttype),
-        map(translate, contexts)...,
+        mode, f_and_df, Active, Duplicated(x, grad_righttype), annotated_contexts...
     )
     grad === grad_righttype || copyto!(grad, grad_righttype)
     return y, grad