error when differentiating reshape #2214

Open
simeonschaub opened this issue Dec 22, 2024 · 4 comments

@simeonschaub

I get the following error when trying to differentiate a Lux model through Enzyme:

LoadError: Constant memory is stored (or returned) to a differentiable variable.
As a result, Enzyme cannot provably ensure correctness and throws this error.
This might be due to the use of a constant variable as temporary storage for active memory (https://enzyme.mit.edu/julia/stable/faq/#Runtime-Activity).
If Enzyme should be able to prove this use non-differentable, open an issue!
To work around this issue, either:
 a) rewrite this variable to not be conditionally active (fastest, but requires a code change), or
 b) set the Enzyme mode to turn on runtime activity (e.g. autodiff(set_runtime_activity(Reverse), ...) ). This will maintain correctness, but may slightly reduce performance.
Mismatched activity for:   store i8* %9, i8* addrspace(11)* %.repack, align 8, !dbg !551, !tbaa !498, !alias.scope !501, !noalias !552 const val:   %9 = load i8*, i8* addrspace(11)* %8, align 8, !dbg !478, !tbaa !498, !alias.scope !501, !noalias !504, !enzyme_type !509, !enzymejl_byref_BITS_VALUE !0, !enzymejl_source_type_Ptr\7BFloat64\7D !0
 value=Unknown object of type Ptr{Float64}
 llvalue=  %9 = load i8*, i8* addrspace(11)* %8, align 8, !dbg !478, !tbaa !498, !alias.scope !501, !noalias !504, !enzyme_type !509, !enzymejl_byref_BITS_VALUE !0, !enzymejl_source_type_Ptr\7BFloat64\7D !0

Stacktrace:
 [1] reshape
   @ ./reshapedarray.jl:60
 [2] reshape
   @ ./reshapedarray.jl:129
 [3] reshape
   @ ./reshapedarray.jl:128
 [4] #14
   @ ~/Nextcloud/Documents/Research/VAE/reproducer.jl:8
 [5] WrappedFunction
   @ ~/.julia/packages/Lux/9hFIj/src/layers/basic.jl:255
 [6] apply
   @ ~/.julia/packages/LuxCore/GlbG3/src/LuxCore.jl:155
 [7] macro expansion
   @ ~/.julia/packages/Lux/9hFIj/src/layers/containers.jl:0
 [8] applychain
   @ ~/.julia/packages/Lux/9hFIj/src/layers/containers.jl:482

Stacktrace:
  [1] reshape
    @ ./reshapedarray.jl:60 [inlined]
  [2] reshape
    @ ./reshapedarray.jl:129 [inlined]
  [3] reshape
    @ ./reshapedarray.jl:128 [inlined]
  [4] #14
    @ ~/Nextcloud/Documents/Research/VAE/reproducer.jl:8 [inlined]
  [5] WrappedFunction
    @ ~/.julia/packages/Lux/9hFIj/src/layers/basic.jl:255 [inlined]
  [6] apply
    @ ~/.julia/packages/LuxCore/GlbG3/src/LuxCore.jl:155 [inlined]
  [7] macro expansion
    @ ~/.julia/packages/Lux/9hFIj/src/layers/containers.jl:0 [inlined]
  [8] applychain
    @ ~/.julia/packages/Lux/9hFIj/src/layers/containers.jl:482
  [9] Chain
    @ ~/.julia/packages/Lux/9hFIj/src/layers/containers.jl:480 [inlined]
 [10] AutoEncoder
    @ ~/Nextcloud/Documents/Research/VAE/reproducer.jl:20 [inlined]
 [11] loss_function
    @ ~/Nextcloud/Documents/Research/VAE/reproducer.jl:26 [inlined]
 [12] #1
    @ ~/.julia/packages/Lux/9hFIj/src/helpers/training.jl:251 [inlined]
 [13] diffejulia__1_20688_inner_146wrap
    @ ~/.julia/packages/Lux/9hFIj/src/helpers/training.jl:0
 [14] macro expansion
    @ ~/.julia/packages/Enzyme/ydGh2/src/compiler.jl:5218 [inlined]
 [15] enzyme_call
    @ ~/.julia/packages/Enzyme/ydGh2/src/compiler.jl:4764 [inlined]
 [16] CombinedAdjointThunk
    @ ~/.julia/packages/Enzyme/ydGh2/src/compiler.jl:4636 [inlined]
 [17] autodiff
    @ ~/.julia/packages/Enzyme/ydGh2/src/Enzyme.jl:503 [inlined]
 [18] compute_gradients_impl
    @ ~/.julia/packages/Lux/9hFIj/ext/LuxEnzymeExt/training.jl:8 [inlined]
 [19] compute_gradients(ad::AutoEnzyme{Nothing, Nothing}, obj_fn::typeof(loss_function), data::Array{Float64, 3}, ts::Lux.Training.TrainState{Nothing, Nothing, AutoEncoder{Chain{@NamedTuple{layer_1::WrappedFunction{var"#14#23"}, layer_2::Dense{typeof(relu), Int64, Int64, Nothing, Nothing, Static.True}, layer_3::Dense{typeof(relu), Int64, Int64, Nothing, Nothing, Static.True}}, Nothing}, Chain{@NamedTuple{layer_1::Dense{typeof(relu), Int64, Int64, Nothing, Nothing, Static.True}, layer_2::Dense{typeof(σ), Int64, Int64, Nothing, Nothing, Static.True}, layer_3::WrappedFunction{var"#15#24"}}, Nothing}}, @NamedTuple{encoder::@NamedTuple{layer_1::@NamedTuple{}, layer_2::@NamedTuple{weight::Matrix{Float32}, bias::Vector{Float32}}, layer_3::@NamedTuple{weight::Matrix{Float32}, bias::Vector{Float32}}}, decoder::@NamedTuple{layer_1::@NamedTuple{weight::Matrix{Float32}, bias::Vector{Float32}}, layer_2::@NamedTuple{weight::Matrix{Float32}, bias::Vector{Float32}}, layer_3::@NamedTuple{}}}, @NamedTuple{encoder::@NamedTuple{layer_1::@NamedTuple{}, layer_2::@NamedTuple{}, layer_3::@NamedTuple{}}, decoder::@NamedTuple{layer_1::@NamedTuple{}, layer_2::@NamedTuple{}, layer_3::@NamedTuple{}}}, Adam, @NamedTuple{encoder::@NamedTuple{layer_1::@NamedTuple{}, layer_2::@NamedTuple{weight::Optimisers.Leaf{Adam, Tuple{Matrix{Float32}, Matrix{Float32}, Tuple{Float32, Float32}}}, bias::Optimisers.Leaf{Adam, Tuple{Vector{Float32}, Vector{Float32}, Tuple{Float32, Float32}}}}, layer_3::@NamedTuple{weight::Optimisers.Leaf{Adam, Tuple{Matrix{Float32}, Matrix{Float32}, Tuple{Float32, Float32}}}, bias::Optimisers.Leaf{Adam, Tuple{Vector{Float32}, Vector{Float32}, Tuple{Float32, Float32}}}}}, decoder::@NamedTuple{layer_1::@NamedTuple{weight::Optimisers.Leaf{Adam, Tuple{Matrix{Float32}, Matrix{Float32}, Tuple{Float32, Float32}}}, bias::Optimisers.Leaf{Adam, Tuple{Vector{Float32}, Vector{Float32}, Tuple{Float32, Float32}}}}, layer_2::@NamedTuple{weight::Optimisers.Leaf{Adam, Tuple{Matrix{Float32}, Matrix{Float32}, Tuple{Float32, Float32}}}, bias::Optimisers.Leaf{Adam, Tuple{Vector{Float32}, Vector{Float32}, Tuple{Float32, Float32}}}}, layer_3::@NamedTuple{}}}})
    @ Lux.Training ~/.julia/packages/Lux/9hFIj/src/helpers/training.jl:198
 [20] single_train_step_impl!
    @ ~/.julia/packages/Lux/9hFIj/src/helpers/training.jl:301 [inlined]
 [21] single_train_step!(backend::AutoEnzyme{Nothing, Nothing}, obj_fn::typeof(loss_function), data::Array{Float64, 3}, ts::Lux.Training.TrainState{Nothing, Nothing, AutoEncoder{Chain{@NamedTuple{layer_1::WrappedFunction{var"#14#23"}, layer_2::Dense{typeof(relu), Int64, Int64, Nothing, Nothing, Static.True}, layer_3::Dense{typeof(relu), Int64, Int64, Nothing, Nothing, Static.True}}, Nothing}, Chain{@NamedTuple{layer_1::Dense{typeof(relu), Int64, Int64, Nothing, Nothing, Static.True}, layer_2::Dense{typeof(σ), Int64, Int64, Nothing, Nothing, Static.True}, layer_3::WrappedFunction{var"#15#24"}}, Nothing}}, @NamedTuple{encoder::@NamedTuple{layer_1::@NamedTuple{}, layer_2::@NamedTuple{weight::Matrix{Float32}, bias::Vector{Float32}}, layer_3::@NamedTuple{weight::Matrix{Float32}, bias::Vector{Float32}}}, decoder::@NamedTuple{layer_1::@NamedTuple{weight::Matrix{Float32}, bias::Vector{Float32}}, layer_2::@NamedTuple{weight::Matrix{Float32}, bias::Vector{Float32}}, layer_3::@NamedTuple{}}}, @NamedTuple{encoder::@NamedTuple{layer_1::@NamedTuple{}, layer_2::@NamedTuple{}, layer_3::@NamedTuple{}}, decoder::@NamedTuple{layer_1::@NamedTuple{}, layer_2::@NamedTuple{}, layer_3::@NamedTuple{}}}, Adam, @NamedTuple{encoder::@NamedTuple{layer_1::@NamedTuple{}, layer_2::@NamedTuple{weight::Optimisers.Leaf{Adam, Tuple{Matrix{Float32}, Matrix{Float32}, Tuple{Float32, Float32}}}, bias::Optimisers.Leaf{Adam, Tuple{Vector{Float32}, Vector{Float32}, Tuple{Float32, Float32}}}}, layer_3::@NamedTuple{weight::Optimisers.Leaf{Adam, Tuple{Matrix{Float32}, Matrix{Float32}, Tuple{Float32, Float32}}}, bias::Optimisers.Leaf{Adam, Tuple{Vector{Float32}, Vector{Float32}, Tuple{Float32, Float32}}}}}, decoder::@NamedTuple{layer_1::@NamedTuple{weight::Optimisers.Leaf{Adam, Tuple{Matrix{Float32}, Matrix{Float32}, Tuple{Float32, Float32}}}, bias::Optimisers.Leaf{Adam, Tuple{Vector{Float32}, Vector{Float32}, Tuple{Float32, Float32}}}}, layer_2::@NamedTuple{weight::Optimisers.Leaf{Adam, Tuple{Matrix{Float32}, Matrix{Float32}, Tuple{Float32, Float32}}}, bias::Optimisers.Leaf{Adam, Tuple{Vector{Float32}, Vector{Float32}, Tuple{Float32, Float32}}}}, layer_3::@NamedTuple{}}}})
    @ Lux.Training ~/.julia/packages/Lux/9hFIj/src/helpers/training.jl:276
 [22] train_model!(model::AutoEncoder{Chain{@NamedTuple{layer_1::WrappedFunction{var"#14#23"}, layer_2::Dense{typeof(relu), Int64, Int64, Nothing, Nothing, Static.True}, layer_3::Dense{typeof(relu), Int64, Int64, Nothing, Nothing, Static.True}}, Nothing}, Chain{@NamedTuple{layer_1::Dense{typeof(relu), Int64, Int64, Nothing, Nothing, Static.True}, layer_2::Dense{typeof(σ), Int64, Int64, Nothing, Nothing, Static.True}, layer_3::WrappedFunction{var"#15#24"}}, Nothing}}, ps::@NamedTuple{encoder::@NamedTuple{layer_1::@NamedTuple{}, layer_2::@NamedTuple{weight::Matrix{Float32}, bias::Vector{Float32}}, layer_3::@NamedTuple{weight::Matrix{Float32}, bias::Vector{Float32}}}, decoder::@NamedTuple{layer_1::@NamedTuple{weight::Matrix{Float32}, bias::Vector{Float32}}, layer_2::@NamedTuple{weight::Matrix{Float32}, bias::Vector{Float32}}, layer_3::@NamedTuple{}}}, st::@NamedTuple{encoder::@NamedTuple{layer_1::@NamedTuple{}, layer_2::@NamedTuple{}, layer_3::@NamedTuple{}}, decoder::@NamedTuple{layer_1::@NamedTuple{}, layer_2::@NamedTuple{}, layer_3::@NamedTuple{}}}, x_data::Vector{Array{Float64, 3}})
    @ Main ~/Nextcloud/Documents/Research/VAE/reproducer.jl:34
 [23] top-level scope
    @ ~/Nextcloud/Documents/Research/VAE/reproducer.jl:48
in expression starting at /home/simeon/Nextcloud/Documents/Research/VAE/reproducer.jl:48

Unfortunately, I wasn't able to narrow this down to a more minimal example, so here is the code to reproduce:

using Lux, Random, Optimisers, Enzyme

training_data = [rand(28, 28, 32) for _ in 1:10]

@kwdef struct AutoEncoder{E, D} <: LuxCore.AbstractLuxContainerLayer{(:encoder, :decoder)}
    num_hidden::Int = 8
    encoder::E = Chain(
        x -> reshape(x, 784, :),
        Dense(784 => 256, relu),
        Dense(256 => num_hidden, relu),
    )
    decoder::D = Chain(
        Dense(num_hidden => 256, relu),
        Dense(256 => 784, sigmoid),
        x -> reshape(x, 28, 28, :),
    )
end

function ((; encoder, decoder)::AutoEncoder)(x, ps, st)
    encoded, st_encoder = encoder(x, ps.encoder, st.encoder)
    decoded, st_decoder = decoder(encoded, ps.decoder, st.decoder)
    return (; encoded, decoded), (; encoder = st_encoder, decoder = st_decoder)
end

function loss_function(model, ps, st, x)
    (; decoded), st = model(x, ps, st)
    return MSELoss()(decoded, x), st, (;)
end

function train_model!(model, ps, st, x_data)
    train_state = Lux.Training.TrainState(model, ps, st, Adam(0.001f0))
    total_loss = 0.0  # running loss accumulator

    for x in x_data
        _, loss, _, train_state = Lux.Training.single_train_step!(AutoEnzyme(), loss_function, x, train_state)
        total_loss += loss
    end

    return model, ps, st
end

model = AutoEncoder()

rng = Random.default_rng()
Random.seed!(rng, 0)

ps, st = Lux.setup(rng, model)

train_model!(model, ps, st, training_data)
@wsmoses
Member

wsmoses commented Dec 22, 2024

Per the error message, did setting runtime activity resolve it for you?
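
For reference, a minimal sketch of how that could look with the reproducer above, assuming the mode keyword of AutoEnzyme (from ADTypes) is forwarded to Enzyme by Lux's Enzyme extension:

using Enzyme  # AutoEnzyme itself is re-exported by Lux via ADTypes

# Reverse mode with runtime activity enabled, used as the AD backend
backend = AutoEnzyme(; mode=Enzyme.set_runtime_activity(Enzyme.Reverse))

# then, inside the training loop:
# Lux.Training.single_train_step!(backend, loss_function, x, train_state)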

@simeonschaub
Author

simeonschaub commented Dec 23, 2024

OK, here's a much simpler reproducer:

autodiff(Reverse, x -> sum(reshape(reshape(x, 4), 2, 2) .- x), Active, Const(rand(2, 2))) # errors
autodiff(set_runtime_activity(Reverse), x -> sum(reshape(reshape(x, 4), 2, 2) .- x), Active, Const(rand(2, 2))) # works

The first one still throws the following error:

LoadError: Constant memory is stored (or returned) to a differentiable variable.
As a result, Enzyme cannot provably ensure correctness and throws this error.
This might be due to the use of a constant variable as temporary storage for active memory (https://enzyme.mit.edu/julia/stable/faq/#Runtime-Activity).
If Enzyme should be able to prove this use non-differentable, open an issue!
To work around this issue, either:
 a) rewrite this variable to not be conditionally active (fastest, but requires a code change), or
 b) set the Enzyme mode to turn on runtime activity (e.g. autodiff(set_runtime_activity(Reverse), ...) ). This will maintain correctness, but may slightly reduce performance.
Mismatched activity for:   %30 = phi {} addrspace(10)* [ %23, %L35 ], [ %11, %L24 ] const val:   %11 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %10, align 8, !dbg !23, !tbaa !30, !alias.scope !33, !noalias !36, !dereferenceable_or_null !41, !align !42, !enzyme_type !43, !enzymejl_source_type_Memory\7BFloat64\7D !0, !enzymejl_byref_MUT_REF !0
 value=Unknown object of type Memory{Float64}
 llvalue=  %11 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %10, align 8, !dbg !23, !tbaa !30, !alias.scope !33, !noalias !36, !dereferenceable_or_null !41, !align !42, !enzyme_type !43, !enzymejl_source_type_Memory\7BFloat64\7D !0, !enzymejl_byref_MUT_REF !0

Stacktrace:
 [1] reshape
   @ ./reshapedarray.jl:60
 [2] reshape
   @ ./reshapedarray.jl:127
 [3] #83
   @ ~/Nextcloud/Documents/Research/VAE/reproducer.jl:38

Stacktrace:
  [1] reshape
    @ ./reshapedarray.jl:54 [inlined]
  [2] reshape
    @ ./reshapedarray.jl:127 [inlined]
  [3] #83
    @ ~/Nextcloud/Documents/Research/VAE/reproducer.jl:38 [inlined]
  [4] diffejulia__83_93822wrap
    @ ~/Nextcloud/Documents/Research/VAE/reproducer.jl:0
  [5] macro expansion
    @ ~/.julia/packages/Enzyme/ydGh2/src/compiler.jl:5218 [inlined]
  [6] enzyme_call
    @ ~/.julia/packages/Enzyme/ydGh2/src/compiler.jl:4764 [inlined]
  [7] CombinedAdjointThunk
    @ ~/.julia/packages/Enzyme/ydGh2/src/compiler.jl:4636 [inlined]
  [8] autodiff
    @ ~/.julia/packages/Enzyme/ydGh2/src/Enzyme.jl:503 [inlined]
  [9] autodiff(mode::ReverseMode{false, false, FFIABI, false, false}, f::var"#83#84", ::Type{Active}, args::Const{Matrix{Float64}})
    @ Enzyme ~/.julia/packages/Enzyme/ydGh2/src/Enzyme.jl:524
 [10] top-level scope
    @ ~/Nextcloud/Documents/Research/VAE/reproducer.jl:38

while the second one with runtime activity does indeed work.
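
For completeness, workaround (a) from the error message also seems applicable to the small reproducer: if the array is passed as Duplicated instead of Const, the reshaped aliases stay active, so no constant memory is stored into a differentiable variable. A sketch (assuming the goal is simply a gradient with respect to x rather than a literal Const argument):

x  = rand(2, 2)
dx = zero(x)  # gradient accumulator
# with x active, the nested reshape no longer mixes constant and active memory
autodiff(Reverse, x -> sum(reshape(reshape(x, 4), 2, 2) .- x), Active, Duplicated(x, dx))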

@simeonschaub
Author

Both work fine on Julia 1.10, so I presume this is due to the Memory-related changes?

@wsmoses
Member

wsmoses commented Dec 23, 2024

Yeah, almost certainly (especially since the error message says the relevant type is Memory{Float64}).
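
For illustration, the aliasing introduced by the new Memory-backed arrays can be checked directly (a sketch relying on internal Array fields, so only valid on Julia 1.11+ and subject to change):

x = rand(2, 2)
y = reshape(x, 4)
y isa Array                # expected: true, reshape of an Array returns an Array
x.ref.mem === y.ref.mem    # expected: true, both wrap the same underlying Memory{Float64}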
