Changed the method of _zComp! in ConvLayer and replaced CartesianIndex with eachindex in some Nn layers
sylvaticus committed Jan 18, 2024
1 parent 98eb947 commit c6dc373
Showing 4 changed files with 79 additions and 38 deletions.
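For context, a minimal standalone sketch (plain Julia, illustrative only, not code from this commit) of the two iteration styles the commit message refers to: CartesianIndices yields multi-dimensional indices that the previous code converted to tuples and splatted on every access, while eachindex yields plain linear indices.

A = rand(3, 4, 2)

# previous style: iterate multi-dimensional indices, convert to tuples and splat them
function sum_cartesian(A)
    s = zero(eltype(A))
    for idx in CartesianIndices(A)
        s += A[Tuple(idx)...]
    end
    return s
end

# new style: iterate plain linear indices
function sum_linear(A)
    s = zero(eltype(A))
    for idx in eachindex(A)
        s += A[idx]
    end
    return s
end

sum_cartesian(A) ≈ sum_linear(A)   # true

Both forms compute the same result; the linear-index form simply avoids the per-element tuple construction and splatting.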
24 changes: 19 additions & 5 deletions docs/src/tutorials/Image recognition/Image_recognition.jl
@@ -62,12 +62,26 @@ m = NeuralNetworkEstimator(layers=layers,loss=squared_cost,verbosity=HIGH,b
# Training the whole MNIST set takes approximately 16 minutes on a mid-level laptop (on CPU), leading to a test accuracy of 0.969
(x_debug,x_other),(y_debug_oh,y_other_oh) = partition([x_train,y_train_oh],[0.01,0.99],rng=copy(TESTRNG))

ŷ = fit!(m,x_debug,y_debug_oh)

## ŷ = fit!(m,x_train,y_train_oh)
#preprocess!.(layers)
# 0.131836 seconds (477.02 k allocations: 53.470 MiB, 72.73% compilation time)
#@code_warntype preprocess!(l5)

##y_true = inverse_predict(ohm,convert(Matrix{Bool},y_train_oh))
# y_true = inverse_predict(ohm,convert(Matrix{Bool},y_debug_oh))
ŷ = fit!(m,x_debug,y_debug_oh)
#@btime fit!(m,x_debug,y_debug_oh)
# 1%: 15.909 s (1940246 allocations: 1.39 GiB)
# 17.509 s (1039126 allocations: 1.37 GiB)
# 15.766 s (1039111 allocations: 1.37 GiB)
# 14.669 s (3129139 allocations: 1.64 GiB) (with threads)
# 18.119 s (1039121 allocations: 1.37 GiB)
# 14.966 s (1039123 allocations: 1.37 GiB) (without threads)
# 19.357 s (1039123 allocations: 1.37 GiB)

#println(now(), " ", "*** prefit..." ) #src
#ŷ = fit!(m,x_train,y_train_oh)
#println(now(), " ", "*** postfit..." ) #src

#y_true = inverse_predict(ohm,convert(Matrix{Bool},y_train_oh))
y_true = inverse_predict(ohm,convert(Matrix{Bool},y_debug_oh))
ŷ_nonoh = inverse_predict(ohm,ŷ)
accuracy(y_true,ŷ_nonoh)
hcat(y_true,ŷ_nonoh)
50 changes: 44 additions & 6 deletions src/Nn/default_layers/ConvLayer.jl
@@ -40,7 +40,10 @@ struct ConvLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Functio
y_ids::Vector{SVector{NDPLUS1,Int64}}
"w ids of the convolution (computed in `preprocessing`` - itself at the beginning of `train`"
w_ids::Vector{SVector{NDPLUS2,Int64}}

"A y-dims array of vectors of ids of x(s) contributing to the giving y"
y_to_x_ids::Array{Vector{NTuple{NDPLUS1,Int64}},NDPLUS1}
"A y-dims array of vectors of corresponding w(s) contributing to the giving y"
y_to_w_ids::Array{Vector{NTuple{NDPLUS2,Int64}},NDPLUS1}

@doc """
$(TYPEDSIGNATURES)
@@ -59,7 +62,7 @@ struct ConvLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Functio
* `padding`: Integer or 2-element tuple of tuples of the starting and ending padding across the various dimensions [def: `nothing`, i.e. set the padding required to keep the same dimensions in output (with stride==1)]
* `f`: Activation function [def: `relu`]
* `df`: Derivative of the activation function [default: try to match a known function, AD otherwise. Use `nothing` to force AD]
* `kernel_eltype`: Default kernel eltype [def Float64]
* `kernel_eltype`: Kernel eltype [def: `Float64`]
* `kernel_init`: Initial weights with respect to the input [default: Xavier initialisation]. If explicitly provided, it should be a multidimensional array of `kernel_size` augmented by `nchannels_in` and `nchannels_out` dimensions
* `bias_init`: Initial weights with respect to the bias [default: Xavier initialisation]. If given it should be a `nchannels_out` vector of scalars.
* `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [default: `Random.GLOBAL_RNG`]
@@ -132,8 +135,11 @@ struct ConvLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Functio
out_size = ([1 + Int(floor((input_size[d]+padding_start[d]+padding_end[d]-size(kernel_init,d))/stride[d])) for d in 1:nD]...,nchannels_out)
#println(size(layer.weight[1],2))

y_to_x_ids = [Vector{NTuple{nD+1,Int64}}() for i in CartesianIndices(out_size)]
y_to_w_ids = [Vector{NTuple{nD+2,Int64}}() for i in CartesianIndices(out_size)]

#println(nchannels_out)
new{nD,nD+1,nD+2,typeof(f),typeof(df),kernel_eltype}((input_size...,nchannels_in),out_size,kernel_init,usebias,bias_init,padding_start,padding_end,stride,nD,f,df,[],[],[])
new{nD,nD+1,nD+2,typeof(f),typeof(df),kernel_eltype}((input_size...,nchannels_in),out_size,kernel_init,usebias,bias_init,padding_start,padding_end,stride,nD,f,df,[],[],[],y_to_x_ids,y_to_w_ids)
end
end

@@ -164,7 +170,7 @@ function ConvLayer(input_size_with_channel,kernel_size,nchannels_out;
eltype=kernel_eltype) : zeros(kernel_eltype,nchannels_out),
f = identity,
df = match_known_derivatives(f))
return ConvLayer(input_size_with_channel[1:end-1],kernel_size,input_size_with_channel[end],nchannels_out; stride=stride,rng=rng,padding=padding,kernel_init=kernel_init,usebias=usebias,bias_init=bias_init,f=f,df=df)
return ConvLayer(input_size_with_channel[1:end-1],kernel_size,input_size_with_channel[end],nchannels_out; stride=stride,rng=rng,padding=padding, kernel_eltype = kernel_eltype,kernel_init=kernel_init,usebias=usebias,bias_init=bias_init,f=f,df=df)
end
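A hedged usage sketch of the constructors above (argument values are illustrative and mirror the commented example in test/Nn_tests.jl; this is not part of the commit):

l = ConvLayer((6,6), (2,2), 2, 4)   # 6×6 input, 2×2 kernel, 2 channels in, 4 channels out
preprocess!(l)                      # fills x_ids/y_ids/w_ids and the new y_to_x_ids/y_to_w_ids maps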

function preprocess!(layer::ConvLayer{ND,NDPLUS1,NDPLUS2}) where {ND,NDPLUS1,NDPLUS2}
@@ -228,14 +234,20 @@ function preprocess!(layer::ConvLayer{ND,NDPLUS1,NDPLUS2}) where {ND,NDPLUS1,NDP
push!(layer.x_ids,((x_idx...,)))
push!(layer.w_ids,w_idx)
push!(layer.y_ids,y_idx)
#println(x_idx)
#println(typeof(x_idx))
#println(y_idx)
#println(typeof(y_idx))
push!(layer.y_to_x_ids[y_idx...],(x_idx...,))
push!(layer.y_to_w_ids[y_idx...],(w_idx...,))
#de_dx_ch_in[idx_x_source...] += dϵ_dz_ch_out[dy_idx...] * w_ch_in_out[w_idx...]

end
end
end
end
end

#=
function _zComp!(y,layer::ConvLayer{ND,NDPLUS1,NDPLUS2},x) where {ND,NDPLUS1,NDPLUS2}
if ndims(x) == 1
reshape(x,size(layer)[1])
@@ -252,6 +264,32 @@ function _zComp!(y,layer::ConvLayer{ND,NDPLUS1,NDPLUS2},x) where {ND,NDPLUS1,NDP
end
return nothing
end
=#

function _zComp!(y,layer::ConvLayer{ND,NDPLUS1,NDPLUS2,TF,TDF,WET},x) where {ND,NDPLUS1,NDPLUS2,TF,TDF,WET}
if ndims(x) == 1
reshape(x,size(layer)[1])
end
for y_idx in eachindex(y)
yi = zero(WET)
n = length(layer.y_to_x_ids[y_idx])
@inbounds for idx in 1:n
yi += x[layer.y_to_x_ids[y_idx][idx]...] * layer.weight[layer.y_to_w_ids[y_idx][idx]...]
end
y[y_idx] = yi
end

if(layer.usebias)
output_size = size(y)
for ch_out in 1:output_size[end]
y_ch_out = selectdim(y,NDPLUS1,ch_out)
y_ch_out .+= layer.bias[ch_out]
end
end

return nothing
end
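A self-contained toy sketch (plain Julia with made-up names, not the BetaML API) of the gather pattern the rewritten _zComp! relies on: each output element accumulates products over its precomputed input and weight positions, which is exactly what y_to_x_ids and y_to_w_ids store per output index.

x = [1.0, 2.0, 3.0, 4.0]            # toy 1D input
w = [0.5, -1.0]                      # toy kernel
y_to_x = [[1,2], [2,3], [3,4]]       # per output element: contributing x positions
y_to_w = [[1,2], [1,2], [1,2]]       # per output element: matching kernel positions
y = zeros(3)
for yi in eachindex(y)
    acc = 0.0
    for k in eachindex(y_to_x[yi])
        acc += x[y_to_x[yi][k]] * w[y_to_w[yi][k]]
    end
    y[yi] = acc
end
y   # [-1.5, -2.0, -2.5]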


function _dedxComp!(de_dx,layer::ConvLayer{ND,NDPLUS1,NDPLUS2},dϵ_dz) where {ND,NDPLUS1,NDPLUS2}
for idx in 1:length(layer.y_ids)
@@ -360,7 +398,7 @@ function get_gradient(layer::ConvLayer{ND,NDPLUS1,NDPLUS2,TF,TDF,WET},x, next_gr
dbias = zeros(WET,length(layer.bias))
for bias_idx in 1:length(layer.bias)
nchannel_out = bias_idx
dϵ_dz_nchannelOut = selectdim(dϵ_dz,layer.ndims+1,nchannel_out)
dϵ_dz_nchannelOut = selectdim(dϵ_dz,NDPLUS1,nchannel_out)
dbias[bias_idx] = sum(dϵ_dz_nchannelOut)
end
return Learnable((de_dw,dbias))
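A toy check (illustrative only, not part of the diff) of the bias-gradient step above: each output channel adds a single scalar bias to every spatial position, so the bias gradient for a channel is the sum of dϵ/dz over that channel's slice.

dϵ_dz = reshape(collect(1.0:8.0), (2,2,2))            # 2×2 spatial grid, 2 output channels
dbias = [sum(selectdim(dϵ_dz, 3, c)) for c in 1:2]    # [10.0, 26.0]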
40 changes: 14 additions & 26 deletions src/Nn/default_layers/PoolingLayer.jl
@@ -10,7 +10,7 @@ Representation of a pooling layer in the network
# Fields:
$(TYPEDFIELDS)
"""
struct PoolingLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Function}} <: AbstractLayer
struct PoolingLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Function}, WET <: Number} <: AbstractLayer
"Input size (including nchannel_in as last dimension)"
input_size::SVector{NDPLUS1,Int64}
"Output size (including nchannel_out as last dimension)"
@@ -40,7 +40,7 @@ struct PoolingLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Func
"A x-dims array of vectors of ids of y reached by the given x"
#x_to_y_ids::Array{Vector{NTuple{NDPLUS1,Int32}},NDPLUS1} # not needed
"A y-dims array of vectors of ids of x(s) contributing to the giving y"
y_to_x_ids::Array{Vector{NTuple{NDPLUS1,Int32}},NDPLUS1}
y_to_x_ids::Array{Vector{NTuple{NDPLUS1,Int64}},NDPLUS1}



@@ -53,6 +53,7 @@ struct PoolingLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Func
# Positional arguments:
* `input_size`: Shape of the input layer (integer for 1D convolution, tuple otherwise). Do not consider the channels number here.
* `kernel_eltype`: Kernel eltype [def: `Float64`]
* `kernel_size`: Size of the kernel (aka filter) (integer for 1D or hypercube kernels or nD-sized tuple for asymmetric kernels). Do not consider the channels number here.
* `nchannels_in`: Number of channels in input
* `nchannels_out`: Number of channels in output
@@ -70,6 +71,7 @@ struct PoolingLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Func
"""
function PoolingLayer(input_size,kernel_size,nchannels_in;
stride = kernel_size,
kernel_eltype = Float64,
padding = nothing, # (zeros(Int64,length(input_size)),zeros(Int64,length(input_size))),
f = maximum,
df = match_known_derivatives(f))
@@ -113,9 +115,9 @@ struct PoolingLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Func
output_size_with_nchout = ([1 + Int(floor((input_size[d]+padding_start[d]+padding_end[d]-kernel_size[d])/stride[d])) for d in 1:nD]...,nchannels_out)

#x_to_y_ids = [Vector{NTuple{nD+1,Int32}}() for i in CartesianIndices(input_size_with_nchin)] # not needed
y_to_x_ids = [Vector{NTuple{nD+1,Int32}}() for i in CartesianIndices(output_size_with_nchout)]
y_to_x_ids = [Vector{NTuple{nD+1,Int64}}() for i in CartesianIndices(output_size_with_nchout)]

new{nD,nD+1,nD+2,typeof(f),typeof(df)}(input_size_with_nchin,output_size_with_nchout,kernel_size_with_nchin_nchout,padding_start,padding_end,stride,nD,f,df,y_to_x_ids)
new{nD,nD+1,nD+2,typeof(f),typeof(df),kernel_eltype}(input_size_with_nchin,output_size_with_nchout,kernel_size_with_nchin_nchout,padding_start,padding_end,stride,nD,f,df,y_to_x_ids)
end
end

Expand All @@ -130,8 +132,9 @@ function PoolingLayer(input_size_with_channel,kernel_size;
stride = kernel_size,
padding = nothing, # (zeros(Int64,length(input_size)),zeros(Int64,length(input_size))),
f = maximum,
kernel_eltype = Float64,
df = match_known_derivatives(f))
return PoolingLayer(input_size_with_channel[1:end-1],kernel_size,input_size_with_channel[end]; stride=stride,padding=padding,f=f,df=df)
return PoolingLayer(input_size_with_channel[1:end-1],kernel_size,input_size_with_channel[end]; kernel_eltype = kernel_eltype,stride=stride,padding=padding,f=f,df=df)

end
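A hedged usage sketch of the two PoolingLayer constructors above (values illustrative, mirroring the commented example in test/Nn_tests.jl; not part of this commit):

p1 = PoolingLayer((6,6), (2,2), 4)    # positional form: input size, kernel size, channels in (max pooling by default)
p2 = PoolingLayer((6,6,4), (2,2))     # convenience form: input size with the channels as last dimension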

@@ -211,36 +214,21 @@
Compute forward pass of a PoolingLayer
"""
function forward(layer::PoolingLayer,x)
function forward(layer::PoolingLayer{ND,NDPLUS1,NDPLUS2,TF, TDF, WET},x) where {ND,NDPLUS1,NDPLUS2,TF, TDF, WET}

_, output_size = size(layer)
y = zeros(output_size)
for y_idx in CartesianIndices(y)
y_idx = Tuple(y_idx)
x_ids = layer.y_to_x_ids[y_idx...]
y = zeros(WET,output_size)
for y_idx in eachindex(y)
x_ids = layer.y_to_x_ids[y_idx]
x_vals = [x[idx...] for idx in x_ids]
#println(x_vals)
#println(layer.f(x_vals))
y[y_idx...] = layer.f(x_vals)
y[y_idx] = layer.f(x_vals)
end
return y
end
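A short aside on why the forward pass above now allocates with the layer's element type WET rather than a bare zeros call (plain Julia behaviour, not BetaML-specific):

zeros((2,2))            # Matrix{Float64}: the untyped call always produces Float64
zeros(Float32, (2,2))   # Matrix{Float32}: keeps the layer's eltype, avoiding silent promotion of the whole pass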

function _zComp!(z,layer::PoolingLayer{ND,NDPLUS1,NDPLUS2},x) where {ND,NDPLUS1,NDPLUS2}
for y_idx in CartesianIndices(z)
y_idx = Tuple(y_idx)
x_ids = layer.y_to_x_ids[y_idx...]
x_vals = [x[idx...] for idx in x_ids]
#println(x_vals)
#println(layer.f(x_vals))
z[y_idx...] = layer.f(x_vals)
end

end

function backward(layer::PoolingLayer{ND,NDPLUS1,NDPLUS2},x, next_gradient) where {ND,NDPLUS1,NDPLUS2}
de_dx = zeros(layer.input_size...)
for y_idx in CartesianIndices(next_gradient)
for y_idx in eachindex(next_gradient)
#println("----")
x_ids = layer.y_to_x_ids[y_idx]
x_vals = [x[idx...] for idx in x_ids]
3 changes: 2 additions & 1 deletion test/Nn_tests.jl
@@ -509,7 +509,7 @@ rmeTrain = relative_mean_error(y,ŷ,normrec=false)
#using BenchmarkTools
#@btime train!($mynn,$x,$y,epochs=60,verbosity=NONE,rng=copy($TESTRNG))
#240.604 ms (1056544 allocations: 107.66 MiB)

#314.504 ms (774762 allocations: 99.39 MiB)


# ==================================
@@ -573,6 +573,7 @@ ŷ = BetaML.predict(mynn,x)
rmeTrain = relative_mean_error(y,ŷ,normrec=false)
@test rmeTrain < 0.1

a = 1
#l1 = ReshaperLayer((D,1),(6,6,2))
#l2 = ConvLayer((6,6),(2,2),2,4,rng=copy(TESTRNG))
#l3 = PoolingLayer((6,6,4),(2,2))
