From c6dc373143fae663f5dec7bf7d46a2b89d68fdbc Mon Sep 17 00:00:00 2001 From: Antonello Lobianco Date: Thu, 18 Jan 2024 16:41:53 +0100 Subject: [PATCH] Changes method of -zComp! in ConvLAyer and replaced CartesianIndex with eachindex in some Nn layers --- .../Image recognition/Image_recognition.jl | 24 +++++++-- src/Nn/default_layers/ConvLayer.jl | 50 ++++++++++++++++--- src/Nn/default_layers/PoolingLayer.jl | 40 ++++++--------- test/Nn_tests.jl | 3 +- 4 files changed, 79 insertions(+), 38 deletions(-) diff --git a/docs/src/tutorials/Image recognition/Image_recognition.jl b/docs/src/tutorials/Image recognition/Image_recognition.jl index 2d2d0305..d54d6d96 100644 --- a/docs/src/tutorials/Image recognition/Image_recognition.jl +++ b/docs/src/tutorials/Image recognition/Image_recognition.jl @@ -62,12 +62,26 @@ m = NeuralNetworkEstimator(layers=layers,loss=squared_cost,verbosity=HIGH,b # Training the whole MINST set takes approximatly 16 minutes on a mid-level laptop (on CPU), leading to a test accuracy of 0.969 (x_debug,x_other),(y_debug_oh,y_other_oh) = partition([x_train,y_train_oh],[0.01,0.99],rng=copy(TESTRNG)) -ŷ = fit!(m,x_debug,y_debug_oh) - -## ŷ = fit!(m,x_train,y_train_oh) +#preprocess!.(layers) +# 0.131836 seconds (477.02 k allocations: 53.470 MiB, 72.73% compilation time) +#@code_warntype preprocess!(l5) -##y_true = inverse_predict(ohm,convert(Matrix{Bool},y_train_oh)) -# y_true = inverse_predict(ohm,convert(Matrix{Bool},y_debug_oh)) +ŷ = fit!(m,x_debug,y_debug_oh) +#@btime fit!(m,x_debug,y_debug_oh) +# 1%: 15.909 s (1940246 allocations: 1.39 GiB) +# 17.509 s (1039126 allocations: 1.37 GiB) +# 15.766 s (1039111 allocations: 1.37 GiB) +# 14.669 s (3129139 allocations: 1.64 GiB) (w threads) +# 18.119 s (1039121 allocations: 1.37 GiB) +# 14.966 s (1039123 allocations: 1.37 GiB) (whout threads) +# 19.357 s (1039123 allocations: 1.37 GiB) + +#println(now(), " ", "*** prefit..." ) #src +#ŷ = fit!(m,x_train,y_train_oh) +#println(now(), " ", "*** postfit..." ) #src + +#y_true = inverse_predict(ohm,convert(Matrix{Bool},y_train_oh)) +y_true = inverse_predict(ohm,convert(Matrix{Bool},y_debug_oh)) ŷ_nonoh = inverse_predict(ohm,ŷ) accuracy(y_true,ŷ_nonoh) hcat(y_true,ŷ_nonoh) diff --git a/src/Nn/default_layers/ConvLayer.jl b/src/Nn/default_layers/ConvLayer.jl index 4ab00215..d066d26b 100644 --- a/src/Nn/default_layers/ConvLayer.jl +++ b/src/Nn/default_layers/ConvLayer.jl @@ -40,7 +40,10 @@ struct ConvLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Functio y_ids::Vector{SVector{NDPLUS1,Int64}} "w ids of the convolution (computed in `preprocessing`` - itself at the beginning of `train`" w_ids::Vector{SVector{NDPLUS2,Int64}} - + "A y-dims array of vectors of ids of x(s) contributing to the giving y" + y_to_x_ids::Array{Vector{NTuple{NDPLUS1,Int64}},NDPLUS1} + "A y-dims array of vectors of corresponding w(s) contributing to the giving y" + y_to_w_ids::Array{Vector{NTuple{NDPLUS2,Int64}},NDPLUS1} @doc """ $(TYPEDSIGNATURES) @@ -59,7 +62,7 @@ struct ConvLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Functio * `padding`: Integer or 2-elements tuple of tuples of the starting end ending padding across the various dimensions [def: `nothing`, i.e. set the padding required to keep the same dimensions in output (with stride==1)] * `f`: Activation function [def: `relu`] * `df`: Derivative of the activation function [default: try to match a known funcion, AD otherwise. Use `nothing` to force AD] - * `kernel_eltype`: Default kernel eltype [def Float64] + * `kernel_eltype`: Kernel eltype [def: `Float64`] * `kernel_init`: Initial weigths with respect to the input [default: Xavier initialisation]. If explicitly provided, it should be a multidimensional array of `kernel_size` augmented by `nchannels_in` and `nchannels_out` dimensions * `bias_init`: Initial weigths with respect to the bias [default: Xavier initialisation]. If given it should be a `nchannels_out` vector of scalars. * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`] @@ -132,8 +135,11 @@ struct ConvLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Functio out_size = ([1 + Int(floor((input_size[d]+padding_start[d]+padding_end[d]-size(kernel_init,d))/stride[d])) for d in 1:nD]...,nchannels_out) #println(size(layer.weight[1],2)) + y_to_x_ids = [Vector{NTuple{nD+1,Int64}}() for i in CartesianIndices(out_size)] + y_to_w_ids = [Vector{NTuple{nD+2,Int64}}() for i in CartesianIndices(out_size)] + #println(nchannels_out) - new{nD,nD+1,nD+2,typeof(f),typeof(df),kernel_eltype}((input_size...,nchannels_in),out_size,kernel_init,usebias,bias_init,padding_start,padding_end,stride,nD,f,df,[],[],[]) + new{nD,nD+1,nD+2,typeof(f),typeof(df),kernel_eltype}((input_size...,nchannels_in),out_size,kernel_init,usebias,bias_init,padding_start,padding_end,stride,nD,f,df,[],[],[],y_to_x_ids,y_to_w_ids) end end @@ -164,7 +170,7 @@ function ConvLayer(input_size_with_channel,kernel_size,nchannels_out; eltype=kernel_eltype) : zeros(kernel_eltype,nchannels_out), f = identity, df = match_known_derivatives(f)) - return ConvLayer(input_size_with_channel[1:end-1],kernel_size,input_size_with_channel[end],nchannels_out; stride=stride,rng=rng,padding=padding,kernel_init=kernel_init,usebias=usebias,bias_init=bias_init,f=f,df=df) + return ConvLayer(input_size_with_channel[1:end-1],kernel_size,input_size_with_channel[end],nchannels_out; stride=stride,rng=rng,padding=padding, kernel_eltype = kernel_eltype,kernel_init=kernel_init,usebias=usebias,bias_init=bias_init,f=f,df=df) end function preprocess!(layer::ConvLayer{ND,NDPLUS1,NDPLUS2}) where {ND,NDPLUS1,NDPLUS2} @@ -228,14 +234,20 @@ function preprocess!(layer::ConvLayer{ND,NDPLUS1,NDPLUS2}) where {ND,NDPLUS1,NDP push!(layer.x_ids,((x_idx...,))) push!(layer.w_ids,w_idx) push!(layer.y_ids,y_idx) + #println(x_idx) + #println(typeof(x_idx)) + #println(y_idx) + #println(typeof(y_idx)) + push!(layer.y_to_x_ids[y_idx...],(x_idx...,)) + push!(layer.y_to_w_ids[y_idx...],(w_idx...,)) #de_dx_ch_in[idx_x_source...] += dϵ_dz_ch_out[dy_idx...] * w_ch_in_out[w_idx...] - end end end end end +#= function _zComp!(y,layer::ConvLayer{ND,NDPLUS1,NDPLUS2},x) where {ND,NDPLUS1,NDPLUS2} if ndims(x) == 1 reshape(x,size(layer)[1]) @@ -252,6 +264,32 @@ function _zComp!(y,layer::ConvLayer{ND,NDPLUS1,NDPLUS2},x) where {ND,NDPLUS1,NDP end return nothing end +=# + +function _zComp!(y,layer::ConvLayer{ND,NDPLUS1,NDPLUS2,TF,TDF,WET},x) where {ND,NDPLUS1,NDPLUS2,TF,TDF,WET} + if ndims(x) == 1 + reshape(x,size(layer)[1]) + end + for y_idx in eachindex(y) + yi = zero(WET) + n = length(layer.y_to_x_ids[y_idx]) + @inbounds for idx in 1:n + yi += x[layer.y_to_x_ids[y_idx][idx]...] * layer.weight[layer.y_to_w_ids[y_idx][idx]...] + end + y[y_idx] = yi + end + + if(layer.usebias) + output_size = size(y) + for ch_out in 1:output_size[end] + y_ch_out = selectdim(y,NDPLUS1,ch_out) + y_ch_out .+= layer.bias[ch_out] + end + end + + return nothing +end + function _dedxComp!(de_dx,layer::ConvLayer{ND,NDPLUS1,NDPLUS2},dϵ_dz) where {ND,NDPLUS1,NDPLUS2} for idx in 1:length(layer.y_ids) @@ -360,7 +398,7 @@ function get_gradient(layer::ConvLayer{ND,NDPLUS1,NDPLUS2,TF,TDF,WET},x, next_gr dbias = zeros(WET,length(layer.bias)) for bias_idx in 1:length(layer.bias) nchannel_out = bias_idx - dϵ_dz_nchannelOut = selectdim(dϵ_dz,layer.ndims+1,nchannel_out) + dϵ_dz_nchannelOut = selectdim(dϵ_dz,NDPLUS1,nchannel_out) dbias[bias_idx] = sum(dϵ_dz_nchannelOut) end return Learnable((de_dw,dbias)) diff --git a/src/Nn/default_layers/PoolingLayer.jl b/src/Nn/default_layers/PoolingLayer.jl index c1bb0241..c19d3bc8 100644 --- a/src/Nn/default_layers/PoolingLayer.jl +++ b/src/Nn/default_layers/PoolingLayer.jl @@ -10,7 +10,7 @@ Representation of a pooling layer in the network # Fields: $(TYPEDFIELDS) """ -struct PoolingLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Function}} <: AbstractLayer +struct PoolingLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Function}, WET <: Number} <: AbstractLayer "Input size (including nchannel_in as last dimension)" input_size::SVector{NDPLUS1,Int64} "Output size (including nchannel_out as last dimension)" @@ -40,7 +40,7 @@ struct PoolingLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Func "A x-dims array of vectors of ids of y reached by the given x" #x_to_y_ids::Array{Vector{NTuple{NDPLUS1,Int32}},NDPLUS1} # not needed "A y-dims array of vectors of ids of x(s) contributing to the giving y" - y_to_x_ids::Array{Vector{NTuple{NDPLUS1,Int32}},NDPLUS1} + y_to_x_ids::Array{Vector{NTuple{NDPLUS1,Int64}},NDPLUS1} @@ -53,6 +53,7 @@ struct PoolingLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Func # Positional arguments: * `input_size`: Shape of the input layer (integer for 1D convolution, tuple otherwise). Do not consider the channels number here. + * `kernel_eltype`: Kernel eltype [def: `Float64`] * `kernel_size`: Size of the kernel (aka filter) (integer for 1D or hypercube kernels or nD-sized tuple for assymmetric kernels). Do not consider the channels number here. * `nchannels_in`: Number of channels in input * `nchannels_out`: Number of channels in output @@ -70,6 +71,7 @@ struct PoolingLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Func """ function PoolingLayer(input_size,kernel_size,nchannels_in; stride = kernel_size, + kernel_eltype = Float64, padding = nothing, # (zeros(Int64,length(input_size)),zeros(Int64,length(input_size))), f = maximum, df = match_known_derivatives(f)) @@ -113,9 +115,9 @@ struct PoolingLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Func output_size_with_nchout = ([1 + Int(floor((input_size[d]+padding_start[d]+padding_end[d]-kernel_size[d])/stride[d])) for d in 1:nD]...,nchannels_out) #x_to_y_ids = [Vector{NTuple{nD+1,Int32}}() for i in CartesianIndices(input_size_with_nchin)] # not needed - y_to_x_ids = [Vector{NTuple{nD+1,Int32}}() for i in CartesianIndices(output_size_with_nchout)] + y_to_x_ids = [Vector{NTuple{nD+1,Int64}}() for i in CartesianIndices(output_size_with_nchout)] - new{nD,nD+1,nD+2,typeof(f),typeof(df)}(input_size_with_nchin,output_size_with_nchout,kernel_size_with_nchin_nchout,padding_start,padding_end,stride,nD,f,df,y_to_x_ids) + new{nD,nD+1,nD+2,typeof(f),typeof(df),kernel_eltype}(input_size_with_nchin,output_size_with_nchout,kernel_size_with_nchin_nchout,padding_start,padding_end,stride,nD,f,df,y_to_x_ids) end end @@ -130,8 +132,9 @@ function PoolingLayer(input_size_with_channel,kernel_size; stride = kernel_size, padding = nothing, # (zeros(Int64,length(input_size)),zeros(Int64,length(input_size))), f = maximum, + kernel_eltype = Float64, df = match_known_derivatives(f)) - return PoolingLayer(input_size_with_channel[1:end-1],kernel_size,input_size_with_channel[end]; stride=stride,padding=padding,f=f,df=df) + return PoolingLayer(input_size_with_channel[1:end-1],kernel_size,input_size_with_channel[end]; kernel_eltype = kernel_eltype,stride=stride,padding=padding,f=f,df=df) end @@ -211,36 +214,21 @@ $(TYPEDSIGNATURES) Compute forward pass of a ConvLayer """ -function forward(layer::PoolingLayer,x) +function forward(layer::PoolingLayer{ND,NDPLUS1,NDPLUS2,TF, TDF, WET},x) where {ND,NDPLUS1,NDPLUS2,TF, TDF, WET} _, output_size = size(layer) - y = zeros(output_size) - for y_idx in CartesianIndices(y) - y_idx = Tuple(y_idx) - x_ids = layer.y_to_x_ids[y_idx...] + y = zeros(WET,output_size) + for y_idx in eachindex(y) + x_ids = layer.y_to_x_ids[y_idx] x_vals = [x[idx...] for idx in x_ids] - #println(x_vals) - #println(layer.f(x_vals)) - y[y_idx...] = layer.f(x_vals) + y[y_idx] = layer.f(x_vals) end return y end -function _zComp!(z,layer::PoolingLayer{ND,NDPLUS1,NDPLUS2},x) where {ND,NDPLUS1,NDPLUS2} - for y_idx in CartesianIndices(z) - y_idx = Tuple(y_idx) - x_ids = layer.y_to_x_ids[y_idx...] - x_vals = [x[idx...] for idx in x_ids] - #println(x_vals) - #println(layer.f(x_vals)) - y[y_idx...] = layer.f(x_vals) - end - -end - function backward(layer::PoolingLayer{ND,NDPLUS1,NDPLUS2},x, next_gradient) where {ND,NDPLUS1,NDPLUS2} de_dx = zeros(layer.input_size...) - for y_idx in CartesianIndices(next_gradient) + for y_idx in eachindex(next_gradient) #println("----") x_ids = layer.y_to_x_ids[y_idx] x_vals = [x[idx...] for idx in x_ids] diff --git a/test/Nn_tests.jl b/test/Nn_tests.jl index d67f483f..e628c2aa 100644 --- a/test/Nn_tests.jl +++ b/test/Nn_tests.jl @@ -509,7 +509,7 @@ rmeTrain = relative_mean_error(y,ŷ,normrec=false) #using BenchmarkTools #@btime train!($mynn,$x,$y,epochs=60,verbosity=NONE,rng=copy($TESTRNG)) #240.604 ms (1056544 allocations: 107.66 MiB) - +#314.504 ms (774762 allocations: 99.39 MiB) # ================================== @@ -573,6 +573,7 @@ ŷ = BetaML.predict(mynn,x) rmeTrain = relative_mean_error(y,ŷ,normrec=false) @test rmeTrain < 0.1 +a = 1 #l1 = ReshaperLayer((D,1),(6,6,2)) #l2 = ConvLayer((6,6),(2,2),2,4,rng=copy(TESTRNG)) #l3 = PoolingLayer((6,6,4),(2,2))