Changed the method of _zComp! in ConvLayer and replaced CartesianIndex with eachindex in some Nn layers
sylvaticus committed Jan 18, 2024
1 parent 98eb947 commit c6dc373
Showing 4 changed files with 79 additions and 38 deletions.
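For context, a minimal standalone sketch (plain Julia, illustrative only, not code from this commit) of the two iteration styles the commit message refers to: CartesianIndices yields multi-dimensional indices that the previous code converted to tuples and splatted on every access, while eachindex yields plain linear indices.

A = rand(3, 4, 2)

# previous style: iterate multi-dimensional indices, convert to tuples and splat them
function sum_cartesian(A)
    s = zero(eltype(A))
    for idx in CartesianIndices(A)
        s += A[Tuple(idx)...]
    end
    return s
end

# new style: iterate plain linear indices
function sum_linear(A)
    s = zero(eltype(A))
    for idx in eachindex(A)
        s += A[idx]
    end
    return s
end

sum_cartesian(A) ≈ sum_linear(A)   # true

Both forms compute the same result; the linear-index form simply avoids the per-element tuple construction and splatting.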
24 changes: 19 additions & 5 deletions docs/src/tutorials/Image recognition/Image_recognition.jl
@@ -62,12 +62,26 @@ m = NeuralNetworkEstimator(layers=layers,loss=squared_cost,verbosity=HIGH,b
# Training the whole MNIST set takes approximately 16 minutes on a mid-level laptop (on CPU), leading to a test accuracy of 0.969
(x_debug,x_other),(y_debug_oh,y_other_oh) = partition([x_train,y_train_oh],[0.01,0.99],rng=copy(TESTRNG))

ŷ = fit!(m,x_debug,y_debug_oh)

## ŷ = fit!(m,x_train,y_train_oh)
#preprocess!.(layers)
# 0.131836 seconds (477.02 k allocations: 53.470 MiB, 72.73% compilation time)
#@code_warntype preprocess!(l5)

##y_true = inverse_predict(ohm,convert(Matrix{Bool},y_train_oh))
# y_true = inverse_predict(ohm,convert(Matrix{Bool},y_debug_oh))
ŷ = fit!(m,x_debug,y_debug_oh)
#@btime fit!(m,x_debug,y_debug_oh)
# 1%: 15.909 s (1940246 allocations: 1.39 GiB)
# 17.509 s (1039126 allocations: 1.37 GiB)
# 15.766 s (1039111 allocations: 1.37 GiB)
# 14.669 s (3129139 allocations: 1.64 GiB) (with threads)
# 18.119 s (1039121 allocations: 1.37 GiB)
# 14.966 s (1039123 allocations: 1.37 GiB) (without threads)
# 19.357 s (1039123 allocations: 1.37 GiB)

#println(now(), " ", "*** prefit..." ) #src
#ŷ = fit!(m,x_train,y_train_oh)
#println(now(), " ", "*** postfit..." ) #src

#y_true = inverse_predict(ohm,convert(Matrix{Bool},y_train_oh))
y_true = inverse_predict(ohm,convert(Matrix{Bool},y_debug_oh))
ŷ_nonoh = inverse_predict(ohm,ŷ)
accuracy(y_true,ŷ_nonoh)
hcat(y_true,ŷ_nonoh)
50 changes: 44 additions & 6 deletions src/Nn/default_layers/ConvLayer.jl
@@ -40,7 +40,10 @@ struct ConvLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Functio
y_ids::Vector{SVector{NDPLUS1,Int64}}
"w ids of the convolution (computed in `preprocessing`` - itself at the beginning of `train`"
w_ids::Vector{SVector{NDPLUS2,Int64}}

"A y-dims array of vectors of ids of x(s) contributing to the giving y"
y_to_x_ids::Array{Vector{NTuple{NDPLUS1,Int64}},NDPLUS1}
"A y-dims array of vectors of corresponding w(s) contributing to the giving y"
y_to_w_ids::Array{Vector{NTuple{NDPLUS2,Int64}},NDPLUS1}

@doc """
$(TYPEDSIGNATURES)
@@ -59,7 +62,7 @@ struct ConvLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Functio
* `padding`: Integer or 2-element tuple of tuples of the starting and ending padding across the various dimensions [def: `nothing`, i.e. set the padding required to keep the same dimensions in output (with stride==1)]
* `f`: Activation function [def: `relu`]
* `df`: Derivative of the activation function [default: try to match a known function, AD otherwise. Use `nothing` to force AD]
* `kernel_eltype`: Default kernel eltype [def Float64]
* `kernel_eltype`: Kernel eltype [def: `Float64`]
* `kernel_init`: Initial weights with respect to the input [default: Xavier initialisation]. If explicitly provided, it should be a multidimensional array of `kernel_size` augmented by `nchannels_in` and `nchannels_out` dimensions
* `bias_init`: Initial weights with respect to the bias [default: Xavier initialisation]. If given it should be a `nchannels_out` vector of scalars.
* `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [default: `Random.GLOBAL_RNG`]
@@ -132,8 +135,11 @@ struct ConvLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Functio
out_size = ([1 + Int(floor((input_size[d]+padding_start[d]+padding_end[d]-size(kernel_init,d))/stride[d])) for d in 1:nD]...,nchannels_out)
#println(size(layer.weight[1],2))

y_to_x_ids = [Vector{NTuple{nD+1,Int64}}() for i in CartesianIndices(out_size)]
y_to_w_ids = [Vector{NTuple{nD+2,Int64}}() for i in CartesianIndices(out_size)]

#println(nchannels_out)
new{nD,nD+1,nD+2,typeof(f),typeof(df),kernel_eltype}((input_size...,nchannels_in),out_size,kernel_init,usebias,bias_init,padding_start,padding_end,stride,nD,f,df,[],[],[])
new{nD,nD+1,nD+2,typeof(f),typeof(df),kernel_eltype}((input_size...,nchannels_in),out_size,kernel_init,usebias,bias_init,padding_start,padding_end,stride,nD,f,df,[],[],[],y_to_x_ids,y_to_w_ids)
end
end

@@ -164,7 +170,7 @@ function ConvLayer(input_size_with_channel,kernel_size,nchannels_out;
eltype=kernel_eltype) : zeros(kernel_eltype,nchannels_out),
f = identity,
df = match_known_derivatives(f))
return ConvLayer(input_size_with_channel[1:end-1],kernel_size,input_size_with_channel[end],nchannels_out; stride=stride,rng=rng,padding=padding,kernel_init=kernel_init,usebias=usebias,bias_init=bias_init,f=f,df=df)
return ConvLayer(input_size_with_channel[1:end-1],kernel_size,input_size_with_channel[end],nchannels_out; stride=stride,rng=rng,padding=padding, kernel_eltype = kernel_eltype,kernel_init=kernel_init,usebias=usebias,bias_init=bias_init,f=f,df=df)
end
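A hedged usage sketch of the constructors above (argument values are illustrative and mirror the commented example in test/Nn_tests.jl; this is not part of the commit):

l = ConvLayer((6,6), (2,2), 2, 4)   # 6×6 input, 2×2 kernel, 2 channels in, 4 channels out
preprocess!(l)                      # fills x_ids/y_ids/w_ids and the new y_to_x_ids/y_to_w_ids maps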

function preprocess!(layer::ConvLayer{ND,NDPLUS1,NDPLUS2}) where {ND,NDPLUS1,NDPLUS2}
@@ -228,14 +234,20 @@ function preprocess!(layer::ConvLayer{ND,NDPLUS1,NDPLUS2}) where {ND,NDPLUS1,NDP
push!(layer.x_ids,((x_idx...,)))
push!(layer.w_ids,w_idx)
push!(layer.y_ids,y_idx)
#println(x_idx)
#println(typeof(x_idx))
#println(y_idx)
#println(typeof(y_idx))
push!(layer.y_to_x_ids[y_idx...],(x_idx...,))
push!(layer.y_to_w_ids[y_idx...],(w_idx...,))
#de_dx_ch_in[idx_x_source...] += dϵ_dz_ch_out[dy_idx...] * w_ch_in_out[w_idx...]

end
end
end
end
end

#=
function _zComp!(y,layer::ConvLayer{ND,NDPLUS1,NDPLUS2},x) where {ND,NDPLUS1,NDPLUS2}
if ndims(x) == 1
reshape(x,size(layer)[1])
@@ -252,6 +264,32 @@ function _zComp!(y,layer::ConvLayer{ND,NDPLUS1,NDPLUS2},x) where {ND,NDPLUS1,NDP
end
return nothing
end
=#

function _zComp!(y,layer::ConvLayer{ND,NDPLUS1,NDPLUS2,TF,TDF,WET},x) where {ND,NDPLUS1,NDPLUS2,TF,TDF,WET}
if ndims(x) == 1
reshape(x,size(layer)[1])
end
for y_idx in eachindex(y)
yi = zero(WET)
n = length(layer.y_to_x_ids[y_idx])
@inbounds for idx in 1:n
yi += x[layer.y_to_x_ids[y_idx][idx]...] * layer.weight[layer.y_to_w_ids[y_idx][idx]...]
end
y[y_idx] = yi
end

if(layer.usebias)
output_size = size(y)
for ch_out in 1:output_size[end]
y_ch_out = selectdim(y,NDPLUS1,ch_out)
y_ch_out .+= layer.bias[ch_out]
end
end

return nothing
end
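A self-contained toy sketch (plain Julia with made-up names, not the BetaML API) of the gather pattern the rewritten _zComp! relies on: each output element accumulates products over its precomputed input and weight positions, which is exactly what y_to_x_ids and y_to_w_ids store per output index.

x = [1.0, 2.0, 3.0, 4.0]            # toy 1D input
w = [0.5, -1.0]                      # toy kernel
y_to_x = [[1,2], [2,3], [3,4]]       # per output element: contributing x positions
y_to_w = [[1,2], [1,2], [1,2]]       # per output element: matching kernel positions
y = zeros(3)
for yi in eachindex(y)
    acc = 0.0
    for k in eachindex(y_to_x[yi])
        acc += x[y_to_x[yi][k]] * w[y_to_w[yi][k]]
    end
    y[yi] = acc
end
y   # [-1.5, -2.0, -2.5]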


function _dedxComp!(de_dx,layer::ConvLayer{ND,NDPLUS1,NDPLUS2},dϵ_dz) where {ND,NDPLUS1,NDPLUS2}
for idx in 1:length(layer.y_ids)
@@ -360,7 +398,7 @@ function get_gradient(layer::ConvLayer{ND,NDPLUS1,NDPLUS2,TF,TDF,WET},x, next_gr
dbias = zeros(WET,length(layer.bias))
for bias_idx in 1:length(layer.bias)
nchannel_out = bias_idx
dϵ_dz_nchannelOut = selectdim(dϵ_dz,layer.ndims+1,nchannel_out)
dϵ_dz_nchannelOut = selectdim(dϵ_dz,NDPLUS1,nchannel_out)
dbias[bias_idx] = sum(dϵ_dz_nchannelOut)
end
return Learnable((de_dw,dbias))
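A toy check (illustrative only, not part of the diff) of the bias-gradient step above: each output channel adds a single scalar bias to every spatial position, so the bias gradient for a channel is the sum of dϵ/dz over that channel's slice.

dϵ_dz = reshape(collect(1.0:8.0), (2,2,2))            # 2×2 spatial grid, 2 output channels
dbias = [sum(selectdim(dϵ_dz, 3, c)) for c in 1:2]    # [10.0, 26.0]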
40 changes: 14 additions & 26 deletions src/Nn/default_layers/PoolingLayer.jl
@@ -10,7 +10,7 @@ Representation of a pooling layer in the network
# Fields:
$(TYPEDFIELDS)
"""
struct PoolingLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Function}} <: AbstractLayer
struct PoolingLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Function}, WET <: Number} <: AbstractLayer
"Input size (including nchannel_in as last dimension)"
input_size::SVector{NDPLUS1,Int64}
"Output size (including nchannel_out as last dimension)"
@@ -40,7 +40,7 @@ struct PoolingLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Func
"A x-dims array of vectors of ids of y reached by the given x"
#x_to_y_ids::Array{Vector{NTuple{NDPLUS1,Int32}},NDPLUS1} # not needed
"A y-dims array of vectors of ids of x(s) contributing to the giving y"
y_to_x_ids::Array{Vector{NTuple{NDPLUS1,Int32}},NDPLUS1}
y_to_x_ids::Array{Vector{NTuple{NDPLUS1,Int64}},NDPLUS1}



@@ -53,6 +53,7 @@ struct PoolingLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Func
# Positional arguments:
* `input_size`: Shape of the input layer (integer for 1D convolution, tuple otherwise). Do not consider the channels number here.
* `kernel_eltype`: Kernel eltype [def: `Float64`]
* `kernel_size`: Size of the kernel (aka filter) (integer for 1D or hypercube kernels or nD-sized tuple for asymmetric kernels). Do not consider the channels number here.
* `nchannels_in`: Number of channels in input
* `nchannels_out`: Number of channels in output
@@ -70,6 +71,7 @@ struct PoolingLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Func
"""
function PoolingLayer(input_size,kernel_size,nchannels_in;
stride = kernel_size,
kernel_eltype = Float64,
padding = nothing, # (zeros(Int64,length(input_size)),zeros(Int64,length(input_size))),
f = maximum,
df = match_known_derivatives(f))
@@ -113,9 +115,9 @@ struct PoolingLayer{ND,NDPLUS1,NDPLUS2,TF <: Function, TDF <: Union{Nothing,Func
output_size_with_nchout = ([1 + Int(floor((input_size[d]+padding_start[d]+padding_end[d]-kernel_size[d])/stride[d])) for d in 1:nD]...,nchannels_out)

#x_to_y_ids = [Vector{NTuple{nD+1,Int32}}() for i in CartesianIndices(input_size_with_nchin)] # not needed
y_to_x_ids = [Vector{NTuple{nD+1,Int32}}() for i in CartesianIndices(output_size_with_nchout)]
y_to_x_ids = [Vector{NTuple{nD+1,Int64}}() for i in CartesianIndices(output_size_with_nchout)]

new{nD,nD+1,nD+2,typeof(f),typeof(df)}(input_size_with_nchin,output_size_with_nchout,kernel_size_with_nchin_nchout,padding_start,padding_end,stride,nD,f,df,y_to_x_ids)
new{nD,nD+1,nD+2,typeof(f),typeof(df),kernel_eltype}(input_size_with_nchin,output_size_with_nchout,kernel_size_with_nchin_nchout,padding_start,padding_end,stride,nD,f,df,y_to_x_ids)
end
end

Expand All @@ -130,8 +132,9 @@ function PoolingLayer(input_size_with_channel,kernel_size;
stride = kernel_size,
padding = nothing, # (zeros(Int64,length(input_size)),zeros(Int64,length(input_size))),
f = maximum,
kernel_eltype = Float64,
df = match_known_derivatives(f))
return PoolingLayer(input_size_with_channel[1:end-1],kernel_size,input_size_with_channel[end]; stride=stride,padding=padding,f=f,df=df)
return PoolingLayer(input_size_with_channel[1:end-1],kernel_size,input_size_with_channel[end]; kernel_eltype = kernel_eltype,stride=stride,padding=padding,f=f,df=df)

end
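A hedged usage sketch of the two PoolingLayer constructors above (values illustrative, mirroring the commented example in test/Nn_tests.jl; not part of this commit):

p1 = PoolingLayer((6,6), (2,2), 4)    # positional form: input size, kernel size, channels in (max pooling by default)
p2 = PoolingLayer((6,6,4), (2,2))     # convenience form: input size with the channels as last dimension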

@@ -211,36 +214,21 @@
Compute forward pass of a PoolingLayer
"""
function forward(layer::PoolingLayer,x)
function forward(layer::PoolingLayer{ND,NDPLUS1,NDPLUS2,TF, TDF, WET},x) where {ND,NDPLUS1,NDPLUS2,TF, TDF, WET}

_, output_size = size(layer)
y = zeros(output_size)
for y_idx in CartesianIndices(y)
y_idx = Tuple(y_idx)
x_ids = layer.y_to_x_ids[y_idx...]
y = zeros(WET,output_size)
for y_idx in eachindex(y)
x_ids = layer.y_to_x_ids[y_idx]
x_vals = [x[idx...] for idx in x_ids]
#println(x_vals)
#println(layer.f(x_vals))
y[y_idx...] = layer.f(x_vals)
y[y_idx] = layer.f(x_vals)
end
return y
end
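A short aside on why the forward pass above now allocates with the layer's element type WET rather than a bare zeros call (plain Julia behaviour, not BetaML-specific):

zeros((2,2))            # Matrix{Float64}: the untyped call always produces Float64
zeros(Float32, (2,2))   # Matrix{Float32}: keeps the layer's eltype, avoiding silent promotion of the whole pass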

function _zComp!(z,layer::PoolingLayer{ND,NDPLUS1,NDPLUS2},x) where {ND,NDPLUS1,NDPLUS2}
for y_idx in CartesianIndices(z)
y_idx = Tuple(y_idx)
x_ids = layer.y_to_x_ids[y_idx...]
x_vals = [x[idx...] for idx in x_ids]
#println(x_vals)
#println(layer.f(x_vals))
z[y_idx...] = layer.f(x_vals)
end

end

function backward(layer::PoolingLayer{ND,NDPLUS1,NDPLUS2},x, next_gradient) where {ND,NDPLUS1,NDPLUS2}
de_dx = zeros(layer.input_size...)
for y_idx in CartesianIndices(next_gradient)
for y_idx in eachindex(next_gradient)
#println("----")
x_ids = layer.y_to_x_ids[y_idx]
x_vals = [x[idx...] for idx in x_ids]
3 changes: 2 additions & 1 deletion test/Nn_tests.jl
@@ -509,7 +509,7 @@ rmeTrain = relative_mean_error(y,ŷ,normrec=false)
#using BenchmarkTools
#@btime train!($mynn,$x,$y,epochs=60,verbosity=NONE,rng=copy($TESTRNG))
#240.604 ms (1056544 allocations: 107.66 MiB)

#314.504 ms (774762 allocations: 99.39 MiB)


# ==================================
@@ -573,6 +573,7 @@ ŷ = BetaML.predict(mynn,x)
rmeTrain = relative_mean_error(y,ŷ,normrec=false)
@test rmeTrain < 0.1

a = 1
#l1 = ReshaperLayer((D,1),(6,6,2))
#l2 = ConvLayer((6,6),(2,2),2,4,rng=copy(TESTRNG))
#l3 = PoolingLayer((6,6,4),(2,2))
