diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml new file mode 100644 index 0000000..226f604 --- /dev/null +++ b/.github/workflows/CompatHelper.yml @@ -0,0 +1,17 @@ +name: CompatHelper + +on: + schedule: + - cron: '00 00 * * *' + +jobs: + CompatHelper: + runs-on: ubuntu-latest + steps: + - name: Pkg.add("CompatHelper") + run: julia -e 'using Pkg; Pkg.add("CompatHelper")' + - name: CompatHelper.main() + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} + run: julia -e 'using CompatHelper; CompatHelper.main()' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..14b8de4 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,71 @@ +name: CI +on: + pull_request: + branches: + - main + push: + branches: + - main + tags: '*' +jobs: + test: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - '1.6' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'. + - '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia. 
+ os: + - ubuntu-latest + - macOS-latest + - windows-latest + arch: + - x64 + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@v1 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: actions/cache@v1 + env: + cache-name: cache-artifacts + with: + path: ~/.julia/artifacts + key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }} + restore-keys: | + ${{ runner.os }}-test-${{ env.cache-name }}- + ${{ runner.os }}-test- + ${{ runner.os }}- + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v3 + with: + file: lcov.info + docs: + name: Documentation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@v1 + with: + version: '1' + - run: | + julia --project=docs -e ' + using Pkg + Pkg.develop(PackageSpec(path=pwd())) + Pkg.instantiate()' + - run: | + julia --project=docs -e ' + using Documenter: doctest + using MrFOR_resources_init_fr # change MyAwesomePackage to the name of your package + doctest(MrFOR_resources_init_fr)' # change MyAwesomePackage to the name of your package + - run: julia --project=docs docs/make.jl + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v1 diff --git a/Project.toml b/Project.toml index 502ba8e..ce4ccb6 100644 --- a/Project.toml +++ b/Project.toml @@ -2,3 +2,6 @@ name = "MrFOR_resources_init_fr" uuid = "78598d3f-d65b-4630-9136-a3bfe5c9de2f" authors = ["Antonello Lobianco "] version = "0.1.0" + +[deps] +MrFOR_resources = "731fd045-8cb1-4036-8c04-a2a477f3265a" diff --git a/README.md b/README.md index 6142425..7df7f10 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,11 @@ # MrFOR_resources_init_fr Initialisation package of the resource module of MrFOR specific for France + +For 
timber Volumes, we'll be using the National Inventory raw data. + +[![](https://img.shields.io/badge/docs-stable-blue.svg)](https://ecoformod.github.io/MrFOR_resources_init_fr.jl/stable) +[![](https://img.shields.io/badge/docs-dev-blue.svg)](https://ecoformod.github.io/MrFOR_resources_init_fr.jl/dev) +[![Build status (Github Actions)](https://github.com/ecoformod/MrFOR_resources_init_fr.jl/workflows/CI/badge.svg)](https://github.com/ecoformod/MrFOR_resources_init_fr.jl/actions) +[![codecov.io](http://codecov.io/github/ecoformod/MrFOR_resources_init_fr.jl/coverage.svg?branch=main)](http://codecov.io/github/ecoformod/MrFOR_resources_init_fr.jl?branch=main) + + diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 0000000..a303fff --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1,2 @@ +build/ +site/ diff --git a/docs/Project.toml b/docs/Project.toml new file mode 100644 index 0000000..dfa65cd --- /dev/null +++ b/docs/Project.toml @@ -0,0 +1,2 @@ +[deps] +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" diff --git a/docs/make.jl b/docs/make.jl new file mode 100644 index 0000000..8b21325 --- /dev/null +++ b/docs/make.jl @@ -0,0 +1,18 @@ +using Documenter +using MrFOR_resources_init_fr + +push!(LOAD_PATH,"../src/") +makedocs(sitename="MrFOR_resources_init_fr.jl Documentation", + pages = [ + "Index" => "index.md", + "An other page" => "anotherPage.md", + ], + format = Documenter.HTML(prettyurls = false) +) +# Documenter can also automatically deploy documentation to gh-pages. +# See "Hosting Documentation" and deploydocs() in the Documenter manual +# for more information. 
+deploydocs( + repo = "github.com/sylvaticus/MrFOR_resources_init_fr.jl.git", + devbranch = "main" +) diff --git a/docs/src/anotherPage.md b/docs/src/anotherPage.md new file mode 100644 index 0000000..7dfd307 --- /dev/null +++ b/docs/src/anotherPage.md @@ -0,0 +1,48 @@ +# The MrFOR_resources_init_fr Module + + +```@docs +MrFOR_resources_init_fr +``` + +## Module Index + +```@index +Modules = [MrFOR_resources_init_fr] +Order = [:constant, :type, :function, :macro] +``` +## Detailed API + +```@autodocs +Modules = [MrFOR_resources_init_fr] +Order = [:constant, :type, :function, :macro] +``` + +# Some manual code that is executed during doc compilation + +```@setup abc +using DataFrames +println("This is printed during doc compilation") +@info +a = [1,2] +b = a .+ 1 +``` + +```@example abc +b # hide +``` + +```@example abc +DataFrame(A=[1,2,3],B=[10,20,30]) # hide +``` + + +Test + +```@eval +using DataFrames, Latexify +df = DataFrame(a=[1,2,3],b=[10,20,30]) +nothing +mdtable(df,latex=false) +``` + diff --git a/docs/src/index.md b/docs/src/index.md new file mode 100644 index 0000000..ffe1832 --- /dev/null +++ b/docs/src/index.md @@ -0,0 +1,3 @@ +# MrFOR_resources_init_fr.jl + +Documentation for [MrFOR_resources_init_fr.jl](https://github.com/sylvaticus/MrFOR_resources_init_fr.jl/) diff --git a/src/MrFOR_resources_init_fr.jl b/src/MrFOR_resources_init_fr.jl index 7dbd529..af613e6 100644 --- a/src/MrFOR_resources_init_fr.jl +++ b/src/MrFOR_resources_init_fr.jl @@ -1,5 +1,32 @@ + +""" + MrFOR_resources_init_fr + +Here you are, you found an awesome package :-) + +""" module MrFOR_resources_init_fr -greet() = print("Hello World!") +export data_path + +using Statistics, Random, Downloads # Standard Library (shipped with Julia) +using FTPClient +using DataFrames, CSV, Plots, DataDeps, ZipFile, DataStructures, JLD2, Pipe +using GeoFormatTypes, ArchGDAL, Rasters, RasterDataSources +using BetaML +#import DecisionTree +#plotlyjs() +#gr() +#pythonplot() +Random.seed!(123) # fix 
random seed + +(X,Y) = 1,2 # Workaround for Rasters.jl bug https://github.com/rafaqz/DimensionalData.jl/issues/493 +# Note that the first dimension is X (the cols!) and second is Y (the row!), i.e. opposite of matrix ordering! + +data_path = joinpath(@__DIR__,"data") +ENV["RASTERDATASOURCES_PATH"] = data_path +include("getdata.jl") + + end # module MrFOR_resources_init_fr diff --git a/src/crunchdata.jl b/src/crunchdata.jl new file mode 100644 index 0000000..ef77263 --- /dev/null +++ b/src/crunchdata.jl @@ -0,0 +1,137 @@ + + +function impute_X(data_path,X;force=false,verbosity=1) + verbosity >= 1 && println("Iputing missing values..") + dest = joinpath(data_path,"temp","full_X.jld") + + if ispath(dest) && !force + return JLD2.load(dest, "full_X") + end + + if !isdir(joinpath(data_path,"temp")) + mkdir(joinpath(data_path,"temp")) + end + + #mod = RFImputer(n_trees=30,max_depth=10,recursive_passages=2) + mod = UniversalImputer(estimator=DecisionTree.RandomForestRegressor(max_depth=10,n_trees=10), + fit_function = DecisionTree.fit!, predict_function=DecisionTree.predict, + recursive_passages=3) + X_full_matrix = fit!(mod,Matrix(X)) + X_full = DataFrame(X_full_matrix,names(X)) + + JLD2.jldopen(dest, "w") do f + f["full_X"] = X_full + end + + return X_full +end + + +function get_nn_trained_model(xtrain,ytrain,xval,yval;force=false,model_file="models.jld",maxepochs=5,scmodel=Scaler(),verbosity=STD) + #model_file="models.jld" + #force = false + #xtrain = xtrain_s + #sditrain = sditrain_s + #stemntrain = stemntrain_s + #epochs=[5,5,5] + #verbosity = STD + + if !force + models = model_load(model_file) + haskey(models,"bml_nnmodel") && return models["bml_nnmodel"] + end + + # Prestep: scaling + xtrain_s = predict(scmodel,xtrain) + xval_s = predict(scmodel,xval) + + verbosity >= STD && @info "Training vol model.." 
+ #= + # Define the Artificial Neural Network model + l1_age = ReplicatorLayer(8) + l1_sp = ReplicatorLayer(8) + l1_soil = DenseLayer(44,60, f=relu) + l1_oth = DenseLayer(4,6, f=relu) + l1 = GroupedLayer([l1_age,l1_sp,l1_soil,l1_oth]) + + l2_age = DenseLayer(8,3, f=relu) + l2_sp = DenseLayer(8,3, f=relu) + l2_soil = DenseLayer(60,5, f=relu) + l2_oth = ReplicatorLayer(6) + l2 = GroupedLayer([l2_age,l2_sp,l2_soil,l2_oth]) + + l3_age = ReplicatorLayer(3) + l3_sp = ReplicatorLayer(3) + l3_soiloth = DenseLayer(11,11) + l3 = GroupedLayer([l3_age,l3_sp,l3_soiloth]) + + l4 = DenseLayer(17,17, f=relu) + l5 = DenseLayer(17,1, f=relu) + nnm = NeuralNetworkEstimator(layers=[l1,l2,l3,l4,l5], batch_size=16, epochs=1, verbosity=verbosity) + =# + nnm = NeuralNetworkEstimator( batch_size=16, epochs=1, verbosity=verbosity) + + rmes_train = Float64[] + rmes_test = Float64[] + for e in 1:maxepochs + verbosity >= STD && @info "Epoch $e ..." + # Train the model (using the ADAM optimizer by default) + fit!(nnm,xtrain_s,ytrain) # Fit the model to the (scaled) data + ŷtrain = predict(nnm,xtrain_s) + ŷval = predict(nnm,xval_s ) + rme_train = relative_mean_error(ytrain,ŷtrain) # 0.1517 # 0.1384 # 0.165 + rme_test = relative_mean_error(yval,ŷval) + push!(rmes_train,rme_train) + push!(rmes_test,rme_test) + end + display(plot([rmes_train[2:end] rmes_test[2:end]],title="Rel mean error per epoch", labels=["train rme" "test rme"])) + display(plot(info(nnm)["loss_per_epoch"][2:end],title="Loss per epoch", label=nothing)) + + model_save(model_file,true;bml_nnmodel=nnm) + return nnm + +end + + +function get_rf_trained_model(xtrain,ytrain;force=false,model_file="models.jld",verbosity=STD) + #DecisionTree.fit!(rfmodel,fit!(Scaler(),xtrain),ytrain) + #ŷtrain = DecisionTree.predict(rfmodel,fit!(Scaler(),xtrain)) + #ŷtest = DecisionTree.predict(rfmodel,fit!(Scaler(),xtest)) + + models = model_load(model_file) + (haskey(models,"bml_rfmodel") && !force ) && return models["bml_rfmodel"] + # Using Random 
Forest model + rfmodel = RandomForestEstimator(max_depth=20,verbosity=verbosity) + # rfmodel = DecisionTree.RandomForestRegressor() + fit!(rfmodel,xtrain,ytrain) + model_save(model_file,false;bml_rfmodel=rfmodel) + return rfmodel + +end + + +function get_estvol(x_s,mod,nAgeClasses,nSpgrClasses;force=false,data_file="estvol.csv") + if ispath(data_file) && !force + return CSV.read(data_file,DataFrame) + end + estvol = DataFrame(r=Int64[],c=Int64[],ecoreg=Int64[],spgr=Int64[],agegr=Int64[],estvol=Float64[]) + x_mod = copy(x_s) + for px in 1:size(x_mod,1), is in 1:nSpgrClasses, ia in 1:nAgeClasses #size(X_full,1) + r = X_full.R[px] + c = X_full.C[px] + ecoreg_px = ecoregion_full[px] + spgr = is + agegr = ia + # modifying the record that we use as feature to predict the volumes + x_mod[px,1:nAgeClasses] .= 0.0 + x_mod[px,1+nAgeClasses:nAgeClasses+nSpgrClasses] .= 0.0 + x_mod[px,ia] = 1.0 + x_mod[px,is+nAgeClasses] = 1.0 + + evol = predict(mod,x_mod[px,:]')[1] + #evol= 1.0 + push!(estvol,[r,c,ecoreg_px,spgr,agegr,evol]) + end + CSV.write(data_file,estvol) + return estvol +end diff --git a/src/getdata.jl b/src/getdata.jl new file mode 100644 index 0000000..ec524ee --- /dev/null +++ b/src/getdata.jl @@ -0,0 +1,376 @@ +# See also Copernicus or CHELSA for climate: +# https://cds.climate.copernicus.eu/cdsapp#!/search?type=dataset +# https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-land?tab=overview +# https://os.zhdk.cloud.switch.ch/envicloud/chelsa/chelsa_V2/EUR11/documentation/CHELSA_EUR11_technical_documentation.pdf + +# ------------------------------------------------------------------------------ +# Utility functions.... + +""" + unzip(file,exdir="") + +Unzip a zipped archive using ZipFile + +# Arguments +- `file`: a zip archive to unzip and extract (absolute or relative path) +- `exdir=""`: an optional directory to specify the root of the folder where to extract the archive (absolute or relative). 
+ +# Notes +- The function doesn’t perform a check to see if all the zipped files have a common root. + +# Examples + +```julia +julia> unzip("myarchive.zip","myoutputdata") +``` +""" +function unzip(file,exdir="") + fileFullPath = isabspath(file) ? file : joinpath(pwd(),file) + basePath = dirname(fileFullPath) + outPath = (exdir == "" ? basePath : (isabspath(exdir) ? exdir : joinpath(pwd(),exdir))) + isdir(outPath) ? "" : mkdir(outPath) + zarchive = ZipFile.Reader(fileFullPath) + for f in zarchive.files + fullFilePath = joinpath(outPath,f.name) + if (endswith(f.name,"/") || endswith(f.name,"\\")) + mkdir(fullFilePath) + else + write(fullFilePath, read(f)) + end + end + close(zarchive) +end + +# Modified to get the info if it is a directory and to list info on any directories, not only current one +function FTP_readdir(ftp::FTP,dir=pwd(ftp);details=false) + resp = nothing + try + resp = FTPClient.ftp_command(ftp.ctxt, "LIST $dir") + catch err + if isa(err, FTPClient.FTPClientError) + err.msg = "Failed to list directories." 
+ end + rethrow() + end + dir = split(read(resp.body, String), '\n') + dir = filter(x -> !isempty(x), dir) + + if(details) + entries = NamedTuple{(:type, :permissions, :n, :user, :group, :size, :date, :name), Tuple{Char, SubString{String}, SubString{String}, SubString{String}, SubString{String}, SubString{String}, SubString{String}, String}}[] + for item in dir + details = split(item) + details_nt = (type=details[1][1],permissions=details[1][2:end],n=details[2],user=details[3],group=details[4],size=details[5],date=join(details[6:8], ' '),name=join(details[9:end], ' ')) + push!(entries,details_nt) + end + return entries + else + names = [join(split(line)[9:end], ' ') for line in dir] + return names + end +end + +function download_dir(ftp,dir="";as="",verbosity=0, recursive=true, force=false, dryrun=false, mode=binary_mode, exclude=[], ftp_basepath="",dest_basepath="" ) # latest two used for recursion only + (dir == "") && (dir = pwd(ftp)) + if as == "" + if endswith(dir,'/') + as = split(dir,'/')[end-1] + else + as = split(dir,'/')[end] + end + end + if ftp_basepath == "" + if startswith(dir,"/") + ftp_basepath = dir + else + ftp_basepath = joinpath(pwd(ftp),dir) + end + end + (dest_basepath == "") && (dest_basepath = as) + verbosity > 0 && println("Processing ftp directory `$(ftp_basepath)`:") + items = FTP_readdir(ftp,dir;details=true) + if !isempty(items) + if !ispath(dest_basepath) + mkdir(dest_basepath) + elseif force + rm(dest_basepath,recursive=true) + mkdir(dest_basepath) + else + error("`$(dest_basepath)` exists on local system. Use `force=true` to override.") + end + end + for item in items + ftpname = joinpath(ftp_basepath,item.name) + destname = joinpath(dest_basepath,item.name) + if(item.type != 'd') + ftpname in exclude && continue + verbosity > 1 && print(" - downloading ftp file `$(ftpname)` as local file `$(destname)`...") + if ispath(destname) + if force + rm(destname) + else + error("`$(destname)` exists on local system. 
Use `force=true` to override.") + end + end + dryrun ? write(destname,ftpname) : FTPClient.download(ftp, ftpname, destname, mode=mode) + verbosity > 1 && println(" done!") + elseif recursive + newdir = joinpath(ftp_basepath,item.name) + download_dir(ftp,newdir;as=destname,ftp_basepath=newdir,dest_basepath=destname, verbosity=verbosity, recursive=recursive, force=force, mode=mode, dryrun=dryrun, exclude=exclude) + end + end +end + +function expand_classes(input_filename,classes;verbosity=2,force=false,to=nothing,writefile=true) + out_dir = joinpath(dirname(input_filename),"classes") + base_name = splitext(basename(input_filename))[1] + class_vars = OrderedDict(["$(base_name)_cl_$(cl)" => joinpath(out_dir,"$(base_name)_cl_$(cl).tif") for cl in classes]) + (ispath(out_dir) && !force ) && return class_vars + rm(out_dir,recursive=true,force=true) + mkdir(out_dir) + base_raster = Raster(input_filename) #|> replace_missing + class_rasters = Dict{String,Raster}() + (nC,nR) = size(base_raster) + missval = missingval(base_raster) + missval_float = Float32(missval) + for cl in classes + (verbosity>1) && println("Processing class $cl ...") + class_rasters["cl_$(cl)"] = map(x-> ( (x == missval) ? missval_float : (x == cl ? 
1.0f0 : 0.0f0 ) ), base_raster) + outfile = joinpath(out_dir,"$(base_name)_cl_$(cl).tif") + if !isnothing(to) + class_rasters["cl_$(cl)"] = resample(class_rasters["cl_$(cl)"],to=to,method=:average) + end + writefile && write(outfile, class_rasters["cl_$(cl)"] ) + end + return class_vars +end + +# ------------------------------------------------------------------------------ +# Specific download functions + +function get_improved_forest_structure_data(data_path;force=false) + # Associated paper: https://doi.org/10.3390/rs14020395 + # Alternative manual data download: wget -r -nH --cut-dirs=2 -nc ftp://anonymous@palantir.boku.ac.at//Public/ImprovedForestCharacteristics + dest_dir = joinpath(data_path,"ImprovedForestCharacteristics") + forest_struct_vars = OrderedDict( + "agecl1" => joinpath(dest_dir,"Age","agecl_1_perc.tif"), + "agecl2" => joinpath(dest_dir,"Age","agecl_2_perc.tif"), + "agecl3" => joinpath(dest_dir,"Age","agecl_3_perc.tif"), + "agecl4" => joinpath(dest_dir,"Age","agecl_4_perc.tif"), + "agecl5" => joinpath(dest_dir,"Age","agecl_5_perc.tif"), + "agecl6" => joinpath(dest_dir,"Age","agecl_6_perc.tif"), + "agecl7" => joinpath(dest_dir,"Age","agecl_7_perc.tif"), + "agecl8" => joinpath(dest_dir,"Age","agecl_8_perc.tif"), + #"sdi" => joinpath(dest_dir,"Stand_Density_Index", "sdi.tif"), + #"stemn" => joinpath(dest_dir,"Stem_Number", "nha.tif"), + "spgr1" => joinpath(dest_dir,"Tree_Species_Group","tsg_1_perc.tif"), + "spgr2" => joinpath(dest_dir,"Tree_Species_Group","tsg_2_perc.tif"), + "spgr3" => joinpath(dest_dir,"Tree_Species_Group","tsg_3_perc.tif"), + "spgr4" => joinpath(dest_dir,"Tree_Species_Group","tsg_4_perc.tif"), + "spgr5" => joinpath(dest_dir,"Tree_Species_Group","tsg_5_perc.tif"), + "spgr6" => joinpath(dest_dir,"Tree_Species_Group","tsg_6_perc.tif"), + "spgr7" => joinpath(dest_dir,"Tree_Species_Group","tsg_7_perc.tif"), + "spgr8" => joinpath(dest_dir,"Tree_Species_Group","tsg_8_perc.tif"), + ) + if isdir(dest_dir) && !force + return 
forest_struct_vars + end + ftp_init(); + ftp = FTP(hostname = "palantir.boku.ac.at", username = "anonymous", password = "") + download_dir(ftp,"/Public/ImprovedForestCharacteristics",as=dest_dir,verbosity=2,force=force) + return forest_struct_vars +end + +function get_dtm(data_path;force=false,to=nothing) + dtm_filename = joinpath(data_path,"WorldClim","Elevation","dtm_1Km_eu.tif") + dtm_vars = OrderedDict("dtm"=>dtm_filename) + if ispath(joinpath(data_path,"WorldClim","Elevation")) && !force + return dtm_vars + end + if(force) + rm(joinpath(data_path,"WorldClim","Elevation"), recursive=true, force=true) + end + dtm_w_filename = getraster(WorldClim{Elevation}, :elev; res="30s") + + if !isnothing(to) + dtm_w = Raster(dtm_w_filename) + dtm_eu = resample(dtm_w,to=to,method=:average) + write(dtm_filename, dtm_eu) + else + mv(dtm_w_filename,dtm_filename) + end + return dtm_vars +end + + +function get_past_npp(data_path;force=false,to=nothing) + # Associated paper: https://doi.org/doi:10.3390/rs8070554 + dest_dir = joinpath(data_path,"MODIS_EURO") + npp_filename = joinpath(data_path,"MODIS_EURO","npp_mean_eu_8km.tif") + npp_vars = OrderedDict("npp" => npp_filename) + npp_hd_filename = joinpath(data_path,"MODIS_EURO","EU_NPP_mean_2000_2012.tif") + if isdir(dest_dir) && !force + return npp_vars + end + ftp_init(); + ftp = FTP(hostname = "palantir.boku.ac.at", username = "anonymous", password = "") + download_dir(ftp,"/Public/MODIS_EURO",as=dest_dir,verbosity=2,force=force,exclude=["/Public/MODIS_EURO/Neumann et al._2016_Creating a Regional MODIS Satellite-Driven Net Primary Production Dataset for European Forests.pdf"]) + if isnothing(to) + cp(npp_hd_filename,npp_filename) + return npp_vars + else + npp_hr = Raster(npp_hd_filename,missingval=65535.0f0) + npp = resample(npp_hr,to=to,method=:average) + write(npp_filename, npp, force=true) + return npp_vars + end +end + + +function 
download_soil_data(data_path,soil_codes,soil_ph_vars,soil_chem_vars;force=false,verbose=false) + + # Soil: + #= + All soil datasets: + https://esdac.jrc.ec.europa.eu/resource-type/european-soil-database-soil-properties + + Soil_physical + https://esdac.jrc.ec.europa.eu/content/topsoil-physical-properties-europe-based-lucas-topsoil-data + https://www.sciencedirect.com/science/article/pii/S0016706115300173 + + Soil chemistry + https://esdac.jrc.ec.europa.eu/content/chemical-properties-european-scale-based-lucas-topsoil-data + https://www.sciencedirect.com/science/article/pii/S0016706119304768 + + Soil other derived data + https://esdac.jrc.ec.europa.eu/content/european-soil-database-derived-data + Hiederer, R. 2013. Mapping Soil Properties for Europe - Spatial Representation of Soil Database Attributes. Luxembourg: Publications Office of the European Union - 2013 - 47pp. EUR26082EN Scientific and Technical Research series, ISSN 1831-9424, doi:10.2788/94128 + Hiederer, R. 2013. Mapping Soil Typologies - Spatial Decision Support Applied to European Soil Database. Luxembourg: Publications Office of the European Union - 2013 - 147pp. 
EUR25932EN Scientific and Technical Research series, ISSN 1831-9424, doi:10.2788/8728 + =# + soil_path = joinpath(data_path,"soil") + + for var in soil_ph_vars + urlname = "https://esdac.jrc.ec.europa.eu/$(soil_codes[1])/_33_GSM/$(var)_Extra.zip" + zipname = joinpath(soil_path,"$(var).zip") + zipfolder = joinpath(soil_path,var) + #tifname = joinpath(soil_path,var,"$(var).tif") + (ispath(zipfolder) && !force ) && continue + Downloads.download(urlname,zipname, verbose=verbose) + unzip(zipname,zipfolder) + rm(zipname) + end + + for var in soil_chem_vars + urlname = "https://esdac.jrc.ec.europa.eu/public_path/shared_folder/dataset/$(soil_codes[2])/$(var).zip" + zipname = joinpath(soil_path,"$(var).zip") + zipfolder = joinpath(soil_path,var) + #tifname = joinpath(soil_path,var,"$(var).tif") + (ispath(zipfolder) && !force ) && continue + Downloads.download(urlname,zipname, verbose=verbose) + unzip(zipname,zipfolder) + rm(zipname) + end + + urlname = "https://esdac.jrc.ec.europa.eu/$(soil_codes[3])/_24_DER/STU_EU_Layers.zip" + zipname = joinpath(soil_path,"STU_EU_Layers.zip") + zipfolder = joinpath(soil_path,"STU_EU_Layers") + + (ispath(zipfolder) && !force ) && return nothing + Downloads.download(urlname,zipname, verbose=verbose) + unzip(zipname,zipfolder) + rm(zipname) +end + +function get_soil_data(data_path,soil_codes,soil_ph_vars,soil_ph_vars2,soil_chem_vars,soil_chem_vars2,soil_oth_vars;force=false,verbose=false,to=nothing) + soil_path = joinpath(data_path,"soil") + soil_classes = 1:12 + download_soil_data(data_path,soil_codes,soil_ph_vars,soil_chem_vars;force=force,verbose=verbose) + texture_filename = joinpath(data_path,"soil","TextureUSDA","textureUSDA.tif") + texture_classes = expand_classes(texture_filename,soil_classes;verbosity=2,force=force,to=to) + soil_vars1 = vcat(soil_ph_vars,soil_chem_vars,soil_oth_vars) + soil_vars2 = vcat(soil_ph_vars2,soil_chem_vars2,soil_oth_vars) + soil_vars = OrderedDict{String,String}() + nvars_ph_chem = 
length(soil_ph_vars)+length(soil_chem_vars) + for (i,var) in enumerate(soil_vars1) + if i <= nvars_ph_chem + if var != "TextureUSDA" + saved_file = joinpath(soil_path,var,"$(soil_vars2[i]).tif") + final_file = joinpath(soil_path,var,"$(soil_vars2[i])_eu.tif") + if ispath(final_file) && !force + soil_vars[var] = final_file + continue + end + if !isnothing(to) + orig_raster = Raster(saved_file) + resampled_raster = resample(orig_raster,to=to,method=:average) + write(final_file, resampled_raster, force=true) + else + cp(saved_file, final_file) + end + soil_vars[var] = final_file + else + soil_vars = OrderedDict(soil_vars..., texture_classes...) + end + else + saved_file = joinpath(soil_path,"STU_EU_Layers","$(soil_vars2[i]).rst") + final_file = joinpath(soil_path,"STU_EU_Layers","$(soil_vars2[i])_eu.tif") + if ispath(final_file) && !force + soil_vars[var] = final_file + continue + end + if !isnothing(to) + orig_raster = Raster(saved_file,crs="EPSG:3035") # ,missingval=0.0f0) missing value is zero, but zero is used also as true value! 
+ resampled_raster = resample(orig_raster,to=to,method=:average) + write(final_file, resampled_raster, force=true) + else + cp(saved_file, final_file) + end + soil_vars[var] = final_file + end + end + return soil_vars +end + + +function get_ecoregions_data(data_path;force=false,verbose=false,to=nothing) + # landing page: https://www.eea.europa.eu/en/datahub/datahubitem-view/c8c4144a-8c0e-4686-9422-80dbf86bc0cb?activeAccordion= + #force = false + #verbose = true + #to = nothing + #to = Yraster + + subpath = joinpath(data_path,"ecoregions") + classes = collect([1:9;11:13]) # classes 10, 14 and 15 are out of the study area + urlname = "https://sdi.eea.europa.eu/datashare/s/sTnNeQK69iYNgCe/download" + dataname = "eea_r_3035_1_km_env-zones_p_2018_v01_r00" + zippath = joinpath(subpath,"$(dataname).zip") + zipfolder = joinpath(subpath,dataname) + tifname = joinpath(zipfolder,"$(dataname).tif") + tifname_1km = joinpath(zipfolder,"ecoregions_1km.tif") + tifname_8km = joinpath(zipfolder,"ecoregions_8km.tif") + (ispath(zipfolder) && !force ) && return Dict("ecoregions" => tifname_8km) #expand_classes(tifname_1km,classes;verbosity=verbose,force=false,to=to) + + Downloads.download(urlname,zippath, verbose=verbose) + rm(zipfolder, force=true,recursive=true) + unzip(zippath,subpath) # the zip archive contain itself the folder with the data inside + rm(zippath) + + # before the extraction of the classes we get the base raster res from 100m to 1km + base_raster = Raster(tifname) + base_raster_1km = resample(base_raster, res=1000) + base_raster_8km = resample(base_raster_1km, to=to, method=:mode) + write(tifname_8km, base_raster_8km, force=true) + return Dict("ecoregions" => tifname_8km) + + #= + #base_raster_1km_Float32 = Float32.(base_raster_1km) + # replacing 0 as missing value to 99, as we need 0 as a value ! + missval = missingval(base_raster_1km) + base_raster_1km = map(x-> ( (x == missval) ? 
UInt8(99) : x), base_raster_1km) + base_raster_1km = rebuild(base_raster_1km; missingval=UInt8(99)) + write(tifname_1km, base_raster_1km, force=true) + + ecoregions_classes = expand_classes(tifname_1km,classes;verbosity=2,force=force,to=to) + + return ecoregions_classes + =# +end \ No newline at end of file diff --git a/src/yield_model.jl b/src/yield_model.jl new file mode 100644 index 0000000..95f144a --- /dev/null +++ b/src/yield_model.jl @@ -0,0 +1,287 @@ +cd(@__DIR__) +using Pkg +Pkg.activate(".") +# Pkg.add(["Plots", "DataFrames", "DataDeps", "ArchGDAL", "Rasters", "RasterDataSources","BetaML"]) +# Pkg.instantiate() +using Statistics, Random, Downloads # Standard Library (shipped with Julia) +using FTPClient +using DataFrames, CSV, Plots, DataDeps, ZipFile, DataStructures, JLD2, Pipe +using GeoFormatTypes, ArchGDAL, Rasters, RasterDataSources +using BetaML +#import DecisionTree +#plotlyjs() +#gr() +#pythonplot() +Random.seed!(123) # fix random seed + +(X,Y) = 1,2 # Workaround for Rasters.jl bug https://github.com/rafaqz/DimensionalData.jl/issues/493 +# Note that the first dimension is X (the cols!) and second is Y (the row!), i.e. opposite of matrix ordering! + +data_path = joinpath(@__DIR__,"data") +ENV["RASTERDATASOURCES_PATH"] = data_path +include("getdata.jl") + +# We first get our Y raster as everything will be set based on it.. +Yfilename = joinpath(data_path,"vol.tif") +ispath(Yfilename) || Downloads.download("ftp://anonymous@palantir.boku.ac.at//Public/ImprovedForestCharacteristics/Volume/vol.tif",Yfilename) +Yraster = Raster(Yfilename) |> replace_missing +(nC,nR) = size(Yraster) + + +# European coordinates +# - geographical +long_lb, long_ub = -12, 35 +lat_lb, lat_ub = 33, 72 +eu_bounds_geo = Rasters.X(long_lb .. long_ub), Rasters.Y(lat_lb .. 
lat_ub) +# - projected +crs = "EPSG:3035" # projectd units are meters +X_lb, X_ub = dims(Yraster)[1][1]-8000,dims(Yraster)[1][end]+80000 # 8km margin +Y_lb, Y_ub = dims(Yraster)[2][1]-8000,dims(Yraster)[2][end]+80000 # 8km margin +eu_bounds_proj = Rasters.X(X_lb .. X_ub), Rasters.Y(Y_lb .. Y_ub) + +# use these as: myraster_eu = myraster_w[eu_bounds_geo...] + +# Get the codes from the links in the download page after having filled the form for chemical, physical and other data in the forms below: +# https://esdac.jrc.ec.europa.eu/content/topsoil-physical-properties-europe-based-lucas-topsoil-data +# https://esdac.jrc.ec.europa.eu/content/chemical-properties-european-scale-based-lucas-topsoil-data +# https://esdac.jrc.ec.europa.eu/content/european-soil-database-derived-data +soil_codes = ["wyz_856","60","wyz_856"] +soil_ph_vars = ["Clay","Silt","Sand","CoarseFragments","BulkDensity","TextureUSDA","AWC"] +soil_ph_vars2 = ["Clay","Silt1","Sand1","Coarse_fragments","Bulk_density","textureUSDA","AWC"] # some vars have slighly different name in the zipped file, argh! +soil_chem_vars = ["pH_H2O","pH_CaCl","CEC","Caco3","CN","N","P","K","pH_H2O_ratio_Cacl"] +soil_chem_vars2 = ["pH_H2O","pH_CaCl","CEC","CaCO3","CN","N","P","K","pH_H2O_CaCl"] # some vars have slighly different name in the zipped file, argh! +soil_oth_vars = [ + #"STU_EU_ALLOCATE", + "STU_EU_DEPTH_ROOTS", "STU_EU_T_CLAY", "STU_EU_S_CLAY", "STU_EU_T_SAND", + "STU_EU_S_SAND", "STU_EU_T_SILT", "STU_EU_S_SILT", "STU_EU_T_OC", + "STU_EU_S_OC", "STU_EU_T_BD", "STU_EU_S_BD", "STU_EU_T_GRAVEL", + "STU_EU_S_GRAVEL", "SMU_EU_T_TAWC", "SMU_EU_S_TAWC", "STU_EU_T_TAWC", + "STU_EU_S_TAWC"] + + +# ------------------------------------------------------------------------------ +# Downloading (if needed) and resampling to Y (if needed) + +# The objective of this task is to have the data saved on disk and be ready for +# the analysis at the geo resolution of Y. +# Analysis can also be done on a case-by-case manner. 
The result is always a file. +# Normally a "force" parameter will dictate if redownload/reanalysis is required +# --------- + +# Forest structure data +forest_struct_vars = get_improved_forest_structure_data(data_path,force=false) # only download, no analysis needed + +# --------- +# dtm +dtm_vars = get_dtm(data_path,force=false, to=Yraster) # resample + +# --------- +# npm +npp_vars = get_past_npp(data_path;force=false,to=Yraster) + +# --------- +# soil +soil_vars = get_soil_data(data_path,soil_codes,soil_ph_vars,soil_ph_vars2,soil_chem_vars,soil_chem_vars2,soil_oth_vars,force=false,to=Yraster) + + +# --------- +# ecoregions +# This data will not be used for training as in our tests it doesn't improve predictions (we used the classes) and it may change with cc if we want to model cc +ecoregions_vars = get_ecoregions_data(data_path,force=false,to=Yraster) +ecoregions = Raster(ecoregions_vars["ecoregions"]) + +Xmeta = OrderedDict(forest_struct_vars...,soil_vars...,dtm_vars...,npp_vars...,) # var name => file path + + + + +# ------------------------------------------------------------------------------ +# Loading transformed data + + +Xnames = collect(keys(Xmeta)) +nXnames = length(Xnames) +Xrasters = OrderedDict{String,Raster}([i => Raster(Xmeta[i]) |> replace_missing for i in Xnames]) + +X = DataFrame([Symbol(Xnames[i]) => Float64[] for i in 1:length(Xnames)]) +X.R = Int64[] +X.C = Int64[] +Y = Float64[] +ecoregion = Union{Int64,Missing}[] + +allowmissing!(X) + +# Reformatting the data in a large matrix (records [pixels] x variables) +for r in 1:nR + for c in 1:nC + ismissing(Yraster[c,r]) && continue + row = Dict([Xnames[i] => Xrasters[Xnames[i]][c,r] for i in 1:nXnames]) + row["R"] = r + row["C"] = c + push!(X,row) + push!(Y,Yraster[c,r]) + push!(ecoregion,ecoregions[c,r]) + end +end + +# ------------------------------------------------------------------------------ +# Separating sdi and stemn for 2 steps predictions + + + +# 
------------------------------------------------------------------------------
+# Imputing missing data...
+includet("crunchdata.jl")
+
+#XY = hcat(X,Y)
+#XYfull = dropmissing(XY)
+#X_full= XYfull[:,1:end-1]
+#Y = XYfull[:,end]
+X_full = impute_X(data_path,X,force=false)
+nXnames = length(names(X_full))
+ecoregion_full = Int64.(fit!(RFImputer(forced_categorical_cols=[nXnames+1]),hcat(Matrix(X_full),ecoregion))[:,nXnames+1])
+
+fields_not_to_scale = [(f,i) for (i,f) in enumerate(names(X_full)) if mean(X_full[!,f]) > 0.001 && mean(X_full[!,f]) < 10]
+
+fields_not_to_scale_names, fields_not_to_scale_idx = getindex.(fields_not_to_scale,1) , getindex.(fields_not_to_scale,2)
+scalermodel = Scaler(skip=fields_not_to_scale_idx)
+fit!(scalermodel,Matrix(X_full))
+
+# ------------------------------------------------------------------------------
+# Running the ML model....
+
+# Split the data in training/testing sets
+((xtrain,xval,xtest),(ytrain,yval,ytest)) = partition([Matrix(X_full),Y],[0.6,0.2,0.2])
+(ntrain, nval,ntest) = size.([xtrain,xval,xtest],1)
+nD = size(X_full,2)
+x_s = predict(scalermodel,Matrix(X_full))
+xtrain_s = predict(scalermodel,Matrix(xtrain)) # xtrain scaled
+xval_s = predict(scalermodel,Matrix(xtest)) # NOTE(review): built from xtest, not xval — as written xval_s duplicates xtest_s; confirm Matrix(xval) was intended
+xtest_s = predict(scalermodel,Matrix(xtest)) # xtest scaled
+
+
+nnm = get_nn_trained_model(xtrain,ytrain,xval,yval;force=false,model_file="models.jld",maxepochs=50,scmodel =scalermodel)
+
+plot(info(nnm)["loss_per_epoch"][1:end],title="Loss per epoch", label=nothing)
+
+
+# Obtain predictions and test them against the ground-truth observations
+
+ŷtrain = predict(nnm,xtrain_s)
+ŷtest = predict(nnm,xtest_s )
+
+rme_train = relative_mean_error(ytrain,ŷtrain) # 0.1517 # 0.1384 # 0.165
+rme_test = relative_mean_error(ytest,ŷtest) # 0.1613 # 0.1766 # 0.183
+
+# Plotting observed vs estimated...
+scatter(ytrain,ŷtrain,xlabel="vols (obs)",ylabel="vols (est)",label=nothing,title="Est vs. 
obs in training period",xrange=[0,1000],yrange=[0,1000]) +scatter(ytest,ŷtest,xlabel="vols (obs)",ylabel="vols (est)",label=nothing,title="Est vs. obs in testing period",xrange=[0,1000],yrange=[0,1000]) + +# ------------------------------------------------------------------------------ +# Creating a raster with the estimated values + +Y_est = @pipe X_full |> Matrix |> predict(scalermodel,_) |> predict(nnm,_) +vol_est = deepcopy(Yraster) + +for i in 1:length(Y_est) + r = convert(Int64,X_full[i,"R"]) + c = Int64(X_full[i,"C"]) + # println("$r , $c : $(vol_est[c,r]) (true) \t $(Y_est[i]) (est)") + vol_est[c,r] = Y_est[i] #missing #Y_est[i] +end + +plot(Yraster,title="Actual volumes") +plot(vol_est,title="Model estimated volumes (NN model)") + +# Only relative to test pixels + +vol_true = deepcopy(Yraster) +vol_est = deepcopy(Yraster) + +# initialisation with missing data +[vol_true[c,r] = missing for r in 1:nR, c in 1:nC] +[vol_est[c,r] = missing for r in 1:nR, c in 1:nC] + + +for i in 1:length(ŷtest) + r = convert(Int64,xtest[i,end-1]) + c = Int64(xtest[i,end]) + vol_true[c,r] = ytest[i] + vol_est[c,r] = ŷtest[i] +end + +plot(vol_true,title="Actual volumes (test pixels)") +plot(vol_est,title="Model estimated volumes (test pixels NN model)") + + +# ------------------------------------------------------------------------------ +# Using Random Forest model + +rfm = get_rf_trained_model(xtrain_s,ytrain;force=false,model_file="models.jld") +ŷtrain = predict(rfm, predict(scalermodel,xtrain)) +ŷtest = predict(rfm, predict(scalermodel,xtest)) + +rme_train = relative_mean_error(ytrain,ŷtrain) # 0.109 0.109 0.077 +rme_test = relative_mean_error(ytest,ŷtest) # 0.193 0.2114 0.2226 + +# Plotting observed vs estimated... +scatter(ytrain,ŷtrain,xlabel="vols (obs)",ylabel="vols (est)",label=nothing,title="Est vs. obs in training period") +scatter(ytest,ŷtest,xlabel="vols (obs)",ylabel="vols (est)",label=nothing,title="Est vs. 
obs in testing period") + +Y_est = @pipe X_full |> Matrix |> predict(scalermodel,_) |> predict(rfm,_) +vol_est = deepcopy(Yraster) + +for i in 1:length(Y_est) + r = X[i,"R"] + c = X[i,"C"] + # println("$r , $c : $(vol_est[r,c]) (true) \t $(Y_est[i]) (est)") + vol_est[c,r] = Y_est[i] +end + +plot(Yraster,title="Actual volumes") +plot(vol_est,title="Model estimated volumes (rf)") + +# ------------------------------------------------------------------------------ +# Manual test with a single class +Xm = copy(X_full) + +Xm.agecl1 .= 1.0 +Xm.agecl2 .= 0.0 +Xm.agecl3 .= 0.0 +Xm.agecl4 .= 0.0 +Xm.agecl5 .= 0.0 +Xm.agecl6 .= 0.0 +Xm.agecl7 .= 0.0 +Xm.agecl8 .= 0.0 + +Ya1 = @pipe Xm |> Matrix |> predict(scalermodel,_) |> predict(nnm,_) +vol_est = deepcopy(Yraster) +for i in 1:length(Y_est) + r = X[i,"R"] + c = X[i,"C"] + # println("$r , $c : $(vol_est[r,c]) (true) \t $(Y_est[i]) (est)") + vol_est[c,r] = Ya1[i] +end + +plot(Yraster,title="Actual volumes") +plot(vol_est,title="Model estimated volumes age cl 1") + +## + +estvol = get_estvol(x_s,nnm,8,8;force=true,data_file="estvol.csv") + +estvol_byreg = DataFrames.combine(groupby(estvol,["ecoreg","spgr","agegr"]), "estvol" => median => "estvol", nrow) + +estvol_byreg[estvol_byreg.ecoreg .== 1 .&& estvol_byreg.spgr .== 2 ,:] + +for spgr in 1:8 + for er in 0:13 + if er == 0 + plot(estvol_byreg[estvol_byreg.ecoreg .== er .&& estvol_byreg.spgr .== spgr ,"estvol"], labels="er $er", title="Fitted volumes spgr $spgr") + elseif er < 13 + plot!(estvol_byreg[estvol_byreg.ecoreg .== er .&& estvol_byreg.spgr .== spgr ,"estvol"], labels="er $er", title="Fitted volumes spgr $spgr") + else + display(plot!(estvol_byreg[estvol_byreg.ecoreg .== er .&& estvol_byreg.spgr .== spgr ,"estvol"], labels="er $er", title="Fitted volumes spgr $spgr")) + end + end +end \ No newline at end of file diff --git a/test/Project.toml b/test/Project.toml new file mode 100644 index 0000000..0c36332 --- /dev/null +++ b/test/Project.toml @@ -0,0 +1,2 @@ +[deps] 
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/runtests.jl b/test/runtests.jl new file mode 100644 index 0000000..e5e44bd --- /dev/null +++ b/test/runtests.jl @@ -0,0 +1,5 @@ +using Test, MrFOR_resources_init_fr + +out = plusTwo(3) + +@test out == 5 diff --git a/test/temp_file.jl b/test/temp_file.jl new file mode 100644 index 0000000..4f391b4 --- /dev/null +++ b/test/temp_file.jl @@ -0,0 +1,193 @@ +using Resourceload_france + +# We first get our Y raster as everything will be set based on it.. +Yfilename = joinpath(data_path,"vol.tif") +ispath(Yfilename) || Downloads.download("ftp://anonymous@palantir.boku.ac.at//Public/ImprovedForestCharacteristics/Volume/vol.tif",Yfilename) +Yraster = Raster(Yfilename) |> replace_missing +(nC,nR) = size(Yraster) + + +# European coordinates +# - geographical +long_lb, long_ub = -12, 35 +lat_lb, lat_ub = 33, 72 +eu_bounds_geo = Rasters.X(long_lb .. long_ub), Rasters.Y(lat_lb .. lat_ub) +# - projected +crs = "EPSG:3035" # projectd units are meters +X_lb, X_ub = dims(Yraster)[1][1]-8000,dims(Yraster)[1][end]+80000 # 8km margin +Y_lb, Y_ub = dims(Yraster)[2][1]-8000,dims(Yraster)[2][end]+80000 # 8km margin +eu_bounds_proj = Rasters.X(X_lb .. X_ub), Rasters.Y(Y_lb .. Y_ub) + +# use these as: myraster_eu = myraster_w[eu_bounds_geo...] 
+
+# Get the codes from the links in the download page after having filled the form for chemical, physical and other data in the forms below:
+# https://esdac.jrc.ec.europa.eu/content/topsoil-physical-properties-europe-based-lucas-topsoil-data
+# https://esdac.jrc.ec.europa.eu/content/chemical-properties-european-scale-based-lucas-topsoil-data
+# https://esdac.jrc.ec.europa.eu/content/european-soil-database-derived-data
+soil_codes = ["wyz_856","60","wyz_856"]
+soil_ph_vars = ["Clay","Silt","Sand","CoarseFragments","BulkDensity","TextureUSDA","AWC"]
+soil_ph_vars2 = ["Clay","Silt1","Sand1","Coarse_fragments","Bulk_density","textureUSDA","AWC"] # some vars have slightly different name in the zipped file, argh!
+soil_chem_vars = ["pH_H2O","pH_CaCl","CEC","Caco3","CN","N","P","K","pH_H2O_ratio_Cacl"]
+soil_chem_vars2 = ["pH_H2O","pH_CaCl","CEC","CaCO3","CN","N","P","K","pH_H2O_CaCl"] # some vars have slightly different name in the zipped file, argh!
+soil_oth_vars = [
+    #"STU_EU_ALLOCATE",
+    "STU_EU_DEPTH_ROOTS", "STU_EU_T_CLAY", "STU_EU_S_CLAY", "STU_EU_T_SAND",
+    "STU_EU_S_SAND", "STU_EU_T_SILT", "STU_EU_S_SILT", "STU_EU_T_OC",
+    "STU_EU_S_OC", "STU_EU_T_BD", "STU_EU_S_BD", "STU_EU_T_GRAVEL",
+    "STU_EU_S_GRAVEL", "SMU_EU_T_TAWC", "SMU_EU_S_TAWC", "STU_EU_T_TAWC",
+    "STU_EU_S_TAWC"]
+
+
+# ------------------------------------------------------------------------------
+# Downloading (if needed) and resampling to Y (if needed)
+
+# The objective of this task is to have the data saved on disk and be ready for
+# the analysis at the geo resolution of Y.
+# Analysis can also be done on a case-by-case manner. The result is always a file.
+# Normally a "force" parameter will dictate if redownload/reanalysis is required +# --------- + +# Forest structure data +forest_struct_vars = get_improved_forest_structure_data(data_path,force=false) # only download, no analysis needed + +# --------- +# dtm +dtm_vars = get_dtm(data_path,force=false, to=Yraster) # resample + +# --------- +# npm +npp_vars = get_past_npp(data_path;force=false,to=Yraster) + +# --------- +# soil +soil_vars = get_soil_data(data_path,soil_codes,soil_ph_vars,soil_ph_vars2,soil_chem_vars,soil_chem_vars2,soil_oth_vars,force=false,to=Yraster) + + +# --------- +# ecoregions +# This data will not be used for training as in our tests it doesn't improve predictions (we used the classes) and it may change with cc if we want to model cc +ecoregions_vars = get_ecoregions_data(data_path,force=false,to=Yraster) +ecoregions = Raster(ecoregions_vars["ecoregions"]) + +Xmeta = OrderedDict(forest_struct_vars...,soil_vars...,dtm_vars...,npp_vars...,) # var name => file path + + + + +# ------------------------------------------------------------------------------ +# Loading transformed data + + +Xnames = collect(keys(Xmeta)) +nXnames = length(Xnames) +Xrasters = OrderedDict{String,Raster}([i => Raster(Xmeta[i]) |> replace_missing for i in Xnames]) + +X = DataFrame([Symbol(Xnames[i]) => Float64[] for i in 1:length(Xnames)]) +X.R = Int64[] +X.C = Int64[] +Y = Float64[] +ecoregion = Union{Int64,Missing}[] + +allowmissing!(X) + +# Reformatting the data in a large matrix (records [pixels] x variables) +for r in 1:nR + for c in 1:nC + ismissing(Yraster[c,r]) && continue + row = Dict([Xnames[i] => Xrasters[Xnames[i]][c,r] for i in 1:nXnames]) + row["R"] = r + row["C"] = c + push!(X,row) + push!(Y,Yraster[c,r]) + push!(ecoregion,ecoregions[c,r]) + end +end + +# ------------------------------------------------------------------------------ +# Separating sdi and stemn for 2 steps predictions + + + +# 
------------------------------------------------------------------------------
+# Imputing missing data...
+includet("crunchdata.jl")
+
+#XY = hcat(X,Y)
+#XYfull = dropmissing(XY)
+#X_full= XYfull[:,1:end-1]
+#Y = XYfull[:,end]
+X_full = impute_X(data_path,X,force=false)
+nXnames = length(names(X_full))
+ecoregion_full = Int64.(fit!(RFImputer(forced_categorical_cols=[nXnames+1]),hcat(Matrix(X_full),ecoregion))[:,nXnames+1])
+
+fields_not_to_scale = [(f,i) for (i,f) in enumerate(names(X_full)) if mean(X_full[!,f]) > 0.001 && mean(X_full[!,f]) < 10]
+
+fields_not_to_scale_names, fields_not_to_scale_idx = getindex.(fields_not_to_scale,1) , getindex.(fields_not_to_scale,2)
+scalermodel = Scaler(skip=fields_not_to_scale_idx)
+fit!(scalermodel,Matrix(X_full))
+
+# ------------------------------------------------------------------------------
+# Running the ML model....
+
+# Split the data in training/testing sets
+((xtrain,xval,xtest),(ytrain,yval,ytest)) = partition([Matrix(X_full),Y],[0.6,0.2,0.2])
+(ntrain, nval,ntest) = size.([xtrain,xval,xtest],1)
+nD = size(X_full,2)
+x_s = predict(scalermodel,Matrix(X_full))
+xtrain_s = predict(scalermodel,Matrix(xtrain)) # xtrain scaled
+xval_s = predict(scalermodel,Matrix(xtest)) # NOTE(review): built from xtest, not xval — as written xval_s duplicates xtest_s; confirm Matrix(xval) was intended
+xtest_s = predict(scalermodel,Matrix(xtest)) # xtest scaled
+
+
+nnm = get_nn_trained_model(xtrain,ytrain,xval,yval;force=false,model_file="models.jld",maxepochs=50,scmodel =scalermodel)
+
+plot(info(nnm)["loss_per_epoch"][1:end],title="Loss per epoch", label=nothing)
+
+
+# Obtain predictions and test them against the ground-truth observations
+
+ŷtrain = predict(nnm,xtrain_s)
+ŷtest = predict(nnm,xtest_s )
+
+rme_train = relative_mean_error(ytrain,ŷtrain) # 0.1517 # 0.1384 # 0.165
+rme_test = relative_mean_error(ytest,ŷtest) # 0.1613 # 0.1766 # 0.183
+
+# Plotting observed vs estimated...
+scatter(ytrain,ŷtrain,xlabel="vols (obs)",ylabel="vols (est)",label=nothing,title="Est vs. 
obs in training period",xrange=[0,1000],yrange=[0,1000]) +scatter(ytest,ŷtest,xlabel="vols (obs)",ylabel="vols (est)",label=nothing,title="Est vs. obs in testing period",xrange=[0,1000],yrange=[0,1000]) + +# ------------------------------------------------------------------------------ +# Creating a raster with the estimated values + +Y_est = @pipe X_full |> Matrix |> predict(scalermodel,_) |> predict(nnm,_) +vol_est = deepcopy(Yraster) + +for i in 1:length(Y_est) + r = convert(Int64,X_full[i,"R"]) + c = Int64(X_full[i,"C"]) + # println("$r , $c : $(vol_est[c,r]) (true) \t $(Y_est[i]) (est)") + vol_est[c,r] = Y_est[i] #missing #Y_est[i] +end + +plot(Yraster,title="Actual volumes") +plot(vol_est,title="Model estimated volumes (NN model)") + +# Only relative to test pixels + +vol_true = deepcopy(Yraster) +vol_est = deepcopy(Yraster) + +# initialisation with missing data +[vol_true[c,r] = missing for r in 1:nR, c in 1:nC] +[vol_est[c,r] = missing for r in 1:nR, c in 1:nC] + + +for i in 1:length(ŷtest) + r = convert(Int64,xtest[i,end-1]) + c = Int64(xtest[i,end]) + vol_true[c,r] = ytest[i] + vol_est[c,r] = ŷtest[i] +end + +plot(vol_true,title="Actual volumes (test pixels)") +plot(vol_est,title="Model estimated volumes (test pixels NN model)") \ No newline at end of file