-
Notifications
You must be signed in to change notification settings - Fork 3
/
randomforest.R
123 lines (100 loc) · 5.88 KB
/
randomforest.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
library(rgdal)
library(raster)
library(RStoolbox)
# Project root; every path below is relative to this directory.
setwd("D:/MedWater_Katrin/")

# Load the AOI and buffer it (so that no pixels are lost when masking to the AOI).
# NOTE(review): the buffer width 0.1 is in the units of the shapefile's CRS
# (presumably degrees, since this AOI is used with the long/lat SWI data) - confirm.
aoi <- readOGR("ISRAEL_WestBank_Western_Aquifer_Basin/Yarkon_Taninim_clipped.shp")
aoi_buffer <- buffer(aoi, 0.1)
aoi_utm <- readOGR("ISRAEL_WestBank_Western_Aquifer_Basin/Yarkon_Taninim_clipped_UTM_WGS_36N.shp")

# List all available SWI files and extract the date from the file name.
# The dot in the pattern is escaped so that only a literal ".nc" suffix matches.
swi_data <- data.frame(
  file = list.files("SWI_Daten/M0056793", pattern = "\\.nc$", recursive = TRUE, full.names = TRUE),
  stringsAsFactors = FALSE
)
# The date is the 8-character substring that ends 26 characters before the end
# of the path (fixed-width file naming is assumed).
path_length <- nchar(swi_data$file)
swi_data$date <- substr(swi_data$file, path_length - 33, path_length - 26)

# Filter SWI data: only files from 2016.
swi_data <- swi_data[grepl("2016", swi_data$file), ]

# List all available S1 K0 files and extract the date from the path.
s1k0_data <- data.frame(
  file = list.files("S_1_K0", pattern = glob2rx("*db*.tif$"), recursive = TRUE, full.names = TRUE),
  stringsAsFactors = FALSE
)
# The date is taken from the first 8 characters of the second path component.
# vapply() guarantees a plain character column (lapply() would store a list
# column, which is fragile as a merge key).
s1k0_data$date <- vapply(
  s1k0_data$file,
  FUN = function(path) substr(strsplit(path, "/")[[1]][2], 1, 8),
  FUN.VALUE = character(1),
  USE.NAMES = FALSE
)

# Filter S1 K0 data: only files with fixed extent.
s1k0_data <- s1k0_data[grepl("ext_fix", s1k0_data$file), ]

# Merge file information --> only dates for which both data sets are available
# remain. Columns: date, file.x (SWI file), file.y (S1 K0 file).
merged_data <- merge(swi_data, s1k0_data, by = "date")
if (nrow(merged_data) == 0L) {
  stop("No date has both an SWI file and an S1 K0 file; nothing to process.",
       call. = FALSE)
}
# Load the additional (date-independent) predictor rasters.
aspect <- brick("S_1_K0/additional_data/aspect_20_resample.tif")
convergence <- brick("S_1_K0/additional_data/convergence_index_20_resample.tif")
elevation <- brick("S_1_K0/additional_data/elevation_20_resample.tif")
slope <- brick("S_1_K0/additional_data/slope_deg_20_resample.tif")
twi <- brick("S_1_K0/additional_data/twi_20_resample.tif")
# The NDVI and NDWI files are given two layer names each (mean and standard
# deviation).
ndvi <- brick("S_1_K0/additional_data/NDVI_mean_and_stdev.tif")
names(ndvi) <- c("ndvi_mean", "ndvi_sd")
ndwi <- brick("S_1_K0/additional_data/NDWI_mean_and_stdev.tif")
names(ndwi) <- c("ndwi_mean", "ndwi_sd")

# Stack all additional data. The layer names are set explicitly, in stacking
# order, so that they do not depend on the names stored in the source files.
predictor_names <- c(
  "aspect", "convergence", "elevation", "slope", "twi",
  "ndvi_mean", "ndvi_sd", "ndwi_mean", "ndwi_sd"
)
data_stack <- stack(aspect, convergence, elevation, slope, twi, ndvi, ndwi)
names(data_stack) <- predictor_names

# Remove the single layers from memory; only the stack is used from here on.
rm(aspect, convergence, elevation, slope, twi, ndvi, ndwi)
# Coordinate reference systems: the SWI files are assigned geographic WGS84
# and are then projected to UTM zone 36.
crs_longlat <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
crs_utm <- "+proj=utm +zone=36 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0"

# Create a template with the size/extent of the SWI files: read the first
# merged SWI file, clip it to the buffered AOI, project it to UTM, mask it to
# the UTM AOI and resample it (nearest neighbour) onto the predictor grid.
swi_template <- brick(merged_data$file.x[1], varname = "SWI_001")
proj4string(swi_template) <- CRS(crs_longlat)
swi_template <- crop(swi_template, aoi_buffer)
swi_template <- mask(swi_template, aoi_buffer)
swi_template <- projectRaster(swi_template, crs = crs_utm)
swi_template <- mask(swi_template, aoi_utm)
swi_template <- resample(swi_template, data_stack, method = "ngb")
names(swi_template) <- "swi"

# Create a spatial points data frame from the template pixels and randomly
# select 0.1% of the points.
# NOTE(review): no RNG seed is set here, so the selection differs between runs
# unless the points are restored from a saved workspace.
example_points <- rasterToPoints(swi_template, spatial = TRUE)
n_points <- nrow(example_points)
# sample() truncates a fractional size; floor() makes that explicit.
n_sample <- floor(0.001 * n_points)
if (n_sample < 1) {
  stop("The SWI template yields only ", n_points,
       " points; 0.1% of that is less than one training point.",
       call. = FALSE)
}
example_points <- example_points[sample(seq_len(n_points), n_sample), ]
#--> these points are used for training/validation of the random forest for all soil depths
#--> data is saved in randomforest.RData
# Names of the SWI variables to process, one per soil depth. Each name is
# "SWI_" followed by a number zero-padded to three digits.
varnames <- sprintf("SWI_%03d", c(1L, 5L, 10L, 15L, 20L, 40L, 60L, 100L))
# Uncomment to run a single depth / step through the loop body by hand:
#varnames <- c("SWI_040")
#v <- varnames[1]
# Train one random forest regression per soil depth and date, write the
# prediction map per model and one CSV of model metrics per soil depth.

# Loop-invariant settings, defined once instead of once per iteration.
crs_longlat <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
crs_utm <- "+proj=utm +zone=36 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0"

# Make sure the output directory exists before any model is trained, so that
# writing results cannot fail after the expensive model fit.
out_dir <- "S_1_K0/RandomForest"
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

# Result columns after "date": RMSE and R² of the random forest followed by one
# importance value per predictor.
# NOTE(review): the importances are stored by position, so the column order
# must match the layer order of the predictor stack. "ndwi_importance"
# presumably holds the importance of the ndwi_mean layer; the name is kept
# unchanged so existing readers of the CSV files keep working.
metric_cols <- c(
  "rmse_rf", "R2_rf",
  "s1_k0_importance", "aspect_importance", "convergence_importance",
  "elevation_importance", "slope_importance", "twi_importance",
  "ndvi_mean_importance", "ndvi_sd_importance", "ndwi_importance",
  "ndwi_sd_importance"
)
n_dates <- nrow(merged_data)

# Do the following for all soil depths.
for (v in varnames) {
  print(v)

  # Data frame where all results are saved, one row per date. Built from a
  # matrix so that it is also valid when there are no dates.
  model_output <- data.frame(
    date = merged_data$date,
    matrix(NA_real_, nrow = n_dates, ncol = length(metric_cols),
           dimnames = list(NULL, metric_cols))
  )

  # For each date do the following. seq_len() yields an empty sequence for
  # zero rows, whereas 1:0 would iterate over 1 and 0.
  for (i in seq_len(n_dates)) {
    print(i)

    # Load the S1 K0 layer of the specific date.
    s1 <- brick(merged_data$file.y[i])
    names(s1) <- "s1_k0"

    # Load the SWI layer of this depth, clip/mask it to the AOI, project it to
    # UTM and resample it (nearest neighbour) to the S1 K0 grid.
    swi <- brick(merged_data$file.x[i], varname = v)
    proj4string(swi) <- CRS(crs_longlat)
    swi <- crop(swi, aoi_buffer)
    swi <- mask(swi, aoi_buffer)
    swi <- projectRaster(swi, crs = crs_utm)
    swi <- mask(swi, aoi_utm)
    swi <- resample(swi, s1, method = "ngb")
    names(swi) <- "swi"

    # Stack all predictor variables (S1 K0 first, then the static layers).
    classification_stack <- stack(s1, data_stack)

    # Extract the SWI values at the example points and attach them as the
    # only attribute ("swi") of the training points.
    trainvals <- data.frame(extract(swi, example_points))
    names(trainvals) <- "swi"
    traindata <- example_points
    traindata@data <- trainvals

    # Fit the random forest in regression mode; trainPartition = 0.6 is the
    # share of the points used for training (the rest is presumably used for
    # the validation metrics read below - see the superClass documentation).
    rf1 <- superClass(
      img = classification_stack, trainData = traindata, trainPartition = 0.6,
      responseCol = "swi", model = "rf", mode = "regression", predict = TRUE
    )

    # Save the prediction map.
    writeRaster(
      rf1$map,
      paste0(out_dir, "/rf_prediction_", v, "_", merged_data$date[i], ".tif"),
      overwrite = TRUE
    )

    # Extract the RMSE, R² and the variable importances of the random forest.
    rmse <- rf1$validation$performance$RMSE
    r2 <- rf1$validation$performance$Rsquared
    importances <- c(rf1$model$finalModel$importance)

    # Save the parameters in the output data frame (positional assignment).
    model_output[i, metric_cols] <- c(rmse, r2, importances)

    # Free the per-date objects before the next iteration.
    rm(s1, swi, classification_stack, trainvals, traindata, rmse, r2, importances, rf1)
  }

  # Save the output data frame of this soil depth.
  write.csv(model_output, paste0(out_dir, "/model_output_", v, ".csv"))
}