From db03e0e9960b033fe958681300e3398ddaa8f747 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sat, 10 Aug 2024 16:51:28 +0530
Subject: [PATCH 01/10] Add batch normalization to CNN model

- Inserted batch normalization layers after convolutional and dense layers
- Aims to improve model stability and performance
- May help reduce internal covariate shift and allow for higher learning rates
---
 modules/assim.sequential/R/downscale_function.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 30280dab8d..2e0eb13924 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -146,8 +146,10 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
     for (i in seq_along(carbon_data)) {
       model <- keras3::keras_model_sequential() |>
         keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names))) |>
+        keras3::layer_batch_normalization() |>
         keras3::layer_flatten() |>
         keras3::layer_dense(units = 64, activation = 'relu') |>
+        keras3::layer_batch_normalization() |>
         keras3::layer_dense(units = 1)
 
       model |> keras3::compile(

From 499ee2d0d3644661535fee767e9e99d673079e46 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sat, 10 Aug 2024 17:01:38 +0530
Subject: [PATCH 02/10] Add dropout layers to CNN model

- Inserted dropout layers after batch normalization in convolutional and dense layers
- Set dropout rate to 0.3
- Aims to reduce overfitting and improve generalization
- May enhance model robustness and performance on unseen data
---
 modules/assim.sequential/R/downscale_function.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 2e0eb13924..9c26343e72 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -147,9 +147,11 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       model <- keras3::keras_model_sequential() |>
         keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names))) |>
         keras3::layer_batch_normalization() |>
+        keras3::layer_dropout(rate = 0.3) |>
         keras3::layer_flatten() |>
         keras3::layer_dense(units = 64, activation = 'relu') |>
         keras3::layer_batch_normalization() |>
+        keras3::layer_dropout(rate = 0.3) |>
         keras3::layer_dense(units = 1)
 
       model |> keras3::compile(

From cabc159ec364f7b26907734d6dc94f7803179ca7 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sat, 10 Aug 2024 17:05:28 +0530
Subject: [PATCH 03/10] Add exponential decay learning rate scheduler

- Implemented learning rate scheduler using exponential decay
- Initial learning rate set to 0.001
- Decay steps: 1000, decay rate: 0.9
- Aims to improve training stability and convergence
- May help fine-tune model performance over training epochs
---
 modules/assim.sequential/R/downscale_function.R | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 9c26343e72..d56ce02cfe 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -154,9 +154,16 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         keras3::layer_dropout(rate = 0.3) |>
         keras3::layer_dense(units = 1)
 
+      # Learning rate scheduler
+      lr_schedule <- keras3::learning_rate_schedule_exponential_decay(
+        initial_learning_rate = 0.001,
+        decay_steps = 1000,
+        decay_rate = 0.9
+      )
+
       model |> keras3::compile(
         loss = 'mean_squared_error',
-        optimizer = keras3::optimizer_adam(),
+        optimizer = keras3::optimizer_adam(learning_rate = lr_schedule),
         metrics = c('mean_absolute_error')
       )

From b9c5d5337c4a6b7fba374931170a2326ec2bf74e Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sat, 10 Aug 2024 17:08:02 +0530
Subject: [PATCH 04/10] Implement early stopping in CNN model

- Added early stopping callback
- Monitor: validation loss
- Patience: 10 epochs
- Restore best weights: True
- Aims to prevent overfitting and optimize training duration
- May improve model generalization and reduce unnecessary computation
---
 modules/assim.sequential/R/downscale_function.R | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index d56ce02cfe..c49061f0c0 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -166,6 +166,13 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         optimizer = keras3::optimizer_adam(learning_rate = lr_schedule),
         metrics = c('mean_absolute_error')
       )
+
+      # Early stopping callback
+      early_stopping <- keras3::callback_early_stopping(
+        monitor = 'val_loss',
+        patience = 10,
+        restore_best_weights = TRUE
+      )
 
       model |> keras3::fit(
         x = x_train,
@@ -173,6 +180,7 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         y = y_train[, i],
         epochs = 100,
         batch_size = 32,
         validation_split = 0.2,
+        callbacks = list(early_stopping),
         verbose = 0
       )

From ce26566190ff69ed628f3033744e0649b01a7ba4 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sat, 10 Aug 2024 17:10:25 +0530
Subject: [PATCH 05/10] Increase maximum number of epochs in CNN model

- Raised max epochs from 100 to 500
- Allows for potentially longer training time
- Aims to give model more opportunity to learn complex patterns
- Works in conjunction with early stopping for optimal training duration
- May lead to improved model performance and accuracy
---
 modules/assim.sequential/R/downscale_function.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index c49061f0c0..5212724cc9 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -177,7 +177,7 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       model |> keras3::fit(
         x = x_train,
         y = y_train[, i],
-        epochs = 100,
+        epochs = 500, # Increased max epochs
         batch_size = 32,
         validation_split = 0.2,
         callbacks = list(early_stopping),

From ac0461d14a7accc8afdd1acb842f49846e0abe1d Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sat, 10 Aug 2024 17:12:29 +0530
Subject: [PATCH 06/10] Final refactor

Made some final refactoring changes to keep the code standardised and
consistent.
---
 modules/assim.sequential/R/downscale_function.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 5212724cc9..2cd7f28221 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -166,7 +166,7 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         optimizer = keras3::optimizer_adam(learning_rate = lr_schedule),
         metrics = c('mean_absolute_error')
       )
-      
+
       # Early stopping callback
       early_stopping <- keras3::callback_early_stopping(
         monitor = 'val_loss',

From d2069e0e36ab4b8a2604ab9629dae89dc4c06b2f Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 11 Aug 2024 13:11:36 +0530
Subject: [PATCH 07/10] Added comments to architecture

Added short and concise comments to the architecture in an attempt to make
the choices and reasoning behind the model's architecture design
self-explanatory.
---
 modules/assim.sequential/R/downscale_function.R | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 2cd7f28221..64661b88d9 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -140,18 +140,28 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       predictions[[i]] <- stats::predict(models[[i]], test_data)
     }
   } else if (model_type == "cnn") {
+    # Reshape input data for CNN
     x_train <- keras3::array_reshape(x_train, c(nrow(x_train), 1, ncol(x_train)))
     x_test <- keras3::array_reshape(x_test, c(nrow(x_test), 1, ncol(x_test)))
 
     for (i in seq_along(carbon_data)) {
+      # Define the CNN model architecture
       model <- keras3::keras_model_sequential() |>
+        # 1D Convolutional layer: Extracts local features from input data
        keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names))) |>
+        # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift
        keras3::layer_batch_normalization() |>
+        # Dropout: Randomly sets 30% of inputs to 0, reducing overfitting and improving generalization
        keras3::layer_dropout(rate = 0.3) |>
+        # Flatten: Converts 3D output to 1D for dense layer input
        keras3::layer_flatten() |>
+        # Dense layer: Learns complex combinations of features
        keras3::layer_dense(units = 64, activation = 'relu') |>
+        # Second batch normalization: Further stabilizes learning in deeper layers
        keras3::layer_batch_normalization() |>
+        # Second dropout: Additional regularization to prevent overfitting in final layers
        keras3::layer_dropout(rate = 0.3) |>
+        # Output layer: Single neuron for regression prediction
        keras3::layer_dense(units = 1)
 
       # Learning rate scheduler

From e2cee084281f0f9d981d5ba125305c4a90cdccef Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 11 Aug 2024 13:23:46 +0530
Subject: [PATCH 08/10] Comment for CNN predictions

Added a comment over the CNN prediction snippet.
---
 modules/assim.sequential/R/downscale_function.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 64661b88d9..777d3566b4 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -195,7 +195,8 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
       )
 
       models[[i]] <- model
-      
+
+      #CNN predictions
       cnn_predict <- function(model, newdata, scaling_params) {
         newdata <- scale(newdata, center = scaling_params$mean, scale = scaling_params$sd)
         newdata <- keras3::array_reshape(newdata, c(nrow(newdata), 1, ncol(newdata)))

From ba4196d8efa9144976be3795f6efc8983fe0a196 Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 11 Aug 2024 13:26:24 +0530
Subject: [PATCH 09/10] More code comments

Added more comments for clarity.
---
 modules/assim.sequential/R/downscale_function.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index 777d3566b4..d4974bab38 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -146,12 +146,13 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
 
     for (i in seq_along(carbon_data)) {
       # Define the CNN model architecture
+      # Dual batch normalization and dropout: the first set operates on the lower-level features extracted by the convolutional layer; the second works on the higher-level features learned by the dense layer.
       model <- keras3::keras_model_sequential() |>
         # 1D Convolutional layer: Extracts local features from input data
         keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu', input_shape = c(1, length(covariate_names))) |>
         # Batch normalization: Normalizes layer inputs, stabilizes learning, reduces internal covariate shift
         keras3::layer_batch_normalization() |>
-        # Dropout: Randomly sets 30% of inputs to 0, reducing overfitting and improving generalization
+        # Dropout: Randomly sets some inputs to 0, reducing overfitting and improving generalization
         keras3::layer_dropout(rate = 0.3) |>
         # Flatten: Converts 3D output to 1D for dense layer input
         keras3::layer_flatten() |>

From 8fdb1d5444ba4d4ac66ea3a5f9ee58325f394a4a Mon Sep 17 00:00:00 2001
From: Sambhav Dixit <94298612+sambhavnoobcoder@users.noreply.github.com>
Date: Sun, 11 Aug 2024 13:29:19 +0530
Subject: [PATCH 10/10] Yet more comments

This should resolve all the places where comments should occur in the CNN;
all the code snippets are now covered.
---
 modules/assim.sequential/R/downscale_function.R | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
index d4974bab38..27645f9dd7 100644
--- a/modules/assim.sequential/R/downscale_function.R
+++ b/modules/assim.sequential/R/downscale_function.R
@@ -171,7 +171,8 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         decay_steps = 1000,
         decay_rate = 0.9
       )
-      
+
+      # Compile the model
       model |> keras3::compile(
         loss = 'mean_squared_error',
         optimizer = keras3::optimizer_adam(learning_rate = lr_schedule),
         metrics = c('mean_absolute_error')
@@ -184,7 +185,8 @@ SDA_downscale <- function(preprocessed, date, carbon_pool, covariates, model_typ
         patience = 10,
         restore_best_weights = TRUE
       )
-      
+
+      # Train the model
       model |> keras3::fit(
         x = x_train,
         y = y_train[, i],
         epochs = 500, # Increased max epochs
         batch_size = 32,
         validation_split = 0.2,
         callbacks = list(early_stopping),
         verbose = 0
       )
-      
+
+      # Store the trained model
       models[[i]] <- model
 
       #CNN predictions
       cnn_predict <- function(model, newdata, scaling_params) {
         newdata <- scale(newdata, center = scaling_params$mean, scale = scaling_params$sd)
         newdata <- keras3::array_reshape(newdata, c(nrow(newdata), 1, ncol(newdata)))
         predictions <- stats::predict(model, newdata)
         return(as.vector(predictions))
       }
-      
+
+      # Create a prediction raster from covariates
       prediction_rast <- terra::rast(covariates)
+
+      # Generate spatial predictions using the trained model
       maps[[i]] <- terra::predict(prediction_rast, model = models[[i]], fun = cnn_predict, scaling_params = scaling_params)
-      
+
+      # Make predictions on held-out test data
       predictions[[i]] <- cnn_predict(models[[i]], x_data[-sample, ], scaling_params)
     }
   } else {
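
For reviewers' convenience, the CNN branch of SDA_downscale() after all ten patches should read approximately as follows. This is a sketch assembled from the diffs above, not an additional patch; objects such as x_train, y_train, x_data, sample, scaling_params, carbon_data, covariate_names, models, maps, and predictions are created elsewhere in the function and are assumed to be in scope:

for (i in seq_along(carbon_data)) {
  # Define the CNN model architecture
  model <- keras3::keras_model_sequential() |>
    keras3::layer_conv_1d(filters = 64, kernel_size = 1, activation = 'relu',
                          input_shape = c(1, length(covariate_names))) |>
    keras3::layer_batch_normalization() |>
    keras3::layer_dropout(rate = 0.3) |>
    keras3::layer_flatten() |>
    keras3::layer_dense(units = 64, activation = 'relu') |>
    keras3::layer_batch_normalization() |>
    keras3::layer_dropout(rate = 0.3) |>
    keras3::layer_dense(units = 1)

  # Learning rate scheduler
  lr_schedule <- keras3::learning_rate_schedule_exponential_decay(
    initial_learning_rate = 0.001,
    decay_steps = 1000,
    decay_rate = 0.9
  )

  # Compile the model
  model |> keras3::compile(
    loss = 'mean_squared_error',
    optimizer = keras3::optimizer_adam(learning_rate = lr_schedule),
    metrics = c('mean_absolute_error')
  )

  # Early stopping callback
  early_stopping <- keras3::callback_early_stopping(
    monitor = 'val_loss',
    patience = 10,
    restore_best_weights = TRUE
  )

  # Train the model
  model |> keras3::fit(
    x = x_train,
    y = y_train[, i],
    epochs = 500,
    batch_size = 32,
    validation_split = 0.2,
    callbacks = list(early_stopping),
    verbose = 0
  )

  # Store the trained model
  models[[i]] <- model

  # CNN predictions: rescale, reshape, and predict on new data
  cnn_predict <- function(model, newdata, scaling_params) {
    newdata <- scale(newdata, center = scaling_params$mean, scale = scaling_params$sd)
    newdata <- keras3::array_reshape(newdata, c(nrow(newdata), 1, ncol(newdata)))
    predictions <- stats::predict(model, newdata)
    return(as.vector(predictions))
  }

  # Spatial predictions over the covariate raster, then held-out test predictions
  prediction_rast <- terra::rast(covariates)
  maps[[i]] <- terra::predict(prediction_rast, model = models[[i]],
                              fun = cnn_predict, scaling_params = scaling_params)
  predictions[[i]] <- cnn_predict(models[[i]], x_data[-sample, ], scaling_params)
}

Note that cnn_predict() is redefined on every iteration of the loop; defining it once before the for loop would behave identically.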
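On the schedule added in patch 03: assuming keras3::learning_rate_schedule_exponential_decay() follows Keras's ExponentialDecay with staircase = FALSE, the effective learning rate after a given optimizer step is initial_learning_rate * decay_rate^(step / decay_steps). A minimal sketch of the implied decay (lr_at() is a hypothetical helper, not part of the patches):

# Effective learning rate implied by the exponential-decay schedule in patch 03
lr_at <- function(step, initial = 0.001, decay_rate = 0.9, decay_steps = 1000) {
  initial * decay_rate^(step / decay_steps)
}
lr_at(0)    # 0.001
lr_at(1000) # 0.0009
lr_at(5000) # ~0.00059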