Skip to content

Commit

Permalink
Merge pull request #118 from IDEMSInternational/LC_changes
Browse files Browse the repository at this point in the history
export to bucket to handle multiple stations
  • Loading branch information
lilyclements authored May 22, 2024
2 parents 8a275c7 + f621bf6 commit 2f54cb9
Show file tree
Hide file tree
Showing 12 changed files with 131 additions and 123 deletions.
191 changes: 97 additions & 94 deletions R/annual_rainfall_summaries.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,21 @@ annual_rainfall_summaries <- function(country, station_id, summaries = c("annual
# do the summaries exist already?
get_summaries <- epicsadata::get_summaries_data(country, station_id, summary = "annual_rainfall_summaries")
summary_data <- get_summaries[[1]]
timestamp <- get_summaries[[2]]
# what if the definitions is different? Have an override option.
# if the summary data exists, and if you do not want to override it then:
if (nrow(summary_data) > 0 & override == FALSE) {
file_name <- epicsadata::get_objects_in_bucket(country, station_id, timestamp = get_summaries[[2]])
file_name <- epicsadata::get_objects_in_bucket(country, station_id, timestamp = timestamp)
if (nrow(file_name) == 0) {
list_return[[1]] <- (definitions(country, station_id, summaries = summaries))
} else {
list_return[[1]] <- (definitions(country, station_id, summaries = summaries, paste0(station_id, ".", get_summaries[[2]])))
list_return[[1]] <- (definitions(country, station_id, summaries = summaries, paste0(station_id, ".", timestamp)))
}
} else {
# Get data definitions and summary definitions
definitions <- definitions(country = country, station_id = station_id, summaries = summaries)
if (!is.null(timestamp)) file_name <- paste0(station_id, ".", timestamp)
else file_name <- station_id
definitions <- epicsawrap::definitions(country = country, station_id = station_id, summaries = summaries, file = file_name)
definitions_season <- NULL

# Check if all elements in summaries are present in definitions
Expand All @@ -56,7 +59,7 @@ annual_rainfall_summaries <- function(country, station_id, summaries = c("annual
summaries <- c(summaries, paste0("end_", def_end_type))

# checking we have a definitions file
definitions_2 <- definitions(country = country, station_id = station_id, summaries = paste0("end_", def_end_type))
definitions_2 <- definitions(country = country, station_id = station_id, summaries = paste0("end_", def_end_type), file = file_name)

# if there's no definitions file, then set the end type to be the other one and check for definitions file
if (length(definitions_2) == 0){
Expand Down Expand Up @@ -84,101 +87,101 @@ annual_rainfall_summaries <- function(country, station_id, summaries = c("annual
} else {
definitions[[i]]$end_type <- "rains"
}
#great, we have at least one given so we're happy.
# set end_type as season by default
} else { # if no end type is specified, and no summaries are specified
# specified in the code asked for (if both, then end_seasons as per Roger's recommendation)
definitions_season <- definitions(country = "zm", station_id = "1", summaries = c("end_rains", "end_season"))
if ("end_season" %in% names(definitions_season)){
definitions[[i]]$end_type <- "season"
} else if ("end_rains" %in% names(definitions_season)){
definitions[[i]]$end_type <- "rains"
} else {
stop("Cannot calculate seasonal_rain without end_rains or end_season in definitions file.")
#great, we have at least one given so we're happy.
# set end_type as season by default
} else { # if no end type is specified, and no summaries are specified
# specified in the code asked for (if both, then end_seasons as per Roger's recommendation)
definitions_season <- definitions(country = "zm", station_id = "1", summaries = c("end_rains", "end_season"))
if ("end_season" %in% names(definitions_season)){
definitions[[i]]$end_type <- "season"
} else if ("end_rains" %in% names(definitions_season)){
definitions[[i]]$end_type <- "rains"
} else {
stop("Cannot calculate seasonal_rain without end_rains or end_season in definitions file.")
}
}
}
}
}
}

# Fetch daily data and preprocess
daily <- epicsadata::get_daily_data(country = country, station_id = station_id)
# For the variable names to be set as a certain default, set TRUE here, and run check_and_rename_variables
data_names <- epicsadata::data_definitions(names(daily), TRUE)
daily <- check_and_rename_variables(daily, data_names)

# Check if start_rains and end_rains are required for seasonal_rain and seasonal_length
if (any(grepl("seasonal_", summaries))){
if (!"start_rains" %in% summaries){
summaries <- c(summaries, "start_rains")

# checking we have a definitions file
definitions_2 <- definitions(country = country, station_id = station_id, summaries = "start_rains")

# if there's no definitions file, throw error
if (length(definitions_2) == 0){
stop(paste0("Cannot calculate seasonal summaries without start_rains in definitions file."))
} else {
definitions <- c(definitions, definitions_2)
# Fetch daily data and preprocess
daily <- epicsadata::get_daily_data(country = country, station_id = station_id)
# For the variable names to be set as a certain default, set TRUE here, and run check_and_rename_variables
data_names <- epicsadata::data_definitions(names(daily), TRUE)
daily <- check_and_rename_variables(daily, data_names)

# Check if start_rains and end_rains are required for seasonal_rain and seasonal_length
if (any(grepl("seasonal_", summaries))){
if (!"start_rains" %in% summaries){
summaries <- c(summaries, "start_rains")

# checking we have a definitions file
definitions_2 <- definitions(country = country, station_id = station_id, summaries = "start_rains")

# if there's no definitions file, throw error
if (length(definitions_2) == 0){
stop(paste0("Cannot calculate seasonal summaries without start_rains in definitions file."))
} else {
definitions <- c(definitions, definitions_2)
}
}
}
require_end_rains <- any(grepl("seasonal_", summaries)) & (any(grepl("end_", summaries)))
# run the checks to create this above, so we should never have this as false

summary_data <- NULL

# Calculate summaries ==================================================================
if ("start_rains" %in% summaries) {
start_rains <- annual_rainfall_start_rains(definitions, daily, data_names)
summary_data <- join_null_data(summary_data, start_rains)
summary_data$start_rains_doy <- as.integer(summary_data$start_rains_doy)
}

if ("end_rains" %in% summaries) {
if (!is.null(definitions$start_rains$s_start_doy)) definitions$end_rains$s_start_doy <- definitions$start_rains$s_start_doy
end_rains <- annual_rainfall_end_rains(definitions, daily, data_names)
summary_data <- join_null_data(summary_data, end_rains)
summary_data$end_rains_doy <- as.integer(summary_data$end_rains_doy)
}

if ("end_season" %in% summaries) {
if (!is.null(definitions$start_rains$s_start_doy)) definitions$end_season$s_start_doy <- definitions$start_rains$s_start_doy
end_season <- annual_rainfall_end_season(definitions, daily, data_names)
summary_data <- join_null_data(summary_data, end_season)
summary_data$end_season_doy <- as.integer(summary_data$end_season_doy)
}

if ("seasonal_rain" %in% summaries) {
season_rain <- annual_rainfall_seasonal_rain(definitions, daily, summary_data, data_names, summaries)
summary_data <- dplyr::full_join(summary_data, season_rain)
}

if ("seasonal_length" %in% summaries) {
season_rain <- annual_rainfall_seasonal_length(definitions, daily, summary_data, data_names, summaries)
summary_data <- dplyr::full_join(summary_data, season_rain)
}

if (!is.null(definitions$start_rains$s_start_doy) | !is.null(definitions$end_season$s_start_doy) | !is.null(definitions$end_rains$s_start_doy)){
summary_data$year <- factor(sub("-.*", "", summary_data$year))
}

if ("annual_rain" %in% summaries) {
annual_rain <- annual_rainfall_annual_rain(definitions, daily, data_names)
annual_rain$year <- factor(annual_rain$year)
summary_data <- join_null_data(summary_data, annual_rain)
}

names_definitions <- unique(names(definitions))
definitions <- unique(definitions)
names(definitions) <- names_definitions

summary_data <- summary_data %>%
dplyr::mutate(dplyr::across(dplyr::ends_with("_date"), ~as.character(.))) %>%
dplyr::filter(year %in% unique(daily[[data_names$year]]))
list_return[[1]] <- definitions
}
require_end_rains <- any(grepl("seasonal_", summaries)) & (any(grepl("end_", summaries)))
# run the checks to create this above, so we should never have this as false

summary_data <- NULL

# Calculate summaries ==================================================================
if ("start_rains" %in% summaries) {
start_rains <- annual_rainfall_start_rains(definitions, daily, data_names)
summary_data <- join_null_data(summary_data, start_rains)
summary_data$start_rains_doy <- as.integer(summary_data$start_rains_doy)
}

if ("end_rains" %in% summaries) {
if (!is.null(definitions$start_rains$s_start_doy)) definitions$end_rains$s_start_doy <- definitions$start_rains$s_start_doy
end_rains <- annual_rainfall_end_rains(definitions, daily, data_names)
summary_data <- join_null_data(summary_data, end_rains)
summary_data$end_rains_doy <- as.integer(summary_data$end_rains_doy)
}

if ("end_season" %in% summaries) {
if (!is.null(definitions$start_rains$s_start_doy)) definitions$end_season$s_start_doy <- definitions$start_rains$s_start_doy
end_season <- annual_rainfall_end_season(definitions, daily, data_names)
summary_data <- join_null_data(summary_data, end_season)
summary_data$end_season_doy <- as.integer(summary_data$end_season_doy)
}

if ("seasonal_rain" %in% summaries) {
season_rain <- annual_rainfall_seasonal_rain(definitions, daily, summary_data, data_names, summaries)
summary_data <- dplyr::full_join(summary_data, season_rain)
}

if ("seasonal_length" %in% summaries) {
season_rain <- annual_rainfall_seasonal_length(definitions, daily, summary_data, data_names, summaries)
summary_data <- dplyr::full_join(summary_data, season_rain)
}

if (!is.null(definitions$start_rains$s_start_doy) | !is.null(definitions$end_season$s_start_doy) | !is.null(definitions$end_rains$s_start_doy)){
summary_data$year <- factor(sub("-.*", "", summary_data$year))
}

if ("annual_rain" %in% summaries) {
annual_rain <- annual_rainfall_annual_rain(definitions, daily, data_names)
annual_rain$year <- factor(annual_rain$year)
summary_data <- join_null_data(summary_data, annual_rain)
}

names_definitions <- unique(names(definitions))
definitions <- unique(definitions)
names(definitions) <- names_definitions

summary_data <- summary_data %>%
dplyr::mutate(dplyr::across(dplyr::ends_with("_date"), ~as.character(.))) %>%
dplyr::filter(year %in% unique(daily[[data_names$year]]))
list_return[[1]] <- definitions
}
# rename
list_return[[2]] <- summary_data
return(list_return)
}
# rename
list_return[[2]] <- summary_data
return(list_return)
}
1 change: 1 addition & 0 deletions R/crop_success_probabilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ crop_success_probabilities <- function(country,
}
} else {
definitions <- epicsawrap::definitions(country = country, station_id = station_id, summaries = "crops_success")

# Fetch daily data and preprocess
daily <- epicsadata::get_daily_data(country = country, station_id = station_id)

Expand Down
21 changes: 11 additions & 10 deletions R/export_r_instat_to_bucket.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#' @param month The month data.
#' @param summaries A character vector specifying the types of summaries to include.
#' @param country `character(1)` The country code of the data.
#' @param station_id `character` The id's of the stations to analyze. Either a
#' @param station_id `character` The id's of the stations to analyse. Either a
#' single value or a vector.
#' @param include_summary_data Logical indicating whether to include summary data in the export.
#' @param annual_rainfall_data Annual rainfall summary data.
Expand Down Expand Up @@ -45,19 +45,20 @@ export_r_instat_to_bucket <- function(data, data_by_year, data_by_year_month = N

definitions_data <- epicsadata::collate_definitions_data(data = data, data_by_year = data_by_year, data_by_year_month = data_by_year_month, crop_data = crop_data_name, rain = rain, tmin = tmin, tmax = tmax, year = year, month = month, summaries = summaries)
# Save into bucket
add_definitions_to_bucket(country = country, station_id = station_id, new_definitions = definitions_data, timestamp = timestamp)
purrr::map(.x = station_id,
.f = ~add_definitions_to_bucket(country = country, station_id = .x, new_definitions = definitions_data, timestamp = timestamp))

if (include_summary_data){
# function to read summary data from R-Instat into summaries in buckets
if ("annual_rainfall" %in% summaries) add_summaries_to_bucket(country = country, station_id = station_id, data = annual_rainfall_data, summary = "annual_rainfall_summaries", timestamp = timestamp)

if ("annual_temperature" %in% summaries) add_summaries_to_bucket(country = country, station_id = station_id, data = annual_temperature_data, summary = "annual_temperature_summaries", timestamp = timestamp)

if ("monthly_temperature" %in% summaries) add_summaries_to_bucket(country = country, station_id = station_id, data = monthly_temperature_data, summary = "monthly_temperature_summaries", timestamp = timestamp)
if ("annual_rainfall" %in% summaries) purrr::map(.x = station_id, .f = ~add_summaries_to_bucket(country = country, station_id = .x, data = annual_rainfall_data, summary = "annual_rainfall_summaries", timestamp = timestamp))

if ("crop_success" %in% summaries) add_summaries_to_bucket(country = country, station_id = station_id, data = crop_success_data, summary = "crop_success_probabilities", timestamp = timestamp)

if ("start_season" %in% summaries) add_summaries_to_bucket(country = country, station_id = station_id, data = season_start_data, summary = "season_start_probabilities", timestamp = timestamp)
if ("annual_temperature" %in% summaries) purrr::map(.x = station_id, .f = ~add_summaries_to_bucket(country = country, station_id = .x, data = annual_temperature_data, summary = "annual_temperature_summaries", timestamp = timestamp))

if ("monthly_temperature" %in% summaries) purrr::map(.x = station_id, .f = ~add_summaries_to_bucket(country = country, station_id = .x, data = monthly_temperature_data, summary = "monthly_temperature_summaries", timestamp = timestamp))

if ("crop_success" %in% summaries) purrr::map(.x = station_id, .f = ~add_summaries_to_bucket(country = country, station_id = .x, data = crop_success_data, summary = "crop_success_probabilities", timestamp = timestamp))

if ("start_season" %in% summaries) purrr::map(.x = station_id, .f = ~add_summaries_to_bucket(country = country, station_id = .x, data = season_start_data, summary = "season_start_probabilities", timestamp = timestamp))
}
return("Uploaded to Bucket")
}
11 changes: 7 additions & 4 deletions R/season_start_probabilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,21 @@ season_start_probabilities <- function(country,
# do the summaries exist already?
get_summaries <- epicsadata::get_summaries_data(country, station_id, summary = "season_start_probabilities")
summary_data <- get_summaries[[1]]
timestamp <- get_summaries[[2]]
# what if the definitions is different? Have an override option.
# if the summary data exists, and if you do not want to override it then:
if (nrow(summary_data) > 0 & override == FALSE) {
file_name <- epicsadata::get_objects_in_bucket(country, station_id, timestamp = get_summaries[[2]])
file_name <- epicsadata::get_objects_in_bucket(country, station_id, timestamp = timestamp)
if (nrow(file_name) == 0) {
list_return[[1]] <- (definitions(country, station_id, summaries = "season_start_probabilities"))
} else {
list_return[[1]] <- (definitions(country, station_id, summaries = "season_start_probabilities", paste0(station_id, ".", get_summaries[[2]])))
list_return[[1]] <- (definitions(country, station_id, summaries = "season_start_probabilities", paste0(station_id, ".", timestamp)))
}
} else {

definitions <- epicsawrap::definitions(country = country, station_id = station_id, summaries = "season_start_probabilities")
if (!is.null(timestamp)) file_name <- paste0(station_id, ".", timestamp)
else file_name <- station_id
definitions <- epicsawrap::definitions(country = country, station_id = station_id,
summaries = "season_start_probabilities", file = file_name)
# Fetch daily data and preprocess
daily <- epicsadata::get_daily_data(country = country, station_id = station_id)

Expand Down
2 changes: 1 addition & 1 deletion man/export_r_instat_to_bucket.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions tests/testthat/test-annual_rainfall_summaries.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ library(testthat)
# Test case 1
epicsadata::gcs_auth_file(file = "testdata/epicsa_token.json")
test_1_results <- readRDS("testdata/test_1_annual_rainfall_summaries.rds")
country <- "zm"
station_id <- "test_1"
country <- "zm_test"
station_id <- "r_data_test_1"

test_that("Correct summaries are calculated", {
result <- suppressWarnings(annual_rainfall_summaries(country, station_id, override = TRUE))
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-crop_success_probabilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ library(testthat)
# Test case 1
epicsadata::gcs_auth_file(file = "testdata/epicsa_token.json")
test_1_results <- readRDS("testdata/crop_success_probabilities_test_1.rds")
country <- "zm"
station_id <- "test_1"
country <- "zm_test"
station_id <- "r_data_test_1"

test_that("Correct summaries are called", {
result <- suppressWarnings(crop_success_probabilities(country, station_id))
Expand Down
8 changes: 4 additions & 4 deletions tests/testthat/test-extremes_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ test_that("Correct summaries are called", {
expect_equal(nrow(result), nrow(test_1_results))
})

test_that("Correct summaries are calculated", {
result <- suppressWarnings(extremes_summaries(country, station_id, override = TRUE)[[2]])
expect_equal(nrow(result), nrow(test_1_results))
})
# test_that("Correct summaries are calculated", {
# result <- suppressWarnings(extremes_summaries(country, station_id, override = TRUE)[[2]])
# expect_equal(nrow(result), nrow(test_1_results))
# })
6 changes: 3 additions & 3 deletions tests/testthat/test-season_start_probabilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ library(testthat)

# Test case 1
epicsadata::gcs_auth_file(file = "testdata/epicsa_token.json")
test_1_results <- readRDS("testdata/season_start_probabilities_test_1.rds")
country <- "zm"
station_id <- "test_1"
test_1_results <- readRDS("testdata/season_start_probabilities_r_data_test_1.rds")
country <- "zm_test"
station_id <- "r_data_test_1"

test_that("Correct summaries are called", {
result <- suppressWarnings(season_start_probabilities(country, station_id))
Expand Down
Loading

0 comments on commit 2f54cb9

Please sign in to comment.