Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

export to bucket to handle multiple stations #118

Merged
merged 7 commits into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 97 additions & 94 deletions R/annual_rainfall_summaries.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,21 @@ annual_rainfall_summaries <- function(country, station_id, summaries = c("annual
# do the summaries exist already?
get_summaries <- epicsadata::get_summaries_data(country, station_id, summary = "annual_rainfall_summaries")
summary_data <- get_summaries[[1]]
timestamp <- get_summaries[[2]]
# what if the definitions is different? Have an override option.
# if the summary data exists, and if you do not want to override it then:
if (nrow(summary_data) > 0 & override == FALSE) {
file_name <- epicsadata::get_objects_in_bucket(country, station_id, timestamp = get_summaries[[2]])
file_name <- epicsadata::get_objects_in_bucket(country, station_id, timestamp = timestamp)
if (nrow(file_name) == 0) {
list_return[[1]] <- (definitions(country, station_id, summaries = summaries))
} else {
list_return[[1]] <- (definitions(country, station_id, summaries = summaries, paste0(station_id, ".", get_summaries[[2]])))
list_return[[1]] <- (definitions(country, station_id, summaries = summaries, paste0(station_id, ".", timestamp)))
}
} else {
# Get data definitions and summary definitions
definitions <- definitions(country = country, station_id = station_id, summaries = summaries)
if (!is.null(timestamp)) file_name <- paste0(station_id, ".", timestamp)
else file_name <- station_id
definitions <- epicsawrap::definitions(country = country, station_id = station_id, summaries = summaries, file = file_name)
definitions_season <- NULL

# Check if all elements in summaries are present in definitions
Expand All @@ -56,7 +59,7 @@ annual_rainfall_summaries <- function(country, station_id, summaries = c("annual
summaries <- c(summaries, paste0("end_", def_end_type))

# checking we have a definitions file
definitions_2 <- definitions(country = country, station_id = station_id, summaries = paste0("end_", def_end_type))
definitions_2 <- definitions(country = country, station_id = station_id, summaries = paste0("end_", def_end_type), file = file_name)

# if there's no definitions file, then set the end type to be the other one and check for definitions file
if (length(definitions_2) == 0){
Expand Down Expand Up @@ -84,101 +87,101 @@ annual_rainfall_summaries <- function(country, station_id, summaries = c("annual
} else {
definitions[[i]]$end_type <- "rains"
}
#great, we have at least one given so we're happy.
# set end_type as season by default
} else { # if no end type is specified, and no summaries are specified
# specified in the code asked for (if both, then end_seasons as per Roger's recommendation)
definitions_season <- definitions(country = "zm", station_id = "1", summaries = c("end_rains", "end_season"))
if ("end_season" %in% names(definitions_season)){
definitions[[i]]$end_type <- "season"
} else if ("end_rains" %in% names(definitions_season)){
definitions[[i]]$end_type <- "rains"
} else {
stop("Cannot calculate seasonal_rain without end_rains or end_season in definitions file.")
#great, we have at least one given so we're happy.
# set end_type as season by default
} else { # if no end type is specified, and no summaries are specified
# specified in the code asked for (if both, then end_seasons as per Roger's recommendation)
definitions_season <- definitions(country = "zm", station_id = "1", summaries = c("end_rains", "end_season"))
if ("end_season" %in% names(definitions_season)){
definitions[[i]]$end_type <- "season"
} else if ("end_rains" %in% names(definitions_season)){
definitions[[i]]$end_type <- "rains"
} else {
stop("Cannot calculate seasonal_rain without end_rains or end_season in definitions file.")
}
}
}
}
}
}

# Fetch daily data and preprocess
daily <- epicsadata::get_daily_data(country = country, station_id = station_id)
# For the variable names to be set as a certain default, set TRUE here, and run check_and_rename_variables
data_names <- epicsadata::data_definitions(names(daily), TRUE)
daily <- check_and_rename_variables(daily, data_names)

# Check if start_rains and end_rains are required for seasonal_rain and seasonal_length
if (any(grepl("seasonal_", summaries))){
if (!"start_rains" %in% summaries){
summaries <- c(summaries, "start_rains")

# checking we have a definitions file
definitions_2 <- definitions(country = country, station_id = station_id, summaries = "start_rains")

# if there's no definitions file, throw error
if (length(definitions_2) == 0){
stop(paste0("Cannot calculate seasonal summaries without start_rains in definitions file."))
} else {
definitions <- c(definitions, definitions_2)
# Fetch daily data and preprocess
daily <- epicsadata::get_daily_data(country = country, station_id = station_id)
# For the variable names to be set as a certain default, set TRUE here, and run check_and_rename_variables
data_names <- epicsadata::data_definitions(names(daily), TRUE)
daily <- check_and_rename_variables(daily, data_names)

# Check if start_rains and end_rains are required for seasonal_rain and seasonal_length
if (any(grepl("seasonal_", summaries))){
if (!"start_rains" %in% summaries){
summaries <- c(summaries, "start_rains")

# checking we have a definitions file
definitions_2 <- definitions(country = country, station_id = station_id, summaries = "start_rains")

# if there's no definitions file, throw error
if (length(definitions_2) == 0){
stop(paste0("Cannot calculate seasonal summaries without start_rains in definitions file."))
} else {
definitions <- c(definitions, definitions_2)
}
}
}
require_end_rains <- any(grepl("seasonal_", summaries)) & (any(grepl("end_", summaries)))
# run the checks to create this above, so we should never have this as false

summary_data <- NULL

# Calculate summaries ==================================================================
if ("start_rains" %in% summaries) {
start_rains <- annual_rainfall_start_rains(definitions, daily, data_names)
summary_data <- join_null_data(summary_data, start_rains)
summary_data$start_rains_doy <- as.integer(summary_data$start_rains_doy)
}

if ("end_rains" %in% summaries) {
if (!is.null(definitions$start_rains$s_start_doy)) definitions$end_rains$s_start_doy <- definitions$start_rains$s_start_doy
end_rains <- annual_rainfall_end_rains(definitions, daily, data_names)
summary_data <- join_null_data(summary_data, end_rains)
summary_data$end_rains_doy <- as.integer(summary_data$end_rains_doy)
}

if ("end_season" %in% summaries) {
if (!is.null(definitions$start_rains$s_start_doy)) definitions$end_season$s_start_doy <- definitions$start_rains$s_start_doy
end_season <- annual_rainfall_end_season(definitions, daily, data_names)
summary_data <- join_null_data(summary_data, end_season)
summary_data$end_season_doy <- as.integer(summary_data$end_season_doy)
}

if ("seasonal_rain" %in% summaries) {
season_rain <- annual_rainfall_seasonal_rain(definitions, daily, summary_data, data_names, summaries)
summary_data <- dplyr::full_join(summary_data, season_rain)
}

if ("seasonal_length" %in% summaries) {
season_rain <- annual_rainfall_seasonal_length(definitions, daily, summary_data, data_names, summaries)
summary_data <- dplyr::full_join(summary_data, season_rain)
}

if (!is.null(definitions$start_rains$s_start_doy) | !is.null(definitions$end_season$s_start_doy) | !is.null(definitions$end_rains$s_start_doy)){
summary_data$year <- factor(sub("-.*", "", summary_data$year))
}

if ("annual_rain" %in% summaries) {
annual_rain <- annual_rainfall_annual_rain(definitions, daily, data_names)
annual_rain$year <- factor(annual_rain$year)
summary_data <- join_null_data(summary_data, annual_rain)
}

names_definitions <- unique(names(definitions))
definitions <- unique(definitions)
names(definitions) <- names_definitions

summary_data <- summary_data %>%
dplyr::mutate(dplyr::across(dplyr::ends_with("_date"), ~as.character(.))) %>%
dplyr::filter(year %in% unique(daily[[data_names$year]]))
list_return[[1]] <- definitions
}
require_end_rains <- any(grepl("seasonal_", summaries)) & (any(grepl("end_", summaries)))
# run the checks to create this above, so we should never have this as false

summary_data <- NULL

# Calculate summaries ==================================================================
if ("start_rains" %in% summaries) {
start_rains <- annual_rainfall_start_rains(definitions, daily, data_names)
summary_data <- join_null_data(summary_data, start_rains)
summary_data$start_rains_doy <- as.integer(summary_data$start_rains_doy)
}

if ("end_rains" %in% summaries) {
if (!is.null(definitions$start_rains$s_start_doy)) definitions$end_rains$s_start_doy <- definitions$start_rains$s_start_doy
end_rains <- annual_rainfall_end_rains(definitions, daily, data_names)
summary_data <- join_null_data(summary_data, end_rains)
summary_data$end_rains_doy <- as.integer(summary_data$end_rains_doy)
}

if ("end_season" %in% summaries) {
if (!is.null(definitions$start_rains$s_start_doy)) definitions$end_season$s_start_doy <- definitions$start_rains$s_start_doy
end_season <- annual_rainfall_end_season(definitions, daily, data_names)
summary_data <- join_null_data(summary_data, end_season)
summary_data$end_season_doy <- as.integer(summary_data$end_season_doy)
}

if ("seasonal_rain" %in% summaries) {
season_rain <- annual_rainfall_seasonal_rain(definitions, daily, summary_data, data_names, summaries)
summary_data <- dplyr::full_join(summary_data, season_rain)
}

if ("seasonal_length" %in% summaries) {
season_rain <- annual_rainfall_seasonal_length(definitions, daily, summary_data, data_names, summaries)
summary_data <- dplyr::full_join(summary_data, season_rain)
}

if (!is.null(definitions$start_rains$s_start_doy) | !is.null(definitions$end_season$s_start_doy) | !is.null(definitions$end_rains$s_start_doy)){
summary_data$year <- factor(sub("-.*", "", summary_data$year))
}

if ("annual_rain" %in% summaries) {
annual_rain <- annual_rainfall_annual_rain(definitions, daily, data_names)
annual_rain$year <- factor(annual_rain$year)
summary_data <- join_null_data(summary_data, annual_rain)
}

names_definitions <- unique(names(definitions))
definitions <- unique(definitions)
names(definitions) <- names_definitions

summary_data <- summary_data %>%
dplyr::mutate(dplyr::across(dplyr::ends_with("_date"), ~as.character(.))) %>%
dplyr::filter(year %in% unique(daily[[data_names$year]]))
list_return[[1]] <- definitions
}
# rename
list_return[[2]] <- summary_data
return(list_return)
}
# rename
list_return[[2]] <- summary_data
return(list_return)
}
1 change: 1 addition & 0 deletions R/crop_success_probabilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ crop_success_probabilities <- function(country,
}
} else {
definitions <- epicsawrap::definitions(country = country, station_id = station_id, summaries = "crops_success")

# Fetch daily data and preprocess
daily <- epicsadata::get_daily_data(country = country, station_id = station_id)

Expand Down
21 changes: 11 additions & 10 deletions R/export_r_instat_to_bucket.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#' @param month The month data.
#' @param summaries A character vector specifying the types of summaries to include.
#' @param country `character(1)` The country code of the data.
#' @param station_id `character` The id's of the stations to analyze. Either a
#' @param station_id `character` The id's of the stations to analyse. Either a
#' single value or a vector.
#' @param include_summary_data Logical indicating whether to include summary data in the export.
#' @param annual_rainfall_data Annual rainfall summary data.
Expand Down Expand Up @@ -45,19 +45,20 @@ export_r_instat_to_bucket <- function(data, data_by_year, data_by_year_month = N

definitions_data <- epicsadata::collate_definitions_data(data = data, data_by_year = data_by_year, data_by_year_month = data_by_year_month, crop_data = crop_data_name, rain = rain, tmin = tmin, tmax = tmax, year = year, month = month, summaries = summaries)
# Save into bucket
add_definitions_to_bucket(country = country, station_id = station_id, new_definitions = definitions_data, timestamp = timestamp)
purrr::map(.x = station_id,
.f = ~add_definitions_to_bucket(country = country, station_id = .x, new_definitions = definitions_data, timestamp = timestamp))

if (include_summary_data){
# function to read summary data from R-Instat into summaries in buckets
if ("annual_rainfall" %in% summaries) add_summaries_to_bucket(country = country, station_id = station_id, data = annual_rainfall_data, summary = "annual_rainfall_summaries", timestamp = timestamp)

if ("annual_temperature" %in% summaries) add_summaries_to_bucket(country = country, station_id = station_id, data = annual_temperature_data, summary = "annual_temperature_summaries", timestamp = timestamp)

if ("monthly_temperature" %in% summaries) add_summaries_to_bucket(country = country, station_id = station_id, data = monthly_temperature_data, summary = "monthly_temperature_summaries", timestamp = timestamp)
if ("annual_rainfall" %in% summaries) purrr::map(.x = station_id, .f = ~add_summaries_to_bucket(country = country, station_id = .x, data = annual_rainfall_data, summary = "annual_rainfall_summaries", timestamp = timestamp))

if ("crop_success" %in% summaries) add_summaries_to_bucket(country = country, station_id = station_id, data = crop_success_data, summary = "crop_success_probabilities", timestamp = timestamp)

if ("start_season" %in% summaries) add_summaries_to_bucket(country = country, station_id = station_id, data = season_start_data, summary = "season_start_probabilities", timestamp = timestamp)
if ("annual_temperature" %in% summaries) purrr::map(.x = station_id, .f = ~add_summaries_to_bucket(country = country, station_id = .x, data = annual_temperature_data, summary = "annual_temperature_summaries", timestamp = timestamp))

if ("monthly_temperature" %in% summaries) purrr::map(.x = station_id, .f = ~add_summaries_to_bucket(country = country, station_id = .x, data = monthly_temperature_data, summary = "monthly_temperature_summaries", timestamp = timestamp))

if ("crop_success" %in% summaries) purrr::map(.x = station_id, .f = ~add_summaries_to_bucket(country = country, station_id = .x, data = crop_success_data, summary = "crop_success_probabilities", timestamp = timestamp))

if ("start_season" %in% summaries) purrr::map(.x = station_id, .f = ~add_summaries_to_bucket(country = country, station_id = .x, data = season_start_data, summary = "season_start_probabilities", timestamp = timestamp))
}
return("Uploaded to Bucket")
}
11 changes: 7 additions & 4 deletions R/season_start_probabilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,21 @@ season_start_probabilities <- function(country,
# do the summaries exist already?
get_summaries <- epicsadata::get_summaries_data(country, station_id, summary = "season_start_probabilities")
summary_data <- get_summaries[[1]]
timestamp <- get_summaries[[2]]
# what if the definitions is different? Have an override option.
# if the summary data exists, and if you do not want to override it then:
if (nrow(summary_data) > 0 & override == FALSE) {
file_name <- epicsadata::get_objects_in_bucket(country, station_id, timestamp = get_summaries[[2]])
file_name <- epicsadata::get_objects_in_bucket(country, station_id, timestamp = timestamp)
if (nrow(file_name) == 0) {
list_return[[1]] <- (definitions(country, station_id, summaries = "season_start_probabilities"))
} else {
list_return[[1]] <- (definitions(country, station_id, summaries = "season_start_probabilities", paste0(station_id, ".", get_summaries[[2]])))
list_return[[1]] <- (definitions(country, station_id, summaries = "season_start_probabilities", paste0(station_id, ".", timestamp)))
}
} else {

definitions <- epicsawrap::definitions(country = country, station_id = station_id, summaries = "season_start_probabilities")
if (!is.null(timestamp)) file_name <- paste0(station_id, ".", timestamp)
else file_name <- station_id
definitions <- epicsawrap::definitions(country = country, station_id = station_id,
summaries = "season_start_probabilities", file = file_name)
# Fetch daily data and preprocess
daily <- epicsadata::get_daily_data(country = country, station_id = station_id)

Expand Down
2 changes: 1 addition & 1 deletion man/export_r_instat_to_bucket.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions tests/testthat/test-annual_rainfall_summaries.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ library(testthat)
# Test case 1
epicsadata::gcs_auth_file(file = "testdata/epicsa_token.json")
test_1_results <- readRDS("testdata/test_1_annual_rainfall_summaries.rds")
country <- "zm"
station_id <- "test_1"
country <- "zm_test"
station_id <- "r_data_test_1"

test_that("Correct summaries are calculated", {
result <- suppressWarnings(annual_rainfall_summaries(country, station_id, override = TRUE))
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-crop_success_probabilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ library(testthat)
# Test case 1
epicsadata::gcs_auth_file(file = "testdata/epicsa_token.json")
test_1_results <- readRDS("testdata/crop_success_probabilities_test_1.rds")
country <- "zm"
station_id <- "test_1"
country <- "zm_test"
station_id <- "r_data_test_1"

test_that("Correct summaries are called", {
result <- suppressWarnings(crop_success_probabilities(country, station_id))
Expand Down
8 changes: 4 additions & 4 deletions tests/testthat/test-extremes_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ test_that("Correct summaries are called", {
expect_equal(nrow(result), nrow(test_1_results))
})

test_that("Correct summaries are calculated", {
result <- suppressWarnings(extremes_summaries(country, station_id, override = TRUE)[[2]])
expect_equal(nrow(result), nrow(test_1_results))
})
# test_that("Correct summaries are calculated", {
# result <- suppressWarnings(extremes_summaries(country, station_id, override = TRUE)[[2]])
# expect_equal(nrow(result), nrow(test_1_results))
# })
6 changes: 3 additions & 3 deletions tests/testthat/test-season_start_probabilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ library(testthat)

# Test case 1
epicsadata::gcs_auth_file(file = "testdata/epicsa_token.json")
test_1_results <- readRDS("testdata/season_start_probabilities_test_1.rds")
country <- "zm"
station_id <- "test_1"
test_1_results <- readRDS("testdata/season_start_probabilities_r_data_test_1.rds")
country <- "zm_test"
station_id <- "r_data_test_1"

test_that("Correct summaries are called", {
result <- suppressWarnings(season_start_probabilities(country, station_id))
Expand Down
Loading
Loading