diff --git a/R/annual_rainfall_start_rains.R b/R/annual_rainfall_start_rains.R index 5cfb69f..8066a35 100644 --- a/R/annual_rainfall_start_rains.R +++ b/R/annual_rainfall_start_rains.R @@ -80,7 +80,11 @@ annual_rainfall_start_rains <- function(definitions, daily, data_names){ if (is.null(definitions$start_rains$max_rain)) stop("Missing value in start_rains definitions for max_rain. max_rain needed since dry_period = TRUE.") if (is.null(definitions$start_rains$period_max_dry_days)) stop("Missing value in start_rains definitions for period_max_dry_days. period_max_dry_days needed since dry_period = TRUE.") } - start_rains <- rpicsa::start_rains(daily, date_time = data_names$date, station = data_names$station, year = data_names$year, rain = data_names$rain, + start_rains <- rpicsa::start_rains(daily, + date_time = data_names$date, + station = data_names$station, + year = data_names$year, + rain = data_names$rain, threshold = definitions$start_rains$threshold, start_day = definitions$start_rains$start_day, end_day = definitions$start_rains$end_day, diff --git a/R/data_definitions.R b/R/data_definitions.R index ca679d3..5bc4403 100644 --- a/R/data_definitions.R +++ b/R/data_definitions.R @@ -40,9 +40,9 @@ data_definitions <- function(data_names, variations <- list( station = c("station_name", "name", "station", "station_id"), date = c("date", "Date"), - year = c("year"), + year = c("year", "s_year"), month = c("month_abbr", "month", "month_val"), #, starts_with("month")), - doy = c("DOY", "doy_366", "doy"), + doy = c("DOY", "doy_366", "doy", "s_doy", "s_doy_366", "s_DOY"), day = c("day"), rain = c("rain", "rainfall", "precipitation", "PRECIP"), tmax = c("tmax", "max_temperature", "maximum", "max", "temperature_max", "TMPMAX"), diff --git a/R/get_definitions_data.R b/R/get_definitions_data.R index 47dc9fd..89144dc 100644 --- a/R/get_definitions_data.R +++ b/R/get_definitions_data.R @@ -1,38 +1,38 @@ #' Get Definitions Data #' #' This function retrieves definitions data for weather stations or specific definitions IDs from a Google Cloud Storage (GCS) bucket. -#' It includes logic to handle station-based retrieval or fetch data directly using a specified `definition_id`. It also handles +#' It includes logic to handle station-based retrieval or fetch data directly using a specified `definitions_id`. It also handles #' timestamp management to ensure the most recent definitions file is imported. #' #' @param country A character vector of length 1 specifying the country from which to get the definitions data. #' Options depend on the implementation of `get_bucket_name()`, with common options being `"mz"` (Mozambique) #' and `"zm"` (Zambia). #' @param station_id A character vector specifying the ID(s) of the station(s) for which to get the definitions data. -#' If `NULL`, data is fetched using the `definition_id`. Defaults to `NULL`. -#' @param definition_id A character string specifying the ID of the definitions to retrieve. If `NULL` and `station_id` is provided, +#' If `NULL`, data is fetched using the `definitions_id`. Defaults to `NULL`. +#' @param definitions_id A character string specifying the ID of the definitions to retrieve. If `NULL` and `station_id` is provided, #' the most recent definitions ID is determined from metadata. Defaults to `NULL`. #' @param file A character string specifying the name of a specific file to import, in the format `"STATIONNAME.TIMESTAMP"`. #' If `NULL`, the most recent definitions file is fetched automatically. Defaults to `NULL`. #' #' @return A data frame or list containing the definitions data: #' - If `station_id` is provided, returns data specific to the station(s). -#' - If `station_id` is `NULL`, returns data specific to the `definition_id`. +#' - If `station_id` is `NULL`, returns data specific to the `definitions_id`. #' #' @details #' - When `station_id` is provided, the function fetches the corresponding definitions data for each station. -#' - If `station_id` is `NULL`, the function directly retrieves data based on the provided `definition_id`. +#' - If `station_id` is `NULL`, the function directly retrieves data based on the provided `definitions_id`. #' - The function uses Google Cloud Storage to retrieve the files, ensuring that the most recent versions are accessed when `file` is `NULL`. #' - For multiple stations, the function returns a combined data frame. #' #' @export -get_definitions_data <- function(country, station_id = NULL, definition_id = NULL, file = NULL) { +get_definitions_data <- function(country, station_id = NULL, definitions_id = NULL, file = NULL) { if (length(country) > 1) stop("'country' must be of length 1") if (is.null(station_id)){ # return the data for that definitions ID bucket_name <- get_bucket_name(country) files <- googleCloudStorageR::gcs_list_objects(bucket = bucket_name, - prefix = paste0("definitions/", definition_id, "."), + prefix = paste0("definitions/", definitions_id, "."), versions = TRUE) if (nrow(files) == 0) { stop("No files found. Check country and station_id")} @@ -41,12 +41,12 @@ get_definitions_data <- function(country, station_id = NULL, definition_id = NUL json_files <- files$name # Check if multiple json files found. If so, take hte most recent one. - if (length(json_files) >= 1) definition_id <- extract_most_recent_json(json_files) - f <- paste0("definitions/", definition_id, ".json") + if (length(json_files) >= 1) definitions_id <- extract_most_recent_json(json_files) + f <- paste0("definitions/", definitions_id, ".json") if (file.exists(f)) { definitions_data <- jsonlite::read_json(f) } else { - f <- update_definitions_data(country, definition_id) + f <- update_definitions_data(country, definitions_id) definitions_data <- f #jsonlite::write_json(f) } return(definitions_data) @@ -56,16 +56,16 @@ get_definitions_data <- function(country, station_id = NULL, definition_id = NUL names(dfs) <- station_id station_data <- station_metadata(country = country, station_id = station_id) - definition_id_list <- lapply(station_data$definition_id, function(x) x[length(x)]) - names(definition_id_list) <- station_id + definitions_id_list <- lapply(station_data$definitions_id, function(x) x[length(x)]) + names(definitions_id_list) <- station_id if (is.null(file)){ bucket_name <- get_bucket_name(country) for (i in seq_along(station_id)) { - if (is.null(definition_id)) definition_id <- definition_id_list[[i]] + if (is.null(definitions_id)) definitions_id <- definitions_id_list[[i]] # List all files in the "definitions" directory for the station files <- googleCloudStorageR::gcs_list_objects(bucket = bucket_name, - prefix = paste0("definitions/", definition_id, "."), + prefix = paste0("definitions/", definitions_id, "."), versions = TRUE) if (nrow(files) == 0) { stop("No files found. Check country and station_id")} @@ -76,13 +76,13 @@ get_definitions_data <- function(country, station_id = NULL, definition_id = NUL # Check if multiple json files found. If so, take hte most recent one. if (length(json_files) >= 1){ # Extract timestamps from file names - definition_id[i] <- extract_most_recent_json(json_files) + definitions_id[i] <- extract_most_recent_json(json_files) } - f <- paste0("definitions/", definition_id[i], ".json") + f <- paste0("definitions/", definitions_id[i], ".json") if (file.exists(f)) { dfs[[i]] <- jsonlite::read_json(f) } else { - f <- update_definitions_data(country, definition_id[i]) + f <- update_definitions_data(country, definitions_id[i]) dfs[[i]] <- f #jsonlite::write_json(f) } } diff --git a/R/update_season_start_probabilities_from_definition.R b/R/update_season_start_probabilities_from_definition.R index 596a72d..c70b72a 100644 --- a/R/update_season_start_probabilities_from_definition.R +++ b/R/update_season_start_probabilities_from_definition.R @@ -23,7 +23,7 @@ update_season_start_probabilities_from_definition <- function(country, station_i if (!is.null(station_id)){ definitions_data <- get_definitions_data(country = country, station_id = station_id) } else { - definitions_data <- get_definitions_data(country = country, definition_id = definition_id) + definitions_data <- get_definitions_data(country = country, definitions_id = definition_id) } # If start-of-rains data is not provided, compute it using daily rainfall data diff --git a/R/update_summaries_from_definition.R b/R/update_summaries_from_definition.R index c5c4c04..32c0ee7 100644 --- a/R/update_summaries_from_definition.R +++ b/R/update_summaries_from_definition.R @@ -33,7 +33,7 @@ update_rainfall_summaries_from_definition <- function(country = "zm_workshops", if (!is.null(station_id)){ definitions_data <- get_definitions_data(country = country, station_id = station_id) } else { - definitions_data <- get_definitions_data(country = country, definition_id = definition_id) + definitions_data <- get_definitions_data(country = country, definitions_id = definition_id) } # Initialize variables for storing summary data and summaries diff --git a/R/update_total_temperature_summaries_from_definition.R b/R/update_total_temperature_summaries_from_definition.R index 59fe24f..3015071 100644 --- a/R/update_total_temperature_summaries_from_definition.R +++ b/R/update_total_temperature_summaries_from_definition.R @@ -19,7 +19,7 @@ update_total_temperature_summaries_from_definition <- function(country = "zm_wor if (!is.null(station_id)){ definitions_data <- get_definitions_data(country = country, station_id = station_id) } else { - definitions_data <- get_definitions_data(country = country, definition_id = definition_id) + definitions_data <- get_definitions_data(country = country, definitions_id = definition_id) } data_names <- data_definitions(names(daily_data), FALSE, FALSE) @@ -29,7 +29,7 @@ update_total_temperature_summaries_from_definition <- function(country = "zm_wor summaries_list <- c("min_tmin", "mean_tmin", "max_tmin", "min_tmax", "mean_tmax", "max_tmax") for (summary in summaries_list){ - definition_to <- unlist(definitions[[summary]]$to) + definition_to <- unlist(definitions_data[[summary]]$to) summary_type <- gsub("_.*$", "", summary) summary_variable <- gsub("^.*_", "", summary) @@ -42,21 +42,24 @@ update_total_temperature_summaries_from_definition <- function(country = "zm_wor tmin = if (summary_variable == "tmin") data_names$tmin else NULL, summaries = summary_type, to = to, - na_rm = as.logical(definitions[[summary]]$na_rm), - na_prop = definitions[[summary]]$na_prop, - na_n = definitions[[summary]]$na_n, - na_consec = definitions[[summary]]$na_consec, - na_n_non = definitions[[summary]]$na_n_non) + na_rm = as.logical(definitions_data[[summary]]$na_rm), + na_prop = definitions_data[[summary]]$na_prop, + na_n = definitions_data[[summary]]$na_n, + na_consec = definitions_data[[summary]]$na_consec, + na_n_non = definitions_data[[summary]]$na_n_non) } } + print(length(summary_data)) + if (length(summary_data) > 1){ summary_data <- Reduce(function(x, y) dplyr::full_join(x, y), summary_data) } else { summary_data <- summary_data[[1]] } - summary_data$year <- as.integer(summary_data$year) - if (!is.null(summary_data_monthly$month)) summary_data_monthly$month <- as.integer(forcats::as_factor(summary_data_monthly$month)) + + #summary_data$year <- as.integer(summary_data$year) + if (!is.null(summary_data$month)) summary_data$month <- as.integer(forcats::as_factor(summary_data$month)) return(summary_data) } diff --git a/man/get_definitions_data.Rd b/man/get_definitions_data.Rd index 6674902..026c697 100644 --- a/man/get_definitions_data.Rd +++ b/man/get_definitions_data.Rd @@ -7,7 +7,7 @@ get_definitions_data( country, station_id = NULL, - definition_id = NULL, + definitions_id = NULL, file = NULL ) } @@ -17,9 +17,9 @@ Options depend on the implementation of \code{get_bucket_name()}, with common op and \code{"zm"} (Zambia).} \item{station_id}{A character vector specifying the ID(s) of the station(s) for which to get the definitions data. -If \code{NULL}, data is fetched using the \code{definition_id}. Defaults to \code{NULL}.} +If \code{NULL}, data is fetched using the \code{definitions_id}. Defaults to \code{NULL}.} -\item{definition_id}{A character string specifying the ID of the definitions to retrieve. If \code{NULL} and \code{station_id} is provided, +\item{definitions_id}{A character string specifying the ID of the definitions to retrieve. If \code{NULL} and \code{station_id} is provided, the most recent definitions ID is determined from metadata. Defaults to \code{NULL}.} \item{file}{A character string specifying the name of a specific file to import, in the format \code{"STATIONNAME.TIMESTAMP"}. @@ -29,18 +29,18 @@ If \code{NULL}, the most recent definitions file is fetched automatically. Defau A data frame or list containing the definitions data: \itemize{ \item If \code{station_id} is provided, returns data specific to the station(s). -\item If \code{station_id} is \code{NULL}, returns data specific to the \code{definition_id}. +\item If \code{station_id} is \code{NULL}, returns data specific to the \code{definitions_id}. } } \description{ This function retrieves definitions data for weather stations or specific definitions IDs from a Google Cloud Storage (GCS) bucket. -It includes logic to handle station-based retrieval or fetch data directly using a specified \code{definition_id}. It also handles +It includes logic to handle station-based retrieval or fetch data directly using a specified \code{definitions_id}. It also handles timestamp management to ensure the most recent definitions file is imported. } \details{ \itemize{ \item When \code{station_id} is provided, the function fetches the corresponding definitions data for each station. -\item If \code{station_id} is \code{NULL}, the function directly retrieves data based on the provided \code{definition_id}. +\item If \code{station_id} is \code{NULL}, the function directly retrieves data based on the provided \code{definitions_id}. \item The function uses Google Cloud Storage to retrieve the files, ensuring that the most recent versions are accessed when \code{file} is \code{NULL}. \item For multiple stations, the function returns a combined data frame. }