Skip to content

Commit

Permalink
update definitions_id and bug fix in data definition names
Browse files Browse the repository at this point in the history
  • Loading branch information
lilyclements committed Dec 10, 2024
1 parent 9fc0376 commit b349074
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 37 deletions.
6 changes: 5 additions & 1 deletion R/annual_rainfall_start_rains.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,11 @@ annual_rainfall_start_rains <- function(definitions, daily, data_names){
if (is.null(definitions$start_rains$max_rain)) stop("Missing value in start_rains definitions for max_rain. max_rain needed since dry_period = TRUE.")
if (is.null(definitions$start_rains$period_max_dry_days)) stop("Missing value in start_rains definitions for period_max_dry_days. period_max_dry_days needed since dry_period = TRUE.")
}
start_rains <- rpicsa::start_rains(daily, date_time = data_names$date, station = data_names$station, year = data_names$year, rain = data_names$rain,
start_rains <- rpicsa::start_rains(daily,
date_time = data_names$date,
station = data_names$station,
year = data_names$year,
rain = data_names$rain,
threshold = definitions$start_rains$threshold,
start_day = definitions$start_rains$start_day,
end_day = definitions$start_rains$end_day,
Expand Down
4 changes: 2 additions & 2 deletions R/data_definitions.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ data_definitions <- function(data_names,
variations <- list(
station = c("station_name", "name", "station", "station_id"),
date = c("date", "Date"),
year = c("year"),
year = c("year", "s_year"),
month = c("month_abbr", "month", "month_val"), #, starts_with("month")),
doy = c("DOY", "doy_366", "doy"),
doy = c("DOY", "doy_366", "doy", "s_doy", "s_doy_366", "s_DOY"),
day = c("day"),
rain = c("rain", "rainfall", "precipitation", "PRECIP"),
tmax = c("tmax", "max_temperature", "maximum", "max", "temperature_max", "TMPMAX"),
Expand Down
34 changes: 17 additions & 17 deletions R/get_definitions_data.R
Original file line number Diff line number Diff line change
@@ -1,38 +1,38 @@
#' Get Definitions Data
#'
#' This function retrieves definitions data for weather stations or specific definitions IDs from a Google Cloud Storage (GCS) bucket.
#' It includes logic to handle station-based retrieval or fetch data directly using a specified `definition_id`. It also handles
#' It includes logic to handle station-based retrieval or fetch data directly using a specified `definitions_id`. It also handles
#' timestamp management to ensure the most recent definitions file is imported.
#'
#' @param country A character vector of length 1 specifying the country from which to get the definitions data.
#' Options depend on the implementation of `get_bucket_name()`, with common options being `"mz"` (Mozambique)
#' and `"zm"` (Zambia).
#' @param station_id A character vector specifying the ID(s) of the station(s) for which to get the definitions data.
#' If `NULL`, data is fetched using the `definition_id`. Defaults to `NULL`.
#' @param definition_id A character string specifying the ID of the definitions to retrieve. If `NULL` and `station_id` is provided,
#' If `NULL`, data is fetched using the `definitions_id`. Defaults to `NULL`.
#' @param definitions_id A character string specifying the ID of the definitions to retrieve. If `NULL` and `station_id` is provided,
#' the most recent definitions ID is determined from metadata. Defaults to `NULL`.
#' @param file A character string specifying the name of a specific file to import, in the format `"STATIONNAME.TIMESTAMP"`.
#' If `NULL`, the most recent definitions file is fetched automatically. Defaults to `NULL`.
#'
#' @return A data frame or list containing the definitions data:
#' - If `station_id` is provided, returns data specific to the station(s).
#' - If `station_id` is `NULL`, returns data specific to the `definition_id`.
#' - If `station_id` is `NULL`, returns data specific to the `definitions_id`.
#'
#' @details
#' - When `station_id` is provided, the function fetches the corresponding definitions data for each station.
#' - If `station_id` is `NULL`, the function directly retrieves data based on the provided `definition_id`.
#' - If `station_id` is `NULL`, the function directly retrieves data based on the provided `definitions_id`.
#' - The function uses Google Cloud Storage to retrieve the files, ensuring that the most recent versions are accessed when `file` is `NULL`.
#' - For multiple stations, the function returns a combined data frame.
#'
#' @export
get_definitions_data <- function(country, station_id = NULL, definition_id = NULL, file = NULL) {
get_definitions_data <- function(country, station_id = NULL, definitions_id = NULL, file = NULL) {
if (length(country) > 1) stop("'country' must be of length 1")
if (is.null(station_id)){
# return the data for that definitions ID
bucket_name <- get_bucket_name(country)

files <- googleCloudStorageR::gcs_list_objects(bucket = bucket_name,
prefix = paste0("definitions/", definition_id, "."),
prefix = paste0("definitions/", definitions_id, "."),
versions = TRUE)

if (nrow(files) == 0) { stop("No files found. Check country and station_id")}
Expand All @@ -41,12 +41,12 @@ get_definitions_data <- function(country, station_id = NULL, definition_id = NUL
json_files <- files$name

# Check if multiple json files found. If so, take the most recent one.
if (length(json_files) >= 1) definition_id <- extract_most_recent_json(json_files)
f <- paste0("definitions/", definition_id, ".json")
if (length(json_files) >= 1) definitions_id <- extract_most_recent_json(json_files)
f <- paste0("definitions/", definitions_id, ".json")
if (file.exists(f)) {
definitions_data <- jsonlite::read_json(f)
} else {
f <- update_definitions_data(country, definition_id)
f <- update_definitions_data(country, definitions_id)
definitions_data <- f #jsonlite::write_json(f)
}
return(definitions_data)
Expand All @@ -56,16 +56,16 @@ get_definitions_data <- function(country, station_id = NULL, definition_id = NUL
names(dfs) <- station_id

station_data <- station_metadata(country = country, station_id = station_id)
definition_id_list <- lapply(station_data$definition_id, function(x) x[length(x)])
names(definition_id_list) <- station_id
definitions_id_list <- lapply(station_data$definitions_id, function(x) x[length(x)])
names(definitions_id_list) <- station_id

if (is.null(file)){
bucket_name <- get_bucket_name(country)
for (i in seq_along(station_id)) {
if (is.null(definition_id)) definition_id <- definition_id_list[[i]]
if (is.null(definitions_id)) definitions_id <- definitions_id_list[[i]]
# List all files in the "definitions" directory for the station
files <- googleCloudStorageR::gcs_list_objects(bucket = bucket_name,
prefix = paste0("definitions/", definition_id, "."),
prefix = paste0("definitions/", definitions_id, "."),
versions = TRUE)

if (nrow(files) == 0) { stop("No files found. Check country and station_id")}
Expand All @@ -76,13 +76,13 @@ get_definitions_data <- function(country, station_id = NULL, definition_id = NUL
# Check if multiple json files found. If so, take the most recent one.
if (length(json_files) >= 1){
# Extract timestamps from file names
definition_id[i] <- extract_most_recent_json(json_files)
definitions_id[i] <- extract_most_recent_json(json_files)
}
f <- paste0("definitions/", definition_id[i], ".json")
f <- paste0("definitions/", definitions_id[i], ".json")
if (file.exists(f)) {
dfs[[i]] <- jsonlite::read_json(f)
} else {
f <- update_definitions_data(country, definition_id[i])
f <- update_definitions_data(country, definitions_id[i])
dfs[[i]] <- f #jsonlite::write_json(f)
}
}
Expand Down
2 changes: 1 addition & 1 deletion R/update_season_start_probabilities_from_definition.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ update_season_start_probabilities_from_definition <- function(country, station_i
if (!is.null(station_id)){
definitions_data <- get_definitions_data(country = country, station_id = station_id)
} else {
definitions_data <- get_definitions_data(country = country, definition_id = definition_id)
definitions_data <- get_definitions_data(country = country, definitions_id = definition_id)
}

# If start-of-rains data is not provided, compute it using daily rainfall data
Expand Down
2 changes: 1 addition & 1 deletion R/update_summaries_from_definition.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ update_rainfall_summaries_from_definition <- function(country = "zm_workshops",
if (!is.null(station_id)){
definitions_data <- get_definitions_data(country = country, station_id = station_id)
} else {
definitions_data <- get_definitions_data(country = country, definition_id = definition_id)
definitions_data <- get_definitions_data(country = country, definitions_id = definition_id)
}

# Initialize variables for storing summary data and summaries
Expand Down
21 changes: 12 additions & 9 deletions R/update_total_temperature_summaries_from_definition.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ update_total_temperature_summaries_from_definition <- function(country = "zm_wor
if (!is.null(station_id)){
definitions_data <- get_definitions_data(country = country, station_id = station_id)
} else {
definitions_data <- get_definitions_data(country = country, definition_id = definition_id)
definitions_data <- get_definitions_data(country = country, definitions_id = definition_id)
}

data_names <- data_definitions(names(daily_data), FALSE, FALSE)
Expand All @@ -29,7 +29,7 @@ update_total_temperature_summaries_from_definition <- function(country = "zm_wor
summaries_list <- c("min_tmin", "mean_tmin", "max_tmin",
"min_tmax", "mean_tmax", "max_tmax")
for (summary in summaries_list){
definition_to <- unlist(definitions[[summary]]$to)
definition_to <- unlist(definitions_data[[summary]]$to)
summary_type <- gsub("_.*$", "", summary)
summary_variable <- gsub("^.*_", "", summary)

Expand All @@ -42,21 +42,24 @@ update_total_temperature_summaries_from_definition <- function(country = "zm_wor
tmin = if (summary_variable == "tmin") data_names$tmin else NULL,
summaries = summary_type,
to = to,
na_rm = as.logical(definitions[[summary]]$na_rm),
na_prop = definitions[[summary]]$na_prop,
na_n = definitions[[summary]]$na_n,
na_consec = definitions[[summary]]$na_consec,
na_n_non = definitions[[summary]]$na_n_non)
na_rm = as.logical(definitions_data[[summary]]$na_rm),
na_prop = definitions_data[[summary]]$na_prop,
na_n = definitions_data[[summary]]$na_n,
na_consec = definitions_data[[summary]]$na_consec,
na_n_non = definitions_data[[summary]]$na_n_non)
}
}

print(length(summary_data))

if (length(summary_data) > 1){
summary_data <- Reduce(function(x, y) dplyr::full_join(x, y), summary_data)
} else {
summary_data <- summary_data[[1]]
}
summary_data$year <- as.integer(summary_data$year)
if (!is.null(summary_data_monthly$month)) summary_data_monthly$month <- as.integer(forcats::as_factor(summary_data_monthly$month))

#summary_data$year <- as.integer(summary_data$year)
if (!is.null(summary_data$month)) summary_data$month <- as.integer(forcats::as_factor(summary_data$month))
return(summary_data)
}

Expand Down
12 changes: 6 additions & 6 deletions man/get_definitions_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit b349074

Please sign in to comment.