diff --git a/DESCRIPTION b/DESCRIPTION
index b8ee798..e260c5b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -12,16 +12,20 @@ RoxygenNote: 7.2.3
 Depends: 
     R (>= 2.10)
 Imports:
+    DBI,
     dplyr,
     epicsadata,
     forcats,
     googleCloudStorageR,
     jsonlite,
+    lubridate,
     magrittr,
     purrr,
     rlang,
+    RMySQL,
     rpicsa,
-    testthat
+    testthat,
+    tidyr
 Remotes:
     IDEMSInternational/epicsadata,
     IDEMSInternational/rpicsa
\ No newline at end of file
diff --git a/NAMESPACE b/NAMESPACE
index 1d87476..4aba272 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -19,7 +19,10 @@ export(export_r_instat_to_bucket)
 export(extremes_summaries)
 export(gcs_auth_file)
 export(get_binary_file)
+export(get_climsoft_conn)
+export(get_daily_data)
 export(get_definitions_id_from_metadata)
+export(import_from_climsoft)
 export(join_null_data)
 export(monthly_temperature_summaries)
 export(reformat_annual_summaries)
@@ -27,9 +30,12 @@ export(reformat_crop_success)
 export(reformat_season_start)
 export(reformat_temperature_summaries)
 export(season_start_probabilities)
+export(set_climsoft_conn)
 export(setup)
 export(station_metadata)
 export(update_metadata_definition_id)
+importFrom(DBI,dbConnect)
+importFrom(RMySQL,MySQL)
 importFrom(epicsadata,gcs_auth_file)
 importFrom(magrittr,"%>%")
 importFrom(rlang,":=")
diff --git a/R/annual_rainfall_summaries.R b/R/annual_rainfall_summaries.R
index 02b01e4..0f85e37 100644
--- a/R/annual_rainfall_summaries.R
+++ b/R/annual_rainfall_summaries.R
@@ -7,6 +7,7 @@
 #' @param country `character(1)` The country code of the data.
 #' @param station_id `character` The id's of the stations to analyse. Either a
 #'   single value or a vector.
+#' @param call A character vector specifying where to call the raw data from if calling raw data.
 #' @param summaries `character` The names of the summaries to produce.
 #' @param override A logical argument default `FALSE` indicating whether to calculate the summaries still, even if they are stored already in the bucket.
 #'
@@ -16,8 +17,10 @@
 #' @examples
 #' #annual_rainfall_summaries(country = "zm", station_id = "01122", summaries = "annual_rain")
 #' #annual_rainfall_summaries(country = "zm", station_id = "16", summaries = c("start_rains", "end_rains", "annual_rain", "seasonal_rain")) #, "end_season"))
-annual_rainfall_summaries <- function(country, station_id, summaries = c("annual_rain", "start_rains", "end_rains", "end_season", "seasonal_rain", "seasonal_length"), override = FALSE) {
+annual_rainfall_summaries <- function(country, station_id, call = c("climsoft", "googlebuckets"),
+                                      summaries = c("annual_rain", "start_rains", "end_rains", "end_season", "seasonal_rain", "seasonal_length"), override = FALSE) {
   list_return <- NULL
+  call <- match.arg(call)
   # we get the definitions_id from station_id metadata.
   definitions_id <- get_definitions_id_from_metadata(country, station_id)
 
@@ -26,7 +29,7 @@ annual_rainfall_summaries <- function(country, station_id, summaries = c("annual
     get_summaries <- epicsadata::get_summaries_data(country, station_id, summary = "annual_rainfall_summaries")
     summary_data <- get_summaries[[1]]
     timestamp <- get_summaries[[2]]
-    
+
     # what if the definitions is different? Have an override option.
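For reviewers, a usage sketch of the new `call` argument (illustration only, not part of the patch; station IDs mirror the package tests). Because `match.arg()` takes the first choice, an unqualified call now defaults to `"climsoft"`, which is why the updated tests pass `call = "googlebuckets"` explicitly.

```r
# Illustration only (not part of the patch); IDs as used in the package tests.
# Raw data from the Google bucket copies, as the tests do:
rain_gcs <- annual_rainfall_summaries(
  country    = "zm_test",
  station_id = "r_data_test_1",
  call       = "googlebuckets",
  summaries  = c("start_rains", "annual_rain"),
  override   = TRUE
)

# Raw data from Climsoft instead; requires set_climsoft_conn() to have been run:
# rain_climsoft <- annual_rainfall_summaries(country = "zm", station_id = "16",
#                                            call = "climsoft")
```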
# if the summary data exists, and if you do not want to override it then: if (nrow(summary_data) > 0 & override == FALSE) { @@ -133,12 +136,14 @@ annual_rainfall_summaries <- function(country, station_id, summaries = c("annual } } } - # Fetch daily data and preprocess - daily <- epicsadata::get_daily_data(country = country, station_id = station_id) + daily <- get_daily_data(country = country, station_id = station_id, call_from = call) + # For the variable names to be set as a certain default, set TRUE here, and run check_and_rename_variables data_names <- epicsadata::data_definitions(names(daily), TRUE) daily <- check_and_rename_variables(daily, data_names) + if (class(daily$date) != "Date") daily$date <- as.Date(daily$date) + if (!"year" %in% names(daily)) daily$year <- lubridate::year(daily$date) # Check if start_rains and end_rains are required for seasonal_rain and seasonal_length if (any(grepl("seasonal_", summaries))){ diff --git a/R/check_and_rename_variables.R b/R/check_and_rename_variables.R index 971f3ad..cad08d0 100644 --- a/R/check_and_rename_variables.R +++ b/R/check_and_rename_variables.R @@ -33,9 +33,9 @@ check_and_rename_variables <- function(data, data_names) { month = c("month_abbr", "month"), doy = c("DOY", "doy_366", "doy"), day = c("day"), - rain = c("rain", "rainfall", "precipitation"), - tmax = c("tmax", "max_temperature", "maximum", "max", "temperature_max"), - tmin = c("tmin","min_temperature", "minimum", "min", "temperature_min")) + rain = c("rain", "rainfall", "precipitation", "PRECIP"), + tmax = c("tmax", "max_temperature", "maximum", "max", "temperature_max", "TMPMAX"), + tmin = c("tmin","min_temperature", "minimum", "min", "temperature_min", "TMPMIN")) # Loop through the missing variable names @@ -66,4 +66,4 @@ check_and_rename_variables <- function(data, data_names) { } return(data) -} \ No newline at end of file +} diff --git a/R/climsoft_connection_functions.R b/R/climsoft_connection_functions.R new file mode 100644 index 0000000..bbeede6 --- /dev/null +++ b/R/climsoft_connection_functions.R @@ -0,0 +1,37 @@ +#' Set Climsoft Connection +#' +#' Establishes a connection to a Climsoft database and stores it in a package environment for later use. +#' +#' @param dbname Name of the database. +#' @param user Username for database access. +#' @param password Password for database access. +#' @param host Host where the database server is located. +#' @param port Port number on which the database server is running. +#' +#' @return Invisible. The function does not return anything but stores the connection in a designated package environment. +#' +#' @examples +#' #set_climsoft_conn("climsoft_db", "user", "password", "localhost", "3306") +#' +#' @importFrom DBI dbConnect +#' @importFrom RMySQL MySQL +#' @export +set_climsoft_conn <- function(dbname, user, password, host, port){ + conn <- DBI::dbConnect(drv = RMySQL::MySQL(), dbname = dbname, + user = user, password = password, host = host, port = port) + pkg_env$conn <- conn +} + +#' Get Climsoft Connection +#' +#' Retrieves the stored Climsoft database connection from the package environment. +#' +#' @return The database connection object. 
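A sketch of how the two connection helpers are intended to be used together (illustration only, not part of the patch; database name, credentials and host are placeholders). Because the connection is stashed in the `pkg_env` environment created in `R/global.R` rather than returned to the caller, only one Climsoft connection is active at a time, and calling `set_climsoft_conn()` again simply replaces it.

```r
# Illustration only (not part of the patch); all connection details are placeholders.
set_climsoft_conn(
  dbname   = "climsoft_db",
  user     = "climsoft_user",
  password = "secret",
  host     = "127.0.0.1",
  port     = 3306
)

con <- get_climsoft_conn()
DBI::dbListTables(con)  # quick sanity check that the stored connection works
```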
+#' +#' @examples +#' #con <- get_climsoft_conn() +#' +#' @export +get_climsoft_conn <- function(){ + get("conn", envir = pkg_env) +} diff --git a/R/crop_success_probabilities.R b/R/crop_success_probabilities.R index baf1eaf..6cdfb9d 100644 --- a/R/crop_success_probabilities.R +++ b/R/crop_success_probabilities.R @@ -4,6 +4,7 @@ #' @param country `character(1)` The country code of the data. #' @param station_id `character` The id's of the stations to analyse. Either a #' single value or a vector. +#' @param call A character vector specifying where to call the raw data from if calling raw data. #' @param water_requirements \code{numeric} Vector containing water requirements requirements. #' @param planting_dates \code{numeric} Vector containing planting dates requirements. #' @param planting_length \code{numeric} Vector containing seasonal crop length requirements. @@ -28,6 +29,7 @@ #' # planting_length = c(100, 150), planting_dates = c(90, 100, 110)) crop_success_probabilities <- function(country, station_id, + call = c("climsoft", "googlebuckets"), planting_dates = NULL, water_requirements = NULL, planting_length = NULL, @@ -99,17 +101,20 @@ crop_success_probabilities <- function(country, # if we are overriding, then we are overriding for our start_rains definition too, meaning we need to recalculate that if (override){ # Fetch daily data and preprocess - daily <- epicsadata::get_daily_data(country = country, station_id = station_id) + daily <- get_daily_data(country = country, station_id = station_id, call_from = call) # For the variable names to be set as a certain default, set TRUE here, and run check_and_rename_variables data_names <- epicsadata::data_definitions(names(daily), TRUE) daily <- check_and_rename_variables(daily, data_names) + if (class(daily$date) != "Date") daily$date <- as.Date(daily$date) + if (!"year" %in% names(daily)) daily$year <- lubridate::year(daily$date) + } else { data_names <- NULL data_names$station <- "station" } - season_data <- annual_rainfall_summaries(country = country, station_id = station_id, summaries = c("start_rains", "seasonal_length", "seasonal_rain"), override = override) # end rains or end season? + season_data <- annual_rainfall_summaries(country = country, station_id = station_id, call = call, summaries = c("start_rains", "seasonal_length", "seasonal_rain"), override = override) # end rains or end season? #offset <- season_data[[1]]$start_rains$s_start_doy definitions$crops_success$planting_length <- check_and_set_parameter("planting_length", "planting_length") diff --git a/R/get_daily_data.R b/R/get_daily_data.R new file mode 100644 index 0000000..3a59891 --- /dev/null +++ b/R/get_daily_data.R @@ -0,0 +1,42 @@ +#' Get Daily Data +#' +#' @param country A character vector specifying the country or countries from which to get the data. Common options are `"mz"`, `"zm"`, and `"zm_test"`. Any defined in `get_bucket_name()`. +#' @param station_id A character string specifying the ID of the station for which to get the daily data. +#' +#' @return A data frame containing the daily data for the specified station and country. 
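`get_daily_data()`, documented here and defined below, dispatches between the two raw-data back ends; a sketch of both paths follows (illustration only, not part of the patch; IDs other than the test IDs are placeholders).

```r
# Illustration only (not part of the patch).
# Google-buckets path: reads "<country>/data/<station_id>.rds" if it exists locally,
# otherwise falls back to epicsadata::update_daily_data().
daily <- get_daily_data(country    = "zm_test",
                        station_id = "r_data_test_1",
                        call_from  = "googlebuckets")

# Climsoft path: needs a stored connection (set_climsoft_conn()) plus the Climsoft
# element settings held in station_metadata()'s climsoft_list for that station.
# daily <- get_daily_data(country = "zm", station_id = "01122", call_from = "climsoft")
```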
+#' @export +#' +#' @examples # +get_daily_data <- function(country, station_id, call_from = c("climsoft", "googlebuckets")) { + call_from <- match.arg(call_from) + if (length(country) > 1) stop("'country' must be of length 1") + station_id <- as.character(station_id) + + if (call_from == "climsoft"){ + # if you call from climsoft + climsoft_info <- station_metadata(country = country, station_id = station_id)$climsoft_list + if (is.null(get_climsoft_conn())) stop("Set climsoft connection with set_climsoft_conn() function.") + station_data <- import_from_climsoft(con = get_climsoft_conn(), + stations = station_id, + include_station_info = FALSE, + elementfiltercolumn = climsoft_info[[1]]$elementfiltercolumn, + elements = climsoft_info[[1]]$elements) + } else { + # if you call from googlebuckets + dfs <- vector("list", length(station_id)) + names(dfs) <- station_id + for (i in seq_along(station_id)) { + f <- paste0(country, "/", "data", "/", station_id[i], ".rds") + if (file.exists(f)) { + dfs[[i]] <- readRDS(f) + } else { + f <- epicsadata::update_daily_data(country, station_id[i]) + dfs[[i]] <- f#saveRDS(o, file = f) + } + } + if (length(station_id) > 1) { + station_data <- dplyr::bind_rows(dfs) + } else station_data <- dfs[[1]] + } + return(station_data) +} diff --git a/R/global.R b/R/global.R new file mode 100644 index 0000000..a3a14e1 --- /dev/null +++ b/R/global.R @@ -0,0 +1 @@ +pkg_env <- new.env(parent = emptyenv()) diff --git a/R/import_from_climsoft.R b/R/import_from_climsoft.R new file mode 100644 index 0000000..d5d230a --- /dev/null +++ b/R/import_from_climsoft.R @@ -0,0 +1,112 @@ +#' Import Data from Climsoft +#' +#' Connects to a Climsoft database and imports data based on the specified filters for stations and elements, with options to include observation flags and station information. +#' +#' @param con Connection object to the Climsoft database, default is the result of \code{get_climsoft_conn()}. +#' @param stationfiltercolumn Name of the column to filter by stations, default is 'stationId'. +#' @param stations Vector of station IDs to filter the data, defaults to an empty vector. +#' @param elementfiltercolumn Name of the column to filter by elements, default is 'elementId'. +#' @param elements Vector of element IDs to filter the data, defaults to an empty vector. +#' @param include_observation_flags Boolean, if TRUE includes observation flags in the output, defaults to FALSE. +#' @param include_station_info Boolean, if TRUE includes station metadata in the output, defaults to FALSE. +#' @param start_date Start date for filtering the observations, format should be Date, defaults to NULL. +#' @param end_date End date for filtering the observations, format should be Date, defaults to NULL. +#' +#' @return A list containing Climsoft station and observation data based on the filters applied. If `include_station_info` is TRUE, the list will have two elements: 'Metadata' with station details and 'Daily data' with observations. 
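A slightly fuller companion to the roxygen example that follows (illustration only, not part of the patch; station and element IDs are placeholders). Because of the `tidyr::pivot_wider()` step at the end of the function, the observations come back with one column per element abbreviation (for example `PRECIP`, `TMPMAX`, `TMPMIN`), which is what the new aliases in `check_and_rename_variables()` expect.

```r
# Illustration only (not part of the patch); station and element IDs are placeholders.
con <- get_climsoft_conn()

obs <- import_from_climsoft(
  con,
  stations             = c("101", "102"),
  elements             = c("5", "2"),
  start_date           = as.Date("2020-01-01"),
  end_date             = as.Date("2020-12-31"),
  include_station_info = TRUE
)

obs[["Metadata"]]    # station details
obs[["Daily data"]]  # one row per station/datetime, one column per element
```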
+#'
+#' @examples
+#' con <- get_climsoft_conn()
+#' data <- import_from_climsoft(con, stations = c("101", "102"), elements = c("1", "2"), start_date = as.Date("2020-01-01"), end_date = as.Date("2020-01-31"))
+#'
+#' @export
+import_from_climsoft <- function(con = get_climsoft_conn(),
+                                 stationfiltercolumn = "stationId",
+                                 stations = c(),
+                                 elementfiltercolumn = "elementId",
+                                 elements = c(),
+                                 include_observation_flags = FALSE,
+                                 include_station_info = FALSE,
+                                 start_date = NULL,
+                                 end_date = NULL) {
+  #get stations database data and station ids values
+  if (length(stations) > 0) {
+    #construct a string of station values from the passed station vector eg of result ('191','122')
+    passed_station_values <- paste0("(", paste0("'", stations, "'", collapse = ", "), ")")
+
+    #get the station info of the passed station values
+    db_station_info <- DBI::dbGetQuery(con, paste0("SELECT * FROM station WHERE ", stationfiltercolumn, " IN ", passed_station_values, ";"))
+
+    #set values of station ids only
+    if (stationfiltercolumn == "stationId") {
+      station_ids_values <- passed_station_values
+    } else {
+      station_ids_values <- paste0("(", paste0("'", db_station_info$stationId, "'", collapse = ", "), ")")
+    }
+  }
+
+  #if there are no elements passed then stop and throw error
+  if (length(elements) < 1) stop("At least one element must be passed to 'elements'.")
+
+  #set values of element ids only
+  if (elementfiltercolumn == "elementId") {
+    #get element id values directly from passed data
+    element_ids_values <- paste0("(", paste0(elements, collapse = ", "), ")")
+  } else {
+    #get element id values from the database
+    passed_element_values <- paste0("(", paste0("'", elements, "'", collapse = ", "), ")")
+    db_elements_ids <- DBI::dbGetQuery(con, paste0("SELECT elementId FROM obselement WHERE ", elementfiltercolumn, " IN ", passed_element_values, ";"))
+    element_ids_values <- paste0("(", paste0(sprintf("%d", db_elements_ids$elementId), collapse = ", "), ")")
+  }
+
+  flags_column_col_sql <- " "
+  if (include_observation_flags) {
+    flags_column_col_sql <- ", observationfinal.flag AS flag"
+  }
+
+  #get date bounds filter query if dates have been passed
+  date_bounds_filter <- ""
+  if (!is.null(start_date)) {
+    if (!lubridate::is.Date(start_date))
+      stop("start_date must be of type Date.")
+    start_date <- format(start_date, format = "%Y-%m-%d")
+    date_bounds_filter <- paste0(date_bounds_filter, " AND obsDatetime >= '", start_date, "'")
+  }
+  if (!is.null(end_date)) {
+    if (!lubridate::is.Date(end_date))
+      stop("end_date must be of type Date.")
+    end_date <- format(end_date, format = "%Y-%m-%d")
+    date_bounds_filter <- paste0(date_bounds_filter, " AND obsDatetime <= '", end_date, "'")
+  }
+
+  #construct observation data sql query and get data from database
+  if (length(stations) > 0) {
+    #if stations passed get observation data of selected elements of passed stations
+    db_observation_data <- DBI::dbGetQuery(con, paste0("SELECT observationfinal.recordedFrom AS station, obselement.abbreviation AS element, observationfinal.obsDatetime AS datetime, observationfinal.obsValue AS obsvalue", flags_column_col_sql, " FROM observationfinal INNER JOIN obselement ON observationfinal.describedBy = obselement.elementId WHERE observationfinal.recordedFrom IN ", station_ids_values, " AND observationfinal.describedBy IN ", element_ids_values, date_bounds_filter, " ORDER BY observationfinal.recordedFrom, observationfinal.describedBy;"))
+  } else {
+    #if stations have not been passed get observation data of passed elements of all stations
+    db_observation_data <- DBI::dbGetQuery(con, paste0("SELECT observationfinal.recordedFrom AS station, obselement.abbreviation AS element, observationfinal.obsDatetime AS datetime, observationfinal.obsValue AS obsvalue", flags_column_col_sql, " FROM observationfinal INNER JOIN obselement ON observationfinal.describedBy = obselement.elementId WHERE observationfinal.describedBy IN ", element_ids_values, date_bounds_filter, " ORDER BY observationfinal.recordedFrom, observationfinal.describedBy;"))
+
+    #then get the stations ids (uniquely) from the observation data and use the ids to get station info
+    station_ids_values <- paste0("(", paste0("'", as.character(unique(db_observation_data$station)), "'", collapse = ", "), ")")
+    db_station_info <- DBI::dbGetQuery(con, paste0("SELECT * FROM station WHERE stationId IN ", station_ids_values, ";"))
+  }
+
+  #unstack the observations so that each element abbreviation (e.g. PRECIP, TMPMAX, TMPMIN) becomes its own column
+  db_observation_data <- tidyr::pivot_wider(db_observation_data,
+                                            names_from = element,
+                                            values_from = obsvalue)
+
+  if (include_station_info) {
+    data_list <- list(db_station_info, db_observation_data)
+    names(data_list) <- c("Metadata", "Daily data")
+  } else {
+    data_list <- db_observation_data
+  }
+  return(data_list)
+}
\ No newline at end of file
diff --git a/R/season_start_probabilities.R b/R/season_start_probabilities.R
index ed7e114..2f35dfe 100644
--- a/R/season_start_probabilities.R
+++ b/R/season_start_probabilities.R
@@ -4,6 +4,7 @@
 #' @param country `character(1)` The country code of the data.
 #' @param station_id `character` The id's of the stations to analyse. Either a
 #'   single value or a vector.
+#' @param call A character vector specifying where to call the raw data from if calling raw data.
 #' @param start_dates `numeric` A vector of start dates (in doy format) to calculate the probabilities of the season starting on or before.
 #' @param override A logical argument default `FALSE` indicating whether to calculate the summaries still, even if they are stored already in the bucket.
#' @@ -20,6 +21,7 @@ #' #season_start_probabilities(country = "zm", station_id = "16", start_dates = c(10, 20, 100)) season_start_probabilities <- function(country, station_id, + call = c("climsoft", "googlebuckets"), start_dates = NULL, override = FALSE) { list_return <- NULL @@ -62,16 +64,17 @@ season_start_probabilities <- function(country, # if we are overriding, then we are overriding for our start_rains definition too, meaning we need to recalculate that if (override){ # Fetch daily data and preprocess - daily <- epicsadata::get_daily_data(country = country, station_id = station_id) - + daily <- get_daily_data(country = country, station_id = station_id, call_from = call) # For the variable names to be set as a certain default, set TRUE here, and run check_and_rename_variables data_names <- epicsadata::data_definitions(names(daily), TRUE) daily <- check_and_rename_variables(daily, data_names) + if (class(daily$date) != "Date") daily$date <- as.Date(daily$date) + if (!"year" %in% names(daily)) daily$year <- lubridate::year(daily$date) } else { data_names <- NULL data_names$station <- "station" } - season_data <- annual_rainfall_summaries(country = country, station_id = station_id, summaries = c("start_rains"), override = override) + season_data <- annual_rainfall_summaries(country = country, station_id = station_id, call = call, summaries = c("start_rains"), override = override) if (is.null(start_dates)){ start_dates <- definitions$season_start_probabilities$specified_day if (length(start_dates) == 0) stop("start_dates parameter missing in definitions file.") diff --git a/R/total_temperature_summaries.R b/R/total_temperature_summaries.R index a82ab55..76eacb5 100644 --- a/R/total_temperature_summaries.R +++ b/R/total_temperature_summaries.R @@ -4,6 +4,7 @@ #' #' @param country A character string specifying the country code of the data. #' @param station_id A character vector specifying the ID(s) of the station(s) to analyse. +#' @param call A character vector specifying where to call the raw data from if calling raw data. #' @param summaries A character vector specifying the names of the summaries to produce. #' @param to A character string indicating whether the summaries should be generated for "annual" or "monthly" data. #' @param override A logical argument default `FALSE` indicating whether to calculate the summaries still, even if they are stored already in the bucket. @@ -15,10 +16,12 @@ #' #total_temperature_summaries(country = "zm", station_id = "1", summaries = c("mean_tmin", "mean_tmax", "min_tmin", "max_tmax"), to = "annual") total_temperature_summaries <- function(country, station_id, + call = c("climsoft", "googlebuckets"), summaries = c("mean_tmin", "mean_tmax", "min_tmin", "min_tmax", "max_tmin", "max_tmax"), to = c("annual", "monthly"), override = FALSE) { to <- match.arg(to) + call <- match.arg(call) list_return <- NULL # we get the definitions_id from station_id metadata. 
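The same date/year normalisation is now repeated in each summary function before `check_and_rename_variables()`; a toy illustration follows (not part of the patch). A small suggestion for a follow-up: `!inherits(daily$date, "Date")` behaves better than `class(daily$date) != "Date"` if the datetime column ever arrives as POSIXct, whose class vector has length two.

```r
# Illustration only (not part of the patch); toy values.
daily <- data.frame(
  station = "r_data_test_1",
  date    = c("2021-01-01", "2021-01-02"),  # character, as the raw sources may return it
  rain    = c(0, 5.2)
)

if (!inherits(daily$date, "Date")) daily$date <- as.Date(daily$date)
if (!"year" %in% names(daily)) daily$year <- lubridate::year(daily$date)

str(daily)  # 'date' is now a Date and a numeric 'year' column has been added
```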
@@ -44,19 +47,15 @@ total_temperature_summaries <- function(country, } else { definitions <- definitions(country = country, definitions_id, summaries = summaries) - # Fetch daily data and preprocess - daily <- epicsadata::get_daily_data(country = country, station_id = station_id) + # Fetch daily data and preprocess + daily <- get_daily_data(country = country, station_id = station_id, call_from = call) + # For the variable names to be set as a certain default, set TRUE here, and run check_and_rename_variables data_names <- epicsadata::data_definitions(names(daily), TRUE) daily <- check_and_rename_variables(daily, data_names) - - # # even though we can have tmax and tmin defined together, it's being done this way - # # in case different parameters are defined for tmax and for tmin. - # summary_data <- expand.grid(year = unique(daily[[data_names$year]]), - # station = unique(daily[[data_names$station]])) - # names(summary_data) <- c(data_names$year, data_names$station) - # + if (class(daily$date) != "Date") daily$date <- as.Date(daily$date) + if (!"year" %in% names(daily)) daily$year <- lubridate::year(daily$date) summary_data <- NULL for (summary in summaries) { diff --git a/man/annual_rainfall_summaries.Rd b/man/annual_rainfall_summaries.Rd index 61300e9..8363931 100644 --- a/man/annual_rainfall_summaries.Rd +++ b/man/annual_rainfall_summaries.Rd @@ -7,6 +7,7 @@ annual_rainfall_summaries( country, station_id, + call = c("climsoft", "googlebuckets"), summaries = c("annual_rain", "start_rains", "end_rains", "end_season", "seasonal_rain", "seasonal_length"), override = FALSE @@ -18,6 +19,8 @@ annual_rainfall_summaries( \item{station_id}{\code{character} The id's of the stations to analyse. Either a single value or a vector.} +\item{call}{A character vector specifying where to call the raw data from if calling raw data.} + \item{summaries}{\code{character} The names of the summaries to produce.} \item{override}{A logical argument default \code{FALSE} indicating whether to calculate the summaries still, even if they are stored already in the bucket.} diff --git a/man/crop_success_probabilities.Rd b/man/crop_success_probabilities.Rd index be275d1..ec45738 100644 --- a/man/crop_success_probabilities.Rd +++ b/man/crop_success_probabilities.Rd @@ -7,6 +7,7 @@ crop_success_probabilities( country, station_id, + call = c("climsoft", "googlebuckets"), planting_dates = NULL, water_requirements = NULL, planting_length = NULL, @@ -20,6 +21,8 @@ crop_success_probabilities( \item{station_id}{\code{character} The id's of the stations to analyse. Either a single value or a vector.} +\item{call}{A character vector specifying where to call the raw data from if calling raw data.} + \item{planting_dates}{\code{numeric} Vector containing planting dates requirements.} \item{water_requirements}{\code{numeric} Vector containing water requirements requirements.} diff --git a/man/get_climsoft_conn.Rd b/man/get_climsoft_conn.Rd new file mode 100644 index 0000000..3727a67 --- /dev/null +++ b/man/get_climsoft_conn.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/climsoft_connection_functions.R +\name{get_climsoft_conn} +\alias{get_climsoft_conn} +\title{Get Climsoft Connection} +\usage{ +get_climsoft_conn() +} +\value{ +The database connection object. +} +\description{ +Retrieves the stored Climsoft database connection from the package environment. 
+} +\examples{ +#con <- get_climsoft_conn() + +} diff --git a/man/get_daily_data.Rd b/man/get_daily_data.Rd new file mode 100644 index 0000000..55198fc --- /dev/null +++ b/man/get_daily_data.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_daily_data.R +\name{get_daily_data} +\alias{get_daily_data} +\title{Get Daily Data} +\usage{ +get_daily_data(country, station_id, call_from = c("climsoft", "googlebuckets")) +} +\arguments{ +\item{country}{A character vector specifying the country or countries from which to get the data. Common options are \code{"mz"}, \code{"zm"}, and \code{"zm_test"}. Any defined in \code{get_bucket_name()}.} + +\item{station_id}{A character string specifying the ID of the station for which to get the daily data.} +} +\value{ +A data frame containing the daily data for the specified station and country. +} +\description{ +Get Daily Data +} +\examples{ +# +} diff --git a/man/import_from_climsoft.Rd b/man/import_from_climsoft.Rd new file mode 100644 index 0000000..b706de0 --- /dev/null +++ b/man/import_from_climsoft.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/import_from_climsoft.R +\name{import_from_climsoft} +\alias{import_from_climsoft} +\title{Import Data from Climsoft} +\usage{ +import_from_climsoft( + con = get_climsoft_conn(), + stationfiltercolumn = "stationId", + stations = c(), + elementfiltercolumn = "elementId", + elements = c(), + include_observation_flags = FALSE, + include_station_info = FALSE, + start_date = NULL, + end_date = NULL +) +} +\arguments{ +\item{con}{Connection object to the Climsoft database, default is the result of \code{get_climsoft_conn()}.} + +\item{stationfiltercolumn}{Name of the column to filter by stations, default is 'stationId'.} + +\item{stations}{Vector of station IDs to filter the data, defaults to an empty vector.} + +\item{elementfiltercolumn}{Name of the column to filter by elements, default is 'elementId'.} + +\item{elements}{Vector of element IDs to filter the data, defaults to an empty vector.} + +\item{include_observation_flags}{Boolean, if TRUE includes observation flags in the output, defaults to FALSE.} + +\item{include_station_info}{Boolean, if TRUE includes station metadata in the output, defaults to FALSE.} + +\item{start_date}{Start date for filtering the observations, format should be Date, defaults to NULL.} + +\item{end_date}{End date for filtering the observations, format should be Date, defaults to NULL.} +} +\value{ +A list containing Climsoft station and observation data based on the filters applied. If \code{include_station_info} is TRUE, the list will have two elements: 'Metadata' with station details and 'Daily data' with observations. +} +\description{ +Connects to a Climsoft database and imports data based on the specified filters for stations and elements, with options to include observation flags and station information. 
+} +\examples{ +con <- get_climsoft_conn() +data <- import_from_climsoft(con, stations = c("101", "102"), elements = c("1", "2"), start_date = as.Date("2020-01-01"), end_date = as.Date("2020-01-31")) + +} diff --git a/man/season_start_probabilities.Rd b/man/season_start_probabilities.Rd index f70a413..b1afa47 100644 --- a/man/season_start_probabilities.Rd +++ b/man/season_start_probabilities.Rd @@ -7,6 +7,7 @@ season_start_probabilities( country, station_id, + call = c("climsoft", "googlebuckets"), start_dates = NULL, override = FALSE ) @@ -17,6 +18,8 @@ season_start_probabilities( \item{station_id}{\code{character} The id's of the stations to analyse. Either a single value or a vector.} +\item{call}{A character vector specifying where to call the raw data from if calling raw data.} + \item{start_dates}{\code{numeric} A vector of start dates (in doy format) to calculate the probabilities of the season starting on or before.} \item{override}{A logical argument default \code{FALSE} indicating whether to calculate the summaries still, even if they are stored already in the bucket.} diff --git a/man/set_climsoft_conn.Rd b/man/set_climsoft_conn.Rd new file mode 100644 index 0000000..db9ac39 --- /dev/null +++ b/man/set_climsoft_conn.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/climsoft_connection_functions.R +\name{set_climsoft_conn} +\alias{set_climsoft_conn} +\title{Set Climsoft Connection} +\usage{ +set_climsoft_conn(dbname, user, password, host, port) +} +\arguments{ +\item{dbname}{Name of the database.} + +\item{user}{Username for database access.} + +\item{password}{Password for database access.} + +\item{host}{Host where the database server is located.} + +\item{port}{Port number on which the database server is running.} +} +\value{ +Invisible. The function does not return anything but stores the connection in a designated package environment. +} +\description{ +Establishes a connection to a Climsoft database and stores it in a package environment for later use. 
+} +\examples{ +#set_climsoft_conn("climsoft_db", "user", "password", "localhost", "3306") + +} diff --git a/man/total_temperature_summaries.Rd b/man/total_temperature_summaries.Rd index f72bd0c..68eaf65 100644 --- a/man/total_temperature_summaries.Rd +++ b/man/total_temperature_summaries.Rd @@ -7,6 +7,7 @@ total_temperature_summaries( country, station_id, + call = c("climsoft", "googlebuckets"), summaries = c("mean_tmin", "mean_tmax", "min_tmin", "min_tmax", "max_tmin", "max_tmax"), to = c("annual", "monthly"), override = FALSE @@ -17,6 +18,8 @@ total_temperature_summaries( \item{station_id}{A character vector specifying the ID(s) of the station(s) to analyse.} +\item{call}{A character vector specifying where to call the raw data from if calling raw data.} + \item{summaries}{A character vector specifying the names of the summaries to produce.} \item{to}{A character string indicating whether the summaries should be generated for "annual" or "monthly" data.} diff --git a/tests/testthat/test-annual_rainfall_summaries.R b/tests/testthat/test-annual_rainfall_summaries.R index e8bea90..04031eb 100644 --- a/tests/testthat/test-annual_rainfall_summaries.R +++ b/tests/testthat/test-annual_rainfall_summaries.R @@ -8,7 +8,7 @@ country <- "zm_test" station_id <- "r_data_test_1" test_that("Correct summaries are calculated", { - result <- suppressWarnings(annual_rainfall_summaries(country, station_id, override = TRUE)) + result <- suppressWarnings(annual_rainfall_summaries(country, station_id, call = "googlebuckets", override = TRUE)) expect_true(identical(result[[2]], test_1_results)) }) diff --git a/tests/testthat/test-crop_success_probabilities.R b/tests/testthat/test-crop_success_probabilities.R index d81700f..758ab2a 100644 --- a/tests/testthat/test-crop_success_probabilities.R +++ b/tests/testthat/test-crop_success_probabilities.R @@ -13,6 +13,6 @@ test_that("Correct summaries are called", { }) test_that("Correct summaries are calculated", { - result <- suppressWarnings(crop_success_probabilities(country, station_id, override = TRUE)) + result <- suppressWarnings(crop_success_probabilities(country, station_id, call = "googlebuckets", override = TRUE)) expect_true(identical(result[[2]], test_2_results)) }) diff --git a/tests/testthat/test-season_start_probabilities.R b/tests/testthat/test-season_start_probabilities.R index 71eb040..b4f5bf1 100644 --- a/tests/testthat/test-season_start_probabilities.R +++ b/tests/testthat/test-season_start_probabilities.R @@ -12,6 +12,6 @@ test_that("Correct summaries are called", { }) test_that("Correct summaries are calculated", { - result <- suppressWarnings(season_start_probabilities(country, station_id, override = TRUE)) + result <- suppressWarnings(season_start_probabilities(country, station_id, call = "googlebuckets", override = TRUE)) expect_true(identical(result[[2]], test_1_results)) }) diff --git a/tests/testthat/test-total_temperature_summaries.R b/tests/testthat/test-total_temperature_summaries.R index 280f6d6..6165b8d 100644 --- a/tests/testthat/test-total_temperature_summaries.R +++ b/tests/testthat/test-total_temperature_summaries.R @@ -20,11 +20,13 @@ test_that("Correct summaries are called", { test_that("Correct summaries are calculated", { result_annual <- suppressWarnings(total_temperature_summaries(country, station_id, + call = "googlebuckets", to = "annual", summaries = c("mean_tmin", "mean_tmax"), override = TRUE)) result_monthly <- suppressWarnings(total_temperature_summaries(country, station_id, + call = "googlebuckets", to = "monthly", 
summaries = c("mean_tmin", "mean_tmax"), override = TRUE)) @@ -36,6 +38,7 @@ test_that("Correct summaries are calculated", { test_that("Correct summaries are called", { result_annual <- suppressWarnings(total_temperature_summaries(country, station_id, summaries = c("min_tmin"), + call = "googlebuckets", to = "annual", override = TRUE)) expect_true(identical(names(result_annual[[2]]), c("station", "year", "min_tmin"))) diff --git a/tests/testthat/testdata/test_2_annual_rainfall_summaries.rds b/tests/testthat/testdata/test_2_annual_rainfall_summaries.rds index 60a0e61..c524748 100644 Binary files a/tests/testthat/testdata/test_2_annual_rainfall_summaries.rds and b/tests/testthat/testdata/test_2_annual_rainfall_summaries.rds differ
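An end-to-end sketch of the workflow this patch enables, and the reason the tests above pin `call = "googlebuckets"`: no Climsoft database is available where the test suite runs (illustration only, not part of the patch; connection details and station IDs are placeholders).

```r
# Illustration only (not part of the patch).

# 1. Register a Climsoft connection once per session (placeholder credentials):
set_climsoft_conn(dbname = "climsoft_db", user = "user", password = "password",
                  host = "climsoft.example.org", port = 3306)

# 2. Summary functions can then pull raw data straight from Climsoft:
rain  <- annual_rainfall_summaries(country = "zm", station_id = "16", call = "climsoft")
probs <- season_start_probabilities(country = "zm", station_id = "16",
                                    call = "climsoft", start_dates = c(10, 20, 100))

# 3. Without a database (for example on CI), fall back to the bucket copies,
#    exactly as the updated tests do:
rain_gcs <- annual_rainfall_summaries(country = "zm_test", station_id = "r_data_test_1",
                                      call = "googlebuckets", override = TRUE)
```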