-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
adding ability to import raw data from climsoft
- Loading branch information
1 parent
ea6bf2d
commit 2e2478f
Showing
24 changed files
with
371 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#' Set Climsoft Connection | ||
#' | ||
#' Establishes a connection to a Climsoft database and stores it in a package environment for later use. | ||
#' | ||
#' @param dbname Name of the database. | ||
#' @param user Username for database access. | ||
#' @param password Password for database access. | ||
#' @param host Host where the database server is located. | ||
#' @param port Port number on which the database server is running. | ||
#' | ||
#' @return Invisible. The function does not return anything but stores the connection in a designated package environment. | ||
#' | ||
#' @examples | ||
#' #set_climsoft_conn("climsoft_db", "user", "password", "localhost", "3306") | ||
#' | ||
#' @importFrom DBI dbConnect | ||
#' @importFrom RMySQL MySQL | ||
#' @export | ||
set_climsoft_conn <- function(dbname, user, password, host, port){ | ||
conn <- DBI::dbConnect(drv = RMySQL::MySQL(), dbname = dbname, | ||
user = user, password = password, host = host, port = port) | ||
pkg_env$conn <- conn | ||
} | ||
|
||
#' Get Climsoft Connection | ||
#' | ||
#' Retrieves the stored Climsoft database connection from the package environment. | ||
#' | ||
#' @return The database connection object. | ||
#' | ||
#' @examples | ||
#' #con <- get_climsoft_conn() | ||
#' | ||
#' @export | ||
get_climsoft_conn <- function(){ | ||
get("conn", envir = pkg_env) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#' Get Daily Data | ||
#' | ||
#' @param country A character vector specifying the country or countries from which to get the data. Common options are `"mz"`, `"zm"`, and `"zm_test"`. Any defined in `get_bucket_name()`. | ||
#' @param station_id A character string specifying the ID of the station for which to get the daily data. | ||
#' | ||
#' @return A data frame containing the daily data for the specified station and country. | ||
#' @export | ||
#' | ||
#' @examples # | ||
get_daily_data <- function(country, station_id, call_from = c("climsoft", "googlebuckets")) { | ||
call_from <- match.arg(call_from) | ||
if (length(country) > 1) stop("'country' must be of length 1") | ||
station_id <- as.character(station_id) | ||
|
||
if (call_from == "climsoft"){ | ||
# if you call from climsoft | ||
climsoft_info <- station_metadata(country = country, station_id = station_id)$climsoft_list | ||
if (is.null(get_climsoft_conn())) stop("Set climsoft connection with set_climsoft_conn() function.") | ||
station_data <- import_from_climsoft(con = get_climsoft_conn(), | ||
stations = station_id, | ||
include_station_info = FALSE, | ||
elementfiltercolumn = climsoft_info[[1]]$elementfiltercolumn, | ||
elements = climsoft_info[[1]]$elements) | ||
} else { | ||
# if you call from googlebuckets | ||
dfs <- vector("list", length(station_id)) | ||
names(dfs) <- station_id | ||
for (i in seq_along(station_id)) { | ||
f <- paste0(country, "/", "data", "/", station_id[i], ".rds") | ||
if (file.exists(f)) { | ||
dfs[[i]] <- readRDS(f) | ||
} else { | ||
f <- epicsadata::update_daily_data(country, station_id[i]) | ||
dfs[[i]] <- f#saveRDS(o, file = f) | ||
} | ||
} | ||
if (length(station_id) > 1) { | ||
station_data <- dplyr::bind_rows(dfs) | ||
} else station_data <- dfs[[1]] | ||
} | ||
return(station_data) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
pkg_env <- new.env(parent = emptyenv()) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
#' Import Data from Climsoft | ||
#' | ||
#' Connects to a Climsoft database and imports data based on the specified filters for stations and elements, with options to include observation flags and station information. | ||
#' | ||
#' @param con Connection object to the Climsoft database, default is the result of \code{get_climsoft_conn()}. | ||
#' @param stationfiltercolumn Name of the column to filter by stations, default is 'stationId'. | ||
#' @param stations Vector of station IDs to filter the data, defaults to an empty vector. | ||
#' @param elementfiltercolumn Name of the column to filter by elements, default is 'elementId'. | ||
#' @param elements Vector of element IDs to filter the data, defaults to an empty vector. | ||
#' @param include_observation_flags Boolean, if TRUE includes observation flags in the output, defaults to FALSE. | ||
#' @param include_station_info Boolean, if TRUE includes station metadata in the output, defaults to FALSE. | ||
#' @param start_date Start date for filtering the observations, format should be Date, defaults to NULL. | ||
#' @param end_date End date for filtering the observations, format should be Date, defaults to NULL. | ||
#' | ||
#' @return A list containing Climsoft station and observation data based on the filters applied. If `include_station_info` is TRUE, the list will have two elements: 'Metadata' with station details and 'Daily data' with observations. | ||
#' | ||
#' @examples | ||
#' con <- get_climsoft_conn() | ||
#' data <- import_from_climsoft(con, stations = c("101", "102"), elements = c("1", "2"), start_date = as.Date("2020-01-01"), end_date = as.Date("2020-01-31")) | ||
#' | ||
#' @export | ||
import_from_climsoft <- function(con = get_climsoft_conn(), | ||
stationfiltercolumn = "stationId", | ||
stations = c(), | ||
elementfiltercolumn = "elementId", | ||
elements = c(), | ||
include_observation_flags = FALSE, | ||
include_station_info = FALSE, | ||
start_date = NULL, | ||
end_date = NULL) { | ||
con <- con # get connection | ||
|
||
#get stations database data and station ids values | ||
if (length(stations) > 0) { | ||
#construct a string of station values from the passed station vector eg of result ('191','122') | ||
passed_station_values <- paste0("(", paste0("'", stations, "'", collapse = ", "), ")") | ||
|
||
#get the station info of the passed station values | ||
db_station_info <- DBI::dbGetQuery(con, paste0( "SELECT * FROM station WHERE ", stationfiltercolumn, " IN ", passed_station_values, ";")) | ||
|
||
#set values of station ids only | ||
if (stationfiltercolumn == "stationId") { | ||
station_ids_values <- passed_station_values | ||
} else{ | ||
station_ids_values <- paste0("(", paste0("'", db_station_info$stationId, "'", collapse = ", "),")") | ||
} | ||
} | ||
|
||
#if there are no elements passed then stop and throw error | ||
if (length(elements) < 1) stop("start_date must be of type Date.") | ||
|
||
#set values of element ids only | ||
if (elementfiltercolumn == "elementId") { | ||
#get element id values directly from passed data | ||
element_ids_values <- paste0("(", paste0(elements, collapse = ", "), ")") | ||
} else{ | ||
#get element id values from the database | ||
passed_element_values <- paste0("(", paste0("'", elements, "'", collapse = ", "), ")") | ||
db_elements_ids <- DBI::dbGetQuery( con, paste0("SELECT elementId FROM obselement WHERE ", elementfiltercolumn, " IN ", passed_element_values, ";" )) | ||
element_ids_values <- paste0("(", paste0(sprintf("%d", db_elements_ids$elementId), collapse = ", "), ")") | ||
} | ||
|
||
if(include_elements_info) { | ||
db_elements_info <- DBI::dbGetQuery(con, paste0("SELECT elementId, elementName, abbreviation, description, elementtype, upperLimit, lowerLimit, units FROM obselement WHERE elementId ", " IN ", element_ids_values, ";" )) | ||
} | ||
|
||
flags_column_col_sql <- " " | ||
if (include_observation_flags) { | ||
flags_column_col_sql <- ", observationfinal.flag AS flag" | ||
} | ||
|
||
#get databounds filter query if dates have been passed | ||
date_bounds_filter <- "" | ||
if (!is.null(start_date)) { | ||
if (!lubridate::is.Date(start_date)) | ||
stop("start_date must be of type Date.") | ||
start_date <- format(start_date, format = "%Y-%m-%d") | ||
date_bounds_filter = paste0(date_bounds_filter, " AND obsDatetime >= ", sQuote(start_date)) | ||
} | ||
if (!is.null(end_date)) { | ||
if (!lubridate::is.Date(end_date)) | ||
stop("end_date must be of type Date.") | ||
end_date <- format(end_date, format = "%Y-%m-%d") | ||
date_bounds_filter <- paste0(date_bounds_filter," AND obsDatetime <=", sQuote(end_date)) | ||
} | ||
|
||
#construct observation data sql query and get data from database | ||
if (length(stations) > 0) { | ||
#if stations passed get observation data of selected elements of passed stations | ||
db_observation_data <- DBI::dbGetQuery(con, paste0("SELECT observationfinal.recordedFrom As station, obselement.abbreviation AS element, observationfinal.obsDatetime AS datetime, observationfinal.obsValue AS obsvalue", flags_column_col_sql, " FROM observationfinal INNER JOIN obselement ON observationfinal.describedBy = obselement.elementId WHERE observationfinal.recordedFrom IN ", station_ids_values, " AND observationfinal.describedBy IN ", element_ids_values, date_bounds_filter, " ORDER BY observationfinal.recordedFrom, observationfinal.describedBy;")) | ||
} else{ | ||
#if stations have not been passed get observation data of passed elements of all stations | ||
db_observation_data <- DBI::dbGetQuery(con, paste0("SELECT observationfinal.recordedFrom As station, obselement.abbreviation AS element, observationfinal.obsDatetime AS datetime, observationfinal.obsValue AS obsvalue", flags_column_col_sql, " FROM observationfinal INNER JOIN obselement ON observationfinal.describedBy = obselement.elementId WHERE observationfinal.describedBy IN ", element_ids_values, date_bounds_filter, " ORDER BY observationfinal.recordedFrom, observationfinal.describedBy;")) | ||
|
||
#then get the stations ids (uniquely) from the observation data and use the ids to get station info | ||
station_ids_values <- paste0("(", paste0("'", as.character(unique(db_observation_data$station) ), "'", collapse = ", "), ")") | ||
db_station_info <- DBI::dbGetQuery(con, paste0("SELECT * FROM station WHERE stationId IN ", station_ids_values, ";" )) | ||
} | ||
|
||
if(unstack_data){ | ||
db_observation_data <- tidyr::pivot_wider(db_observation_data, | ||
names_from = element, | ||
values_from = obsvalue) | ||
} | ||
if (include_station_info) { | ||
data_list <- list(db_station_info, db_observation_data) | ||
names(data_list) <- c("Metadata", "Daily data") | ||
} else { | ||
data_list <- db_observation_data | ||
} | ||
return(data_list) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.