-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
new function to visualise database content
- Loading branch information
Showing
4 changed files
with
287 additions
and
109 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,109 +1,110 @@ | ||
#' Archive the data from an areal database
#'
#' @param pattern [\code{character(1)}]\cr a regular expression used to filter
#'   files to load.
#' @param variables [\code{character(.)}]\cr columns, typically observed
#'   variables, to select.
#' @param compress [\code{logical(1)}]\cr whether or not the database should be
#'   compressed into a \emph{7z} archive. Will delete the database folder in
#'   \code{outPath}.
#' @param outPath [\code{character(1)}]\cr directory, where the archive should
#'   be stored.
#' @details This function prepares and packages the data into an archiveable
#'   form. This contains geopackage files for geometries and csv files for all
#'   tables, such as inventory, matching and thematic data tables.
#' @return no return value, called for the side-effect of creating a database
#'   archive.
#' @importFrom checkmate assertCharacter assertLogical assertDirectoryExists
#'   testDirectoryExists
#' @importFrom progress progress_bar
#' @importFrom purrr map
#' @importFrom stringr str_split
#' @importFrom readr write_csv write_lines
#' @importFrom archive archive_write_dir
#' @importFrom utils capture.output sessionInfo tar
#' @export

adb_archive <- function(pattern = NULL, variables = NULL, compress = FALSE, outPath = NULL){

  # NOTE(review): 'pattern' and 'variables' are validated but never used below;
  # file filtering and column selection still need to be implemented.
  assertCharacter(x = pattern, len = 1, null.ok = TRUE)
  assertCharacter(x = variables, any.missing = FALSE, null.ok = TRUE)
  assertLogical(x = compress, len = 1, any.missing = FALSE)
  assertDirectoryExists(x = outPath, access = "rw")

  # set internal paths
  intPaths <- paste0(getOption(x = "adb_path"))

  # derive current version; load() brings 'db_info' into this environment
  load(paste0(intPaths, "/db_info.RData"))
  version <- paste0(db_info$version, "_", format(Sys.Date(), "%Y%m%d"))

  # derive path; strip a possible trailing slash from outPath so the
  # separator is always present exactly once
  outPath <- sub("/+$", "", outPath)
  archivePath <- paste0(outPath, "/arealDB_", version, "/")
  message("\n-> creating archive '", archivePath, "'")
  dir.create(path = archivePath)
  dir.create(path = paste0(archivePath, "geometries"))
  dir.create(path = paste0(archivePath, "tables"))
  dir.create(path = paste0(archivePath, "meta"))

  message("-> archiving tables")
  stage3tables_full <- list.files(path = paste0(intPaths, "/tables/stage3"), full.names = TRUE)
  if(length(stage3tables_full) != 0){

    # file names without their extension, used as csv output names
    stage3tables <- map(.x = list.files(path = paste0(intPaths, "/tables/stage3")), .f = function(ix){
      temp <- str_split(ix, "[.]")[[1]][1]
    }) |>
      unlist()

    pb <- progress_bar$new(format = "[:bar] :current/:total (:percent)", total = length(stage3tables))
    for(i in seq_along(stage3tables)){
      temp <- readRDS(file = stage3tables_full[i])
      write_csv(x = temp, file = paste0(archivePath, "tables/", stage3tables[i], ".csv"), na = "")
      pb$tick()
    }

  }

  message("-> archiving geometries")
  stage3geometries_full <- list.files(path = paste0(intPaths, "/geometries/stage3"), full.names = TRUE)
  if(length(stage3geometries_full) != 0){

    stage3geometries <- list.files(path = paste0(intPaths, "/geometries/stage3"))
    file.copy(from = stage3geometries_full,
              to = paste0(archivePath, "geometries/", stage3geometries),
              overwrite = TRUE)

  }

  message("-> archiving inventory tables")
  adb_inventory(type = "dataseries") |>
    write_csv(file = paste0(archivePath, "meta/inv_dataseries.csv"), na = "")
  adb_inventory(type = "geometries") |>
    write_csv(file = paste0(archivePath, "meta/inv_geometries.csv"), na = "")
  adb_inventory(type = "tables") |>
    write_csv(file = paste0(archivePath, "meta/inv_tables.csv"), na = "")
  adb_ontology(type = "ontology") |>
    write_csv(file = paste0(archivePath, "meta/ontology.csv"), na = "")
  adb_ontology(type = "gazetteer") |>
    write_csv(file = paste0(archivePath, "meta/gazetteer.csv"), na = "")

  message("-> archiving metadata")
  sI <- sessionInfo()
  save(sI, file = paste0(archivePath, "R_sessionInfo.RData"))
  write_lines(x = capture.output(sI), file = paste0(archivePath, "R_sessionInfo.txt"))
  save(db_info, file = paste0(archivePath, "dbInfo.RData"))
  # human-readable summary of the database description
  db_desc_lines <- paste0("version:\n", db_info$version, "\n\n",
                          "authors:\n", paste0("creator: ", paste0(db_info$author$cre, collapse = ", "), "\nauthor: ", paste0(db_info$author$aut, collapse = ", "), "\ncontributor: ", paste0(db_info$author$ctb, collapse = ", ")), "\n\n",
                          "licence:\n", db_info$licence, "\n\n",
                          "gazetteer:\n", db_info$gazetteer, "\n\n", # these two should presumably be replaced with a version label as well
                          "ontology:\n", unique(db_info$ontology), "\n\n", # these two should presumably be replaced with a version label as well
                          "variables:\n", paste0(db_info$variables, collapse = ", "), "\n\n")
  write_lines(x = db_desc_lines, file = paste0(archivePath, "dbInfo.txt"))

  if(compress){
    message("-> compressing database archive")
    archive_write_dir(archive = paste0(outPath, "/arealDB_", version, ".7z"),
                      dir = archivePath, format = "7zip")
    unlink(archivePath, recursive = TRUE)
  }

}
#' Archive the data from an areal database
#'
#' @param pattern [\code{character(1)}]\cr a regular expression used to filter
#'   files to load.
#' @param variables [\code{character(.)}]\cr columns, typically observed
#'   variables, to select.
#' @param compress [\code{logical(1)}]\cr whether or not the database should be
#'   compressed into a \emph{7z} archive. Will delete the database folder in
#'   \code{outPath}.
#' @param outPath [\code{character(1)}]\cr directory, where the archive should
#'   be stored.
#' @details This function prepares and packages the data into an archiveable
#'   form. This contains geopackage files for geometries and csv files for all
#'   tables, such as inventory, matching and thematic data tables.
#' @return no return value, called for the side-effect of creating a database
#'   archive.
#' @importFrom checkmate assertCharacter assertLogical assertDirectoryExists
#'   testDirectoryExists
#' @importFrom progress progress_bar
#' @importFrom purrr map
#' @importFrom stringr str_split
#' @importFrom readr write_csv write_lines
#' @importFrom archive archive_write_dir
#' @importFrom utils capture.output sessionInfo tar
#' @export

adb_archive <- function(pattern = NULL, variables = NULL, compress = FALSE,
                        outPath = NULL){

  # NOTE(review): 'pattern' and 'variables' are validated but never used below;
  # file filtering and column selection still need to be implemented.
  assertCharacter(x = pattern, len = 1, null.ok = TRUE)
  assertCharacter(x = variables, any.missing = FALSE, null.ok = TRUE)
  assertLogical(x = compress, len = 1, any.missing = FALSE)
  assertDirectoryExists(x = outPath, access = "rw")

  # set internal paths
  intPaths <- paste0(getOption(x = "adb_path"))

  # derive current version; load() brings 'db_info' into this environment
  load(paste0(intPaths, "/db_info.RData"))
  version <- paste0(db_info$version, "_", format(Sys.Date(), "%Y%m%d"))

  # derive path; strip a possible trailing slash from outPath so the
  # separator is always present exactly once
  outPath <- sub("/+$", "", outPath)
  archivePath <- paste0(outPath, "/arealDB_", version, "/")
  message("\n-> creating archive '", archivePath, "'")
  dir.create(path = archivePath)
  dir.create(path = paste0(archivePath, "geometries"))
  dir.create(path = paste0(archivePath, "tables"))
  dir.create(path = paste0(archivePath, "meta"))

  message("-> archiving tables")
  stage3tables_full <- list.files(path = paste0(intPaths, "/tables/stage3"), full.names = TRUE)
  if(length(stage3tables_full) != 0){

    # file names without their extension, used as csv output names
    stage3tables <- map(.x = list.files(path = paste0(intPaths, "/tables/stage3")), .f = function(ix){
      temp <- str_split(ix, "[.]")[[1]][1]
    }) |>
      unlist()

    pb <- progress_bar$new(format = "[:bar] :current/:total (:percent)", total = length(stage3tables))
    for(i in seq_along(stage3tables)){
      temp <- readRDS(file = stage3tables_full[i])
      write_csv(x = temp, file = paste0(archivePath, "tables/", stage3tables[i], ".csv"), na = "")
      pb$tick()
    }

  }

  message("-> archiving geometries")
  stage3geometries_full <- list.files(path = paste0(intPaths, "/geometries/stage3"), full.names = TRUE)
  if(length(stage3geometries_full) != 0){

    stage3geometries <- list.files(path = paste0(intPaths, "/geometries/stage3"))
    file.copy(from = stage3geometries_full,
              to = paste0(archivePath, "geometries/", stage3geometries),
              overwrite = TRUE)

  }

  message("-> archiving inventory tables")
  adb_inventory(type = "dataseries") |>
    write_csv(file = paste0(archivePath, "meta/inv_dataseries.csv"), na = "")
  adb_inventory(type = "geometries") |>
    write_csv(file = paste0(archivePath, "meta/inv_geometries.csv"), na = "")
  adb_inventory(type = "tables") |>
    write_csv(file = paste0(archivePath, "meta/inv_tables.csv"), na = "")
  adb_ontology(type = "ontology") |>
    write_csv(file = paste0(archivePath, "meta/ontology.csv"), na = "")
  adb_ontology(type = "gazetteer") |>
    write_csv(file = paste0(archivePath, "meta/gazetteer.csv"), na = "")

  message("-> archiving metadata")
  sI <- sessionInfo()
  save(sI, file = paste0(archivePath, "R_sessionInfo.RData"))
  write_lines(x = capture.output(sI), file = paste0(archivePath, "R_sessionInfo.txt"))
  save(db_info, file = paste0(archivePath, "dbInfo.RData"))
  # human-readable summary of the database description
  db_desc_lines <- paste0("version:\n", db_info$version, "\n\n",
                          "authors:\n", paste0("creator: ", paste0(db_info$author$cre, collapse = ", "), "\nauthor: ", paste0(db_info$author$aut, collapse = ", "), "\ncontributor: ", paste0(db_info$author$ctb, collapse = ", ")), "\n\n",
                          "licence:\n", db_info$licence, "\n\n",
                          "gazetteer:\n", db_info$gazetteer, "\n\n", # these two should presumably be replaced with a version label as well
                          "ontology:\n", unique(db_info$ontology), "\n\n", # these two should presumably be replaced with a version label as well
                          "variables:\n", paste0(db_info$variables, collapse = ", "), "\n\n")
  write_lines(x = db_desc_lines, file = paste0(archivePath, "dbInfo.txt"))

  if(compress){
    message("-> compressing database archive")
    archive_write_dir(archive = paste0(outPath, "/arealDB_", version, ".7z"),
                      dir = archivePath, format = "7zip")
    unlink(archivePath, recursive = TRUE)
  }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
#' Visualise database contents
#'
#' @param ... combination of column name in the ontology and value to filter
#'   that column by to build a tree of the concepts nested into it; see
#'   \code{\link[ontologics]{make_tree}}.
#' @param territory [\code{list(.)}]\cr named list where the name is a
#'   gazetteer class and the value is the label of the territory to visualise,
#'   e.g. \code{list(al1 = "Brazil")}.
#' @param level [\code{character(1)}]\cr the territorial level at which to
#'   summarise; must be one of the levels available for the selected tables.
#'   If \code{NULL}, all available levels are used.
#' @param year [\code{integerish(.)}]\cr the year(s) for which to summarise
#'   variables. If \code{NULL}, no year filter is applied.
#' @param diagnose [\code{logical(1)}]\cr whether to return diagnostics of
#'   rows that could not be matched to the gazetteer/ontology.
#' @return if \code{diagnose = TRUE}, a list of diagnostic tables; otherwise
#'   \code{NULL}. NOTE(review): this function appears to be work in progress,
#'   the actual visualisation output is not yet produced.
#' @importFrom dplyr mutate select
#' @importFrom sf st_read
#' @export

adb_visualise <- function(..., territory = NULL, level = NULL, year = NULL,
                          diagnose = FALSE){

  # territory <- list(al1 = "Brazil"); level = "al3"; year = 2010; diagnose = FALSE

  assertList(x = territory, types = "character", any.missing = FALSE, null.ok = TRUE)
  assertCharacter(x = level, len = 1, any.missing = FALSE, null.ok = TRUE)
  assertIntegerish(x = year, min.len = 1, any.missing = FALSE, null.ok = TRUE)
  assertLogical(x = diagnose, len = 1, all.missing = FALSE)

  # keep the 'year' argument under a distinct name so it does not collide with
  # the 'year' column inside filter() below
  targetYear <- year

  adb_path <- getOption(x = "adb_path")
  inv <- readRDS(paste0(adb_path, "/_meta/inventory.rds"))

  gazPath <- paste0(getOption(x = "gazetteer_path"))
  gazClasses <- get_class(ontology = gazPath)
  topClass <- paste0(getOption(x = "gazetteer_top"))

  inv_tables <- inv$tables
  inv_geoms <- inv$geometries
  inv_series <- inv$dataseries

  tables <- list.files(path = paste0(adb_path, "/tables/stage3"), full.names = TRUE)
  geometries <- list.files(path = paste0(adb_path, "/geometries/stage3"), full.names = TRUE)

  # first, get all items down until the "topClass"
  allItems <- make_tree(class = topClass, reverse = TRUE, ontology = gazPath)
  top <- get_concept(class = names(territory), label = territory[[1]], ontology = gazPath) |>
    pull(id)

  # then descend the concept tree, collecting all children of the selected
  # territory until no further children are found
  fin <- NULL
  outIDs <- top
  while(is.null(fin)){
    childID <- allItems %>%
      filter(has_broader %in% top) %>%
      pull(id)
    if(length(childID) != 0){
      top <- childID
      outIDs <- c(outIDs, childID)
    } else {
      fin <- TRUE
    }
  }

  # the nations (class "al1") covered by the selection; their labels index the
  # stage-3 table and geometry files
  tableNames <- allItems |>
    filter(id %in% outIDs & class == "al1")

  # initialise so return(diag) below is defined even when no nation matches
  diag <- NULL

  # TODO(review): 'diag' is overwritten on every iteration, so only the last
  # nation's diagnostics are returned; consider accumulating per nation.
  for(i in seq_along(tableNames$label)){

    nation <- tableNames$label[i]

    table <- readRDS(file = tables[str_detect(string = tables, pattern = paste0(nation, ".rds"))])

    # build metadata
    tabIDs <- table |>
      distinct(tabID)

    temp_inv_tables <- inv_tables |>
      filter(tabID %in% tabIDs$tabID)

    if(is.null(level)){
      tempLvls <- temp_inv_tables |>
        distinct(level) |>
        pull()
    } else {
      assertSubset(x = level, choices = unique(temp_inv_tables$level))
      tempLvls <- level
    }

    geometry <- st_read(dsn = geometries[str_detect(string = geometries, pattern = paste0(nation, ".gpkg"))],
                        layer = max(tempLvls))

    if(diagnose){

      # visually diagnose some missing data
      diag <- list()
      diag$gazID <- table |>
        filter(is.na(gazID)) |>
        distinct(gazMatch)
      diag$ontoID <- table |>
        filter(is.na(ontoID)) |>
        distinct(ontoMatch)

    } else {
      diag <- NULL
    }

    # per territory/geometry: how many sources, variables and years are covered
    meta <- table |>
      group_by(gazID, geoID) |>
      summarise(sources = n_distinct(tabID),
                animals = n_distinct(animal),
                years = n_distinct(year),
                min_year = min(year),
                max_year = max(year)) |>
      ungroup()

    # summarise variables; one column per 'animal', mean value per territory
    var <- table |>
      filter(!is.na(gazID)) |>
      filter(str_detect(ontoMatch, "close"))
    if(!is.null(targetYear)){
      var <- var |>
        filter(year %in% targetYear)
    }
    var <- var |>
      pivot_wider(id_cols = c(gazID), names_from = animal, values_from = number_heads, values_fn = mean)

    # put it together
    full <- geometry |>
      filter(geoID %in% temp_inv_tables$geoID) |>
      left_join(meta, by = c("gazID", "geoID")) |>
      left_join(var, by = "gazID")

    thisGeoID <- full |>
      as_tibble() |>
      distinct(geoID)

    # NOTE(review): 'full', 'temp_inv_geoms' and 'temp_inv_series' are built
    # but not yet used; the plotting step still needs to be implemented.
    temp_inv_geoms <- inv_geoms |>
      filter(geoID %in% thisGeoID$geoID)
    temp_inv_series <- inv_series |>
      filter(datID %in% temp_inv_geoms$datID)

  }

  return(diag)

}
Oops, something went wrong.