Merge pull request #95 from b-cubed-eu/harmonise-documentation

Harmonise documentation
b-cubed-eu · Sep 10, 2024 · 2314eff · 2314eff
2 parents acb3a51 + cd906f8
commit 2314eff
Show file tree

Hide file tree

Showing 74 changed files with 1,375 additions and 1,450 deletions.
diff --git a/.zenodo.json b/.zenodo.json
@@ -1,6 +1,6 @@
 {
   "title": "gcube: Simulating Biodiversity Data Cubes",
-  "version": "0.3.0",
+  "version": "0.4.0",
   "license": "MIT",
   "upload_type": "software",
   "description": "<p>Simulation framework for biodiversity data cubes.<\/p>",

diff --git a/CITATION.cff b/CITATION.cff
@@ -25,4 +25,4 @@ abstract: "Simulation framework for biodiversity data cubes."
 identifiers:
 - type: url
   value: https://b-cubed-eu.github.io/gcube/
-version: 0.3.0
+version: 0.4.0
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: gcube
 Title: Simulating Biodiversity Data Cubes
-Version: 0.3.0
+Version: 0.4.0
 Authors@R: c(
     person("Ward", "Langeraert", , "[email protected]", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0002-5900-8109", affiliation = "Research Institute for Nature and Forest (INBO)")),
@@ -50,8 +50,7 @@ Imports:
     stats,
     terra,
     tidyr,
-    vegan,
-    withr
+    vegan
 Suggests: 
     ggExtra,
     ggplot2,

diff --git a/NAMESPACE b/NAMESPACE
@@ -27,6 +27,7 @@ importFrom(gstat,gstat)
 importFrom(gstat,vgm)
 importFrom(methods,formalArgs)
 importFrom(mnormt,rmnorm)
+importFrom(purrr,map)
 importFrom(purrr,pmap)
 importFrom(purrr,quietly)
 importFrom(rlang,.data)
@@ -39,8 +40,8 @@ importFrom(stats,setNames)
 importFrom(terra,global)
 importFrom(terra,rast)
 importFrom(terra,rasterize)
+importFrom(terra,res)
 importFrom(terra,spatSample)
 importFrom(terra,vect)
 importFrom(tidyr,unnest)
 importFrom(vegan,decostand)
-importFrom(withr,local_seed)
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,11 @@
+# gcube 0.4.0
+
+*	Consolidate documentation across all functions, README, and vignettes.
+* Update `sample_occurrences_from_raster()`:
+  - Use `lapply()` instead of a for-loop.
+  - Randomise points in raster cells.
+* Fix issues (#37, #70, #76).
+
 # gcube 0.3.0
 
 *	`generate_taxonomy()` also creates species key.

diff --git a/R/add_coordinate_uncertainty.R b/R/add_coordinate_uncertainty.R
@@ -1,15 +1,20 @@
 #' Add coordinate uncertainty to observations
 #'
-#' Adds a column to the observations sf object with the coordinate uncertainty
-#' in meters.
+#' This function adds a column to the input dataframe or sf object containing
+#' the coordinate uncertainty for each observation, measured in meters.
 #'
-#'
-#' @param observations An sf object with POINT geometry.
+#' @param observations An sf object with POINT geometry or a simple
+#' dataframe representing the observations. This object contains the observation
+#' points to which the coordinate uncertainty will be added.
 #' @param coords_uncertainty_meters A numeric value or a vector of numeric
-#' values indicating the coordinate uncertainty associated with observations.
+#' values representing the coordinate uncertainty (in meters) associated with
+#' each observation. If a single numeric value is provided, it will be applied
+#' to all observations. If a numeric vector is provided, it must be the same
+#' length as the number of observations.
 #'
-#' @return An sf object with POINT geometry with an additional column
-#' `coordinateUncertaintyInMeters`.
+#' @returns The input data frame or an sf object with POINT geometry, with an
+#' additional column named `coordinateUncertaintyInMeters` that contains the
+#' coordinate uncertainty values in meters.
 #'
 #' @export
 #'
@@ -19,31 +24,24 @@
 #' @family main
 #'
 #' @examples
+#' # Create dataframe with time point and sampling probability columns
+#' observations_data <- data.frame(
+#'     time_point = 1,
+#'     sampling_prob = seq(0.5, 1, 0.1)
+#'   )
 #'
-#' library(sf)
-#' library(dplyr)
-#'
-#' set.seed(123)
-#'
-#' # Create four random points
-#' n_points <- 4
-#' xlim <- c(3841000, 3842000)
-#' ylim <- c(3110000, 3112000)
-#' observations_sf <- data.frame(
-#'   lat = runif(n_points, ylim[1], ylim[2]),
-#'     long = runif(n_points, xlim[1], xlim[2])) %>%
-#'     st_as_sf(coords = c("long", "lat"), crs = 3035)
-#'
-#'  # provide a fixed uncertainty for all points
-#'  add_coordinate_uncertainty(
-#'    observations_sf,
-#'    coords_uncertainty_meters = 1000
-#'    )
+#' # provide a fixed uncertainty for all points
+#' add_coordinate_uncertainty(
+#'   observations_data,
+#'   coords_uncertainty_meters = 1000
+#'  )
 #'
 #' # add variability in uncertainty. For example, using a vector of values
+#' uncertainty_vec <- seq(50, 100, 10)
+#'
 #' add_coordinate_uncertainty(
-#'   observations_sf,
-#'   coords_uncertainty_meters = rgamma(n_points, shape = 5, rate = 0.1)
+#'   observations_data,
+#'   coords_uncertainty_meters = uncertainty_vec
 #' )
 
 add_coordinate_uncertainty <- function(
@@ -52,10 +50,9 @@ add_coordinate_uncertainty <- function(
   ### Start checks
   # 1. Check input type and length
  # Check if observations is an sf object or a dataframe
-  stopifnot("`observations` must be an sf object with POINT geometry." =
-              inherits(observations, "sf") &&
-              sf::st_geometry_type(observations,
-                                   by_geometry = FALSE) == "POINT")
+  stopifnot("`observations` must be an sf object or a dataframe." =
+              inherits(observations, "sf") ||
+              inherits(observations, "data.frame"))
 
   # Check if coords_uncertainty_meters is numeric
   stopifnot("`coords_uncertainty_meters` must be  numeric vector." =

diff --git a/R/apply_manual_sampling_bias.R b/R/apply_manual_sampling_bias.R
@@ -1,18 +1,19 @@
-#' Generate a sampling bias via a grid
+#' Apply manual sampling bias to occurrences via a grid
 #'
-#' The function adds a sampling bias weight column containing the sample
-#' probability based on bias weights within each cell of a given grid layer.
+#' This function adds a sampling bias weight column to an sf object containing
+#' occurrences. The sampling probabilities are based on bias weights within each
+#' cell of a provided grid layer.
 #'
-#' @param occurrences_sf An sf object with POINT geometry.
-#' @param bias_weights A raster layer (sf object with POLYGON geometry). The
-#' raster of bias weights to be applied to the sampling of occurrences. This sf
-#' object should contain a `bias_weight` and `geometry` column. Higher weights
-#' indicate a higher probability of sampling. Weights must be numeric values
-#' between 0 and 1 OR positive integers that will be rescaled to values between
-#' 0 and 1.
+#' @param occurrences_sf An sf object with POINT geometry representing the
+#' occurrences.
+#' @param bias_weights An `sf` object with POLYGON geometry representing the
+#' grid with bias weights. This sf object should contain a `bias_weight` column
+#' and a `geometry` column. Higher weights indicate a higher probability of
+#' sampling. Weights must be numeric values between 0 and 1 or positive
+#' integers, which will be rescaled to values between 0 and 1.
 #'
-#' @returns An sf object with POINT geometry with a bias_weight column
-#' containing the sampling probability based on sampling bias.
+#' @returns An sf object with POINT geometry that includes a `bias_weight`
+#' column containing the sampling probabilities based on the sampling bias.
 #'
 #' @export
 #'
@@ -27,43 +28,31 @@
 #' library(dplyr)
 #' library(ggplot2)
 #'
-#' # Set seed for reproducibility
-#' set.seed(123)
+#' # Create polygon
+#' plgn <- st_polygon(list(cbind(c(5, 10, 8, 2, 3, 5), c(2, 1, 7, 9, 5, 2))))
 #'
-#' # Simulate some occurrence data with coordinates and time points
-#' num_points <- 10
-#' occurrences <- data.frame(
-#'   lon = runif(num_points, min = -180, max = 180),
-#'   lat = runif(num_points, min = -90, max = 90),
-#'   time_point = 0
-#' )
+#' # Get occurrence points
+#' occurrences_sf <- simulate_occurrences(plgn)
 #'
-#' # Convert the occurrence data to an sf object
-#' occurrences_sf <- st_as_sf(occurrences, coords = c("lon", "lat"))
-#'
-#' # Create raster grid
-#' grid <- st_make_grid(occurrences_sf) %>%
+#' # Create grid with bias weights
+#' grid <- st_make_grid(
+#'     plgn,
+#'     n = c(10, 10),
+#'     square = TRUE) %>%
 #'   st_sf()
+#' grid$bias_weight <- runif(nrow(grid), min = 0, max = 1)
 #'
-#' # Bias weights between 0 and 1
-#' grid1 <- grid %>%
-#'   mutate(bias_weight = runif(nrow(grid), min = 0, max = 1))
-#'
-#' apply_manual_sampling_bias(occurrences_sf, grid1)
-#'
-#' # Bias weights larger than 1
-#' grid2 <- grid %>%
-#'   mutate(bias_weight = rpois(nrow(grid), 5))
-#'
-#' occurrence_bias_sf <- apply_manual_sampling_bias(occurrences_sf, grid2)
-#' occurrence_bias_sf
+#' # Calculate occurrence bias
+#' occurrence_bias <- apply_manual_sampling_bias(occurrences_sf, grid)
+#' occurrence_bias
 #'
 #' # Visualise where the bias is
 #' ggplot() +
-#'  geom_sf(data = grid2) +
-#'  geom_sf_text(data = grid2, aes(label = bias_weight)) +
-#'  geom_sf(data = occurrence_bias_sf, aes(colour = bias_weight)) +
-#'  scale_color_gradient(trans = "reverse")
+#'   geom_sf(data = plgn) +
+#'   geom_sf(data = grid, alpha = 0) +
+#'   geom_sf(data = occurrence_bias, aes(colour = bias_weight)) +
+#'   geom_sf_text(data = grid, aes(label = round(bias_weight, 2))) +
+#'   theme_minimal()
 
 apply_manual_sampling_bias <- function(occurrences_sf, bias_weights) {
   ### Start checks

diff --git a/R/apply_polygon_sampling_bias.R b/R/apply_polygon_sampling_bias.R
@@ -1,20 +1,25 @@
-#' Generate a sampling bias via a polygon
+#' Apply sampling bias to occurrences via a polygon
 #'
-#' The function adds a sampling bias weight column containing the sample
-#' probability based on bias strength within a given polygon.
+#' This function adds a sampling bias weight column to an `sf` object containing
+#' occurrences based on a given polygonal area. The bias is determined by the
+#' specified bias strength, which adjusts the probability of sampling within
+#' the polygonal area.
 #'
-#' @param occurrences_sf An sf object with POINT geometry.
-#' @param bias_area An sf object with POLYGON geometry. The area in which the
+#' @param occurrences_sf An sf object with POINT geometry representing the
+#' occurrences.
+#' @param bias_area An sf object with POLYGON geometry specifying the area where
 #' sampling will be biased.
-#' @param bias_strength A positive numeric value. The strength of the bias to
-#' be applied in the biased area (as a multiplier). Above 1, area will be
-#' oversampled. Below 1, area will be undersampled. For example, a value of 50
-#' will result in 50 times more samples within the bias_area than outside.
-#' Conversely, a value of 0.5 will result in half less samples within the
-#' bias_area than outside.
+#' @param bias_strength A positive numeric value that represents the strength of
+#' the bias to be applied within the `bias_area`. Values greater than 1 will
+#' increase the sampling probability within the polygon relative to outside
+#' (oversampling), while values between 0 and 1 will decrease it
+#' (undersampling). For instance, a value of 50 will make the probability 50
+#' times higher within the `bias_area` compared to outside, whereas a value of
+#' 0.5 will make it half as likely.
 #'
-#' @returns An sf object with POINT geometry with a bias_weight column
-#' containing the sampling probability based on sampling bias.
+#' @returns An sf object with POINT geometry that includes a `bias_weight`
+#' column containing the sampling probabilities based on the bias area and
+#' strength.
 #'
 #' @export
 #'
@@ -30,15 +35,12 @@
 #' library(dplyr)
 #' library(ggplot2)
 #'
-#' # Set seed for reproducibility
-#' set.seed(123)
-#'
 #' # Simulate some occurrence data with coordinates and time points
 #' num_points <- 10
 #' occurrences <- data.frame(
 #'   lon = runif(num_points, min = -180, max = 180),
 #'   lat = runif(num_points, min = -90, max = 90),
-#'   time_point = 0
+#'   time_point = 1
 #'   )
 #'
 #' # Convert the occurrence data to an sf object
@@ -47,7 +49,7 @@
 #' # Create bias_area polygon overlapping at least two of the points
 #' selected_observations <- st_union(occurrences_sf[2:3,])
 #' bias_area <- st_convex_hull(selected_observations) %>%
-#'   st_buffer(dist = 100) %>%
+#'   st_buffer(dist = 50) %>%
 #'   st_as_sf()
 #'
 #' occurrence_bias_sf <- apply_polygon_sampling_bias(
@@ -58,11 +60,11 @@
 #'
 #' # Visualise where the bias is
 #' occurrence_bias_sf %>%
-#'   mutate(bias_weight_f = as.factor(round(bias_weight, 3))) %>%
+#'   mutate(bias_weight = as.factor(round(bias_weight, 3))) %>%
 #'   ggplot() +
 #'     geom_sf(data = bias_area) +
-#'     geom_sf(aes(colour = bias_weight_f)) +
-#'     ggtitle("Sampling Bias via Polygon")
+#'     geom_sf(aes(colour = bias_weight)) +
+#'     theme_minimal()
 
 apply_polygon_sampling_bias <- function(
     occurrences_sf,