Skip to content

Commit

Permalink
Merge pull request #90 from dieghernan/newgroups
Browse files Browse the repository at this point in the history
Re-think group handling
  • Loading branch information
dieghernan authored Mar 15, 2023
2 parents 0f18f67 + e2a8392 commit 3c76cfd
Show file tree
Hide file tree
Showing 38 changed files with 388 additions and 176 deletions.
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ export(group_data)
export(group_indices)
export(group_keys)
export(group_rows)
export(group_rows.SpatVector)
export(group_size)
export(group_vars)
export(groups)
Expand Down
5 changes: 1 addition & 4 deletions R/as_spatvector.R
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,7 @@ as_spatvector.data.frame <- function(x, ..., geom = c("lon", "lat"), crs = "") {

# Make groups
if (dplyr::is_grouped_df(x)) {
vars <- dplyr::group_vars(x)

# Add groups metadata
attr(v, "group_vars") <- vars
v <- group_prepare_spat(v, x)
}

return(v)
Expand Down
66 changes: 60 additions & 6 deletions R/as_tibble-Spat.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,16 @@ as_tibble.SpatVector <- function(x, ..., geom = NULL, .name_repair = "unique") {

# Grouped
if (is_grouped_spatvector(x)) {
vars <- group_vars(x)
# Add class
class(df) <- c("grouped_df", class(df))
attr(df, "groups") <- attr(x, "groups")

df <- dplyr::group_by(df, across_all_of(vars))
# Validate
dplyr::validate_grouped_df(df)
}

df <- check_regroups(df)

# Set attributes if present
if (!is.na(pull_crs(x))) {
attr(df, "crs") <- pull_crs(x)
Expand Down Expand Up @@ -187,16 +192,21 @@ as_tbl_vector_internal <- function(x) {

todf <- as.data.frame(x, geom = "WKT")
todf[is.na(todf)] <- NA
todf <- tibble::as_tibble(todf)

# Grouped
if (is_grouped_spatvector(x)) {
vars <- group_vars(x)
# Add class
class(todf) <- c("grouped_df", class(todf))
attr(todf, "groups") <- attr(x, "groups")

todf <- dplyr::group_by(todf, across_all_of(vars))
} else {
todf <- dplyr::as_tibble(todf)
# Validate
dplyr::validate_grouped_df(todf)
}

todf <- check_regroups(todf)


# Set attributes
attr(todf, "source") <- "SpatVector"
attr(todf, "crs") <- terra::crs(x)
Expand Down Expand Up @@ -268,3 +278,47 @@ make_safe_names <- function(x, geom = NULL, messages = TRUE) {

#' @export
tibble::as_tibble


#' Validate construction of groups. This is needed since that mixing terra
#' syntax with tidy syntax can modify group data (i.e. remove columns, change
#' number of rows) that won't be captured by tidyterra
#'
#' @noRd
check_regroups <- function(x) {
if (!dplyr::is_grouped_df(x)) {
return(x)
}

gvars <- dplyr::group_vars(x)
val_vars <- gvars %in% names(x)
all_vars <- all(val_vars)
any_var <- any(val_vars)

if (isFALSE(any_var)) {
cli::cli_alert_warning(paste(
"`group_vars()` missing on data.",
" Have you mixed terra and tidyterra syntax?"
))
cli::cli_bullets(c(i = "ungrouping data"))
return(dplyr::ungroup(x))
}

if (isFALSE(all_vars)) {
regroup_vars <- gvars[val_vars]

ung <- dplyr::ungroup(x)
return(dplyr::group_by(ung, across_all_of(regroup_vars)))
}

# Check rows have been kept
dif_rows <- all(sum(group_size(x)) == nrow(x))

if (isFALSE(dif_rows)) {
regroup_vars <- gvars[val_vars]
ung <- dplyr::ungroup(x)
return(dplyr::group_by(ung, across_all_of(regroup_vars)))
}

return(x)
}
77 changes: 43 additions & 34 deletions R/bind-rows-SpatVector.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ bind_spat_rows <- function(..., .id = NULL) {
# If is a list unlist the first level
dots <- dots[[1]]
}

named_list <- as.character(seq_len(length(dots)))

# Named lists
Expand All @@ -106,7 +107,6 @@ bind_spat_rows <- function(..., .id = NULL) {
}
}


# Checks
# Ensure first is SpatVector
if (!inherits(dots[[1]], "SpatVector")) {
Expand All @@ -120,13 +120,44 @@ bind_spat_rows <- function(..., .id = NULL) {
# Get templates
template <- dots[[1]]

# First get all as tibbles
alltibbs <- lapply(seq_len(length(dots)), function(i) {
x <- dots[[i]]

# First is always a SpatVector
if (i == 1) {
frst <- as_tibble(x)

# Case when first is only geometry, need to add a mock var
if (nrow(frst) == 0) {
frst <- tibble::tibble(first_empty = seq_len(nrow(x)))
}

return(frst)
}

# Rest of cases

if (inherits(x, "SpatVector")) {
return(as_tibble(x))
}


if (inherits(x, "sf")) {
return(sf::st_drop_geometry(x))
}

return(x)
})

# Now get all geoms
# Ensure all are SpatVectors and add ids if required
allspatvect <- lapply(seq_len(length(dots)), function(i) {
x <- dots[[i]]

if (inherits(x, c("SpatVector", "sf", "sfc"))) {
x <- crs_compare(x, template, i)
return(x)
return(x[, 0])
}

# If tibble convert (internally) to SpatVector
Expand Down Expand Up @@ -154,44 +185,22 @@ bind_spat_rows <- function(..., .id = NULL) {
attr(x, "crs") <- terra::crs(template)
attr(x, "geomtype") <- terra::geomtype(template)

as_spat_internal(x)
as_spat_internal(x)[, 0]
})
vend <- do.call("rbind", allspatvect)

# Adjust NAs
df <- as_tibble(vend)
df[is.na(df)] <- NA

vend <- cbind(vend[, 0], df)

# Regen groups
vend <- group_prepare_spat(vend, template)

# If id not requested we are done
if (is.null(.id)) {
return(vend)
}

# Need to add a variable with id

# Create vector of indexes identifying source of each row
rows_vect <- unlist(lapply(allspatvect, nrow))
theindex <- unlist(lapply(seq_len(length(rows_vect)), function(x) {
rep(x, rows_vect[x])
}))

keep_names <- names(df)

df[[.id]] <- named_list[theindex]
# Get geoms
vend <- do.call("rbind", allspatvect)

# Rearrange if the id var has been added
if (!.id %in% keep_names) {
df <- df[, c(.id, keep_names)]
# Get binded rows
if (length(named_list) == length(alltibbs)) {
names(alltibbs) <- named_list
}

vend <- cbind(vend[, 0], df)
binded <- dplyr::bind_rows(alltibbs, .id = .id)
vend <- cbind(vend[, 0], binded)

vend <- group_prepare_spat(vend, template)
# Regen groups
vend <- group_prepare_spat(vend, binded)

vend
}
Expand Down
73 changes: 48 additions & 25 deletions R/count-tally-SpatVector.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
count.SpatVector <- function(x, ..., wt = NULL, sort = FALSE, name = NULL,
.drop = group_by_drop_default(x),
.dissolve = TRUE) {
# Maybe regroup
if (!missing(...)) {
out <- group_by(x, ..., .add = TRUE, .drop = .drop)
} else {
Expand All @@ -84,20 +85,31 @@ count.SpatVector <- function(x, ..., wt = NULL, sort = FALSE, name = NULL,

vend <- tally(out, sort = sort, name = name)

# Prepare a template for groups
template <- dplyr::count(as_tibble(x), ...,
sort = sort, name = name,
.drop = .drop
)


# Dissolve if requested
if (.dissolve) {
keepdf <- as_tibble(vend)

var_index <- make_safe_index("tterra_index", keepdf)
df <- data.frame(n = seq_len(nrow(keepdf)))
names(df) <- var_index
vend <- cbind(vend[, 0], df)
vend[[var_index]] <- seq_len(nrow(keepdf))
vend <- terra::aggregate(vend, by = var_index, dissolve = TRUE)
vend <- cbind(vend[, 0], keepdf)
}

# Ensure groups
vend <- group_prepare_spat(vend, x)
vend <- ungroup(vend)

# Re-group based on the template
if (dplyr::is_grouped_df(template)) {
gvars <- dplyr::group_vars(template)
vend <- group_by(vend, across_all_of(gvars))
}


vend
Expand All @@ -110,32 +122,43 @@ dplyr::count
#' @export
#' @name count.SpatVector
tally.SpatVector <- function(x, wt = NULL, sort = FALSE, name = NULL) {
tbl <- as_tibble(x)
spatv <- x

# Use tbl unsorted
tallyed <- dplyr::tally(tbl, sort = FALSE, name = name)

if (is_grouped_spatvector(spatv)) {
var_index <- make_safe_index("tterra_index", spatv)
df <- data.frame(n = group_indices(spatv))
names(df) <- var_index
spatv <- cbind(spatv, df)
newgeom <- terra::aggregate(spatv, by = var_index, dissolve = FALSE)
} else {
newgeom <- terra::aggregate(spatv, dissolve = FALSE)
# Use terra method on ungrouped
if (!is_grouped_spatvector(x)) {
vargroup <- make_safe_index("tterra_index", x)
x[[vargroup]] <- "UNIQUE"

vend <- terra::aggregate(x, by = vargroup, dissolve = FALSE, count = TRUE)
# Keep aggregation only and rename
vend <- vend[, "agg_n"]
if (is.null(name)) name <- "n"

names(vend) <- name
return(vend)
}

vend <- cbind(newgeom[, 0], tallyed)
# Get tibble and index of rows
tblforindex <- as_tibble(x)
# Get a template
template <- dplyr::tally(tblforindex, sort = sort, name = name)

# Ensure groups
vend <- group_prepare_spat(vend, tallyed)
vargroup <- dplyr::group_vars(tblforindex)
x <- x[, vargroup]
vend <- terra::aggregate(x, by = vargroup, dissolve = FALSE, count = TRUE)
# Keep and rename
vend <- vend[, c(vargroup, "agg_n")]

if (sort) {
# Arrange
order_v <- rev(names(vend))[1]
sort_order <- as_tibble(vend)[[order_v]]
vend <- vend[order(sort_order, decreasing = TRUE), ]
# Re-sort
vend <- vend[order(vend$agg_n, decreasing = TRUE), ]
}
names(vend) <- names(template)
vend <- ungroup(vend)


# Re-group based on the template
if (dplyr::is_grouped_df(template)) {
gvars <- dplyr::group_vars(template)
vend <- group_by(vend, across_all_of(gvars))
}

vend
Expand Down
Loading

0 comments on commit 3c76cfd

Please sign in to comment.