diff --git a/DESCRIPTION b/DESCRIPTION index e6b5bba..45a06a1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: zoomerjoin Title: Superlatively Fast Fuzzy Joins -Version: 0.1.4 +Version: 0.1.2.9000 Authors@R: c( person("Beniamino", "Green", , "beniamino.green@yale.edu", role = c("aut", "cre", "cph")), person("Etienne", "Bacher", email = "etienne.bacher@protonmail.com", role = "ctb"), @@ -15,6 +15,7 @@ Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.1 SystemRequirements: Cargo (>= 1.56) (Rust's package manager), rustc Imports: + collapse, dplyr, tibble, tidyr diff --git a/NEWS.md b/NEWS.md index a58cd49..d809562 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# zoomerjoin (development version) + +* Several performance improvements (#101, #104). + # zoomerjoin 0.1.2 * Submitted Package to CRAN diff --git a/R/jaccard_join_core.R b/R/jaccard_join_core.R index ec7e7cb..a7a51df 100644 --- a/R/jaccard_join_core.R +++ b/R/jaccard_join_core.R @@ -152,20 +152,25 @@ jaccard_join <- function (a, b, mode, by, salt_by, n_gram_width, n_bands, return(matches) } - not_matched_a <- ! seq(nrow(a)) %in% match_table[,1] - not_matched_b <- ! seq(nrow(b)) %in% match_table[,2] - - if (mode == "left") { - matches <- dplyr::bind_rows(matches,a[not_matched_a,]) - } else if (mode == "right") { - matches <- dplyr::bind_rows(matches,b[not_matched_b,]) - } else if (mode == "full") { - matches <- dplyr::bind_rows(matches,a[not_matched_a,],b[not_matched_b,]) - } else if (mode == "anti") { - matches <- dplyr::bind_rows(a[not_matched_a,], b[not_matched_b,]) - } else { - stop("Invalid Mode Selected!") - } - - return(matches) + switch( + mode, + "left" = { + not_matched_a <- collapse::`%!iin%`(seq(nrow(a)), match_table[, 1]) + matches <- dplyr::bind_rows(matches, a[not_matched_a,]) + }, + "right" = { + not_matched_b <- collapse::`%!iin%`(seq(nrow(b)), match_table[, 2]) + matches <- dplyr::bind_rows(matches, b[not_matched_b,]) + }, + "full" = { + not_matched_a <- collapse::`%!iin%`(seq(nrow(a)), match_table[, 1]) + not_matched_b <- collapse::`%!iin%`(seq(nrow(b)), match_table[, 2]) + matches <- dplyr::bind_rows(matches, a[not_matched_a,], b[not_matched_b,]) + }, + "anti" = { + not_matched_a <- collapse::`%!iin%`(seq(nrow(a)), match_table[, 1]) + not_matched_b <- collapse::`%!iin%`(seq(nrow(b)), match_table[, 2]) + matches <- dplyr::bind_rows(a[not_matched_a,], b[not_matched_b,]) + } + ) }