Skip to content

Commit

Permalink
Merge pull request #104 from etiennebacher/more-speedup
Browse files Browse the repository at this point in the history
Import `collapse` to reduce the time taken by `%in%`
  • Loading branch information
beniaminogreen authored Feb 12, 2024
2 parents 8392cd4 + 4b79275 commit c15ae70
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 17 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: zoomerjoin
Title: Superlatively Fast Fuzzy Joins
Version: 0.1.4
Version: 0.1.2.9000
Authors@R: c(
person("Beniamino", "Green", , "[email protected]", role = c("aut", "cre", "cph")),
person("Etienne", "Bacher", email = "[email protected]", role = "ctb"),
Expand All @@ -15,6 +15,7 @@ Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
SystemRequirements: Cargo (>= 1.56) (Rust's package manager), rustc
Imports:
collapse,
dplyr,
tibble,
tidyr
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# zoomerjoin (development version)

* Several performance improvements, including using `collapse` for faster membership checks in place of `%in%` (#101, #104).

# zoomerjoin 0.1.2

* Submitted package to CRAN.
Expand Down
37 changes: 21 additions & 16 deletions R/jaccard_join_core.R
Original file line number Diff line number Diff line change
Expand Up @@ -152,20 +152,25 @@ jaccard_join <- function (a, b, mode, by, salt_by, n_gram_width, n_bands,
return(matches)
}

not_matched_a <- ! seq(nrow(a)) %in% match_table[,1]
not_matched_b <- ! seq(nrow(b)) %in% match_table[,2]

if (mode == "left") {
matches <- dplyr::bind_rows(matches,a[not_matched_a,])
} else if (mode == "right") {
matches <- dplyr::bind_rows(matches,b[not_matched_b,])
} else if (mode == "full") {
matches <- dplyr::bind_rows(matches,a[not_matched_a,],b[not_matched_b,])
} else if (mode == "anti") {
matches <- dplyr::bind_rows(a[not_matched_a,], b[not_matched_b,])
} else {
stop("Invalid Mode Selected!")
}

return(matches)
switch(
mode,
"left" = {
not_matched_a <- collapse::`%!iin%`(seq(nrow(a)), match_table[, 1])
matches <- dplyr::bind_rows(matches, a[not_matched_a,])
},
"right" = {
not_matched_b <- collapse::`%!iin%`(seq(nrow(b)), match_table[, 2])
matches <- dplyr::bind_rows(matches, b[not_matched_b,])
},
"full" = {
not_matched_a <- collapse::`%!iin%`(seq(nrow(a)), match_table[, 1])
not_matched_b <- collapse::`%!iin%`(seq(nrow(b)), match_table[, 2])
matches <- dplyr::bind_rows(matches, a[not_matched_a,], b[not_matched_b,])
},
"anti" = {
not_matched_a <- collapse::`%!iin%`(seq(nrow(a)), match_table[, 1])
not_matched_b <- collapse::`%!iin%`(seq(nrow(b)), match_table[, 2])
matches <- dplyr::bind_rows(a[not_matched_a,], b[not_matched_b,])
}
)
}

0 comments on commit c15ae70

Please sign in to comment.