Skip to content

Commit

Permalink
Increase code coverage (#116)
Browse files Browse the repository at this point in the history
  • Loading branch information
etiennebacher authored Feb 15, 2024
1 parent f5f02c5 commit 03799b9
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 14 deletions.
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,14 @@ Suggests:
fuzzyjoin,
igraph,
knitr,
microbenchmark,
profmem,
purrr,
rmarkdown,
stringdist,
testthat (>= 3.0.0),
tidyverse,
purrr,
microbenchmark,
profmem
vdiffr
Config/testthat/edition: 3
URL: https://beniamino.org/zoomerjoin/
BugReports: https://github.com/beniaminogreen/zoomerjoin/issues/
Expand Down
2 changes: 2 additions & 0 deletions R/on_load.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# nocov start
.onAttach <- function(libname, pkgname) {
if (Sys.getenv("_R_CHECK_LIMIT_CORES_") != "") {
if (as.logical(Sys.getenv("_R_CHECK_LIMIT_CORES_"))) {
Expand All @@ -6,3 +7,4 @@
}
}
}
# nocov end
2 changes: 1 addition & 1 deletion R/string_group.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
#' @importFrom stats runif
#' @importFrom utils installed.packages packageVersion
jaccard_string_group <- function(string, n_gram_width = 2, n_bands = 45, band_width = 8, threshold = .7, progress = FALSE) {
if (system.file(package = "igraph") == "") {
if (!requireNamespace("igraph")) {
stop("library 'igraph' must be installed to run this function")
}

Expand Down
61 changes: 61 additions & 0 deletions tests/testthat/_snaps/curves/basic-euclidean-curve.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
61 changes: 61 additions & 0 deletions tests/testthat/_snaps/curves/basic-jaccard-curve.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
21 changes: 21 additions & 0 deletions tests/testthat/test-curves.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Visual regression test: the plot returned by euclidean_curve() is compared
# against the stored "Basic Euclidean curve" snapshot.
test_that("euclidean curve", {
  # vdiffr is only a suggested dependency, so skip instead of erroring when
  # it is not installed.
  skip_if_not_installed("vdiffr")

  vdiffr::expect_doppelganger(
    "Basic Euclidean curve",
    euclidean_curve(20, 5, r = 0.5)
  )
})

# Checks input validation of jaccard_curve() and compares its plot against the
# stored "Basic Jaccard curve" snapshot.
test_that("jaccard curve", {
  # Passing a length-2 vector as either of the first two arguments must be
  # rejected with an error mentioning "single integer".
  expect_error(
    jaccard_curve(c(20, 10), 1),
    "single integer"
  )
  expect_error(
    jaccard_curve(1, c(20, 10)),
    "single integer"
  )

  # vdiffr is only a suggested dependency; the skip is placed after the
  # validation checks so those still run when vdiffr is not installed.
  skip_if_not_installed("vdiffr")

  vdiffr::expect_doppelganger(
    "Basic Jaccard curve",
    jaccard_curve(20, 5)
  )
})
44 changes: 34 additions & 10 deletions tests/testthat/test-test_logical_lsh_join.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,42 +50,50 @@ test_that("jaccard_inner_join works on tiny dataset", {
)

expect_true(all(test$id_1 == test$id_2, na.rm = T))

expect_identical(sort(test$id_1), c(1, 2))
expect_identical(sort(test$id_2), c(1, 2))
})

# jaccard_full_join() on the small fixtures dataset_1 / dataset_2 (defined
# outside this block).
test_that("jaccard_full_join works on tiny dataset", {
  # Messages emitted by the join are captured so they do not clutter the
  # test output.
  capture_messages(
    test <- jaccard_full_join(dataset_1, dataset_2, threshold = .6, n_bands = 300)
  )

  # Wherever both ids are present they must agree; rows where either id is NA
  # are ignored via na.rm = TRUE.
  expect_true(all(test$id_1 == test$id_2, na.rm = TRUE))

  # sort() drops NAs by default, so only the non-missing ids are compared.
  expect_identical(sort(test$id_1), c(1, 2, 3))
  expect_identical(sort(test$id_2), c(1, 2, 3))
})

# jaccard_left_join() on the small fixtures dataset_1 / dataset_2 (defined
# outside this block).
test_that("jaccard_left_join works on tiny dataset", {
  # Messages emitted by the join are captured so they do not clutter the
  # test output.
  capture_messages(
    test <- jaccard_left_join(dataset_1, dataset_2, threshold = .6, n_bands = 300)
  )

  # Wherever both ids are present they must agree; rows where either id is NA
  # are ignored via na.rm = TRUE.
  expect_true(all(test$id_1 == test$id_2, na.rm = TRUE))

  # sort() drops NAs by default, so only the non-missing ids are compared.
  expect_identical(sort(test$id_1), c(1, 2, 3))
  expect_identical(sort(test$id_2), c(1, 2))
})

# jaccard_right_join() on the small fixtures dataset_1 / dataset_2 (defined
# outside this block).
test_that("jaccard_right_join works on tiny dataset", {
  # Messages emitted by the join are captured so they do not clutter the
  # test output.
  capture_messages(
    test <- jaccard_right_join(dataset_1, dataset_2, threshold = .6, n_bands = 300)
  )

  # Wherever both ids are present they must agree; rows where either id is NA
  # are ignored via na.rm = TRUE.
  expect_true(all(test$id_1 == test$id_2, na.rm = TRUE))

  # sort() drops NAs by default, so only the non-missing ids are compared.
  expect_identical(sort(test$id_1), c(1, 2))
  expect_identical(sort(test$id_2), c(1, 2, 3))
})

# jaccard_anti_join() on the small fixtures dataset_1 / dataset_2 (defined
# outside this block).
test_that("jaccard_anti_join works on tiny dataset", {
  # Messages emitted by the join are captured so they do not clutter the
  # test output.
  capture_messages(
    test <- jaccard_anti_join(dataset_1, dataset_2, threshold = .6, n_bands = 300)
  )

  # Rows where either id is NA are ignored via na.rm = TRUE, so this only
  # constrains rows in which both ids are present.
  expect_true(all(test$id_1 == test$id_2, na.rm = TRUE))

  # na.last = TRUE keeps the NA entries (placed at the end) so they are part
  # of the comparison.
  expect_identical(sort(test$id_1, na.last = TRUE), c(3, NA))
  expect_identical(sort(test$id_2, na.last = TRUE), c(3, NA))
})

test_that("jaccard_inner_join gives same results as stringdist_inner_join", {
for (i in 1:20) {
capture_messages({
Expand Down Expand Up @@ -165,7 +173,6 @@ test_that("hamming_inner_join works on tiny dataset", {
)

expect_true(all(test$id_1 == test$id_2, na.rm = T))

expect_identical(sort(test$id_1), c(1, 2))
expect_identical(sort(test$id_2), c(1, 2))
})
Expand All @@ -176,7 +183,6 @@ test_that("hamming_full_join works on tiny dataset", {
)

expect_true(all(test$id_1 == test$id_2, na.rm = T))

expect_identical(sort(test$id_1), c(1, 2, 3))
expect_identical(sort(test$id_2), c(1, 2, 3))
})
Expand All @@ -187,7 +193,6 @@ test_that("hamming_left_join works on tiny dataset", {
)

expect_true(all(test$id_1 == test$id_2, na.rm = T))

expect_identical(sort(test$id_1), c(1, 2, 3))
expect_identical(sort(test$id_2), c(1, 2))
})
Expand All @@ -197,13 +202,21 @@ test_that("hamming_right_join works on tiny dataset", {
test <- hamming_right_join(dataset_1, dataset_2, threshold = 3, band_width = 1, n_bands = 300)
)


expect_true(all(test$id_1 == test$id_2, na.rm = T))

expect_identical(sort(test$id_1), c(1, 2))
expect_identical(sort(test$id_2), c(1, 2, 3))
})

# hamming_anti_join() on the small fixtures dataset_1 / dataset_2 (defined
# outside this block).
test_that("hamming_anti_join works on tiny dataset", {
  # Messages emitted by the join are captured so they do not clutter the
  # test output.
  capture_messages(
    test <- hamming_anti_join(dataset_1, dataset_2, threshold = 3, band_width = 1, n_bands = 300)
  )

  # Rows where either id is NA are ignored via na.rm = TRUE, so this only
  # constrains rows in which both ids are present.
  expect_true(all(test$id_1 == test$id_2, na.rm = TRUE))

  # na.last = TRUE keeps the NA entries (placed at the end) so they are part
  # of the comparison.
  expect_identical(sort(test$id_1, na.last = TRUE), c(3, NA))
  expect_identical(sort(test$id_2, na.last = TRUE), c(3, NA))
})

test_that("jaccard_inner_join gives same results as stringdist_inner_join", {
for (i in 1:20) {
capture_messages({
Expand All @@ -218,6 +231,17 @@ test_that("jaccard_inner_join gives same results as stringdist_inner_join", {
}
})

# The column requested through `similarity_column` must match what
# hamming_distance() returns for the joined string pairs.
test_that("hamming: arg 'similarity_column' works", {
  # Messages emitted by the join are captured so they do not clutter the
  # test output.
  capture_messages({
    test <- hamming_inner_join(
      dataset_1,
      dataset_2,
      threshold = 3,
      band_width = 1,
      n_bands = 300,
      similarity_column = "sim"
    )
  })

  # Recompute the distance from the joined string columns and compare it with
  # the "sim" column produced by the join.
  recomputed <- hamming_distance(test$string.x, test$string.y)
  expect_equal(test$sim, recomputed)
})


test_that("seed works for hamming joins", {
for (i in 1:15) {
Expand Down

0 comments on commit 03799b9

Please sign in to comment.