diff --git a/articles/guided_tour.html b/articles/guided_tour.html index a93edd6..0d04954 100644 --- a/articles/guided_tour.html +++ b/articles/guided_tour.html @@ -190,20 +190,20 @@

Basic Syntax:= 20, band_width = 6, threshold = .8 ) print(Sys.time() - start_time) -
## Time difference of 0.01206851 secs
+
## Time difference of 0.01161695 secs
 print(join_out)
## # A tibble: 8 × 4
 ##       a field.x                                                      b field.y  
 ##   <dbl> <chr>                                                    <dbl> <chr>    
-## 1    88 scheuer for congress 1980                                  667 scheuer …
-## 2   292 bill bradley for u s senate '84                            913 bill bra…
-## 3   378 guarini for congress 1982                                  883 guarini …
-## 4   238 4th congressional district democratic party                518 16th con…
-## 5   302 americans for good government inc                          910 american…
-## 6   230 pipefitters local union 524                                998 pipefitt…
-## 7   319 7th congressional district democratic party of wisconsin   792 8th cong…
-## 8   378 guarini for congress 1982                                  606 guarini …
+## 1 378 guarini for congress 1982 606 guarini … +## 2 378 guarini for congress 1982 883 guarini … +## 3 238 4th congressional district democratic party 518 16th con… +## 4 88 scheuer for congress 1980 667 scheuer … +## 5 230 pipefitters local union 524 998 pipefitt… +## 6 302 americans for good government inc 910 american… +## 7 292 bill bradley for u s senate '84 913 bill bra… +## 8 319 7th congressional district democratic party of wisconsin 792 8th cong…

The first two arguments, a and b, are direct analogues of the dplyr arguments, and are the two data frames you want to join. The by field also acts the diff --git a/articles/matching_vectors.html b/articles/matching_vectors.html index af0d7a0..f92e658 100644 --- a/articles/matching_vectors.html +++ b/articles/matching_vectors.html @@ -169,7 +169,7 @@

Demonstrationn_matches <- nrow(joined_out) time_taken <- Sys.time() - start print(paste("found", n_matches, "matches in", round(time_taken), "seconds")) -#> [1] "found 100000 matches in 20 seconds" +#> [1] "found 100000 matches in 16 seconds"

Zoomerjoin is able to easily find all pairs in just under 30s (perhaps longer on the runner that renders the website), even though the points lie in high-dimensional (d=100) space. This makes zoomerjoin a diff --git a/pkgdown.yml b/pkgdown.yml index a2517a7..0d8a58c 100644 --- a/pkgdown.yml +++ b/pkgdown.yml @@ -5,7 +5,7 @@ articles: benchmarks: benchmarks.html guided_tour: guided_tour.html matching_vectors: matching_vectors.html -last_built: 2024-06-03T14:59Z +last_built: 2024-07-01T01:36Z urls: reference: https://beniaminogreen.github.io/zoomerjoin/reference article: https://beniaminogreen.github.io/zoomerjoin/articles diff --git a/reference/euclidean-joins.html b/reference/euclidean-joins.html index 5bb3029..782406d 100644 --- a/reference/euclidean-joins.html +++ b/reference/euclidean-joins.html @@ -202,29 +202,29 @@

Exampleseuclidean_inner_join(X_1, X_2, by = c("V1", "V2"), threshold = .00005) #> V1.x V2.x id_1 V1.y V2.y id_2 #> 1 0.7777778 0.7777778 8 0.7777779 0.7777779 8 -#> 2 0.3333333 0.3333333 4 0.3333334 0.3333334 4 -#> 3 0.6666667 0.6666667 7 0.6666668 0.6666668 7 -#> 4 0.1111111 0.1111111 2 0.1111112 0.1111112 2 -#> 5 1.0000000 1.0000000 10 1.0000001 1.0000001 10 -#> 6 0.2222222 0.2222222 3 0.2222223 0.2222223 3 -#> 7 0.4444444 0.4444444 5 0.4444445 0.4444445 5 -#> 8 0.8888889 0.8888889 9 0.8888890 0.8888890 9 -#> 9 0.5555556 0.5555556 6 0.5555557 0.5555557 6 -#> 10 0.0000000 0.0000000 1 0.0000001 0.0000001 1 +#> 2 0.2222222 0.2222222 3 0.2222223 0.2222223 3 +#> 3 0.4444444 0.4444444 5 0.4444445 0.4444445 5 +#> 4 0.5555556 0.5555556 6 0.5555557 0.5555557 6 +#> 5 0.3333333 0.3333333 4 0.3333334 0.3333334 4 +#> 6 0.8888889 0.8888889 9 0.8888890 0.8888890 9 +#> 7 1.0000000 1.0000000 10 1.0000001 1.0000001 10 +#> 8 0.0000000 0.0000000 1 0.0000001 0.0000001 1 +#> 9 0.1111111 0.1111111 2 0.1111112 0.1111112 2 +#> 10 0.6666667 0.6666667 7 0.6666668 0.6666668 7 # keep all observations from X_1, regardless of whether they have a match euclidean_inner_join(X_1, X_2, by = c("V1", "V2"), threshold = .00005) #> V1.x V2.x id_1 V1.y V2.y id_2 -#> 1 0.3333333 0.3333333 4 0.3333334 0.3333334 4 -#> 2 1.0000000 1.0000000 10 1.0000001 1.0000001 10 -#> 3 0.6666667 0.6666667 7 0.6666668 0.6666668 7 -#> 4 0.4444444 0.4444444 5 0.4444445 0.4444445 5 +#> 1 1.0000000 1.0000000 10 1.0000001 1.0000001 10 +#> 2 0.1111111 0.1111111 2 0.1111112 0.1111112 2 +#> 3 0.5555556 0.5555556 6 0.5555557 0.5555557 6 +#> 4 0.7777778 0.7777778 8 0.7777779 0.7777779 8 #> 5 0.0000000 0.0000000 1 0.0000001 0.0000001 1 -#> 6 0.8888889 0.8888889 9 0.8888890 0.8888890 9 -#> 7 0.5555556 0.5555556 6 0.5555557 0.5555557 6 +#> 6 0.3333333 0.3333333 4 0.3333334 0.3333334 4 +#> 7 0.4444444 0.4444444 5 0.4444445 0.4444445 5 #> 8 0.2222222 0.2222222 3 0.2222223 0.2222223 3 -#> 9 0.7777778 0.7777778 8 0.7777779 0.7777779 8 
-#> 10 0.1111111 0.1111111 2 0.1111112 0.1111112 2 +#> 9 0.6666667 0.6666667 7 0.6666668 0.6666668 7 +#> 10 0.8888889 0.8888889 9 0.8888890 0.8888890 9