From 0c549664d0f5ff70ae5c58b96c65456c30fd5d38 Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Sun, 30 Apr 2023 01:01:11 -0400 Subject: [PATCH] Fixed auto-vectorization of `&&` and `||` and added docs. --- NEWS.md | 4 ++++ Project.toml | 2 +- docs/examples/UserGuide/filter.jl | 34 +++++++++++++++++++++++++++---- src/docstrings.jl | 10 +++++++++ src/parsing.jl | 3 +++ 5 files changed, 48 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index a3f2165..83e1ac4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # Tidier.jl updates +## v0.7.5 - 2023-04-30 +- Fixed bug to ensure that `&&` and `||` are auto-vectorized +- Added docstrings and examples to show different ways of filtering by multiple "and" conditions, including `&&`, `&`, and separating multiple expressions with commas. + ## v0.7.4 - 2023-04-11 - Added `as_float()`, `as_integer()`, and `as_string()` diff --git a/Project.toml b/Project.toml index 697ef26..e98d84e 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Tidier" uuid = "f0413319-3358-4bb0-8e7c-0c83523a93bd" authors = ["Karandeep Singh"] -version = "0.7.4" +version = "0.7.5" [deps] Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc" diff --git a/docs/examples/UserGuide/filter.jl b/docs/examples/UserGuide/filter.jl index dff88a3..f80e95d 100644 --- a/docs/examples/UserGuide/filter.jl +++ b/docs/examples/UserGuide/filter.jl @@ -8,10 +8,36 @@ movies = dataset("ggplot2", "movies"); # Let’s take a look at the movies whose budget was more than average. We will select only the first 5 rows for the sake of brevity. @chain movies begin - @mutate(Budget = Budget / 1_000_000) - @filter(Budget >= mean(skipmissing(Budget))) - @select(Title, Budget) - @slice(1:5) + @mutate(Budget = Budget / 1_000_000) + @filter(Budget >= mean(skipmissing(Budget))) + @select(Title, Budget) + @slice(1:5) +end + +# Let's search for movies that have at least 200 votes and a rating of greater than or equal to 8. 
There are 3 ways you can specify an "and" condition inside of `Tidier.jl`. + +## The first option is to use the short-circuiting `&&` operator as shown below. This is the preferred approach because the second expression is only evaluated (per element) if the first one is true. + +@chain movies begin + @filter(Votes >= 200 && Rating >= 8) + @select(Title, Votes, Rating) + @slice(1:5) +end + +## The second option is to use the bitwise `&` operator. However, there is a key difference in syntax. Because the `&` operator has a higher operator precedence than `>=`, you have to wrap the `>=` expressions inside of parentheses to ensure that the expression is evaluated correctly. + +@chain movies begin + @filter((Votes >= 200) & (Rating >= 8)) + @select(Title, Votes, Rating) + @slice(1:5) +end + +## Finally, for "and" conditions only, you can separate the expressions with commas, similar to the behavior of `filter()` in `tidyverse`. + +@chain movies begin + @filter(Votes >= 200, Rating >= 8) + @select(Title, Votes, Rating) + @slice(1:5) end # Now let's see how to use `@filter()` with `in`. Here's an example with a tuple. diff --git a/src/docstrings.jl b/src/docstrings.jl index e7d18e7..477541d 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -475,6 +475,16 @@ julia> @chain df begin 2 │ d 4 14 3 │ e 5 15 +julia> @chain df begin + @filter(b >= 3 && c >= 14) + end +2×3 DataFrame + Row │ a b c + │ Char Int64 Int64 +─────┼──────────────────── + 1 │ d 4 14 + 2 │ e 5 15 + julia> @chain df begin @filter(b in (1, 3)) end diff --git a/src/parsing.jl b/src/parsing.jl index 29da34d..674c269 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -302,6 +302,9 @@ function parse_autovec(tidy_expr::Union{Expr,Symbol}) fn_new = Symbol("." * string(fn)) return :($fn_new($(args...))) end + elseif hasproperty(x, :head) && (x.head == :&& || x.head == :||) + x.head = Symbol("." * string(x.head)) + return x end return x end