Skip to content

Commit

Permalink
Merge pull request #51 from drizk1/replace_missing_if
Browse files Browse the repository at this point in the history
replace_missing, missing_if, @rename_with
  • Loading branch information
Karandeep Singh authored Nov 18, 2023
2 parents ca0c6e0 + f159d4b commit 591bff3
Show file tree
Hide file tree
Showing 8 changed files with 211 additions and 15 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "TidierData"
uuid = "fe2206b3-d496-4ee9-a338-6a095c4ece80"
authors = ["Karandeep Singh"]
version = "0.12.2"
version = "0.13.0"

[deps]
Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ To support R-style programming, TidierData.jl is implemented using macros.
TidierData.jl currently supports the following top-level macros:

- `@glimpse()`
- `@select()`, `@rename()`, and `@distinct()`
- `@select()` and `@distinct()`
- `@rename()` and `@rename_with()`
- `@mutate()` and `@transmute()`
- `@summarize()` and `@summarise()`
- `@filter()`
Expand Down Expand Up @@ -106,6 +107,7 @@ TidierData.jl also supports the following helper functions:
- `everything()`, `starts_with()`, `ends_with()`, `matches()`, and `contains()`
- `as_float()`, `as_integer()`, and `as_string()`
- `is_float()`, `is_integer()`, and `is_string()`
- `missing_if()` and `replace_missing()`

See the documentation [Home](https://tidierorg.github.io/TidierData.jl/latest/) page for a guide on how to get started, or the [Reference](https://tidierorg.github.io/TidierData.jl/latest/reference/) page for a detailed guide to each of the macros and functions.

Expand Down
15 changes: 15 additions & 0 deletions docs/examples/UserGuide/fill_missing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,18 @@ end
@fill_missing(a, "down")
end

# ## `replace_missing()`
# The `replace_missing` function facilitates the replacement of `missing` values with a specified replacement.

@chain df begin
@mutate(b = replace_missing(b, 2))
end

# ## `missing_if()`
# The `missing_if` function is used to introduce `missing` values under specific conditions.

@chain df begin
@mutate(b = missing_if(b, 5))
end

# Neither `missing_if` nor `replace_missing` is type-specific.
4 changes: 3 additions & 1 deletion docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ TidierData.jl currently supports the following top-level macros:
```@raw html
!!! example "Top-level macros:"
- `@glimpse()`
- `@select()`, `@rename()`, and `@distinct()`
- `@select()` and `@distinct()`
- `@rename()` and `@rename_with()`
- `@mutate()` and `@transmute()`
- `@summarize()` and `@summarise()`
- `@filter()`
Expand Down Expand Up @@ -120,6 +121,7 @@ TidierData.jl also supports the following helper functions:
- `everything()`, `starts_with()`, `ends_with()`, `matches()`, and `contains()`
- `as_float()`, `as_integer()`, and `as_string()`
- `is_float()`, `is_integer()`, and `is_string()`
- `missing_if()` and `replace_missing()`
```

See the [Reference](https://tidierorg.github.io/TidierData.jl/latest/reference/) page for a detailed guide to each of the macros and functions.
Expand Down
55 changes: 53 additions & 2 deletions src/TidierData.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ using Reexport
@reexport using ShiftedArrays: lag, lead

export TidierData_set, across, desc, n, row_number, everything, starts_with, ends_with, matches, if_else, case_when, ntile,
as_float, as_integer, as_string, is_float, is_integer, is_string, @select, @transmute, @rename, @mutate, @summarize, @summarise, @filter,
as_float, as_integer, as_string, is_float, is_integer, is_string, missing_if, replace_missing, @select, @transmute, @rename, @mutate, @summarize, @summarise, @filter,
@group_by, @ungroup, @slice, @arrange, @distinct, @pull, @left_join, @right_join, @inner_join, @full_join,
@pivot_wider, @pivot_longer, @bind_rows, @bind_cols, @clean_names, @count, @tally, @drop_missing, @glimpse, @separate,
@unite, @summary, @fill_missing, @slice_sample, @slice_min, @slice_max
@unite, @summary, @fill_missing, @slice_sample, @slice_min, @slice_max, @rename_with

# Package global variables
const code = Ref{Bool}(false) # output DataFrames.jl code?
Expand Down Expand Up @@ -608,4 +608,55 @@ macro glimpse(df, width = 80)
return df_expr
end

"""
$docstring_rename_with
"""
macro rename_with(df, fn, exprs...)
    # Builds code that copies `df`, works out which columns `exprs` selects (every
    # column when no `exprs` are given) and renames those columns to `fn(name)`.
    # `df` and `fn` are evaluated in the caller's scope.
    interpolated_exprs = parse_interpolation.(exprs)

    # Each result is indexed as [1] the rewritten expression and [2]/[3] the flags that
    # drive the `TidierData_n` / `TidierData_row_number` helper columns below.
    tidy_exprs = [i[1] for i in interpolated_exprs]
    any_found_n = any(i -> i[2], interpolated_exprs)
    any_found_row_number = any(i -> i[3], interpolated_exprs)

    tidy_exprs = parse_tidy.(tidy_exprs)

    # Whether a selection was supplied is already known while the macro expands, so
    # only the lookup that is actually needed is emitted. `select` is used solely to
    # read the resulting column names, so the table is not copied a second time first.
    columns_expr = if isempty(exprs)
        :(names(df_copy))
    else
        :(names(select(df_copy, $(tidy_exprs...))))
    end

    df_expr = quote
        local df_copy = copy($(esc(df)))

        # NOTE(review): the helper columns added here are not removed again before
        # the result is returned -- confirm that this is intended.
        if $any_found_n
            if df_copy isa GroupedDataFrame
                transform!(df_copy, nrow => :TidierData_n; ungroup = false)
            else
                transform!(df_copy, nrow => :TidierData_n)
            end
        end

        if $any_found_row_number
            if df_copy isa GroupedDataFrame
                transform!(df_copy, eachindex => :TidierData_row_number; ungroup = false)
            else
                transform!(df_copy, eachindex => :TidierData_row_number)
            end
        end

        local columns_to_rename = $columns_expr

        # Pair every current name with `fn(name)`, then keep only the selected columns.
        local renames = filter(
            p -> p.first in columns_to_rename,
            Pair.(names(df_copy), $(esc(fn)).(names(df_copy))),
        )

        if df_copy isa GroupedDataFrame
            rename!(df_copy, renames; ungroup = false)
        else
            rename!(df_copy, renames)
        end

        df_copy
    end

    return df_expr
end

end
106 changes: 106 additions & 0 deletions src/docstrings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2567,4 +2567,110 @@ julia> @chain df begin
2 │ missing 0.3 missing
3 │ 0.2 2.0 0.2
```
"""
const docstring_missing_if =
"""
missing_if(x, value)
Replace a specific `value` with `missing` in `x`.
# Arguments
- `x`: The input value which can be of any type. If `x` is already `missing` or equals `value`, the function will return `missing`. Otherwise, it returns `x` unaltered.
- `value`: The specific value to be checked against.
# Examples
```jldoctest
julia> df = DataFrame(
a = [1, missing, 3, 4],
b = ["apple", "apple", "banana", "cherry"]
);
julia> @chain df begin
@mutate(a = missing_if(a, 4),
b = missing_if(b, "apple"))
end
4×2 DataFrame
Row │ a b
│ Int64? String?
─────┼──────────────────
1 │ 1 missing
2 │ missing missing
3 │ 3 banana
4 │ missing cherry
```
"""

const docstring_replace_missing =
"""
replace_missing(x, replacement)
Replace `missing` values in `x` with a specified `replacement` value.
# Arguments
- `x`: The input value which can be of any type. If `x` is `missing`, the function will return `replacement`. Otherwise, it returns `x` unaltered.
- `replacement`: The value to replace `missing` with in `x`.
# Examples
```jldoctest
julia> df = DataFrame(
a = [1, missing, 3, 4],
b = [4, 5, missing, 8]
);
julia> @chain df begin
@mutate(a = replace_missing(a, 100),
b = replace_missing(b, 35))
end
4×2 DataFrame
Row │ a b
│ Int64 Int64
─────┼──────────────
1 │ 1 4
2 │ 100 5
3 │ 3 35
4 │ 4 8
```
"""

const docstring_rename_with =
"""
@rename_with(df, fn, exprs...)
Renames the chosen column names using a function
# Arguments
- `df`: a DataFrame
- `fn`: the function applied to each selected column name to produce its new name (such as `str_remove_all` from TidierStrings)
- `exprs`: One or more unquoted variable names separated by commas. Variable names
can also be used as their positions in the data, like `x:y`, to select
a range of variables. Variable names can also be chosen with selection helpers such as `starts_with()`. Defaults to all columns if empty.
# Examples
```jldoctest
julia> function str_remove_all(column, pattern::String)
if ismissing(column)
return column
end
patterns = split(pattern, '|')
for p in patterns
column = replace(column, strip(p) => "")
end
return column
end;
julia> df = DataFrame(
term_a = ["apple", "banana", "cherry"],
document_a = ["doc_1", "doc2", "doc3"],
_n_ = [1, 2, 3]
);
julia> @rename_with(df, str -> str_remove_all(str, "_a"), !term_a)
3×3 DataFrame
Row │ term_a document _n_
│ String String Int64
─────┼─────────────────────────
1 │ apple doc_1 1
2 │ banana doc2 2
3 │ cherry doc3 3
```
"""
12 changes: 11 additions & 1 deletion src/missings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,14 @@ macro fill_missing(df, args...)
fill_missing($(esc(df)), [$(cols_quoted...)], $method)
end
end
end
end

"""
$docstring_missing_if
"""
function missing_if(x, value)
    # `x == value` is `missing` whenever either side is `missing`; `coalesce` turns that
    # unknown comparison into "no match" so the condition is always a `Bool`.
    # A missing `x` is therefore returned as-is, and a missing `value` never matches.
    return coalesce(x == value, false) ? missing : x
end

"""
$docstring_replace_missing
"""
function replace_missing(x, replacement)
    # `coalesce` yields its first non-missing argument, so `replacement` is returned
    # only when `x` is `missing`; any other `x` comes back unchanged.
    return coalesce(x, replacement)
end
28 changes: 19 additions & 9 deletions src/separate_unite.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,24 @@ end
$docstring_unite
"""
macro unite(df, new_col, from_cols, sep)
new_col = QuoteNode(new_col)

if @capture(from_cols, (args__,))
elseif @capture(from_cols, [args__])
new_col_quoted = QuoteNode(new_col)
interpolated_from_cols, _, _ = parse_interpolation(from_cols)

if @capture(interpolated_from_cols, (args__,)) || @capture(interpolated_from_cols, [args__])
args = QuoteNode.(args)
from_cols_expr = :[$(args...)]
else
from_cols_expr = quote
if typeof($interpolated_from_cols) <: Tuple
collect(Symbol.($interpolated_from_cols))

else
$interpolated_from_cols
end
end
end

args = QuoteNode.(args)
var_expr = quote
unite($(esc(df)), $new_col, [$(args...)], $sep)

return quote
unite($(esc(df)), $new_col_quoted, $(from_cols_expr), $(esc(sep)))
end
end
end

2 comments on commit 591bff3

@kdpsingh
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/95573

Tip: Release Notes

Did you know you can add release notes too? Just add markdown formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates. i.e.

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.13.0 -m "<description of version>" 591bff3e10971e56b8a59558f35e0e4f30ebfa6f
git push origin v0.13.0

Please sign in to comment.