From 0b3e1e080d2d6c5b124a7196cc2ce88ccc6cf1a1 Mon Sep 17 00:00:00 2001 From: drizk1 Date: Thu, 21 Dec 2023 17:35:49 -0500 Subject: [PATCH 1/2] simplified column selection --- src/missings.jl | 40 +++++++++++++++++++++------------------- src/separate_unite.jl | 22 ++++------------------ 2 files changed, 25 insertions(+), 37 deletions(-) diff --git a/src/missings.jl b/src/missings.jl index 6af74d42..a59f9c79 100644 --- a/src/missings.jl +++ b/src/missings.jl @@ -40,26 +40,27 @@ function fill_missing(df::DataFrame, method::String) return fill_missing(df, Symbol.(names(df)), method) end -function fill_missing(df::DataFrame, cols::Vector{Symbol}, method::String) +function fill_missing(df::DataFrame, columns, method::String) new_df = copy(df) - - for col in cols + cols_expr = columns isa Expr ? (columns,) : columns + column_symbols = names(df, Cols(cols_expr...)) + for col_sym in column_symbols if method == "down" - last_observation = new_df[1, col] + last_observation = new_df[1, col_sym] for i in 1:nrow(new_df) - if ismissing(new_df[i, col]) - new_df[i, col] = last_observation + if ismissing(new_df[i, col_sym]) + new_df[i, col_sym] = last_observation else - last_observation = new_df[i, col] + last_observation = new_df[i, col_sym] end end elseif method == "up" - next_observation = new_df[end, col] + next_observation = new_df[end, col_sym] for i in nrow(new_df):-1:1 - if ismissing(new_df[i, col]) - new_df[i, col] = next_observation + if ismissing(new_df[i, col_sym]) + new_df[i, col_sym] = next_observation else - next_observation = new_df[i, col] + next_observation = new_df[i, col_sym] end end else @@ -70,12 +71,14 @@ function fill_missing(df::DataFrame, cols::Vector{Symbol}, method::String) return new_df end -function fill_missing(gdf::GroupedDataFrame, cols::Vector{Symbol}, method::String) +function fill_missing(gdf::GroupedDataFrame, columns, method::String) group_cols = groupcols(gdf) results = [] + cols_expr = columns isa Expr ? (columns,) : columns + column_symbols = names(gdf, Cols(cols_expr...)) for group in gdf # call the DataFrame version of fill_missing on the SubDataFrame - processed_group = fill_missing(DataFrame(group), cols, method) + processed_group = fill_missing(DataFrame(group), column_symbols, method) push!(results, processed_group) end combined_df = vcat(results...) @@ -100,13 +103,12 @@ macro fill_missing(df, args...) end end - cols = args[1:(length(args)-1)] - method = args[length(args)] + interpolated_exprs = parse_interpolation.(args[1:(length(args)-1)]) + tidy_exprs = [i[1] for i in interpolated_exprs] + tidy_exprs = parse_tidy.(tidy_exprs) - # Requires Julia 1.9 - # cols..., method = args - - cols_quoted = QuoteNode.(cols) + method = esc(last(args)) + cols_quoted = tidy_exprs return quote if $(esc(df)) isa GroupedDataFrame diff --git a/src/separate_unite.jl b/src/separate_unite.jl index 0f265c9c..be34947e 100644 --- a/src/separate_unite.jl +++ b/src/separate_unite.jl @@ -146,26 +146,12 @@ end function separate_rows(df::Union{DataFrame, GroupedDataFrame}, columns, delimiter::Union{Regex, String}) is_grouped = df isa GroupedDataFrame grouping_columns = is_grouped ? groupcols(df) : Symbol[] - # Ungroup if necessary temp_df = copy(is_grouped ? parent(df) : df) - # temp_df = copy(df) - - # Convert all references to column symbols - column_symbols = [] - for col in columns - if col isa Integer - push!(column_symbols, Symbol(names(temp_df)[col])) - elseif col isa AbstractRange - append!(column_symbols, Symbol.(names(temp_df)[collect(col)])) - elseif typeof(col) <: Between - # Get the column indices for the Between range - col_indices = DataFrames.index(temp_df)[col] - append!(column_symbols, Symbol.(names(temp_df)[col_indices])) - else - push!(column_symbols, Symbol(col)) - end - end + + cols_expr = columns isa Expr ? (columns,) : columns + column_symbols = names(df, Cols(cols_expr...)) + column_symbols = Symbol.(column_symbols) # Initialize an array to hold expanded data for each column expanded_data = Dict{Symbol, Vector{Any}}() From 1bb492189fe1fb53d0d55b6814fd8cafec8e3250 Mon Sep 17 00:00:00 2001 From: drizk1 Date: Fri, 22 Dec 2023 09:09:23 -0500 Subject: [PATCH 2/2] bumped version, added news --- NEWS.md | 4 ++++ Project.toml | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 6d894b1e..83745563 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # TidierData.jl updates +## v0.14.3 - 2023-12-22 +- Adds support for interpolation and tidy selection in `@fill_missing` +- Fixes tidy selection in `@separate_rows()` + ## v0.14.2 - 2023-12-21 - `@slice()` now supports interpolation and user-defined functions - Adds `where()` diff --git a/Project.toml b/Project.toml index 32a77476..20b11173 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TidierData" uuid = "fe2206b3-d496-4ee9-a338-6a095c4ece80" authors = ["Karandeep Singh"] -version = "0.14.2" +version = "0.14.3" [deps] Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"