From cecf10c32cabc9899e2683772ba72dea5f0bee4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alonso=20Mart=C3=ADnez=20Cisneros?= Date: Mon, 3 Apr 2023 17:52:26 +0200 Subject: [PATCH 1/4] Implementing functions proposed in #48 --- src/Tidier.jl | 2 ++ src/type_conversions.jl | 36 ++++++++++++++++++++++++++++++++++++ test/runtests.jl | 28 ++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+) create mode 100644 src/type_conversions.jl diff --git a/src/Tidier.jl b/src/Tidier.jl index d9e80bd..722e3dc 100644 --- a/src/Tidier.jl +++ b/src/Tidier.jl @@ -18,6 +18,7 @@ export Tidier_set, across, desc, n, row_number, starts_with, ends_with, matches, @select, @transmute, @rename, @mutate, @summarize, @summarise, @filter, @group_by, @ungroup, @slice, @arrange, @distinct, @pull, @left_join, @right_join, @inner_join, @full_join, @pivot_wider, @pivot_longer, @bind_rows, @bind_cols, @clean_names, @count, @tally, @drop_na + as_float, as_integer, as_string # Package global variables const code = Ref{Bool}(false) # output DataFrames.jl code? 
@@ -35,6 +36,7 @@ include("conditionals.jl") include("pseudofunctions.jl") include("helperfunctions.jl") include("ntile.jl") +include("type_conversions.jl") # Function to set global variables """ diff --git a/src/type_conversions.jl b/src/type_conversions.jl new file mode 100644 index 0000000..5c5a23a --- /dev/null +++ b/src/type_conversions.jl @@ -0,0 +1,36 @@ + +function as_float(value)::Union{AbstractFloat, Missing} + try + convert(AbstractFloat, value) + catch + missing + end +end + +function as_float(value::String)::Union{Float64, Missing} + try + parse(Float64, value) + catch + missing + end +end + +function as_integer(value)::Union{Integer, Missing} + try + convert(Integer, value) + catch + missing + end +end + +function as_integer(value::String)::Union{Int64, Missing} + try + parse(Int64, value) + catch + missing + end +end + +function as_string(value)::String + string(value) +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 8a1eda5..eda2fee 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,9 +3,37 @@ module TestTidier using Tidier using Test using Documenter +using DataFrames DocMeta.setdocmeta!(Tidier, :DocTestSetup, :(using Tidier); recursive=true) doctest(Tidier) +@testset "type conversions" begin + conversion_test = DataFrame( + non_floats = ["1.0", "2", "hello", 1, 2.0], + non_ints = ["1", "1.0", "hello", 1.5, 2.0], + non_strings = ["1", "1.0", "hello", 1, 2.0] + ) + + conversion_truth = DataFrame( + non_floats = [1.0, 2.0, missing, 1.0, 2.0], + non_ints = [1, missing, missing, missing, 2], + non_strings = ["1", "1.0", "hello", "1", "2.0"] + ) + + conversion_test = @chain conversion_test begin + @mutate(non_floats = as_float(non_floats)) + @mutate(non_ints = as_integer(non_ints)) + @mutate(non_strings = as_string(non_strings)) + end + + res = isequal.(conversion_test, conversion_truth) + + @test all(res.non_floats) + @test all(res.non_ints) + @test all(res.non_strings) +end + + end \ No newline at end of 
file From 1f24583249fa9e17bb4fc7f1a9cba22314dbb96c Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Tue, 11 Apr 2023 23:35:14 -0400 Subject: [PATCH 2/4] Cleaned up type conversions, functions, removed return type restrictions, added `passmissing()` to handle `missing` values, reserving `catch()` for parsing failures. --- src/Tidier.jl | 6 ++-- src/docstrings.jl | 78 +++++++++++++++++++++++++++++++++++++++++ src/type_conversions.jl | 41 ++++++++++++++-------- 3 files changed, 107 insertions(+), 18 deletions(-) diff --git a/src/Tidier.jl b/src/Tidier.jl index 8b9a72a..a762731 100644 --- a/src/Tidier.jl +++ b/src/Tidier.jl @@ -15,9 +15,9 @@ using Reexport @reexport using ShiftedArrays: lag, lead export Tidier_set, across, desc, n, row_number, starts_with, ends_with, matches, if_else, case_when, ntile, - @select, @transmute, @rename, @mutate, @summarize, @summarise, @filter, @group_by, @ungroup, @slice, - @arrange, @distinct, @pull, @left_join, @right_join, @inner_join, @full_join, @pivot_wider, @pivot_longer, - @bind_rows, @bind_cols, @clean_names, @count, @tally, @drop_na, @glimpse, as_float, as_integer, as_string + as_float, as_integer, as_string, @select, @transmute, @rename, @mutate, @summarize, @summarise, @filter, + @group_by, @ungroup, @slice, @arrange, @distinct, @pull, @left_join, @right_join, @inner_join, @full_join, + @pivot_wider, @pivot_longer, @bind_rows, @bind_cols, @clean_names, @count, @tally, @drop_na, @glimpse # Package global variables const code = Ref{Bool}(false) # output DataFrames.jl code? diff --git a/src/docstrings.jl b/src/docstrings.jl index 8ed68ea..e7d18e7 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -1869,4 +1869,82 @@ Groups: a [100] .b Int64 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, .c String a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, ``` +""" + +const docstring_as_float = +""" + as_float(value) + +Convert a number or string to a Float64 data type. + +This is a useful helper for type conversions. 
Missing values are propagated. + +# Arguments +- `value`: An `AbstractString`, `Number`, or `missing` value. + +# Examples +```jldoctest +julia> as_float(1) +1.0 + +julia> as_float("1.5") +1.5 + +julia> as_float(missing) +missing +``` +""" + +const docstring_as_integer = +""" + as_integer(value) + +Convert a number or string to an Int64 data type. + +This is a useful helper for type conversions. Missing values are propagated. Fractional values are rounded down with `floor` (toward negative infinity), so `1.5` becomes `1` and `-1.5` becomes `-2`. + +# Arguments +- `value`: An `AbstractString`, `Number`, or `missing` value. + +# Examples +```jldoctest +julia> as_integer(1) +1 + +julia> as_integer(1.5) +1 + +julia> as_integer("2") +2 + +julia> as_integer("2.5") +2 + +julia> as_integer(missing) +missing +``` +""" + +const docstring_as_string = +""" + as_string(value) + +Convert a number or string to a String data type. + +This is a useful helper for type conversions. Missing values are propagated. + +# Arguments +- `value`: An `AbstractString`, `Number`, or `missing` value. 
+ +# Examples +```jldoctest +julia> as_string(1) +"1" + +julia> as_string(1.5) +"1.5" + +julia> as_string(missing) +missing +``` """ \ No newline at end of file diff --git a/src/type_conversions.jl b/src/type_conversions.jl index 5c5a23a..d77f402 100644 --- a/src/type_conversions.jl +++ b/src/type_conversions.jl @@ -1,36 +1,47 @@ - -function as_float(value)::Union{AbstractFloat, Missing} +""" +$docstring_as_float +""" +function as_float(value) try - convert(AbstractFloat, value) + passmissing(convert)(Float64, value) catch - missing + missing # if parsing failure end end -function as_float(value::String)::Union{Float64, Missing} +function as_float(value::AbstractString) try - parse(Float64, value) + passmissing(parse)(Float64, value) catch - missing + missing # if parsing failure end end -function as_integer(value)::Union{Integer, Missing} +""" +$docstring_as_integer +""" +function as_integer(value) try - convert(Integer, value) + passmissing(floor)(value) |> + x -> passmissing(convert)(Int64, x) catch - missing + missing # if parsing failure end end -function as_integer(value::String)::Union{Int64, Missing} +function as_integer(value::AbstractString) try - parse(Int64, value) + passmissing(parse)(Float64, value) |> + x -> passmissing(floor)(x) |> + x -> passmissing(convert)(Int64, x) catch - missing + missing # if parsing failure end end -function as_string(value)::String - string(value) +""" +$docstring_as_string +""" +function as_string(value) + passmissing(string)(value) end \ No newline at end of file From 300d131b6e15856a760595110e1a52fd6483e170 Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Tue, 11 Apr 2023 23:38:13 -0400 Subject: [PATCH 3/4] Updated README.md, index.md, NEWS.md, bumped version to 0.7.4. 
--- NEWS.md | 3 +++ Project.toml | 2 +- README.md | 1 + docs/src/index.md | 1 + 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index ffe6a84..a3f2165 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # Tidier.jl updates +## v0.7.4 - 2023-04-11 +- Added `as_float()`, `as_integer()`, and `as_string()` + ## v0.7.3 - 2023-04-10 - Added `@glimpse()` diff --git a/Project.toml b/Project.toml index 937a8d5..697ef26 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Tidier" uuid = "f0413319-3358-4bb0-8e7c-0c83523a93bd" authors = ["Karandeep Singh"] -version = "0.7.3" +version = "0.7.4" [deps] Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc" diff --git a/README.md b/README.md index cfc5842..47cb405 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,7 @@ Tidier.jl also supports the following helper functions: - `ntile()` - `lag()` and `lead()` - `starts_with()`, `ends_with()`, `matches()`, and `contains()` +- `as_float()`, `as_integer()`, and `as_string()` See the documentation [Home](https://tidierorg.github.io/Tidier.jl/dev/) page for a guide on how to get started, or the [Reference](https://tidierorg.github.io/Tidier.jl/dev/reference/) page for a detailed guide to each of the macros and functions. diff --git a/docs/src/index.md b/docs/src/index.md index 5469e7d..fbcd68c 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -115,6 +115,7 @@ Tidier.jl also supports the following helper functions: - `ntile()` - `lag()` and `lead()` - `starts_with()`, `ends_with()`, `matches()`, and `contains()` + - `as_float()`, `as_integer()`, and `as_string()` ``` See the [Reference](https://tidierorg.github.io/Tidier.jl/dev/reference/) page for a detailed guide to each of the macros and functions. From bef75ce383a6d10e6f04a70ecf2b3bad99df04ac Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Tue, 11 Apr 2023 23:44:36 -0400 Subject: [PATCH 4/4] Moved tests from runtests.jl to docstrings. 
--- test/runtests.jl | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index eda2fee..8a1eda5 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,37 +3,9 @@ module TestTidier using Tidier using Test using Documenter -using DataFrames DocMeta.setdocmeta!(Tidier, :DocTestSetup, :(using Tidier); recursive=true) doctest(Tidier) -@testset "type conversions" begin - conversion_test = DataFrame( - non_floats = ["1.0", "2", "hello", 1, 2.0], - non_ints = ["1", "1.0", "hello", 1.5, 2.0], - non_strings = ["1", "1.0", "hello", 1, 2.0] - ) - - conversion_truth = DataFrame( - non_floats = [1.0, 2.0, missing, 1.0, 2.0], - non_ints = [1, missing, missing, missing, 2], - non_strings = ["1", "1.0", "hello", "1", "2.0"] - ) - - conversion_test = @chain conversion_test begin - @mutate(non_floats = as_float(non_floats)) - @mutate(non_ints = as_integer(non_ints)) - @mutate(non_strings = as_string(non_strings)) - end - - res = isequal.(conversion_test, conversion_truth) - - @test all(res.non_floats) - @test all(res.non_ints) - @test all(res.non_strings) -end - - end \ No newline at end of file