From 7ffd70c283550307bf1e4b71bdb94129e5832d5a Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Fri, 12 Apr 2024 00:35:43 -0400 Subject: [PATCH] Change file path for data files. --- src/TidierFiles.jl | 144 ++++++++++++++++++++++----------------------- src/docstrings.jl | 57 ++++++++++-------- src/statsfiles.jl | 42 ++++++------- src/xlfiles.jl | 32 +++++----- test/runtests.jl | 24 ++++---- 5 files changed, 152 insertions(+), 147 deletions(-) diff --git a/src/TidierFiles.jl b/src/TidierFiles.jl index 77b2e68..ead47f9 100644 --- a/src/TidierFiles.jl +++ b/src/TidierFiles.jl @@ -20,29 +20,6 @@ include("fwf.jl") include("xlfiles.jl") include("statsfiles.jl") -""" -$docstring_write_csv -""" -function write_csv( - x::DataFrame, - file::String; - missingstring::String = "NA", - append::Bool = false, - col_names::Bool = true, - eol::String = "\n", - num_threads::Int = Threads.nthreads()) - - # Configure threading - CSV.write( - file, - x, - append = append, - header = col_names && !append, - missingstring = missingstring, - newline = eol, - threaded = num_threads > 1 ) -end - """ $docstring_read_csv """ @@ -107,30 +84,6 @@ function read_csv(file; return df end -""" -$docstring_write_tsv -""" -function write_tsv( - x::DataFrame, - file::String; - missingstring::String = "", - append::Bool = false, - col_names::Bool = true, - eol::String = "\n", - num_threads::Int = Threads.nthreads()) - - # Write DataFrame to TSV - CSV.write( - file, - x, - delim = '\t', # Use tab as the delimiter for TSV - append = append, - header = col_names && !append, - missingstring = missingstring, - newline = eol, - threaded = num_threads > 1) -end - """ $docstring_read_delim """ @@ -250,31 +203,6 @@ function read_tsv(file; return df end -""" -$docstring_write_table -""" -function write_table( - x::DataFrame, - file::String; - delim::Char = '\t', # Default to TSV, but allow flexibility - missingstring::String = "", - append::Bool = false, - col_names::Bool = true, - eol::String = "\n", - 
num_threads::Int = Threads.nthreads()) - - # Write DataFrame to a file with the specified delimiter - CSV.write( - file, - x, - delim = delim, # Flexible delimiter based on argument - append = append, - header = col_names && !append, - missingstring = missingstring, - newline = eol, - threaded = num_threads > 1) -end - """ $docstring_read_table """ @@ -334,4 +262,76 @@ function read_table(file; return df end +""" +$docstring_write_csv +""" +function write_csv( + x::DataFrame, + file::String; + missingstring::String = "NA", + append::Bool = false, + col_names::Bool = true, + eol::String = "\n", + num_threads::Int = Threads.nthreads()) + + # Configure threading + CSV.write( + file, + x, + append = append, + header = col_names && !append, + missingstring = missingstring, + newline = eol, + threaded = num_threads > 1 ) +end + +""" +$docstring_write_tsv +""" +function write_tsv( + x::DataFrame, + file::String; + missingstring::String = "", + append::Bool = false, + col_names::Bool = true, + eol::String = "\n", + num_threads::Int = Threads.nthreads()) + + # Write DataFrame to TSV + CSV.write( + file, + x, + delim = '\t', # Use tab as the delimiter for TSV + append = append, + header = col_names && !append, + missingstring = missingstring, + newline = eol, + threaded = num_threads > 1) +end + +""" +$docstring_write_table +""" +function write_table( + x::DataFrame, + file::String; + delim::Char = '\t', # Default to TSV, but allow flexibility + missingstring::String = "", + append::Bool = false, + col_names::Bool = true, + eol::String = "\n", + num_threads::Int = Threads.nthreads()) + + # Write DataFrame to a file with the specified delimiter + CSV.write( + file, + x, + delim = delim, # Flexible delimiter based on argument + append = append, + header = col_names && !append, + missingstring = missingstring, + newline = eol, + threaded = num_threads > 1) +end + end \ No newline at end of file diff --git a/src/docstrings.jl b/src/docstrings.jl index 0a3cdb5..e6e0573 100644 --- 
a/src/docstrings.jl +++ b/src/docstrings.jl @@ -18,7 +18,7 @@ Reads a CSV file or URL into a DataFrame, with options to specify delimiter, col `num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. Defaults to 1 # Examples ```jldoctest -julia> read_csv(joinpath(tempdir(), "csvtest.csv"), skip = 2, n_max = 3, missingstring = ["95", "Charlie"]) +julia> read_csv("csvtest.csv", skip = 2, n_max = 3, missingstring = ["95", "Charlie"]) 3×3 DataFrame Row │ ID Name Score │ Int64 String7 Int64? @@ -50,7 +50,7 @@ Reads a TSV file or URL into a DataFrame, with options to specify delimiter, col # Examples ```jldoctest -julia> read_tsv(joinpath(tempdir(), "tsvtest.tsv"), skip = 2, n_max = 3, missingstring = ["Charlie"]) +julia> read_tsv("tsvtest.tsv", skip = 2, n_max = 3, missingstring = ["Charlie"]) 3×3 DataFrame Row │ ID Name Score │ Int64 String7 Int64 @@ -83,7 +83,7 @@ Reads a delimited file or URL into a DataFrame, with options to specify delimite # Examples ```jldoctest -julia> read_delim(joinpath(tempdir(), "csvtest.csv"), delim = ",", col_names = false, num_threads = 4) # col_names are false here for the purpose of demonstration +julia> read_delim("csvtest.csv", delim = ",", col_names = false, num_threads = 4) # col_names are false here for the purpose of demonstration 6×3 DataFrame Row │ Column1 Column2 Column3 │ String3 String7 String7 @@ -97,8 +97,6 @@ julia> read_delim(joinpath(tempdir(), "csvtest.csv"), delim = ",", col_names = f ``` """ - - const docstring_read_fwf = """ read_fwf(filepath::String; num_lines::Int=4, col_names=nothing) @@ -112,8 +110,12 @@ Read fixed-width format (FWF) files into a DataFrame. - `skip_to`=0: Number of lines at the beginning of the file to skip before reading data. - `n_max`=nothing: Maximum number of lines to read from the file. If nothing, read all lines. 
# Examples -```jldoctest -julia> path = joinpath(tempdir(), "fwftest.txt"); +```jldoctest +julia> path = "fwftest.txt"; + +julia> open(path, "w") do file + write(file, fwf_data) + end; julia> read_fwf(path, fwf_empty(path, num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"]), skip_to=3, n_max=3) 3×5 DataFrame @@ -126,7 +128,6 @@ julia> read_fwf(path, fwf_empty(path, num_lines=4, col_names = ["Name", "Age", " ``` """ - const docstring_fwf_empty = """ fwf_empty(filepath::String; num_lines::Int=4, col_names=nothing) @@ -143,10 +144,14 @@ num_lines::Int=4: Number of lines to sample from the beginning of the file for a - A vector of strings representing the column names. # Examples ```jldoctest -julia> fwf_empty(joinpath(tempdir(), "fwftest.txt")) +julia> open("fwftest.txt", "w") do file + write(file, fwf_data) + end; + +julia> fwf_empty("fwftest.txt") ([13, 5, 8, 20, 8], ["Column_1", "Column_2", "Column_3", "Column_4", "Column_5"]) -julia> fwf_empty(joinpath(tempdir(), "fwftest.txt"), num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"]) +julia> fwf_empty("fwftest.txt", num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"]) ([13, 5, 8, 20, 8], ["Name", "Age", "ID", "Position", "Salary"]) ``` """ @@ -170,7 +175,7 @@ Write a DataFrame to a CSV (comma-separated values) file. ```jldoctest julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); -julia> write_csv(df, joinpath(tempdir(), "csvtest.csv")); +julia> write_csv(df, "csvtest.csv"); ``` """ @@ -192,7 +197,7 @@ Write a DataFrame to a TSV (tab-separated values) file. 
```jldoctest julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); -julia> write_tsv(df, joinpath(tempdir(), "tsvtest.tsv")); +julia> write_tsv(df, "tsvtest.tsv"); ``` """ @@ -213,7 +218,7 @@ Read a table from a file where columns are separated by any amount of whitespace -`kwargs`: Additional keyword arguments passed to CSV.File. # Examples ```jldoctest -julia> read_table(joinpath(tempdir(), "tabletest.txt"), skip = 2, n_max = 3, col_select = ["Name"]) +julia> read_table("tabletest.txt", skip = 2, n_max = 3, col_select = ["Name"]) 3×1 DataFrame Row │ Name │ String7 @@ -244,7 +249,7 @@ Write a DataFrame to a file, allowing for customization of the delimiter and oth ```jldoctest julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); -julia> write_table(df, joinpath(tempdir(), "tabletest.txt")); +julia> write_table(df, "tabletest.txt"); ``` """ @@ -267,7 +272,7 @@ Read data from an Excel file into a DataFrame. # Examples ```jldoctest -julia> read_xlsx(joinpath(tempdir(), "xlsxtest.xlsx"), sheet = "REPORT_A", skip = 1, n_max = 4, missingstring = [2]) +julia> read_xlsx("xlsxtest.xlsx", sheet = "REPORT_A", skip = 1, n_max = 4, missingstring = [2]) 3×3 DataFrame Row │ integers strings floats │ Any String Float64 @@ -296,7 +301,7 @@ julia> df = DataFrame(integers=[1, 2, 3, 4], julia> df2 = DataFrame(AA=["aa", "bb"], AB=[10.1, 10.2]); -julia> write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path=joinpath(tempdir(), "xlsxtest.xlsx"), overwrite = true); +julia> write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path="xlsxtest.xlsx", overwrite = true); ``` """ @@ -316,7 +321,7 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. 
If Inf, # Examples ```jldoctest -julia> read_sas(joinpath(tempdir(), "test.dta")) +julia> read_dta("test.dta") 2×2 DataFrame Row │ AA AB │ String3 Float64 @@ -341,7 +346,7 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. If Inf, # Examples ```jldoctest -julia> read_sas(joinpath(tempdir(), "test.sas7bdat")) +julia> read_sas("test.sas7bdat") 2×2 DataFrame Row │ AA AB │ String3 Float64 @@ -349,7 +354,7 @@ julia> read_sas(joinpath(tempdir(), "test.sas7bdat")) 1 │ sav 10.1 2 │ por 10.2 -julia> read_sas(joinpath(tempdir(), "test.xpt")) +julia> read_sas("test.xpt") 2×2 DataFrame Row │ AA AB │ String3 Float64 @@ -373,7 +378,7 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. If Inf, # Examples ```jldoctest -julia> read_sav(joinpath(tempdir(), "test.sav")) +julia> read_sav("test.sav") 2×2 DataFrame Row │ AA AB │ String Float64 @@ -381,7 +386,7 @@ julia> read_sav(joinpath(tempdir(), "test.sav")) 1 │ sav 10.1 2 │ por 10.2 -julia> read_sav(joinpath(tempdir(), "test.por")) +julia> read_sav("test.por") 2×2 DataFrame Row │ AA AB │ String Float64 @@ -404,7 +409,7 @@ Arguments ```jldoctest julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); -julia> write_sav(df, joinpath(tempdir(), "test.sav")) +julia> write_sav(df, "test.sav") 2×2 ReadStatTable: Row │ AA AB │ String Float64? @@ -412,7 +417,7 @@ julia> write_sav(df, joinpath(tempdir(), "test.sav")) 1 │ sav 10.1 2 │ por 10.2 -julia> write_sav(df, joinpath(tempdir(), "test.por")) +julia> write_sav(df, "test.por") 2×2 ReadStatTable: Row │ AA AB │ String Float64? @@ -434,7 +439,7 @@ Arguments ```jldoctest julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); -julia> write_sas(df , joinpath(tempdir(), "test.sas7bdat")) +julia> write_sas(df , "test.sas7bdat") 2×2 ReadStatTable: Row │ AA AB │ String Float64? 
@@ -442,7 +447,7 @@ julia> write_sas(df , joinpath(tempdir(), "test.sas7bdat")) 1 │ sav 10.1 2 │ por 10.2 -julia> write_sas(df , joinpath(tempdir(), "test.xpt")) +julia> write_sas(df , "test.xpt") 2×2 ReadStatTable: Row │ AA AB │ String Float64? @@ -465,7 +470,7 @@ Arguments ```jldoctest julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); -julia> write_dta(df , joinpath(tempdir(), "test.dta")) +julia> write_dta(df , "test.dta") 2×2 ReadStatTable: Row │ AA AB │ String Float64? diff --git a/src/statsfiles.jl b/src/statsfiles.jl index 5225d97..2012350 100644 --- a/src/statsfiles.jl +++ b/src/statsfiles.jl @@ -1,24 +1,3 @@ -""" -$docstring_write_sas -""" -function write_sas(df::DataFrame, path::String) - writestat(path, df); -end - -""" -$docstring_write_sav -""" -function write_sav(df::DataFrame, path::String) - return writestat(path, df); -end - -""" -$docstring_write_dta -""" -function write_dta(df::DataFrame, path::String) - return writestat(path, df); -end - """ $docstring_read_sas """ @@ -152,4 +131,25 @@ function read_dta(data_file; df = DataFrame(ReadStatTables.readstat(file_to_read; kwargs...)) return df +end + +""" +$docstring_write_sas +""" +function write_sas(df::DataFrame, path::String) + writestat(path, df); +end + +""" +$docstring_write_sav +""" +function write_sav(df::DataFrame, path::String) + return writestat(path, df); +end + +""" +$docstring_write_dta +""" +function write_dta(df::DataFrame, path::String) + return writestat(path, df); end \ No newline at end of file diff --git a/src/xlfiles.jl b/src/xlfiles.jl index ea4057e..eeafd75 100644 --- a/src/xlfiles.jl +++ b/src/xlfiles.jl @@ -32,22 +32,6 @@ function convert_column(column) end end -""" -$docstring_write_xlsx -""" -function write_xlsx(x; path::String, overwrite::Bool=false) - # Handling a single DataFrame input - if x isa Pair{String, DataFrame} - # Single sheet: Convert the single DataFrame to the required structure - XLSX.writetable(path, x, overwrite=overwrite) - elseif x isa Tuple - 
# Multiple sheets: Unpack the tuple of pairs directly to XLSX.writetable - XLSX.writetable(path, x..., overwrite=overwrite) - else - error("Input must be a Pair of a sheet name and a DataFrame or a Tuple of such Pairs for multiple sheets.") - end -end - """ $docstring_read_xlsx """ @@ -134,4 +118,20 @@ function read_xlsx( end return data +end + +""" +$docstring_write_xlsx +""" +function write_xlsx(x; path::String, overwrite::Bool=false) + # Handling a single DataFrame input + if x isa Pair{String, DataFrame} + # Single sheet: Convert the single DataFrame to the required structure + XLSX.writetable(path, x, overwrite=overwrite) + elseif x isa Tuple + # Multiple sheets: Unpack the tuple of pairs directly to XLSX.writetable + XLSX.writetable(path, x..., overwrite=overwrite) + else + error("Input must be a Pair of a sheet name and a DataFrame or a Tuple of such Pairs for multiple sheets.") + end end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index a336d58..edb4095 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -26,27 +26,27 @@ DocMeta.setdocmeta!(TidierFiles, :DocTestSetup, :(begin Hank Zuse 45 12345 System Analyst 120,000 """ - file = open(joinpath(tempdir(), "fwftest.txt"), "w") - write(file, fwf_data) - close(file) + open("fwftest.txt", "w") do file + write(file, fwf_data) + end; df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); - write_csv(df, joinpath(tempdir(), "csvtest.csv")); - write_table(df, joinpath(tempdir(), "tabletest.txt")); - write_tsv(df, joinpath(tempdir(), "tsvtest.tsv")); + write_csv(df, "csvtest.csv"); + write_table(df, "tabletest.txt"); + write_tsv(df, "tsvtest.tsv"); df = DataFrame(integers=[1, 2, 3, 4], strings=["This", "Package makes", "File reading/writing", "even smoother"], floats=[10.2, 20.3, 30.4, 40.5]); df2 = DataFrame(AA=["aa", "bb"], AB=[10.1, 10.2]); - write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path=joinpath(tempdir(), 
"xlsxtest.xlsx"), overwrite = true); + write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path="xlsxtest.xlsx", overwrite = true); df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); - write_sav(df, joinpath(tempdir(), "test.sav")); - write_sav(df, joinpath(tempdir(), "test.por")); - write_sas(df , joinpath(tempdir(), "test.sas7bdat")); - write_sas(df , joinpath(tempdir(), "test.xpt")); - write_dta(df , joinpath(tempdir(), "test.dta")); + write_sav(df, "test.sav"); + write_sav(df, "test.por"); + write_sas(df , "test.sas7bdat"); + write_sas(df , "test.xpt"); + write_dta(df ,"test.dta"); end); recursive=true)