From 2a99ddebe90a6ef0af0bde151e980200bcbc027e Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Mon, 8 Apr 2024 09:06:50 -0400 Subject: [PATCH 01/13] Updated gitignore, files, Project.toml, doctests --- .gitignore | 3 ++ Manifest.toml | 75 +++++++++++++++++++++--------------- Project.toml | 8 ++++ docs/Project.toml | 2 +- src/TidierFiles.jl | 6 +-- src/docstrings.jl | 66 ++++++++++++++++--------------- src/statsfiles.jl | 6 +-- testing_files/test.dta | Bin 1857 -> 1857 bytes testing_files/test.por | 4 +- testing_files/test.sas7bdat | Bin 16384 -> 16384 bytes testing_files/test.sav | Bin 467 -> 467 bytes testing_files/test.xpt | Bin 1120 -> 1120 bytes testing_files/xlsxtest.xlsx | Bin 7130 -> 7130 bytes 13 files changed, 97 insertions(+), 73 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0d0539c --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/Manifest.toml +/.vscode +/testing_files/ \ No newline at end of file diff --git a/Manifest.toml b/Manifest.toml index ba2a110..6383867 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -1,6 +1,6 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.9.1" +julia_version = "1.10.0" manifest_format = "2.0" project_hash = "13c005244b6149473cd018459c12b37a69eeb16a" @@ -59,7 +59,7 @@ weakdeps = ["Dates", "LinearAlgebra"] [[deps.CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" -version = "1.0.2+0" +version = "1.0.5+1" [[deps.ConcurrentUtilities]] deps = ["Serialization", "Sockets"] @@ -174,9 +174,9 @@ version = "2.44.0+2" [[deps.HTTP]] deps = ["Base64", "CodecZlib", "ConcurrentUtilities", "Dates", "ExceptionUnwrapping", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"] -git-tree-sha1 = "995f762e0182ebc50548c434c171a5bb6635f8e4" +git-tree-sha1 = "8e59b47b9dc525b70550ca082ce85bcd7f5477cd" uuid 
= "cd3eb016-35fb-5094-929b-558a96fad6f3" -version = "1.10.4" +version = "1.10.5" [[deps.IOCapture]] deps = ["Logging", "Random"] @@ -229,21 +229,26 @@ version = "1.2.2" [[deps.LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" -version = "0.6.3" +version = "0.6.4" [[deps.LibCURL_jll]] deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" -version = "7.84.0+0" +version = "8.4.0+0" [[deps.LibGit2]] -deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" +[[deps.LibGit2_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] +uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" +version = "1.6.4+0" + [[deps.LibSSH2_jll]] deps = ["Artifacts", "Libdl", "MbedTLS_jll"] uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" -version = "1.10.2+0" +version = "1.11.0+1" [[deps.Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" @@ -267,6 +272,11 @@ git-tree-sha1 = "c1dd6d7978c12545b4179fb6153b9250c96b0075" uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" version = "1.0.3" +[[deps.MappedArrays]] +git-tree-sha1 = "2dab0221fe2b0f2cb6754eaa743cc266339f527e" +uuid = "dbb5928d-eab1-5f90-85c2-b9b0edb7c900" +version = "0.4.2" + [[deps.Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" @@ -286,20 +296,20 @@ version = "1.1.9" [[deps.MbedTLS_jll]] deps = ["Artifacts", "Libdl"] uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" -version = "2.28.2+0" +version = "2.28.2+1" [[deps.Missings]] deps = ["DataAPI"] -git-tree-sha1 = "f66bdc5de519e8f8ae43bdc598782d35a25b1272" +git-tree-sha1 = "ec4f7fbeab05d7747bdf98eb74d130a2a2ed298d" uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" -version = "1.1.0" +version = "1.2.0" [[deps.Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" [[deps.MozillaCACerts_jll]] uuid = 
"14a3606d-f60d-562e-9121-12d972cd8159" -version = "2022.10.11" +version = "2023.1.10" [[deps.NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" @@ -308,7 +318,7 @@ version = "1.2.0" [[deps.OpenBLAS_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" -version = "0.3.21+4" +version = "0.3.23+2" [[deps.OpenSSL]] deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"] @@ -318,9 +328,9 @@ version = "1.4.2" [[deps.OpenSSL_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "60e3045590bd104a16fefb12836c00c0ef8c7f8c" +git-tree-sha1 = "3da7367955dcc5c54c1ba4d402ccdc09a1a3e046" uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" -version = "3.0.13+0" +version = "3.0.13+1" [[deps.OrderedCollections]] git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5" @@ -330,7 +340,7 @@ version = "1.6.3" [[deps.PCRE2_jll]] deps = ["Artifacts", "Libdl"] uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" -version = "10.42.0+0" +version = "10.42.0+1" [[deps.Parsers]] deps = ["Dates", "PrecompileTools", "UUIDs"] @@ -341,7 +351,7 @@ version = "2.8.1" [[deps.Pkg]] deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -version = "1.9.0" +version = "1.10.0" [[deps.PooledArrays]] deps = ["DataAPI", "Future"] @@ -376,14 +386,14 @@ deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" [[deps.Random]] -deps = ["SHA", "Serialization"] +deps = ["SHA"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [[deps.ReadStatTables]] -deps = ["CEnum", "DataAPI", "Dates", "InlineStrings", "PooledArrays", "PrecompileTools", "PrettyTables", "ReadStat_jll", "SentinelArrays", "StructArrays", "Tables"] -git-tree-sha1 = "7a2c572e97f5588f5774d2b9024cad18401ec977" +deps = ["CEnum", 
"DataAPI", "Dates", "InlineStrings", "MappedArrays", "Missings", "PooledArrays", "PrecompileTools", "PrettyTables", "ReadStat_jll", "SentinelArrays", "StructArrays", "Tables"] +git-tree-sha1 = "97140dfb54eabb5e99d13d1e09554d7456bb184c" uuid = "52522f7a-9570-4e34-8ac6-c005c74d4b84" -version = "0.2.6" +version = "0.3.1" [[deps.ReadStat_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Zlib_jll"] @@ -432,11 +442,12 @@ version = "1.2.1" [[deps.SparseArrays]] deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +version = "1.10.0" [[deps.Statistics]] deps = ["LinearAlgebra", "SparseArrays"] uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" -version = "1.9.0" +version = "1.10.0" [[deps.StringManipulation]] deps = ["PrecompileTools"] @@ -463,9 +474,9 @@ version = "0.6.18" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" [[deps.SuiteSparse_jll]] -deps = ["Artifacts", "Libdl", "Pkg", "libblastrampoline_jll"] +deps = ["Artifacts", "Libdl", "libblastrampoline_jll"] uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" -version = "5.10.1+6" +version = "7.2.1+1" [[deps.TOML]] deps = ["Dates"] @@ -494,9 +505,9 @@ deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [[deps.TranscodingStreams]] -git-tree-sha1 = "14389d51751169994b2e1317d5c72f7dc4f21045" +git-tree-sha1 = "71509f04d045ec714c4748c785a59045c3736349" uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" -version = "0.10.6" +version = "0.10.7" weakdeps = ["Random", "Test"] [deps.TranscodingStreams.extensions] @@ -533,9 +544,9 @@ version = "0.10.1" [[deps.XML2_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Zlib_jll"] -git-tree-sha1 = "07e470dabc5a6a4254ffebc29a1b3fc01464e105" +git-tree-sha1 = "532e22cf7be8462035d092ff21fada7527e2c488" uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a" -version = "2.12.5+0" +version = "2.12.6+0" [[deps.ZipFile]] deps = ["Libdl", "Printf", 
"Zlib_jll"] @@ -546,19 +557,19 @@ version = "0.10.1" [[deps.Zlib_jll]] deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" -version = "1.2.13+0" +version = "1.2.13+1" [[deps.libblastrampoline_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" -version = "5.8.0+0" +version = "5.8.0+1" [[deps.nghttp2_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" -version = "1.48.0+0" +version = "1.52.0+1" [[deps.p7zip_jll]] deps = ["Artifacts", "Libdl"] uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" -version = "17.4.0+0" +version = "17.4.0+2" diff --git a/Project.toml b/Project.toml index a53bfe2..aa8e1be 100644 --- a/Project.toml +++ b/Project.toml @@ -15,6 +15,14 @@ XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0" [compat] julia = "1.9" +CSV = "0.10" +DataFrames = "1.5" +Dates = "1.9" +Documenter = "0.27, 1" +HTTP = "1.10" +ReadStatTables = "0.3" +Reexport = "0.2, 1" +XLSX = "0.10" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/docs/Project.toml b/docs/Project.toml index b7e7618..87526c2 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -8,5 +8,5 @@ Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" ReadStatTables = "52522f7a-9570-4e34-8ac6-c005c74d4b84" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" -XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0 +XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0" TidierFiles = "8ae5e7a9-bdd3-4c93-9cc3-9df4d5d947db" \ No newline at end of file diff --git a/src/TidierFiles.jl b/src/TidierFiles.jl index a45039f..1138f5f 100644 --- a/src/TidierFiles.jl +++ b/src/TidierFiles.jl @@ -103,7 +103,7 @@ function write_csv( file, x, append = append, - writeheader = col_names && !append, + header = col_names && !append, missingstring = missingstring, newline = eol, threaded = num_threads > 1 ) @@ -185,7 +185,7 @@ function write_tsv( x, delim = '\t', # Use tab as the delimiter for TSV append = append, - writeheader = 
col_names && !append, + header = col_names && !append, missingstring = missingstring, newline = eol, threaded = num_threads > 1) @@ -331,7 +331,7 @@ function write_table( x, delim = delim, # Flexible delimiter based on argument append = append, - writeheader = col_names && !append, + header = col_names && !append, missingstring = missingstring, newline = eol, threaded = num_threads > 1) diff --git a/src/docstrings.jl b/src/docstrings.jl index ef88e0d..ca1e0c7 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -21,7 +21,7 @@ Reads a CSV file or URL into a DataFrame, with options to specify delimiter, col julia> read_csv(joinpath(testing_files_path, "csvtest.csv"), skip = 2, n_max = 3, missingstring = ["95", "Charlie"]) 3×3 DataFrame Row │ ID Name Score - │ Int64 String7 Int64? + │ Int64 String7 Int64? ─────┼───────────────────────── 1 │ 3 missing 77 2 │ 4 David 85 @@ -52,8 +52,8 @@ Reads a TSV file or URL into a DataFrame, with options to specify delimiter, col ```jldoctest julia> read_tsv(joinpath(testing_files_path, "tsvtest.tsv"), skip = 2, n_max = 3, missingstring = ["Charlie"]) 3×3 DataFrame - Row │ ID Name Score - │ Int64 String7 Int64 + Row │ ID Name Score + │ Int64 String7 Int64 ─────┼─────────────────────── 1 │ 3 missing 77 2 │ 4 David 85 @@ -260,7 +260,7 @@ Read data from an Excel file into a DataFrame. -`range`: Specifies a specific range of cells to be read from the sheet. If nothing, the entire sheet is read. -`col_names`: Indicates whether the first row of the specified range should be treated as column names. If false, columns will be named automatically. -`col_types`: Allows specifying column types explicitly. Can be a single type applied to all columns, a list or a dictionary mapping column names or indices to types. If nothing, types will be inferred. --`missingstring`: The string that represents missing values in the Excel file. +-`missingstring`: The value or vector that represents missing values in the Excel file. 
-`trim_ws`: Whether to trim leading and trailing whitespace from cells in the Excel file. -`skip`: Number of rows to skip at the beginning of the sheet or range before reading data. -`n_max`: The maximum number of rows to read from the sheet or range, after skipping. Inf means read all available rows. @@ -291,7 +291,9 @@ Write a DataFrame, or multiple DataFrames, to an Excel file. # Examples ```jldoctest -julia> df = DataFrame(integers=[1, 2, 3, 4], strings=["This", "Package makes", "File reading/writing", "even smoother"], floats=[10.2, 20.3, 30.4, 40.5]); +julia> df = DataFrame(integers=[1, 2, 3, 4], + strings=["This", "Package makes", "File reading/writing", "even smoother"], + floats=[10.2, 20.3, 30.4, 40.5]); julia> df2 = DataFrame(AA=["aa", "bb"], AB=[10.1, 10.2]); @@ -343,7 +345,7 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. If Inf, julia> read_sas(joinpath(testing_files_path, "test.sas7bdat")) 2×2 DataFrame Row │ AA AB - │ String Float64 + │ String3 Float64 ─────┼────────────────── 1 │ sav 10.1 2 │ por 10.2 @@ -351,7 +353,7 @@ julia> read_sas(joinpath(testing_files_path, "test.sas7bdat")) julia> read_sas(joinpath(testing_files_path, "test.xpt")) 2×2 DataFrame Row │ AA AB - │ String Float64 + │ String3 Float64 ─────┼────────────────── 1 │ sav 10.1 2 │ por 10.2 @@ -405,19 +407,19 @@ julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); julia> write_sav(df , joinpath(testing_files_path, "test.sav")) 2×2 ReadStatTable: - Row │ AA AB - │ String Float64 -─────┼───────────────── - 1 │ sav 10.1 - 2 │ por 10.2 + Row │ AA AB + │ String Float64? +─────┼────────────────── + 1 │ sav 10.1 + 2 │ por 10.2 julia> write_sav(df , joinpath(testing_files_path, "test.por")) 2×2 ReadStatTable: - Row │ AA AB - │ String Float64 -─────┼───────────────── - 1 │ sav 10.1 - 2 │ por 10.2 + Row │ AA AB + │ String Float64? 
+─────┼────────────────── + 1 │ sav 10.1 + 2 │ por 10.2 ``` """ const docstring_write_sas = @@ -435,19 +437,19 @@ julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); julia> write_sav(df , joinpath(testing_files_path, "test.sas7bdat")) 2×2 ReadStatTable: - Row │ AA AB - │ String Float64 -─────┼───────────────── - 1 │ sav 10.1 - 2 │ por 10.2 + Row │ AA AB + │ String Float64? +─────┼────────────────── + 1 │ sav 10.1 + 2 │ por 10.2 julia> write_sav(df , joinpath(testing_files_path, "test.xpt")) 2×2 ReadStatTable: - Row │ AA AB - │ String Float64 -─────┼───────────────── - 1 │ sav 10.1 - 2 │ por 10.2 + Row │ AA AB + │ String Float64? +─────┼────────────────── + 1 │ sav 10.1 + 2 │ por 10.2 ``` """ @@ -466,10 +468,10 @@ julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); julia> write_dta(df , joinpath(testing_files_path, "test.dta")) 2×2 ReadStatTable: - Row │ AA AB - │ String Float64 -─────┼───────────────── - 1 │ sav 10.1 - 2 │ por 10.2 + Row │ AA AB + │ String Float64? +─────┼────────────────── + 1 │ sav 10.1 + 2 │ por 10.2 ``` """ \ No newline at end of file diff --git a/src/statsfiles.jl b/src/statsfiles.jl index 297035e..14d8e68 100644 --- a/src/statsfiles.jl +++ b/src/statsfiles.jl @@ -31,7 +31,7 @@ function read_sas(data_file; :row_limit => n_max == Inf ? nothing : n_max, # Convert Inf to nothing for unlimited :row_offset => skip, # Skip the specified number of rows :ntasks => num_threads > 1 ? num_threads : nothing, # Use num_threads for parallel reading if > 1 - :convert_datetime => true, # Assuming default behavior is to convert datetime + # :convert_datetime => true, # Assuming default behavior is to convert datetime :apply_value_labels => true, # Apply value labels if available :file_encoding => encoding, # Set file encoding if provided :handler_encoding => encoding != nothing ? encoding : "UTF-8" # Set handler encoding, default to UTF-8 @@ -76,7 +76,7 @@ function read_sav(data_file; :row_limit => n_max == Inf ? 
nothing : n_max, # Convert Inf to nothing for unlimited :row_offset => skip, # Skip the specified number of rows :ntasks => num_threads > 1 ? num_threads : nothing, # Use num_threads for parallel reading if > 1 - :convert_datetime => true, # Assuming default behavior is to convert datetime + # :convert_datetime => true, # Assuming default behavior is to convert datetime :apply_value_labels => true, # Apply value labels if available :file_encoding => encoding, # Set file encoding if provided :handler_encoding => encoding != nothing ? encoding : "UTF-8" # Set handler encoding, default to UTF-8 @@ -121,7 +121,7 @@ function read_dta(data_file; :row_limit => n_max == Inf ? nothing : n_max, # Convert Inf to nothing for unlimited :row_offset => skip, # Skip the specified number of rows :ntasks => num_threads > 1 ? num_threads : nothing, # Use num_threads for parallel reading if > 1 - :convert_datetime => true, # Assuming default behavior is to convert datetime + # :convert_datetime => true, # Assuming default behavior is to convert datetime :apply_value_labels => true, # Apply value labels if available :file_encoding => encoding, # Set file encoding if provided :handler_encoding => encoding != nothing ? 
encoding : "UTF-8" # Set handler encoding, default to UTF-8 diff --git a/testing_files/test.dta b/testing_files/test.dta index 36a2c0fbf70b83ca64aa108c149be0df1154c067..e1032ccaa20f4185add2e01babc98e0d2dd4b397 100644 GIT binary patch delta 28 jcmX@ecaU#Fg`k0jf@49Ef{}rdiGqQtm4Wfb&K+z3eEbMy delta 28 jcmX@ecaU#Fg`knCf^TAxf{}rdiGrb_m7&4L&K+z3e4_|l diff --git a/testing_files/test.por b/testing_files/test.por index fa874a4..95c7122 100644 --- a/testing_files/test.por +++ b/testing_files/test.por @@ -3,7 +3,7 @@ 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrst uvwxyz .<(+|&[]!$*);^-/|,%_>?`:#@'="000000~000000000000000000000{}\0000000000000 -00000000000000000000000000000000000000000000000000000000SPSSPORTA8/202403256/111 -02918/ReadStat317/https://github.com/WizardMac/ReadStat42/51K/73/2/AA1/9/0/1/9/0 +00000000000000000000000000000000000000000000000000000000SPSSPORTA8/202404086/050 +31718/ReadStat317/https://github.com/WizardMac/ReadStat42/51K/73/2/AA1/9/0/1/9/0 /70/2/AB5/9/2/5/9/2/E0/F3/savA.2TTTTTTTTTNL5OO5E86AQKLF4DBN91H8O69O28QFPEJDHOB7F /3/porA.5TTTTTTTTTHCBJIASGCLNBD08QNGI34HICJI4HN1KT8R5IMF/ZZZZZZZZZZZZZZZZZZZZZZZ diff --git a/testing_files/test.sas7bdat b/testing_files/test.sas7bdat index 51fbe965a0db630cfe52e883538cdd999c401ab3..314176d955cecd7c86de6ce08998f56ebc3075a9 100644 GIT binary patch delta 24 ecmZo@U~Fh$oUojG>MM(TjtmSAKzidQKYIXr7779Y delta 24 ecmZo@U~Fh$oUokxW|GN0M+OE5AiZ&upFIF|1_^Qi diff --git a/testing_files/test.sav b/testing_files/test.sav index e95e0bc967fa64aa2c51d5c39422086444811fc5..0a5be601563d2a8b9c0983c4efb332eb1529cc31 100644 GIT binary patch delta 34 qcmcc2e3^MdjG%#qf@49Ef{}@Vsg;4Tm7)2>LYs*VEF0IVF#-U#2ntsK delta 34 qcmcc2e3^MdjG&RJf^TAxf{}@#p_QS5m6657LYs*VEF0IVF#-U!feKIn diff --git a/testing_files/test.xpt b/testing_files/test.xpt index 4ea905fdae3acde4dcb27e8eff65a95f3e7aef0b..6d96dc0961b83654f2d4bf461167c240da2efd39 100644 GIT 
binary patch delta 86 ucmaFB@qlB(1O)>N$ABOs6DtE#D+6OILvwWg#Ete7-#;Wq*=BtvLnZ(=%@^kY delta 86 ucmaFB@qlB(1O+2gU&kOL6DvbQD?!TM7)IyAR_xjbs-|}!~`G=MhQ+ZV=}L#9mtr;ev;N;x<}FqOrMmr0Mjf| f5P4TAXE488$_7lIg7O8VUBUbaX)`cAQQ8v#`sO|X delta 200 zcmca*e#@LUz?+#xgn@y9gTc45awG2!HfA8b`2^b{Mld6ngM%5&sNmkf1ZHUS)j_1z z@Y_K|-U>!TM7)IyAR_xjbs-|}!~`G=MhQ+ZV=}L#9mtr;ev;N;x<}FqOrMmr0Mjf| f5P4TAXE488$_7lIg7O8VUBUbaX)`cAQQ8v#IqN>L From ed66a9a850c5e34d61de4eadc31b081ad0ec6f1a Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Mon, 8 Apr 2024 09:08:58 -0400 Subject: [PATCH 02/13] remove Manifest.toml --- Manifest.toml | 575 -------------------------------------------------- 1 file changed, 575 deletions(-) delete mode 100644 Manifest.toml diff --git a/Manifest.toml b/Manifest.toml deleted file mode 100644 index 6383867..0000000 --- a/Manifest.toml +++ /dev/null @@ -1,575 +0,0 @@ -# This file is machine-generated - editing it directly is not advised - -julia_version = "1.10.0" -manifest_format = "2.0" -project_hash = "13c005244b6149473cd018459c12b37a69eeb16a" - -[[deps.ANSIColoredPrinters]] -git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c" -uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9" -version = "0.0.1" - -[[deps.AbstractTrees]] -git-tree-sha1 = "2d9c9a55f9c93e8887ad391fbae72f8ef55e1177" -uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" -version = "0.4.5" - -[[deps.ArgTools]] -uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" -version = "1.1.1" - -[[deps.Artifacts]] -uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" - -[[deps.Base64]] -uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" - -[[deps.BitFlags]] -git-tree-sha1 = "2dc09997850d68179b69dafb58ae806167a32b1b" -uuid = "d1d4a3ce-64b1-5f1a-9ba4-7e7e69966f35" -version = "0.1.8" - -[[deps.CEnum]] -git-tree-sha1 = "389ad5c84de1ae7cf0e28e381131c98ea87d54fc" -uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" -version = "0.5.0" - -[[deps.CSV]] -deps = ["CodecZlib", "Dates", "FilePathsBase", "InlineStrings", "Mmap", "Parsers", "PooledArrays", 
"PrecompileTools", "SentinelArrays", "Tables", "Unicode", "WeakRefStrings", "WorkerUtilities"] -git-tree-sha1 = "a44910ceb69b0d44fe262dd451ab11ead3ed0be8" -uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" -version = "0.10.13" - -[[deps.CodecZlib]] -deps = ["TranscodingStreams", "Zlib_jll"] -git-tree-sha1 = "59939d8a997469ee05c4b4944560a820f9ba0d73" -uuid = "944b1d66-785c-5afd-91f1-9de20f533193" -version = "0.7.4" - -[[deps.Compat]] -deps = ["TOML", "UUIDs"] -git-tree-sha1 = "c955881e3c981181362ae4088b35995446298b80" -uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" -version = "4.14.0" -weakdeps = ["Dates", "LinearAlgebra"] - - [deps.Compat.extensions] - CompatLinearAlgebraExt = "LinearAlgebra" - -[[deps.CompilerSupportLibraries_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" -version = "1.0.5+1" - -[[deps.ConcurrentUtilities]] -deps = ["Serialization", "Sockets"] -git-tree-sha1 = "6cbbd4d241d7e6579ab354737f4dd95ca43946e1" -uuid = "f0e56b4a-5159-44fe-b623-3e5288b988bb" -version = "2.4.1" - -[[deps.ConstructionBase]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "260fd2400ed2dab602a7c15cf10c1933c59930a2" -uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9" -version = "1.5.5" - - [deps.ConstructionBase.extensions] - ConstructionBaseIntervalSetsExt = "IntervalSets" - ConstructionBaseStaticArraysExt = "StaticArrays" - - [deps.ConstructionBase.weakdeps] - IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" - StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" - -[[deps.Crayons]] -git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15" -uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" -version = "4.1.1" - -[[deps.DataAPI]] -git-tree-sha1 = "abe83f3a2f1b857aac70ef8b269080af17764bbe" -uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" -version = "1.16.0" - -[[deps.DataFrames]] -deps = ["Compat", "DataAPI", "DataStructures", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", 
"PooledArrays", "PrecompileTools", "PrettyTables", "Printf", "REPL", "Random", "Reexport", "SentinelArrays", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] -git-tree-sha1 = "04c738083f29f86e62c8afc341f0967d8717bdb8" -uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -version = "1.6.1" - -[[deps.DataStructures]] -deps = ["Compat", "InteractiveUtils", "OrderedCollections"] -git-tree-sha1 = "0f4b5d62a88d8f59003e43c25a8a90de9eb76317" -uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" -version = "0.18.18" - -[[deps.DataValueInterfaces]] -git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" -uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" -version = "1.0.0" - -[[deps.Dates]] -deps = ["Printf"] -uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" - -[[deps.DocStringExtensions]] -deps = ["LibGit2"] -git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" -uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -version = "0.9.3" - -[[deps.Documenter]] -deps = ["ANSIColoredPrinters", "AbstractTrees", "Base64", "CodecZlib", "Dates", "DocStringExtensions", "Downloads", "Git", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "MarkdownAST", "Pkg", "PrecompileTools", "REPL", "RegistryInstances", "SHA", "TOML", "Test", "Unicode"] -git-tree-sha1 = "4a40af50e8b24333b9ec6892546d9ca5724228eb" -uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -version = "1.3.0" - -[[deps.Downloads]] -deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] -uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" -version = "1.6.0" - -[[deps.ExceptionUnwrapping]] -deps = ["Test"] -git-tree-sha1 = "dcb08a0d93ec0b1cdc4af184b26b591e9695423a" -uuid = "460bff9d-24e4-43bc-9d9f-a8973cb893f4" -version = "0.1.10" - -[[deps.Expat_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "4558ab818dcceaab612d1bb8c19cee87eda2b83c" -uuid = "2e619515-83b5-522b-bb60-26c02a35a201" -version = "2.5.0+0" - -[[deps.EzXML]] -deps = ["Printf", "XML2_jll"] -git-tree-sha1 = 
"380053d61bb9064d6aa4a9777413b40429c79901" -uuid = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615" -version = "1.2.0" - -[[deps.FilePathsBase]] -deps = ["Compat", "Dates", "Mmap", "Printf", "Test", "UUIDs"] -git-tree-sha1 = "9f00e42f8d99fdde64d40c8ea5d14269a2e2c1aa" -uuid = "48062228-2e41-5def-b9a4-89aafe57970f" -version = "0.9.21" - -[[deps.FileWatching]] -uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" - -[[deps.Future]] -deps = ["Random"] -uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" - -[[deps.Git]] -deps = ["Git_jll"] -git-tree-sha1 = "04eff47b1354d702c3a85e8ab23d539bb7d5957e" -uuid = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2" -version = "1.3.1" - -[[deps.Git_jll]] -deps = ["Artifacts", "Expat_jll", "JLLWrappers", "LibCURL_jll", "Libdl", "Libiconv_jll", "OpenSSL_jll", "PCRE2_jll", "Zlib_jll"] -git-tree-sha1 = "d18fb8a1f3609361ebda9bf029b60fd0f120c809" -uuid = "f8c6e375-362e-5223-8a59-34ff63f689eb" -version = "2.44.0+2" - -[[deps.HTTP]] -deps = ["Base64", "CodecZlib", "ConcurrentUtilities", "Dates", "ExceptionUnwrapping", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"] -git-tree-sha1 = "8e59b47b9dc525b70550ca082ce85bcd7f5477cd" -uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" -version = "1.10.5" - -[[deps.IOCapture]] -deps = ["Logging", "Random"] -git-tree-sha1 = "8b72179abc660bfab5e28472e019392b97d0985c" -uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89" -version = "0.2.4" - -[[deps.InlineStrings]] -deps = ["Parsers"] -git-tree-sha1 = "9cc2baf75c6d09f9da536ddf58eb2f29dedaf461" -uuid = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48" -version = "1.4.0" - -[[deps.InteractiveUtils]] -deps = ["Markdown"] -uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" - -[[deps.InvertedIndices]] -git-tree-sha1 = "0dc7b50b8d436461be01300fd8cd45aa0274b038" -uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" -version = "1.3.0" - -[[deps.IteratorInterfaceExtensions]] -git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" -uuid = 
"82899510-4779-5014-852e-03e436cf321d" -version = "1.0.0" - -[[deps.JLLWrappers]] -deps = ["Artifacts", "Preferences"] -git-tree-sha1 = "7e5d6779a1e09a36db2a7b6cff50942a0a7d0fca" -uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" -version = "1.5.0" - -[[deps.JSON]] -deps = ["Dates", "Mmap", "Parsers", "Unicode"] -git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" -uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -version = "0.21.4" - -[[deps.LaTeXStrings]] -git-tree-sha1 = "50901ebc375ed41dbf8058da26f9de442febbbec" -uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" -version = "1.3.1" - -[[deps.LazilyInitializedFields]] -git-tree-sha1 = "8f7f3cabab0fd1800699663533b6d5cb3fc0e612" -uuid = "0e77f7df-68c5-4e49-93ce-4cd80f5598bf" -version = "1.2.2" - -[[deps.LibCURL]] -deps = ["LibCURL_jll", "MozillaCACerts_jll"] -uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" -version = "0.6.4" - -[[deps.LibCURL_jll]] -deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] -uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" -version = "8.4.0+0" - -[[deps.LibGit2]] -deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"] -uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" - -[[deps.LibGit2_jll]] -deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] -uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" -version = "1.6.4+0" - -[[deps.LibSSH2_jll]] -deps = ["Artifacts", "Libdl", "MbedTLS_jll"] -uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" -version = "1.11.0+1" - -[[deps.Libdl]] -uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" - -[[deps.Libiconv_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "f9557a255370125b405568f9767d6d195822a175" -uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" -version = "1.17.0+0" - -[[deps.LinearAlgebra]] -deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] -uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" - -[[deps.Logging]] -uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" - -[[deps.LoggingExtras]] -deps = ["Dates", 
"Logging"] -git-tree-sha1 = "c1dd6d7978c12545b4179fb6153b9250c96b0075" -uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" -version = "1.0.3" - -[[deps.MappedArrays]] -git-tree-sha1 = "2dab0221fe2b0f2cb6754eaa743cc266339f527e" -uuid = "dbb5928d-eab1-5f90-85c2-b9b0edb7c900" -version = "0.4.2" - -[[deps.Markdown]] -deps = ["Base64"] -uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" - -[[deps.MarkdownAST]] -deps = ["AbstractTrees", "Markdown"] -git-tree-sha1 = "465a70f0fc7d443a00dcdc3267a497397b8a3899" -uuid = "d0879d2d-cac2-40c8-9cee-1863dc0c7391" -version = "0.1.2" - -[[deps.MbedTLS]] -deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "NetworkOptions", "Random", "Sockets"] -git-tree-sha1 = "c067a280ddc25f196b5e7df3877c6b226d390aaf" -uuid = "739be429-bea8-5141-9913-cc70e7f3736d" -version = "1.1.9" - -[[deps.MbedTLS_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" -version = "2.28.2+1" - -[[deps.Missings]] -deps = ["DataAPI"] -git-tree-sha1 = "ec4f7fbeab05d7747bdf98eb74d130a2a2ed298d" -uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" -version = "1.2.0" - -[[deps.Mmap]] -uuid = "a63ad114-7e13-5084-954f-fe012c677804" - -[[deps.MozillaCACerts_jll]] -uuid = "14a3606d-f60d-562e-9121-12d972cd8159" -version = "2023.1.10" - -[[deps.NetworkOptions]] -uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" -version = "1.2.0" - -[[deps.OpenBLAS_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] -uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" -version = "0.3.23+2" - -[[deps.OpenSSL]] -deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"] -git-tree-sha1 = "af81a32750ebc831ee28bdaaba6e1067decef51e" -uuid = "4d8831e6-92b7-49fb-bdf8-b643e874388c" -version = "1.4.2" - -[[deps.OpenSSL_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "3da7367955dcc5c54c1ba4d402ccdc09a1a3e046" -uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" -version = "3.0.13+1" - -[[deps.OrderedCollections]] -git-tree-sha1 = 
"dfdf5519f235516220579f949664f1bf44e741c5" -uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -version = "1.6.3" - -[[deps.PCRE2_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" -version = "10.42.0+1" - -[[deps.Parsers]] -deps = ["Dates", "PrecompileTools", "UUIDs"] -git-tree-sha1 = "8489905bcdbcfac64d1daa51ca07c0d8f0283821" -uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "2.8.1" - -[[deps.Pkg]] -deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] -uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -version = "1.10.0" - -[[deps.PooledArrays]] -deps = ["DataAPI", "Future"] -git-tree-sha1 = "36d8b4b899628fb92c2749eb488d884a926614d3" -uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" -version = "1.4.3" - -[[deps.PrecompileTools]] -deps = ["Preferences"] -git-tree-sha1 = "5aa36f7049a63a1528fe8f7c3f2113413ffd4e1f" -uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" -version = "1.2.1" - -[[deps.Preferences]] -deps = ["TOML"] -git-tree-sha1 = "9306f6085165d270f7e3db02af26a400d580f5c6" -uuid = "21216c6a-2e73-6563-6e65-726566657250" -version = "1.4.3" - -[[deps.PrettyTables]] -deps = ["Crayons", "LaTeXStrings", "Markdown", "PrecompileTools", "Printf", "Reexport", "StringManipulation", "Tables"] -git-tree-sha1 = "88b895d13d53b5577fd53379d913b9ab9ac82660" -uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" -version = "2.3.1" - -[[deps.Printf]] -deps = ["Unicode"] -uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" - -[[deps.REPL]] -deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] -uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" - -[[deps.Random]] -deps = ["SHA"] -uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" - -[[deps.ReadStatTables]] -deps = ["CEnum", "DataAPI", "Dates", "InlineStrings", "MappedArrays", "Missings", "PooledArrays", "PrecompileTools", "PrettyTables", "ReadStat_jll", "SentinelArrays", 
"StructArrays", "Tables"] -git-tree-sha1 = "97140dfb54eabb5e99d13d1e09554d7456bb184c" -uuid = "52522f7a-9570-4e34-8ac6-c005c74d4b84" -version = "0.3.1" - -[[deps.ReadStat_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Zlib_jll"] -git-tree-sha1 = "28e990e90ca643e99f3ec0188089c1816e8b46f4" -uuid = "a4dc8951-f1cc-5499-9034-9ec1c3e64557" -version = "1.1.9+0" - -[[deps.Reexport]] -git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" -uuid = "189a3867-3050-52da-a836-e630ba90ab69" -version = "1.2.2" - -[[deps.RegistryInstances]] -deps = ["LazilyInitializedFields", "Pkg", "TOML", "Tar"] -git-tree-sha1 = "ffd19052caf598b8653b99404058fce14828be51" -uuid = "2792f1a3-b283-48e8-9a74-f99dce5104f3" -version = "0.1.0" - -[[deps.SHA]] -uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" -version = "0.7.0" - -[[deps.SentinelArrays]] -deps = ["Dates", "Random"] -git-tree-sha1 = "0e7508ff27ba32f26cd459474ca2ede1bc10991f" -uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c" -version = "1.4.1" - -[[deps.Serialization]] -uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" - -[[deps.SimpleBufferStream]] -git-tree-sha1 = "874e8867b33a00e784c8a7e4b60afe9e037b74e1" -uuid = "777ac1f9-54b0-4bf8-805c-2214025038e7" -version = "1.1.0" - -[[deps.Sockets]] -uuid = "6462fe0b-24de-5631-8697-dd941f90decc" - -[[deps.SortingAlgorithms]] -deps = ["DataStructures"] -git-tree-sha1 = "66e0a8e672a0bdfca2c3f5937efb8538b9ddc085" -uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" -version = "1.2.1" - -[[deps.SparseArrays]] -deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] -uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" -version = "1.10.0" - -[[deps.Statistics]] -deps = ["LinearAlgebra", "SparseArrays"] -uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" -version = "1.10.0" - -[[deps.StringManipulation]] -deps = ["PrecompileTools"] -git-tree-sha1 = "a04cabe79c5f01f4d723cc6704070ada0b9d46d5" -uuid = "892a3eda-7b42-436c-8928-eab12a02cf0e" -version = "0.3.4" - -[[deps.StructArrays]] 
-deps = ["ConstructionBase", "DataAPI", "Tables"] -git-tree-sha1 = "f4dc295e983502292c4c3f951dbb4e985e35b3be" -uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" -version = "0.6.18" - - [deps.StructArrays.extensions] - StructArraysAdaptExt = "Adapt" - StructArraysGPUArraysCoreExt = "GPUArraysCore" - StructArraysSparseArraysExt = "SparseArrays" - StructArraysStaticArraysExt = "StaticArrays" - - [deps.StructArrays.weakdeps] - Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" - GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" - SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" - StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" - -[[deps.SuiteSparse_jll]] -deps = ["Artifacts", "Libdl", "libblastrampoline_jll"] -uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" -version = "7.2.1+1" - -[[deps.TOML]] -deps = ["Dates"] -uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" -version = "1.0.3" - -[[deps.TableTraits]] -deps = ["IteratorInterfaceExtensions"] -git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39" -uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" -version = "1.0.1" - -[[deps.Tables]] -deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "OrderedCollections", "TableTraits"] -git-tree-sha1 = "cb76cf677714c095e535e3501ac7954732aeea2d" -uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" -version = "1.11.1" - -[[deps.Tar]] -deps = ["ArgTools", "SHA"] -uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" -version = "1.10.0" - -[[deps.Test]] -deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] -uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[[deps.TranscodingStreams]] -git-tree-sha1 = "71509f04d045ec714c4748c785a59045c3736349" -uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" -version = "0.10.7" -weakdeps = ["Random", "Test"] - - [deps.TranscodingStreams.extensions] - TestExt = ["Test", "Random"] - -[[deps.URIs]] -git-tree-sha1 = "67db6cc7b3821e19ebe75791a9dd19c9b1188f2b" -uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" -version = 
"1.5.1" - -[[deps.UUIDs]] -deps = ["Random", "SHA"] -uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" - -[[deps.Unicode]] -uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" - -[[deps.WeakRefStrings]] -deps = ["DataAPI", "InlineStrings", "Parsers"] -git-tree-sha1 = "b1be2855ed9ed8eac54e5caff2afcdb442d52c23" -uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5" -version = "1.4.2" - -[[deps.WorkerUtilities]] -git-tree-sha1 = "cd1659ba0d57b71a464a29e64dbc67cfe83d54e7" -uuid = "76eceee3-57b5-4d4a-8e66-0e911cebbf60" -version = "1.6.1" - -[[deps.XLSX]] -deps = ["Artifacts", "Dates", "EzXML", "Printf", "Tables", "ZipFile"] -git-tree-sha1 = "319b05e790046f18f12b8eae542546518ef1a88f" -uuid = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0" -version = "0.10.1" - -[[deps.XML2_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Zlib_jll"] -git-tree-sha1 = "532e22cf7be8462035d092ff21fada7527e2c488" -uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a" -version = "2.12.6+0" - -[[deps.ZipFile]] -deps = ["Libdl", "Printf", "Zlib_jll"] -git-tree-sha1 = "f492b7fe1698e623024e873244f10d89c95c340a" -uuid = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" -version = "0.10.1" - -[[deps.Zlib_jll]] -deps = ["Libdl"] -uuid = "83775a58-1f1d-513f-b197-d71354ab007a" -version = "1.2.13+1" - -[[deps.libblastrampoline_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" -version = "5.8.0+1" - -[[deps.nghttp2_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" -version = "1.52.0+1" - -[[deps.p7zip_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" -version = "17.4.0+2" From ab85b6ed516eb1f2cca23bafa665b072ed036b0f Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Wed, 10 Apr 2024 00:19:22 -0400 Subject: [PATCH 03/13] Update doctest folder to tempdir(). 
--- src/TidierFiles.jl | 139 ++++++++++++++++++------------------ src/statsfiles.jl | 43 ++++++----- src/xlfiles.jl | 34 ++++----- test/runtests.jl | 27 ++++++- testing_files/test.dta | Bin 1857 -> 1857 bytes testing_files/test.por | 4 +- testing_files/test.sas7bdat | Bin 16384 -> 16384 bytes testing_files/test.sav | Bin 467 -> 467 bytes testing_files/test.xpt | Bin 1120 -> 1120 bytes testing_files/xlsxtest.xlsx | Bin 7130 -> 7130 bytes 10 files changed, 133 insertions(+), 114 deletions(-) diff --git a/src/TidierFiles.jl b/src/TidierFiles.jl index 1138f5f..77b2e68 100644 --- a/src/TidierFiles.jl +++ b/src/TidierFiles.jl @@ -20,6 +20,28 @@ include("fwf.jl") include("xlfiles.jl") include("statsfiles.jl") +""" +$docstring_write_csv +""" +function write_csv( + x::DataFrame, + file::String; + missingstring::String = "NA", + append::Bool = false, + col_names::Bool = true, + eol::String = "\n", + num_threads::Int = Threads.nthreads()) + + # Configure threading + CSV.write( + file, + x, + append = append, + header = col_names && !append, + missingstring = missingstring, + newline = eol, + threaded = num_threads > 1 ) +end """ $docstring_read_csv @@ -85,34 +107,34 @@ function read_csv(file; return df end - """ -$docstring_write_csv +$docstring_write_tsv """ -function write_csv( +function write_tsv( x::DataFrame, file::String; - missingstring::String = "NA", + missingstring::String = "", append::Bool = false, col_names::Bool = true, eol::String = "\n", num_threads::Int = Threads.nthreads()) - # Configure threading + # Write DataFrame to TSV CSV.write( file, x, + delim = '\t', # Use tab as the delimiter for TSV append = append, header = col_names && !append, missingstring = missingstring, newline = eol, - threaded = num_threads > 1 ) + threaded = num_threads > 1) end """ -$docstring_read_tsv +$docstring_read_delim """ -function read_tsv(file; +function read_delim(file; delim='\t', col_names=true, skip=0, @@ -123,7 +145,7 @@ function read_tsv(file; escape_double=true, 
ntasks::Int = Threads.nthreads(), # Default ntasks value num_threads::Union{Int, Nothing}=nothing) # Optional num_threads - + # Use num_threads if provided, otherwise stick with ntasks effective_ntasks = isnothing(num_threads) ? ntasks : num_threads @@ -138,9 +160,9 @@ function read_tsv(file; delim = delim, header = col_names === true ? 1 : 0, skipto = skipto + 1, + select = col_select, footerskip = 0, limit = limit, - select = col_select, comment = comment, missingstring = missingstring, escapechar = escape_double ? '"' : '\\', @@ -148,14 +170,17 @@ function read_tsv(file; normalizenames = false, ntasks = effective_ntasks > 1 ) - # Read the TSV file into a DataFrame + # Filter options to remove any set to `nothing` + # clean_options = Dict{Symbol,Any}(filter(p -> !isnothing(p[2]), read_options)) + + # Read the file into a DataFrame if startswith(file, "http://") || startswith(file, "https://") # Fetch the content from the URL response = HTTP.get(file) # Ensure the request was successful if response.status != 200 - error("Failed to fetch the TSV file: HTTP status code ", response.status) + error("Failed to fetch the delim file: HTTP status code ", response.status) end # Read the CSV data from the fetched content using cleaned options @@ -168,33 +193,9 @@ function read_tsv(file; end """ -$docstring_write_tsv -""" -function write_tsv( - x::DataFrame, - file::String; - missingstring::String = "", - append::Bool = false, - col_names::Bool = true, - eol::String = "\n", - num_threads::Int = Threads.nthreads()) - - # Write DataFrame to TSV - CSV.write( - file, - x, - delim = '\t', # Use tab as the delimiter for TSV - append = append, - header = col_names && !append, - missingstring = missingstring, - newline = eol, - threaded = num_threads > 1) -end - -""" -$docstring_read_delim +$docstring_read_tsv """ -function read_delim(file; +function read_tsv(file; delim='\t', col_names=true, skip=0, @@ -205,7 +206,7 @@ function read_delim(file; escape_double=true, ntasks::Int = 
Threads.nthreads(), # Default ntasks value num_threads::Union{Int, Nothing}=nothing) # Optional num_threads - + # Use num_threads if provided, otherwise stick with ntasks effective_ntasks = isnothing(num_threads) ? ntasks : num_threads @@ -220,9 +221,9 @@ function read_delim(file; delim = delim, header = col_names === true ? 1 : 0, skipto = skipto + 1, - select = col_select, footerskip = 0, limit = limit, + select = col_select, comment = comment, missingstring = missingstring, escapechar = escape_double ? '"' : '\\', @@ -230,17 +231,14 @@ function read_delim(file; normalizenames = false, ntasks = effective_ntasks > 1 ) - # Filter options to remove any set to `nothing` - # clean_options = Dict{Symbol,Any}(filter(p -> !isnothing(p[2]), read_options)) - - # Read the file into a DataFrame + # Read the TSV file into a DataFrame if startswith(file, "http://") || startswith(file, "https://") # Fetch the content from the URL response = HTTP.get(file) # Ensure the request was successful if response.status != 200 - error("Failed to fetch the delim file: HTTP status code ", response.status) + error("Failed to fetch the TSV file: HTTP status code ", response.status) end # Read the CSV data from the fetched content using cleaned options @@ -252,6 +250,31 @@ function read_delim(file; return df end +""" +$docstring_write_table +""" +function write_table( + x::DataFrame, + file::String; + delim::Char = '\t', # Default to TSV, but allow flexibility + missingstring::String = "", + append::Bool = false, + col_names::Bool = true, + eol::String = "\n", + num_threads::Int = Threads.nthreads()) + + # Write DataFrame to a file with the specified delimiter + CSV.write( + file, + x, + delim = delim, # Flexible delimiter based on argument + append = append, + header = col_names && !append, + missingstring = missingstring, + newline = eol, + threaded = num_threads > 1) +end + """ $docstring_read_table """ @@ -311,30 +334,4 @@ function read_table(file; return df end - -""" 
-$docstring_write_table -""" -function write_table( - x::DataFrame, - file::String; - delim::Char = '\t', # Default to TSV, but allow flexibility - missingstring::String = "", - append::Bool = false, - col_names::Bool = true, - eol::String = "\n", - num_threads::Int = Threads.nthreads()) - - # Write DataFrame to a file with the specified delimiter - CSV.write( - file, - x, - delim = delim, # Flexible delimiter based on argument - append = append, - header = col_names && !append, - missingstring = missingstring, - newline = eol, - threaded = num_threads > 1) -end - end \ No newline at end of file diff --git a/src/statsfiles.jl b/src/statsfiles.jl index 14d8e68..5225d97 100644 --- a/src/statsfiles.jl +++ b/src/statsfiles.jl @@ -1,3 +1,24 @@ +""" +$docstring_write_sas +""" +function write_sas(df::DataFrame, path::String) + writestat(path, df); +end + +""" +$docstring_write_sav +""" +function write_sav(df::DataFrame, path::String) + return writestat(path, df); +end + +""" +$docstring_write_dta +""" +function write_dta(df::DataFrame, path::String) + return writestat(path, df); +end + """ $docstring_read_sas """ @@ -131,26 +152,4 @@ function read_dta(data_file; df = DataFrame(ReadStatTables.readstat(file_to_read; kwargs...)) return df -end - - -""" -$docstring_write_sas -""" -function write_sas(df::DataFrame, path::String) - writestat(path, df); -end - -""" -$docstring_write_sav -""" -function write_sav(df::DataFrame, path::String) - return writestat(path, df); -end - -""" -$docstring_write_dta -""" -function write_dta(df::DataFrame, path::String) - return writestat(path, df); end \ No newline at end of file diff --git a/src/xlfiles.jl b/src/xlfiles.jl index d3fe70a..ea4057e 100644 --- a/src/xlfiles.jl +++ b/src/xlfiles.jl @@ -32,6 +32,22 @@ function convert_column(column) end end +""" +$docstring_write_xlsx +""" +function write_xlsx(x; path::String, overwrite::Bool=false) + # Handling a single DataFrame input + if x isa Pair{String, DataFrame} + # Single sheet: Convert 
the single DataFrame to the required structure + XLSX.writetable(path, x, overwrite=overwrite) + elseif x isa Tuple + # Multiple sheets: Unpack the tuple of pairs directly to XLSX.writetable + XLSX.writetable(path, x..., overwrite=overwrite) + else + error("Input must be a Pair of a sheet name and a DataFrame or a Tuple of such Pairs for multiple sheets.") + end +end + """ $docstring_read_xlsx """ @@ -118,20 +134,4 @@ function read_xlsx( end return data -end - -""" -$docstring_write_xlsx -""" -function write_xlsx(x; path::String, overwrite::Bool=false) - # Handling a single DataFrame input - if x isa Pair{String, DataFrame} - # Single sheet: Convert the single DataFrame to the required structure - XLSX.writetable(path, x, overwrite=overwrite) - elseif x isa Tuple - # Multiple sheets: Unpack the tuple of pairs directly to XLSX.writetable - XLSX.writetable(path, x..., overwrite=overwrite) - else - error("Input must be a Pair of a sheet name and a DataFrame or a Tuple of such Pairs for multiple sheets.") - end -end +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 4be3871..a10d334 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,8 +7,31 @@ using Documenter DocMeta.setdocmeta!(TidierFiles, :DocTestSetup, :(begin using TidierFiles # Determine the package root directory dynamically - project_root = dirname(dirname(pathof(TidierFiles))) - testing_files_path = joinpath(project_root, "testing_files") + # project_root = dirname(dirname(pathof(TidierFiles))) + testing_files_path = joinpath(tempdir(), "testing_files") + + # Need to write fwf data because there is no `write_fwf()` function + # For every other file, write_* is written before read_* to ensure that + # the read_* function can read the output of the corresponding + # write_* function + + fwf_data = """ + John Smith 35 12345 Software Engineer 120,000 + Jane Doe 29 2345 Marketing Manager 95,000 + Alice Jones 42 123456 CEO 250,000 + Bob Brown 31 12345 Product Manager 
110,000 + Charlie Day 28 345 Sales Associate 70,000 + Diane Poe 35 23456 Data Scientist 130,000 + Eve Stone 40 123456 Chief Financial Off 200,000 + Frank Moore 33 1234 Graphic Designer 80,000 + Grace Lee 27 123456 Software Developer 115,000 + Hank Zuse 45 12345 System Analyst 120,000 + """ + + file = open(joinpath(testing_files_path, "fwftest.csv"), "w") + write(file, fwf_data) + close(file) + end); recursive=true) doctest(TidierFiles) diff --git a/testing_files/test.dta b/testing_files/test.dta index e1032ccaa20f4185add2e01babc98e0d2dd4b397..8269b14f506664d00bb20aa302b205584761e9bf 100644 GIT binary patch delta 13 UcmX@ecaU#FJCmW&#?BpV03@UZ6951J delta 13 UcmX@ecaU#FJClL&#?BpV03@RY6951J diff --git a/testing_files/test.por b/testing_files/test.por index 95c7122..82c24bd 100644 --- a/testing_files/test.por +++ b/testing_files/test.por @@ -3,7 +3,7 @@ 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrst uvwxyz .<(+|&[]!$*);^-/|,%_>?`:#@'="000000~000000000000000000000{}\0000000000000 -00000000000000000000000000000000000000000000000000000000SPSSPORTA8/202404086/050 -31718/ReadStat317/https://github.com/WizardMac/ReadStat42/51K/73/2/AA1/9/0/1/9/0 +00000000000000000000000000000000000000000000000000000000SPSSPORTA8/202404086/051 +20718/ReadStat317/https://github.com/WizardMac/ReadStat42/51K/73/2/AA1/9/0/1/9/0 /70/2/AB5/9/2/5/9/2/E0/F3/savA.2TTTTTTTTTNL5OO5E86AQKLF4DBN91H8O69O28QFPEJDHOB7F /3/porA.5TTTTTTTTTHCBJIASGCLNBD08QNGI34HICJI4HN1KT8R5IMF/ZZZZZZZZZZZZZZZZZZZZZZZ diff --git a/testing_files/test.sas7bdat b/testing_files/test.sas7bdat index 314176d955cecd7c86de6ce08998f56ebc3075a9..ae26ce8d9a7084e7188e10cde1d792fc80d76191 100644 GIT binary patch delta 24 ecmZo@U~Fh$oUn}hfaGh7dyWhYKzidwe|rFVc?uH% delta 24 ecmZo@U~Fh$oUn}BVd^W3dyWhYKzidwe|rFU7YYdg diff --git a/testing_files/test.sav b/testing_files/test.sav index 
0a5be601563d2a8b9c0983c4efb332eb1529cc31..35e3bc0e323b3c509db4105b1b16d407232f5779 100644 GIT binary patch delta 22 ecmcc2e3^Md2CJcwm4W%hLYs*VEF0IVF#-TtR0oCt delta 22 ecmcc2e3^Md2CIRwm7)2>LYs*VEF0IVF#-TtUk8T( diff --git a/testing_files/test.xpt b/testing_files/test.xpt index 6d96dc0961b83654f2d4bf461167c240da2efd39..994c99bd852305f41706257863cd6de22cab2420 100644 GIT binary patch delta 64 ucmaFB@qlB(Y*9laD+6-_3&(&UBNHnFQ!6lk;zs+4@895*+pN!I$OHg>@enEzHx5JYd1;9_KI@|?^gX%FO0_LZ~&QQeZ(AnJsqC5U2{ f0?WBbxqx_GQnn!KB!tH=?FQn7OPhnJ3DRBw8q+kk delta 177 zcmca*e#@LEz?+#xgn@y9gCX5xBhOAYrgV?Z$JrJ$g6JF$PG%5Y&b^TdL~HTYgT+_# z+k^RU1Y^K_FX2Kkf3K(>nEzHx5JYd1-~v&Tc_i&Yl&_=>i0YQK22m#@EkP8s6j;th c$_2#hlClL+Cm}q3X*UorT-qE&O_25i08fcBIsgCw From 1c9980c6a70fe0a08cf84a419bfb1e5b4f793239 Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Wed, 10 Apr 2024 00:26:03 -0400 Subject: [PATCH 04/13] Update test_files_path to tempdir() --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index a10d334..7fe7701 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,7 +8,7 @@ DocMeta.setdocmeta!(TidierFiles, :DocTestSetup, :(begin using TidierFiles # Determine the package root directory dynamically # project_root = dirname(dirname(pathof(TidierFiles))) - testing_files_path = joinpath(tempdir(), "testing_files") + testing_files_path = tempdir() # Need to write fwf data because there is no `write_fwf()` function # For every other file, write_* is written before read_* to ensure that From 53331297902ef00fa0d1eada70eda4775a092de0 Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Wed, 10 Apr 2024 00:42:43 -0400 Subject: [PATCH 05/13] Update to runtest.jl --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 7fe7701..81efd40 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -28,7 +28,7 @@ DocMeta.setdocmeta!(TidierFiles, :DocTestSetup, :(begin Hank Zuse 
45 12345 System Analyst 120,000 """ - file = open(joinpath(testing_files_path, "fwftest.csv"), "w") + file = open(joinpath(testing_files_path, "fwftest.txt"), "w") write(file, fwf_data) close(file) From 5799ee55ab2cda4ef477ba35d2367ff6abed7a5a Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Wed, 10 Apr 2024 00:57:59 -0400 Subject: [PATCH 06/13] Update runtests.jl and fixed bug in docstrings. --- src/docstrings.jl | 9 ++++----- test/runtests.jl | 25 +++++++++++++++++++++---- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/docstrings.jl b/src/docstrings.jl index ca1e0c7..07a39e4 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -225,7 +225,6 @@ julia> read_table(joinpath(testing_files_path, "tabletest.txt"), skip = 2, n_max ``` """ - const docstring_write_table = """ write_table(x, file; delim = '\t', na, append, col_names, eol, num_threads) @@ -405,7 +404,7 @@ Arguments ```jldoctest julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); -julia> write_sav(df , joinpath(testing_files_path, "test.sav")) +julia> write_sav(df, joinpath(testing_files_path, "test.sav")) 2×2 ReadStatTable: Row │ AA AB │ String Float64? @@ -413,7 +412,7 @@ julia> write_sav(df , joinpath(testing_files_path, "test.sav")) 1 │ sav 10.1 2 │ por 10.2 -julia> write_sav(df , joinpath(testing_files_path, "test.por")) +julia> write_sav(df, joinpath(testing_files_path, "test.por")) 2×2 ReadStatTable: Row │ AA AB │ String Float64? @@ -435,7 +434,7 @@ Arguments ```jldoctest julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); -julia> write_sav(df , joinpath(testing_files_path, "test.sas7bdat")) +julia> write_sas(df , joinpath(testing_files_path, "test.sas7bdat")) 2×2 ReadStatTable: Row │ AA AB │ String Float64? 
@@ -443,7 +442,7 @@ julia> write_sav(df , joinpath(testing_files_path, "test.sas7bdat")) 1 │ sav 10.1 2 │ por 10.2 -julia> write_sav(df , joinpath(testing_files_path, "test.xpt")) +julia> write_sas(df , joinpath(testing_files_path, "test.xpt")) 2×2 ReadStatTable: Row │ AA AB │ String Float64? diff --git a/test/runtests.jl b/test/runtests.jl index 81efd40..b26fcf0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,15 +5,14 @@ using Test using Documenter DocMeta.setdocmeta!(TidierFiles, :DocTestSetup, :(begin - using TidierFiles + using DataFrames, TidierFiles # Determine the package root directory dynamically # project_root = dirname(dirname(pathof(TidierFiles))) testing_files_path = tempdir() # Need to write fwf data because there is no `write_fwf()` function - # For every other file, write_* is written before read_* to ensure that - # the read_* function can read the output of the corresponding - # write_* function + # Because each doctest runs independently, need to write all the files + # here to ensure they are available to the read_ functions. 
fwf_data = """ John Smith 35 12345 Software Engineer 120,000 @@ -31,6 +30,24 @@ DocMeta.setdocmeta!(TidierFiles, :DocTestSetup, :(begin file = open(joinpath(testing_files_path, "fwftest.txt"), "w") write(file, fwf_data) close(file) + + df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); + write_csv(df, joinpath(testing_files_path, "csvtest.csv")); + write_table(df, joinpath(testing_files_path, "tabletest.txt")); + write_tsv(df, joinpath(testing_files_path, "tsvtest.tsv")); + + df = DataFrame(integers=[1, 2, 3, 4], + strings=["This", "Package makes", "File reading/writing", "even smoother"], + floats=[10.2, 20.3, 30.4, 40.5]); + df2 = DataFrame(AA=["aa", "bb"], AB=[10.1, 10.2]); + write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path=joinpath(testing_files_path, "xlsxtest.xlsx"), overwrite = true); + + df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); + write_sav(df, joinpath(testing_files_path, "test.sav")); + write_sav(df, joinpath(testing_files_path, "test.por")); + write_sas(df , joinpath(testing_files_path, "test.sas7bdat")); + write_sas(df , joinpath(testing_files_path, "test.xpt")); + write_dta(df , joinpath(testing_files_path, "test.dta")); end); recursive=true) From 1b4b202b0a2b5f3a5244050ff54290288ca35dd1 Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Wed, 10 Apr 2024 01:04:36 -0400 Subject: [PATCH 07/13] Replace testing_files_path with tempdir() to fix docs build --- src/docstrings.jl | 44 ++++++++++++++++++++++---------------------- test/runtests.jl | 21 ++++++++++----------- 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/src/docstrings.jl b/src/docstrings.jl index 07a39e4..0a3cdb5 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -18,7 +18,7 @@ Reads a CSV file or URL into a DataFrame, with options to specify delimiter, col `num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. 
Defaults to 1 # Examples ```jldoctest -julia> read_csv(joinpath(testing_files_path, "csvtest.csv"), skip = 2, n_max = 3, missingstring = ["95", "Charlie"]) +julia> read_csv(joinpath(tempdir(), "csvtest.csv"), skip = 2, n_max = 3, missingstring = ["95", "Charlie"]) 3×3 DataFrame Row │ ID Name Score │ Int64 String7 Int64? @@ -50,7 +50,7 @@ Reads a TSV file or URL into a DataFrame, with options to specify delimiter, col # Examples ```jldoctest -julia> read_tsv(joinpath(testing_files_path, "tsvtest.tsv"), skip = 2, n_max = 3, missingstring = ["Charlie"]) +julia> read_tsv(joinpath(tempdir(), "tsvtest.tsv"), skip = 2, n_max = 3, missingstring = ["Charlie"]) 3×3 DataFrame Row │ ID Name Score │ Int64 String7 Int64 @@ -83,7 +83,7 @@ Reads a delimited file or URL into a DataFrame, with options to specify delimite # Examples ```jldoctest -julia> read_delim(joinpath(testing_files_path, "csvtest.csv"), delim = ",", col_names = false, num_threads = 4) # col_names are false here for the purpose of demonstration +julia> read_delim(joinpath(tempdir(), "csvtest.csv"), delim = ",", col_names = false, num_threads = 4) # col_names are false here for the purpose of demonstration 6×3 DataFrame Row │ Column1 Column2 Column3 │ String3 String7 String7 @@ -113,7 +113,7 @@ Read fixed-width format (FWF) files into a DataFrame. - `n_max`=nothing: Maximum number of lines to read from the file. If nothing, read all lines. # Examples ```jldoctest -julia> path = joinpath(testing_files_path, "fwftest.txt"); +julia> path = joinpath(tempdir(), "fwftest.txt"); julia> read_fwf(path, fwf_empty(path, num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"]), skip_to=3, n_max=3) 3×5 DataFrame @@ -143,10 +143,10 @@ num_lines::Int=4: Number of lines to sample from the beginning of the file for a - A vector of strings representing the column names. 
# Examples ```jldoctest -julia> fwf_empty(joinpath(testing_files_path, "fwftest.txt")) +julia> fwf_empty(joinpath(tempdir(), "fwftest.txt")) ([13, 5, 8, 20, 8], ["Column_1", "Column_2", "Column_3", "Column_4", "Column_5"]) -julia> fwf_empty(joinpath(testing_files_path, "fwftest.txt"), num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"]) +julia> fwf_empty(joinpath(tempdir(), "fwftest.txt"), num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"]) ([13, 5, 8, 20, 8], ["Name", "Age", "ID", "Position", "Salary"]) ``` """ @@ -170,7 +170,7 @@ Write a DataFrame to a CSV (comma-separated values) file. ```jldoctest julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); -julia> write_csv(df, joinpath(testing_files_path, "csvtest.csv")); +julia> write_csv(df, joinpath(tempdir(), "csvtest.csv")); ``` """ @@ -192,7 +192,7 @@ Write a DataFrame to a TSV (tab-separated values) file. ```jldoctest julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); -julia> write_tsv(df, joinpath(testing_files_path, "tsvtest.tsv")); +julia> write_tsv(df, joinpath(tempdir(), "tsvtest.tsv")); ``` """ @@ -213,7 +213,7 @@ Read a table from a file where columns are separated by any amount of whitespace -`kwargs`: Additional keyword arguments passed to CSV.File. 
# Examples ```jldoctest -julia> read_table(joinpath(testing_files_path, "tabletest.txt"), skip = 2, n_max = 3, col_select = ["Name"]) +julia> read_table(joinpath(tempdir(), "tabletest.txt"), skip = 2, n_max = 3, col_select = ["Name"]) 3×1 DataFrame Row │ Name │ String7 @@ -244,7 +244,7 @@ Write a DataFrame to a file, allowing for customization of the delimiter and oth ```jldoctest julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); -julia> write_table(df, joinpath(testing_files_path, "tabletest.txt")); +julia> write_table(df, joinpath(tempdir(), "tabletest.txt")); ``` """ @@ -267,7 +267,7 @@ Read data from an Excel file into a DataFrame. # Examples ```jldoctest -julia> read_xlsx(joinpath(testing_files_path, "xlsxtest.xlsx"), sheet = "REPORT_A", skip = 1, n_max = 4, missingstring = [2]) +julia> read_xlsx(joinpath(tempdir(), "xlsxtest.xlsx"), sheet = "REPORT_A", skip = 1, n_max = 4, missingstring = [2]) 3×3 DataFrame Row │ integers strings floats │ Any String Float64 @@ -296,7 +296,7 @@ julia> df = DataFrame(integers=[1, 2, 3, 4], julia> df2 = DataFrame(AA=["aa", "bb"], AB=[10.1, 10.2]); -julia> write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path=joinpath(testing_files_path, "xlsxtest.xlsx"), overwrite = true); +julia> write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path=joinpath(tempdir(), "xlsxtest.xlsx"), overwrite = true); ``` """ @@ -316,7 +316,7 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. If Inf, # Examples ```jldoctest -julia> read_sas(joinpath(testing_files_path, "test.dta")) +julia> read_sas(joinpath(tempdir(), "test.dta")) 2×2 DataFrame Row │ AA AB │ String3 Float64 @@ -341,7 +341,7 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. 
If Inf, # Examples ```jldoctest -julia> read_sas(joinpath(testing_files_path, "test.sas7bdat")) +julia> read_sas(joinpath(tempdir(), "test.sas7bdat")) 2×2 DataFrame Row │ AA AB │ String3 Float64 @@ -349,7 +349,7 @@ julia> read_sas(joinpath(testing_files_path, "test.sas7bdat")) 1 │ sav 10.1 2 │ por 10.2 -julia> read_sas(joinpath(testing_files_path, "test.xpt")) +julia> read_sas(joinpath(tempdir(), "test.xpt")) 2×2 DataFrame Row │ AA AB │ String3 Float64 @@ -373,7 +373,7 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. If Inf, # Examples ```jldoctest -julia> read_sav(joinpath(testing_files_path, "test.sav")) +julia> read_sav(joinpath(tempdir(), "test.sav")) 2×2 DataFrame Row │ AA AB │ String Float64 @@ -381,7 +381,7 @@ julia> read_sav(joinpath(testing_files_path, "test.sav")) 1 │ sav 10.1 2 │ por 10.2 -julia> read_sav(joinpath(testing_files_path, "test.por")) +julia> read_sav(joinpath(tempdir(), "test.por")) 2×2 DataFrame Row │ AA AB │ String Float64 @@ -404,7 +404,7 @@ Arguments ```jldoctest julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); -julia> write_sav(df, joinpath(testing_files_path, "test.sav")) +julia> write_sav(df, joinpath(tempdir(), "test.sav")) 2×2 ReadStatTable: Row │ AA AB │ String Float64? @@ -412,7 +412,7 @@ julia> write_sav(df, joinpath(testing_files_path, "test.sav")) 1 │ sav 10.1 2 │ por 10.2 -julia> write_sav(df, joinpath(testing_files_path, "test.por")) +julia> write_sav(df, joinpath(tempdir(), "test.por")) 2×2 ReadStatTable: Row │ AA AB │ String Float64? @@ -434,7 +434,7 @@ Arguments ```jldoctest julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); -julia> write_sas(df , joinpath(testing_files_path, "test.sas7bdat")) +julia> write_sas(df , joinpath(tempdir(), "test.sas7bdat")) 2×2 ReadStatTable: Row │ AA AB │ String Float64? 
@@ -442,7 +442,7 @@ julia> write_sas(df , joinpath(testing_files_path, "test.sas7bdat")) 1 │ sav 10.1 2 │ por 10.2 -julia> write_sas(df , joinpath(testing_files_path, "test.xpt")) +julia> write_sas(df , joinpath(tempdir(), "test.xpt")) 2×2 ReadStatTable: Row │ AA AB │ String Float64? @@ -465,7 +465,7 @@ Arguments ```jldoctest julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); -julia> write_dta(df , joinpath(testing_files_path, "test.dta")) +julia> write_dta(df , joinpath(tempdir(), "test.dta")) 2×2 ReadStatTable: Row │ AA AB │ String Float64? diff --git a/test/runtests.jl b/test/runtests.jl index b26fcf0..a336d58 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,7 +8,6 @@ DocMeta.setdocmeta!(TidierFiles, :DocTestSetup, :(begin using DataFrames, TidierFiles # Determine the package root directory dynamically # project_root = dirname(dirname(pathof(TidierFiles))) - testing_files_path = tempdir() # Need to write fwf data because there is no `write_fwf()` function # Because each doctest runs independently, need to write all the files @@ -27,27 +26,27 @@ DocMeta.setdocmeta!(TidierFiles, :DocTestSetup, :(begin Hank Zuse 45 12345 System Analyst 120,000 """ - file = open(joinpath(testing_files_path, "fwftest.txt"), "w") + file = open(joinpath(tempdir(), "fwftest.txt"), "w") write(file, fwf_data) close(file) df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); - write_csv(df, joinpath(testing_files_path, "csvtest.csv")); - write_table(df, joinpath(testing_files_path, "tabletest.txt")); - write_tsv(df, joinpath(testing_files_path, "tsvtest.tsv")); + write_csv(df, joinpath(tempdir(), "csvtest.csv")); + write_table(df, joinpath(tempdir(), "tabletest.txt")); + write_tsv(df, joinpath(tempdir(), "tsvtest.tsv")); df = DataFrame(integers=[1, 2, 3, 4], strings=["This", "Package makes", "File reading/writing", "even smoother"], floats=[10.2, 20.3, 30.4, 40.5]); df2 = DataFrame(AA=["aa", "bb"], AB=[10.1, 10.2]); - 
write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path=joinpath(testing_files_path, "xlsxtest.xlsx"), overwrite = true); + write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path=joinpath(tempdir(), "xlsxtest.xlsx"), overwrite = true); df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); - write_sav(df, joinpath(testing_files_path, "test.sav")); - write_sav(df, joinpath(testing_files_path, "test.por")); - write_sas(df , joinpath(testing_files_path, "test.sas7bdat")); - write_sas(df , joinpath(testing_files_path, "test.xpt")); - write_dta(df , joinpath(testing_files_path, "test.dta")); + write_sav(df, joinpath(tempdir(), "test.sav")); + write_sav(df, joinpath(tempdir(), "test.por")); + write_sas(df , joinpath(tempdir(), "test.sas7bdat")); + write_sas(df , joinpath(tempdir(), "test.xpt")); + write_dta(df , joinpath(tempdir(), "test.dta")); end); recursive=true) From 7ffd70c283550307bf1e4b71bdb94129e5832d5a Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Fri, 12 Apr 2024 00:35:43 -0400 Subject: [PATCH 08/13] Change file path for data files. 
--- src/TidierFiles.jl | 144 ++++++++++++++++++++++----------------------- src/docstrings.jl | 57 ++++++++++-------- src/statsfiles.jl | 42 ++++++------- src/xlfiles.jl | 32 +++++----- test/runtests.jl | 24 ++++---- 5 files changed, 152 insertions(+), 147 deletions(-) diff --git a/src/TidierFiles.jl b/src/TidierFiles.jl index 77b2e68..ead47f9 100644 --- a/src/TidierFiles.jl +++ b/src/TidierFiles.jl @@ -20,29 +20,6 @@ include("fwf.jl") include("xlfiles.jl") include("statsfiles.jl") -""" -$docstring_write_csv -""" -function write_csv( - x::DataFrame, - file::String; - missingstring::String = "NA", - append::Bool = false, - col_names::Bool = true, - eol::String = "\n", - num_threads::Int = Threads.nthreads()) - - # Configure threading - CSV.write( - file, - x, - append = append, - header = col_names && !append, - missingstring = missingstring, - newline = eol, - threaded = num_threads > 1 ) -end - """ $docstring_read_csv """ @@ -107,30 +84,6 @@ function read_csv(file; return df end -""" -$docstring_write_tsv -""" -function write_tsv( - x::DataFrame, - file::String; - missingstring::String = "", - append::Bool = false, - col_names::Bool = true, - eol::String = "\n", - num_threads::Int = Threads.nthreads()) - - # Write DataFrame to TSV - CSV.write( - file, - x, - delim = '\t', # Use tab as the delimiter for TSV - append = append, - header = col_names && !append, - missingstring = missingstring, - newline = eol, - threaded = num_threads > 1) -end - """ $docstring_read_delim """ @@ -250,31 +203,6 @@ function read_tsv(file; return df end -""" -$docstring_write_table -""" -function write_table( - x::DataFrame, - file::String; - delim::Char = '\t', # Default to TSV, but allow flexibility - missingstring::String = "", - append::Bool = false, - col_names::Bool = true, - eol::String = "\n", - num_threads::Int = Threads.nthreads()) - - # Write DataFrame to a file with the specified delimiter - CSV.write( - file, - x, - delim = delim, # Flexible delimiter based on argument - 
append = append, - header = col_names && !append, - missingstring = missingstring, - newline = eol, - threaded = num_threads > 1) -end - """ $docstring_read_table """ @@ -334,4 +262,76 @@ function read_table(file; return df end +""" +$docstring_write_csv +""" +function write_csv( + x::DataFrame, + file::String; + missingstring::String = "NA", + append::Bool = false, + col_names::Bool = true, + eol::String = "\n", + num_threads::Int = Threads.nthreads()) + + # Configure threading + CSV.write( + file, + x, + append = append, + header = col_names && !append, + missingstring = missingstring, + newline = eol, + threaded = num_threads > 1 ) +end + +""" +$docstring_write_tsv +""" +function write_tsv( + x::DataFrame, + file::String; + missingstring::String = "", + append::Bool = false, + col_names::Bool = true, + eol::String = "\n", + num_threads::Int = Threads.nthreads()) + + # Write DataFrame to TSV + CSV.write( + file, + x, + delim = '\t', # Use tab as the delimiter for TSV + append = append, + header = col_names && !append, + missingstring = missingstring, + newline = eol, + threaded = num_threads > 1) +end + +""" +$docstring_write_table +""" +function write_table( + x::DataFrame, + file::String; + delim::Char = '\t', # Default to TSV, but allow flexibility + missingstring::String = "", + append::Bool = false, + col_names::Bool = true, + eol::String = "\n", + num_threads::Int = Threads.nthreads()) + + # Write DataFrame to a file with the specified delimiter + CSV.write( + file, + x, + delim = delim, # Flexible delimiter based on argument + append = append, + header = col_names && !append, + missingstring = missingstring, + newline = eol, + threaded = num_threads > 1) +end + end \ No newline at end of file diff --git a/src/docstrings.jl b/src/docstrings.jl index 0a3cdb5..e6e0573 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -18,7 +18,7 @@ Reads a CSV file or URL into a DataFrame, with options to specify delimiter, col `num_threads`: specifies the number of 
concurrent tasks or threads to use for processing, allowing for parallel execution. Defaults to 1 # Examples ```jldoctest -julia> read_csv(joinpath(tempdir(), "csvtest.csv"), skip = 2, n_max = 3, missingstring = ["95", "Charlie"]) +julia> read_csv("csvtest.csv", skip = 2, n_max = 3, missingstring = ["95", "Charlie"]) 3×3 DataFrame Row │ ID Name Score │ Int64 String7 Int64? @@ -50,7 +50,7 @@ Reads a TSV file or URL into a DataFrame, with options to specify delimiter, col # Examples ```jldoctest -julia> read_tsv(joinpath(tempdir(), "tsvtest.tsv"), skip = 2, n_max = 3, missingstring = ["Charlie"]) +julia> read_tsv("tsvtest.tsv", skip = 2, n_max = 3, missingstring = ["Charlie"]) 3×3 DataFrame Row │ ID Name Score │ Int64 String7 Int64 @@ -83,7 +83,7 @@ Reads a delimited file or URL into a DataFrame, with options to specify delimite # Examples ```jldoctest -julia> read_delim(joinpath(tempdir(), "csvtest.csv"), delim = ",", col_names = false, num_threads = 4) # col_names are false here for the purpose of demonstration +julia> read_delim("csvtest.csv", delim = ",", col_names = false, num_threads = 4) # col_names are false here for the purpose of demonstration 6×3 DataFrame Row │ Column1 Column2 Column3 │ String3 String7 String7 @@ -97,8 +97,6 @@ julia> read_delim(joinpath(tempdir(), "csvtest.csv"), delim = ",", col_names = f ``` """ - - const docstring_read_fwf = """ read_fwf(filepath::String; num_lines::Int=4, col_names=nothing) @@ -112,8 +110,12 @@ Read fixed-width format (FWF) files into a DataFrame. - `skip_to`=0: Number of lines at the beginning of the file to skip before reading data. - `n_max`=nothing: Maximum number of lines to read from the file. If nothing, read all lines. 
# Examples -```jldoctest -julia> path = joinpath(tempdir(), "fwftest.txt"); +```jldoctest +julia> path = "fwftest.txt"; + +julia> open(path, "w") do file + write(file, fwf_data) + end; julia> read_fwf(path, fwf_empty(path, num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"]), skip_to=3, n_max=3) 3×5 DataFrame @@ -126,7 +128,6 @@ julia> read_fwf(path, fwf_empty(path, num_lines=4, col_names = ["Name", "Age", " ``` """ - const docstring_fwf_empty = """ fwf_empty(filepath::String; num_lines::Int=4, col_names=nothing) @@ -143,10 +144,14 @@ num_lines::Int=4: Number of lines to sample from the beginning of the file for a - A vector of strings representing the column names. # Examples ```jldoctest -julia> fwf_empty(joinpath(tempdir(), "fwftest.txt")) +julia> open("fwftest.txt", "w") do file + write(file, fwf_data) + end; + +julia> fwf_empty(path) ([13, 5, 8, 20, 8], ["Column_1", "Column_2", "Column_3", "Column_4", "Column_5"]) -julia> fwf_empty(joinpath(tempdir(), "fwftest.txt"), num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"]) +julia> fwf_empty(path, num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"]) ([13, 5, 8, 20, 8], ["Name", "Age", "ID", "Position", "Salary"]) ``` """ @@ -170,7 +175,7 @@ Write a DataFrame to a CSV (comma-separated values) file. ```jldoctest julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); -julia> write_csv(df, joinpath(tempdir(), "csvtest.csv")); +julia> write_csv(df, "csvtest.csv"); ``` """ @@ -192,7 +197,7 @@ Write a DataFrame to a TSV (tab-separated values) file. 
```jldoctest julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); -julia> write_tsv(df, joinpath(tempdir(), "tsvtest.tsv")); +julia> write_tsv(df, "tsvtest.tsv"); ``` """ @@ -213,7 +218,7 @@ Read a table from a file where columns are separated by any amount of whitespace -`kwargs`: Additional keyword arguments passed to CSV.File. # Examples ```jldoctest -julia> read_table(joinpath(tempdir(), "tabletest.txt"), skip = 2, n_max = 3, col_select = ["Name"]) +julia> read_table("tabletest.txt", skip = 2, n_max = 3, col_select = ["Name"]) 3×1 DataFrame Row │ Name │ String7 @@ -244,7 +249,7 @@ Write a DataFrame to a file, allowing for customization of the delimiter and oth ```jldoctest julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); -julia> write_table(df, joinpath(tempdir(), "tabletest.txt")); +julia> write_table(df, "tabletest.txt"); ``` """ @@ -267,7 +272,7 @@ Read data from an Excel file into a DataFrame. # Examples ```jldoctest -julia> read_xlsx(joinpath(tempdir(), "xlsxtest.xlsx"), sheet = "REPORT_A", skip = 1, n_max = 4, missingstring = [2]) +julia> read_xlsx("xlsxtest.xlsx", sheet = "REPORT_A", skip = 1, n_max = 4, missingstring = [2]) 3×3 DataFrame Row │ integers strings floats │ Any String Float64 @@ -296,7 +301,7 @@ julia> df = DataFrame(integers=[1, 2, 3, 4], julia> df2 = DataFrame(AA=["aa", "bb"], AB=[10.1, 10.2]); -julia> write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path=joinpath(tempdir(), "xlsxtest.xlsx"), overwrite = true); +julia> write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path="xlsxtest.xlsx", overwrite = true); ``` """ @@ -316,7 +321,7 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. 
If Inf, # Examples ```jldoctest -julia> read_sas(joinpath(tempdir(), "test.dta")) +julia> read_sas("test.dta") 2×2 DataFrame Row │ AA AB │ String3 Float64 @@ -341,7 +346,7 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. If Inf, # Examples ```jldoctest -julia> read_sas(joinpath(tempdir(), "test.sas7bdat")) +julia> read_sas("test.sas7bdat") 2×2 DataFrame Row │ AA AB │ String3 Float64 @@ -349,7 +354,7 @@ julia> read_sas(joinpath(tempdir(), "test.sas7bdat")) 1 │ sav 10.1 2 │ por 10.2 -julia> read_sas(joinpath(tempdir(), "test.xpt")) +julia> read_sas("test.xpt") 2×2 DataFrame Row │ AA AB │ String3 Float64 @@ -373,7 +378,7 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. If Inf, # Examples ```jldoctest -julia> read_sav(joinpath(tempdir(), "test.sav")) +julia> read_sav("test.sav") 2×2 DataFrame Row │ AA AB │ String Float64 @@ -381,7 +386,7 @@ julia> read_sav(joinpath(tempdir(), "test.sav")) 1 │ sav 10.1 2 │ por 10.2 -julia> read_sav(joinpath(tempdir(), "test.por")) +julia> read_sav("test.por") 2×2 DataFrame Row │ AA AB │ String Float64 @@ -404,7 +409,7 @@ Arguments ```jldoctest julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); -julia> write_sav(df, joinpath(tempdir(), "test.sav")) +julia> write_sav(df, "test.sav") 2×2 ReadStatTable: Row │ AA AB │ String Float64? @@ -412,7 +417,7 @@ julia> write_sav(df, joinpath(tempdir(), "test.sav")) 1 │ sav 10.1 2 │ por 10.2 -julia> write_sav(df, joinpath(tempdir(), "test.por")) +julia> write_sav(df, "test.por") 2×2 ReadStatTable: Row │ AA AB │ String Float64? @@ -434,7 +439,7 @@ Arguments ```jldoctest julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); -julia> write_sas(df , joinpath(tempdir(), "test.sas7bdat")) +julia> write_sas(df , "test.sas7bdat") 2×2 ReadStatTable: Row │ AA AB │ String Float64? 
@@ -442,7 +447,7 @@ julia> write_sas(df , joinpath(tempdir(), "test.sas7bdat")) 1 │ sav 10.1 2 │ por 10.2 -julia> write_sas(df , joinpath(tempdir(), "test.xpt")) +julia> write_sas(df , "test.xpt") 2×2 ReadStatTable: Row │ AA AB │ String Float64? @@ -465,7 +470,7 @@ Arguments ```jldoctest julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); -julia> write_dta(df , joinpath(tempdir(), "test.dta")) +julia> write_dta(df , "test.dta") 2×2 ReadStatTable: Row │ AA AB │ String Float64? diff --git a/src/statsfiles.jl b/src/statsfiles.jl index 5225d97..2012350 100644 --- a/src/statsfiles.jl +++ b/src/statsfiles.jl @@ -1,24 +1,3 @@ -""" -$docstring_write_sas -""" -function write_sas(df::DataFrame, path::String) - writestat(path, df); -end - -""" -$docstring_write_sav -""" -function write_sav(df::DataFrame, path::String) - return writestat(path, df); -end - -""" -$docstring_write_dta -""" -function write_dta(df::DataFrame, path::String) - return writestat(path, df); -end - """ $docstring_read_sas """ @@ -152,4 +131,25 @@ function read_dta(data_file; df = DataFrame(ReadStatTables.readstat(file_to_read; kwargs...)) return df +end + +""" +$docstring_write_sas +""" +function write_sas(df::DataFrame, path::String) + writestat(path, df); +end + +""" +$docstring_write_sav +""" +function write_sav(df::DataFrame, path::String) + return writestat(path, df); +end + +""" +$docstring_write_dta +""" +function write_dta(df::DataFrame, path::String) + return writestat(path, df); end \ No newline at end of file diff --git a/src/xlfiles.jl b/src/xlfiles.jl index ea4057e..eeafd75 100644 --- a/src/xlfiles.jl +++ b/src/xlfiles.jl @@ -32,22 +32,6 @@ function convert_column(column) end end -""" -$docstring_write_xlsx -""" -function write_xlsx(x; path::String, overwrite::Bool=false) - # Handling a single DataFrame input - if x isa Pair{String, DataFrame} - # Single sheet: Convert the single DataFrame to the required structure - XLSX.writetable(path, x, overwrite=overwrite) - elseif x isa Tuple - 
# Multiple sheets: Unpack the tuple of pairs directly to XLSX.writetable - XLSX.writetable(path, x..., overwrite=overwrite) - else - error("Input must be a Pair of a sheet name and a DataFrame or a Tuple of such Pairs for multiple sheets.") - end -end - """ $docstring_read_xlsx """ @@ -134,4 +118,20 @@ function read_xlsx( end return data +end + +""" +$docstring_write_xlsx +""" +function write_xlsx(x; path::String, overwrite::Bool=false) + # Handling a single DataFrame input + if x isa Pair{String, DataFrame} + # Single sheet: Convert the single DataFrame to the required structure + XLSX.writetable(path, x, overwrite=overwrite) + elseif x isa Tuple + # Multiple sheets: Unpack the tuple of pairs directly to XLSX.writetable + XLSX.writetable(path, x..., overwrite=overwrite) + else + error("Input must be a Pair of a sheet name and a DataFrame or a Tuple of such Pairs for multiple sheets.") + end end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index a336d58..edb4095 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -26,27 +26,27 @@ DocMeta.setdocmeta!(TidierFiles, :DocTestSetup, :(begin Hank Zuse 45 12345 System Analyst 120,000 """ - file = open(joinpath(tempdir(), "fwftest.txt"), "w") - write(file, fwf_data) - close(file) + open("fwftest.txt", "w") do file + write(file, fwf_data) + end; df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); - write_csv(df, joinpath(tempdir(), "csvtest.csv")); - write_table(df, joinpath(tempdir(), "tabletest.txt")); - write_tsv(df, joinpath(tempdir(), "tsvtest.tsv")); + write_csv(df, "csvtest.csv"); + write_table(df, "tabletest.txt"); + write_tsv(df, "tsvtest.tsv"); df = DataFrame(integers=[1, 2, 3, 4], strings=["This", "Package makes", "File reading/writing", "even smoother"], floats=[10.2, 20.3, 30.4, 40.5]); df2 = DataFrame(AA=["aa", "bb"], AB=[10.1, 10.2]); - write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path=joinpath(tempdir(), 
"xlsxtest.xlsx"), overwrite = true); + write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path="xlsxtest.xlsx", overwrite = true); df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); - write_sav(df, joinpath(tempdir(), "test.sav")); - write_sav(df, joinpath(tempdir(), "test.por")); - write_sas(df , joinpath(tempdir(), "test.sas7bdat")); - write_sas(df , joinpath(tempdir(), "test.xpt")); - write_dta(df , joinpath(tempdir(), "test.dta")); + write_sav(df, "test.sav"); + write_sav(df, "test.por"); + write_sas(df , "test.sas7bdat"); + write_sas(df , "test.xpt"); + write_dta(df ,"test.dta"); end); recursive=true) From bbd7134ee4f9fb395f22739fac47aebbc6cac4e8 Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Fri, 12 Apr 2024 00:40:00 -0400 Subject: [PATCH 09/13] Fix docstring --- src/docstrings.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/docstrings.jl b/src/docstrings.jl index e6e0573..d16fa36 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -144,7 +144,9 @@ num_lines::Int=4: Number of lines to sample from the beginning of the file for a - A vector of strings representing the column names. # Examples ```jldoctest -julia> open("fwftest.txt", "w") do file +julia> path = "fwftest.txt"; + +julia> open(path, "w") do file write(file, fwf_data) end; From c18a50f1432b40c5e8bc29b66a70d29f0c56b6d8 Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Fri, 12 Apr 2024 00:46:14 -0400 Subject: [PATCH 10/13] Update docstrings --- src/docstrings.jl | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/docstrings.jl b/src/docstrings.jl index d16fa36..9c84be2 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -113,10 +113,6 @@ Read fixed-width format (FWF) files into a DataFrame. 
```jldoctest julia> path = "fwftest.txt"; -julia> open(path, "w") do file - write(file, fwf_data) - end; - julia> read_fwf(path, fwf_empty(path, num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"]), skip_to=3, n_max=3) 3×5 DataFrame Row │ Name Age ID Position Salary @@ -146,10 +142,6 @@ num_lines::Int=4: Number of lines to sample from the beginning of the file for a ```jldoctest julia> path = "fwftest.txt"; -julia> open(path, "w") do file - write(file, fwf_data) - end; - julia> fwf_empty(path) ([13, 5, 8, 20, 8], ["Column_1", "Column_2", "Column_3", "Column_4", "Column_5"]) From 2c8848487bd2ad1160ae910126b363734c521113 Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Fri, 12 Apr 2024 01:05:23 -0400 Subject: [PATCH 11/13] Set makedocs clean to false --- docs/make.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/make.jl b/docs/make.jl index 70d9ace..0a032b1 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -10,7 +10,7 @@ DocMeta.setdocmeta!(TidierFiles, recursive=true) makedocs( modules=[TidierFiles], - clean=true, + clean=false, doctest=true, #format = Documenter.HTML(prettyurls = get(ENV, "CI", nothing) == "true"), sitename="TidierFiles.jl", From f55d7b15fb1a77ee2f19628b5487dfba62a67afd Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Fri, 12 Apr 2024 01:45:28 -0400 Subject: [PATCH 12/13] Moved setup code into doctests. 
--- docs/make.jl | 2 +- src/docstrings.jl | 81 +++++++++++++++++++++++++++++++++++++++-------- test/runtests.jl | 42 ------------------------ 3 files changed, 68 insertions(+), 57 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index 0a032b1..70d9ace 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -10,7 +10,7 @@ DocMeta.setdocmeta!(TidierFiles, recursive=true) makedocs( modules=[TidierFiles], - clean=false, + clean=true, doctest=true, #format = Documenter.HTML(prettyurls = get(ENV, "CI", nothing) == "true"), sitename="TidierFiles.jl", diff --git a/src/docstrings.jl b/src/docstrings.jl index 9c84be2..f08637f 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -17,7 +17,11 @@ Reads a CSV file or URL into a DataFrame, with options to specify delimiter, col `escape_double`: Indicates whether to interpret two consecutive quote characters as a single quote in the data. Default is true. `num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. Defaults to 1 # Examples -```jldoctest +```jldoctest +julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); + +julia> write_csv(df, "csvtest.csv"); + julia> read_csv("csvtest.csv", skip = 2, n_max = 3, missingstring = ["95", "Charlie"]) 3×3 DataFrame Row │ ID Name Score @@ -49,7 +53,11 @@ Reads a TSV file or URL into a DataFrame, with options to specify delimiter, col `num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. Default is the number of available threads. 
# Examples -```jldoctest +```jldoctest +julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); + +julia> write_tsv(df, "tsvtest.tsv"); + julia> read_tsv("tsvtest.tsv", skip = 2, n_max = 3, missingstring = ["Charlie"]) 3×3 DataFrame Row │ ID Name Score @@ -82,7 +90,11 @@ Reads a delimited file or URL into a DataFrame, with options to specify delimite `num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. Default is the number of available threads. # Examples -```jldoctest +```jldoctest +julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); + +julia> write_csv(df, "csvtest.csv"); + julia> read_delim("csvtest.csv", delim = ",", col_names = false, num_threads = 4) # col_names are false here for the purpose of demonstration 6×3 DataFrame Row │ Column1 Column2 Column3 @@ -111,6 +123,13 @@ Read fixed-width format (FWF) files into a DataFrame. - `n_max`=nothing: Maximum number of lines to read from the file. If nothing, read all lines. 
# Examples ```jldoctest +julia> fwf_data = + "John Smith 35 12345 Software Engineer 120,000 \\nJane Doe 29 2345 Marketing Manager 95,000 \\nAlice Jones 42 123456 CEO 250,000 \\nBob Brown 31 12345 Product Manager 110,000 \\nCharlie Day 28 345 Sales Associate 70,000 \\nDiane Poe 35 23456 Data Scientist 130,000 \\nEve Stone 40 123456 Chief Financial Off 200,000 \\nFrank Moore 33 1234 Graphic Designer 80,000 \\nGrace Lee 27 123456 Software Developer 115,000 \\nHank Zuse 45 12345 System Analyst 120,000 "; + +julia> open("fwftest.txt", "w") do file + write(file, fwf_data) + end; + julia> path = "fwftest.txt"; julia> read_fwf(path, fwf_empty(path, num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"]), skip_to=3, n_max=3) @@ -139,7 +158,14 @@ num_lines::Int=4: Number of lines to sample from the beginning of the file for a - A vector of integers representing the detected column widths. - A vector of strings representing the column names. # Examples -```jldoctest +```jldoctest +julia> fwf_data = + "John Smith 35 12345 Software Engineer 120,000 \\nJane Doe 29 2345 Marketing Manager 95,000 \\nAlice Jones 42 123456 CEO 250,000 \\nBob Brown 31 12345 Product Manager 110,000 \\nCharlie Day 28 345 Sales Associate 70,000 \\nDiane Poe 35 23456 Data Scientist 130,000 \\nEve Stone 40 123456 Chief Financial Off 200,000 \\nFrank Moore 33 1234 Graphic Designer 80,000 \\nGrace Lee 27 123456 Software Developer 115,000 \\nHank Zuse 45 12345 System Analyst 120,000 "; + +julia> open("fwftest.txt", "w") do file + write(file, fwf_data) + end; + julia> path = "fwftest.txt"; julia> fwf_empty(path) @@ -150,7 +176,6 @@ julia> fwf_empty(path, num_lines=4, col_names = ["Name", "Age", "ID", "Position" ``` """ - const docstring_write_csv = """ write_csv(DataFrame, filepath; na = "", append = false, col_names = true, missingstring, eol = "\n", num_threads = Threads.nthreads()) @@ -212,6 +237,10 @@ Read a table from a file where columns are separated by any amount of whitespace -`kwargs`: 
Additional keyword arguments passed to CSV.File. # Examples ```jldoctest +julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); + +julia> write_table(df, "tabletest.txt"); + julia> read_table("tabletest.txt", skip = 2, n_max = 3, col_select = ["Name"]) 3×1 DataFrame Row │ Name @@ -265,7 +294,15 @@ Read data from an Excel file into a DataFrame. -`guess_max`: The maximum number of rows to scan for type guessing and column names detection. Only relevant if col_types is nothing or col_names is true. If nothing, a default heuristic is used. # Examples -```jldoctest +```jldoctest +julia> df = DataFrame(integers=[1, 2, 3, 4], + strings=["This", "Package makes", "File reading/writing", "even smoother"], + floats=[10.2, 20.3, 30.4, 40.5]); + +julia> df2 = DataFrame(AA=["aa", "bb"], AB=[10.1, 10.2]); + +julia> write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path="xlsxtest.xlsx", overwrite = true); + julia> read_xlsx("xlsxtest.xlsx", sheet = "REPORT_A", skip = 1, n_max = 4, missingstring = [2]) 3×3 DataFrame Row │ integers strings floats @@ -314,8 +351,12 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. If Inf, `num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. Defaults to 1 # Examples -```jldoctest -julia> read_sas("test.dta") +```jldoctest +julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); + +julia> write_dta(df, "test.dta"); + +julia> read_dta("test.dta") 2×2 DataFrame Row │ AA AB │ String3 Float64 @@ -339,7 +380,11 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. If Inf, `num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. 
Defaults to 1 # Examples -```jldoctest +```jldoctest +julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); + +julia> write_sas(df, "test.sas7bdat"); + julia> read_sas("test.sas7bdat") 2×2 DataFrame Row │ AA AB @@ -348,6 +393,8 @@ julia> read_sas("test.sas7bdat") 1 │ sav 10.1 2 │ por 10.2 +julia> write_sas(df, "test.xpt"); + julia> read_sas("test.xpt") 2×2 DataFrame Row │ AA AB @@ -357,7 +404,7 @@ julia> read_sas("test.xpt") 2 │ por 10.2 """ -const docstring_read_sav = +const docstring_read_sav = """ function read_sav(data_file; encoding=nothing, col_select=nothing, skip=0, n_max=Inf) Read data from a SPSS (.sav and .por) file into a DataFrame, supporting both local and remote sources. @@ -371,7 +418,11 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. If Inf, `num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. Defaults to 1 # Examples -```jldoctest +```jldoctest +julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); + +julia> write_sav(df, "test.sav"); + julia> read_sav("test.sav") 2×2 DataFrame Row │ AA AB @@ -380,6 +431,8 @@ julia> read_sav("test.sav") 1 │ sav 10.1 2 │ por 10.2 +julia> write_sav(df, "test.por"); + julia> read_sav("test.por") 2×2 DataFrame Row │ AA AB @@ -433,7 +486,7 @@ Arguments ```jldoctest julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); -julia> write_sas(df , "test.sas7bdat") +julia> write_sas(df, "test.sas7bdat") 2×2 ReadStatTable: Row │ AA AB │ String Float64? @@ -441,7 +494,7 @@ julia> write_sas(df , "test.sas7bdat") 1 │ sav 10.1 2 │ por 10.2 -julia> write_sas(df , "test.xpt") +julia> write_sas(df, "test.xpt") 2×2 ReadStatTable: Row │ AA AB │ String Float64? @@ -464,7 +517,7 @@ Arguments ```jldoctest julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); -julia> write_dta(df , "test.dta") +julia> write_dta(df, "test.dta") 2×2 ReadStatTable: Row │ AA AB │ String Float64? 
diff --git a/test/runtests.jl b/test/runtests.jl index edb4095..6462a73 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,48 +6,6 @@ using Documenter DocMeta.setdocmeta!(TidierFiles, :DocTestSetup, :(begin using DataFrames, TidierFiles - # Determine the package root directory dynamically - # project_root = dirname(dirname(pathof(TidierFiles))) - - # Need to write fwf data because there is no `write_fwf()` function - # Because each doctest runs independently, need to write all the files - # here to ensure they are available to the read_ functions. - - fwf_data = """ - John Smith 35 12345 Software Engineer 120,000 - Jane Doe 29 2345 Marketing Manager 95,000 - Alice Jones 42 123456 CEO 250,000 - Bob Brown 31 12345 Product Manager 110,000 - Charlie Day 28 345 Sales Associate 70,000 - Diane Poe 35 23456 Data Scientist 130,000 - Eve Stone 40 123456 Chief Financial Off 200,000 - Frank Moore 33 1234 Graphic Designer 80,000 - Grace Lee 27 123456 Software Developer 115,000 - Hank Zuse 45 12345 System Analyst 120,000 - """ - - open("fwftest.txt", "w") do file - write(file, fwf_data) - end; - - df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]); - write_csv(df, "csvtest.csv"); - write_table(df, "tabletest.txt"); - write_tsv(df, "tsvtest.tsv"); - - df = DataFrame(integers=[1, 2, 3, 4], - strings=["This", "Package makes", "File reading/writing", "even smoother"], - floats=[10.2, 20.3, 30.4, 40.5]); - df2 = DataFrame(AA=["aa", "bb"], AB=[10.1, 10.2]); - write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path="xlsxtest.xlsx", overwrite = true); - - df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]); - write_sav(df, "test.sav"); - write_sav(df, "test.por"); - write_sas(df , "test.sas7bdat"); - write_sas(df , "test.xpt"); - write_dta(df ,"test.dta"); - end); recursive=true) doctest(TidierFiles) From 6a5b2e021c5582a9d6d5ee03db9e554b29920739 Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Fri, 12 Apr 2024 02:06:19 
-0400 Subject: [PATCH 13/13] Tweaks to README and index.md --- README.md | 84 ++++++++++++++++++++++++++--------------------- docs/src/index.md | 54 +++++++++++++++++++++++------- 2 files changed, 88 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index 2c20622..52840e5 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,13 @@ ## What is TidierFiles.jl? -TidierFiles.jl leverages the `CSV.jl`, `XLSX.jl`, and `ReadStatTables.jl` packages to reimplement the R `haven` and `readr` packages. -Currently supported file types +TidierFiles.jl is a 100% Julia implementation of the readr, haven, readxl, and writexl R packages. + +Powered by the CSV.jl, XLSX.jl and ReadStatTables.jl packages, TidierFiles.jl aims to bring a consistent interface to the reading and writing of tabular data, including a consistent syntax to read files locally versus from the web and consistent keyword arguments across data formats. + + +Currently supported file types: - `read_csv` and `write_csv` - `read_tsv` and `write_tsv` - `read_xlsx` and `write_xlsx` @@ -20,12 +24,25 @@ Currently supported file types - `read_dta` and `write_dta` (.dta) # Examples -For CSVs (also TSV, white space tables, other delimters) -``` -df = DataFrame(integers=[1, 2, 3, 4], strings=["This", "Package makes", "File reading/writing", "even smoother"], floats=[10.2, 20.3, 30.4, 40.5], dates=[Date(2018,2,20), Date(2018,2,21), Date(2018,2,22), Date(2018,2,23)], times=[Dates.Time(19,10), Dates.Time(19,20), Dates.Time(19,30), Dates.Time(19,40)]) -write_csv(df, "/Users/danielrizk/Downloads/testing.csv" , col_names= true) -read_csv("/Users/danielrizk/Downloads/testing.csv", missingstring=["40.5", "10.2"]) + +Here is an example of how to write and read a CSV file. 
+ +```julia +using TidierFiles + +df = DataFrame( + integers = [1, 2, 3, 4], + strings = ["This", "Package makes", "File reading/writing", "even smoother"], + floats = [10.2, 20.3, 30.4, 40.5], + dates = [Date(2018,2,20), Date(2018,2,21), Date(2018,2,22), Date(2018,2,23)], + times = [Dates.Time(19,10), Dates.Time(19,20), Dates.Time(19,30), Dates.Time(19,40)] + ) + +write_csv(df, "testing.csv" , col_names = true) + +read_csv("testing.csv", missingstring=["40.5", "10.2"]) ``` + ``` 4×5 DataFrame Row │ integers strings floats dates times @@ -37,37 +54,28 @@ read_csv("/Users/danielrizk/Downloads/testing.csv", missingstring=["40.5", "10.2 4 │ 4 even smoother missing 2018-02-23 19:40:00:00 ``` -For Excel files -``` -df2 = DataFrames.DataFrame(AA=["aa", "bb"], AB=[10.1, 10.2]) -write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path, overwrite = true) -read_excel(path, sheet = "REPORT_A", skip = 1, n_max = 4, missingstring = [20.3]) -``` -``` -3×5 DataFrame - Row │ integers strings floats dates times - │ Int64 String Any Date Time -─────┼─────────────────────────────────────────────────────────────── - 1 │ 2 Package makes missing 2018-02-21 19:20:00 - 2 │ 3 File reading/writing 30.4 2018-02-22 19:30:00 - 3 │ 4 even smoother 40.5 2018-02-23 19:40:00 +The file reading functions include the following keyword arguments: +- `path` +- `missingstring` +- `col_names` +- `col_select` +- `num_threads` +- `skip` +- `n_max` +- `delim` (where applicable) +The path can be a file available either locally or on the web. 
+ +```julia +read_csv("https://raw.githubusercontent.com/TidierOrg/TidierFiles.jl/main/testing_files/csvtest.csv", skip = 2, n_max = 3, col_select = ["ID", "Score"], missingstring = ["4"]) ``` -FOR FWF files -``` -path = "fwftest.txt" -read_fwf(path, fwf_empty(path, num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"]), skip_to=3, n_max=6) -## `fwf_empty` will parse and guess path widths based on padding and user determined number of lines -``` -``` -6×5 DataFrame - Row │ Name Age ID Position Salary - │ String String String String String -─────┼──────────────────────────────────────────────────────────── - 1 │ Bob Brown 31 12345 Product Manager $110,000 - 2 │ Charlie Day 28 34 Sales Associate $70,000 - 3 │ Diane Poe 35 23456 Data Scientist $130,000 - 4 │ Eve Stone 4 123456 Chief Financial Off $200,000 - 5 │ Frank Moore 33 1234 Graphic Designer $80,000 - 6 │ Grace Lee 27 123456 Software Developer $115,000 + ``` +3×2 DataFrame + Row │ ID Score + │ Int64? Int64 +─────┼──────────────── + 1 │ 3 77 + 2 │ missing 85 + 3 │ 5 95 +``` \ No newline at end of file diff --git a/docs/src/index.md b/docs/src/index.md index d9f2546..cbeb60a 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,13 +1,13 @@ +# TidierFiles.jl + ## What is TidierFiles.jl? -TidierFiles.jl is a 100% Julia implementation of the readr and haven R packages. -Powered by the CSV.jl, XLSX.jl and ReadStatTables.jl packages, TidierFiles.jl -seeks to harmonize file reading/writing by unifying the arguments across multiple -file types. +TidierFiles.jl is a 100% Julia implementation of the readr, haven, readxl, and writexl R packages. + +Powered by the CSV.jl, XLSX.jl and ReadStatTables.jl packages, TidierFiles.jl aims to bring a consistent interface to the reading and writing of tabular data, including a consistent syntax to read files locally versus from the web and consistent keyword arguments across data formats. + -TidierFiles.jl currently supports -```@raw html -!!! 
example +Currently supported file types: - `read_csv` and `write_csv` - `read_tsv` and `write_tsv` - `read_xlsx` and `write_xlsx` @@ -17,9 +17,39 @@ TidierFiles.jl currently supports - `read_sav` and `write_sav` (.sav and .por) - `read_sas` and `write_sas` (.sas7bdat and .xpt) - `read_dta` and `write_dta` (.dta) + +# Examples + +Here is an example of how to write and read a CSV file. + +```julia +using TidierFiles + +df = DataFrame( + integers = [1, 2, 3, 4], + strings = ["This", "Package makes", "File reading/writing", "even smoother"], + floats = [10.2, 20.3, 30.4, 40.5], + dates = [Date(2018,2,20), Date(2018,2,21), Date(2018,2,22), Date(2018,2,23)], + times = [Dates.Time(19,10), Dates.Time(19,20), Dates.Time(19,30), Dates.Time(19,40)] + ) + +write_csv(df, "testing.csv" , col_names = true) + +read_csv("testing.csv", missingstring=["40.5", "10.2"]) ``` -Read functions include the following arguments and support HTTP reading: +``` +4×5 DataFrame + Row │ integers strings floats dates times + │ Int64 String31 Float64? Date Time +─────┼───────────────────────────────────────────────────────────────── + 1 │ 1 This missing 2018-02-20 19:10:00 + 2 │ 2 Package makes 20.3 2018-02-21 19:20:00 + 3 │ 3 File reading/writing 30.4 2018-02-22 19:30:00 + 4 │ 4 even smoother missing 2018-02-23 19:40:00 +``` + +The file reading functions include the following keyword arguments: - `path` - `missingstring` - `col_names` @@ -27,14 +57,14 @@ Read functions include the following arguments and support HTTP reading: - `num_threads` - `skip` - `n_max` -- `delim` (where applies) +- `delim` (where applicable) -```julia -using TidierFiles +The path can be a file available either locally or on the web. +```julia read_csv("https://raw.githubusercontent.com/TidierOrg/TidierFiles.jl/main/testing_files/csvtest.csv", skip = 2, n_max = 3, col_select = ["ID", "Score"], missingstring = ["4"]) - ``` + ``` 3×2 DataFrame Row │ ID Score