Merge pull request #22 from TidierOrg/add_typesandallargs

TidierOrg · Nov 13, 2024 · ac1de0d · ac1de0d · drizk1 · Nov 13, 2024
2 parents 7075aee + c190c36
commit ac1de0d
Show file tree

Hide file tree

Showing 7 changed files with 175 additions and 137 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "TidierFiles"
 uuid = "8ae5e7a9-bdd3-4c93-9cc3-9df4d5d947db"
 authors = ["Daniel Rizk <[email protected]> and contributors"]
-version = "0.1.5"
+version = "0.1.6"
 
 [deps]
 Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"

diff --git a/README.md b/README.md
@@ -64,7 +64,7 @@ read_csv("testing.csv", missingstring=["40.5", "10.2"])
 
 The file reading functions include the following keyword arguments:
 - `path`
-- `missingstring`
+- `missing_value`
 - `col_names`
 - `col_select`
 - `num_threads`

diff --git a/docs/examples/UserGuide/delim.jl b/docs/examples/UserGuide/delim.jl
@@ -4,15 +4,15 @@ using TidierFiles
 
 # ## read_csv/tsv/delim
 
-read_csv("https://raw.githubusercontent.com/TidierOrg/TidierFiles.jl/main/testing_files/csvtest.csv", skip = 2, n_max = 3, col_select = ["ID", "Score"], missingstring = ["4"])
+read_csv("https://raw.githubusercontent.com/TidierOrg/TidierFiles.jl/main/testing_files/csvtest.csv", skip = 2, n_max = 3, col_select = ["ID", "Score"], missing_value = ["4"])
 
-#read_csv(file; delim=',', col_names=true, skip=0, n_max=Inf, comment=nothing, missingstring="", col_select=nothing, escape_double=true, col_types=nothing, num_threads=1)
+#read_csv(file; delim=',', col_names=true, skip=0, n_max=Inf, comment=nothing, missing_value="", col_select=nothing, escape_double=true, col_types=nothing, num_threads=1)
 
-#read_tsv(file; delim='\t', col_names=true, skip=0, n_max=Inf, comment=nothing, missingstring="", col_select=nothing, escape_double=true, col_types=nothing, num_threads=Threads.nthreads())
+#read_tsv(file; delim='\t', col_names=true, skip=0, n_max=Inf, comment=nothing, missing_value="", col_select=nothing, escape_double=true, col_types=nothing, num_threads=Threads.nthreads())
 
-#read_delim(file; delim='\t', decimal = '.', groupmark = nothing col_names=true, skip=0, n_max=Inf, comment=nothing, missingstring="", col_select=nothing, escape_double=true, col_types=nothing, num_threads=Threads.nthreads())
+#read_delim(file; delim='\t', decimal = '.', groupmark = nothing, col_names=true, skip=0, n_max=Inf, comment=nothing, missing_value="", col_select=nothing, escape_double=true, col_types=nothing, num_threads=Threads.nthreads())
 
-#read_csv2(file; delim=';', decimal = ',', col_names=true, skip=0, n_max=Inf, comment=nothing, missingstring="", col_select=nothing, escape_double=true, col_types=nothing, num_threads=Threads.nthreads())
+#read_csv2(file; delim=';', decimal = ',', col_names=true, skip=0, n_max=Inf, comment=nothing, missing_value="", col_select=nothing, escape_double=true, col_types=nothing, num_threads=Threads.nthreads())
 
 #These functions read a delimited file (CSV, TSV, or custom delimiter) into a DataFrame. The arguments are:
 
@@ -22,35 +22,35 @@ read_csv("https://raw.githubusercontent.com/TidierOrg/TidierFiles.jl/main/testin
 # - `skip`: Number of lines to skip before reading data. Default is 0.
 # - `n_max`: Maximum number of rows to read. Default is `Inf` (read all rows).
 # - `comment`: Character indicating comment lines to ignore. Default is `nothing`.
-# - `missingstring`: String(s) representing missing values. Default is `""`.
+# - `missing_value`: String(s) representing missing values. Default is `""`.
 # - `col_select`: Optional vector of symbols or strings to select columns to load. Default is `nothing`.
 # - `groupmark`: A symbol that separates groups of digits. Default is `nothing`.
 # - `decimal`: An ASCII Char argument that is used when parsing float values. Default is '.'.
 # - `escape_double`: Interpret two consecutive quote characters as a single quote. Default is `true`.
-# - `col_types`: Optional specification of column types. Default is `nothing` (types are inferred).
+# - `col_types`: Optional specification of column types using a Dict. Default is `nothing` (types are inferred).
 # - `num_threads`: Number of threads to use for parallel execution. Default is 1 for `read_csv` and the number of available threads for `read_tsv` and `read_delim`.
-
+# - `kwarg`: Any CSV.jl keyword argument can be passed to any of the above functions, using CSV.jl's syntax, and it will be supported.
 # The functions return a DataFrame containing the parsed data from the file.
 
 # ## `write_csv` and `write_tsv`
 
-# write_csv(x, file; missingstring="", append=false, col_names=true, eol="\n", num_threads=Threads.nthreads())
+# write_csv(x, file; missing_value="", append=false, col_names=true, eol="\n", num_threads=Threads.nthreads())
 
-# write_tsv(x, file; missingstring="", append=false, col_names=true, eol="\n", num_threads=Threads.nthreads())
+# write_tsv(x, file; missing_value="", append=false, col_names=true, eol="\n", num_threads=Threads.nthreads())
 
 # These functions write a DataFrame to a CSV or TSV file. The arguments are:
 
 # - `x`: The DataFrame to write.
 # - `file`: The path to the output file.
-# - `missingstring`: The string to represent missing values. Default is an empty string.
+# - `missing_value`: The string to represent missing values. Default is an empty string.
 # - `append`: Whether to append to an existing file. Default is `false`.
 # - `col_names`: Whether to write column names as the first line. Default is `true`.
 # - `eol`: The end-of-line character. Default is `"\n"`.
 # - `num_threads`: The number of threads to use for writing. Default is the number of available threads.
 
 # ## `read_table`
 
-# read_table(file; col_names=true, skip=0, n_max=Inf, comment=nothing, col_select=nothing, missingstring="", num_threads)
+# read_table(file; col_names=true, skip=0, n_max=Inf, comment=nothing, col_select=nothing, missing_value="", num_threads)
 
 # This function reads a table from a whitespace-delimited file into a DataFrame. The arguments are:
 
@@ -60,18 +60,18 @@ read_csv("https://raw.githubusercontent.com/TidierOrg/TidierFiles.jl/main/testin
 # - `n_max`: Maximum number of lines to read. Default is `Inf` (read all lines).
 # - `comment`: Character or string indicating comment lines to ignore. Default is `nothing`.
 # - `col_select`: Optional vector of symbols or strings to select columns to load. Default is `nothing`.
-# - `missingstring`: The string representing missing values. Default is `""`.
+# - `missing_value`: The string representing missing values. Default is `""`.
 # - `num_threads`: The number of threads to use for reading. Default is the number of available threads.
 
 # ## `write_table`
-# write_table(x, file; delim='\t', missingstring="", append=false, col_names=true, eol="\n", num_threads=Threads.nthreads())
+# write_table(x, file; delim='\t', missing_value="", append=false, col_names=true, eol="\n", num_threads=Threads.nthreads())
 
 # This function writes a DataFrame to a file with customizable delimiter and options. The arguments are:
 
 # - `x`: The DataFrame to write.
 # - `file`: The path to the output file.
 # - `delim`: The field delimiter. Default is `'\t'` (tab-separated).
-# - `missingstring`: The string to represent missing values. Default is `""`.
+# - `missing_value`: The string to represent missing values. Default is `""`.
 # - `append`: Whether to append to an existing file. Default is `false`.
 # - `col_names`: Whether to write column names as the first line. Default is `true`.
 # - `eol`: The end-of-line character. Default is `"\n"`.

diff --git a/docs/src/index.md b/docs/src/index.md
@@ -45,7 +45,7 @@ df = DataFrame(
 
 write_csv(df, "testing.csv" , col_names = true)
 
-read_csv("testing.csv", missingstring=["40.5", "10.2"])
+read_csv("testing.csv", missing_value=["40.5", "10.2"])
 ```
 
 ```
@@ -61,7 +61,7 @@ read_csv("testing.csv", missingstring=["40.5", "10.2"])
 
 The file reading functions include the following keyword arguments:
 - `path`
-- `missingstring`
+- `missing_value`
 - `col_names`
 - `col_select`
 - `num_threads`
@@ -72,7 +72,7 @@ The file reading functions include the following keyword arguments:
 The path can be a file available either locally or on the web.
 
 ```julia
-read_csv("https://raw.githubusercontent.com/TidierOrg/TidierFiles.jl/main/testing_files/csvtest.csv", skip = 2, n_max = 3, col_select = ["ID", "Score"], missingstring = ["4"])
+read_csv("https://raw.githubusercontent.com/TidierOrg/TidierFiles.jl/main/testing_files/csvtest.csv", skip = 2, n_max = 3, col_select = ["ID", "Score"], missing_value = ["4"])
 ```
 ```
 3×2 DataFrame