From 685107fcfdcff594e4fe122203a6c5f6796a7865 Mon Sep 17 00:00:00 2001
From: Daniel Rizk <rizkytennis@gmail.com>
Date: Mon, 11 Nov 2024 19:50:45 -0500
Subject: [PATCH 1/4] adds `col_types` and all args for csv.jl based readers

---
 src/TidierFiles.jl | 170 ++++++++++++++++++++++++++-------------------
 1 file changed, 97 insertions(+), 73 deletions(-)

diff --git a/src/TidierFiles.jl b/src/TidierFiles.jl
index f75bc03..08c5b4c 100644
--- a/src/TidierFiles.jl
+++ b/src/TidierFiles.jl
@@ -31,6 +31,7 @@ $docstring_read_csv
 """
 function read_csv(files;
                   delim=',',
+                  decimal='.',
                   col_names=true,
                   skip=0,
                   n_max=Inf,
@@ -39,7 +40,9 @@ function read_csv(files;
                   missingstring="",
                   escape_double=true,
                   ntasks::Int = Threads.nthreads(),
-                  num_threads::Union{Int, Nothing}=nothing)
+                  num_threads::Union{Int, Nothing}=nothing, 
+                  col_types = nothing,
+                  kwargs...)  # Catch any other keyword arguments
 
     # Normalize input to always be a vector of files
     file_list = (typeof(files) <: AbstractString) ? [files] : files
@@ -53,22 +56,25 @@ function read_csv(files;
     # Calculate skipto and header
     skipto = skip + (col_names === true ? 1 : 0)
 
-    # Prepare CSV reading options
-    read_options = (
-        delim = delim,
-        header = col_names === true ? 1 : 0,
-        skipto = skipto + 1,
-        footerskip = 0,
-        select = col_select,
-        limit = limit,
-        comment = comment,
-        missingstring = missingstring,
-        escapechar = escape_double ? '"' : '\\',
-        quotechar = '"',
-        normalizenames = false,
-        ntasks = effective_ntasks > 1
+    read_options = Dict(
+        :delim => delim,
+        :decimal => decimal,
+        :header => col_names === true ? 1 : 0,
+        :skipto => skipto + 1,
+        :footerskip => 0,
+        :select => col_select,
+        :limit => limit,
+        :comment => comment,
+        :missingstring => missingstring,
+        :escapechar => escape_double ? '"' : '\\',
+        :quotechar => '"',
+        :ntasks => effective_ntasks > 1, 
+        :types => col_types
     )
 
+    # Merge additional keyword arguments into the read_options dictionary
+    merge!(read_options, kwargs)
+
     # Initialize an empty DataFrame
     final_df = DataFrame()
 
@@ -94,22 +100,24 @@ function read_csv(files;
     return final_df
 end
 
+
 """
 $docstring_read_delim
 """
 function read_delim(files;
-                    delim='\t',
-                    decimal='.',
-                    col_names=true,
-                    skip=0,
-                    n_max=Inf,
-                    groupmark=nothing,
-                    col_select=nothing,
-                    comment=nothing,
-                    missingstring="",
-                    escape_double=true,
-                    ntasks::Int = Threads.nthreads(),
-                    num_threads::Union{Int, Nothing}=nothing)
+                  delim=',',
+                  decimal='.',
+                  col_names=true,
+                  skip=0,
+                  n_max=Inf,
+                  col_select=nothing,
+                  comment=nothing,
+                  missingstring="",
+                  escape_double=true,
+                  ntasks::Int = Threads.nthreads(),
+                  num_threads::Union{Int, Nothing}=nothing, 
+                  col_types = nothing,
+                  kwargs...) 
 
     # Normalize input to always be a vector of files
     file_list = (typeof(files) <: AbstractString) ? [files] : files
@@ -124,22 +132,23 @@ function read_delim(files;
     skipto = skip + (col_names === true ? 1 : 0)
 
     # Prepare CSV reading options
-    read_options = (
-        delim = delim,
-        decimal = decimal,
-        header = col_names === true ? 1 : 0,
-        skipto = skipto + 1,
-        select = col_select,
-        groupmark = groupmark,
-        footerskip = 0,
-        limit = limit,
-        comment = comment,
-        missingstring = missingstring,
-        escapechar = escape_double ? '"' : '\\',
-        quotechar = '"',
-        normalizenames = false,
-        ntasks = effective_ntasks > 1
+    read_options = Dict(
+        :delim => delim,
+        :decimal => decimal,
+        :header => col_names === true ? 1 : 0,
+        :skipto => skipto + 1,
+        :select => col_select,
+        :footerskip => 0,
+        :limit => limit,
+        :comment => comment,
+        :missingstring => missingstring,
+        :escapechar => escape_double ? '"' : '\\',
+        :quotechar => '"',
+        :normalizenames => false,
+        :ntasks => effective_ntasks > 1,
+        :types => col_types
     )
+    merge!(read_options, kwargs)
 
     # Initialize an empty DataFrame
     final_df = DataFrame()
@@ -172,6 +181,7 @@ $docstring_read_tsv
 """
 function read_tsv(files;
                   delim='\t',
+                  decimal='.',
                   col_names=true,
                   skip=0,
                   n_max=Inf,
@@ -180,7 +190,10 @@ function read_tsv(files;
                   missingstring="",
                   escape_double=true,
                   ntasks::Int = Threads.nthreads(),
-                  num_threads::Union{Int, Nothing}=nothing)
+                  num_threads::Union{Int, Nothing}=nothing,
+                  col_types = nothing,
+                  groupmark=nothing,
+                  kwargs...)
 
     # Normalize input to always be a vector of files
     file_list = (typeof(files) <: AbstractString) ? [files] : files
@@ -194,22 +207,27 @@ function read_tsv(files;
     # Calculate skipto and header
     skipto = skip + (col_names === true ? 1 : 0)
 
-    # Prepare CSV reading options
-    read_options = (
-        delim = delim,
-        header = col_names === true ? 1 : 0,
-        skipto = skipto + 1,
-        footerskip = 0,
-        limit = limit,
-        select = col_select,
-        comment = comment,
-        missingstring = missingstring,
-        escapechar = escape_double ? '"' : '\\',
-        quotechar = '"',
-        normalizenames = false,
-        ntasks = effective_ntasks > 1
+    read_options = Dict(
+        :delim => delim,
+        :decimal => decimal,
+        :header => col_names === true ? 1 : 0,
+        :skipto => skipto + 1,
+        :select => col_select,
+        :groupmark => groupmark,
+        :footerskip => 0,
+        :limit => limit,
+        :comment => comment,
+        :missingstring => missingstring,
+        :escapechar => escape_double ? '"' : '\\',
+        :quotechar => '"',
+        :normalizenames => false,
+        :ntasks => effective_ntasks > 1,
+        :types => col_types
     )
 
+    # Merge additional keyword arguments into the read_options dictionary
+    merge!(read_options, kwargs)
+
     # Initialize an empty DataFrame
     final_df = DataFrame()
 
@@ -251,7 +269,10 @@ function read_csv2(files;
                   missingstring="",
                   escape_double=true,
                   ntasks::Int = Threads.nthreads(),
-                  num_threads::Union{Int, Nothing}=nothing)
+                  num_threads::Union{Int, Nothing}=nothing,
+                  col_types = nothing,
+                  kwargs...
+                  )
 
     # Normalize input to always be a vector of files
     file_list = (typeof(files) <: AbstractString) ? [files] : files
@@ -266,22 +287,25 @@ function read_csv2(files;
     skipto = skip + (col_names === true ? 1 : 0)
 
     # Prepare CSV reading options
-    read_options = (
-        delim = delim,
-        decimal = decimal,
-        header = col_names === true ? 1 : 0,
-        groupmark = groupmark,
-        skipto = skipto + 1,
-        footerskip = 0,
-        select = col_select,
-        limit = limit,
-        comment = comment,
-        missingstring = missingstring,
-        escapechar = escape_double ? '"' : '\\',
-        quotechar = '"',
-        normalizenames = false,
-        ntasks = effective_ntasks > 1
+    read_options = Dict(
+        :delim => delim,
+        :decimal => decimal,
+        :header => col_names === true ? 1 : 0,
+        :skipto => skipto + 1,
+        :select => col_select,
+        :groupmark => groupmark,
+        :footerskip => 0,
+        :limit => limit,
+        :comment => comment,
+        :missingstring => missingstring,
+        :escapechar => escape_double ? '"' : '\\',
+        :quotechar => '"',
+        :normalizenames => false,
+        :ntasks => effective_ntasks > 1,
+        :types => col_types
     )
+    
+    merge!(read_options, kwargs)
 
     # Initialize an empty DataFrame
     final_df = DataFrame()

From d3b51a2cfcdf744478dd0e7f72afd6c074ad0176 Mon Sep 17 00:00:00 2001
From: Daniel Rizk <rizkytennis@gmail.com>
Date: Tue, 12 Nov 2024 18:23:11 -0500
Subject: [PATCH 2/4] docstring fix, missing string now missing value

---
 Project.toml                     |  2 +-
 README.md                        |  2 +-
 docs/examples/UserGuide/delim.jl | 28 ++++++------
 docs/src/index.md                |  6 +--
 src/TidierFiles.jl               | 32 +++++++-------
 src/docstrings.jl                | 73 +++++++++++++++++++-------------
 src/xlfiles.jl                   |  6 +--
 7 files changed, 82 insertions(+), 67 deletions(-)

diff --git a/Project.toml b/Project.toml
index 5db264f..43c98cd 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "TidierFiles"
 uuid = "8ae5e7a9-bdd3-4c93-9cc3-9df4d5d947db"
 authors = ["Daniel Rizk <rizk.daniel.12@gmail.com> and contributors"]
-version = "0.1.5"
+version = "0.1.6"
 
 [deps]
 Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
diff --git a/README.md b/README.md
index e3d4830..d38ba71 100644
--- a/README.md
+++ b/README.md
@@ -64,7 +64,7 @@ read_csv("testing.csv", missingstring=["40.5", "10.2"])
 
 The file reading functions include the following keyword arguments:
 - `path`
-- `missingstring`
+- `missing_value`
 - `col_names`
 - `col_select`
 - `num_threads`
diff --git a/docs/examples/UserGuide/delim.jl b/docs/examples/UserGuide/delim.jl
index d4232de..baae173 100644
--- a/docs/examples/UserGuide/delim.jl
+++ b/docs/examples/UserGuide/delim.jl
@@ -4,15 +4,15 @@ using TidierFiles
 
 # ## read_csv/tsv/delim
 
-read_csv("https://raw.githubusercontent.com/TidierOrg/TidierFiles.jl/main/testing_files/csvtest.csv", skip = 2, n_max = 3, col_select = ["ID", "Score"], missingstring = ["4"])
+read_csv("https://raw.githubusercontent.com/TidierOrg/TidierFiles.jl/main/testing_files/csvtest.csv", skip = 2, n_max = 3, col_select = ["ID", "Score"], missing_value = ["4"])
 
-#read_csv(file; delim=',', col_names=true, skip=0, n_max=Inf, comment=nothing, missingstring="", col_select=nothing, escape_double=true, col_types=nothing, num_threads=1)
+#read_csv(file; delim=',', col_names=true, skip=0, n_max=Inf, comment=nothing, missing_value="", col_select=nothing, escape_double=true, col_types=nothing, num_threads=1)
 
-#read_tsv(file; delim='\t', col_names=true, skip=0, n_max=Inf, comment=nothing, missingstring="", col_select=nothing, escape_double=true, col_types=nothing, num_threads=Threads.nthreads())
+#read_tsv(file; delim='\t', col_names=true, skip=0, n_max=Inf, comment=nothing, missing_value="", col_select=nothing, escape_double=true, col_types=nothing, num_threads=Threads.nthreads())
 
-#read_delim(file; delim='\t', decimal = '.', groupmark = nothing col_names=true, skip=0, n_max=Inf, comment=nothing, missingstring="", col_select=nothing, escape_double=true, col_types=nothing, num_threads=Threads.nthreads())
+#read_delim(file; delim='\t', decimal = '.', groupmark = nothing, col_names=true, skip=0, n_max=Inf, comment=nothing, missing_value="", col_select=nothing, escape_double=true, col_types=nothing, num_threads=Threads.nthreads())
 
-#read_csv2(file; delim=';', decimal = ',', col_names=true, skip=0, n_max=Inf, comment=nothing, missingstring="", col_select=nothing, escape_double=true, col_types=nothing, num_threads=Threads.nthreads())
+#read_csv2(file; delim=';', decimal = ',', col_names=true, skip=0, n_max=Inf, comment=nothing, missing_value="", col_select=nothing, escape_double=true, col_types=nothing, num_threads=Threads.nthreads())
 
 #These functions read a delimited file (CSV, TSV, or custom delimiter) into a DataFrame. The arguments are:
 
@@ -22,27 +22,27 @@ read_csv("https://raw.githubusercontent.com/TidierOrg/TidierFiles.jl/main/testin
 # - `skip`: Number of lines to skip before reading data. Default is 0.
 # - `n_max`: Maximum number of rows to read. Default is `Inf` (read all rows).
 # - `comment`: Character indicating comment lines to ignore. Default is `nothing`.
-# - `missingstring`: String(s) representing missing values. Default is `""`.
+# - `missing_value`: String(s) representing missing values. Default is `""`.
 # - `col_select`: Optional vector of symbols or strings to select columns to load. Default is `nothing`.
 # - `groupmark`: A symbol that separates groups of digits Default is `nothing`.
 # - `decimal`: An ASCII Char argument that is used when parsing float values. Default is '.'.
 # - `escape_double`: Interpret two consecutive quote characters as a single quote. Default is `true`.
-# - `col_types`: Optional specification of column types. Default is `nothing` (types are inferred).
+# - `col_types`: Optional specification of column types using a Dict. Default is `nothing` (types are inferred).
 # - `num_threads`: Number of threads to use for parallel execution. Default is 1 for `read_csv` and the number of available threads for `read_tsv` and `read_delim`.
 
 # The functions return a DataFrame containing the parsed data from the file.
 
 # ## `write_csv` and `write_tsv`
 
-# write_csv(x, file; missingstring="", append=false, col_names=true, eol="\n", num_threads=Threads.nthreads())
+# write_csv(x, file; missing_value="", append=false, col_names=true, eol="\n", num_threads=Threads.nthreads())
 
-# write_tsv(x, file; missingstring="", append=false, col_names=true, eol="\n", num_threads=Threads.nthreads())
+# write_tsv(x, file; missing_value="", append=false, col_names=true, eol="\n", num_threads=Threads.nthreads())
 
 # These functions write a DataFrame to a CSV or TSV file. The arguments are:
 
 # - `x`: The DataFrame to write.
 # - `file`: The path to the output file.
-# - `missingstring`: The string to represent missing values. Default is an empty string.
+# - `missing_value`: The string to represent missing values. Default is an empty string.
 # - `append`: Whether to append to an existing file. Default is `false`.
 # - `col_names`: Whether to write column names as the first line. Default is `true`.
 # - `eol`: The end-of-line character. Default is `"\n"`.
@@ -50,7 +50,7 @@ read_csv("https://raw.githubusercontent.com/TidierOrg/TidierFiles.jl/main/testin
 
 # ## `read_table`
 
-# read_table(file; col_names=true, skip=0, n_max=Inf, comment=nothing, col_select=nothing, missingstring="", num_threads)
+# read_table(file; col_names=true, skip=0, n_max=Inf, comment=nothing, col_select=nothing, missing_value="", num_threads)
 
 # This function reads a table from a whitespace-delimited file into a DataFrame. The arguments are:
 
@@ -60,18 +60,18 @@ read_csv("https://raw.githubusercontent.com/TidierOrg/TidierFiles.jl/main/testin
 # - `n_max`: Maximum number of lines to read. Default is `Inf` (read all lines).
 # - `comment`: Character or string indicating comment lines to ignore. Default is `nothing`.
 # - `col_select`: Optional vector of symbols or strings to select columns to load. Default is `nothing`.
-# - `missingstring`: The string representing missing values. Default is `""`.
+# - `missing_value`: The string representing missing values. Default is `""`.
 # - `num_threads`: The number of threads to use for writing. Default is the number of available threads.
 
 # ## `write_table`
-# write_table(x, file; delim='\t', missingstring="", append=false, col_names=true, eol="\n", num_threads=Threads.nthreads())
+# write_table(x, file; delim='\t', missing_value="", append=false, col_names=true, eol="\n", num_threads=Threads.nthreads())
 
 # This function writes a DataFrame to a file with customizable delimiter and options. The arguments are:
 
 # - `x`: The DataFrame to write.
 # - `file`: The path to the output file.
 # - `delim`: The field delimiter. Default is `'\t'` (tab-separated).
-# - `missingstring`: The string to represent missing values. Default is `""`.
+# - `missing_value`: The string to represent missing values. Default is `""`.
 # - `append`: Whether to append to an existing file. Default is `false`.
 # - `col_names`: Whether to write column names as the first line. Default is `true`.
 # - `eol`: The end-of-line character. Default is `"\n"`.
diff --git a/docs/src/index.md b/docs/src/index.md
index b905a7b..24830b1 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -45,7 +45,7 @@ df = DataFrame(
 
 write_csv(df, "testing.csv" , col_names = true)
 
-read_csv("testing.csv", missingstring=["40.5", "10.2"])
+read_csv("testing.csv", missing_value=["40.5", "10.2"])
 ```
 
 ```
@@ -61,7 +61,7 @@ read_csv("testing.csv", missingstring=["40.5", "10.2"])
 
 The file reading functions include the following keyword arguments:
 - `path`
-- `missingstring`
+- `missing_value`
 - `col_names`
 - `col_select`
 - `num_threads`
@@ -72,7 +72,7 @@ The file reading functions include the following keyword arguments:
 The path can be a file available either locally or on the web.
 
 ```julia
-read_csv("https://raw.githubusercontent.com/TidierOrg/TidierFiles.jl/main/testing_files/csvtest.csv", skip = 2, n_max = 3, col_select = ["ID", "Score"], missingstring = ["4"])
+read_csv("https://raw.githubusercontent.com/TidierOrg/TidierFiles.jl/main/testing_files/csvtest.csv", skip = 2, n_max = 3, col_select = ["ID", "Score"], missing_value = ["4"])
 ```
 ```
 3×2 DataFrame
diff --git a/src/TidierFiles.jl b/src/TidierFiles.jl
index 08c5b4c..a7a5961 100644
--- a/src/TidierFiles.jl
+++ b/src/TidierFiles.jl
@@ -37,7 +37,7 @@ function read_csv(files;
                   n_max=Inf,
                   col_select=nothing,
                   comment=nothing,
-                  missingstring="",
+                  missing_value="",
                   escape_double=true,
                   ntasks::Int = Threads.nthreads(),
                   num_threads::Union{Int, Nothing}=nothing, 
@@ -65,7 +65,7 @@ function read_csv(files;
         :select => col_select,
         :limit => limit,
         :comment => comment,
-        :missingstring => missingstring,
+        :missingstring => missing_value,
         :escapechar => escape_double ? '"' : '\\',
         :quotechar => '"',
         :ntasks => effective_ntasks > 1, 
@@ -112,7 +112,7 @@ function read_delim(files;
                   n_max=Inf,
                   col_select=nothing,
                   comment=nothing,
-                  missingstring="",
+                  missing_value="",
                   escape_double=true,
                   ntasks::Int = Threads.nthreads(),
                   num_threads::Union{Int, Nothing}=nothing, 
@@ -141,7 +141,7 @@ function read_delim(files;
         :footerskip => 0,
         :limit => limit,
         :comment => comment,
-        :missingstring => missingstring,
+        :missingstring => missing_value,
         :escapechar => escape_double ? '"' : '\\',
         :quotechar => '"',
         :normalizenames => false,
@@ -187,7 +187,7 @@ function read_tsv(files;
                   n_max=Inf,
                   col_select=nothing,
                   comment=nothing,
-                  missingstring="",
+                  missing_value="",
                   escape_double=true,
                   ntasks::Int = Threads.nthreads(),
                   num_threads::Union{Int, Nothing}=nothing,
@@ -217,7 +217,7 @@ function read_tsv(files;
         :footerskip => 0,
         :limit => limit,
         :comment => comment,
-        :missingstring => missingstring,
+        :missingstring => missing_value,
         :escapechar => escape_double ? '"' : '\\',
         :quotechar => '"',
         :normalizenames => false,
@@ -266,7 +266,7 @@ function read_csv2(files;
                   n_max=Inf,
                   col_select=nothing,
                   comment=nothing,
-                  missingstring="",
+                  missing_value="",
                   escape_double=true,
                   ntasks::Int = Threads.nthreads(),
                   num_threads::Union{Int, Nothing}=nothing,
@@ -297,7 +297,7 @@ function read_csv2(files;
         :footerskip => 0,
         :limit => limit,
         :comment => comment,
-        :missingstring => missingstring,
+        :missingstring => missing_value,
         :escapechar => escape_double ? '"' : '\\',
         :quotechar => '"',
         :normalizenames => false,
@@ -348,7 +348,7 @@ function read_table(file;
         n_max=Inf, 
         comment=nothing, 
         col_select=nothing,
-        missingstring="",
+        missing_value="",
         kwargs...)
     # Open the file and preprocess the lines
     processed_lines, header = open(file, "r") do io
@@ -391,7 +391,7 @@ function read_table(file;
     df = CSV.File(IOBuffer(join(processed_lines, "\n")); 
                   delim=' ', 
                   header=header_option,  # Pass correct header
-                  missingstring=missingstring,
+                  missingstring=missing_value,
                   select=col_select,
                   kwargs...) |> DataFrame
 
@@ -404,7 +404,7 @@ $docstring_write_csv
 function write_csv(
     x::DataFrame,
     file::String;
-    missingstring::String = "NA",
+    missing_value::String = "NA",
     append::Bool = false,
     col_names::Bool = true,
     eol::String = "\n",
@@ -416,7 +416,7 @@ function write_csv(
         x,
         append = append,
         header = col_names && !append,
-        missingstring = missingstring,
+        missingstring = missing_value,
         newline = eol,
         threaded = num_threads > 1    )
 end
@@ -427,7 +427,7 @@ $docstring_write_tsv
 function write_tsv(
     x::DataFrame,
     file::String;
-    missingstring::String = "",
+    missing_value::String = "",
     append::Bool = false,
     col_names::Bool = true,
     eol::String = "\n",
@@ -440,7 +440,7 @@ function write_tsv(
         delim = '\t',  # Use tab as the delimiter for TSV
         append = append,
         header = col_names && !append,
-        missingstring = missingstring,
+        missingstring = missing_value,
         newline = eol,
         threaded = num_threads > 1)
 end
@@ -452,7 +452,7 @@ function write_table(
     x::DataFrame,
     file::String;
     delim::Char = '\t',  # Default to TSV, but allow flexibility
-    missingstring::String = "",
+    missing_value::String = "",
     append::Bool = false,
     col_names::Bool = true,
     eol::String = "\n",
@@ -465,7 +465,7 @@ function write_table(
         delim = delim,  # Flexible delimiter based on argument
         append = append,
         header = col_names && !append,
-        missingstring = missingstring,
+        missingstring = missing_value,
         newline = eol,
         threaded = num_threads > 1)
 end
diff --git a/src/docstrings.jl b/src/docstrings.jl
index 622f02f..814650c 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -1,19 +1,21 @@
 const docstring_read_csv  =
 """
     read_csv(file; delim=',',col_names=true, skip=0, n_max=Inf, 
-        comment=nothing, missingstring="", col_select, escape_double=true, col_types=nothing, num_threads = 1)
+        comment=nothing, missing_value="", col_select, escape_double=true, col_types=nothing, num_threads = 1)
 
 Reads a CSV file or URL into a DataFrame, with options to specify delimiter, column names, and other CSV parsing options.
 
 # Arguments
 - `file`: Path or vector of paths to the CSV file or a URL to a CSV file.
 - `delim`: The character delimiting fields in the file. Default is ','.
+- `decimal`: Character used as the decimal separator when parsing float values. Default is `'.'`.
 - `col_names`: Indicates if the first row of the CSV is used as column names. Can be true, false, or an array of strings. Default is true.
 - `skip`: Number of initial lines to skip before reading data. Default is 0.
 - `n_max`: Maximum number of rows to read. Default is Inf (read all rows).
 - `col_select`: Optional vector of symbols or strings to select which columns to load.
+- `col_types`: Optional Dict specifying column types. Default is `nothing` (types are inferred).
 - `comment`: Character that starts a comment line. Lines beginning with this character are ignored. Default is nothing (no comment lines).
-- `missingstring`: String that represents missing values in the CSV. Default is "", can be set to a vector of multiple items.
+- `missing_value`: String that represents missing values in the CSV. Default is "", can be set to a vector of multiple items.
 - `escape_double`: Indicates whether to interpret two consecutive quote characters as a single quote in the data. Default is true.
 - `num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. Defaults to 1
 # Examples
@@ -22,33 +24,44 @@ julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva
 
 julia> write_csv(df, "csvtest.csv");
 
-julia> read_csv("csvtest.csv", skip = 2, n_max = 3, missingstring = ["95", "Charlie"])
+julia> read_csv("csvtest.csv", skip = 2, n_max = 3, missing_value = ["95", "Charlie"])
 3×3 DataFrame
- Row │ ID     Name     Score   
-     │ Int64  String7  Int64?  
+ Row │ ID     Name      Score   
+     │ Int64  String7?  Int64?  
+─────┼──────────────────────────
+   1 │     3  missing        77
+   2 │     4  David          85
+   3 │     5  Eva       missing 
+
+julia> read_csv("csvtest.csv", skip = 2, n_max = 3, col_types = Dict(:ID => Float64))
+3×3 DataFrame
+ Row │ ID       Name     Score 
+     │ Float64  String7  Int64 
 ─────┼─────────────────────────
-   1 │     3  missing       77
-   2 │     4  David         85
-   3 │     5  Eva      missing 
+   1 │     3.0  Charlie     77
+   2 │     4.0  David       85
+   3 │     5.0  Eva         95
 ```
 """
 
 const docstring_read_tsv  =
 """
     read_tsv(file; delim='\t',col_names=true, skip=0, n_max=Inf, 
-        comment=nothing, missingstring="", col_select, escape_double=true, col_types=nothing)
+        comment=nothing, missing_value="", col_select, escape_double=true, col_types=nothing)
 
 Reads a TSV file or URL into a DataFrame, with options to specify delimiter, column names, and other CSV parsing options.
 
 # Arguments
 - `file`: Path or vector of paths to the TSV file or a URL to a TSV file.
 - `delim`: The character delimiting fields in the file. Default is ','.
+- `decimal`: Character used as the decimal separator when parsing float values. Default is `'.'`.
 - `col_names`: Indicates if the first row of the CSV is used as column names. Can be true, false, or an array of strings. Default is true.
 - `skip`: Number of initial lines to skip before reading data. Default is 0.
 - `n_max`: Maximum number of rows to read. Default is Inf (read all rows).
 - `col_select`: Optional vector of symbols or strings to select which columns to load.
 - `comment`: Character that starts a comment line. Lines beginning with this character are ignored. Default is nothing (no comment lines).
-- `missingstring`: String that represents missing values in the CSV. Default is "", can be set to a vector of multiple items.
+- `col_types`: Optional Dict specifying column types. Default is `nothing` (types are inferred).
+- `missing_value`: String that represents missing values in the CSV. Default is "", can be set to a vector of multiple items.
 - `escape_double`: Indicates whether to interpret two consecutive quote characters as a single quote in the data. Default is true.
 - `num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. Default is the number of available threads.
 
@@ -58,33 +71,35 @@ julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva
 
 julia> write_tsv(df, "tsvtest.tsv");
 
-julia> read_tsv("tsvtest.tsv", skip = 2, n_max = 3, missingstring = ["Charlie"])
+julia> read_tsv("tsvtest.tsv", skip = 2, n_max = 3, missing_value = ["Charlie"])
 3×3 DataFrame
- Row │ ID     Name     Score 
-     │ Int64  String7  Int64 
-─────┼───────────────────────
-   1 │     3  missing     77
-   2 │     4  David       85
-   3 │     5  Eva         95
+ Row │ ID     Name      Score 
+     │ Int64  String7?  Int64 
+─────┼────────────────────────
+   1 │     3  missing      77
+   2 │     4  David        85
+   3 │     5  Eva          95
 ```
 """
 
 const docstring_read_delim = 
 """
     read_delim(file; delim='\t',col_names=true, skip=0, n_max=Inf, 
-        comment=nothing, missingstring="", col_select, escape_double=true, col_types=nothing)
+        comment=nothing, missing_value="", col_select, escape_double=true, col_types=nothing)
 
 Reads a delimited file or URL into a DataFrame, with options to specify delimiter, column names, and other CSV parsing options.
 
 # Arguments
 - `file`: Path or vector of paths to the CSV file or a URL to a CSV file.
 - `delim`: The character delimiting fields in the file. Default is ','.
+- `decimal`: Character used as the decimal separator when parsing float values. Default is `'.'`.
 - `col_names`: Indicates if the first row of the CSV is used as column names. Can be true, false, or an array of strings. Default is true.
 - `skip`: Number of initial lines to skip before reading data. Default is 0.
 - `n_max`: Maximum number of rows to read. Default is Inf (read all rows).
 - `col_select`: Optional vector of symbols or strings to select which columns to load.
 - `comment`: Character that starts a comment line. Lines beginning with this character are ignored. Default is nothing (no comment lines).
-- `missingstring`: String that represents missing values in the CSV. Default is "", can be set to a vector of multiple items.
+- `col_types`: Optional Dict to allow for column type specification
+- `missing_value`: String that represents missing values in the CSV. Default is "", can be set to a vector of multiple items.
 - `escape_double`: Indicates whether to interpret two consecutive quote characters as a single quote in the data. Default is true.
 - `col_types`: An optional specification of column types, can be a single type applied to all columns, or a collection of types with one for each column. Default is nothing (types are inferred).
 - `num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. Default is the number of available threads.
@@ -178,13 +193,13 @@ julia> fwf_empty(path, num_lines=4, col_names = ["Name", "Age", "ID", "Position"
 
 const docstring_write_csv  =
 """
-    write_csv(DataFrame, filepath; na = "", append = false, col_names = true, missingstring, eol = "\n", num_threads = Threads.nthreads())
+    write_csv(DataFrame, filepath; na = "", append = false, col_names = true, missing_value, eol = "\n", num_threads = Threads.nthreads())
 Write a DataFrame to a CSV (comma-separated values) file.
 
 # Arguments
 - `x`: The DataFrame to write to the CSV file.
 - `file`: The path to the output CSV file.
-- `missingstring`: = "": The string to represent missing values in the output file. Default is an empty string.
+- `missing_value`: = "": The string to represent missing values in the output file. Default is an empty string.
 - `append`: Whether to append to the file if it already exists. Default is false.
 - `col_names`: = true: Whether to write column names as the first line of the file. Default is true.
 - `eol`: = "\n": The end-of-line character to use in the output file. Default is the newline character.
@@ -200,13 +215,13 @@ julia> write_csv(df, "csvtest.csv");
 
 const docstring_write_tsv  =
 """
-    write_tsv(DataFrame, filepath; na = "", append = false, col_names = true, missingstring, eol = "\n", num_threads = Threads.nthreads())
+    write_tsv(DataFrame, filepath; na = "", append = false, col_names = true, missing_value, eol = "\n", num_threads = Threads.nthreads())
 Write a DataFrame to a TSV (tab-separated values) file.
 
 # Arguments
 - `x`: The DataFrame to write to the TSV file.
 - `file`: The path to the output TSV file.
-- `missingstring`: = "": The string to represent missing values in the output file. Default is an empty string.
+- `missing_value`: = "": The string to represent missing values in the output file. Default is an empty string.
 - `append`: Whether to append to the file if it already exists. Default is false.
 - `col_names`: = true: Whether to write column names as the first line of the file. Default is true.
 - `eol`: = "\n": The end-of-line character to use in the output file. Default is the newline character.
@@ -222,7 +237,7 @@ julia> write_tsv(df, "tsvtest.tsv");
 
 const docstring_read_table =
 """
-    read_table(file; col_names=true, skip=0, n_max=Inf, comment=nothing, col_select, missingstring="", kwargs...)
+    read_table(file; col_names=true, skip=0, n_max=Inf, comment=nothing, col_select, missing_value="", kwargs...)
 
 Read a table from a file where columns are separated by any amount of whitespace, processing it into a DataFrame.
 
@@ -233,7 +248,7 @@ Read a table from a file where columns are separated by any amount of whitespace
 - `n_max`: The maximum number of lines to read from the file, after skipping. Inf means read all lines.
 - `col_select`: Optional vector of symbols or strings to select which columns to load.
 - `comment`: A character or string indicating the start of a comment. Lines starting with this character are ignored.
-- `missingstring`: The string that represents missing values in the table.
+- `missing_value`: The string that represents missing values in the table.
 - `kwargs`: Additional keyword arguments passed to CSV.File.
 # Examples
 ```jldoctest 
@@ -262,7 +277,7 @@ Write a DataFrame to a file, allowing for customization of the delimiter and oth
 - `x`: The DataFrame to write to a file.
 - `file`: The path to the file where the DataFrame will be written.
 -delim: Character to use as the field delimiter. The default is tab ('\t'), making it a TSV (tab-separated values) file by default, but can be changed to accommodate other formats.
-- `missingstring`: The string to represent missing data in the output file.
+- `missing_value`: The string to represent missing data in the output file.
 - `append`: Whether to append to the file if it already exists. If false, the file will be overwritten.
 - `col_names`: Whether to write column names as the first line of the file. If appending to an existing file with append = true, column names will not be written regardless of this parameter's value.
 - `eol`: The end-of-line character to use in the file. Defaults to "\n".
@@ -278,7 +293,7 @@ julia> write_table(df, "tabletest.txt");
 
 const docstring_read_xlsx =
 """
-    read_xlsx(path; sheet, range, col_names, col_types, missingstring, trim_ws, skip, n_max, guess_max)
+    read_xlsx(path; sheet, range, col_names, col_types, missing_value, trim_ws, skip, n_max, guess_max)
 Read data from an Excel file into a DataFrame.
 
 # Arguments
@@ -287,7 +302,7 @@ Read data from an Excel file into a DataFrame.
 - `range`: Specifies a specific range of cells to be read from the sheet. If nothing, the entire sheet is read.
 - `col_names`: Indicates whether the first row of the specified range should be treated as column names. If false, columns will be named automatically.
 - `col_types`: Allows specifying column types explicitly. Can be a single type applied to all columns, a list or a dictionary mapping column names or indices to types. If nothing, types will be inferred.
-- `missingstring`: The value or vector that represents missing values in the Excel file.
+- `missing_value`: The value or vector that represents missing values in the Excel file.
 - `trim_ws`: Whether to trim leading and trailing whitespace from cells in the Excel file.
 - `skip`: Number of rows to skip at the beginning of the sheet or range before reading data.
 - `n_max`: The maximum number of rows to read from the sheet or range, after skipping. Inf means read all available rows.
@@ -303,7 +318,7 @@ julia> df2 = DataFrame(AA=["aa", "bb"], AB=[10.1, 10.2]);
 
 julia> write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path="xlsxtest.xlsx", overwrite = true);
 
-julia> read_xlsx("xlsxtest.xlsx", sheet = "REPORT_A", skip = 1, n_max = 4, missingstring = [2])
+julia> read_xlsx("xlsxtest.xlsx", sheet = "REPORT_A", skip = 1, n_max = 4, missing_value = [2])
 3×3 DataFrame
  Row │ integers  strings               floats   
      │ Int64?    String?               Float64? 
diff --git a/src/xlfiles.jl b/src/xlfiles.jl
index dfde3e5..cc0c0e7 100644
--- a/src/xlfiles.jl
+++ b/src/xlfiles.jl
@@ -46,7 +46,7 @@ function read_xlsx(
     sheet = nothing,
     range = nothing,
     col_names = true,
-    missingstring = "",
+    missing_value = "",
     trim_ws = true,
     skip = 0,
     n_max = Inf
@@ -87,8 +87,8 @@ function read_xlsx(
     end
 
     # Replace missing strings with `missing` if applicable
-    if !isempty(missingstring)
-        for missing_value in missingstring
+    if !isempty(missing_value)
+        for missing_value in missing_value
             for col in names(data)
                 data[!, col] = replace(data[!, col], missing_value => missing)
             end

From 87c0a3f142d8bd632608bbce1096bf9fafcaac1e Mon Sep 17 00:00:00 2001
From: Daniel Rizk <rizkytennis@gmail.com>
Date: Tue, 12 Nov 2024 19:50:17 -0500
Subject: [PATCH 3/4] final doc update

---
 docs/examples/UserGuide/delim.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/examples/UserGuide/delim.jl b/docs/examples/UserGuide/delim.jl
index baae173..0d0ad05 100644
--- a/docs/examples/UserGuide/delim.jl
+++ b/docs/examples/UserGuide/delim.jl
@@ -29,7 +29,7 @@ read_csv("https://raw.githubusercontent.com/TidierOrg/TidierFiles.jl/main/testin
 # - `escape_double`: Interpret two consecutive quote characters as a single quote. Default is `true`.
 # - `col_types`: Optional specification of column types using a Dict. Default is `nothing` (types are inferred).
 # - `num_threads`: Number of threads to use for parallel execution. Default is 1 for `read_csv` and the number of available threads for `read_tsv` and `read_delim`.
-
+# - `kwargs`: Any other keyword argument accepted by CSV.jl can be passed to any of the above functions and will be forwarded to CSV.jl.
 # The functions return a DataFrame containing the parsed data from the file.
 
 # ## `write_csv` and `write_tsv`

From c190c366e1cd4d01e7712d5bf9ead589d2b5fb5e Mon Sep 17 00:00:00 2001
From: Daniel Rizk <rizkytennis@gmail.com>
Date: Tue, 12 Nov 2024 20:04:05 -0500
Subject: [PATCH 4/4] one last docstring change

---
 src/docstrings.jl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/docstrings.jl b/src/docstrings.jl
index 814650c..c0c469f 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -101,7 +101,6 @@ Reads a delimited file or URL into a DataFrame, with options to specify delimite
 - `col_types`: Optional Dict to allow for column type specification
 - `missing_value`: String that represents missing values in the CSV. Default is "", can be set to a vector of multiple items.
 - `escape_double`: Indicates whether to interpret two consecutive quote characters as a single quote in the data. Default is true.
-- `col_types`: An optional specification of column types, can be a single type applied to all columns, or a collection of types with one for each column. Default is nothing (types are inferred).
 - `num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. Default is the number of available threads.
 
 # Examples
@@ -302,7 +301,7 @@ Read data from an Excel file into a DataFrame.
 - `range`: Specifies a specific range of cells to be read from the sheet. If nothing, the entire sheet is read.
 - `col_names`: Indicates whether the first row of the specified range should be treated as column names. If false, columns will be named automatically.
 - `col_types`: Allows specifying column types explicitly. Can be a single type applied to all columns, a list or a dictionary mapping column names or indices to types. If nothing, types will be inferred.
-- `missing_value`: The value or vector that represents missing values in the Excel file.
+- `missing_value`: The value or vector of values that represents missing values in the Excel file. Unlike the CSV.jl-based functions, these values do not need to be written as strings.
 - `trim_ws`: Whether to trim leading and trailing whitespace from cells in the Excel file.
 - `skip`: Number of rows to skip at the beginning of the sheet or range before reading data.
 - `n_max`: The maximum number of rows to read from the sheet or range, after skipping. Inf means read all available rows.