Skip to content

Commit

Permalink
Change file path for data files.
Browse files Browse the repository at this point in the history
  • Loading branch information
kdpsingh committed Apr 12, 2024
1 parent 1b4b202 commit 7ffd70c
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 147 deletions.
144 changes: 72 additions & 72 deletions src/TidierFiles.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,29 +20,6 @@ include("fwf.jl")
include("xlfiles.jl")
include("statsfiles.jl")

"""
$docstring_write_csv
"""
function write_csv(
    x::DataFrame,
    file::String;
    missingstring::String = "NA",
    append::Bool = false,
    col_names::Bool = true,
    eol::String = "\n",
    num_threads::Int = Threads.nthreads(),
)
    # The header flag is on only when column names are requested and we are
    # not appending to an existing file.
    emit_header = col_names && !append
    # Asking for more than one thread switches the `threaded` flag on.
    use_threads = num_threads > 1

    # Delegate to CSV.write, leaving its delimiter at the default.
    return CSV.write(
        file,
        x;
        append = append,
        header = emit_header,
        missingstring = missingstring,
        newline = eol,
        threaded = use_threads,
    )
end

"""
$docstring_read_csv
"""
Expand Down Expand Up @@ -107,30 +84,6 @@ function read_csv(file;
return df
end

"""
$docstring_write_tsv
"""
function write_tsv(
    x::DataFrame,
    file::String;
    missingstring::String = "",
    append::Bool = false,
    col_names::Bool = true,
    eol::String = "\n",
    num_threads::Int = Threads.nthreads(),
)
    # The header flag is on only when column names are requested and we are
    # not appending to an existing file.
    emit_header = col_names && !append
    # Asking for more than one thread switches the `threaded` flag on.
    use_threads = num_threads > 1

    # Delegate to CSV.write with a fixed tab delimiter.
    return CSV.write(
        file,
        x;
        delim = '\t',
        append = append,
        header = emit_header,
        missingstring = missingstring,
        newline = eol,
        threaded = use_threads,
    )
end

"""
$docstring_read_delim
"""
Expand Down Expand Up @@ -250,31 +203,6 @@ function read_tsv(file;
return df
end

"""
$docstring_write_table
"""
function write_table(
    x::DataFrame,
    file::String;
    delim::Char = '\t',  # tab by default; callers may pass any single character
    missingstring::String = "",
    append::Bool = false,
    col_names::Bool = true,
    eol::String = "\n",
    num_threads::Int = Threads.nthreads(),
)
    # The header flag is on only when column names are requested and we are
    # not appending to an existing file.
    emit_header = col_names && !append
    # Asking for more than one thread switches the `threaded` flag on.
    use_threads = num_threads > 1

    # Delegate to CSV.write, forwarding the caller-chosen delimiter.
    return CSV.write(
        file,
        x;
        delim = delim,
        append = append,
        header = emit_header,
        missingstring = missingstring,
        newline = eol,
        threaded = use_threads,
    )
end

"""
$docstring_read_table
"""
Expand Down Expand Up @@ -334,4 +262,76 @@ function read_table(file;
return df
end

"""
$docstring_write_csv
"""
function write_csv(
    x::DataFrame,
    file::String;
    missingstring::String = "NA",
    append::Bool = false,
    col_names::Bool = true,
    eol::String = "\n",
    num_threads::Int = Threads.nthreads(),
)
    # The header flag is on only when column names are requested and we are
    # not appending to an existing file.
    emit_header = col_names && !append
    # Asking for more than one thread switches the `threaded` flag on.
    use_threads = num_threads > 1

    # Delegate to CSV.write, leaving its delimiter at the default.
    return CSV.write(
        file,
        x;
        append = append,
        header = emit_header,
        missingstring = missingstring,
        newline = eol,
        threaded = use_threads,
    )
end

"""
$docstring_write_tsv
"""
function write_tsv(
    x::DataFrame,
    file::String;
    missingstring::String = "",
    append::Bool = false,
    col_names::Bool = true,
    eol::String = "\n",
    num_threads::Int = Threads.nthreads(),
)
    # The header flag is on only when column names are requested and we are
    # not appending to an existing file.
    emit_header = col_names && !append
    # Asking for more than one thread switches the `threaded` flag on.
    use_threads = num_threads > 1

    # Delegate to CSV.write with a fixed tab delimiter.
    return CSV.write(
        file,
        x;
        delim = '\t',
        append = append,
        header = emit_header,
        missingstring = missingstring,
        newline = eol,
        threaded = use_threads,
    )
end

"""
$docstring_write_table
"""
function write_table(
    x::DataFrame,
    file::String;
    delim::Char = '\t',  # tab by default; callers may pass any single character
    missingstring::String = "",
    append::Bool = false,
    col_names::Bool = true,
    eol::String = "\n",
    num_threads::Int = Threads.nthreads(),
)
    # The header flag is on only when column names are requested and we are
    # not appending to an existing file.
    emit_header = col_names && !append
    # Asking for more than one thread switches the `threaded` flag on.
    use_threads = num_threads > 1

    # Delegate to CSV.write, forwarding the caller-chosen delimiter.
    return CSV.write(
        file,
        x;
        delim = delim,
        append = append,
        header = emit_header,
        missingstring = missingstring,
        newline = eol,
        threaded = use_threads,
    )
end

end
57 changes: 31 additions & 26 deletions src/docstrings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Reads a CSV file or URL into a DataFrame, with options to specify delimiter, col
`num_threads`: specifies the number of concurrent tasks or threads to use for processing, allowing for parallel execution. Defaults to 1
# Examples
```jldoctest
julia> read_csv(joinpath(tempdir(), "csvtest.csv"), skip = 2, n_max = 3, missingstring = ["95", "Charlie"])
julia> read_csv("csvtest.csv", skip = 2, n_max = 3, missingstring = ["95", "Charlie"])
3×3 DataFrame
Row │ ID Name Score
│ Int64 String7 Int64?
Expand Down Expand Up @@ -50,7 +50,7 @@ Reads a TSV file or URL into a DataFrame, with options to specify delimiter, col
# Examples
```jldoctest
julia> read_tsv(joinpath(tempdir(), "tsvtest.tsv"), skip = 2, n_max = 3, missingstring = ["Charlie"])
julia> read_tsv("tsvtest.tsv", skip = 2, n_max = 3, missingstring = ["Charlie"])
3×3 DataFrame
Row │ ID Name Score
│ Int64 String7 Int64
Expand Down Expand Up @@ -83,7 +83,7 @@ Reads a delimited file or URL into a DataFrame, with options to specify delimite
# Examples
```jldoctest
julia> read_delim(joinpath(tempdir(), "csvtest.csv"), delim = ",", col_names = false, num_threads = 4) # col_names are false here for the purpose of demonstration
julia> read_delim("csvtest.csv", delim = ",", col_names = false, num_threads = 4) # col_names are false here for the purpose of demonstration
6×3 DataFrame
Row │ Column1 Column2 Column3
│ String3 String7 String7
Expand All @@ -97,8 +97,6 @@ julia> read_delim(joinpath(tempdir(), "csvtest.csv"), delim = ",", col_names = f
```
"""



const docstring_read_fwf =
"""
read_fwf(filepath::String, widths_colnames::Tuple; skip_to=0, n_max=nothing)
Expand All @@ -112,8 +110,12 @@ Read fixed-width format (FWF) files into a DataFrame.
- `skip_to`=0: Number of lines at the beginning of the file to skip before reading data.
- `n_max`=nothing: Maximum number of lines to read from the file. If nothing, read all lines.
# Examples
```jldoctest
julia> path = joinpath(tempdir(), "fwftest.txt");
```jldoctest
julia> path = "fwftest.txt";
julia> open(path, "w") do file
write(file, fwf_data)
end;
julia> read_fwf(path, fwf_empty(path, num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"]), skip_to=3, n_max=3)
3×5 DataFrame
Expand All @@ -126,7 +128,6 @@ julia> read_fwf(path, fwf_empty(path, num_lines=4, col_names = ["Name", "Age", "
```
"""


const docstring_fwf_empty =
"""
fwf_empty(filepath::String; num_lines::Int=4, col_names=nothing)
Expand All @@ -143,10 +144,14 @@ num_lines::Int=4: Number of lines to sample from the beginning of the file for a
- A vector of strings representing the column names.
# Examples
```jldoctest
julia> fwf_empty(joinpath(tempdir(), "fwftest.txt"))
julia> path = "fwftest.txt";
julia> open(path, "w") do file
write(file, fwf_data)
end;
julia> fwf_empty(path)
([13, 5, 8, 20, 8], ["Column_1", "Column_2", "Column_3", "Column_4", "Column_5"])
julia> fwf_empty(joinpath(tempdir(), "fwftest.txt"), num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"])
julia> fwf_empty(path, num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"])
([13, 5, 8, 20, 8], ["Name", "Age", "ID", "Position", "Salary"])
```
"""
Expand All @@ -170,7 +175,7 @@ Write a DataFrame to a CSV (comma-separated values) file.
```jldoctest
julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]);
julia> write_csv(df, joinpath(tempdir(), "csvtest.csv"));
julia> write_csv(df, "csvtest.csv");
```
"""

Expand All @@ -192,7 +197,7 @@ Write a DataFrame to a TSV (tab-separated values) file.
```jldoctest
julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]);
julia> write_tsv(df, joinpath(tempdir(), "tsvtest.tsv"));
julia> write_tsv(df, "tsvtest.tsv");
```
"""

Expand All @@ -213,7 +218,7 @@ Read a table from a file where columns are separated by any amount of whitespace
-`kwargs`: Additional keyword arguments passed to CSV.File.
# Examples
```jldoctest
julia> read_table(joinpath(tempdir(), "tabletest.txt"), skip = 2, n_max = 3, col_select = ["Name"])
julia> read_table("tabletest.txt", skip = 2, n_max = 3, col_select = ["Name"])
3×1 DataFrame
Row │ Name
│ String7
Expand Down Expand Up @@ -244,7 +249,7 @@ Write a DataFrame to a file, allowing for customization of the delimiter and oth
```jldoctest
julia> df = DataFrame(ID = 1:5, Name = ["Alice", "Bob", "Charlie", "David", "Eva"], Score = [88, 92, 77, 85, 95]);
julia> write_table(df, joinpath(tempdir(), "tabletest.txt"));
julia> write_table(df, "tabletest.txt");
```
"""

Expand All @@ -267,7 +272,7 @@ Read data from an Excel file into a DataFrame.
# Examples
```jldoctest
julia> read_xlsx(joinpath(tempdir(), "xlsxtest.xlsx"), sheet = "REPORT_A", skip = 1, n_max = 4, missingstring = [2])
julia> read_xlsx("xlsxtest.xlsx", sheet = "REPORT_A", skip = 1, n_max = 4, missingstring = [2])
3×3 DataFrame
Row │ integers strings floats
│ Any String Float64
Expand Down Expand Up @@ -296,7 +301,7 @@ julia> df = DataFrame(integers=[1, 2, 3, 4],
julia> df2 = DataFrame(AA=["aa", "bb"], AB=[10.1, 10.2]);
julia> write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path=joinpath(tempdir(), "xlsxtest.xlsx"), overwrite = true);
julia> write_xlsx(("REPORT_A" => df, "REPORT_B" => df2); path="xlsxtest.xlsx", overwrite = true);
```
"""

Expand All @@ -316,7 +321,7 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. If Inf,
# Examples
```jldoctest
julia> read_sas(joinpath(tempdir(), "test.dta"))
julia> read_dta("test.dta")
2×2 DataFrame
Row │ AA AB
│ String3 Float64
Expand All @@ -341,15 +346,15 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. If Inf,
# Examples
```jldoctest
julia> read_sas(joinpath(tempdir(), "test.sas7bdat"))
julia> read_sas("test.sas7bdat")
2×2 DataFrame
Row │ AA AB
│ String3 Float64
─────┼──────────────────
1 │ sav 10.1
2 │ por 10.2
julia> read_sas(joinpath(tempdir(), "test.xpt"))
julia> read_sas("test.xpt")
2×2 DataFrame
Row │ AA AB
│ String3 Float64
Expand All @@ -373,15 +378,15 @@ n_max=Inf: Maximum number of rows to read from the file, after skipping. If Inf,
# Examples
```jldoctest
julia> read_sav(joinpath(tempdir(), "test.sav"))
julia> read_sav("test.sav")
2×2 DataFrame
Row │ AA AB
│ String Float64
─────┼─────────────────
1 │ sav 10.1
2 │ por 10.2
julia> read_sav(joinpath(tempdir(), "test.por"))
julia> read_sav("test.por")
2×2 DataFrame
Row │ AA AB
│ String Float64
Expand All @@ -404,15 +409,15 @@ Arguments
```jldoctest
julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]);
julia> write_sav(df, joinpath(tempdir(), "test.sav"))
julia> write_sav(df, "test.sav")
2×2 ReadStatTable:
Row │ AA AB
│ String Float64?
─────┼──────────────────
1 │ sav 10.1
2 │ por 10.2
julia> write_sav(df, joinpath(tempdir(), "test.por"))
julia> write_sav(df, "test.por")
2×2 ReadStatTable:
Row │ AA AB
│ String Float64?
Expand All @@ -434,15 +439,15 @@ Arguments
```jldoctest
julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]);
julia> write_sas(df , joinpath(tempdir(), "test.sas7bdat"))
julia> write_sas(df , "test.sas7bdat")
2×2 ReadStatTable:
Row │ AA AB
│ String Float64?
─────┼──────────────────
1 │ sav 10.1
2 │ por 10.2
julia> write_sas(df , joinpath(tempdir(), "test.xpt"))
julia> write_sas(df , "test.xpt")
2×2 ReadStatTable:
Row │ AA AB
│ String Float64?
Expand All @@ -465,7 +470,7 @@ Arguments
```jldoctest
julia> df = DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]);
julia> write_dta(df , joinpath(tempdir(), "test.dta"))
julia> write_dta(df , "test.dta")
2×2 ReadStatTable:
Row │ AA AB
│ String Float64?
Expand Down
Loading

0 comments on commit 7ffd70c

Please sign in to comment.