From a8213426014919466111a7b27a6446f01b47909c Mon Sep 17 00:00:00 2001
From: drizk1 <rizkytennis@gmail.com>
Date: Sun, 14 Apr 2024 22:41:39 -0400
Subject: [PATCH 1/2] adds parquet/arrow support

---
 Project.toml                       |  4 +-
 README.md                          |  4 +-
 docs/examples/UserGuide/Arrow.jl   | 19 +++++++
 docs/examples/UserGuide/parquet.jl | 20 +++++++
 docs/mkdocs.yml                    |  2 +
 docs/src/index.md                  |  6 +-
 src/TidierFiles.jl                 |  6 +-
 src/arrow_files.jl                 | 73 +++++++++++++++++++++++++
 src/docstrings.jl                  | 88 +++++++++++++++++++++++++++++-
 src/parquet_files.jl               | 75 +++++++++++++++++++++++++
 10 files changed, 291 insertions(+), 6 deletions(-)
 create mode 100644 docs/examples/UserGuide/Arrow.jl
 create mode 100644 docs/examples/UserGuide/parquet.jl
 create mode 100644 src/arrow_files.jl
 create mode 100644 src/parquet_files.jl

diff --git a/Project.toml b/Project.toml
index aa8e1be..505cb3d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,17 +4,18 @@ authors = ["Daniel Rizk <rizkytennis@gmail.com> and contributors"]
 version = "0.1.0"
 
 [deps]
+Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
+Parquet2 = "98572fba-bba0-415d-956f-fa77e587d26d"
 ReadStatTables = "52522f7a-9570-4e34-8ac6-c005c74d4b84"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0"
 
 [compat]
-julia = "1.9"
 CSV = "0.10"
 DataFrames = "1.5"
 Dates = "1.9"
@@ -23,6 +24,7 @@ HTTP = "1.10"
 ReadStatTables = "0.3"
 Reexport = "0.2, 1"
 XLSX = "0.10"
+julia = "1.9"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/README.md b/README.md
index 52840e5..b4c9f15 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 
 TidierFiles.jl is a 100% Julia implementation of the readr, haven, readxl, and writexl R packages.
 
-Powered by the CSV.jl, XLSX.jl and ReadStatTables.jl packages, TidierFiles.jl aims to bring a consistent interface to the reading and writing of tabular data, including a consistent syntax to read files locally versus from the web and consistent keyword arguments across data formats.
+Powered by the CSV.jl, XLSX.jl, ReadStatTables.jl, Arrow.jl, and Parquet2.jl packages, TidierFiles.jl aims to bring a consistent interface to the reading and writing of tabular data, including a consistent syntax to read files locally versus from the web and consistent keyword arguments across data formats.
 
 
 Currently supported file types:
@@ -22,6 +22,8 @@ Currently supported file types:
 - `read_sav` and `write_sav` (.sav and .por)
 - `read_sas` and `write_sas` (.sas7bdat and .xpt)
 - `read_dta` and `write_dta` (.dta) 
+- `read_arrow` and `write_arrow`
+- `read_parquet` and `write_parquet`
 
 # Examples
 
diff --git a/docs/examples/UserGuide/Arrow.jl b/docs/examples/UserGuide/Arrow.jl
new file mode 100644
index 0000000..d04dd7e
--- /dev/null
+++ b/docs/examples/UserGuide/Arrow.jl
@@ -0,0 +1,19 @@
+# Arrow file reading and writing is powered by Arrow.jl
+# ## `read_arrow`
+# `read_arrow(path; skip=0, n_max=Inf, col_select=nothing)`
+
+# This function reads an Arrow (.arrow) file into a DataFrame. The arguments are:
+
+# - `path`: The path to the .arrow file.
+# - `skip`: Number of initial rows to skip before reading data. Default is 0.
+# - `n_max`: Maximum number of rows to read. Default is `Inf` (read all rows).
+# - `col_select`: Optional vector of symbols or strings to select which columns to load. Default is `nothing` (load all columns).
+
+# ## `write_arrow` 
+# `write_arrow(df, path)`
+
+# This function writes a DataFrame to an Arrow (.arrow) file. The arguments are:
+
+# - `df`: The DataFrame to be written to a file.
+# - `path`: The path where the .arrow file will be created. If a file at this path already exists, it will be overwritten.
+# - Additional arguments for writing arrow files are not outlined here, but should be available through the same interface of `Arrow.write`. Refer to Arrow.jl [documentation](https://arrow.apache.org/julia/stable/manual/#Arrow.write) at their page for further explanation.
\ No newline at end of file
diff --git a/docs/examples/UserGuide/parquet.jl b/docs/examples/UserGuide/parquet.jl
new file mode 100644
index 0000000..d636dd6
--- /dev/null
+++ b/docs/examples/UserGuide/parquet.jl
@@ -0,0 +1,20 @@
+# Parquet file reading and writing is powered by Parquet2.jl
+# ## `read_parquet`
+# `read_parquet(path; col_names=true, skip=0, n_max=Inf, col_select=nothing)`
+
+# This function reads a Parquet (.parquet) file into a DataFrame. The arguments are:
+
+# - `path`: The path to the .parquet file.
+# - `col_names`: If `false`, the original column names are placed in the first row and the columns are renamed `Column1`, `Column2`, and so on. Default is `true`.
+# - `skip`: Number of initial rows to skip before reading data. Default is 0.
+# - `n_max`: Maximum number of rows to read. Default is `Inf` (read all rows).
+# - `col_select`: Optional vector of symbols or strings to select which columns to load. Default is `nothing` (load all columns).
+
+# ## `write_parquet` 
+# `write_parquet(df, path)`
+
+# This function writes a DataFrame to a Parquet (.parquet) file. The arguments are:
+
+# - `df`: The DataFrame to be written to a file.
+# - `path`: The path where the .parquet file will be created. If a file at this path already exists, it will be overwritten.
+# - Additional arguments for writing parquet files are not outlined here, but should be available through the same interface of `Parquet2.writefile`. Refer to [documentation](https://expandingman.gitlab.io/Parquet2.jl/#Writing-Data) at their page for further explanation.
\ No newline at end of file
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
index c6f3a86..72ad018 100644
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -119,4 +119,6 @@ nav:
   - "Delimited Files": "examples/generated/UserGuide/delim.md"
   - "Excel Files": "examples/generated/UserGuide/xl.md"
   - "Stats Files": "examples/generated/UserGuide/stats.md"
+  - "Arrow Files": "examples/generated/UserGuide/Arrow.md"
+  - "Parquet Files": "examples/generated/UserGuide/parquet.md
   - "Reference" : "reference.md"
\ No newline at end of file
diff --git a/docs/src/index.md b/docs/src/index.md
index cbeb60a..169c788 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -1,10 +1,12 @@
 # TidierFiles.jl
 
+<img src="/assets/logo.png" align="right" style="padding-left:10px;" width="150"/>
+
 ## What is TidierFiles.jl?
 
 TidierFiles.jl is a 100% Julia implementation of the readr, haven, readxl, and writexl R packages.
 
-Powered by the CSV.jl, XLSX.jl and ReadStatTables.jl packages, TidierFiles.jl aims to bring a consistent interface to the reading and writing of tabular data, including a consistent syntax to read files locally versus from the web and consistent keyword arguments across data formats.
+Powered by the CSV.jl, XLSX.jl, ReadStatTables.jl, Arrow.jl and Parquet2.jl packages, TidierFiles.jl aims to bring a consistent interface to the reading and writing of tabular data, including a consistent syntax to read files locally versus from the web and consistent keyword arguments across data formats.
 
 
 Currently supported file types:
@@ -17,6 +19,8 @@ Currently supported file types:
 - `read_sav` and `write_sav` (.sav and .por)
 - `read_sas` and `write_sas` (.sas7bdat and .xpt)
 - `read_dta` and `write_dta` (.dta) 
+- `read_arrow` and `write_arrow`
+- `read_parquet` and `write_parquet`
 
 # Examples
 
diff --git a/src/TidierFiles.jl b/src/TidierFiles.jl
index ead47f9..00dbc28 100644
--- a/src/TidierFiles.jl
+++ b/src/TidierFiles.jl
@@ -7,18 +7,22 @@ using Dates #bc XLSX type parsing does not seem to be working so i made some aut
 using HTTP
 using ReadStatTables
 using Reexport
+using Parquet2
+using Arrow
 
 @reexport using DataFrames: DataFrame
 
 export read_csv, write_csv, read_tsv, write_tsv, read_table, write_table, read_delim, read_xlsx, write_xlsx, 
  read_fwf, write_fwf, fwf_empty, fwf_positions, fwf_positions, read_sav, read_sas, read_dta, write_sav, write_sas, 
- write_dta
+ write_dta, read_arrow, write_arrow, read_parquet, write_parquet
  
 
 include("docstrings.jl")
 include("fwf.jl")
 include("xlfiles.jl")
 include("statsfiles.jl")
+include("parquet_files.jl")
+include("arrow_files.jl")
 
 """
 $docstring_read_csv
diff --git a/src/arrow_files.jl b/src/arrow_files.jl
new file mode 100644
index 0000000..7cd4da7
--- /dev/null
+++ b/src/arrow_files.jl
@@ -0,0 +1,73 @@
+"""
+$docstring_read_arrow
+"""
+function read_arrow(data_file;
+                    col_select=nothing,
+                    skip=0,
+                    n_max=Inf)
+    # Read an Arrow file (local path or http/https URL) into a DataFrame, then
+    # apply the optional column selection and the skip / n_max row window.
+    if startswith(data_file, "http://") || startswith(data_file, "https://")
+        response = HTTP.get(data_file)
+
+        # Ensure the request was successful
+        if response.status != 200
+            error("Failed to fetch the Arrow file: HTTP status code ", response.status)
+        end
+
+        # Wrap the downloaded bytes in an IOBuffer so Arrow.Table can read them
+        file_to_read = IOBuffer(response.body)
+    else
+        # Use the local file path
+        file_to_read = data_file
+    end
+
+    # Load the Arrow file into a DataFrame (copycols=false: no column copy)
+    df = DataFrame(Arrow.Table(file_to_read); copycols=false)
+
+    # Apply column selection if specified
+    if !isnothing(col_select)
+        df = select(df, col_select)
+    end
+
+    # Row window: drop the first `skip` rows, then keep at most `n_max` rows.
+    if !isinf(n_max) || skip > 0
+        start_row = max(1, skip + 1)  # a negative skip starts at the first row
+        # Int(n_max) keeps the bounds integers even if n_max was given as a float
+        end_row = isinf(n_max) ? nrow(df) : min(nrow(df), start_row + Int(n_max) - 1)
+        df = df[start_row:end_row, :]
+    end
+
+    return df
+end
+
+"""
+$docstring_write_arrow
+"""
+function write_arrow(tbl, file::String; append=false, compress=:lz4, alignment=8,
+                        dictencode=false, dictencodenested=false, denseunions=true,
+                        largelists=false, maxdepth=6, num_threads=Threads.nthreads())
+    # Keyword arguments forwarded to Arrow.write. `compress` is passed through so
+    # the caller's choice (default :lz4) is honoured instead of being dropped;
+    # `num_threads` is forwarded under Arrow.write's keyword name `ntasks`.
+    write_options = Dict(
+        :compress => compress,
+        :alignment => alignment,
+        :dictencode => dictencode,
+        :dictencodenested => dictencodenested,
+        :denseunions => denseunions,
+        :largelists => largelists,
+        :maxdepth => maxdepth,
+        :ntasks => num_threads
+    )
+
+    if append
+        # NOTE(review): this writes a second complete Arrow payload after the
+        # existing bytes; confirm readers pick it up, or consider Arrow.append.
+        open(file, "a") do io
+            Arrow.write(io, tbl; write_options..., file=true)
+        end
+    else
+        # Write directly to file, creating or overwriting by default
+        Arrow.write(file, tbl; write_options...)
+    end
+end
\ No newline at end of file
diff --git a/src/docstrings.jl b/src/docstrings.jl
index f08637f..ad1df95 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -319,7 +319,7 @@ const docstring_write_xlsx =
     write_xlsx(x; path, overwrite)
 Write a DataFrame, or multiple DataFrames, to an Excel file.
 
-#Arguments
+# Arguments
 -`x`: The data to write. Can be a single Pair{String, DataFrame} for writing one sheet, or a Tuple of such pairs for writing multiple sheets. The String in each pair specifies the sheet name, and the DataFrame is the data to write to that sheet.
 -`path`: The path to the Excel file where the data will be written.
 -`overwrite`: Defaults to false. Whether to overwrite an existing file. If false, an error is thrown when attempting to write to an existing file.
@@ -525,4 +525,88 @@ julia> write_dta(df, "test.dta")
    1 │    sav      10.1
    2 │    por      10.2
 ```
-"""
\ No newline at end of file
+"""
+
+const docstring_write_arrow =
+"""
+    write_arrow(df, path)
+Write a DataFrame to an Arrow (.arrow) file.
+# Arguments
+-`df`: The DataFrame to be written to a file.
+-`path`: String as path where the .arrow file will be created. If a file at this path already exists, it will be overwritten.
+# Examples
+```jldoctest 
+julia> df = DataFrame(AA=["Arr", "ow"], AB=[10.1, 10.2]);
+
+julia> write_arrow(df , "test.arrow");
+```
+"""
+
+const docstring_read_arrow =
+"""
+    read_arrow(path; skip=0, n_max=Inf, col_select=nothing)
+Read an Arrow file (.arrow) to a DataFrame.
+# Arguments
+-`path`: String as path to the .arrow file to read. A path starting with `http://` or `https://` is downloaded first.
+-`skip`: Number of initial rows to skip before reading data. Default is 0.
+-`n_max`: Maximum number of rows to read. Default is Inf (read all rows).
+-`col_select`: Optional vector of symbols or strings to select which columns to load.
+
+# Examples
+```jldoctest 
+julia> df = DataFrame(AA=["Arr", "ow"], AB=[10.1, 10.2]);
+
+julia> write_arrow(df , "test.arrow");
+
+julia> read_arrow("test.arrow")
+2×2 DataFrame
+ Row │ AA      AB      
+     │ String  Float64 
+─────┼─────────────────
+   1 │ Arr        10.1
+   2 │ ow         10.2
+```
+"""
+
+const docstring_write_parquet =
+"""
+    write_parquet(df, path)
+Write a DataFrame to a Parquet (.parquet) file.
+# Arguments
+-`df`: The DataFrame to be written to a file.
+-`path`: String as path where the .parquet file will be created. If a file at this path already exists, it will be overwritten.
+# Examples
+```jldoctest 
+julia> df = DataFrame(AA=["Par", "quet"], AB=[10.1, 10.2]);
+
+julia> write_parquet(df, "test.parquet");
+```
+"""
+
+const docstring_read_parquet =
+"""
+    read_parquet(path; col_names=true, skip=0, n_max=Inf, col_select=nothing)
+Read a Parquet file (.parquet) to a DataFrame.
+# Arguments
+-`path`: String as path to the .parquet file to read. A path starting with `http://` or `https://` is downloaded first.
+-`col_names`: If false, the original column names are placed in the first row and the columns are renamed Column1, Column2, and so on. Default is true.
+-`skip`: Number of initial rows to skip before reading data. Default is 0.
+-`n_max`: Maximum number of rows to read. Default is Inf (read all rows).
+-`col_select`: Optional vector of symbols or strings to select which columns to load.
+
+# Examples
+```jldoctest 
+julia> df = DataFrame(AA=["Par", "quet"], AB=[10.1, 10.2]);
+
+julia> write_parquet(df, "test.parquet");
+
+julia> read_parquet("test.parquet")
+2×2 DataFrame
+ Row │ AA      AB      
+     │ String  Float64 
+─────┼─────────────────
+   1 │ Par        10.1
+   2 │ quet       10.2
+```
+"""
+
diff --git a/src/parquet_files.jl b/src/parquet_files.jl
new file mode 100644
index 0000000..84b002c
--- /dev/null
+++ b/src/parquet_files.jl
@@ -0,0 +1,75 @@
+"""
+$docstring_read_parquet
+"""
+function read_parquet(data_file;
+                      col_select=nothing,
+                      skip=0,
+                      n_max=Inf,
+                      col_names=true)  # Handle column names display
+    # Determine if the file is a local file or a URL
+    if startswith(data_file, "http://") || startswith(data_file, "https://")
+        # Fetch the content from the URL
+        response = HTTP.get(data_file)
+
+        # Ensure the request was successful
+        if response.status != 200
+            error("Failed to fetch the Parquet file: HTTP status code ", response.status)
+        end
+
+        # Use the content fetched from the URL as an IOBuffer for reading
+        file_to_read = IOBuffer(response.body)
+    else
+        # Use the local file path
+        file_to_read = data_file
+    end
+
+    # Open the dataset
+    ds = Parquet2.Dataset(file_to_read)
+    df = DataFrame(ds; copycols=false)  # Load the entire dataset initially
+
+    # Apply column selection if provided
+    if !isnothing(col_select)
+        # Symbols are converted to strings so the selector has one element type
+        col_select = [c isa Symbol ? string(c) : c for c in col_select]
+        df = select(df, col_select)
+    end
+
+    # Row window; Int(n_max) keeps the bounds integers even for a float n_max
+    if skip > 0 || !isinf(n_max)
+        start_idx = max(1, skip + 1)
+        end_idx = isinf(n_max) ? nrow(df) : min(nrow(df), start_idx + Int(n_max) - 1)
+        df = df[start_idx:end_idx, :]
+    end
+
+    # With col_names=false the original names become the first data row
+    if !col_names
+        # One-row DataFrame whose single row holds the original column names
+        col_names_df = DataFrame(permutedims(names(df)), names(df))
+        # Stack it on top of the data; vcat widens column eltypes as needed
+        df = vcat(col_names_df, df)
+        # Rename columns to generic names (Column1, Column2, ...)
+        rename!(df, Symbol.(:Column, 1:ncol(df)))
+    end
+
+    return df
+end
+
+"""
+$docstring_write_parquet
+"""
+function write_parquet(data, filename::String; buffer::Union{IO, Nothing}=nothing,
+    npages::Union{Int, Dict}=1,
+    compression_codec::Union{Symbol, Dict}=Dict(),
+    column_metadata::Union{Dict, Pair}=Dict(),
+    metadata::Dict=Dict())
+    # Write to the caller-supplied `buffer` when given, otherwise to `filename`
+    destination = isnothing(buffer) ? filename : buffer
+
+    # Every writer option is forwarded unchanged to Parquet2.writefile
+    return Parquet2.writefile(destination, data;
+        npages=npages,
+        compression_codec=compression_codec,
+        column_metadata=column_metadata,
+        metadata=metadata
+    )
+end
\ No newline at end of file

From 0d31094a18eaac660a1a93779655ae2e9efce8ba Mon Sep 17 00:00:00 2001
From: drizk1 <rizkytennis@gmail.com>
Date: Sun, 14 Apr 2024 22:54:26 -0400
Subject: [PATCH 2/2] added missing quote, deleted old testing file.

---
 docs/mkdocs.yml |  2 +-
 src/testing.jl  | 82 -------------------------------------------------
 2 files changed, 1 insertion(+), 83 deletions(-)
 delete mode 100644 src/testing.jl

diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
index 72ad018..e3917f2 100644
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -120,5 +120,5 @@ nav:
   - "Excel Files": "examples/generated/UserGuide/xl.md"
   - "Stats Files": "examples/generated/UserGuide/stats.md"
   - "Arrow Files": "examples/generated/UserGuide/Arrow.md"
-  - "Parquet Files": "examples/generated/UserGuide/parquet.md
+  - "Parquet Files": "examples/generated/UserGuide/parquet.md"
   - "Reference" : "reference.md"
\ No newline at end of file
diff --git a/src/testing.jl b/src/testing.jl
deleted file mode 100644
index 46344b3..0000000
--- a/src/testing.jl
+++ /dev/null
@@ -1,82 +0,0 @@
-xl_path = "/Users/danielrizk/Downloads/Assignment_Datasets/import.xlsx"
-read_excel(xl_path)
-
-df1 = DataFrames.DataFrame(COL1=[10,20,30], COL2=["First", "Second", "Third"])
-df2 = DataFrames.DataFrame(AA=["sav", "por"], AB=[10.1, 10.2])
-
-
-write_xlsx(("REPORT_A" => df1, "REPORT_B" => df2); path="/Users/danielrizk/Downloads/report.xlsx", overwrite = true)
-read_excel("/Users/danielrizk/Downloads/report.xlsx", sheet = "REPORT_B", skip = 1, n_max = 4, missingstring = [10.2])
-write_xlsx("REPORT_A" => df1; path="multi_sheet_report.xlsx")
-
-XLSX.writetable("/Users/danielrizk/Downloads/report.xlsx", sheets)
-
-
-XLSX.writetable("/Users/danielrizk/Downloads/report.xlsx", "REPORT_A" => df1, "REPORT_B" => df2)
-
-csv_path ="/Users/danielrizk/Downloads/TidierDB.jl/mtcars.csv"
-read_csv(csv_path)  
-
-
-df = DataFrame(integers=[1, 2, 3, 4], strings=["This", "Package makes", "File reading/writing", "even smoother"], floats=[10.2, 20.3, 30.4, 40.5], dates=[Date(2018,2,20), Date(2018,2,21), Date(2018,2,22), Date(2018,2,23)], times=[Dates.Time(19,10), Dates.Time(19,20), Dates.Time(19,30), Dates.Time(19,40)], datetimes=[Dates.DateTime(2018,5,20,19,10), Dates.DateTime(2018,5,20,19,20), Dates.DateTime(2018,5,20,19,30), Dates.DateTime(2018,5,20,19,40)])
-df1 = DataFrames.DataFrame(COL1=[10,20,30], COL2=["First", "Second", "Third"])
-
-mtcarsastsv = read_csv(csv_path, col_names = true)
-write_tsv(mtcarsastsv, "/Users/danielrizk/Downloads/mtcars.tsv"  )
-read_tsv("/Users/danielrizk/Downloads/mtcars.tsv", num_threads = 5)
-write_csv(mtcars, "/Users/danielrizk/Downloads/mtcars.csv")
-
-read_csv("/Users/danielrizk/Downloads/mtcars.csv", col_names = true, num_threads = 5, missingstring = ["4"])
-read_delim("/Users/danielrizk/Downloads/mtcars.tsv", delim = "\t")
-read_delim("/Users/danielrizk/Downloads/mtcars.csv", delim = ",")
-
-read_csv("/Users/danielrizk/Downloads/mtcars.tsv")
-df = DataFrame(integers=[1, 2, 3, 4], strings=["This", "Package makes", "File reading/writing", "even smoother"], floats=[10.2, 20.3, 30.4, 40.5], dates=[Date(2018,2,20), Date(2018,2,21), Date(2018,2,22), Date(2018,2,23)], times=[Dates.Time(19,10), Dates.Time(19,20), Dates.Time(19,30), Dates.Time(19,40)], datetimes=[Dates.DateTime(2018,5,20,19,10), Dates.DateTime(2018,5,20,19,20), Dates.DateTime(2018,5,20,19,30), Dates.DateTime(2018,5,20,19,40)])
-write_csv(df, "/Users/danielrizk/Downloads/testing.csv" , col_names= true, num_threads = 2)
-read_csv("/Users/danielrizk/Downloads/testing.csv", missingstring=["40.5", "10.2"])
-
-tsv_path = "/Users/danielrizk/Downloads/pythonsratch/UPDATED_NLP_COURSE/TextFiles/moviereviews.tsv"
-
-
-read_excel("https://freetestdata.com/wp-content/uploads/2021/09/Free_Test_Data_100KB_XLSX.xlsx")
-read_tsv("/Users/danielrizk/opt/anaconda3/pkgs/gensim-4.1.2-py39he9d5cce_0/lib/python3.9/site-packages/gensim/test/test_data/wordsim353.tsv")
-read_tsv(tsv_path,col_names = false)
-
-
-read_fwf("/Users/danielrizk/Downloads/fwftest.txt")
-read_table("/Users/danielrizk/Downloads/fwftest.txt", col_names= false)
-
-read_csv("https://github.com/tidyverse/readr/raw/main/inst/extdata/mtcars.csv", skip = 4, missingstring = ["1"])
-read_tsv("https://github.com/tidyverse/readr/raw/main/inst/extdata/mtcars.csv", skip = 4, missingstring = ["1"])
-read_delim("https://github.com/tidyverse/readr/raw/main/inst/extdata/mtcars.csv", skip = 4, missingstring = ["1"])
-
-write_table(df, "/Users/danielrizk/Downloads/fwftest2.txt")
-read_table( "/Users/danielrizk/Downloads/fwftest2.txt")
-
-
-read_sas("/Users/danielrizk/Downloads/naws_all.sas7bdat", skip = 10, n_max=44 )
-
-read_sav("/Users/danielrizk/Downloads/naws_all.sav", skip = 10, n_max=44)
-
-read_dta("/Users/danielrizk/Downloads/naws_all.dta", skip = 15, n_max=44, num_threads = 10)
-
-writestat("/Users/danielrizk/Downloads/test.dta", df)
-using ReadStatTables
-read_dta("https://www.dol.gov/sites/dolgov/files/ETA/naws/pdfs/NAWS_EPA.zip")
-
-readstat
-using HTTP
-col_names = ["Name", "Age", "ID", "Position", "Salary"]
-df2
-widths_colnames = fwf_empty(path, num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"])
-read_fwf(path, fwf_empty(path, num_lines=4, col_names = ["Name", "Age", "ID", "Position", "Salary"]), skip_to=3, n_max=3)
-
-read_fwf("testing_files/fwftest.txt", fwf_empty("testing_files/fwftest.txt", num_lines= 4))
-fwf_empty("testing_files/fwftest.txt")
-df = DataFrames.DataFrame(AA=["sav", "por"], AB=[10.1, 10.2]);
-
-write_sav(df2 , "/Users/danielrizk/Downloads/test2.sav")
-write_sav(df2 , "/Users/danielrizk/Downloads/test2.por")
-
-
-read_dta( "/Users/danielrizk/Downloads/test2.dta")
\ No newline at end of file