diff --git a/DESCRIPTION b/DESCRIPTION index b08cceb..8f3070d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: rio Type: Package Title: A Swiss-Army Knife for Data I/O -Version: 1.0.0 +Version: 1.0.1 Authors@R: c(person("Jason", "Becker", role = "aut", email = "jason@jbecker.co"), person("Chung-hong", "Chan", role = c("aut", "cre"), email = "chainsawtiney@gmail.com", comment = c(ORCID = "0000-0002-6232-7530")), @@ -49,7 +49,6 @@ Imports: curl (>= 0.6), data.table (>= 1.11.2), readxl (>= 0.1.1), - arrow (>= 0.17.0), tibble, stringi, writexl, @@ -72,7 +71,8 @@ Suggests: rmatio, xml2 (>= 1.2.0), yaml, - qs + qs, + arrow (>= 0.17.0) License: GPL-2 VignetteBuilder: knitr Encoding: UTF-8 diff --git a/NEWS.md b/NEWS.md index 495ab42..1ee8937 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# rio 1.0.1 + +* POTENTIALLY BREAKING: Due to compilation time concerns, the decision to move `arrow` to `Imports` has been rolled back; it is now in `Suggests`. The feather and parquet formats and `setclass = "arrow"` work only if `arrow` is installed. + # rio 1.0.0 * Stop loading the entire namespace of a suggested package when it is available #296 diff --git a/R/import.R b/R/import.R index 61cd644..d8cb780 100644 --- a/R/import.R +++ b/R/import.R @@ -6,7 +6,7 @@ #' @param setclass An optional character vector specifying one or more classes #' to set on the import. By default, the return object is always a #' \dQuote{data.frame}. Allowed values include \dQuote{tbl_df}, \dQuote{tbl}, or -#' \dQuote{tibble} (if using tibble), \dQuote{arrow}, \dQuote{arrow_table} (if using arrow table) or \dQuote{data.table} (if using +#' \dQuote{tibble} (if using tibble), \dQuote{arrow}, \dQuote{arrow_table} (if using arrow table; the suggested package `arrow` must be installed) or \dQuote{data.table} (if using #' data.table). Other values are ignored, such that a data.frame is returned. #' The parameter takes precedents over parameters in \dots which set a different class. 
#' @param which This argument is used to control import from multi-object files; as a rule `import` only ever returns a single data frame (use [import_list()] to import multiple data frames from a multi-object file). If `file` is a compressed directory, `which` can be either a character string specifying a filename or an integer specifying which file (in locale sort order) to extract from the compressed directory. For Excel spreadsheets, this can be used to specify a sheet name or number. For .Rdata files, this can be an object name. For HTML files, it identifies which table to extract (from document order). Ignored otherwise. A character string value will be used as a regular expression, such that the extracted file is the first match of the regular expression against the file names in the archive. @@ -102,6 +102,10 @@ #' @seealso [import_list()], [characterize()], [gather_attrs()], [export()], [convert()] #' @export import <- function(file, format, setclass = getOption("rio.import.class", "data.frame"), which, ...) { + ## `setclass` may be NULL or name several classes, so reduce the membership test to a single TRUE/FALSE for `if` + if (any(c("arrow", "arrow_table") %in% setclass)) { + .check_pkg_availability("arrow") + } .check_file(file, single_only = TRUE) if (R.utils::isUrl(file)) { file <- remote_to_local(file, format = format) diff --git a/R/import_methods.R b/R/import_methods.R index ea39038..4331ff0 100644 --- a/R/import_methods.R +++ b/R/import_methods.R @@ -152,6 +152,7 @@ import_delim <- function(file, which = 1, sep = "auto", header = "auto", strings #' @export .import.rio_feather <- function(file, which = 1, ...) { + .check_pkg_availability("arrow") .docall(arrow::read_feather, ..., args = list(file = file)) } @@ -386,6 +387,7 @@ extract_html_row <- function(x, empty_value) { #' @export .import.rio_parquet <- function(file, which = 1, ...) 
{ + .check_pkg_availability("arrow") .docall(arrow::read_parquet, ..., args = list(file = file, as_data_frame = TRUE)) } diff --git a/R/set_class.R b/R/set_class.R index 0eaaf0b..2f64ce5 100644 --- a/R/set_class.R +++ b/R/set_class.R @@ -12,6 +12,8 @@ set_class <- function(x, class = NULL) { } if (any(c("arrow", "arrow_table") %in% class)) { + ## because setclass can be used without import, must check again + .check_pkg_availability("arrow") return(.ensure_arrow(x)) } return(.ensure_data_frame(x)) diff --git a/R/sysdata.rda b/R/sysdata.rda index d150011..4cd6504 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/README.md b/README.md index 933a4fb..0262a75 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,6 @@ The full list of supported formats is below: | Archive files (handled by tar) | bzip2 / xz / tar | utils | utils | Default | | | Gzip files | gz / gzip | base | base | Default | | | Zip files | zip | utils | utils | Default | | -| Apache Arrow (Parquet) | parquet | arrow | arrow | Default | | | CSVY (CSV + YAML metadata header) | csvy | data.table | data.table | Default | | | Comma-separated data | csv | data.table | data.table | Default | | | Comma-separated data (European) | csv2 | data.table | data.table | Default | | @@ -143,7 +142,6 @@ The full list of supported formats is below: | Epiinfo | epiinfo / rec | foreign | | Default | | | Excel | excel / xlsx | readxl | writexl | Default | | | Excel (Legacy) | xls | readxl | | Default | | -| Feather R/Python interchange format | feather | arrow | arrow | Default | | | Fixed-width format data | fwf | utils | utils | Default | | | Fortran data | fortran | utils | | Default | No recognized extension | | Google Sheets | googlesheets | data.table | | Default | As comma-separated data | @@ -163,9 +161,11 @@ The full list of supported formats is below: | Text Representations of R Objects | dump | base | base | Default | | | Weka Attribute-Relation File Format | arff / weka | foreign | 
foreign | Default | | | XBASE database files | dbf | foreign | foreign | Default | | +| Apache Arrow (Parquet) | parquet | arrow | arrow | Suggest | | | Clipboard | clipboard | clipr | clipr | Suggest | default is tsv | | EViews | eviews / wf1 | hexView | | Suggest | | | Fast Storage | fst | fst | fst | Suggest | | +| Feather R/Python interchange format | feather | arrow | arrow | Suggest | | | Graphpad Prism | pzfx | pzfx | pzfx | Suggest | | | HTML Tables | htm / html | xml2 | xml2 | Suggest | | | JSON | json | jsonlite | jsonlite | Suggest | | diff --git a/data-raw/single.json b/data-raw/single.json index 3d292d8..874edf8 100644 --- a/data-raw/single.json +++ b/data-raw/single.json @@ -2,7 +2,7 @@ { "input": "parquet", "format": "parquet", - "type": "import", + "type": "suggest", "format_name": "Apache Arrow (Parquet)", "import_function": "arrow::read_parquet", "export_function": "arrow::write_parquet", @@ -209,7 +209,7 @@ { "input": "feather", "format": "feather", - "type": "import", + "type": "suggest", "format_name": "Feather R/Python interchange format", "import_function": "arrow::read_feather", "export_function": "arrow::write_feather", diff --git a/man/import.Rd b/man/import.Rd index 6f004f4..21e80bc 100644 --- a/man/import.Rd +++ b/man/import.Rd @@ -20,7 +20,7 @@ import( \item{setclass}{An optional character vector specifying one or more classes to set on the import. By default, the return object is always a \dQuote{data.frame}. Allowed values include \dQuote{tbl_df}, \dQuote{tbl}, or -\dQuote{tibble} (if using tibble), \dQuote{arrow}, \dQuote{arrow_table} (if using arrow table) or \dQuote{data.table} (if using +\dQuote{tibble} (if using tibble), \dQuote{arrow}, \dQuote{arrow_table} (if using arrow table; the suggested package \code{arrow} must be installed) or \dQuote{data.table} (if using data.table). Other values are ignored, such that a data.frame is returned. 
The parameter takes precedents over parameters in \dots which set a different class.} diff --git a/man/import_list.Rd b/man/import_list.Rd index 7e115e3..e3081b0 100644 --- a/man/import_list.Rd +++ b/man/import_list.Rd @@ -20,7 +20,7 @@ import_list( \item{setclass}{An optional character vector specifying one or more classes to set on the import. By default, the return object is always a \dQuote{data.frame}. Allowed values include \dQuote{tbl_df}, \dQuote{tbl}, or -\dQuote{tibble} (if using tibble), \dQuote{arrow}, \dQuote{arrow_table} (if using arrow table) or \dQuote{data.table} (if using +\dQuote{tibble} (if using tibble), \dQuote{arrow}, \dQuote{arrow_table} (if using arrow table; the suggested package \code{arrow} must be installed) or \dQuote{data.table} (if using data.table). Other values are ignored, such that a data.frame is returned. The parameter takes precedents over parameters in \dots which set a different class.} diff --git a/tests/testthat/test_format_feather.R b/tests/testthat/test_format_feather.R index cd895da..d7b772c 100644 --- a/tests/testthat/test_format_feather.R +++ b/tests/testthat/test_format_feather.R @@ -1,11 +1,14 @@ context("feather imports/exports") +skip_if_not_installed("arrow") require("datasets") test_that("Export to feather", { + skip_if_not_installed("arrow") expect_true(export(iris, "iris.feather") %in% dir()) }) test_that("Import from feather", { + skip_if_not_installed("arrow") expect_true(is.data.frame(import("iris.feather"))) }) diff --git a/tests/testthat/test_format_parquet.R b/tests/testthat/test_format_parquet.R index c0bf107..334b3b9 100644 --- a/tests/testthat/test_format_parquet.R +++ b/tests/testthat/test_format_parquet.R @@ -2,6 +2,7 @@ context("Parquet imports/exports") require("datasets") test_that("Export to and import from parquet", { + skip_if_not_installed("arrow") expect_true(export(iris, "iris.parquet") %in% dir()) expect_true(is.data.frame(import("iris.parquet"))) unlink("iris.parquet") diff --git 
a/tests/testthat/test_identical.R b/tests/testthat/test_identical.R index 6e5bb3c..c554155 100644 --- a/tests/testthat/test_identical.R +++ b/tests/testthat/test_identical.R @@ -19,6 +19,7 @@ unlink("mtcars.rds") unlink("mtcars.R") unlink("mtcars.RData") test_that("Data identical (R formats), feather", { + skip_if_not_installed("arrow") expect_equivalent(import(export(mtcars, "mtcars.feather")), mtcars) unlink("mtcars.feather") }) diff --git a/tests/testthat/test_import_list.R b/tests/testthat/test_import_list.R index f8878e9..2f25010 100644 --- a/tests/testthat/test_import_list.R +++ b/tests/testthat/test_import_list.R @@ -129,6 +129,8 @@ test_that("Universal dummy `which` #326", { test_that("Universal dummy `which` (Suggests) #326", { skip_if_not_installed("qs") + skip_if_not_installed("arrow") + skip_if_not_installed("readODS") formats <- c("qs", "parquet", "ods") for (format in formats) { tempzip <- tempfile(fileext = paste0(".", format, ".zip")) diff --git a/tests/testthat/test_set_class.R b/tests/testthat/test_set_class.R index 48f7239..b8eb568 100644 --- a/tests/testthat/test_set_class.R +++ b/tests/testthat/test_set_class.R @@ -27,6 +27,7 @@ test_that("Set object class as data.table", { test_that("Set object class as arrow table", { skip_if(getRversion() <= "4.2") + skip_if_not_installed("arrow") mtcars_arrow <- arrow::arrow_table(mtcars) expect_false(inherits(set_class(mtcars_arrow), "data.frame")) ## arrow table is not data.frame expect_true(inherits(set_class(mtcars, class = "arrow"), "ArrowTabular")) @@ -39,8 +40,20 @@ test_that("Set object class as arrow table", { test_that("ArrowTabular can be exported", { skip_if(getRversion() <= "4.2") + skip_if_not_installed("arrow") mtcars_arrow <- arrow::arrow_table(mtcars) expect_error(export(mtcars_arrow, "mtcars.csv"), NA) ## no concept of rownames expect_true(inherits(import("mtcars.csv"), "data.frame")) unlink("mtcars.csv") }) + +test_that("Simulate arrow is not installed, #376", { + ## although this is
 pretty meaningless + with_mocked_bindings({ + export(mtcars, "mtcars.csv") + expect_error(import("mtcars.csv", setclass = "arrow"), "Suggested package") + }, .check_pkg_availability = function(pkg, lib.loc = NULL) { + stop("Suggested package `", pkg, "` is not available. Please install it individually or use `install_formats()`", call. = FALSE) + }) + unlink("mtcars.csv") ## remove the exported fixture, as the other tests in this file do +})