From fb3bc514542240ca9427a328b9223d47728d42a2 Mon Sep 17 00:00:00 2001 From: chainsawriot Date: Mon, 15 Jul 2024 11:23:43 +0200 Subject: [PATCH] Add back default support for parquet ref #315 --- DESCRIPTION | 3 ++- NEWS.md | 2 ++ R/export.R | 2 +- R/export_methods.R | 2 +- R/import.R | 2 +- R/import_methods.R | 4 ++-- R/sysdata.rda | Bin 2349 -> 2356 bytes README.md | 2 +- data-raw/single.json | 6 +++--- man/export.Rd | 2 +- man/import.Rd | 2 +- man/rio.Rd | 1 + 12 files changed, 16 insertions(+), 12 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3a5a1d6..0f45569 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -53,7 +53,8 @@ Imports: writexl, lifecycle, R.utils, - readr + readr, + nanoparquet Suggests: datasets, bit64, diff --git a/NEWS.md b/NEWS.md index 90d04a1..92aee45 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,8 @@ # rio 1.1.1.999 (development) * Fix lintr issues #434 (h/t @bisaloo Hugo Gruson) +* Drop support for R < 4.0.0 see #436 +* Add support for parquet in the import tier using `nanoparquet` see rio 1.0.1 below. Bug fixes diff --git a/R/export.R b/R/export.R index 5eea904..54765ae 100644 --- a/R/export.R +++ b/R/export.R @@ -32,7 +32,7 @@ #' \item Weka Attribute-Relation File Format (.arff), using [foreign::write.arff()] #' \item Fixed-width format data (.fwf), using [utils::write.table()] with `row.names = FALSE`, `quote = FALSE`, and `col.names = FALSE` #' \item [CSVY](https://github.com/csvy) (CSV with a YAML metadata header) using [data.table::fwrite()]. -#' \item Apache Arrow Parquet (.parquet), using [arrow::write_parquet()] +#' \item Apache Arrow Parquet (.parquet), using [nanoparquet::write_parquet()] #' \item Feather R/Python interchange format (.feather), using [arrow::write_feather()] #' \item Fast storage (.fst), using [fst::write.fst()] #' \item JSON (.json), using [jsonlite::toJSON()]. In this case, `x` can be a variety of R objects, based on class mapping conventions in this paper: [https://arxiv.org/abs/1403.2805](https://arxiv.org/abs/1403.2805). diff --git a/R/export_methods.R b/R/export_methods.R index 1d07a10..cebde95 100644 --- a/R/export_methods.R +++ b/R/export_methods.R @@ -282,7 +282,7 @@ export_delim <- function(file, x, fwrite = lifecycle::deprecated(), sep = "\t", #' @export .export.rio_parquet <- function(file, x, ...) { - .docall(arrow::write_parquet, ..., args = list(x = x, sink = file)) + .docall(nanoparquet::write_parquet, ..., args = list(x = x, file = file)) } #' @export diff --git a/R/import.R b/R/import.R index 81277fc..9465498 100644 --- a/R/import.R +++ b/R/import.R @@ -42,7 +42,7 @@ #' \item Fortran data (no recognized extension), using [utils::read.fortran()] #' \item Fixed-width format data (.fwf), using a faster version of [utils::read.fwf()] that requires a `widths` argument and by default in rio has `stringsAsFactors = FALSE` #' \item [CSVY](https://github.com/csvy) (CSV with a YAML metadata header) using [data.table::fread()]. -#' \item Apache Arrow Parquet (.parquet), using [arrow::read_parquet()] +#' \item Apache Arrow Parquet (.parquet), using [nanoparquet::read_parquet()] #' \item Feather R/Python interchange format (.feather), using [arrow::read_feather()] #' \item Fast storage (.fst), using [fst::read.fst()] #' \item JSON (.json), using [jsonlite::fromJSON()] diff --git a/R/import_methods.R b/R/import_methods.R index b4add3a..6bac032 100644 --- a/R/import_methods.R +++ b/R/import_methods.R @@ -413,8 +413,8 @@ extract_html_row <- function(x, empty_value) { #' @export .import.rio_parquet <- function(file, which = 1, ...) { - .check_pkg_availability("arrow") - .docall(arrow::read_parquet, ..., args = list(file = file, as_data_frame = TRUE)) + #.check_pkg_availability("arrow") + .docall(nanoparquet::read_parquet, ..., args = list(file = file, options = nanoparquet::parquet_options(class = "data.frame"))) } #' @export diff --git a/R/sysdata.rda b/R/sysdata.rda index 3007435d7b6d08ac9e9aa576621fea19273aab5a..dbdb0b4c41db70b2c64563a9c2e31fdecf892e24 100644 GIT binary patch literal 2356 zcmV-43Cs3ET4*^jL0KkKSp{5W_5cgjf589$|M%^0u$Vvh{qVp4|MWlr00CeMKYMY+ zxIl0-Ccr3-fvso^g&0XBn3^V@rbd&(dPk87>7g-{V47$#GHHo~Xn9XW&<#}fqct%y zO;1xAMt}eS00000&>8^KLxV<*13{oNVgn;XAOJMeAOHb^00>l2)f#@NL()8^nq&c{ znqgeeO6fXNU7MQtP?(^M#>B7YDAJ@bhLl@^-dsefUl4dO|VkPe6fQ%u zRRr%9W(#~GlgZ+vpnx0V;hha+Avc~E^wZ4q3>t2m4X$QCcG~9?%t;nd;lv^sh;YTu z;iACLT<1=jbX!qHEKF(rEV!yDxUX!|R~zz5MySHwXC6FsbCNaFU3#0~dktxRS_g*A+5AWdxs8p^S;CMJx+ zP!%CUhr|+*9atL#g2Iu7b;;jOjFDZm*<}Q;hRO(g>d}oel^iEFgX3Zei6rj$xbL7^!Gp(zSdCV`@WC``a4 zAwx*HmM{T#Gzc3=7vWiM&YSSv2>azxPyNnMos?@o3u9{y_|(^qb`!~2_EZ;F6{9Rc zgg_s3)zwZ{a_r`E7i()pej=~E#yG4s1|0F36oy(irSyqWt7Xow#>oWYu5Bwh&_pw>3)SNcu5H@e@g(?J$yqo)ciXpem`CwQEwNYwVv?ZvT&REoFt1iNVhG#idzDmpK zDcdfRFQTT5s)!(@EDMm>rmD8`cc~RpUwK{gMaAj}E%qdY6|N|Qzl)FNi(eNwvB7}h zES;TOyvI?k-BTZ&U}||*2jLeXc;F)`oY6*Y1}rWdEZ*o0styU;YuGYwf0tfQ3D8Hx zDmj)y*KhHj5p-WepuC>}+& z$jIx>KE@&oGG#M|Clj6yf~iINs@X$M$Vg4zSFP%(uCa6#5dQ{|?wb&z5= zgNS_pPVvU63fUk^k)=hpiqQK1x&0~iuPBQo446D_k_t)*B#Xp)gkrO3U#ut(23_0n z?)s57y3!3_MdrgSMPf0GP*Ci$nItIk$VY_V6Ua@;p`nj5qwLgQN{=N&cNfb0il8C8 zf#!jr{Wz4u*oM{SqZaI}l*v~{TLr5l6@+O8>9YM3eJ#h|gB}_WM z@vM%oRoQ(QbceuZ4n$)?T0Of|$!0s$grZSQLkTp35FxM#m1k zib}3S`33?xD=|l6u65?fG@##FYlfn#7an$Yzw7S4M<$dfqJd15QxcO-B8qYMB9!w~ ziAtg@APN$YC}JTZ0)~1F@FZ9y1&mxQA8>d~Uy>>>xb_f<2#1GcOhxGK!5?kFA8+Dm z4#KN(NGv_@mct`rCD2q?w;<76G|A_fO!O9|BRXx2ZLwx%aDwPHwB>>ajio4x6qdX~ zc;nD=G$yT}W(_N6m_aBwluI_K}8xhK-?K2 z^p}`eH4+hLNG3!9QvL${2hjgJ3D84d_X&XDH(}AoEHv~SyJccg);*}aL2FAs7Z<1b=#xp`Sg?Z{L;3mUc;~^vF@gtdTW?-nKZKYk82h`T} zoa%kYCZ%dgrAD5AKEzTkZw7H>CWWOxvl8GL%9ODK+MS#_}DV=^XDl&-(C(vP#)qCNb z6R4P&dtE-q)xlk*J=PR~%x6TioCu>i)z13C;ndq+NKvOxVOh$w;3e)hFl|Ty=wZ(7 zQ;oyLI45Yle^8BE1l9<|Hl{iHb8r#lBx)&J4lqdh(fdZaOFprb)_k6*F3aA;A(K`i zTjWZ0dFm9N&6S5Sbx|+u>ToVm){?6mFMAD`FyVq!~Xb7dvUM|~lr6pV>gR%1xe{(qyt;YlLYFN(s#S~VL^ zMq-0=DMA)BISQXy^4gGFlh`BGpq5;|bX|3}ob0b5cZVdvV?fH8xgJU2k067yhk=X)5HxQ~ z71`62$aAu@0wP%HTjpr2JnRNwDQGBjj4{IwDw@fqMWMPgnA=ifw9wgr+ilR-8g8p$ zoEf-jwAj-G8PmRoBy#$zJlr%1hE!-qy*WiuLw4}#%8{&SL6{S-X1EIvhJzGk)nYhT zJQs-WF-cw(qj1oB!q=zWMt%XL$whmu8!mb?2V#_GyQwRL9^JRru13Z3beuIIgZN`l zzG|0Afy2@DCs=v-BD9d=P7LbAlGX$b} aw%pAvhz>StX72Y<{}*yaI8cxUTxIrwVLcWA literal 2349 zcmV+|3DWjLT4*^jL0KkKS}f)_^f1=B?nZYLcOz)(+NnV^hQS0Bn_1GjDyrYKs`ec(?OunXwyJ>Pf!5WPs%e? z@I=(nh-3f&000008UsKYXf#b4Dd`hK)b$33r~m)}4FCWQ00w{wH6#@_r=&47WCKQk z0MGyc0iXZ?0wqBd*rUolr9DTf>S)l-LFEkqG|!4`pl2!(F5XF-Ah5FsOeTHuFXlcDMN?*A(L4UgnvD@3RIk}&8MgV}5*K=Ugw z4h1*3no%r3gz(_;F60rv(3@&ow0rINFGw?+SbJP!|B#oSO z36iCgrYcAqY#9i}x{d;q;)oF><6}2^&!rSz#*FjymsMcD59jbWqF~H8GBZoTs6gEzN6zgL9`f74Z!BFJS+7y!E(1P!DM@vOn8ejByIKM7P*`DG{0$}}I22-L%6^)=Bu3F55#Y71+M z(T*UxAP(uPYMia))49l9ZLHPv6@Cscyy9N??REnuB!aOoXrWFJhf0U4eR|Q~qdVOBRQ|Ma9 zEbW$uOV?~%E}FO7lSEJ{stycH6P$puGBjMmWZDSgSg|qh%P37)U{+2MNUpxC)9oFj zHpHEa2&M^k;gJ!l(GTH@Toc|Ic&eFc80!>JsEiW4bQ^v_Flj+VoEkgc)3`4>4|Os| z_lFO+LEL+acJgCzLV|J+=y4X&jG3g@-3N-kg)E)c(k+5HNuEMB;*|j*8caG$^(mZ* z2y_o_xj7FbW@_o|g5KRi+P7$^sPp`#MY6F*F)OocDyWP`)6#)vNkSbsGDE_#UZh|| zOo6aGo@|E2SPOFkOa@ExWEKGXe)I09(!3%pkTPKJwn!#INWvrP5qim)dV+xM!PoQ} z&tfLmN+yl&H4m4juT0R<-$z(Y)#Fc8osS7d%K!Si(wg(z!mkN-e=)?=kB6S+h zX)3tSXj~(Xvh;N-=Uz-hN)469Zgfmr}`lh5f1LCn2adwxgFMkcYcO& z>MFr{L1EgY z)y)Xz=+(?;A3NM#1fb<8qHV|6#h8qCABdHX0kF4Z7n%-nPmM!jM#usWC1I2~ds zT1!a0onjHUq&g8uW%2nB9tsgg=to)%KKMiilGP2ijDc^E($^J)QN0eDxN0kylUG4u z#c4Y{U0v=(%_3OBOa{iqY;2NVM8-2RGzEETE7~T*L*_46Zg*35xXNZU%gEe~T` z7E@klGU9G47P~dJaiyW+bD^sW7>$hdZwsJ9qq58IWgdkh6rr~RNrmXrST^cNM6^#5 z#B5Yn&j=>SA+fr`4QMP7(@6NM=b;vjDv`Kg$Oxz!uEoQcN_U$|D!|5vo4Tol@E+z% zu~5&t154a`H7%k1O_nUCvM9=L-1q~#du?n`(Pfs*t{cMUkbxsgsmYCcmBj*>+SNy`S4_oLP|z_a;`rnBezV7DKIphHhlQuCH6>#bCj zv~#i<;8vd}Z(q>8WOAd66K1b;f>L1|vy|q75@|W&fQ{5|faxj`X8Oqp=tB)1sgzj0 z4F(}2e@naCqM=-S;~k>wNutnL4JGxrZ+cino1Og~RZ{{Nn!>_axQ#~}3)0rYkV3wk zTiGr(A(xv61P;p$RVohvD^LY%@}9&GLqkI&AtUhwm@Gn~T`xp(al@|7qLzfpTbl;e z#S548iP^$ag9gf7w*~{9mEtY!%cZz^JEUCD9dko7Nj2ySZh|JO8R&kc#y&Q<3E(*mjdH&kJyhe zyYZhh7~2yV(H=0A1V|~5D^V{5AaHQGGt@02Vo)zCO1uoCaTAdP9H5YA3!qw)Fq{k1 z2pWvW_PXpUsZ<)|2BY{98qv$-BwSV)Pa&CMPcGKCVr1S-*|iENM9q^j&q|k5ae?1l T