diff --git a/.Rbuildignore b/.Rbuildignore index 1e67d4e3..87ca042c 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -15,6 +15,7 @@ ^cran-comments\.md$ README.html$ +paper/ pkgdown/ utility/ lastMiKTeXException/ diff --git a/paper/images/Figure1.png b/paper/images/Figure1.png new file mode 100644 index 00000000..cee97ef3 Binary files /dev/null and b/paper/images/Figure1.png differ diff --git a/paper/paper.bib b/paper/paper.bib new file mode 100644 index 00000000..2fcb258f --- /dev/null +++ b/paper/paper.bib @@ -0,0 +1,143 @@ +@article{Harris2019, + title = {The {REDCap} consortium: Building an international community of software platform partners}, + journal = {Journal of Biomedical Informatics}, + volume = {95}, + pages = {103208}, + year = {2019}, + issn = {1532-0464}, + doi = {10.1016/j.jbi.2019.103208}, + url = {https://www.sciencedirect.com/science/article/pii/S1532046419301261}, + author = {Paul A. Harris and Robert Taylor and Brenda L. Minor and Veida Elliott and Michelle Fernandez and Lindsay O'Neal and Laura McLeod and Giovanni Delacqua and Francesco Delacqua and Jacqueline Kirby and Stephany N. Duda}, + keywords = {Medical informatics, Electronic data capture, Clinical research, Translational research}, + abstract = {The Research Electronic Data Capture (REDCap) data management platform was developed in 2004 to address an institutional need at Vanderbilt University, then shared with a limited number of adopting sites beginning in 2006. Given bi-directional benefit in early sharing experiments, we created a broader consortium sharing and support model for any academic, non-profit, or government partner wishing to adopt the software. Our sharing framework and consortium-based support model have evolved over time along with the size of the consortium (currently more than 3200 REDCap partners across 128 countries). 
While the “REDCap Consortium” model represents only one example of how to build and disseminate a software platform, lessons learned from our approach may assist other research institutions seeking to build and disseminate innovative technologies.} +} + +@article{Harris2009, + title = {Research electronic data capture ({REDCap})—A metadata-driven methodology and workflow process for providing translational research informatics support}, + journal = {Journal of Biomedical Informatics}, + volume = {42}, + number = {2}, + pages = {377--381}, + year = {2009}, + issn = {1532-0464}, + doi = {10.1016/j.jbi.2008.08.010}, + url = {https://www.sciencedirect.com/science/article/pii/S1532046408001226}, + author = {Paul A. Harris and Robert Taylor and Robert Thielke and Jonathon Payne and Nathaniel Gonzalez and Jose G. Conde}, + keywords = {Medical informatics, Electronic data capture, Clinical research, Translational research}, + abstract = {Research electronic data capture (REDCap) is a novel workflow methodology and software solution designed for rapid development and deployment of electronic data capture tools to support clinical and translational research. We present: (1) a brief description of the REDCap metadata-driven software toolset; (2) detail concerning the capture and use of study-related metadata from scientific research teams; (3) measures of impact for REDCap; (4) details concerning a consortium network of domestic and international institutions collaborating on the project; and (5) strengths and limitations of the REDCap system. 
REDCap is currently supporting 286 translational research projects in a growing collaborative network including 27 active partner institutions.} +} + +@article{Wickham2014, + title={Tidy Data}, + volume={59}, + url={https://www.jstatsoft.org/index.php/jss/article/view/v059i10}, + doi={10.18637/jss.v059.i10}, + abstract={A huge amount of effort is spent cleaning data to get it ready for analysis, but there has been little research on how to make data cleaning as easy and effective as possible. This paper tackles a small, but important, component of data cleaning: data tidying. Tidy datasets are easy to manipulate, model and visualize, and have a specific structure: each variable is a column, each observation is a row, and each type of observational unit is a table. This framework makes it easy to tidy messy datasets because only a small set of tools are needed to deal with a wide range of un-tidy datasets. This structure also makes it easier to develop tidy tools for data analysis, tools that both input and output tidy datasets. 
The advantages of a consistent data structure and matching tools are demonstrated with a case study free from mundane data manipulation chores.}, + number={10}, + journal={Journal of Statistical Software}, + author={Wickham, Hadley}, + year={2014}, + pages={1--23} +} + +@Manual{r_cit, + title = {{R}: A Language and Environment for Statistical Computing}, + author = {{R Core Team}}, + organization = {R Foundation for Statistical Computing}, + address = {Vienna, Austria}, + year = {2020}, + url = {https://www.R-project.org/}, +} + +@Manual{redcapr_cit, + title = {{REDCapR}: Interaction Between {R} and {REDCap}}, + author = {Will Beasley}, + year = {2023}, + url = {https://ouhscbbmc.github.io/REDCapR/}, + note = {https://ouhscbbmc.github.io/REDCapR/, https://github.com/OuhscBbmc/REDCapR, https://www.ouhsc.edu/bbmc/, https://project-redcap.org}, +} + +@Manual{redcapapi_cit, + title = {{redcapAPI}: Accessing data from {REDCap} projects using the {API}}, + author = {Benjamin Nutter and Stephen Lane}, + year = {2023}, + doi = {10.5281/zenodo.11826}, + note = {R package version 2.8.0}, + url = {https://github.com/nutterb/redcapAPI/wiki}, +} + +@Manual{redcapdm_cit, + title = {{REDCapDM}: '{REDCap}' Data Management}, + author = {João Carmezim and Judith Peñafiel and Pau Satorra and Esther García and Natàlia Pallarés and Cristian Tebé}, + year = {2023}, + note = {R package version 0.8.0}, + url = {https://ubidi.github.io/REDCapDM/}, +} + +@Manual{tidyredcap_cit, + title = {{tidyREDCap}: Helper Functions for Working with '{REDCap}' Data}, + author = {Raymond Balise and Gabriel Odom and Anna Calderon and Layla Bouzoubaa and Wayne DeFreitas and Kyle Grealis}, + year = {2023}, + note = {R package version 1.1.1}, + url = {https://raymondbalise.github.io/tidyREDCap/index.html}, +} + +@Manual{labelled_cit, + title = {{labelled}: Manipulating Labelled Data}, + author = {Joseph Larmarange}, + year = {2023}, + note = {R package version 2.12.0}, + url = {https://larmarange.github.io/labelled/}, +} + + 
+@Manual{openxlsx2_cit, + title = {{openxlsx2}: Read, Write and Edit 'xlsx' Files}, + author = {Jordan Mark Barbone and Jan Marvin Garbuszus}, + year = {2023}, + note = {R package version 1.1}, + url = {https://janmarvin.github.io/openxlsx2/}, +} + +@Manual{skimr_cit, + title = {{skimr}: Compact and Flexible Summaries of Data}, + author = {Elin Waring and Michael Quinn and Amelia McNamara and Eduardo {Arino de la Rubia} and Hao Zhu and Shannon Ellis}, + year = {2023}, + note = {R package version 2.1.5, https://github.com/ropensci/skimr/}, + url = {https://docs.ropensci.org/skimr/}, +} + +@Manual{tibble_cit, + title = {{tibble}: Simple Data Frames}, + author = {Kirill Müller and Hadley Wickham}, + year = {2023}, + note = {https://tibble.tidyverse.org/, https://github.com/tidyverse/tibble}, +} + +@Misc{openssf_cit, + title = {Open Source Security Foundation}, + url={https://openssf.org/}, + publisher={The Linux Foundation}, + year={2023}, + month=oct +} + +@Manual{redcapr_write_cit, + title = {Writing to a {REDCap} Project}, + author = {Will Beasley and Raymond Balise}, + year = {2023}, + url = {https://ouhscbbmc.github.io/REDCapR/articles/workflow-write.html} +} + +@Manual{redcaptidier_pkgdown_cit, + title = {{REDCapTidieR}}, + author = {Richard Hanna and Ezra Porter and Stephan Kadauke}, + url = {https://chop-cgtinformatics.github.io/REDCapTidieR/index.html}, + year = {2023} +} + +@Manual{superheroes_cit, + title = {Superhero Database}, + url = {https://www.superherodb.com/}, + author = {Jeroen ter Lingen}, + year = {2023} +} diff --git a/paper/paper.html b/paper/paper.html new file mode 100644 index 00000000..ced32ac5 --- /dev/null +++ b/paper/paper.html @@ -0,0 +1,694 @@ + + + + +
+ + + + + + + + +Capturing and storing electronic data is integral in the research +world. REDCap (Harris et al. 2009, 2019) offers a secure web +application that lets users build databases and surveys with a robust +front-end interface that can support data of any type, including data +requiring compliance with standards for protected information.
+Many REDCap users use the R programming language (R Core Team 2020) to extract and analyze their
+data. The REDCapR
+(Beasley 2023) and redcapAPI
+(Nutter and Lane 2023) packages allow R
+users to extract data directly into their programming environment. While
+this works well for simple REDCap databases, it becomes cumbersome for
+complex databases, because the REDCap API outputs a “block matrix”—a
+single table with varied granularity levels, which conflicts with the
+“tidy data” framework (Wickham 2014) that
+advocates for standardized data organization.
To address this, we introduce REDCapTidieR
, an
+open-source package that streamlines data extraction and restructures it
+into an intuitive format compatible with the tidy data principles. This
+facilitates seamless data analysis in R, especially for complex
+longitudinal studies.
While there are several tools available for REDCap data management, +REDCapTidieR introduces a unique solution by transforming the +challenging block matrix into a standardized tidy data structure that we +term the “supertibble”. This approach not only aligns with good data +science practice but also caters to databases of any complexity. By +providing a suite of utility functions to work with the supertibble, +REDCapTidieR provides a complete framework for extracting REDCap data +designed with user-friendliness at its core.
+As of 2023, the REDCap Consortium boasts nearly 3 million users +across over 150 countries. REDCap databases range from single-instrument +projects to complex builds that use both repeating instruments and +repeating events. These data structures are needed to capture multiple +items related to a specific visit, such as concomitant medications, or +events that cannot be planned ahead of time, such as adverse events.
+REDCap databases that contain repeating events and instruments +require significant manual pre-processing, a major pain point for +researchers and analysts. This is because the REDCap API returns a +single table (Figure 1) that includes data from instruments that record +data at different levels of granularity.
+While there are a few existing REDCap tools (Table 1),
+REDCapTidieR
occupies a unique space by providing analysts
+with a framework that returns a tidy data structure regardless of the size or
+complexity of the extracted database. Although some of these tools also
+offer functions for data processing, such as the tidyREDCap
+(Balise et al. 2023) and REDCapDM
+(Carmezim et al. 2023) packages, only
+REDCapTidieR
 restructures the block matrix into an easy-to-use
+format.
REDCapTidieR
is built with production readiness in mind.
+It builds upon REDCapR
, which contains an excellent test
+suite, to make API calls, and includes an extensive automated test suite
+and ample documentation through a pkgdown
 site (https://chop-cgtinformatics.github.io/REDCapTidieR/index.html)
+(Hanna, Porter, and Kadauke 2023). It
+meets the rigorous requirements of the OpenSSF Best
+Practices Badge (“Open Source
+Security Foundation” 2023), which certifies
+open-source projects that adhere to criteria for delivering
+high-quality, robust, and secure software.
| Package | Exports from REDCap | Imports into REDCap | Tidy Reformatting | Extensive Test Suite |
|---|---|---|---|---|
| redcapAPI | x | x | | x |
| REDCapR | x | x | | x |
| tidyREDCap | x | | | |
| REDCapDM | x | | | |
| REDCapTidieR | x | | x | x |
Table 1: Comparative breakdown of the landscape for REDCap tools in +R.
+The REDCapTidieR::read_redcap()
function leverages
+REDCapR
to make API calls to query the data and metadata of
+a REDCap project and returns the supertibble (Figure 1). The
+supertibble, named after the tibble
package
+(Müller and Wickham 2023), is an
+alternative presentation of the data in which multiple tables are linked
+together in a single object in a fashion consistent with tidy data
+principles.
Figure 1: The REDCapTidieR supertibble shown in the Data Viewer of
+the RStudio IDE. The “Superhero database” (Lingen
+2023) contains two instruments, one nonrepeating and one
+repeating. A. The REDCap API outputs a “Block Matrix”. Note an abundance
+of NA
values, which do not represent missing values but
+rather fields that do not apply due to the data structure. B. The
+read_redcap()
function returns a “Supertibble”. Note that
+each row represents one instrument, identified by the
+redcap_form_name
column. The redcap_data
+column is a list column that links to tibbles containing the data from a
+specific instrument. The Data Viewer allows drilling down into
+individual tibbles by clicking on the table icon, allowing for rapid and
+intuitive data exploration without any preprocessing. Since each
+instrument has a consistent granularity, these tibbles can be tidy. Two
+data tibbles are shown, one from a nonrepeating and one from a repeating
+instrument. Note the differences in granularity between the
+instruments.
REDCapTidieR
provides utility functions to work with the
+supertibble, all designed to work with the R pipe operator
+|>
. The extract_tibble()
function takes a
+supertibble object and returns a specific data tibble. The
+make_labelled()
function leverages the
+labelled
package (Larmarange
+2023) to apply variable labels to the supertibble. The
+add_skimr_metadata()
function uses the skimr
+package (Waring et al. 2023) to add
+summary statistics. Using the write_redcap_xlsx()
function,
+which leverages the openxlsx2
(Openxlsx2: Read, Write and Edit ’Xlsx’ Files
+2023) package, users can easily export the supertibble into a
+collaborator-friendly Excel document, in which each Excel sheet contains
+the data for an instrument.
REDCapTidieR
cannot be used to write data to a REDCap
+project. We refer the reader to an excellent guide on how to accomplish
+this using REDCapR
(Beasley and
+Balise 2023).
REDCapTidieR
is available on GitHub
+and CRAN
+and works on all major operating systems.
We would like to thank Will Beasley, Paul Wildenhain, and Jan Marvin +for their feedback and support in development.
+This package was developed by the Cell +and Gene Therapy Informatics Team of the Children’s Hospital of Philadelphia.
+The authors declare no financial conflicts of interest.
+