From 5f5382b7501d031f88ea58d170e2c7e670eaba62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Mon, 8 Apr 2024 16:43:07 -0500 Subject: [PATCH 01/23] Move NEWS.md into docs to render at the site --- NEWS.md => docs/src/news.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename NEWS.md => docs/src/news.md (100%) diff --git a/NEWS.md b/docs/src/news.md similarity index 100% rename from NEWS.md rename to docs/src/news.md From c503f22c1474f05de0f39f8ce582a0efdce14230 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Mon, 8 Apr 2024 16:43:35 -0500 Subject: [PATCH 02/23] Start ignoring DocumenterVitepress local files --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 8955f27..688158c 100644 --- a/.gitignore +++ b/.gitignore @@ -10,5 +10,9 @@ docs/docs docs/site docs/build docs/var +docs/node_modules/ +docs/Manifest.toml +docs/package-lock.json + deps/build.jl Manifest.toml \ No newline at end of file From 96f179a97bedc888cd4f1bed57fde1ad6d54ebfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Mon, 8 Apr 2024 16:44:57 -0500 Subject: [PATCH 03/23] Remove MkDocs files --- docs/_overrides/partials/source.html | 30 ----- docs/mkdocs.yml | 118 -------------------- docs/src/javascripts/mathjax.js | 16 --- docs/src/stylesheets/custom.css | 157 --------------------------- 4 files changed, 321 deletions(-) delete mode 100644 docs/_overrides/partials/source.html delete mode 100644 docs/mkdocs.yml delete mode 100644 docs/src/javascripts/mathjax.js delete mode 100644 docs/src/stylesheets/custom.css diff --git a/docs/_overrides/partials/source.html b/docs/_overrides/partials/source.html deleted file mode 100644 index 4130444..0000000 --- a/docs/_overrides/partials/source.html +++ /dev/null @@ -1,30 +0,0 @@ -{% import "partials/language.html" as lang with context %} - -
- {% set icon = config.theme.icon.repo or "fontawesome/brands/git-alt" %} - {% include ".icons/" ~ icon ~ ".svg" %} -
-
- {{ config.repo_name }} -
-
-{% if config.theme.twitter_url %} - -
- {% include ".icons/fontawesome/brands/twitter.svg" %} -
-
- {{ config.theme.twitter_name }} -
-
-{% endif %} -{% if config.theme.sponsor_url %} - -
- {% include ".icons/fontawesome/regular/heart.svg" %} -
-
- {{ config.theme.sponsor_name }} -
-
-{% endif %} \ No newline at end of file diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml deleted file mode 100644 index 339bd76..0000000 --- a/docs/mkdocs.yml +++ /dev/null @@ -1,118 +0,0 @@ -theme: - name: material - logo: assets/Tidier_jl_logo.png - features: - - content.code.copy - # - announce.dismiss - - content.code.annotate - # - content.tabs.link - #- content.tooltips - # - header.autohide - # - navigation.expand - #- navigation.indexes - # - navigation.instant - # - navigation.prune - #- navigation.sections - #- navigation.tabs - # - navigation.tabs.sticky - - navigation.top - - navigation.footer - #- navigation.tracking - - search.highlight - - search.share - - search.suggest - - toc.follow - #- toc.integrate # Table of contents is integrated on the left; does not appear separately on the right. - - header.autohide # header disappears as you scroll - palette: - - # Light mode - - media: "(prefers-color-scheme: light)" - scheme: default - primary: white - accent: light blue - toggle: - icon: material/weather-sunny - name: Switch to dark mode - - # Dark mode - - media: "(prefers-color-scheme: dark)" - scheme: slate - primary: black - accent: light blue - toggle: - icon: material/weather-night - name: Switch to light mode - font: - text: Lato - icon: - repo: fontawesome/brands/github # GitHub logo in top right - #logo: "material/gridoff" # Equinox logo in top left - # favicon: "_static/icon_transparent.svg" - custom_dir: "_overrides" # Overriding part of the HTML - - # These additions are my own custom ones, having overridden a partial. 
- #twitter_name: "" - #twitter_url: "" -site_name: Tidier.jl -site_description: Tidier.jl -site_author: Lazaro Alonso and Karandeep Singh -site_url: "" - -repo_url: https://github.com/TidierOrg/Tidier.jl -repo_name: Tidier.jl -edit_uri: "" # No edit button, as some of our pages are in /docs and some in /examples via symlink, so it's impossible for them all to be accurate - -strict: true # Don't allow warnings during the build process -extra_javascript: - # The below three make MathJax work, see https://squidfunk.github.io/mkdocs-material/reference/mathjax/ - - _static/mathjax.js - - https://polyfill.io/v3/polyfill.min.js?features=es6 - - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js - -extra: - social: - - icon: fontawesome/brands/github - link: https://github.com/kdpsingh - - icon: fontawesome/brands/twitter - link: https://twitter.com/kdpsinghlab - -extra_css: - - stylesheets/custom.css - - assets/Documenter.css - -extra_javascript: - - javascripts/mathjax.js - - https://polyfill.io/v3/polyfill.min.js?features=es6 - - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js - -markdown_extensions: - - tables - - admonition - - toc: - permalink: "¤" # Adds a clickable permalink to each section heading - toc_depth: 4 - - pymdownx.arithmatex: # Render LaTeX via MathJax - generic: true - - pymdownx.details # Allowing hidden expandable regions denoted by ??? - - pymdownx.highlight - - pymdownx.inlinehilite - - pymdownx.snippets - - pymdownx.superfences # Seems to enable syntax highlighting when used with the Material theme. 
- - pymdownx.tasklist: - custom_checkbox: true - - def_list - - pymdownx.tabbed: - alternate_style: true - - attr_list - - md_in_html -plugins: - - search # default search plugin; needs manually re-enabling when using any other plugins - - autorefs # Cross-links to headings - - include_exclude_files: - exclude: - - "_overrides" -# - mknotebooks # Jupyter notebooks - - mkdocs-video -nav: - - "Home": "index.md" \ No newline at end of file diff --git a/docs/src/javascripts/mathjax.js b/docs/src/javascripts/mathjax.js deleted file mode 100644 index a80ddbf..0000000 --- a/docs/src/javascripts/mathjax.js +++ /dev/null @@ -1,16 +0,0 @@ -window.MathJax = { - tex: { - inlineMath: [["\\(", "\\)"]], - displayMath: [["\\[", "\\]"]], - processEscapes: true, - processEnvironments: true - }, - options: { - ignoreHtmlClass: ".*|", - processHtmlClass: "arithmatex" - } -}; - -document$.subscribe(() => { - MathJax.typesetPromise() -}) \ No newline at end of file diff --git a/docs/src/stylesheets/custom.css b/docs/src/stylesheets/custom.css deleted file mode 100644 index 84d99c0..0000000 --- a/docs/src/stylesheets/custom.css +++ /dev/null @@ -1,157 +0,0 @@ -/* Fix /page#foo going to the top of the viewport and being hidden by the navbar */ -html { - scroll-padding-top: 50px; - } - - /* Fit the Twitter handle alongside the GitHub one in the top right. 
*/ - - div.md-header__source { - width: revert; - max-width: revert; - } - - a.md-source { - display: inline-block; - } - - .md-source__repository { - max-width: 100%; - } - - /* Emphasise sections of nav on left hand side */ - - nav.md-nav { - padding-left: 5px; - } - - nav.md-nav--secondary { - border-left: revert !important; - } - - .md-nav__title { - font-size: 0.9rem; - } - - .md-nav__item--section > .md-nav__link { - font-size: 0.9rem; - } - - /* Indent autogenerated documentation */ - - div.doc-contents { - padding-left: 25px; - border-left: 4px solid rgba(230, 230, 230); - } - - /* Increase visibility of splitters "---" */ - - [data-md-color-scheme="default"] .md-typeset hr { - border-bottom-color: rgb(0, 0, 0); - border-bottom-width: 1pt; - } - - [data-md-color-scheme="slate"] .md-typeset hr { - border-bottom-color: rgb(230, 230, 230); - } - - /* More space at the bottom of the page */ - - .md-main__inner { - margin-bottom: 1.5rem; - } - - /* Remove prev/next footer buttons */ - - .md-footer__inner { - display: none; - } - - /* Bugfix: remove the superfluous parts generated when doing: - - ??? Blah - - ::: library.something - */ - - .md-typeset details .mkdocstrings > h4 { - display: none; - } - - .md-typeset details .mkdocstrings > h5 { - display: none; - } - - /* Change default colours for tags */ - - [data-md-color-scheme="default"] { - --md-typeset-a-color: rgb(0, 150, 255) !important; - } - [data-md-color-scheme="slate"] { - --md-typeset-a-color: rgb(0, 150, 255) !important; - } - - /* Highlight functions, classes etc. type signatures. Really helps to make clear where - one item ends and another begins. 
*/ - - [data-md-color-scheme="default"] { - --doc-heading-color: #DDD; - --doc-heading-border-color: #CCC; - --doc-heading-color-alt: #F0F0F0; - } - [data-md-color-scheme="slate"] { - --doc-heading-color: rgb(25,25,33); - --doc-heading-border-color: rgb(25,25,33); - --doc-heading-color-alt: rgb(33,33,44); - --md-code-bg-color: rgb(38,38,50); - } - - h4.doc-heading { - /* NOT var(--md-code-bg-color) as that's not visually distinct from other code blocks.*/ - background-color: var(--doc-heading-color); - border: solid var(--doc-heading-border-color); - border-width: 1.5pt; - border-radius: 2pt; - padding: 0pt 5pt 2pt 5pt; - } - h5.doc-heading, h6.heading { - background-color: var(--doc-heading-color-alt); - border-radius: 2pt; - padding: 0pt 5pt 2pt 5pt; - } - - /* From Pretty Pandas Dataframes */ -/* Supports mkdocs-material color variables */ -.data-frame { - border: 0; - font-size: smaller; -} -.data-frame tr { - border: none; - background: var(--md-code-bg-color, #ffffff); -} -.data-frame tr:nth-child(even) { - background: var(--md-default-bg-color, #f5f5f5); -} -.data-frame tr:hover { - background-color: var(--md-footer-bg-color--dark, #e1f5fe); -} - -.data-frame thead th { - background: var(--md-default-bg-color, #ffffff); - border-bottom: 1px solid #aaa; - font-weight: bold; -} -.data-frame th { - border: none; - padding-left: 10px; - padding-right: 10px; -} - -.data-frame td{ - /* background: #fff; */ - border: none; - text-align: right; - min-width:5em; - padding-left: 10px; - padding-right: 10px; -} \ No newline at end of file From 384a0077f4b7459618746279cc07fcd461dea6c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Mon, 8 Apr 2024 16:45:31 -0500 Subject: [PATCH 04/23] Splitting index into more concise docs --- docs/src/faq.md | 39 +++++++ docs/src/installation.md | 47 +++++++++ docs/src/reference.md | 5 - docs/src/simple-analysis.md | 204 ++++++++++++++++++++++++++++++++++++ 4 files changed, 290 insertions(+), 5 deletions(-) create 
mode 100644 docs/src/faq.md create mode 100644 docs/src/installation.md create mode 100644 docs/src/simple-analysis.md diff --git a/docs/src/faq.md b/docs/src/faq.md new file mode 100644 index 0000000..1a205fd --- /dev/null +++ b/docs/src/faq.md @@ -0,0 +1,39 @@ +## Frequently asked questions + +### I'm a Julia user. Why should I use Tidier.jl rather than other data analysis packages? + +While Julia has a number of great data analysis packages, the most mature and idiomatic Julia package for data analysis is DataFrames.jl. Most other data analysis packages in Julia build on top of DataFrames.jl, and Tidier.jl is no exception. + +DataFrames.jl emphasizes idiomatic Julia code without macros. While it is elegant, it can be verbose because of the need to write out anonymous functions. DataFrames.jl also emphasizes correctness, which means that errors are favored over warnings. For example, grouping by one variable and then subsequently grouping the already-grouped data frame by another variable results in an error in DataFrames.jl. These restrictions, while justified in some instances, can make interactive data analysis feel clunky and slow. + +A number of macro-based data analysis packages have emerged as extensions of DataFrames.jl to make data analysis syntax less verbose, including DataFramesMeta.jl, Query.jl, and DataFrameMacros.jl. All of these packages have their strengths, and each of these served as an inspiration towards the creation of Tidier.jl. + +What sets Tidier.jl apart is that it borrows the design of the tried-and-widely-adopted tidyverse and brings it to Julia. Our goal is to make data analysis code as easy and readable as possible. In our view, the reason you should use Tidier.jl is because of the richness, consistency, and thoroughness of the design made possible by bringing together two powerful tools: DataFrames.jl and the tidyverse. 
In Tidier.jl, nearly every possible transformation on data frames (e.g., aggregating, pivoting, nesting, and joining) can be accomplished using a consistent syntax. While you always have the option to intermix Tidier.jl code with DataFrames.jl code, Tidier.jl strives for completeness -- there should never be a requirement to fall back to DataFrames.jl for any kind of data analysis task. + +Tidier.jl also focuses on conciseness. This shows up most readily in two ways: the use of bare column names, and an approach to auto-vectorizing code. + +1. **Bare column names:** If you are referring to a column named `a`, you can simply refer to it as `a` in Tidier.jl. You are essentially referring to `a` as if it was within an anonymous function, where the variable `a` was mapped to the column `a` in the data frame. If you want to refer to an object `a` that is defined outside of the data frame, then you can write `!!a`, which we refer to as "bang-bang interpolation." This syntax is motivated by the tidyverse, where [the `!!` operator was selected because it is the least-bad "polite fiction" way of representing lazy interpolation](https://adv-r.hadley.nz/quasiquotation.html#the-polite-fiction-of). + +2. **Auto-vectorized code:** Most data transformation functions and operators are intended to be used on scalars. However, transformations are usually performed on columns of data (represented as 1-dimensional arrays, or vectors), which means that most functions need to be vectorized, which can get unwieldy and verbose. However, there are functions which operate directly on vectors and thus should not be vectorized when applied to columns (e.g., `mean()` and `median()`). Tidier.jl uses a customizable look-up table to know which functions to vectorize and which ones not to vectorize. 
This means that you can largely leave code as un-vectorized (i.e., `mean(a + 1)` rather than `mean(a .+ 1)`), and Tidier.jl will correctly convert the first code into the second before running it. There are several ways to manually override the defaults. + +Lastly, the reason you should consider using Tidier.jl is that it brings a consistent syntax not only to data manipulation but also to plotting (by wrapping Makie.jl and AlgebraOfGraphics.jl) and to the handling of categorical variables, strings, and dates. Wherever possible, Tidier.jl uses existing classes rather than defining new ones. As a result, using Tidier.jl should never preclude you from using other Base Julia functions with which you may already be familiar. + +### I'm an R user and I'm perfectly happy with the tidyverse. Why should I consider using Tidier.jl? + +If you're happy with the R tidyverse, then there's no imminent reason to switch to using Tidier.jl. While DataFrames.jl (the package on which TidierData.jl depends) [is faster than R's dplyr and tidyr on benchmarks](https://duckdblabs.github.io/db-benchmark/), there are other faster backends in R that allow for the use of tidyverse syntax with better speed (e.g., dtplyr, tidytable, tidypolars). + +The primary reason to consider using Tidier.jl is the value proposition of using Julia itself. Julia has many similarities to R (e.g., interactive coding in a console, functional style, multiple dispatch, dynamic data types), but unlike R, Julia is automatically compiled (to LLVM) before it runs. This means that certain compiler optimizations, which are normally only possible in more verbose languages like C/C++, become available to Julia. There are a number of situations in R where the end-user is able to write fast R code as a direct result of C++ being used on the backend (e.g., through the use of the Rcpp package). 
This is why R is sometimes referred to as a glue language -- because it provides a very nice way of glueing together faster C++ code. + +The main value proposition of Julia is that you can use it as *both* a glue language *and* as a backend language. Tidier.jl embraces the glue language aspect of Julia while relying on packages like DataFrames.jl and Makie.jl on the backend. + +While Julia has very mature backends, we hope that Tidier.jl demonstrates the value of, and need for, more glue-oriented data analysis packages in Julia. + +### Why does Tidier.jl re-export so many packages? + +Tidier comes with batteries included. If you are using Tidier, you generally won't have to load in other packages for basic data analysis. Tidier is meant for interactive use. You can start your code with `using Tidier` and expect to have what you need at your fingertips. + +If you are a package developer, then you definitely should consider depending on one of the smaller packages that make up Tidier.jl rather than Tidier itself. For example, if you want to use the categorical variable functions from Tidier, then you should rely only on TidierCats.jl as a dependency. + +### Should I update Tidier.jl or the underlying packages (e.g., TidierPlots.jl) individually? + +Either approach is okay. For most users, we recommend updating Tidier.jl directly, as this will update the underlying packages up to their latest minor versions (but not necessarily up to their latest patch release). However, if you need access to the latest functionality in the underlying packages, you should feel free to update them directly. We will keep Tidier.jl future-proof to underlying package updates, so this shouldn't cause any problems with Tidier.jl. 
\ No newline at end of file diff --git a/docs/src/installation.md b/docs/src/installation.md new file mode 100644 index 0000000..97c9fbe --- /dev/null +++ b/docs/src/installation.md @@ -0,0 +1,47 @@ +# Installation + +## Installing Tidier.jl + +There are 2 ways to install Tidier.jl: using the package console, or using Julia code when you're using the Julia console. You might also see the console referred to as the "REPL," which stands for Read-Evaluate-Print Loop. The REPL is where you can interactively run code and view the output. + +Julia's REPL is particularly cool because it provides a built-in package REPL and shell REPL, which allow you to take actions on managing packages (in the case of the package REPL) or run shell commands (in the shell REPL) without ever leaving the Julia REPL. + +To install the stable version of Tidier.jl, you can type the following into the Julia REPL: + +```julia +]add Tidier +``` + +The `]` character starts the Julia [package manager](https://docs.julialang.org/en/v1/stdlib/Pkg/). The `add Tidier` command tells the package manager to install the Tidier package from the Julia registry. You can exit the package REPL by pressing the backspace key to return to the Julia prompt. + +If you already have the Tidier package installed, the `add Tidier` command *will not* update the package. Instead, you can update the package using the `update Tidier` (or `up Tidier` for short) command. As with the `add Tidier` command, make sure you are in the package REPL before you run these package manager commands. 
+ +If you need to (or prefer to) install packages using Julia code, you can achieve the same outcome using the following code to install Tidier: + +```julia +import Pkg +Pkg.add("Tidier") +``` + +You can update Tidier.jl using the `Pkg.update()` function, as follows: + +```julia +import Pkg; Pkg.update("Tidier") +``` + +Note that while Julia allows you to separate statements by using multiple lines of code, you can also use a semi-colon (`;`) to separate multiple statements. This is convenient for short snippets of code. There's another practical reason to use semi-colons in coding, which is to silence the output of a function call. We will come back to this in the "Getting Started" section below. + +In general, installing the latest version of the package from the Julia registry should be sufficient because we follow a continuous-release cycle. After every update to the code, we update the version based on the magnitude of the change and then release the latest version to the registry. That's why it's so important to know how to update the package! + +However, if for some reason you do want to install the package directly from GitHub, you can get the newest version using either the package REPL... + +```julia +]add Tidier#main +``` + +...or using Julia code. 
+ +```julia +import Pkg; Pkg.add(url="https://github.com/TidierOrg/Tidier.jl") + +``` diff --git a/docs/src/reference.md b/docs/src/reference.md index d3a22b8..6fdf17f 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -3,11 +3,6 @@ DocTestSetup= quote using Tidier end ``` -## Index -```@index -Modules = [Tidier] -Private = false -``` ## Reference - Exported functions ```@autodocs diff --git a/docs/src/simple-analysis.md b/docs/src/simple-analysis.md new file mode 100644 index 0000000..f424e29 --- /dev/null +++ b/docs/src/simple-analysis.md @@ -0,0 +1,204 @@ +# A Simple Data Analysis + +## Loading Tidier.jl + +Once you've installed Tidier.jl, you can load it by typing: + +```julia +using Tidier +``` + +When you type this command, multiple things happen behind the scenes. First, the following packages are loaded and re-exported, which is to say that all of the exported macros and functions from these packages become available: TidierData, TidierPlots, TidierCats, TidierDates, TidierStrings, TidierText, and TidierVest. + +Don't worry if you don't know what each of these packages does yet. We will cover them in package-specific documentation pages, which can be accessed below. For now, all you need to know is that these smaller packages are actually the ones doing all the work when you use Tidier. + +There are also a few other packages whose exported functions also become available. We will discuss these in the individual package documentation, but the most important ones for you to know about are: + +- The `DataFrame()` function from the DataFrames package is re-exported so that you can create a data frame without loading the DataFrames package. 
+- The `@chain()` macro from the Chain package is re-exported, so you can chain together functions and macros +- The entire Statistics package is re-exported so you can access summary statistics like `mean()` and `median()` +- The CategoricalArrays package is re-exported so you can access the `categorical()` function to define categorical variables +- The Dates package is re-exported to enable support for variables containing dates + +## What can Tidier.jl do? + +Before we dive into an introduction of Julia and a look into how Tidier.jl works, it's useful to show you what Tidier.jl can do. First, we will read in some data, and then we will use Tidier.jl to chain together some data analysis operations. + +### First, let's read in the "Visits to Physician Office" dataset. + +This dataset comes with the Ecdat R package and is titled OFP. [You can read more about the dataset here](https://rdrr.io/cran/Ecdat/man/OFP.html). To read in datasets packaged with commonly used R packages, we can use the RDatasets Julia package. + +```julia +julia> using Tidier, RDatasets +julia> ofp = dataset("Ecdat", "OFP") + +4406×19 DataFrame + Row │ OFP OFNP OPP OPNP EMR Hosp NumChro ⋯ + │ Int32 Int32 Int32 Int32 Int32 Int32 Int32 ⋯ +──────┼──────────────────────────────────────────────────── + 1 │ 5 0 0 0 0 1 ⋯ + 2 │ 1 0 2 0 2 0 + 3 │ 13 0 0 0 3 3 + 4 │ 16 0 5 0 1 1 + 5 │ 3 0 0 0 0 0 ⋯ + 6 │ 17 0 0 0 0 0 + 7 │ 9 0 0 0 0 0 + ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ + 4401 │ 12 4 1 0 0 0 + 4402 │ 11 0 0 0 0 0 ⋯ + 4403 │ 12 0 0 0 0 0 + 4404 │ 10 0 20 0 1 1 + 4405 │ 16 1 0 0 0 0 + 4406 │ 0 0 0 0 0 0 ⋯ + 13 columns and 4393 rows omitted +``` + +Note that a preview of the data frame is automatically printed to the console. The reason this happens is that when you run this code line by line, the output of each line is printed to the console. This is convenient because it saves you from having to directly print the newly created `ofp` to the console in order to get a preview of what it contains. 
If this code were bundled in a code chunk (such as in a Jupyter notebook), then only the final line of the code chunk would be printed. + +The exact number of rows and columns that print will depend on the physical size of the REPL window. If you resize the console (e.g., in VS Code), Julia will adjust the number of rows/columns accordingly. + +If you want to suppress the output, you can add a `;` at the end of this statement, like this: + +```julia +julia> ofp = dataset("Ecdat", "OFP"); # Nothing prints +``` + +### With the OFP dataset loaded, let's ask some basic questions. + +#### What does the dataset consist of? + +We can use `@glimpse()` to find out the columns, data types, and peek at the first few values contained within the dataset. + +```julia +julia> @glimpse(ofp) + +Rows: 4406 +Columns: 19 +.OFP Int32 5, 1, 13, 16, 3, 17, 9, 3, 1, 0, 0, 44, 2, 1, 19, +.OFNP Int32 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, +.OPP Int32 0, 2, 0, 5, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, +.OPNP Int32 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, +.EMR Int32 0, 2, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +.Hosp Int32 1, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, +.NumChron Int32 2, 2, 4, 2, 2, 5, 0, 0, 0, 0, 1, 5, 1, 1, 1, 0, 1, +.AdlDiff Int32 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, +.Age Float64 6.9, 7.4, 6.6, 7.6, 7.9, 6.6, 7.5, 8.7, 7.3, 7.8, +.Black CategoricalValue{String, UInt8}yes, no, yes, no, no, no, no, no, +.Sex CategoricalValue{String, UInt8}male, female, female, male, female +.Married CategoricalValue{String, UInt8}yes, yes, no, yes, yes, no, no, no +.School Int32 6, 10, 10, 3, 6, 7, 8, 8, 8, 8, 8, 15, 8, 8, 12, 8 +.FamInc Float64 2.881, 2.7478, 0.6532, 0.6588, 0.6588, 0.3301, 0.8 +.Employed CategoricalValue{String, UInt8}yes, no, no, no, no, no, no, no, n +.Privins CategoricalValue{String, UInt8}yes, yes, no, yes, yes, no, yes, y +.Medicaid CategoricalValue{String, UInt8}no, no, yes, no, no, yes, no, no, +.Region 
CategoricalValue{String, UInt8}other, other, other, other, other, +.Hlth CategoricalValue{String, UInt8}other, other, poor, poor, other, p +``` + +If you're wondering why we need to place a `@` at the beginning of the word so that it reads `@glimpse()` rather than `glimpse()`, that's because including a `@` at the beginning denotes that this is a special type of function known as a macro. Macros have special capabilities in Julia, and many Tidier.jl functions that operate on data frames are implemented as macros. In this specific instance, we could have implemented `@glimpse()` without making use of any of the macro capabilities. However, for the sake of consistency, we have kept `@glimpse()` as a macro so that you can remember a basic rule of thumb: if Tidier.jl operates on a dataframe, then we will use macros rather than functions. The TidierPlots.jl package is a slight exception to this rule in that it is nearly entirely implemented as functions (rather than macros), and this will be described more in the TidierPlots documentation. + +#### Can we clean up the names of the columns? + +To avoid having to keep track of capitalization, data analysts often prefer column names to be in snake_case rather than TitleCase. Let's quickly apply this transformation to the `ofp` dataset. 
+ +```julia +julia> ofp = @clean_names(ofp) +julia> @glimpse(ofp) + +Rows: 4406 +Columns: 19 +.ofp Int32 5, 1, 13, 16, 3, 17, 9, 3, 1, 0, 0, 44, 2, 1, 19, +.ofnp Int32 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, +.opp Int32 0, 2, 0, 5, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, +.opnp Int32 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, +.emr Int32 0, 2, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +.hosp Int32 1, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, +.num_chron Int32 2, 2, 4, 2, 2, 5, 0, 0, 0, 0, 1, 5, 1, 1, 1, 0, 1, +.adl_diff Int32 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, +.age Float64 6.9, 7.4, 6.6, 7.6, 7.9, 6.6, 7.5, 8.7, 7.3, 7.8, +.black CategoricalValue{String, UInt8}yes, no, yes, no, no, no, no, no, +.sex CategoricalValue{String, UInt8}male, female, female, male, female +.married CategoricalValue{String, UInt8}yes, yes, no, yes, yes, no, no, no +.school Int32 6, 10, 10, 3, 6, 7, 8, 8, 8, 8, 8, 15, 8, 8, 12, 8 +.fam_inc Float64 2.881, 2.7478, 0.6532, 0.6588, 0.6588, 0.3301, 0.8 +.employed CategoricalValue{String, UInt8}yes, no, no, no, no, no, no, no, n +.privins CategoricalValue{String, UInt8}yes, yes, no, yes, yes, no, yes, y +.medicaid CategoricalValue{String, UInt8}no, no, yes, no, no, yes, no, no, +.region CategoricalValue{String, UInt8}other, other, other, other, other, +.hlth CategoricalValue{String, UInt8}other, other, poor, poor, other, p +``` + +Now that our column names are cleaned up, we can ask some basic analysis questions. + +#### What is the mean age for people in each of the regions? + +Because age is measured in decades according to the [dataset documentation](https://rdrr.io/cran/Ecdat/man/OFP.html), we will multiply everyone's age by 10 before we calculate the mean. 
+ +```julia +julia> @chain ofp @group_by(region) @summarize(mean_age = mean(age * 10)) + +4×2 DataFrame + Row │ region mean_age + │ Cat… Float64 +─────┼─────────────────── + 1 │ other 73.987 + 2 │ midwest 74.0769 + 3 │ noreast 73.9343 + 4 │ west 74.1165 +``` + +Overall, the mean age looks pretty similar across regions. The fact that we were able to calculate each region's age also reveals that there are no missing values. Any region containing a missing value would have returned `missing` instead of a number. + +The `@chain` macro, which is defined in the Chain package and re-exported by Tidier, allows us to pipe together multiple operations sequentially from left to right. In the example above, the `ofp` dataset is being piped into the first argument of the `@group_by()` macro, the result of which is then being piped into the `@summarize()` macro, which is then automatically removing the grouping and returning the result. + +For grouped data frames, `@summarize()` behaves differently than other Tidier.jl macros: `@summarize()` removes one layer of grouping. Because the data frame was only grouped by one column, the result is no longer grouped. Had we grouped by multiple columns, the result would still be grouped by all but the last column. The other Tidier.jl macros keep the data grouped. Grouped data frames can be ungrouped using `@ungroup()`. If you apply a new `@group_by()` macro to an already-grouped data frame, then the newly specified groups override the old ones. + +When we use the `@chain` macro, we are taking advantage of the fact that Julia macros can either be called using parentheses syntax, where each argument is separated by a comma, or they can be called with a spaced syntax where no parentheses are used. In the case of Tidier.jl macros, we always use the parentheses syntax, which makes it very easy to use the spaced syntax when working with `@chain`. + +An alternate way of calling `@chain` using the parentheses syntax is as follows. 
From a purely stylistic perspective, I don't recommend this because it adds a number of extra characters. However, if you're new to Julia, it's worth knowing about this form so that you realize that there is no magic involved when working with macros. + +```julia +julia> @chain(ofp, @group_by(region), @summarize(mean_age = mean(age * 10))) + +4×2 DataFrame + Row │ region mean_age + │ Cat… Float64 +─────┼─────────────────── + 1 │ other 73.987 + 2 │ midwest 74.0769 + 3 │ noreast 73.9343 + 4 │ west 74.1165 +``` + +On the other hand, either of these single-line expressions can get quite hard-to-read as more and more expressions are chained together. To make this easier to handle, `@chain` supports multi-line expressions using `begin-end` blocks like this: + +```julia +julia> @chain ofp begin + @group_by(region) + @summarize(mean_age = mean(age * 10)) + end + +4×2 DataFrame + Row │ region mean_age + │ Cat… Float64 +─────┼─────────────────── + 1 │ other 73.987 + 2 │ midwest 74.0769 + 3 │ noreast 73.9343 + 4 │ west 74.1165 +``` + +This format is convenient for interactive data analysis because you can easily comment out individual operations and view the result. For example, if we wanted to know the mean age for the overall dataset, we could simply comment out the `@group_by()` operation. 
+ +```julia +julia> @chain ofp begin + # @group_by(region) + @summarize(mean_age = mean(age * 10)) + end + +1×1 DataFrame + Row │ mean_age + │ Float64 +─────┼────────── + 1 │ 74.0241 +``` \ No newline at end of file From 8df08ed6a99b455b0a07fe63cd2cf9f778cc21ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Mon, 8 Apr 2024 16:46:25 -0500 Subject: [PATCH 05/23] Install DocumenterVitepress and add style.css, package.json and config.mts --- docs/Project.toml | 2 +- docs/package.json | 18 +++ docs/src/.vitepress/config.mts | 48 ++++++++ docs/src/.vitepress/theme/index.ts | 19 ++++ docs/src/.vitepress/theme/style.css | 171 ++++++++++++++++++++++++++++ 5 files changed, 257 insertions(+), 1 deletion(-) create mode 100644 docs/package.json create mode 100644 docs/src/.vitepress/config.mts create mode 100644 docs/src/.vitepress/theme/index.ts create mode 100644 docs/src/.vitepress/theme/style.css diff --git a/docs/Project.toml b/docs/Project.toml index dedad76..ee15a84 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -3,7 +3,7 @@ CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -DocumenterMarkdown = "997ab1e6-3595-5248-9280-8efb232c3433" +DocumenterVitepress = "4710194d-e776-4893-9690-8d956a29c365" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" Tidier = "f0413319-3358-4bb0-8e7c-0c83523a93bd" diff --git a/docs/package.json b/docs/package.json new file mode 100644 index 0000000..05c934b --- /dev/null +++ b/docs/package.json @@ -0,0 +1,18 @@ +{ + "scripts": { + "docs:dev": "vitepress dev build/.documenter", + "docs:build": "vitepress build build/.documenter", + "docs:preview": "vitepress preview build/.documenter" + }, + "dependencies": { + "@shikijs/transformers": "^1.1.7", + "markdown-it": "^14.0.0", + "markdown-it-footnote": "^4.0.0", + 
"markdown-it-mathjax3": "^4.3.2", + "vitepress-plugin-tabs": "^0.5.0", + "vitest": "^1.3.0" + }, + "devDependencies": { + "vitepress": "^1.0.2" + } +} diff --git a/docs/src/.vitepress/config.mts b/docs/src/.vitepress/config.mts new file mode 100644 index 0000000..11aedb5 --- /dev/null +++ b/docs/src/.vitepress/config.mts @@ -0,0 +1,48 @@ +import { defineConfig } from 'vitepress' +import { tabsMarkdownPlugin } from 'vitepress-plugin-tabs' +import mathjax3 from "markdown-it-mathjax3"; +import footnote from "markdown-it-footnote"; + +// https://vitepress.dev/reference/site-config +export default defineConfig({ + base: 'REPLACE_ME_DOCUMENTER_VITEPRESS',// TODO: replace this in makedocs! + title: 'REPLACE_ME_DOCUMENTER_VITEPRESS', + description: "A VitePress Site", + lastUpdated: true, + cleanUrls: true, + outDir: 'REPLACE_ME_DOCUMENTER_VITEPRESS', // This is required for MarkdownVitepress to work correctly... + head: [['link', { rel: 'icon', href: 'REPLACE_ME_DOCUMENTER_VITEPRESS_FAVICON' }]], + ignoreDeadLinks: true, + + markdown: { + math: true, + config(md) { // each markdown-it plugin is registered as its own statement + md.use(tabsMarkdownPlugin); + md.use(mathjax3); + md.use(footnote); + }, + theme: { + light: "github-light", + dark: "github-dark"} + }, + themeConfig: { + outline: 'deep', + logo: 'REPLACE_ME_DOCUMENTER_VITEPRESS', + search: { + provider: 'local', + options: { + detailedView: true + } + }, + nav: 'REPLACE_ME_DOCUMENTER_VITEPRESS', + sidebar: 'REPLACE_ME_DOCUMENTER_VITEPRESS', + editLink: 'REPLACE_ME_DOCUMENTER_VITEPRESS', + socialLinks: [ + { icon: 'github', link: 'REPLACE_ME_DOCUMENTER_VITEPRESS' } + ], + footer: { + message: 'Made with DocumenterVitepress.jl
', + copyright: `© Copyright ${new Date().getUTCFullYear()}.` + } + } +}) diff --git a/docs/src/.vitepress/theme/index.ts b/docs/src/.vitepress/theme/index.ts new file mode 100644 index 0000000..463b5d8 --- /dev/null +++ b/docs/src/.vitepress/theme/index.ts @@ -0,0 +1,19 @@ +// .vitepress/theme/index.ts +import { h } from 'vue' +import type { Theme } from 'vitepress' +import DefaultTheme from 'vitepress/theme' + +import { enhanceAppWithTabs } from 'vitepress-plugin-tabs/client' +import './style.css' + +export default { + extends: DefaultTheme, + Layout() { + return h(DefaultTheme.Layout, null, { + // https://vitepress.dev/guide/extending-default-theme#layout-slots + }) + }, + enhanceApp({ app, router, siteData }) { + enhanceAppWithTabs(app) + } +} satisfies Theme \ No newline at end of file diff --git a/docs/src/.vitepress/theme/style.css b/docs/src/.vitepress/theme/style.css new file mode 100644 index 0000000..0750b4c --- /dev/null +++ b/docs/src/.vitepress/theme/style.css @@ -0,0 +1,171 @@ +@import url(https://fonts.googleapis.com/css?family=Space+Mono:regular,italic,700,700italic); +@import url(https://fonts.googleapis.com/css?family=Space+Grotesk:regular,italic,700,700italic); + +/* Customize default theme styling by overriding CSS variables: +https://github.com/vuejs/vitepress/blob/main/src/client/theme-default/styles/vars.css + */ + + /* Layouts */ + +/* + :root { + --vp-layout-max-width: 1440px; +} */ + +.VPHero .clip { + white-space: pre; + max-width: 500px; +} + +/* Fonts */ + +:root { + /* Typography */ + --vp-font-family-base: "Barlow", "Inter var experimental", "Inter var", + -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen, Ubuntu, + Cantarell, "Fira Sans", "Droid Sans", "Helvetica Neue", sans-serif; + + /* Code Snippet font */ + --vp-font-family-mono: "Fira Code", Menlo, Monaco, Consolas, "Courier New", + monospace; +} + +/* Colors */ +/* more colors ansi colors = [209, 32, 81, 204, 249, 166, 37] */ +/* more colors colors = ["#ff875f", 
"#0087d7", "#5fd7ff", "#ff5f87", "#b2b2b2", "#d75f00", "#00afaf"] */ +:root { + --julia-blue: #5fd7ff ; + --julia-purple: #5fd7ff; + --julia-red: #CB3C33; + --julia-green: #0087d7; + + --vp-c-brand: #0087d7; + --vp-c-brand-light: #0087d7; + --vp-c-brand-lighter: #5fd7ff ; + --vp-c-brand-lightest: #5fd7ff ; + --vp-c-brand-dark: #5fd7ff; + --vp-c-brand-darker: #5fd7ff ; + --vp-c-brand-dimm: #212425; +} + + /* Component: Button */ + +:root { + --vp-button-brand-border: var(--vp-c-brand-light); + --vp-button-brand-text: var(--vp-c-white); + --vp-button-brand-bg: var(--vp-c-brand); + --vp-button-brand-hover-border: var(--vp-c-brand-light); + --vp-button-brand-hover-text: var(--vp-c-white); + --vp-button-brand-hover-bg: var(--vp-c-brand-light); + --vp-button-brand-active-border: var(--vp-c-brand-light); + --vp-button-brand-active-text: var(--vp-c-white); + --vp-button-brand-active-bg: var(--vp-button-brand-bg); +} + +/* Component: Home */ + +:root { + --vp-home-hero-name-color: transparent; + --vp-home-hero-name-background: -webkit-linear-gradient( + 120deg, + #ff875f 30%, + #0087d7 + ); + + --vp-home-hero-image-background-image: linear-gradient( + -45deg, + #0087d7 35%, + #0087d7 35%, + #ff875f + ); + --vp-home-hero-image-filter: blur(40px); +} + +@media (min-width: 640px) { + :root { + --vp-home-hero-image-filter: blur(56px); + } +} + +@media (min-width: 960px) { + :root { + --vp-home-hero-image-filter: blur(72px); + } +} + +/* Component: Custom Block */ + +:root.dark { + --vp-custom-block-tip-border: var(--vp-c-brand); + --vp-custom-block-tip-text: var(--vp-c-brand-lightest); + --vp-custom-block-tip-bg: var(--vp-c-brand-dimm); + + /* // Tweak the color palette for blacks and dark grays */ + --vp-c-black: hsl(220 20% 9%); + --vp-c-black-pure: hsl(220, 24%, 4%); + --vp-c-black-soft: hsl(220 16% 13%); + --vp-c-black-mute: hsl(220 14% 17%); + --vp-c-gray: hsl(220 8% 56%); + --vp-c-gray-dark-1: hsl(220 10% 39%); + --vp-c-gray-dark-2: hsl(220 12% 28%); + --vp-c-gray-dark-3: 
hsl(220 12% 23%); + --vp-c-gray-dark-4: hsl(220 14% 17%); + --vp-c-gray-dark-5: hsl(220 16% 13%); + + /* // Backgrounds */ + --vp-c-bg: hsl(240, 2%, 11%); + --vp-custom-block-info-bg: hsl(220 14% 17%); + --vp-c-gutter: hsl(220 20% 9%); + + --vp-c-bg-alt: hsl(220 20% 9%); + --vp-c-bg-soft: hsl(220 14% 17%); + --vp-c-bg-mute: hsl(220 12% 23%); + +} + + /* Component: Algolia */ + +.DocSearch { + --docsearch-primary-color: var(--vp-c-brand) !important; +} + +/* Component: MathJax */ + +mjx-container > svg { + display: block; + margin: auto; +} + +mjx-container { + padding: 0.5rem 0; +} + +mjx-container { + display: inline-block; + margin: auto 2px -2px; +} + +mjx-container > svg { + margin: auto; + display: inline-block; +} + +/** + * Colors links + * -------------------------------------------------------------------------- */ + + :root { + --vp-c-brand-1: #ff875f; + --vp-c-brand-2: #ff875f; + --vp-c-brand-3: #ff875f; + --vp-c-sponsor: #ca2971; + --vitest-c-sponsor-hover: #c13071; +} + +.dark { + --vp-c-brand-1: #0087d7; + --vp-c-brand-2: #0087d7; + --vp-c-brand-3: #0087d7; + --vp-c-sponsor: #ee4e95; + --vitest-c-sponsor-hover: #e51370; +} \ No newline at end of file From 8034510652bdf3b48e3ca3068be0bdef55cecb1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Mon, 8 Apr 2024 16:47:15 -0500 Subject: [PATCH 06/23] Update make.jl for DocumenterVitepress.jl and comment old mkdocs code --- docs/make.jl | 84 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 60 insertions(+), 24 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index 36c5de4..25d1b51 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,34 +1,70 @@ -using Documenter, DocumenterMarkdown +using Documenter, DocumenterVitepress using Tidier, DataFrames, RDatasets DocTestMeta = quote using Tidier, DataFrames, Chain, Statistics end + DocMeta.setdocmeta!(Tidier, :DocTestSetup, DocTestMeta; - recursive=true) -makedocs( - modules=[Tidier], - clean=true, - doctest=true, - 
#format = Documenter.HTML(prettyurls = get(ENV, "CI", nothing) == "true"), - sitename="Tidier.jl", - authors="Karandeep Singh et al.", - strict=[ - :doctest, - :linkcheck, - :parse_error, - :example_block, - # Other available options are - # :autodocs_block, :cross_references, :docs_block, :eval_block, :example_block, - # :footnote, :meta_block, :missing_docs, :setup_block - ], checkdocs=:all, format=Markdown(), draft=false, - build=joinpath(@__DIR__, "docs") + recursive=true ) -deploydocs(; repo="https://github.com/TidierOrg/Tidier.jl", push_preview=true, - deps=Deps.pip("mkdocs", "pygments", "python-markdown-math", "mkdocs-material", - "pymdown-extensions", "mkdocstrings", "mknotebooks", - "pytkdocs_tweaks", "mkdocs_include_exclude_files", "jinja2", "mkdocs-video"), - make=() -> run(`mkdocs build`), target="site", devbranch="main") +pgs = [ # navigation tree handed to makedocs via pages + "Home" => "index.md", + "Get Started" => ["Installation" => "installation.md", "A Simple Data Analysis" => "simple-analysis.md"], + "API Reference" => "reference.md", + "Changelog" => "news.md", + "FAQ" => "faq.md", + # "Contributing" => "contributing.md", +] + +fmt = DocumenterVitepress.MarkdownVitepress( + repo = "https://github.com/camilogarciabotero/Tidier.jl", + devurl = "dev", + # deploy_url = "yourgithubusername.github.io/Tidier.jl", +) + +makedocs(; + modules = [Tidier], + authors = "Karandeep Singh et al.", + repo = "https://github.com/camilogarciabotero/Tidier.jl", + sitename = "Tidier.jl", + format = fmt, + pages = pgs, + warnonly = true, # NOTE(review): the removed call used strict=[...]; confirm relaxing to warnings is intended +) + +deploydocs(; + repo = "github.com/camilogarciabotero/Tidier.jl", # no protocol prefix, per the deploydocs docs; NOTE(review): fork URL, confirm before merging upstream + push_preview = true, +) + +# makedocs( +# modules=[Tidier], +# clean=true, +# doctest=true, +# #format = Documenter.HTML(prettyurls = get(ENV, "CI", nothing) == 
"true"), +# sitename="Tidier.jl", +# authors="Karandeep Singh et al.", +# strict=[ +# :doctest, +# :linkcheck, +# :parse_error, +# :example_block, +# # Other available options are +# # :autodocs_block, :cross_references, :docs_block, 
:eval_block, :example_block, +# # :footnote, :meta_block, :missing_docs, :setup_block +# ], +# checkdocs=:all, +# format=Markdown(), +# draft=false, +# build=joinpath(@__DIR__, "docs") +# ) + +# deploydocs(; repo="https://github.com/TidierOrg/Tidier.jl", push_preview=true, +# deps=Deps.pip("mkdocs", "pygments", "python-markdown-math", "mkdocs-material", +# "pymdown-extensions", "mkdocstrings", "mknotebooks", +# "pytkdocs_tweaks", "mkdocs_include_exclude_files", "jinja2", "mkdocs-video"), +# make=() -> run(`mkdocs build`), target="site", devbranch="main") From bb29c34e178c2d552a83ce012b5d25d8f645bce3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Mon, 8 Apr 2024 16:47:32 -0500 Subject: [PATCH 07/23] Add index file with all Tidier data using home hero template --- docs/src/index.md | 441 +++++++--------------------------------------- 1 file changed, 59 insertions(+), 382 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index 36b1936..8c2a6f9 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,382 +1,59 @@ - - -## Tidier.jl - -Tidier.jl is a data analysis package inspired by R's tidyverse and crafted specifically for Julia. Tidier.jl is a meta-package in that its functionality comes from a series of smaller packages. Installing and using Tidier.jl brings the combined functionality of each of these packages to your fingertips. - -[[GitHub]](https://github.com/TidierOrg/Tidier.jl) | [[Documentation]](https://tidierorg.github.io/Tidier.jl/dev/) - -## Installing Tidier.jl - -There are 2 ways to install Tidier.jl: using the package console, or using Julia code when you're using the Julia console. You might also see the console referred to as the "REPL," which stands for Read-Evaluate-Print Loop. The REPL is where you can interactively run code and view the output. 
- -Julia's REPL is particularly cool because it provides a built-in package REPL and shell REPL, which allow you to take actions on managing packages (in the case of the package REPL) or run shell commands (in the shell REPL) without ever leaving the Julia REPL. - -To install the stable version of Tidier.jl, you can type the following into the Julia REPL: - -``` -]add Tidier -``` - -The `]` character starts the Julia [package manager](https://docs.julialang.org/en/v1/stdlib/Pkg/). The `add Tidier` command tells the package manager to install the Tidier package from the Julia registry. You can exit the package REPL by pressing the backspace key to return to the Julia prompt. - -If you already have the Tidier package installed, the `add Tidier` command *will not* update the package. Instead, you can update the package using the the `update Tidier` (or `up Tidier` for short) commnds. As with the `add Tidier` command, make sure you are in the package REPL before you run these package manager commands. - -If you need to (or prefer to) install packages using Julia code, you can achieve the same outcome using the following code to install Tidier: - -```julia -import Pkg -Pkg.add("Tidier") -``` - -You can update Tidier.jl using the `Pkg.update()` function, as follows: - -```julia -import Pkg; Pkg.update("Tidier") -``` - -Note that while Julia allows you to separate statements by using multiple lines of code, you can also use a semi-colon (`;`) to separate multiple statements. This is convenient for short snippets of code. There's another practical reason to use semi-colons in coding, which is to silence the output of a function call. We will come back to this in the "Getting Started" section below. - -In general, installing the latest version of the package from the Julia registry should be sufficient because we follow a continuous-release cycle. 
After every update to the code, we update the version based on the magnitude of the change and then release the latest version to the registry. That's why it's so important to know how to update the package! - -However, if for some reason you do want to install the package directly from GitHub, you can get the newest version using either the package REPL... - -``` -]add Tidier#main -``` - -...or using Julia code. - -```julia -import Pkg; Pkg.add(url="https://github.com/TidierOrg/Tidier.jl") -``` - -## Loading Tidier.jl - -Once you've installed Tidier.jl, you can load it by typing: - -```julia -using Tidier -``` - -When you type this command, multiple things happen behind the scenes. First, the following packages are loaded and re-exported, which is to say that all of the exported macros and functions from these packages become available: - -- TidierData -- TidierPlots -- TidierCats -- TidierDates -- TidierStrings -- TidierText -- TidierVest - -Don't worry if you don't know what each of these packages does yet. We will cover them in package-specific documentation pages, which can be accessed below. For now, all you need to know is that these smaller packages are actually the ones doing all the work when you use Tidier. - -There are also a few other packages whose exported functions also become available. We will discuss these in the individual package documentation, but the most important ones for you to know about are: - -- The `DataFrame()` function from the DataFrames package is re-exported so that you can create a data frame without loading the DataFrames package. 
-- The `@chain()` macro from the Chain package is re-exported, so you chain together functions and macros -- The entire Statistics package is re-exported so you can access summary statistics like `mean()` and `median()` -- The CategoricalArrays package is re-exported so you can access the `categorical()` function to define categorical variables -- The Dates package is re-exported to enable support for variables containing dates - -## What can Tidier.jl do? - -Before we dive into an introduction of Julia and a look into how Tidier.jl works, it's useful to show you what Tidier.jl can do. First, we will read in some data, and then we will use Tidier.jl to chain together some data analysis operations. - -### First, let's read in the "Visits to Physician Office" dataset. - -This dataset comes with the Ecdat R package and and is titled OFP. [You can read more about the dataset here](https://rdrr.io/cran/Ecdat/man/OFP.html). To read in datasets packaged with commonly used R packages, we can use the RDatasets Julia package. - -```julia -julia> using Tidier, RDatasets -julia> ofp = dataset("Ecdat", "OFP") - -4406×19 DataFrame - Row │ OFP OFNP OPP OPNP EMR Hosp NumChro ⋯ - │ Int32 Int32 Int32 Int32 Int32 Int32 Int32 ⋯ -──────┼──────────────────────────────────────────────────── - 1 │ 5 0 0 0 0 1 ⋯ - 2 │ 1 0 2 0 2 0 - 3 │ 13 0 0 0 3 3 - 4 │ 16 0 5 0 1 1 - 5 │ 3 0 0 0 0 0 ⋯ - 6 │ 17 0 0 0 0 0 - 7 │ 9 0 0 0 0 0 - ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱ - 4401 │ 12 4 1 0 0 0 - 4402 │ 11 0 0 0 0 0 ⋯ - 4403 │ 12 0 0 0 0 0 - 4404 │ 10 0 20 0 1 1 - 4405 │ 16 1 0 0 0 0 - 4406 │ 0 0 0 0 0 0 ⋯ - 13 columns and 4393 rows omitted -``` - -Note that a preview of the data frame is automatically printed to the console. The reason this happens is that when you run this code line by line, the output of each line is printed to the console. This is convenient because it saves you from having to directly print the newly created `ofp` to the console in order to get a preview for what it contains. 
If this code were bundled in a code chunk (such as in a Jupyter notebook), then only the final line of the code chunk would be printed. - -The exact number of rows and columns that print will depend on the physical size of the REPL window. If you resize the console (e.g., in VS Code), Julia will adjust the number of rows/columns accordingly. - -If you want to suppress the output, you can add a `;` at the end of this statement, like this: - -```julia -julia> ofp = dataset("Ecdat", "OFP"); # Nothing prints -``` - -### With the OFP dataset loaded, let's ask some basic questions. - -#### What does the dataset consist of? - -We can use `@glimpse()` to find out the columns, data types, and peek at the first few values contained within the dataset. - -```julia -julia> @glimpse(ofp) - -Rows: 4406 -Columns: 19 -.OFP Int32 5, 1, 13, 16, 3, 17, 9, 3, 1, 0, 0, 44, 2, 1, 19, -.OFNP Int32 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, -.OPP Int32 0, 2, 0, 5, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, -.OPNP Int32 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, -.EMR Int32 0, 2, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -.Hosp Int32 1, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, -.NumChron Int32 2, 2, 4, 2, 2, 5, 0, 0, 0, 0, 1, 5, 1, 1, 1, 0, 1, -.AdlDiff Int32 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, -.Age Float64 6.9, 7.4, 6.6, 7.6, 7.9, 6.6, 7.5, 8.7, 7.3, 7.8, -.Black CategoricalValue{String, UInt8}yes, no, yes, no, no, no, no, no, -.Sex CategoricalValue{String, UInt8}male, female, female, male, female -.Married CategoricalValue{String, UInt8}yes, yes, no, yes, yes, no, no, no -.School Int32 6, 10, 10, 3, 6, 7, 8, 8, 8, 8, 8, 15, 8, 8, 12, 8 -.FamInc Float64 2.881, 2.7478, 0.6532, 0.6588, 0.6588, 0.3301, 0.8 -.Employed CategoricalValue{String, UInt8}yes, no, no, no, no, no, no, no, n -.Privins CategoricalValue{String, UInt8}yes, yes, no, yes, yes, no, yes, y -.Medicaid CategoricalValue{String, UInt8}no, no, yes, no, no, yes, no, no, -.Region 
CategoricalValue{String, UInt8}other, other, other, other, other, -.Hlth CategoricalValue{String, UInt8}other, other, poor, poor, other, p -``` - -If you're wondering why we need to place a `@` at the beginning of the word so that it reads `@glimpse()` rather than `glimpse()`, that's because including a `@` at the beginning denotes that this is a special type of function known as a macro. Macros have special capabilities in Julia, and many Tidier.jl functions that operate on data frames are implemented as macros. In this specific instance, we could have implemented `@glimpse()` without making use of any of the macro capabilities. However, for the sake of consistency, we have kept `@glimpse()` as a macro so that you can remember a basic rule of thumb: if Tidier.jl operates on a dataframe, then we will use macros rather than functions. The TidierPlots.jl package is a slight exception to this rule in that it is nearly entirely implemented as functions (rather than macros), and this will be described more in the TidierPlots documentation. - -#### Can we clean up the names of the columns? - -To avoid having to keep track of capitalization, data analysts often prefer column names to be in snake_case rather than TitleCase. Let's quickly apply this transformation to the `ofp` dataset. 
- -```julia -julia> ofp = @clean_names(ofp) -julia> @glimpse(ofp) - -Rows: 4406 -Columns: 19 -.ofp Int32 5, 1, 13, 16, 3, 17, 9, 3, 1, 0, 0, 44, 2, 1, 19, -.ofnp Int32 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, -.opp Int32 0, 2, 0, 5, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, -.opnp Int32 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, -.emr Int32 0, 2, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -.hosp Int32 1, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, -.num_chron Int32 2, 2, 4, 2, 2, 5, 0, 0, 0, 0, 1, 5, 1, 1, 1, 0, 1, -.adl_diff Int32 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, -.age Float64 6.9, 7.4, 6.6, 7.6, 7.9, 6.6, 7.5, 8.7, 7.3, 7.8, -.black CategoricalValue{String, UInt8}yes, no, yes, no, no, no, no, no, -.sex CategoricalValue{String, UInt8}male, female, female, male, female -.married CategoricalValue{String, UInt8}yes, yes, no, yes, yes, no, no, no -.school Int32 6, 10, 10, 3, 6, 7, 8, 8, 8, 8, 8, 15, 8, 8, 12, 8 -.fam_inc Float64 2.881, 2.7478, 0.6532, 0.6588, 0.6588, 0.3301, 0.8 -.employed CategoricalValue{String, UInt8}yes, no, no, no, no, no, no, no, n -.privins CategoricalValue{String, UInt8}yes, yes, no, yes, yes, no, yes, y -.medicaid CategoricalValue{String, UInt8}no, no, yes, no, no, yes, no, no, -.region CategoricalValue{String, UInt8}other, other, other, other, other, -.hlth CategoricalValue{String, UInt8}other, other, poor, poor, other, p -``` - -Now that our column names are cleaned up, we can ask some basic analysis questions. - -#### What is the mean age for people in each of the regions - -Because age is measured in decades according to the [dataset documentation](https://rdrr.io/cran/Ecdat/man/OFP.html)), we will multiply everyone's age by 10 before we calculate the median. 
- -```julia -julia> @chain ofp @group_by(region) @summarize(mean_age = mean(age * 10)) - -4×2 DataFrame - Row │ region mean_age - │ Cat… Float64 -─────┼─────────────────── - 1 │ other 73.987 - 2 │ midwest 74.0769 - 3 │ noreast 73.9343 - 4 │ west 74.1165 -``` - -Overall, the mean age looks pretty similar across regions. The fact that we were able to calculate each region's age also reveals that there are no missing values. Any region containing a missing value would have returned `missing` instead of a number. - -The `@chain` macro, which is defined in the Chain package and re-exported by Tidier, allows us to pipe together multiple operations sequentially from left to right. In the example above, the `ofp` dataset is being piped into the first argument of the `@group_by()` macro, the result of which is then being piped into the `@summarize()` macro, which is then automatically removing the grouping and returning the result. - -For grouped data frames, `@summarize()` behaves differently than other Tidier.jl macros: `@summarize()` removes one layer of grouping. Because the data frame was only grouped by one column, the result is no longer grouped. Had we grouped by multiple columns, the result would still be grouped by all but the last column. The other Tidier.jl macros keep the data grouped. Grouped data frames can be ungrouped using `@ungroup()`. If you apply a new `@group_by()` macro to an already-grouped data frame, then the newly specified groups override the old ones. - -When we use the `@chain` macro, we are taking advantage of the fact that Julia macros can either be called using parentheses syntax, where each argument is separated by a comma, or they can be called with a spaced syntax where no parentheses are used. In the case of Tidier.jl macros, we always use the parentheses syntax, which makes is very easy to use the spaced syntax when working with `@chain`. - -An alternate way of calling `@chain` using the parentheses syntax is as follows. 
From a purely stylistic perspective, I don't recommend this because it adds a number of extra characters. However, if you're new to Julia, it's worth knowing about this form so that you realize that there is no magic involved when working with macros. - -```julia -julia> @chain(ofp, @group_by(region), @summarize(mean_age = mean(age * 10))) - -4×2 DataFrame - Row │ region mean_age - │ Cat… Float64 -─────┼─────────────────── - 1 │ other 73.987 - 2 │ midwest 74.0769 - 3 │ noreast 73.9343 - 4 │ west 74.1165 -``` - -On the other hand, either of these single-line expressions can get quite hard-to-read as more and more expressions are chained together. To make this easier to handle, `@chain` supports multi-line expressions using `begin-end` blocks like this: - -```julia -julia> @chain ofp begin - @group_by(region) - @summarize(mean_age = mean(age * 10)) - end - -4×2 DataFrame - Row │ region mean_age - │ Cat… Float64 -─────┼─────────────────── - 1 │ other 73.987 - 2 │ midwest 74.0769 - 3 │ noreast 73.9343 - 4 │ west 74.1165 -``` - -This format is convenient for interactive data analysis because you can easily comment out individual operations and view the result. For example, if we wanted to know the mean age for the overall dataset, we could simply comment out the `@group_by()` operation. - -```julia -julia> @chain ofp begin - # @group_by(region) - @summarize(mean_age = mean(age * 10)) - end - -1×1 DataFrame - Row │ mean_age - │ Float64 -─────┼────────── - 1 │ 74.0241 -``` -## Frequently asked questions - -### I'm a Julia user. Why should I use Tidier.jl rather than other data analysis packages? - -While Julia has a number of great data analysis packages, the most mature and idiomatic Julia package for data analysis is DataFrames.jl. Most other data analysis packages in Julia build on top of DataFrames.jl, and Tidier.jl is no exception. - -DataFrames.jl emphasizes idiomatic Julia code without macros. 
While it is elegant, it can be verbose because of the need to write out anonymous functions. DataFrames.jl also emphasizes correctness, which means that errors are favored over warnings. For example, grouping by one variable and then subsequently grouping the already-grouped data frame by another variable results in an error in DataFrames.jl. These restrictions, while justified in some instances, can make interactive data analysis feel clunky and slow. - -A number of macro-based data analysis packages have emerged as extensions of DataFrames.jl to make data analysis syntax less verbose, including DataFramesMeta.jl, Query.jl, and DataFrameMacros.jl. All of these packages have their strengths, and each of these served as an inspiration towards the creation of Tidier.jl. - -What sets Tidier.jl apart is that it borrows the design of the tried-and-widely-adopted tidyverse and brings it to Julia. Our goal is to make data analysis code as easy and readable as possible. In our view, the reason you should use Tidier.jl is because of the richness, consistency, and thoroughness of the design made possible by bringing together two powerful tools: DataFrames.jl and the tidyverse. In Tidier.jl, nearly every possible transformation on data frames (e.g., aggregating, pivoting, nesting, and joining) can be accomplished using a consistent syntax. While you always have the option to intermix Tidier.jl code with DataFrames.jl code, Tidier.jl strives for completeness -- there should never be a requirement to fall back to DataFrames.jl for any kind of data analysis task. - -Tidier.jl also focuses on conciseness. This shows up most readily in two ways: the use of bare column names, and an approach to auto-vectorizing code. - -1. **Bare column names:** If you are referring to a column named `a`, you can simply refer to it as `a` in Tidier.jl. 
You are essentially referring to `a` as if it was within an anonymous function, where the variable `a` was mapped to the column `a` in the data frame. If you want to refer to an object `a` that is defined outside of the data frame, then you can write `!!a`, which we refer to as "bang-bang interpolation." This syntax is motivated by the tidyverse, where [the `!!` operator was selected because it is the least-bad "polite fiction" way of representing lazy interpolation](https://adv-r.hadley.nz/quasiquotation.html#the-polite-fiction-of). - -2. **Auto-vectorized code:** Most data transformation functions and operators are intended to be used on scalars. However, transformations are usually performed on columns of data (represented as 1-dimensional arrays, or vectors), which means that most functions need to be vectorized, which can get unwieldy and verbose. However, there are functions which operate directly on vectors and thus should not be vectorized when applied to columns (e.g., `mean()` and `median()`). Tidier.jl uses a customizable look-up table to know which functions to vectorize and which ones not to vectorize. This means that you can largely leave code as un-vectorized (i.e., `mean(a + 1)` rather than `mean(a .+ 1)`), and Tidier.jl will correctly infer convert the first code into the second before running it. There are several ways to manually override the defaults. - -Lastly, the reason you should consider using Tidier.jl is that it brings a consistent syntax not only to data manipulation but also to plotting (by wrapping Makie.jl and AlgebraOfGraphics.jl) and to the handling of categorical variables, strings, and dates. Wherever possible, Tidier.jl uses existing classes rather than defining new ones. As a result, using Tidier.jl should never preclude you from using other Base Julia functions with which you may already be familiar. - -### I'm an R user and I'm perfectly happy with the tidyverse. Why should I consider using Tidier.jl? 
- -If you're happy with the R tidyverse, then there's no imminent reason to switch to using Tidier.jl. While DataFrames.jl (the package on which TidierData.jl depends) [is faster than R's dplyr and tidyr on benchmarks](https://duckdblabs.github.io/db-benchmark/), there are other faster backends in R that allow for the use of tidyverse syntax with better speed (e.g., dtplyr, tidytable, tidypolars). - -The primary reason to consider using Tidier.jl is the value proposition of using Julia itself. Julia has many similarities to R (e.g., interactive coding in a console, functional style, multiple dispatch, dynamic data types), but unlike R, Julia is automatically compiled (to LLVM) before it runs. This means that certain compiler optimations, which are normally only possible in more verbose languages like C/C++ become available to Julia. There are a number of situations in R where the end-user is able to write fast R code as a direct result of C++ being used on the backend (e.g., through the use of the Rcpp package). This is why R is sometimes referred to as a glue language -- because it provides a very nice way of glueing together faster C++ code. - -The main value proposition of Julia is that you can use it as *both* a glue language *and* as a backend language. Tidier.jl embraces the glue language aspect of Julia while relying on packages like DataFrames.jl and Makie.jl on the backend. - -While Julia has very mature backends, we hope that Tidier.jl demonstrates the value of, and need for, more glue-oriented data analysis packages in Julia. - -### Why does Tidier.jl re-export so many packages? - -Tidier comes with batteries included. If you are using Tidier, you generally won't have to load in other packages for basic data analysis. Tidier is meant for interactive use. You can start your code with `using Tidier` and expect to have what you need at your fingertips. 
- -If you are a package developer, then you definitely should consider depending on one of the smaller packages that make up Tidier.jl rather than Tidier itself. For example, if you want to use the categorical variable functions from Tidier, then you should use rely on only TidierCats.jl as a dependency. - -### Should I update Tidier.jl or the underlying packages (e.g., TidierPlots.jl) individually? - -Either approach is okay. For most users, we recommend updating Tidier.jl directly, as this will update the underlying packages up to their latest minor versions (but not necessarily up to their latest patch release). However, if you need access to the latest functionality in the underlying packages, you should feel free to update them directly. We will keep Tidier.jl future-proof to underlying package updates, so this shouldn't cause any problems with Tidier.jl. - -### Where can I learn more about the underlying packages that make up Tidier.jl? - - - -## TidierData.jl - -TidierData.jl is a package dedicated to data transformation and reshaping, powered by DataFrames.jl, ShiftedArrays.jl, and Cleaner.jl. It focuses on functionality within the dplyr, tidyr, and janitor R packages. - -[[GitHub]](https://github.com/TidierOrg/TidierData.jl) | [[Documentation]](https://tidierorg.github.io/TidierData.jl/latest/) - -

- - - -## TidierPlots.jl - -TidierPlots.jl is a package dedicated to plotting, powered by AlgebraOfGraphics.jl. It focuses on functionality within the ggplot2 R package. - -[[GitHub]](https://github.com/TidierOrg/TidierPlots.jl) | [[Documentation]](https://tidierorg.github.io/TidierPlots.jl/latest/) - -

- - - -## TidierCats.jl - -TidierCats.jl is a package dedicated to handling categorical variables, powered by CategoricalArrays.jl. It focuses on functionality within the forcats R package. - -[[GitHub]](https://github.com/TidierOrg/TidierCats.jl) | [[Documentation]](https://tidierorg.github.io/TidierCats.jl/dev/) - -

- - - -## TidierDates.jl - -TidierDates.jl is a package dedicated to handling dates and times. It focuses on functionality within the lubridate R package. - -[[GitHub]](https://github.com/TidierOrg/TidierDates.jl) | [[Documentation]](https://tidierorg.github.io/TidierDates.jl/dev/) - -

- - - -## TidierStrings.jl - -TidierStrings.jl is a package dedicated to handling strings. It focuses on functionality within the stringr R package. - -[[GitHub]](https://github.com/TidierOrg/TidierStrings.jl) | [[Documentation]](https://tidierorg.github.io/TidierStrings.jl/dev/) - -

- - - -## TidierText.jl - -TidierText.jl is a package dedicated to handling and tidying text data. It focuses on functionality within the tidytext R package. - -[[GitHub]](https://github.com/TidierOrg/TidierText.jl) - -

- - - -## TidierVest.jl - -TidierVest.jl is a package dedicated to scraping and tidying website data. It focuses on functionality within the rvest R package. - -[[GitHub]](https://github.com/TidierOrg/TidierVest.jl) - -

- -## What’s new in the Tidier.jl meta-package? - -See [NEWS.md](https://github.com/TidierOrg/Tidier.jl/blob/main/NEWS.md) for the latest updates. - -## What's missing - -Is there a tidyverse feature missing that you would like to see in Tidier.jl? Please file a GitHub issue to start a discussion. \ No newline at end of file +```@raw html +--- +# https://vitepress.dev/reference/default-theme-home-page +layout: home +hero: + name: "Tidier.jl" + tagline: Tidier.jl is a data analysis package inspired by R's tidyverse and crafted specifically for Julia. + image: + src: /Tidier_jl_logo.png + actions: + - theme: brand + text: Get Started + link: /docs/getting-started.md + - theme: alt + text: View on GitHub + link: https://github.com/TidierOrg/Tidier.jl +features: + + - icon: tidierdata + title: TidierData.jl + details: "TidierData.jl is a 100% Julia implementation of the dplyr and tidyr R packages. Powered by the DataFrames.jl package and Julia’s extensive meta-programming capabilities, TidierData.jl is an R user’s love letter to data analysis in Julia." + link: https://tidierorg.github.io/TidierData.jl/latest/ + + - icon: tidierplots + title: TidierPlots.jl + details: "TidierPlots.jl is a 200% Julia implementation of the R package ggplot in Julia. Powered by Makie.jl, and Julia’s extensive meta-programming capabilities, TidierPlots.jl is an R user’s love letter to data visualization in Julia." + link: https://tidierorg.github.io/TidierPlots.jl/latest/ + + - icon: tidierfiles + title: TidierFiles.jl + details: "TidierFiles.jl leverages the CSV.jl, XLSX.jl, and ReadStatTables.jl packages to reimplement the R haven and readr packages." + link: https://tidierorg.github.io/TidierFiles.jl/dev/ + + - icon: tidiercats + title: TidierCats.jl + details: "TidierCats.jl is a 100% Julia implementation of the R package forcats in Julia. It has one main goal: to implement forcats's straightforward syntax and ease of use while working with categorical variables for Julia users." 
+ link: https://tidierorg.github.io/TidierCats.jl/dev/ + + - icon: tidierdates + title: TidierDates.jl + details: "TidierDates.jl is a 100% Julia implementation of the R package lubridate in Julia. It has one main goal: to implement lubridate's straightforward syntax and ease of use while working with dates for Julia users." + link: https://tidierorg.github.io/TidierDates.jl/dev/ + + - icon: tidierstrings + title: TidierStrings.jl + details: "TidierStrings.jl is a 100% Julia implementation of the R package stringr in Julia. It has one main goal: to implement stringr's straightforward syntax and ease of use while working with strings for Julia users." + link: https://tidierorg.github.io/TidierStrings.jl/dev/ + + - icon: tidiertext + title: TidierText.jl + details: "TidierText.jl is a 100% Julia implementation of the R tidytext package. The purpose of the package is to make it easy analyze text data using DataFrames." + link: https://tidierorg.github.io/TidierText.jl/dev/ + + - icon: tidierstrings + title: TidierVest.jl + details: "This library combines HTTP, Gumbo and Cascadia for a more simple way to scrape data" + link: https://tidierorg.github.io/TidierVest.jl/dev/ +--- +``` \ No newline at end of file From 4400854dcaec828c61d49698ab0725a55659e2e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Mon, 8 Apr 2024 16:53:20 -0500 Subject: [PATCH 08/23] Remove genfile.jl run from actions --- .github/workflows/Documenter.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/Documenter.yml b/.github/workflows/Documenter.yml index f2c268e..fa379b1 100644 --- a/.github/workflows/Documenter.yml +++ b/.github/workflows/Documenter.yml @@ -32,5 +32,4 @@ jobs: JULIA_DEBUG: "Documenter" DATADEPS_ALWAYS_ACCEPT: true run: | - julia --code-coverage=user --project=docs/ --color=yes docs/genfiles.jl julia --code-coverage=user --project=docs/ --color=yes docs/make.jl \ No newline at end of file From dd086fa18350ae9cb5e4254ee05d7c66b784d502 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Mon, 8 Apr 2024 16:55:34 -0500 Subject: [PATCH 09/23] Update links to remote in make.jl --- docs/make.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index 25d1b51..69dfc5a 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -21,7 +21,7 @@ pgs = [ ] fmt = DocumenterVitepress.MarkdownVitepress( - repo = "https://github.com/camilogarciabotero/Tidier.jl", + repo = "https://github.com/TidierOrg/Tidier.jl", devurl = "dev", # deploy_url = "yourgithubusername.github.io/Tidier.jl.jl", ) @@ -29,7 +29,7 @@ fmt = DocumenterVitepress.MarkdownVitepress( makedocs(; modules = [Tidier], authors = "Karandeep Singh et al.", - repo = "https://github.com/camilogarciabotero/Tidier.jl", + repo = "https://github.com/TidierOrg/Tidier.jl", sitename = "Tidier.jl", format = fmt, pages= pgs, @@ -37,7 +37,7 @@ makedocs(; ) deploydocs(; - repo = "https://github.com/camilogarciabotero/Tidier.jl", + repo = "https://github.com/TidierOrg/Tidier.jl", push_preview = true, ) From 0aebd1bee3c703c877e5c1c336ec8221746de890 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Mon, 8 Apr 2024 17:10:52 -0500 Subject: [PATCH 10/23] Update make.jl with target to biuld --- docs/make.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/make.jl b/docs/make.jl index 69dfc5a..38fb0dd 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -38,6 +38,9 @@ makedocs(; deploydocs(; repo = "https://github.com/TidierOrg/Tidier.jl", + target="build", # this is where Vitepress stores its output + branch = "gh-pages", + devbranch = "main", push_preview = true, ) From 3201791e26605dfb9502adf2ef58ec6894b6a64c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Mon, 8 Apr 2024 17:15:16 -0500 Subject: [PATCH 11/23] Update Documenter.yml action --- .github/workflows/Documenter.yml | 57 +++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 20 deletions(-) diff 
--git a/.github/workflows/Documenter.yml b/.github/workflows/Documenter.yml index fa379b1..72aad3c 100644 --- a/.github/workflows/Documenter.yml +++ b/.github/workflows/Documenter.yml @@ -1,35 +1,52 @@ -name: Documenter +# Sample workflow for building and deploying a VitePress site to GitHub Pages +# +name: Deploy documentation + on: + # Runs on pushes targeting the `main` branch. Change this to `master` if you're + # using the `master` branch as the default branch. push: branches: - - main - tags: '*' + - main + tags: ['*'] pull_request: + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: write + pages: write + id-token: write + actions: write + + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. concurrency: - # Skip intermediate builds: always. - # Cancel intermediate builds: only if it is a pull request build. 
- group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} + group: pages + cancel-in-progress: false + jobs: + # Build job build: - permissions: - contents: write runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: julia-actions/setup-julia@v1 - - uses: julia-actions/cache@v1 - with: - cache-registries: "false" - - name: Install documentation dependencies - run: julia --project=docs -e 'using Pkg; pkg"dev ."; Pkg.instantiate()' - - name: Build and deploy + - name: Checkout + uses: actions/checkout@v4 + with: # Fetches the full history (all branches and tags) + fetch-depth: 0 + - name: Setup Julia + uses: julia-actions/setup-julia@v2 + - name: Pull Julia cache + uses: julia-actions/cache@v1 + - name: Generate logo + run: julia --project=docs -e "using Pkg; Pkg.instantiate()"; julia --project=docs/ --color=yes docs/logo.jl + - uses: julia-actions/julia-docdeploy@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # For authentication with SSH deploy key GKSwstype: "100" # https://discourse.julialang.org/t/generation-of-documentation-fails-qt-qpa-xcb-could-not-connect-to-display/60988 JULIA_DEBUG: "Documenter" - DATADEPS_ALWAYS_ACCEPT: true - run: | - julia --code-coverage=user --project=docs/ --color=yes docs/make.jl \ No newline at end of file + DATADEPS_ALWAYS_ACCEPT: true \ No newline at end of file From 526f6ad92be02eda308ed6e50563db19222af448 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Mon, 8 Apr 2024 17:17:30 -0500 Subject: [PATCH 12/23] Remove logo workflow --- .github/workflows/Documenter.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/Documenter.yml b/.github/workflows/Documenter.yml index 72aad3c..1964562 100644 --- a/.github/workflows/Documenter.yml +++ b/.github/workflows/Documenter.yml @@ -41,8 +41,6 @@ jobs: uses: julia-actions/setup-julia@v2 - name: Pull 
Julia cache uses: julia-actions/cache@v1 - - name: Generate logo - run: julia --project=docs -e "using Pkg; Pkg.instantiate()"; julia --project=docs/ --color=yes docs/logo.jl - uses: julia-actions/julia-docdeploy@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token From 1e3ada19db72febc962b37bbdcb00b61754a7a6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Mon, 8 Apr 2024 17:22:57 -0500 Subject: [PATCH 13/23] Testing Doc action into other branch --- .github/workflows/Documenter.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/Documenter.yml b/.github/workflows/Documenter.yml index 1964562..e5db95a 100644 --- a/.github/workflows/Documenter.yml +++ b/.github/workflows/Documenter.yml @@ -7,7 +7,8 @@ on: # using the `master` branch as the default branch. push: branches: - - main + # - main + - camilogarciabotero:main tags: ['*'] pull_request: From 3c04f09dc6ed0dafba7480508574a69d65698bd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Mon, 8 Apr 2024 17:25:20 -0500 Subject: [PATCH 14/23] Revert to push the main branch --- .github/workflows/Documenter.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/Documenter.yml b/.github/workflows/Documenter.yml index e5db95a..1964562 100644 --- a/.github/workflows/Documenter.yml +++ b/.github/workflows/Documenter.yml @@ -7,8 +7,7 @@ on: # using the `master` branch as the default branch. 
push: branches: - # - main - - camilogarciabotero:main + - main tags: ['*'] pull_request: From 7da7a0a874de2bbb473692e02dc03bed748016bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Mon, 8 Apr 2024 20:11:11 -0500 Subject: [PATCH 15/23] Avoid broken links in home --- docs/src/index.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index 8c2a6f9..3ced658 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -29,7 +29,7 @@ features: - icon: tidierfiles title: TidierFiles.jl details: "TidierFiles.jl leverages the CSV.jl, XLSX.jl, and ReadStatTables.jl packages to reimplement the R haven and readr packages." - link: https://tidierorg.github.io/TidierFiles.jl/dev/ + - icon: tidiercats title: TidierCats.jl @@ -49,11 +49,11 @@ features: - icon: tidiertext title: TidierText.jl details: "TidierText.jl is a 100% Julia implementation of the R tidytext package. The purpose of the package is to make it easy analyze text data using DataFrames." 
- link: https://tidierorg.github.io/TidierText.jl/dev/ + - icon: tidierstrings title: TidierVest.jl details: "This library combines HTTP, Gumbo and Cascadia for a more simple way to scrape data" - link: https://tidierorg.github.io/TidierVest.jl/dev/ + --- ``` \ No newline at end of file From 09c603cf04a97001e4997502b117faef60aff3b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Mon, 8 Apr 2024 20:11:37 -0500 Subject: [PATCH 16/23] Use less blur and on glowing color to logo --- docs/src/.vitepress/theme/style.css | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/src/.vitepress/theme/style.css b/docs/src/.vitepress/theme/style.css index 0750b4c..1ee4402 100644 --- a/docs/src/.vitepress/theme/style.css +++ b/docs/src/.vitepress/theme/style.css @@ -69,16 +69,16 @@ https://github.com/vuejs/vitepress/blob/main/src/client/theme-default/styles/var --vp-home-hero-name-background: -webkit-linear-gradient( 120deg, #ff875f 30%, - #0087d7 + #ff875f ); --vp-home-hero-image-background-image: linear-gradient( -45deg, - #0087d7 35%, - #0087d7 35%, - #ff875f + #0088d775 35%, + #0088d775 35%, + #0088d775 ); - --vp-home-hero-image-filter: blur(40px); + --vp-home-hero-image-filter: blur(10px); } @media (min-width: 640px) { From 9f6cd056691a22f38cff916f3bd7aa2c45281103 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Mon, 8 Apr 2024 20:21:18 -0500 Subject: [PATCH 17/23] Edit 200% on TiderPlots description --- docs/src/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/index.md b/docs/src/index.md index 3ced658..44920ab 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -23,7 +23,7 @@ features: - icon: tidierplots title: TidierPlots.jl - details: "TidierPlots.jl is a 200% Julia implementation of the R package ggplot in Julia. Powered by Makie.jl, and Julia’s extensive meta-programming capabilities, TidierPlots.jl is an R user’s love letter to data visualization in Julia." 
+ details: "TidierPlots.jl is a 100% Julia implementation of the R package ggplot in Julia. Powered by Makie.jl, and Julia’s extensive meta-programming capabilities, TidierPlots.jl is an R user’s love letter to data visualization in Julia." link: https://tidierorg.github.io/TidierPlots.jl/latest/ - icon: tidierfiles From 1dead3b3d0b730c3e819a19375318791d6b5ce31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= Date: Mon, 8 Apr 2024 20:26:04 -0500 Subject: [PATCH 18/23] Revert gradient to title and glowing --- docs/src/.vitepress/theme/style.css | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/src/.vitepress/theme/style.css b/docs/src/.vitepress/theme/style.css index 1ee4402..2c2f9ac 100644 --- a/docs/src/.vitepress/theme/style.css +++ b/docs/src/.vitepress/theme/style.css @@ -69,16 +69,16 @@ https://github.com/vuejs/vitepress/blob/main/src/client/theme-default/styles/var --vp-home-hero-name-background: -webkit-linear-gradient( 120deg, #ff875f 30%, - #ff875f + #0087d7 ); --vp-home-hero-image-background-image: linear-gradient( -45deg, - #0088d775 35%, - #0088d775 35%, - #0088d775 + #0d8fdac1 35%, + #0d8fdac1 35%, + #0d8fdac1 ); - --vp-home-hero-image-filter: blur(10px); + --vp-home-hero-image-filter: blur(20px); } @media (min-width: 640px) { From 046a3f8d228d0755247dc1124e1314a850d74f37 Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Tue, 9 Apr 2024 09:54:39 -0400 Subject: [PATCH 19/23] Bump dependency versions and package version to 1.2.2. 
--- Project.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index 456198e..bbb9d0c 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Tidier" uuid = "f0413319-3358-4bb0-8e7c-0c83523a93bd" authors = ["Karandeep Singh"] -version = "1.2.1" +version = "1.2.2" [deps] Reexport = "189a3867-3050-52da-a836-e630ba90ab69" @@ -15,8 +15,8 @@ TidierVest = "969b988e-7aed-4820-b60d-bdec252047c4" [compat] Reexport = "0.2, 1" -TidierData = "0.14, 1" -TidierPlots = "0.5, 1" +TidierData = "0.15, 1" +TidierPlots = "0.6, 1" TidierCats = "0.1, 1" TidierDates = "0.2, 1" TidierStrings = "0.2, 1" From c938eff3df610e1e4392dd1701d237f12ca84b3e Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Tue, 9 Apr 2024 15:55:38 -0400 Subject: [PATCH 20/23] Bump Julia version to 1.9 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index bbb9d0c..c909325 100644 --- a/Project.toml +++ b/Project.toml @@ -23,7 +23,7 @@ TidierStrings = "0.2, 1" TidierText = "0.1, 1" TidierVest = "0.4, 1" -julia = "1.6" +julia = "1.9" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" From 59cf7a20f8a6b9115ad0701239acde704c7ec9b9 Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Tue, 9 Apr 2024 16:06:57 -0400 Subject: [PATCH 21/23] Removed space before julia version. 
--- Project.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Project.toml b/Project.toml index c909325..54b4b40 100644 --- a/Project.toml +++ b/Project.toml @@ -22,7 +22,6 @@ TidierDates = "0.2, 1" TidierStrings = "0.2, 1" TidierText = "0.1, 1" TidierVest = "0.4, 1" - julia = "1.9" [extras] From 51f13a577190f4d99b05b19ae7a245ff7f200c62 Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Tue, 9 Apr 2024 16:18:14 -0400 Subject: [PATCH 22/23] Update CI.yml to use Julia version 1.9 --- .github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index cdbe71d..f31f3f6 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -18,7 +18,7 @@ jobs: fail-fast: false matrix: version: - - '1.8' + - '1.9' - 'nightly' os: - ubuntu-latest From 63bfbbd5433fa8e7dcbee7cca1047f8135bdf75d Mon Sep 17 00:00:00 2001 From: Karandeep Singh Date: Tue, 9 Apr 2024 16:28:14 -0400 Subject: [PATCH 23/23] Updated news.md --- docs/src/news.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/src/news.md b/docs/src/news.md index 5104c80..0816447 100644 --- a/docs/src/news.md +++ b/docs/src/news.md @@ -1,5 +1,13 @@ # Tidier.jl updates +## v1.2.2 - 2024-04-09 +- Update minimum Julia required version to 1.9 +- Base package version updates +- New documentation + +## v1.2.1 - 2024-01-02 +- Base package version updates + ## v1.2.0 - 2023-11-28 - Add and re-export TidierText.jl - Bugfix: Re-export TidierVest.jl (forgot to do this in 1.1.0)