diff --git a/.github/workflows/test-linux.yml b/.github/workflows/test-linux.yml index 7ac529f71..19db0fc8d 100644 --- a/.github/workflows/test-linux.yml +++ b/.github/workflows/test-linux.yml @@ -58,6 +58,7 @@ jobs: data.table rstudioapi tibble + haven - name: Setup SSH access uses: mxschmitt/action-tmate@v3 diff --git a/crates/ark/src/data_explorer/histogram.rs b/crates/ark/src/data_explorer/histogram.rs index 568c8b737..4c3d1d062 100644 --- a/crates/ark/src/data_explorer/histogram.rs +++ b/crates/ark/src/data_explorer/histogram.rs @@ -161,6 +161,7 @@ mod tests { use stdext::assert_match; use super::*; + use crate::fixtures::package_is_installed; use crate::r_task; fn default_options() -> FormatOptions { @@ -606,4 +607,29 @@ mod tests { ); }) } + + #[test] + fn test_frequency_table_haven_labelled() { + r_task(|| { + if !package_is_installed("haven") { + return; + } + + test_frequency_table( + "haven::labelled(c(rep(1, 100), rep(2, 200), rep(3, 150)), labels = c('A' = 1, 'B' = 2, 'C' = 3))", + 10, + harp::parse_eval_global("c('B', 'C', 'A')").unwrap(), + vec![200, 150, 100], + None, + ); + // Account for all factor levels, even if they don't appear in the data + test_frequency_table( + "haven::labelled(c(rep(1, 100), rep(2, 200)), labels = c('A' = 1, 'B' = 2, 'C' = 3))", + 10, + harp::parse_eval_global("c('B', 'A', 'C')").unwrap(), + vec![200, 100, 0], + None, + ); + }) + } } diff --git a/crates/ark/src/data_explorer/summary_stats.rs b/crates/ark/src/data_explorer/summary_stats.rs index 913594283..c2f6a749e 100644 --- a/crates/ark/src/data_explorer/summary_stats.rs +++ b/crates/ark/src/data_explorer/summary_stats.rs @@ -183,6 +183,7 @@ where #[cfg(test)] mod tests { use super::*; + use crate::fixtures::package_is_installed; fn default_options() -> FormatOptions { FormatOptions { @@ -327,4 +328,32 @@ mod tests { assert_eq!(stats.date_stats, Some(expected)); }) } + + #[test] + fn test_haven_labelled() { + crate::r_task(|| { + if !package_is_installed("haven") { + return; + } + + let column = + harp::parse_eval_base("haven::labelled(c(1, 1, 2), c(Male = 1, Female = 2))") + .unwrap(); + + let column_factor = + harp::parse_eval_base("factor(c(1,1,2), labels = c('Male', 'Female'))").unwrap(); + + let stats = + summary_stats(column.sexp, ColumnDisplayType::String, &default_options()).unwrap(); + + let stats_factor = summary_stats( + column_factor.sexp, + ColumnDisplayType::String, + &default_options(), + ) + .unwrap(); + + assert_eq!(stats, stats_factor); + }) + } } diff --git a/crates/ark/src/data_explorer/utils.rs b/crates/ark/src/data_explorer/utils.rs index 2749a1715..5a696d602 100644 --- a/crates/ark/src/data_explorer/utils.rs +++ b/crates/ark/src/data_explorer/utils.rs @@ -55,6 +55,13 @@ pub fn display_type(x: SEXP) -> ColumnDisplayType { } if r_is_object(x) { + // `haven_labelled` objects inherit from their internal data type + // such as integer or character. We special case them here before + // checking the internal types below. + if r_inherits(x, "haven_labelled") { + return ColumnDisplayType::String; + } + if r_inherits(x, "logical") { return ColumnDisplayType::Boolean; } diff --git a/crates/ark/src/fixtures/utils.rs b/crates/ark/src/fixtures/utils.rs index 598f7bc99..ca709efa7 100644 --- a/crates/ark/src/fixtures/utils.rs +++ b/crates/ark/src/fixtures/utils.rs @@ -16,6 +16,7 @@ use serde::Serialize; use tree_sitter::Point; use crate::modules; +use crate::modules::ARK_ENVS; // Lock for tests that can't be run concurrently. Only needed for tests that can't // be wrapped in an `r_task()`. @@ -92,6 +93,16 @@ where } } +pub fn package_is_installed(package: &str) -> bool { + harp::parse_eval0( + format!(".ps.is_installed('{package}')").as_str(), + ARK_ENVS.positron_ns, + ) + .unwrap() + .try_into() + .unwrap() +} + #[cfg(test)] mod tests { use tree_sitter::Point; diff --git a/crates/ark/src/modules/positron/r_data_explorer.R b/crates/ark/src/modules/positron/r_data_explorer.R index a18ee40fc..40647c86b 100644 --- a/crates/ark/src/modules/positron/r_data_explorer.R +++ b/crates/ark/src/modules/positron/r_data_explorer.R @@ -59,6 +59,10 @@ summary_stats_number <- function(col) { } summary_stats_string <- function(col) { + if (inherits(col, 'haven_labelled')) { + col <- haven::as_factor(col) + } + if(is.factor(col)) { # We could have an optimization here to get unique and empty values # from levels, but probably not worth it. @@ -463,6 +467,10 @@ profile_frequency_table <- function(x, limit) { )) } + if (inherits(x, "haven_labelled")) { + x <- haven::as_factor(x) + } + if (is.factor(x)) { values <- levels(x) counts <- table(x)