Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into feature/default-repos
Browse files Browse the repository at this point in the history
  • Loading branch information
jmcphers committed Nov 27, 2024
2 parents c47f9fc + f3c1416 commit b12d1f4
Show file tree
Hide file tree
Showing 24 changed files with 822 additions and 422 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/test-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ jobs:
data.table
rstudioapi
tibble
haven

- name: Setup SSH access
uses: mxschmitt/action-tmate@v3
Expand All @@ -70,4 +71,4 @@ jobs:
- name: Run Unit Tests
run: |
cargo test --verbose
cargo test --verbose -- --nocapture
2 changes: 1 addition & 1 deletion .github/workflows/test-macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@ jobs:
- name: Run Unit Tests
run: |
cargo test --verbose
cargo test --verbose -- --nocapture
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,18 @@

# 0.1.9000

## 2024-11

- LSP: Assignments in function calls (e.g. `list(x <- 1)`) are now detected by the missing symbol linter to avoid annoying false positive diagnostics (https://github.com/posit-dev/positron/issues/3048). The downside is that this causes false negatives when the assignment happens in a call with local scope, e.g. in `local()` or `test_that()`. We prefer to be overly permissive than overly cautious in these matters.

- Jupyter: The following environment variables are now set in the same way that R does:

- `R_SHARE_DIR`
- `R_INCLUDE_DIR`
- `R_DOC_DIR`

This solves a number of problems in situations that depend on these variables being defined (https://github.com/posit-dev/positron/issues/3637).

## 2024-10

- Objects assigned at top level are now indexed, in addition to assigned functions. When a name is assigned multiple times, we now only index the first occurrence. This allows you to jump to the first "declaration" of the variable. In the future we'll improve this mechanism so that you can jump to the most recent assignment.
Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 34 additions & 1 deletion crates/amalthea/src/comm/data_explorer_comm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
use serde::Deserialize;
use serde::Serialize;

/// Result in Methods
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct OpenDatasetResult {
/// An error message if opening the dataset failed
pub error_message: Option<String>
}

/// Result in Methods
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct SearchSchemaResult {
Expand Down Expand Up @@ -67,7 +74,15 @@ pub struct BackendState {
pub sort_keys: Vec<ColumnSortKey>,

/// The features currently supported by the backend instance
pub supported_features: SupportedFeatures
pub supported_features: SupportedFeatures,

/// Optional flag allowing backend to report that it is unable to serve
/// requests. This parameter may change.
pub connected: Option<bool>,

/// Optional experimental parameter to provide an explanation when
/// connected=false. This parameter may change.
pub error_message: Option<String>
}

/// Schema for a column in a table
Expand Down Expand Up @@ -985,6 +1000,13 @@ pub enum ArraySelection {
SelectIndices(DataSelectionIndices)
}

/// Parameters for the OpenDataset method.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct OpenDatasetParams {
/// The resource locator or file path
pub uri: String,
}

/// Parameters for the GetSchema method.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct GetSchemaParams {
Expand Down Expand Up @@ -1079,6 +1101,9 @@ pub struct ReturnColumnProfilesParams {

/// Array of individual column profile results
pub profiles: Vec<ColumnProfileResult>,

/// Optional error message if something failed to compute
pub error_message: Option<String>,
}

/**
Expand All @@ -1087,6 +1112,12 @@ pub struct ReturnColumnProfilesParams {
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(tag = "method", content = "params")]
pub enum DataExplorerBackendRequest {
/// Request to open a dataset given a URI
///
/// Request to open a dataset given a URI
#[serde(rename = "open_dataset")]
OpenDataset(OpenDatasetParams),

/// Request schema
///
/// Request subset of column schemas for a table-like object
Expand Down Expand Up @@ -1160,6 +1191,8 @@ pub enum DataExplorerBackendRequest {
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(tag = "method", content = "result")]
pub enum DataExplorerBackendReply {
OpenDatasetReply(OpenDatasetResult),

GetSchemaReply(TableSchema),

SearchSchemaReply(SearchSchemaResult),
Expand Down
2 changes: 1 addition & 1 deletion crates/ark/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "ark"
version = "0.1.155"
version = "0.1.156"
description = """
Ark, an R Kernel.
"""
Expand Down
1 change: 1 addition & 0 deletions crates/ark/src/data_explorer/column_profile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ pub async fn handle_columns_profiles_requests(
let event = DataExplorerFrontendEvent::ReturnColumnProfiles(ReturnColumnProfilesParams {
callback_id,
profiles,
error_message: None,
});

let json_event = serde_json::to_value(event)?;
Expand Down
26 changes: 26 additions & 0 deletions crates/ark/src/data_explorer/histogram.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ mod tests {
use stdext::assert_match;

use super::*;
use crate::fixtures::package_is_installed;
use crate::r_task;

fn default_options() -> FormatOptions {
Expand Down Expand Up @@ -606,4 +607,29 @@ mod tests {
);
})
}

#[test]
fn test_frequency_table_haven_labelled() {
r_task(|| {
if !package_is_installed("haven") {
return;
}

test_frequency_table(
"haven::labelled(c(rep(1, 100), rep(2, 200), rep(3, 150)), labels = c('A' = 1, 'B' = 2, 'C' = 3))",
10,
harp::parse_eval_global("c('B', 'C', 'A')").unwrap(),
vec![200, 150, 100],
None,
);
// Account for all factor levels, even if they don't appear in the data
test_frequency_table(
"haven::labelled(c(rep(1, 100), rep(2, 200)), labels = c('A' = 1, 'B' = 2, 'C' = 3))",
10,
harp::parse_eval_global("c('B', 'A', 'C')").unwrap(),
vec![200, 100, 0],
None,
);
})
}
}
6 changes: 6 additions & 0 deletions crates/ark/src/data_explorer/r_data_explorer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,10 @@ impl RDataExplorer {

DataExplorerBackendRequest::GetState => r_task(|| self.r_get_state()),

DataExplorerBackendRequest::OpenDataset(_) => {
return Err(anyhow!("Data Explorer: Not yet supported"));
},

DataExplorerBackendRequest::SearchSchema(_) => {
return Err(anyhow!("Data Explorer: Not yet supported"));
},
Expand Down Expand Up @@ -873,6 +877,8 @@ impl RDataExplorer {
fn r_get_state(&self) -> anyhow::Result<DataExplorerBackendReply> {
let state = BackendState {
display_name: self.title.clone(),
connected: Some(true),
error_message: None,
table_shape: TableShape {
num_rows: match self.filtered_indices {
Some(ref indices) => indices.len() as i64,
Expand Down
29 changes: 29 additions & 0 deletions crates/ark/src/data_explorer/summary_stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ where
#[cfg(test)]
mod tests {
use super::*;
use crate::fixtures::package_is_installed;

fn default_options() -> FormatOptions {
FormatOptions {
Expand Down Expand Up @@ -327,4 +328,32 @@ mod tests {
assert_eq!(stats.date_stats, Some(expected));
})
}

#[test]
fn test_haven_labelled() {
crate::r_task(|| {
if !package_is_installed("haven") {
return;
}

let column =
harp::parse_eval_base("haven::labelled(c(1, 1, 2), c(Male = 1, Female = 2))")
.unwrap();

let column_factor =
harp::parse_eval_base("factor(c(1,1,2), labels = c('Male', 'Female'))").unwrap();

let stats =
summary_stats(column.sexp, ColumnDisplayType::String, &default_options()).unwrap();

let stats_factor = summary_stats(
column_factor.sexp,
ColumnDisplayType::String,
&default_options(),
)
.unwrap();

assert_eq!(stats, stats_factor);
})
}
}
7 changes: 7 additions & 0 deletions crates/ark/src/data_explorer/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@ pub fn display_type(x: SEXP) -> ColumnDisplayType {
}

if r_is_object(x) {
// `haven_labelled` objects inherit from their internal data type
// such as integer or character. We special case them here before
// checking the internal types below.
if r_inherits(x, "haven_labelled") {
return ColumnDisplayType::String;
}

if r_inherits(x, "logical") {
return ColumnDisplayType::Boolean;
}
Expand Down
11 changes: 11 additions & 0 deletions crates/ark/src/fixtures/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use serde::Serialize;
use tree_sitter::Point;

use crate::modules;
use crate::modules::ARK_ENVS;

// Lock for tests that can't be run concurrently. Only needed for tests that can't
// be wrapped in an `r_task()`.
Expand Down Expand Up @@ -92,6 +93,16 @@ where
}
}

pub fn package_is_installed(package: &str) -> bool {
harp::parse_eval0(
format!(".ps.is_installed('{package}')").as_str(),
ARK_ENVS.positron_ns,
)
.unwrap()
.try_into()
.unwrap()
}

#[cfg(test)]
mod tests {
use tree_sitter::Point;
Expand Down
42 changes: 39 additions & 3 deletions crates/ark/src/interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ use crossbeam::channel::Receiver;
use crossbeam::channel::Sender;
use crossbeam::select;
use harp::command::r_command;
use harp::command::r_command_from_path;
use harp::environment::r_ns_env;
use harp::environment::Environment;
use harp::environment::R_ENVS;
Expand Down Expand Up @@ -359,23 +360,58 @@ impl RMain {
args.push(CString::new(arg).unwrap().into_raw());
}

// Get `R_HOME` from env var, typically set by Positron / CI / kernel specification
let r_home = match std::env::var("R_HOME") {
Ok(home) => PathBuf::from(home),
Ok(home) => {
// Get `R_HOME` from env var, typically set by Positron / CI / kernel specification
PathBuf::from(home)
},
Err(_) => {
// Get `R_HOME` from `PATH`, via `R`
let Ok(result) = r_command(|command| {
let Ok(result) = r_command_from_path(|command| {
command.arg("RHOME");
}) else {
panic!("Can't find R or `R_HOME`");
};

let r_home = String::from_utf8(result.stdout).unwrap();
let r_home = r_home.trim();

// Now set `R_HOME`. From now on, `r_command()` can be used to
// run exactly the same R as is running in Ark.
unsafe { std::env::set_var("R_HOME", r_home) };
PathBuf::from(r_home)
},
};

// `R_HOME` is now defined no matter what and will be used by
// `r_command()`. Let's discover the other important environment
// variables set by R's shell script frontend.
// https://github.com/posit-dev/positron/issues/3637
if let Ok(output) =
r_command(|command| {
// From https://github.com/rstudio/rstudio/blob/74696236/src/cpp/core/r_util/REnvironmentPosix.cpp#L506-L515
command.arg("--vanilla").arg("-s").arg("-e").arg(
r#"cat(paste(R.home('share'), R.home('include'), R.home('doc'), sep=';'))"#,
);
})
{
if let Ok(vars) = String::from_utf8(output.stdout) {
let vars: Vec<&str> = vars.trim().split(';').collect();
if vars.len() == 3 {
// Set the R env vars as the R shell script frontend would
unsafe {
std::env::set_var("R_SHARE_DIR", vars[0]);
std::env::set_var("R_INCLUDE_DIR", vars[1]);
std::env::set_var("R_DOC_DIR", vars[2]);
};
} else {
log::warn!("Unexpected output for R envvars");
}
} else {
log::warn!("Could not read stdout for R envvars");
};
};

let libraries = RLibraries::from_r_home_path(&r_home);
libraries.initialize_pre_setup_r();

Expand Down
Loading

0 comments on commit b12d1f4

Please sign in to comment.