From a7cbf3bbc4ddcb593dec9876133764f71d937ab4 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 22 Nov 2024 16:40:49 +0100 Subject: [PATCH 1/4] Handle multiple expressions on single line in statement range --- crates/ark/src/lsp/encoding.rs | 2 +- crates/ark/src/lsp/statement_range.rs | 121 ++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 1 deletion(-) diff --git a/crates/ark/src/lsp/encoding.rs b/crates/ark/src/lsp/encoding.rs index a47ba8e27..b3e798399 100644 --- a/crates/ark/src/lsp/encoding.rs +++ b/crates/ark/src/lsp/encoding.rs @@ -136,7 +136,7 @@ fn convert_character_from_utf16_to_utf8(x: &str, character: usize) -> usize { } /// Converts a character offset into a particular line from UTF-8 to UTF-16 -fn convert_character_from_utf8_to_utf16(x: &str, character: usize) -> usize { +pub(crate) fn convert_character_from_utf8_to_utf16(x: &str, character: usize) -> usize { if x.is_ascii() { // Fast pass return character; diff --git a/crates/ark/src/lsp/statement_range.rs b/crates/ark/src/lsp/statement_range.rs index ce54509b6..4733f787a 100644 --- a/crates/ark/src/lsp/statement_range.rs +++ b/crates/ark/src/lsp/statement_range.rs @@ -7,6 +7,7 @@ use anyhow::bail; use anyhow::Result; +use harp::parse_exprs; use once_cell::sync::Lazy; use regex::Regex; use ropey::Rope; @@ -19,9 +20,11 @@ use tower_lsp::lsp_types::VersionedTextDocumentIdentifier; use tree_sitter::Node; use tree_sitter::Point; +use super::encoding::convert_character_from_utf8_to_utf16; use crate::lsp::encoding::convert_point_to_position; use crate::lsp::traits::cursor::TreeCursorExt; use crate::lsp::traits::rope::RopeExt; +use crate::r_task; use crate::treesitter::NodeType; use crate::treesitter::NodeTypeExt; @@ -64,6 +67,12 @@ pub(crate) fn statement_range( return Ok(Some(new_statement_range_response(&node, contents, code))); } + // First check with the R parser whether line at point is complete. + // In that case, send the whole line as range. + if let Some(range) = find_complete_line_at_point(contents, point)? { + return Ok(Some(StatementRangeResponse { range, code: None })); + } + if let Some(node) = find_statement_range_node(&root, row) { return Ok(Some(new_statement_range_response(&node, contents, None))); }; @@ -89,6 +98,53 @@ fn new_statement_range_response( StatementRangeResponse { range, code } } +fn find_complete_line_at_point(contents: &Rope, point: Point) -> anyhow::Result> { + let mut row = point.row; + let line; + + loop { + let Some(current_line) = contents.get_line(row) else { + // The document was empty and we've looped over all lines. Let + // regular handler deal with this. + return Ok(None); + }; + + let current_line = current_line.to_string(); + + let Ok(n_exprs) = r_task::r_task(|| -> anyhow::Result { + let exprs = parse_exprs(¤t_line)?; + Ok(exprs.length()) + }) else { + // If incomplete or doesn't parse, we don't have a complete line + return Ok(None); + }; + + // Eat empty lines + if n_exprs == 0 { + row = row + 1; + continue; + } + + line = current_line; + break; + } + + let end_column = line.chars().count(); + let end_column = convert_character_from_utf8_to_utf16(&line, end_column) as u32; + + let range = Range { + start: Position { + line: row as u32, + character: 0, + }, + end: Position { + line: row as u32, + character: end_column, + }, + }; + Ok(Some(range)) +} + fn find_roxygen_comment_at_point<'tree>( root: &'tree Node, contents: &Rope, @@ -497,10 +553,13 @@ fn contains_row_at_different_start_position(node: Node, row: usize) -> Option Date: Mon, 25 Nov 2024 11:59:47 +0100 Subject: [PATCH 2/4] Add failing tests for multiline expressions --- crates/ark/src/lsp/statement_range.rs | 63 +++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 3 deletions(-) diff --git a/crates/ark/src/lsp/statement_range.rs b/crates/ark/src/lsp/statement_range.rs index 4733f787a..88fafeb29 100644 --- a/crates/ark/src/lsp/statement_range.rs +++ b/crates/ark/src/lsp/statement_range.rs @@ -562,6 +562,7 @@ mod tests { use crate::lsp::statement_range::find_complete_line_at_point; use crate::lsp::statement_range::find_roxygen_comment_at_point; use crate::lsp::statement_range::find_statement_range_node; + use crate::lsp::statement_range::statement_range; use crate::lsp::traits::rope::RopeExt; // Intended to ease statement range testing. Supply `x` as a string containing @@ -1467,9 +1468,6 @@ test_that('stuff', { #[test] fn test_multiple_expressions_on_one_line_nested_case() { - // https://github.com/posit-dev/positron/issues/4317 - - // Can't use `statement_range_test()` because it revolves around finding nodes not ranges let doc = Document::new( " list({ @@ -1494,6 +1492,65 @@ list({ assert_eq!(range, expected_range); } + #[test] + fn test_multiple_expressions_after_multiline_expression() { + // FIXME: Should select up to 2 + let doc = Document::new( + " +{ + 1 +}; 2 +", + None, + ); + let expected_range = lsp_types::Range { + start: lsp_types::Position { + line: 1, + character: 0, + }, + end: lsp_types::Position { + line: 3, + character: 1, // Should be 4 + }, + }; + + let point = Point::new(1, 0); + let range = statement_range(doc.ast.root_node(), &doc.contents, point, point.row) + .unwrap() + .unwrap() + .range; + assert_eq!(range, expected_range); + + // FIXME: Should select up to second braces + let doc = Document::new( + " +{ + 1 +}; { + 2 +} +", + None, + ); + let expected_range = lsp_types::Range { + start: lsp_types::Position { + line: 1, + character: 0, + }, + end: lsp_types::Position { + line: 3, // Should be 5 + character: 1, + }, + }; + + let point = Point::new(1, 0); + let range = statement_range(doc.ast.root_node(), &doc.contents, point, point.row) + .unwrap() + .unwrap() + .range; + assert_eq!(range, expected_range); + } + #[test] fn test_no_top_level_statement() { let row = 2; From 97c7fd4981a5b3a8203f144f6de85bc9b458fab8 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Mon, 25 Nov 2024 14:35:57 +0100 Subject: [PATCH 3/4] Mention in changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b50f7aae..a835f46e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ # 0.1.9000 +## 2024-12 + +- LSP: The statement range provider now has better support for expressions separated by `;` on a single line (posit-dev/positron#4317). + ## 2024-11 - LSP: Assignments in function calls (e.g. `list(x <- 1)`) are now detected by the missing symbol linter to avoid annoying false positive diagnostics (https://github.com/posit-dev/positron/issues/3048). The downside is that this causes false negatives when the assignment happens in a call with local scope, e.g. in `local()` or `test_that()`. We prefer to be overly permissive than overly cautious in these matters. From 921fef4a371dcbaa546e9fdb0d67c1030d7dddfd Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Mon, 9 Dec 2024 10:31:55 -0500 Subject: [PATCH 4/4] Use tree-sitter approach to expand across semicolons (#644) --- crates/ark/src/lsp/encoding.rs | 2 +- crates/ark/src/lsp/statement_range.rs | 303 ++++++++++++-------------- 2 files changed, 137 insertions(+), 168 deletions(-) diff --git a/crates/ark/src/lsp/encoding.rs b/crates/ark/src/lsp/encoding.rs index b3e798399..a47ba8e27 100644 --- a/crates/ark/src/lsp/encoding.rs +++ b/crates/ark/src/lsp/encoding.rs @@ -136,7 +136,7 @@ fn convert_character_from_utf16_to_utf8(x: &str, character: usize) -> usize { } /// Converts a character offset into a particular line from UTF-8 to UTF-16 -pub(crate) fn convert_character_from_utf8_to_utf16(x: &str, character: usize) -> usize { +fn convert_character_from_utf8_to_utf16(x: &str, character: usize) -> usize { if x.is_ascii() { // Fast pass return character; diff --git a/crates/ark/src/lsp/statement_range.rs b/crates/ark/src/lsp/statement_range.rs index 88fafeb29..4849ddecc 100644 --- a/crates/ark/src/lsp/statement_range.rs +++ b/crates/ark/src/lsp/statement_range.rs @@ -1,30 +1,27 @@ // // statement_range.rs // -// Copyright (C) 2023 Posit Software, PBC. All rights reserved. +// Copyright (C) 2023-2024 Posit Software, PBC. All rights reserved. // // use anyhow::bail; use anyhow::Result; -use harp::parse_exprs; use once_cell::sync::Lazy; use regex::Regex; use ropey::Rope; use serde::Deserialize; use serde::Serialize; use stdext::unwrap; +use tower_lsp::lsp_types; use tower_lsp::lsp_types::Position; -use tower_lsp::lsp_types::Range; use tower_lsp::lsp_types::VersionedTextDocumentIdentifier; use tree_sitter::Node; use tree_sitter::Point; -use super::encoding::convert_character_from_utf8_to_utf16; use crate::lsp::encoding::convert_point_to_position; use crate::lsp::traits::cursor::TreeCursorExt; use crate::lsp::traits::rope::RopeExt; -use crate::r_task; use crate::treesitter::NodeType; use crate::treesitter::NodeTypeExt; @@ -43,7 +40,7 @@ pub struct StatementRangeParams { #[serde(rename_all = "camelCase")] pub struct StatementRangeResponse { /// The document range the statement covers. - pub range: Range, + pub range: lsp_types::Range, /// Optionally, code to be executed for this `range` if it differs from /// what is actually pointed to by the `range` (i.e. roxygen examples). pub code: Option, @@ -64,87 +61,36 @@ pub(crate) fn statement_range( // with `code` stripped of the leading `#' ` if we detect that we are in // `@examples`. if let Some((node, code)) = find_roxygen_comment_at_point(&root, contents, point) { - return Ok(Some(new_statement_range_response(&node, contents, code))); - } - - // First check with the R parser whether line at point is complete. - // In that case, send the whole line as range. - if let Some(range) = find_complete_line_at_point(contents, point)? { - return Ok(Some(StatementRangeResponse { range, code: None })); + let range = node.range(); + return Ok(Some(new_statement_range_response(range, contents, code))); } if let Some(node) = find_statement_range_node(&root, row) { - return Ok(Some(new_statement_range_response(&node, contents, None))); + let range = expand_range_across_semicolons(node); + return Ok(Some(new_statement_range_response(range, contents, None))); }; Ok(None) } fn new_statement_range_response( - node: &Node, + range: tree_sitter::Range, contents: &Rope, code: Option, ) -> StatementRangeResponse { // Tree-sitter `Point`s - let start = node.start_position(); - let end = node.end_position(); + let start = range.start_point; + let end = range.end_point; // To LSP `Position`s let start = convert_point_to_position(contents, start); let end = convert_point_to_position(contents, end); - let range = Range { start, end }; + let range = lsp_types::Range { start, end }; StatementRangeResponse { range, code } } -fn find_complete_line_at_point(contents: &Rope, point: Point) -> anyhow::Result> { - let mut row = point.row; - let line; - - loop { - let Some(current_line) = contents.get_line(row) else { - // The document was empty and we've looped over all lines. Let - // regular handler deal with this. - return Ok(None); - }; - - let current_line = current_line.to_string(); - - let Ok(n_exprs) = r_task::r_task(|| -> anyhow::Result { - let exprs = parse_exprs(¤t_line)?; - Ok(exprs.length()) - }) else { - // If incomplete or doesn't parse, we don't have a complete line - return Ok(None); - }; - - // Eat empty lines - if n_exprs == 0 { - row = row + 1; - continue; - } - - line = current_line; - break; - } - - let end_column = line.chars().count(); - let end_column = convert_character_from_utf8_to_utf16(&line, end_column) as u32; - - let range = Range { - start: Position { - line: row as u32, - character: 0, - }, - end: Position { - line: row as u32, - character: end_column, - }, - }; - Ok(Some(range)) -} - fn find_roxygen_comment_at_point<'tree>( root: &'tree Node, contents: &Rope, @@ -240,6 +186,49 @@ fn find_roxygen_comment_at_point<'tree>( return Some((node, code)); } +/// Assuming `node` is the first node on a line, `expand_across_semicolons()` +/// checks to see if there are any other non-comment nodes after `node` that +/// share its line number. If there are, that means the nodes are separated by +/// a `;`, and that we should expand the range to also include the node after +/// the `;`. +fn expand_range_across_semicolons(mut node: Node) -> tree_sitter::Range { + let start_byte = node.start_byte(); + let start_point = node.start_position(); + + let mut end_byte = node.end_byte(); + let mut end_point = node.end_position(); + + // We know `node` is at the start of a line, but it's possible the node + // ends with a `;` and needs to be extended + while let Some(next) = node.next_sibling() { + let next_start_point = next.start_position(); + + if end_point.row != next_start_point.row { + // Next sibling is on a different row, we are safe + break; + } + if next.is_comment() { + // Next sibling is a trailing comment, we are safe + break; + } + + // Next sibling is on the same line as us, must be separated + // by a semicolon. Extend end of range to include next sibling. + end_byte = next.end_byte(); + end_point = next.end_position(); + + // Update ending `node` and recheck (i.e. `1; 2; 3`) + node = next; + } + + tree_sitter::Range { + start_byte, + end_byte, + start_point, + end_point, + } +} + fn find_statement_range_node<'tree>(root: &'tree Node, row: usize) -> Option> { let mut cursor = root.walk(); @@ -553,16 +542,13 @@ fn contains_row_at_different_start_position(node: Node, row: usize) -> Option b) { } #[test] - fn test_if_statements_without_braces_can_run_individual_expressions() { + fn test_if_statements_without_braces_should_run_the_whole_if_statement() { statement_range_test( " < b) 1 + 1>>", ); + // TODO: This should run the whole if statement because there are no braces statement_range_test( " if (a > b) @@ -1115,7 +1103,7 @@ if (a > b) } #[test] - fn test_top_level_if_else_statements_without_braces_can_run_individual_expressions_1() { + fn test_top_level_if_else_statements_without_braces_should_run_the_whole_if_statement() { statement_range_test( " < b) @@ -1124,20 +1112,21 @@ if (a > b) ", ); + // TODO: This should run the whole if statement because there are no braces statement_range_test( " if (a > b) - <<@1 + 1>> else if (b > c) - 2 + 2 else 4 + 4 + <<@1 + 1 else if (b > c) + 2 + 2 else 4 + 4>> ", ); - // TODO: I'm not exactly sure what this should run, but this seems strange + // TODO: This should run the whole if statement because there are no braces statement_range_test( " if (a > b) - <<1 + 1>> else if @(b > c) - 2 + 2 else 4 + 4 + <<1 + 1 else if @(b > c) + 2 + 2 else 4 + 4>> ", ); @@ -1145,25 +1134,25 @@ if (a > b) " if (a > b) 1 + 1 else if (b > c) - <<2 + @2>> else 4 + 4 + <<2 + @2 else 4 + 4>> ", ); - // TODO: I'm not exactly sure what this should run, but this seems strange + // TODO: This should run the whole if statement because there are no braces statement_range_test( " if (a > b) 1 + 1 else if (b > c) - <<2 + 2>> else@ 4 + 4 + <<2 + 2 else@ 4 + 4>> ", ); - // TODO: I'm not exactly sure what this should run, but this seems strange + // TODO: This should run the whole if statement because there are no braces statement_range_test( " if (a > b) 1 + 1 else if (b > c) - <<2 + 2>> else 4 @+ 4 + <<2 + 2 else 4 @+ 4>> ", ); } @@ -1183,6 +1172,7 @@ if (a > b) ", ); + // TODO: This should run the whole if statement because there are no braces statement_range_test( " { @@ -1209,6 +1199,7 @@ if (a > b) ", ); + // TODO: This should run the whole if statement because there are no braces statement_range_test( " { @@ -1235,6 +1226,7 @@ if (a > b) ", ); + // TODO: This should run the whole if statement because there are no braces statement_range_test( " { @@ -1436,119 +1428,96 @@ test_that('stuff', { #[test] fn test_multiple_expressions_on_one_line() { // https://github.com/posit-dev/positron/issues/4317 - - // Can't use `statement_range_test()` because it revolves around finding nodes not ranges - let doc = Document::new( + statement_range_test( " - -1; 2; 3 +<<1@; 2; 3>> +", + ); + statement_range_test( + " +<<1; @2; 3>> +", + ); + statement_range_test( + " +<<1; 2; 3@>> ", - None, ); - let expected_range = Some(lsp_types::Range { - start: lsp_types::Position { - line: 2, - character: 0, - }, - end: lsp_types::Position { - line: 2, - character: 8, - }, - }); - - let point = Point::new(2, 7); - let range = find_complete_line_at_point(&doc.contents, point).unwrap(); - assert_eq!(range, expected_range); // Empty lines don't prevent finding complete lines - let point = Point::new(0, 0); - let range = find_complete_line_at_point(&doc.contents, point).unwrap(); - assert_eq!(range, expected_range); + statement_range_test( + " +@ + +<<1; 2; 3>> + ", + ); } #[test] fn test_multiple_expressions_on_one_line_nested_case() { - let doc = Document::new( + statement_range_test( " list({ - 1; 2; 3 + @<<1; 2; 3>> }) -", - None, + ", + ); + statement_range_test( + " +list({ + <<1; @2; 3>> +}) + ", ); - let expected_range = Some(lsp_types::Range { - start: lsp_types::Position { - line: 2, - character: 0, - }, - end: lsp_types::Position { - line: 2, - character: 10, - }, - }); - - let point = Point::new(2, 0); - let range = find_complete_line_at_point(&doc.contents, point).unwrap(); - assert_eq!(range, expected_range); } #[test] fn test_multiple_expressions_after_multiline_expression() { - // FIXME: Should select up to 2 - let doc = Document::new( + // Selects everything + statement_range_test( " -{ +@<<{ 1 -}; 2 -", - None, +}; 2>> + ", ); - let expected_range = lsp_types::Range { - start: lsp_types::Position { - line: 1, - character: 0, - }, - end: lsp_types::Position { - line: 3, - character: 1, // Should be 4 - }, - }; - let point = Point::new(1, 0); - let range = statement_range(doc.ast.root_node(), &doc.contents, point, point.row) - .unwrap() - .unwrap() - .range; - assert_eq!(range, expected_range); + // Select up through the second brace + statement_range_test( + " +@<<{ + 1 +}; { + 2 +}>> + ", + ); - // FIXME: Should select up to second braces - let doc = Document::new( + // Only selects `1` + statement_range_test( " { - 1 + @<<1>> }; { 2 } -", - None, + ", ); - let expected_range = lsp_types::Range { - start: lsp_types::Position { - line: 1, - character: 0, - }, - end: lsp_types::Position { - line: 3, // Should be 5 - character: 1, - }, - }; + } - let point = Point::new(1, 0); - let range = statement_range(doc.ast.root_node(), &doc.contents, point, point.row) - .unwrap() - .unwrap() - .range; - assert_eq!(range, expected_range); + #[test] + fn test_multiple_expressions_on_one_line_doesnt_select_trailing_comment() { + statement_range_test( + " +@<<1>> # trailing + ", + ); + statement_range_test( + " +@<<1; 2>> # trailing + ", + ); } #[test]