From 48483fb07b0c0b671d3b208a647c427e2feebc54 Mon Sep 17 00:00:00 2001 From: Nicolas BACQUEY Date: Wed, 11 Dec 2024 12:13:29 +0100 Subject: [PATCH] Share code for `parse` function between `comments.rs` and `tree_sitter.rs` --- topiary-core/src/atom_collection.rs | 3 +- topiary-core/src/comments.rs | 25 ++++------- topiary-core/src/{types.rs => common.rs} | 51 +++++++++++++++++++++- topiary-core/src/lib.rs | 6 +-- topiary-core/src/tree_sitter.rs | 54 +++--------------------- 5 files changed, 68 insertions(+), 71 deletions(-) rename topiary-core/src/{types.rs => common.rs} (52%) diff --git a/topiary-core/src/atom_collection.rs b/topiary-core/src/atom_collection.rs index 574cd05d..0a1eb13e 100644 --- a/topiary-core/src/atom_collection.rs +++ b/topiary-core/src/atom_collection.rs @@ -8,7 +8,8 @@ use std::{ use topiary_tree_sitter_facade::Node; use crate::{ - tree_sitter::NodeExt, Atom, FormatterError, FormatterResult, ScopeCondition, ScopeInformation, + tree_sitter::NodeExt, + Atom, FormatterError, FormatterResult, ScopeCondition, ScopeInformation, }; /// A struct that holds sets of node IDs that have line breaks before or after them. diff --git a/topiary-core/src/comments.rs b/topiary-core/src/comments.rs index 97a7615e..4b31ad60 100644 --- a/topiary-core/src/comments.rs +++ b/topiary-core/src/comments.rs @@ -1,8 +1,8 @@ -use topiary_tree_sitter_facade::{InputEdit, Language, Node, Parser, Tree}; +use topiary_tree_sitter_facade::{InputEdit, Language, Node, Tree}; use crate::{ + common::{parse, Diff, InputSection, Position}, error::FormatterError, - types::{Diff, InputSection, Position}, FormatterResult, }; @@ -222,19 +222,6 @@ fn previous_non_comment_leaf<'tree>(starting_node: Node<'tree>) -> Option FormatterResult { - let mut parser = Parser::new()?; - parser.set_language(grammar)?; - let tree = parser - .parse(content, Some(&old_tree))? - .ok_or_else(|| FormatterError::Internal("Could not parse input".into(), None))?; - Ok(tree) -} - // Use the following heuristics to find a comment's anchor: // If the comment is only prefixed by blank symbols on its line, then the anchor is the // next non-comment sibling node. @@ -298,6 +285,7 @@ pub fn extract_comments<'a>( tree: &'a Tree, input: &'a str, grammar: &Language, + tolerate_parsing_errors: bool, ) -> FormatterResult { let mut anchors: Vec<(Node, AnchoredComment)> = Vec::new(); let mut anchored_comments: Vec = Vec::new(); @@ -351,7 +339,12 @@ pub fn extract_comments<'a>( for edit in edits { new_tree.edit(&edit); } - new_tree = reparse(new_tree, new_input.as_str(), grammar)?; + new_tree = parse( + new_input.as_str(), + grammar, + tolerate_parsing_errors, + Some(&new_tree), + )?; Ok(SeparatedInput { input_tree: new_tree, input_string: new_input, diff --git a/topiary-core/src/types.rs b/topiary-core/src/common.rs similarity index 52% rename from topiary-core/src/types.rs rename to topiary-core/src/common.rs index 00978fbd..61d35692 100644 --- a/topiary-core/src/types.rs +++ b/topiary-core/src/common.rs @@ -1,9 +1,11 @@ use std::{cmp::Ord, fmt::Display}; use serde::Serialize; -use topiary_tree_sitter_facade::{Node, Point}; +use topiary_tree_sitter_facade::{Node, Parser, Point, Tree}; -/// A module for common, low-level types in the topiary-core crate +use crate::{error::FormatterError, FormatterResult}; + +/// A module for common, low-level types and functions in the topiary-core crate /// Refers to a position within the code. Used for error reporting, and for /// comparing input with formatted output. The numbers are 1-based, because that @@ -55,3 +57,48 @@ pub trait Diff { fn subtract(self: &mut Self, other: T) -> Result<(), Self::ErrorType>; } + +/// Parses some string into a syntax tree, given a tree-sitter grammar. +pub fn parse( + content: &str, + grammar: &topiary_tree_sitter_facade::Language, + tolerate_parsing_errors: bool, + old_tree: Option<&Tree>, +) -> FormatterResult { + let mut parser = Parser::new()?; + parser.set_language(grammar).map_err(|_| { + FormatterError::Internal("Could not apply Tree-sitter grammar".into(), None) + })?; + + let tree = parser + .parse(content, old_tree)? + .ok_or_else(|| FormatterError::Internal("Could not parse input".into(), None))?; + + // Fail parsing if we don't get a complete syntax tree. + if !tolerate_parsing_errors { + check_for_error_nodes(&tree.root_node())?; + } + + Ok(tree) +} + +fn check_for_error_nodes(node: &Node) -> FormatterResult<()> { + if node.kind() == "ERROR" { + let start = node.start_position(); + let end = node.end_position(); + + // Report 1-based lines and columns. + return Err(FormatterError::Parsing { + start_line: start.row() + 1, + start_column: start.column() + 1, + end_line: end.row() + 1, + end_column: end.column() + 1, + }); + } + + for child in node.children(&mut node.walk()) { + check_for_error_nodes(&child)?; + } + + Ok(()) +} diff --git a/topiary-core/src/lib.rs b/topiary-core/src/lib.rs index 9bac38a9..81c3eaa4 100644 --- a/topiary-core/src/lib.rs +++ b/topiary-core/src/lib.rs @@ -16,20 +16,20 @@ use itertools::Itertools; use pretty_assertions::StrComparison; pub use crate::{ + common::{parse, Position}, error::{FormatterError, IoError}, language::Language, tree_sitter::{apply_query, CoverageData, SyntaxNode, TopiaryQuery, Visualisation}, - types::Position, }; mod atom_collection; pub mod comments; +pub mod common; mod error; mod graphviz; mod language; mod pretty; pub mod tree_sitter; -pub mod types; #[doc(hidden)] pub mod test_utils; @@ -264,7 +264,7 @@ pub fn formatter( } Operation::Visualise { output_format } => { - let tree = tree_sitter::parse(&content, &language.grammar, false)?; + let tree = parse(&content, &language.grammar, false, None)?; let root: SyntaxNode = tree.root_node().into(); match output_format { diff --git a/topiary-core/src/tree_sitter.rs b/topiary-core/src/tree_sitter.rs index 917ddf3c..db1f73f0 100644 --- a/topiary-core/src/tree_sitter.rs +++ b/topiary-core/src/tree_sitter.rs @@ -7,7 +7,7 @@ use std::{collections::HashSet, fmt::Display}; use serde::Serialize; use topiary_tree_sitter_facade::{ - Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryMatch, QueryPredicate, Tree, + Node, Point, Query, QueryCapture, QueryCursor, QueryMatch, QueryPredicate, }; use streaming_iterator::StreamingIterator; @@ -15,8 +15,8 @@ use streaming_iterator::StreamingIterator; use crate::{ atom_collection::{AtomCollection, QueryPredicates}, comments::{extract_comments, AnchoredComment, SeparatedInput}, + common::{parse, Position}, error::FormatterError, - types::Position, FormatterResult, }; @@ -211,14 +211,14 @@ pub fn apply_query( grammar: &topiary_tree_sitter_facade::Language, tolerate_parsing_errors: bool, ) -> FormatterResult { - let tree = parse(input_content, grammar, tolerate_parsing_errors)?; + let tree = parse(input_content, grammar, tolerate_parsing_errors, None)?; // Remove comments in a separate stream before applying queries let SeparatedInput { input_string, input_tree, comments, - } = extract_comments(&tree, input_content, grammar)?; + } = extract_comments(&tree, input_content, grammar, tolerate_parsing_errors)?; let source = input_string.as_bytes(); let root = input_tree.root_node(); @@ -325,50 +325,6 @@ pub fn apply_query( Ok(atoms) } -/// Parses some string into a syntax tree, given a tree-sitter grammar. -pub fn parse( - content: &str, - grammar: &topiary_tree_sitter_facade::Language, - tolerate_parsing_errors: bool, -) -> FormatterResult { - let mut parser = Parser::new()?; - parser.set_language(grammar).map_err(|_| { - FormatterError::Internal("Could not apply Tree-sitter grammar".into(), None) - })?; - - let tree = parser - .parse(content, None)? - .ok_or_else(|| FormatterError::Internal("Could not parse input".into(), None))?; - - // Fail parsing if we don't get a complete syntax tree. - if !tolerate_parsing_errors { - check_for_error_nodes(&tree.root_node())?; - } - - Ok(tree) -} - -fn check_for_error_nodes(node: &Node) -> FormatterResult<()> { - if node.kind() == "ERROR" { - let start = node.start_position(); - let end = node.end_position(); - - // Report 1-based lines and columns. - return Err(FormatterError::Parsing { - start_line: start.row() + 1, - start_column: start.column() + 1, - end_line: end.row() + 1, - end_column: end.column() + 1, - }); - } - - for child in node.children(&mut node.walk()) { - check_for_error_nodes(&child)?; - } - - Ok(()) -} - /// Collects the IDs of all leaf nodes in a set of query matches. /// /// This function takes a slice of `LocalQueryMatch` and a slice of capture names, @@ -519,7 +475,7 @@ pub fn check_query_coverage( original_query: &TopiaryQuery, grammar: &topiary_tree_sitter_facade::Language, ) -> FormatterResult { - let tree = parse(input_content, grammar, false)?; + let tree = parse(input_content, grammar, false, None)?; let root = tree.root_node(); let source = input_content.as_bytes(); let mut missing_patterns = Vec::new();