diff --git a/examples/logging_tree_sitter_example.py b/examples/logging_tree_sitter_example.py deleted file mode 100644 index 3fe1a08..0000000 --- a/examples/logging_tree_sitter_example.py +++ /dev/null @@ -1,38 +0,0 @@ -from logging import INFO, Logger, basicConfig, getLogger -from time import sleep - -from salve_dependency_hub import langauge_mappings, language_functions - -from salve import HIGHLIGHT_TREE_SITTER, IPC, Response - -basicConfig( - level=INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" -) -logger: Logger = getLogger("Main") - - -def main(): - context = IPC() - - context.update_file( - "test", - open(__file__, "r+").read(), - ) - - context.request( - HIGHLIGHT_TREE_SITTER, - file="test", - language="python", - text_range=(1, 30), - tree_sitter_language=language_functions["python"], - mapping=langauge_mappings["python"], - ) - - sleep(1) - output: Response | None = context.get_response(HIGHLIGHT_TREE_SITTER) - print(output) - context.kill_IPC() - - -if __name__ == "__main__": - main() diff --git a/examples/simple_mapping_example.py b/examples/simple_mapping_example.py deleted file mode 100644 index 0110f18..0000000 --- a/examples/simple_mapping_example.py +++ /dev/null @@ -1,56 +0,0 @@ -from time import sleep - -from salve_dependency_hub import language_functions -from tree_sitter import Language, Parser, Tree - -from salve import HIGHLIGHT, IPC, Token, make_unrefined_mapping - - -def main(): - context = IPC() - - code: str = open(__file__, "r+").read() - - context.update_file("test", code) - - # Run normally to get the normal Tokens as a cross reference - # NOTE: if you try running the tree sitter higlighter without a mapping it just returns the - # pygments tokens assuming you will create a mapping with it. wWe aren't here but its good - # to know - context.request(HIGHLIGHT, file="test", language="python") - - sleep(1) - - pygments_output: list[Token] = context.get_response(HIGHLIGHT)["result"] # type: ignore - - # Not a comprehensive list but it works for this example - avoid_types = [ - "class_definition", - "block", - "function_definition", - "if_statement", - "expression_statement", - "call", - "parameters", - "argument_list", - "module", - "type", - ] - - tree: Tree = Parser(Language(language_functions["python"]())).parse( - bytes(code, "utf8") - ) - - print( - make_unrefined_mapping( - tree, - pygments_output, - avoid_types, - ) - ) - - context.kill_IPC() - - -if __name__ == "__main__": - main() diff --git a/examples/simple_tree_sitter_highlights_example.py b/examples/simple_tree_sitter_highlights_example.py deleted file mode 100644 index 6532250..0000000 --- a/examples/simple_tree_sitter_highlights_example.py +++ /dev/null @@ -1,32 +0,0 @@ -from time import sleep - -from salve_dependency_hub import langauge_mappings, language_functions - -from salve import HIGHLIGHT_TREE_SITTER, IPC, Response - - -def main(): - context = IPC() - - context.update_file( - "test", - open(__file__, "r+").read(), - ) - - context.request( - HIGHLIGHT_TREE_SITTER, - file="test", - language="python", - text_range=(1, 30), - tree_sitter_language=language_functions["python"], - mapping=langauge_mappings["python"], - ) - - sleep(1) - output: Response | None = context.get_response(HIGHLIGHT_TREE_SITTER) - print(output) - context.kill_IPC() - - -if __name__ == "__main__": - main() diff --git a/requirements-dev.txt b/requirements-dev.txt index 4804a01..82c7520 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,11 +2,9 @@ pygments pyeditorconfig beartype -tree-sitter # Testing pytest -salve_dependency_hub # Formatting ruff diff --git a/requirements.txt b/requirements.txt index 701d25a..ce9eab7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ pygments pyeditorconfig beartype -tree-sitter diff --git a/salve/__init__.py b/salve/__init__.py index 5d7ccdd..fb7fde5 100644 --- a/salve/__init__.py +++ b/salve/__init__.py @@ -9,7 +9,6 @@ DEFINITION, EDITORCONFIG, HIGHLIGHT, - HIGHLIGHT_TREE_SITTER, REPLACEMENTS, Response, ) @@ -17,5 +16,4 @@ Token, generic_tokens, is_unicode_letter, - make_unrefined_mapping, ) diff --git a/salve/ipc.py b/salve/ipc.py index e9edf3e..b408a6b 100644 --- a/salve/ipc.py +++ b/salve/ipc.py @@ -3,8 +3,6 @@ from pathlib import Path from random import randint -from beartype.typing import Callable - from .misc import ( COMMAND, COMMANDS, @@ -119,8 +117,6 @@ def request( text_range: tuple[int, int] = (1, -1), file_path: Path | str = Path(__file__), definition_starters: list[tuple[str, str]] = [("", "before")], - tree_sitter_language: Callable[[], int] | Path | str | None = None, - mapping: dict[str, str] | None = None, ) -> None: """Sends the main_server a request of type command with given kwargs - external API""" self.logger.debug("Beginning request") @@ -147,8 +143,6 @@ def request( text_range=text_range, file_path=file_path, definition_starters=definition_starters, - tree_sitter_language=tree_sitter_language, - mapping=mapping, ) def cancel_request(self, command: str) -> None: diff --git a/salve/misc.py b/salve/misc.py index f06657e..87c8906 100644 --- a/salve/misc.py +++ b/salve/misc.py @@ -12,7 +12,6 @@ "highlight", "editorconfig", "definition", - "highlight-tree-sitter", ] COMMAND = str @@ -21,8 +20,6 @@ HIGHLIGHT: COMMAND = COMMANDS[2] EDITORCONFIG: COMMAND = COMMANDS[3] DEFINITION: COMMAND = COMMANDS[4] -HIGHLIGHT_TREE_SITTER: COMMAND = COMMANDS[5] - class Message(TypedDict): """Base class for messages in and out of the server""" @@ -38,18 +35,14 @@ class Request(Message): file: str expected_keywords: NotRequired[list[str]] # autocomplete, replacements current_word: NotRequired[str] # autocomplete, replacements, definition - language: NotRequired[str] # highlight, highlight-tree-sitter + language: NotRequired[str] # highlight text_range: NotRequired[ tuple[int, int] - ] # highlight, highlight-tree-sitter + ] # highlight file_path: NotRequired[Path | str] # editorconfig definition_starters: NotRequired[ list[tuple[str, str]] ] # definition (list of regexes) - tree_sitter_language: NotRequired[ - Callable | Path | str - ] # highlight-tree-sitter - mapping: NotRequired[dict[str, str]] # highlight-tree-sitter class Notification(Message): diff --git a/salve/server.py b/salve/server.py index 8adf3ad..f9a1d15 100644 --- a/salve/server.py +++ b/salve/server.py @@ -3,9 +3,7 @@ from pathlib import Path from time import sleep -from beartype.typing import Callable from pyeditorconfig import get_config -from tree_sitter import Language, Parser from .misc import ( COMMANDS, @@ -21,8 +19,6 @@ get_definition, get_highlights, get_replacements, - lang_from_so, - tree_sitter_highlight, ) @@ -123,9 +119,7 @@ def handle_request(self, request: Request) -> None: command: str = request["command"] id: int = self.newest_ids[command] file: str = request["file"] - result: ( - list[str | tuple[tuple[int, int], int, str]] | dict[str, str] - ) = [] + result: list[str | Token] | dict[str, str] = [] cancelled: bool = False match request["command"]: @@ -161,43 +155,6 @@ def handle_request(self, request: Request) -> None: request["definition_starters"], # type: ignore request["current_word"], # type: ignore ) - case "highlight-tree-sitter": - self.logger.info("Getting Tree Sitter highlights for request") - - self.logger.debug("Getting language function") - language_function: Callable[[], int] | Path | str = request[ - "tree_sitter_language" - ] # type: ignore - if isinstance(language_function, Path): - self.logger.info("Language function is pathlib.Path") - lang = lang_from_so( - str(language_function.absolute()), - request["language"], # type: ignore - ) - self.logger.debug("Language created") - elif isinstance(language_function, str): - self.logger.info("Language function is str") - lang = lang_from_so( - language_function, - request["language"], # type: ignore - ) # type: ignore - self.logger.debug("Language created") - elif callable(language_function): - self.logger.info("Language function is actual function") - lang = Language(language_function()) - self.logger.debug("Language created") - - self.logger.debug("Creating Parser") - parser = Parser(lang) - self.logger.debug("Getting highlights from parser") - result = tree_sitter_highlight( # type: ignore - self.logger, - self.files[file], - request["language"], # type: ignore - request["mapping"], # type: ignore - parser, - request["text_range"], # type: ignore - ) case _: self.logger.warning(f"Command {command} not recognized") diff --git a/salve/server_functions/__init__.py b/salve/server_functions/__init__.py index 03a1546..e2ca59c 100644 --- a/salve/server_functions/__init__.py +++ b/salve/server_functions/__init__.py @@ -4,9 +4,6 @@ Token, generic_tokens, get_highlights, - lang_from_so, - make_unrefined_mapping, - tree_sitter_highlight, ) from .misc import is_unicode_letter # noqa: F401 from .replacements import get_replacements # noqa: F401 diff --git a/salve/server_functions/highlight/__init__.py b/salve/server_functions/highlight/__init__.py index 59975dc..126f3b4 100644 --- a/salve/server_functions/highlight/__init__.py +++ b/salve/server_functions/highlight/__init__.py @@ -1,7 +1,2 @@ from .highlight import get_highlights # noqa: F401 from .tokens import Token, generic_tokens # noqa: F401 -from .tree_sitter_funcs import ( # noqa: F401 - lang_from_so, - make_unrefined_mapping, - tree_sitter_highlight, -) diff --git a/salve/server_functions/highlight/tree_sitter_funcs.py b/salve/server_functions/highlight/tree_sitter_funcs.py deleted file mode 100644 index 8f841b2..0000000 --- a/salve/server_functions/highlight/tree_sitter_funcs.py +++ /dev/null @@ -1,310 +0,0 @@ -from ctypes import c_void_p, cdll -from logging import Logger -from os import fspath - -from tree_sitter import Language, Node, Parser, Tree, TreeCursor - -from .highlight import get_highlights -from .links_and_hidden_chars import get_special_tokens -from .misc import normal_text_range -from .tokens import Token, merge_tokens, only_tokens_in_text_range - -trees_and_parsers: dict[str, tuple[Tree, Parser, str]] = {} - - -def lang_from_so(path: str, name: str) -> Language: - lib = cdll.LoadLibrary(fspath(path)) - language_function = getattr(lib, f"tree_sitter_{name}") - language_function.restype = c_void_p - language_ptr = language_function() - return Language(language_ptr) - - -def tree_sitter_highlight( - logger: Logger, - new_code: str, - language_str: str, - mapping: dict[str, str] | None = None, - language_parser: Parser | None = None, - text_range: tuple[int, int] = (1, -1), -) -> list[Token]: - tree: Tree - return_tokens: list[Token] - - if not mapping: - # Fallback on the custom implementation - custom_highlights: list[Token] = get_highlights( - new_code, language_str, text_range - ) - - if language_str not in trees_and_parsers and language_parser: - tree = language_parser.parse(bytes(new_code, "utf8")) - trees_and_parsers[language_str] = (tree, language_parser, new_code) - - return custom_highlights - - split_text, text_range = normal_text_range(new_code, text_range) - - if language_str not in trees_and_parsers: - if not language_parser: - # We will never get here, the IPC API will deal with these but we need to appease - # the static type checkers - return [] - - tree = language_parser.parse(bytes(new_code, "utf8")) - trees_and_parsers[language_str] = (tree, language_parser, new_code) - return_tokens = node_to_tokens(tree.root_node, mapping, logger) - return_tokens.extend( - get_special_tokens(new_code, split_text, text_range[0]) - ) - return_tokens = only_tokens_in_text_range(return_tokens, text_range) - return return_tokens - - tree, parser, old_code = trees_and_parsers[language_str] - new_tree = edit_tree(old_code, new_code, tree, parser) - trees_and_parsers[language_str] = (new_tree, parser, new_code) - - return_tokens = node_to_tokens(new_tree, mapping, logger) - return_tokens.extend( - get_special_tokens(new_code, split_text, text_range[0]) - ) - return_tokens = only_tokens_in_text_range(return_tokens, text_range) - return return_tokens - - -def node_to_tokens( - root_node: Node | Tree, mapping: dict[str, str], logger: Logger -) -> list[Token]: - cursor: TreeCursor = root_node.walk() - tokens: list[Token] = [] - visited_nodes: set = set() - - while True: - node: Node | None = cursor.node - if not node: - break - - # Avoid re-processing the same node - if node.id not in visited_nodes: - visited_nodes.add(node.id) - - if node.child_count == 0: - if node.type not in mapping: - logger.warning( - f'Node type "{node.type}" not mapped. Start point: {node.start_point}, end point: {node.end_point}' - ) - continue - - start_row, start_col = node.start_point - end_row, end_col = node.end_point - - if end_row == start_row: - token = ( - (node.start_point[0] + 1, node.start_point[1]), - node.end_point[1] - node.start_point[1], - mapping[node.type], - ) - tokens.append(token) - continue - - split_text = node.text.splitlines() # type: ignore - for i, line in enumerate(split_text): - if line.strip() == b"": - continue - - if i == 0: - token = ( - (node.start_point[0] + 1, node.start_point[1]), - len(line), - mapping[node.type], - ) - tokens.append(token) - continue - start_col = 0 - lstripped_len: int = len(line.lstrip()) - start_col: int = len(line) - lstripped_len - token = ( - (node.start_point[0] + 1 + i, start_col), - len( - line.strip() - ), # Account for whitespace after the token if any - mapping[node.type], - ) - tokens.append(token) - - # Another child! - if cursor.goto_first_child(): - continue - - # A sibling node! - if cursor.goto_next_sibling(): - continue - - # Go up to parent to look for siblings and possibly other children (this is a depth first search) - while cursor.goto_parent(): - if cursor.goto_next_sibling(): - break - else: - break - - return merge_tokens(tokens) - - -def edit_tree( - old_code: str, new_code: str, tree: Tree, parser: Parser -) -> Tree: - if old_code == new_code: - return tree - - old_code_lines = old_code.splitlines() - new_code_lines = new_code.splitlines() - - # Find the first differing line - def find_first_diff(old_lines, new_lines): - min_len = min(len(old_lines), len(new_lines)) - for i in range(min_len): - if old_lines[i] != new_lines[i]: - return i - return min_len - - # Find the last differing line - def find_last_diff(old_lines, new_lines): - min_len = min(len(old_lines), len(new_lines)) - for i in range(1, min_len + 1): - if old_lines[-i] != new_lines[-i]: - return len(old_lines) - i - return min_len - - # Get line diffs - first_diff = find_first_diff(old_code_lines, new_code_lines) - last_diff_old = find_last_diff(old_code_lines, new_code_lines) - last_diff_new = find_last_diff(new_code_lines, old_code_lines) - - # Calculate byte offsets - start_byte = sum(len(line) + 1 for line in old_code_lines[:first_diff]) - old_end_byte = sum( - len(line) + 1 for line in old_code_lines[: last_diff_old + 1] - ) - new_end_byte = sum( - len(line) + 1 for line in new_code_lines[: last_diff_new + 1] - ) - - # Edit the tree - tree.edit( - start_byte=start_byte, - old_end_byte=old_end_byte, - new_end_byte=new_end_byte, - start_point=(first_diff, 0), - old_end_point=( - last_diff_old, - len(old_code_lines[last_diff_old]) if old_code_lines else 0, - ), - new_end_point=( - last_diff_new, - len(new_code_lines[last_diff_new]) if new_code_lines else 0, - ), - ) - - # Reparse the tree from the start_byte - tree = parser.parse(bytes(new_code, "utf8"), tree) - return tree - - -# Given a test token from the mapping function it will try to match it with the -# closest token type found elsewhere in the pygments list -def token_type_of_test( - test_token: Token, - pygments_tokens: list[Token], - original_type: str, - logger: Logger | None = None, -) -> str: - if not pygments_tokens: - return "" - - for new_token in pygments_tokens: - # Check if the tokens are effectively the same - same_line: bool = test_token[0][0] == new_token[0][0] - same_col_and_length: bool = ( - test_token[0][1] == new_token[0][1] - and test_token[1] == new_token[1] - ) - if not same_line: - continue - if same_line and same_col_and_length: - return new_token[2] - - # Check if the token's range is covered by the new_token - old_token_end: int = test_token[0][1] + test_token[1] - new_token_end: int = new_token[0][1] + new_token[1] - - fully_contained: bool = ( - old_token_end <= new_token_end - and test_token[0][1] >= new_token[0][1] - ) - - # We assume there is no partial overlap - if fully_contained: - return new_token[2] - if logger: - logger.warning( - f'Node type "{original_type}" could not be mapped over token "{test_token}".' - ) - return "" - - -# NOTE: The auto-mapper is great for users who don't want to spend forever mapping stuff as it -# will give a mapping made from what context it is given and then the user can refine it further -def make_unrefined_mapping( - tree: Tree, - custom_highlights: list[Token], - avoid_list: list[str], - logger: Logger | None = None, -) -> dict[str, str]: - # We assume that the pygments special output has parsed this the - cursor: TreeCursor = tree.walk() - mapping: dict[str, str] = {} - visited_nodes: set = set() - - while True: - node: Node | None = cursor.node - if not node: - break - - # Avoid re-processing the same node - if node.id not in visited_nodes: - visited_nodes.add(node.id) - - if node.type in mapping or node.type in avoid_list: - continue - - temp_token: Token = ( - (node.start_point[0] + 1, node.start_point[1]), - node.end_point[1] - node.start_point[1], - "TEST", - ) - token_type = token_type_of_test( - temp_token, custom_highlights, node.type, logger - ) - if not token_type and token_type not in avoid_list and logger: - logger.warning( - f"CANNOT MAP: node.type: {node.type}, node temp_token: {temp_token}" - ) - - mapping[node.type] = token_type - - # Another child! - if cursor.goto_first_child(): - continue - - # A sibling node! - if cursor.goto_next_sibling(): - continue - - # Go up to parent to look for siblings and possibly other children (this is a depth first search) - while cursor.goto_parent(): - if cursor.goto_next_sibling(): - break - else: - break - - return mapping diff --git a/setup.py b/setup.py index cd74d9d..d83354a 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ url="https://github.com/salve-org/salve", long_description=long_description, long_description_content_type="text/markdown", - install_requires=["pygments", "pyeditorconfig", "beartype", "tree-sitter"], + install_requires=["pygments", "pyeditorconfig", "beartype"], python_requires=">=3.11", license="MIT license", classifiers=[ diff --git a/tests/languages-darwin.so b/tests/languages-darwin.so deleted file mode 100755 index e1b621f..0000000 Binary files a/tests/languages-darwin.so and /dev/null differ diff --git a/tests/languages-linux.so b/tests/languages-linux.so deleted file mode 100755 index b0372b4..0000000 Binary files a/tests/languages-linux.so and /dev/null differ diff --git a/tests/test_tree_sitter.py b/tests/test_tree_sitter.py deleted file mode 100644 index 6737360..0000000 --- a/tests/test_tree_sitter.py +++ /dev/null @@ -1,176 +0,0 @@ -from logging import Logger - -from beartype.typing import Callable -from salve_dependency_hub import language_functions -from tree_sitter import Language, Parser, Tree - -from salve import make_unrefined_mapping -from salve.server_functions.highlight.tree_sitter_funcs import ( - edit_tree, - node_to_tokens, - tree_sitter_highlight, -) - -# Create useful variables -original_code_snippet: str = """class foo: - def bar() -> None: - if baz: - qux() -""" -minimal_python_mapping: dict[str, str] = { - "class": "Keyword", - "identifier": "Name", - ":": "Punctuation", - "def": "Keyword", - "(": "Punctuation", - ")": "Punctuation", - "->": "Operator", - "none": "Keyword", - "if": "Keyword", - "string_start": "String", - "string_content": "String", - "string_end": "String", - "string": "String", -} -pygments_output = [ - ((1, 0), 5, "Keyword"), - ((1, 6), 3, "Name"), - ((1, 9), 1, "Punctuation"), - ((2, 4), 3, "Keyword"), - ((2, 8), 3, "Name"), - ((2, 11), 2, "Punctuation"), - ((2, 14), 2, "Operator"), - ((2, 17), 4, "Keyword"), - ((2, 21), 1, "Punctuation"), - ((3, 8), 2, "Keyword"), - ((3, 11), 3, "Name"), - ((3, 14), 1, "Punctuation"), - ((4, 12), 3, "Name"), - ((4, 15), 2, "Punctuation"), -] -avoid_types = [ - "class_definition", - "block", - "function_definition", - "if_statement", - "expression_statement", - "call", - "parameters", - "argument_list", - "module", - "type", -] -code_snippet_output = [ - ((1, 0), 5, "Keyword"), - ((1, 6), 3, "Name"), - ((1, 9), 1, "Punctuation"), - ((2, 4), 3, "Keyword"), - ((2, 8), 3, "Name"), - ((2, 11), 2, "Punctuation"), - ((2, 14), 2, "Operator"), - ((2, 17), 4, "Keyword"), - ((2, 21), 1, "Punctuation"), - ((3, 8), 2, "Keyword"), - ((3, 11), 3, "Name"), - ((3, 14), 1, "Punctuation"), - ((4, 12), 3, "Name"), - ((4, 15), 2, "Punctuation"), - ((5, 0), 5, "Name"), - ((5, 5), 1, "Punctuation"), - ((5, 6), 6, "String"), - ((5, 12), 1, "Punctuation"), -] -py_language: Callable[[], int] = language_functions["python"] -parser: Parser = Parser( - Language(py_language()) -) # Will be input along with code snippet - - -def test_tree_sitter_highlight(): - assert ( - tree_sitter_highlight( - Logger(""), - original_code_snippet, - "python", - minimal_python_mapping, - parser, - ) - == pygments_output - ) - - # Run a second time to ensure the tree updates properly - code_snippet = original_code_snippet + 'print("Boo!")' - assert ( - tree_sitter_highlight( - Logger(""), code_snippet, "python", minimal_python_mapping - ) - == code_snippet_output - ) - - -def test_make_mapping(): - code_snippet = original_code_snippet + 'print("Boo!")' - tree: Tree = parser.parse(bytes(code_snippet, "utf8")) - assert ( - make_unrefined_mapping( - tree, - code_snippet_output, - avoid_types, - ) - == minimal_python_mapping - ) - - -def test_edit_tree(): - tree = parser.parse(bytes(original_code_snippet, "utf8")) - - tree_sitter_output = node_to_tokens( - tree, minimal_python_mapping, Logger("") - ) - assert pygments_output == tree_sitter_output - - assert ( - edit_tree(original_code_snippet, original_code_snippet, tree, parser) - == tree - ) - - old_code = original_code_snippet - code_snippet = '"""' + original_code_snippet + '"""' - tree: Tree = edit_tree(old_code, code_snippet, tree, parser) - assert node_to_tokens(tree, minimal_python_mapping, Logger("")) == [ - ((1, 0), 13, "String"), - ((2, 4), 18, "String"), - ((3, 8), 7, "String"), - ((4, 12), 5, "String"), - ((5, 0), 3, "String"), - ] - - old_code = code_snippet - code_snippet = original_code_snippet + 'print("Boo!")' - tree: Tree = edit_tree(old_code, code_snippet, tree, parser) - output = node_to_tokens(tree, minimal_python_mapping, Logger("")) - - assert output == [ - ((1, 0), 5, "Keyword"), - ((1, 6), 3, "Name"), - ((1, 9), 1, "Punctuation"), - ((2, 4), 3, "Keyword"), - ((2, 8), 3, "Name"), - ((2, 11), 2, "Punctuation"), - ((2, 14), 2, "Operator"), - ((2, 17), 4, "Keyword"), - ((2, 21), 1, "Punctuation"), - ((3, 8), 2, "Keyword"), - ((3, 11), 3, "Name"), - ((3, 14), 1, "Punctuation"), - ((4, 12), 3, "Name"), - ((4, 15), 2, "Punctuation"), - ((5, 0), 5, "Name"), - ((5, 5), 1, "Punctuation"), - ((5, 6), 6, "String"), - ((5, 12), 1, "Punctuation"), - ] - - -if __name__ == "__main__": - test_edit_tree()