From a20305815e89fed853d36cd9ccb25260e4010a5e Mon Sep 17 00:00:00 2001 From: Benjamin Bannier Date: Thu, 16 May 2024 17:08:48 +0200 Subject: [PATCH] Use Python package for tree-sitter-zeek tree-sitter-zeek now provides a Python package we can directly depend on. With this we can avoid building and shipping the generated tree-sitter bindings. By using the "official upstream API" we can also simplify the way we parse input. --- .gitignore | 3 --- .pre-commit-config.yaml | 4 ++- pyproject.toml | 3 ++- tree-sitter-zeek | 1 - zeekscript/__init__.py | 3 +-- zeekscript/parser.py | 56 ----------------------------------------- zeekscript/script.py | 7 ++++-- 7 files changed, 11 insertions(+), 66 deletions(-) delete mode 160000 tree-sitter-zeek delete mode 100644 zeekscript/parser.py diff --git a/.gitignore b/.gitignore index d7acf78..6eb9e1c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,5 @@ build dist *.egg-info -zeekscript/zeek-language.so -zeekscript/zeek-language.exp -zeekscript/zeek-language.lib __pycache__ .idea/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 417703a..3dc22f3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,9 @@ repos: hooks: - id: pylint additional_dependencies: - - setuptools + - "setuptools" + - "tree-sitter>=0.21.3" + - "tree-sitter-zeek" - repo: https://github.com/psf/black rev: 23.10.1 diff --git a/pyproject.toml b/pyproject.toml index f7d5e6a..e8b232f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,10 +25,11 @@ keywords = [ "parsing", ] -requires-python = ">=3.8" +requires-python = ">=3.9" dependencies = [ "tree-sitter>=0.21.3", + "tree-sitter-zeek", ] [project.optional-dependencies] diff --git a/tree-sitter-zeek b/tree-sitter-zeek deleted file mode 160000 index b1fdab4..0000000 --- a/tree-sitter-zeek +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b1fdab493110b542a9deabecf911d8a148649fc2 diff --git a/zeekscript/__init__.py b/zeekscript/__init__.py index cbc80d9..a744ff2 100644 --- a/zeekscript/__init__.py +++ b/zeekscript/__init__.py @@ -1,11 +1,10 @@ """Wrapper around more low-level tests.""" __version__ = "1.2.9" -__all__ = ["cli", "error", "formatter", "node", "output", "parser", "script"] +__all__ = ["cli", "error", "formatter", "node", "output", "script"] from .cli import * from .error import * from .formatter import * from .node import * from .output import * -from .parser import * from .script import * diff --git a/zeekscript/parser.py b/zeekscript/parser.py deleted file mode 100644 index ae0d105..0000000 --- a/zeekscript/parser.py +++ /dev/null @@ -1,56 +0,0 @@ -"""This module contains parser tooling for the zeekscript package.""" -import os -import pathlib -import sys - -try: - # In order to use the tree-sitter parser we need to load the TS language .so - # the TS Python bindings compiled at package build time (via our setup.py - # tooling). We use the following helpers when available (starting with - # Python 3.9) to locate the it. With earlier Python versions we fall back to - # using local path navigation and hope for the best. - # https://importlib-resources.readthedocs.io/en/latest/using.html#file-system-or-zip-file - from importlib.resources import files, as_file -except ImportError: - - def files(_): - return pathlib.Path(os.path.dirname(os.path.realpath(__file__))) - - def as_file(source): - return source - - -try: - import tree_sitter -except ImportError: - print("This package requires the tree_sitter package.") - sys.exit(1) - - -class Parser: - """tree_sitter.Parser abstraction that takes care of loading the TS Zeek language.""" - - TS_PARSER = None # A tree_sitter.Parser singleton - - def __init__(self): - Parser.load_parser() - - def parse(self, text): - """Returns a tree_sitter.Tree for the given script text. - - This tree may have errors, as indicated via its root node's has_error - flag. - """ - return Parser.TS_PARSER.parse(text) - - @classmethod - def load_parser(cls): - if cls.TS_PARSER is None: - # Python voodoo to access the bindings library contained in this - # package regardless of how we're loading the package. Details: - # https://importlib-resources.readthedocs.io/en/latest/using.html#file-system-or-zip-file - source = files(__package__).joinpath("zeek-language.so") - with as_file(source) as lib: - zeek_lang = tree_sitter.Language(str(lib), "zeek") - cls.TS_PARSER = tree_sitter.Parser() - cls.TS_PARSER.set_language(zeek_lang) diff --git a/zeekscript/script.py b/zeekscript/script.py index 51d2c28..fb5bad4 100644 --- a/zeekscript/script.py +++ b/zeekscript/script.py @@ -5,11 +5,13 @@ import pathlib import sys +import tree_sitter +import tree_sitter_zeek + from .error import FileError, ParserError from .formatter import Formatter from .node import Node from .output import OutputStream -from .parser import Parser class Script: @@ -55,7 +57,8 @@ def parse(self): except OSError as err: raise FileError(str(err)) from err - self.ts_tree = Parser().parse(self.source) + parser = tree_sitter.Parser(tree_sitter.Language(tree_sitter_zeek.language())) + self.ts_tree = parser.parse(self.source) if self.ts_tree is None or self.ts_tree.root_node is None: # This is a hard parse error and we need to bail. Smaller errors get