From 181df0c36f497271bff52b71f4ac9377ceaaecfd Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Tue, 7 May 2024 09:26:47 +0200 Subject: [PATCH] Python: Also use tree-sitter 0.22.6 --- python/extractor/tsg-python/Cargo.Bazel.lock | 300 +++++++++++++----- python/extractor/tsg-python/Cargo.lock | 48 ++- python/extractor/tsg-python/Cargo.toml | 4 +- python/extractor/tsg-python/Makefile | 112 +++++++ python/extractor/tsg-python/Package.swift | 47 +++ python/extractor/tsg-python/binding.gyp | 30 ++ .../bindings/c/tree-sitter-tsg_python.h | 16 + .../bindings/c/tree-sitter-tsg_python.pc.in | 11 + .../tsg-python/bindings/go/binding.go | 13 + .../tsg-python/bindings/go/binding_test.go | 15 + .../extractor/tsg-python/bindings/go/go.mod | 5 + .../tsg-python/bindings/node/binding.cc | 20 ++ .../tsg-python/bindings/node/index.d.ts | 28 ++ .../tsg-python/bindings/node/index.js | 7 + .../python/tree_sitter_tsg_python/__init__.py | 5 + .../tree_sitter_tsg_python/__init__.pyi | 1 + .../python/tree_sitter_tsg_python/binding.c | 27 ++ .../python/tree_sitter_tsg_python/py.typed | 0 .../tsg-python/bindings/rust/build.rs | 22 ++ .../extractor/tsg-python/bindings/rust/lib.rs | 54 ++++ .../swift/TreeSitterTsgPython/tsg_python.h | 16 + python/extractor/tsg-python/grammar.js | 11 + python/extractor/tsg-python/package-lock.json | 6 + python/extractor/tsg-python/package.json | 53 ++++ python/extractor/tsg-python/pyproject.toml | 29 ++ .../extractor/tsg-python/rust-toolchain.toml | 2 +- python/extractor/tsg-python/setup.py | 60 ++++ python/extractor/tsg-python/src/grammar.json | 20 ++ .../extractor/tsg-python/src/node-types.json | 11 + python/extractor/tsg-python/src/parser.c | 178 +++++++++++ .../tsg-python/src/tree_sitter/alloc.h | 54 ++++ .../tsg-python/src/tree_sitter/array.h | 290 +++++++++++++++++ .../tsg-python/src/tree_sitter/parser.h | 265 ++++++++++++++++ python/extractor/tsg-python/tsp/Cargo.toml | 2 +- ruby/extractor/Cargo.lock | 2 +- ruby/extractor/cargo-bazel-lock.json | 4 +- 36 
files changed, 1669 insertions(+), 99 deletions(-) create mode 100644 python/extractor/tsg-python/Makefile create mode 100644 python/extractor/tsg-python/Package.swift create mode 100644 python/extractor/tsg-python/binding.gyp create mode 100644 python/extractor/tsg-python/bindings/c/tree-sitter-tsg_python.h create mode 100644 python/extractor/tsg-python/bindings/c/tree-sitter-tsg_python.pc.in create mode 100644 python/extractor/tsg-python/bindings/go/binding.go create mode 100644 python/extractor/tsg-python/bindings/go/binding_test.go create mode 100644 python/extractor/tsg-python/bindings/go/go.mod create mode 100644 python/extractor/tsg-python/bindings/node/binding.cc create mode 100644 python/extractor/tsg-python/bindings/node/index.d.ts create mode 100644 python/extractor/tsg-python/bindings/node/index.js create mode 100644 python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/__init__.py create mode 100644 python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/__init__.pyi create mode 100644 python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/binding.c create mode 100644 python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/py.typed create mode 100644 python/extractor/tsg-python/bindings/rust/build.rs create mode 100644 python/extractor/tsg-python/bindings/rust/lib.rs create mode 100644 python/extractor/tsg-python/bindings/swift/TreeSitterTsgPython/tsg_python.h create mode 100644 python/extractor/tsg-python/grammar.js create mode 100644 python/extractor/tsg-python/package-lock.json create mode 100644 python/extractor/tsg-python/package.json create mode 100644 python/extractor/tsg-python/pyproject.toml create mode 100644 python/extractor/tsg-python/setup.py create mode 100644 python/extractor/tsg-python/src/grammar.json create mode 100644 python/extractor/tsg-python/src/node-types.json create mode 100644 python/extractor/tsg-python/src/parser.c create mode 100644 
python/extractor/tsg-python/src/tree_sitter/alloc.h create mode 100644 python/extractor/tsg-python/src/tree_sitter/array.h create mode 100644 python/extractor/tsg-python/src/tree_sitter/parser.h diff --git a/python/extractor/tsg-python/Cargo.Bazel.lock b/python/extractor/tsg-python/Cargo.Bazel.lock index e3b5a249e4fc9..d62b9b45a19a8 100644 --- a/python/extractor/tsg-python/Cargo.Bazel.lock +++ b/python/extractor/tsg-python/Cargo.Bazel.lock @@ -1,5 +1,5 @@ { - "checksum": "35a1ce4b6c4f997c496c11d3a8fcfaadc5833dfd41bebb022941687d73dde159", + "checksum": "5cc291d6260540e9e0364edc5927f547083e68247e5a694266279544ea1cf31c", "crates": { "ahash 0.4.7": { "name": "ahash", @@ -37,14 +37,14 @@ ], "license_file": null }, - "aho-corasick 0.7.18": { + "aho-corasick 1.1.3": { "name": "aho-corasick", - "version": "0.7.18", + "version": "1.1.3", "package_url": "https://github.com/BurntSushi/aho-corasick", "repository": { "Http": { - "url": "https://static.crates.io/crates/aho-corasick/0.7.18/download", - "sha256": "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" + "url": "https://static.crates.io/crates/aho-corasick/1.1.3/download", + "sha256": "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" } }, "targets": [ @@ -65,7 +65,7 @@ ], "crate_features": { "common": [ - "default", + "perf-literal", "std" ], "selects": {} @@ -73,16 +73,16 @@ "deps": { "common": [ { - "id": "memchr 2.4.1", + "id": "memchr 2.7.2", "target": "memchr" } ], "selects": {} }, - "edition": "2018", - "version": "0.7.18" + "edition": "2021", + "version": "1.1.3" }, - "license": "Unlicense/MIT", + "license": "Unlicense OR MIT", "license_ids": [ "MIT", "Unlicense" @@ -301,14 +301,14 @@ ], "license_file": null }, - "cc 1.0.70": { + "cc 1.0.97": { "name": "cc", - "version": "1.0.70", - "package_url": "https://github.com/alexcrichton/cc-rs", + "version": "1.0.97", + "package_url": "https://github.com/rust-lang/cc-rs", "repository": { "Http": { - "url": 
"https://static.crates.io/crates/cc/1.0.70/download", - "sha256": "d26a6ce4b6a484fa3edb70f7efa6fc430fd2b87285fe8b84304fd0936faa0dc0" + "url": "https://static.crates.io/crates/cc/1.0.97/download", + "sha256": "099a5357d84c4c61eb35fc8eafa9a79a902c2f76911e5747ced4e032edd8d9b4" } }, "targets": [ @@ -328,9 +328,9 @@ "**" ], "edition": "2018", - "version": "1.0.70" + "version": "1.0.97" }, - "license": "MIT/Apache-2.0", + "license": "MIT OR Apache-2.0", "license_ids": [ "Apache-2.0", "MIT" @@ -711,14 +711,14 @@ ], "license_file": null }, - "memchr 2.4.1": { + "memchr 2.7.2": { "name": "memchr", - "version": "2.4.1", + "version": "2.7.2", "package_url": "https://github.com/BurntSushi/memchr", "repository": { "Http": { - "url": "https://static.crates.io/crates/memchr/2.4.1/download", - "sha256": "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + "url": "https://static.crates.io/crates/memchr/2.7.2/download", + "sha256": "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" } }, "targets": [ @@ -730,15 +730,6 @@ "**/*.rs" ] } - }, - { - "BuildScript": { - "crate_name": "build_script_build", - "crate_root": "build.rs", - "srcs": [ - "**/*.rs" - ] - } } ], "library_target_name": "memchr", @@ -748,29 +739,15 @@ ], "crate_features": { "common": [ - "default", + "alloc", "std" ], "selects": {} }, - "deps": { - "common": [ - { - "id": "memchr 2.4.1", - "target": "build_script_build" - } - ], - "selects": {} - }, - "edition": "2018", - "version": "2.4.1" - }, - "build_script_attrs": { - "data_glob": [ - "**" - ] + "edition": "2021", + "version": "2.7.2" }, - "license": "Unlicense/MIT", + "license": "Unlicense OR MIT", "license_ids": [ "MIT", "Unlicense" @@ -899,14 +876,14 @@ ], "license_file": null }, - "regex 1.5.5": { + "regex 1.10.4": { "name": "regex", - "version": "1.5.5", + "version": "1.10.4", "package_url": "https://github.com/rust-lang/regex", "repository": { "Http": { - "url": "https://static.crates.io/crates/regex/1.5.5/download", - 
"sha256": "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" + "url": "https://static.crates.io/crates/regex/1.10.4/download", + "sha256": "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" } }, "targets": [ @@ -927,14 +904,14 @@ ], "crate_features": { "common": [ - "aho-corasick", "default", - "memchr", "perf", + "perf-backtrack", "perf-cache", "perf-dfa", "perf-inline", "perf-literal", + "perf-onepass", "std", "unicode", "unicode-age", @@ -950,22 +927,106 @@ "deps": { "common": [ { - "id": "aho-corasick 0.7.18", + "id": "aho-corasick 1.1.3", "target": "aho_corasick" }, { - "id": "memchr 2.4.1", + "id": "memchr 2.7.2", "target": "memchr" }, { - "id": "regex-syntax 0.6.25", + "id": "regex-automata 0.4.6", + "target": "regex_automata" + }, + { + "id": "regex-syntax 0.8.3", "target": "regex_syntax" } ], "selects": {} }, - "edition": "2018", - "version": "1.5.5" + "edition": "2021", + "version": "1.10.4" + }, + "license": "MIT OR Apache-2.0", + "license_ids": [ + "Apache-2.0", + "MIT" + ], + "license_file": null + }, + "regex-automata 0.4.6": { + "name": "regex-automata", + "version": "0.4.6", + "package_url": "https://github.com/rust-lang/regex/tree/master/regex-automata", + "repository": { + "Http": { + "url": "https://static.crates.io/crates/regex-automata/0.4.6/download", + "sha256": "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" + } + }, + "targets": [ + { + "Library": { + "crate_name": "regex_automata", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "regex_automata", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "crate_features": { + "common": [ + "alloc", + "dfa-onepass", + "hybrid", + "meta", + "nfa-backtrack", + "nfa-pikevm", + "nfa-thompson", + "perf-inline", + "perf-literal", + "perf-literal-multisubstring", + "perf-literal-substring", + "std", + "syntax", + "unicode", + "unicode-age", + "unicode-bool", + "unicode-case", + "unicode-gencat", + 
"unicode-perl", + "unicode-script", + "unicode-segment", + "unicode-word-boundary" + ], + "selects": {} + }, + "deps": { + "common": [ + { + "id": "aho-corasick 1.1.3", + "target": "aho_corasick" + }, + { + "id": "memchr 2.7.2", + "target": "memchr" + }, + { + "id": "regex-syntax 0.8.3", + "target": "regex_syntax" + } + ], + "selects": {} + }, + "edition": "2021", + "version": "0.4.6" }, "license": "MIT OR Apache-2.0", "license_ids": [ @@ -974,14 +1035,14 @@ ], "license_file": null }, - "regex-syntax 0.6.25": { + "regex-syntax 0.8.3": { "name": "regex-syntax", - "version": "0.6.25", - "package_url": "https://github.com/rust-lang/regex", + "version": "0.8.3", + "package_url": "https://github.com/rust-lang/regex/tree/master/regex-syntax", "repository": { "Http": { - "url": "https://static.crates.io/crates/regex-syntax/0.6.25/download", - "sha256": "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + "url": "https://static.crates.io/crates/regex-syntax/0.8.3/download", + "sha256": "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" } }, "targets": [ @@ -1003,6 +1064,7 @@ "crate_features": { "common": [ "default", + "std", "unicode", "unicode-age", "unicode-bool", @@ -1014,10 +1076,10 @@ ], "selects": {} }, - "edition": "2018", - "version": "0.6.25" + "edition": "2021", + "version": "0.8.3" }, - "license": "MIT/Apache-2.0", + "license": "MIT OR Apache-2.0", "license_ids": [ "Apache-2.0", "MIT" @@ -1601,7 +1663,7 @@ "deps": { "common": [ { - "id": "regex 1.5.5", + "id": "regex 1.10.4", "target": "regex" }, { @@ -1621,7 +1683,7 @@ "deps": { "common": [ { - "id": "cc 1.0.70", + "id": "cc 1.0.97", "target": "cc" } ], @@ -1634,6 +1696,78 @@ ], "license_file": null }, + "tree-sitter 0.22.6": { + "name": "tree-sitter", + "version": "0.22.6", + "package_url": "https://github.com/tree-sitter/tree-sitter", + "repository": { + "Http": { + "url": "https://static.crates.io/crates/tree-sitter/0.22.6/download", + "sha256": 
"df7cc499ceadd4dcdf7ec6d4cbc34ece92c3fa07821e287aedecd4416c516dca" + } + }, + "targets": [ + { + "Library": { + "crate_name": "tree_sitter", + "crate_root": "binding_rust/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + }, + { + "BuildScript": { + "crate_name": "build_script_build", + "crate_root": "binding_rust/build.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "tree_sitter", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "deps": { + "common": [ + { + "id": "regex 1.10.4", + "target": "regex" + }, + { + "id": "tree-sitter 0.22.6", + "target": "build_script_build" + } + ], + "selects": {} + }, + "edition": "2021", + "version": "0.22.6" + }, + "build_script_attrs": { + "data_glob": [ + "**" + ], + "deps": { + "common": [ + { + "id": "cc 1.0.97", + "target": "cc" + } + ], + "selects": {} + }, + "links": "tree-sitter" + }, + "license": "MIT", + "license_ids": [ + "MIT" + ], + "license_file": null + }, "tree-sitter-graph 0.7.0": { "name": "tree-sitter-graph", "version": "0.7.0", @@ -1667,7 +1801,7 @@ "target": "log" }, { - "id": "regex 1.5.5", + "id": "regex 1.10.4", "target": "regex" }, { @@ -1729,7 +1863,7 @@ "target": "clap" }, { - "id": "regex 1.5.5", + "id": "regex 1.10.4", "target": "regex" }, { @@ -1745,7 +1879,7 @@ "target": "thiserror" }, { - "id": "tree-sitter 0.20.4", + "id": "tree-sitter 0.22.6", "target": "tree_sitter" }, { @@ -1755,7 +1889,7 @@ ], "selects": {} }, - "edition": "2018", + "edition": "2021", "version": "0.1.0" }, "license": null, @@ -1795,7 +1929,7 @@ "deps": { "common": [ { - "id": "tree-sitter 0.20.4", + "id": "tree-sitter 0.22.6", "target": "tree_sitter" }, { @@ -1815,11 +1949,20 @@ "deps": { "common": [ { - "id": "cc 1.0.70", + "id": "cc 1.0.97", "target": "cc" } ], "selects": {} + }, + "link_deps": { + "common": [ + { + "id": "tree-sitter 0.22.6", + "target": "tree_sitter" + } + ], + "selects": {} } }, "license": "MIT", @@ -1986,7 +2129,6 @@ "crate_features": { "common": [ "consoleapi", - 
"errhandlingapi", "minwinbase", "minwindef", "processenv", @@ -2333,13 +2475,13 @@ }, "direct_deps": [ "anyhow 1.0.44", - "cc 1.0.70", + "cc 1.0.97", "clap 2.33.3", - "regex 1.5.5", + "regex 1.10.4", "smallvec 1.6.1", "string-interner 0.12.2", "thiserror 1.0.29", - "tree-sitter 0.20.4", + "tree-sitter 0.22.6", "tree-sitter-graph 0.7.0" ], "direct_dev_deps": [] diff --git a/python/extractor/tsg-python/Cargo.lock b/python/extractor/tsg-python/Cargo.lock index c179dd3036f91..e2841f3875cbe 100644 --- a/python/extractor/tsg-python/Cargo.lock +++ b/python/extractor/tsg-python/Cargo.lock @@ -10,9 +10,9 @@ checksum = "739f4a8db6605981345c5654f3a85b056ce52f37a39d34da03f25bf2151ea16e" [[package]] name = "aho-corasick" -version = "0.7.18" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] @@ -51,9 +51,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "cc" -version = "1.0.70" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26a6ce4b6a484fa3edb70f7efa6fc430fd2b87285fe8b84304fd0936faa0dc0" +checksum = "099a5357d84c4c61eb35fc8eafa9a79a902c2f76911e5747ced4e032edd8d9b4" [[package]] name = "cfg-if" @@ -117,9 +117,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.4.1" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "proc-macro2" @@ -141,9 +141,21 @@ dependencies = [ [[package]] name = "regex" -version = "1.5.5" +version = "1.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", @@ -152,9 +164,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.25" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "ryu" @@ -251,6 +263,16 @@ dependencies = [ "regex", ] +[[package]] +name = "tree-sitter" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df7cc499ceadd4dcdf7ec6d4cbc34ece92c3fa07821e287aedecd4416c516dca" +dependencies = [ + "cc", + "regex", +] + [[package]] name = "tree-sitter-graph" version = "0.7.0" @@ -264,7 +286,7 @@ dependencies = [ "smallvec", "string-interner", "thiserror", - "tree-sitter", + "tree-sitter 0.20.4", ] [[package]] @@ -277,7 +299,7 @@ dependencies = [ "smallvec", "string-interner", "thiserror", - "tree-sitter", + "tree-sitter 0.22.6", "tree-sitter-graph", "tsp", ] @@ -287,7 +309,7 @@ name = "tsp" version = "0.19.0" dependencies = [ "cc", - "tree-sitter", + "tree-sitter 0.22.6", ] [[package]] diff --git a/python/extractor/tsg-python/Cargo.toml b/python/extractor/tsg-python/Cargo.toml index 1266f94f2b204..919235d115d82 100644 --- a/python/extractor/tsg-python/Cargo.toml +++ b/python/extractor/tsg-python/Cargo.toml @@ -4,7 +4,7 @@ name = "tsg-python" version = "0.1.0" authors = ["Taus Brock-Nannestad "] -edition = "2018" +edition = "2021" # When changing/updating these, the 
`Cargo.Bazel.lock` file has to be regenerated. # Run `CARGO_BAZEL_REPIN=true CARGO_BAZEL_REPIN_ONLY=py_deps ./tools/bazel sync --only=py_deps` @@ -20,7 +20,7 @@ anyhow = "1.0" regex = "1" smallvec = { version="1.6", features=["union"] } thiserror = "1.0" -tree-sitter = "0.20.4" +tree-sitter = ">= 0.22.6" tree-sitter-graph = "0.7.0" tsp = {path = "tsp"} clap = "2.32" diff --git a/python/extractor/tsg-python/Makefile b/python/extractor/tsg-python/Makefile new file mode 100644 index 0000000000000..1876dea8e6017 --- /dev/null +++ b/python/extractor/tsg-python/Makefile @@ -0,0 +1,112 @@ +VERSION := 0.0.1 + +LANGUAGE_NAME := tree-sitter-tsg_python + +# repository +SRC_DIR := src + +PARSER_REPO_URL := $(shell git -C $(SRC_DIR) remote get-url origin 2>/dev/null) + +ifeq ($(PARSER_URL),) + PARSER_URL := $(subst .git,,$(PARSER_REPO_URL)) +ifeq ($(shell echo $(PARSER_URL) | grep '^[a-z][-+.0-9a-z]*://'),) + PARSER_URL := $(subst :,/,$(PARSER_URL)) + PARSER_URL := $(subst git@,https://,$(PARSER_URL)) +endif +endif + +TS ?= tree-sitter + +# ABI versioning +SONAME_MAJOR := $(word 1,$(subst ., ,$(VERSION))) +SONAME_MINOR := $(word 2,$(subst ., ,$(VERSION))) + +# install directory layout +PREFIX ?= /usr/local +INCLUDEDIR ?= $(PREFIX)/include +LIBDIR ?= $(PREFIX)/lib +PCLIBDIR ?= $(LIBDIR)/pkgconfig + +# source/object files +PARSER := $(SRC_DIR)/parser.c +EXTRAS := $(filter-out $(PARSER),$(wildcard $(SRC_DIR)/*.c)) +OBJS := $(patsubst %.c,%.o,$(PARSER) $(EXTRAS)) + +# flags +ARFLAGS ?= rcs +override CFLAGS += -I$(SRC_DIR) -std=c11 -fPIC + +# OS-specific bits +ifeq ($(OS),Windows_NT) + $(error "Windows is not supported") +else ifeq ($(shell uname),Darwin) + SOEXT = dylib + SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib + SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib + LINKSHARED := $(LINKSHARED)-dynamiclib -Wl, + ifneq ($(ADDITIONAL_LIBS),) + LINKSHARED := $(LINKSHARED)$(ADDITIONAL_LIBS), + endif + LINKSHARED := 
$(LINKSHARED)-install_name,$(LIBDIR)/lib$(LANGUAGE_NAME).$(SONAME_MAJOR).dylib,-rpath,@executable_path/../Frameworks +else + SOEXT = so + SOEXTVER_MAJOR = so.$(SONAME_MAJOR) + SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR) + LINKSHARED := $(LINKSHARED)-shared -Wl, + ifneq ($(ADDITIONAL_LIBS),) + LINKSHARED := $(LINKSHARED)$(ADDITIONAL_LIBS) + endif + LINKSHARED := $(LINKSHARED)-soname,lib$(LANGUAGE_NAME).so.$(SONAME_MAJOR) +endif +ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),) + PCLIBDIR := $(PREFIX)/libdata/pkgconfig +endif + +all: lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) $(LANGUAGE_NAME).pc + +lib$(LANGUAGE_NAME).a: $(OBJS) + $(AR) $(ARFLAGS) $@ $^ + +lib$(LANGUAGE_NAME).$(SOEXT): $(OBJS) + $(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@ +ifneq ($(STRIP),) + $(STRIP) $@ +endif + +$(LANGUAGE_NAME).pc: bindings/c/$(LANGUAGE_NAME).pc.in + sed -e 's|@URL@|$(PARSER_URL)|' \ + -e 's|@VERSION@|$(VERSION)|' \ + -e 's|@LIBDIR@|$(LIBDIR)|' \ + -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \ + -e 's|@REQUIRES@|$(REQUIRES)|' \ + -e 's|@ADDITIONAL_LIBS@|$(ADDITIONAL_LIBS)|' \ + -e 's|=$(PREFIX)|=$${prefix}|' \ + -e 's|@PREFIX@|$(PREFIX)|' $< > $@ + +$(PARSER): $(SRC_DIR)/grammar.json + $(TS) generate --no-bindings $^ + +install: all + install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)' + install -m644 bindings/c/$(LANGUAGE_NAME).h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h + install -m644 $(LANGUAGE_NAME).pc '$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc + install -m644 lib$(LANGUAGE_NAME).a '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a + install -m755 lib$(LANGUAGE_NAME).$(SOEXT) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) + ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) + ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) + +uninstall: + $(RM) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a \ + 
'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) \ + '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) \ + '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) \ + '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h \ + '$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc + +clean: + $(RM) $(OBJS) $(LANGUAGE_NAME).pc lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) + +test: + $(TS) test + +.PHONY: all install uninstall clean test diff --git a/python/extractor/tsg-python/Package.swift b/python/extractor/tsg-python/Package.swift new file mode 100644 index 0000000000000..ca68797873fd4 --- /dev/null +++ b/python/extractor/tsg-python/Package.swift @@ -0,0 +1,47 @@ +// swift-tools-version:5.3 +import PackageDescription + +let package = Package( + name: "TreeSitterTsgPython", + products: [ + .library(name: "TreeSitterTsgPython", targets: ["TreeSitterTsgPython"]), + ], + dependencies: [], + targets: [ + .target(name: "TreeSitterTsgPython", + path: ".", + exclude: [ + "Cargo.toml", + "Makefile", + "binding.gyp", + "bindings/c", + "bindings/go", + "bindings/node", + "bindings/python", + "bindings/rust", + "prebuilds", + "grammar.js", + "package.json", + "package-lock.json", + "pyproject.toml", + "setup.py", + "test", + "examples", + ".editorconfig", + ".github", + ".gitignore", + ".gitattributes", + ".gitmodules", + ], + sources: [ + "src/parser.c", + // NOTE: if your language has an external scanner, add it here. 
+ ], + resources: [ + .copy("queries") + ], + publicHeadersPath: "bindings/swift", + cSettings: [.headerSearchPath("src")]) + ], + cLanguageStandard: .c11 +) diff --git a/python/extractor/tsg-python/binding.gyp b/python/extractor/tsg-python/binding.gyp new file mode 100644 index 0000000000000..f6d0120ec5d48 --- /dev/null +++ b/python/extractor/tsg-python/binding.gyp @@ -0,0 +1,30 @@ +{ + "targets": [ + { + "target_name": "tree_sitter_tsg_python_binding", + "dependencies": [ + " + +typedef struct TSLanguage TSLanguage; + +extern "C" TSLanguage *tree_sitter_tsg_python(); + +// "tree-sitter", "language" hashed with BLAKE2 +const napi_type_tag LANGUAGE_TYPE_TAG = { + 0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16 +}; + +Napi::Object Init(Napi::Env env, Napi::Object exports) { + exports["name"] = Napi::String::New(env, "tsg_python"); + auto language = Napi::External::New(env, tree_sitter_tsg_python()); + language.TypeTag(&LANGUAGE_TYPE_TAG); + exports["language"] = language; + return exports; +} + +NODE_API_MODULE(tree_sitter_tsg_python_binding, Init) diff --git a/python/extractor/tsg-python/bindings/node/index.d.ts b/python/extractor/tsg-python/bindings/node/index.d.ts new file mode 100644 index 0000000000000..efe259eed03f0 --- /dev/null +++ b/python/extractor/tsg-python/bindings/node/index.d.ts @@ -0,0 +1,28 @@ +type BaseNode = { + type: string; + named: boolean; +}; + +type ChildNode = { + multiple: boolean; + required: boolean; + types: BaseNode[]; +}; + +type NodeInfo = + | (BaseNode & { + subtypes: BaseNode[]; + }) + | (BaseNode & { + fields: { [name: string]: ChildNode }; + children: ChildNode[]; + }); + +type Language = { + name: string; + language: unknown; + nodeTypeInfo: NodeInfo[]; +}; + +declare const language: Language; +export = language; diff --git a/python/extractor/tsg-python/bindings/node/index.js b/python/extractor/tsg-python/bindings/node/index.js new file mode 100644 index 0000000000000..6657bcf42decc --- /dev/null +++ 
b/python/extractor/tsg-python/bindings/node/index.js @@ -0,0 +1,7 @@ +const root = require("path").join(__dirname, "..", ".."); + +module.exports = require("node-gyp-build")(root); + +try { + module.exports.nodeTypeInfo = require("../../src/node-types.json"); +} catch (_) {} diff --git a/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/__init__.py b/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/__init__.py new file mode 100644 index 0000000000000..67aac2959f9c8 --- /dev/null +++ b/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/__init__.py @@ -0,0 +1,5 @@ +"TsgPython grammar for tree-sitter" + +from ._binding import language + +__all__ = ["language"] diff --git a/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/__init__.pyi b/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/__init__.pyi new file mode 100644 index 0000000000000..5416666fc300c --- /dev/null +++ b/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/__init__.pyi @@ -0,0 +1 @@ +def language() -> int: ... 
diff --git a/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/binding.c b/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/binding.c new file mode 100644 index 0000000000000..bffec9d92fbe6 --- /dev/null +++ b/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/binding.c @@ -0,0 +1,27 @@ +#include + +typedef struct TSLanguage TSLanguage; + +TSLanguage *tree_sitter_tsg_python(void); + +static PyObject* _binding_language(PyObject *self, PyObject *args) { + return PyLong_FromVoidPtr(tree_sitter_tsg_python()); +} + +static PyMethodDef methods[] = { + {"language", _binding_language, METH_NOARGS, + "Get the tree-sitter language for this grammar."}, + {NULL, NULL, 0, NULL} +}; + +static struct PyModuleDef module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "_binding", + .m_doc = NULL, + .m_size = -1, + .m_methods = methods +}; + +PyMODINIT_FUNC PyInit__binding(void) { + return PyModule_Create(&module); +} diff --git a/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/py.typed b/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/py.typed new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/python/extractor/tsg-python/bindings/rust/build.rs b/python/extractor/tsg-python/bindings/rust/build.rs new file mode 100644 index 0000000000000..cf8024ee7cbf5 --- /dev/null +++ b/python/extractor/tsg-python/bindings/rust/build.rs @@ -0,0 +1,22 @@ +fn main() { + let src_dir = std::path::Path::new("src"); + + let mut c_config = cc::Build::new(); + c_config.std("c11").include(src_dir); + + #[cfg(target_env = "msvc")] + c_config.flag("-utf-8"); + + let parser_path = src_dir.join("parser.c"); + c_config.file(&parser_path); + println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap()); + + // NOTE: if your language uses an external scanner, uncomment this block: + /* + let scanner_path = src_dir.join("scanner.c"); + c_config.file(&scanner_path); + 
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap()); + */ + + c_config.compile("tree-sitter-tsg_python"); +} diff --git a/python/extractor/tsg-python/bindings/rust/lib.rs b/python/extractor/tsg-python/bindings/rust/lib.rs new file mode 100644 index 0000000000000..4ac467a69ea27 --- /dev/null +++ b/python/extractor/tsg-python/bindings/rust/lib.rs @@ -0,0 +1,54 @@ +//! This crate provides TsgPython language support for the [tree-sitter][] parsing library. +//! +//! Typically, you will use the [language][language func] function to add this language to a +//! tree-sitter [Parser][], and then use the parser to parse some code: +//! +//! ``` +//! let code = r#" +//! "#; +//! let mut parser = tree_sitter::Parser::new(); +//! parser.set_language(&tree_sitter_tsg_python::language()).expect("Error loading TsgPython grammar"); +//! let tree = parser.parse(code, None).unwrap(); +//! assert!(!tree.root_node().has_error()); +//! ``` +//! +//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html +//! [language func]: fn.language.html +//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html +//! [tree-sitter]: https://tree-sitter.github.io/ + +use tree_sitter::Language; + +extern "C" { + fn tree_sitter_tsg_python() -> Language; +} + +/// Get the tree-sitter [Language][] for this grammar. +/// +/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html +pub fn language() -> Language { + unsafe { tree_sitter_tsg_python() } +} + +/// The content of the [`node-types.json`][] file for this grammar. 
+/// +/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types +pub const NODE_TYPES: &str = include_str!("../../src/node-types.json"); + +// Uncomment these to include any queries that this grammar contains + +// pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm"); +// pub const INJECTIONS_QUERY: &str = include_str!("../../queries/injections.scm"); +// pub const LOCALS_QUERY: &str = include_str!("../../queries/locals.scm"); +// pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm"); + +#[cfg(test)] +mod tests { + #[test] + fn test_can_load_grammar() { + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&super::language()) + .expect("Error loading TsgPython grammar"); + } +} diff --git a/python/extractor/tsg-python/bindings/swift/TreeSitterTsgPython/tsg_python.h b/python/extractor/tsg-python/bindings/swift/TreeSitterTsgPython/tsg_python.h new file mode 100644 index 0000000000000..74dc05bd3b310 --- /dev/null +++ b/python/extractor/tsg-python/bindings/swift/TreeSitterTsgPython/tsg_python.h @@ -0,0 +1,16 @@ +#ifndef TREE_SITTER_TSG_PYTHON_H_ +#define TREE_SITTER_TSG_PYTHON_H_ + +typedef struct TSLanguage TSLanguage; + +#ifdef __cplusplus +extern "C" { +#endif + +const TSLanguage *tree_sitter_tsg_python(void); + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_TSG_PYTHON_H_ diff --git a/python/extractor/tsg-python/grammar.js b/python/extractor/tsg-python/grammar.js new file mode 100644 index 0000000000000..87da9c2310ace --- /dev/null +++ b/python/extractor/tsg-python/grammar.js @@ -0,0 +1,11 @@ +/// +// @ts-check + +module.exports = grammar({ + name: "tsg_python", + + rules: { + // TODO: add the actual grammar rules + source_file: $ => "hello" + } +}); diff --git a/python/extractor/tsg-python/package-lock.json b/python/extractor/tsg-python/package-lock.json new file mode 100644 index 0000000000000..31eed2f7bb12a --- /dev/null +++ 
b/python/extractor/tsg-python/package-lock.json @@ -0,0 +1,6 @@ +{ + "name": "tsg-python", + "lockfileVersion": 3, + "requires": true, + "packages": {} +} diff --git a/python/extractor/tsg-python/package.json b/python/extractor/tsg-python/package.json new file mode 100644 index 0000000000000..390c22ac097bf --- /dev/null +++ b/python/extractor/tsg-python/package.json @@ -0,0 +1,53 @@ +{ + "name": "tree-sitter-tsg-python", + "version": "0.0.1", + "description": "TsgPython grammar for tree-sitter", + "repository": "github:tree-sitter/tree-sitter-tsg-python", + "license": "MIT", + "main": "bindings/node", + "types": "bindings/node", + "keywords": [ + "incremental", + "parsing", + "tree-sitter", + "tsg_python" + ], + "files": [ + "grammar.js", + "binding.gyp", + "prebuilds/**", + "bindings/node/*", + "queries/*", + "src/**" + ], + "dependencies": { + "node-addon-api": "^7.1.0", + "node-gyp-build": "^4.8.0" + }, + "devDependencies": { + "prebuildify": "^6.0.0", + "tree-sitter-cli": "^0.22.6" + }, + "peerDependencies": { + "tree-sitter": "^0.21.0" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + }, + "scripts": { + "install": "node-gyp-build", + "prebuildify": "prebuildify --napi --strip", + "build": "tree-sitter generate --no-bindings", + "build-wasm": "tree-sitter build --wasm", + "test": "tree-sitter test", + "parse": "tree-sitter parse" + }, + "tree-sitter": [ + { + "scope": "source.tsg_python", + "injection-regex": "^tsg_python$" + } + ] +} diff --git a/python/extractor/tsg-python/pyproject.toml b/python/extractor/tsg-python/pyproject.toml new file mode 100644 index 0000000000000..12ccf8442f9ec --- /dev/null +++ b/python/extractor/tsg-python/pyproject.toml @@ -0,0 +1,29 @@ +[build-system] +requires = ["setuptools>=42", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "tree-sitter-tsg-python" +description = "TsgPython grammar for tree-sitter" +version = "0.0.1" +keywords = ["incremental", "parsing", "tree-sitter", 
"tsg-python"] +classifiers = [ + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Topic :: Software Development :: Compilers", + "Topic :: Text Processing :: Linguistic", + "Typing :: Typed" +] +requires-python = ">=3.8" +license.text = "MIT" +readme = "README.md" + +[project.urls] +Homepage = "https://github.com/tree-sitter/tree-sitter-tsg-python" + +[project.optional-dependencies] +core = ["tree-sitter~=0.21"] + +[tool.cibuildwheel] +build = "cp38-*" +build-frontend = "build" diff --git a/python/extractor/tsg-python/rust-toolchain.toml b/python/extractor/tsg-python/rust-toolchain.toml index fe5c5df29ffcd..92361581a978a 100644 --- a/python/extractor/tsg-python/rust-toolchain.toml +++ b/python/extractor/tsg-python/rust-toolchain.toml @@ -2,6 +2,6 @@ # extractor. It is set to the lowest version of Rust we want to support. [toolchain] -channel = "1.68" +channel = "1.70" profile = "minimal" components = [ "rustfmt" ] diff --git a/python/extractor/tsg-python/setup.py b/python/extractor/tsg-python/setup.py new file mode 100644 index 0000000000000..9f395d46ab642 --- /dev/null +++ b/python/extractor/tsg-python/setup.py @@ -0,0 +1,60 @@ +from os.path import isdir, join +from platform import system + +from setuptools import Extension, find_packages, setup +from setuptools.command.build import build +from wheel.bdist_wheel import bdist_wheel + + +class Build(build): + def run(self): + if isdir("queries"): + dest = join(self.build_lib, "tree_sitter_tsg_python", "queries") + self.copy_tree("queries", dest) + super().run() + + +class BdistWheel(bdist_wheel): + def get_tag(self): + python, abi, platform = super().get_tag() + if python.startswith("cp"): + python, abi = "cp38", "abi3" + return python, abi, platform + + +setup( + packages=find_packages("bindings/python"), + package_dir={"": "bindings/python"}, + package_data={ + "tree_sitter_tsg_python": ["*.pyi", "py.typed"], + "tree_sitter_tsg_python.queries": ["*.scm"], + }, + 
ext_package="tree_sitter_tsg_python", + ext_modules=[ + Extension( + name="_binding", + sources=[ + "bindings/python/tree_sitter_tsg_python/binding.c", + "src/parser.c", + # NOTE: if your language uses an external scanner, add it here. + ], + extra_compile_args=[ + "-std=c11", + ] if system() != "Windows" else [ + "/std:c11", + "/utf-8", + ], + define_macros=[ + ("Py_LIMITED_API", "0x03080000"), + ("PY_SSIZE_T_CLEAN", None) + ], + include_dirs=["src"], + py_limited_api=True, + ) + ], + cmdclass={ + "build": Build, + "bdist_wheel": BdistWheel + }, + zip_safe=False +) diff --git a/python/extractor/tsg-python/src/grammar.json b/python/extractor/tsg-python/src/grammar.json new file mode 100644 index 0000000000000..b607032352a9d --- /dev/null +++ b/python/extractor/tsg-python/src/grammar.json @@ -0,0 +1,20 @@ +{ + "name": "tsg_python", + "rules": { + "source_file": { + "type": "STRING", + "value": "hello" + } + }, + "extras": [ + { + "type": "PATTERN", + "value": "\\s" + } + ], + "conflicts": [], + "precedences": [], + "externals": [], + "inline": [], + "supertypes": [] +} diff --git a/python/extractor/tsg-python/src/node-types.json b/python/extractor/tsg-python/src/node-types.json new file mode 100644 index 0000000000000..43a64428b9118 --- /dev/null +++ b/python/extractor/tsg-python/src/node-types.json @@ -0,0 +1,11 @@ +[ + { + "type": "source_file", + "named": true, + "fields": {} + }, + { + "type": "hello", + "named": false + } +] \ No newline at end of file diff --git a/python/extractor/tsg-python/src/parser.c b/python/extractor/tsg-python/src/parser.c new file mode 100644 index 0000000000000..19f3840dffda0 --- /dev/null +++ b/python/extractor/tsg-python/src/parser.c @@ -0,0 +1,178 @@ +#include "tree_sitter/parser.h" + +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#endif + +#define LANGUAGE_VERSION 14 +#define STATE_COUNT 4 +#define LARGE_STATE_COUNT 2 +#define SYMBOL_COUNT 3 +#define ALIAS_COUNT 0 
+#define TOKEN_COUNT 2 +#define EXTERNAL_TOKEN_COUNT 0 +#define FIELD_COUNT 0 +#define MAX_ALIAS_SEQUENCE_LENGTH 1 +#define PRODUCTION_ID_COUNT 1 + +enum ts_symbol_identifiers { + anon_sym_hello = 1, + sym_source_file = 2, +}; + +static const char * const ts_symbol_names[] = { + [ts_builtin_sym_end] = "end", + [anon_sym_hello] = "hello", + [sym_source_file] = "source_file", +}; + +static const TSSymbol ts_symbol_map[] = { + [ts_builtin_sym_end] = ts_builtin_sym_end, + [anon_sym_hello] = anon_sym_hello, + [sym_source_file] = sym_source_file, +}; + +static const TSSymbolMetadata ts_symbol_metadata[] = { + [ts_builtin_sym_end] = { + .visible = false, + .named = true, + }, + [anon_sym_hello] = { + .visible = true, + .named = false, + }, + [sym_source_file] = { + .visible = true, + .named = true, + }, +}; + +static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = { + [0] = {0}, +}; + +static const uint16_t ts_non_terminal_alias_map[] = { + 0, +}; + +static const TSStateId ts_primary_state_ids[STATE_COUNT] = { + [0] = 0, + [1] = 1, + [2] = 2, + [3] = 3, +}; + +static bool ts_lex(TSLexer *lexer, TSStateId state) { + START_LEXER(); + eof = lexer->eof(lexer); + switch (state) { + case 0: + if (eof) ADVANCE(5); + if (lookahead == 'h') ADVANCE(1); + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') SKIP(0); + END_STATE(); + case 1: + if (lookahead == 'e') ADVANCE(3); + END_STATE(); + case 2: + if (lookahead == 'l') ADVANCE(4); + END_STATE(); + case 3: + if (lookahead == 'l') ADVANCE(2); + END_STATE(); + case 4: + if (lookahead == 'o') ADVANCE(6); + END_STATE(); + case 5: + ACCEPT_TOKEN(ts_builtin_sym_end); + END_STATE(); + case 6: + ACCEPT_TOKEN(anon_sym_hello); + END_STATE(); + default: + return false; + } +} + +static const TSLexMode ts_lex_modes[STATE_COUNT] = { + [0] = {.lex_state = 0}, + [1] = {.lex_state = 0}, + [2] = {.lex_state = 0}, + [3] = {.lex_state = 0}, +}; + +static const uint16_t 
ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { + [0] = { + [ts_builtin_sym_end] = ACTIONS(1), + [anon_sym_hello] = ACTIONS(1), + }, + [1] = { + [sym_source_file] = STATE(3), + [anon_sym_hello] = ACTIONS(3), + }, +}; + +static const uint16_t ts_small_parse_table[] = { + [0] = 1, + ACTIONS(5), 1, + ts_builtin_sym_end, + [4] = 1, + ACTIONS(7), 1, + ts_builtin_sym_end, +}; + +static const uint32_t ts_small_parse_table_map[] = { + [SMALL_STATE(2)] = 0, + [SMALL_STATE(3)] = 4, +}; + +static const TSParseActionEntry ts_parse_actions[] = { + [0] = {.entry = {.count = 0, .reusable = false}}, + [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(), + [3] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), + [5] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1, 0, 0), + [7] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), +}; + +#ifdef __cplusplus +extern "C" { +#endif +#ifdef TREE_SITTER_HIDE_SYMBOLS +#define TS_PUBLIC +#elif defined(_WIN32) +#define TS_PUBLIC __declspec(dllexport) +#else +#define TS_PUBLIC __attribute__((visibility("default"))) +#endif + +TS_PUBLIC const TSLanguage *tree_sitter_tsg_python(void) { + static const TSLanguage language = { + .version = LANGUAGE_VERSION, + .symbol_count = SYMBOL_COUNT, + .alias_count = ALIAS_COUNT, + .token_count = TOKEN_COUNT, + .external_token_count = EXTERNAL_TOKEN_COUNT, + .state_count = STATE_COUNT, + .large_state_count = LARGE_STATE_COUNT, + .production_id_count = PRODUCTION_ID_COUNT, + .field_count = FIELD_COUNT, + .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH, + .parse_table = &ts_parse_table[0][0], + .small_parse_table = ts_small_parse_table, + .small_parse_table_map = ts_small_parse_table_map, + .parse_actions = ts_parse_actions, + .symbol_names = ts_symbol_names, + .symbol_metadata = ts_symbol_metadata, + .public_symbol_map = ts_symbol_map, + .alias_map = ts_non_terminal_alias_map, + .alias_sequences = &ts_alias_sequences[0][0], + .lex_modes = ts_lex_modes, + 
.lex_fn = ts_lex, + .primary_state_ids = ts_primary_state_ids, + }; + return &language; +} +#ifdef __cplusplus +} +#endif diff --git a/python/extractor/tsg-python/src/tree_sitter/alloc.h b/python/extractor/tsg-python/src/tree_sitter/alloc.h new file mode 100644 index 0000000000000..1f4466d75c40b --- /dev/null +++ b/python/extractor/tsg-python/src/tree_sitter/alloc.h @@ -0,0 +1,54 @@ +#ifndef TREE_SITTER_ALLOC_H_ +#define TREE_SITTER_ALLOC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> + +// Allow clients to override allocation functions +#ifdef TREE_SITTER_REUSE_ALLOCATOR + +extern void *(*ts_current_malloc)(size_t); +extern void *(*ts_current_calloc)(size_t, size_t); +extern void *(*ts_current_realloc)(void *, size_t); +extern void (*ts_current_free)(void *); + +#ifndef ts_malloc +#define ts_malloc ts_current_malloc +#endif +#ifndef ts_calloc +#define ts_calloc ts_current_calloc +#endif +#ifndef ts_realloc +#define ts_realloc ts_current_realloc +#endif +#ifndef ts_free +#define ts_free ts_current_free +#endif + +#else + +#ifndef ts_malloc +#define ts_malloc malloc +#endif +#ifndef ts_calloc +#define ts_calloc calloc +#endif +#ifndef ts_realloc +#define ts_realloc realloc +#endif +#ifndef ts_free +#define ts_free free +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ALLOC_H_ diff --git a/python/extractor/tsg-python/src/tree_sitter/array.h b/python/extractor/tsg-python/src/tree_sitter/array.h new file mode 100644 index 0000000000000..15a3b233bbb87 --- /dev/null +++ b/python/extractor/tsg-python/src/tree_sitter/array.h @@ -0,0 +1,290 @@ +#ifndef TREE_SITTER_ARRAY_H_ +#define TREE_SITTER_ARRAY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "./alloc.h" + +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#ifdef _MSC_VER +#pragma warning(disable : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +#define
Array(T) \ + struct { \ + T *contents; \ + uint32_t size; \ + uint32_t capacity; \ + } + +/// Initialize an array. +#define array_init(self) \ + ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) + +/// Create an empty array. +#define array_new() \ + { NULL, 0, 0 } + +/// Get a pointer to the element at a given `index` in the array. +#define array_get(self, _index) \ + (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) + +/// Get a pointer to the first element in the array. +#define array_front(self) array_get(self, 0) + +/// Get a pointer to the last element in the array. +#define array_back(self) array_get(self, (self)->size - 1) + +/// Clear the array, setting its size to zero. Note that this does not free any +/// memory allocated for the array's contents. +#define array_clear(self) ((self)->size = 0) + +/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is +/// less than the array's current capacity, this function has no effect. +#define array_reserve(self, new_capacity) \ + _array__reserve((Array *)(self), array_elem_size(self), new_capacity) + +/// Free any memory allocated for this array. Note that this does not free any +/// memory allocated for the array's contents. +#define array_delete(self) _array__delete((Array *)(self)) + +/// Push a new `element` onto the end of the array. +#define array_push(self, element) \ + (_array__grow((Array *)(self), 1, array_elem_size(self)), \ + (self)->contents[(self)->size++] = (element)) + +/// Increase the array's size by `count` elements. +/// New elements are zero-initialized. +#define array_grow_by(self, count) \ + do { \ + if ((count) == 0) break; \ + _array__grow((Array *)(self), count, array_elem_size(self)); \ + memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \ + (self)->size += (count); \ + } while (0) + +/// Append all elements from one array to the end of another. 
+#define array_push_all(self, other) \ + array_extend((self), (other)->size, (other)->contents) + +/// Append `count` elements to the end of the array, reading their values from the +/// `contents` pointer. +#define array_extend(self, count, contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), (self)->size, \ + 0, count, contents \ + ) + +/// Remove `old_count` elements from the array starting at the given `index`. At +/// the same index, insert `new_count` new elements, reading their values from the +/// `new_contents` pointer. +#define array_splice(self, _index, old_count, new_count, new_contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), _index, \ + old_count, new_count, new_contents \ + ) + +/// Insert one `element` into the array at the given `index`. +#define array_insert(self, _index, element) \ + _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element)) + +/// Remove one element from the array at the given `index`. +#define array_erase(self, _index) \ + _array__erase((Array *)(self), array_elem_size(self), _index) + +/// Pop the last element off the array, returning the element by value. +#define array_pop(self) ((self)->contents[--(self)->size]) + +/// Assign the contents of one array to another, reallocating if necessary. +#define array_assign(self, other) \ + _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self)) + +/// Swap one array with another +#define array_swap(self, other) \ + _array__swap((Array *)(self), (Array *)(other)) + +/// Get the size of the array contents +#define array_elem_size(self) (sizeof *(self)->contents) + +/// Search a sorted array for a given `needle` value, using the given `compare` +/// callback to determine the order. +/// +/// If an existing element is found to be equal to `needle`, then the `index` +/// out-parameter is set to the existing value's index, and the `exists` +/// out-parameter is set to true. 
Otherwise, `index` is set to an index where +/// `needle` should be inserted in order to preserve the sorting, and `exists` +/// is set to false. +#define array_search_sorted_with(self, compare, needle, _index, _exists) \ + _array__search_sorted(self, 0, compare, , needle, _index, _exists) + +/// Search a sorted array for a given `needle` value, using integer comparisons +/// of a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_with`. +#define array_search_sorted_by(self, field, needle, _index, _exists) \ + _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) + +/// Insert a given `value` into a sorted array, using the given `compare` +/// callback to determine the order. +#define array_insert_sorted_with(self, compare, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) + +/// Insert a given `value` into a sorted array, using integer comparisons of +/// a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_by`. +#define array_insert_sorted_by(self, field, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_by(self, field, (value) field, &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) + +// Private + +typedef Array(void) Array; + +/// This is not what you're looking for, see `array_delete`. +static inline void _array__delete(Array *self) { + if (self->contents) { + ts_free(self->contents); + self->contents = NULL; + self->size = 0; + self->capacity = 0; + } +} + +/// This is not what you're looking for, see `array_erase`. 
+static inline void _array__erase(Array *self, size_t element_size, + uint32_t index) { + assert(index < self->size); + char *contents = (char *)self->contents; + memmove(contents + index * element_size, contents + (index + 1) * element_size, + (self->size - index - 1) * element_size); + self->size--; +} + +/// This is not what you're looking for, see `array_reserve`. +static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { + if (new_capacity > self->capacity) { + if (self->contents) { + self->contents = ts_realloc(self->contents, new_capacity * element_size); + } else { + self->contents = ts_malloc(new_capacity * element_size); + } + self->capacity = new_capacity; + } +} + +/// This is not what you're looking for, see `array_assign`. +static inline void _array__assign(Array *self, const Array *other, size_t element_size) { + _array__reserve(self, element_size, other->size); + self->size = other->size; + memcpy(self->contents, other->contents, self->size * element_size); +} + +/// This is not what you're looking for, see `array_swap`. +static inline void _array__swap(Array *self, Array *other) { + Array swap = *other; + *other = *self; + *self = swap; +} + +/// This is not what you're looking for, see `array_push` or `array_grow_by`. +static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { + uint32_t new_size = self->size + count; + if (new_size > self->capacity) { + uint32_t new_capacity = self->capacity * 2; + if (new_capacity < 8) new_capacity = 8; + if (new_capacity < new_size) new_capacity = new_size; + _array__reserve(self, element_size, new_capacity); + } +} + +/// This is not what you're looking for, see `array_splice`. 
+static inline void _array__splice(Array *self, size_t element_size, + uint32_t index, uint32_t old_count, + uint32_t new_count, const void *elements) { + uint32_t new_size = self->size + new_count - old_count; + uint32_t old_end = index + old_count; + uint32_t new_end = index + new_count; + assert(old_end <= self->size); + + _array__reserve(self, element_size, new_size); + + char *contents = (char *)self->contents; + if (self->size > old_end) { + memmove( + contents + new_end * element_size, + contents + old_end * element_size, + (self->size - old_end) * element_size + ); + } + if (new_count > 0) { + if (elements) { + memcpy( + (contents + index * element_size), + elements, + new_count * element_size + ); + } else { + memset( + (contents + index * element_size), + 0, + new_count * element_size + ); + } + } + self->size += new_count - old_count; +} + +/// A binary search routine, based on Rust's `std::slice::binary_search_by`. +/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. +#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ + do { \ + *(_index) = start; \ + *(_exists) = false; \ + uint32_t size = (self)->size - *(_index); \ + if (size == 0) break; \ + int comparison; \ + while (size > 1) { \ + uint32_t half_size = size / 2; \ + uint32_t mid_index = *(_index) + half_size; \ + comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ + if (comparison <= 0) *(_index) = mid_index; \ + size -= half_size; \ + } \ + comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ + if (comparison == 0) *(_exists) = true; \ + else if (comparison < 0) *(_index) += 1; \ + } while (0) + +/// Helper macro for the `_sorted_by` routines below. This takes the left (existing) +/// parameter by reference in order to work with the generic sorting function above. 
+#define _compare_int(a, b) ((int)*(a) - (int)(b)) + +#ifdef _MSC_VER +#pragma warning(default : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ARRAY_H_ diff --git a/python/extractor/tsg-python/src/tree_sitter/parser.h b/python/extractor/tsg-python/src/tree_sitter/parser.h new file mode 100644 index 0000000000000..17f0e94bfcf4d --- /dev/null +++ b/python/extractor/tsg-python/src/tree_sitter/parser.h @@ -0,0 +1,265 @@ +#ifndef TREE_SITTER_PARSER_H_ +#define TREE_SITTER_PARSER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> + +#define ts_builtin_sym_error ((TSSymbol)-1) +#define ts_builtin_sym_end 0 +#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 + +#ifndef TREE_SITTER_API_H_ +typedef uint16_t TSStateId; +typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; +typedef struct TSLanguage TSLanguage; +#endif + +typedef struct { + TSFieldId field_id; + uint8_t child_index; + bool inherited; +} TSFieldMapEntry; + +typedef struct { + uint16_t index; + uint16_t length; +} TSFieldMapSlice; + +typedef struct { + bool visible; + bool named; + bool supertype; +} TSSymbolMetadata; + +typedef struct TSLexer TSLexer; + +struct TSLexer { + int32_t lookahead; + TSSymbol result_symbol; + void (*advance)(TSLexer *, bool); + void (*mark_end)(TSLexer *); + uint32_t (*get_column)(TSLexer *); + bool (*is_at_included_range_start)(const TSLexer *); + bool (*eof)(const TSLexer *); +}; + +typedef enum { + TSParseActionTypeShift, + TSParseActionTypeReduce, + TSParseActionTypeAccept, + TSParseActionTypeRecover, +} TSParseActionType; + +typedef union { + struct { + uint8_t type; + TSStateId state; + bool extra; + bool repetition; + } shift; + struct { + uint8_t type; + uint8_t child_count; + TSSymbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; +} TSParseAction; + +typedef struct { + uint16_t lex_state; + uint16_t
external_lex_state; +} TSLexMode; + +typedef union { + TSParseAction action; + struct { + uint8_t count; + bool reusable; + } entry; +} TSParseActionEntry; + +typedef struct { + int32_t start; + int32_t end; +} TSCharacterRange; + +struct TSLanguage { + uint32_t version; + uint32_t symbol_count; + uint32_t alias_count; + uint32_t token_count; + uint32_t external_token_count; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; + uint16_t max_alias_sequence_length; + const uint16_t *parse_table; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; + const TSParseActionEntry *parse_actions; + const char * const *symbol_names; + const char * const *field_names; + const TSFieldMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; + const TSSymbolMetadata *symbol_metadata; + const TSSymbol *public_symbol_map; + const uint16_t *alias_map; + const TSSymbol *alias_sequences; + const TSLexMode *lex_modes; + bool (*lex_fn)(TSLexer *, TSStateId); + bool (*keyword_lex_fn)(TSLexer *, TSStateId); + TSSymbol keyword_capture_token; + struct { + const bool *states; + const TSSymbol *symbol_map; + void *(*create)(void); + void (*destroy)(void *); + bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); + unsigned (*serialize)(void *, char *); + void (*deserialize)(void *, const char *, unsigned); + } external_scanner; + const TSStateId *primary_state_ids; +}; + +static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { + uint32_t index = 0; + uint32_t size = len - index; + while (size > 1) { + uint32_t half_size = size / 2; + uint32_t mid_index = index + half_size; + TSCharacterRange *range = &ranges[mid_index]; + if (lookahead >= range->start && lookahead <= range->end) { + return true; + } else if (lookahead > range->end) { + index = mid_index; + } + size -= half_size; + } + TSCharacterRange *range = &ranges[index]; + return (lookahead >= 
range->start && lookahead <= range->end); +} + +/* + * Lexer Macros + */ + +#ifdef _MSC_VER +#define UNUSED __pragma(warning(suppress : 4101)) +#else +#define UNUSED __attribute__((unused)) +#endif + +#define START_LEXER() \ + bool result = false; \ + bool skip = false; \ + UNUSED \ + bool eof = false; \ + int32_t lookahead; \ + goto start; \ + next_state: \ + lexer->advance(lexer, skip); \ + start: \ + skip = false; \ + lookahead = lexer->lookahead; + +#define ADVANCE(state_value) \ + { \ + state = state_value; \ + goto next_state; \ + } + +#define ADVANCE_MAP(...) \ + { \ + static const uint16_t map[] = { __VA_ARGS__ }; \ + for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \ + if (map[i] == lookahead) { \ + state = map[i + 1]; \ + goto next_state; \ + } \ + } \ + } + +#define SKIP(state_value) \ + { \ + skip = true; \ + state = state_value; \ + goto next_state; \ + } + +#define ACCEPT_TOKEN(symbol_value) \ + result = true; \ + lexer->result_symbol = symbol_value; \ + lexer->mark_end(lexer); + +#define END_STATE() return result; + +/* + * Parse Table Macros + */ + +#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) + +#define STATE(id) id + +#define ACTIONS(id) id + +#define SHIFT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = (state_value) \ + } \ + }} + +#define SHIFT_REPEAT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = (state_value), \ + .repetition = true \ + } \ + }} + +#define SHIFT_EXTRA() \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .extra = true \ + } \ + }} + +#define REDUCE(symbol_name, children, precedence, prod_id) \ + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_name, \ + .child_count = children, \ + .dynamic_precedence = precedence, \ + .production_id = prod_id \ + }, \ + }} + +#define RECOVER() \ + {{ \ + .type = TSParseActionTypeRecover \ + }} + +#define ACCEPT_INPUT() \ + {{ \ + .type = TSParseActionTypeAccept 
\ + }} + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_PARSER_H_ diff --git a/python/extractor/tsg-python/tsp/Cargo.toml b/python/extractor/tsg-python/tsp/Cargo.toml index 995f39a3bd996..a4b738657dee4 100644 --- a/python/extractor/tsg-python/tsp/Cargo.toml +++ b/python/extractor/tsg-python/tsp/Cargo.toml @@ -25,7 +25,7 @@ include = [ path = "bindings/rust/lib.rs" [dependencies] -tree-sitter = ">= 0.20, < 0.21" +tree-sitter = ">= 0.22.6" [build-dependencies] cc = "1.0" diff --git a/ruby/extractor/Cargo.lock b/ruby/extractor/Cargo.lock index 5de84efb6b50a..eb212202f62fd 100644 --- a/ruby/extractor/Cargo.lock +++ b/ruby/extractor/Cargo.lock @@ -167,7 +167,7 @@ checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1" [[package]] name = "codeql-extractor" version = "0.2.0" -source = "git+https://github.com/github/codeql.git?rev=d0688b0645e59e291531612d41b1af71afcae46d#d0688b0645e59e291531612d41b1af71afcae46d" +source = "git+https://github.com/github/codeql.git?rev=5be699becc8ba8ac20f3258fbaa734381e26a9d9#5be699becc8ba8ac20f3258fbaa734381e26a9d9" dependencies = [ "chrono", "encoding", diff --git a/ruby/extractor/cargo-bazel-lock.json b/ruby/extractor/cargo-bazel-lock.json index 14c93370e07f8..356a870837b33 100644 --- a/ruby/extractor/cargo-bazel-lock.json +++ b/ruby/extractor/cargo-bazel-lock.json @@ -1,5 +1,5 @@ { - "checksum": "23633ca3169d15ab61de79d8d6a5f5b1b0a2043388e73b2fcd9e631939fd304b", + "checksum": "1ad87a553fb556a523e9dd8001738caf00cdb3f566a9a0b99a4d12b7659bbfd3", "crates": { "adler 1.0.2": { "name": "adler", @@ -953,7 +953,7 @@ "Git": { "remote": "https://github.com/github/codeql.git", "commitish": { - "Rev": "d0688b0645e59e291531612d41b1af71afcae46d" + "Rev": "5be699becc8ba8ac20f3258fbaa734381e26a9d9" }, "strip_prefix": "shared/tree-sitter-extractor" }