Skip to content

Commit

Permalink
added doccomment extractor
Browse files Browse the repository at this point in the history
  • Loading branch information
bkietz committed Oct 22, 2024
1 parent ab64964 commit a889eb0
Show file tree
Hide file tree
Showing 5 changed files with 309 additions and 1 deletion.
127 changes: 127 additions & 0 deletions .flox/env/manifest.lock
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
"gtest": {
"pkg-path": "gtest"
},
"libclang": {
"pkg-path": "python312Packages.libclang"
},
"ninja": {
"pkg-path": "ninja"
},
Expand Down Expand Up @@ -656,6 +659,130 @@
"group": "toplevel",
"priority": 5
},
{
"attr_path": "python312Packages.libclang",
"broken": false,
"derivation": "/nix/store/pwglbkmzfrnk0k4djhxbqfs9rjxnxcw8-python3.12-libclang-16.0.6.drv",
"description": "Python bindings for the C language family frontend for LLVM",
"install_id": "libclang",
"license": "NCSA",
"locked_url": "https://github.com/flox/nixpkgs?rev=5633bcff0c6162b9e4b5f1264264611e950c8ec7",
"name": "python3.12-libclang-16.0.6",
"pname": "libclang",
"rev": "5633bcff0c6162b9e4b5f1264264611e950c8ec7",
"rev_count": 690827,
"rev_date": "2024-10-09T16:51:18Z",
"scrape_date": "2024-10-11T03:53:01Z",
"stabilities": [
"staging",
"unstable"
],
"unfree": false,
"version": "python3.12-libclang-16.0.6",
"outputs_to_install": [
"out"
],
"outputs": {
"dist": "/nix/store/3ng7zqcak81svbc7jh3g3f62sk0c8zsn-python3.12-libclang-16.0.6-dist",
"out": "/nix/store/vjir447zpkxy5mj1gbd1s9nwxrgcdjrc-python3.12-libclang-16.0.6"
},
"system": "aarch64-darwin",
"group": "toplevel",
"priority": 5
},
{
"attr_path": "python312Packages.libclang",
"broken": false,
"derivation": "/nix/store/h3h20cq0k8n325l3hsbglr88clsjnvxk-python3.12-libclang-18.1.8.drv",
"description": "Python bindings for the C language family frontend for LLVM",
"install_id": "libclang",
"license": "NCSA",
"locked_url": "https://github.com/flox/nixpkgs?rev=5633bcff0c6162b9e4b5f1264264611e950c8ec7",
"name": "python3.12-libclang-18.1.8",
"pname": "libclang",
"rev": "5633bcff0c6162b9e4b5f1264264611e950c8ec7",
"rev_count": 690827,
"rev_date": "2024-10-09T16:51:18Z",
"scrape_date": "2024-10-11T03:53:01Z",
"stabilities": [
"staging",
"unstable"
],
"unfree": false,
"version": "python3.12-libclang-18.1.8",
"outputs_to_install": [
"out"
],
"outputs": {
"dist": "/nix/store/vs6nrrf6vxfmdlhkdnklwr2lx5p8n3b0-python3.12-libclang-18.1.8-dist",
"out": "/nix/store/1fiag40yp2p95jx51mjxcjzj7dl9zv7c-python3.12-libclang-18.1.8"
},
"system": "aarch64-linux",
"group": "toplevel",
"priority": 5
},
{
"attr_path": "python312Packages.libclang",
"broken": false,
"derivation": "/nix/store/szn0qbi1f1biv6wmmxwq1pbbnzc2lw7l-python3.12-libclang-16.0.6.drv",
"description": "Python bindings for the C language family frontend for LLVM",
"install_id": "libclang",
"license": "NCSA",
"locked_url": "https://github.com/flox/nixpkgs?rev=5633bcff0c6162b9e4b5f1264264611e950c8ec7",
"name": "python3.12-libclang-16.0.6",
"pname": "libclang",
"rev": "5633bcff0c6162b9e4b5f1264264611e950c8ec7",
"rev_count": 690827,
"rev_date": "2024-10-09T16:51:18Z",
"scrape_date": "2024-10-11T03:53:01Z",
"stabilities": [
"staging",
"unstable"
],
"unfree": false,
"version": "python3.12-libclang-16.0.6",
"outputs_to_install": [
"out"
],
"outputs": {
"dist": "/nix/store/p24gxfp4m62z6c3z99m74l8m3jgn5yli-python3.12-libclang-16.0.6-dist",
"out": "/nix/store/37flz7kk8f57vns2c26a3an5n49272f8-python3.12-libclang-16.0.6"
},
"system": "x86_64-darwin",
"group": "toplevel",
"priority": 5
},
{
"attr_path": "python312Packages.libclang",
"broken": false,
"derivation": "/nix/store/iv3blbkqhi0n1xbk1xj7xb929785df5d-python3.12-libclang-18.1.8.drv",
"description": "Python bindings for the C language family frontend for LLVM",
"install_id": "libclang",
"license": "NCSA",
"locked_url": "https://github.com/flox/nixpkgs?rev=5633bcff0c6162b9e4b5f1264264611e950c8ec7",
"name": "python3.12-libclang-18.1.8",
"pname": "libclang",
"rev": "5633bcff0c6162b9e4b5f1264264611e950c8ec7",
"rev_count": 690827,
"rev_date": "2024-10-09T16:51:18Z",
"scrape_date": "2024-10-11T03:53:01Z",
"stabilities": [
"staging",
"unstable"
],
"unfree": false,
"version": "python3.12-libclang-18.1.8",
"outputs_to_install": [
"out"
],
"outputs": {
"dist": "/nix/store/0z18frhcy9mjj239bjr38vs57ryzlkps-python3.12-libclang-18.1.8-dist",
"out": "/nix/store/q7nb7xz48aybaw7is1ns3k1f1ab9iw6v-python3.12-libclang-18.1.8"
},
"system": "x86_64-linux",
"group": "toplevel",
"priority": 5
},
{
"attr_path": "ninja",
"broken": false,
Expand Down
1 change: 1 addition & 0 deletions .flox/env/manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ compiler-rt.pkg-path = "llvmPackages.compiler-rt"
nlohmann_json.pkg-path = "nlohmann_json"
fmt.pkg-path = "fmt"
python312Full.pkg-path = "python312Full"
libclang.pkg-path = "python312Packages.libclang"
# hello.pkg-path = "hello"
# nodejs = { version = "^20.15.1", pkg-path = "nodejs" }

Expand Down
177 changes: 177 additions & 0 deletions cmake_modules/maud_apidoc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
from clang.cindex import (
Index,
TranslationUnit,
SourceRange,
Cursor,
CursorKind,
TokenKind,
)

import json
import argparse

# Command-line interface: every option is optional; the source and output
# default to "-" (which argparse.FileType maps to stdin / stdout).
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    description="Scan a source file for documentation comments.",
)

# Input: the C/C++ source to scan.
parser.add_argument(
    "--source",
    type=argparse.FileType("r"),
    default="-",
    help="source file to scan",
)

# Output: where the JSON database of comments is written.
parser.add_argument(
    "--output",
    type=argparse.FileType("w"),
    default="-",
    help="destination file for json comments database",
)

# Optional file holding one libclang argument per line.
parser.add_argument(
    "--clang-args-file",
    type=argparse.FileType("r"),
    help="\\n-separated arguments, passed to libclang",
)

# Prefixes which mark a comment as a documentation comment.
parser.add_argument(
    "--doc-patterns",
    metavar=("PATTERN", "PATTERNS"),
    nargs="+",
    default=["/// ", "/**\n"],
    help="patterns used to recognize documentation comments",
)


# Anything more complicated than getting the decl and getting the docstring
# is out of scope.
# https://www.sphinx-doc.org/en/master/usage/domains/cpp.html
# - class (with namespace, base classes, template)
# - function (with namespace/membership, template, trailings)

# Read a source file
# Parse with cindex
# For each doccomment
# begins with a directive: goto directive comment
# line comments: concatenate consecutive comments, strip pattern
# block comments: strip the pattern, the comment delimiters, and the indent (up to the first asterisk)
# extract the tokens of the next decl
# the decl may end early with ; or {
# also get the namespace from the decl
# For each directive comment
# not yet implemented
# Output as json


def can_be_documented(cursor_kind: CursorKind):
    """Decide whether a cursor of kind *cursor_kind* may carry a doc comment.

    Macro definitions are always accepted. Any other kind is accepted only
    if it reports itself as a declaration and is not one of the explicitly
    excluded kinds listed below.
    """
    if cursor_kind == CursorKind.MACRO_DEFINITION:
        return True

    is_declaration = cursor_kind.is_declaration()
    if not is_declaration:
        # Hand back whatever falsy value libclang produced.
        return is_declaration

    excluded_kinds = {
        CursorKind.NAMESPACE,
        CursorKind.INVALID_FILE,
        CursorKind.NAMESPACE_REF,
        CursorKind.TEMPLATE_REF,
        CursorKind.PREPROCESSING_DIRECTIVE,
        CursorKind.MACRO_INSTANTIATION,
    }
    return cursor_kind not in excluded_kinds


def get_ns(cursor):
    """Return the chain of semantic parents enclosing *cursor*.

    The result is ordered outermost first and excludes both *cursor* itself
    and the translation unit's root cursor. The walk stops at the first
    parent whose spelling equals the spelling of the translation unit that
    owns *cursor*, or when there is no further semantic parent.

    The translation unit is taken from ``cursor.translation_unit`` rather
    than from a module-level ``tu`` global, so the function also works when
    this module is imported instead of being run as a script.
    """
    root_spelling = cursor.translation_unit.spelling

    # Collect innermost-first, then reverse once; this avoids rebuilding the
    # list on every step.
    path = []
    parent = cursor.semantic_parent
    while parent is not None and parent.spelling != root_spelling:
        path.append(parent)
        parent = parent.semantic_parent
    path.reverse()
    return path


def get_sphinx_decl_extent(cursor: Cursor):
    """Return the source range of the declaration head of *cursor*.

    The range starts at the cursor's first token and ends at the last token
    before the first ``{`` or ``;`` token (searching from the second token
    on), so that bodies and terminators are left out.

    If the cursor has a single token (for example a macro definition with
    no replacement list) the range covers just that token. If the cursor
    yields no tokens at all, the cursor's own extent is returned.
    """
    tokens = cursor.get_tokens()
    first = next(tokens, None)
    if first is None:
        # Nothing to tokenize; fall back to what libclang reports.
        return cursor.extent

    start = first.extent.start
    # Seed the end with the first token so a one-token declaration still
    # produces a valid range instead of passing None to from_locations().
    end = first.extent.end
    for t in tokens:
        if t.spelling in ("{", ";"):
            # FIXME these could occur in an attribute
            #       or lambda expression
            break
        end = t.extent.end
    return SourceRange.from_locations(start, end)


def comment_scan(tu: TranslationUnit, doc_patterns: list[str], contents: str) -> dict:
    """Pair documentation comments in *tu* with the declarations after them.

    Args:
        tu: the parsed translation unit whose tokens are scanned.
        doc_patterns: prefixes which mark a comment token as documentation.
            Only patterns starting with ``//`` (line comments) are
            supported.
        contents: the text of the source file which was parsed; declaration
            strings are sliced out of it.

    Returns:
        A dict with two keys: ``"diagnostics"``, the stringified diagnostics
        of *tu*, and ``"declarations"``, a list with one dict per documented
        declaration holding ``"declaration"``, ``"ns"``, ``"location"``
        (``"file"``, ``"start"``, ``"end"``), ``"kind"`` and ``"comment"``
        (a list of lines).

    Raises:
        NotImplementedError: if a comment matches a pattern which does not
            start with ``//`` (block comments are not handled yet).
    """
    # Extent offsets are used to slice the UTF-8 encoding of the source, so
    # encode once up front rather than once per declaration.
    contents_bytes = contents.encode("utf-8")

    declarations = []
    current_comment = None
    current_comment_end = None
    for t in tu.get_tokens(extent=tu.cursor.extent):
        if t.kind == TokenKind.COMMENT:
            comment = t.spelling
            for pattern in doc_patterns:
                if not comment.startswith(pattern):
                    continue
                comment = comment.removeprefix(pattern)

                if not pattern.startswith("//"):
                    raise NotImplementedError(
                        f"block documentation comments ({pattern!r}) "
                        "are not supported yet"
                    )

                # Consecutive line comments accumulate into one docstring.
                if current_comment is not None:
                    current_comment = f"{current_comment}\n{comment}"
                else:
                    current_comment = comment
                current_comment_end = t.extent.end
                # One pattern per comment: without this, an overlapping
                # later pattern could strip and append the comment again.
                break

        if current_comment is None:
            continue

        if not can_be_documented(t.cursor.kind):
            continue

        # Skip cursors which begin before the pending comment ends; they
        # cannot be the declaration that the comment documents.
        if t.cursor.extent.start.offset < current_comment_end.offset:
            continue

        e = get_sphinx_decl_extent(t.cursor)
        assert str(e.start.file) == str(e.end.file)
        decl_str = (
            contents_bytes[e.start.offset : e.end.offset]
            .decode("utf-8")
            .replace("\n", " ")
        )
        declarations.append(
            {
                "declaration": decl_str,
                "ns": "::".join([segment.spelling for segment in get_ns(t.cursor)]),
                "location": {
                    "file": str(e.start.file),
                    "start": e.start.line,
                    "end": e.end.line,
                },
                "kind": str(t.cursor.kind).removeprefix("CursorKind."),
                "comment": current_comment.split("\n"),
            }
        )
        # The comment has been consumed; wait for the next one.
        current_comment = None
        current_comment_end = None

    return {
        "diagnostics": [str(d) for d in tu.diagnostics],
        "declarations": declarations,
    }


if __name__ == "__main__":
    # Script entry point: parse the source named on the command line and
    # write the extracted documentation database as JSON.
    args = parser.parse_args()
    index = Index.create()
    source = args.source.read()

    # libclang expects a list of argument strings, not an open file, so
    # read the newline-separated arguments out of the file. Blank lines are
    # dropped so they are not passed on as empty arguments.
    clang_args = []
    if args.clang_args_file is not None:
        with args.clang_args_file as clang_args_file:
            clang_args = [
                line for line in clang_args_file.read().splitlines() if line
            ]

    tu = index.parse(
        args.source.name,
        args=clang_args,
        # Supply the text already read, so that stdin input works too.
        unsaved_files=[
            (args.source.name, source),
        ],
        options=(
            TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD
            | TranslationUnit.PARSE_SKIP_FUNCTION_BODIES
            | TranslationUnit.PARSE_INCOMPLETE
        ),
    )

    json.dump(comment_scan(tu, args.doc_patterns, source), args.output, indent=2)
    args.output.write("\n")
1 change: 1 addition & 0 deletions cmake_modules/sphinx_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ sphinxcontrib-qthelp==1.0.7
sphinxcontrib-serializinghtml==1.1.10
sphinxcontrib-websupport==1.2.7
urllib3==2.2.2
libclang
4 changes: 3 additions & 1 deletion zen.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ Zen

.. cpp:var:: int a = 42

.. cpp:function:: int f(int i)
.. cpp:function:: int [[doc::usage{ foo = 3; }]] f(int i = 3)

.. cpp:function:: template <typename... T> requires (sizeof...(T) > 0) int pack_size()

An expression: :cpp:expr:`a * f(a)`

Expand Down

0 comments on commit a889eb0

Please sign in to comment.