diff --git a/snooty/diagnostics.py b/snooty/diagnostics.py
index c1d43652..8b3d7ed5 100644
--- a/snooty/diagnostics.py
+++ b/snooty/diagnostics.py
@@ -149,6 +149,24 @@ def __init__(
         self.name = name
 
 
+class UnexpectedNodeType(Diagnostic):
+    severity = Diagnostic.Level.error
+
+    def __init__(
+        self,
+        found_type: Optional[str],
+        expected_type: Optional[str],
+        start: Union[int, Tuple[int, int]],
+    ) -> None:
+        msg = f'Found unexpected node type "{found_type}".'
+
+        if expected_type:
+            suggestion = f'Expected: "{expected_type}".'
+            msg += " " + suggestion
+
+        super().__init__(msg, start)
+
+
 class UnnamedPage(Diagnostic):
     severity = Diagnostic.Level.error
 
diff --git a/snooty/main.py b/snooty/main.py
index 5a00431e..850b36ce 100644
--- a/snooty/main.py
+++ b/snooty/main.py
@@ -184,7 +184,7 @@ def handle_document(
         fully_qualified_pageid: str,
         document: Dict[str, Any],
     ) -> None:
-        if page_id.suffix != EXT_FOR_PAGE:
+        if page_id.suffix not in [EXT_FOR_PAGE, ".ast"]:
             return
         super().handle_document(
             build_identifiers, page_id, fully_qualified_pageid, document
diff --git a/snooty/n.py b/snooty/n.py
index 734fd355..904befa2 100644
--- a/snooty/n.py
+++ b/snooty/n.py
@@ -60,7 +60,7 @@
 class FileId(PurePosixPath):
     """An unambiguous file path relative to the local project's root."""
 
-    PAT_FILE_EXTENSIONS = re.compile(r"\.((txt)|(rst)|(yaml))$")
+    PAT_FILE_EXTENSIONS = re.compile(r"\.((txt)|(rst)|(yaml)|(ast))$")
 
     def collapse_dots(self) -> "FileId":
         result: List[str] = []
diff --git a/snooty/parser.py b/snooty/parser.py
index 89d69476..aa165595 100644
--- a/snooty/parser.py
+++ b/snooty/parser.py
@@ -77,6 +77,7 @@
     TodoInfo,
     UnexpectedDirectiveOrder,
     UnexpectedIndentation,
+    UnexpectedNodeType,
     UnknownOptionId,
     UnknownTabID,
     UnknownTabset,
@@ -1843,6 +1844,48 @@ def build(
         fileids = (self.config.get_fileid(path) for path in paths)
         self.parse_rst_files(fileids, max_workers)
 
+        # Handle custom AST from API reference docs
+        with util.PerformanceLogger.singleton().start("parse pre-existing AST"):
+            ast_pages = util.get_files(
+                self.config.source_path,
+                {".ast"},
+                self.config.root,
+                nested_projects_diagnostics,
+            )
+
+            for path in ast_pages:
+                fileid = self.config.get_fileid(path)
+                diagnostics: List[Diagnostic] = []
+
+                try:
+                    text, read_diagnostics = self.config.read(fileid)
+                    diagnostics.extend(read_diagnostics)
+                    ast_json = json.loads(text)
+                    is_valid_ast_root = (
+                        isinstance(ast_json, Dict)
+                        and ast_json.get("type") == n.Root.type
+                    )
+
+                    if not is_valid_ast_root:
+                        diagnostics.append(
+                            UnexpectedNodeType(ast_json.get("type"), "root", 0)
+                        )
+
+                    ast_root = (
+                        util.NodeDeserializer.deserialize(ast_json, n.Root, diagnostics)
+                        if is_valid_ast_root
+                        else None
+                    )
+                    new_page = Page.create(
+                        fileid,
+                        fileid.as_posix().replace(".ast", ".txt"),
+                        "",
+                        ast_root,
+                    )
+                    self._page_updated(new_page, diagnostics)
+                except Exception as e:
+                    logger.error(e)
+
         for nested_path, diagnostics in nested_projects_diagnostics.items():
             with self._backend_lock:
                 self.on_diagnostics(nested_path, diagnostics)
diff --git a/snooty/test_parser.py b/snooty/test_parser.py
index 84e8e37a..83b11754 100644
--- a/snooty/test_parser.py
+++ b/snooty/test_parser.py
@@ -30,6 +30,7 @@
     TabMustBeDirective,
     UnexpectedDirectiveOrder,
     UnexpectedIndentation,
+    UnexpectedNodeType,
     UnknownOptionId,
     UnknownTabID,
     UnknownTabset,
@@ -4488,3 +4489,101 @@ def test_video() -> None:
     page.finish(diagnostics)
     # Diagnostic due to invalid upload-date format
     assert [type(x) for x in diagnostics] == [DocUtilsParseError]
+
+
+def test_parse_ast() -> None:
+    with make_test(
+        {
+            Path(
+                "source/test.ast"
+            ): """
+{
+    "type": "root",
+    "children": [
+        {
+            "type": "section",
+            "children": [
+                {
+                    "type": "heading",
+                    "children": [
+                        {
+                            "type": "text",
+                            "value": "Interface GridFSBucket"
+                        }
+                    ],
+                    "id": "interface-gridfsbucket"
+                },
+                {
+                    "type": "paragraph",
+                    "children": [
+                        {
+                            "type": "reference",
+                            "children": [
+                                {
+                                    "type": "text",
+                                    "value": "@ThreadSafe"
+                                }
+                            ],
+                            "refuri": "http://mongodb.github.io/mongo-java-driver/5.2/apidocs/mongodb-driver-core/com/mongodb/annotations/ThreadSafe.html"
+                        }
+                    ]
+                },
+                {
+                    "type": "directive",
+                    "name": "important",
+                    "domain": "",
+                    "argument": [
+                        {
+                            "type": "text",
+                            "value": "Important Callout Heading"
+                        }
+                    ],
+                    "children": [
+                        {
+                            "type": "paragraph",
+                            "children": [
+                                {
+                                    "type": "text",
+                                    "value": "Important Callout Body Text"
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        }
+    ],
+    "fileid": "test.ast"
+}
+""",
+            Path(
+                "source/bad-types.ast"
+            ): """
+{
+    "type": "root",
+    "children": [
+        {
+            "type": "section",
+            "children": [
+                {
+                    "type": "beep",
+                    "children": [
+                        {
+                            "type": "text",
+                            "value": "Interface GridFSBucket"
+                        }
+                    ],
+                    "id": "interface-gridfsbucket"
+                }
+            ]
+        }
+    ],
+    "fileid": "bad-types.ast"
+}
+""",
+        }
+    ) as result:
+        diagnostics = result.diagnostics[FileId("test.ast")]
+        assert not diagnostics
+        bad_types_diagnostics = result.diagnostics[FileId("bad-types.ast")]
+        assert [type(d) for d in bad_types_diagnostics] == [UnexpectedNodeType]
diff --git a/snooty/util.py b/snooty/util.py
index a55a1449..76c7f4e6 100644
--- a/snooty/util.py
+++ b/snooty/util.py
@@ -40,6 +40,7 @@
     Set,
     TextIO,
     Tuple,
+    Type,
     TypeVar,
     Union,
     cast,
@@ -48,7 +49,7 @@
 import requests
 import tomli
 
-from snooty.diagnostics import Diagnostic, NestedProject
+from snooty.diagnostics import Diagnostic, NestedProject, UnexpectedNodeType
 from snooty.n import FileId
 
 from . import n, tinydocutils
@@ -454,6 +455,98 @@ def cancel(self) -> None:
         self.__cancel.clear()
 
 
+class NodeDeserializer:
+    node_types: List[Type[n.Node]] = [
+        n.BlockSubstitutionReference,
+        n.Code,
+        n.Comment,
+        n.DefinitionList,
+        n.DefinitionListItem,
+        n.Directive,
+        n.DirectiveArgument,
+        n.Emphasis,
+        n.Field,
+        n.FieldList,
+        n.Footnote,
+        n.FootnoteReference,
+        n.Heading,
+        n.InlineTarget,
+        n.Label,
+        n.Line,
+        n.LineBlock,
+        n.ListNode,
+        n.ListNodeItem,
+        n.Literal,
+        n.NamedReference,
+        n.Paragraph,
+        n.Reference,
+        n.RefRole,
+        n.Role,
+        n.Root,
+        n.Section,
+        n.Strong,
+        n.SubstitutionDefinition,
+        n.SubstitutionReference,
+        n.Table,
+        n.Target,
+        n.TargetIdentifier,
+        n.Text,
+        n.TocTreeDirective,
+        n.Transition,
+    ]
+    node_classes: Dict[str, Type[n.Node]] = {
+        node_class.type: node_class for node_class in node_types
+    }
+
+    @classmethod
+    def deserialize(
+        cls,
+        node: n.SerializedNode,
+        node_type: Type[n._N],
+        diagnostics: List[Diagnostic],
+    ) -> n._N:
+        filtered_fields: Dict[str, Any] = {}
+
+        for field in dataclasses.fields(node_type):
+            # We don't need "span" to be present here since we need to hardcode it as the first argument of Node
+            if field.name == "span":
+                continue
+
+            node_value = node.get(field.name)
+            has_nested_children = field.name == "children" and issubclass(
+                node_type, n.Parent
+            )
+            has_nested_argument = field.name == "argument" and issubclass(
+                node_type, n.Directive
+            )
+            if isinstance(node_value, List) and (
+                has_nested_children or has_nested_argument
+            ):
+                deserialized_children: List[n.Node] = []
+
+                for child in node_value:
+                    if not isinstance(child, dict):
+                        continue
+
+                    child_type: str = child.get("type", "")
+                    child_node_type = cls.node_classes.get(child_type)
+                    if child_node_type:
+                        deserialized_children.append(
+                            cls.deserialize(child, child_node_type, diagnostics)
+                        )
+                    else:
+                        diagnostics.append(UnexpectedNodeType(child_type, None, 0))
+                        continue
+
+                filtered_fields[field.name] = deserialized_children
+            else:
+                # Ideally, we validate that the data types of the fields match the data types of the JSON node,
+                # but that requires a more verbose and time-consuming process. For now, we assume data types are correct.
+                filtered_fields[field.name] = node_value
+
+        return node_type((0,), **filtered_fields)
+
+
 def bundle(
     filename: PurePath, members: Iterable[Tuple[str, Union[str, bytes]]]
 ) -> bytes:
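
For reference, a minimal sketch of how the NodeDeserializer entry point added in snooty/util.py could be exercised on its own, assuming the snooty package layout shown in the patch; the inline JSON payload and the "example.ast" fileid are illustrative only and not taken from the diff:

import json
from typing import List

from snooty import n, util
from snooty.diagnostics import Diagnostic

# Illustrative payload; a real .ast file would carry a full node tree.
ast_json = json.loads('{"type": "root", "children": [], "fileid": "example.ast"}')

diagnostics: List[Diagnostic] = []
# Deserialize the raw dict into typed node objects; any child whose "type" is
# not registered in NodeDeserializer.node_classes is reported via an
# UnexpectedNodeType diagnostic rather than raising.
root = util.NodeDeserializer.deserialize(ast_json, n.Root, diagnostics)

print(type(root).__name__, [type(d).__name__ for d in diagnostics])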