Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DOP-5237: Deserialize AST #638

Merged
Merged 14 commits on Dec 16, 2024 (branch names lost in page extraction)
18 changes: 18 additions & 0 deletions snooty/diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,24 @@ def __init__(
self.name = name


class UnexpectedNodeType(Diagnostic):
    """Reported when a deserialized AST node carries an unrecognized type."""

    severity = Diagnostic.Level.error

    def __init__(
        self,
        found_type: Optional[str],
        expected_type: Optional[str],
        start: Union[int, Tuple[int, int]],
    ) -> None:
        # Assemble the message in parts; the expected-type hint is
        # appended only when the caller knows what should have been there.
        parts = [f'Found unexpected node type "{found_type}".']
        if expected_type:
            parts.append(f'Expected: "{expected_type}".')
        super().__init__(" ".join(parts), start)


class UnnamedPage(Diagnostic):
severity = Diagnostic.Level.error

Expand Down
2 changes: 1 addition & 1 deletion snooty/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def handle_document(
fully_qualified_pageid: str,
document: Dict[str, Any],
) -> None:
if page_id.suffix != EXT_FOR_PAGE:
if page_id.suffix not in [EXT_FOR_PAGE, ".ast"]:
return
super().handle_document(
build_identifiers, page_id, fully_qualified_pageid, document
Expand Down
2 changes: 1 addition & 1 deletion snooty/n.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
class FileId(PurePosixPath):
"""An unambiguous file path relative to the local project's root."""

PAT_FILE_EXTENSIONS = re.compile(r"\.((txt)|(rst)|(yaml))$")
PAT_FILE_EXTENSIONS = re.compile(r"\.((txt)|(rst)|(yaml)|(ast))$")

def collapse_dots(self) -> "FileId":
result: List[str] = []
Expand Down
43 changes: 43 additions & 0 deletions snooty/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
TodoInfo,
UnexpectedDirectiveOrder,
UnexpectedIndentation,
UnexpectedNodeType,
UnknownOptionId,
UnknownTabID,
UnknownTabset,
Expand Down Expand Up @@ -1843,6 +1844,48 @@ def build(
fileids = (self.config.get_fileid(path) for path in paths)
self.parse_rst_files(fileids, max_workers)

# Handle custom AST from API reference docs
with util.PerformanceLogger.singleton().start("parse pre-existing AST"):
ast_pages = util.get_files(
self.config.source_path,
{".ast"},
self.config.root,
nested_projects_diagnostics,
)

for path in ast_pages:
fileid = self.config.get_fileid(path)
diagnostics: List[Diagnostic] = []

try:
text, read_diagnostics = self.config.read(fileid)
diagnostics.extend(read_diagnostics)
ast_json = json.loads(text)
is_valid_ast_root = (
isinstance(ast_json, Dict)
and ast_json.get("type") == n.Root.type
)

if not is_valid_ast_root:
diagnostics.append(
UnexpectedNodeType(ast_json.get("type"), "root", 0)
)

ast_root = (
util.NodeDeserializer.deserialize(ast_json, n.Root, diagnostics)
if is_valid_ast_root
else None
)
new_page = Page.create(
fileid,
fileid.as_posix().replace(".ast", ".txt"),
"",
ast_root,
)
self._page_updated(new_page, diagnostics)
except Exception as e:
logger.error(e)

for nested_path, diagnostics in nested_projects_diagnostics.items():
with self._backend_lock:
self.on_diagnostics(nested_path, diagnostics)
Expand Down
99 changes: 99 additions & 0 deletions snooty/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
TabMustBeDirective,
UnexpectedDirectiveOrder,
UnexpectedIndentation,
UnexpectedNodeType,
UnknownOptionId,
UnknownTabID,
UnknownTabset,
Expand Down Expand Up @@ -4488,3 +4489,101 @@ def test_video() -> None:
page.finish(diagnostics)
# Diagnostic due to invalid upload-date format
assert [type(x) for x in diagnostics] == [DocUtilsParseError]


def test_parse_ast() -> None:
    """Pre-existing ``.ast`` files are deserialized directly.

    A well-formed AST file produces no diagnostics; a file containing an
    unknown node type produces exactly one UnexpectedNodeType diagnostic.
    """
    valid_ast = """
{
    "type": "root",
    "children": [
        {
            "type": "section",
            "children": [
                {
                    "type": "heading",
                    "children": [
                        {"type": "text", "value": "Interface GridFSBucket"}
                    ],
                    "id": "interface-gridfsbucket"
                },
                {
                    "type": "paragraph",
                    "children": [
                        {
                            "type": "reference",
                            "children": [
                                {"type": "text", "value": "@ThreadSafe"}
                            ],
                            "refuri": "http://mongodb.github.io/mongo-java-driver/5.2/apidocs/mongodb-driver-core/com/mongodb/annotations/ThreadSafe.html"
                        }
                    ]
                },
                {
                    "type": "directive",
                    "name": "important",
                    "domain": "",
                    "argument": [
                        {"type": "text", "value": "Important Callout Heading"}
                    ],
                    "children": [
                        {
                            "type": "paragraph",
                            "children": [
                                {"type": "text", "value": "Important Callout Body Text"}
                            ]
                        }
                    ]
                }
            ]
        }
    ],
    "fileid": "test.ast"
}
"""
    # "beep" is not a registered node type and must be flagged.
    bad_types_ast = """
{
    "type": "root",
    "children": [
        {
            "type": "section",
            "children": [
                {
                    "type": "beep",
                    "children": [
                        {"type": "text", "value": "Interface GridFSBucket"}
                    ],
                    "id": "interface-gridfsbucket"
                }
            ]
        }
    ],
    "fileid": "bad-types.ast"
}
"""
    project_files = {
        Path("source/test.ast"): valid_ast,
        Path("source/bad-types.ast"): bad_types_ast,
    }
    with make_test(project_files) as result:
        assert not result.diagnostics[FileId("test.ast")]
        bad_types_diagnostics = result.diagnostics[FileId("bad-types.ast")]
        assert [type(d) for d in bad_types_diagnostics] == [UnexpectedNodeType]
95 changes: 94 additions & 1 deletion snooty/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
Set,
TextIO,
Tuple,
Type,
TypeVar,
Union,
cast,
Expand All @@ -48,7 +49,7 @@
import requests
import tomli

from snooty.diagnostics import Diagnostic, NestedProject
from snooty.diagnostics import Diagnostic, NestedProject, UnexpectedNodeType
from snooty.n import FileId

from . import n, tinydocutils
Expand Down Expand Up @@ -454,6 +455,98 @@ def cancel(self) -> None:
self.__cancel.clear()


class NodeDeserializer:
    """Reconstructs typed AST nodes (``snooty.n`` dataclasses) from their
    serialized dict form.

    ``node_classes`` maps each node's ``"type"`` discriminator string to the
    corresponding dataclass; ``deserialize`` walks the dict recursively,
    descending into ``children`` (for Parent nodes) and ``argument`` (for
    Directive nodes).
    """

    # Every node class that may legitimately appear in a serialized AST.
    node_types: List[Type[n.Node]] = [
        n.BlockSubstitutionReference,
        n.Code,
        n.Comment,
        n.DefinitionList,
        n.DefinitionListItem,
        n.Directive,
        n.DirectiveArgument,
        n.Emphasis,
        n.Field,
        n.FieldList,
        n.Footnote,
        n.FootnoteReference,
        n.Heading,
        n.InlineTarget,
        n.Label,
        n.Line,
        n.LineBlock,
        n.ListNode,
        n.ListNodeItem,
        n.Literal,
        n.NamedReference,
        n.Paragraph,
        n.Reference,
        n.RefRole,
        n.Role,
        n.Root,
        n.Section,
        n.Strong,
        n.SubstitutionDefinition,
        n.SubstitutionReference,
        n.Table,
        n.Target,
        n.TargetIdentifier,
        n.Text,
        n.TocTreeDirective,
        n.Transition,
    ]
    # Lookup table from the serialized "type" string to its dataclass.
    node_classes: Dict[str, Type[n.Node]] = {
        node_class.type: node_class for node_class in node_types
    }

    @classmethod
    def deserialize(
        cls,
        node: n.SerializedNode,
        node_type: Type[n._N],
        diagnostics: List[Diagnostic],
    ) -> n._N:
        """Deserialize ``node`` into an instance of ``node_type``.

        Children whose ``"type"`` is not in ``node_classes`` are dropped and
        reported through ``diagnostics``. The constructed node always carries
        a zeroed span, since source positions are unknown for pre-built ASTs.
        """
        filtered_fields: Dict[str, Any] = {}

        for field in dataclasses.fields(node_type):
            # "span" is excluded: it is hardcoded as the first positional
            # argument of the constructor call below.
            if field.name == "span":
                continue

            node_value = node.get(field.name)
            has_nested_children = field.name == "children" and issubclass(
                node_type, n.Parent
            )
            has_nested_argument = field.name == "argument" and issubclass(
                node_type, n.Directive
            )
            # Use the builtin `list` in the isinstance check; the typing.List
            # alias is deprecated for runtime checks.
            if isinstance(node_value, list) and (
                has_nested_children or has_nested_argument
            ):
                deserialized_children: List[n.Node] = []

                for child in node_value:
                    # Ignore malformed entries that are not JSON objects.
                    if not isinstance(child, dict):
                        continue

                    child_type: str = child.get("type", "")
                    child_node_type = cls.node_classes.get(child_type)
                    if child_node_type is not None:
                        deserialized_children.append(
                            cls.deserialize(child, child_node_type, diagnostics)
                        )
                    else:
                        # Unknown node type: report it and skip the child.
                        diagnostics.append(UnexpectedNodeType(child_type, None, 0))

                filtered_fields[field.name] = deserialized_children
            else:
                # Ideally, we validate that the data types of the fields match
                # the data types of the JSON node, but that requires a more
                # verbose and time-consuming process. For now, we assume data
                # types are correct.
                # NOTE(review): keys absent from `node` become None here —
                # presumably the node dataclasses tolerate that; confirm.
                filtered_fields[field.name] = node_value

        return node_type((0,), **filtered_fields)


def bundle(
filename: PurePath, members: Iterable[Tuple[str, Union[str, bytes]]]
) -> bytes:
Expand Down