Skip to content

Commit

Permalink
first version of embedded snippets check
Browse files Browse the repository at this point in the history
  • Loading branch information
sh-rp committed Mar 14, 2024
1 parent b39718d commit 4eacd4d
Show file tree
Hide file tree
Showing 102 changed files with 935 additions and 953 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ format:
# poetry run isort ./

test-and-lint-snippets:
poetry run mypy --config-file mypy.ini docs/website docs/examples
poetry run flake8 --max-line-length=200 docs/website docs/examples
poetry run mypy --config-file mypy.ini docs/website docs/examples docs/tools
poetry run flake8 --max-line-length=200 docs/website docs/examples docs/tools
cd docs/website/docs && poetry run pytest --ignore=node_modules

lint-security:
Expand Down
18 changes: 0 additions & 18 deletions docs/snippets/performance_chunking.py

This file was deleted.

23 changes: 0 additions & 23 deletions docs/snippets/performance_chunking_2.py

This file was deleted.

33 changes: 0 additions & 33 deletions docs/snippets/performance_parallel_awaitables.py

This file was deleted.

52 changes: 0 additions & 52 deletions docs/snippets/performance_parallel_extract_callables.py

This file was deleted.

File renamed without changes.
112 changes: 112 additions & 0 deletions docs/tools/check_embedded_snippets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
"""
Walks through all markdown files, finds all code snippets, and checks whether they are parseable.
"""
import ast
import json
import os
from textwrap import dedent
from typing import Dict, List, Optional, TypedDict

import tomlkit
import yaml

DOCS_DIR = "../website/docs"

SNIPPET_MARKER = "```"


class Snippet(TypedDict):
    """A fenced code block extracted from a markdown file."""

    # text following the opening fence marker (may be empty)
    language: str
    # snippet body, accumulated line by line and dedented once the fence closes
    code: str
    # path of the markdown file the snippet was found in
    file: str
    # 1-based line number of the opening fence
    line: int


if __name__ == "__main__":
    # Discover all markdown files to be processed. Any directory whose path
    # contains "api_reference" is skipped.
    markdown_files: List[str] = []
    for path, _, files in os.walk(DOCS_DIR):
        if "api_reference" in path:
            continue
        markdown_files.extend(os.path.join(path, name) for name in files if name.endswith(".md"))

    # Extract snippets from the markdown files.
    snippets: List[Snippet] = []
    for file_path in markdown_files:
        print(f"Processing file {file_path}")

        current_snippet: Optional[Snippet] = None
        # Explicit encoding so decoding does not depend on the platform locale.
        with open(file_path, "r", encoding="utf-8") as f:
            # Go line by line; every fence marker toggles between "inside a
            # snippet" and "outside a snippet".
            for line_no, line in enumerate(f, start=1):
                stripped = line.strip()
                if stripped.startswith(SNIPPET_MARKER):
                    if current_snippet is None:
                        # opening fence: whatever follows the marker is the language
                        current_snippet = {
                            "language": stripped.split(SNIPPET_MARKER)[1],
                            "code": "",
                            "file": file_path,
                            "line": line_no,
                        }
                    else:
                        # closing fence: finalize the snippet
                        current_snippet["code"] = dedent(current_snippet["code"])
                        snippets.append(current_snippet)
                        current_snippet = None
                elif current_snippet is not None:
                    current_snippet["code"] += line

        # An explicit raise (instead of `assert`) keeps this check active
        # under `python -O` and tells the author where to look.
        if current_snippet is not None:
            raise ValueError(
                f"Unclosed code block starting at line {current_snippet['line']}"
                f" in file {file_path}"
            )

    # Parse every snippet with the parser matching its declared language.
    # NOTE(review): failures are only counted and printed; they do not change
    # the exit status of the script.
    count: Dict[str, int] = {}
    failed_count: Dict[str, int] = {}
    for total, snippet in enumerate(snippets, start=1):
        language = snippet["language"] or "unknown"
        code = snippet["code"]
        count[language] = count.get(language, 0) + 1
        print(
            f"Processing snippet no {total} at line {snippet['line']}"
            f" in file {snippet['file']} with language {language}"
        )

        # parse snippet by type
        try:
            if language in ("python", "py"):
                ast.parse(code)
            elif language == "toml":
                tomlkit.loads(code)
            elif language == "json":
                json.loads(code)
            elif language == "yaml":
                yaml.safe_load(code)
            # text, shell scripts, sql and untagged snippets are not parsed
            elif language in ("text", "sh", "shell", "bash", "bat", "sql", "unknown"):
                pass
            else:
                # Raised explicitly (not `assert False`) so unknown languages
                # are still reported when assertions are disabled.
                raise ValueError(
                    "Unknown language. Please add a parser or exception for this language, or choose"
                    " the correct language for the snippet."
                )
        except Exception as e:
            print(f"Failed to parse snippet: {e}")
            failed_count[language] = failed_count.get(language, 0) + 1

    # Sanity check: far fewer snippets than expected means discovery or
    # extraction went wrong (e.g. the script was run from the wrong directory).
    if len(snippets) <= 100:
        raise RuntimeError("Found too few snippets. Something went wrong.")

    print(count)
    print(failed_count)
77 changes: 0 additions & 77 deletions docs/website/check_embedded_snippets.py

This file was deleted.

Loading

0 comments on commit 4eacd4d

Please sign in to comment.