Skip to content

Commit

Permalink
DOCSP-8667: Add performance reporting system
Browse files Browse the repository at this point in the history
  • Loading branch information
i80and committed Jan 31, 2020
1 parent 0c53820 commit 0c91f74
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 17 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ node_modules/
htmlcov/
.venv/
dist/
test_data/docs/
9 changes: 8 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: help lint format test clean flit-publish package cut-release
.PHONY: help lint format test clean flit-publish package cut-release performance-report

SYSTEM_PYTHON=$(shell which python3)
PLATFORM=$(shell printf '%s_%s' "$$(uname -s | tr '[:upper:]' '[:lower:]')" "$$(uname -m)")
Expand Down Expand Up @@ -51,6 +51,7 @@ dist/${PACKAGE_NAME}.asc: dist/snooty-${VERSION}-${PLATFORM}.zip ## Build and si
clean: ## Remove all build artifacts
-rm -r snooty.tar.zip* snootycli.py .venv
-rm -rf dist
-rm -rf .docs

flit-publish: test ## Deploy the package to pypi
SOURCE_DATE_EPOCH="$$SOURCE_DATE_EPOCH" flit publish
Expand Down Expand Up @@ -83,3 +84,9 @@ cut-release: ## Release a new version of snooty. Must provide BUMP_TO_VERSION
@echo
@echo "Creating the release may now take several minutes. Check https://github.com/mongodb/snooty-parser/actions for status."
@echo "Release will be created at: https://github.com/mongodb/snooty-parser/releases/tag/v${BUMP_TO_VERSION}"

# Commit of https://github.com/mongodb/docs.git that the performance report
# checks out into .docs before running.
DOCS_COMMIT=1c6dfe71fd45fbdcdf5c7b73f050f615f4279064
# The `cd` is chained with `&&` so that a failed `cd` can never cause
# `git reset --hard` to run inside this repository instead of .docs.
performance-report: .venv/.EXISTS ## Fetch a sample corpus, and generate a timing report for each part of the parse
	if [ ! -d .docs ]; then git clone https://github.com/mongodb/docs.git .docs; fi
	cd .docs && if [ "$$(git rev-parse HEAD)" != "${DOCS_COMMIT}" ]; then git fetch && git reset --hard "${DOCS_COMMIT}"; fi
	. .venv/bin/activate && python3 -m snooty.performance_report .docs
34 changes: 18 additions & 16 deletions snooty/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -835,20 +835,21 @@ def delete(self, path: PurePath) -> None:

self.backend.on_delete(self.get_fileid(path), self.build_identifiers)

def build(self) -> None:
def build(self, max_workers: Optional[int] = None) -> None:
all_yaml_diagnostics: Dict[PurePath, List[Diagnostic]] = {}
pool = multiprocessing.Pool()
try:
paths = util.get_files(self.config.source_path, RST_EXTENSIONS)
logger.debug("Processing rst files")
results = pool.imap_unordered(partial(parse_rst, self.parser), paths)
for page, diagnostics in results:
self._page_updated(page, diagnostics)
finally:
# We cannot use the multiprocessing.Pool context manager API due to the following:
# https://pytest-cov.readthedocs.io/en/latest/subprocess-support.html#if-you-use-multiprocessing-pool
pool.close()
pool.join()
pool = multiprocessing.Pool(max_workers)
with util.PerformanceLogger.singleton().start("parse rst"):
try:
paths = util.get_files(self.config.source_path, RST_EXTENSIONS)
logger.debug("Processing rst files")
results = pool.imap_unordered(partial(parse_rst, self.parser), paths)
for page, diagnostics in results:
self._page_updated(page, diagnostics)
finally:
# We cannot use the multiprocessing.Pool context manager API due to the following:
# https://pytest-cov.readthedocs.io/en/latest/subprocess-support.html#if-you-use-multiprocessing-pool
pool.close()
pool.join()

# Categorize our YAML files
logger.debug("Categorizing YAML files")
Expand Down Expand Up @@ -891,7 +892,8 @@ def create_page(filename: str) -> Tuple[Page, EmbeddedRstParser]:
page, all_yaml_diagnostics.get(page.source_path, [])
)

semantic_parse, semantic_diagnostics = self.semantic_parser.run(self.pages)
with util.PerformanceLogger.singleton().start("postprocessing"):
semantic_parse, semantic_diagnostics = self.semantic_parser.run(self.pages)

for fileid, page in self.semantic_parser.pages.items():
self.backend.on_update(self.prefix, self.build_identifiers, fileid, page)
Expand Down Expand Up @@ -1065,10 +1067,10 @@ def delete(self, path: PurePath) -> None:
with self._lock:
self._project.delete(path)

def build(self) -> None:
def build(self, max_workers: Optional[int] = None) -> None:
"""Build the full project."""
with self._lock:
self._project.build()
self._project.build(max_workers)

def stop_monitoring(self) -> None:
"""Stop the filesystem monitoring thread associated with this project."""
Expand Down
55 changes: 55 additions & 0 deletions snooty/performance_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import sys
import logging
from pathlib import Path
from typing import Dict, List
from .parser import Project
from .types import Diagnostic, Page, FileId, SerializableType, BuildIdentifierSet
from .util import PerformanceLogger

logging.basicConfig(level=logging.INFO)


class Backend:
    """Backend whose callbacks all do nothing.

    ``main()`` hands an instance of this class to ``Project`` so that a build
    can run without any of the callbacks producing output.
    """

    def on_progress(self, progress: int, total: int, message: str) -> None:
        """Ignore a progress notification."""

    def on_diagnostics(self, path: FileId, diagnostics: List[Diagnostic]) -> None:
        """Ignore the diagnostics reported for ``path``."""

    def on_update(
        self,
        prefix: List[str],
        build_identifiers: BuildIdentifierSet,
        page_id: FileId,
        page: Page,
    ) -> None:
        """Ignore an updated page."""

    def on_update_metadata(
        self,
        prefix: List[str],
        build_identifiers: BuildIdentifierSet,
        field: Dict[str, SerializableType],
    ) -> None:
        """Ignore updated metadata."""

    def on_delete(self, page_id: FileId, build_identifiers: BuildIdentifierSet) -> None:
        """Ignore a page deletion."""


def main() -> None:
    """Build the project named on the command line several times and print timings.

    The project root is read from the first command-line argument. Each run
    calls ``project.build(1)``; once all runs are done, one line is printed
    for every entry returned by ``PerformanceLogger.singleton().times()``.

    Raises:
        SystemExit: if no project root argument was supplied.
    """
    # Exit with a usage message rather than a bare IndexError traceback.
    if len(sys.argv) < 2:
        raise SystemExit(
            "usage: python3 -m snooty.performance_report <project-root>"
        )

    backend = Backend()
    root_path = Path(sys.argv[1])
    project = Project(root_path, backend, {})

    n_runs = 3
    for run_number in range(1, n_runs + 1):
        print(f"run {run_number}/{n_runs}")
        project.build(1)

    # Named "elapsed" rather than "time" so it does not read like the stdlib module.
    for name, elapsed in PerformanceLogger.singleton().times().items():
        print(f"{name}:{elapsed:10.4}")


if __name__ == "__main__":
main()
30 changes: 30 additions & 0 deletions snooty/util.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
import logging
import os
import time
import docutils.nodes
import docutils.parsers.rst.directives
import watchdog.events
import watchdog.observers
import watchdog.observers.api
from contextlib import contextmanager
from dataclasses import dataclass
from collections import defaultdict
from pathlib import Path, PurePath
from typing import (
cast,
Any,
Callable,
Container,
Counter,
List,
Dict,
Optional,
Tuple,
Expand Down Expand Up @@ -233,3 +237,29 @@ def split_domain(name: str) -> Tuple[str, str]:
return "", parts[0]

return parts[0], parts[1]


class PerformanceLogger:
    """Record how long named sections of work take.

    Durations are measured with ``time.perf_counter()`` and grouped by
    section name; ``times()`` reports the smallest duration seen per name.
    """

    # Shared instance handed out by ``singleton()``.
    _singleton: Optional["PerformanceLogger"] = None

    def __init__(self) -> None:
        # Section name -> every duration recorded for that section.
        self._times: Dict[str, List[float]] = defaultdict(list)

    @contextmanager
    def start(self, name: str) -> Iterator[None]:
        """Time the body of the ``with`` block and record it under ``name``.

        The duration is recorded even if the body raises.
        """
        start_time = time.perf_counter()
        try:
            yield None
        finally:
            self._times[name].append(time.perf_counter() - start_time)

    def times(self) -> Dict[str, float]:
        """Return the shortest recorded duration for each section name."""
        return {name: min(durations) for name, durations in self._times.items()}

    @classmethod
    def singleton(cls) -> "PerformanceLogger":
        """Return the shared instance, creating it if it does not exist yet.

        Creating on demand replaces an ``assert`` that would be stripped
        under ``python -O`` and could then let ``None`` escape.
        """
        if cls._singleton is None:
            cls._singleton = cls()
        return cls._singleton


# Create the shared instance eagerly at import time.
PerformanceLogger._singleton = PerformanceLogger()

0 comments on commit 0c91f74

Please sign in to comment.