From 75401ca005ce34b14ad0a31d0af206d2ac129e4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Tue, 12 Sep 2023 10:20:20 +0200 Subject: [PATCH] Add stat support (#159) --- docs/index.rst | 1 + docs/stats.rst | 8 ++++++++ scrapy_poet/page_input_providers.py | 30 +++++++++++++++++++++++++++++ setup.py | 2 +- tests/test_providers.py | 23 ++++++++++++++++++++++ 5 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 docs/stats.rst diff --git a/docs/index.rst b/docs/index.rst index 7e8563f3..f21cb4ca 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -44,6 +44,7 @@ To get started, see :ref:`intro-install` and :ref:`intro-tutorial`. :maxdepth: 1 rules-from-web-poet + stats providers testing diff --git a/docs/stats.rst b/docs/stats.rst new file mode 100644 index 00000000..b7c80225 --- /dev/null +++ b/docs/stats.rst @@ -0,0 +1,8 @@ +.. _stats: + +===== +Stats +===== + +scrapy-poet tracks :external+web-poet:ref:`web-poet stats <stats>` as part of +:ref:`Scrapy stats <topics-stats>`, prefixed with ``poet/stats/``. 
diff --git a/scrapy_poet/page_input_providers.py b/scrapy_poet/page_input_providers.py index e244ea7a..f0345f45 100644 --- a/scrapy_poet/page_input_providers.py +++ b/scrapy_poet/page_input_providers.py @@ -27,7 +27,9 @@ PageParams, RequestUrl, ResponseUrl, + Stats, ) +from web_poet.page_inputs.stats import StatCollector, StatNum from web_poet.pages import is_injectable from scrapy_poet.downloader import create_scrapy_downloader @@ -333,3 +335,31 @@ async def __call__( results.append(item) return results + + +class ScrapyPoetStatCollector(StatCollector): + def __init__(self, stats): + self._stats = stats + self._prefix = "poet/stats/" + + def set(self, key: str, value: Any) -> None: # noqa: D102 + self._stats.set_value(f"{self._prefix}{key}", value) + + def inc(self, key: str, value: StatNum = 1) -> None: # noqa: D102 + self._stats.inc_value(f"{self._prefix}{key}", value) + + +class StatsProvider(PageObjectInputProvider): + """This class provides :class:`web_poet.Stats + <web_poet.page_inputs.stats.Stats>` instances. + """ + + provided_classes = {Stats} + + def __call__(self, to_provide: Set[Callable], crawler: Crawler): + """Creates an :class:`web_poet.Stats + <web_poet.page_inputs.stats.Stats>` instance using Scrapy's + stat collector. + """ + + return [Stats(stat_collector=ScrapyPoetStatCollector(crawler.stats))] diff --git a/setup.py b/setup.py index df20b694..16cb841a 100755 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ "time_machine", "twisted >= 18.9.0", "url-matcher >= 0.2.0", - "web-poet >= 0.12.0", + "web-poet >= 0.15", ], classifiers=[ "Development Status :: 3 - Alpha", diff --git a/tests/test_providers.py b/tests/test_providers.py index 804af309..892d9583 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -18,6 +18,7 @@ ItemProvider, PageObjectInputProvider, PageParamsProvider, + StatsProvider, ) from scrapy_poet.utils.mockserver import get_ephemeral_port from scrapy_poet.utils.testing import ( @@ -253,3 +254,25 @@ def inside(): # garbage collected. 
inside() assert len(provider._cached_instances) == 0 + + +def test_stats_provider(settings): + crawler = get_crawler(Spider, settings) + injector = Injector(crawler) + provider = StatsProvider(injector) + + results = provider(set(), crawler) + + stats = results[0] + assert stats._stats._stats == crawler.stats + + stats.set("a", "1") + stats.set("b", 2) + stats.inc("b") + stats.inc("b", 5) + stats.inc("c") + + expected = {"a": "1", "b": 8, "c": 1} + expected = {f"poet/stats/{k}": v for k, v in expected.items()} + actual = {k: v for k, v in crawler.stats._stats.items() if k in expected} + assert actual == expected