Skip to content

Commit

Permalink
Add stat support (#159)
Browse files Browse the repository at this point in the history
  • Loading branch information
Gallaecio authored Sep 12, 2023
1 parent 5ebbb12 commit 75401ca
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ To get started, see :ref:`intro-install` and :ref:`intro-tutorial`.
:maxdepth: 1

rules-from-web-poet
stats
providers
testing

Expand Down
8 changes: 8 additions & 0 deletions docs/stats.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
.. _stats:

=====
Stats
=====

scrapy-poet tracks :external+web-poet:ref:`web-poet stats <stats>` as part of
:ref:`Scrapy stats <topics-stats>`, prefixed with ``poet/stats/``.
30 changes: 30 additions & 0 deletions scrapy_poet/page_input_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@
PageParams,
RequestUrl,
ResponseUrl,
Stats,
)
from web_poet.page_inputs.stats import StatCollector, StatNum
from web_poet.pages import is_injectable

from scrapy_poet.downloader import create_scrapy_downloader
Expand Down Expand Up @@ -333,3 +335,31 @@ async def __call__(

results.append(item)
return results


class ScrapyPoetStatCollector(StatCollector):
    """Adapter that forwards web-poet stat writes to Scrapy's stat collector.

    Every key is namespaced under the ``poet/stats/`` prefix so that stats
    written by page objects are easy to tell apart in the crawl stats.
    """

    def __init__(self, stats):
        # *stats* is presumably a Scrapy ``StatsCollector`` (e.g.
        # ``crawler.stats``) — it must expose set_value/inc_value.
        self._stats = stats
        self._prefix = "poet/stats/"

    def set(self, key: str, value: Any) -> None:
        """Set the prefixed stat *key* to *value*, replacing any prior value."""
        self._stats.set_value(self._prefix + key, value)

    def inc(self, key: str, value: StatNum = 1) -> None:
        """Increase the prefixed stat *key* by *value* (``1`` by default)."""
        self._stats.inc_value(self._prefix + key, value)


class StatsProvider(PageObjectInputProvider):
    """Provider of :class:`web_poet.Stats
    <web_poet.page_inputs.client.Stats>` instances.
    """

    provided_classes = {Stats}

    def __call__(self, to_provide: Set[Callable], crawler: Crawler):
        """Build a :class:`web_poet.Stats
        <web_poet.page_inputs.client.Stats>` instance backed by the
        crawler's Scrapy stat collector.
        """
        collector = ScrapyPoetStatCollector(crawler.stats)
        return [Stats(stat_collector=collector)]
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"time_machine",
"twisted >= 18.9.0",
"url-matcher >= 0.2.0",
"web-poet >= 0.12.0",
"web-poet >= 0.15",
],
classifiers=[
"Development Status :: 3 - Alpha",
Expand Down
23 changes: 23 additions & 0 deletions tests/test_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
ItemProvider,
PageObjectInputProvider,
PageParamsProvider,
StatsProvider,
)
from scrapy_poet.utils.mockserver import get_ephemeral_port
from scrapy_poet.utils.testing import (
Expand Down Expand Up @@ -253,3 +254,25 @@ def inside():
# garbage collected.
inside()
assert len(provider._cached_instances) == 0


def test_stats_provider(settings):
    """Verify that StatsProvider wires web-poet stats into Scrapy stats.

    Writes through the provided ``Stats`` object and checks that the values
    end up in the crawler's stat collector under the ``poet/stats/`` prefix.
    """
    crawler = get_crawler(Spider, settings)
    provider = StatsProvider(Injector(crawler))

    stats = provider(set(), crawler)[0]

    # The provided object must delegate to the crawler's own stat collector.
    assert stats._stats._stats == crawler.stats

    stats.set("a", "1")
    stats.set("b", 2)
    stats.inc("b")
    stats.inc("b", 5)
    stats.inc("c")

    prefix = "poet/stats/"
    expected = {
        f"{prefix}a": "1",
        f"{prefix}b": 8,  # 2, then +1, then +5
        f"{prefix}c": 1,  # inc() on a missing key starts from 0
    }
    recorded = {
        key: value
        for key, value in crawler.stats._stats.items()
        if key in expected
    }
    assert recorded == expected

0 comments on commit 75401ca

Please sign in to comment.