Fix typing issues for typed Scrapy. (#166)
wRAR authored Nov 4, 2024
1 parent 21c6c9c commit 44a9591
Showing 12 changed files with 52 additions and 55 deletions.
3 changes: 2 additions & 1 deletion scrapy_poet/downloadermiddlewares.py
@@ -99,6 +99,7 @@ def process_request(
return None

logger.debug(f"Using DummyResponse instead of downloading {request}")
assert self.crawler.stats
self.crawler.stats.inc_value("scrapy_poet/dummy_response_count")
return DummyResponse(url=request.url, request=request)

@@ -132,7 +133,7 @@ def _skip_dependency_creation(self, request: Request, spider: Spider) -> bool:
@inlineCallbacks
def process_response(
self, request: Request, response: Response, spider: Spider
) -> Generator[Deferred, object, Response]:
) -> Generator[Deferred, object, Union[Response, Request]]:
"""This method fills :attr:`scrapy.Request.cb_kwargs
<scrapy.http.Request.cb_kwargs>` with instances for the required Page
Objects found in the callback signature.
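
Note on the added `assert self.crawler.stats`: in typed Scrapy, `Crawler.stats` is declared `Optional`, so mypy rejects the `inc_value()` call until the `None` case is ruled out; the assert narrows the type without changing runtime behaviour on a healthy crawler. A minimal sketch of the pattern with stand-in classes (not Scrapy's real ones):

from typing import Optional

class StatsCollector:
    def inc_value(self, key: str) -> None:
        print(f"incremented {key}")

class Crawler:
    def __init__(self, stats: Optional[StatsCollector] = None) -> None:
        self.stats = stats

def count_dummy_response(crawler: Crawler) -> None:
    # Without the assert, mypy reports: Item "None" of "Optional[StatsCollector]"
    # has no attribute "inc_value"; the assert narrows the type for the call below.
    assert crawler.stats
    crawler.stats.inc_value("scrapy_poet/dummy_response_count")

count_dummy_response(Crawler(StatsCollector()))
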
4 changes: 3 additions & 1 deletion scrapy_poet/injection.py
@@ -272,6 +272,7 @@ def build_instances(
)
# All the remaining dependencies are internal so they can be built just
# following the andi plan.
assert self.crawler.stats
for cls, kwargs_spec in plan.dependencies:
if cls not in instances.keys():
result_cls: type = cast(type, cls)
@@ -295,6 +296,7 @@ def build_instances_from_providers(
plan: andi.Plan,
):
"""Build dependencies handled by registered providers"""
assert self.crawler.stats
instances: Dict[Callable, Any] = {}
scrapy_provided_dependencies = self.available_dependencies_for_providers(
request, response
@@ -320,7 +322,7 @@ )
)
# This one should take `web_poet.HttpRequest` but `scrapy.Request` will work as well
# TODO: add `scrapy.Request` type in request_fingerprint() annotations
fingerprint = f"{provider.name}_{request_fingerprint(request)}"
fingerprint = f"{provider.name}_{request_fingerprint(request)}" # type: ignore[arg-type]
# Return the data if it is already in the cache
try:
data = self.cache[fingerprint].items()
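
On the `# type: ignore[arg-type]` above: per the in-source comments, `request_fingerprint()` is annotated for `web_poet.HttpRequest` while a `scrapy.Request` is passed and works at runtime, and the error-code-scoped ignore silences only that mismatch. A hypothetical illustration of the same technique, not scrapy-poet's code:

def fingerprint(value: str) -> str:
    # Stand-in for a helper whose annotation is narrower than what callers pass.
    return f"fp_{value}"

request_id = 12345
# Only the arg-type error code is suppressed; other mypy errors on this line
# would still be reported, unlike a bare "# type: ignore".
key = fingerprint(request_id)  # type: ignore[arg-type]
print(key)
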
1 change: 1 addition & 0 deletions scrapy_poet/page_input_providers.py
@@ -189,6 +189,7 @@ def __call__(self, to_provide: Set[Callable], crawler: Crawler):
<web_poet.page_inputs.client.HttpClient>` instance using Scrapy's
downloader.
"""
assert crawler.engine
downloader = create_scrapy_downloader(crawler.engine.download)
save_responses = crawler.settings.getbool("_SCRAPY_POET_SAVEFIXTURE")
return [
1 change: 1 addition & 0 deletions scrapy_poet/spidermiddlewares.py
@@ -21,6 +21,7 @@ def process_spider_exception(

if not isinstance(exception, Retry):
return None
assert response.request
new_request_or_none = get_retry_request(
response.request,
spider=spider,
3 changes: 1 addition & 2 deletions scrapy_poet/utils/__init__.py
@@ -20,8 +20,7 @@
try:
from scrapy.http.request import NO_CALLBACK # available on Scrapy >= 2.8
except ImportError:
# NO_CALLBACK = lambda: None # noqa: E731
NO_CALLBACK = None
NO_CALLBACK = None # type: ignore[assignment]


def get_scrapy_data_path(createdir: bool = True, default_dir: str = ".scrapy") -> str:
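
On the `NO_CALLBACK = None  # type: ignore[assignment]` fallback: mypy binds the type of `NO_CALLBACK` from the successful import branch, so assigning `None` in the `except ImportError` branch is an incompatible assignment on newer Scrapy and needs a targeted ignore. The same pattern, shown with a hypothetical optional import:

try:
    from some_optional_library import FEATURE  # hypothetical module name
except ImportError:
    # mypy types FEATURE from the import branch above, so the None fallback
    # needs a targeted ignore rather than a blanket one.
    FEATURE = None  # type: ignore[assignment]

print(FEATURE)
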
8 changes: 0 additions & 8 deletions scrapy_poet/utils/testing.py
@@ -1,7 +1,6 @@
import json
from inspect import isasyncgenfunction
from typing import Dict
from unittest import mock

from scrapy import Spider, signals
from scrapy.crawler import Crawler
@@ -272,10 +271,3 @@ async def parse(*args, **kwargs):
# Mimic type annotations
parse.__annotations__ = callback.__annotations__
return parse


class AsyncMock(mock.MagicMock):
"""workaround since python 3.7 doesn't ship with asyncmock."""

async def __call__(self, *args, **kwargs):
return super().__call__(*args, **kwargs)
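
The deleted `AsyncMock` class was a workaround for Python 3.7; `unittest.mock.AsyncMock` has shipped with the standard library since Python 3.8, which is what the test changes below switch to. A small, self-contained usage sketch (URL and return value are made up):

import asyncio
from unittest import mock

async def main() -> None:
    # AsyncMock returns an awaitable automatically and records awaits.
    downloader = mock.AsyncMock(return_value="fake response")
    result = await downloader("https://example.com")
    assert result == "fake response"
    downloader.assert_awaited_once_with("https://example.com")

asyncio.run(main())
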
34 changes: 17 additions & 17 deletions tests/test_downloader.py
@@ -29,7 +29,6 @@
)
from scrapy_poet.utils.mockserver import MockServer
from scrapy_poet.utils.testing import (
AsyncMock,
DelayedResource,
EchoResource,
StatusResource,
@@ -40,7 +39,7 @@

@pytest.fixture
def scrapy_downloader() -> Callable:
mock_downloader = AsyncMock()
mock_downloader = mock.AsyncMock()
return create_scrapy_downloader(mock_downloader)


@@ -69,12 +68,12 @@ async def test_scrapy_poet_downloader(fake_http_response) -> None:
req = web_poet.HttpRequest("https://example.com")

with mock.patch(
"scrapy_poet.downloader.maybe_deferred_to_future", new_callable=AsyncMock
"scrapy_poet.downloader.maybe_deferred_to_future", new_callable=mock.AsyncMock
) as mock_dtf:

mock_dtf.return_value = fake_http_response

mock_downloader = mock.MagicMock(return_value=AsyncMock)
mock_downloader = mock.MagicMock(return_value=mock.AsyncMock)
scrapy_downloader = create_scrapy_downloader(mock_downloader)

response = await scrapy_downloader(req)
@@ -96,10 +95,10 @@ async def test_scrapy_poet_downloader_ignored_request() -> None:
req = web_poet.HttpRequest("https://example.com")

with mock.patch(
"scrapy_poet.downloader.maybe_deferred_to_future", new_callable=AsyncMock
"scrapy_poet.downloader.maybe_deferred_to_future", new_callable=mock.AsyncMock
) as mock_dtf:
mock_dtf.side_effect = scrapy.exceptions.IgnoreRequest
mock_downloader = mock.MagicMock(return_value=AsyncMock)
mock_downloader = mock.MagicMock(return_value=mock.AsyncMock)
scrapy_downloader = create_scrapy_downloader(mock_downloader)

with pytest.raises(web_poet.exceptions.HttpError):
@@ -111,10 +110,10 @@ async def test_scrapy_poet_downloader_twisted_error() -> None:
req = web_poet.HttpRequest("https://example.com")

with mock.patch(
"scrapy_poet.downloader.maybe_deferred_to_future", new_callable=AsyncMock
"scrapy_poet.downloader.maybe_deferred_to_future", new_callable=mock.AsyncMock
) as mock_dtf:
mock_dtf.side_effect = twisted.internet.error.TimeoutError
mock_downloader = mock.MagicMock(return_value=AsyncMock)
mock_downloader = mock.MagicMock(return_value=mock.AsyncMock)
scrapy_downloader = create_scrapy_downloader(mock_downloader)

with pytest.raises(web_poet.exceptions.HttpRequestError):
@@ -126,10 +125,10 @@ async def test_scrapy_poet_downloader_head_redirect(fake_http_response) -> None:
req = web_poet.HttpRequest("https://example.com", method="HEAD")

with mock.patch(
"scrapy_poet.downloader.maybe_deferred_to_future", new_callable=AsyncMock
"scrapy_poet.downloader.maybe_deferred_to_future", new_callable=mock.AsyncMock
) as mock_dtf:
mock_dtf.return_value = fake_http_response
mock_downloader = mock.MagicMock(return_value=AsyncMock)
mock_downloader = mock.MagicMock(return_value=mock.AsyncMock)
scrapy_downloader = create_scrapy_downloader(mock_downloader)

await scrapy_downloader(req)
@@ -454,6 +453,7 @@ async def __call__(
custom_request = Request(
request.url, body=request.body, callback=NO_CALLBACK
)
assert crawler.engine
scrapy_response: Response = await maybe_deferred_to_future(
crawler.engine.download(custom_request)
)
@@ -493,7 +493,7 @@ class TestSpider(Spider):
def start_requests(self):
yield Request(server.root_url, callback=self.parse)

async def parse(self, response: DummyResponse, page: ItemPage):
async def parse(self, response: DummyResponse, page: ItemPage): # type: ignore[override]
item = await page.to_item()
items.append(item)

@@ -570,7 +570,7 @@ def test_parse_callback_none_dummy_response() -> None:
class TestSpider(BaseSpider):
start_urls = [server.root_url]

def parse(self, response: DummyResponse):
def parse(self, response: DummyResponse): # type: ignore[override]
collected["response"] = response

crawler = make_crawler(TestSpider)
@@ -662,7 +662,7 @@ def test_parse_callback_none_with_deps(caplog) -> None:
class TestSpider(BaseSpider):
start_urls = [server.root_url]

def parse(self, response: DummyResponse, page: BasicPage):
def parse(self, response: DummyResponse, page: BasicPage): # type: ignore[override]
pass

crawler = make_crawler(TestSpider)
@@ -705,7 +705,7 @@ def start_requests(self):
page = BasicPage(web_poet.HttpResponse("https://example.com", b""))
yield Request(server.root_url, cb_kwargs={"page": page})

def parse(self, response: DummyResponse, page: BasicPage):
def parse(self, response: DummyResponse, page: BasicPage): # type: ignore[override]
collected["response"] = response

crawler = make_crawler(TestSpider)
@@ -740,7 +740,7 @@ def start_requests(self):
page = BasicPage(web_poet.HttpResponse("https://example.com", b""))
yield Request(server.root_url, cb_kwargs={"page": page})

def parse(
def parse( # type: ignore[override]
self,
response: DummyResponse,
page: BasicPage,
@@ -782,7 +782,7 @@ def test_parse_callback_NO_CALLBACK(caplog) -> None:
class TestSpider(BaseSpider):
start_urls = [server.root_url]

def parse(self, response: DummyResponse):
def parse(self, response: DummyResponse): # type: ignore[override]
collected["response"] = response

crawler = make_crawler(TestSpider)
@@ -814,7 +814,7 @@ def test_parse_callback_NO_CALLBACK_with_page_dep(caplog) -> None:
class TestSpider(BaseSpider):
start_urls = [server.root_url]

def parse(self, response: DummyResponse, page: BasicPage):
def parse(self, response: DummyResponse, page: BasicPage): # type: ignore[override]
collected["response"] = response

crawler = make_crawler(TestSpider)
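
On the repeated `# type: ignore[override]` comments in these tests: the callbacks narrow the `response` parameter from `Response` to `DummyResponse` (or add extra parameters), which violates the Liskov rule mypy enforces for overrides. A simplified stand-in, not Scrapy's actual class hierarchy:

class Response:
    pass

class DummyResponse(Response):
    pass

class BaseSpider:
    def parse(self, response: Response) -> None:
        print("base parse")

class NarrowingSpider(BaseSpider):
    # Narrowing the parameter type breaks substitutability for callers that
    # pass a plain Response, so mypy flags the override; the ignore records
    # that these callbacks rely on receiving a DummyResponse.
    def parse(self, response: DummyResponse) -> None:  # type: ignore[override]
        print("got", type(response).__name__)

NarrowingSpider().parse(DummyResponse())
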
1 change: 1 addition & 0 deletions tests/test_injection.py
@@ -302,6 +302,7 @@ def _assert_instances(
reqmeta: Optional[Dict[str, Any]] = None,
) -> Generator[Any, Any, None]:
response = get_response_for_testing(callback, meta=reqmeta)
assert response.request
request = response.request

plan = injector.build_plan(response.request)
10 changes: 5 additions & 5 deletions tests/test_middleware.py
@@ -301,7 +301,7 @@ class SkipDownloadSpider(scrapy.Spider):
def start_requests(self):
yield Request(url=self.url, callback=self.parse)

def parse(self, response: DummyResponse):
def parse(self, response: DummyResponse): # type: ignore[override]
return {
"response": response,
}
@@ -332,7 +332,7 @@ class RequestUrlSpider(scrapy.Spider):
def start_requests(self):
yield Request(url=self.url, callback=self.parse)

def parse(self, response: DummyResponse, url: RequestUrl):
def parse(self, response: DummyResponse, url: RequestUrl): # type: ignore[override]
return {
"response": response,
"url": url,
@@ -359,7 +359,7 @@ class ResponseUrlSpider(scrapy.Spider):
def start_requests(self):
yield Request(url=self.url, callback=self.parse)

def parse(self, response: DummyResponse, url: ResponseUrl):
def parse(self, response: DummyResponse, url: ResponseUrl): # type: ignore[override]
return {
"response": response,
"url": url,
@@ -396,7 +396,7 @@ class ResponseUrlPageSpider(scrapy.Spider):
def start_requests(self):
yield Request(url=self.url, callback=self.parse)

def parse(self, response: DummyResponse, page: ResponseUrlPage):
def parse(self, response: DummyResponse, page: ResponseUrlPage): # type: ignore[override]
return page.to_item()


@@ -427,7 +427,7 @@ class RequestUrlPageSpider(scrapy.Spider):
def start_requests(self):
yield Request(url=self.url, callback=self.parse)

def parse(self, response: DummyResponse, page: RequestUrlPage):
def parse(self, response: DummyResponse, page: RequestUrlPage): # type: ignore[override]
return page.to_item()


17 changes: 8 additions & 9 deletions tests/test_providers.py
@@ -30,12 +30,7 @@
StatsProvider,
)
from scrapy_poet.utils.mockserver import get_ephemeral_port
from scrapy_poet.utils.testing import (
AsyncMock,
HtmlResource,
ProductHtml,
crawl_single_item,
)
from scrapy_poet.utils.testing import HtmlResource, ProductHtml, crawl_single_item


class NonProductHtml(HtmlResource):
@@ -75,7 +70,9 @@ def __call__(
assert isinstance(spider, scrapy.Spider)
ret: List[Any] = []
if Price in to_provide:
ret.append(Price(response.css(".price::text").get()))
price = response.css(".price::text").get()
assert price is not None
ret.append(Price(price))
if Html in to_provide:
ret.append(Html("Price Html!"))
return ret
@@ -89,7 +86,9 @@ def __call__(self, to_provide, response: scrapy.http.Response, settings: Setting
assert isinstance(settings, Settings)
ret: List[Any] = []
if Name in to_provide:
ret.append(Name(response.css(".name::text").get()))
name = response.css(".name::text").get()
assert name is not None
ret.append(Name(name))
if Html in to_provide:
ret.append(Html("Name Html!"))
return ret
@@ -200,7 +199,7 @@ def test_price_first_spider(settings):
@ensureDeferred
async def test_http_client_provider(settings):
crawler = get_crawler(Spider, settings)
crawler.engine = AsyncMock()
crawler.engine = mock.AsyncMock()
injector = Injector(crawler)

with mock.patch(
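
On the provider changes above: `response.css(...).get()` returns `Optional[str]`, so passing it straight to `Price(...)` or `Name(...)` fails the type check; the intermediate variable plus `assert ... is not None` narrows it first. A minimal sketch with a stand-in for the selector call:

from typing import Optional

def css_get(markup: str, class_name: str) -> Optional[str]:
    # Stand-in for parsel's SelectorList.get(), which returns None on no match.
    marker = f'class="{class_name}">'
    if marker not in markup:
        return None
    return markup.split(marker, 1)[1].split("<", 1)[0]

html = '<p class="price">10.00</p>'
price = css_get(html, "price")
assert price is not None  # narrows Optional[str] to str before constructing the item
print(price)
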
24 changes: 12 additions & 12 deletions tests/test_response_required_logic.py
@@ -22,13 +22,7 @@
HttpResponseProvider,
PageObjectInputProvider,
)
from scrapy_poet.utils import is_min_scrapy_version

# See: https://github.com/scrapinghub/scrapy-poet/issues/118
try:
from scrapy.http.request import NO_CALLBACK # available on Scrapy >= 2.8
except ImportError:
NO_CALLBACK = lambda: None # noqa: E731
from scrapy_poet.utils import NO_CALLBACK, is_min_scrapy_version


@attr.s(auto_attribs=True)
@@ -70,13 +64,13 @@ def __call__(self, to_provide):
class TextProductProvider(HttpResponseProvider):
# This is wrong. You should not annotate provider dependencies with classes
# like TextResponse or HtmlResponse, you should use Response instead.
def __call__(self, to_provide, response: TextResponse):
def __call__(self, to_provide, response: TextResponse): # type: ignore[override]
return super().__call__(to_provide, response)


class StringProductProvider(HttpResponseProvider):
def __call__(self, to_provide, response: str):
return super().__call__(to_provide, response)
def __call__(self, to_provide, response: str): # type: ignore[override]
return super().__call__(to_provide, response) # type: ignore[arg-type]


@attr.s(auto_attribs=True)
@@ -224,8 +218,11 @@ def test_is_provider_using_response():
reason="tests Scrapy < 2.8 before NO_CALLBACK was introduced",
)
def test_is_callback_using_response_for_scrapy28_below() -> None:
def cb(_: Any) -> Any:
return _

spider = MySpider()
request = Request("https://example.com", callback=lambda _: _)
request = Request("https://example.com", callback=cb)
assert is_callback_requiring_scrapy_response(spider.parse, request.callback) is True
assert (
is_callback_requiring_scrapy_response(spider.parse2, request.callback) is True
@@ -362,8 +359,11 @@ def test_is_callback_using_response_for_scrapy28_below() -> None:
reason="NO_CALLBACK not available in Scrapy < 2.8",
)
def test_is_callback_using_response_for_scrapy28_and_above() -> None:
def cb(_: Any) -> Any:
return _

spider = MySpider()
request_with_callback = Request("https://example.com", callback=lambda _: _)
request_with_callback = Request("https://example.com", callback=cb)
request_no_callback = Request("https://example.com", callback=NO_CALLBACK)

with warnings.catch_warnings(record=True) as caught_warnings:
1 change: 1 addition & 0 deletions tox.ini
@@ -79,6 +79,7 @@ deps =
deps =
mypy==1.11.2
pytest
Scrapy @ git+https://github.com/scrapy/scrapy@master

commands = mypy --ignore-missing-imports scrapy_poet tests

