From d5cff9dc4a4518bd5f506353366b23a946097bc3 Mon Sep 17 00:00:00 2001 From: Kevin Lloyd Bernal Date: Mon, 25 Apr 2022 20:25:36 +0800 Subject: [PATCH 1/2] reorganize page_inputs.py as a submodule; move HttpClient to it --- tests/test_requests.py | 8 +- web_poet/__init__.py | 6 +- web_poet/page_inputs/__init__.py | 10 ++ web_poet/page_inputs/client.py | 153 ++++++++++++++++++ .../{page_inputs.py => page_inputs/http.py} | 15 +- web_poet/page_inputs/meta.py | 8 + web_poet/requests.py | 153 ------------------ 7 files changed, 179 insertions(+), 174 deletions(-) create mode 100644 web_poet/page_inputs/__init__.py create mode 100644 web_poet/page_inputs/client.py rename web_poet/{page_inputs.py => page_inputs/http.py} (96%) create mode 100644 web_poet/page_inputs/meta.py diff --git a/tests/test_requests.py b/tests/test_requests.py index 13ca72de..694e0e31 100644 --- a/tests/test_requests.py +++ b/tests/test_requests.py @@ -3,15 +3,13 @@ import pytest from web_poet.exceptions import RequestBackendError from web_poet.page_inputs import ( + HttpClient, HttpRequest, HttpResponse, HttpRequestBody, HttpRequestHeaders ) -from web_poet.requests import ( - HttpClient, - request_backend_var, -) +from web_poet.requests import request_backend_var @pytest.fixture @@ -47,7 +45,7 @@ async def test_perform_request_from_httpclient(async_mock): async def test_http_client_single_requests(async_mock): client = HttpClient(async_mock) - with mock.patch("web_poet.requests.HttpRequest") as mock_request: + with mock.patch("web_poet.page_inputs.client.HttpRequest") as mock_request: response = await client.request("url") response.url == "url" diff --git a/web_poet/__init__.py b/web_poet/__init__.py index cd71d07d..03943893 100644 --- a/web_poet/__init__.py +++ b/web_poet/__init__.py @@ -1,10 +1,8 @@ from .pages import WebPage, ItemPage, ItemWebPage, Injectable -from .requests import ( - request_backend_var, - HttpClient, -) +from .requests import request_backend_var from .page_inputs import ( Meta, + HttpClient, HttpRequest, HttpResponse, HttpRequestHeaders, diff --git a/web_poet/page_inputs/__init__.py b/web_poet/page_inputs/__init__.py new file mode 100644 index 00000000..9491a5c0 --- /dev/null +++ b/web_poet/page_inputs/__init__.py @@ -0,0 +1,10 @@ +from .meta import Meta +from .client import HttpClient +from .http import ( + HttpRequest, + HttpResponse, + HttpRequestHeaders, + HttpResponseHeaders, + HttpRequestBody, + HttpResponseBody, +) diff --git a/web_poet/page_inputs/client.py b/web_poet/page_inputs/client.py new file mode 100644 index 00000000..e4581125 --- /dev/null +++ b/web_poet/page_inputs/client.py @@ -0,0 +1,153 @@ +"""This module has a full support for :mod:`asyncio` that enables developers to +perform asynchronous additional requests inside of Page Objects. + +Note that the implementation to fully execute any :class:`~.Request` is not +handled in this module. With that, the framework using **web-poet** must supply +the implementation. + +You can read more about this in the :ref:`advanced-downloader-impl` documentation. +""" + +import asyncio +import logging +from typing import Optional, Dict, List, Union, Callable + +from web_poet.requests import request_backend_var +from web_poet.exceptions import RequestBackendError +from web_poet.page_inputs.http import ( + HttpRequest, + HttpRequestHeaders, + HttpRequestBody, + HttpResponse, +) + +logger = logging.getLogger(__name__) + +_StrMapping = Dict[str, str] +_Headers = Union[_StrMapping, HttpRequestHeaders] +_Body = Union[bytes, HttpRequestBody] + + +async def _perform_request(request: HttpRequest) -> HttpResponse: + """Given a :class:`~.Request`, execute it using the **request implementation** + that was set in the ``web_poet.request_backend_var`` :mod:`contextvars` + instance. + + .. warning:: + By convention, this function should return a :class:`~.HttpResponse`. + However, the underlying downloader assigned in + ``web_poet.request_backend_var`` might change that, depending on + how the framework using **web-poet** implements it. + """ + + logger.info(f"Requesting page: {request}") + + try: + request_backend = request_backend_var.get() + except LookupError: + raise RequestBackendError( + "Additional requests are used inside the Page Object but the " + "current framework has not set any HttpRequest Backend via " + "'web_poet.request_backend_var'" + ) + + response_data: HttpResponse = await request_backend(request) + return response_data + + +class HttpClient: + """A convenient client to easily execute requests. + + By default, it uses the request implementation assigned in the + ``web_poet.request_backend_var`` which is a :mod:`contextvars` instance to + download the actual requests. However, it can easily be overridable by + providing an optional ``request_downloader`` callable. + + Providing the request implementation by dependency injection would be a good + alternative solution when you want to avoid setting up :mod:`contextvars` + like ``web_poet.request_backend_var``. + + In any case, this doesn't contain any implementation about how to execute + any requests fed into it. When setting that up, make sure that the downloader + implementation returns a :class:`~.HttpResponse` instance. + """ + + def __init__(self, request_downloader: Callable = None): + self._request_downloader = request_downloader or _perform_request + + async def request( + self, + url: str, + *, + method: str = "GET", + headers: Optional[_Headers] = None, + body: Optional[_Body] = None, + ) -> HttpResponse: + """This is a shortcut for creating a :class:`~.HttpRequest` instance and executing + that request. + + A :class:`~.HttpResponse` instance should then be returned. + + .. warning:: + By convention, the request implementation supplied optionally to + :class:`~.HttpClient` should return a :class:`~.HttpResponse` instance. + However, the underlying implementation supplied might change that, + depending on how the framework using **web-poet** implements it. + """ + headers = headers or {} + body = body or b"" + req = HttpRequest(url=url, method=method, headers=headers, body=body) + return await self.execute(req) + + async def get( + self, url: str, *, headers: Optional[_Headers] = None + ) -> HttpResponse: + """Similar to :meth:`~.HttpClient.request` but peforming a ``GET`` + request. + """ + return await self.request(url=url, method="GET", headers=headers) + + async def post( + self, + url: str, + *, + headers: Optional[_Headers] = None, + body: Optional[_Body] = None, + ) -> HttpResponse: + """Similar to :meth:`~.HttpClient.request` but performing a ``POST`` + request. + """ + return await self.request(url=url, method="POST", headers=headers, body=body) + + async def execute(self, request: HttpRequest) -> HttpResponse: + """Accepts a single instance of :class:`~.HttpRequest` and executes it + using the request implementation configured in the :class:`~.HttpClient` + instance. + + This returns a single :class:`~.HttpResponse`. + """ + return await self._request_downloader(request) + + async def batch_execute( + self, *requests: HttpRequest, return_exceptions: bool = False + ) -> List[Union[HttpResponse, Exception]]: + """Similar to :meth:`~.HttpClient.execute` but accepts a collection of + :class:`~.HttpRequest` instances that would be batch executed. + + The order of the :class:`~.HttpResponses` would correspond to the order + of :class:`~.HttpRequest` passed. + + If any of the :class:`~.HttpRequest` raises an exception upon execution, + the exception is raised. + + To prevent this, the actual exception can be returned alongside any + successful :class:`~.HttpResponse`. This enables salvaging any usable + responses despite any possible failures. This can be done by setting + ``True`` to the ``return_exceptions`` parameter. + """ + + coroutines = [self._request_downloader(r) for r in requests] + responses = await asyncio.gather( + *coroutines, return_exceptions=return_exceptions + ) + return responses diff --git a/web_poet/page_inputs.py b/web_poet/page_inputs/http.py similarity index 96% rename from web_poet/page_inputs.py rename to web_poet/page_inputs/http.py index 478429f9..e7ef2aca 100644 --- a/web_poet/page_inputs.py +++ b/web_poet/page_inputs/http.py @@ -14,7 +14,8 @@ from web_poet.utils import memoizemethod_noargs T_headers = TypeVar("T_headers", bound="HttpResponseHeaders") -AnyStrDict = Dict[AnyStr, Union[AnyStr, List[AnyStr], Tuple[AnyStr, ...]]] + +_AnyStrDict = Dict[AnyStr, Union[AnyStr, List[AnyStr], Tuple[AnyStr, ...]]] class HttpRequestBody(bytes): @@ -99,7 +100,7 @@ class HttpResponseHeaders(_HttpHeaders): @classmethod def from_bytes_dict( - cls: Type[T_headers], arg: AnyStrDict, encoding: str = "utf-8" + cls: Type[T_headers], arg: _AnyStrDict, encoding: str = "utf-8" ) -> T_headers: """An alternative constructor for instantiation where the header-value pairs could be in raw bytes form. @@ -270,13 +271,3 @@ def _auto_detect_fun(self, body: bytes) -> Optional[str]: except UnicodeError: continue return resolve_encoding(enc) - - -class Meta(dict): - """Container class that could contain any arbitrary data to be passed into - a Page Object. - - Note that this is simply a subclass of Python's ``dict``. - """ - - pass diff --git a/web_poet/page_inputs/meta.py b/web_poet/page_inputs/meta.py new file mode 100644 index 00000000..bbc61cea --- /dev/null +++ b/web_poet/page_inputs/meta.py @@ -0,0 +1,8 @@ +class Meta(dict): + """Container class that could contain any arbitrary data to be passed into + a Page Object. + + Note that this is simply a subclass of Python's ``dict``. + """ + + pass diff --git a/web_poet/requests.py b/web_poet/requests.py index 2030680c..3582a936 100644 --- a/web_poet/requests.py +++ b/web_poet/requests.py @@ -1,158 +1,5 @@ -"""This module has a full support for :mod:`asyncio` that enables developers to -perform asynchronous additional requests inside of Page Objects. - -Note that the implementation to fully execute any :class:`~.Request` is not -handled in this module. With that, the framework using **web-poet** must supply -the implementation. - -You can read more about this in the :ref:`advanced-downloader-impl` documentation. -""" - -import asyncio -import logging from contextvars import ContextVar -from typing import Optional, List, Callable, Union, Dict - -import attrs - -from web_poet.page_inputs import ( - HttpResponse, - HttpRequest, - HttpRequestHeaders, - HttpRequestBody, -) -from web_poet.exceptions import RequestBackendError - -logger = logging.getLogger(__name__) - -_StrMapping = Dict[str, str] -_Headers = Union[_StrMapping, HttpRequestHeaders] -_Body = Union[bytes, HttpRequestBody] - # Frameworks that wants to support additional requests in ``web-poet`` should # set the appropriate implementation for requesting data. request_backend_var: ContextVar = ContextVar("request_backend") - - -async def _perform_request(request: HttpRequest) -> HttpResponse: - """Given a :class:`~.Request`, execute it using the **request implementation** - that was set in the ``web_poet.request_backend_var`` :mod:`contextvars` - instance. - - .. warning:: - By convention, this function should return a :class:`~.HttpResponse`. - However, the underlying downloader assigned in - ``web_poet.request_backend_var`` might change that, depending on - how the framework using **web-poet** implements it. - """ - - logger.info(f"Requesting page: {request}") - - try: - request_backend = request_backend_var.get() - except LookupError: - raise RequestBackendError( - "Additional requests are used inside the Page Object but the " - "current framework has not set any HttpRequest Backend via " - "'web_poet.request_backend_var'" - ) - - response_data: HttpResponse = await request_backend(request) - return response_data - - -class HttpClient: - """A convenient client to easily execute requests. - - By default, it uses the request implementation assigned in the - ``web_poet.request_backend_var`` which is a :mod:`contextvars` instance to - download the actual requests. However, it can easily be overridable by - providing an optional ``request_downloader`` callable. - - Providing the request implementation by dependency injection would be a good - alternative solution when you want to avoid setting up :mod:`contextvars` - like ``web_poet.request_backend_var``. - - In any case, this doesn't contain any implementation about how to execute - any requests fed into it. When setting that up, make sure that the downloader - implementation returns a :class:`~.HttpResponse` instance. - """ - - def __init__(self, request_downloader: Callable = None): - self._request_downloader = request_downloader or _perform_request - - async def request( - self, - url: str, - *, - method: str = "GET", - headers: Optional[_Headers] = None, - body: Optional[_Body] = None, - ) -> HttpResponse: - """This is a shortcut for creating a :class:`~.HttpRequest` instance and executing - that request. - - A :class:`~.HttpResponse` instance should then be returned. - - .. warning:: - By convention, the request implementation supplied optionally to - :class:`~.HttpClient` should return a :class:`~.HttpResponse` instance. - However, the underlying implementation supplied might change that, - depending on how the framework using **web-poet** implements it. - """ - headers = headers or {} - body = body or b"" - req = HttpRequest(url=url, method=method, headers=headers, body=body) - return await self.execute(req) - - async def get(self, url: str, *, headers: Optional[_Headers] = None) -> HttpResponse: - """Similar to :meth:`~.HttpClient.request` but peforming a ``GET`` - request. - """ - return await self.request(url=url, method="GET", headers=headers) - - async def post( - self, - url: str, - *, - headers: Optional[_Headers] = None, - body: Optional[_Body] = None, - ) -> HttpResponse: - """Similar to :meth:`~.HttpClient.request` but performing a ``POST`` - request. - """ - return await self.request(url=url, method="POST", headers=headers, body=body) - - async def execute(self, request: HttpRequest) -> HttpResponse: - """Accepts a single instance of :class:`~.HttpRequest` and executes it - using the request implementation configured in the :class:`~.HttpClient` - instance. - - This returns a single :class:`~.HttpResponse`. - """ - return await self._request_downloader(request) - - async def batch_execute( - self, *requests: HttpRequest, return_exceptions: bool = False - ) -> List[Union[HttpResponse, Exception]]: - """Similar to :meth:`~.HttpClient.execute` but accepts a collection of - :class:`~.HttpRequest` instances that would be batch executed. - - The order of the :class:`~.HttpResponses` would correspond to the order - of :class:`~.HttpRequest` passed. - - If any of the :class:`~.HttpRequest` raises an exception upon execution, - the exception is raised. - - To prevent this, the actual exception can be returned alongside any - successful :class:`~.HttpResponse`. This enables salvaging any usable - responses despite any possible failures. This can be done by setting - ``True`` to the ``return_exceptions`` parameter. - """ - - coroutines = [self._request_downloader(r) for r in requests] - responses = await asyncio.gather( - *coroutines, return_exceptions=return_exceptions - ) - return responses From a4f1dcc019809c3da949944ec59d1c19e6a4de32 Mon Sep 17 00:00:00 2001 From: Kevin Lloyd Bernal Date: Thu, 28 Apr 2022 19:08:31 +0800 Subject: [PATCH 2/2] move _perform_request() back into requests.py --- web_poet/page_inputs/client.py | 32 +----------------------------- web_poet/requests.py | 36 ++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 31 deletions(-) diff --git a/web_poet/page_inputs/client.py b/web_poet/page_inputs/client.py index e4581125..8760458f 100644 --- a/web_poet/page_inputs/client.py +++ b/web_poet/page_inputs/client.py @@ -9,11 +9,9 @@ """ import asyncio -import logging from typing import Optional, Dict, List, Union, Callable -from web_poet.requests import request_backend_var -from web_poet.exceptions import RequestBackendError +from web_poet.requests import request_backend_var, _perform_request from web_poet.page_inputs.http import ( HttpRequest, HttpRequestHeaders, @@ -21,40 +19,12 @@ HttpResponse, ) -logger = logging.getLogger(__name__) _StrMapping = Dict[str, str] _Headers = Union[_StrMapping, HttpRequestHeaders] _Body = Union[bytes, HttpRequestBody] -async def _perform_request(request: HttpRequest) -> HttpResponse: - """Given a :class:`~.Request`, execute it using the **request implementation** - that was set in the ``web_poet.request_backend_var`` :mod:`contextvars` - instance. - - .. warning:: - By convention, this function should return a :class:`~.HttpResponse`. - However, the underlying downloader assigned in - ``web_poet.request_backend_var`` might change that, depending on - how the framework using **web-poet** implements it. - """ - - logger.info(f"Requesting page: {request}") - - try: - request_backend = request_backend_var.get() - except LookupError: - raise RequestBackendError( - "Additional requests are used inside the Page Object but the " - "current framework has not set any HttpRequest Backend via " - "'web_poet.request_backend_var'" - ) - - response_data: HttpResponse = await request_backend(request) - return response_data - - class HttpClient: """A convenient client to easily execute requests. diff --git a/web_poet/requests.py b/web_poet/requests.py index 3582a936..f2006846 100644 --- a/web_poet/requests.py +++ b/web_poet/requests.py @@ -1,5 +1,41 @@ +import logging from contextvars import ContextVar +from web_poet.exceptions import RequestBackendError +from web_poet.page_inputs.http import ( + HttpRequest, + HttpResponse, +) + +logger = logging.getLogger(__name__) + # Frameworks that wants to support additional requests in ``web-poet`` should # set the appropriate implementation for requesting data. request_backend_var: ContextVar = ContextVar("request_backend") + + +async def _perform_request(request: HttpRequest) -> HttpResponse: + """Given a :class:`~.Request`, execute it using the **request implementation** + that was set in the ``web_poet.request_backend_var`` :mod:`contextvars` + instance. + + .. warning:: + By convention, this function should return a :class:`~.HttpResponse`. + However, the underlying downloader assigned in + ``web_poet.request_backend_var`` might change that, depending on + how the framework using **web-poet** implements it. + """ + + logger.info(f"Requesting page: {request}") + + try: + request_backend = request_backend_var.get() + except LookupError: + raise RequestBackendError( + "Additional requests are used inside the Page Object but the " + "current framework has not set any HttpRequest Backend via " + "'web_poet.request_backend_var'" + ) + + response_data: HttpResponse = await request_backend(request) + return response_data