Skip to content

Commit

Permalink
Merge pull request #33 from scrapinghub/from_bytes
Browse files Browse the repository at this point in the history
add from_bytes_dict alternative constructor for HttpResponseHeaders
  • Loading branch information
kmike authored Apr 11, 2022
2 parents 831754b + ddb7d20 commit a29d86d
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 1 deletion.
30 changes: 30 additions & 0 deletions tests/test_page_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,36 @@ def test_http_respose_headers():
headers["user agent"]


def test_http_response_headers_from_bytes_dict():
    """``from_bytes_dict`` decodes bytes names/values and keeps multi-values.

    The input mixes every accepted value shape: a list, a tuple, a bare
    ``bytes``, a plain ``str`` and ``None``.
    """
    headers = HttpResponseHeaders.from_bytes_dict(
        {
            b"Content-Length": [b"316"],
            b"Content-Encoding": [b"gzip", b"br"],
            b"server": b"sffe",
            "X-string": "string",
            "X-missing": None,
            "X-tuple": (b"x", "y"),
        }
    )

    # ``get`` is looked up with differently-cased names and yields the first
    # value stored for a header.
    expected_first_values = [
        ("content-length", "316"),
        ("content-encoding", "gzip"),
        ("server", "sffe"),
        ("x-string", "string"),
        ("x-tuple", "x"),
    ]
    for name, expected in expected_first_values:
        assert headers.get(name) == expected

    # A ``None`` value is stored as-is rather than decoded.
    assert headers.get("x-missing") is None

    # ``getall`` exposes every value of a multi-valued header, in order.
    assert headers.getall("Content-Encoding") == ["gzip", "br"]
    assert headers.getall("x-tuple") == ["x", "y"]


def test_http_response_headers_from_bytes_dict_err():
    """Values that are neither ``str`` nor ``bytes`` raise ``ValueError``.

    Covers both an invalid item nested in a list and an invalid bare value.
    """
    invalid_values = ([316], 316)

    for bad_value in invalid_values:
        with pytest.raises(ValueError):
            HttpResponseHeaders.from_bytes_dict({b"Content-Length": bad_value})


def test_http_response_headers_init_requests():
requests_response = requests.Response()
requests_response.headers['User-Agent'] = "mozilla"
Expand Down
44 changes: 43 additions & 1 deletion web_poet/page_inputs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import json
from typing import Optional, Dict, List, TypeVar, Type
from typing import Optional, Dict, List, TypeVar, Type, Union, Tuple, AnyStr

import attrs
from multidict import CIMultiDict
Expand All @@ -14,6 +14,7 @@
from .utils import memoizemethod_noargs

# Type variable for the alternative constructors of ``HttpResponseHeaders``:
# annotating ``cls`` as ``Type[T_headers]`` lets them declare that they return
# an instance of whichever (sub)class they were called on.
T_headers = TypeVar("T_headers", bound="HttpResponseHeaders")
# Shape of the mapping accepted by ``HttpResponseHeaders.from_bytes_dict``:
# a header name mapped to a single value, a list of values, or a tuple of
# values.
# NOTE(review): ``AnyStr`` is a constrained TypeVar, so this alias formally
# describes an all-``str`` or all-``bytes`` mapping, while the tests pass
# dicts mixing both (and ``None`` values) -- confirm the intended typing.
AnyStrDict = Dict[AnyStr, Union[AnyStr, List[AnyStr], Tuple[AnyStr, ...]]]


class HttpResponseBody(bytes):
Expand Down Expand Up @@ -74,6 +75,47 @@ def from_name_value_pairs(cls: Type[T_headers], arg: List[Dict]) -> T_headers:
"""
return cls([(pair["name"], pair["value"]) for pair in arg])

@classmethod
def from_bytes_dict(
    cls: Type[T_headers], arg: AnyStrDict, encoding: str = "utf-8"
) -> T_headers:
    """An alternative constructor for instantiation where the header-value
    pairs could be in raw bytes form.

    This supports multiple header values in the form of ``List[bytes]`` and
    ``Tuple[bytes]`` alongside a plain ``bytes`` value. A value in ``str``
    also works and wouldn't break the decoding process at all. ``None``
    (as a header name or value) is passed through unchanged.

    By default, it converts the ``bytes`` value using "utf-8". However, this
    can easily be overridden using the ``encoding`` parameter.

    A ``ValueError`` is raised when a header name or value is neither
    ``str``, ``bytes`` nor ``None``. ``bytes`` that cannot be decoded with
    ``encoding`` raise ``UnicodeDecodeError``, itself a ``ValueError``
    subclass.

    >>> raw_values = {
    ...     b"Content-Encoding": [b"gzip", b"br"],
    ...     b"Content-Type": [b"text/html"],
    ...     b"content-length": b"648",
    ... }
    >>> headers = HttpResponseHeaders.from_bytes_dict(raw_values)
    >>> headers
    <HttpResponseHeaders('Content-Encoding': 'gzip', 'Content-Encoding': 'br', 'Content-Type': 'text/html', 'content-length': '648')>
    """

    def _norm(data):
        # ``str`` and ``None`` need no decoding; only ``bytes`` are converted.
        if isinstance(data, str) or data is None:
            return data
        elif isinstance(data, bytes):
            return data.decode(encoding)
        raise ValueError(f"Expecting str or bytes. Received {type(data)}")

    converted = []

    for header, value in arg.items():
        # Treat a bare value as a one-element sequence so that single and
        # multiple values share the same code path.
        values = value if isinstance(value, (list, tuple)) else (value,)
        if not values:
            # An empty list/tuple contributes no pairs; the header name is
            # deliberately left untouched (and unvalidated) in that case.
            continue

        # Decode the header name once instead of once per value.
        name = _norm(header)
        converted.extend((name, _norm(item)) for item in values)

    return cls(converted)

def declared_encoding(self) -> Optional[str]:
""" Return encoding detected from the Content-Type header, or None
if encoding is not found """
Expand Down

0 comments on commit a29d86d

Please sign in to comment.