From 0226e0d46cf9b41ed9a8cacc38d6e6ac100df2a4 Mon Sep 17 00:00:00 2001 From: Yifei Kong Date: Thu, 4 Jul 2024 15:45:22 +0800 Subject: [PATCH] Add Cookies.get_dict, close #316 --- curl_cffi/requests/cookies.py | 61 +++++++++++++++++++++++----------- curl_cffi/requests/session.py | 1 + docs/cookies.rst | 3 ++ docs/faq.rst | 18 ++++++++++ tests/unittest/test_cookies.py | 13 ++++++++ 5 files changed, 76 insertions(+), 20 deletions(-) diff --git a/curl_cffi/requests/cookies.py b/curl_cffi/requests/cookies.py index b9bcb875..7b34f907 100644 --- a/curl_cffi/requests/cookies.py +++ b/curl_cffi/requests/cookies.py @@ -6,18 +6,16 @@ import re import time -import typing import warnings from dataclasses import dataclass from http.cookiejar import Cookie, CookieJar from http.cookies import _unquote +from typing import Dict, Iterator, List, MutableMapping, Optional, Tuple, Union from urllib.parse import urlparse from .errors import CookieConflict, RequestsError -CookieTypes = typing.Union[ - "Cookies", CookieJar, typing.Dict[str, str], typing.List[typing.Tuple[str, str]] -] +CookieTypes = Union["Cookies", CookieJar, Dict[str, str], List[Tuple[str, str]]] @dataclass @@ -126,12 +124,12 @@ def to_cookiejar_cookie(self) -> Cookie: IPV4_RE = re.compile(r"\.\d+$", re.ASCII) -class Cookies(typing.MutableMapping[str, str]): +class Cookies(MutableMapping[str, str]): """ HTTP Cookies, as a mutable mapping. 
""" - def __init__(self, cookies: typing.Optional[CookieTypes] = None) -> None: + def __init__(self, cookies: Optional[CookieTypes] = None) -> None: if cookies is None or isinstance(cookies, dict): self.jar = CookieJar() if isinstance(cookies, dict): @@ -164,7 +162,7 @@ def _eff_request_host(self, request) -> str: host += ".local" return host - def get_cookies_for_curl(self, request) -> typing.List[CurlMorsel]: + def get_cookies_for_curl(self, request) -> List[CurlMorsel]: """the process is similar to `cookiejar.add_cookie_header`, but load all cookies""" self.jar._cookies_lock.acquire() # type: ignore morsels = [] @@ -181,19 +179,24 @@ def get_cookies_for_curl(self, request) -> typing.List[CurlMorsel]: self.jar.clear_expired_cookies() return morsels - def update_cookies_from_curl(self, morsels: typing.List[CurlMorsel]): + def update_cookies_from_curl(self, morsels: List[CurlMorsel]): for morsel in morsels: cookie = morsel.to_cookiejar_cookie() self.jar.set_cookie(cookie) self.jar.clear_expired_cookies() - def set(self, name: str, value: str, domain: str = "", path: str = "/", secure=False) -> None: + def set( + self, name: str, value: str, domain: str = "", path: str = "/", secure=False + ) -> None: """ Set a cookie value by name. May optionally include domain and path. 
""" # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie if name.startswith("__Secure-") and secure is False: - warnings.warn("`secure` changed to True for `__Secure-` prefixed cookies", stacklevel=2) + warnings.warn( + "`secure` changed to True for `__Secure-` prefixed cookies", + stacklevel=2, + ) secure = True elif name.startswith("__Host-") and (secure is False or domain or path != "/"): warnings.warn( @@ -229,10 +232,10 @@ def set(self, name: str, value: str, domain: str = "", path: str = "/", secure=F def get( # type: ignore self, name: str, - default: typing.Optional[str] = None, - domain: typing.Optional[str] = None, - path: typing.Optional[str] = None, - ) -> typing.Optional[str]: + default: Optional[str] = None, + domain: Optional[str] = None, + path: Optional[str] = None, + ) -> Optional[str]: """ Get a cookie by name. May optionally include domain and path in order to specify exactly which cookie to retrieve. @@ -265,11 +268,26 @@ def get( # type: ignore return default return value + def get_dict( + self, domain: Optional[str] = None, path: Optional[str] = None + ) -> dict: + """ + Cookies with the same name on different domains may overwrite each other, + do NOT use this function as a method of serialization. + """ + ret = {} + for cookie in self.jar: + if (domain is None or cookie.domain == domain) and ( + path is None or cookie.path == path + ): + ret[cookie.name] = cookie.value + return ret + def delete( self, name: str, - domain: typing.Optional[str] = None, - path: typing.Optional[str] = None, + domain: Optional[str] = None, + path: Optional[str] = None, ) -> None: """ Delete a cookie by name. 
May optionally include domain and path @@ -289,7 +307,7 @@ def delete( for cookie in remove: self.jar.clear(cookie.domain, cookie.path, cookie.name) - def clear(self, domain: typing.Optional[str] = None, path: typing.Optional[str] = None) -> None: + def clear(self, domain: Optional[str] = None, path: Optional[str] = None) -> None: """ Delete all cookies. Optionally include a domain and path in order to only delete a subset of all the cookies. @@ -302,7 +320,7 @@ def clear(self, domain: typing.Optional[str] = None, path: typing.Optional[str] args.append(path) self.jar.clear(*args) - def update(self, cookies: typing.Optional[CookieTypes] = None) -> None: # type: ignore + def update(self, cookies: Optional[CookieTypes] = None) -> None: # type: ignore cookies = Cookies(cookies) for cookie in cookies.jar: self.jar.set_cookie(cookie) @@ -322,7 +340,7 @@ def __delitem__(self, name: str) -> None: def __len__(self) -> int: return len(self.jar) - def __iter__(self) -> typing.Iterator[str]: + def __iter__(self) -> Iterator[str]: return (cookie.name for cookie in self.jar) def __bool__(self) -> bool: @@ -332,7 +350,10 @@ def __bool__(self) -> bool: def __repr__(self) -> str: cookies_repr = ", ".join( - [f"" for cookie in self.jar] + [ + f"" + for cookie in self.jar + ] ) return f"" diff --git a/curl_cffi/requests/session.py b/curl_cffi/requests/session.py index c5a1fe8f..87720294 100644 --- a/curl_cffi/requests/session.py +++ b/curl_cffi/requests/session.py @@ -226,6 +226,7 @@ def __init__( self._closed = False def _toggle_extensions_by_ids(self, curl, extension_ids): + # TODO find a better representation, rather than magic numbers default_enabled = {0, 51, 13, 43, 65281, 23, 10, 45, 35, 11, 16} to_enable_ids = extension_ids - default_enabled diff --git a/docs/cookies.rst b/docs/cookies.rst index e3dfe137..9a92afc5 100644 --- a/docs/cookies.rst +++ b/docs/cookies.rst @@ -4,6 +4,9 @@ Cookies How to save and load cookies ------ +Do not use ``get_dict`` to dump and load cookies. 
Cookies are more than just plain +key-value pairs. + .. code-block:: python import pickle diff --git a/docs/faq.rst b/docs/faq.rst index 4edaa33d..d34b63f4 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -159,3 +159,22 @@ your own headers. requests.get(url, impersonate="chrome", default_headers=False, headers=...) + +How to deal with encoding/decoding errors? +------ + +Use ``chardet`` or ``cchardet`` to detect the actual encoding: + +.. code-block:: + + >>> from curl_cffi import requests + >>> r = requests.get("https://example.com/messy_codec.html") + >>> import chardet + >>> chardet.detect(r.content) + {'encoding': 'GB2312', 'confidence': 0.99, 'language': 'Chinese'} + +Or use regex or lxml to parse the meta header: + +.. code-block:: + + diff --git a/tests/unittest/test_cookies.py b/tests/unittest/test_cookies.py index ce787b0b..c24f115a 100644 --- a/tests/unittest/test_cookies.py +++ b/tests/unittest/test_cookies.py @@ -40,3 +40,16 @@ def test_curl_format_without_hostname(): m = CurlMorsel(name="foo", value="bar") with pytest.raises(RequestsError): m.to_curl_format() + + +def test_get_dict(): + c = Cookies({"foo": "bar"}) + d = c.get_dict() + assert d == {"foo": "bar"} + + c = Cookies({"foo": "bar", "hello": "world", "a": "b"}) + d = c.get_dict() + assert len(d) == 3 + assert d["foo"] == "bar" + assert d["hello"] == "world" + assert d["a"] == "b"