Skip to content

Commit

Permalink
Support wildcard http_cookie_names (cloudera#509)
Browse files Browse the repository at this point in the history
Adds wildcard ('*') support to the `http_cookie_names` connect property
to preserve all cookies returned by the server. Preserves prior behavior
for any other value of `http_cookie_names`.
  • Loading branch information
MikaelSmith committed Aug 6, 2024
1 parent acbd481 commit eccd976
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 16 deletions.
29 changes: 21 additions & 8 deletions impala/_thrift_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

from impala.error import HttpError
from impala.util import get_logger_and_init_null
from impala.util import get_all_matching_cookies, get_cookie_expiry
from impala.util import get_all_matching_cookies, get_all_cookies, get_cookie_expiry

# Declare namedtuple for Cookie with named fields - cookie and expiry_time
Cookie = namedtuple('Cookie', ['cookie', 'expiry_time'])
Expand Down Expand Up @@ -83,7 +83,8 @@ def __init__(self, uri_or_host, port=None, path=None, cafile=None, cert_file=Non
cookie-based authentication or session management. If there's only one name in the
cookie name list, a str value can be specified instead of the list. If a cookie with
one of these names is returned in an http response by the server or an intermediate
proxy then it will be included in each subsequent request for the same connection.
proxy then it will be included in each subsequent request for the same connection. If
it is set as wildcards, all cookies in an http response will be preserved.
"""
if port is not None:
warnings.warn(
Expand Down Expand Up @@ -129,8 +130,13 @@ def __init__(self, uri_or_host, port=None, path=None, cafile=None, cert_file=Non
self.realhost = self.realport = self.proxy_auth = None
if not http_cookie_names:
# 'http_cookie_names' was explicitly set as an empty value ([], or '') in connect().
self.__preserve_all_cookies = False
self.__http_cookie_dict = None
self.__auth_cookie_names = None
elif str(http_cookie_names).strip() == "*":
self.__preserve_all_cookies = True
self.__http_cookie_dict = dict()
self.__auth_cookie_names = set()
else:
if isinstance(http_cookie_names, six.string_types):
http_cookie_names = [http_cookie_names]
Expand All @@ -140,7 +146,7 @@ def __init__(self, uri_or_host, port=None, path=None, cafile=None, cert_file=Non
# Store the auth cookie names in __auth_cookie_names.
# Assume auth cookie names end with ".auth".
self.__auth_cookie_names = \
[ cn for cn in http_cookie_names if cn.endswith(".auth") ]
{ cn for cn in http_cookie_names if cn.endswith(".auth") }
# Set __are_matching_cookies_found as True if matching cookies are found in response.
self.__are_matching_cookies_found = False
self.__wbuf = BytesIO()
Expand Down Expand Up @@ -248,13 +254,20 @@ def getHttpCookieHeaderForRequest(self):
# Extract cookies from response and save those cookies for which the cookie names
# are in the cookie name list specified in the connect() API.
def extractHttpCookiesFromResponse(self):
if self.__http_cookie_dict is not None:
if self.__preserve_all_cookies:
matching_cookies = get_all_cookies(self.path, self.headers)
elif self.__http_cookie_dict is not None:
matching_cookies = get_all_matching_cookies(
self.__http_cookie_dict.keys(), self.path, self.headers)
if matching_cookies:
self.__are_matching_cookies_found = True
for c in matching_cookies:
self.__http_cookie_dict[c.key] = Cookie(c, get_cookie_expiry(c))
else:
matching_cookies = None

if matching_cookies:
self.__are_matching_cookies_found = True
for c in matching_cookies:
self.__http_cookie_dict[c.key] = Cookie(c, get_cookie_expiry(c))
if c.key.endswith(".auth"):
self.__auth_cookie_names.add(c.key)

# Return True if there are any saved cookies which are sent in previous request.
def areHttpCookiesSaved(self):
Expand Down
12 changes: 6 additions & 6 deletions impala/dbapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,11 @@ def connect(host='localhost', port=21050, database=None, timeout=None,
name only, a str value can be specified instead of a list.
If a cookie with one of these names is returned in an http response by the server
or an intermediate proxy then it will be included in each subsequent request for
the same connection.
the same connection. If set to wildcard ('*'), all cookies in an http response
will be preserved. By default 'http_cookie_names' is set to '*'.
Used only when `use_http_transport` is True.
By default 'http_cookie_names' is set to the list of HTTP cookie names used by
Impala and Hive. The names of authentication cookies are expected to end with
".auth" string, for example, "impala.auth" for Impala authentication cookies.
The names of authentication cookies are expected to end with ".auth" string, for
example, "impala.auth" for Impala authentication cookies.
If 'http_cookie_names' is explicitly set to a not None empty value ([], or ''),
Impyla won't attempt to do cookie based authentication or session management.
Currently cookie retention is supported for GSSAPI/LDAP/SASL/NOSASL/JWT over http.
Expand Down Expand Up @@ -191,8 +191,8 @@ def connect(host='localhost', port=21050, database=None, timeout=None,
warn_deprecate('auth_cookie_names', 'http_cookie_names')
http_cookie_names = auth_cookie_names
elif http_cookie_names is None:
# Set default value as the list of HTTP cookie names used by Impala, Hive and Knox.
http_cookie_names = ['impala.auth', 'impala.session.id', 'hive.server2.auth', 'KNOXSESSIONID', 'KNOX_BACKEND-HIVE', 'JSESSIONID']
# Preserve all cookies.
http_cookie_names = '*'

service = hs2.connect(host=host, port=port,
timeout=timeout, use_ssl=use_ssl,
Expand Down
71 changes: 70 additions & 1 deletion impala/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
import unittest

import pytest
from impala.util import cookie_matches_path, get_cookie_expiry, get_all_matching_cookies
from impala.util import (cookie_matches_path, get_cookie_expiry, get_all_cookies,
get_all_matching_cookies)


class ImpalaUtilTests(unittest.TestCase):
Expand Down Expand Up @@ -78,6 +79,69 @@ def test_get_cookie_expiry(self):
now = datetime.now()
assert now + days2k <= get_cookie_expiry({'max-age': '172800000'}) <= now + days2k + sec

def test_get_matching_cookies(self):
cookies = get_all_cookies('/path', {})
assert not cookies

headers = make_cookie_headers([
('c_cookie', 'c_value'),
('b_cookie', 'b_value'),
('a_cookie', 'a_value')
])
cookies = csort(get_all_cookies('/path', headers))
assert len(cookies) == 3
assert cookies[0].key == 'a_cookie' and cookies[0].value == 'a_value'
assert cookies[1].key == 'b_cookie' and cookies[1].value == 'b_value'
assert cookies[2].key == 'c_cookie' and cookies[2].value == 'c_value'

headers = make_cookie_headers([
('c_cookie', 'c_value;Path=/'),
('b_cookie', 'b_value;Path=/path'),
('a_cookie', 'a_value;Path=/')
])
cookies = csort(get_all_cookies('/path', headers))
assert len(cookies) == 3
assert cookies[0].key == 'a_cookie' and cookies[0].value == 'a_value'
assert cookies[1].key == 'b_cookie' and cookies[1].value == 'b_value'
assert cookies[2].key == 'c_cookie' and cookies[2].value == 'c_value'

headers = make_cookie_headers([
('c_cookie', 'c_value;Path=/'),
('B_cookie', 'B_value;Path=/path'),
('a_cookie', 'a_value;Path=/path/path2')
])
cookies = csort(get_all_cookies('/path', headers))
assert len(cookies) == 2
assert cookies[0].key == 'B_cookie' and cookies[0].value == 'B_value'
assert cookies[1].key == 'c_cookie' and cookies[1].value == 'c_value'

headers = make_cookie_headers([
('c_cookie', 'c_value;Path=/'),
('b_cookie', 'b_value;Path=/path'),
('a_cookie', 'a_value;Path=/path')
])
cookies = csort(get_all_cookies('/path/path1', headers))
assert len(cookies) == 3
assert cookies[0].key == 'a_cookie' and cookies[0].value == 'a_value'
assert cookies[1].key == 'b_cookie' and cookies[1].value == 'b_value'
assert cookies[2].key == 'c_cookie' and cookies[2].value == 'c_value'

headers = make_cookie_headers([
('b_cookie', 'b_value;Path=/path1'),
('a_cookie', 'a_value;Path=/path2')
])
cookies = get_all_cookies('/path', headers)
assert not cookies

headers = make_cookie_headers([
('c_cookie', 'c_value;Path=/'),
('b_cookie', 'b_value;Path=/path1'),
('a_cookie', 'a_value;Path=/path2')
])
cookies = get_all_cookies('/path', headers)
assert len(cookies) == 1
assert cookies[0].key == 'c_cookie' and cookies[0].value == 'c_value'

def test_get_all_matching_cookies(self):
cookies = get_all_matching_cookies(['a', 'b'], '/path', {})
assert not cookies
Expand Down Expand Up @@ -173,3 +237,8 @@ def make_cookie_headers(cookie_vals):
value = pair[1]
headers.add_header('Set-Cookie', name + "=" + value)
return headers

def csort(cookies):
"""Sort list of Morsels as header order is not guaranteed."""
cookies.sort(key=lambda c: c.key)
return cookies
30 changes: 29 additions & 1 deletion impala/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,8 @@ def get_cookie_expiry(c):
return None


def get_all_matching_cookies(cookie_names, path, resp_headers):
def get_cookies(resp_headers):
"""Returns a SimpleCookie containing all Set-Cookie entries in resp_headers."""
if 'Set-Cookie' not in resp_headers:
return None

Expand All @@ -214,9 +215,36 @@ def get_all_matching_cookies(cookie_names, path, resp_headers):
cookie_headers = resp_headers.get_all('Set-Cookie')
for header in cookie_headers:
cookies.load(header)
return cookies
except Exception:
return None


def get_all_cookies(path, resp_headers):
"""Return cookies that match path.
Returns a list of Morsel objects representing cookie key/value pairs for all cookies
in resp_headers matching path."""
cookies = get_cookies(resp_headers)
if not cookies:
return None

matching_cookies = []
for c in cookies.values():
if c and cookie_matches_path(c, path):
matching_cookies.append(c)
return matching_cookies


def get_all_matching_cookies(cookie_names, path, resp_headers):
"""Return cookies in cookie_names that match path.
Returns a list of Morsel objects representing cookie key/value pairs for cookies
in resp_headers matching path where the cookie is also listed in cookie_names."""
cookies = get_cookies(resp_headers)
if not cookies:
return None

matching_cookies = []
for cn in cookie_names:
if cn in cookies:
Expand Down

0 comments on commit eccd976

Please sign in to comment.