Skip to content

Commit

Permalink
Support wildcard http_cookie_names (cloudera#509)
Browse files Browse the repository at this point in the history
Adds wildcard ('*') support to the `http_cookie_names` connect property
to preserve all cookies returned by the server. Preserves prior behavior
for any other value of `http_cookie_names`.
  • Loading branch information
MikaelSmith committed Jun 28, 2024
1 parent acbd481 commit 2861464
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 16 deletions.
29 changes: 21 additions & 8 deletions impala/_thrift_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

from impala.error import HttpError
from impala.util import get_logger_and_init_null
from impala.util import get_all_matching_cookies, get_cookie_expiry
from impala.util import get_all_matching_cookies, get_all_cookies, get_cookie_expiry

# Declare namedtuple for Cookie with named fields - cookie and expiry_time
Cookie = namedtuple('Cookie', ['cookie', 'expiry_time'])
Expand Down Expand Up @@ -83,7 +83,8 @@ def __init__(self, uri_or_host, port=None, path=None, cafile=None, cert_file=Non
cookie-based authentication or session management. If there's only one name in the
cookie name list, a str value can be specified instead of the list. If a cookie with
one of these names is returned in an http response by the server or an intermediate
proxy then it will be included in each subsequent request for the same connection.
proxy then it will be included in each subsequent request for the same connection. If
it is set as wildcards, all cookies in an http response will be preserved.
"""
if port is not None:
warnings.warn(
Expand Down Expand Up @@ -129,8 +130,13 @@ def __init__(self, uri_or_host, port=None, path=None, cafile=None, cert_file=Non
self.realhost = self.realport = self.proxy_auth = None
if not http_cookie_names:
# 'http_cookie_names' was explicitly set as an empty value ([], or '') in connect().
self.__preserve_all_cookies = False
self.__http_cookie_dict = None
self.__auth_cookie_names = None
elif str(http_cookie_names).strip() == "*":
self.__preserve_all_cookies = True
self.__http_cookie_dict = dict()
self.__auth_cookie_names = set()
else:
if isinstance(http_cookie_names, six.string_types):
http_cookie_names = [http_cookie_names]
Expand All @@ -140,7 +146,7 @@ def __init__(self, uri_or_host, port=None, path=None, cafile=None, cert_file=Non
# Store the auth cookie names in __auth_cookie_names.
# Assume auth cookie names end with ".auth".
self.__auth_cookie_names = \
[ cn for cn in http_cookie_names if cn.endswith(".auth") ]
{ cn for cn in http_cookie_names if cn.endswith(".auth") }
# Set __are_matching_cookies_found as True if matching cookies are found in response.
self.__are_matching_cookies_found = False
self.__wbuf = BytesIO()
Expand Down Expand Up @@ -248,13 +254,20 @@ def getHttpCookieHeaderForRequest(self):
# Extract cookies from response and save those cookies for which the cookie names
# are in the cookie name list specified in the connect() API.
def extractHttpCookiesFromResponse(self):
if self.__http_cookie_dict is not None:
if self.__preserve_all_cookies:
matching_cookies = get_all_cookies(self.path, self.headers)
elif self.__http_cookie_dict is not None:
matching_cookies = get_all_matching_cookies(
self.__http_cookie_dict.keys(), self.path, self.headers)
if matching_cookies:
self.__are_matching_cookies_found = True
for c in matching_cookies:
self.__http_cookie_dict[c.key] = Cookie(c, get_cookie_expiry(c))
else:
matching_cookies = None

if matching_cookies:
self.__are_matching_cookies_found = True
for c in matching_cookies:
self.__http_cookie_dict[c.key] = Cookie(c, get_cookie_expiry(c))
if c.key.endswith(".auth"):
self.__auth_cookie_names.add(c.key)

# Return True if there are any saved cookies which are sent in previous request.
def areHttpCookiesSaved(self):
Expand Down
12 changes: 6 additions & 6 deletions impala/dbapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,11 @@ def connect(host='localhost', port=21050, database=None, timeout=None,
name only, a str value can be specified instead of a list.
If a cookie with one of these names is returned in an http response by the server
or an intermediate proxy then it will be included in each subsequent request for
the same connection.
the same connection. If set to wildcard ('*'), all cookies in an http response
will be preserved. By default 'http_cookie_names' is set to '*'.
Used only when `use_http_transport` is True.
By default 'http_cookie_names' is set to the list of HTTP cookie names used by
Impala and Hive. The names of authentication cookies are expected to end with
".auth" string, for example, "impala.auth" for Impala authentication cookies.
The names of authentication cookies are expected to end with ".auth" string, for
example, "impala.auth" for Impala authentication cookies.
If 'http_cookie_names' is explicitly set to a not None empty value ([], or ''),
Impyla won't attempt to do cookie based authentication or session management.
Currently cookie retention is supported for GSSAPI/LDAP/SASL/NOSASL/JWT over http.
Expand Down Expand Up @@ -191,8 +191,8 @@ def connect(host='localhost', port=21050, database=None, timeout=None,
warn_deprecate('auth_cookie_names', 'http_cookie_names')
http_cookie_names = auth_cookie_names
elif http_cookie_names is None:
# Set default value as the list of HTTP cookie names used by Impala, Hive and Knox.
http_cookie_names = ['impala.auth', 'impala.session.id', 'hive.server2.auth', 'KNOXSESSIONID', 'KNOX_BACKEND-HIVE', 'JSESSIONID']
# Preserve all cookies.
http_cookie_names = '*'

service = hs2.connect(host=host, port=port,
timeout=timeout, use_ssl=use_ssl,
Expand Down
76 changes: 75 additions & 1 deletion impala/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
import unittest

import pytest
from impala.util import cookie_matches_path, get_cookie_expiry, get_all_matching_cookies
from impala.util import (cookie_matches_path, get_cookie_expiry, get_all_cookies,
get_all_matching_cookies)


class ImpalaUtilTests(unittest.TestCase):
Expand Down Expand Up @@ -78,6 +79,79 @@ def test_get_cookie_expiry(self):
now = datetime.now()
assert now + days2k <= get_cookie_expiry({'max-age': '172800000'}) <= now + days2k + sec

def test_get_matching_cookies(self):
cookies = get_all_cookies('/path', {})
assert not cookies

headers = make_cookie_headers([
('c_cookie', 'c_value'),
('b_cookie', 'b_value'),
('a_cookie', 'a_value')
])
cookies = get_all_cookies('/path', headers)
# result order is not specified, so sort results
cookies.sort()
assert len(cookies) == 3
assert cookies[0].key == 'a_cookie' and cookies[0].value == 'a_value'
assert cookies[1].key == 'b_cookie' and cookies[1].value == 'b_value'
assert cookies[2].key == 'c_cookie' and cookies[0].value == 'c_value'

headers = make_cookie_headers([
('c_cookie', 'c_value;Path=/'),
('b_cookie', 'b_value;Path=/path'),
('a_cookie', 'a_value;Path=/')
])
cookies = get_all_cookies('/path', headers)
cookies.sort()
assert len(cookies) == 3
assert cookies[0].key == 'a_cookie' and cookies[0].value == 'a_value'
assert cookies[1].key == 'b_cookie' and cookies[1].value == 'b_value'
assert cookies[2].key == 'c_cookie' and cookies[2].value == 'c_value'

headers = make_cookie_headers([
('c_cookie', 'c_value;Path=/'),
('b_cookie', 'b_value;Path=/path'),
('a_cookie', 'a_value;Path=/path/path2')
])
cookies = get_all_cookies('/path', headers)
cookies.sort()
assert len(cookies) == 2
assert cookies[0].key == 'a_cookie' and cookies[0].value == 'a_value'
assert cookies[1].key == 'b_cookie' and cookies[1].value == 'b_value'
cookies = get_all_cookies('/path', headers)
cookies.sort()
assert len(cookies) == 2
assert cookies[0].key == 'a_cookie' and cookies[0].value == 'a_value'
assert cookies[1].key == 'b_cookie' and cookies[1].value == 'b_value'

headers = make_cookie_headers([
('c_cookie', 'c_value;Path=/'),
('b_cookie', 'b_value;Path=/path'),
('a_cookie', 'a_value;Path=/path')
])
cookies = get_all_cookies('/path/path1', headers)
cookies.sort()
assert len(cookies) == 3
assert cookies[0].key == 'a_cookie' and cookies[0].value == 'a_value'
assert cookies[1].key == 'b_cookie' and cookies[1].value == 'b_value'
assert cookies[2].key == 'c_cookie' and cookies[2].value == 'c_value'

headers = make_cookie_headers([
('b_cookie', 'b_value;Path=/path1'),
('a_cookie', 'a_value;Path=/path2')
])
cookies = get_all_cookies('/path', headers)
assert not cookies

headers = make_cookie_headers([
('c_cookie', 'c_value;Path=/'),
('b_cookie', 'b_value;Path=/path1'),
('a_cookie', 'a_value;Path=/path2')
])
cookies = get_all_cookies('/path', headers)
assert len(cookies) == 1
assert cookies[0].key == 'c_cookie' and cookies[0].value == 'c_value'

def test_get_all_matching_cookies(self):
cookies = get_all_matching_cookies(['a', 'b'], '/path', {})
assert not cookies
Expand Down
21 changes: 20 additions & 1 deletion impala/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def get_cookie_expiry(c):
return None


def get_all_matching_cookies(cookie_names, path, resp_headers):
def get_cookies(resp_headers):
if 'Set-Cookie' not in resp_headers:
return None

Expand All @@ -214,9 +214,28 @@ def get_all_matching_cookies(cookie_names, path, resp_headers):
cookie_headers = resp_headers.get_all('Set-Cookie')
for header in cookie_headers:
cookies.load(header)
return cookies
except Exception:
return None


def get_all_cookies(path, resp_headers):
cookies = get_cookies(resp_headers)
if not cookies:
return None

matching_cookies = []
for c in cookies.values():
if c and cookie_matches_path(c, path):
matching_cookies.append(c)
return matching_cookies


def get_all_matching_cookies(cookie_names, path, resp_headers):
cookies = get_cookies(resp_headers)
if not cookies:
return None

matching_cookies = []
for cn in cookie_names:
if cn in cookies:
Expand Down

0 comments on commit 2861464

Please sign in to comment.