From 646104332f08330e3c72cdcdd75a058a2855b7aa Mon Sep 17 00:00:00 2001 From: dblock Date: Thu, 12 Oct 2023 14:04:03 -0400 Subject: [PATCH 1/4] Align pool_maxsize for different connection pool implementations. Signed-off-by: dblock --- CHANGELOG.md | 1 + opensearchpy/_async/http_aiohttp.py | 4 +- opensearchpy/connection/http_async.py | 4 +- opensearchpy/connection/http_urllib3.py | 9 +++-- opensearchpy/transport.py | 6 +++ .../test_client/test_requests.py | 32 +++++++++++++++ test_opensearchpy/test_client/test_urllib3.py | 39 +++++++++++++++++++ 7 files changed, 88 insertions(+), 7 deletions(-) create mode 100644 test_opensearchpy/test_client/test_requests.py create mode 100644 test_opensearchpy/test_client/test_urllib3.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 141ca15a..1f6c3983 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Added generating imports and headers to API generator ([#467](https://github.com/opensearch-project/opensearch-py/pull/467)) - Added point-in-time APIs (create_pit, delete_pit, delete_all_pits, get_all_pits) and Security Client APIs (health and update_audit_configuration) ([#502](https://github.com/opensearch-project/opensearch-py/pull/502)) - Added new guide for using index templates with the client ([#531](https://github.com/opensearch-project/opensearch-py/pull/531)) +- Added `pool_maxsize` for `Urllib3HttpConnection` ([#535](https://github.com/opensearch-project/opensearch-py/pull/535)) ### Changed - Generate `tasks` client from API specs ([#508](https://github.com/opensearch-project/opensearch-py/pull/508)) - Generate `ingest` client from API specs ([#513](https://github.com/opensearch-project/opensearch-py/pull/513)) diff --git a/opensearchpy/_async/http_aiohttp.py b/opensearchpy/_async/http_aiohttp.py index cc426164..408f2b7c 100644 --- a/opensearchpy/_async/http_aiohttp.py +++ b/opensearchpy/_async/http_aiohttp.py @@ -85,7 +85,7 @@ def __init__( client_key=None, ssl_version=None, ssl_assert_fingerprint=None, - maxsize=10, + pool_maxsize=None, headers=None, ssl_context=None, http_compress=None, @@ -217,7 +217,7 @@ def __init__( self.session = None # Parameters for creating an aiohttp.ClientSession later. - self._limit = maxsize + self._limit = pool_maxsize self._http_auth = http_auth self._ssl_context = ssl_context self._trust_env = trust_env diff --git a/opensearchpy/connection/http_async.py b/opensearchpy/connection/http_async.py index 10f5a56a..5142f9f5 100644 --- a/opensearchpy/connection/http_async.py +++ b/opensearchpy/connection/http_async.py @@ -43,7 +43,7 @@ def __init__( client_key=None, ssl_version=None, ssl_assert_fingerprint=None, - maxsize=10, + pool_maxsize=None, headers=None, ssl_context=None, http_compress=None, @@ -140,7 +140,7 @@ def __init__( self.session = None # Parameters for creating an aiohttp.ClientSession later. - self._limit = maxsize + self._limit = pool_maxsize self._http_auth = http_auth self._ssl_context = ssl_context diff --git a/opensearchpy/connection/http_urllib3.py b/opensearchpy/connection/http_urllib3.py index 6fc09e72..4bc27bbb 100644 --- a/opensearchpy/connection/http_urllib3.py +++ b/opensearchpy/connection/http_urllib3.py @@ -86,7 +86,7 @@ class Urllib3HttpConnection(Connection): ``ssl`` module for exact options for your environment). :arg ssl_assert_hostname: use hostname verification if not `False` :arg ssl_assert_fingerprint: verify the supplied certificate fingerprint if not `None` - :arg maxsize: the number of connections which will be kept open to this + :arg pool_maxsize: the number of connections which will be kept open to this host. See https://urllib3.readthedocs.io/en/1.4/pools.html#api for more information. :arg headers: any custom http headers to be add to requests @@ -109,7 +109,7 @@ def __init__( ssl_version=None, ssl_assert_hostname=None, ssl_assert_fingerprint=None, - maxsize=10, + pool_maxsize=None, headers=None, ssl_context=None, http_compress=None, @@ -203,8 +203,11 @@ def __init__( if not ssl_show_warn: urllib3.disable_warnings() + if pool_maxsize and isinstance(pool_maxsize, int): + kw["maxsize"] = pool_maxsize + self.pool = pool_class( - self.hostname, port=self.port, timeout=self.timeout, maxsize=maxsize, **kw + self.hostname, port=self.port, timeout=self.timeout, **kw ) def perform_request( diff --git a/opensearchpy/transport.py b/opensearchpy/transport.py index 32c9baf4..301955df 100644 --- a/opensearchpy/transport.py +++ b/opensearchpy/transport.py @@ -83,6 +83,7 @@ def __init__( serializers=None, default_mimetype="application/json", max_retries=3, + pool_maxsize=None, retry_on_status=(502, 503, 504), retry_on_timeout=False, send_get_body_as="GET", @@ -120,6 +121,8 @@ def __init__( don't support passing bodies with GET requests. If you set this to 'POST' a POST method will be used instead, if to 'source' then the body will be serialized and passed as a query parameter `source`. + :arg pool_maxsize: Maximum connection pool size used by pool-manager + For custom connection-pooling on current session Any extra keyword arguments will be passed to the `connection_class` when creating and instance unless overridden by that connection's @@ -139,6 +142,7 @@ def __init__( self.deserializer = Deserializer(_serializers, default_mimetype) self.max_retries = max_retries + self.pool_maxsize = pool_maxsize self.retry_on_timeout = retry_on_timeout self.retry_on_status = retry_on_status self.send_get_body_as = send_get_body_as @@ -211,6 +215,8 @@ def _create_connection(host): # previously unseen params, create new connection kwargs = self.kwargs.copy() kwargs.update(host) + if self.pool_maxsize and isinstance(self.pool_maxsize, int): + kwargs["pool_maxsize"] = self.pool_maxsize return self.connection_class(**kwargs) connections = map(_create_connection, hosts) diff --git a/test_opensearchpy/test_client/test_requests.py b/test_opensearchpy/test_client/test_requests.py new file mode 100644 index 00000000..11434a17 --- /dev/null +++ b/test_opensearchpy/test_client/test_requests.py @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +from unittest import TestCase + +from opensearchpy import OpenSearch, RequestsHttpConnection + + +class TestRequests(TestCase): + def test_connection_class(self): + client = OpenSearch(connection_class=RequestsHttpConnection) + self.assertEqual(client.transport.pool_maxsize, None) + self.assertEqual(client.transport.connection_class, RequestsHttpConnection) + self.assertIsInstance( + client.transport.connection_pool.connections[0], RequestsHttpConnection + ) + + def test_pool_maxsize(self): + client = OpenSearch(connection_class=RequestsHttpConnection, pool_maxsize=42) + self.assertEqual(client.transport.pool_maxsize, 42) + self.assertEqual( + client.transport.connection_pool.connections[0] + .session.adapters["https://"] + ._pool_maxsize, + 42, + ) diff --git a/test_opensearchpy/test_client/test_urllib3.py b/test_opensearchpy/test_client/test_urllib3.py new file mode 100644 index 00000000..227164eb --- /dev/null +++ b/test_opensearchpy/test_client/test_urllib3.py @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +from unittest import TestCase + +from urllib3.connectionpool import HTTPConnectionPool + +from opensearchpy import OpenSearch, Urllib3HttpConnection + + +class TestUrlLib3(TestCase): + def test_default(self): + client = OpenSearch() + self.assertEqual(client.transport.connection_class, Urllib3HttpConnection) + self.assertEqual(client.transport.pool_maxsize, None) + + def test_connection_class(self): + client = OpenSearch(connection_class=Urllib3HttpConnection) + self.assertEqual(client.transport.connection_class, Urllib3HttpConnection) + self.assertIsInstance( + client.transport.connection_pool.connections[0], Urllib3HttpConnection + ) + self.assertIsInstance( + client.transport.connection_pool.connections[0].pool, HTTPConnectionPool + ) + + def test_pool_maxsize(self): + client = OpenSearch(connection_class=Urllib3HttpConnection, pool_maxsize=42) + self.assertEqual(client.transport.pool_maxsize, 42) + # https://github.com/python/cpython/blob/3.12/Lib/queue.py#L35 + self.assertEqual( + client.transport.connection_pool.connections[0].pool.pool.maxsize, 42 + ) From 35a8443552df9bfbfba0894630d36491bbdb76d1 Mon Sep 17 00:00:00 2001 From: dblock Date: Thu, 12 Oct 2023 15:48:42 -0400 Subject: [PATCH 2/4] Document connection classes and settings. Signed-off-by: dblock --- USER_GUIDE.md | 1 + guides/auth.md | 1 - guides/connection_classes.md | 81 ++++++++++++++++++++++++++++++++++++ guides/proxy.md | 1 - 4 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 guides/connection_classes.md diff --git a/USER_GUIDE.md b/USER_GUIDE.md index b14ee0ad..416bbc4d 100644 --- a/USER_GUIDE.md +++ b/USER_GUIDE.md @@ -154,6 +154,7 @@ print(response) - [Point in Time](guides/point_in_time.md) - [Using a Proxy](guides/proxy.md) - [Index Templates](guides/index_template.md) +- [Connection Classes](guides/connection_classes.md) ## Plugins diff --git a/guides/auth.md b/guides/auth.md index 4b314764..3e7f4092 100644 --- a/guides/auth.md +++ b/guides/auth.md @@ -113,7 +113,6 @@ client = OpenSearch( ['htps://...'], use_ssl=True, verify_certs=True, - connection_class=RequestsHttpConnection, http_auth=HTTPKerberosAuth(mutual_authentication=OPTIONAL) ) diff --git a/guides/connection_classes.md b/guides/connection_classes.md new file mode 100644 index 00000000..734a091a --- /dev/null +++ b/guides/connection_classes.md @@ -0,0 +1,81 @@ +- [Connection Classes](#connection-classes) + - [Selecting a Connection Class](#selecting-a-connection-class) + - [Urllib3HttpConnection](#urllib3httpconnection) + - [RequestsHttpConnection](#requestshttpconnection) + - [AsyncHttpConnection](#asynchttpconnection) + - [Connection Pooling](#connection-pooling) + +# Connection Classes + +The OpenSearch Python synchrnous client supports both the `Urllib3HttpConnection` connection class (default) from the [urllib3](https://pypi.org/project/urllib3/) library, and `RequestsHttpConnection` from the [requests](https://pypi.org/project/requests/) library. We recommend you use the default, unless your application is standardized on `requests`. + +The faster, asynchronous client, implements a class called `AsyncHttpConnection`, which uses [aiohttp](https://pypi.org/project/aiohttp/). + +## Selecting a Connection Class + +### Urllib3HttpConnection + +```python +from opensearchpy import OpenSearch, Urllib3HttpConnection + +client = OpenSearch( + hosts = [{'host': 'localhost', 'port': 9200}], + http_auth = ('admin', 'admin'), + use_ssl = True, + verify_certs = False, + ssl_show_warn = False, + connection_class = Urllib3HttpConnection +) +``` + +### RequestsHttpConnection + +```python +from opensearchpy import OpenSearch, RequestsHttpConnection + +client = OpenSearch( + hosts = [{'host': 'localhost', 'port': 9200}], + http_auth = ('admin', 'admin'), + use_ssl = True, + verify_certs = False, + ssl_show_warn = False, + connection_class = RequestsHttpConnection +) +``` + +### AsyncHttpConnection + +```python +from opensearchpy import AsyncOpenSearch, AsyncHttpConnection + +async def main(): + client = AsyncOpenSearch( + hosts = [{'host': 'localhost', 'port': 9200}], + http_auth = ('admin', 'admin'), + use_ssl = True, + verify_certs = False, + ssl_show_warn = False, + connection_class = AsyncHttpConnection + ) +``` + +## Connection Pooling + +The OpenSearch Python client has a connection pool for each `host` value specified during initialization, and a connection pool for HTTP connections to each host implemented in the underlying HTTP libraries. You can adjust the max size of the latter connection pool with `pool_maxsize`. + +If you don't set this value, each connection library implementation will provide its default, which is typically `10`. Changing the pool size may improve performance in some multithreaded scenarios. + +The following example sets the number of connections in the connection pool to 12. + +```python +from opensearchpy import OpenSearch + +client = OpenSearch( + hosts = [{'host': 'localhost', 'port': 9200}], + http_auth = ('admin', 'admin'), + use_ssl = True, + verify_certs = False, + ssl_show_warn = False, + pool_maxsize = 12, +) +``` \ No newline at end of file diff --git a/guides/proxy.md b/guides/proxy.md index 5be7edf4..96b7d441 100644 --- a/guides/proxy.md +++ b/guides/proxy.md @@ -13,7 +13,6 @@ OpenSearch( hosts=["htps://..."], use_ssl=True, verify_certs=True, - connection_class=RequestsHttpConnection, trust_env=True, ) ``` From e6f38ba9e607d0ed9c07478c5fd0498ecc1d0703 Mon Sep 17 00:00:00 2001 From: dblock Date: Thu, 12 Oct 2023 16:26:45 -0400 Subject: [PATCH 3/4] Undo change in async for backwards compatibility. Signed-off-by: dblock --- opensearchpy/_async/http_aiohttp.py | 4 ++-- opensearchpy/connection/http_async.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/opensearchpy/_async/http_aiohttp.py b/opensearchpy/_async/http_aiohttp.py index 408f2b7c..cc426164 100644 --- a/opensearchpy/_async/http_aiohttp.py +++ b/opensearchpy/_async/http_aiohttp.py @@ -85,7 +85,7 @@ def __init__( client_key=None, ssl_version=None, ssl_assert_fingerprint=None, - pool_maxsize=None, + maxsize=10, headers=None, ssl_context=None, http_compress=None, @@ -217,7 +217,7 @@ def __init__( self.session = None # Parameters for creating an aiohttp.ClientSession later. - self._limit = pool_maxsize + self._limit = maxsize self._http_auth = http_auth self._ssl_context = ssl_context self._trust_env = trust_env diff --git a/opensearchpy/connection/http_async.py b/opensearchpy/connection/http_async.py index 5142f9f5..10f5a56a 100644 --- a/opensearchpy/connection/http_async.py +++ b/opensearchpy/connection/http_async.py @@ -43,7 +43,7 @@ def __init__( client_key=None, ssl_version=None, ssl_assert_fingerprint=None, - pool_maxsize=None, + maxsize=10, headers=None, ssl_context=None, http_compress=None, @@ -140,7 +140,7 @@ def __init__( self.session = None # Parameters for creating an aiohttp.ClientSession later. - self._limit = pool_maxsize + self._limit = maxsize self._http_auth = http_auth self._ssl_context = ssl_context From b3a5ed557f506e36171aa44adab395865d4c7750 Mon Sep 17 00:00:00 2001 From: dblock Date: Thu, 12 Oct 2023 16:50:09 -0400 Subject: [PATCH 4/4] Fix: typo. Signed-off-by: dblock --- guides/connection_classes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guides/connection_classes.md b/guides/connection_classes.md index 734a091a..da7357fb 100644 --- a/guides/connection_classes.md +++ b/guides/connection_classes.md @@ -7,7 +7,7 @@ # Connection Classes -The OpenSearch Python synchrnous client supports both the `Urllib3HttpConnection` connection class (default) from the [urllib3](https://pypi.org/project/urllib3/) library, and `RequestsHttpConnection` from the [requests](https://pypi.org/project/requests/) library. We recommend you use the default, unless your application is standardized on `requests`. +The OpenSearch Python synchronous client supports both the `Urllib3HttpConnection` connection class (default) from the [urllib3](https://pypi.org/project/urllib3/) library, and `RequestsHttpConnection` from the [requests](https://pypi.org/project/requests/) library. We recommend you use the default, unless your application is standardized on `requests`. The faster, asynchronous client, implements a class called `AsyncHttpConnection`, which uses [aiohttp](https://pypi.org/project/aiohttp/).