From ab65ab4aa79120cb7ba680c670c8135f438b17dc Mon Sep 17 00:00:00 2001
From: Andreas Hartel
Date: Fri, 23 Sep 2022 09:43:08 +0200
Subject: [PATCH 1/7] Remove dependency on Literal from typing to support Python 3.7

---
 aleph_alpha_client/aleph_alpha_client.py | 23 +++++++++++++++++++++--
 aleph_alpha_client/embedding.py          | 18 ++++++++++++++++--
 tests/test_qa.py                         |  5 +++--
 3 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/aleph_alpha_client/aleph_alpha_client.py b/aleph_alpha_client/aleph_alpha_client.py
index 481ec02..657bbbf 100644
--- a/aleph_alpha_client/aleph_alpha_client.py
+++ b/aleph_alpha_client/aleph_alpha_client.py
@@ -127,8 +127,22 @@ def available_models(self):
         )
         return self._translate_errors(response).json()
 
+    def available_checkpoints(self):
+        """
+        Queries all checkpoints which are currently available.
+        """
+        response = self.get_request(
+            self.host + "checkpoints_available", headers=self.request_headers
+        )
+        return self._translate_errors(response).json()
+
     def tokenize(
-        self, model: Optional[str], prompt: str, tokens: bool = True, token_ids: bool = True, checkpoint: Optional[str] = None
+        self,
+        model: Optional[str],
+        prompt: str,
+        tokens: bool = True,
+        token_ids: bool = True,
+        checkpoint: Optional[str] = None,
     ):
         """
         Tokenizes the given prompt for the given model.
@@ -153,7 +167,12 @@ def tokenize(
         )
         return self._translate_errors(response).json()
 
-    def detokenize(self, model: Optional[str], token_ids: List[int], checkpoint: Optional[str] = None):
+    def detokenize(
+        self,
+        model: Optional[str],
+        token_ids: List[int],
+        checkpoint: Optional[str] = None,
+    ):
         """
         Detokenizes the given tokens.
         """
diff --git a/aleph_alpha_client/embedding.py b/aleph_alpha_client/embedding.py
index 6d78345..b8b215b 100644
--- a/aleph_alpha_client/embedding.py
+++ b/aleph_alpha_client/embedding.py
@@ -3,7 +3,6 @@
     Any,
     Dict,
     List,
-    Literal,
     NamedTuple,
     Optional,
     Tuple,
@@ -89,6 +88,21 @@ class SemanticRepresentation(Enum):
     Query = "query"
 
 
+class Compression(Enum):
+    """
+    The default behavior is to return the full embedding, but you can optionally request an embedding compressed to a smaller set of dimensions.
+
+    Full embedding sizes for supported models:
+    - luminous-base: 5120
+
+    The 128 size is expected to have a small drop in accuracy performance (4-6%), with the benefit of being much smaller, which makes comparing these embeddings much faster for use cases where speed is critical.
+
+    The 128 size can also perform better if you are embedding really short texts or documents.
+    """
+
+    C128 = 128
+
+
 class SemanticEmbeddingRequest(NamedTuple):
     """
     Embeds a text and returns vectors that can be used for downstream tasks (e.g. semantic similarity) and models (e.g. classifiers).
@@ -113,7 +127,7 @@ class SemanticEmbeddingRequest(NamedTuple):
 
     prompt: Prompt
     representation: SemanticRepresentation
-    compress_to_size: Optional[Literal[128]] = None
+    compress_to_size: Optional[Compression] = None
 
 
 class SemanticEmbeddingResponse(NamedTuple):
diff --git a/tests/test_qa.py b/tests/test_qa.py
index 43890fb..2d4ffea 100644
--- a/tests/test_qa.py
+++ b/tests/test_qa.py
@@ -80,9 +80,10 @@ def test_qa_with_client(client: AlephAlphaClient):
 def test_qa_with_client_against_checkpoint(
     client: AlephAlphaClient, qa_checkpoint_name: str
 ):
-    model_name = "luminous-extended"
     # given a client
-    assert model_name in map(lambda model: model["name"], client.available_models())
+    assert qa_checkpoint_name in map(
+        lambda checkpoint: checkpoint["name"], client.available_checkpoints()
+    )
 
     # when posting a QA request with explicit parameters
     response = client.qa(

From b21ee84e91785e6fb25c1b984386d44bcd9d1e11 Mon Sep 17 00:00:00 2001
From: Andreas Hartel
Date: Fri, 23 Sep 2022 09:57:20 +0200
Subject: [PATCH 2/7] Fix mypy issues

---
 aleph_alpha_client/__init__.py           | 1 +
 aleph_alpha_client/aleph_alpha_client.py | 5 ++++-
 aleph_alpha_client/embedding.py          | 7 +++++--
 tests/test_embed.py                      | 9 +++++----
 4 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/aleph_alpha_client/__init__.py b/aleph_alpha_client/__init__.py
index 5e7f78e..423b458 100644
--- a/aleph_alpha_client/__init__.py
+++ b/aleph_alpha_client/__init__.py
@@ -9,6 +9,7 @@
     SemanticEmbeddingRequest,
     SemanticEmbeddingResponse,
     SemanticRepresentation,
+    SemanticEmbeddingCompression,
 )
 from .completion import CompletionRequest, CompletionResponse
 from .qa import QaRequest, QaResponse
diff --git a/aleph_alpha_client/aleph_alpha_client.py b/aleph_alpha_client/aleph_alpha_client.py
index 657bbbf..a2ee158 100644
--- a/aleph_alpha_client/aleph_alpha_client.py
+++ b/aleph_alpha_client/aleph_alpha_client.py
@@ -1,4 +1,5 @@
 from socket import timeout
+from tkinter import W
 from typing import Any, List, Optional, Dict, Sequence, Union
 
 import requests
@@ -503,9 +504,11 @@ def semantic_embed(
         payload: Dict[str, Any] = {
             "prompt": serializable_prompt,
             "representation": request.representation.value,
-            "compress_to_size": request.compress_to_size,
         }
 
+        if request.compress_to_size is not None:
+            payload["compress_to_size"] = int(request.compress_to_size)
+
         if model is not None:
             payload["model"] = model
 
diff --git a/aleph_alpha_client/embedding.py b/aleph_alpha_client/embedding.py
index b8b215b..bce0809 100644
--- a/aleph_alpha_client/embedding.py
+++ b/aleph_alpha_client/embedding.py
@@ -88,7 +88,7 @@ class SemanticRepresentation(Enum):
     Query = "query"
 
 
-class Compression(Enum):
+class SemanticEmbeddingCompression(Enum):
     """
     The default behavior is to return the full embedding, but you can optionally request an embedding compressed to a smaller set of dimensions.
@@ -102,6 +102,9 @@ class Compression(Enum):
 
     C128 = 128
 
+    def __int__(self):
+        return self.value
+
 
 class SemanticEmbeddingRequest(NamedTuple):
     """
@@ -127,7 +130,7 @@ class SemanticEmbeddingRequest(NamedTuple):
 
     prompt: Prompt
     representation: SemanticRepresentation
-    compress_to_size: Optional[Compression] = None
+    compress_to_size: Optional[SemanticEmbeddingCompression] = None
 
 
 class SemanticEmbeddingResponse(NamedTuple):
diff --git a/tests/test_embed.py b/tests/test_embed.py
index 663b457..6e2c55f 100644
--- a/tests/test_embed.py
+++ b/tests/test_embed.py
@@ -5,6 +5,7 @@
 from aleph_alpha_client.embedding import (
     SemanticEmbeddingRequest,
     SemanticRepresentation,
+    SemanticEmbeddingCompression,
 )
 from aleph_alpha_client.prompt import Prompt
 from tests.common import client, checkpoint_name, model_name, luminous_base, model
@@ -130,7 +131,7 @@ def test_embed_semantic(luminous_base: AlephAlphaModel):
     request = SemanticEmbeddingRequest(
         prompt=Prompt.from_text("hello"),
         representation=SemanticRepresentation.Symmetric,
-        compress_to_size=128,
+        compress_to_size=SemanticEmbeddingCompression(128),
     )
 
     result = luminous_base.semantic_embed(request=request)
@@ -148,7 +149,7 @@ def test_embed_semantic_against_checkpoint(
     request = SemanticEmbeddingRequest(
         prompt=Prompt.from_text("hello"),
         representation=SemanticRepresentation.Symmetric,
-        compress_to_size=128,
+        compress_to_size=SemanticEmbeddingCompression(128),
     )
 
     result = model.semantic_embed(request=request)
@@ -162,7 +163,7 @@ def test_embed_semantic_client(client: AlephAlphaClient):
     request = SemanticEmbeddingRequest(
         prompt=Prompt.from_text("hello"),
         representation=SemanticRepresentation.Symmetric,
-        compress_to_size=128,
+        compress_to_size=SemanticEmbeddingCompression(128),
     )
     result = client.semantic_embed(
         model="luminous-base",
@@ -181,7 +182,7 @@ def test_semantic_embed_with_client_against_checkpoint(
     request = SemanticEmbeddingRequest(
         prompt=Prompt.from_text("hello"),
         representation=SemanticRepresentation.Symmetric,
-        compress_to_size=128,
+        compress_to_size=SemanticEmbeddingCompression(128),
     )
 
     result = client.semantic_embed(

From c82f4d5fbd3fefb123fb3e8271c7925d344499a5 Mon Sep 17 00:00:00 2001
From: Andreas Hartel
Date: Fri, 23 Sep 2022 10:13:15 +0200
Subject: [PATCH 3/7] Use int instead of Enum

---
 aleph_alpha_client/aleph_alpha_client.py |  4 +---
 aleph_alpha_client/embedding.py          | 20 +-------------------
 tests/test_embed.py                      |  9 ++++-----
 3 files changed, 6 insertions(+), 27 deletions(-)

diff --git a/aleph_alpha_client/aleph_alpha_client.py b/aleph_alpha_client/aleph_alpha_client.py
index a2ee158..06e1d94 100644
--- a/aleph_alpha_client/aleph_alpha_client.py
+++ b/aleph_alpha_client/aleph_alpha_client.py
@@ -504,11 +504,9 @@ def semantic_embed(
         payload: Dict[str, Any] = {
             "prompt": serializable_prompt,
             "representation": request.representation.value,
+            "compress_to_size": request.compress_to_size,
         }
 
-        if request.compress_to_size is not None:
-            payload["compress_to_size"] = int(request.compress_to_size)
-
         if model is not None:
             payload["model"] = model
 
diff --git a/aleph_alpha_client/embedding.py b/aleph_alpha_client/embedding.py
index bce0809..6762422 100644
--- a/aleph_alpha_client/embedding.py
+++ b/aleph_alpha_client/embedding.py
@@ -88,24 +88,6 @@ class SemanticRepresentation(Enum):
     Query = "query"
 
 
-class SemanticEmbeddingCompression(Enum):
-    """
-    The default behavior is to return the full embedding, but you can optionally request an embedding compressed to a smaller set of dimensions.
-
-    Full embedding sizes for supported models:
-    - luminous-base: 5120
-
-    The 128 size is expected to have a small drop in accuracy performance (4-6%), with the benefit of being much smaller, which makes comparing these embeddings much faster for use cases where speed is critical.
-
-    The 128 size can also perform better if you are embedding really short texts or documents.
-    """
-
-    C128 = 128
-
-    def __int__(self):
-        return self.value
-
-
 class SemanticEmbeddingRequest(NamedTuple):
     """
     Embeds a text and returns vectors that can be used for downstream tasks (e.g. semantic similarity) and models (e.g. classifiers).
@@ -130,7 +112,7 @@ class SemanticEmbeddingRequest(NamedTuple):
 
     prompt: Prompt
     representation: SemanticRepresentation
-    compress_to_size: Optional[SemanticEmbeddingCompression] = None
+    compress_to_size: Optional[int] = None
 
 
 class SemanticEmbeddingResponse(NamedTuple):
diff --git a/tests/test_embed.py b/tests/test_embed.py
index 6e2c55f..663b457 100644
--- a/tests/test_embed.py
+++ b/tests/test_embed.py
@@ -5,7 +5,6 @@
 from aleph_alpha_client.embedding import (
     SemanticEmbeddingRequest,
     SemanticRepresentation,
-    SemanticEmbeddingCompression,
 )
 from aleph_alpha_client.prompt import Prompt
 from tests.common import client, checkpoint_name, model_name, luminous_base, model
@@ -131,7 +130,7 @@ def test_embed_semantic(luminous_base: AlephAlphaModel):
     request = SemanticEmbeddingRequest(
         prompt=Prompt.from_text("hello"),
         representation=SemanticRepresentation.Symmetric,
-        compress_to_size=SemanticEmbeddingCompression(128),
+        compress_to_size=128,
     )
 
     result = luminous_base.semantic_embed(request=request)
@@ -149,7 +148,7 @@ def test_embed_semantic_against_checkpoint(
     request = SemanticEmbeddingRequest(
         prompt=Prompt.from_text("hello"),
         representation=SemanticRepresentation.Symmetric,
-        compress_to_size=SemanticEmbeddingCompression(128),
+        compress_to_size=128,
     )
 
     result = model.semantic_embed(request=request)
@@ -163,7 +162,7 @@ def test_embed_semantic_client(client: AlephAlphaClient):
     request = SemanticEmbeddingRequest(
         prompt=Prompt.from_text("hello"),
         representation=SemanticRepresentation.Symmetric,
-        compress_to_size=SemanticEmbeddingCompression(128),
+        compress_to_size=128,
     )
     result = client.semantic_embed(
         model="luminous-base",
@@ -182,7 +181,7 @@ def test_semantic_embed_with_client_against_checkpoint(
     request = SemanticEmbeddingRequest(
         prompt=Prompt.from_text("hello"),
         representation=SemanticRepresentation.Symmetric,
-        compress_to_size=SemanticEmbeddingCompression(128),
+        compress_to_size=128,
     )
 
     result = client.semantic_embed(

From 93ca3f0f523c9d371ec15101375d3500ecc87d97 Mon Sep 17 00:00:00 2001
From: Andreas Hartel
Date: Fri, 23 Sep 2022 10:14:51 +0200
Subject: [PATCH 4/7] Remove unused imports

---
 aleph_alpha_client/__init__.py           | 1 -
 aleph_alpha_client/aleph_alpha_client.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/aleph_alpha_client/__init__.py b/aleph_alpha_client/__init__.py
index 423b458..5e7f78e 100644
--- a/aleph_alpha_client/__init__.py
+++ b/aleph_alpha_client/__init__.py
@@ -9,7 +9,6 @@
     SemanticEmbeddingRequest,
     SemanticEmbeddingResponse,
     SemanticRepresentation,
-    SemanticEmbeddingCompression,
 )
 from .completion import CompletionRequest, CompletionResponse
 from .qa import QaRequest, QaResponse
diff --git a/aleph_alpha_client/aleph_alpha_client.py b/aleph_alpha_client/aleph_alpha_client.py
index 06e1d94..657bbbf 100644
--- a/aleph_alpha_client/aleph_alpha_client.py
+++ b/aleph_alpha_client/aleph_alpha_client.py
@@ -1,5 +1,4 @@
 from socket import timeout
-from tkinter import W
 from typing import Any, List, Optional, Dict, Sequence, Union
 
 import requests

From ea21831b9eeea40ac33a93be0a64f6ab930095bc Mon Sep 17 00:00:00 2001
From: Andreas Hartel
Date: Fri, 23 Sep 2022 10:16:28 +0200
Subject: [PATCH 5/7] Use Python 3.7

---
 .github/workflows/integration.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 5580637..23e41b7 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -11,7 +11,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v3
         with:
-          python-version: "3.x"
+          python-version: "3.7"
           cache: "pip"
           cache-dependency-path: |
             **/setup.py

From de220f743bca47359d755851acc1da1bb8d316b2 Mon Sep 17 00:00:00 2001
From: Andreas Hartel
Date: Fri, 23 Sep 2022 10:26:47 +0200
Subject: [PATCH 6/7] Run lint-and-test on multiple Python versions

---
 .github/workflows/integration.yml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 23e41b7..bdd78dd 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -5,13 +5,18 @@ on: push
 jobs:
   lint-and-test:
     runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # We support the latest 3.x version and 3.7 because
+        # Google Colab uses 3.7 by default.
+        python-version: [3.7, 3.x]
     steps:
       - uses: actions/checkout@v3
 
       - name: Set up Python
        uses: actions/setup-python@v3
         with:
-          python-version: "3.7"
+          python-version: ${{ matrix.python-version }}
           cache: "pip"
           cache-dependency-path: |
             **/setup.py

From dab1809fa83931d338d7f37c705cc7566d216ef3 Mon Sep 17 00:00:00 2001
From: Andreas Hartel
Date: Fri, 23 Sep 2022 10:30:26 +0200
Subject: [PATCH 7/7] Add changelog and increase version

---
 Changelog.md                  | 7 +++++++
 aleph_alpha_client/version.py | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/Changelog.md b/Changelog.md
index c2bd9f9..0a0c8c6 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## 2.4.1
+
+### Python support
+
+- Minimum supported Python version is now 3.7
+- Previously, the minimum supported version was 3.8
+
 ## 2.4.0
 
 ### New feature
diff --git a/aleph_alpha_client/version.py b/aleph_alpha_client/version.py
index 3d67cd6..54499df 100644
--- a/aleph_alpha_client/version.py
+++ b/aleph_alpha_client/version.py
@@ -1 +1 @@
-__version__ = "2.4.0"
+__version__ = "2.4.1"
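
Usage sketch (not part of the patches themselves): patch 1 introduces available_checkpoints() alongside available_models(), and the reworked test_qa.py asserts that the target checkpoint appears in that listing. A minimal sketch of calling it follows; the host URL and token are assumptions, since the series never shows client construction:

    from aleph_alpha_client import AlephAlphaClient

    # Host and token are placeholders; construct the client however you
    # normally do, e.g. per the library README.
    client = AlephAlphaClient(host="https://api.aleph-alpha.com", token="AA_TOKEN")

    # New in patch 1: checkpoints can be listed just like models.
    for checkpoint in client.available_checkpoints():
        print(checkpoint["name"])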
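After patch 3, compress_to_size is a plain Optional[int] (typing.Literal only exists in typing from Python 3.8, and the intermediate enum was dropped again), so the call shape in the updated tests is also the end-user call shape. Continuing the sketch above with the same client; per the removed docstring, 128 is the only compressed size, against a full embedding size of 5120 for luminous-base:

    from aleph_alpha_client import SemanticEmbeddingRequest, SemanticRepresentation
    from aleph_alpha_client.prompt import Prompt

    request = SemanticEmbeddingRequest(
        prompt=Prompt.from_text("hello"),
        representation=SemanticRepresentation.Symmetric,
        compress_to_size=128,  # plain int; no typing.Literal needed on 3.7
    )

    # `client` as constructed in the previous sketch; the returned embedding
    # should have 128 dimensions instead of the full 5120.
    result = client.semantic_embed(model="luminous-base", request=request)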