Skip to content

Commit

Permalink
Merge pull request #178 from Aleph-Alpha/max-tokens-optional
Browse files Browse the repository at this point in the history
Max tokens optional
  • Loading branch information
moldhouse authored Aug 13, 2024
2 parents 18a772f + 4314ba0 commit 1ec4c50
Show file tree
Hide file tree
Showing 13 changed files with 40 additions and 27 deletions.
5 changes: 1 addition & 4 deletions .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
# We currently only support up to python 3.11
# because aiohttp does not support 3.12 as of yet.
# 3.7 because Google Colab uses 3.7 by default.
python-version: [3.7, 3.11]
python-version: [3.8, 3.12]

steps:
- uses: actions/checkout@v4
Expand Down
11 changes: 11 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# Changelog

## 7.3.0

- The `maximum_tokens` attribute of `CompletionRequest` now defaults to `None`

## 7.2.0

### Python support

- Minimal supported Python version is now 3.8
- Dependency `aiohttp` is now required to be at least version `3.10`.

## 7.1.0

- Introduce support for internal feature 'tags'
Expand Down
3 changes: 3 additions & 0 deletions aleph_alpha_client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@
"ImageControl",
"ImagePromptItemExplanation",
"ImageScore",
"load_base64_from_file",
"load_base64_from_url",
"POOLING_OPTIONS",
"Prompt",
"PromptTemplate",
Expand All @@ -102,4 +104,5 @@
"TokenPromptItemExplanation",
"Tokens",
"TokenScore",
"__version__",
]
4 changes: 0 additions & 4 deletions aleph_alpha_client/aleph_alpha_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,8 @@
Optional,
Dict,
Sequence,
Tuple,
Type,
Union,
Iterator,
)
import aiohttp
import asyncio
Expand All @@ -29,8 +27,6 @@
from aleph_alpha_client.explanation import (
ExplanationRequest,
ExplanationResponse,
ExplanationRequest,
ExplanationResponse,
)
from aleph_alpha_client.summarization import SummarizationRequest, SummarizationResponse
from aleph_alpha_client.qa import QaRequest, QaResponse
Expand Down
8 changes: 5 additions & 3 deletions aleph_alpha_client/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ class CompletionRequest:
Unconditional completion can be started with an empty string (default).
The prompt may contain a zero shot or few shot task.
maximum_tokens (int, optional, default 64):
maximum_tokens (int, optional, default None):
The maximum number of tokens to be generated.
Completion will terminate after the maximum number of tokens is reached. Increase this value to generate longer texts.
A text is split into tokens. Usually there are more tokens than words.
The maximum supported number of tokens depends on the model (for luminous-base, it may not exceed 2048 tokens).
The prompt's tokens plus the maximum_tokens request must not exceed this number.
The prompt's tokens plus the maximum_tokens request must not exceed this number. If set to None, the model will stop
generating tokens either if it outputs a sequence specified in `stop_sequences` or if it reaches its technical limit.
For most models, this means that the sum of input and output tokens is equal to its context window.
temperature (float, optional, default 0.0)
A higher sampling temperature encourages the model to produce less probable outputs ("be more creative"). Values are expected in a range from 0.0 to 1.0. Try high values (e.g. 0.9) for a more "creative" response and the default 0.0 for a well defined and repeatable answer.
Expand Down Expand Up @@ -181,7 +183,7 @@ class CompletionRequest:
"""

prompt: Prompt
maximum_tokens: int = 64
maximum_tokens: Optional[int] = None
temperature: float = 0.0
top_k: int = 0
top_p: float = 0.0
Expand Down
2 changes: 1 addition & 1 deletion aleph_alpha_client/detokenization.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dataclasses import dataclass, asdict
from typing import Any, Dict, List, Mapping, Optional, Sequence
from typing import Any, Dict, Mapping, Sequence


@dataclass(frozen=True)
Expand Down
3 changes: 0 additions & 3 deletions aleph_alpha_client/explanation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@
Union,
)

# Import Literal with Python 3.7 fallback
from typing_extensions import Literal

from aleph_alpha_client import Text

from aleph_alpha_client.prompt import ControlTokenOverlap, Image, Prompt, PromptItem
Expand Down
3 changes: 1 addition & 2 deletions aleph_alpha_client/prompt_template.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from re import finditer
import re
from typing import Dict, Iterable, Mapping, NewType, Sequence, Tuple, Union
from typing import Dict, Iterable, Mapping, NewType, Tuple, Union
from uuid import UUID, uuid4
from liquid import Template

Expand Down
2 changes: 1 addition & 1 deletion aleph_alpha_client/qa.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dataclasses import asdict, dataclass
from typing import Any, Dict, Mapping, Optional, Sequence
from typing import Any, Mapping, Optional, Sequence

from aleph_alpha_client.document import Document

Expand Down
2 changes: 1 addition & 1 deletion aleph_alpha_client/summarization.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dataclasses import asdict, dataclass
from typing import Any, Dict, Mapping, Sequence
from typing import Any, Mapping

from aleph_alpha_client.document import Document

Expand Down
4 changes: 2 additions & 2 deletions aleph_alpha_client/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version__ = "7.1.0"
MIN_API_VERSION = "1.16.0"
__version__ = "7.3.0"
MIN_API_VERSION = "1.17.0"
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def version():
install_requires=[
"requests >= 2.28",
"urllib3 >= 1.26",
"aiohttp >= 3.8.6",
"aiodns >= 3.0.0",
"aiohttp >= 3.10.2",
"aiodns >= 3.2.0",
"aiohttp-retry >= 2.8.3",
"tokenizers >= 0.13.2",
"typing_extensions >= 4.5.0",
Expand Down
16 changes: 12 additions & 4 deletions tests/test_complete.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@
)


# AsyncClient


@pytest.mark.system_test
async def test_can_complete_with_async_client(
async_client: AsyncClient, model_name: str
Expand All @@ -35,7 +32,18 @@ async def test_can_complete_with_async_client(
assert response.model_version is not None


# Client
@pytest.mark.system_test
def test_complete_maximum_tokens_none(sync_client: Client, model_name: str):
request = CompletionRequest(
prompt=Prompt.from_text("Hello, World!"),
maximum_tokens=None,
stop_sequences=[","],
)

response = sync_client.complete(request, model=model_name)
assert len(response.completions) == 1
assert response.completions[0].completion is not None
assert len(response.completions[0].completion) < 100


@pytest.mark.system_test
Expand Down

0 comments on commit 1ec4c50

Please sign in to comment.