Drop default API URL for host of clients #209

Merged · 9 commits · Dec 5, 2024
Changes from 8 commits
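The breaking change at a glance: `Client` and `AsyncClient` no longer default `host` to the public API URL, so callers must now pass one explicitly. A minimal before/after sketch (the host URL is a placeholder, mirroring the diff below):

```python
import os

from aleph_alpha_client import Client

# Before (7.x) this worked, with host silently defaulting to the public API:
#   client = Client(token=os.environ["AA_TOKEN"])

# After (8.0.0), host is required:
client = Client(
    token=os.environ["AA_TOKEN"],
    host="https://inference-api.your-domain.com",  # placeholder URL
)
```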
2 changes: 1 addition & 1 deletion .env.example
@@ -1,2 +1,2 @@
-TEST_API_URL=https://test.api.aleph-alpha.com
+TEST_API_URL=https://inference-api.your-domain.com
 TEST_TOKEN=
8 changes: 4 additions & 4 deletions .github/workflows/integration.yml
@@ -1,4 +1,4 @@
-name: Test things on pushes
+name: Test

 on: [push, pull_request]

@@ -24,7 +24,7 @@ jobs:
           poetry run mypy tests --ignore-missing-imports
       - name: Run tests
         run: |
-          poetry run pytest
+          poetry run pytest --color=yes
         env:

Review comment (Contributor): Very important!

-          TEST_API_URL: https://api.aleph-alpha.com
-          TEST_TOKEN: ${{ secrets.AA_API_TOKEN }}
+          TEST_API_URL: ${{ secrets.TEST_API_URL }}
+          TEST_TOKEN: ${{ secrets.TEST_TOKEN }}
4 changes: 3 additions & 1 deletion .gitignore
@@ -139,4 +139,6 @@ cython_debug/

 # IDEs
 .vscode/
-.idea/
+.idea/
+
+.ruff_cache/
13 changes: 13 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,13 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.8.1
+    hooks:
+      - id: ruff
+      - id: ruff-format
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: 'v1.13.0'
+    hooks:
+      - id: mypy
+        pass_filenames: false
+        args: ['aleph_alpha_client', 'tests']
+        language: system
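With this config in place, contributors would typically run `pre-commit install` once per clone so that the ruff, ruff-format, and mypy hooks run on every commit (a usage note inferred from the config; the PR itself does not document this).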
5 changes: 5 additions & 0 deletions Changelog.md
@@ -1,5 +1,10 @@
 # Changelog

+## 8.0.0
+
+- Remove default value for `host` parameter in `Client` and `AsyncClient`. Passing a value for
+  the `host` is now required.
+
 ## 7.6.0

 - Add `instructable_embed` to `Client` and `AsyncClient`
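The 8.0.0 entry above is a breaking change. A hedged sketch of the migration for async callers, with a placeholder host URL:

```python
import asyncio
import os

from aleph_alpha_client import AsyncClient


async def main() -> None:
    # AsyncClient(token=...) alone now raises a TypeError because the
    # required argument `host` is missing; pass it explicitly instead.
    async with AsyncClient(
        token=os.environ["AA_TOKEN"],
        host="https://inference-api.your-domain.com",  # placeholder URL
    ) as client:
        ...  # issue requests as before


asyncio.run(main())
```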
4 changes: 2 additions & 2 deletions Contributing.md
@@ -16,15 +16,15 @@ Tests can be run using pytest. Make sure to create a `.env` file with the follow

 ```env
 # test settings
-TEST_API_URL=https://test.api.aleph-alpha.com
+TEST_API_URL=https://inference-api.your-domain.com
 TEST_TOKEN=your_token
 ```

 Instead of a token username and password can be used.

 ```env
 # test settings
-TEST_API_URL=https://api.aleph-alpha.com
+TEST_API_URL=https://inference-api.your-domain.com
 TEST_USERNAME=your_username
 TEST_PASSWORD=your_password
 ```
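A sketch of how a test suite might consume these settings (hypothetical fixture; the repo's actual conftest is not part of this diff):

```python
import os

import pytest
from aleph_alpha_client import Client


@pytest.fixture
def client() -> Client:
    # TEST_API_URL and TEST_TOKEN come from the .env file locally,
    # or from repository secrets in CI, as described above.
    return Client(
        token=os.environ["TEST_TOKEN"],
        host=os.environ["TEST_API_URL"],
    )
```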
14 changes: 10 additions & 4 deletions README.md
@@ -18,12 +18,15 @@ Python client for the [Aleph Alpha](https://aleph-alpha.com) API.
 import os
 from aleph_alpha_client import Client, CompletionRequest, Prompt

-client = Client(token=os.getenv("AA_TOKEN"))
+client = Client(
+    token=os.getenv("AA_TOKEN"),
+    host="https://inference-api.your-domain.com",
+)
 request = CompletionRequest(
     prompt=Prompt.from_text("Provide a short description of AI:"),
     maximum_tokens=64,
 )
-response = client.complete(request, model="luminous-extended")
+response = client.complete(request, model="pharia-1-llm-7b-control")

 print(response.completions[0].completion)
 ```
@@ -35,12 +38,15 @@ import os
 from aleph_alpha_client import AsyncClient, CompletionRequest, Prompt

 # Can enter context manager within an async function
-async with AsyncClient(token=os.environ["AA_TOKEN"]) as client:
+async with AsyncClient(
+    token=os.environ["AA_TOKEN"],
+    host="https://inference-api.your-domain.com",
+) as client:
     request = CompletionRequest(
         prompt=Prompt.from_text("Provide a short description of AI:"),
         maximum_tokens=64,
     )
-    response = client.complete_with_streaming(request, model="luminous-base")
+    response = client.complete_with_streaming(request, model="pharia-1-llm-7b-control")

     async for stream_item in response:
         print(stream_item)
25 changes: 14 additions & 11 deletions aleph_alpha_client/aleph_alpha_client.py
@@ -129,7 +129,7 @@ class Client:
         token (string, required):
             The API token that will be used for authentication.

-        host (string, required, default "https://api.aleph-alpha.com"):
+        host (string, required):
             The hostname of the API host.

         hosting(string, optional, default None):
@@ -163,17 +163,18 @@ class Client:
             Internal feature.

         Example usage:
-            >>> request = CompletionRequest(
-                    prompt=Prompt.from_text(f"Request"), maximum_tokens=64
+            >>> request = CompletionRequest(prompt=Prompt.from_text(f"Request"), maximum_tokens=64)
+            >>> client = Client(
+                    token=os.environ["AA_TOKEN"],
+                    host="https://inference-api.your-domain.com",
                 )
-            >>> client = Client(token=os.environ["AA_TOKEN"])
-            >>> response: CompletionResponse = client.complete(request, "luminous-base")
+            >>> response: CompletionResponse = client.complete(request, "pharia-1-llm-7b-control")
         """

     def __init__(
         self,
         token: str,
-        host: str = "https://api.aleph-alpha.com",
+        host: str,
         hosting: Optional[str] = None,
         request_timeout_seconds: int = DEFAULT_REQUEST_TIMEOUT,
         total_retries: int = 8,
@@ -706,7 +707,7 @@ class AsyncClient:
         token (string, required):
             The API token that will be used for authentication.

-        host (string, required, default "https://api.aleph-alpha.com"):
+        host (string, required):
             The hostname of the API host.

         hosting(string, optional, default None):
@@ -741,14 +742,17 @@ class AsyncClient:

         Example usage:
             >>> request = CompletionRequest(prompt=Prompt.from_text(f"Request"), maximum_tokens=64)
-            >>> async with AsyncClient(token=os.environ["AA_TOKEN"]) as client:
-                    response: CompletionResponse = await client.complete(request, "luminous-base")
+            >>> async with AsyncClient(
+                    token=os.environ["AA_TOKEN"],
+                    host="https://inference-api.your-domain.com"
+                ) as client:
+                    response: CompletionResponse = await client.complete(request, "pharia-1-llm-7b-control")
         """

     def __init__(
         self,
         token: str,
-        host: str = "https://api.aleph-alpha.com",
+        host: str,
         hosting: Optional[str] = None,
         request_timeout_seconds: int = DEFAULT_REQUEST_TIMEOUT,
         total_retries: int = 8,
@@ -846,7 +850,6 @@ async def _post_request(
         json_body = self._build_json_body(request, model)

         query_params = self._build_query_parameters()
-
         async with self.session.post(
             self.host + endpoint, json=json_body, params=query_params
         ) as response:
8 changes: 4 additions & 4 deletions aleph_alpha_client/completion.py
@@ -40,12 +40,12 @@ class CompletionRequest:
         presence_penalty (float, optional, default 0.0)
             The presence penalty reduces the likelihood of generating tokens that are already present in the
             generated text (`repetition_penalties_include_completion=true`) respectively the prompt (`repetition_penalties_include_prompt=true`).
-            Presence penalty is independent of the number of occurences. Increase the value to produce text that is not repeating the input.
+            Presence penalty is independent of the number of occurrences. Increase the value to produce text that is not repeating the input.

         frequency_penalty (float, optional, default 0.0)
             The frequency penalty reduces the likelihood of generating tokens that are already present in the
             generated text (`repetition_penalties_include_completion=true`) respectively the prompt (`repetition_penalties_include_prompt=true`).
-            Frequency penalty is dependent on the number of occurences of a token.
+            Frequency penalty is dependent on the number of occurrences of a token.

         repetition_penalties_include_prompt (bool, optional, default False)
             Flag deciding whether presence penalty or frequency penalty are updated from the prompt
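The presence/frequency distinction documented above can be made concrete with a small sketch. This is illustrative only; the service's actual sampling code is not part of this diff:

```python
from collections import Counter


def apply_penalties(logits, generated, presence_penalty, frequency_penalty):
    """Penalize tokens already present in `generated` (illustrative only)."""
    counts = Counter(generated)
    adjusted = dict(logits)
    for token, count in counts.items():
        if token in adjusted:
            adjusted[token] -= presence_penalty           # flat, once per distinct token
            adjusted[token] -= frequency_penalty * count  # grows with repetitions
    return adjusted


# "the" appeared 3 times: penalized 0.5 once (presence) plus 0.1 * 3 (frequency).
print(apply_penalties({"the": 2.0, "cat": 1.5}, ["the", "the", "the"], 0.5, 0.1))
# {'the': 1.2, 'cat': 1.5}
```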
@@ -107,7 +107,7 @@ class CompletionRequest:
         stop_sequences (List(str), optional, default None)
             List of strings which will stop generation if they're generated. Stop sequences may be helpful in structured texts.

-            Example: In a question answering scenario a text may consist of lines starting with either "Question: " or "Answer: " (alternating). After producing an answer, the model will be likely to generate "Question: ". "Question: " may therfore be used as stop sequence in order not to have the model generate more questions but rather restrict text generation to the answers.
+            Example: In a question answering scenario a text may consist of lines starting with either "Question: " or "Answer: " (alternating). After producing an answer, the model will be likely to generate "Question: ". "Question: " may therefore be used as stop sequence in order not to have the model generate more questions but rather restrict text generation to the answers.

         tokens (bool, optional, default False)
             return tokens of completion
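The question-answering example in the `stop_sequences` description above translates directly into a request (a sketch; the prompt text is invented):

```python
from aleph_alpha_client import CompletionRequest, Prompt

request = CompletionRequest(
    prompt=Prompt.from_text("Question: What is AI?\nAnswer:"),
    maximum_tokens=64,
    # Stop before the model starts generating the next question.
    stop_sequences=["Question: "],
)
```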
@@ -131,7 +131,7 @@ class CompletionRequest:
             (if repetition_penalties_include_prompt is True) and prior completion (if repetition_penalties_include_completion is True).

         sequence_penalty_min_length (int, default 2)
-            Minimal number of tokens to be considered as sequence. Must be greater or eqaul 2.
+            Minimal number of tokens to be considered as sequence. Must be greater or equal 2.

         use_multiplicative_sequence_penalty (bool, default False)
             Flag deciding whether sequence penalty is applied multiplicatively (True) or additively (False).
12 changes: 6 additions & 6 deletions aleph_alpha_client/prompt.py
@@ -54,7 +54,7 @@ class TokenControl:

         factor (float, required):
             The amount to adjust model attention by.
-            Values between 0 and 1 will supress attention.
+            Values between 0 and 1 will suppress attention.
             A value of 1 will have no effect.
             Values above 1 will increase attention.

@@ -121,7 +121,7 @@ class TextControl:
             The amount of characters to apply the factor to.
         factor (float, required):
             The amount to adjust model attention by.
-            Values between 0 and 1 will supress attention.
+            Values between 0 and 1 will suppress attention.
             A value of 1 will have no effect.
             Values above 1 will increase attention.
         token_overlap (ControlTokenOverlap, optional):
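A hedged sketch of what the `factor` scale in these control docstrings means in practice. The `start` parameter and the `Text`/`Prompt` constructors are assumed from the library's public API and are not shown in this hunk:

```python
from aleph_alpha_client import Prompt, Text, TextControl

# factor < 1 suppresses attention, 1.0 is neutral, > 1 amplifies.
damp_greeting = TextControl(start=0, length=5, factor=0.3)  # suppress "Hello"
boost_topic = TextControl(start=13, length=2, factor=1.5)   # amplify "AI"
prompt = Prompt([Text("Hello world, AI is ", controls=[damp_greeting, boost_topic])])
```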
@@ -163,7 +163,7 @@ class Text:
         text (str, required):
             The text prompt
         controls (list of TextControl, required):
-            A list of TextControls to manilpulate attention when processing the prompt.
+            A list of TextControls to manipulate attention when processing the prompt.
             Can be empty if no manipulation is required.

     Examples:
@@ -227,7 +227,7 @@ class ImageControl:
             Must be a value between 0 and 1, where 1 means the full height of the image.
         factor (float, required):
             The amount to adjust model attention by.
-            Values between 0 and 1 will supress attention.
+            Values between 0 and 1 will suppress attention.
             A value of 1 will have no effect.
             Values above 1 will increase attention.
         token_overlap (ControlTokenOverlap, optional):
@@ -285,7 +285,7 @@ class Image:
     >>> image = Image.from_url(url)
     """

-    # We use a base_64 reperesentation, because we want to embed the image
+    # We use a base_64 representation, because we want to embed the image
     # into a prompt send in JSON.
     base_64: str
     cropping: Optional[Cropping]
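The design choice in the comment above, storing a base64 string so raw image bytes survive JSON serialization, looks roughly like this in isolation (standalone sketch; the dict keys are illustrative, not the API's wire format):

```python
import base64
import json

with open("picture.png", "rb") as f:  # any local image file
    image_b64 = base64.b64encode(f.read()).decode("ascii")

# Base64 output is plain ASCII, so it embeds cleanly in a JSON body.
payload = json.dumps({"type": "image", "data": image_b64})
```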
@@ -310,7 +310,7 @@ def from_image_source(
             p = urlparse(image_source)
             if p.scheme:
                 return cls.from_url(url=image_source, controls=controls)
-        except Exception as e:
+        except Exception:
             # we assume that If the string runs into a Exception it isn't not a valid ulr
             pass
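The dispatch rule in this hunk, treating the input as a URL only when `urlparse` finds a scheme and falling back otherwise, can be isolated into a small sketch (hypothetical helper, not part of the diff):

```python
from urllib.parse import urlparse


def looks_like_url(source: str) -> bool:
    """True when the string carries a URL scheme such as https (sketch)."""
    try:
        return bool(urlparse(source).scheme)
    except Exception:
        # Mirror the diff: anything urlparse chokes on is "not a URL".
        return False


print(looks_like_url("https://example.com/cat.png"))  # True
print(looks_like_url("images/cat.png"))               # False
```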

6 changes: 3 additions & 3 deletions docs/source/index.rst
@@ -18,7 +18,7 @@ Synchronous client.
     from aleph_alpha_client import Client, CompletionRequest, Prompt
     import os

-    client = Client(token=os.getenv("AA_TOKEN"))
+    client = Client(token=os.getenv("AA_TOKEN"), host="https://inference-api.your-domain.com")

Review comment (Contributor): Why not get the host from the .env, too?

Review comment (Contributor, author): good catch. Probably the better approach, will update.

Review comment (Contributor, author): done

     prompt = Prompt.from_text("Provide a short description of AI:")
     request = CompletionRequest(prompt=prompt, maximum_tokens=20)
     result = client.complete(request, model="luminous-extended")
@@ -32,7 +32,7 @@ Synchronous client with prompt containing an image.
     from aleph_alpha_client import Client, CompletionRequest, PromptTemplate, Image
     import os

-    client = Client(token=os.getenv("AA_TOKEN"))
+    client = Client(token=os.getenv("AA_TOKEN"), host="https://inference-api.your-domain.com")
     image = Image.from_file("path-to-an-image")
     prompt_template = PromptTemplate("{{image}}This picture shows ")
     prompt = prompt_template.to_prompt(image=prompt_template.placeholder(image))
@@ -50,7 +50,7 @@ Asynchronous client.
     from aleph_alpha_client import AsyncClient, CompletionRequest, Prompt

     # Can enter context manager within an async function
-    async with AsyncClient(token=os.environ["AA_TOKEN"]) as client:
+    async with AsyncClient(token=os.environ["AA_TOKEN"], host="https://inference-api.your-domain.com") as client:
         request = CompletionRequest(
             prompt=Prompt.from_text("Request"),
             maximum_tokens=64,