TabbyML · wsxiaoys · Jan 9, 2025 · Jan 4, 2025 · Jan 7, 2025 · Jan 8, 2025
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -68,7 +68,7 @@ mime_guess = "2.0.4"
 assert_matches = "1.5"
 insta = "1.34.0"
 logkit = "0.3"
-async-openai = "0.20"
+async-openai-alt = "0.26.1"
 tracing-test = "0.2"
 clap = "4.3.0"
 ratelimit = "0.10"

diff --git a/crates/http-api-bindings/Cargo.toml b/crates/http-api-bindings/Cargo.toml
@@ -17,7 +17,7 @@ serde_json = { workspace = true }
 tabby-common = { path = "../tabby-common" }
 tabby-inference = { path = "../tabby-inference" }
 ollama-api-bindings = { path = "../ollama-api-bindings" }
-async-openai.workspace = true
+async-openai-alt.workspace = true
 tokio.workspace = true
 tracing.workspace = true
 leaky-bucket = "1.1.2"

diff --git a/crates/http-api-bindings/src/chat/mod.rs b/crates/http-api-bindings/src/chat/mod.rs
@@ -1,6 +1,6 @@
 use std::sync::Arc;
 
-use async_openai::config::OpenAIConfig;
+use async_openai_alt::config::OpenAIConfig;
 use tabby_common::config::HttpModelConfig;
 use tabby_inference::{ChatCompletionStream, ExtendedOpenAIConfig};
 
@@ -34,7 +34,7 @@
     let config = builder.build().expect("Failed to build config");
 
     let engine = Box::new(
-        async_openai::Client::with_config(config)
+        async_openai_alt::Client::with_config(config)
             .with_http_client(create_reqwest_client(api_endpoint)),
     );
 

diff --git a/crates/http-api-bindings/src/rate_limit.rs b/crates/http-api-bindings/src/rate_limit.rs
@@ -1,4 +1,4 @@
-use async_openai::{
+use async_openai_alt::{
     error::OpenAIError,
     types::{
         ChatCompletionResponseStream, CreateChatCompletionRequest, CreateChatCompletionResponse,

diff --git a/crates/llama-cpp-server/Cargo.toml b/crates/llama-cpp-server/Cargo.toml
@@ -24,7 +24,7 @@ anyhow.workspace = true
 which = "6"
 serde.workspace = true
 serdeconv.workspace = true
-async-openai.workspace = true
+async-openai-alt.workspace = true
 
 [build-dependencies]
 cmake = "0.1"

diff --git a/crates/llama-cpp-server/src/lib.rs b/crates/llama-cpp-server/src/lib.rs
@@ -3,7 +3,7 @@
 use std::{path::PathBuf, sync::Arc};
 
 use anyhow::Result;
-use async_openai::error::OpenAIError;
+use async_openai_alt::error::OpenAIError;
 use async_trait::async_trait;
 use futures::stream::BoxStream;
 use serde::Deserialize;
@@ -161,15 +161,15 @@
 impl ChatCompletionStream for ChatCompletionServer {
     async fn chat(
         &self,
-        request: async_openai::types::CreateChatCompletionRequest,
-    ) -> Result<async_openai::types::CreateChatCompletionResponse, OpenAIError> {
+        request: async_openai_alt::types::CreateChatCompletionRequest,
+    ) -> Result<async_openai_alt::types::CreateChatCompletionResponse, OpenAIError> {
         self.chat_completion.chat(request).await
     }
 
     async fn chat_stream(
         &self,
-        request: async_openai::types::CreateChatCompletionRequest,
-    ) -> Result<async_openai::types::ChatCompletionResponseStream, OpenAIError> {
+        request: async_openai_alt::types::CreateChatCompletionRequest,
+    ) -> Result<async_openai_alt::types::ChatCompletionResponseStream, OpenAIError> {
         self.chat_completion.chat_stream(request).await
     }
 }

diff --git a/crates/tabby-inference/Cargo.toml b/crates/tabby-inference/Cargo.toml
@@ -16,7 +16,7 @@ derive_builder.workspace = true
 futures = { workspace = true }
 tabby-common = { path = "../tabby-common" }
 trie-rs = "0.1.1"
-async-openai.workspace = true
+async-openai-alt.workspace = true
 secrecy = "0.8"
 reqwest.workspace = true
 tracing.workspace = true
diff --git a/crates/tabby-inference/src/chat.rs b/crates/tabby-inference/src/chat.rs
@@ -1,4 +1,4 @@
-use async_openai::{
+use async_openai_alt::{
     config::OpenAIConfig,
     error::OpenAIError,
     types::{
@@ -85,7 +85,7 @@ impl ExtendedOpenAIConfig {
     }
 }
 
-impl async_openai::config::Config for ExtendedOpenAIConfig {
+impl async_openai_alt::config::Config for ExtendedOpenAIConfig {
     fn headers(&self) -> reqwest::header::HeaderMap {
         self.base.headers()
     }
@@ -108,7 +108,7 @@ impl async_openai::config::Config for ExtendedOpenAIConfig {
 }
 
 #[async_trait]
-impl ChatCompletionStream for async_openai::Client<ExtendedOpenAIConfig> {
+impl ChatCompletionStream for async_openai_alt::Client<ExtendedOpenAIConfig> {
     async fn chat(
         &self,
         request: CreateChatCompletionRequest,

diff --git a/crates/tabby/Cargo.toml b/crates/tabby/Cargo.toml
@@ -59,7 +59,7 @@ axum-prometheus = "0.6"
 uuid.workspace = true
 color-eyre = { version = "0.6.3" }
 reqwest.workspace = true
-async-openai.workspace = true
+async-openai-alt.workspace = true
 spinners = "4.1.1"
 regex.workspace = true
 

diff --git a/crates/tabby/src/routes/chat.rs b/crates/tabby/src/routes/chat.rs
@@ -1,6 +1,6 @@
 use std::sync::Arc;
 
-use async_openai::error::OpenAIError;
+use async_openai_alt::error::OpenAIError;
 use axum::{
     extract::State,
     response::sse::{Event, KeepAlive, Sse},
@@ -36,7 +36,7 @@ pub async fn chat_completions_utoipa(_request: Json<serde_json::Value>) -> Statu
 pub async fn chat_completions(
     State(state): State<Arc<dyn ChatCompletionStream>>,
     TypedHeader(MaybeUser(user)): TypedHeader<MaybeUser>,
-    Json(mut request): Json<async_openai::types::CreateChatCompletionRequest>,
+    Json(mut request): Json<async_openai_alt::types::CreateChatCompletionRequest>,
 ) -> Result<Sse<impl Stream<Item = Result<Event, anyhow::Error>>>, StatusCode> {
     if let Some(user) = user {
         request.user.replace(user);

diff --git a/crates/tabby/tests/goldentests.rs b/crates/tabby/tests/goldentests.rs
@@ -54,19 +54,22 @@ fn initialize_server(gpu_device: Option<&str>) {
     });
 }
 
-async fn wait_for_server(device: Option<&str>) {
-    initialize_server(device);
+async fn wait_for_server(gpu_device: Option<&str>) {
+    initialize_server(gpu_device);
 
     loop {
         println!("Waiting for server to start...");
-        let is_ok = reqwest::get("http://127.0.0.1:9090/v1/health")
-            .await
-            .is_ok();
-        if is_ok {
-            break;
-        } else {
-            sleep(Duration::from_secs(5)).await;
+        match reqwest::get("http://127.0.0.1:9090/v1/health").await {
+            Ok(resp) => {
+                if resp.status().is_success() {
+                    break;
+                }
+            }
+            Err(e) => {
+                println!("Waiting for server to start: {:?}", e);
+            }
         }
+        sleep(Duration::from_secs(5)).await;
     }
 }
 

diff --git a/crates/tabby/tests/goldentests_chat.rs b/crates/tabby/tests/goldentests_chat.rs
@@ -74,14 +74,17 @@ async fn wait_for_server(gpu_device: Option<&str>) {
 
     loop {
         println!("Waiting for server to start...");
-        let is_ok = reqwest::get("http://127.0.0.1:9090/v1/health")
-            .await
-            .is_ok();
-        if is_ok {
-            break;
-        } else {
-            sleep(Duration::from_secs(5)).await;
+        match reqwest::get("http://127.0.0.1:9090/v1/health").await {
+            Ok(resp) => {
+                if resp.status().is_success() {
+                    break;
+                }
+            }
+            Err(e) => {
+                println!("Waiting for server to start: {:?}", e);
+            }
         }
+        sleep(Duration::from_secs(5)).await;
     }
 }
 
@@ -103,8 +106,19 @@ async fn golden_test(body: serde_json::Value) -> String {
                     actual += content
                 }
             }
-            Err(_e) => {
-                // StreamEnd
+            Err(e) => {
+                match e {
+                    reqwest_eventsource::Error::StreamEnded => {
+                        break;
+                    }
+                    reqwest_eventsource::Error::InvalidStatusCode(code, resp) => {
+                        let resp = resp.text().await.unwrap();
+                        println!("Error: {} {:?}", code, resp);
+                    }
+                    e => {
+                        println!("Error: {:?}", e);
+                    }
+                }
                 break;
             }
         }

diff --git a/crates/tabby/tests/snapshots/goldentests_chat__run_chat_golden_tests-2.snap b/crates/tabby/tests/snapshots/goldentests_chat__run_chat_golden_tests-2.snap
@@ -1,5 +1,5 @@
 ---
 source: crates/tabby/tests/goldentests_chat.rs
-expression: "golden_test(json!({\n                \"seed\": 0, \"model\": \"default\", \"messages\":\n                [{\n                    \"role\": \"user\", \"content\":\n                    \"How to parse email address with regex\"\n                }]\n            })).await"
+expression: "golden_test(json!({\n    \"seed\": 0, \"model\": \"default\", \"messages\":\n    [{ \"role\": \"user\", \"content\": \"How to parse email address with regex\" }]\n})).await"
 ---
-" Parsing an email address with regular expressions can be a complex task. Here's one possible regular expression pattern that you can use to extract the username and domain name from an email address:\n```vbnet\n^[a-zA-Z0-9.!#$%&’*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\\.[a-zA-Z0-9-]+)*$\n```\nThis pattern checks for the following:\n\n1. The email address starts with one or more characters that are allowed in the username, such as letters, numbers, dots, and special characters.\n2. The `@` symbol must follow the username.\n3. The domain name consists of one or more characters that are allowed in the domain name, such as letters, numbers, dots, and hyphens.\n4. The domain name is followed by an optional period and one or more characters that are allowed in the domain name.\n5. The email address ends after the domain name and any optional period and characters.\n\nHere's an example of how you can use this regular expression pattern in Python:\n```python\nimport re\n\nemail = \"example@example.com\"\nusername, domain = re.split(\"[@.]\", email)\nprint(username)  # Output: example\nprint(domain)    # Output: example.com\n```\nIn this example, the `re.split()` function splits the email address into two parts using the regular expression pattern. The `username` variable will contain the username (`example`), and the `domain` variable will contain the domain name (`example.com`)."
+" Parsing an email address with regular expressions can be a bit tricky, but it can be done using a combination of patterns and character classes. Here's an example of a regular expression that can be used to match most email addresses:\n```\n\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b\n```\nThis regular expression uses the following patterns:\n\n* `\\b`: This is a word boundary that matches the beginning or end of a word. It ensures that the email address is matched as a whole, not just as a part of a longer string.\n* `[A-Za-z0-9._%+-]+`: This pattern matches one or more characters that are either letters (A-Z or a-z), digits (0-9), periods (.), underscores (\\_), percent signs (%), plus signs (+), or hyphens (-). This is the local part of the email address.\n* `@`: This is the character that separates the local part from the domain name.\n* `[A-Za-z0-9.-]+\\.`: This pattern matches one or more characters that are either letters (A-Z or a-z), digits (0-9), periods (.), or hyphens (-). The period is followed by a dot to indicate that it is the end of the domain name.\n* `[A-Z|a-z]{2,}`: This pattern matches two or more letters that are either uppercase (A-Z) or lowercase (a-z). This is the top-level domain of the email address.\n* `\\b`: This pattern matches the end of the email address.\n\nNote that this regular expression is not perfect and may not match all email addresses, but it should work for most common cases."
diff --git a/crates/tabby/tests/snapshots/goldentests_chat__run_chat_golden_tests.snap b/crates/tabby/tests/snapshots/goldentests_chat__run_chat_golden_tests.snap
@@ -1,5 +1,5 @@
 ---
 source: crates/tabby/tests/goldentests_chat.rs
-expression: "golden_test(json!({\n                \"seed\": 0, \"model\": \"default\", \"messages\":\n                [{\n                    \"role\": \"user\", \"content\":\n                    \"How to convert a list of string to numbers in python\"\n                }]\n            })).await"
+expression: "golden_test(json!({\n    \"seed\": 0, \"model\": \"default\", \"messages\":\n    [{\n        \"role\": \"user\", \"content\":\n        \"How to convert a list of string to numbers in python\"\n    }]\n})).await"
 ---
-" You can convert a list of strings to numbers in Python using the built-in `list()` function. Here's an example:\n```python\nstrings = ['1', '2', '3']\nnumbers = list(map(int, strings))\nprint(numbers)  # [1, 2, 3]\n```\nIn this example, we first define a list `strings` containing three string values. We then use the `map()` function to apply the `int()` function (which converts a string to an integer) to each element of the `strings` list, and the resulting list of integers is stored in the variable `numbers`. Finally, we print the `numbers` list to verify that the conversion was successful."
+" You can convert a list of strings to numbers in Python using the built-in `list()` function to convert the list of strings to a list of numbers, and then using the `int()` function to convert each element of the list to an integer. Here's an example:\n```\n# A list of strings\nnum_strings = ['1', '2', '3']\n\n# Convert the list of strings to a list of numbers\nnum_list = list(map(int, num_strings))\n\n# Print the list of numbers\nprint(num_list)\n```\nThis will output:\n```\n[1, 2, 3]\n```\nNote that this will only work if the strings represent integers. If the strings represent a different type of number, such as a decimal number, you will need to use a different function, such as `float()`, to convert them to a float.\n\nAlso, if you want to convert the string to a specific number type, you can use the built-in `int()` function and pass the number as an argument.\n\nFor example, to convert the string '123' to a float:\n```\nnum_string = '123'\nnum_float = float(num_string)\nprint(num_float)\n```\nThis will output:\n```\n123.0\n```\nAnd to convert the string '123' to a decimal:\n```\nnum_string = '123.45'\nnum_decimal = float(num_string)\nprint(num_decimal)\n```\nThis will output:\n```\n123.45\n```"
diff --git a/ee/tabby-schema/Cargo.toml b/ee/tabby-schema/Cargo.toml
@@ -10,7 +10,7 @@ schema-language = ["juniper/schema-language"]
 
 [dependencies]
 anyhow.workspace = true
-async-openai.workspace = true
+async-openai-alt.workspace = true
 async-trait.workspace = true
 axum = { workspace = true }
 base64 = "0.22.0"

diff --git a/ee/tabby-schema/src/schema/mod.rs b/ee/tabby-schema/src/schema/mod.rs
@@ -20,7 +20,7 @@ pub mod worker;
 use std::{sync::Arc, time::Instant};
 
 use access_policy::{AccessPolicyService, SourceIdAccessPolicy};
-use async_openai::{
+use async_openai_alt::{
     error::OpenAIError,
     types::{
         ChatCompletionRequestMessage, ChatCompletionRequestUserMessageArgs,

diff --git a/ee/tabby-webserver/Cargo.toml b/ee/tabby-webserver/Cargo.toml
@@ -53,7 +53,7 @@ strum.workspace = true
 cron = "0.12.1"
 async-stream.workspace = true
 logkit.workspace = true
-async-openai.workspace = true
+async-openai-alt.workspace = true
 ratelimit.workspace = true
 cached.workspace = true