chore: use one mod for rate limit
zwpaper committed Nov 26, 2024
1 parent b6ec3a9 commit 2abb6cf
Showing 8 changed files with 165 additions and 164 deletions.
18 changes: 7 additions & 11 deletions crates/http-api-bindings/src/chat/mod.rs
```diff
@@ -1,12 +1,10 @@
-mod rate_limit;
-
-use std::{sync::Arc, time::Duration};
+use std::sync::Arc;
 
 use async_openai::config::OpenAIConfig;
-use ratelimit::Ratelimiter;
 use tabby_common::config::HttpModelConfig;
 use tabby_inference::{ChatCompletionStream, ExtendedOpenAIConfig};
 
+use super::rate_limit;
 use crate::create_reqwest_client;
 
 pub async fn create(model: &HttpModelConfig) -> Arc<dyn ChatCompletionStream> {
@@ -19,6 +17,7 @@ pub async fn create(model: &HttpModelConfig) -> Arc<dyn ChatCompletionStream> {
         .with_api_key(model.api_key.clone().unwrap_or_default());
 
     let mut builder = ExtendedOpenAIConfig::builder();
+
     builder
         .base(config)
         .supported_models(model.supported_models.clone())
@@ -39,11 +38,8 @@ pub async fn create(model: &HttpModelConfig) -> Arc<dyn ChatCompletionStream> {
             .with_http_client(create_reqwest_client(api_endpoint)),
     );
 
-    let ratelimiter =
-        Ratelimiter::builder(model.rate_limit.request_per_minute, Duration::from_secs(60))
-            .max_tokens(model.rate_limit.request_per_minute)
-            .build()
-            .expect("Failed to create ratelimiter, please check the rate limit configuration");
-
-    Arc::new(rate_limit::RateLimitedChatStream::new(engine, ratelimiter))
+    Arc::new(rate_limit::RateLimitedChatStream::new(
+        engine,
+        model.rate_limit.request_per_minute,
+    ))
 }
```
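The pattern above repeats in each backend: the call site no longer builds a `ratelimit::Ratelimiter` itself but passes the raw requests-per-minute value to the wrapper. A minimal sketch of the constructor helper this implies inside the shared module follows; the helper name is an assumption (the new module's source is not rendered on this page), but the builder chain is exactly the code deleted from the call sites:

```rust
use std::time::Duration;

use ratelimit::Ratelimiter;

/// Hypothetical shared helper: turns a requests-per-minute setting into a
/// token-bucket limiter that refills `request_per_minute` tokens every 60s.
pub fn new_rate_limiter(request_per_minute: u64) -> Ratelimiter {
    Ratelimiter::builder(request_per_minute, Duration::from_secs(60))
        .max_tokens(request_per_minute)
        .build()
        .expect("Failed to create ratelimiter, please check the rate limit configuration")
}
```

One side effect of centralizing this: a misconfigured rate limit now panics from a single `expect` in one module rather than from three copies of it.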
64 changes: 0 additions & 64 deletions crates/http-api-bindings/src/chat/rate_limit.rs

This file was deleted.

17 changes: 7 additions & 10 deletions crates/http-api-bindings/src/completion/mod.rs
```diff
@@ -1,17 +1,17 @@
 mod llama;
 mod mistral;
 mod openai;
-mod rate_limit;
 
-use std::{sync::Arc, time::Duration};
+use std::sync::Arc;
 
 use llama::LlamaCppEngine;
 use mistral::MistralFIMEngine;
 use openai::OpenAICompletionEngine;
-use ratelimit::Ratelimiter;
 use tabby_common::config::HttpModelConfig;
 use tabby_inference::CompletionStream;
 
+use super::rate_limit;
+
 pub async fn create(model: &HttpModelConfig) -> Arc<dyn CompletionStream> {
     let engine = match model.kind.as_str() {
         "llama.cpp/completion" => LlamaCppEngine::create(
@@ -51,13 +51,10 @@ pub async fn create(model: &HttpModelConfig) -> Arc<dyn CompletionStream> {
         ),
     };
 
-    let ratelimiter =
-        Ratelimiter::builder(model.rate_limit.request_per_minute, Duration::from_secs(60))
-            .max_tokens(model.rate_limit.request_per_minute)
-            .build()
-            .expect("Failed to create ratelimiter, please check the rate limit configuration");
-
-    Arc::new(rate_limit::RateLimitedCompletion::new(engine, ratelimiter))
+    Arc::new(rate_limit::RateLimitedCompletion::new(
+        engine,
+        model.rate_limit.request_per_minute,
+    ))
 }
 
 const FIM_TOKEN: &str = "<|FIM|>";
```
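The completion wrapper additionally has to gate a streaming API. The sketch below is an assumption-heavy illustration: the stand-in `CompletionStream` trait only approximates the shape of `tabby_inference::CompletionStream` (which this diff does not show), and it reuses the hypothetical `new_rate_limiter` helper sketched after the chat diff:

```rust
use async_trait::async_trait;
use futures::stream::BoxStream;
use ratelimit::Ratelimiter;

/// Stand-in for tabby_inference::CompletionStream; the real trait's exact
/// signature is not shown in this diff.
#[async_trait]
pub trait CompletionStream: Send + Sync {
    async fn generate(&self, prompt: &str) -> BoxStream<'static, String>;
}

pub struct RateLimitedCompletion {
    completion: Box<dyn CompletionStream>,
    rate_limiter: Ratelimiter,
}

impl RateLimitedCompletion {
    pub fn new(completion: impl CompletionStream + 'static, request_per_minute: u64) -> Self {
        Self {
            completion: Box::new(completion),
            rate_limiter: new_rate_limiter(request_per_minute),
        }
    }
}

#[async_trait]
impl CompletionStream for RateLimitedCompletion {
    async fn generate(&self, prompt: &str) -> BoxStream<'static, String> {
        // Spend one token per request; individual streamed chunks are not
        // separately limited. try_wait() returns Err(duration) when the
        // bucket is empty, telling us how long to sleep before retrying.
        while let Err(sleep) = self.rate_limiter.try_wait() {
            tokio::time::sleep(sleep).await;
        }
        self.completion.generate(prompt).await
    }
}
```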
35 changes: 0 additions & 35 deletions crates/http-api-bindings/src/completion/rate_limit.rs

This file was deleted.

16 changes: 5 additions & 11 deletions crates/http-api-bindings/src/embedding/mod.rs
```diff
@@ -1,17 +1,16 @@
 mod llama;
 mod openai;
-mod rate_limit;
 mod voyage;
 
 use core::panic;
-use std::{sync::Arc, time::Duration};
+use std::sync::Arc;
 
 use llama::LlamaCppEngine;
-use ratelimit::Ratelimiter;
 use tabby_common::config::HttpModelConfig;
 use tabby_inference::Embedding;
 
 use self::{openai::OpenAIEmbeddingEngine, voyage::VoyageEmbeddingEngine};
+use super::rate_limit;
 
 pub async fn create(config: &HttpModelConfig) -> Arc<dyn Embedding> {
     let engine = match config.kind.as_str() {
@@ -48,13 +47,8 @@ pub async fn create(config: &HttpModelConfig) -> Arc<dyn Embedding> {
         ),
     };
 
-    let ratelimiter = Ratelimiter::builder(
-        config.rate_limit.request_per_minute,
-        Duration::from_secs(60),
-    )
-    .max_tokens(config.rate_limit.request_per_minute)
-    .build()
-    .expect("Failed to create ratelimiter, please check the rate limit configuration");
-
-    Arc::new(rate_limit::RateLimitedEmbedding::new(engine, ratelimiter))
+    Arc::new(rate_limit::RateLimitedEmbedding::new(
+        engine,
+        config.rate_limit.request_per_minute,
+    ))
 }
```
33 changes: 0 additions & 33 deletions crates/http-api-bindings/src/embedding/rate_limit.rs

This file was deleted.

1 change: 1 addition & 0 deletions crates/http-api-bindings/src/lib.rs
```diff
@@ -1,6 +1,7 @@
 mod chat;
 mod completion;
 mod embedding;
+mod rate_limit;
 
 pub use chat::create as create_chat;
 pub use completion::{build_completion_prompt, create};
```
145 changes: 145 additions & 0 deletions crates/http-api-bindings/src/rate_limit.rs

The diff for this new file, the consolidated rate_limit module, was not loaded on this page; its line count follows from the totals above (165 additions in all, 20 of them shown in the other files).
