Skip to content

Commit

Permalink
chore(models-http-api): use rate_limit mod in embedding
Browse files Browse the repository at this point in the history
  • Loading branch information
zwpaper committed Nov 26, 2024
1 parent c3d92bb commit b3685ad
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 27 deletions.
1 change: 1 addition & 0 deletions crates/http-api-bindings/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ tabby-inference = { path = "../tabby-inference" }
ollama-api-bindings = { path = "../ollama-api-bindings" }
async-openai.workspace = true
ratelimit = "0.10"
tokio.workspace = true

[dev-dependencies]
tokio = { workspace = true, features = ["rt", "macros"] }
29 changes: 2 additions & 27 deletions crates/http-api-bindings/src/embedding/mod.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
mod llama;
mod openai;
mod rate_limit;
mod voyage;

use core::panic;
use std::{sync::Arc, time::Duration};

use async_trait::async_trait;
use llama::LlamaCppEngine;
use ratelimit::Ratelimiter;
use tabby_common::config::HttpModelConfig;
Expand Down Expand Up @@ -55,31 +55,6 @@ pub async fn create(config: &HttpModelConfig) -> Arc<dyn Embedding> {
.max_tokens(config.rate_limit.request_per_minute)
.build()
.expect("Failed to create ratelimiter, please check the rate limit configuration");
let engine = RateLimiter {
embedding: engine,
rate_limiter: ratelimiter,
};

Arc::new(engine)
}

/// Pairs an embedding backend with a rate limiter; the `Embedding` impl for
/// this type polls the limiter before forwarding each call to the backend.
pub struct RateLimiter {
// Wrapped backend that performs the actual embedding request.
embedding: Box<dyn Embedding>,
// Limiter polled via `try_wait()` before every forwarded request.
rate_limiter: Ratelimiter,
}

#[async_trait]
impl Embedding for RateLimiter {
async fn embed(&self, prompt: &str) -> anyhow::Result<Vec<f32>> {
for _ in 0..5 {
if let Err(sleep) = self.rate_limiter.try_wait() {
std::thread::sleep(sleep);
continue;
}

return self.embedding.embed(prompt).await;
}

anyhow::bail!("Rate limit exceeded for OpenAI embedding");
}
Arc::new(rate_limit::RateLimitedEmbedding::new(engine, ratelimiter))
}
34 changes: 34 additions & 0 deletions crates/http-api-bindings/src/embedding/rate_limit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
use async_trait::async_trait;
use ratelimit::Ratelimiter;
use tabby_inference::Embedding;
use tokio;

/// Pairs an embedding backend with a rate limiter; the `Embedding` impl for
/// this type polls the limiter before forwarding each call to the backend.
pub struct RateLimitedEmbedding {
// Wrapped backend that performs the actual embedding request.
embedding: Box<dyn Embedding>,
// Limiter polled via `try_wait()` before every forwarded request.
rate_limiter: Ratelimiter,
}

impl RateLimitedEmbedding {
pub fn new(embedding: Box<dyn Embedding>, rate_limiter: Ratelimiter) -> Self {
Self {
embedding,
rate_limiter,
}
}
}

#[async_trait]
impl Embedding for RateLimitedEmbedding {
    /// Forwards `prompt` to the wrapped backend as soon as the rate limiter
    /// grants a slot.
    ///
    /// The limiter is polled at most five times; after each refusal the task
    /// sleeps asynchronously for the duration the limiter reported.
    ///
    /// # Errors
    ///
    /// Fails when all five polls are refused, or with whatever error the
    /// wrapped backend's `embed` call returns.
    async fn embed(&self, prompt: &str) -> anyhow::Result<Vec<f32>> {
        for _attempt in 0..5 {
            match self.rate_limiter.try_wait() {
                // Slot granted: hand the request to the backend right away.
                Ok(_) => return self.embedding.embed(prompt).await,
                // Refused: back off for the suggested time, then poll again.
                Err(backoff) => tokio::time::sleep(backoff).await,
            }
        }

        anyhow::bail!("Rate limit exceeded for embedding computation");
    }
}

0 comments on commit b3685ad

Please sign in to comment.