diff --git a/crates/http-api-bindings/src/embedding/mod.rs b/crates/http-api-bindings/src/embedding/mod.rs
index 1d6e3cb3d333..d5b8920af72f 100644
--- a/crates/http-api-bindings/src/embedding/mod.rs
+++ b/crates/http-api-bindings/src/embedding/mod.rs
@@ -8,16 +8,13 @@ use std::sync::Arc;
 
 use llama::LlamaCppEngine;
 use rate_limit::RateLimitedEmbedding;
-use tabby_common::config::{HttpModelConfig, RateLimit};
+use tabby_common::config::HttpModelConfig;
 use tabby_inference::Embedding;
 
 use self::{openai::OpenAIEmbeddingEngine, voyage::VoyageEmbeddingEngine};
 
 pub async fn create(config: &HttpModelConfig) -> Arc<dyn Embedding> {
-    let rpm = config.rate_limit.as_ref().map_or_else(
-        || RateLimit::default().request_per_minute,
-        |rl| rl.request_per_minute,
-    );
+    let rpm = config.rate_limit.request_per_minute;
 
     let embedding: Arc<dyn Embedding> = match config.kind.as_str() {
         "llama.cpp/embedding" => {
diff --git a/crates/tabby-common/src/config.rs b/crates/tabby-common/src/config.rs
index 56acd7590ae8..4d32b4221904 100644
--- a/crates/tabby-common/src/config.rs
+++ b/crates/tabby-common/src/config.rs
@@ -290,7 +290,8 @@ pub struct HttpModelConfig {
     pub api_key: Option<String>,
 
     #[builder(default)]
-    pub rate_limit: Option<RateLimit>,
+    #[serde(default)]
+    pub rate_limit: RateLimit,
 
     /// Used by OpenAI style API for model name.
     #[builder(default)]