Commit 8be5d2d

update
wsxiaoys committed May 13, 2024
1 parent bc2d09b commit 8be5d2d
Showing 3 changed files with 37 additions and 21 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions crates/llama-cpp-server/Cargo.toml
@@ -13,6 +13,7 @@ http-api-bindings = { path = "../http-api-bindings" }
 reqwest.workspace = true
 serde_json.workspace = true
 tabby-inference = { path = "../tabby-inference" }
+tracing.workspace = true
 tokio = { workspace = true, features = ["process"] }
 
 [dev-dependencies]

56 changes: 35 additions & 21 deletions crates/llama-cpp-server/src/lib.rs
@@ -6,6 +6,7 @@ use std::{
 use serde_json::json;
 use tabby_inference::{ChatCompletionStream, CompletionStream, Embedding};
 use tokio::task::JoinHandle;
+use tracing::warn;
 
 struct LlamaCppServer {
     handle: JoinHandle<()>,
@@ -19,29 +20,36 @@ impl LlamaCppServer {
         if !use_gpu {
             num_gpu_layers = "0".to_string();
         }
-        let mut process = tokio::process::Command::new("llama-server")
-            .arg("-m")
-            .arg(model_path)
-            .arg("--port")
-            .arg(SERVER_PORT.to_string())
-            .arg("-ngl")
-            .arg(num_gpu_layers)
-            .arg("-np")
-            .arg(parallelism.to_string())
-            .kill_on_drop(true)
-            .stderr(Stdio::null())
-            .stdout(Stdio::null())
-            .spawn()
-            .expect("Failed to spawn llama-cpp-server");
 
+        let model_path = model_path.to_owned();
         let handle = tokio::spawn(async move {
-            let status_code = process
-                .wait()
-                .await
-                .ok()
-                .and_then(|s| s.code())
-                .unwrap_or(-1);
-            println!("Exist with exit code {}", status_code);
+            loop {
+                let mut process = tokio::process::Command::new("llama-server")
+                    .arg("-m")
+                    .arg(&model_path)
+                    .arg("--port")
+                    .arg(SERVER_PORT.to_string())
+                    .arg("-ngl")
+                    .arg(&num_gpu_layers)
+                    .arg("-np")
+                    .arg(parallelism.to_string())
+                    .kill_on_drop(true)
+                    .stderr(Stdio::inherit())
+                    .stdout(Stdio::inherit())
+                    .spawn()
+                    .expect("Failed to spawn llama-cpp-server");
+
+                let status_code = process
+                    .wait()
+                    .await
+                    .ok()
+                    .and_then(|s| s.code())
+                    .unwrap_or(-1);
+
+                if status_code != 0 {
+                    warn!("llama-server exited with status code {}, restarting...", status_code);
+                }
+            }
         });
 
         Self { handle }
@@ -90,6 +98,12 @@ impl LlamaCppServer {
     }
 }
 
+impl Drop for LlamaCppServer {
+    fn drop(&mut self) {
+        self.handle.abort();
+    }
+}
+
 fn api_endpoint() -> String {
     format!("http://localhost:{SERVER_PORT}")
 }
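
Taken together, the lib.rs changes replace the one-shot spawn with a supervisor task: the llama-server child is respawned in a loop whenever it exits (with a warning when the exit status is nonzero), and the new Drop impl aborts that task so kill_on_drop(true) terminates the in-flight child. Below is a minimal, self-contained sketch of the same supervise-and-restart pattern, not the repository's code: the Supervisor type and the `sleep 1` child command are hypothetical stand-ins, and it assumes tokio with the "process", "macros", "rt-multi-thread", and "time" features enabled.

// Sketch: respawn a child process in a loop, stop the loop by aborting the task.
use std::process::Stdio;
use std::time::Duration;

use tokio::task::JoinHandle;

struct Supervisor {
    handle: JoinHandle<()>,
}

impl Supervisor {
    fn start() -> Self {
        let handle = tokio::spawn(async move {
            loop {
                // kill_on_drop ensures the OS process is killed if this future
                // is dropped, e.g. when the supervising task is aborted.
                let mut child = tokio::process::Command::new("sleep")
                    .arg("1")
                    .kill_on_drop(true)
                    .stdout(Stdio::inherit())
                    .stderr(Stdio::inherit())
                    .spawn()
                    .expect("failed to spawn child process");

                // Wait for the child and normalize a missing exit code to -1.
                let status_code = child
                    .wait()
                    .await
                    .ok()
                    .and_then(|s| s.code())
                    .unwrap_or(-1);

                if status_code != 0 {
                    eprintln!("child exited with status {status_code}, restarting...");
                }
                // Loop continues: the child is respawned on the next iteration.
            }
        });

        Self { handle }
    }
}

impl Drop for Supervisor {
    fn drop(&mut self) {
        // Stop the restart loop; the in-flight child is killed via kill_on_drop.
        self.handle.abort();
    }
}

#[tokio::main]
async fn main() {
    let _supervisor = Supervisor::start();
    tokio::time::sleep(Duration::from_secs(3)).await;
    // _supervisor is dropped here, aborting the loop and killing the child.
}

Aborting the JoinHandle is what makes Drop effective in this pattern: the loop itself never breaks, so without abort() the child would be respawned indefinitely.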
