From 3e89dc7d7c12f93942cf3cbc27c267d91c768e35 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Mon, 25 Mar 2024 20:58:55 +0800 Subject: [PATCH] docs: add blog post on orchestrate tabby with caddy (#1694) * docs: add blog post on orchestrate tabby with caddy * update * update * simplify * use raw-loader to embed files * add motivation * update * update * update * update --- .../Caddyfile | 5 ++ .../docker-compose.yml | 37 ++++++++ .../index.mdx | 88 +++++++++++++++++++ 3 files changed, 130 insertions(+) create mode 100644 website/blog/2024-03-26-tabby-with-replicas-behind-reverse-proxy/Caddyfile create mode 100644 website/blog/2024-03-26-tabby-with-replicas-behind-reverse-proxy/docker-compose.yml create mode 100644 website/blog/2024-03-26-tabby-with-replicas-behind-reverse-proxy/index.mdx diff --git a/website/blog/2024-03-26-tabby-with-replicas-behind-reverse-proxy/Caddyfile b/website/blog/2024-03-26-tabby-with-replicas-behind-reverse-proxy/Caddyfile new file mode 100644 index 000000000000..50d68183cfe1 --- /dev/null +++ b/website/blog/2024-03-26-tabby-with-replicas-behind-reverse-proxy/Caddyfile @@ -0,0 +1,5 @@ +http://*:8080 { + handle_path /* { + reverse_proxy worker-0:8080 worker-1:8080 + } +} diff --git a/website/blog/2024-03-26-tabby-with-replicas-behind-reverse-proxy/docker-compose.yml b/website/blog/2024-03-26-tabby-with-replicas-behind-reverse-proxy/docker-compose.yml new file mode 100644 index 000000000000..6703c17c0870 --- /dev/null +++ b/website/blog/2024-03-26-tabby-with-replicas-behind-reverse-proxy/docker-compose.yml @@ -0,0 +1,37 @@ +version: '3.5' + +services: + worker-0: + restart: always + image: tabbyml/tabby + command: serve --model TabbyML/StarCoder-1B --device cuda + volumes: + - "$HOME/.tabby:/data" + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["0"] + capabilities: [gpu] + + worker-1: + restart: always + image: tabbyml/tabby + command: serve --model TabbyML/StarCoder-1B --device cuda + volumes: + - 
"$HOME/.tabby:/data" + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["1"] + capabilities: [gpu] + + web: + image: caddy + volumes: + - "./Caddyfile:/etc/caddy/Caddyfile:ro" + ports: + - "8080:8080" \ No newline at end of file diff --git a/website/blog/2024-03-26-tabby-with-replicas-behind-reverse-proxy/index.mdx b/website/blog/2024-03-26-tabby-with-replicas-behind-reverse-proxy/index.mdx new file mode 100644 index 000000000000..47d0bffe0c88 --- /dev/null +++ b/website/blog/2024-03-26-tabby-with-replicas-behind-reverse-proxy/index.mdx @@ -0,0 +1,88 @@ +--- +authors: [meng] +tags: [deployment, reverse proxy] +--- + +import CodeBlock from '@theme/CodeBlock'; +import Caddyfile from "raw-loader!./Caddyfile" +import DockerComposeYaml from "raw-loader!./docker-compose.yml" + +# Tabby with Replicas and a Reverse Proxy + +Tabby operates as a single process, typically utilizing resources from a single GPU.This setup is usually sufficient for a team of ~50 engineers. +However, if you wish to scale this for a larger team, you'll need to harness compute resources from multiple GPUs. +One approach to achieve this is by creating additional replicas of the Tabby service and employing a reverse proxy to distribute traffic among these replicas. + +This guide assumes that you have a Linux machine with Docker, CUDA drivers, and the [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) already installed. + +Let's dive in! + +## Creating the Caddyfile + +Before configuring our services, we need to create a `Caddyfile` that will define how Caddy should handle incoming requests and reverse proxy them to Tabby: + + +{Caddyfile} + + +Note that we are assuming we have two GPUs in the machine; therefore, we should redirect traffic to two worker nodes. 
 + +## Preparing the Model File + +Now, execute the following Docker command to pre-download the model file: + +```bash +docker run --entrypoint /opt/tabby/bin/tabby-cpu \ + -v $HOME/.tabby:/data tabbyml/tabby \ + download --model StarCoder-1B +``` + +Since we are only downloading the model file, we override the entrypoint to `tabby-cpu` to avoid the need for a GPU. + +## Creating the Docker Compose File + +Next, create a `docker-compose.yml` file to orchestrate the Tabby and Caddy services. Here is the configuration for both services: + + +{DockerComposeYaml} + + +Note that we have two worker nodes, and we are using the same model for both of them, with each assigned to a different GPU (0 and 1, respectively). If you have more GPUs, you can add more worker nodes and assign them to the available GPUs (remember to update the `Caddyfile` accordingly!). + +## Starting the Services + +With the `docker-compose.yml` and `Caddyfile` configured, start the services using Docker Compose: + +```bash +docker-compose up -d +``` + +## Verifying the Setup + +To ensure that Tabby is running correctly behind Caddy, execute a curl command against the completions endpoint: + +```bash +curl -L 'http://localhost:8080/v1/completions' \ +-H 'Content-Type: application/json' \ +-H 'Accept: application/json' \ +-d '{ + "language": "python", + "segments": { + "prefix": "def fib(n):\n ", + "suffix": "\n return fib(n - 1) + fib(n - 2)" + } +}' +``` + +The response should indicate that Tabby is healthy and ready to assist you with your coding tasks. + +## Securing Your Setup (Optional) + +For those interested in securing their setup, consider using Caddy directives like `forward_auth` and integrating with a service like [Authelia](https://www.authelia.com/). For more details on this, refer to the [Caddy documentation on forward_auth](https://caddyserver.com/docs/caddyfile/directives/forward_auth#authelia). + +--- + +And there you have it! You've successfully set up Tabby with Caddy as a reverse proxy. 
Happy coding with your new AI assistant! + +As an additional note, since the release of v0.9.0, Tabby enterprise edition now includes the built-in ability to handle replicas and load balancing, with an integrated account management system. +For more information, refer to the [official documentation](/docs/administration/distributed/). \ No newline at end of file