From 17ec0403a30bfc9af7fb3a06c4b98336769e1399 Mon Sep 17 00:00:00 2001 From: Charush Date: Tue, 24 Dec 2024 07:37:29 +0000 Subject: [PATCH 01/14] feat: dapr integration for budlitellm --- .dapr/appconfig-dev.yaml | 19 +++++ .dapr/components/configstore.yaml | 20 ++++++ .dapr/components/pubsub-redis.yaml | 25 +++++++ .dapr/components/secretstore-env.yaml | 13 ++++ .dapr/components/statestore.yaml | 23 +++++++ .dapr/components/subscriptions.yaml | 12 ++++ .dockerignore | 1 + Dockerfile.bud | 18 +++-- deploy/docker-compose-dev.yaml | 69 +++++++++++++++++++ deploy/docker-compose-redis.yaml | 22 ++++++ deploy/start_dev.sh | 99 +++++++++++++++++++++++++++ deploy/stop_dev.sh | 57 +++++++++++++++ litellm/__about__.py | 19 +++++ litellm/commons/config.py | 82 ++++++++++++++++++++++ litellm/proxy/proxy_server.py | 4 ++ requirements.txt | 7 +- 16 files changed, 481 insertions(+), 9 deletions(-) create mode 100644 .dapr/appconfig-dev.yaml create mode 100644 .dapr/components/configstore.yaml create mode 100644 .dapr/components/pubsub-redis.yaml create mode 100644 .dapr/components/secretstore-env.yaml create mode 100644 .dapr/components/statestore.yaml create mode 100644 .dapr/components/subscriptions.yaml create mode 100644 deploy/docker-compose-dev.yaml create mode 100644 deploy/docker-compose-redis.yaml create mode 100755 deploy/start_dev.sh create mode 100755 deploy/stop_dev.sh create mode 100644 litellm/__about__.py create mode 100644 litellm/commons/config.py diff --git a/.dapr/appconfig-dev.yaml b/.dapr/appconfig-dev.yaml new file mode 100644 index 000000000000..b22f553b6877 --- /dev/null +++ b/.dapr/appconfig-dev.yaml @@ -0,0 +1,19 @@ +apiVersion: dapr.io/v1alpha1 +kind: Configuration +metadata: + name: appconfig + namespace: default +spec: + tracing: + samplingRate: "1" + stdout: true + features: + - name: SchedulerReminders + enabled: true + # zipkin: + # endpointAddress: http://localhost:9411/api/v2/spans + secrets: + scopes: + - storeName: secretstore-local + defaultAccess: allow + deniedSecrets: [ ] diff --git a/.dapr/components/configstore.yaml b/.dapr/components/configstore.yaml new file mode 100644 index 000000000000..4149f5d9c955 --- /dev/null +++ b/.dapr/components/configstore.yaml @@ -0,0 +1,20 @@ +# Reference: https://docs.dapr.io/reference/components-reference/supported-configuration-stores/redis-configuration-store/ + +apiVersion: dapr.io/v1alpha1 +kind: Component +metadata: + name: configstore + namespace: development +spec: + type: configuration.redis + metadata: + - name: redisHost + secretKeyRef: + name: REDIS_URI + key: REDIS_URI + - name: redisPassword + secretKeyRef: + name: REDIS_PASSWORD + key: REDIS_PASSWORD +auth: + secretStore: secretstore-local \ No newline at end of file diff --git a/.dapr/components/pubsub-redis.yaml b/.dapr/components/pubsub-redis.yaml new file mode 100644 index 000000000000..346914a8f510 --- /dev/null +++ b/.dapr/components/pubsub-redis.yaml @@ -0,0 +1,25 @@ +# Reference: https://docs.dapr.io/reference/components-reference/supported-pubsub/setup-redis-pubsub/ + +apiVersion: dapr.io/v1alpha1 +kind: Component +metadata: + name: pubsub-redis + namespace: development +spec: + type: pubsub.redis + version: v1 + metadata: + - name: redisHost + secretKeyRef: + name: REDIS_URI + key: REDIS_URI + - name: redisPassword + secretKeyRef: + name: REDIS_PASSWORD + key: REDIS_PASSWORD + - name: consumerID + value: "{appID}" + - name: concurrency + value: "10" +auth: + secretStore: secretstore-local \ No newline at end of file diff --git 
a/.dapr/components/secretstore-env.yaml b/.dapr/components/secretstore-env.yaml new file mode 100644 index 000000000000..a8689cd3e6e1 --- /dev/null +++ b/.dapr/components/secretstore-env.yaml @@ -0,0 +1,13 @@ +# Reference: https://docs.dapr.io/reference/components-reference/supported-secret-stores/file-secret-store/ + +apiVersion: dapr.io/v1alpha1 +kind: Component +metadata: + name: secretstore-local + namespace: development +spec: + type: secretstores.local.env + version: v1 + metadata: + - name: prefix + value: "SECRETS_" \ No newline at end of file diff --git a/.dapr/components/statestore.yaml b/.dapr/components/statestore.yaml new file mode 100644 index 000000000000..573694eebc40 --- /dev/null +++ b/.dapr/components/statestore.yaml @@ -0,0 +1,23 @@ +# Reference: https://docs.dapr.io/reference/components-reference/supported-state-stores/setup-redis/ + +apiVersion: dapr.io/v1alpha1 +kind: Component +metadata: + name: statestore + namespace: development +spec: + type: state.redis + version: v1 + metadata: + - name: redisHost + secretKeyRef: + name: REDIS_URI + key: REDIS_URI + - name: redisPassword + secretKeyRef: + name: REDIS_PASSWORD + key: REDIS_PASSWORD + - name: actorStateStore + value: "true" +auth: + secretStore: secretstore-local \ No newline at end of file diff --git a/.dapr/components/subscriptions.yaml b/.dapr/components/subscriptions.yaml new file mode 100644 index 000000000000..7bc9ff3ca6ee --- /dev/null +++ b/.dapr/components/subscriptions.yaml @@ -0,0 +1,12 @@ +apiVersion: dapr.io/v2alpha1 +kind: Subscription +metadata: + name: pubsub-subscription +spec: + topic: budLitellmMessages + routes: + default: /notifications + pubsubname: pubsub-redis + deadLetterTopic: poisonMessages +scopes: + - budlitellm \ No newline at end of file diff --git a/.dockerignore b/.dockerignore index 929eace5e343..2919658574fb 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,3 +9,4 @@ tests .devcontainer *.tgz log.txt +budlitellm diff --git a/Dockerfile.bud b/Dockerfile.bud index e64c89ba763c..3cb2b8f18d85 100644 --- a/Dockerfile.bud +++ b/Dockerfile.bud @@ -11,7 +11,7 @@ WORKDIR /app # Install build dependencies RUN apt-get clean && apt-get update && \ - apt-get install -y gcc python3-dev && \ + apt-get install -y gcc python3-dev git && \ rm -rf /var/lib/apt/lists/* RUN pip install --upgrade pip && \ @@ -21,7 +21,7 @@ RUN pip install --upgrade pip && \ COPY . . 
# Build Admin UI -RUN chmod +x build_admin_ui.sh && ./build_admin_ui.sh +RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh # Build the package RUN rm -rf dist/* && python -m build @@ -44,13 +44,13 @@ RUN pip uninstall PyJWT -y RUN pip install PyJWT --no-cache-dir # Build Admin UI -RUN chmod +x build_admin_ui.sh && ./build_admin_ui.sh +RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh # Runtime stage FROM $LITELLM_RUNTIME_IMAGE AS runtime # Update dependencies and clean up - handles debian security issue -RUN apt-get update && apt-get upgrade -y && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y git && apt-get upgrade -y && rm -rf /var/lib/apt/lists/* WORKDIR /app # Copy the current directory contents into the container at /app @@ -64,10 +64,14 @@ COPY --from=builder /wheels/ /wheels/ # Install the built wheel using pip; again using a wildcard if it's the only file RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels +# ensure gptcache is installed +RUN pip install git+https://github.com/BudEcosystem/BudServeGPTCache.git#egg=gptcache +RUN pip install git+https://github.com/BudEcosystem/bud-microframe.git#egg=budmicroframe + # Generate prisma client RUN prisma generate -RUN chmod +x entrypoint.sh +# RUN chmod +x entrypoint.sh -EXPOSE 4000/tcp +EXPOSE 4010/tcp -ENTRYPOINT ["sh", "-c", "cp /app/config/config.yaml /app && litellm --config /app/config.yaml --port 4000"] \ No newline at end of file +# ENTRYPOINT ["sh", "-c", "litellm --config /app/litellm_config.yaml --port 4000"] \ No newline at end of file diff --git a/deploy/docker-compose-dev.yaml b/deploy/docker-compose-dev.yaml new file mode 100644 index 000000000000..a27fb5c454e4 --- /dev/null +++ b/deploy/docker-compose-dev.yaml @@ -0,0 +1,69 @@ +# include: +# - ./docker-compose-redis.yaml + +services: + bud-litellm-app: + image: bud-microframe/$APP_NAME:$NAMESPACE + profiles: + - app + container_name: bud-mf-$NAMESPACE-$APP_NAME + build: + context: .. 
+ dockerfile: ./Dockerfile.bud + # command: [ "litellm", "--config", "/app/litellm_config.yaml", "--port", $APP_PORT ] + # command: sh -c "cd litellm/proxy && litellm --config /app/litellm_config.yaml --port $APP_PORT" + command: sh -c "tail -f /dev/null" + # command : sh -c "alembic -c ./budcluster/alembic.ini upgrade head && tail -f /dev/null" # && uvicorn $APP_NAME.main:app --host 0.0.0.0 --port $APP_PORT --reload" + # ports: + # - "$DAPR_GRPC_PORT:$DAPR_GRPC_PORT" # Dapr instances communicate over gRPC so gRPC port needs to be exposed + # - "$DAPR_HTTP_PORT:$DAPR_HTTP_PORT" # Expose Dapr HTTP port for service invocation + # - "$APP_PORT:$APP_PORT" # Expose app port for debugging purposes + volumes: + - ../:/app/ + - ../cache:/app/cache + env_file: + - path: ../.env + required: true + depends_on: + - bud-litellm-placement + network_mode: host + bud-litellm-sidecar: + container_name: bud-mf-$NAMESPACE-$APP_NAME-dapr + image: "daprio/daprd:edge" + command: [ + "./daprd", + "--app-id", "$APP_NAME", + "--app-port", "$APP_PORT", + "--dapr-http-port", "$DAPR_HTTP_PORT", + "--dapr-grpc-port", "$DAPR_GRPC_PORT", + "--placement-host-address", "$DAPR_PLACEMENT_HOST:$DAPR_PLACEMENT_PORT", # Dapr's placement service can be reached via the docker DNS entry + "--metrics-port", "$DAPR_METRICS_PORT", + "--resources-path", "/components", + "--config", "/config/appconfig.yaml", + "--log-as-json" + ] + env_file: + - path: ../.env + required: true + volumes: + # - "../crypto-keys:/crypto-keys" + - "${DAPR_COMPONENTS:-../.dapr/components/}:/components" # Mount the components folder for the runtime to use. The mounted location must match the --resources-path argument. + - "${DAPR_APP_CONFIG:-../.dapr/appconfig-dev.yaml}:/config/appconfig.yaml" # Mount the config file for the runtime to use. The mounted location must match the --config argument. + - ./:/app/ + network_mode: "host" + # ports: + # - "$DAPR_GRPC_PORT:$DAPR_GRPC_PORT" + # - "$DAPR_HTTP_PORT:$DAPR_HTTP_PORT" + # network_mode: "service:app" + bud-litellm-placement: + container_name: bud-mf-$NAMESPACE-$APP_NAME-placement + image: "daprio/placement:edge" + command: [ "./placement", "--port", "$DAPR_PLACEMENT_PORT" ] + ports: + - "$DAPR_PLACEMENT_PORT:$DAPR_PLACEMENT_PORT" + networks: + - bud-litellm-network +networks: + bud-litellm-network: + name: bud-mf-$NAMESPACE-$APP_NAME + driver: bridge \ No newline at end of file diff --git a/deploy/docker-compose-redis.yaml b/deploy/docker-compose-redis.yaml new file mode 100644 index 000000000000..805ccd2085ba --- /dev/null +++ b/deploy/docker-compose-redis.yaml @@ -0,0 +1,22 @@ +services: + bud_redis: + container_name: bud-mf-$NAMESPACE-redis + image: redis:alpine + ports: + - "$REDIS_PORT:$REDIS_PORT" + command: + - /bin/sh + - -c + - redis-server --requirepass "${SECRETS_REDIS_PASSWORD:?SECRETS_REDIS_PASSWORD variable is not set}" --port ${REDIS_PORT} + sysctls: + net.core.somaxconn: 1024 + healthcheck: + test: [ "CMD", "redis-cli", "--raw", "incr", "ping" ] + volumes: + - redis_data:/data + # networks: + # - bud-microframe-nw + # network_mode: "host" + +volumes: + redis_data: \ No newline at end of file diff --git a/deploy/start_dev.sh b/deploy/start_dev.sh new file mode 100755 index 000000000000..b11453997ffb --- /dev/null +++ b/deploy/start_dev.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash + +# +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Bud Ecosystem Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- +# + +DAPR_COMPONENTS="../.dapr/components/" +DAPR_APP_CONFIG="../.dapr/appconfig-dev.yaml" + +DOCKER_COMPOSE_FILE="./deploy/docker-compose-dev.yaml" +BUILD_FLAG="" +DETACH_FLAG="" + +function display_help() { + echo "Usage: $0 [options]" + echo + echo "Options:" + echo " --dapr-components Set the dapr components folder path, this should be relative to the deploy directory (default: $DAPR_COMPONENTS)" + echo " --dapr-app-config Set the dapr app config path, this should be relative to the deploy directory (default: $DAPR_APP_CONFIG)" + echo " -f FILE Specify the Docker Compose file to use, this should be relative to your current directory (default: $DOCKER_COMPOSE_FILE)" + echo " --build Include this flag to force a rebuild of the Docker containers" + echo " --skip-app Include this flag to skip the app container" + echo " -d Include this flag to detach and run the containers in the background" + echo " --help Display this help message and exit" + echo + echo "Example:" + echo " $0 -f docker-compose-local.yaml --build" + echo " This will use 'docker-compose-local.yaml' and force a rebuild of the containers." + echo + exit 0 +} + +# Parse arguments +while [[ "$#" -gt 0 ]]; do + case $1 in + --dapr-components) DAPR_COMPONENTS="$2"; shift ;; + --dapr-app-config) DAPR_APP_CONFIG="$2"; shift ;; + -f) DOCKER_COMPOSE_FILE="$2"; shift ;; + --build) BUILD_FLAG="--build" ;; + --skip-app) SKIP_APP_FLAG="true" ;; + -d) DETACH_FLAG="-d" ;; + --help) display_help ;; + *) echo "Unknown parameter passed: $1"; display_help; exit 1 ;; + esac + shift +done + +set -a +source ./.env +set +a + +export REDIS_PORT=$(echo "${SECRETS_REDIS_URI:-redis:6379}" | cut -d':' -f2) + +: ${APP_NAME:?Application name is required, set APP_NAME in the .env file.} + +# Print the environment variables +echo "****************************************************" +echo "* *" +echo "* Starting Microservice Environment *" +echo "* *" +echo "****************************************************" +echo "" +echo "🛠 App Name : $APP_NAME" +echo "🌐 App Port : $APP_PORT" +echo "🔑 Redis Uri : $SECRETS_REDIS_URI" +echo "🌍 Dapr HTTP Port : $DAPR_HTTP_PORT" +echo "🌍 Dapr gRPC Port : $DAPR_GRPC_PORT" +echo "🛠 Namespace : $NAMESPACE" +echo "📊 Log Level : $LOG_LEVEL" +echo "🗂 Config Store Name : $CONFIGSTORE_NAME" +echo "🔐 Secret Store Name : $SECRETSTORE_NAME" +echo "🛠 Dapr Components : $DAPR_COMPONENTS" +echo "🛠 Dapr App Config : $DAPR_APP_CONFIG" +echo "🛠 Docker Compose File : $DOCKER_COMPOSE_FILE" +echo "🚀 Build flag : $BUILD_FLAG" +echo "" +echo "****************************************************" + +# Bring up Docker Compose +echo "Bringing up Docker Compose with file: $DOCKER_COMPOSE_FILE" +if [ -z "$SKIP_APP_FLAG" ]; then + docker compose --profile app -f "$DOCKER_COMPOSE_FILE" up $BUILD_FLAG $DETACH_FLAG +else + docker compose -f "$DOCKER_COMPOSE_FILE" up $BUILD_FLAG $DETACH_FLAG +fi \ No newline at end of file 
diff --git a/deploy/stop_dev.sh b/deploy/stop_dev.sh new file mode 100755 index 000000000000..456654e9a804 --- /dev/null +++ b/deploy/stop_dev.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +# +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Bud Ecosystem Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- +# + +# Default value for Docker Compose file +DOCKER_COMPOSE_FILE="./deploy/docker-compose-dev.yaml" + +# Function to display help message +function display_help() { + echo "Usage: $0 [options]" + echo + echo "Options:" + echo " -f FILE Specify the Docker Compose file to use (default: deploy/docker-compose-dev.yaml)" + echo " --help Display this help message and exit" + echo + echo "Example:" + echo " $0 -f docker-compose-local.yaml" + echo " This will stop the services using 'docker-compose-local.yaml'." + echo + exit 0 +} + +# Parse optional arguments +while [[ "$#" -gt 0 ]]; do + case $1 in + -f) DOCKER_COMPOSE_FILE="$2"; shift ;; + --help) display_help ;; + *) echo "Unknown parameter: $1"; exit 1 ;; + esac + shift +done + +set -a +source ./.env +set +a + +export REDIS_PORT=$(echo "${SECRETS_REDIS_URI:-redis:6379}" | cut -d':' -f2) + +# Stop Docker Compose services +echo "Stopping services defined in: $DOCKER_COMPOSE_FILE" +docker compose -f "$DOCKER_COMPOSE_FILE" stop \ No newline at end of file diff --git a/litellm/__about__.py b/litellm/__about__.py new file mode 100644 index 000000000000..9c3a5276d863 --- /dev/null +++ b/litellm/__about__.py @@ -0,0 +1,19 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Bud Ecosystem Inc. +# # +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# # +# http://www.apache.org/licenses/LICENSE-2.0 +# # +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +"""Contains metadata about the package, including version information and author details.""" + +__version__ = "budlitellm@0.0.1" diff --git a/litellm/commons/config.py b/litellm/commons/config.py new file mode 100644 index 000000000000..16e2ed717c59 --- /dev/null +++ b/litellm/commons/config.py @@ -0,0 +1,82 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Bud Ecosystem Inc. +# # +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# # +# http://www.apache.org/licenses/LICENSE-2.0 +# # +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +"""Manages application and secret configurations, utilizing environment variables and Dapr's configuration store for syncing.""" + +from pathlib import Path +from typing import Optional + +from budmicroframe.commons.config import BaseAppConfig, BaseSecretsConfig, register_settings, enable_periodic_sync_from_store +from pydantic import DirectoryPath, Field + +from litellm.__about__ import __version__ + + +class AppConfig(BaseAppConfig): + name: str = __version__.split("@")[0] + version: str = __version__.split("@")[-1] + description: str = "" + api_root: str = "" + + # Base Directory + base_dir: DirectoryPath = Path(__file__).parent.parent.parent.resolve() + + # Bud-Litellm env + litellm_log: str = Field("DEBUG", alias="LITELLM_LOG") + litellm_master_key: str = Field("sk-1234", alias="LITELLM_MASTER_KEY") + litellm_salt_key: str = Field("litellm_salt_key", alias="LITELLM_SALT_KEY") + database_url: str = Field(..., alias="DATABASE_URL") + store_model_in_db: bool = Field(True, alias="STORE_MODEL_IN_DB") + budserve_app_baseurl: str = Field("http://127.0.0.1:8050", alias="BUDSERVE_APP_BASEURL") + + # Redis Config + redis_host: str = Field("localhost", alias="REDIS_HOST") + redis_port: int = Field(6379, alias="REDIS_PORT") + redis_username: str = Field("", alias="REDIS_USERNAME") + redis_password: str = Field("", alias="REDIS_PASSWORD") + redis_db: int = Field(0, alias="REDIS_DB") + + # Cache Config + enable_cache: bool = Field(False, alias="ENABLE_CACHE") + enable_cache_metric: bool = Field(False, alias="ENABLE_CACHE_METRIC") + cache_eviction_policy: str = Field("LRU", alias="CACHE_EVICTION_POLICY") + cache_max_size: int = Field(1000, alias="CACHE_MAX_SIZE") + cache_ttl: int = Field(3600, alias="CACHE_TTL") + cache_score_threshold: float = Field(0.8, alias="CACHE_SCORE_THRESHOLD") + cache_embedding_model: str = Field("sentence-transformers/all-MiniLM-L6-v2", alias="CACHE_EMBEDDING_MODEL") + + +class SecretsConfig(BaseSecretsConfig): + name: str = __version__.split("@")[0] + version: str = __version__.split("@")[-1] + + # Database + psql_user: Optional[str] = Field( + None, + alias="PSQL_USER", + json_schema_extra=enable_periodic_sync_from_store(is_global=True), + ) + psql_password: Optional[str] = Field( + None, + alias="PSQL_PASSWORD", + json_schema_extra=enable_periodic_sync_from_store(is_global=True), + ) + + +app_settings = AppConfig() +secrets_settings = SecretsConfig() + +register_settings(app_settings, secrets_settings) \ No newline at end of file diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index a41491227715..8a4008f78b30 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -25,6 +25,9 @@ get_type_hints, ) +from budmicroframe.main import configure_app +from litellm.commons.config import app_settings, secrets_settings + if TYPE_CHECKING: from opentelemetry.trace import Span as _Span @@ -373,6 +376,7 @@ def generate_feedback_box(): root_path=server_root_path, # check if user passed root path, FastAPI defaults this value to "" ) +app = 
configure_app(app_settings, secrets_settings) ### CUSTOM API DOCS [ENTERPRISE FEATURE] ### # Custom OpenAPI schema generator to include only selected routes diff --git a/requirements.txt b/requirements.txt index 7b1abcdc6828..a93b24d234a3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,6 @@ +langchain_huggingface +gptcache +langchain_community # LITELLM PROXY DEPENDENCIES # anyio==4.4.0 # openai + http req. httpx==0.27.0 # Pin Httpx dependency @@ -38,7 +41,7 @@ cryptography==42.0.7 python-dotenv==1.0.0 # for env tiktoken==0.7.0 # for calculating usage importlib-metadata==6.8.0 # for random utils -tokenizers==0.14.0 # for calculating usage +tokenizers # for calculating usage click==8.1.7 # for proxy cli jinja2==3.1.4 # for prompt templates certifi==2024.7.4 # [TODO] clean up @@ -48,4 +51,4 @@ tenacity==8.2.3 # for retrying requests, when litellm.num_retries set pydantic==2.7.1 # proxy + openai req. jsonschema==4.22.0 # validating json schema websockets==10.4 # for realtime API -#### \ No newline at end of file +#### From 1490a4978b88b7a37f9eb6986afc91697617d80b Mon Sep 17 00:00:00 2001 From: Charush Date: Tue, 24 Dec 2024 07:49:38 +0000 Subject: [PATCH 02/14] refac: updated docker compose to include redis compose - service registration error if not included --- deploy/docker-compose-dev.yaml | 5 +++-- deploy/docker-compose-redis.yaml | 14 ++++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/deploy/docker-compose-dev.yaml b/deploy/docker-compose-dev.yaml index a27fb5c454e4..05ba6ff68c1e 100644 --- a/deploy/docker-compose-dev.yaml +++ b/deploy/docker-compose-dev.yaml @@ -1,5 +1,5 @@ -# include: -# - ./docker-compose-redis.yaml +include: + - ./docker-compose-redis.yaml services: bud-litellm-app: @@ -25,6 +25,7 @@ services: - path: ../.env required: true depends_on: + - bud-litellm-redis - bud-litellm-placement network_mode: host bud-litellm-sidecar: diff --git a/deploy/docker-compose-redis.yaml b/deploy/docker-compose-redis.yaml index 805ccd2085ba..8f8b892ade14 100644 --- a/deploy/docker-compose-redis.yaml +++ b/deploy/docker-compose-redis.yaml @@ -1,6 +1,6 @@ services: - bud_redis: - container_name: bud-mf-$NAMESPACE-redis + bud-litellm-redis: + container_name: bud-mf-$NAMESPACE-litellm-redis image: redis:alpine ports: - "$REDIS_PORT:$REDIS_PORT" @@ -13,10 +13,16 @@ services: healthcheck: test: [ "CMD", "redis-cli", "--raw", "incr", "ping" ] volumes: - - redis_data:/data + - budlitellm_redis_data:/data # networks: # - bud-microframe-nw # network_mode: "host" + networks: + - bud-litellm-network volumes: - redis_data: \ No newline at end of file + budlitellm_redis_data: +networks: + bud-litellm-network: + name: bud-mf-$NAMESPACE-$APP_NAME + driver: bridge From 4e965c3f6240bcc250decbdefaef9e627d816121 Mon Sep 17 00:00:00 2001 From: Charush Date: Mon, 30 Dec 2024 22:21:04 +0000 Subject: [PATCH 03/14] feat: added custom callback to collect metrics data for inference request --- litellm/custom_callbacks.py | 225 ++++++++++++++++++++++++++++++++++++ litellm_config.yaml | 4 +- 2 files changed, 228 insertions(+), 1 deletion(-) create mode 100644 litellm/custom_callbacks.py diff --git a/litellm/custom_callbacks.py b/litellm/custom_callbacks.py new file mode 100644 index 000000000000..cc371ac53ee8 --- /dev/null +++ b/litellm/custom_callbacks.py @@ -0,0 +1,225 @@ +from datetime import datetime +from typing import Any, Dict, List, Optional, Union +from uuid import UUID + +from litellm.integrations.custom_logger import CustomLogger +import litellm +from 
litellm._logging import verbose_logger +from budmicroframe.commons.schemas import CloudEventBase +from budmicroframe.shared.dapr_service import DaprService + + +class RequestMetrics(CloudEventBase): + request_id: UUID + request_ip: Optional[str] = None + project_id: UUID + project_name: str + endpoint_id: UUID + endpoint_name: str + endpoint_path: str + model_id: UUID + provider: str + modality: str + request_arrival_time: datetime + request_forwarded_time: datetime + response_start_time: datetime + response_end_time: datetime + request_body: Dict[str, Any] + response_body: Union[Dict[str, Any], List[Dict[str, Any]]] + cost: Optional[float] = None + is_cache_hit: bool + is_success: bool + + _model_name: Optional[str] = None + _input_tokens: Optional[int] = None + _output_tokens: Optional[int] = None + _response_analysis: Optional[Dict[str, Any]] = None + _is_streaming: Optional[bool] = None + + def validate_intervals(self) -> "RequestMetrics": + if self.response_start_time > self.response_end_time: + raise ValueError("Response start time cannot be after response end time.") + if self.request_arrival_time > self.response_start_time: + raise ValueError("Request arrival time cannot be after response start time.") + if self.request_forwarded_time > self.response_start_time: + raise ValueError("Request forwarded time cannot be after response start time.") + if self.request_arrival_time > self.response_end_time: + raise ValueError("Request arrival time cannot be after response end time.") + return self + + @property + def model_name(self) -> Optional[str]: + return self._model_name + + @model_name.setter + def model_name(self, value: Optional[str]) -> None: + self._model_name = value + + @property + def input_tokens(self) -> Optional[int]: + return self._input_tokens + + @input_tokens.setter + def input_tokens(self, value: Optional[int]) -> None: + self._input_tokens = value + + @property + def output_tokens(self) -> Optional[int]: + return self._output_tokens + + @output_tokens.setter + def output_tokens(self, value: Optional[int]) -> None: + self._output_tokens = value + + @property + def response_analysis(self) -> Optional[Dict[str, Any]]: + return self._response_analysis + + @response_analysis.setter + def response_analysis(self, value: Optional[Dict[str, Any]]) -> None: + self._response_analysis = value + + @property + def is_streaming(self) -> Optional[bool]: + return self._is_streaming + + @is_streaming.setter + def is_streaming(self, value: Optional[bool]) -> None: + self._is_streaming = value + + @property + def ttft(self) -> float: + if self.is_streaming and self.response_start_time > self.request_arrival_time: + return round((self.response_start_time - self.request_arrival_time).total_seconds(), 3) + + @property + def latency(self) -> float: + if self.response_end_time > self.request_arrival_time: + return round((self.response_end_time - self.request_arrival_time).total_seconds(), 3) + + @property + def throughput(self) -> Optional[float]: + if self.output_tokens is not None and self.latency is not None: + return round(self.output_tokens / self.latency, 3) + + +class UpdateRequestMetrics(CloudEventBase): + request_id: UUID + cost: Optional[float] = None + + +# This file includes the custom callbacks for LiteLLM Proxy +# Once defined, these can be passed in proxy_config.yaml +class MyCustomHandler(CustomLogger): + def log_pre_api_call(self, model, messages, kwargs): + verbose_logger.info("Pre-API Call") + + def log_post_api_call(self, kwargs, response_obj, start_time, end_time): + 
verbose_logger.info("Post-API Call") + + def log_stream_event(self, kwargs, response_obj, start_time, end_time): + verbose_logger.info("On Stream") + + def log_success_event(self, kwargs, response_obj, start_time, end_time): + verbose_logger.info("On Success") + + def log_failure_event(self, kwargs, response_obj, start_time, end_time): + verbose_logger.info("On Failure") + + def get_request_metrics(self, kwargs, response_obj, start_time, end_time) -> RequestMetrics: + # log: key, user, model, prompt, response, tokens, cost + # Access kwargs passed to litellm.completion() + model = kwargs.get("model", None) + is_cache_hit = kwargs.get("cache_hit") + response_body = kwargs.get("standard_logging_object", {}).get("response", {}) + # Access litellm_params passed to litellm.completion(), example access `metadata` + litellm_params = kwargs.get("litellm_params", {}) + proxy_server_request = litellm_params.get("proxy_server_request", {}) + model_info = litellm_params.get("model_info", {}) + metadata = litellm_params.get("metadata", {}) # headers passed to LiteLLM proxy, can be found here + + # Calculate cost using litellm.completion_cost() + response_obj = response_obj or {} + cost = litellm.completion_cost(completion_response=response_obj) + + usage = response_obj.get("usage", None) or {} + if isinstance(usage, litellm.Usage): + usage = dict(usage) + + metrics_data = RequestMetrics( + request_id=kwargs.get("litellm_call_id", None), + project_id=metadata.get("project_id", None), + project_name=metadata.get("project_name", None), + endpoint_id=model_info["metadata"]["endpoint_id"], + endpoint_name=model, + endpoint_path=litellm_params["api_base"], + model_id=model_info["id"], + provider=model_info["metadata"]["provider"], + modality=model_info["metadata"]["modality"], + request_arrival_time=kwargs.get("api_call_start_time", start_time), + request_forwarded_time=start_time, + response_start_time=kwargs.get("completion_start_time", end_time), + response_end_time=end_time, + request_body=proxy_server_request.get("body", {}), + response_body=response_body, + cost=cost, + is_cache_hit=is_cache_hit or False, + is_success=True, + ) + metrics_data.model_name = model_info["metadata"]["name"] + metrics_data.input_tokens = usage.get("prompt_tokens", None) + metrics_data.output_tokens = usage.get("completion_tokens", None) + metrics_data.is_streaming = kwargs.get("stream", False) + verbose_logger.info(f"\n\nMetrics Data: {metrics_data}\n\n") + return metrics_data + + async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): + verbose_logger.info("On Async Success!") + metrics_data = self.get_request_metrics(kwargs, response_obj, start_time, end_time) + with DaprService() as dapr_service: + dapr_service.publish_to_topic( + data=metrics_data.model_dump(mode="json"), + pubsub_name="pubsub-redis", + target_topic_name="budMetricsMessages", + target_name="budMetrics", + source_topic_name="budLitellmMessages", + source_name="budLitellm", + event_type="add_request_metrics", + ) + return + + async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): + try: + verbose_logger.info("On Async Failure !") + verbose_logger.info("\nkwargs", kwargs) + + # Acess Exceptions & Traceback + exception_event = kwargs.get("exception", None) + traceback_event = kwargs.get("traceback_exception", None) + + verbose_logger.info( + f""" + Exception: {exception_event} + Traceback: {traceback_event} + """ + ) + metrics_data = self.get_request_metrics(kwargs, response_obj, start_time, 
end_time) + metrics_data.is_success = False + with DaprService() as dapr_service: + dapr_service.publish_to_topic( + data=metrics_data.model_dump(mode="json"), + pubsub_name="pubsub-redis", + target_topic_name="budMetricsMessages", + target_name="budMetrics", + source_topic_name="budLitellmMessages", + source_name="budLitellm", + event_type="add_request_metrics", + ) + except Exception as e: + # TODO: what metrics data to log here? + verbose_logger.info(f"Exception: {e}") + +proxy_handler_instance = MyCustomHandler() + +# Set litellm.callbacks = [proxy_handler_instance] on the proxy +# need to set litellm.callbacks = [proxy_handler_instance] # on the proxy \ No newline at end of file diff --git a/litellm_config.yaml b/litellm_config.yaml index 01422da9f8f7..91d57221fce2 100644 --- a/litellm_config.yaml +++ b/litellm_config.yaml @@ -5,4 +5,6 @@ router_settings: cache_responses: False redis_host: "os.environ/REDIS_HOST" redis_port: "os.environ/REDIS_PORT" - redis_password: "os.environ/REDIS_PASSWORD" \ No newline at end of file + redis_password: "os.environ/REDIS_PASSWORD" +litellm_settings: + callbacks: litellm.custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance] \ No newline at end of file From 26cc93b88339016024888e365f5bd8dfadc10d11 Mon Sep 17 00:00:00 2001 From: Charush Date: Mon, 30 Dec 2024 22:23:26 +0000 Subject: [PATCH 04/14] feat: added project id and name in request metadata --- litellm/proxy/budserve_middleware.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/litellm/proxy/budserve_middleware.py b/litellm/proxy/budserve_middleware.py index 776f138b2c2d..3773255f9432 100644 --- a/litellm/proxy/budserve_middleware.py +++ b/litellm/proxy/budserve_middleware.py @@ -89,6 +89,11 @@ async def dispatch( # get endpoint details to fill cache_params user_config = await self.fetch_user_config(api_key, endpoint_name) + request_data["metadata"] = { + "project_id": user_config.get("project_id"), + "project_name": user_config.get("project_name"), + } + # redis connection params we will set as kubernetes env variables # can be fetched using os.getenv request_data["user_config"] = { From c012747ce73434a58cbda43cbca863d43d471cf4 Mon Sep 17 00:00:00 2001 From: Charush Date: Wed, 1 Jan 2025 11:43:45 +0000 Subject: [PATCH 05/14] feat: metrics target config set using app_settings --- litellm/custom_callbacks.py | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/litellm/custom_callbacks.py b/litellm/custom_callbacks.py index cc371ac53ee8..befa2a5c5287 100644 --- a/litellm/custom_callbacks.py +++ b/litellm/custom_callbacks.py @@ -4,6 +4,7 @@ from litellm.integrations.custom_logger import CustomLogger import litellm +from litellm.commons.config import app_settings from litellm._logging import verbose_logger from budmicroframe.commons.schemas import CloudEventBase from budmicroframe.shared.dapr_service import DaprService @@ -156,8 +157,8 @@ def get_request_metrics(self, kwargs, response_obj, start_time, end_time) -> Req model_id=model_info["id"], provider=model_info["metadata"]["provider"], modality=model_info["metadata"]["modality"], - request_arrival_time=kwargs.get("api_call_start_time", start_time), - request_forwarded_time=start_time, + request_arrival_time=start_time, + request_forwarded_time=kwargs.get("api_call_start_time", start_time), response_start_time=kwargs.get("completion_start_time", end_time), response_end_time=end_time, request_body=proxy_server_request.get("body", {}), @@ -166,10 +167,6 @@ def 
get_request_metrics(self, kwargs, response_obj, start_time, end_time) -> Req is_cache_hit=is_cache_hit or False, is_success=True, ) - metrics_data.model_name = model_info["metadata"]["name"] - metrics_data.input_tokens = usage.get("prompt_tokens", None) - metrics_data.output_tokens = usage.get("completion_tokens", None) - metrics_data.is_streaming = kwargs.get("stream", False) verbose_logger.info(f"\n\nMetrics Data: {metrics_data}\n\n") return metrics_data @@ -179,11 +176,8 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti with DaprService() as dapr_service: dapr_service.publish_to_topic( data=metrics_data.model_dump(mode="json"), - pubsub_name="pubsub-redis", - target_topic_name="budMetricsMessages", - target_name="budMetrics", - source_topic_name="budLitellmMessages", - source_name="budLitellm", + target_topic_name=app_settings.budmetrics_topic_name, + target_name=app_settings.budmetrics_app_name, event_type="add_request_metrics", ) return @@ -208,11 +202,8 @@ async def async_log_failure_event(self, kwargs, response_obj, start_time, end_ti with DaprService() as dapr_service: dapr_service.publish_to_topic( data=metrics_data.model_dump(mode="json"), - pubsub_name="pubsub-redis", - target_topic_name="budMetricsMessages", - target_name="budMetrics", - source_topic_name="budLitellmMessages", - source_name="budLitellm", + target_topic_name=app_settings.budmetrics_topic_name, + target_name=app_settings.budmetrics_app_name, event_type="add_request_metrics", ) except Exception as e: From b4ff6b21044cb718e6191d76698d40b337318140 Mon Sep 17 00:00:00 2001 From: Charush Date: Wed, 1 Jan 2025 11:44:23 +0000 Subject: [PATCH 06/14] feat: added metrics app and topic to app_settings --- litellm/commons/config.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/litellm/commons/config.py b/litellm/commons/config.py index 16e2ed717c59..07d562fd024c 100644 --- a/litellm/commons/config.py +++ b/litellm/commons/config.py @@ -28,8 +28,8 @@ class AppConfig(BaseAppConfig): name: str = __version__.split("@")[0] version: str = __version__.split("@")[-1] - description: str = "" - api_root: str = "" + description: str = Field("Bud-Litellm is a proxy server for LLM requests.", alias="DOCS_DESCRIPTION") + api_root: str = Field("", alias="SERVER_ROOT_PATH") # Base Directory base_dir: DirectoryPath = Path(__file__).parent.parent.parent.resolve() @@ -58,6 +58,10 @@ class AppConfig(BaseAppConfig): cache_score_threshold: float = Field(0.8, alias="CACHE_SCORE_THRESHOLD") cache_embedding_model: str = Field("sentence-transformers/all-MiniLM-L6-v2", alias="CACHE_EMBEDDING_MODEL") + # Metrics App and Topic + budmetrics_app_name: str = Field("budMetrics", alias="BUDMETRICS_APP_NAME") + budmetrics_topic_name: str = Field("budMetricsMessages", alias="BUDMETRICS_TOPIC_NAME") + class SecretsConfig(BaseSecretsConfig): name: str = __version__.split("@")[0] version: str = __version__.split("@")[-1] From 03534f9035619062907cca2d8cd1970a3c2a0032 Mon Sep 17 00:00:00 2001 From: Charush Date: Wed, 1 Jan 2025 11:45:02 +0000 Subject: [PATCH 07/14] chore: removed fastapi app definition since it is being overridden by configure_app --- litellm/proxy/proxy_server.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 8a4008f78b30..667e61e8a12d 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -367,14 +367,14 @@ def generate_feedback_box(): else f"Proxy Server to call 100+ LLMs in the OpenAI format. 
{custom_swagger_message}\n\n{ui_message}" ) -app = FastAPI( - docs_url=_get_docs_url(), - redoc_url=_get_redoc_url(), - title=_title, - description=_description, - version=version, - root_path=server_root_path, # check if user passed root path, FastAPI defaults this value to "" -) +# app = FastAPI( +# docs_url=_get_docs_url(), +# redoc_url=_get_redoc_url(), +# title=_title, +# description=_description, +# version=version, +# root_path=server_root_path, # check if user passed root path, FastAPI defaults this value to "" +# ) app = configure_app(app_settings, secrets_settings) From dcf74ba3150e0d56e7d03a72298381714347530d Mon Sep 17 00:00:00 2001 From: Charush Date: Wed, 1 Jan 2025 19:54:08 +0000 Subject: [PATCH 08/14] feat: added error logging for metrics --- litellm/custom_callbacks.py | 54 +++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/litellm/custom_callbacks.py b/litellm/custom_callbacks.py index befa2a5c5287..6cfe911b02ee 100644 --- a/litellm/custom_callbacks.py +++ b/litellm/custom_callbacks.py @@ -15,10 +15,10 @@ class RequestMetrics(CloudEventBase): request_ip: Optional[str] = None project_id: UUID project_name: str - endpoint_id: UUID + endpoint_id: UUID | str endpoint_name: str endpoint_path: str - model_id: UUID + model_id: UUID | str provider: str modality: str request_arrival_time: datetime @@ -127,21 +127,34 @@ def log_success_event(self, kwargs, response_obj, start_time, end_time): def log_failure_event(self, kwargs, response_obj, start_time, end_time): verbose_logger.info("On Failure") - def get_request_metrics(self, kwargs, response_obj, start_time, end_time) -> RequestMetrics: + def get_request_metrics(self, kwargs, response_obj, start_time, end_time, failure=False) -> RequestMetrics: # log: key, user, model, prompt, response, tokens, cost # Access kwargs passed to litellm.completion() + verbose_logger.info(f"\nresponse_obj : {response_obj}") + model = kwargs.get("model", None) is_cache_hit = kwargs.get("cache_hit") response_body = kwargs.get("standard_logging_object", {}).get("response", {}) + if failure: + response_body = { + "exception": str(kwargs.get("exception", None)), + "traceback": kwargs.get("traceback_exception", None) + } # Access litellm_params passed to litellm.completion(), example access `metadata` litellm_params = kwargs.get("litellm_params", {}) proxy_server_request = litellm_params.get("proxy_server_request", {}) + if not proxy_server_request: + proxy_server_request["body"] = { + "model": model, + "messages": kwargs.get("messages", []), + "stream": kwargs.get("stream", False) + } model_info = litellm_params.get("model_info", {}) metadata = litellm_params.get("metadata", {}) # headers passed to LiteLLM proxy, can be found here # Calculate cost using litellm.completion_cost() response_obj = response_obj or {} - cost = litellm.completion_cost(completion_response=response_obj) + cost = litellm.completion_cost(completion_response=response_obj) if not failure else 0 usage = response_obj.get("usage", None) or {} if isinstance(usage, litellm.Usage): @@ -151,21 +164,23 @@ def get_request_metrics(self, kwargs, response_obj, start_time, end_time) -> Req request_id=kwargs.get("litellm_call_id", None), project_id=metadata.get("project_id", None), project_name=metadata.get("project_name", None), - endpoint_id=model_info["metadata"]["endpoint_id"], + endpoint_id=model_info["metadata"]["endpoint_id"] if model_info else "", endpoint_name=model, - endpoint_path=litellm_params["api_base"], - model_id=model_info["id"], - 
provider=model_info["metadata"]["provider"], - modality=model_info["metadata"]["modality"], + endpoint_path=litellm_params["api_base"] if litellm_params else None, + model_id=model_info["id"] if model_info else "", + provider=model_info["metadata"]["provider"] if model_info else "", + modality=model_info["metadata"]["modality"] if model_info else "", request_arrival_time=start_time, - request_forwarded_time=kwargs.get("api_call_start_time", start_time), - response_start_time=kwargs.get("completion_start_time", end_time), + request_forwarded_time=kwargs.get("api_call_start_time") or start_time, + response_start_time=kwargs.get("completion_start_time") or end_time, response_end_time=end_time, request_body=proxy_server_request.get("body", {}), response_body=response_body, cost=cost, is_cache_hit=is_cache_hit or False, - is_success=True, + is_success=not failure, + # model_name=model_info["metadata"]["name"] if model_info else "", + # is_streaming=kwargs.get("stream", False) ) verbose_logger.info(f"\n\nMetrics Data: {metrics_data}\n\n") return metrics_data @@ -185,20 +200,7 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): try: verbose_logger.info("On Async Failure !") - verbose_logger.info("\nkwargs", kwargs) - - # Acess Exceptions & Traceback - exception_event = kwargs.get("exception", None) - traceback_event = kwargs.get("traceback_exception", None) - - verbose_logger.info( - f""" - Exception: {exception_event} - Traceback: {traceback_event} - """ - ) - metrics_data = self.get_request_metrics(kwargs, response_obj, start_time, end_time) - metrics_data.is_success = False + metrics_data = self.get_request_metrics(kwargs, response_obj, start_time, end_time, failure=True) with DaprService() as dapr_service: dapr_service.publish_to_topic( data=metrics_data.model_dump(mode="json"), From 951d9f76639f1976f0f401a1fef6632d878c6186 Mon Sep 17 00:00:00 2001 From: Charush Date: Thu, 2 Jan 2025 11:43:31 +0000 Subject: [PATCH 09/14] feat: added few logs in proxy cli to debug server killed issue --- litellm/custom_callbacks.py | 83 ++++++++--------------------------- litellm/proxy/proxy_cli.py | 5 +++ litellm/proxy/proxy_server.py | 22 +++++++++- 3 files changed, 45 insertions(+), 65 deletions(-) diff --git a/litellm/custom_callbacks.py b/litellm/custom_callbacks.py index 6cfe911b02ee..47a940fe733e 100644 --- a/litellm/custom_callbacks.py +++ b/litellm/custom_callbacks.py @@ -1,5 +1,6 @@ from datetime import datetime from typing import Any, Dict, List, Optional, Union +from urllib.parse import urlparse from uuid import UUID from litellm.integrations.custom_logger import CustomLogger @@ -9,6 +10,18 @@ from budmicroframe.commons.schemas import CloudEventBase from budmicroframe.shared.dapr_service import DaprService +# error in budserve_middleware.py +# Scenario 1: if user sends wrong api key +# Scenario 2: if user sends wrong model param + +# Keys i won't have: +# project_id, project_name, endpoint_id, endpoint_name (what user has sent), endpoint_path, +# model_id, provider, modality, model_name +# Keys i can set: +# request_arrival_time == request_forwarded_time == response_start_time == response_end_time +# request_body, response_body, cost = 0, is_cache_hit = False, is_success = False, is_streaming = False + + class RequestMetrics(CloudEventBase): request_id: UUID @@ -30,13 +43,9 @@ class RequestMetrics(CloudEventBase): cost: Optional[float] = None is_cache_hit: bool is_success: bool + # 
model_name: str + # is_streaming: bool = False - _model_name: Optional[str] = None - _input_tokens: Optional[int] = None - _output_tokens: Optional[int] = None - _response_analysis: Optional[Dict[str, Any]] = None - _is_streaming: Optional[bool] = None - def validate_intervals(self) -> "RequestMetrics": if self.response_start_time > self.response_end_time: raise ValueError("Response start time cannot be after response end time.") @@ -47,62 +56,7 @@ def validate_intervals(self) -> "RequestMetrics": if self.request_arrival_time > self.response_end_time: raise ValueError("Request arrival time cannot be after response end time.") return self - - @property - def model_name(self) -> Optional[str]: - return self._model_name - - @model_name.setter - def model_name(self, value: Optional[str]) -> None: - self._model_name = value - - @property - def input_tokens(self) -> Optional[int]: - return self._input_tokens - - @input_tokens.setter - def input_tokens(self, value: Optional[int]) -> None: - self._input_tokens = value - - @property - def output_tokens(self) -> Optional[int]: - return self._output_tokens - - @output_tokens.setter - def output_tokens(self, value: Optional[int]) -> None: - self._output_tokens = value - - @property - def response_analysis(self) -> Optional[Dict[str, Any]]: - return self._response_analysis - - @response_analysis.setter - def response_analysis(self, value: Optional[Dict[str, Any]]) -> None: - self._response_analysis = value - - @property - def is_streaming(self) -> Optional[bool]: - return self._is_streaming - - @is_streaming.setter - def is_streaming(self, value: Optional[bool]) -> None: - self._is_streaming = value - - @property - def ttft(self) -> float: - if self.is_streaming and self.response_start_time > self.request_arrival_time: - return round((self.response_start_time - self.request_arrival_time).total_seconds(), 3) - - @property - def latency(self) -> float: - if self.response_end_time > self.request_arrival_time: - return round((self.response_end_time - self.request_arrival_time).total_seconds(), 3) - - @property - def throughput(self) -> Optional[float]: - if self.output_tokens is not None and self.latency is not None: - return round(self.output_tokens / self.latency, 3) - + class UpdateRequestMetrics(CloudEventBase): request_id: UUID @@ -151,7 +105,8 @@ def get_request_metrics(self, kwargs, response_obj, start_time, end_time, failur } model_info = litellm_params.get("model_info", {}) metadata = litellm_params.get("metadata", {}) # headers passed to LiteLLM proxy, can be found here - + api_route = urlparse(metadata.get("endpoint", "")).path + # Calculate cost using litellm.completion_cost() response_obj = response_obj or {} cost = litellm.completion_cost(completion_response=response_obj) if not failure else 0 @@ -166,7 +121,7 @@ def get_request_metrics(self, kwargs, response_obj, start_time, end_time, failur project_name=metadata.get("project_name", None), endpoint_id=model_info["metadata"]["endpoint_id"] if model_info else "", endpoint_name=model, - endpoint_path=litellm_params["api_base"] if litellm_params else None, + endpoint_path=f"{litellm_params['api_base']}/{api_route}" if litellm_params else api_route, model_id=model_info["id"] if model_info else "", provider=model_info["metadata"]["provider"] if model_info else "", modality=model_info["metadata"]["modality"] if model_info else "", diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index 094828de17a3..dbc921ab129a 100644 --- a/litellm/proxy/proxy_cli.py +++ 
b/litellm/proxy/proxy_cli.py @@ -631,6 +631,7 @@ def _make_openai_completion(): ): try: from litellm.secret_managers.main import get_secret + from litellm._logging import verbose_proxy_logger if os.getenv("DATABASE_URL", None) is not None: ### add connection pool + pool timeout args @@ -651,10 +652,14 @@ def _make_openai_completion(): modified_url = append_query_params(database_url, params) os.environ["DIRECT_URL"] = modified_url ### + verbose_proxy_logger.info("Running prisma db push") subprocess.run(["prisma"], capture_output=True) + verbose_proxy_logger.info("Prisma db push complete") is_prisma_runnable = True except FileNotFoundError: is_prisma_runnable = False + except Exception as e: + print(e) if is_prisma_runnable: from litellm.proxy.db.check_migration import check_prisma_schema_diff diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 667e61e8a12d..475b7050f89a 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -474,9 +474,13 @@ async def redirect_ui_middleware(request: Request, call_next): app.add_middleware(BudServeMiddleware) @app.middleware("http") async def catch_exceptions_middleware(request: Request, call_next): + start_time = time.time() try: return await call_next(request) except Exception as e: + # Handle the same way as the exception handler + import traceback + from litellm.custom_callbacks import proxy_handler_instance # Convert to ProxyException if needed if not isinstance(e, ProxyException): e = ProxyException( @@ -485,7 +489,23 @@ async def catch_exceptions_middleware(request: Request, call_next): param=None, code=500 ) - # Handle the same way as the exception handler + # TODO: send error to budmetrics + end_time = time.time() + request_body = await request.json() + kwargs = { + "model": request_body.get("model", None), + "cache_hit": False, + "exception": e, + "traceback_exception": traceback.format_exc(), + "litellm_params": { + "proxy_server_request": {"body": request_body}, + "metadata": { + "endpoint": request.url + } + }, + "stream": request_body.get("stream", False), + } + await proxy_handler_instance.async_log_failure_event(kwargs, None, start_time, end_time) return JSONResponse( status_code=int(e.code) if e.code else status.HTTP_500_INTERNAL_SERVER_ERROR, content={ From 3d045aba9ffceb25520a33c7102b52bf7d00d91e Mon Sep 17 00:00:00 2001 From: Charush Date: Fri, 3 Jan 2025 06:57:34 +0000 Subject: [PATCH 10/14] refac: RequestMetrics schema updated --- litellm/custom_callbacks.py | 38 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/litellm/custom_callbacks.py b/litellm/custom_callbacks.py index 47a940fe733e..a702f3f2db65 100644 --- a/litellm/custom_callbacks.py +++ b/litellm/custom_callbacks.py @@ -26,14 +26,15 @@ class RequestMetrics(CloudEventBase): request_id: UUID request_ip: Optional[str] = None - project_id: UUID - project_name: str - endpoint_id: UUID | str - endpoint_name: str - endpoint_path: str - model_id: UUID | str - provider: str - modality: str + project_id: Optional[UUID] + project_name: Optional[str] + endpoint_id: Optional[UUID] + endpoint_name: Optional[str] + endpoint_path: Optional[str] + model_id: Optional[UUID] + model_name: Optional[str] + provider: Optional[str] + modality: Optional[str] request_arrival_time: datetime request_forwarded_time: datetime response_start_time: datetime @@ -42,9 +43,8 @@ class RequestMetrics(CloudEventBase): response_body: Union[Dict[str, Any], List[Dict[str, Any]]] cost: Optional[float] = None 
is_cache_hit: bool + is_streaming: bool = False is_success: bool - # model_name: str - # is_streaming: bool = False def validate_intervals(self) -> "RequestMetrics": if self.response_start_time > self.response_end_time: @@ -57,12 +57,6 @@ def validate_intervals(self) -> "RequestMetrics": raise ValueError("Request arrival time cannot be after response end time.") return self - -class UpdateRequestMetrics(CloudEventBase): - request_id: UUID - cost: Optional[float] = None - - # This file includes the custom callbacks for LiteLLM Proxy # Once defined, these can be passed in proxy_config.yaml class MyCustomHandler(CustomLogger): @@ -119,12 +113,13 @@ def get_request_metrics(self, kwargs, response_obj, start_time, end_time, failur request_id=kwargs.get("litellm_call_id", None), project_id=metadata.get("project_id", None), project_name=metadata.get("project_name", None), - endpoint_id=model_info["metadata"]["endpoint_id"] if model_info else "", + endpoint_id=model_info["metadata"]["endpoint_id"] if model_info else None, endpoint_name=model, endpoint_path=f"{litellm_params['api_base']}/{api_route}" if litellm_params else api_route, - model_id=model_info["id"] if model_info else "", - provider=model_info["metadata"]["provider"] if model_info else "", - modality=model_info["metadata"]["modality"] if model_info else "", + model_id=model_info["id"] if model_info else None, + model_name=model_info["metadata"]["name"] if model_info else None, + provider=model_info["metadata"]["provider"] if model_info else None, + modality=model_info["metadata"]["modality"] if model_info else None, request_arrival_time=start_time, request_forwarded_time=kwargs.get("api_call_start_time") or start_time, response_start_time=kwargs.get("completion_start_time") or end_time, @@ -133,9 +128,8 @@ def get_request_metrics(self, kwargs, response_obj, start_time, end_time, failur response_body=response_body, cost=cost, is_cache_hit=is_cache_hit or False, + is_streaming=kwargs.get("stream", False), is_success=not failure, - # model_name=model_info["metadata"]["name"] if model_info else "", - # is_streaming=kwargs.get("stream", False) ) verbose_logger.info(f"\n\nMetrics Data: {metrics_data}\n\n") return metrics_data From 1c3554ce1cf4e385ad6b67faff9f7b17f2a8bdfe Mon Sep 17 00:00:00 2001 From: budbot Date: Fri, 3 Jan 2025 08:42:33 +0000 Subject: [PATCH 11/14] fix: fetching request body in exception handler middleware --- litellm/proxy/budserve_middleware.py | 1 + litellm/proxy/proxy_server.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/litellm/proxy/budserve_middleware.py b/litellm/proxy/budserve_middleware.py index 3773255f9432..0d2ba969f3fd 100644 --- a/litellm/proxy/budserve_middleware.py +++ b/litellm/proxy/budserve_middleware.py @@ -83,6 +83,7 @@ async def dispatch( # get the request body request_data = await _read_request_body(request=request) + request.state.original_body = json.dumps(request_data) api_key = await self.get_api_key(request) endpoint_name = request_data.get("model") diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 475b7050f89a..1117b8c5c725 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -491,7 +491,11 @@ async def catch_exceptions_middleware(request: Request, call_next): ) # TODO: send error to budmetrics end_time = time.time() - request_body = await request.json() + original_body = getattr(request.state, "original_body", None) + if original_body is not None: + request_body = json.loads(original_body) + else: + 
From 1c3554ce1cf4e385ad6b67faff9f7b17f2a8bdfe Mon Sep 17 00:00:00 2001
From: budbot
Date: Fri, 3 Jan 2025 08:42:33 +0000
Subject: [PATCH 11/14] fix: fetching request body in exception handler middleware

---
 litellm/proxy/budserve_middleware.py | 1 +
 litellm/proxy/proxy_server.py        | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/litellm/proxy/budserve_middleware.py b/litellm/proxy/budserve_middleware.py
index 3773255f9432..0d2ba969f3fd 100644
--- a/litellm/proxy/budserve_middleware.py
+++ b/litellm/proxy/budserve_middleware.py
@@ -83,6 +83,7 @@ async def dispatch(
 
         # get the request body
         request_data = await _read_request_body(request=request)
+        request.state.original_body = json.dumps(request_data)
         api_key = await self.get_api_key(request)
         endpoint_name = request_data.get("model")
 
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 475b7050f89a..1117b8c5c725 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -491,7 +491,11 @@ async def catch_exceptions_middleware(request: Request, call_next):
             )
         # TODO: send error to budmetrics
         end_time = time.time()
-        request_body = await request.json()
+        original_body = getattr(request.state, "original_body", None)
+        if original_body is not None:
+            request_body = json.loads(original_body)
+        else:
+            request_body = {}
         kwargs = {
             "model": request_body.get("model", None),
             "cache_hit": False,
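The fix works because a Starlette/FastAPI request body can only be read once: the inner BudServe middleware parses it first and stashes a serialized copy on request.state, which is backed by the shared ASGI scope, so the outer exception middleware can recover it without touching the consumed stream. A self-contained sketch of the pattern (the app, route, and middleware names here are illustrative, not from the patch):

import json

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse

app = FastAPI()

@app.post("/v1/chat/completions")
async def chat(request: Request):
    # Simulates a downstream failure after the body was consumed upstream.
    raise RuntimeError("model backend unavailable")

@app.middleware("http")  # registered first, so it runs innermost
async def stash_body(request: Request, call_next):
    # Parse once, keep a serialized copy on request.state for later layers.
    body = await request.body()
    request.state.original_body = body.decode() if body else "{}"
    return await call_next(request)

@app.middleware("http")  # registered last, so it wraps stash_body
async def recover_body_on_error(request: Request, call_next):
    try:
        return await call_next(request)
    except Exception as e:
        # Read the stashed copy instead of request.json(), which would
        # fail on the already-consumed stream.
        original = getattr(request.state, "original_body", None)
        request_body = json.loads(original) if original else {}
        return JSONResponse(
            status_code=500,
            content={"error": str(e), "model": request_body.get("model")},
        )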
From fc05e430baeb2b96728faf2ef34a1b4b0f6ee235 Mon Sep 17 00:00:00 2001
From: budbot
Date: Fri, 3 Jan 2025 21:58:25 +0000
Subject: [PATCH 12/14] fix: save metrics for middleware errors and cluster connection errors

---
 litellm/custom_callbacks.py | 44 +++++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 16 deletions(-)

diff --git a/litellm/custom_callbacks.py b/litellm/custom_callbacks.py
index a702f3f2db65..7f6f6067c00e 100644
--- a/litellm/custom_callbacks.py
+++ b/litellm/custom_callbacks.py
@@ -1,7 +1,8 @@
+import copy
 from datetime import datetime
 from typing import Any, Dict, List, Optional, Union
 from urllib.parse import urlparse
-from uuid import UUID
+from uuid import UUID, uuid4
 
 from litellm.integrations.custom_logger import CustomLogger
 import litellm
@@ -78,28 +79,33 @@ def log_failure_event(self, kwargs, response_obj, start_time, end_time):
 
     def get_request_metrics(self, kwargs, response_obj, start_time, end_time, failure=False) -> RequestMetrics:
         # log: key, user, model, prompt, response, tokens, cost
         # Access kwargs passed to litellm.completion()
-        verbose_logger.info(f"\nresponse_obj : {response_obj}")
+        verbose_logger.info(f"\nkwargs : {kwargs}")
+        # verbose_logger.info(f"\nresponse_obj : {response_obj}")
         model = kwargs.get("model", None)
         is_cache_hit = kwargs.get("cache_hit")
-        response_body = kwargs.get("standard_logging_object", {}).get("response", {})
-        if failure:
-            response_body = {
-                "exception": str(kwargs.get("exception", None)),
-                "traceback": kwargs.get("traceback_exception", None)
-            }
+        response_body = copy.deepcopy(kwargs.get("standard_logging_object", {}).get("response", {})) if not failure else {
+            "exception": str(kwargs.get("exception", None)),
+            "traceback": kwargs.get("traceback_exception", None)
+        }
         # Access litellm_params passed to litellm.completion(), example access `metadata`
         litellm_params = kwargs.get("litellm_params", {})
         proxy_server_request = litellm_params.get("proxy_server_request", {})
+        if proxy_server_request and proxy_server_request.get("body"):
+            # To handle unserializable metadata in proxy_server_request and circular dependencies
+            proxy_server_request["body"].pop("metadata", None)
         if not proxy_server_request:
             proxy_server_request["body"] = {
                 "model": model,
                 "messages": kwargs.get("messages", []),
                 "stream": kwargs.get("stream", False)
             }
-        model_info = litellm_params.get("model_info", {})
-        metadata = litellm_params.get("metadata", {})  # headers passed to LiteLLM proxy, can be found here
-        api_route = urlparse(metadata.get("endpoint", "")).path
+        model_info = copy.deepcopy(litellm_params.get("model_info", {}))
+        metadata = litellm_params.get("metadata", {})
+        endpoint = metadata.get("endpoint", "")
+        api_route = urlparse(str(endpoint)).path
+        if litellm_params.get("api_base"):
+            api_route = f"{litellm_params['api_base']}{api_route}"
 
         # Calculate cost using litellm.completion_cost()
         response_obj = response_obj or {}
@@ -110,12 +116,12 @@ def get_request_metrics(self, kwargs, response_obj, start_time, end_time, failur
             usage = dict(usage)
 
         metrics_data = RequestMetrics(
-            request_id=kwargs.get("litellm_call_id", None),
+            request_id=kwargs.get("litellm_call_id", uuid4()),
             project_id=metadata.get("project_id", None),
             project_name=metadata.get("project_name", None),
             endpoint_id=model_info["metadata"]["endpoint_id"] if model_info else None,
             endpoint_name=model,
-            endpoint_path=f"{litellm_params['api_base']}/{api_route}" if litellm_params else api_route,
+            endpoint_path=api_route,
             model_id=model_info["id"] if model_info else None,
             model_name=model_info["metadata"]["name"] if model_info else None,
             provider=model_info["metadata"]["provider"] if model_info else None,
@@ -131,15 +137,17 @@ def get_request_metrics(self, kwargs, response_obj, start_time, end_time, failur
             is_streaming=kwargs.get("stream", False),
             is_success=not failure,
         )
-        verbose_logger.info(f"\n\nMetrics Data: {metrics_data}\n\n")
+        # verbose_logger.info(f"\n\nMetrics Data: {metrics_data}\n\n")
         return metrics_data
 
     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
         verbose_logger.info("On Async Success!")
         metrics_data = self.get_request_metrics(kwargs, response_obj, start_time, end_time)
+        metrics_data_json = metrics_data.model_dump(mode="json")
+        verbose_logger.info(f"Metrics Data JSON: {metrics_data_json}")
         with DaprService() as dapr_service:
             dapr_service.publish_to_topic(
-                data=metrics_data.model_dump(mode="json"),
+                data=metrics_data_json,
                 target_topic_name=app_settings.budmetrics_topic_name,
                 target_name=app_settings.budmetrics_app_name,
                 event_type="add_request_metrics",
@@ -150,16 +158,20 @@ async def async_log_failure_event(self, kwargs, response_obj, start_time, end_ti
         try:
             verbose_logger.info("On Async Failure !")
             metrics_data = self.get_request_metrics(kwargs, response_obj, start_time, end_time, failure=True)
+            metrics_data_json = metrics_data.model_dump(mode="json")
+            verbose_logger.info(f"Metrics Data JSON: {metrics_data_json}")
             with DaprService() as dapr_service:
                 dapr_service.publish_to_topic(
-                    data=metrics_data.model_dump(mode="json"),
+                    data=metrics_data_json,
                     target_topic_name=app_settings.budmetrics_topic_name,
                     target_name=app_settings.budmetrics_app_name,
                    event_type="add_request_metrics",
                 )
         except Exception as e:
             # TODO: what metrics data to log here?
+            import traceback
             verbose_logger.info(f"Exception: {e}")
+            verbose_logger.info(f"Traceback: {traceback.format_exc()}")
 
 
 proxy_handler_instance = MyCustomHandler()
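The deepcopy calls guard against mutating dicts that appear to be shared with other litellm callbacks and with the in-flight request (this sharing is inferred from the patch, not stated in it): popping `metadata` or rewriting `model_info` in place could otherwise leak into later handlers. Dumping with model_dump(mode="json") before entering the Dapr publish call also surfaces UUID/datetime serialization errors early, which is why the JSON payload is logged first. A toy illustration of the copy-before-mutate guard, with hypothetical data:

import copy

# Hypothetical stand-in for litellm_params shared across callback handlers.
shared_params = {"model_info": {"id": "endpoint-1", "metadata": {"provider": "openai"}}}

def build_event(params: dict) -> dict:
    model_info = copy.deepcopy(params.get("model_info", {}))
    # Mutating the copy is safe; the shared original stays intact.
    model_info["metadata"].pop("provider", None)
    return model_info

event = build_event(shared_params)
assert "provider" not in event["metadata"]
assert shared_params["model_info"]["metadata"]["provider"] == "openai"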
From c9475c437b20b32c67f7a20caeb0aa4c55df84e3 Mon Sep 17 00:00:00 2001
From: budbot
Date: Wed, 8 Jan 2025 17:30:14 +0000
Subject: [PATCH 13/14] fix: keep the user-defined model name instead of the actual model name set for endpoint deployment

---
 litellm/router.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/litellm/router.py b/litellm/router.py
index 17af0e6193cc..ca49b0ddaca7 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -3003,7 +3003,11 @@ async def make_call(self, original_function: Any, *args, **kwargs):
             response = await response
         ## PROCESS RESPONSE HEADERS
         await self.set_response_headers(response=response, model_group=model_group)
-
+        verbose_router_logger.info(f"TYPE OF MAKE CALL RESPONSE : {type(response)}")
+        if hasattr(response, "model"):
+            response.model = model_group
+        if isinstance(response, dict):
+            response["model"] = model_group
         return response
 
     def _handle_mock_testing_rate_limit_error(

From 57ebfe68bdc9c887a292f872507e4187ab0f85db Mon Sep 17 00:00:00 2001
From: bud1906
Date: Fri, 10 Jan 2025 16:17:37 +0000
Subject: [PATCH 14/14] fix: authorization header missing error propagated

---
 litellm/proxy/budserve_middleware.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/litellm/proxy/budserve_middleware.py b/litellm/proxy/budserve_middleware.py
index 0d2ba969f3fd..57ac2a917b5c 100644
--- a/litellm/proxy/budserve_middleware.py
+++ b/litellm/proxy/budserve_middleware.py
@@ -21,6 +21,13 @@ class BudServeMiddleware(BaseHTTPMiddleware):
 
     async def get_api_key(self, request):
         authorization_header = request.headers.get("Authorization")
+        if not authorization_header:
+            raise ProxyException(
+                message="Authorization header is missing",
+                type="unauthorized",
+                param="Authorization",
+                code=401
+            )
         api_key = authorization_header.split(" ")[1]
         return api_key
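One edge the new check still leaves open: a header like "Bearer" with no token would raise IndexError at split(" ")[1] rather than a clean 401. A hardened standalone variant of the same parsing, as a sketch (the helper name and ValueError are hypothetical stand-ins for the middleware's ProxyException):

from typing import Optional

def parse_bearer_token(authorization_header: Optional[str]) -> str:
    # Reject a missing header, as patch 14 does, then validate the shape.
    if not authorization_header:
        raise ValueError("Authorization header is missing")
    parts = authorization_header.split(" ")
    if len(parts) != 2 or parts[0].lower() != "bearer" or not parts[1]:
        raise ValueError("Authorization header must be of the form 'Bearer <token>'")
    return parts[1]

assert parse_bearer_token("Bearer sk-123") == "sk-123"
try:
    parse_bearer_token("Bearer")
except ValueError as err:
    print(err)  # malformed header rejected instead of raising IndexError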