From 17ec0403a30bfc9af7fb3a06c4b98336769e1399 Mon Sep 17 00:00:00 2001 From: Charush Date: Tue, 24 Dec 2024 07:37:29 +0000 Subject: [PATCH 01/14] feat: dapr integration for budlitellm --- .dapr/appconfig-dev.yaml | 19 +++++ .dapr/components/configstore.yaml | 20 ++++++ .dapr/components/pubsub-redis.yaml | 25 +++++++ .dapr/components/secretstore-env.yaml | 13 ++++ .dapr/components/statestore.yaml | 23 +++++++ .dapr/components/subscriptions.yaml | 12 ++++ .dockerignore | 1 + Dockerfile.bud | 18 +++-- deploy/docker-compose-dev.yaml | 69 +++++++++++++++++++ deploy/docker-compose-redis.yaml | 22 ++++++ deploy/start_dev.sh | 99 +++++++++++++++++++++++++++ deploy/stop_dev.sh | 57 +++++++++++++++ litellm/__about__.py | 19 +++++ litellm/commons/config.py | 82 ++++++++++++++++++++++ litellm/proxy/proxy_server.py | 4 ++ requirements.txt | 7 +- 16 files changed, 481 insertions(+), 9 deletions(-) create mode 100644 .dapr/appconfig-dev.yaml create mode 100644 .dapr/components/configstore.yaml create mode 100644 .dapr/components/pubsub-redis.yaml create mode 100644 .dapr/components/secretstore-env.yaml create mode 100644 .dapr/components/statestore.yaml create mode 100644 .dapr/components/subscriptions.yaml create mode 100644 deploy/docker-compose-dev.yaml create mode 100644 deploy/docker-compose-redis.yaml create mode 100755 deploy/start_dev.sh create mode 100755 deploy/stop_dev.sh create mode 100644 litellm/__about__.py create mode 100644 litellm/commons/config.py diff --git a/.dapr/appconfig-dev.yaml b/.dapr/appconfig-dev.yaml new file mode 100644 index 000000000000..b22f553b6877 --- /dev/null +++ b/.dapr/appconfig-dev.yaml @@ -0,0 +1,19 @@ +apiVersion: dapr.io/v1alpha1 +kind: Configuration +metadata: + name: appconfig + namespace: default +spec: + tracing: + samplingRate: "1" + stdout: true + features: + - name: SchedulerReminders + enabled: true + # zipkin: + # endpointAddress: http://localhost:9411/api/v2/spans + secrets: + scopes: + - storeName: secretstore-local + defaultAccess: allow + deniedSecrets: [ ] diff --git a/.dapr/components/configstore.yaml b/.dapr/components/configstore.yaml new file mode 100644 index 000000000000..4149f5d9c955 --- /dev/null +++ b/.dapr/components/configstore.yaml @@ -0,0 +1,20 @@ +# Reference: https://docs.dapr.io/reference/components-reference/supported-configuration-stores/redis-configuration-store/ + +apiVersion: dapr.io/v1alpha1 +kind: Component +metadata: + name: configstore + namespace: development +spec: + type: configuration.redis + metadata: + - name: redisHost + secretKeyRef: + name: REDIS_URI + key: REDIS_URI + - name: redisPassword + secretKeyRef: + name: REDIS_PASSWORD + key: REDIS_PASSWORD +auth: + secretStore: secretstore-local \ No newline at end of file diff --git a/.dapr/components/pubsub-redis.yaml b/.dapr/components/pubsub-redis.yaml new file mode 100644 index 000000000000..346914a8f510 --- /dev/null +++ b/.dapr/components/pubsub-redis.yaml @@ -0,0 +1,25 @@ +# Reference: https://docs.dapr.io/reference/components-reference/supported-pubsub/setup-redis-pubsub/ + +apiVersion: dapr.io/v1alpha1 +kind: Component +metadata: + name: pubsub-redis + namespace: development +spec: + type: pubsub.redis + version: v1 + metadata: + - name: redisHost + secretKeyRef: + name: REDIS_URI + key: REDIS_URI + - name: redisPassword + secretKeyRef: + name: REDIS_PASSWORD + key: REDIS_PASSWORD + - name: consumerID + value: "{appID}" + - name: concurrency + value: "10" +auth: + secretStore: secretstore-local \ No newline at end of file diff --git 
a/.dapr/components/secretstore-env.yaml b/.dapr/components/secretstore-env.yaml new file mode 100644 index 000000000000..a8689cd3e6e1 --- /dev/null +++ b/.dapr/components/secretstore-env.yaml @@ -0,0 +1,13 @@ +# Reference: https://docs.dapr.io/reference/components-reference/supported-secret-stores/file-secret-store/ + +apiVersion: dapr.io/v1alpha1 +kind: Component +metadata: + name: secretstore-local + namespace: development +spec: + type: secretstores.local.env + version: v1 + metadata: + - name: prefix + value: "SECRETS_" \ No newline at end of file diff --git a/.dapr/components/statestore.yaml b/.dapr/components/statestore.yaml new file mode 100644 index 000000000000..573694eebc40 --- /dev/null +++ b/.dapr/components/statestore.yaml @@ -0,0 +1,23 @@ +# Reference: https://docs.dapr.io/reference/components-reference/supported-state-stores/setup-redis/ + +apiVersion: dapr.io/v1alpha1 +kind: Component +metadata: + name: statestore + namespace: development +spec: + type: state.redis + version: v1 + metadata: + - name: redisHost + secretKeyRef: + name: REDIS_URI + key: REDIS_URI + - name: redisPassword + secretKeyRef: + name: REDIS_PASSWORD + key: REDIS_PASSWORD + - name: actorStateStore + value: "true" +auth: + secretStore: secretstore-local \ No newline at end of file diff --git a/.dapr/components/subscriptions.yaml b/.dapr/components/subscriptions.yaml new file mode 100644 index 000000000000..7bc9ff3ca6ee --- /dev/null +++ b/.dapr/components/subscriptions.yaml @@ -0,0 +1,12 @@ +apiVersion: dapr.io/v2alpha1 +kind: Subscription +metadata: + name: pubsub-subscription +spec: + topic: budLitellmMessages + routes: + default: /notifications + pubsubname: pubsub-redis + deadLetterTopic: poisonMessages +scopes: + - budlitellm \ No newline at end of file diff --git a/.dockerignore b/.dockerignore index 929eace5e343..2919658574fb 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,3 +9,4 @@ tests .devcontainer *.tgz log.txt +budlitellm diff --git a/Dockerfile.bud b/Dockerfile.bud index e64c89ba763c..3cb2b8f18d85 100644 --- a/Dockerfile.bud +++ b/Dockerfile.bud @@ -11,7 +11,7 @@ WORKDIR /app # Install build dependencies RUN apt-get clean && apt-get update && \ - apt-get install -y gcc python3-dev && \ + apt-get install -y gcc python3-dev git && \ rm -rf /var/lib/apt/lists/* RUN pip install --upgrade pip && \ @@ -21,7 +21,7 @@ RUN pip install --upgrade pip && \ COPY . . 
# Build Admin UI -RUN chmod +x build_admin_ui.sh && ./build_admin_ui.sh +RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh # Build the package RUN rm -rf dist/* && python -m build @@ -44,13 +44,13 @@ RUN pip uninstall PyJWT -y RUN pip install PyJWT --no-cache-dir # Build Admin UI -RUN chmod +x build_admin_ui.sh && ./build_admin_ui.sh +RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh # Runtime stage FROM $LITELLM_RUNTIME_IMAGE AS runtime # Update dependencies and clean up - handles debian security issue -RUN apt-get update && apt-get upgrade -y && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y git && apt-get upgrade -y && rm -rf /var/lib/apt/lists/* WORKDIR /app # Copy the current directory contents into the container at /app @@ -64,10 +64,14 @@ COPY --from=builder /wheels/ /wheels/ # Install the built wheel using pip; again using a wildcard if it's the only file RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels +# ensure gptcache is installed +RUN pip install git+https://github.com/BudEcosystem/BudServeGPTCache.git#egg=gptcache +RUN pip install git+https://github.com/BudEcosystem/bud-microframe.git#egg=budmicroframe + # Generate prisma client RUN prisma generate -RUN chmod +x entrypoint.sh +# RUN chmod +x entrypoint.sh -EXPOSE 4000/tcp +EXPOSE 4010/tcp -ENTRYPOINT ["sh", "-c", "cp /app/config/config.yaml /app && litellm --config /app/config.yaml --port 4000"] \ No newline at end of file +# ENTRYPOINT ["sh", "-c", "litellm --config /app/litellm_config.yaml --port 4000"] \ No newline at end of file diff --git a/deploy/docker-compose-dev.yaml b/deploy/docker-compose-dev.yaml new file mode 100644 index 000000000000..a27fb5c454e4 --- /dev/null +++ b/deploy/docker-compose-dev.yaml @@ -0,0 +1,69 @@ +# include: +# - ./docker-compose-redis.yaml + +services: + bud-litellm-app: + image: bud-microframe/$APP_NAME:$NAMESPACE + profiles: + - app + container_name: bud-mf-$NAMESPACE-$APP_NAME + build: + context: .. 
+ dockerfile: ./Dockerfile.bud + # command: [ "litellm", "--config", "/app/litellm_config.yaml", "--port", $APP_PORT ] + # command: sh -c "cd litellm/proxy && litellm --config /app/litellm_config.yaml --port $APP_PORT" + command: sh -c "tail -f /dev/null" + # command : sh -c "alembic -c ./budcluster/alembic.ini upgrade head && tail -f /dev/null" # && uvicorn $APP_NAME.main:app --host 0.0.0.0 --port $APP_PORT --reload" + # ports: + # - "$DAPR_GRPC_PORT:$DAPR_GRPC_PORT" # Dapr instances communicate over gRPC so gRPC port needs to be exposed + # - "$DAPR_HTTP_PORT:$DAPR_HTTP_PORT" # Expose Dapr HTTP port for service invocation + # - "$APP_PORT:$APP_PORT" # Expose app port for debugging purposes + volumes: + - ../:/app/ + - ../cache:/app/cache + env_file: + - path: ../.env + required: true + depends_on: + - bud-litellm-placement + network_mode: host + bud-litellm-sidecar: + container_name: bud-mf-$NAMESPACE-$APP_NAME-dapr + image: "daprio/daprd:edge" + command: [ + "./daprd", + "--app-id", "$APP_NAME", + "--app-port", "$APP_PORT", + "--dapr-http-port", "$DAPR_HTTP_PORT", + "--dapr-grpc-port", "$DAPR_GRPC_PORT", + "--placement-host-address", "$DAPR_PLACEMENT_HOST:$DAPR_PLACEMENT_PORT", # Dapr's placement service can be reached via the docker DNS entry + "--metrics-port", "$DAPR_METRICS_PORT", + "--resources-path", "/components", + "--config", "/config/appconfig.yaml", + "--log-as-json" + ] + env_file: + - path: ../.env + required: true + volumes: + # - "../crypto-keys:/crypto-keys" + - "${DAPR_COMPONENTS:-../.dapr/components/}:/components" # Mount the components folder for the runtime to use. The mounted location must match the --resources-path argument. + - "${DAPR_APP_CONFIG:-../.dapr/appconfig-dev.yaml}:/config/appconfig.yaml" # Mount the config file for the runtime to use. The mounted location must match the --config argument. + - ./:/app/ + network_mode: "host" + # ports: + # - "$DAPR_GRPC_PORT:$DAPR_GRPC_PORT" + # - "$DAPR_HTTP_PORT:$DAPR_HTTP_PORT" + # network_mode: "service:app" + bud-litellm-placement: + container_name: bud-mf-$NAMESPACE-$APP_NAME-placement + image: "daprio/placement:edge" + command: [ "./placement", "--port", "$DAPR_PLACEMENT_PORT" ] + ports: + - "$DAPR_PLACEMENT_PORT:$DAPR_PLACEMENT_PORT" + networks: + - bud-litellm-network +networks: + bud-litellm-network: + name: bud-mf-$NAMESPACE-$APP_NAME + driver: bridge \ No newline at end of file diff --git a/deploy/docker-compose-redis.yaml b/deploy/docker-compose-redis.yaml new file mode 100644 index 000000000000..805ccd2085ba --- /dev/null +++ b/deploy/docker-compose-redis.yaml @@ -0,0 +1,22 @@ +services: + bud_redis: + container_name: bud-mf-$NAMESPACE-redis + image: redis:alpine + ports: + - "$REDIS_PORT:$REDIS_PORT" + command: + - /bin/sh + - -c + - redis-server --requirepass "${SECRETS_REDIS_PASSWORD:?SECRETS_REDIS_PASSWORD variable is not set}" --port ${REDIS_PORT} + sysctls: + net.core.somaxconn: 1024 + healthcheck: + test: [ "CMD", "redis-cli", "--raw", "incr", "ping" ] + volumes: + - redis_data:/data + # networks: + # - bud-microframe-nw + # network_mode: "host" + +volumes: + redis_data: \ No newline at end of file diff --git a/deploy/start_dev.sh b/deploy/start_dev.sh new file mode 100755 index 000000000000..b11453997ffb --- /dev/null +++ b/deploy/start_dev.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash + +# +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Bud Ecosystem Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- +# + +DAPR_COMPONENTS="../.dapr/components/" +DAPR_APP_CONFIG="../.dapr/appconfig-dev.yaml" + +DOCKER_COMPOSE_FILE="./deploy/docker-compose-dev.yaml" +BUILD_FLAG="" +DETACH_FLAG="" + +function display_help() { + echo "Usage: $0 [options]" + echo + echo "Options:" + echo " --dapr-components Set the dapr components folder path, this should be relative to the deploy directory (default: $DAPR_COMPONENTS)" + echo " --dapr-app-config Set the dapr app config path, this should be relative to the deploy directory (default: $DAPR_APP_CONFIG)" + echo " -f FILE Specify the Docker Compose file to use, this should be relative to your current directory (default: $DOCKER_COMPOSE_FILE)" + echo " --build Include this flag to force a rebuild of the Docker containers" + echo " --skip-app Include this flag to skip the app container" + echo " -d Include this flag to detach and run the containers in the background" + echo " --help Display this help message and exit" + echo + echo "Example:" + echo " $0 -f docker-compose-local.yaml --build" + echo " This will use 'docker-compose-local.yaml' and force a rebuild of the containers." + echo + exit 0 +} + +# Parse arguments +while [[ "$#" -gt 0 ]]; do + case $1 in + --dapr-components) DAPR_COMPONENTS="$2"; shift ;; + --dapr-app-config) DAPR_APP_CONFIG="$2"; shift ;; + -f) DOCKER_COMPOSE_FILE="$2"; shift ;; + --build) BUILD_FLAG="--build" ;; + --skip-app) SKIP_APP_FLAG="true" ;; + -d) DETACH_FLAG="-d" ;; + --help) display_help ;; + *) echo "Unknown parameter passed: $1"; display_help; exit 1 ;; + esac + shift +done + +set -a +source ./.env +set +a + +export REDIS_PORT=$(echo "${SECRETS_REDIS_URI:-redis:6379}" | cut -d':' -f2) + +: ${APP_NAME:?Application name is required, set APP_NAME in the .env file.} + +# Print the environment variables +echo "****************************************************" +echo "* *" +echo "* Starting Microservice Environment *" +echo "* *" +echo "****************************************************" +echo "" +echo "🛠 App Name : $APP_NAME" +echo "🌐 App Port : $APP_PORT" +echo "🔑 Redis Uri : $SECRETS_REDIS_URI" +echo "🌍 Dapr HTTP Port : $DAPR_HTTP_PORT" +echo "🌍 Dapr gRPC Port : $DAPR_GRPC_PORT" +echo "🛠 Namespace : $NAMESPACE" +echo "📊 Log Level : $LOG_LEVEL" +echo "🗂 Config Store Name : $CONFIGSTORE_NAME" +echo "🔐 Secret Store Name : $SECRETSTORE_NAME" +echo "🛠 Dapr Components : $DAPR_COMPONENTS" +echo "🛠 Dapr App Config : $DAPR_APP_CONFIG" +echo "🛠 Docker Compose File : $DOCKER_COMPOSE_FILE" +echo "🚀 Build flag : $BUILD_FLAG" +echo "" +echo "****************************************************" + +# Bring up Docker Compose +echo "Bringing up Docker Compose with file: $DOCKER_COMPOSE_FILE" +if [ -z "$SKIP_APP_FLAG" ]; then + docker compose --profile app -f "$DOCKER_COMPOSE_FILE" up $BUILD_FLAG $DETACH_FLAG +else + docker compose -f "$DOCKER_COMPOSE_FILE" up $BUILD_FLAG $DETACH_FLAG +fi \ No newline at end of file 
diff --git a/deploy/stop_dev.sh b/deploy/stop_dev.sh new file mode 100755 index 000000000000..456654e9a804 --- /dev/null +++ b/deploy/stop_dev.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +# +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Bud Ecosystem Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- +# + +# Default value for Docker Compose file +DOCKER_COMPOSE_FILE="./deploy/docker-compose-dev.yaml" + +# Function to display help message +function display_help() { + echo "Usage: $0 [options]" + echo + echo "Options:" + echo " -f FILE Specify the Docker Compose file to use (default: deploy/docker-compose-dev.yaml)" + echo " --help Display this help message and exit" + echo + echo "Example:" + echo " $0 -f docker-compose-local.yaml" + echo " This will stop the services using 'docker-compose-local.yaml'." + echo + exit 0 +} + +# Parse optional arguments +while [[ "$#" -gt 0 ]]; do + case $1 in + -f) DOCKER_COMPOSE_FILE="$2"; shift ;; + --help) display_help ;; + *) echo "Unknown parameter: $1"; exit 1 ;; + esac + shift +done + +set -a +source ./.env +set +a + +export REDIS_PORT=$(echo "${SECRETS_REDIS_URI:-redis:6379}" | cut -d':' -f2) + +# Stop Docker Compose services +echo "Stopping services defined in: $DOCKER_COMPOSE_FILE" +docker compose -f "$DOCKER_COMPOSE_FILE" stop \ No newline at end of file diff --git a/litellm/__about__.py b/litellm/__about__.py new file mode 100644 index 000000000000..9c3a5276d863 --- /dev/null +++ b/litellm/__about__.py @@ -0,0 +1,19 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Bud Ecosystem Inc. +# # +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# # +# http://www.apache.org/licenses/LICENSE-2.0 +# # +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +"""Contains metadata about the package, including version information and author details.""" + +__version__ = "budlitellm@0.0.1" diff --git a/litellm/commons/config.py b/litellm/commons/config.py new file mode 100644 index 000000000000..16e2ed717c59 --- /dev/null +++ b/litellm/commons/config.py @@ -0,0 +1,82 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2024 Bud Ecosystem Inc. +# # +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# # +# http://www.apache.org/licenses/LICENSE-2.0 +# # +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ----------------------------------------------------------------------------- + +"""Manages application and secret configurations, utilizing environment variables and Dapr's configuration store for syncing.""" + +from pathlib import Path +from typing import Optional + +from budmicroframe.commons.config import BaseAppConfig, BaseSecretsConfig, register_settings, enable_periodic_sync_from_store +from pydantic import DirectoryPath, Field + +from litellm.__about__ import __version__ + + +class AppConfig(BaseAppConfig): + name: str = __version__.split("@")[0] + version: str = __version__.split("@")[-1] + description: str = "" + api_root: str = "" + + # Base Directory + base_dir: DirectoryPath = Path(__file__).parent.parent.parent.resolve() + + # Bud-Litellm env + litellm_log: str = Field("DEBUG", alias="LITELLM_LOG") + litellm_master_key: str = Field("sk-1234", alias="LITELLM_MASTER_KEY") + litellm_salt_key: str = Field("litellm_salt_key", alias="LITELLM_SALT_KEY") + database_url: str = Field(..., alias="DATABASE_URL") + store_model_in_db: bool = Field(True, alias="STORE_MODEL_IN_DB") + budserve_app_baseurl: str = Field("http://127.0.0.1:8050", alias="BUDSERVE_APP_BASEURL") + + # Redis Config + redis_host: str = Field("localhost", alias="REDIS_HOST") + redis_port: int = Field(6379, alias="REDIS_PORT") + redis_username: str = Field("", alias="REDIS_USERNAME") + redis_password: str = Field("", alias="REDIS_PASSWORD") + redis_db: int = Field(0, alias="REDIS_DB") + + # Cache Config + enable_cache: bool = Field(False, alias="ENABLE_CACHE") + enable_cache_metric: bool = Field(False, alias="ENABLE_CACHE_METRIC") + cache_eviction_policy: str = Field("LRU", alias="CACHE_EVICTION_POLICY") + cache_max_size: int = Field(1000, alias="CACHE_MAX_SIZE") + cache_ttl: int = Field(3600, alias="CACHE_TTL") + cache_score_threshold: float = Field(0.8, alias="CACHE_SCORE_THRESHOLD") + cache_embedding_model: str = Field("sentence-transformers/all-MiniLM-L6-v2", alias="CACHE_EMBEDDING_MODEL") + + +class SecretsConfig(BaseSecretsConfig): + name: str = __version__.split("@")[0] + version: str = __version__.split("@")[-1] + + # Database + psql_user: Optional[str] = Field( + None, + alias="PSQL_USER", + json_schema_extra=enable_periodic_sync_from_store(is_global=True), + ) + psql_password: Optional[str] = Field( + None, + alias="PSQL_PASSWORD", + json_schema_extra=enable_periodic_sync_from_store(is_global=True), + ) + + +app_settings = AppConfig() +secrets_settings = SecretsConfig() + +register_settings(app_settings, secrets_settings) \ No newline at end of file diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index a41491227715..8a4008f78b30 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -25,6 +25,9 @@ get_type_hints, ) +from budmicroframe.main import configure_app +from litellm.commons.config import app_settings, secrets_settings + if TYPE_CHECKING: from opentelemetry.trace import Span as _Span @@ -373,6 +376,7 @@ def generate_feedback_box(): root_path=server_root_path, # check if user passed root path, FastAPI defaults this value to "" ) +app = 
configure_app(app_settings, secrets_settings) ### CUSTOM API DOCS [ENTERPRISE FEATURE] ### # Custom OpenAPI schema generator to include only selected routes diff --git a/requirements.txt b/requirements.txt index 7b1abcdc6828..a93b24d234a3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,6 @@ +langchain_huggingface +gptcache +langchain_community # LITELLM PROXY DEPENDENCIES # anyio==4.4.0 # openai + http req. httpx==0.27.0 # Pin Httpx dependency @@ -38,7 +41,7 @@ cryptography==42.0.7 python-dotenv==1.0.0 # for env tiktoken==0.7.0 # for calculating usage importlib-metadata==6.8.0 # for random utils -tokenizers==0.14.0 # for calculating usage +tokenizers # for calculating usage click==8.1.7 # for proxy cli jinja2==3.1.4 # for prompt templates certifi==2024.7.4 # [TODO] clean up @@ -48,4 +51,4 @@ tenacity==8.2.3 # for retrying requests, when litellm.num_retries set pydantic==2.7.1 # proxy + openai req. jsonschema==4.22.0 # validating json schema websockets==10.4 # for realtime API -#### \ No newline at end of file +#### From 1490a4978b88b7a37f9eb6986afc91697617d80b Mon Sep 17 00:00:00 2001 From: Charush Date: Tue, 24 Dec 2024 07:49:38 +0000 Subject: [PATCH 02/14] refac: updated docker compose to include redis compose - service registration error if not included --- deploy/docker-compose-dev.yaml | 5 +++-- deploy/docker-compose-redis.yaml | 14 ++++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/deploy/docker-compose-dev.yaml b/deploy/docker-compose-dev.yaml index a27fb5c454e4..05ba6ff68c1e 100644 --- a/deploy/docker-compose-dev.yaml +++ b/deploy/docker-compose-dev.yaml @@ -1,5 +1,5 @@ -# include: -# - ./docker-compose-redis.yaml +include: + - ./docker-compose-redis.yaml services: bud-litellm-app: @@ -25,6 +25,7 @@ services: - path: ../.env required: true depends_on: + - bud-litellm-redis - bud-litellm-placement network_mode: host bud-litellm-sidecar: diff --git a/deploy/docker-compose-redis.yaml b/deploy/docker-compose-redis.yaml index 805ccd2085ba..8f8b892ade14 100644 --- a/deploy/docker-compose-redis.yaml +++ b/deploy/docker-compose-redis.yaml @@ -1,6 +1,6 @@ services: - bud_redis: - container_name: bud-mf-$NAMESPACE-redis + bud-litellm-redis: + container_name: bud-mf-$NAMESPACE-litellm-redis image: redis:alpine ports: - "$REDIS_PORT:$REDIS_PORT" @@ -13,10 +13,16 @@ services: healthcheck: test: [ "CMD", "redis-cli", "--raw", "incr", "ping" ] volumes: - - redis_data:/data + - budlitellm_redis_data:/data # networks: # - bud-microframe-nw # network_mode: "host" + networks: + - bud-litellm-network volumes: - redis_data: \ No newline at end of file + budlitellm_redis_data: +networks: + bud-litellm-network: + name: bud-mf-$NAMESPACE-$APP_NAME + driver: bridge From 4e965c3f6240bcc250decbdefaef9e627d816121 Mon Sep 17 00:00:00 2001 From: Charush Date: Mon, 30 Dec 2024 22:21:04 +0000 Subject: [PATCH 03/14] feat: added custom callback to collect metrics data for inference request --- litellm/custom_callbacks.py | 225 ++++++++++++++++++++++++++++++++++++ litellm_config.yaml | 4 +- 2 files changed, 228 insertions(+), 1 deletion(-) create mode 100644 litellm/custom_callbacks.py diff --git a/litellm/custom_callbacks.py b/litellm/custom_callbacks.py new file mode 100644 index 000000000000..cc371ac53ee8 --- /dev/null +++ b/litellm/custom_callbacks.py @@ -0,0 +1,225 @@ +from datetime import datetime +from typing import Any, Dict, List, Optional, Union +from uuid import UUID + +from litellm.integrations.custom_logger import CustomLogger +import litellm +from 
litellm._logging import verbose_logger +from budmicroframe.commons.schemas import CloudEventBase +from budmicroframe.shared.dapr_service import DaprService + + +class RequestMetrics(CloudEventBase): + request_id: UUID + request_ip: Optional[str] = None + project_id: UUID + project_name: str + endpoint_id: UUID + endpoint_name: str + endpoint_path: str + model_id: UUID + provider: str + modality: str + request_arrival_time: datetime + request_forwarded_time: datetime + response_start_time: datetime + response_end_time: datetime + request_body: Dict[str, Any] + response_body: Union[Dict[str, Any], List[Dict[str, Any]]] + cost: Optional[float] = None + is_cache_hit: bool + is_success: bool + + _model_name: Optional[str] = None + _input_tokens: Optional[int] = None + _output_tokens: Optional[int] = None + _response_analysis: Optional[Dict[str, Any]] = None + _is_streaming: Optional[bool] = None + + def validate_intervals(self) -> "RequestMetrics": + if self.response_start_time > self.response_end_time: + raise ValueError("Response start time cannot be after response end time.") + if self.request_arrival_time > self.response_start_time: + raise ValueError("Request arrival time cannot be after response start time.") + if self.request_forwarded_time > self.response_start_time: + raise ValueError("Request forwarded time cannot be after response start time.") + if self.request_arrival_time > self.response_end_time: + raise ValueError("Request arrival time cannot be after response end time.") + return self + + @property + def model_name(self) -> Optional[str]: + return self._model_name + + @model_name.setter + def model_name(self, value: Optional[str]) -> None: + self._model_name = value + + @property + def input_tokens(self) -> Optional[int]: + return self._input_tokens + + @input_tokens.setter + def input_tokens(self, value: Optional[int]) -> None: + self._input_tokens = value + + @property + def output_tokens(self) -> Optional[int]: + return self._output_tokens + + @output_tokens.setter + def output_tokens(self, value: Optional[int]) -> None: + self._output_tokens = value + + @property + def response_analysis(self) -> Optional[Dict[str, Any]]: + return self._response_analysis + + @response_analysis.setter + def response_analysis(self, value: Optional[Dict[str, Any]]) -> None: + self._response_analysis = value + + @property + def is_streaming(self) -> Optional[bool]: + return self._is_streaming + + @is_streaming.setter + def is_streaming(self, value: Optional[bool]) -> None: + self._is_streaming = value + + @property + def ttft(self) -> float: + if self.is_streaming and self.response_start_time > self.request_arrival_time: + return round((self.response_start_time - self.request_arrival_time).total_seconds(), 3) + + @property + def latency(self) -> float: + if self.response_end_time > self.request_arrival_time: + return round((self.response_end_time - self.request_arrival_time).total_seconds(), 3) + + @property + def throughput(self) -> Optional[float]: + if self.output_tokens is not None and self.latency is not None: + return round(self.output_tokens / self.latency, 3) + + +class UpdateRequestMetrics(CloudEventBase): + request_id: UUID + cost: Optional[float] = None + + +# This file includes the custom callbacks for LiteLLM Proxy +# Once defined, these can be passed in proxy_config.yaml +class MyCustomHandler(CustomLogger): + def log_pre_api_call(self, model, messages, kwargs): + verbose_logger.info("Pre-API Call") + + def log_post_api_call(self, kwargs, response_obj, start_time, end_time): + 
verbose_logger.info("Post-API Call") + + def log_stream_event(self, kwargs, response_obj, start_time, end_time): + verbose_logger.info("On Stream") + + def log_success_event(self, kwargs, response_obj, start_time, end_time): + verbose_logger.info("On Success") + + def log_failure_event(self, kwargs, response_obj, start_time, end_time): + verbose_logger.info("On Failure") + + def get_request_metrics(self, kwargs, response_obj, start_time, end_time) -> RequestMetrics: + # log: key, user, model, prompt, response, tokens, cost + # Access kwargs passed to litellm.completion() + model = kwargs.get("model", None) + is_cache_hit = kwargs.get("cache_hit") + response_body = kwargs.get("standard_logging_object", {}).get("response", {}) + # Access litellm_params passed to litellm.completion(), example access `metadata` + litellm_params = kwargs.get("litellm_params", {}) + proxy_server_request = litellm_params.get("proxy_server_request", {}) + model_info = litellm_params.get("model_info", {}) + metadata = litellm_params.get("metadata", {}) # headers passed to LiteLLM proxy, can be found here + + # Calculate cost using litellm.completion_cost() + response_obj = response_obj or {} + cost = litellm.completion_cost(completion_response=response_obj) + + usage = response_obj.get("usage", None) or {} + if isinstance(usage, litellm.Usage): + usage = dict(usage) + + metrics_data = RequestMetrics( + request_id=kwargs.get("litellm_call_id", None), + project_id=metadata.get("project_id", None), + project_name=metadata.get("project_name", None), + endpoint_id=model_info["metadata"]["endpoint_id"], + endpoint_name=model, + endpoint_path=litellm_params["api_base"], + model_id=model_info["id"], + provider=model_info["metadata"]["provider"], + modality=model_info["metadata"]["modality"], + request_arrival_time=kwargs.get("api_call_start_time", start_time), + request_forwarded_time=start_time, + response_start_time=kwargs.get("completion_start_time", end_time), + response_end_time=end_time, + request_body=proxy_server_request.get("body", {}), + response_body=response_body, + cost=cost, + is_cache_hit=is_cache_hit or False, + is_success=True, + ) + metrics_data.model_name = model_info["metadata"]["name"] + metrics_data.input_tokens = usage.get("prompt_tokens", None) + metrics_data.output_tokens = usage.get("completion_tokens", None) + metrics_data.is_streaming = kwargs.get("stream", False) + verbose_logger.info(f"\n\nMetrics Data: {metrics_data}\n\n") + return metrics_data + + async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): + verbose_logger.info("On Async Success!") + metrics_data = self.get_request_metrics(kwargs, response_obj, start_time, end_time) + with DaprService() as dapr_service: + dapr_service.publish_to_topic( + data=metrics_data.model_dump(mode="json"), + pubsub_name="pubsub-redis", + target_topic_name="budMetricsMessages", + target_name="budMetrics", + source_topic_name="budLitellmMessages", + source_name="budLitellm", + event_type="add_request_metrics", + ) + return + + async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): + try: + verbose_logger.info("On Async Failure !") + verbose_logger.info("\nkwargs", kwargs) + + # Acess Exceptions & Traceback + exception_event = kwargs.get("exception", None) + traceback_event = kwargs.get("traceback_exception", None) + + verbose_logger.info( + f""" + Exception: {exception_event} + Traceback: {traceback_event} + """ + ) + metrics_data = self.get_request_metrics(kwargs, response_obj, start_time, 
end_time) + metrics_data.is_success = False + with DaprService() as dapr_service: + dapr_service.publish_to_topic( + data=metrics_data.model_dump(mode="json"), + pubsub_name="pubsub-redis", + target_topic_name="budMetricsMessages", + target_name="budMetrics", + source_topic_name="budLitellmMessages", + source_name="budLitellm", + event_type="add_request_metrics", + ) + except Exception as e: + # TODO: what metrics data to log here? + verbose_logger.info(f"Exception: {e}") + +proxy_handler_instance = MyCustomHandler() + +# Set litellm.callbacks = [proxy_handler_instance] on the proxy +# need to set litellm.callbacks = [proxy_handler_instance] # on the proxy \ No newline at end of file diff --git a/litellm_config.yaml b/litellm_config.yaml index 01422da9f8f7..91d57221fce2 100644 --- a/litellm_config.yaml +++ b/litellm_config.yaml @@ -5,4 +5,6 @@ router_settings: cache_responses: False redis_host: "os.environ/REDIS_HOST" redis_port: "os.environ/REDIS_PORT" - redis_password: "os.environ/REDIS_PASSWORD" \ No newline at end of file + redis_password: "os.environ/REDIS_PASSWORD" +litellm_settings: + callbacks: litellm.custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance] \ No newline at end of file From 26cc93b88339016024888e365f5bd8dfadc10d11 Mon Sep 17 00:00:00 2001 From: Charush Date: Mon, 30 Dec 2024 22:23:26 +0000 Subject: [PATCH 04/14] feat: added project id and name in request metadata --- litellm/proxy/budserve_middleware.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/litellm/proxy/budserve_middleware.py b/litellm/proxy/budserve_middleware.py index 776f138b2c2d..3773255f9432 100644 --- a/litellm/proxy/budserve_middleware.py +++ b/litellm/proxy/budserve_middleware.py @@ -89,6 +89,11 @@ async def dispatch( # get endpoint details to fill cache_params user_config = await self.fetch_user_config(api_key, endpoint_name) + request_data["metadata"] = { + "project_id": user_config.get("project_id"), + "project_name": user_config.get("project_name"), + } + # redis connection params we will set as kubernetes env variables # can be fetched using os.getenv request_data["user_config"] = { From c012747ce73434a58cbda43cbca863d43d471cf4 Mon Sep 17 00:00:00 2001 From: Charush Date: Wed, 1 Jan 2025 11:43:45 +0000 Subject: [PATCH 05/14] feat: metrics target config set using app_settings --- litellm/custom_callbacks.py | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/litellm/custom_callbacks.py b/litellm/custom_callbacks.py index cc371ac53ee8..befa2a5c5287 100644 --- a/litellm/custom_callbacks.py +++ b/litellm/custom_callbacks.py @@ -4,6 +4,7 @@ from litellm.integrations.custom_logger import CustomLogger import litellm +from litellm.commons.config import app_settings from litellm._logging import verbose_logger from budmicroframe.commons.schemas import CloudEventBase from budmicroframe.shared.dapr_service import DaprService @@ -156,8 +157,8 @@ def get_request_metrics(self, kwargs, response_obj, start_time, end_time) -> Req model_id=model_info["id"], provider=model_info["metadata"]["provider"], modality=model_info["metadata"]["modality"], - request_arrival_time=kwargs.get("api_call_start_time", start_time), - request_forwarded_time=start_time, + request_arrival_time=start_time, + request_forwarded_time=kwargs.get("api_call_start_time", start_time), response_start_time=kwargs.get("completion_start_time", end_time), response_end_time=end_time, request_body=proxy_server_request.get("body", {}), @@ -166,10 +167,6 @@ def 
get_request_metrics(self, kwargs, response_obj, start_time, end_time) -> Req is_cache_hit=is_cache_hit or False, is_success=True, ) - metrics_data.model_name = model_info["metadata"]["name"] - metrics_data.input_tokens = usage.get("prompt_tokens", None) - metrics_data.output_tokens = usage.get("completion_tokens", None) - metrics_data.is_streaming = kwargs.get("stream", False) verbose_logger.info(f"\n\nMetrics Data: {metrics_data}\n\n") return metrics_data @@ -179,11 +176,8 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti with DaprService() as dapr_service: dapr_service.publish_to_topic( data=metrics_data.model_dump(mode="json"), - pubsub_name="pubsub-redis", - target_topic_name="budMetricsMessages", - target_name="budMetrics", - source_topic_name="budLitellmMessages", - source_name="budLitellm", + target_topic_name=app_settings.budmetrics_topic_name, + target_name=app_settings.budmetrics_app_name, event_type="add_request_metrics", ) return @@ -208,11 +202,8 @@ async def async_log_failure_event(self, kwargs, response_obj, start_time, end_ti with DaprService() as dapr_service: dapr_service.publish_to_topic( data=metrics_data.model_dump(mode="json"), - pubsub_name="pubsub-redis", - target_topic_name="budMetricsMessages", - target_name="budMetrics", - source_topic_name="budLitellmMessages", - source_name="budLitellm", + target_topic_name=app_settings.budmetrics_topic_name, + target_name=app_settings.budmetrics_app_name, event_type="add_request_metrics", ) except Exception as e: From b4ff6b21044cb718e6191d76698d40b337318140 Mon Sep 17 00:00:00 2001 From: Charush Date: Wed, 1 Jan 2025 11:44:23 +0000 Subject: [PATCH 06/14] feat: added metrics app and topic to app_settings --- litellm/commons/config.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/litellm/commons/config.py b/litellm/commons/config.py index 16e2ed717c59..07d562fd024c 100644 --- a/litellm/commons/config.py +++ b/litellm/commons/config.py @@ -28,8 +28,8 @@ class AppConfig(BaseAppConfig): name: str = __version__.split("@")[0] version: str = __version__.split("@")[-1] - description: str = "" - api_root: str = "" + description: str = Field("Bud-Litellm is a proxy server for LLM requests.", alias="DOCS_DESCRIPTION") + api_root: str = Field("", alias="SERVER_ROOT_PATH") # Base Directory base_dir: DirectoryPath = Path(__file__).parent.parent.parent.resolve() @@ -58,6 +58,10 @@ class AppConfig(BaseAppConfig): cache_score_threshold: float = Field(0.8, alias="CACHE_SCORE_THRESHOLD") cache_embedding_model: str = Field("sentence-transformers/all-MiniLM-L6-v2", alias="CACHE_EMBEDDING_MODEL") + # Metrics App and Topic + budmetrics_app_name: str = Field("budMetrics", alias="BUDMETRICS_APP_NAME") + budmetrics_topic_name: str = Field("budMetricsMessages", alias="BUDMETRICS_TOPIC_NAME") + class SecretsConfig(BaseSecretsConfig): name: str = __version__.split("@")[0] version: str = __version__.split("@")[-1] From 03534f9035619062907cca2d8cd1970a3c2a0032 Mon Sep 17 00:00:00 2001 From: Charush Date: Wed, 1 Jan 2025 11:45:02 +0000 Subject: [PATCH 07/14] chore: removed fastapi app definition since it is being overridden by configure_app --- litellm/proxy/proxy_server.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 8a4008f78b30..667e61e8a12d 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -367,14 +367,14 @@ def generate_feedback_box(): else f"Proxy Server to call 100+ LLMs in the OpenAI format. 
{custom_swagger_message}\n\n{ui_message}" ) -app = FastAPI( - docs_url=_get_docs_url(), - redoc_url=_get_redoc_url(), - title=_title, - description=_description, - version=version, - root_path=server_root_path, # check if user passed root path, FastAPI defaults this value to "" -) +# app = FastAPI( +# docs_url=_get_docs_url(), +# redoc_url=_get_redoc_url(), +# title=_title, +# description=_description, +# version=version, +# root_path=server_root_path, # check if user passed root path, FastAPI defaults this value to "" +# ) app = configure_app(app_settings, secrets_settings) From dcf74ba3150e0d56e7d03a72298381714347530d Mon Sep 17 00:00:00 2001 From: Charush Date: Wed, 1 Jan 2025 19:54:08 +0000 Subject: [PATCH 08/14] feat: added error logging for metrics --- litellm/custom_callbacks.py | 54 +++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/litellm/custom_callbacks.py b/litellm/custom_callbacks.py index befa2a5c5287..6cfe911b02ee 100644 --- a/litellm/custom_callbacks.py +++ b/litellm/custom_callbacks.py @@ -15,10 +15,10 @@ class RequestMetrics(CloudEventBase): request_ip: Optional[str] = None project_id: UUID project_name: str - endpoint_id: UUID + endpoint_id: UUID | str endpoint_name: str endpoint_path: str - model_id: UUID + model_id: UUID | str provider: str modality: str request_arrival_time: datetime @@ -127,21 +127,34 @@ def log_success_event(self, kwargs, response_obj, start_time, end_time): def log_failure_event(self, kwargs, response_obj, start_time, end_time): verbose_logger.info("On Failure") - def get_request_metrics(self, kwargs, response_obj, start_time, end_time) -> RequestMetrics: + def get_request_metrics(self, kwargs, response_obj, start_time, end_time, failure=False) -> RequestMetrics: # log: key, user, model, prompt, response, tokens, cost # Access kwargs passed to litellm.completion() + verbose_logger.info(f"\nresponse_obj : {response_obj}") + model = kwargs.get("model", None) is_cache_hit = kwargs.get("cache_hit") response_body = kwargs.get("standard_logging_object", {}).get("response", {}) + if failure: + response_body = { + "exception": str(kwargs.get("exception", None)), + "traceback": kwargs.get("traceback_exception", None) + } # Access litellm_params passed to litellm.completion(), example access `metadata` litellm_params = kwargs.get("litellm_params", {}) proxy_server_request = litellm_params.get("proxy_server_request", {}) + if not proxy_server_request: + proxy_server_request["body"] = { + "model": model, + "messages": kwargs.get("messages", []), + "stream": kwargs.get("stream", False) + } model_info = litellm_params.get("model_info", {}) metadata = litellm_params.get("metadata", {}) # headers passed to LiteLLM proxy, can be found here # Calculate cost using litellm.completion_cost() response_obj = response_obj or {} - cost = litellm.completion_cost(completion_response=response_obj) + cost = litellm.completion_cost(completion_response=response_obj) if not failure else 0 usage = response_obj.get("usage", None) or {} if isinstance(usage, litellm.Usage): @@ -151,21 +164,23 @@ def get_request_metrics(self, kwargs, response_obj, start_time, end_time) -> Req request_id=kwargs.get("litellm_call_id", None), project_id=metadata.get("project_id", None), project_name=metadata.get("project_name", None), - endpoint_id=model_info["metadata"]["endpoint_id"], + endpoint_id=model_info["metadata"]["endpoint_id"] if model_info else "", endpoint_name=model, - endpoint_path=litellm_params["api_base"], - model_id=model_info["id"], - 
provider=model_info["metadata"]["provider"], - modality=model_info["metadata"]["modality"], + endpoint_path=litellm_params["api_base"] if litellm_params else None, + model_id=model_info["id"] if model_info else "", + provider=model_info["metadata"]["provider"] if model_info else "", + modality=model_info["metadata"]["modality"] if model_info else "", request_arrival_time=start_time, - request_forwarded_time=kwargs.get("api_call_start_time", start_time), - response_start_time=kwargs.get("completion_start_time", end_time), + request_forwarded_time=kwargs.get("api_call_start_time") or start_time, + response_start_time=kwargs.get("completion_start_time") or end_time, response_end_time=end_time, request_body=proxy_server_request.get("body", {}), response_body=response_body, cost=cost, is_cache_hit=is_cache_hit or False, - is_success=True, + is_success=not failure, + # model_name=model_info["metadata"]["name"] if model_info else "", + # is_streaming=kwargs.get("stream", False) ) verbose_logger.info(f"\n\nMetrics Data: {metrics_data}\n\n") return metrics_data @@ -185,20 +200,7 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): try: verbose_logger.info("On Async Failure !") - verbose_logger.info("\nkwargs", kwargs) - - # Acess Exceptions & Traceback - exception_event = kwargs.get("exception", None) - traceback_event = kwargs.get("traceback_exception", None) - - verbose_logger.info( - f""" - Exception: {exception_event} - Traceback: {traceback_event} - """ - ) - metrics_data = self.get_request_metrics(kwargs, response_obj, start_time, end_time) - metrics_data.is_success = False + metrics_data = self.get_request_metrics(kwargs, response_obj, start_time, end_time, failure=True) with DaprService() as dapr_service: dapr_service.publish_to_topic( data=metrics_data.model_dump(mode="json"), From 951d9f76639f1976f0f401a1fef6632d878c6186 Mon Sep 17 00:00:00 2001 From: Charush Date: Thu, 2 Jan 2025 11:43:31 +0000 Subject: [PATCH 09/14] feat: added few logs in proxy cli to debug server killed issue --- litellm/custom_callbacks.py | 83 ++++++++--------------------------- litellm/proxy/proxy_cli.py | 5 +++ litellm/proxy/proxy_server.py | 22 +++++++++- 3 files changed, 45 insertions(+), 65 deletions(-) diff --git a/litellm/custom_callbacks.py b/litellm/custom_callbacks.py index 6cfe911b02ee..47a940fe733e 100644 --- a/litellm/custom_callbacks.py +++ b/litellm/custom_callbacks.py @@ -1,5 +1,6 @@ from datetime import datetime from typing import Any, Dict, List, Optional, Union +from urllib.parse import urlparse from uuid import UUID from litellm.integrations.custom_logger import CustomLogger @@ -9,6 +10,18 @@ from budmicroframe.commons.schemas import CloudEventBase from budmicroframe.shared.dapr_service import DaprService +# error in budserve_middleware.py +# Scenario 1: if user sends wrong api key +# Scenario 2: if user sends wrong model param + +# Keys i won't have: +# project_id, project_name, endpoint_id, endpoint_name (what user has sent), endpoint_path, +# model_id, provider, modality, model_name +# Keys i can set: +# request_arrival_time == request_forwarded_time == response_start_time == response_end_time +# request_body, response_body, cost = 0, is_cache_hit = False, is_success = False, is_streaming = False + + class RequestMetrics(CloudEventBase): request_id: UUID @@ -30,13 +43,9 @@ class RequestMetrics(CloudEventBase): cost: Optional[float] = None is_cache_hit: bool is_success: bool + # 
model_name: str + # is_streaming: bool = False - _model_name: Optional[str] = None - _input_tokens: Optional[int] = None - _output_tokens: Optional[int] = None - _response_analysis: Optional[Dict[str, Any]] = None - _is_streaming: Optional[bool] = None - def validate_intervals(self) -> "RequestMetrics": if self.response_start_time > self.response_end_time: raise ValueError("Response start time cannot be after response end time.") @@ -47,62 +56,7 @@ def validate_intervals(self) -> "RequestMetrics": if self.request_arrival_time > self.response_end_time: raise ValueError("Request arrival time cannot be after response end time.") return self - - @property - def model_name(self) -> Optional[str]: - return self._model_name - - @model_name.setter - def model_name(self, value: Optional[str]) -> None: - self._model_name = value - - @property - def input_tokens(self) -> Optional[int]: - return self._input_tokens - - @input_tokens.setter - def input_tokens(self, value: Optional[int]) -> None: - self._input_tokens = value - - @property - def output_tokens(self) -> Optional[int]: - return self._output_tokens - - @output_tokens.setter - def output_tokens(self, value: Optional[int]) -> None: - self._output_tokens = value - - @property - def response_analysis(self) -> Optional[Dict[str, Any]]: - return self._response_analysis - - @response_analysis.setter - def response_analysis(self, value: Optional[Dict[str, Any]]) -> None: - self._response_analysis = value - - @property - def is_streaming(self) -> Optional[bool]: - return self._is_streaming - - @is_streaming.setter - def is_streaming(self, value: Optional[bool]) -> None: - self._is_streaming = value - - @property - def ttft(self) -> float: - if self.is_streaming and self.response_start_time > self.request_arrival_time: - return round((self.response_start_time - self.request_arrival_time).total_seconds(), 3) - - @property - def latency(self) -> float: - if self.response_end_time > self.request_arrival_time: - return round((self.response_end_time - self.request_arrival_time).total_seconds(), 3) - - @property - def throughput(self) -> Optional[float]: - if self.output_tokens is not None and self.latency is not None: - return round(self.output_tokens / self.latency, 3) - + class UpdateRequestMetrics(CloudEventBase): request_id: UUID @@ -151,7 +105,8 @@ def get_request_metrics(self, kwargs, response_obj, start_time, end_time, failur } model_info = litellm_params.get("model_info", {}) metadata = litellm_params.get("metadata", {}) # headers passed to LiteLLM proxy, can be found here - + api_route = urlparse(metadata.get("endpoint", "")).path + # Calculate cost using litellm.completion_cost() response_obj = response_obj or {} cost = litellm.completion_cost(completion_response=response_obj) if not failure else 0 @@ -166,7 +121,7 @@ def get_request_metrics(self, kwargs, response_obj, start_time, end_time, failur project_name=metadata.get("project_name", None), endpoint_id=model_info["metadata"]["endpoint_id"] if model_info else "", endpoint_name=model, - endpoint_path=litellm_params["api_base"] if litellm_params else None, + endpoint_path=f"{litellm_params['api_base']}/{api_route}" if litellm_params else api_route, model_id=model_info["id"] if model_info else "", provider=model_info["metadata"]["provider"] if model_info else "", modality=model_info["metadata"]["modality"] if model_info else "", diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index 094828de17a3..dbc921ab129a 100644 --- a/litellm/proxy/proxy_cli.py +++ 
b/litellm/proxy/proxy_cli.py @@ -631,6 +631,7 @@ def _make_openai_completion(): ): try: from litellm.secret_managers.main import get_secret + from litellm._logging import verbose_proxy_logger if os.getenv("DATABASE_URL", None) is not None: ### add connection pool + pool timeout args @@ -651,10 +652,14 @@ def _make_openai_completion(): modified_url = append_query_params(database_url, params) os.environ["DIRECT_URL"] = modified_url ### + verbose_proxy_logger.info("Running prisma db push") subprocess.run(["prisma"], capture_output=True) + verbose_proxy_logger.info("Prisma db push complete") is_prisma_runnable = True except FileNotFoundError: is_prisma_runnable = False + except Exception as e: + print(e) if is_prisma_runnable: from litellm.proxy.db.check_migration import check_prisma_schema_diff diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 667e61e8a12d..475b7050f89a 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -474,9 +474,13 @@ async def redirect_ui_middleware(request: Request, call_next): app.add_middleware(BudServeMiddleware) @app.middleware("http") async def catch_exceptions_middleware(request: Request, call_next): + start_time = time.time() try: return await call_next(request) except Exception as e: + # Handle the same way as the exception handler + import traceback + from litellm.custom_callbacks import proxy_handler_instance # Convert to ProxyException if needed if not isinstance(e, ProxyException): e = ProxyException( @@ -485,7 +489,23 @@ async def catch_exceptions_middleware(request: Request, call_next): param=None, code=500 ) - # Handle the same way as the exception handler + # TODO: send error to budmetrics + end_time = time.time() + request_body = await request.json() + kwargs = { + "model": request_body.get("model", None), + "cache_hit": False, + "exception": e, + "traceback_exception": traceback.format_exc(), + "litellm_params": { + "proxy_server_request": {"body": request_body}, + "metadata": { + "endpoint": request.url + } + }, + "stream": request_body.get("stream", False), + } + await proxy_handler_instance.async_log_failure_event(kwargs, None, start_time, end_time) return JSONResponse( status_code=int(e.code) if e.code else status.HTTP_500_INTERNAL_SERVER_ERROR, content={ From 3d045aba9ffceb25520a33c7102b52bf7d00d91e Mon Sep 17 00:00:00 2001 From: Charush Date: Fri, 3 Jan 2025 06:57:34 +0000 Subject: [PATCH 10/14] refac: RequestMetrics schema updated --- litellm/custom_callbacks.py | 38 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/litellm/custom_callbacks.py b/litellm/custom_callbacks.py index 47a940fe733e..a702f3f2db65 100644 --- a/litellm/custom_callbacks.py +++ b/litellm/custom_callbacks.py @@ -26,14 +26,15 @@ class RequestMetrics(CloudEventBase): request_id: UUID request_ip: Optional[str] = None - project_id: UUID - project_name: str - endpoint_id: UUID | str - endpoint_name: str - endpoint_path: str - model_id: UUID | str - provider: str - modality: str + project_id: Optional[UUID] + project_name: Optional[str] + endpoint_id: Optional[UUID] + endpoint_name: Optional[str] + endpoint_path: Optional[str] + model_id: Optional[UUID] + model_name: Optional[str] + provider: Optional[str] + modality: Optional[str] request_arrival_time: datetime request_forwarded_time: datetime response_start_time: datetime @@ -42,9 +43,8 @@ class RequestMetrics(CloudEventBase): response_body: Union[Dict[str, Any], List[Dict[str, Any]]] cost: Optional[float] = None 
is_cache_hit: bool + is_streaming: bool = False is_success: bool - # model_name: str - # is_streaming: bool = False def validate_intervals(self) -> "RequestMetrics": if self.response_start_time > self.response_end_time: @@ -57,12 +57,6 @@ def validate_intervals(self) -> "RequestMetrics": raise ValueError("Request arrival time cannot be after response end time.") return self - -class UpdateRequestMetrics(CloudEventBase): - request_id: UUID - cost: Optional[float] = None - - # This file includes the custom callbacks for LiteLLM Proxy # Once defined, these can be passed in proxy_config.yaml class MyCustomHandler(CustomLogger): @@ -119,12 +113,13 @@ def get_request_metrics(self, kwargs, response_obj, start_time, end_time, failur request_id=kwargs.get("litellm_call_id", None), project_id=metadata.get("project_id", None), project_name=metadata.get("project_name", None), - endpoint_id=model_info["metadata"]["endpoint_id"] if model_info else "", + endpoint_id=model_info["metadata"]["endpoint_id"] if model_info else None, endpoint_name=model, endpoint_path=f"{litellm_params['api_base']}/{api_route}" if litellm_params else api_route, - model_id=model_info["id"] if model_info else "", - provider=model_info["metadata"]["provider"] if model_info else "", - modality=model_info["metadata"]["modality"] if model_info else "", + model_id=model_info["id"] if model_info else None, + model_name=model_info["metadata"]["name"] if model_info else None, + provider=model_info["metadata"]["provider"] if model_info else None, + modality=model_info["metadata"]["modality"] if model_info else None, request_arrival_time=start_time, request_forwarded_time=kwargs.get("api_call_start_time") or start_time, response_start_time=kwargs.get("completion_start_time") or end_time, @@ -133,9 +128,8 @@ def get_request_metrics(self, kwargs, response_obj, start_time, end_time, failur response_body=response_body, cost=cost, is_cache_hit=is_cache_hit or False, + is_streaming=kwargs.get("stream", False), is_success=not failure, - # model_name=model_info["metadata"]["name"] if model_info else "", - # is_streaming=kwargs.get("stream", False) ) verbose_logger.info(f"\n\nMetrics Data: {metrics_data}\n\n") return metrics_data From 1c3554ce1cf4e385ad6b67faff9f7b17f2a8bdfe Mon Sep 17 00:00:00 2001 From: budbot Date: Fri, 3 Jan 2025 08:42:33 +0000 Subject: [PATCH 11/14] fix: fetching request body in exception handler middleware --- litellm/proxy/budserve_middleware.py | 1 + litellm/proxy/proxy_server.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/litellm/proxy/budserve_middleware.py b/litellm/proxy/budserve_middleware.py index 3773255f9432..0d2ba969f3fd 100644 --- a/litellm/proxy/budserve_middleware.py +++ b/litellm/proxy/budserve_middleware.py @@ -83,6 +83,7 @@ async def dispatch( # get the request body request_data = await _read_request_body(request=request) + request.state.original_body = json.dumps(request_data) api_key = await self.get_api_key(request) endpoint_name = request_data.get("model") diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 475b7050f89a..1117b8c5c725 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -491,7 +491,11 @@ async def catch_exceptions_middleware(request: Request, call_next): ) # TODO: send error to budmetrics end_time = time.time() - request_body = await request.json() + original_body = getattr(request.state, "original_body", None) + if original_body is not None: + request_body = json.loads(original_body) + else: + 
From 1c3554ce1cf4e385ad6b67faff9f7b17f2a8bdfe Mon Sep 17 00:00:00 2001
From: budbot
Date: Fri, 3 Jan 2025 08:42:33 +0000
Subject: [PATCH 11/14] fix: fetching request body in exception handler middleware

---
 litellm/proxy/budserve_middleware.py | 1 +
 litellm/proxy/proxy_server.py        | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/litellm/proxy/budserve_middleware.py b/litellm/proxy/budserve_middleware.py
index 3773255f9432..0d2ba969f3fd 100644
--- a/litellm/proxy/budserve_middleware.py
+++ b/litellm/proxy/budserve_middleware.py
@@ -83,6 +83,7 @@ async def dispatch(
 
         # get the request body
         request_data = await _read_request_body(request=request)
+        request.state.original_body = json.dumps(request_data)
         api_key = await self.get_api_key(request)
         endpoint_name = request_data.get("model")
 
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 475b7050f89a..1117b8c5c725 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -491,7 +491,11 @@ async def catch_exceptions_middleware(request: Request, call_next):
             )
         # TODO: send error to budmetrics
         end_time = time.time()
-        request_body = await request.json()
+        original_body = getattr(request.state, "original_body", None)
+        if original_body is not None:
+            request_body = json.loads(original_body)
+        else:
+            request_body = {}
         kwargs = {
             "model": request_body.get("model", None),
             "cache_hit": False,
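The fix works because a Starlette/FastAPI request body can only be read once: the inner BudServe middleware parses it first and stashes a serialized copy on request.state, which is backed by the shared ASGI scope, so the outer exception middleware can recover it without touching the consumed stream. A self-contained sketch of the pattern (the app, route, and middleware names here are illustrative, not from the patch):

import json

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse

app = FastAPI()

@app.post("/v1/chat/completions")
async def chat(request: Request):
    # Simulates a downstream failure after the body was consumed upstream.
    raise RuntimeError("model backend unavailable")

@app.middleware("http")  # registered first, so it runs innermost
async def stash_body(request: Request, call_next):
    # Parse once, keep a serialized copy on request.state for later layers.
    body = await request.body()
    request.state.original_body = body.decode() if body else "{}"
    return await call_next(request)

@app.middleware("http")  # registered last, so it wraps stash_body
async def recover_body_on_error(request: Request, call_next):
    try:
        return await call_next(request)
    except Exception as e:
        # Read the stashed copy instead of request.json(), which would
        # fail on the already-consumed stream.
        original = getattr(request.state, "original_body", None)
        request_body = json.loads(original) if original else {}
        return JSONResponse(
            status_code=500,
            content={"error": str(e), "model": request_body.get("model")},
        )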
From fc05e430baeb2b96728faf2ef34a1b4b0f6ee235 Mon Sep 17 00:00:00 2001
From: budbot
Date: Fri, 3 Jan 2025 21:58:25 +0000
Subject: [PATCH 12/14] fix: save metrics for middleware errors and cluster connection errors

---
 litellm/custom_callbacks.py | 44 +++++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 16 deletions(-)

diff --git a/litellm/custom_callbacks.py b/litellm/custom_callbacks.py
index a702f3f2db65..7f6f6067c00e 100644
--- a/litellm/custom_callbacks.py
+++ b/litellm/custom_callbacks.py
@@ -1,7 +1,8 @@
+import copy
 from datetime import datetime
 from typing import Any, Dict, List, Optional, Union
 from urllib.parse import urlparse
-from uuid import UUID
+from uuid import UUID, uuid4
 
 from litellm.integrations.custom_logger import CustomLogger
 import litellm
@@ -78,28 +79,33 @@ def log_failure_event(self, kwargs, response_obj, start_time, end_time):
 
     def get_request_metrics(self, kwargs, response_obj, start_time, end_time, failure=False) -> RequestMetrics:
         # log: key, user, model, prompt, response, tokens, cost
         # Access kwargs passed to litellm.completion()
-        verbose_logger.info(f"\nresponse_obj : {response_obj}")
+        verbose_logger.info(f"\nkwargs : {kwargs}")
+        # verbose_logger.info(f"\nresponse_obj : {response_obj}")
         model = kwargs.get("model", None)
         is_cache_hit = kwargs.get("cache_hit")
-        response_body = kwargs.get("standard_logging_object", {}).get("response", {})
-        if failure:
-            response_body = {
-                "exception": str(kwargs.get("exception", None)),
-                "traceback": kwargs.get("traceback_exception", None)
-            }
+        response_body = copy.deepcopy(kwargs.get("standard_logging_object", {}).get("response", {})) if not failure else {
+            "exception": str(kwargs.get("exception", None)),
+            "traceback": kwargs.get("traceback_exception", None)
+        }
         # Access litellm_params passed to litellm.completion(), example access `metadata`
         litellm_params = kwargs.get("litellm_params", {})
         proxy_server_request = litellm_params.get("proxy_server_request", {})
+        if proxy_server_request and proxy_server_request.get("body"):
+            # To handle unserializable metadata in proxy_server_request and circular dependencies
+            proxy_server_request["body"].pop("metadata", None)
         if not proxy_server_request:
             proxy_server_request["body"] = {
                 "model": model,
                 "messages": kwargs.get("messages", []),
                 "stream": kwargs.get("stream", False)
             }
-        model_info = litellm_params.get("model_info", {})
-        metadata = litellm_params.get("metadata", {})  # headers passed to LiteLLM proxy, can be found here
-        api_route = urlparse(metadata.get("endpoint", "")).path
+        model_info = copy.deepcopy(litellm_params.get("model_info", {}))
+        metadata = litellm_params.get("metadata", {})
+        endpoint = metadata.get("endpoint", "")
+        api_route = urlparse(str(endpoint)).path
+        if litellm_params.get("api_base"):
+            api_route = f"{litellm_params['api_base']}{api_route}"
 
         # Calculate cost using litellm.completion_cost()
         response_obj = response_obj or {}
@@ -110,12 +116,12 @@ def get_request_metrics(self, kwargs, response_obj, start_time, end_time, failur
             usage = dict(usage)
 
         metrics_data = RequestMetrics(
-            request_id=kwargs.get("litellm_call_id", None),
+            request_id=kwargs.get("litellm_call_id", uuid4()),
             project_id=metadata.get("project_id", None),
             project_name=metadata.get("project_name", None),
             endpoint_id=model_info["metadata"]["endpoint_id"] if model_info else None,
             endpoint_name=model,
-            endpoint_path=f"{litellm_params['api_base']}/{api_route}" if litellm_params else api_route,
+            endpoint_path=api_route,
             model_id=model_info["id"] if model_info else None,
             model_name=model_info["metadata"]["name"] if model_info else None,
             provider=model_info["metadata"]["provider"] if model_info else None,
@@ -131,15 +137,17 @@ def get_request_metrics(self, kwargs, response_obj, start_time, end_time, failur
             is_streaming=kwargs.get("stream", False),
             is_success=not failure,
         )
-        verbose_logger.info(f"\n\nMetrics Data: {metrics_data}\n\n")
+        # verbose_logger.info(f"\n\nMetrics Data: {metrics_data}\n\n")
         return metrics_data
 
     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
         verbose_logger.info("On Async Success!")
         metrics_data = self.get_request_metrics(kwargs, response_obj, start_time, end_time)
+        metrics_data_json = metrics_data.model_dump(mode="json")
+        verbose_logger.info(f"Metrics Data JSON: {metrics_data_json}")
         with DaprService() as dapr_service:
             dapr_service.publish_to_topic(
-                data=metrics_data.model_dump(mode="json"),
+                data=metrics_data_json,
                 target_topic_name=app_settings.budmetrics_topic_name,
                 target_name=app_settings.budmetrics_app_name,
                 event_type="add_request_metrics",
@@ -150,16 +158,20 @@ async def async_log_failure_event(self, kwargs, response_obj, start_time, end_ti
         try:
             verbose_logger.info("On Async Failure !")
             metrics_data = self.get_request_metrics(kwargs, response_obj, start_time, end_time, failure=True)
+            metrics_data_json = metrics_data.model_dump(mode="json")
+            verbose_logger.info(f"Metrics Data JSON: {metrics_data_json}")
             with DaprService() as dapr_service:
                 dapr_service.publish_to_topic(
-                    data=metrics_data.model_dump(mode="json"),
+                    data=metrics_data_json,
                     target_topic_name=app_settings.budmetrics_topic_name,
                     target_name=app_settings.budmetrics_app_name,
                    event_type="add_request_metrics",
                 )
         except Exception as e:
             # TODO: what metrics data to log here?
+            import traceback
             verbose_logger.info(f"Exception: {e}")
+            verbose_logger.info(f"Traceback: {traceback.format_exc()}")
 
 
 proxy_handler_instance = MyCustomHandler()
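The deepcopy calls guard against mutating dicts that appear to be shared with other litellm callbacks and with the in-flight request (this sharing is inferred from the patch, not stated in it): popping `metadata` or rewriting `model_info` in place could otherwise leak into later handlers. Dumping with model_dump(mode="json") before entering the Dapr publish call also surfaces UUID/datetime serialization errors early, which is why the JSON payload is logged first. A toy illustration of the copy-before-mutate guard, with hypothetical data:

import copy

# Hypothetical stand-in for litellm_params shared across callback handlers.
shared_params = {"model_info": {"id": "endpoint-1", "metadata": {"provider": "openai"}}}

def build_event(params: dict) -> dict:
    model_info = copy.deepcopy(params.get("model_info", {}))
    # Mutating the copy is safe; the shared original stays intact.
    model_info["metadata"].pop("provider", None)
    return model_info

event = build_event(shared_params)
assert "provider" not in event["metadata"]
assert shared_params["model_info"]["metadata"]["provider"] == "openai"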
From c9475c437b20b32c67f7a20caeb0aa4c55df84e3 Mon Sep 17 00:00:00 2001
From: budbot
Date: Wed, 8 Jan 2025 17:30:14 +0000
Subject: [PATCH 13/14] fix: keep the user-defined model name instead of the actual model name set for endpoint deployment

---
 litellm/router.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/litellm/router.py b/litellm/router.py
index 17af0e6193cc..ca49b0ddaca7 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -3003,7 +3003,11 @@ async def make_call(self, original_function: Any, *args, **kwargs):
             response = await response
         ## PROCESS RESPONSE HEADERS
         await self.set_response_headers(response=response, model_group=model_group)
-
+        verbose_router_logger.info(f"TYPE OF MAKE CALL RESPONSE : {type(response)}")
+        if hasattr(response, "model"):
+            response.model = model_group
+        if isinstance(response, dict):
+            response["model"] = model_group
         return response
 
     def _handle_mock_testing_rate_limit_error(

From 57ebfe68bdc9c887a292f872507e4187ab0f85db Mon Sep 17 00:00:00 2001
From: bud1906
Date: Fri, 10 Jan 2025 16:17:37 +0000
Subject: [PATCH 14/14] fix: authorization header missing error propagated

---
 litellm/proxy/budserve_middleware.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/litellm/proxy/budserve_middleware.py b/litellm/proxy/budserve_middleware.py
index 0d2ba969f3fd..57ac2a917b5c 100644
--- a/litellm/proxy/budserve_middleware.py
+++ b/litellm/proxy/budserve_middleware.py
@@ -21,6 +21,13 @@ class BudServeMiddleware(BaseHTTPMiddleware):
 
     async def get_api_key(self, request):
         authorization_header = request.headers.get("Authorization")
+        if not authorization_header:
+            raise ProxyException(
+                message="Authorization header is missing",
+                type="unauthorized",
+                param="Authorization",
+                code=401
+            )
         api_key = authorization_header.split(" ")[1]
         return api_key
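One edge the new check still leaves open: a header like "Bearer" with no token would raise IndexError at split(" ")[1] rather than a clean 401. A hardened standalone variant of the same parsing, as a sketch (the helper name and ValueError are hypothetical stand-ins for the middleware's ProxyException):

from typing import Optional

def parse_bearer_token(authorization_header: Optional[str]) -> str:
    # Reject a missing header, as patch 14 does, then validate the shape.
    if not authorization_header:
        raise ValueError("Authorization header is missing")
    parts = authorization_header.split(" ")
    if len(parts) != 2 or parts[0].lower() != "bearer" or not parts[1]:
        raise ValueError("Authorization header must be of the form 'Bearer <token>'")
    return parts[1]

assert parse_bearer_token("Bearer sk-123") == "sk-123"
try:
    parse_bearer_token("Bearer")
except ValueError as err:
    print(err)  # malformed header rejected instead of raising IndexError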