From c0fac9141cc6347ba62184489422ac98e165c65f Mon Sep 17 00:00:00 2001 From: "K.Y" Date: Sun, 21 Jul 2024 23:23:58 +0800 Subject: [PATCH] Support configuration through YAML files --- .env | 17 +- .gitignore | 1 + Examples/chat_completion.py | 27 +- deploy.md | 34 +- deploy_en.md | 6 - openai-forward-config.example.yaml | 92 ++++++ openai-forward-config.yaml | 92 ++++++ openai_forward/__init__.py | 3 +- openai_forward/__main__.py | 22 +- openai_forward/app.py | 35 +- openai_forward/cache/__init__.py | 5 +- openai_forward/cache/chat/chat_completions.py | 9 +- openai_forward/cache/chat/response.py | 3 +- openai_forward/cache/database.py | 2 +- openai_forward/cache/embedding/response.py | 2 +- openai_forward/config/interface.py | 96 +++--- openai_forward/config/settings.py | 301 ++++++++++++++++++ openai_forward/content/openai.py | 8 +- openai_forward/forward/__init__.py | 3 +- openai_forward/forward/core.py | 56 +++- openai_forward/settings.py | 210 ------------ openai_forward/webui/run.py | 72 +++-- pyproject.toml | 51 +-- tests/test_forwarding.py | 5 +- tests/test_settings.py | 2 +- 25 files changed, 758 insertions(+), 396 deletions(-) create mode 100644 openai-forward-config.example.yaml create mode 100644 openai-forward-config.yaml create mode 100644 openai_forward/config/settings.py delete mode 100644 openai_forward/settings.py diff --git a/.env b/.env index 3b01fdf..61058ca 100644 --- a/.env +++ b/.env @@ -21,10 +21,17 @@ DEFAULT_REQUEST_CACHING_VALUE=false BENCHMARK_MODE=true FORWARD_CONFIG=[{"base_url":"https://api.openai.com","route":"/","type":"openai"}] - -#LEVEL_MODELS={"1": ["gpt-4"], "2": ["gpt-3.5-turbo"]} -#OPENAI_API_KEY_CONFIG={"sk-xxx": [0], "sk-xxx": [1], "sk-xxx": [1,2]} -#FORWARD_KEY_CONFIG={"fk-0": 0, "fk-1": 1, "fk-2": 2, "default": 1} +#FORWARD_CONFIG=[{"base_url":"https://api.deepseek.com","route":"/","type":"openai"}] +#FORWARD_CONFIG=[{"base_url":"http://localhost:3000","route":"/","type":"general"}] +#CUSTOM_MODEL_CONFIG='{ +#"backend":"ollama", +#"model_map": {"gpt-3.5-turbo":"qwen2:7b"}, +#"api_base": "http://localhost:11434" +#}' + +#LEVEL_MODELS='{"1":["gpt-4"],"2":["gpt-3.5-turbo"]}' +#OPENAI_API_KEY_CONFIG='{"sk-xxx": [0], "sk-xxx": [1], "sk-xxx": [1,2]}' +#FORWARD_KEY_CONFIG='{"0": ["fk-0"], "1":["fk-1", "fk-11"], "2": ["fk-2"]}' # `REQ_RATE_LIMIT`: i.e., Request rate limit for specified routes, user specific # format: {route: ratelimit-string} @@ -46,7 +53,7 @@ RATE_LIMIT_STRATEGY=moving-window # Rate limit for returned tokens TOKEN_RATE_LIMIT='{ -"/v1/chat/completions":[{"level":0,"limit":"60/second"}], +"/v1/chat/completions":[{"level":0,"limit":"100/second"}], "/v1/completions":[{"level":0,"limit":"60/second"}], "/benchmark/v1/chat/completions":[{"level":0,"limit":"20/second"}] }' diff --git a/.gitignore b/.gitignore index 62f1a65..b4da486 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ run.sh ssl/ chat.yaml chat_*.yaml +openai-forward-config.yaml.* config.toml config_parser.py diff --git a/Examples/chat_completion.py b/Examples/chat_completion.py index 1f567da..d620414 100644 --- a/Examples/chat_completion.py +++ b/Examples/chat_completion.py @@ -18,14 +18,25 @@ max_tokens = None -# user_content = """ -# 用c实现目前已知最快平方根算法 -# """ -# user_content = '最初有1000千克的蘑菇,其中99%的成分是水。经过几天的晴天晾晒后,蘑菇中的水分含量现在是98%,蘑菇中减少了多少水分?' -# user_content = "Write down the most romantic sentence you can think of." 
-user_content = "光散射中的Mie理论的理论公式是怎样的?请用latex语法表示它公式使用$$符号包裹。" - -model = "gpt-3.5-turbo" + +queries = { + 0: "用c实现目前已知最快平方根导数算法", + 1: "既然快递要 3 天才到,为什么不把所有的快递都提前 3 天发?", + 2: "只切一刀,如何把四个橘子分给四个小朋友?", + 3: "最初有1000千克的蘑菇,其中99%的成分是水。经过几天的晴天晾晒后,蘑菇中的水分含量现在是98%,蘑菇中减少了多少水分?", + 4: "哥哥弟弟百米赛跑,第一次从同一起点起跑,哥哥到达终点时领先弟弟一米获胜,第二次哥哥从起点后退一米处开始起跑,问结果如何?", + 5: "光散射中的Mie理论的理论公式是怎样的?请用latex语法表示它公式使用$$符号包裹。", + 6: "Write down the most romantic sentence you can think of.", + 7: "为什么我爸妈结婚的时候没邀请我参加婚礼?", + 8: "一个人自杀了,这个世界上是多了一个自杀的人,还是少了一个自杀的人", +} + +user_content = queries[8] + +# model = "gpt-3.5-turbo" +model = "gpt-4o-mini" +# model = "deepseek-chat" +# model="gpt-4o" # model="gpt-4" mt = MeasureTime().start() diff --git a/deploy.md b/deploy.md index 62f04cc..e7a2922 100644 --- a/deploy.md +++ b/deploy.md @@ -6,14 +6,10 @@
-一键部署至render -[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/KenyonY/openai-forward) - [pip部署](#pip部署) | -[docker部署](#docker部署) | -[render一键部署](#render-一键部署) +[docker部署](#docker部署)
@@ -22,14 +18,14 @@ 1. [pip 安装部署](deploy.md#pip部署) 2. [Docker部署](deploy.md#docker部署) -**一键免费云平台部署** +**~~一键免费云平台部署~~** -1. [Render一键部署](deploy.md#render-一键部署) -2. 更多部署: https://github.com/KenyonY/openai-forward/blob/0.5.x/deploy.md +1. ~~[Render一键部署](deploy.md#render-一键部署)~~ +2. ~~更多部署: https://github.com/KenyonY/openai-forward/blob/0.5.x/deploy.md~~ **其它反代** [CloudFlare AI Gateway](https://developers.cloudflare.com/ai-gateway/get-started/creating-gateway/) -[ChatGPT](https://github.com/pandora-next/deploy) +~~[ChatGPT](https://github.com/pandora-next/deploy)~~ --- @@ -113,23 +109,3 @@ aifd run - -## Render 一键部署 -[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/KenyonY/openai-forward) - -Render应该算是所有部署中最简易的一种, 并且它生成的域名国内可以直接访问! - -1. 点击一键部署按钮 - 也可先fork本仓库 -->到Render的Dashboard上 New Web Services --> Connect 到刚刚fork到仓库 后面步骤均默认即可 -2. 填写环境变量,`openai-forward`所有配置都可以通过环境变量设置,可以根据自己需要填写。 - -然后等待部署完成即可。 -Render的免费计划: 每月750小时免费实例时间(意味着单个实例可以不间断运行)、100G带宽流量、500分钟构建时长. - -注:默认render在15分钟内没有服务请求时会自动休眠(好处是休眠后不会占用750h的免费实例时间),休眠后下一次请求会被阻塞 ~15s。 -如果希望服务15分钟不自动休眠,可以使用定时脚本(如每14分钟)对render服务进行保活。保活脚本参考`scripts/keep_render_alive.py`. -如果希望零停机部署可以在render设置中配置 `Health Check Path`为`/healthz` - -> https://render.openai-forward.com -> https://openai-forward.onrender.com - diff --git a/deploy_en.md b/deploy_en.md index d6bfcb2..2ce4b0b 100644 --- a/deploy_en.md +++ b/deploy_en.md @@ -76,12 +76,6 @@ proxy_buffering off; docker run -d -p 8000:8000 beidongjiedeguang/openai-forward:latest ``` -If the `.env` environment variable is specified: - -```bash -docker run --env-file .env -d -p 8000:8000 beidongjiedeguang/openai-forward:latest -``` - This will map the host's 8000 port. Access the service via `http://{ip}:8000`. The log path inside the container is `/home/openai-forward/Log/`. It can be mapped when starting up. 
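
The hunk above drops the `--env-file .env` example; with this patch the service instead reads `openai-forward-config.yaml` from its working directory. A minimal sketch of passing the YAML config to the container (the mount target `/home/openai-forward/` follows the log path mentioned above and is an assumption, not something this patch pins down):

```bash
# Mount a local openai-forward-config.yaml into the container's working directory
# (assumed to be /home/openai-forward; adjust the target path if the image differs)
docker run -d -p 8000:8000 \
  -v $(pwd)/openai-forward-config.yaml:/home/openai-forward/openai-forward-config.yaml \
  beidongjiedeguang/openai-forward:latest
```
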
diff --git a/openai-forward-config.example.yaml b/openai-forward-config.example.yaml new file mode 100644 index 0000000..57e3d6f --- /dev/null +++ b/openai-forward-config.example.yaml @@ -0,0 +1,92 @@ +log: + general: true + openai: true + +cache: + general: true + openai: true + routes: + - "/v1/chat/completions" + - "/v1/embeddings" + # `CACHE_BACKEND`: Options (MEMORY, LMDB, LevelDB) + backend: MEMORY + root_path_or_url: "./FLAXKV_DB" + default_request_caching_value: false + +chat_completion_route: "/v1/chat/completions" +# custom_general_route: "/v1/models/gemini-pro" + +benchmark_mode: true + +forward: + - base_url: "https://api.openai.com" + route: "/" + type: "openai" + + - base_url: "https://generativelanguage.googleapis.com" + route: "/gemini" + type: "general" + +# custom_model_config: +# backend: "ollama" +# model_map: +# gpt-3.5-turbo: "qwen2:7b" +# api_base: "http://localhost:11434" + +api_key: + level: + 1: ["gpt-4"] + 2: ["gpt-3.5-turbo"] + + openai_key: + "sk-xxx1": [0] + "sk-xxx2": [1] + "sk-xxx3": [1, 2] + + forward_key: + 0: ["fk-0"] + 1: ["fk-1", "fk-11"] + 2: ["fk-2"] + +rate_limit: + global_rate_limit: "200/minute" + strategy: "moving-window" + iter_chunk: "one-by-one" + req_rate_limit: + - route: "/v1/chat/completions" + value: + - level: 0 + limit: "100/2minutes" + + - route: "/v1/completions" + value: + - level: 0 + limit: "60/minute;600/hour" + req_rate_limit_backend: "redis://localhost:6379" + + token_rate_limit: + - route: "/v1/chat/completions" + value: + - level: 0 + limit: "100/second" + - route: "/v1/completions" + value: + - level: 0 + limit: "60/second" + - route: "/benchmark/v1/chat/completions" + value: + - level: 0 + limit: "20/second" + +timeout: 6 + +ip_blacklist: +ip_whitelist: + +webui_restart_port: 15555 +webui_log_port: 15556 + +proxy: +default_stream_response: true + +tz: Asia/Shanghai diff --git a/openai-forward-config.yaml b/openai-forward-config.yaml new file mode 100644 index 0000000..860b9cd --- /dev/null +++ b/openai-forward-config.yaml @@ -0,0 +1,92 @@ +log: + general: true + openai: true + +cache: + general: true + openai: true + routes: + - "/v1/chat/completions" + - "/v1/embeddings" + # `CACHE_BACKEND`: Options (MEMORY, LMDB, LevelDB) + backend: MEMORY + root_path_or_url: "./FLAXKV_DB" + default_request_caching_value: false + +chat_completion_route: "/v1/chat/completions" +# custom_general_route: "/v1/models/gemini-pro" + +benchmark_mode: true + +forward: + - base_url: "https://api.openai.com" + route: "/" + type: "openai" + + - base_url: "https://generativelanguage.googleapis.com" + route: "/gemini" + type: "general" + +# custom_model_config: +# backend: "ollama" +# model_map: +# gpt-3.5-turbo: "qwen2:7b" +# api_base: "http://localhost:11434" + +api_key: + level: + 1: ["gpt-4"] + 2: ["gpt-3.5-turbo"] + + openai_key: + "sk-xxx1": [0] + "sk-xxx2": [1] + "sk-xxx3": [1, 2] + + forward_key: + 0: ["fk-0"] + 1: ["fk-1", "fk-11"] + 2: ["fk-2"] + +rate_limit: + global_rate_limit: "200/minute" + strategy: "moving-window" + iter_chunk: "one-by-one" + req_rate_limit: + - route: "/v1/chat/completions" + value: + - level: 0 + limit: "100/2minutes" + + - route: "/v1/completions" + value: + - level: 0 + limit: "60/minute;600/hour" + req_rate_limit_backend: "redis://localhost:6379" + + token_rate_limit: + - route: "/v1/chat/completions" + value: + - level: 0 + limit: "60/second" + - route: "/v1/completions" + value: + - level: 0 + limit: "60/second" + - route: "/benchmark/v1/chat/completions" + value: + - level: 0 + limit: "20/second" + +timeout: 6 + 
+ip_blacklist: +ip_whitelist: + +webui_restart_port: 15555 +webui_log_port: 15556 + +proxy: +default_stream_response: true + +tz: Asia/Shanghai diff --git a/openai_forward/__init__.py b/openai_forward/__init__.py index 793a432..06d75ab 100644 --- a/openai_forward/__init__.py +++ b/openai_forward/__init__.py @@ -1,6 +1,5 @@ -__version__ = "0.8.1" +__version__ = "0.8.2-alpha" from dotenv import load_dotenv -from yaml import load load_dotenv('.env', override=False) diff --git a/openai_forward/__main__.py b/openai_forward/__main__.py index c3fe16d..558490a 100644 --- a/openai_forward/__main__.py +++ b/openai_forward/__main__.py @@ -1,12 +1,20 @@ import atexit +import datetime import os import pickle import platform import signal import subprocess +from pathlib import Path import fire import uvicorn +import yaml + + +def save_yaml(path: Path, data: dict): + with open(path, 'w') as f: + yaml.dump(data, f) class Cli: @@ -106,10 +114,16 @@ def mq_worker(log_socket: zmq.Socket): while True: message = socket.recv() - env_dict: dict = pickle.loads(message) + config_dict: dict = pickle.loads(message) + config_path = Path("openai-forward-config.yaml") + # backup + time_str = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + backup_path = Path(f"openai-forward-config.yaml.{time_str}.bak") + if config_path.exists(): + # rename openai-forward-config.yaml to openai-forward-config.yaml.bak + config_path.rename(backup_path) - for key, value in env_dict.items(): - os.environ[key] = value + save_yaml(config_path, config_dict) self._restart_uvicorn( port=port, @@ -208,8 +222,8 @@ def convert(log_folder: str = None, target_path: str = None): Returns: None """ + from openai_forward.config.settings import OPENAI_ROUTE_PREFIX from openai_forward.helper import convert_folder_to_jsonl, route_prefix_to_str - from openai_forward.settings import OPENAI_ROUTE_PREFIX print(60 * '-') if log_folder is None: diff --git a/openai_forward/app.py b/openai_forward/app.py index 6c62c56..98bbf1b 100644 --- a/openai_forward/app.py +++ b/openai_forward/app.py @@ -1,12 +1,12 @@ +from contextlib import asynccontextmanager + from fastapi import FastAPI, Request, status from fastapi.middleware.cors import CORSMiddleware from slowapi import Limiter, _rate_limit_exceeded_handler from slowapi.errors import RateLimitExceeded -from . import __version__, custom_slowapi -from .forward import ForwardManager -from .helper import normalize_route as normalize_route_path -from .settings import ( +from . 
import __version__ +from .config.settings import ( BENCHMARK_MODE, RATE_LIMIT_BACKEND, RATE_LIMIT_STRATEGY, @@ -14,6 +14,8 @@ get_limiter_key, show_startup, ) +from .forward import ForwardManager +from .helper import normalize_route as normalize_route_path forward_manager = ForwardManager() @@ -22,7 +24,18 @@ strategy=RATE_LIMIT_STRATEGY, storage_uri=RATE_LIMIT_BACKEND, ) -app = FastAPI(title="openai-forward", version=__version__) + + +@asynccontextmanager +async def lifespan(app: FastAPI): + # Startup logic + await forward_manager.start_up() + yield + # Shutdown logic + await forward_manager.shutdown() + + +app = FastAPI(title="openai-forward", version=__version__, lifespan=lifespan) app.state.limiter = limiter app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) @@ -63,17 +76,6 @@ def healthz(request: Request): methods=["POST"], ) - -@app.on_event("startup") -async def startup(): - await forward_manager.start_up() - - -@app.on_event("shutdown") -async def shutdown(): - await forward_manager.shutdown() - - add_route = lambda obj: app.add_route( obj.ROUTE_PREFIX + "{api_path:path}", route=limiter.limit(dynamic_request_rate_limit)(obj.reverse_proxy), @@ -83,5 +85,4 @@ async def shutdown(): [add_route(obj) for obj in forward_manager.generic_objs] [add_route(obj) for obj in forward_manager.root_objs] - show_startup() diff --git a/openai_forward/cache/__init__.py b/openai_forward/cache/__init__.py index 5ae8d66..90c8dd0 100644 --- a/openai_forward/cache/__init__.py +++ b/openai_forward/cache/__init__.py @@ -1,12 +1,13 @@ -from flaxkv.pack import decode, encode +from flaxkv.pack import encode from loguru import logger -from ..settings import ( +from openai_forward.config.settings import ( CACHE_OPENAI, CACHE_ROUTE_SET, CHAT_COMPLETION_ROUTE, EMBEDDING_ROUTE, ) + from .chat.response import gen_response, get_cached_chat_response from .database import db_dict from .embedding.response import get_cached_embedding_response diff --git a/openai_forward/cache/chat/chat_completions.py b/openai_forward/cache/chat/chat_completions.py index e6c7511..9e14ec8 100644 --- a/openai_forward/cache/chat/chat_completions.py +++ b/openai_forward/cache/chat/chat_completions.py @@ -9,13 +9,10 @@ from fastapi import Request from fastapi.responses import Response, StreamingResponse -from ...decorators import ( - async_random_sleep, - async_token_rate_limit_auth_level, - random_sleep, -) +from openai_forward.config.settings import FWD_KEY, token_interval_conf + +from ...decorators import async_token_rate_limit_auth_level, random_sleep from ...helper import get_unique_id -from ...settings import FWD_KEY, token_interval_conf from .tokenizer import TIKTOKEN_VALID, count_tokens, encode_as_pieces diff --git a/openai_forward/cache/chat/response.py b/openai_forward/cache/chat/response.py index 7b92c62..f055b14 100644 --- a/openai_forward/cache/chat/response.py +++ b/openai_forward/cache/chat/response.py @@ -7,7 +7,8 @@ from flaxkv.pack import encode from loguru import logger -from ...settings import CACHE_OPENAI, FWD_KEY +from openai_forward.config.settings import CACHE_OPENAI, FWD_KEY + from ..database import db_dict from .chat_completions import ( async_token_rate_limit_auth_level, diff --git a/openai_forward/cache/database.py b/openai_forward/cache/database.py index 346c94b..abe06ef 100644 --- a/openai_forward/cache/database.py +++ b/openai_forward/cache/database.py @@ -1,6 +1,6 @@ from flaxkv import FlaxKV -from ..settings import CACHE_BACKEND, CACHE_ROOT_PATH_OR_URL, LOG_CACHE_DB_INFO +from 
..config.settings import CACHE_BACKEND, CACHE_ROOT_PATH_OR_URL, LOG_CACHE_DB_INFO if CACHE_BACKEND.upper() == "MEMORY": db_dict = {} diff --git a/openai_forward/cache/embedding/response.py b/openai_forward/cache/embedding/response.py index 5df4a17..dceedfb 100644 --- a/openai_forward/cache/embedding/response.py +++ b/openai_forward/cache/embedding/response.py @@ -4,7 +4,7 @@ from flaxkv.pack import encode from loguru import logger -from ...settings import CACHE_OPENAI +from ...config.settings import CACHE_OPENAI from ..database import db_dict diff --git a/openai_forward/config/interface.py b/openai_forward/config/interface.py index bd13d23..998c134 100644 --- a/openai_forward/config/interface.py +++ b/openai_forward/config/interface.py @@ -1,16 +1,21 @@ import json -import os -from typing import Dict, List, Literal, Optional, Tuple, Union +from typing import Literal -from attrs import asdict, define, field, filters +from attrs import asdict, define, field -from ..settings import * +from openai_forward.config.settings import * class Base: def to_dict(self, drop_none=True): if drop_none: - return asdict(self, filter=filters.exclude(type(None))) + + def custom_filter(attribute, value): + if drop_none: + return value is not None + return True + + return asdict(self, filter=custom_filter) return asdict(self) def to_dict_str(self): @@ -48,23 +53,22 @@ class CacheConfig(Base): backend: str = 'LevelDB' root_path_or_url: str = './FLAXKV_DB' default_request_caching_value: bool = True - cache_openai: bool = False - cache_general: bool = False - cache_routes: List = ['/v1/chat/completions'] + openai: bool = False + general: bool = False + routes: List = ['/v1/chat/completions'] def convert_to_env(self, set_env=False): - env_dict = {} - env_dict['CACHE_OPENAI'] = str(self.cache_openai) - env_dict['CACHE_GENERAL'] = str(self.cache_general) + env_dict['CACHE_OPENAI'] = str(self.openai) + env_dict['CACHE_GENERAL'] = str(self.general) env_dict['CACHE_BACKEND'] = self.backend env_dict['CACHE_ROOT_PATH_OR_URL'] = self.root_path_or_url env_dict['DEFAULT_REQUEST_CACHING_VALUE'] = str( self.default_request_caching_value ) - env_dict['CACHE_ROUTES'] = json.dumps(self.cache_routes) + env_dict['CACHE_ROUTES'] = json.dumps(self.routes) if set_env: os.environ.update(env_dict) @@ -84,22 +88,22 @@ class RateLimit(Base): token_rate_limit: List[RateLimitType] = [ RateLimitType( route="/v1/chat/completions", - value=[{"level": '0', "limit": "60/second"}], + value=[{"level": 0, "limit": "60/second"}], ), RateLimitType( - route="/v1/completions", value=[{"level": '0', "limit": "60/second"}] + route="/v1/completions", value=[{"level": 0, "limit": "60/second"}] ), ] req_rate_limit: List[RateLimitType] = [ RateLimitType( route="/v1/chat/completions", - value=[{"level": '0', "limit": "100/2minutes"}], + value=[{"level": 0, "limit": "100/2minutes"}], ), RateLimitType( - route="/v1/completions", value=[{"level": '0', "limit": "60/minute"}] + route="/v1/completions", value=[{"level": 0, "limit": "60/minute"}] ), RateLimitType( - route="/v1/embeddings", value=[{"level": '0', "limit": "100/2minutes"}] + route="/v1/embeddings", value=[{"level": 0, "limit": "100/2minutes"}] ), ] iter_chunk: Literal['one-by-one', 'efficiency'] = 'one-by-one' @@ -125,8 +129,8 @@ def convert_to_env(self, set_env=False): @define(slots=True) class ApiKey(Base): - openai_key: Dict = {"": "0"} - forward_key: Dict = {"": 0} + openai_key: Dict = {"sk-xx1": [0]} + forward_key: Dict = {0: ["fk-1"]} level: Dict = {1: ["gpt-3.5-turbo"]} def convert_to_env(self, 
set_env=False): @@ -146,13 +150,13 @@ def convert_to_env(self, set_env=False): @define(slots=True) class Log(Base): - LOG_GENERAL: bool = True - LOG_OPENAI: bool = True + general: bool = True + openai: bool = True def convert_to_env(self, set_env=False): env_dict = {} - env_dict['LOG_GENERAL'] = str(self.LOG_GENERAL) - env_dict['LOG_OPENAI'] = str(self.LOG_OPENAI) + env_dict['LOG_GENERAL'] = str(self.general) + env_dict['LOG_OPENAI'] = str(self.openai) if set_env: os.environ.update(env_dict) return env_dict @@ -160,7 +164,15 @@ def convert_to_env(self, set_env=False): @define(slots=True) class Config(Base): - forward: Forward = Forward() + # forward: Forward = Forward() + forward: List[ForwardItem] = [ + ForwardItem(base_url="https://api.openai.com", route="/", type="openai"), + ForwardItem( + base_url="https://generativelanguage.googleapis.com", + route="/gemini", + type="general", + ), + ] api_key: ApiKey = ApiKey() @@ -179,8 +191,9 @@ class Config(Base): default_stream_response: bool = True def convert_to_env(self, set_env=False): - env_dict = {} - env_dict.update(self.forward.convert_to_env()) + # env_dict = {} + # env_dict.update(self.forward.convert_to_env()) + env_dict = {'FORWARD_CONFIG': json.dumps([i.to_dict() for i in self.forward])} env_dict.update(self.api_key.convert_to_env()) env_dict.update(self.cache.convert_to_env()) env_dict.update(self.rate_limit.convert_to_env()) @@ -206,8 +219,8 @@ def come_from_env(self): self.timezone = os.environ.get('TZ', 'Asia/Shanghai') self.benchmark_mode = BENCHMARK_MODE self.proxy = PROXY or "" - self.log.LOG_OPENAI = LOG_OPENAI - self.log.LOG_GENERAL = LOG_GENERAL + self.log.openai = LOG_OPENAI + self.log.general = LOG_GENERAL self.rate_limit.strategy = RATE_LIMIT_STRATEGY self.rate_limit.global_rate_limit = GLOBAL_RATE_LIMIT @@ -223,15 +236,24 @@ def come_from_env(self): self.cache.backend = CACHE_BACKEND self.cache.root_path_or_url = CACHE_ROOT_PATH_OR_URL self.cache.default_request_caching_value = DEFAULT_REQUEST_CACHING_VALUE - self.cache.cache_openai = CACHE_OPENAI or self.cache.cache_openai - self.cache.cache_general = CACHE_GENERAL or self.cache.cache_general - self.cache.cache_routes = list(CACHE_ROUTE_SET) or self.cache.cache_routes + self.cache.openai = CACHE_OPENAI or self.cache.openai + self.cache.general = CACHE_GENERAL or self.cache.general + self.cache.routes = list(CACHE_ROUTE_SET) or self.cache.routes self.api_key.level = LEVEL_MODELS or self.api_key.level - self.api_key.openai_key = { - key: ','.join([str(i) for i in value]) - for key, value in OPENAI_API_KEY.items() - } or self.api_key.openai_key - self.api_key.forward_key = FWD_KEY or self.api_key.forward_key - self.forward.forward = [ForwardItem(**i) for i in FORWARD_CONFIG] + self.api_key.openai_key = OPENAI_API_KEY or self.api_key.openai_key + self.api_key.forward_key = LEVEL_TO_FWD_KEY or self.api_key.forward_key + self.forward = [ForwardItem(**i) for i in FORWARD_CONFIG] return self + + +if __name__ == "__main__": + import yaml + + def save_dict_to_yaml(data, file_path): + with open(file_path, 'w') as file: + yaml.dump(data, file, default_flow_style=False) + + config = Config() + print(config.to_dict()) + save_dict_to_yaml(config.to_dict(), 'config.yaml') diff --git a/openai_forward/config/settings.py b/openai_forward/config/settings.py new file mode 100644 index 0000000..f32f120 --- /dev/null +++ b/openai_forward/config/settings.py @@ -0,0 +1,301 @@ +import itertools +import os +from pathlib import Path +from typing import Any, Dict, List, Set + +import limits 
+import yaml
+from fastapi import Request
+
+from openai_forward.console import print_rate_limit_info, print_startup_info
+from openai_forward.content.config import setting_log
+from openai_forward.helper import format_route_prefix
+
+config_file_path = Path("openai-forward-config.yaml")
+if config_file_path.exists():
+    with open(config_file_path) as file:
+        config = yaml.safe_load(file)
+else:
+    config = {}
+
+if not config:
+    # Loading configuration from environment variables will be deprecated
+    from openai_forward.helper import (
+        env2dict,
+        env2list,
+        format_route_prefix,
+        get_client_ip,
+    )
+
+    TIMEOUT = float(os.environ.get("TIMEOUT", "").strip() or "10")
+    DEFAULT_STREAM_RESPONSE = (
+        os.environ.get("DEFAULT_STREAM_RESPONSE", "True").strip().lower() == "true"
+    )
+
+    ITER_CHUNK_TYPE = (
+        os.environ.get("ITER_CHUNK_TYPE", "").strip() or "efficiency"
+    ) # Options: efficiency, one-by-one
+
+    CHAT_COMPLETION_ROUTE = (
+        os.environ.get("CHAT_COMPLETION_ROUTE", "/v1/chat/completions").strip().lower()
+    )
+    COMPLETION_ROUTE = (
+        os.environ.get("COMPLETION_ROUTE", "/v1/completions").strip().lower()
+    )
+    EMBEDDING_ROUTE = (
+        os.environ.get("EMBEDDING_ROUTE", "/v1/embeddings").strip().lower()
+    )
+    CUSTOM_GENERAL_ROUTE = os.environ.get("CUSTOM_GENERAL_ROUTE", "").strip().lower()
+
+    CACHE_ROUTE_SET = set(env2dict("CACHE_ROUTES", []))
+
+    FORWARD_CONFIG = env2dict(
+        "FORWARD_CONFIG",
+        [{"base_url": "https://api.openai.com", "route": "/", "type": "openai"}],
+    )
+
+    CUSTOM_MODEL_CONFIG = env2dict("CUSTOM_MODEL_CONFIG", {})
+
+    token_rate_limit_conf = env2dict("TOKEN_RATE_LIMIT")
+    PRINT_CHAT = os.environ.get("PRINT_CHAT", "False").strip().lower() == "true"
+
+    LOG_OPENAI = os.environ.get("LOG_OPENAI", "False").strip().lower() == "true"
+    LOG_GENERAL = os.environ.get("LOG_GENERAL", "False").strip().lower() == "true"
+
+    CACHE_OPENAI = os.environ.get("CACHE_OPENAI", "False").strip().lower() == "true"
+    CACHE_GENERAL = os.environ.get("CACHE_GENERAL", "False").strip().lower() == "true"
+
+    BENCHMARK_MODE = os.environ.get("BENCHMARK_MODE", "false").strip().lower() == "true"
+
+    LOG_CACHE_DB_INFO = (
+        os.environ.get("LOG_CACHE_DB_INFO", "false").strip().lower() == "true"
+    )
+    CACHE_BACKEND = os.environ.get("CACHE_BACKEND", "MEMORY").strip()
+    CACHE_ROOT_PATH_OR_URL = os.environ.get("CACHE_ROOT_PATH_OR_URL", "..").strip()
+
+    PROXY = os.environ.get("PROXY", "").strip() or None
+    GLOBAL_RATE_LIMIT = os.environ.get("GLOBAL_RATE_LIMIT", "").strip() or "inf"
+    RATE_LIMIT_BACKEND = os.environ.get("REQ_RATE_LIMIT_BACKEND", "").strip() or None
+    RATE_LIMIT_STRATEGY = (
+        os.environ.get("RATE_LIMIT_STRATEGY", "fixed-window").strip() or "fixed-window"
+    )
+    req_rate_limit_dict = env2dict('REQ_RATE_LIMIT')
+
+    DEFAULT_REQUEST_CACHING_VALUE = (
+        os.environ.get("DEFAULT_REQUEST_CACHING_VALUE", "false").strip().lower()
+        == "true"
+    )
+
+    OPENAI_API_KEY = env2dict("OPENAI_API_KEY_CONFIG")
+
+    LEVEL_TO_FWD_KEY = env2dict("FORWARD_KEY_CONFIG")
+    LEVEL_MODELS = {int(key): value for key, value in env2dict("LEVEL_MODELS").items()}
+
+    ENV_VAR_SEP = ","
+
+    IP_WHITELIST = env2list("IP_WHITELIST", sep=ENV_VAR_SEP)
+    IP_BLACKLIST = env2list("IP_BLACKLIST", sep=ENV_VAR_SEP)
+else:
+    TIMEOUT = float(config.get('timeout', 10))
+    DEFAULT_STREAM_RESPONSE = config.get('default_stream_response', True)
+
+    CHAT_COMPLETION_ROUTE = config.get(
+        'chat_completion_route', '/v1/chat/completions'
+    ).lower()
+    COMPLETION_ROUTE = config.get('completion_route', '/v1/completions').lower()
+    EMBEDDING_ROUTE = config.get('embedding_route', '/v1/embeddings').lower()
+    CUSTOM_GENERAL_ROUTE = config.get('custom_general_route', '').lower()
+
+    CACHE_ROUTE_SET: Set[str] = set(config.get('cache', {}).get('routes', []))
+
+    openai_additional_start_info = {'cache_routes': CACHE_ROUTE_SET}
+    general_additional_start_info = {'cache_routes': CACHE_ROUTE_SET}
+
+    FORWARD_CONFIG = config.get(
+        'forward',
+        [{"base_url": "https://api.openai.com", "route": "/", "type": "openai"}],
+    )
+
+    CUSTOM_MODEL_CONFIG = config.get('custom_model_config', {})
+
+    PRINT_CHAT = config.get('print_chat', False)
+
+    LOG_OPENAI = config.get('log', {}).get('openai', False)
+    LOG_GENERAL = config.get('log', {}).get('general', False)
+
+    CACHE_OPENAI = config.get('cache', {}).get('openai', False)
+    CACHE_GENERAL = config.get('cache', {}).get('general', False)
+    DEFAULT_REQUEST_CACHING_VALUE = config.get('cache', {}).get(
+        'default_request_caching_value', False
+    )
+
+    BENCHMARK_MODE = config.get('benchmark_mode', False)
+
+    LOG_CACHE_DB_INFO = config.get('log_cache_db_info', False)
+    CACHE_BACKEND = config.get('cache', {}).get('backend', 'MEMORY')
+    CACHE_ROOT_PATH_OR_URL = config.get('cache', {}).get('root_path_or_url', '.')
+
+    PROXY = config.get('proxy')
+
+    IP_WHITELIST = config.get("ip_whitelist", [])
+    IP_BLACKLIST = config.get("ip_blacklist", [])
+
+    _api_key = config.get("api_key", {})
+    OPENAI_API_KEY = _api_key.get("openai_key", {})
+    LEVEL_TO_FWD_KEY = _api_key.get("forward_key", {})
+    LEVEL_MODELS = _api_key.get("level", {})
+
+    _rate_limit = config.get("rate_limit", {})
+    _token_rate_limit_list = _rate_limit.get('token_rate_limit', [])
+    token_rate_limit_conf = {
+        item['route']: item['value'] for item in _token_rate_limit_list
+    }
+    GLOBAL_RATE_LIMIT = _rate_limit.get('global_rate_limit', 'inf')
+    RATE_LIMIT_STRATEGY = _rate_limit.get('strategy', 'fixed-window')
+    _req_rate_limit_list = _rate_limit.get('req_rate_limit', [])
+    RATE_LIMIT_BACKEND = _rate_limit.get('req_rate_limit_backend', None)
+    req_rate_limit_dict = {
+        item['route']: item['value'] for item in _req_rate_limit_list
+    }
+
+    ITER_CHUNK_TYPE = _rate_limit.get('iter_chunk', 'efficiency')
+
+openai_additional_start_info = {}
+general_additional_start_info = {}
+
+openai_additional_start_info['cache_routes'] = CACHE_ROUTE_SET
+general_additional_start_info['cache_routes'] = CACHE_ROUTE_SET
+
+OPENAI_BASE_URL = [
+    i['base_url'] for i in FORWARD_CONFIG if i and i.get('type') == 'openai'
+]
+OPENAI_ROUTE_PREFIX = [
+    format_route_prefix(i['route'])
+    for i in FORWARD_CONFIG
+    if i and i.get('type') == 'openai'
+]
+
+GENERAL_BASE_URL = [
+    i['base_url'] for i in FORWARD_CONFIG if i and i.get('type') == 'general'
+]
+GENERAL_ROUTE_PREFIX = [
+    format_route_prefix(i['route'])
+    for i in FORWARD_CONFIG
+    if i and i.get('type') == 'general'
+]
+
+for openai_route, general_route in zip(OPENAI_ROUTE_PREFIX, GENERAL_ROUTE_PREFIX):
+    assert openai_route not in GENERAL_ROUTE_PREFIX
+    assert general_route not in OPENAI_ROUTE_PREFIX
+
+if BENCHMARK_MODE:
+    openai_additional_start_info["benchmark_mode"] = BENCHMARK_MODE
+
+openai_additional_start_info["LOG_OPENAI"] = LOG_OPENAI
+general_additional_start_info["LOG_GENERAL"] = LOG_GENERAL
+
+if LOG_OPENAI:
+    setting_log(openai_route_prefix=OPENAI_ROUTE_PREFIX, print_chat=PRINT_CHAT)
+
+if PRINT_CHAT:
+    openai_additional_start_info["print_chat"] = True
+
+DEFAULT_REQUEST_CACHING_VALUE = (DEFAULT_REQUEST_CACHING_VALUE and CACHE_OPENAI) or (
+    DEFAULT_REQUEST_CACHING_VALUE and CACHE_GENERAL
+)
+if CACHE_OPENAI:
+    openai_additional_start_info["cache_backend"] = CACHE_BACKEND
+    if not 
CACHE_BACKEND.lower() == 'memory': + openai_additional_start_info["cache_root_path_or_url"] = CACHE_ROOT_PATH_OR_URL + openai_additional_start_info[ + "default_request_caching_value" + ] = DEFAULT_REQUEST_CACHING_VALUE + +if CACHE_GENERAL: + general_additional_start_info["cache_backend"] = CACHE_BACKEND + if not CACHE_BACKEND.lower() == 'memory': + general_additional_start_info["cache_root_path_or_url"] = CACHE_ROOT_PATH_OR_URL + general_additional_start_info[ + "default_request_caching_value" + ] = DEFAULT_REQUEST_CACHING_VALUE + +FWD_KEY = {} +for level, fk_list in LEVEL_TO_FWD_KEY.items(): + for _fk in fk_list: + FWD_KEY[_fk] = int(level) + +if PROXY: + openai_additional_start_info["proxy"] = PROXY + + +def get_limiter_key(request: Request): + limiter_prefix = f"{request.scope.get('root_path')}{request.scope.get('path')}" + fk_or_sk = request.headers.get("Authorization", "default") + key = f"{limiter_prefix},{fk_or_sk}" + return key + + +def dynamic_request_rate_limit(key: str): + limite_prefix, fk_or_sk = key.split(',') + key_level = FWD_KEY.get(fk_or_sk, 0) + for route in req_rate_limit_dict: + if key.startswith(route): + for level_dict in req_rate_limit_dict[route]: + if level_dict['level'] == key_level: + return level_dict['limit'] + + break + return GLOBAL_RATE_LIMIT + + +def cvt_token_rate_to_interval(token_rate_limit: str): + if token_rate_limit: + rate_limit_item = limits.parse(token_rate_limit) + token_interval = ( + rate_limit_item.multiples * rate_limit_item.GRANULARITY.seconds + ) / rate_limit_item.amount + else: + token_interval = 0 + return token_interval + + +token_interval_conf = {} +for route, rate_limit_list in token_rate_limit_conf.items(): + token_interval_conf.setdefault(route, {}) + for level_dict in rate_limit_list: + token_interval_conf[route][level_dict['level']] = cvt_token_rate_to_interval( + level_dict['limit'] + ) + +styles = itertools.cycle( + ["#7CD9FF", "#BDADFF", "#9EFFE3", "#f1b8e4", "#F5A88E", "#BBCA89"] +) + + +def show_startup(): + for base_url, route_prefix in zip(OPENAI_BASE_URL, OPENAI_ROUTE_PREFIX): + print_startup_info( + base_url, + route_prefix, + OPENAI_API_KEY, + FWD_KEY, + style=next(styles), + **openai_additional_start_info, + ) + for base_url, route_prefix in zip(GENERAL_BASE_URL, GENERAL_ROUTE_PREFIX): + print_startup_info( + base_url, + route_prefix, + "", + "", + style=next(styles), + **general_additional_start_info, + ) + + print_rate_limit_info( + RATE_LIMIT_BACKEND, + RATE_LIMIT_STRATEGY, + GLOBAL_RATE_LIMIT, + req_rate_limit_dict, + token_rate_limit_conf, + ) diff --git a/openai_forward/content/openai.py b/openai_forward/content/openai.py index 4e05f40..58f80d5 100644 --- a/openai_forward/content/openai.py +++ b/openai_forward/content/openai.py @@ -9,8 +9,9 @@ from loguru import logger from orjson import JSONDecodeError +from openai_forward.config.settings import DEFAULT_REQUEST_CACHING_VALUE + from ..helper import get_client_ip, get_unique_id, route_prefix_to_str -from ..settings import DEFAULT_REQUEST_CACHING_VALUE from .helper import markdown_print, parse_sse_buffer, print @@ -173,7 +174,10 @@ def parse_payload(self, request: Request, raw_payload): if self.webui: self.q.put({"uid": uid, "payload": raw_payload}) - payload = orjson.loads(raw_payload) + if raw_payload: + payload = orjson.loads(raw_payload) + else: + payload = {} caching = payload.pop("caching", None) if caching is None: caching = DEFAULT_REQUEST_CACHING_VALUE diff --git a/openai_forward/forward/__init__.py b/openai_forward/forward/__init__.py index a8f1567..8bfbd0c 
100644 --- a/openai_forward/forward/__init__.py +++ b/openai_forward/forward/__init__.py @@ -1,12 +1,13 @@ from typing import List -from ..settings import ( +from openai_forward.config.settings import ( GENERAL_BASE_URL, GENERAL_ROUTE_PREFIX, OPENAI_BASE_URL, OPENAI_ROUTE_PREFIX, PROXY, ) + from .core import GenericForward, OpenaiForward diff --git a/openai_forward/forward/core.py b/openai_forward/forward/core.py index 5edc2c6..a1a1a61 100644 --- a/openai_forward/forward/core.py +++ b/openai_forward/forward/core.py @@ -4,10 +4,12 @@ import traceback from asyncio import Queue from itertools import cycle -from typing import Any, AsyncGenerator, Iterable +from typing import AsyncGenerator import aiohttp import anyio +import litellm +import orjson from aiohttp import TCPConnector from fastapi import HTTPException, Request, status from loguru import logger @@ -19,6 +21,7 @@ get_cached_generic_response, get_cached_response, ) +from ..config.settings import * from ..content.openai import ( ChatLogger, CompletionLogger, @@ -26,8 +29,7 @@ WhisperLogger, ) from ..decorators import async_retry, async_token_rate_limit_auth_level -from ..helper import InfiniteSet, get_client_ip, get_unique_id -from ..settings import * +from ..helper import InfiniteSet, get_client_ip # from beartype import beartype @@ -140,7 +142,7 @@ def validate_request_host(ip): logger.warning(f"IP {ip} is unauthorized") raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, - detail=f"Forbidden Error", + detail="Forbidden Error", ) @staticmethod @@ -312,9 +314,9 @@ def prepare_client(self, request: Request, return_origin_header=False) -> dict: 'route_path': route_path, } - async def _handle_payload(self, request: Request, route_path: str, model_set): + def _handle_payload(self, method: str, payload, route_path: str, model_set): - if not request.method == "POST": + if method != "POST": return if route_path in ( @@ -323,7 +325,6 @@ async def _handle_payload(self, request: Request, route_path: str, model_set): EMBEDDING_ROUTE, CUSTOM_GENERAL_ROUTE, ): - payload = await request.json() model = payload.get("model", None) if model is not None and model not in model_set: @@ -345,14 +346,18 @@ async def reverse_proxy(self, request: Request): """ assert self.client data = await request.body() + if data: + payload = orjson.loads(data) + else: + payload = {} if LOG_GENERAL: - logger.debug(f"payload: {data}") + logger.debug(f"payload: {payload}") client_config = self.prepare_client(request, return_origin_header=True) route_path = client_config["route_path"] _, model_set = self.handle_authorization(client_config) - payload = await self._handle_payload(request, route_path, model_set) + self._handle_payload(request.method, payload, route_path, model_set) cached_response, cache_key = get_cached_generic_response( data, request, route_path @@ -361,6 +366,37 @@ async def reverse_proxy(self, request: Request): if cached_response: return cached_response + if CUSTOM_MODEL_CONFIG and route_path in (CHAT_COMPLETION_ROUTE,): + prev_model = payload['model'] + custom_model_map = CUSTOM_MODEL_CONFIG['model_map'] + current_model = custom_model_map.get(prev_model, prev_model) + if current_model in custom_model_map.values(): + if CUSTOM_MODEL_CONFIG['backend'] == "ollama": + api_base = CUSTOM_MODEL_CONFIG['api_base'] + prev_model = payload['model'] + payload['model'] = f"ollama_chat/{current_model}" + logger.debug(f"{prev_model} -> {payload['model']=}") + + r = await litellm.acompletion( + **payload, + api_base=api_base, + ) + + 
@async_token_rate_limit_auth_level(token_interval_conf, FWD_KEY) + async def stream(request: Request): + if payload.get("stream", True): + async for chunk in r: + yield b'data: ' + orjson.dumps( + chunk.to_dict() + ) + b'\n\n' + else: + yield orjson.dumps(r.to_dict()) + + return StreamingResponse( + stream(request), + status_code=200, + media_type="text/event-stream", + ) r = await self.send(client_config, data=data) return StreamingResponse( @@ -491,7 +527,7 @@ async def _handle_payload(self, request: Request, route_path: str, model_set): else: ... - except Exception as e: + except Exception: logger.warning( f"log chat error:\nhost:{request.client.host} method:{request.method}: {traceback.format_exc()}" ) diff --git a/openai_forward/settings.py b/openai_forward/settings.py deleted file mode 100644 index 648cc72..0000000 --- a/openai_forward/settings.py +++ /dev/null @@ -1,210 +0,0 @@ -import itertools -import os - -import limits -from fastapi import Request - -from .console import print_rate_limit_info, print_startup_info -from .content.config import setting_log -from .helper import env2dict, env2list, format_route_prefix, get_client_ip - -openai_additional_start_info = {} -general_additional_start_info = {} - -TIMEOUT = float(os.environ.get("TIMEOUT", "").strip() or "10") -DEFAULT_STREAM_RESPONSE = ( - os.environ.get("DEFAULT_STREAM_RESPONSE", "True").strip().lower() == "true" -) - -ITER_CHUNK_TYPE = ( - os.environ.get("ITER_CHUNK_TYPE", "").strip() or "efficiency" -) # Options: efficiency, one-by-one - -CHAT_COMPLETION_ROUTE = ( - os.environ.get("CHAT_COMPLETION_ROUTE", "/v1/chat/completions").strip().lower() -) -COMPLETION_ROUTE = os.environ.get("COMPLETION_ROUTE", "/v1/completions").strip().lower() -EMBEDDING_ROUTE = os.environ.get("EMBEDDING_ROUTE", "/v1/embeddings").strip().lower() -CUSTOM_GENERAL_ROUTE = os.environ.get("CUSTOM_GENERAL_ROUTE", "").strip().lower() - -CACHE_ROUTE_SET = set(env2dict("CACHE_ROUTES", [])) - -openai_additional_start_info['cache_routes'] = CACHE_ROUTE_SET -general_additional_start_info['cache_routes'] = CACHE_ROUTE_SET - -FORWARD_CONFIG = env2dict( - "FORWARD_CONFIG", - [{"base_url": "https://api.openai.com", "route": "/", "type": "openai"}], -) - -ENV_VAR_SEP = "," - -OPENAI_BASE_URL = [ - i['base_url'] for i in FORWARD_CONFIG if i and i.get('type') == 'openai' -] -OPENAI_ROUTE_PREFIX = [ - format_route_prefix(i['route']) - for i in FORWARD_CONFIG - if i and i.get('type') == 'openai' -] - -GENERAL_BASE_URL = [ - i['base_url'] for i in FORWARD_CONFIG if i and i.get('type') == 'general' -] -GENERAL_ROUTE_PREFIX = [ - format_route_prefix(i['route']) - for i in FORWARD_CONFIG - if i and i.get('type') == 'general' -] - -for openai_route, general_route in zip(OPENAI_ROUTE_PREFIX, GENERAL_ROUTE_PREFIX): - assert openai_route not in GENERAL_ROUTE_PREFIX - assert general_route not in OPENAI_ROUTE_PREFIX - -BENCHMARK_MODE = os.environ.get("BENCHMARK_MODE", "false").strip().lower() == "true" -if BENCHMARK_MODE: - openai_additional_start_info["benchmark_mode"] = BENCHMARK_MODE - -PRINT_CHAT = os.environ.get("PRINT_CHAT", "False").strip().lower() == "true" - -LOG_OPENAI = os.environ.get("LOG_OPENAI", "False").strip().lower() == "true" -LOG_GENERAL = os.environ.get("LOG_GENERAL", "False").strip().lower() == "true" - -CACHE_OPENAI = os.environ.get("CACHE_OPENAI", "False").strip().lower() == "true" -CACHE_GENERAL = os.environ.get("CACHE_GENERAL", "False").strip().lower() == "true" - -openai_additional_start_info["LOG_OPENAI"] = LOG_OPENAI 
-general_additional_start_info["LOG_GENERAL"] = LOG_GENERAL - - -if LOG_OPENAI: - setting_log(openai_route_prefix=OPENAI_ROUTE_PREFIX, print_chat=PRINT_CHAT) - - -if PRINT_CHAT: - openai_additional_start_info["print_chat"] = True - - -LOG_CACHE_DB_INFO = ( - os.environ.get("LOG_CACHE_DB_INFO", "false").strip().lower() == "true" -) -CACHE_BACKEND = os.environ.get("CACHE_BACKEND", "MEMORY").strip() -CACHE_ROOT_PATH_OR_URL = os.environ.get("CACHE_ROOT_PATH_OR_URL", ".").strip() - -DEFAULT_REQUEST_CACHING_VALUE = False -if CACHE_OPENAI: - openai_additional_start_info["cache_backend"] = CACHE_BACKEND - if not CACHE_BACKEND.lower() == 'memory': - openai_additional_start_info["cache_root_path_or_url"] = CACHE_ROOT_PATH_OR_URL - DEFAULT_REQUEST_CACHING_VALUE = ( - os.environ.get("DEFAULT_REQUEST_CACHING_VALUE", "false").strip().lower() - == "true" - ) - openai_additional_start_info[ - "default_request_caching_value" - ] = DEFAULT_REQUEST_CACHING_VALUE - -if CACHE_GENERAL: - general_additional_start_info["cache_backend"] = CACHE_BACKEND - if not CACHE_BACKEND.lower() == 'memory': - general_additional_start_info["cache_root_path_or_url"] = CACHE_ROOT_PATH_OR_URL - DEFAULT_REQUEST_CACHING_VALUE = ( - os.environ.get("DEFAULT_REQUEST_CACHING_VALUE", "false").strip().lower() - == "true" - ) - general_additional_start_info[ - "default_request_caching_value" - ] = DEFAULT_REQUEST_CACHING_VALUE - -IP_WHITELIST = env2list("IP_WHITELIST", sep=ENV_VAR_SEP) -IP_BLACKLIST = env2list("IP_BLACKLIST", sep=ENV_VAR_SEP) - -OPENAI_API_KEY = env2dict("OPENAI_API_KEY_CONFIG") -FWD_KEY = env2dict("FORWARD_KEY_CONFIG") -LEVEL_MODELS = {int(key): value for key, value in env2dict("LEVEL_MODELS").items()} - -PROXY = os.environ.get("PROXY", "").strip() or None - -if PROXY: - openai_additional_start_info["proxy"] = PROXY - -GLOBAL_RATE_LIMIT = os.environ.get("GLOBAL_RATE_LIMIT", "").strip() or "inf" -RATE_LIMIT_BACKEND = os.environ.get("REQ_RATE_LIMIT_BACKEND", "").strip() or None -RATE_LIMIT_STRATEGY = ( - os.environ.get("RATE_LIMIT_STRATEGY", "fixed-window").strip() or "fixed-window" -) -req_rate_limit_dict = env2dict('REQ_RATE_LIMIT') - - -def get_limiter_key(request: Request): - limiter_prefix = f"{request.scope.get('root_path')}{request.scope.get('path')}" - fk_or_sk = request.headers.get("Authorization", "default") - key = f"{limiter_prefix},{fk_or_sk}" - return key - - -def dynamic_request_rate_limit(key: str): - limite_prefix, fk_or_sk = key.split(',') - key_level = FWD_KEY.get(fk_or_sk, 0) - for route in req_rate_limit_dict: - if key.startswith(route): - for level_dict in req_rate_limit_dict[route]: - if level_dict['level'] == key_level: - return level_dict['limit'] - - break - return GLOBAL_RATE_LIMIT - - -def cvt_token_rate_to_interval(token_rate_limit: str): - if token_rate_limit: - rate_limit_item = limits.parse(token_rate_limit) - token_interval = ( - rate_limit_item.multiples * rate_limit_item.GRANULARITY.seconds - ) / rate_limit_item.amount - else: - token_interval = 0 - return token_interval - - -token_rate_limit_conf = env2dict("TOKEN_RATE_LIMIT") -token_interval_conf = {} -for route, rate_limit_list in token_rate_limit_conf.items(): - token_interval_conf.setdefault(route, {}) - for level_dict in rate_limit_list: - token_interval_conf[route][level_dict['level']] = cvt_token_rate_to_interval( - level_dict['limit'] - ) - -styles = itertools.cycle( - ["#7CD9FF", "#BDADFF", "#9EFFE3", "#f1b8e4", "#F5A88E", "#BBCA89"] -) - - -def show_startup(): - for base_url, route_prefix in zip(OPENAI_BASE_URL, 
OPENAI_ROUTE_PREFIX): - print_startup_info( - base_url, - route_prefix, - OPENAI_API_KEY, - FWD_KEY, - style=next(styles), - **openai_additional_start_info, - ) - for base_url, route_prefix in zip(GENERAL_BASE_URL, GENERAL_ROUTE_PREFIX): - print_startup_info( - base_url, - route_prefix, - "", - "", - style=next(styles), - **general_additional_start_info, - ) - - print_rate_limit_info( - RATE_LIMIT_BACKEND, - RATE_LIMIT_STRATEGY, - GLOBAL_RATE_LIMIT, - req_rate_limit_dict, - token_rate_limit_conf, - ) diff --git a/openai_forward/webui/run.py b/openai_forward/webui/run.py index 50d4547..4294f4c 100644 --- a/openai_forward/webui/run.py +++ b/openai_forward/webui/run.py @@ -1,4 +1,5 @@ import ast +import os import pickle import secrets import threading @@ -85,43 +86,31 @@ def worker(log_socket: zmq.Socket, q: SimpleQueue): "Apply and Restart", help="Saving configuration and reloading openai forward" ): with st.spinner("Saving configuration and reloading openai forward..."): - env_dict = config.convert_to_env(set_env=False) + # env_dict = config.convert_to_env(set_env=False) socket = st.session_state['socket'] - socket.send(pickle.dumps(env_dict)) + socket.send(pickle.dumps(config.to_dict())) message: bytes = socket.recv() st.success(message.decode()) - def generate_env_content(): - env_dict = config.convert_to_env(set_env=False) - env_content = "\n".join([f"{key}={value}" for key, value in env_dict.items()]) - return env_content - - if st.button("Save to .env", help="Saving configuration to .env file"): - with st.spinner("Saving configuration to .env file."): - with open(".env", "w") as f: - f.write(generate_env_content()) - st.success("Configuration saved to .env file") - if st.button( - "Export to .env file", + "Export to config.yaml", ): - # Deferred data for download button: https://github.com/streamlit/streamlit/issues/5053 + yaml_str = yaml.dump(config.to_dict(), default_flow_style=False) + yaml_bytes = yaml_str.encode('utf-8') download = st.download_button( use_container_width=True, label="Export", - data=generate_env_content(), - file_name="config.env", + data=yaml_bytes, + file_name="config.yaml", mime="text/plain", ) def display_forward_configuration(): - forward_config = config.forward - st.subheader("AI Forward") with st.form("forward_configuration", border=False): - df = pd.DataFrame([i.to_dict() for i in forward_config.forward]) + df = pd.DataFrame([i.to_dict() for i in config.forward]) edited_df = st.data_editor( df, num_rows="dynamic", key="editor1", use_container_width=True ) @@ -155,7 +144,7 @@ def display_forward_configuration(): submitted = st.form_submit_button("Save", use_container_width=True) if submitted: - forward_config.forward = [ + config.forward = [ ForwardItem(row["base_url"], row["route"], row["type"]) for i, row in edited_df.iterrows() if row["route"] is not None and row["base_url"] is not None @@ -218,11 +207,9 @@ def display_api_key_configuration(): 'gpt-4-0125-preview', 'gpt-4-0613', 'gpt-4-1106-preview', - 'gpt-4-1106-vision-preview', 'gpt-4-turbo', 'gpt-4-turbo-2024-04-09', 'gpt-4-turbo-preview', - 'gpt-4-vision-preview', 'gpt-4o', 'gpt-4o-2024-05-13', 'gpt-4o-mini', @@ -262,9 +249,26 @@ def display_api_key_configuration(): with st.form("api_key_form", border=False): st.subheader("OpenAI API Key") + + def to_list(x: str): + x = str(x).replace(',', ',').strip() + if x == '': + return [] + try: + x = ast.literal_eval(x) + if isinstance(x, list): + return x + if isinstance(x, tuple): + return list(x) + else: + return [x] + except: + return str(x).split(',') + 
+ to_int_list = lambda x: [int(i) for i in x] df = pd.DataFrame( [ - {'api_key': key, 'level': value} + {'api_key': key, 'level': str(value)} for key, value in api_key.openai_key.items() ] ) @@ -275,7 +279,7 @@ def display_api_key_configuration(): st.subheader("Forward Key") df2 = pd.DataFrame( [ - {'api_key': key, 'level': value} + {'level': int(key), 'api_key': str(value)} for key, value in api_key.forward_key.items() ] ) @@ -286,11 +290,13 @@ def display_api_key_configuration(): submitted = st.form_submit_button("Save", use_container_width=True) if submitted: api_key.openai_key = { - row["api_key"]: row["level"] for i, row in edited_df.iterrows() + row["api_key"]: to_int_list(to_list(row["level"])) + for i, row in edited_df.iterrows() } api_key.forward_key = { - row["api_key"]: row["level"] for i, row in edited_df2.iterrows() + int(row["level"]): to_list(row["api_key"]) + for i, row in edited_df2.iterrows() } api_key.level = level_model_map @@ -304,14 +310,14 @@ def display_cache_configuration(): with st.container(): st.subheader("Cache Configuration") - cache_openai = st.checkbox("Cache OpenAI route", cache.cache_openai) + cache_openai = st.checkbox("Cache OpenAI route", cache.openai) cache_default_request_caching_value = st.checkbox( "For OpenAI API, return using cache by default", cache.default_request_caching_value, disabled=not cache_openai, ) - cache_general = st.checkbox("Cache General route", cache.cache_general) + cache_general = st.checkbox("Cache General route", cache.general) cache_backend = st.selectbox( "Cache Backend", @@ -325,21 +331,21 @@ def display_cache_configuration(): disabled=cache_backend == "MEMORY", ) - df = pd.DataFrame([{"cache_route": i} for i in cache.cache_routes]) + df = pd.DataFrame([{"cache_route": i} for i in cache.routes]) edited_df = st.data_editor( df, num_rows="dynamic", key="editor1", use_container_width=True ) submitted = st.button("Save", use_container_width=True) if submitted: - cache.cache_openai = cache_openai - cache.cache_general = cache_general + cache.openai = cache_openai + cache.general = cache_general cache.backend = cache_backend cache.root_path_or_url = cache_root_path_or_url cache.default_request_caching_value = cache_default_request_caching_value - cache.cache_routes = [ + cache.routes = [ row['cache_route'] for i, row in edited_df.iterrows() if row["cache_route"] is not None diff --git a/pyproject.toml b/pyproject.toml index 8c0d8af..e641b25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ dependencies = [ "tomli", "tomli-w", "pyzmq", + "pyyaml", ] dynamic = ["version"] @@ -78,25 +79,35 @@ aifd = "openai_forward.__main__:main" [tool.hatch.version] path = "openai_forward/__init__.py" -[tool.isort] -profile = "black" - -[tool.black] -line-length = 88 -exclude = ''' -/( - \.git - | \.hg - | \.mypy_cache - | \.tox - | \.venv - | _build - | buck-out - | build - | dist -)/ -''' +#[tool.isort] +#profile = "black" +# +#[tool.black] +#line-length = 88 +#exclude = ''' +#/( +# \.git +# | \.hg +# | \.mypy_cache +# | \.tox +# | \.venv +# | _build +# | buck-out +# | build +# | dist +#)/ +#''' +[tool.ruff] +line-length = 100 +select = [ # 选择的规则 + "F", + "E", + "W", + "UP", + "PERF", +] +ignore = ["F401"] # 忽略的规则 [tool.hatch.build] include = [ @@ -107,3 +118,7 @@ exclude = [ [tool.hatch.build.targets.wheel] packages = ["openai_forward"] + +[tool.rye] +managed = true +dev-dependencies = [] \ No newline at end of file diff --git a/tests/test_forwarding.py b/tests/test_forwarding.py index 34cda5d..48ed2f4 100644 --- 
a/tests/test_forwarding.py +++ b/tests/test_forwarding.py @@ -1,5 +1,5 @@ import importlib -from unittest.mock import Mock, patch +from unittest.mock import Mock import pytest from fastapi import Request @@ -11,6 +11,7 @@ params=[ { "FWD_KEY": {'fk0': 0, 'fk1': 1, 'fk2': 2}, + # "FWD_KEY": {0:["fk-0"], 1:["fk-1"], 2: ["fk-2"]}, "OPENAI_API_KEY": {'sk1': [0, 1], 'sk2': [1], 'sk3': [2], 'sk4': [0]}, "LEVEL_MODELS": { 1: ['gpt-3.5-turbo', 'text-embedding-3-small'], @@ -21,7 +22,7 @@ ] ) def openai_forward(request): - from openai_forward import settings + from openai_forward.config import settings settings.FWD_KEY = request.param['FWD_KEY'] settings.OPENAI_API_KEY = request.param['OPENAI_API_KEY'] diff --git a/tests/test_settings.py b/tests/test_settings.py index f7e72b3..63f6a4f 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -1,4 +1,4 @@ -from openai_forward.settings import ( +from openai_forward.config.settings import ( CACHE_ROUTE_SET, FORWARD_CONFIG, FWD_KEY,