diff --git a/.env b/.env
index 3b01fdf..61058ca 100644
--- a/.env
+++ b/.env
@@ -21,10 +21,17 @@ DEFAULT_REQUEST_CACHING_VALUE=false
BENCHMARK_MODE=true
FORWARD_CONFIG=[{"base_url":"https://api.openai.com","route":"/","type":"openai"}]
-
-#LEVEL_MODELS={"1": ["gpt-4"], "2": ["gpt-3.5-turbo"]}
-#OPENAI_API_KEY_CONFIG={"sk-xxx": [0], "sk-xxx": [1], "sk-xxx": [1,2]}
-#FORWARD_KEY_CONFIG={"fk-0": 0, "fk-1": 1, "fk-2": 2, "default": 1}
+#FORWARD_CONFIG=[{"base_url":"https://api.deepseek.com","route":"/","type":"openai"}]
+#FORWARD_CONFIG=[{"base_url":"http://localhost:3000","route":"/","type":"general"}]
+#CUSTOM_MODEL_CONFIG='{
+#"backend":"ollama",
+#"model_map": {"gpt-3.5-turbo":"qwen2:7b"},
+#"api_base": "http://localhost:11434"
+#}'
+
+#LEVEL_MODELS='{"1":["gpt-4"],"2":["gpt-3.5-turbo"]}'
+#OPENAI_API_KEY_CONFIG='{"sk-xxx": [0], "sk-xxx": [1], "sk-xxx": [1,2]}'
+#FORWARD_KEY_CONFIG='{"0": ["fk-0"], "1":["fk-1", "fk-11"], "2": ["fk-2"]}'
# `REQ_RATE_LIMIT`: i.e., Request rate limit for specified routes, user specific
# format: {route: ratelimit-string}
@@ -46,7 +53,7 @@ RATE_LIMIT_STRATEGY=moving-window
# Rate limit for returned tokens
TOKEN_RATE_LIMIT='{
-"/v1/chat/completions":[{"level":0,"limit":"60/second"}],
+"/v1/chat/completions":[{"level":0,"limit":"100/second"}],
"/v1/completions":[{"level":0,"limit":"60/second"}],
"/benchmark/v1/chat/completions":[{"level":0,"limit":"20/second"}]
}'
diff --git a/.gitignore b/.gitignore
index 62f1a65..b4da486 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@ run.sh
ssl/
chat.yaml
chat_*.yaml
+openai-forward-config.yaml.*
config.toml
config_parser.py
diff --git a/Examples/chat_completion.py b/Examples/chat_completion.py
index 1f567da..d620414 100644
--- a/Examples/chat_completion.py
+++ b/Examples/chat_completion.py
@@ -18,14 +18,25 @@
max_tokens = None
-# user_content = """
-# 用c实现目前已知最快平方根算法
-# """
-# user_content = '最初有1000千克的蘑菇,其中99%的成分是水。经过几天的晴天晾晒后,蘑菇中的水分含量现在是98%,蘑菇中减少了多少水分?'
-# user_content = "Write down the most romantic sentence you can think of."
-user_content = "光散射中的Mie理论的理论公式是怎样的?请用latex语法表示它公式使用$$符号包裹。"
-
-model = "gpt-3.5-turbo"
+
+queries = {
+ 0: "用c实现目前已知最快平方根导数算法",
+ 1: "既然快递要 3 天才到,为什么不把所有的快递都提前 3 天发?",
+ 2: "只切一刀,如何把四个橘子分给四个小朋友?",
+ 3: "最初有1000千克的蘑菇,其中99%的成分是水。经过几天的晴天晾晒后,蘑菇中的水分含量现在是98%,蘑菇中减少了多少水分?",
+ 4: "哥哥弟弟百米赛跑,第一次从同一起点起跑,哥哥到达终点时领先弟弟一米获胜,第二次哥哥从起点后退一米处开始起跑,问结果如何?",
+ 5: "光散射中的Mie理论的理论公式是怎样的?请用latex语法表示它公式使用$$符号包裹。",
+ 6: "Write down the most romantic sentence you can think of.",
+ 7: "为什么我爸妈结婚的时候没邀请我参加婚礼?",
+ 8: "一个人自杀了,这个世界上是多了一个自杀的人,还是少了一个自杀的人",
+}
+
+user_content = queries[8]
+
+# model = "gpt-3.5-turbo"
+model = "gpt-4o-mini"
+# model = "deepseek-chat"
+# model="gpt-4o"
# model="gpt-4"
mt = MeasureTime().start()
diff --git a/deploy.md b/deploy.md
index 62f04cc..e7a2922 100644
--- a/deploy.md
+++ b/deploy.md
@@ -6,14 +6,10 @@
-一键部署至render
-[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/KenyonY/openai-forward)
-
[pip部署](#pip部署) |
-[docker部署](#docker部署) |
-[render一键部署](#render-一键部署)
+[docker部署](#docker部署)
@@ -22,14 +18,14 @@
1. [pip 安装部署](deploy.md#pip部署)
2. [Docker部署](deploy.md#docker部署)
-**一键免费云平台部署**
+**~~一键免费云平台部署~~**
-1. [Render一键部署](deploy.md#render-一键部署)
-2. 更多部署: https://github.com/KenyonY/openai-forward/blob/0.5.x/deploy.md
+1. ~~[Render一键部署](deploy.md#render-一键部署)~~
+2. ~~更多部署: https://github.com/KenyonY/openai-forward/blob/0.5.x/deploy.md~~
**其它反代**
[CloudFlare AI Gateway](https://developers.cloudflare.com/ai-gateway/get-started/creating-gateway/)
-[ChatGPT](https://github.com/pandora-next/deploy)
+~~[ChatGPT](https://github.com/pandora-next/deploy)~~
---
@@ -113,23 +109,3 @@ aifd run
-
-## Render 一键部署
-[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/KenyonY/openai-forward)
-
-Render应该算是所有部署中最简易的一种, 并且它生成的域名国内可以直接访问!
-
-1. 点击一键部署按钮
- 也可先fork本仓库 -->到Render的Dashboard上 New Web Services --> Connect 到刚刚fork到仓库 后面步骤均默认即可
-2. 填写环境变量,`openai-forward`所有配置都可以通过环境变量设置,可以根据自己需要填写。
-
-然后等待部署完成即可。
-Render的免费计划: 每月750小时免费实例时间(意味着单个实例可以不间断运行)、100G带宽流量、500分钟构建时长.
-
-注:默认render在15分钟内没有服务请求时会自动休眠(好处是休眠后不会占用750h的免费实例时间),休眠后下一次请求会被阻塞 ~15s。
-如果希望服务15分钟不自动休眠,可以使用定时脚本(如每14分钟)对render服务进行保活。保活脚本参考`scripts/keep_render_alive.py`.
-如果希望零停机部署可以在render设置中配置 `Health Check Path`为`/healthz`
-
-> https://render.openai-forward.com
-> https://openai-forward.onrender.com
-
diff --git a/deploy_en.md b/deploy_en.md
index d6bfcb2..2ce4b0b 100644
--- a/deploy_en.md
+++ b/deploy_en.md
@@ -76,12 +76,6 @@ proxy_buffering off;
docker run -d -p 8000:8000 beidongjiedeguang/openai-forward:latest
```
-If the `.env` environment variable is specified:
-
-```bash
-docker run --env-file .env -d -p 8000:8000 beidongjiedeguang/openai-forward:latest
-```
-
This will map the host's 8000 port. Access the service via `http://{ip}:8000`.
The log path inside the container is `/home/openai-forward/Log/`. It can be mapped when starting up.
diff --git a/openai-forward-config.example.yaml b/openai-forward-config.example.yaml
new file mode 100644
index 0000000..57e3d6f
--- /dev/null
+++ b/openai-forward-config.example.yaml
@@ -0,0 +1,92 @@
+log:
+ general: true
+ openai: true
+
+cache:
+ general: true
+ openai: true
+ routes:
+ - "/v1/chat/completions"
+ - "/v1/embeddings"
+ # `CACHE_BACKEND`: Options (MEMORY, LMDB, LevelDB)
+ backend: MEMORY
+ root_path_or_url: "./FLAXKV_DB"
+ default_request_caching_value: false
+
+chat_completion_route: "/v1/chat/completions"
+# custom_general_route: "/v1/models/gemini-pro"
+
+benchmark_mode: true
+
+forward:
+ - base_url: "https://api.openai.com"
+ route: "/"
+ type: "openai"
+
+ - base_url: "https://generativelanguage.googleapis.com"
+ route: "/gemini"
+ type: "general"
+
+# custom_model_config:
+# backend: "ollama"
+# model_map:
+# gpt-3.5-turbo: "qwen2:7b"
+# api_base: "http://localhost:11434"
+
+api_key:
+ level:
+ 1: ["gpt-4"]
+ 2: ["gpt-3.5-turbo"]
+
+ openai_key:
+ "sk-xxx1": [0]
+ "sk-xxx2": [1]
+ "sk-xxx3": [1, 2]
+
+ forward_key:
+ 0: ["fk-0"]
+ 1: ["fk-1", "fk-11"]
+ 2: ["fk-2"]
+
+rate_limit:
+ global_rate_limit: "200/minute"
+ strategy: "moving-window"
+ iter_chunk: "one-by-one"
+ req_rate_limit:
+ - route: "/v1/chat/completions"
+ value:
+ - level: 0
+ limit: "100/2minutes"
+
+ - route: "/v1/completions"
+ value:
+ - level: 0
+ limit: "60/minute;600/hour"
+ req_rate_limit_backend: "redis://localhost:6379"
+
+ token_rate_limit:
+ - route: "/v1/chat/completions"
+ value:
+ - level: 0
+ limit: "100/second"
+ - route: "/v1/completions"
+ value:
+ - level: 0
+ limit: "60/second"
+ - route: "/benchmark/v1/chat/completions"
+ value:
+ - level: 0
+ limit: "20/second"
+
+timeout: 6
+
+ip_blacklist:
+ip_whitelist:
+
+webui_restart_port: 15555
+webui_log_port: 15556
+
+proxy:
+default_stream_response: true
+
+tz: Asia/Shanghai
diff --git a/openai-forward-config.yaml b/openai-forward-config.yaml
new file mode 100644
index 0000000..860b9cd
--- /dev/null
+++ b/openai-forward-config.yaml
@@ -0,0 +1,92 @@
+log:
+ general: true
+ openai: true
+
+cache:
+ general: true
+ openai: true
+ routes:
+ - "/v1/chat/completions"
+ - "/v1/embeddings"
+ # `CACHE_BACKEND`: Options (MEMORY, LMDB, LevelDB)
+ backend: MEMORY
+ root_path_or_url: "./FLAXKV_DB"
+ default_request_caching_value: false
+
+chat_completion_route: "/v1/chat/completions"
+# custom_general_route: "/v1/models/gemini-pro"
+
+benchmark_mode: true
+
+forward:
+ - base_url: "https://api.openai.com"
+ route: "/"
+ type: "openai"
+
+ - base_url: "https://generativelanguage.googleapis.com"
+ route: "/gemini"
+ type: "general"
+
+# custom_model_config:
+# backend: "ollama"
+# model_map:
+# gpt-3.5-turbo: "qwen2:7b"
+# api_base: "http://localhost:11434"
+
+api_key:
+ level:
+ 1: ["gpt-4"]
+ 2: ["gpt-3.5-turbo"]
+
+ openai_key:
+ "sk-xxx1": [0]
+ "sk-xxx2": [1]
+ "sk-xxx3": [1, 2]
+
+ forward_key:
+ 0: ["fk-0"]
+ 1: ["fk-1", "fk-11"]
+ 2: ["fk-2"]
+
+rate_limit:
+ global_rate_limit: "200/minute"
+ strategy: "moving-window"
+ iter_chunk: "one-by-one"
+ req_rate_limit:
+ - route: "/v1/chat/completions"
+ value:
+ - level: 0
+ limit: "100/2minutes"
+
+ - route: "/v1/completions"
+ value:
+ - level: 0
+ limit: "60/minute;600/hour"
+ req_rate_limit_backend: "redis://localhost:6379"
+
+ token_rate_limit:
+ - route: "/v1/chat/completions"
+ value:
+ - level: 0
+ limit: "60/second"
+ - route: "/v1/completions"
+ value:
+ - level: 0
+ limit: "60/second"
+ - route: "/benchmark/v1/chat/completions"
+ value:
+ - level: 0
+ limit: "20/second"
+
+timeout: 6
+
+ip_blacklist:
+ip_whitelist:
+
+webui_restart_port: 15555
+webui_log_port: 15556
+
+proxy:
+default_stream_response: true
+
+tz: Asia/Shanghai
diff --git a/openai_forward/__init__.py b/openai_forward/__init__.py
index 793a432..06d75ab 100644
--- a/openai_forward/__init__.py
+++ b/openai_forward/__init__.py
@@ -1,6 +1,5 @@
-__version__ = "0.8.1"
+__version__ = "0.8.2-alpha"
from dotenv import load_dotenv
-from yaml import load
load_dotenv('.env', override=False)
diff --git a/openai_forward/__main__.py b/openai_forward/__main__.py
index c3fe16d..558490a 100644
--- a/openai_forward/__main__.py
+++ b/openai_forward/__main__.py
@@ -1,12 +1,20 @@
import atexit
+import datetime
import os
import pickle
import platform
import signal
import subprocess
+from pathlib import Path
import fire
import uvicorn
+import yaml
+
+
+def save_yaml(path: Path, data: dict):
+ with open(path, 'w') as f:
+ yaml.dump(data, f)
class Cli:
@@ -106,10 +114,16 @@ def mq_worker(log_socket: zmq.Socket):
while True:
message = socket.recv()
- env_dict: dict = pickle.loads(message)
+ config_dict: dict = pickle.loads(message)
+ config_path = Path("openai-forward-config.yaml")
+ # backup
+ time_str = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+ backup_path = Path(f"openai-forward-config.yaml.{time_str}.bak")
+ if config_path.exists():
+                # move the existing config aside as a timestamped backup (openai-forward-config.yaml.<time>.bak)
+ config_path.rename(backup_path)
- for key, value in env_dict.items():
- os.environ[key] = value
+ save_yaml(config_path, config_dict)
self._restart_uvicorn(
port=port,
@@ -208,8 +222,8 @@ def convert(log_folder: str = None, target_path: str = None):
Returns:
None
"""
+ from openai_forward.config.settings import OPENAI_ROUTE_PREFIX
from openai_forward.helper import convert_folder_to_jsonl, route_prefix_to_str
- from openai_forward.settings import OPENAI_ROUTE_PREFIX
print(60 * '-')
if log_folder is None:
diff --git a/openai_forward/app.py b/openai_forward/app.py
index 6c62c56..98bbf1b 100644
--- a/openai_forward/app.py
+++ b/openai_forward/app.py
@@ -1,12 +1,12 @@
+from contextlib import asynccontextmanager
+
from fastapi import FastAPI, Request, status
from fastapi.middleware.cors import CORSMiddleware
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
-from . import __version__, custom_slowapi
-from .forward import ForwardManager
-from .helper import normalize_route as normalize_route_path
-from .settings import (
+from . import __version__
+from .config.settings import (
BENCHMARK_MODE,
RATE_LIMIT_BACKEND,
RATE_LIMIT_STRATEGY,
@@ -14,6 +14,8 @@
get_limiter_key,
show_startup,
)
+from .forward import ForwardManager
+from .helper import normalize_route as normalize_route_path
forward_manager = ForwardManager()
@@ -22,7 +24,18 @@
strategy=RATE_LIMIT_STRATEGY,
storage_uri=RATE_LIMIT_BACKEND,
)
-app = FastAPI(title="openai-forward", version=__version__)
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+ # Startup logic
+ await forward_manager.start_up()
+ yield
+ # Shutdown logic
+ await forward_manager.shutdown()
+
+
+app = FastAPI(title="openai-forward", version=__version__, lifespan=lifespan)
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
@@ -63,17 +76,6 @@ def healthz(request: Request):
methods=["POST"],
)
-
-@app.on_event("startup")
-async def startup():
- await forward_manager.start_up()
-
-
-@app.on_event("shutdown")
-async def shutdown():
- await forward_manager.shutdown()
-
-
add_route = lambda obj: app.add_route(
obj.ROUTE_PREFIX + "{api_path:path}",
route=limiter.limit(dynamic_request_rate_limit)(obj.reverse_proxy),
@@ -83,5 +85,4 @@ async def shutdown():
[add_route(obj) for obj in forward_manager.generic_objs]
[add_route(obj) for obj in forward_manager.root_objs]
-
show_startup()
diff --git a/openai_forward/cache/__init__.py b/openai_forward/cache/__init__.py
index 5ae8d66..90c8dd0 100644
--- a/openai_forward/cache/__init__.py
+++ b/openai_forward/cache/__init__.py
@@ -1,12 +1,13 @@
-from flaxkv.pack import decode, encode
+from flaxkv.pack import encode
from loguru import logger
-from ..settings import (
+from openai_forward.config.settings import (
CACHE_OPENAI,
CACHE_ROUTE_SET,
CHAT_COMPLETION_ROUTE,
EMBEDDING_ROUTE,
)
+
from .chat.response import gen_response, get_cached_chat_response
from .database import db_dict
from .embedding.response import get_cached_embedding_response
diff --git a/openai_forward/cache/chat/chat_completions.py b/openai_forward/cache/chat/chat_completions.py
index e6c7511..9e14ec8 100644
--- a/openai_forward/cache/chat/chat_completions.py
+++ b/openai_forward/cache/chat/chat_completions.py
@@ -9,13 +9,10 @@
from fastapi import Request
from fastapi.responses import Response, StreamingResponse
-from ...decorators import (
- async_random_sleep,
- async_token_rate_limit_auth_level,
- random_sleep,
-)
+from openai_forward.config.settings import FWD_KEY, token_interval_conf
+
+from ...decorators import async_token_rate_limit_auth_level, random_sleep
from ...helper import get_unique_id
-from ...settings import FWD_KEY, token_interval_conf
from .tokenizer import TIKTOKEN_VALID, count_tokens, encode_as_pieces
diff --git a/openai_forward/cache/chat/response.py b/openai_forward/cache/chat/response.py
index 7b92c62..f055b14 100644
--- a/openai_forward/cache/chat/response.py
+++ b/openai_forward/cache/chat/response.py
@@ -7,7 +7,8 @@
from flaxkv.pack import encode
from loguru import logger
-from ...settings import CACHE_OPENAI, FWD_KEY
+from openai_forward.config.settings import CACHE_OPENAI, FWD_KEY
+
from ..database import db_dict
from .chat_completions import (
async_token_rate_limit_auth_level,
diff --git a/openai_forward/cache/database.py b/openai_forward/cache/database.py
index 346c94b..abe06ef 100644
--- a/openai_forward/cache/database.py
+++ b/openai_forward/cache/database.py
@@ -1,6 +1,6 @@
from flaxkv import FlaxKV
-from ..settings import CACHE_BACKEND, CACHE_ROOT_PATH_OR_URL, LOG_CACHE_DB_INFO
+from ..config.settings import CACHE_BACKEND, CACHE_ROOT_PATH_OR_URL, LOG_CACHE_DB_INFO
if CACHE_BACKEND.upper() == "MEMORY":
db_dict = {}
diff --git a/openai_forward/cache/embedding/response.py b/openai_forward/cache/embedding/response.py
index 5df4a17..dceedfb 100644
--- a/openai_forward/cache/embedding/response.py
+++ b/openai_forward/cache/embedding/response.py
@@ -4,7 +4,7 @@
from flaxkv.pack import encode
from loguru import logger
-from ...settings import CACHE_OPENAI
+from ...config.settings import CACHE_OPENAI
from ..database import db_dict
diff --git a/openai_forward/config/interface.py b/openai_forward/config/interface.py
index bd13d23..998c134 100644
--- a/openai_forward/config/interface.py
+++ b/openai_forward/config/interface.py
@@ -1,16 +1,21 @@
import json
-import os
-from typing import Dict, List, Literal, Optional, Tuple, Union
+from typing import Literal
-from attrs import asdict, define, field, filters
+from attrs import asdict, define, field
-from ..settings import *
+from openai_forward.config.settings import *
class Base:
def to_dict(self, drop_none=True):
if drop_none:
- return asdict(self, filter=filters.exclude(type(None)))
+
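+            # Exclude attributes whose value is None from the serialized dict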
+ def custom_filter(attribute, value):
+ if drop_none:
+ return value is not None
+ return True
+
+ return asdict(self, filter=custom_filter)
return asdict(self)
def to_dict_str(self):
@@ -48,23 +53,22 @@ class CacheConfig(Base):
backend: str = 'LevelDB'
root_path_or_url: str = './FLAXKV_DB'
default_request_caching_value: bool = True
- cache_openai: bool = False
- cache_general: bool = False
- cache_routes: List = ['/v1/chat/completions']
+ openai: bool = False
+ general: bool = False
+ routes: List = ['/v1/chat/completions']
def convert_to_env(self, set_env=False):
-
env_dict = {}
- env_dict['CACHE_OPENAI'] = str(self.cache_openai)
- env_dict['CACHE_GENERAL'] = str(self.cache_general)
+ env_dict['CACHE_OPENAI'] = str(self.openai)
+ env_dict['CACHE_GENERAL'] = str(self.general)
env_dict['CACHE_BACKEND'] = self.backend
env_dict['CACHE_ROOT_PATH_OR_URL'] = self.root_path_or_url
env_dict['DEFAULT_REQUEST_CACHING_VALUE'] = str(
self.default_request_caching_value
)
- env_dict['CACHE_ROUTES'] = json.dumps(self.cache_routes)
+ env_dict['CACHE_ROUTES'] = json.dumps(self.routes)
if set_env:
os.environ.update(env_dict)
@@ -84,22 +88,22 @@ class RateLimit(Base):
token_rate_limit: List[RateLimitType] = [
RateLimitType(
route="/v1/chat/completions",
- value=[{"level": '0', "limit": "60/second"}],
+ value=[{"level": 0, "limit": "60/second"}],
),
RateLimitType(
- route="/v1/completions", value=[{"level": '0', "limit": "60/second"}]
+ route="/v1/completions", value=[{"level": 0, "limit": "60/second"}]
),
]
req_rate_limit: List[RateLimitType] = [
RateLimitType(
route="/v1/chat/completions",
- value=[{"level": '0', "limit": "100/2minutes"}],
+ value=[{"level": 0, "limit": "100/2minutes"}],
),
RateLimitType(
- route="/v1/completions", value=[{"level": '0', "limit": "60/minute"}]
+ route="/v1/completions", value=[{"level": 0, "limit": "60/minute"}]
),
RateLimitType(
- route="/v1/embeddings", value=[{"level": '0', "limit": "100/2minutes"}]
+ route="/v1/embeddings", value=[{"level": 0, "limit": "100/2minutes"}]
),
]
iter_chunk: Literal['one-by-one', 'efficiency'] = 'one-by-one'
@@ -125,8 +129,8 @@ def convert_to_env(self, set_env=False):
@define(slots=True)
class ApiKey(Base):
- openai_key: Dict = {"": "0"}
- forward_key: Dict = {"": 0}
+ openai_key: Dict = {"sk-xx1": [0]}
+ forward_key: Dict = {0: ["fk-1"]}
level: Dict = {1: ["gpt-3.5-turbo"]}
def convert_to_env(self, set_env=False):
@@ -146,13 +150,13 @@ def convert_to_env(self, set_env=False):
@define(slots=True)
class Log(Base):
- LOG_GENERAL: bool = True
- LOG_OPENAI: bool = True
+ general: bool = True
+ openai: bool = True
def convert_to_env(self, set_env=False):
env_dict = {}
- env_dict['LOG_GENERAL'] = str(self.LOG_GENERAL)
- env_dict['LOG_OPENAI'] = str(self.LOG_OPENAI)
+ env_dict['LOG_GENERAL'] = str(self.general)
+ env_dict['LOG_OPENAI'] = str(self.openai)
if set_env:
os.environ.update(env_dict)
return env_dict
@@ -160,7 +164,15 @@ def convert_to_env(self, set_env=False):
@define(slots=True)
class Config(Base):
- forward: Forward = Forward()
+ # forward: Forward = Forward()
+ forward: List[ForwardItem] = [
+ ForwardItem(base_url="https://api.openai.com", route="/", type="openai"),
+ ForwardItem(
+ base_url="https://generativelanguage.googleapis.com",
+ route="/gemini",
+ type="general",
+ ),
+ ]
api_key: ApiKey = ApiKey()
@@ -179,8 +191,9 @@ class Config(Base):
default_stream_response: bool = True
def convert_to_env(self, set_env=False):
- env_dict = {}
- env_dict.update(self.forward.convert_to_env())
+ # env_dict = {}
+ # env_dict.update(self.forward.convert_to_env())
+ env_dict = {'FORWARD_CONFIG': json.dumps([i.to_dict() for i in self.forward])}
env_dict.update(self.api_key.convert_to_env())
env_dict.update(self.cache.convert_to_env())
env_dict.update(self.rate_limit.convert_to_env())
@@ -206,8 +219,8 @@ def come_from_env(self):
self.timezone = os.environ.get('TZ', 'Asia/Shanghai')
self.benchmark_mode = BENCHMARK_MODE
self.proxy = PROXY or ""
- self.log.LOG_OPENAI = LOG_OPENAI
- self.log.LOG_GENERAL = LOG_GENERAL
+ self.log.openai = LOG_OPENAI
+ self.log.general = LOG_GENERAL
self.rate_limit.strategy = RATE_LIMIT_STRATEGY
self.rate_limit.global_rate_limit = GLOBAL_RATE_LIMIT
@@ -223,15 +236,24 @@ def come_from_env(self):
self.cache.backend = CACHE_BACKEND
self.cache.root_path_or_url = CACHE_ROOT_PATH_OR_URL
self.cache.default_request_caching_value = DEFAULT_REQUEST_CACHING_VALUE
- self.cache.cache_openai = CACHE_OPENAI or self.cache.cache_openai
- self.cache.cache_general = CACHE_GENERAL or self.cache.cache_general
- self.cache.cache_routes = list(CACHE_ROUTE_SET) or self.cache.cache_routes
+ self.cache.openai = CACHE_OPENAI or self.cache.openai
+ self.cache.general = CACHE_GENERAL or self.cache.general
+ self.cache.routes = list(CACHE_ROUTE_SET) or self.cache.routes
self.api_key.level = LEVEL_MODELS or self.api_key.level
- self.api_key.openai_key = {
- key: ','.join([str(i) for i in value])
- for key, value in OPENAI_API_KEY.items()
- } or self.api_key.openai_key
- self.api_key.forward_key = FWD_KEY or self.api_key.forward_key
- self.forward.forward = [ForwardItem(**i) for i in FORWARD_CONFIG]
+ self.api_key.openai_key = OPENAI_API_KEY or self.api_key.openai_key
+ self.api_key.forward_key = LEVEL_TO_FWD_KEY or self.api_key.forward_key
+ self.forward = [ForwardItem(**i) for i in FORWARD_CONFIG]
return self
+
+
+if __name__ == "__main__":
+ import yaml
+
+ def save_dict_to_yaml(data, file_path):
+ with open(file_path, 'w') as file:
+ yaml.dump(data, file, default_flow_style=False)
+
+ config = Config()
+ print(config.to_dict())
+ save_dict_to_yaml(config.to_dict(), 'config.yaml')
diff --git a/openai_forward/config/settings.py b/openai_forward/config/settings.py
new file mode 100644
index 0000000..f32f120
--- /dev/null
+++ b/openai_forward/config/settings.py
@@ -0,0 +1,301 @@
+import itertools
+import os
+from pathlib import Path
+from typing import Any, Dict, List, Set
+
+import limits
+import yaml
+from fastapi import Request
+
+from openai_forward.console import print_rate_limit_info, print_startup_info
+from openai_forward.content.config import setting_log
+from openai_forward.helper import format_route_prefix
+
+config_file_path = Path("openai-forward-config.yaml")
+if config_file_path.exists():
+ with open(config_file_path) as file:
+ config = yaml.safe_load(file)
+else:
+ config = {}
+
+if not config:
+    # Reading configuration from environment variables will be deprecated
+ from openai_forward.helper import (
+ env2dict,
+ env2list,
+ format_route_prefix,
+ get_client_ip,
+ )
+
+ TIMEOUT = float(os.environ.get("TIMEOUT", "").strip() or "10")
+ DEFAULT_STREAM_RESPONSE = (
+ os.environ.get("DEFAULT_STREAM_RESPONSE", "True").strip().lower() == "true"
+ )
+
+ ITER_CHUNK_TYPE = (
+ os.environ.get("ITER_CHUNK_TYPE", "").strip() or "efficiency"
+ ) # Options: efficiency, one-by-one
+
+ CHAT_COMPLETION_ROUTE = (
+ os.environ.get("CHAT_COMPLETION_ROUTE", "/v1/chat/completions").strip().lower()
+ )
+ COMPLETION_ROUTE = (
+ os.environ.get("COMPLETION_ROUTE", "/v1/completions").strip().lower()
+ )
+ EMBEDDING_ROUTE = (
+ os.environ.get("EMBEDDING_ROUTE", "/v1/embeddings").strip().lower()
+ )
+ CUSTOM_GENERAL_ROUTE = os.environ.get("CUSTOM_GENERAL_ROUTE", "").strip().lower()
+
+ CACHE_ROUTE_SET = set(env2dict("CACHE_ROUTES", []))
+
+ FORWARD_CONFIG = env2dict(
+ "FORWARD_CONFIG",
+ [{"base_url": "https://api.openai.com", "route": "/", "type": "openai"}],
+ )
+
+ CUSTOM_MODEL_CONFIG = env2dict("CUSTOM_MODEL_CONFIG", {})
+
+ token_rate_limit_conf = env2dict("TOKEN_RATE_LIMIT")
+ PRINT_CHAT = os.environ.get("PRINT_CHAT", "False").strip().lower() == "true"
+
+ LOG_OPENAI = os.environ.get("LOG_OPENAI", "False").strip().lower() == "true"
+ LOG_GENERAL = os.environ.get("LOG_GENERAL", "False").strip().lower() == "true"
+
+ CACHE_OPENAI = os.environ.get("CACHE_OPENAI", "False").strip().lower() == "true"
+ CACHE_GENERAL = os.environ.get("CACHE_GENERAL", "False").strip().lower() == "true"
+
+ BENCHMARK_MODE = os.environ.get("BENCHMARK_MODE", "false").strip().lower() == "true"
+
+ LOG_CACHE_DB_INFO = (
+ os.environ.get("LOG_CACHE_DB_INFO", "false").strip().lower() == "true"
+ )
+ CACHE_BACKEND = os.environ.get("CACHE_BACKEND", "MEMORY").strip()
+ CACHE_ROOT_PATH_OR_URL = os.environ.get("CACHE_ROOT_PATH_OR_URL", "..").strip()
+
+ PROXY = os.environ.get("PROXY", "").strip() or None
+ GLOBAL_RATE_LIMIT = os.environ.get("GLOBAL_RATE_LIMIT", "").strip() or "inf"
+ RATE_LIMIT_BACKEND = os.environ.get("REQ_RATE_LIMIT_BACKEND", "").strip() or None
+ RATE_LIMIT_STRATEGY = (
+ os.environ.get("RATE_LIMIT_STRATEGY", "fixed-window").strip() or "fixed-window"
+ )
+ req_rate_limit_dict = env2dict('REQ_RATE_LIMIT')
+
+ DEFAULT_REQUEST_CACHING_VALUE = (
+ os.environ.get("DEFAULT_REQUEST_CACHING_VALUE", "false").strip().lower()
+ == "true"
+ )
+
+ OPENAI_API_KEY = env2dict("OPENAI_API_KEY_CONFIG")
+
+ LEVEL_TO_FWD_KEY = env2dict("FORWARD_KEY_CONFIG")
+ LEVEL_MODELS = {int(key): value for key, value in env2dict("LEVEL_MODELS").items()}
+
+ ENV_VAR_SEP = ","
+
+ IP_WHITELIST = env2list("IP_WHITELIST", sep=ENV_VAR_SEP)
+ IP_BLACKLIST = env2list("IP_BLACKLIST", sep=ENV_VAR_SEP)
+else:
+ TIMEOUT = float(config.get('timeout', 10))
+ DEFAULT_STREAM_RESPONSE = config.get('default_stream_response', True)
+
+ CHAT_COMPLETION_ROUTE = config.get(
+ 'chat_completion_route', '/v1/chat/completions'
+ ).lower()
+ COMPLETION_ROUTE = config.get('completion_route', '/v1/completions').lower()
+ EMBEDDING_ROUTE = config.get('embedding_route', '/v1/embeddings').lower()
+ CUSTOM_GENERAL_ROUTE = config.get('custom_general_route', '').lower()
+
+ CACHE_ROUTE_SET: Set[str] = set(config.get('cache', {}).get('routes', []))
+
+ openai_additional_start_info = {'cache_routes': CACHE_ROUTE_SET}
+ general_additional_start_info = {'cache_routes': CACHE_ROUTE_SET}
+
+ FORWARD_CONFIG = config.get(
+ 'forward',
+ [{"base_url": "https://api.openai.com", "route": "/", "type": "openai"}],
+ )
+
+ CUSTOM_MODEL_CONFIG = config.get('custom_model_config', {})
+
+ PRINT_CHAT = config.get('print_chat', False)
+
+ LOG_OPENAI = config.get('log', {}).get('openai', False)
+ LOG_GENERAL = config.get('log', {}).get('general', False)
+
+ CACHE_OPENAI = config.get('cache', {}).get('openai', False)
+ CACHE_GENERAL = config.get('cache', {}).get('general', False)
+ DEFAULT_REQUEST_CACHING_VALUE = config.get('cache', {}).get(
+ 'default_request_caching_value', False
+ )
+
+ BENCHMARK_MODE = config.get('benchmark_mode', False)
+
+ LOG_CACHE_DB_INFO = config.get('log_cache_db_info', False)
+ CACHE_BACKEND = config.get('cache', {}).get('backend', 'MEMORY')
+ CACHE_ROOT_PATH_OR_URL = config.get('cache', {}).get('root_path_or_url', '.')
+
+ PROXY = config.get('proxy')
+
+    IP_WHITELIST = config.get("ip_whitelist") or []
+    IP_BLACKLIST = config.get("ip_blacklist") or []
+
+ _api_key = config.get("api_key", {})
+ OPENAI_API_KEY = _api_key.get("openai_key", {})
+ LEVEL_TO_FWD_KEY = _api_key.get("forward_key", {})
+ LEVEL_MODELS = _api_key.get("level", {})
+
+ _rate_limit = config.get("rate_limit", {})
+ _token_rate_limit_list = _rate_limit.get('token_rate_limit', [])
+ token_rate_limit_conf = {
+ item['route']: item['value'] for item in _token_rate_limit_list
+ }
+ GLOBAL_RATE_LIMIT = _rate_limit.get('global_rate_limit', 'inf')
+ RATE_LIMIT_STRATEGY = _rate_limit.get('strategy', 'fixed-window')
+    _req_rate_limit_list = _rate_limit.get('req_rate_limit', [])
+    RATE_LIMIT_BACKEND = _rate_limit.get('req_rate_limit_backend', None)
+ req_rate_limit_dict = {
+ item['route']: item['value'] for item in _req_rate_limit_list
+ }
+
+ ITER_CHUNK_TYPE = _rate_limit.get('iter_chunk', 'efficiency')
+
+openai_additional_start_info = {}
+general_additional_start_info = {}
+
+openai_additional_start_info['cache_routes'] = CACHE_ROUTE_SET
+general_additional_start_info['cache_routes'] = CACHE_ROUTE_SET
+
+OPENAI_BASE_URL = [
+ i['base_url'] for i in FORWARD_CONFIG if i and i.get('type') == 'openai'
+]
+OPENAI_ROUTE_PREFIX = [
+ format_route_prefix(i['route'])
+ for i in FORWARD_CONFIG
+ if i and i.get('type') == 'openai'
+]
+
+GENERAL_BASE_URL = [
+ i['base_url'] for i in FORWARD_CONFIG if i and i.get('type') == 'general'
+]
+GENERAL_ROUTE_PREFIX = [
+ format_route_prefix(i['route'])
+ for i in FORWARD_CONFIG
+ if i and i.get('type') == 'general'
+]
+
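+# Sanity check: no route prefix may be registered as both an 'openai' and a 'general' forward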
+for openai_route, general_route in zip(OPENAI_ROUTE_PREFIX, GENERAL_ROUTE_PREFIX):
+ assert openai_route not in GENERAL_ROUTE_PREFIX
+ assert general_route not in OPENAI_ROUTE_PREFIX
+
+if BENCHMARK_MODE:
+ openai_additional_start_info["benchmark_mode"] = BENCHMARK_MODE
+
+openai_additional_start_info["LOG_OPENAI"] = LOG_OPENAI
+general_additional_start_info["LOG_GENERAL"] = LOG_GENERAL
+
+if LOG_OPENAI:
+ setting_log(openai_route_prefix=OPENAI_ROUTE_PREFIX, print_chat=PRINT_CHAT)
+
+if PRINT_CHAT:
+ openai_additional_start_info["print_chat"] = True
+
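+# Honor the default request-caching flag only when the corresponding cache is actually enabled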
+DEFAULT_REQUEST_CACHING_VALUE = (DEFAULT_REQUEST_CACHING_VALUE and CACHE_OPENAI) or (
+ DEFAULT_REQUEST_CACHING_VALUE and CACHE_GENERAL
+)
+if CACHE_OPENAI:
+ openai_additional_start_info["cache_backend"] = CACHE_BACKEND
+ if not CACHE_BACKEND.lower() == 'memory':
+ openai_additional_start_info["cache_root_path_or_url"] = CACHE_ROOT_PATH_OR_URL
+ openai_additional_start_info[
+ "default_request_caching_value"
+ ] = DEFAULT_REQUEST_CACHING_VALUE
+
+if CACHE_GENERAL:
+ general_additional_start_info["cache_backend"] = CACHE_BACKEND
+ if not CACHE_BACKEND.lower() == 'memory':
+ general_additional_start_info["cache_root_path_or_url"] = CACHE_ROOT_PATH_OR_URL
+ general_additional_start_info[
+ "default_request_caching_value"
+ ] = DEFAULT_REQUEST_CACHING_VALUE
+
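+# Flatten LEVEL_TO_FWD_KEY ({level: [keys]}) into FWD_KEY ({forward_key: level}) for direct lookups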
+FWD_KEY = {}
+for level, fk_list in LEVEL_TO_FWD_KEY.items():
+ for _fk in fk_list:
+ FWD_KEY[_fk] = int(level)
+
+if PROXY:
+ openai_additional_start_info["proxy"] = PROXY
+
+
+def get_limiter_key(request: Request):
+ limiter_prefix = f"{request.scope.get('root_path')}{request.scope.get('path')}"
+ fk_or_sk = request.headers.get("Authorization", "default")
+ key = f"{limiter_prefix},{fk_or_sk}"
+ return key
+
+
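+# Resolve the request rate limit for a limiter key of the form "<route_path>,<authorization>"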
+def dynamic_request_rate_limit(key: str):
+    limiter_prefix, fk_or_sk = key.split(',')
+ key_level = FWD_KEY.get(fk_or_sk, 0)
+ for route in req_rate_limit_dict:
+ if key.startswith(route):
+ for level_dict in req_rate_limit_dict[route]:
+ if level_dict['level'] == key_level:
+ return level_dict['limit']
+
+ break
+ return GLOBAL_RATE_LIMIT
+
+
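+# Convert a rate string such as "100/second" into the minimum interval (seconds) between streamed tokens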
+def cvt_token_rate_to_interval(token_rate_limit: str):
+ if token_rate_limit:
+ rate_limit_item = limits.parse(token_rate_limit)
+ token_interval = (
+ rate_limit_item.multiples * rate_limit_item.GRANULARITY.seconds
+ ) / rate_limit_item.amount
+ else:
+ token_interval = 0
+ return token_interval
+
+
+token_interval_conf = {}
+for route, rate_limit_list in token_rate_limit_conf.items():
+ token_interval_conf.setdefault(route, {})
+ for level_dict in rate_limit_list:
+ token_interval_conf[route][level_dict['level']] = cvt_token_rate_to_interval(
+ level_dict['limit']
+ )
+
+styles = itertools.cycle(
+ ["#7CD9FF", "#BDADFF", "#9EFFE3", "#f1b8e4", "#F5A88E", "#BBCA89"]
+)
+
+
+def show_startup():
+ for base_url, route_prefix in zip(OPENAI_BASE_URL, OPENAI_ROUTE_PREFIX):
+ print_startup_info(
+ base_url,
+ route_prefix,
+ OPENAI_API_KEY,
+ FWD_KEY,
+ style=next(styles),
+ **openai_additional_start_info,
+ )
+ for base_url, route_prefix in zip(GENERAL_BASE_URL, GENERAL_ROUTE_PREFIX):
+ print_startup_info(
+ base_url,
+ route_prefix,
+ "",
+ "",
+ style=next(styles),
+ **general_additional_start_info,
+ )
+
+ print_rate_limit_info(
+ RATE_LIMIT_BACKEND,
+ RATE_LIMIT_STRATEGY,
+ GLOBAL_RATE_LIMIT,
+ req_rate_limit_dict,
+ token_rate_limit_conf,
+ )
diff --git a/openai_forward/content/openai.py b/openai_forward/content/openai.py
index 4e05f40..58f80d5 100644
--- a/openai_forward/content/openai.py
+++ b/openai_forward/content/openai.py
@@ -9,8 +9,9 @@
from loguru import logger
from orjson import JSONDecodeError
+from openai_forward.config.settings import DEFAULT_REQUEST_CACHING_VALUE
+
from ..helper import get_client_ip, get_unique_id, route_prefix_to_str
-from ..settings import DEFAULT_REQUEST_CACHING_VALUE
from .helper import markdown_print, parse_sse_buffer, print
@@ -173,7 +174,10 @@ def parse_payload(self, request: Request, raw_payload):
if self.webui:
self.q.put({"uid": uid, "payload": raw_payload})
- payload = orjson.loads(raw_payload)
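+        # The request body may be empty; orjson.loads(b"") would raise, so fall back to an empty payload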
+ if raw_payload:
+ payload = orjson.loads(raw_payload)
+ else:
+ payload = {}
caching = payload.pop("caching", None)
if caching is None:
caching = DEFAULT_REQUEST_CACHING_VALUE
diff --git a/openai_forward/forward/__init__.py b/openai_forward/forward/__init__.py
index a8f1567..8bfbd0c 100644
--- a/openai_forward/forward/__init__.py
+++ b/openai_forward/forward/__init__.py
@@ -1,12 +1,13 @@
from typing import List
-from ..settings import (
+from openai_forward.config.settings import (
GENERAL_BASE_URL,
GENERAL_ROUTE_PREFIX,
OPENAI_BASE_URL,
OPENAI_ROUTE_PREFIX,
PROXY,
)
+
from .core import GenericForward, OpenaiForward
diff --git a/openai_forward/forward/core.py b/openai_forward/forward/core.py
index 5edc2c6..a1a1a61 100644
--- a/openai_forward/forward/core.py
+++ b/openai_forward/forward/core.py
@@ -4,10 +4,12 @@
import traceback
from asyncio import Queue
from itertools import cycle
-from typing import Any, AsyncGenerator, Iterable
+from typing import AsyncGenerator
import aiohttp
import anyio
+import litellm
+import orjson
from aiohttp import TCPConnector
from fastapi import HTTPException, Request, status
from loguru import logger
@@ -19,6 +21,7 @@
get_cached_generic_response,
get_cached_response,
)
+from ..config.settings import *
from ..content.openai import (
ChatLogger,
CompletionLogger,
@@ -26,8 +29,7 @@
WhisperLogger,
)
from ..decorators import async_retry, async_token_rate_limit_auth_level
-from ..helper import InfiniteSet, get_client_ip, get_unique_id
-from ..settings import *
+from ..helper import InfiniteSet, get_client_ip
# from beartype import beartype
@@ -140,7 +142,7 @@ def validate_request_host(ip):
logger.warning(f"IP {ip} is unauthorized")
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
- detail=f"Forbidden Error",
+ detail="Forbidden Error",
)
@staticmethod
@@ -312,9 +314,9 @@ def prepare_client(self, request: Request, return_origin_header=False) -> dict:
'route_path': route_path,
}
- async def _handle_payload(self, request: Request, route_path: str, model_set):
+ def _handle_payload(self, method: str, payload, route_path: str, model_set):
- if not request.method == "POST":
+ if method != "POST":
return
if route_path in (
@@ -323,7 +325,6 @@ async def _handle_payload(self, request: Request, route_path: str, model_set):
EMBEDDING_ROUTE,
CUSTOM_GENERAL_ROUTE,
):
- payload = await request.json()
model = payload.get("model", None)
if model is not None and model not in model_set:
@@ -345,14 +346,18 @@ async def reverse_proxy(self, request: Request):
"""
assert self.client
data = await request.body()
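+        # The body may be empty (e.g. GET requests); avoid passing an empty bytestring to orjson.loads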
+ if data:
+ payload = orjson.loads(data)
+ else:
+ payload = {}
if LOG_GENERAL:
- logger.debug(f"payload: {data}")
+ logger.debug(f"payload: {payload}")
client_config = self.prepare_client(request, return_origin_header=True)
route_path = client_config["route_path"]
_, model_set = self.handle_authorization(client_config)
- payload = await self._handle_payload(request, route_path, model_set)
+ self._handle_payload(request.method, payload, route_path, model_set)
cached_response, cache_key = get_cached_generic_response(
data, request, route_path
@@ -361,6 +366,37 @@ async def reverse_proxy(self, request: Request):
if cached_response:
return cached_response
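+        # If a custom model backend is configured, reroute mapped chat models through litellm (e.g. to Ollama)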
+ if CUSTOM_MODEL_CONFIG and route_path in (CHAT_COMPLETION_ROUTE,):
+ prev_model = payload['model']
+ custom_model_map = CUSTOM_MODEL_CONFIG['model_map']
+ current_model = custom_model_map.get(prev_model, prev_model)
+ if current_model in custom_model_map.values():
+ if CUSTOM_MODEL_CONFIG['backend'] == "ollama":
+ api_base = CUSTOM_MODEL_CONFIG['api_base']
+ prev_model = payload['model']
+ payload['model'] = f"ollama_chat/{current_model}"
+ logger.debug(f"{prev_model} -> {payload['model']=}")
+
+ r = await litellm.acompletion(
+ **payload,
+ api_base=api_base,
+ )
+
+ @async_token_rate_limit_auth_level(token_interval_conf, FWD_KEY)
+ async def stream(request: Request):
+ if payload.get("stream", True):
+ async for chunk in r:
+ yield b'data: ' + orjson.dumps(
+ chunk.to_dict()
+ ) + b'\n\n'
+ else:
+ yield orjson.dumps(r.to_dict())
+
+ return StreamingResponse(
+ stream(request),
+ status_code=200,
+ media_type="text/event-stream",
+ )
r = await self.send(client_config, data=data)
return StreamingResponse(
@@ -491,7 +527,7 @@ async def _handle_payload(self, request: Request, route_path: str, model_set):
else:
...
- except Exception as e:
+ except Exception:
logger.warning(
f"log chat error:\nhost:{request.client.host} method:{request.method}: {traceback.format_exc()}"
)
diff --git a/openai_forward/settings.py b/openai_forward/settings.py
deleted file mode 100644
index 648cc72..0000000
--- a/openai_forward/settings.py
+++ /dev/null
@@ -1,210 +0,0 @@
-import itertools
-import os
-
-import limits
-from fastapi import Request
-
-from .console import print_rate_limit_info, print_startup_info
-from .content.config import setting_log
-from .helper import env2dict, env2list, format_route_prefix, get_client_ip
-
-openai_additional_start_info = {}
-general_additional_start_info = {}
-
-TIMEOUT = float(os.environ.get("TIMEOUT", "").strip() or "10")
-DEFAULT_STREAM_RESPONSE = (
- os.environ.get("DEFAULT_STREAM_RESPONSE", "True").strip().lower() == "true"
-)
-
-ITER_CHUNK_TYPE = (
- os.environ.get("ITER_CHUNK_TYPE", "").strip() or "efficiency"
-) # Options: efficiency, one-by-one
-
-CHAT_COMPLETION_ROUTE = (
- os.environ.get("CHAT_COMPLETION_ROUTE", "/v1/chat/completions").strip().lower()
-)
-COMPLETION_ROUTE = os.environ.get("COMPLETION_ROUTE", "/v1/completions").strip().lower()
-EMBEDDING_ROUTE = os.environ.get("EMBEDDING_ROUTE", "/v1/embeddings").strip().lower()
-CUSTOM_GENERAL_ROUTE = os.environ.get("CUSTOM_GENERAL_ROUTE", "").strip().lower()
-
-CACHE_ROUTE_SET = set(env2dict("CACHE_ROUTES", []))
-
-openai_additional_start_info['cache_routes'] = CACHE_ROUTE_SET
-general_additional_start_info['cache_routes'] = CACHE_ROUTE_SET
-
-FORWARD_CONFIG = env2dict(
- "FORWARD_CONFIG",
- [{"base_url": "https://api.openai.com", "route": "/", "type": "openai"}],
-)
-
-ENV_VAR_SEP = ","
-
-OPENAI_BASE_URL = [
- i['base_url'] for i in FORWARD_CONFIG if i and i.get('type') == 'openai'
-]
-OPENAI_ROUTE_PREFIX = [
- format_route_prefix(i['route'])
- for i in FORWARD_CONFIG
- if i and i.get('type') == 'openai'
-]
-
-GENERAL_BASE_URL = [
- i['base_url'] for i in FORWARD_CONFIG if i and i.get('type') == 'general'
-]
-GENERAL_ROUTE_PREFIX = [
- format_route_prefix(i['route'])
- for i in FORWARD_CONFIG
- if i and i.get('type') == 'general'
-]
-
-for openai_route, general_route in zip(OPENAI_ROUTE_PREFIX, GENERAL_ROUTE_PREFIX):
- assert openai_route not in GENERAL_ROUTE_PREFIX
- assert general_route not in OPENAI_ROUTE_PREFIX
-
-BENCHMARK_MODE = os.environ.get("BENCHMARK_MODE", "false").strip().lower() == "true"
-if BENCHMARK_MODE:
- openai_additional_start_info["benchmark_mode"] = BENCHMARK_MODE
-
-PRINT_CHAT = os.environ.get("PRINT_CHAT", "False").strip().lower() == "true"
-
-LOG_OPENAI = os.environ.get("LOG_OPENAI", "False").strip().lower() == "true"
-LOG_GENERAL = os.environ.get("LOG_GENERAL", "False").strip().lower() == "true"
-
-CACHE_OPENAI = os.environ.get("CACHE_OPENAI", "False").strip().lower() == "true"
-CACHE_GENERAL = os.environ.get("CACHE_GENERAL", "False").strip().lower() == "true"
-
-openai_additional_start_info["LOG_OPENAI"] = LOG_OPENAI
-general_additional_start_info["LOG_GENERAL"] = LOG_GENERAL
-
-
-if LOG_OPENAI:
- setting_log(openai_route_prefix=OPENAI_ROUTE_PREFIX, print_chat=PRINT_CHAT)
-
-
-if PRINT_CHAT:
- openai_additional_start_info["print_chat"] = True
-
-
-LOG_CACHE_DB_INFO = (
- os.environ.get("LOG_CACHE_DB_INFO", "false").strip().lower() == "true"
-)
-CACHE_BACKEND = os.environ.get("CACHE_BACKEND", "MEMORY").strip()
-CACHE_ROOT_PATH_OR_URL = os.environ.get("CACHE_ROOT_PATH_OR_URL", ".").strip()
-
-DEFAULT_REQUEST_CACHING_VALUE = False
-if CACHE_OPENAI:
- openai_additional_start_info["cache_backend"] = CACHE_BACKEND
- if not CACHE_BACKEND.lower() == 'memory':
- openai_additional_start_info["cache_root_path_or_url"] = CACHE_ROOT_PATH_OR_URL
- DEFAULT_REQUEST_CACHING_VALUE = (
- os.environ.get("DEFAULT_REQUEST_CACHING_VALUE", "false").strip().lower()
- == "true"
- )
- openai_additional_start_info[
- "default_request_caching_value"
- ] = DEFAULT_REQUEST_CACHING_VALUE
-
-if CACHE_GENERAL:
- general_additional_start_info["cache_backend"] = CACHE_BACKEND
- if not CACHE_BACKEND.lower() == 'memory':
- general_additional_start_info["cache_root_path_or_url"] = CACHE_ROOT_PATH_OR_URL
- DEFAULT_REQUEST_CACHING_VALUE = (
- os.environ.get("DEFAULT_REQUEST_CACHING_VALUE", "false").strip().lower()
- == "true"
- )
- general_additional_start_info[
- "default_request_caching_value"
- ] = DEFAULT_REQUEST_CACHING_VALUE
-
-IP_WHITELIST = env2list("IP_WHITELIST", sep=ENV_VAR_SEP)
-IP_BLACKLIST = env2list("IP_BLACKLIST", sep=ENV_VAR_SEP)
-
-OPENAI_API_KEY = env2dict("OPENAI_API_KEY_CONFIG")
-FWD_KEY = env2dict("FORWARD_KEY_CONFIG")
-LEVEL_MODELS = {int(key): value for key, value in env2dict("LEVEL_MODELS").items()}
-
-PROXY = os.environ.get("PROXY", "").strip() or None
-
-if PROXY:
- openai_additional_start_info["proxy"] = PROXY
-
-GLOBAL_RATE_LIMIT = os.environ.get("GLOBAL_RATE_LIMIT", "").strip() or "inf"
-RATE_LIMIT_BACKEND = os.environ.get("REQ_RATE_LIMIT_BACKEND", "").strip() or None
-RATE_LIMIT_STRATEGY = (
- os.environ.get("RATE_LIMIT_STRATEGY", "fixed-window").strip() or "fixed-window"
-)
-req_rate_limit_dict = env2dict('REQ_RATE_LIMIT')
-
-
-def get_limiter_key(request: Request):
- limiter_prefix = f"{request.scope.get('root_path')}{request.scope.get('path')}"
- fk_or_sk = request.headers.get("Authorization", "default")
- key = f"{limiter_prefix},{fk_or_sk}"
- return key
-
-
-def dynamic_request_rate_limit(key: str):
- limite_prefix, fk_or_sk = key.split(',')
- key_level = FWD_KEY.get(fk_or_sk, 0)
- for route in req_rate_limit_dict:
- if key.startswith(route):
- for level_dict in req_rate_limit_dict[route]:
- if level_dict['level'] == key_level:
- return level_dict['limit']
-
- break
- return GLOBAL_RATE_LIMIT
-
-
-def cvt_token_rate_to_interval(token_rate_limit: str):
- if token_rate_limit:
- rate_limit_item = limits.parse(token_rate_limit)
- token_interval = (
- rate_limit_item.multiples * rate_limit_item.GRANULARITY.seconds
- ) / rate_limit_item.amount
- else:
- token_interval = 0
- return token_interval
-
-
-token_rate_limit_conf = env2dict("TOKEN_RATE_LIMIT")
-token_interval_conf = {}
-for route, rate_limit_list in token_rate_limit_conf.items():
- token_interval_conf.setdefault(route, {})
- for level_dict in rate_limit_list:
- token_interval_conf[route][level_dict['level']] = cvt_token_rate_to_interval(
- level_dict['limit']
- )
-
-styles = itertools.cycle(
- ["#7CD9FF", "#BDADFF", "#9EFFE3", "#f1b8e4", "#F5A88E", "#BBCA89"]
-)
-
-
-def show_startup():
- for base_url, route_prefix in zip(OPENAI_BASE_URL, OPENAI_ROUTE_PREFIX):
- print_startup_info(
- base_url,
- route_prefix,
- OPENAI_API_KEY,
- FWD_KEY,
- style=next(styles),
- **openai_additional_start_info,
- )
- for base_url, route_prefix in zip(GENERAL_BASE_URL, GENERAL_ROUTE_PREFIX):
- print_startup_info(
- base_url,
- route_prefix,
- "",
- "",
- style=next(styles),
- **general_additional_start_info,
- )
-
- print_rate_limit_info(
- RATE_LIMIT_BACKEND,
- RATE_LIMIT_STRATEGY,
- GLOBAL_RATE_LIMIT,
- req_rate_limit_dict,
- token_rate_limit_conf,
- )
diff --git a/openai_forward/webui/run.py b/openai_forward/webui/run.py
index 50d4547..4294f4c 100644
--- a/openai_forward/webui/run.py
+++ b/openai_forward/webui/run.py
@@ -1,4 +1,5 @@
import ast
+import os
import pickle
import secrets
import threading
@@ -85,43 +86,31 @@ def worker(log_socket: zmq.Socket, q: SimpleQueue):
"Apply and Restart", help="Saving configuration and reloading openai forward"
):
with st.spinner("Saving configuration and reloading openai forward..."):
- env_dict = config.convert_to_env(set_env=False)
+ # env_dict = config.convert_to_env(set_env=False)
socket = st.session_state['socket']
- socket.send(pickle.dumps(env_dict))
+ socket.send(pickle.dumps(config.to_dict()))
message: bytes = socket.recv()
st.success(message.decode())
- def generate_env_content():
- env_dict = config.convert_to_env(set_env=False)
- env_content = "\n".join([f"{key}={value}" for key, value in env_dict.items()])
- return env_content
-
- if st.button("Save to .env", help="Saving configuration to .env file"):
- with st.spinner("Saving configuration to .env file."):
- with open(".env", "w") as f:
- f.write(generate_env_content())
- st.success("Configuration saved to .env file")
-
if st.button(
- "Export to .env file",
+ "Export to config.yaml",
):
- # Deferred data for download button: https://github.com/streamlit/streamlit/issues/5053
+ yaml_str = yaml.dump(config.to_dict(), default_flow_style=False)
+ yaml_bytes = yaml_str.encode('utf-8')
download = st.download_button(
use_container_width=True,
label="Export",
- data=generate_env_content(),
- file_name="config.env",
+ data=yaml_bytes,
+ file_name="config.yaml",
mime="text/plain",
)
def display_forward_configuration():
- forward_config = config.forward
-
st.subheader("AI Forward")
with st.form("forward_configuration", border=False):
- df = pd.DataFrame([i.to_dict() for i in forward_config.forward])
+ df = pd.DataFrame([i.to_dict() for i in config.forward])
edited_df = st.data_editor(
df, num_rows="dynamic", key="editor1", use_container_width=True
)
@@ -155,7 +144,7 @@ def display_forward_configuration():
submitted = st.form_submit_button("Save", use_container_width=True)
if submitted:
- forward_config.forward = [
+ config.forward = [
ForwardItem(row["base_url"], row["route"], row["type"])
for i, row in edited_df.iterrows()
if row["route"] is not None and row["base_url"] is not None
@@ -218,11 +207,9 @@ def display_api_key_configuration():
'gpt-4-0125-preview',
'gpt-4-0613',
'gpt-4-1106-preview',
- 'gpt-4-1106-vision-preview',
'gpt-4-turbo',
'gpt-4-turbo-2024-04-09',
'gpt-4-turbo-preview',
- 'gpt-4-vision-preview',
'gpt-4o',
'gpt-4o-2024-05-13',
'gpt-4o-mini',
@@ -262,9 +249,26 @@ def display_api_key_configuration():
with st.form("api_key_form", border=False):
st.subheader("OpenAI API Key")
+
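+        # Parse a user-entered cell (e.g. "1,2", "[1, 2]", "fk-1") into a Python list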
+ def to_list(x: str):
+            x = str(x).replace('，', ',').strip()  # normalize full-width commas
+ if x == '':
+ return []
+ try:
+ x = ast.literal_eval(x)
+ if isinstance(x, list):
+ return x
+ if isinstance(x, tuple):
+ return list(x)
+ else:
+ return [x]
+ except:
+ return str(x).split(',')
+
+ to_int_list = lambda x: [int(i) for i in x]
df = pd.DataFrame(
[
- {'api_key': key, 'level': value}
+ {'api_key': key, 'level': str(value)}
for key, value in api_key.openai_key.items()
]
)
@@ -275,7 +279,7 @@ def display_api_key_configuration():
st.subheader("Forward Key")
df2 = pd.DataFrame(
[
- {'api_key': key, 'level': value}
+ {'level': int(key), 'api_key': str(value)}
for key, value in api_key.forward_key.items()
]
)
@@ -286,11 +290,13 @@ def display_api_key_configuration():
submitted = st.form_submit_button("Save", use_container_width=True)
if submitted:
api_key.openai_key = {
- row["api_key"]: row["level"] for i, row in edited_df.iterrows()
+ row["api_key"]: to_int_list(to_list(row["level"]))
+ for i, row in edited_df.iterrows()
}
api_key.forward_key = {
- row["api_key"]: row["level"] for i, row in edited_df2.iterrows()
+ int(row["level"]): to_list(row["api_key"])
+ for i, row in edited_df2.iterrows()
}
api_key.level = level_model_map
@@ -304,14 +310,14 @@ def display_cache_configuration():
with st.container():
st.subheader("Cache Configuration")
- cache_openai = st.checkbox("Cache OpenAI route", cache.cache_openai)
+ cache_openai = st.checkbox("Cache OpenAI route", cache.openai)
cache_default_request_caching_value = st.checkbox(
"For OpenAI API, return using cache by default",
cache.default_request_caching_value,
disabled=not cache_openai,
)
- cache_general = st.checkbox("Cache General route", cache.cache_general)
+ cache_general = st.checkbox("Cache General route", cache.general)
cache_backend = st.selectbox(
"Cache Backend",
@@ -325,21 +331,21 @@ def display_cache_configuration():
disabled=cache_backend == "MEMORY",
)
- df = pd.DataFrame([{"cache_route": i} for i in cache.cache_routes])
+ df = pd.DataFrame([{"cache_route": i} for i in cache.routes])
edited_df = st.data_editor(
df, num_rows="dynamic", key="editor1", use_container_width=True
)
submitted = st.button("Save", use_container_width=True)
if submitted:
- cache.cache_openai = cache_openai
- cache.cache_general = cache_general
+ cache.openai = cache_openai
+ cache.general = cache_general
cache.backend = cache_backend
cache.root_path_or_url = cache_root_path_or_url
cache.default_request_caching_value = cache_default_request_caching_value
- cache.cache_routes = [
+ cache.routes = [
row['cache_route']
for i, row in edited_df.iterrows()
if row["cache_route"] is not None
diff --git a/pyproject.toml b/pyproject.toml
index 8c0d8af..e641b25 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,6 +36,7 @@ dependencies = [
"tomli",
"tomli-w",
"pyzmq",
+ "pyyaml",
]
dynamic = ["version"]
@@ -78,25 +79,35 @@ aifd = "openai_forward.__main__:main"
[tool.hatch.version]
path = "openai_forward/__init__.py"
-[tool.isort]
-profile = "black"
-
-[tool.black]
-line-length = 88
-exclude = '''
-/(
- \.git
- | \.hg
- | \.mypy_cache
- | \.tox
- | \.venv
- | _build
- | buck-out
- | build
- | dist
-)/
-'''
+#[tool.isort]
+#profile = "black"
+#
+#[tool.black]
+#line-length = 88
+#exclude = '''
+#/(
+# \.git
+# | \.hg
+# | \.mypy_cache
+# | \.tox
+# | \.venv
+# | _build
+# | buck-out
+# | build
+# | dist
+#)/
+#'''
+[tool.ruff]
+line-length = 100
+select = [ # rules to enable
+ "F",
+ "E",
+ "W",
+ "UP",
+ "PERF",
+]
+ignore = ["F401"] # rules to ignore
[tool.hatch.build]
include = [
@@ -107,3 +118,7 @@ exclude = [
[tool.hatch.build.targets.wheel]
packages = ["openai_forward"]
+
+[tool.rye]
+managed = true
+dev-dependencies = []
\ No newline at end of file
diff --git a/tests/test_forwarding.py b/tests/test_forwarding.py
index 34cda5d..48ed2f4 100644
--- a/tests/test_forwarding.py
+++ b/tests/test_forwarding.py
@@ -1,5 +1,5 @@
import importlib
-from unittest.mock import Mock, patch
+from unittest.mock import Mock
import pytest
from fastapi import Request
@@ -11,6 +11,7 @@
params=[
{
"FWD_KEY": {'fk0': 0, 'fk1': 1, 'fk2': 2},
+ # "FWD_KEY": {0:["fk-0"], 1:["fk-1"], 2: ["fk-2"]},
"OPENAI_API_KEY": {'sk1': [0, 1], 'sk2': [1], 'sk3': [2], 'sk4': [0]},
"LEVEL_MODELS": {
1: ['gpt-3.5-turbo', 'text-embedding-3-small'],
@@ -21,7 +22,7 @@
]
)
def openai_forward(request):
- from openai_forward import settings
+ from openai_forward.config import settings
settings.FWD_KEY = request.param['FWD_KEY']
settings.OPENAI_API_KEY = request.param['OPENAI_API_KEY']
diff --git a/tests/test_settings.py b/tests/test_settings.py
index f7e72b3..63f6a4f 100644
--- a/tests/test_settings.py
+++ b/tests/test_settings.py
@@ -1,4 +1,4 @@
-from openai_forward.settings import (
+from openai_forward.config.settings import (
CACHE_ROUTE_SET,
FORWARD_CONFIG,
FWD_KEY,