Support configuration through YAML files #141

Merged · 1 commit · Jul 21, 2024
17 changes: 12 additions & 5 deletions .env
@@ -21,10 +21,17 @@ DEFAULT_REQUEST_CACHING_VALUE=false
BENCHMARK_MODE=true

FORWARD_CONFIG=[{"base_url":"https://api.openai.com","route":"/","type":"openai"}]

#LEVEL_MODELS={"1": ["gpt-4"], "2": ["gpt-3.5-turbo"]}
#OPENAI_API_KEY_CONFIG={"sk-xxx": [0], "sk-xxx": [1], "sk-xxx": [1,2]}
#FORWARD_KEY_CONFIG={"fk-0": 0, "fk-1": 1, "fk-2": 2, "default": 1}
#FORWARD_CONFIG=[{"base_url":"https://api.deepseek.com","route":"/","type":"openai"}]
#FORWARD_CONFIG=[{"base_url":"http://localhost:3000","route":"/","type":"general"}]
#CUSTOM_MODEL_CONFIG='{
#"backend":"ollama",
#"model_map": {"gpt-3.5-turbo":"qwen2:7b"},
#"api_base": "http://localhost:11434"
#}'

#LEVEL_MODELS='{"1":["gpt-4"],"2":["gpt-3.5-turbo"]}'
#OPENAI_API_KEY_CONFIG='{"sk-xxx": [0], "sk-xxx": [1], "sk-xxx": [1,2]}'
#FORWARD_KEY_CONFIG='{"0": ["fk-0"], "1":["fk-1", "fk-11"], "2": ["fk-2"]}'

# `REQ_RATE_LIMIT`: request rate limit for specified routes, applied per user
# format: {route: ratelimit-string}
Expand All @@ -46,7 +53,7 @@ RATE_LIMIT_STRATEGY=moving-window

# Rate limit for returned tokens
TOKEN_RATE_LIMIT='{
"/v1/chat/completions":[{"level":0,"limit":"60/second"}],
"/v1/chat/completions":[{"level":0,"limit":"100/second"}],
"/v1/completions":[{"level":0,"limit":"60/second"}],
"/benchmark/v1/chat/completions":[{"level":0,"limit":"20/second"}]
}'
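For reference, the commented-out entries above map onto nested keys in the YAML configuration introduced by this PR. A minimal sketch of the correspondence, mirroring `openai-forward-config.example.yaml` further down in this diff:

```yaml
# FORWARD_CONFIG=[{"base_url":"https://api.deepseek.com","route":"/","type":"openai"}] becomes:
forward:
  - base_url: "https://api.deepseek.com"
    route: "/"
    type: "openai"

# CUSTOM_MODEL_CONFIG='{...}' becomes:
custom_model_config:
  backend: "ollama"
  model_map:
    gpt-3.5-turbo: "qwen2:7b"
  api_base: "http://localhost:11434"
```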
1 change: 1 addition & 0 deletions .gitignore
@@ -13,6 +13,7 @@ run.sh
ssl/
chat.yaml
chat_*.yaml
openai-forward-config.yaml.*

config.toml
config_parser.py
27 changes: 19 additions & 8 deletions Examples/chat_completion.py
@@ -18,14 +18,25 @@

max_tokens = None

# user_content = """
# Implement the fastest currently known square root algorithm in C
# """
# user_content = 'Initially there are 1000 kg of mushrooms, 99% of which is water. After a few days of drying in the sun, the water content is now 98%. How much water have the mushrooms lost?'
# user_content = "Write down the most romantic sentence you can think of."
user_content = "What are the theoretical formulas of Mie theory in light scattering? Express them in LaTeX syntax, with each formula wrapped in $$ delimiters."

model = "gpt-3.5-turbo"

queries = {
    0: "Implement the fastest currently known inverse square root algorithm in C",
    1: "If express delivery takes 3 days to arrive, why not just send every parcel 3 days earlier?",
    2: "With a single straight cut, how do you divide four oranges among four children?",
    3: "Initially there are 1000 kg of mushrooms, 99% of which is water. After a few days of drying in the sun, the water content is now 98%. How much water have the mushrooms lost?",
    4: "Two brothers run a 100-meter race. The first time, starting from the same line, the older brother reaches the finish one meter ahead and wins. The second time, the older brother starts one meter behind the line. What is the outcome?",
    5: "What are the theoretical formulas of Mie theory in light scattering? Express them in LaTeX syntax, with each formula wrapped in $$ delimiters.",
    6: "Write down the most romantic sentence you can think of.",
    7: "Why didn't my parents invite me to their wedding?",
    8: "When a person commits suicide, does the world gain one more person who committed suicide, or lose one?",
}

user_content = queries[8]

# model = "gpt-3.5-turbo"
model = "gpt-4o-mini"
# model = "deepseek-chat"
# model="gpt-4o"
# model="gpt-4"

mt = MeasureTime().start()
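The tail of this example is truncated in this view; a minimal sketch of how `user_content` and `model` would typically be sent through the proxy, assuming openai-forward listens on localhost:8000 and `fk-1` is a configured forward key (both assumptions, not taken from this diff):

```python
from openai import OpenAI

# Assumed service address and forward key; adjust to your deployment.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="fk-1")

stream = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Write down the most romantic sentence you can think of."}],
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)
```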
34 changes: 5 additions & 29 deletions deploy.md
@@ -6,14 +6,10 @@
</h1>
<div align="center">

One-click deploy to Render
[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/KenyonY/openai-forward)



[pip deployment](#pip部署) |
-[docker deployment](#docker部署) |
-[one-click Render deployment](#render-一键部署)
+[docker deployment](#docker部署)

</div>

@@ -22,14 +18,14 @@
1. [pip installation & deployment](deploy.md#pip部署)
2. [Docker deployment](deploy.md#docker部署)

-**One-click free cloud deployment**
+**~~One-click free cloud deployment~~**

-1. [One-click deployment on Render](deploy.md#render-一键部署)
-2. More deployment options: https://github.com/KenyonY/openai-forward/blob/0.5.x/deploy.md
+1. ~~[One-click deployment on Render](deploy.md#render-一键部署)~~
+2. ~~More deployment options: https://github.com/KenyonY/openai-forward/blob/0.5.x/deploy.md~~

**Other reverse proxies**
[CloudFlare AI Gateway](https://developers.cloudflare.com/ai-gateway/get-started/creating-gateway/)
-[ChatGPT](https://github.com/pandora-next/deploy)
+~~[ChatGPT](https://github.com/pandora-next/deploy)~~


---
@@ -113,23 +109,3 @@ aifd run
<a>
<img src="https://raw.githubusercontent.com/KenyonY/openai-forward/main/.github/images/separators/aqua.png" height=8px width="100%">
</a>

## One-Click Deployment on Render
[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/KenyonY/openai-forward)

Render is arguably the simplest of all the deployment options, and the domain it generates is directly accessible from mainland China!

1. Click the one-click deploy button. Alternatively, fork this repository first, then in the Render Dashboard choose New Web Services --> Connect to the newly forked repository; the defaults are fine for the remaining steps.
2. Fill in the environment variables. All of `openai-forward`'s options can be set through environment variables; fill them in as needed.

Then just wait for the deployment to finish.
Render's free plan offers 750 hours of free instance time per month (meaning a single instance can run around the clock), 100 GB of bandwidth, and 500 minutes of build time.

Note: by default Render puts the service to sleep after 15 minutes without requests (the upside being that a sleeping service does not consume the 750 free instance hours); the next request after a sleep is blocked for ~15 s.
If you want the service not to sleep after 15 minutes, you can keep it alive with a scheduled script (e.g. every 14 minutes); see `scripts/keep_render_alive.py` for reference, and the sketch after this section.
For zero-downtime deployment, set the `Health Check Path` to `/healthz` in the Render settings.

> https://render.openai-forward.com
> https://openai-forward.onrender.com
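A keep-alive script of the kind referenced above might look like the following minimal sketch (hypothetical; not the contents of `scripts/keep_render_alive.py`):

```python
import time

import requests

# Assumption: replace with your own Render service URL.
URL = "https://openai-forward.onrender.com/healthz"

while True:
    try:
        requests.get(URL, timeout=10)
    except requests.RequestException as exc:
        print(f"keep-alive ping failed: {exc}")
    time.sleep(14 * 60)  # stay inside Render's 15-minute idle window
```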

6 changes: 0 additions & 6 deletions deploy_en.md
@@ -76,12 +76,6 @@ proxy_buffering off;
docker run -d -p 8000:8000 beidongjiedeguang/openai-forward:latest
```

If the `.env` environment variable is specified:

```bash
docker run --env-file .env -d -p 8000:8000 beidongjiedeguang/openai-forward:latest
```

This will map the host's 8000 port. Access the service via `http://{ip}:8000`.
The log path inside the container is `/home/openai-forward/Log/`. It can be mapped when starting up.
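For example, the log directory can be mapped to a host path like so (the host-side path here is an arbitrary choice):

```bash
docker run -d -p 8000:8000 \
  -v $(pwd)/Log:/home/openai-forward/Log \
  beidongjiedeguang/openai-forward:latest
```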

92 changes: 92 additions & 0 deletions openai-forward-config.example.yaml
@@ -0,0 +1,92 @@
log:
  general: true
  openai: true

cache:
  general: true
  openai: true
  routes:
    - "/v1/chat/completions"
    - "/v1/embeddings"
  # `CACHE_BACKEND`: Options (MEMORY, LMDB, LevelDB)
  backend: MEMORY
  root_path_or_url: "./FLAXKV_DB"
  default_request_caching_value: false

chat_completion_route: "/v1/chat/completions"
# custom_general_route: "/v1/models/gemini-pro"

benchmark_mode: true

forward:
  - base_url: "https://api.openai.com"
    route: "/"
    type: "openai"

  - base_url: "https://generativelanguage.googleapis.com"
    route: "/gemini"
    type: "general"

# custom_model_config:
#   backend: "ollama"
#   model_map:
#     gpt-3.5-turbo: "qwen2:7b"
#   api_base: "http://localhost:11434"

api_key:
  level:
    1: ["gpt-4"]
    2: ["gpt-3.5-turbo"]

  openai_key:
    "sk-xxx1": [0]
    "sk-xxx2": [1]
    "sk-xxx3": [1, 2]

  forward_key:
    0: ["fk-0"]
    1: ["fk-1", "fk-11"]
    2: ["fk-2"]

rate_limit:
  global_rate_limit: "200/minute"
  strategy: "moving-window"
  iter_chunk: "one-by-one"
  req_rate_limit:
    - route: "/v1/chat/completions"
      value:
        - level: 0
          limit: "100/2minutes"

    - route: "/v1/completions"
      value:
        - level: 0
          limit: "60/minute;600/hour"
  req_rate_limit_backend: "redis://localhost:6379"

  token_rate_limit:
    - route: "/v1/chat/completions"
      value:
        - level: 0
          limit: "100/second"
    - route: "/v1/completions"
      value:
        - level: 0
          limit: "60/second"
    - route: "/benchmark/v1/chat/completions"
      value:
        - level: 0
          limit: "20/second"

timeout: 6

ip_blacklist:
ip_whitelist:

webui_restart_port: 15555
webui_log_port: 15556

proxy:
  default_stream_response: true

tz: Asia/Shanghai
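A file in this format can be read back with PyYAML; a minimal illustrative sketch (not necessarily how openai-forward itself parses it):

```python
from pathlib import Path

import yaml

config = yaml.safe_load(Path("openai-forward-config.yaml").read_text())

# Nested keys correspond to the former environment variables,
# e.g. `forward` to FORWARD_CONFIG and `cache.backend` to CACHE_BACKEND.
for fwd in config.get("forward", []):
    print(f'{fwd["route"]} -> {fwd["base_url"]} ({fwd["type"]})')
```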
92 changes: 92 additions & 0 deletions openai-forward-config.yaml
@@ -0,0 +1,92 @@
log:
  general: true
  openai: true

cache:
  general: true
  openai: true
  routes:
    - "/v1/chat/completions"
    - "/v1/embeddings"
  # `CACHE_BACKEND`: Options (MEMORY, LMDB, LevelDB)
  backend: MEMORY
  root_path_or_url: "./FLAXKV_DB"
  default_request_caching_value: false

chat_completion_route: "/v1/chat/completions"
# custom_general_route: "/v1/models/gemini-pro"

benchmark_mode: true

forward:
  - base_url: "https://api.openai.com"
    route: "/"
    type: "openai"

  - base_url: "https://generativelanguage.googleapis.com"
    route: "/gemini"
    type: "general"

# custom_model_config:
#   backend: "ollama"
#   model_map:
#     gpt-3.5-turbo: "qwen2:7b"
#   api_base: "http://localhost:11434"

api_key:
  level:
    1: ["gpt-4"]
    2: ["gpt-3.5-turbo"]

  openai_key:
    "sk-xxx1": [0]
    "sk-xxx2": [1]
    "sk-xxx3": [1, 2]

  forward_key:
    0: ["fk-0"]
    1: ["fk-1", "fk-11"]
    2: ["fk-2"]

rate_limit:
  global_rate_limit: "200/minute"
  strategy: "moving-window"
  iter_chunk: "one-by-one"
  req_rate_limit:
    - route: "/v1/chat/completions"
      value:
        - level: 0
          limit: "100/2minutes"

    - route: "/v1/completions"
      value:
        - level: 0
          limit: "60/minute;600/hour"
  req_rate_limit_backend: "redis://localhost:6379"

  token_rate_limit:
    - route: "/v1/chat/completions"
      value:
        - level: 0
          limit: "60/second"
    - route: "/v1/completions"
      value:
        - level: 0
          limit: "60/second"
    - route: "/benchmark/v1/chat/completions"
      value:
        - level: 0
          limit: "20/second"

timeout: 6

ip_blacklist:
ip_whitelist:

webui_restart_port: 15555
webui_log_port: 15556

proxy:
  default_stream_response: true

tz: Asia/Shanghai
3 changes: 1 addition & 2 deletions openai_forward/__init__.py
@@ -1,6 +1,5 @@
__version__ = "0.8.1"
__version__ = "0.8.2-alpha"

from dotenv import load_dotenv
from yaml import load

load_dotenv('.env', override=False)
22 changes: 18 additions & 4 deletions openai_forward/__main__.py
@@ -1,12 +1,20 @@
import atexit
import datetime
import os
import pickle
import platform
import signal
import subprocess
from pathlib import Path

import fire
import uvicorn
import yaml


def save_yaml(path: Path, data: dict):
    with open(path, 'w') as f:
        yaml.dump(data, f)


class Cli:
@@ -106,10 +114,16 @@ def mq_worker(log_socket: zmq.Socket):

while True:
    message = socket.recv()
-   env_dict: dict = pickle.loads(message)
+   config_dict: dict = pickle.loads(message)
+   config_path = Path("openai-forward-config.yaml")
+   # back up the current config as openai-forward-config.yaml.<timestamp>.bak
+   time_str = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+   backup_path = Path(f"openai-forward-config.yaml.{time_str}.bak")
+   if config_path.exists():
+       config_path.rename(backup_path)

-   for key, value in env_dict.items():
-       os.environ[key] = value
+   save_yaml(config_path, config_dict)

    self._restart_uvicorn(
        port=port,
@@ -208,8 +222,8 @@ def convert(log_folder: str = None, target_path: str = None):
Returns:
None
"""
+from openai_forward.config.settings import OPENAI_ROUTE_PREFIX
from openai_forward.helper import convert_folder_to_jsonl, route_prefix_to_str
-from openai_forward.settings import OPENAI_ROUTE_PREFIX

print(60 * '-')
if log_folder is None:
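The sending side of the config-reload path handled by `mq_worker` above is not part of this diff; a hypothetical sketch of a client pushing a new config dict over ZeroMQ (socket type, address, and payload shape are all assumptions):

```python
import pickle

import zmq

ctx = zmq.Context()
sock = ctx.socket(zmq.PUSH)  # assumed socket type, paired with the worker's recv()
sock.connect("tcp://127.0.0.1:15555")  # webui_restart_port from the example config

new_config = {
    "log": {"general": True, "openai": True},
    "forward": [{"base_url": "https://api.openai.com", "route": "/", "type": "openai"}],
}
sock.send(pickle.dumps(new_config))
```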