feat: Token rate limit on specified routes (#65)
- Configurable token rate limit
- Fix startup info
- More elegant chat log output
KenyonY authored Aug 23, 2023
1 parent ab9cdde commit adebb24
Showing 14 changed files with 364 additions and 250 deletions.
16 changes: 9 additions & 7 deletions .env
@@ -3,6 +3,8 @@
# `LOG_CHAT`: whether to log chats
LOG_CHAT=false

PRINT_CHAT=false

# `OPENAI_BASE_URL`: base URL of any OpenAI-style service to forward; multiple URLs may be specified, separated by commas.
# If more than one is specified, then no OPENAI_ROUTE_PREFIX/EXTRA_ROUTE_PREFIX may be the root route /
OPENAI_BASE_URL=https://api.openai.com
@@ -18,26 +20,26 @@ EXTRA_BASE_URL=
# `EXTRA_ROUTE_PREFIX`: route prefixes matching EXTRA_BASE_URL
EXTRA_ROUTE_PREFIX=

# `ROUTE_RATE_LIMIT`: i.e. RPM; request rate limit for the specified routes, per user
# `REQ_RATE_LIMIT`: request rate limit for the specified routes, per user
# format: {route: ratelimit-string}
# ratelimit-string format: [count] [per|/] [n (optional)] [second|minute|hour|day|month|year]; ref `ratelimit-string`: https://limits.readthedocs.io/en/stable/quickstart.html#rate-limit-string-notation
ROUTE_RATE_LIMIT={"/healthz": "60/2minutes", "/v1/chat/completions": "15/minute;200/hour"}
REQ_RATE_LIMIT={"/healthz":"100/2minutes","/v1/chat/completions":"60/minute;600/hour"}
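The ratelimit-string notation above can be sketched with a small parser. This is illustrative only; the project defers to the `limits` library (linked in the comment) for the authoritative grammar, and the routes below are just the examples from this file:

```python
# Illustrative parser for the "count [per|/] [n] granularity" notation above.
import re

PATTERN = re.compile(
    r"(?P<count>\d+)\s*(?:per|/)\s*(?P<multiple>\d+)?\s*"
    r"(?P<granularity>second|minute|hour|day|month|year)s?"
)

SECONDS = {"second": 1, "minute": 60, "hour": 3600,
           "day": 86400, "month": 2592000, "year": 31536000}

def parse_rate_limit(limit_string):
    """Return a list of (count, window_seconds) tuples, one per ';'-separated part."""
    parsed = []
    for part in limit_string.split(";"):
        m = PATTERN.fullmatch(part.strip())
        if m is None:
            raise ValueError(f"invalid rate limit string: {part!r}")
        multiple = int(m.group("multiple") or 1)
        parsed.append((int(m.group("count")), multiple * SECONDS[m.group("granularity")]))
    return parsed

print(parse_rate_limit("100/2minutes"))        # [(100, 120)]
print(parse_rate_limit("60/minute;600/hour"))  # [(60, 60), (600, 3600)]
```

So `"60/minute;600/hour"` stacks two limits on one route: a short-window cap and a longer-window budget.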

# `GLOBAL_RATE_LIMIT`: applies to all routes not specified in `ROUTE_RATE_LIMIT`. Unlimited by default if unset
GLOBAL_RATE_LIMIT=30/minute
# `GLOBAL_RATE_LIMIT`: limits all routes not specified in `REQ_RATE_LIMIT`. Unlimited by default if unset
GLOBAL_RATE_LIMIT=

# `RATE_LIMIT_STRATEGY` Options: (fixed-window, fixed-window-elastic-expiry, moving-window) ref: https://limits.readthedocs.io/en/latest/strategies.html
# `fixed-window`: most memory-efficient strategy; `moving-window`: most effective at preventing bursts, but higher memory cost.
RATE_LIMIT_STRATEGY=moving-window
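The trade-off between the strategies can be seen in a minimal sketch of the moving window (illustrative only, not the `limits` library's implementation): it remembers individual hit timestamps, which is what makes it burst-proof and also what costs extra memory.

```python
# Minimal moving-window rate limiter sketch: keep per-key hit timestamps
# and accept a hit only if fewer than `count` fall inside the last window.
from collections import defaultdict, deque

class MovingWindowLimiter:
    def __init__(self, count, window_seconds):
        self.count = count
        self.window = window_seconds
        self.hits = defaultdict(deque)  # key -> timestamps of accepted hits

    def hit(self, key, now):
        q = self.hits[key]
        while q and q[0] <= now - self.window:  # evict hits that left the window
            q.popleft()
        if len(q) >= self.count:
            return False
        q.append(now)
        return True

limiter = MovingWindowLimiter(count=2, window_seconds=60)
print([limiter.hit("user-1", t) for t in (0, 1, 2, 61)])
# → [True, True, False, True]: the burst's third hit is rejected,
#   then the window slides past the old hits and admits a new one.
```

A fixed window only needs one counter per key per window, which is why it is the most memory-efficient choice, but a burst straddling a window boundary can pass at twice the nominal rate.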


# TPM: rate limit on returned tokens
TOKEN_RATE_LIMIT=50/second
# Rate limit on returned tokens, per route
TOKEN_RATE_LIMIT={"/v1/chat/completions":"40/second"}
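A token rate limit like `"40/second"` translates into a minimum delay between streamed chunks: at most one chunk every 1/40 = 0.025 s on that route (the startup panel later in this commit prints this as seconds per token). A sketch of that mapping, with hypothetical helper names:

```python
# Sketch: convert a TOKEN_RATE_LIMIT value into a per-chunk pacing delay.
# `token_interval` and `throttled_stream` are illustrative names, not
# openai-forward's own API.
import asyncio

def token_interval(limit_string):
    """'40/second' -> minimum seconds to wait between streamed chunks."""
    count, _, granularity = limit_string.partition("/")
    seconds = {"second": 1, "minute": 60}[granularity]
    return seconds / int(count)

async def throttled_stream(chunks, interval):
    for chunk in chunks:
        yield chunk
        await asyncio.sleep(interval)  # pace the SSE output

print(token_interval("40/second"))  # 0.025
```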


TIMEOUT=300

IP_BLACKLIST=

# Set the timezone
TZ=Asia/Shanghai
TZ=Asia/Shanghai
8 changes: 4 additions & 4 deletions .env.example
@@ -20,16 +20,16 @@ EXTRA_BASE_URL='http://localhost:8882, http://localhost:8881'
# EXTRA_ROUTE_PREFIX: route prefixes matching EXTRA_BASE_URL
EXTRA_ROUTE_PREFIX='/tts, /translate'

# `ROUTE_RATE_LIMIT`: request rate limit for the specified routes (per user)
# `REQ_RATE_LIMIT`: request rate limit for the specified routes (per user)
# format: {route: ratelimit-string}
# ratelimit-string format: [count] [per|/] [n (optional)] [second|minute|hour|day|month|year]; ref `ratelimit-string`: https://limits.readthedocs.io/en/stable/quickstart.html#rate-limit-string-notation
ROUTE_RATE_LIMIT='{
REQ_RATE_LIMIT='{
"/healthz": "50/3minutes",
"/openai/v1/chat/completions": "1/10seconds",
"/localai/v1/chat/completions": "2/second"
}'

# `GLOBAL_RATE_LIMIT`: limits all routes not specified in `ROUTE_RATE_LIMIT`. Unlimited by default if unset
# `GLOBAL_RATE_LIMIT`: limits all routes not specified in `REQ_RATE_LIMIT`. Unlimited by default if unset
GLOBAL_RATE_LIMIT=2/5seconds

# `RATE_LIMIT_STRATEGY` Options: (fixed-window, fixed-window-elastic-expiry, moving-window) ref: https://limits.readthedocs.io/en/latest/strategies.html
@@ -40,7 +40,7 @@ RATE_LIMIT_STRATEGY=fixed-window
PROXY=http://localhost:7890

# `TOKEN_RATE_LIMIT`: rate limit on tokens in each streamed response (note: a token here is not strictly a GPT token but an SSE chunk)
TOKEN_RATE_LIMIT=16/second
TOKEN_RATE_LIMIT={"/v1/chat/completions":"20/second"}


TIMEOUT=300
37 changes: 20 additions & 17 deletions README.md
@@ -2,7 +2,7 @@

<h1 align="center">
<br>
OpenAI forward
OpenAI Forward
<br>
</h1>
<p align="center">
@@ -57,7 +57,7 @@ OpenAI-Forward is a forwarding service that sits between large models and users,
<img src="https://raw.githubusercontent.com/beidongjiedeguang/openai-forward/main/.github/images/separators/aqua.png" height=8px width="100%">
</a>

### Features
## Features

OpenAI-Forward supports the following features:

@@ -74,8 +74,8 @@ OpenAI-Forward supports the following features:
> https://api.openai-forward.com
> https://render.openai-forward.com
<font size=3>
Note: all proxy services provided in this project are for learning purposes only; please do not use them for other purposes
<font size=2 >
Note: the proxy services provided here are for learning purposes only
</font>


@@ -97,7 +97,7 @@ pip install openai-forward
```bash
aifd run
```
If everything is working, you will see the startup info below
If the `.env` configuration at the root path is loaded, you will see the following startup info

```bash
❯ aifd run
@@ -107,16 +107,13 @@
│ route prefix / │
│ api keys False │
│ forward keys False │
│ Log chat False │
╰────────────────────────────────────────────────────╯
╭──────────── ⏱️ Rate Limit configuration ───────────╮
│ │
│ strategy moving-window │
│ /healthz 60/2minutes │
│ /v1/chat/completions 15/minute;200/hour │
│ global_rate_limit 30/minute │
│ token_rate_limit 50/second │
│ token_interval_time 0.0200s │
│ strategy moving-window │
│ /healthz 100/2minutes (req) │
│ /v1/chat/completions 60/minute;600/hour (req) │
│ /v1/chat/completions 40/second (token) │
╰────────────────────────────────────────────────────╯
INFO: Started server process [33811]
INFO: Waiting for application startup.
@@ -201,11 +198,12 @@ curl --location 'https://api.openai-forward.com/v1/images/generations' \

[LocalAI](https://github.com/go-skynet/LocalAI)
[api-for-open-llm](https://github.com/xusenlinzy/api-for-open-llm)
used together, giving these service interfaces RPM limits, TPM limits, logging, and other capabilities
used together, giving these service interfaces per-user request rate limits, token output rate limits, chat log output, and other capabilities

Take LocalAI as an example:
Suppose the deployed LocalAI service is running at `http://localhost:8080`.
Then you only need to set `OPENAI_BASE_URL=http://localhost:8080` in the .env configuration to proxy LocalAI.
Then you only need to set `OPENAI_BASE_URL=http://localhost:8080` in the environment variables (or the [.env](.env) file) to proxy LocalAI,
after which LocalAI can be accessed on `aifd`'s default service port, `http://localhost:8000`.
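The path mapping this implies can be sketched in a few lines (illustrative only; `upstream_url` is a hypothetical helper, not openai-forward's code): a request hitting `aifd` on port 8000 is forwarded to the configured base URL with the route prefix stripped.

```python
# Sketch of the forwarding path mapping, with the root route prefix "/".
from urllib.parse import urljoin

OPENAI_BASE_URL = "http://localhost:8080"  # where LocalAI runs
ROUTE_PREFIX = "/"                         # aifd route prefix

def upstream_url(request_path):
    """Map a path received by aifd (port 8000) to the LocalAI upstream."""
    api_path = request_path[len(ROUTE_PREFIX.rstrip("/")):] or "/"
    return urljoin(OPENAI_BASE_URL, api_path)

print(upstream_url("/v1/chat/completions"))
# → http://localhost:8080/v1/chat/completions
```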

(To be added)

@@ -215,6 +213,11 @@ curl --location 'https://api.openai-forward.com/v1/images/generations' \
Align Claude's API format with the OpenAI format, then proxy it with `openai-forward`.
(To be added)

<a>
<img src="https://raw.githubusercontent.com/beidongjiedeguang/openai-forward/main/.github/images/separators/aqua.png" height=8px width="100%">
</a>


## Configuration

### Command line arguments
@@ -247,10 +250,10 @@ curl --location 'https://api.openai-forward.com/v1/images/generations' \
| FORWARD_KEY | Allow callers to use this key in place of the OpenAI API key; multiple forward keys are supported, separated by commas. If OPENAI_API_KEY is set but FORWARD_KEY is not, clients need no key at all; for security reasons, leaving FORWARD_KEY empty is then not recommended ||
| EXTRA_BASE_URL | Addresses of extra forwarding services ||
| EXTRA_ROUTE_PREFIX | Route prefixes of extra forwarding services ||
| ROUTE_RATE_LIMIT | Request rate limit for the specified routes (per user) ||
| GLOBAL_RATE_LIMIT | All routes not specified in `RATE_LIMIT`. Unlimited by default if unset ||
| REQ_RATE_LIMIT | Request rate limit for the specified routes (per user) ||
| GLOBAL_RATE_LIMIT | All routes not specified in `REQ_RATE_LIMIT`. Unlimited by default if unset ||
| RATE_LIMIT_STRATEGY | Rate limit strategy (fixed-window, fixed-window-elastic-expiry, moving-window) ||
| TOKEN_RATE_LIMIT | Rate limit on tokens in each streamed response (a token here is not strictly a GPT token but an SSE chunk) ||
| TOKEN_RATE_LIMIT | Rate limit on tokens in each streamed response (a token here is not strictly a GPT token but an SSE chunk) ||
| PROXY | HTTP proxy ||
| LOG_CHAT | Whether to log chat content | `false` |

7 changes: 1 addition & 6 deletions deploy.md
@@ -51,7 +51,7 @@ pip install openai-forward
aifd run
```
The service is now set up.
For configuration, see [Configuration](README.md#配置选项)
For configuration, see [Configuration](README.md#配置)

### Calling the service

@@ -147,7 +147,6 @@ Render's free plan: 750 hours of free instance time per month (meaning a single instance

Note: Railway provides a free plan of $5.0 and 500 hours of execution time per month. This means a single free user can only run it for about 21 days each month

> https://railway.openai-forward.com

<a>
<img src="https://raw.githubusercontent.com/beidongjiedeguang/openai-forward/main/.github/images/separators/aqua.png" height=8px width="100%">
@@ -167,8 +166,6 @@
2. [Bind a custom domain](https://vercel.com/docs/concepts/projects/domains/add-a-domain): the DNS assigned by Vercel is polluted in some regions, making it unreachable from within China; binding a custom domain enables direct access.


> https://vercel.openai-forward.com

<a>
<img src="https://raw.githubusercontent.com/beidongjiedeguang/openai-forward/main/.github/images/separators/aqua.png" height=8px width="100%">
@@ -195,6 +192,4 @@ stateDiagram-v2
```
This deployment method is light and simple and supports streaming forwarding. However, the simple [_worker.js](_worker.js) script currently only provides forwarding and supports no extra features.

> https://cloudflare.worker.openai-forward.com
> https://cloudflare.page.openai-forward.com
> https://openai-forward-9ak.pages.dev (this is the domain automatically assigned by Cloudflare Pages; it is currently directly accessible)
9 changes: 6 additions & 3 deletions openai_forward/app.py
@@ -5,8 +5,9 @@
from .forwarding import fwd_anything_objs, fwd_openai_objs
from .forwarding.settings import (
    RATE_LIMIT_STRATEGY,
    dynamic_rate_limit,
    dynamic_request_rate_limit,
    get_limiter_key,
    show_startup,
)

limiter = Limiter(key_func=get_limiter_key, strategy=RATE_LIMIT_STRATEGY)
@@ -23,16 +24,18 @@
    response_description="Return HTTP Status Code 200 (OK)",
    status_code=status.HTTP_200_OK,
)
@limiter.limit(dynamic_rate_limit)
@limiter.limit(dynamic_request_rate_limit)
def healthz(request: Request):
    return "OK"


add_route = lambda obj: app.add_route(
    obj.ROUTE_PREFIX + "{api_path:path}",
    limiter.limit(dynamic_rate_limit)(obj.reverse_proxy),
    limiter.limit(dynamic_request_rate_limit)(obj.reverse_proxy),
    methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "PATCH", "TRACE"],
)

[add_route(obj) for obj in fwd_openai_objs()]
[add_route(obj) for obj in fwd_anything_objs()]

show_startup()
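A hedged sketch of what a dynamic limit callable like `dynamic_request_rate_limit` could look like (assumed, not the project's exact code): it receives the limiter key produced by `get_limiter_key`, looks the route up in the per-route limits, and falls back to the global limit. The key format shown is an assumption for illustration.

```python
# Hypothetical sketch of a dynamic per-route limit lookup.
REQ_RATE_LIMIT = {
    "/healthz": "100/2minutes",
    "/v1/chat/completions": "60/minute;600/hour",
}
GLOBAL_RATE_LIMIT = "30/minute"

def dynamic_request_rate_limit(key: str) -> str:
    """`key` is assumed to encode the route, e.g. "127.0.0.1:/healthz"."""
    for route, limit in REQ_RATE_LIMIT.items():
        if key.endswith(route):
            return limit
    return GLOBAL_RATE_LIMIT

print(dynamic_request_rate_limit("127.0.0.1:/healthz"))  # 100/2minutes
```

Passing a callable rather than a fixed string to `limiter.limit(...)` is what lets one limiter instance enforce different limits per route, as the `app.py` changes above do.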
89 changes: 89 additions & 0 deletions openai_forward/cli.py
@@ -0,0 +1,89 @@
from rich import print
from rich.panel import Panel
from rich.table import Table


def print_startup_info(base_url, route_prefix, api_key, fwd_key, /, style, **kwargs):
    """
    Prints the startup information of the application.
    """
    try:
        from dotenv import load_dotenv

        load_dotenv(".env")
    except Exception:
        ...
    route_prefix = route_prefix or "/"
    if not isinstance(api_key, str):
        api_key = bool(api_key)
    if not isinstance(fwd_key, str):
        fwd_key = bool(fwd_key)
    table = Table(title="", box=None, width=50)

    metrics = {
        "base url": {
            'value': base_url,
        },
        "route prefix": {
            'value': route_prefix,
        },
        "api keys": {
            'value': str(api_key),
        },
        "forward keys": {
            'value': str(fwd_key),
            'style': "#62E883" if fwd_key or not api_key else "red",
        },
    }
    table.add_column("", justify='left', width=10)
    table.add_column("", justify='left')
    for key, value in metrics.items():
        table.add_row(key, value['value'], style=value.get('style', style))
    for key, value in kwargs.items():
        table.add_row(key, str(value), style=style)

    print(Panel(table, title="🤗 openai-forward is ready to serve! ", expand=False))


def print_rate_limit_info(
    strategy: str,
    global_req_rate_limit: str,
    req_rate_limit: dict,
    token_rate_limit: dict,
    **kwargs,
):
    """
    Print rate limit information.

    Args:
        strategy (str): The strategy used for rate limiting.
        global_req_rate_limit (str): The global request rate limit.
        req_rate_limit (dict): A dictionary of request rate limits.
        token_rate_limit (dict): A dictionary of token rate limits.
        **kwargs: Other limit info.

    Returns:
        None
    """
    table = Table(title="", box=None, width=50)
    table.add_column("")
    table.add_column("", justify='left')
    if strategy:
        table.add_row("strategy", strategy, style='#7CD9FF')

    if global_req_rate_limit:
        table.add_row(
            "global rate limit", f"{global_req_rate_limit} (req)", style='#C5FF95'
        )
    for key, value in req_rate_limit.items():
        table.add_row(key, f"{value} (req)", style='#C5FF95')

    for key, value in token_rate_limit.items():
        if isinstance(value, float):
            value = f"{value:.4f} s/token"
        table.add_row(key, f"{value} (token)", style='#C5FF95')

    for key, value in kwargs.items():
        table.add_row(key, str(value), style='#C5FF95')

    print(Panel(table, title="⏱️ Rate Limit configuration", expand=False))