diff --git a/.env b/.env
index 27c8f1a..f2ce2f9 100644
--- a/.env
+++ b/.env
@@ -5,6 +5,7 @@ LOG_CHAT=true
 
 # `CACHE_BACKEND`: Options (MEMORY, LMDB, LevelDB)
 CACHE_BACKEND=LMDB
+CACHE_ROOT_PATH_OR_URL=./FLAXKV_DB
 CACHE_CHAT_COMPLETION=true
 DEFAULT_REQUEST_CACHING_VALUE=false
diff --git a/README.md b/README.md
index edc7ffe..b927139 100644
--- a/README.md
+++ b/README.md
@@ -57,10 +57,10 @@ OpenAI-Forward 提供以下核心功能:
 - **全能转发**:可转发几乎所有类型的请求
-- **性能优先**:拥有出色的异步性能
+- **性能优先**:出色的异步性能
 - **缓存AI预测**:对AI预测进行缓存,加速服务访问并节省费用
-- **用户流量控制**:自定义请求与Token速率
-- **实时响应日志**:优化调用链的可观察性
+- **用户流量控制**:自定义请求速率与Token速率
+- **实时响应日志**:提升LLMs可观察性
 - **自定义秘钥**:替代原始API密钥
 - **多目标路由**:转发多个服务地址至同一服务下的不同路由
 - **自动重试**:确保服务的稳定性,请求失败时将自动重试
@@ -246,7 +246,7 @@ curl --location 'https://api.openai-forward.com/v1/images/generations' \
 | TOKEN_RATE_LIMIT | 限制流式响应中每个token(或SSE chunk)的输出速率 | 无 |
 | PROXY | 设置HTTP代理地址 | 无 |
 | LOG_CHAT | 开关聊天内容的日志记录,用于调试和监控 | `false` |
-| CACHE_BACKEND | cache后端,支持内存后端和数据库后端,默认为内存后端,可选lmdb, rocksdb和leveldb数据库后端 | `MEMORY` |
+| CACHE_BACKEND | cache后端,支持内存后端和数据库后端,默认为内存后端,可选lmdb、leveldb数据库后端 | `lmdb` |
 | CACHE_CHAT_COMPLETION | 是否缓存/v1/chat/completions 结果 | `false` |
 
 详细配置说明可参见 [.env.example](.env.example) 文件。(待完善)
@@ -254,11 +254,11 @@ curl --location 'https://api.openai-forward.com/v1/images/generations' \
 > 注意:如果你设置了 OPENAI_API_KEY 但未设置 FORWARD_KEY,客户端在调用时将不需要提供密钥。由于这可能存在安全风险,除非有明确需求,否则不推荐将
 > FORWARD_KEY 置空。
 
-### Caching
+### 智能缓存
 
-缓存默认使用内存后端,可选择数据库后端,需安装相应的环境:
+缓存可选择数据库后端,需安装相应的环境:
 
-- 配置环境变量中`CACHE_BACKEND`以使用相应的数据库后端进行存储。 可选值`MEMORY`、`LMDB`、`LEVELDB`
+- 配置环境变量中`CACHE_BACKEND`以使用相应的数据库后端进行存储。 可选值`LMDB`、`LEVELDB`
 - 配置`CACHE_CHAT_COMPLETION`为`true`以缓存/v1/chat/completions 结果。
 
 **Python**
@@ -328,7 +328,7 @@ FORWARD_KEY=fk-****** # 这里fk-token由我们自己定义
 ```text
 {'messages': [{'role': 'user', 'content': 'hi'}], 'model': 'gpt-3.5-turbo', 'stream': True, 'max_tokens': None, 'n': 1, 'temperature': 1, 'top_p': 1, 'logit_bias': None, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None, 'user': None, 'ip': '127.0.0.1', 'uid': '2155fe1580e6aed626aa1ad74c1ce54e', 'datetime': '2023-10-17 15:27:12'}
-{'assistant': 'Hello! How can I assist you today?', 'is_function_call': False, 'uid': '2155fe1580e6aed626aa1ad74c1ce54e'}
+{'assistant': 'Hello! How can I assist you today?', 'is_tool_calls': False, 'uid': '2155fe1580e6aed626aa1ad74c1ce54e'}
 ```
 
 转换为`json`格式:
@@ -351,8 +351,8 @@ aifd convert
         "user": "hi"
       }
     ],
-    "functions": null,
-    "is_function_call": false,
+    "tools": null,
+    "is_tool_calls": false,
     "assistant": "Hello! How can I assist you today?"
   }
 ]
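Taken together, the cache settings this patch touches in `.env` and documents in the README form one small block. For reference, a minimal cache-related `.env` assembled only from values that appear in this patch (the storage path is the one shipped in `.env`; `REMOTE` comes from the new branch in `database.py` below):

```env
# Cache backend: MEMORY, LMDB, LEVELDB, or REMOTE
CACHE_BACKEND=LMDB
# Directory where FlaxKV keeps its data, or the URL of a remote flaxkv server
CACHE_ROOT_PATH_OR_URL=./FLAXKV_DB
# Cache /v1/chat/completions results
CACHE_CHAT_COMPLETION=true
DEFAULT_REQUEST_CACHING_VALUE=false
```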
diff --git a/README_EN.md b/README_EN.md
index 55d7d70..197f3d9 100644
--- a/README_EN.md
+++ b/README_EN.md
@@ -240,22 +240,22 @@ Execute `aifd run --help` to get details on arguments.
 
 You can create a .env file in the project's run directory to customize configurations. For a reference configuration, see the [.env.example](.env.example) file in the root directory.
 
-| Environment Variable | Description | Default Value |
-|-----------------------|-------------------------------------------------------------------------------------------------|:-----------------------------:|
-| OPENAI_BASE_URL | Set base address for OpenAI-style API | https://api.openai.com |
-| OPENAI_ROUTE_PREFIX | Define a route prefix for the OPENAI_BASE_URL interface address | / |
-| OPENAI_API_KEY | Configure API key in OpenAI style, supports using multiple keys separated by commas | None |
-| FORWARD_KEY | Set a custom key for proxying, multiple keys can be separated by commas. If not set (not recommended), it will directly use `OPENAI_API_KEY` | None |
-| EXTRA_BASE_URL | Configure the base URL for additional proxy services | None |
-| EXTRA_ROUTE_PREFIX | Define the route prefix for additional proxy services | None |
-| REQ_RATE_LIMIT | Set the user request rate limit for specific routes (user distinguished) | None |
-| GLOBAL_RATE_LIMIT | Configure a global request rate limit applicable to routes not specified in `REQ_RATE_LIMIT` | None |
-| RATE_LIMIT_STRATEGY | Choose a rate limit strategy, options include: fixed-window, fixed-window-elastic-expiry, moving-window | None |
-| TOKEN_RATE_LIMIT | Limit the output rate of each token (or SSE chunk) in a streaming response | None |
-| PROXY | Set HTTP proxy address | None |
-| LOG_CHAT | Toggle chat content logging for debugging and monitoring | `false` |
-| CACHE_BACKEND | Cache backend, supports memory backend and database backend. By default, it's memory backend, optional database backends are lmdb, rocksdb, and leveldb | `MEMORY` |
-| CACHE_CHAT_COMPLETION | Whether to cache /v1/chat/completions results | `false` |
+| Environment Variable | Description | Default Value |
+|-----------------------|-------------------------------------------------------------------------------------------------|:----------------------:|
+| OPENAI_BASE_URL | Set base address for OpenAI-style API | https://api.openai.com |
+| OPENAI_ROUTE_PREFIX | Define a route prefix for the OPENAI_BASE_URL interface address | / |
+| OPENAI_API_KEY | Configure API key in OpenAI style, supports using multiple keys separated by commas | None |
+| FORWARD_KEY | Set a custom key for proxying, multiple keys can be separated by commas. If not set (not recommended), it will directly use `OPENAI_API_KEY` | None |
+| EXTRA_BASE_URL | Configure the base URL for additional proxy services | None |
+| EXTRA_ROUTE_PREFIX | Define the route prefix for additional proxy services | None |
+| REQ_RATE_LIMIT | Set the user request rate limit for specific routes (user distinguished) | None |
+| GLOBAL_RATE_LIMIT | Configure a global request rate limit applicable to routes not specified in `REQ_RATE_LIMIT` | None |
+| RATE_LIMIT_STRATEGY | Choose a rate limit strategy, options include: fixed-window, fixed-window-elastic-expiry, moving-window | None |
+| TOKEN_RATE_LIMIT | Limit the output rate of each token (or SSE chunk) in a streaming response | None |
+| PROXY | Set HTTP proxy address | None |
+| LOG_CHAT | Toggle chat content logging for debugging and monitoring | `false` |
+| CACHE_BACKEND | Cache backend, supports memory backend and database backend. By default, it's memory backend; optional database backends are lmdb and leveldb | `lmdb` |
+| CACHE_CHAT_COMPLETION | Whether to cache /v1/chat/completions results | `false` |
 
 Detailed configuration descriptions can be seen in the [.env.example](.env.example) file. (To be completed)
@@ -264,7 +264,7 @@ Detailed configuration descriptions can be seen in the [.env.examp
 
 ### Caching
 
-- Configure `CACHE_BACKEND` in the environment variable to use the respective database backend for storage. Options are `MEMORY`, `LMDB`, and `LEVELDB`.
+- Configure `CACHE_BACKEND` in the environment variable to use the respective database backend for storage. Options are `LMDB` and `LEVELDB`.
 - Set `CACHE_CHAT_COMPLETION` to `true` to cache /v1/chat/completions results.
 
 **Python**
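As context for the `FORWARD_KEY` and caching rows above: once a forward instance is running, the standard OpenAI Python client only needs its base URL swapped. A sketch assuming `openai>=1.0` and the public demo address quoted in the README; the `fk-` key is whatever you set in `FORWARD_KEY`:

```python
from openai import OpenAI

# Point the official client at the forward service instead of api.openai.com.
client = OpenAI(
    base_url="https://api.openai-forward.com/v1",
    api_key="fk-******",  # your FORWARD_KEY (see the table above)
)

resp = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
)
print(resp.choices[0].message.content)
```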
diff --git a/deploy.md b/deploy.md
index dbaaab9..a27ce51 100644
--- a/deploy.md
+++ b/deploy.md
@@ -7,7 +7,7 @@
 一键部署至render
 
-[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/beidongjiedeguang/openai-forward)
+[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/KenyonY/openai-forward)
 
@@ -25,6 +25,8 @@
 **一键免费云平台部署**
 
 1. [Render一键部署](deploy.md#render-一键部署)
+2. [CloudFlare AI Gateway](https://developers.cloudflare.com/ai-gateway/get-started/creating-gateway/)
+3. 更多部署: https://github.com/KenyonY/openai-forward/blob/0.5.x/deploy.md
 
 ---
 
 ## pip部署
@@ -66,7 +68,7 @@ proxy_buffering off;
 ```
 
-
+
 
 ## Docker部署
@@ -86,7 +88,7 @@ docker run -d -p 8000:8000 beidongjiedeguang/openai-forward:latest
 
 ## 源码部署
 
 ```bash
-git clone https://github.com/beidongjiedeguang/openai-forward.git --depth=1
+git clone https://github.com/KenyonY/openai-forward.git --depth=1
 cd openai-forward
 
 pip install -e .
@@ -96,11 +98,11 @@ aifd run
 
-
+
 
 ## Render 一键部署
 
-[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/beidongjiedeguang/openai-forward)
+[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/KenyonY/openai-forward)
 
 Render应该算是所有部署中最简易的一种, 并且它生成的域名国内可以直接访问!
diff --git a/deploy_en.md b/deploy_en.md
index c252a6c..ee3041f 100644
--- a/deploy_en.md
+++ b/deploy_en.md
@@ -7,7 +7,7 @@
 Deploy with one click to Render
 
-[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/beidongjiedeguang/openai-forward)
+[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/KenyonY/openai-forward)
 
 [pip Deployment](#pip-deployment) |
 [docker Deployment](#docker-deployment) |
@@ -24,6 +24,8 @@ This document offers several deployment methods:
 
 **One-click Free Cloud Deployment**
 
 1. [Render One-click Deployment](#render-one-click-deployment)
+2. [CloudFlare AI Gateway](https://developers.cloudflare.com/ai-gateway/)
+3. More deployment options: https://github.com/KenyonY/openai-forward/blob/0.5.x/deploy.md
 
 ---
 
 ## pip Deployment
@@ -65,7 +67,7 @@ proxy_buffering off;
 ```
 
-
+
 
 ## Docker Deployment
@@ -83,7 +85,7 @@ For SSL setup, refer to the above. Environment variable configuration can be fou
 
 ## Source Code Deployment
 
 ```bash
-git clone https://github.com/beidongjiedeguang/openai-forward.git --depth=1
+git clone https://github.com/KenyonY/openai-forward.git --depth=1
 cd openai-forward
 
 pip install -e .
@@ -92,11 +94,11 @@ aifd run
 
 For SSL setup, refer to the above.
 
-
+
 
 ## Render One-click Deployment
 
-[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/beidongjiedeguang/openai-forward)
+[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/KenyonY/openai-forward)
 
 Render might be considered the easiest of all deployment methods, and the domain it generates can be directly accessed domestically!
diff --git a/openai_forward/__init__.py b/openai_forward/__init__.py
index 0eb7076..002ea3b 100644
--- a/openai_forward/__init__.py
+++ b/openai_forward/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.6.7"
+__version__ = "0.6.8"
 
 from dotenv import load_dotenv
diff --git a/openai_forward/cache/chat_completions.py b/openai_forward/cache/chat_completions.py
index d88c0f3..5344fc8 100644
--- a/openai_forward/cache/chat_completions.py
+++ b/openai_forward/cache/chat_completions.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import time
 from itertools import cycle
 from typing import List, Literal, Optional, Union
diff --git a/openai_forward/cache/database.py b/openai_forward/cache/database.py
index c1f58a0..69df935 100644
--- a/openai_forward/cache/database.py
+++ b/openai_forward/cache/database.py
@@ -1,6 +1,8 @@
-from flaxkv import dbdict
+from flaxkv import FlaxKV
 
-from ..settings import CACHE_BACKEND, LOG_CACHE_DB_INFO
+from ..settings import CACHE_BACKEND, CACHE_ROOT_PATH_OR_URL, LOG_CACHE_DB_INFO
+
+cache = True
 
 if CACHE_BACKEND.upper() == "LMDB":
     try:
@@ -9,9 +11,21 @@
         raise ImportError("Please install LMDB: pip install lmdb")
 
     if LOG_CACHE_DB_INFO:
-        db_dict = dbdict("./CACHE_LMDB", backend='lmdb', log="INFO", save_log=True)
+        db_dict = FlaxKV(
+            "CACHE_LMDB",
+            root_path_or_url=CACHE_ROOT_PATH_OR_URL,
+            backend='lmdb',
+            cache=cache,
+            log="INFO",
+            save_log=True,
+        )
     else:
-        db_dict = dbdict("./CACHE_LMDB", backend='lmdb')
+        db_dict = FlaxKV(
+            "CACHE_LMDB",
+            root_path_or_url=CACHE_ROOT_PATH_OR_URL,
+            backend='lmdb',
+            cache=cache,
+        )
 
 elif CACHE_BACKEND.upper() == "LEVELDB":
     try:
@@ -20,11 +34,39 @@
         raise ImportError("Please install LevelDB: pip install plyvel")
 
     if LOG_CACHE_DB_INFO:
-        db_dict = dbdict(
-            "./CACHE_LEVELDB", backend='leveldb', log="INFO", save_log=True
-        )
+        db_dict = FlaxKV(
+            "CACHE_LEVELDB",
+            root_path_or_url=CACHE_ROOT_PATH_OR_URL,
+            backend='leveldb',
+            cache=cache,
+            log="INFO",
+            save_log=True,
+        )
     else:
-        db_dict = dbdict("./CACHE_LEVELDB", backend='leveldb')
+        db_dict = FlaxKV(
+            "CACHE_LEVELDB",
+            root_path_or_url=CACHE_ROOT_PATH_OR_URL,
+            backend='leveldb',
+            cache=cache,
+        )
+
+elif CACHE_BACKEND.upper() == "REMOTE":
+    if LOG_CACHE_DB_INFO:
+        db_dict = FlaxKV(
+            "REMOTE_DB",
+            root_path_or_url=CACHE_ROOT_PATH_OR_URL,
+            backend="remote",
+            cache=cache,
+            log="INFO",
+            save_log=True,
+        )
+    else:
+        db_dict = FlaxKV(
+            "REMOTE_DB",
+            root_path_or_url=CACHE_ROOT_PATH_OR_URL,
+            backend="remote",
+            cache=cache,
+        )
 
 elif CACHE_BACKEND.upper() == "MEMORY":
     db_dict = {}
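The `MEMORY` branch falling back to a plain `{}` is the tell that `db_dict` is used with dict semantics throughout; `FlaxKV` is a persistent stand-in for that dict. A minimal sketch mirroring the constructor shape of the LMDB branch above (assuming `pip install flaxkv lmdb`; the database name and value are illustrative):

```python
from flaxkv import FlaxKV

# Same constructor shape as the LMDB branch in database.py above.
db = FlaxKV("demo_db", root_path_or_url="./FLAXKV_DB", backend="lmdb", cache=True)

# Dict-style reads and writes, persisted to disk by the backend.
db["greeting"] = {"assistant": "Hello! How can I assist you today?"}
print(db["greeting"])
```

With `CACHE_BACKEND=REMOTE`, the same dict-style calls go through flaxkv's client to a server addressed by `CACHE_ROOT_PATH_OR_URL`, which appears to be why `pyproject.toml` below moves to the `flaxkv[client]` extra.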
diff --git a/openai_forward/forward/base.py b/openai_forward/forward/base.py
index 88e9197..a8b13d5 100644
--- a/openai_forward/forward/base.py
+++ b/openai_forward/forward/base.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import asyncio
 import traceback
 from asyncio import Queue
diff --git a/openai_forward/settings.py b/openai_forward/settings.py
index 02919b6..ccc941b 100644
--- a/openai_forward/settings.py
+++ b/openai_forward/settings.py
@@ -56,9 +56,12 @@
     os.environ.get("LOG_CACHE_DB_INFO", "false").strip().lower() == "true"
 )
 CACHE_BACKEND = os.environ.get("CACHE_BACKEND", "MEMORY").strip()
+CACHE_ROOT_PATH_OR_URL = os.environ.get("CACHE_ROOT_PATH_OR_URL", ".").strip()
+
 DEFAULT_REQUEST_CACHING_VALUE = False
 if CACHE_CHAT_COMPLETION:
     additional_start_info["cache_backend"] = CACHE_BACKEND
+    additional_start_info["cache_root_path_or_url"] = CACHE_ROOT_PATH_OR_URL
     DEFAULT_REQUEST_CACHING_VALUE = (
         os.environ.get("DEFAULT_REQUEST_CACHING_VALUE", "false").strip().lower()
         == "true"
diff --git a/pyproject.toml b/pyproject.toml
index bcb0eda..6279b5a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,7 @@ dependencies = [
     "pytz",
     "slowapi==0.1.8",
     "aiohttp>=3.8.5",
-    "flaxkv>=0.1.4",
+    "flaxkv[client]>=0.2.1",
     "msgpack",
     "tiktoken>=0.5.1",
     "tomli",
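With the `REMOTE` branch in `database.py` and the `flaxkv[client]` dependency in place, the cache can point at a remote FlaxKV server instead of a local directory. A hypothetical configuration (the URL and port are placeholders for wherever your flaxkv server listens):

```env
CACHE_BACKEND=REMOTE
CACHE_ROOT_PATH_OR_URL=http://localhost:8765
CACHE_CHAT_COMPLETION=true
```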