From af3a05f72d9ad6a9d55a0845c0f2cd9aaea0c38a Mon Sep 17 00:00:00 2001 From: kunyuan <32060638+beidongjiedeguang@users.noreply.github.com> Date: Sat, 27 May 2023 18:34:30 +0800 Subject: [PATCH] feat: add convert log tool && switch chat saver && cancel `fk-` prefix limit (#34) * feat: switch log saver * Update README * fix: fix except error && add load_chat func * cancel `fk-` prefix limit * :fire: Cancel commit-lint check * add tool.py * fix pytest --- .env | 7 +- .env.example | 6 ++ .github/workflows/ci.yml | 11 +- README.md | 189 +++++++++++++++++++++++---------- README_EN.md | 65 +++++++----- deploy.md | 17 +-- openai_forward/__init__.py | 2 +- openai_forward/__main__.py | 26 +++-- openai_forward/app.py | 5 +- openai_forward/base.py | 73 ++++++------- openai_forward/config.py | 87 ++++----------- openai_forward/content/chat.py | 94 ++++------------ openai_forward/openai.py | 1 - openai_forward/tool.py | 114 ++++++++++++++++++++ pyproject.toml | 3 + tests/test_chat_save.py | 34 ------ tests/test_env.py | 11 +- tests/test_http.py | 2 +- 18 files changed, 413 insertions(+), 334 deletions(-) create mode 100644 openai_forward/tool.py delete mode 100644 tests/test_chat_save.py diff --git a/.env b/.env index 297e1b3..f5449ad 100644 --- a/.env +++ b/.env @@ -1,5 +1,8 @@ -LOG_CHAT=True +LOG_CHAT=false OPENAI_BASE_URL=https://api.openai.com OPENAI_API_KEY= FORWARD_KEY= -ROUTE_PREFIX= \ No newline at end of file +ROUTE_PREFIX= + +# 设定时区 +TZ=Asia/Shanghai \ No newline at end of file diff --git a/.env.example b/.env.example index fc2cead..e5b81dd 100644 --- a/.env.example +++ b/.env.example @@ -8,3 +8,9 @@ OPENAI_API_KEY=sk-xxx1 sk-xxx2 sk-xxx3 # FORWARD_KEY: 当前面的OPENAI_API_KEY被设置,就可以设置这里的FORWARD_KEY,客户端调用时可以使用FORWARD_KEY作为api key FORWARD_KEY=fk-xxx1 + +# ROUTE_PREFIX: 可指定整个转发服务的根路由前缀 +ROUTE_PREFIX= + +# 设定时区 +TZ=Asia/Shanghai \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9a76be9..9fc61fc 100644 --- 
a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,20 +7,11 @@ on: pull_request: paths-ignore: - 'docs/**' - - 'README.md' - - 'README_*.md' + - '*.md' branches: - main jobs: - commit-lint: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - uses: wagoid/commitlint-github-action@v4 - check-black: runs-on: ubuntu-latest steps: diff --git a/README.md b/README.md index 7612b33..289ca0e 100644 --- a/README.md +++ b/README.md @@ -35,62 +35,67 @@

+
+[功能](#功能) | +[部署指南](#部署指南) | +[应用](#应用) | +[配置选项](#配置选项) | +[聊天日志](#聊天日志) + +[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/tejCum?referralCode=U0-kXv) +[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/beidongjiedeguang/openai-forward) + +
本项目用于解决一些地区无法直接访问OpenAI的问题,将该服务部署在可以正常访问openai -api的服务器上,通过该服务转发OpenAI的请求。即搭建反向代理服务 +api的服务器上,通过该服务转发OpenAI的请求。即搭建反向代理服务 --- 由本项目搭建的长期代理地址: -> https://api.openai-forward.com - +> https://api.openai-forward.com +## 功能 -## 目录 - -- [功能](#功能) -- [部署指南](#部署指南) -- [应用](#应用) -- [配置选项](#配置选项) -- [聊天日志](#聊天日志) -- [高级配置](#高级配置) +**基础功能** -## 功能 -**基础功能** - [x] 支持转发OpenAI所有接口 - [x] 支持流式响应 - [x] 支持指定转发路由前缀 - [x] docker部署 - [x] pip 安装部署 - [x] cloudflare 部署 -- [x] [![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2Fbeidongjiedeguang%2Fopenai-forward&project-name=openai-forward&repository-name=openai-forward&framework=other) ~~Vercel一键部署(不建议)~~ -- [x] [![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/tejCum?referralCode=U0-kXv) Railway 一键部署 -- [x] [![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/beidongjiedeguang/openai-forward) Render 一键部署 +- [x] ~~Vercel一键部署(不建议)~~ +- [x] Railway 一键部署 +- [x] Render 一键部署 + +**高级功能** -**高级功能** - [x] 实时记录聊天记录(包括流式响应的聊天内容) - [x] 允许输入多个openai api key 组成轮询池 -- [x] 自定义 api key (见高级配置) +- [x] 自定义 转发api key (见[高级配置](#高级配置)) ## 部署指南 -提供以下几种部署方式 +提供以下几种部署方式 + +**有海外vps方案** -**有海外vps方案** 1. [pip 安装部署](deploy.md#pip-推荐) (推荐) -2. [Docker部署](deploy.md#docker-推荐) (推荐) - > https://api.openai-forward.com +2. [Docker部署](deploy.md#docker-推荐) (推荐) + > https://api.openai-forward.com + +**无vps免费部署方案** -**无vps免费部署方案** 1. [一键Vercel部署](deploy.md#vercel-一键部署) (不推荐) - > ~~https://vercel.openai-forward.com~~ + > ~~https://vercel.openai-forward.com~~ 2. [cloudflare部署](deploy.md#cloudflare-部署) (推荐) > https://cloudflare.openai-forward.com 3. [Railway部署](deploy.md#Railway-一键部署) > https://railway.openai-forward.com -4. [Render一键部署](deploy.md#render-一键部署) (最佳推荐) - > https://render.openai-forward.com +4. 
[Render一键部署](deploy.md#render-一键部署) (推荐) + > https://render.openai-forward.com ## 应用 @@ -99,6 +104,9 @@ api的服务器上,通过该服务转发OpenAI的请求。即搭建反向代 基于开源项目[ChatGPT-Next-Web](https://github.com/Yidadaa/ChatGPT-Next-Web)搭建自己的chatgpt服务 替换docker启动命令中的 `BASE_URL`为我们自己搭建的代理服务地址 +
+Click for more details + ```bash docker run -d \ -p 3000:3000 \ @@ -107,11 +115,24 @@ docker run -d \ -e CODE="kunyuan" \ yidadaa/chatgpt-next-web ``` + 这里部署了一个,供大家轻度使用: - https://chat.beidongjiedeguang.top , 访问密码: `kunyuan` +https://chat.beidongjiedeguang.top , 访问密码: `kunyuan` +
### 在代码中使用 +**Python** + +```diff + import openai ++ openai.api_base = "https://api.openai-forward.com/v1" + openai.api_key = "sk-******" +``` + +
+ More Examples + **JS/TS** ```diff @@ -123,15 +144,8 @@ docker run -d \ }); ``` -**Python** - -```diff - import openai -+ openai.api_base = "https://api.openai-forward.com/v1" - openai.api_key = "sk-******" -``` - **gpt-3.5-turbo** + ```bash curl https://api.openai-forward.com/v1/chat/completions \ -H "Content-Type: application/json" \ @@ -143,6 +157,7 @@ curl https://api.openai-forward.com/v1/chat/completions \ ``` **Image Generation (DALL-E)** + ```bash curl --location 'https://api.openai-forward.com/v1/images/generations' \ --header 'Authorization: Bearer sk-******' \ @@ -154,44 +169,64 @@ curl --location 'https://api.openai-forward.com/v1/images/generations' \ }' ``` +
## 配置选项 +配置的设置方式支持两种: +一种为在shell中运行`openai-forward run --port=8000`的命令行方式指定; +另一种为读取环境变量的方式指定。 + +
+Click for more details + **`openai-forward run`参数配置项** -| 配置项 | 说明 | 默认值 | -|-----------| --- | :---: | -| --port | 服务端口号 | 8000 | -| --workers | 工作进程数 | 1 | +| 配置项 | 说明 | 默认值 | +|-----------------|-------------------|:----------------------:| +| --port | 服务端口号 | 8000 | +| --workers | 工作进程数 | 1 | +| --base_url | 同 OPENAI_BASE_URL | https://api.openai.com | +| --api_key | 同 OPENAI_API_KEY | `None` | +| --forward_key | 同 FORWARD_KEY | `None` | +| --route_prefix | 同 ROUTE_PREFIX | `None` | +| --log_chat | 同 LOG_CHAT | `False` | -更多参数 `openai-forward run --help` 查看 +也可通过 `openai-forward run --help` 查看 **环境变量配置项** -支持从运行目录下的`.env`文件中读取: +支持从运行目录下的`.env`文件中读取 -| 环境变量 | 说明 | 默认值 | -|-----------------|-----------------------------------------------------------------|:------------------------:| -| OPENAI_API_KEY | 默认openai api key,支持多个默认api key, 以 `sk-` 开头, 以空格分割 | 无 | -| FORWARD_KEY | 允许调用方使用该key代替openai api key,支持多个forward key, 以`fk-` 开头, 以空格分割 | 无 | -| OPENAI_BASE_URL | 转发base url | `https://api.openai.com` | -| LOG_CHAT | 是否记录聊天内容 | `true` | -| ROUTE_PREFIX | 路由前缀 | 无 | +| 环境变量 | 说明 | 默认值 | +|-----------------|-----------------------------------------------------------------------------------------------------------------------------------|:------------------------:| +| OPENAI_BASE_URL | 默认 openai官方 api 地址 | https://api.openai.com | +| OPENAI_API_KEY | 默认openai api key,支持多个默认api key, 以 `sk-` 开头, 以空格分割 | 无 | +| FORWARD_KEY | 允许调用方使用该key代替openai api key,支持多个forward key, 以空格分割; 如果设置了OPENAI_API_KEY,而没有设置FORWARD_KEY, 则客户端调用时无需提供密钥, 此时出于安全考虑不建议FORWARD_KEY置空 | 无 | +| OPENAI_BASE_URL | 转发base url | `https://api.openai.com` | +| ROUTE_PREFIX | 路由前缀 | 无 | +| LOG_CHAT | 是否记录聊天内容 | `false` | +
## 高级配置 **设置api_key为自己设置的forward key** 需要配置 OPENAI_API_KEY 和 FORWARD_KEY, 例如 +
+ Click for more details ```bash OPENAI_API_KEY=sk-******* FORWARD_KEY=fk-****** # 这里fk-token由我们自己定义 ``` + 这里我们配置了FORWARD_KEY为`fk-******`, 那么后面客户端在调用时只需设置OPENAI_API_KEY为我们自定义的`fk-******` 即可。 -这样的好处是在使用一些需要输入OPENAI_API_KEY的第三方应用时,我们可以使用`fk-******`搭配代理服务使用(如下面的例子) 而无需担心OPENAI_API_KEY被泄露。 +这样的好处是在使用一些需要输入OPENAI_API_KEY的第三方应用时,我们可以使用`fk-******`搭配代理服务使用(如下面的例子) +而无需担心OPENAI_API_KEY被泄露。 并且可以对外分发`fk-******` **用例:** + ```bash curl https://api.openai-forward.com/v1/chat/completions \ -H "Content-Type: application/json" \ @@ -201,14 +236,18 @@ curl https://api.openai-forward.com/v1/chat/completions \ "messages": [{"role": "user", "content": "Hello!"}] }' ``` + **Python** + ```diff import openai + openai.api_base = "https://api.openai-forward.com/v1" - openai.api_key = "sk-******" + openai.api_key = "fk-******" ``` + **Web application** + ```bash docker run -d \ -p 3000:3000 \ @@ -218,21 +257,63 @@ docker run -d \ yidadaa/chatgpt-next-web ``` +
+ ## 聊天日志 +默认不记录聊天日志,若要开启需设置环境变量`LOG_CHAT=true` +
+ Click for more details + 保存路径在当前目录下的`Log/chat.log`路径中。 记录格式为 ```text -[{'host': xxx, 'model': xxx, 'message': [{'user': xxx}, {'assistant': xxx}]}, -{'assistant': xxx}] +{'messages': [{'user': 'hi'}], 'model': 'gpt-3.5-turbo', 'host': '', 'uid': '467a17ec-bf39-4b65-9ebd-e722b3bdd5c3'} +{'assistant': 'Hello! How can I assist you today?', 'uid': '467a17ec-bf39-4b65-9ebd-e722b3bdd5c3'} +{'messages': [{'user': 'Hello!'}], 'model': 'gpt-3.5-turbo', 'host': '', 'uid': 'f844d156-e747-4887-aef8-e40d977b5ee7'} +{'assistant': 'Hi there! How can I assist you today?', 'uid': 'f844d156-e747-4887-aef8-e40d977b5ee7'} +... +``` -[{'host': ...}, -{'assistant': ...}] +转换为`jsonl`格式: +```bash +openai-forward convert +``` + +即可转换为以下格式: + +```json lines +[ + { + "messages": [ + { + "user": "hi!" + } + ] + }, + { + "assistant": "Hello! How can I assist you today?" + } +] +[ + { + "messages": [ + { + "user": "Hello!" + } + ] + }, + { + "assistant": "Hi there! How can I assist you today?" + } +] ... ``` +
+ ## Backer and Sponsor @@ -241,4 +322,4 @@ docker run -d \ ## License -Openai-forward is licensed under the [MIT](https://opensource.org/license/mit/) license. +OpenAI-Forward is licensed under the [MIT](https://opensource.org/license/mit/) license. diff --git a/README_EN.md b/README_EN.md index 2810bbd..712baa7 100644 --- a/README_EN.md +++ b/README_EN.md @@ -50,39 +50,47 @@

-This project is designed to solve the problem of some regions being unable to directly access OpenAI. The service is deployed on a server that can access the OpenAI API, and OpenAI requests are forwarded through the service, i.e. a reverse proxy service is set up. ---- -Test access: https://api.openai-forward.com/v1/chat/completions -To put it another way, https://api.openai-forward.com is equivalent to https://api.openai.com. +
+ +[Features](#Features) | +[Usage](#Usage) | +[Deploy](#Deploy) | +[Service Usage](#Service-Usage) | +[Configuration](#Configuration) | +[Chat Log](#Chat-log) + +[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/tejCum?referralCode=U0-kXv) +[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/beidongjiedeguang/openai-forward) + +
+ +This project is designed to solve the problem of some regions being unable to directly access OpenAI. The service is deployed on a server that can access the OpenAI API, and OpenAI requests are forwarded through the service, i.e. a reverse proxy service is set up. --- -# Table of Contents +Long-term proxy address provided by this project: +> https://api.openai-forward.com -- [Features](#Features) -- [Usage](#Usage) -- [Deploy](#Deploy) -- [Service Usage](#Service-Usage) -- [Configuration](#Configuration) -- [Chat Log](#Chat-log) -- [Advanced Configuration](#Advanced-Configuration) # Features **Basic Features** -- [x] Support forwarding all OpenAI APIs. +- [x] Support forwarding all OpenAI interfaces. - [x] Support streaming responses. -- [x] Support specifying forwarding route prefixes. +- [x] Support specifying the forwarding route prefix. - [x] Docker deployment. - [x] Pip installation deployment. +- [x] Cloudflare deployment. +- [x] ~~Vercel one-click deployment (not recommended)~~ +- [x] Railway one-click deployment. +- [x] Render one-click deployment. **Advanced Features** - [x] Real-time recording of chat logs (including chat content from streaming responses). - [x] Support default OpenAI API key (round-robin invocation of multiple API keys). - [x] Custom forward API key instead of OpenAI API key (see advanced configuration). -- [x] Support request IP verification (IP whitelist and blacklist). 
# Usage @@ -201,22 +209,27 @@ http://{ip}:{port}/v1/chat/completions **`openai-forward run` Parameter Configuration Options** -| Configuration Option | Description | Default Value | -|-----------| --- | :---: | -| --port | Service port number | 8000 | +| Configuration Item | Description | Default Value | +|-----------------|-------------------|:----------------------:| +| --port | Server port number | 8000 | | --workers | Number of worker processes | 1 | +| --base_url | Same as OPENAI_BASE_URL | https://api.openai.com | +| --api_key | Same as OPENAI_API_KEY | `None` | +| --forward_key | Same as FORWARD_KEY | `None` | +| --route_prefix | Same as ROUTE_PREFIX | `None` | +| --log_chat | Same as LOG_CHAT | `False` | **Environment Variable Configuration Options** refer to the `.env` file in the project root directory -| Environment Variable | Description | Default Value | -|-----------------|------------|:------------------------:| -| OPENAI_API_KEY | Default API key, supports multiple default API keys separated by space. | None | -| FORWARD_KEY | Allow the caller to use the key instead of the OpenAI API key, support multiple forward keys starting with "fk-" and separated by spaces. 
| None | -| OPENAI_BASE_URL | Forwarding base URL | `https://api.openai.com` | -|LOG_CHAT| Whether to log chat content | `true` | -|ROUTE_PREFIX| Route prefix | None | - +| Environment Variable | Description | Default Value | +|----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------:| +| OPENAI_BASE_URL | Default OpenAI API address | https://api.openai.com | +| OPENAI_API_KEY | Default OpenAI API key(s), support multiple default API keys starting with `sk-`, separated by spaces | None | +| FORWARD_KEY | Allow the caller to use this key instead of the OpenAI API key, support multiple forward keys separated by spaces; If OPENAI_API_KEY is set but FORWARD_KEY is not set, the key does not need to be provided when the client calls, it is not recommended to set FORWARD_KEY to empty for security reasons | None | +| OPENAI_BASE_URL | Forward base URL | `https://api.openai.com` | +| ROUTE_PREFIX | Route prefix | None | +| LOG_CHAT | Whether to log the chat content | `false` | # Chat Log The saved path is in the `Log/` directory under the current directory. diff --git a/deploy.md b/deploy.md index bdbd2e4..ed4d886 100644 --- a/deploy.md +++ b/deploy.md @@ -18,7 +18,7 @@ openai_forward run --port=9999 服务就搭建完成了,使用方式只需将`https://api.openai.com` 替换为服务所在端口`http://{ip}:{port}` 即可。 当然也可以将 OPENAI_API_KEY 作为环境变量或`--api_key`参数传入作为默认api key, 这样客户端在请求相关路由时可以无需在Header中传入Authorization。 -带默认api key的启动方式: +带默认api key的启动方式(但这样存在安全风险,建议结合`--forward_key`一起使用, 见[使用方式](README.md#高级配置)): ```bash openai_forward run --port=9999 --api_key="sk-******" @@ -27,6 +27,7 @@ openai_forward run --port=9999 --api_key="sk-******" 注: 如果既存在默认api key又在请求头中传入了api key,则以请求头中的api key会覆盖默认api key. 
+ ### 服务调用 替换openai的api地址为该服务的地址即可,如: @@ -34,6 +35,7 @@ openai_forward run --port=9999 --api_key="sk-******" ```bash https://api.openai.com/v1/chat/completions ``` +更多使用方式见 [应用](README.md#应用) 替换为 @@ -85,14 +87,13 @@ openai-forward run [![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2Fbeidongjiedeguang%2Fopenai-forward&project-name=openai-forward&repository-name=openai-forward&framework=other) ⚠️目前Vercel中使用Serverless Function部署的方式尚不支持流式,没有Log记录, 而且仅提供较短的接口超时时间。 -所以现在不推荐使用这种部署方式。 +所以现在不推荐使用这种部署方式。 (部署时需将环境变量`LOG_CHAT` 设置为`False`,否则会部署/运行失败。) 1. 点击按钮即可一键免费部署 也可先fork本仓库,再手动在vercel操作界面import项目 2. [绑定自定义域名](https://vercel.com/docs/concepts/projects/domains/add-a-domain):Vercel 分配的域名 DNS 在某些区域被污染了导致国内无法访问,绑定自定义域名即可直连。 - > https://vercel.openai-forward.com 仅供测试 @@ -114,7 +115,7 @@ stateDiagram-v2 去注册域名机构更改默认nameserver为cloudflare提供的nameserver --> 在cloudflare的worker中添加域名: 域名服务器更改验证成功 在cloudflare的worker中添加域名 --> [*] ``` -这种部署方式轻便简洁,支持流式转发. 对于没有vps的用户还是提交推荐的。不过目前[worker.js](worker.js)这个简单脚本仅提供转发服务, 不提供额外功能。 +这种部署方式轻便简洁,支持流式转发. 对于没有vps的用户还是十分推荐的。不过目前[worker.js](worker.js)这个简单脚本仅提供转发服务, 不提供额外功能。 > https://cloudflare.openai-forward.com @@ -128,7 +129,7 @@ stateDiagram-v2 2. 填写环境变量,必填项`PORT` :`8000`, 可选项 如默认的OPENAI_API_KEY 等 3. 绑定自定义域名 -注: Railway 每月提供 $5.0和500小时执行时间的免费计划。这意味着免费用户每个月只能使用大约21天 +注: Railway 每月提供 $5.0和500小时执行时间的免费计划。这意味着单个免费用户每个月只能使用大约21天 > https://railway.openai-forward.com @@ -144,7 +145,11 @@ stateDiagram-v2 2. 填写环境变量,如默认的OPENAI_API_KEY 等,也可以不填 然后等待部署完成即可。 -它的免费计划: 每月750小时实例时间(意味着单个实例可以不间断运行)、100G带宽流量、500分钟构建时长. +Render的免费计划: 每月750小时免费实例时间(意味着单个实例可以不间断运行)、100G带宽流量、500分钟构建时长. 
+注:默认render在15分钟内没有服务请求时会自动休眠(好处是休眠后不会占用750h的免费实例时间),休眠后下一次请求会被阻塞 5~10s。 +如果希望零停机部署可以在设置中设置`Health Check Path`为`/doc` > https://render.openai-forward.com > https://openai-forward.onrender.com + + diff --git a/openai_forward/__init__.py b/openai_forward/__init__.py index 67dd92b..609d3c3 100644 --- a/openai_forward/__init__.py +++ b/openai_forward/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.2.1" +__version__ = "0.2.2" from dotenv import load_dotenv diff --git a/openai_forward/__main__.py b/openai_forward/__main__.py index 3218fe9..7084d17 100644 --- a/openai_forward/__main__.py +++ b/openai_forward/__main__.py @@ -33,19 +33,19 @@ def run( ip_blacklist: str, None """ if base_url: - os.environ['OPENAI_BASE_URL'] = base_url + os.environ["OPENAI_BASE_URL"] = base_url if api_key: - os.environ['OPENAI_API_KEY'] = api_key + os.environ["OPENAI_API_KEY"] = api_key if forward_key: - os.environ['FORWARD_KEY'] = forward_key + os.environ["FORWARD_KEY"] = forward_key if log_chat: - os.environ['LOG_CHAT'] = log_chat + os.environ["LOG_CHAT"] = log_chat if route_prefix: - os.environ['ROUTE_PREFIX'] = route_prefix + os.environ["ROUTE_PREFIX"] = route_prefix if ip_whitelist: - os.environ['IP_WHITELIST'] = ip_whitelist + os.environ["IP_WHITELIST"] = ip_whitelist if ip_blacklist: - os.environ['IP_BLACKLIST'] = ip_blacklist + os.environ["IP_BLACKLIST"] = ip_blacklist ssl_keyfile = os.environ.get("ssl_keyfile", None) or None ssl_certfile = os.environ.get("ssl_certfile", None) or None @@ -54,11 +54,21 @@ def run( host="0.0.0.0", port=port, workers=workers, - app_dir='..', + app_dir="..", ssl_keyfile=ssl_keyfile, ssl_certfile=ssl_certfile, ) + @staticmethod + def convert( + log_path: str = "./Log/chat.log", target_path: str = "./Log/chat.jsonl" + ): + """Convert log file to jsonl file""" + from openai_forward.tool import convert_chatlog_to_jsonl + + print(f"Convert {log_path} to {target_path}") + convert_chatlog_to_jsonl(log_path, target_path) + def main(): fire.Fire(Cli) diff --git 
a/openai_forward/app.py b/openai_forward/app.py index ab1f796..94667d4 100644 --- a/openai_forward/app.py +++ b/openai_forward/app.py @@ -1,4 +1,3 @@ -import httpx from sparrow.api import create_app from .openai import Openai @@ -8,9 +7,9 @@ openai = Openai() app.add_route( - openai.ROUTE_PREFIX + '/{api_path:path}', + openai.ROUTE_PREFIX + "/{api_path:path}", openai.reverse_proxy, - methods=['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS', 'HEAD', 'PATCH', 'TRACE'], + methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "PATCH", "TRACE"], ) app.include_router(router_v1) diff --git a/openai_forward/base.py b/openai_forward/base.py index 1b4c234..044e44f 100644 --- a/openai_forward/base.py +++ b/openai_forward/base.py @@ -7,8 +7,9 @@ from loguru import logger from starlette.background import BackgroundTask -from .config import env2list, print_startup_info, setting_log +from .config import print_startup_info, setting_log from .content.chat import ChatSaver +from .tool import env2list class OpenaiBase: @@ -17,24 +18,24 @@ class OpenaiBase: _LOG_CHAT = os.environ.get("LOG_CHAT", "False").strip().lower() == "true" _openai_api_key_list = env2list("OPENAI_API_KEY", sep=" ") _cycle_api_key = cycle(_openai_api_key_list) - _FWD_KEYS = env2list("FORWARD_KEY", sep=" ") - _use_forward_key = _openai_api_key_list != [] and _FWD_KEYS != [] + _FWD_KEYS = set(env2list("FORWARD_KEY", sep=" ")) + _no_auth_mode = _openai_api_key_list != [] and _FWD_KEYS == set() IP_WHITELIST = env2list("IP_WHITELIST", sep=" ") IP_BLACKLIST = env2list("IP_BLACKLIST", sep=" ") if ROUTE_PREFIX: - if ROUTE_PREFIX.endswith('/'): + if ROUTE_PREFIX.endswith("/"): ROUTE_PREFIX = ROUTE_PREFIX[:-1] - if not ROUTE_PREFIX.startswith('/'): - ROUTE_PREFIX = '/' + ROUTE_PREFIX - timeout = 30 + if not ROUTE_PREFIX.startswith("/"): + ROUTE_PREFIX = "/" + ROUTE_PREFIX + timeout = 60 print_startup_info( - BASE_URL, ROUTE_PREFIX, _openai_api_key_list, _FWD_KEYS, _LOG_CHAT + BASE_URL, ROUTE_PREFIX, _openai_api_key_list, 
_no_auth_mode, _LOG_CHAT ) if _LOG_CHAT: setting_log(save_file=False) - chatsaver = ChatSaver(save_interval=10) + chatsaver = ChatSaver() def validate_request_host(self, ip): if self.IP_WHITELIST and ip not in self.IP_WHITELIST: @@ -49,14 +50,16 @@ def validate_request_host(self, ip): ) @classmethod - async def aiter_bytes(cls, r: httpx.Response, route_path: str): - bytes_ = b'' + async def aiter_bytes(cls, r: httpx.Response, route_path: str, uid: str): + bytes_ = b"" async for chunk in r.aiter_bytes(): bytes_ += chunk yield chunk try: target_info = cls.chatsaver.parse_bytes_to_content(bytes_, route_path) - cls.chatsaver.add_chat({target_info['role']: target_info['content']}) + cls.chatsaver.add_chat( + {target_info["role"]: target_info["content"], "uid": uid} + ) except Exception as e: logger.debug(f"log chat (not) error:\n{e=}") @@ -65,50 +68,35 @@ async def _reverse_proxy(cls, request: Request): client = httpx.AsyncClient(base_url=cls.BASE_URL, http1=True, http2=False) url_path = request.url.path url_path = url_path[len(cls.ROUTE_PREFIX) :] - url = httpx.URL(path=url_path, query=request.url.query.encode('utf-8')) + url = httpx.URL(path=url_path, query=request.url.query.encode("utf-8")) headers = dict(request.headers) - auth = headers.pop("authorization", None) - if auth and str(auth).startswith("Bearer sk-"): - tmp_headers = {'Authorization': auth} - elif cls._openai_api_key_list: - logger.info(f"Use forward key: {cls._use_forward_key}") - if cls._use_forward_key: - fk_prefix = "Bearer fk-" - logger.info(f"current forward key: {auth}") - if ( - auth - and str(auth).startswith(fk_prefix) - and auth[len("Bearer ") :] in cls._FWD_KEYS - ): - auth = "Bearer " + next(cls._cycle_api_key) - tmp_headers = {'Authorization': auth} - else: - tmp_headers = {} - else: - auth = "Bearer " + next(cls._cycle_api_key) - tmp_headers = {'Authorization': auth} - else: - tmp_headers = {} + auth = headers.pop("authorization", "") + auth_headers_dict = {"Content-Type": 
"application/json", "Authorization": auth} + auth_prefix = "Bearer " + if cls._no_auth_mode or auth and auth[len(auth_prefix) :] in cls._FWD_KEYS: + auth = auth_prefix + next(cls._cycle_api_key) + auth_headers_dict["Authorization"] = auth log_chat_completions = False - if cls._LOG_CHAT and request.method == 'POST': + uid = None + if cls._LOG_CHAT and request.method == "POST": try: chat_info = await cls.chatsaver.parse_payload_to_content( request, route_path=url_path ) if chat_info: cls.chatsaver.add_chat(chat_info) + uid = chat_info.get("uid") log_chat_completions = True except Exception as e: logger.debug( f"log chat error:\n{request.client.host=} {request.method=}: {e}" ) - tmp_headers.update({"Content-Type": "application/json"}) req = client.build_request( request.method, url, - headers=tmp_headers, + headers=auth_headers_dict, content=request.stream(), timeout=cls.timeout, ) @@ -124,14 +112,15 @@ async def _reverse_proxy(cls, request: Request): status_code=status.HTTP_504_GATEWAY_TIMEOUT, detail=error_info ) except Exception as e: - error_info = f"{type(e)}: {e}" - logger.error(error_info) + logger.exception(f"{type(e)}:") raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=error_info + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=e ) aiter_bytes = ( - cls.aiter_bytes(r, url_path) if log_chat_completions else r.aiter_bytes() + cls.aiter_bytes(r, url_path, uid) + if log_chat_completions + else r.aiter_bytes() ) return StreamingResponse( aiter_bytes, diff --git a/openai_forward/config.py b/openai_forward/config.py index bcb73fa..8ec5752 100644 --- a/openai_forward/config.py +++ b/openai_forward/config.py @@ -2,42 +2,38 @@ import os import sys import time -from typing import Dict, List, Union -import orjson from loguru import logger from rich import print from rich.panel import Panel from rich.table import Table -from sparrow import relp -def print_startup_info(base_url, route_prefix, api_key, forward_key, log_chat): +def 
print_startup_info(base_url, route_prefix, api_key, no_auth_mode, log_chat): try: from dotenv import load_dotenv - load_dotenv('.env') + load_dotenv(".env") except Exception: ... route_prefix = route_prefix or "/" api_key_info = True if len(api_key) else False - forward_key_info = True if len(forward_key) else False table = Table(title="", box=None, width=100) table.add_column("base-url", justify="left", style="#df412f") - table.add_column("route-prefix", justify="center", style="#df412f") - table.add_column("openai-api-key", justify="center", style="green") - table.add_column("forward-key", justify="center", style="green") + table.add_column("route-prefix", justify="center", style="green") + table.add_column("api-key-polling-pool", justify="center", style="green") + table.add_column( + "no-auth-mode", justify="center", style="red" if no_auth_mode else "green" + ) table.add_column("Log-chat", justify="center", style="green") - table.add_column("Log-dir", justify="center", style="#f5bb00") table.add_row( base_url, route_prefix, str(api_key_info), - str(forward_key_info), + str(no_auth_mode), str(log_chat), - "./Log/*.log", ) - print(Panel(table, title="🤗openai-forward is ready to serve!", expand=False)) + print(Panel(table, title="🤗 openai-forward is ready to serve! 
", expand=False)) class InterceptHandler(logging.Handler): @@ -61,8 +57,8 @@ def emit(self, record): def setting_log(save_file=False, log_name="openai_forward", multi_process=True): # TODO 修复时区配置 if os.environ.get("TZ") == "Asia/Shanghai": - os.environ['TZ'] = "UTC-8" - if hasattr(time, 'tzset'): + os.environ["TZ"] = "UTC-8" + if hasattr(time, "tzset"): time.tzset() logging.root.handlers = [InterceptHandler()] @@ -72,6 +68,13 @@ def setting_log(save_file=False, log_name="openai_forward", multi_process=True): config_handlers = [ {"sink": sys.stdout, "level": "DEBUG"}, + { + "sink": f"./Log/chat.log", + "enqueue": multi_process, + "rotation": "20 MB", + "filter": lambda record: "chat" in record["extra"], + "format": "{message}", + }, ] if save_file: config_handlers += [ @@ -85,57 +88,3 @@ def setting_log(save_file=False, log_name="openai_forward", multi_process=True): logger_config = {"handlers": config_handlers} logger.configure(**logger_config) - - -def yaml_dump(data, filepath, rel_path=False, mode='w'): - abs_path = relp(filepath, parents=1) if rel_path else filepath - from yaml import dump - - try: - from yaml import CDumper as Dumper - except ImportError: - from yaml import Dumper - with open(abs_path, mode=mode, encoding="utf-8") as fw: - fw.write(dump(data, Dumper=Dumper, allow_unicode=True, indent=4)) - - -def yaml_load(filepath, rel_path=False, mode='r'): - abs_path = relp(filepath, parents=1) if rel_path else filepath - from yaml import load - - try: - from yaml import CLoader as Loader - except ImportError: - from yaml import Loader - with open(abs_path, mode=mode, encoding="utf-8") as stream: - # stream = stream.read() - content = load(stream, Loader=Loader) - return content - - -def json_load(filepath: str, rel=False, mode='rb'): - abs_path = relp(filepath, parents=1) if rel else filepath - with open(abs_path, mode=mode) as f: - return orjson.loads(f.read()) - - -def json_dump( - data: Union[List, Dict], filepath: str, rel=False, indent_2=False, 
mode='wb' -): - orjson_option = 0 - if indent_2: - orjson_option = orjson.OPT_INDENT_2 - abs_path = relp(filepath, parents=1) if rel else filepath - with open(abs_path, mode=mode) as f: - f.write(orjson.dumps(data, option=orjson_option)) - - -def str2list(s: str, sep=' '): - if s: - return [i.strip() for i in s.split(sep) if i.strip()] - else: - return [] - - -def env2list(env_name: str, sep=" "): - return str2list(os.environ.get(env_name, "").strip(), sep=sep) diff --git a/openai_forward/content/chat.py b/openai_forward/content/chat.py index d5a2928..c4a39f8 100644 --- a/openai_forward/content/chat.py +++ b/openai_forward/content/chat.py @@ -1,13 +1,10 @@ -import os -from pathlib import Path -from typing import Dict, List +import uuid import orjson from fastapi import Request from httpx._decoders import LineDecoder from loguru import logger from orjson import JSONDecodeError -from sparrow import relp decoder = LineDecoder() @@ -16,7 +13,7 @@ def _parse_iter_line_content(line: str): line = line[6:] try: line_dict = orjson.loads(line) - return line_dict['choices'][0]['delta']['content'] + return line_dict["choices"][0]["delta"]["content"] except JSONDecodeError: return "" except KeyError: @@ -24,66 +21,52 @@ def _parse_iter_line_content(line: str): def parse_chat_completions(bytes_: bytes): - txt_lines = decoder.decode(bytes_.decode('utf-8')) + txt_lines = decoder.decode(bytes_.decode("utf-8")) line0 = txt_lines[0] target_info = dict() if line0.startswith("data:"): line0 = orjson.loads(line0[6:]) - msg = line0['choices'][0]['delta'] + msg = line0["choices"][0]["delta"] else: line0 = orjson.loads(line0) - msg = line0['choices'][0]['message'] + msg = line0["choices"][0]["message"] - target_info['created'] = line0['created'] - target_info['id'] = line0['id'] - target_info['model'] = line0['model'] - target_info['role'] = msg['role'] - target_info['content'] = msg.get("content", "") + target_info["created"] = line0["created"] + target_info["id"] = line0["id"] + 
target_info["model"] = line0["model"] + target_info["role"] = msg["role"] + target_info["content"] = msg.get("content", "") # loop for stream for line in txt_lines[1:]: if line in ("", "\n", "\n\n"): continue elif line.startswith("data: "): - target_info['content'] += _parse_iter_line_content(line) + target_info["content"] += _parse_iter_line_content(line) else: logger.warning(f"line not startswith data: {line}") return target_info class ChatSaver: - def __init__(self, max_chat_size=2000, save_interval=2, _dir='./Log'): - self._chat_list = [] - self._file_idx = 0 - self._save_interval = save_interval - self._max_chat_file_size = max_chat_size - self._cur_chat_file_size = 0 - self._log_dir = _dir - self._init_chat_file() - - @property - def chat_file(self): - return os.path.join(self._log_dir, f"chat_{self._file_idx}.txt") - - def _init_chat_file(self): - _log_dir = Path(self._log_dir) - if not _log_dir.exists(): - _log_dir.mkdir(parents=True) - while Path(self.chat_file).exists(): - self._file_idx += 1 + def __init__(self): + self.logger = logger.bind(chat=True) @staticmethod async def parse_payload_to_content(request: Request, route_path: str): + uid = uuid.uuid4().__str__() payload = await request.json() if route_path == "/v1/chat/completions": - msgs = payload['messages'] - model = payload['model'] - return { - "host": request.client.host, + msgs = payload["messages"] + model = payload["model"] + content = { + "messages": [{msg["role"]: msg["content"]} for msg in msgs], "model": model, - "messages": [{msg['role']: msg['content']} for msg in msgs], + "host": request.headers.get("x-real-ip") or "", + "uid": uid, } else: - return {} + content = {} + return content @staticmethod def parse_bytes_to_content(bytes_: bytes, route_path: str): @@ -93,35 +76,4 @@ def parse_bytes_to_content(bytes_: bytes, route_path: str): return {} def add_chat(self, chat_info: dict): - logger.info(str(chat_info)) - self._chat_list.append(chat_info) - self._cur_chat_file_size += 1 - 
self._save_chat() - - def _save_chat(self): - if len(self._chat_list) >= self._save_interval: - logger.info(f"save chat to {self.chat_file}") - if self._cur_chat_file_size > self._max_chat_file_size: - logger.info(f"{self._cur_chat_file_size} is too large, create new file") - self._file_idx += 1 - self._cur_chat_file_size = 1 - self.dump_chat_list(self._chat_list, self.chat_file, mode='a+', _end='\n') - self._chat_list = [] - - @staticmethod - def dump_chat_list( - data: List[Dict], filepath: str, rel=False, mode='w', _sep='\n', _end="\n" - ): - str_data = _sep.join([str(i) for i in data]) + _end - abs_path = relp(filepath, parents=1) if rel else filepath - with open(abs_path, mode=mode) as f: - f.write(str_data) - - @staticmethod - def load_chat_list(filepath: str, rel=False, mode='r', _sep='\n'): - abs_path = relp(filepath, parents=1) if rel else filepath - with open(abs_path, mode=mode, encoding='utf-8') as f: - str_result = f.read() - result_list = str_result.split(_sep) - result = [eval(i) for i in result_list if i] - return result + self.logger.debug(f"{chat_info}") diff --git a/openai_forward/openai.py b/openai_forward/openai.py index 282b53d..900570b 100644 --- a/openai_forward/openai.py +++ b/openai_forward/openai.py @@ -1,7 +1,6 @@ from fastapi import Request from .base import OpenaiBase -from .config import setting_log from .routers.schemas import OpenAIV1ChatCompletion diff --git a/openai_forward/tool.py b/openai_forward/tool.py new file mode 100644 index 0000000..908b17d --- /dev/null +++ b/openai_forward/tool.py @@ -0,0 +1,114 @@ +import ast +import os +from typing import Dict, List, Union + +import orjson +from sparrow import relp + + +def yaml_dump(data, filepath, rel_path=False, mode="w"): + abs_path = relp(filepath, parents=1) if rel_path else filepath + from yaml import dump + + try: + from yaml import CDumper as Dumper + except ImportError: + from yaml import Dumper + with open(abs_path, mode=mode, encoding="utf-8") as fw: + fw.write(dump(data, 
Dumper=Dumper, allow_unicode=True, indent=4)) + + +def yaml_load(filepath, rel_path=False, mode="r"): + abs_path = relp(filepath, parents=1) if rel_path else filepath + from yaml import load + + try: + from yaml import CLoader as Loader + except ImportError: + from yaml import Loader + with open(abs_path, mode=mode, encoding="utf-8") as stream: + content = load(stream, Loader=Loader) + return content + + +def json_load(filepath: str, rel=False, mode="rb"): + abs_path = relp(filepath, parents=1) if rel else filepath + with open(abs_path, mode=mode) as f: + return orjson.loads(f.read()) + + +def json_dump( + data: Union[List, Dict], filepath: str, rel=False, indent_2=False, mode="wb" +): + orjson_option = 0 + if indent_2: + orjson_option = orjson.OPT_INDENT_2 + abs_path = relp(filepath, parents=1) if rel else filepath + with open(abs_path, mode=mode) as f: + f.write(orjson.dumps(data, option=orjson_option)) + + +def str2list(s: str, sep=" "): + if s: + return [i.strip() for i in s.split(sep) if i.strip()] + else: + return [] + + +def env2list(env_name: str, sep=" "): + return str2list(os.environ.get(env_name, "").strip(), sep=sep) + + +def get_matches(messages: List[Dict], assistant: List[Dict]): + matches = [] + assis_idx_to_remove, msg_idx_to_remove = [], [] + for idx_msg in range(len(messages)): + win = min(5, len(messages) - 1) + range_list = [idx_msg + (i + 1) // 2 * (-1) ** (i + 1) for i in range(win)] + # range_list = [idx_msg + 0, idx_msg + 1, idx_msg - 1, idx_msg + 2, idx_msg - 2, ...] 
+ for idx_ass in range_list: + if idx_ass >= len(assistant): + break + if messages[idx_msg]["uid"] == assistant[idx_ass]["uid"]: + matches.append( + [ + {"messages": messages[idx_msg]["messages"]}, + {"assistant": assistant[idx_ass]["assistant"]}, + ] + ) + assis_idx_to_remove.append(idx_ass) + msg_idx_to_remove.append(idx_msg) + break + assis_remain = [i for j, i in enumerate(assistant) if j not in assis_idx_to_remove] + msg_remain = [i for j, i in enumerate(messages) if j not in msg_idx_to_remove] + remains = [ + [{"messages": x["messages"]}, {"assistant": y["assistant"]}] + for x in msg_remain + for y in assis_remain + if x["uid"] == y["uid"] + ] + matches.extend(remains) + return matches + + +def parse_chat_log(filepath: str): + with open(filepath, "r", encoding="utf-8") as f: + messages, assistant = [], [] + for line in f.readlines(): + content: dict = ast.literal_eval(line) + if content.get("messages"): + messages.append(content) + else: + assistant.append(content) + return get_matches(messages, assistant) + + +def convert_chatlog_to_jsonl(log_path: str, target_path: str): + try: + import orjsonl + except ImportError: + raise ImportError( + "import orjsonl error, please `pip install openai_forward[tool]` first" + ) + content_list = parse_chat_log(log_path) + orjsonl.save(target_path, content_list) diff --git a/pyproject.toml b/pyproject.toml index f53bebf..37ab6dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,9 @@ bard = [ edge = [ "EdgeGPT", ] +tool = [ + "orjsonl" +] [project.scripts] openai_forward = "openai_forward.__main__:main" diff --git a/tests/test_chat_save.py b/tests/test_chat_save.py deleted file mode 100644 index c9a62ff..0000000 --- a/tests/test_chat_save.py +++ /dev/null @@ -1,34 +0,0 @@ -import pytest -from utils import rm - -from openai_forward.content.chat import ChatSaver - - -@pytest.fixture(scope="module") -def saver() -> ChatSaver: - return ChatSaver(save_interval=1, max_chat_size=2) - - -class TestChatSaver: - 
@classmethod - def teardown_class(cls): - rm("Log/chat*.txt") - - def test_init(self, saver: ChatSaver): - assert saver.chat_file.endswith("chat_0.txt") - - def test_add_chat(self, saver: ChatSaver): - saver.add_chat({"id": 1, "content": "hello"}) - assert saver.chat_file.endswith("chat_0.txt") - saver.add_chat({"id": 2, "content": "hello"}) - assert saver.chat_file.endswith("chat_0.txt") - saver.add_chat({"id": 3, "content": "hello"}) - assert saver.chat_file.endswith("chat_1.txt") - saver.add_chat({"id": 4, "content": "hello"}) - assert saver.chat_file.endswith("chat_1.txt") - saver.add_chat({"id": 5, "content": "hello"}) - assert saver.chat_file.endswith("chat_2.txt") - - def test_init_file(self): - saver = ChatSaver() - assert saver.chat_file.endswith("chat_3.txt") diff --git a/tests/test_env.py b/tests/test_env.py index a99c3a5..9357b8a 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -9,7 +9,7 @@ class TestEnv: - with open(".env", 'r', encoding='utf-8') as f: + with open(".env", "r", encoding="utf-8") as f: defualt_env = f.read() @classmethod @@ -23,7 +23,7 @@ def setup_class(cls): IP_WHITELIST= IP_BLACKLIST= """ - with open(".env", 'w', encoding='utf-8') as f: + with open(".env", "w", encoding="utf-8") as f: f.write(env) time.sleep(0.1) @@ -33,10 +33,9 @@ def setup_class(cls): @classmethod def teardown_class(cls): - with open(".env", 'w', encoding='utf-8') as f: + with open(".env", "w", encoding="utf-8") as f: f.write(cls.defualt_env) def test_env1(self): - assert self.aibase._FWD_KEYS == ['ps1', 'ps2', 'ps3'] - assert self.aibase._openai_api_key_list == ['key1', 'key2'] - assert self.aibase._use_forward_key + assert self.aibase._openai_api_key_list == ["key1", "key2"] + assert self.aibase._no_auth_mode is False diff --git a/tests/test_http.py b/tests/test_http.py index 30acc4c..04c3f69 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -10,7 +10,7 @@ class TestRun: @classmethod def setup_class(cls): kill(8000) - base_url = 
"https://api.openai-forward.top" + base_url = "https://api.openai-forward.com" subprocess.Popen(["nohup", "openai-forward", "run", "--base_url", base_url]) time.sleep(3)