diff --git a/.github/data/whisper.m4a b/.github/data/whisper.m4a
new file mode 100644
index 0000000..4d108b7
Binary files /dev/null and b/.github/data/whisper.m4a differ
diff --git a/Examples/chat.py b/Examples/chat.py
new file mode 100644
index 0000000..fc34c77
--- /dev/null
+++ b/Examples/chat.py
@@ -0,0 +1,12 @@
+import openai
+
+openai.api_base = "https://api.openai-forward.com/v1"
+openai.api_key = "sk-******"
+
+resp = openai.ChatCompletion.create(
+ model="gpt-3.5-turbo",
+ messages=[
+ {"role": "user", "content": "Who won the world series in 2020?"},
+ ],
+)
+print(resp.choices)
diff --git a/Examples/embedding.py b/Examples/embedding.py
new file mode 100644
index 0000000..10232a1
--- /dev/null
+++ b/Examples/embedding.py
@@ -0,0 +1,9 @@
+import openai
+
+openai.api_base = "http://localhost:8000/v1"
+openai.api_key = "sk-******"
+response = openai.Embedding.create(
+ input="Your text string goes here", model="text-embedding-ada-002"
+)
+embeddings = response['data'][0]['embedding']
+print(embeddings)
diff --git a/Examples/whisper.py b/Examples/whisper.py
new file mode 100644
index 0000000..50b7430
--- /dev/null
+++ b/Examples/whisper.py
@@ -0,0 +1,10 @@
+# Note: this uses the pre-1.0 OpenAI Python SDK interface (v0.27.0 or later, below 1.0)
+import openai
+from sparrow import relp
+
+openai.api_base = "https://api.openai-forward.com/v1"
+openai.api_key = "sk-******"
+
+with open(relp("../.github/data/whisper.m4a"), "rb") as audio_file:  # closes the handle after the upload
+    transcript = openai.Audio.transcribe("whisper-1", audio_file)
+print(transcript)
diff --git a/README.md b/README.md
index 94ffa16..c250680 100644
--- a/README.md
+++ b/README.md
@@ -61,16 +61,6 @@
> https://render.openai-forward.com
> https://railway.openai-forward.com
-
-
- 👉Tips
-
-🎉🎉🎉近期GPT-4 API 已经全面可用! 但它需要付费api账户,也就是需要先绑定信用卡。
-目前比较推荐开源加密钱包[OneKey](https://github.com/OneKeyHQ)的VISA虚拟卡:[https://card.onekey.so](https://card.onekey.so/?i=O163GB)
-
-
-
-
## 功能
**基础功能**
@@ -95,27 +85,26 @@
👉 [部署文档](deploy.md)
-
提供以下几种部署方式
**有海外vps方案**
-1. [pip 安装部署](deploy.md#pip部署)
-2. [Docker部署](deploy.md#docker部署)
+1. [pip 安装部署](deploy.md#pip部署)
+2. [Docker部署](deploy.md#docker部署)
> https://api.openai-forward.com
**无vps免费部署方案**
+
1. [Railway部署](deploy.md#Railway-一键部署)
> https://railway.openai-forward.com
2. [Render一键部署](deploy.md#render-一键部署)
> https://render.openai-forward.com
-
---
下面的部署仅提供单一转发功能
3. [一键Vercel部署](deploy.md#vercel-一键部署)
> https://vercel.openai-forward.com
-4. [cloudflare部署](deploy.md#cloudflare-部署)
+4. [cloudflare部署](deploy.md#cloudflare-部署)
> https://cloudflare.page.openai-forward.com
## 应用
@@ -200,6 +189,7 @@ curl --location 'https://api.openai-forward.com/v1/images/generations' \
另一种为读取环境变量的方式指定。
### 命令行参数
+
可通过 `openai-forward run --help` 查看
@@ -217,10 +207,10 @@ curl --location 'https://api.openai-forward.com/v1/images/generations' \
| --route_prefix | 同 ROUTE_PREFIX | `None` |
| --log_chat | 同 LOG_CHAT | `False` |
-
### 环境变量配置项
+
支持从运行目录下的`.env`文件中读取
| 环境变量 | 说明 | 默认值 |
@@ -231,7 +221,6 @@ curl --location 'https://api.openai-forward.com/v1/images/generations' \
| ROUTE_PREFIX | 路由前缀 | 无 |
| LOG_CHAT | 是否记录聊天内容 | `false` |
-
## 高级配置
**设置openai api_key为自定义的forward key**
@@ -245,8 +234,8 @@ FORWARD_KEY=fk-****** # 这里fk-token由我们自己定义
```
这里我们配置了FORWARD_KEY为`fk-******`, 那么后面客户端在调用时只需设置OPENAI_API_KEY为我们自定义的`fk-******` 即可。
-这样的好处是在使用一些需要输入OPENAI_API_KEY的第三方应用时,我们可以使用自定义的api-key`fk-******`,
-无需担心真正的OPENAI_API_KEY被泄露。并且可以对外分发`fk-******`。
+这样的好处是在使用一些需要输入OPENAI_API_KEY的第三方应用时,我们可以使用自定义的api-key`fk-******`,
+无需担心真正的OPENAI_API_KEY被泄露。并且可以对外分发`fk-******`。
**用例:**
diff --git a/openai_forward/__init__.py b/openai_forward/__init__.py
index 36bb831..397da19 100644
--- a/openai_forward/__init__.py
+++ b/openai_forward/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.2.4"
+__version__ = "0.3.0-alpha"
from dotenv import load_dotenv
diff --git a/openai_forward/__main__.py b/openai_forward/__main__.py
index 50c14df..65caf47 100644
--- a/openai_forward/__main__.py
+++ b/openai_forward/__main__.py
@@ -60,12 +60,12 @@ def run(
)
@staticmethod
- def convert(log_path: str = "./Log/chat.log", target_path: str = "./Log/chat.json"):
- """Convert log file to jsonl file"""
- from openai_forward.tool import convert_chatlog_to_jsonl
+ def convert(log_folder: str = "./Log/chat", target_path: str = "./Log/chat.json"):
+ """Convert log folder to jsonl file"""
+ from openai_forward.tool import convert_folder_to_jsonl
- print(f"Convert {log_path} to {target_path}")
- convert_chatlog_to_jsonl(log_path, target_path)
+ print(f"Convert {log_folder}/*.log to {target_path}")
+ convert_folder_to_jsonl(log_folder, target_path)
def main():
diff --git a/openai_forward/base.py b/openai_forward/base.py
index c8a0e17..df2c1ca 100644
--- a/openai_forward/base.py
+++ b/openai_forward/base.py
@@ -9,6 +9,7 @@
from .config import print_startup_info, setting_log
from .content.chat import ChatSaver
+from .content.whisper import WhisperSaver
from .tool import env2list
@@ -36,6 +37,7 @@ class OpenaiBase:
if _LOG_CHAT:
setting_log(save_file=False)
chatsaver = ChatSaver()
+ whispersaver = WhisperSaver()
def validate_request_host(self, ip):
if self.IP_WHITELIST and ip not in self.IP_WHITELIST:
@@ -56,10 +58,13 @@ async def aiter_bytes(cls, r: httpx.Response, route_path: str, uid: str):
bytes_ += chunk
yield chunk
try:
- target_info = cls.chatsaver.parse_bytes_to_content(bytes_, route_path)
- cls.chatsaver.add_chat(
- {target_info["role"]: target_info["content"], "uid": uid}
- )
+ if route_path == "/v1/chat/completions":
+ target_info = cls.chatsaver.parse_bytes_to_content(bytes_, route_path)
+ cls.chatsaver.add_chat(
+ {target_info["role"]: target_info["content"], "uid": uid}
+ )
+ elif route_path.startswith("/v1/audio/"):
+ cls.whispersaver.add_log(bytes_)
except Exception as e:
logger.debug(f"log chat (not) error:\n{e=}")
@@ -71,23 +76,27 @@ async def _reverse_proxy(cls, request: Request):
url = httpx.URL(path=url_path, query=request.url.query.encode("utf-8"))
headers = dict(request.headers)
auth = headers.pop("authorization", "")
- auth_headers_dict = {"Content-Type": "application/json", "Authorization": auth}
+ content_type = headers.pop("content-type", "application/json")
+ auth_headers_dict = {"Content-Type": content_type, "Authorization": auth}
auth_prefix = "Bearer "
if cls._no_auth_mode or auth and auth[len(auth_prefix) :] in cls._FWD_KEYS:
auth = auth_prefix + next(cls._cycle_api_key)
auth_headers_dict["Authorization"] = auth
- log_chat_completions = False
+ if_log = False
uid = None
if cls._LOG_CHAT and request.method == "POST":
try:
- chat_info = await cls.chatsaver.parse_payload_to_content(
- request, route_path=url_path
- )
- if chat_info:
- cls.chatsaver.add_chat(chat_info)
- uid = chat_info.get("uid")
- log_chat_completions = True
+ if url_path.startswith("/v1/audio/"):
+ if_log = True
+ else:
+ chat_info = await cls.chatsaver.parse_payload_to_content(
+ request, route_path=url_path
+ )
+ if chat_info:
+ cls.chatsaver.add_chat(chat_info)
+ uid = chat_info.get("uid")
+ if_log = True
except Exception as e:
logger.debug(
f"log chat error:\n{request.client.host=} {request.method=}: {e}"
@@ -117,11 +126,7 @@ async def _reverse_proxy(cls, request: Request):
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=e
)
- aiter_bytes = (
- cls.aiter_bytes(r, url_path, uid)
- if log_chat_completions
- else r.aiter_bytes()
- )
+ aiter_bytes = cls.aiter_bytes(r, url_path, uid) if if_log else r.aiter_bytes()
return StreamingResponse(
aiter_bytes,
status_code=r.status_code,
diff --git a/openai_forward/config.py b/openai_forward/config.py
index 8ec5752..c247505 100644
--- a/openai_forward/config.py
+++ b/openai_forward/config.py
@@ -69,12 +69,19 @@ def setting_log(save_file=False, log_name="openai_forward", multi_process=True):
config_handlers = [
{"sink": sys.stdout, "level": "DEBUG"},
{
- "sink": f"./Log/chat.log",
+ "sink": f"./Log/chat/chat.log",
"enqueue": multi_process,
- "rotation": "20 MB",
+ "rotation": "50 MB",
"filter": lambda record: "chat" in record["extra"],
"format": "{message}",
},
+ {
+ "sink": f"./Log/whisper/whisper.log",
+ "enqueue": multi_process,
+ "rotation": "30 MB",
+ "filter": lambda record: "whisper" in record["extra"],
+ "format": "{message}",
+ },
]
if save_file:
config_handlers += [
diff --git a/openai_forward/content/image.py b/openai_forward/content/image.py
deleted file mode 100644
index e69de29..0000000
diff --git a/openai_forward/content/whisper.py b/openai_forward/content/whisper.py
new file mode 100644
index 0000000..cda6cbf
--- /dev/null
+++ b/openai_forward/content/whisper.py
@@ -0,0 +1,10 @@
+from loguru import logger
+
+
+class WhisperSaver:  # writes forwarded audio-route response bodies to the dedicated log sink
+    def __init__(self):
+        self.logger = logger.bind(whisper=True)  # "whisper" extra key is what the sink filter matches
+
+    def add_log(self, bytes_: bytes):
+        text_content = bytes_.decode("utf-8", errors="replace")  # a stray byte must not drop the record
+        self.logger.debug(text_content)
diff --git a/openai_forward/tool.py b/openai_forward/tool.py
index 73889d2..de8f40a 100644
--- a/openai_forward/tool.py
+++ b/openai_forward/tool.py
@@ -4,32 +4,7 @@
import orjson
from rich import print
-from sparrow import MeasureTime, relp
-
-
-def yaml_dump(data, filepath, rel_path=False, mode="w"):
- abs_path = relp(filepath, parents=1) if rel_path else filepath
- from yaml import dump
-
- try:
- from yaml import CDumper as Dumper
- except ImportError:
- from yaml import Dumper
- with open(abs_path, mode=mode, encoding="utf-8") as fw:
- fw.write(dump(data, Dumper=Dumper, allow_unicode=True, indent=4))
-
-
-def yaml_load(filepath, rel_path=False, mode="r"):
- abs_path = relp(filepath, parents=1) if rel_path else filepath
- from yaml import load
-
- try:
- from yaml import CLoader as Loader
- except ImportError:
- from yaml import Loader
- with open(abs_path, mode=mode, encoding="utf-8") as stream:
- content = load(stream, Loader=Loader)
- return content
+from sparrow import MeasureTime, ls, relp
def json_load(filepath: str, rel=False, mode="rb"):
@@ -60,51 +35,38 @@ def env2list(env_name: str, sep=" "):
return str2list(os.environ.get(env_name, "").strip(), sep=sep)
-def get_matches(messages: List[Dict], assistant: List[Dict]):
+def get_matches(messages: List[Dict], assistants: List[Dict]):
mt = MeasureTime()
mt.start()
- msg_len, ass_len = len(messages), len(assistant)
+ msg_len, ass_len = len(messages), len(assistants)
if msg_len != ass_len:
print(f"message({msg_len}) 与 assistant({ass_len}) 长度不匹配")
- matches = []
- assis_idx_to_remove, msg_idx_to_remove = [], []
-
- def cvt(msg: dict, ass: dict):
- return {
- "datetime": msg.get('datetime'),
- "forwarded-for": msg.get("forwarded-for"),
- "model": msg.get("model"),
- "messages": msg.get("messages"),
- "assistant": ass.get("assistant"),
- }
-
- for idx_msg in range(len(messages)):
- win = min(max(abs(ass_len - msg_len), 16), len(messages) - 1)
- range_list = [idx_msg + (i + 1) // 2 * (-1) ** (i + 1) for i in range(win)]
- # range_list = [idx_msg + 0, idx_msg + 1, idx_msg - 1, idx_msg + 2, idx_msg - 2, ...]
- for idx_ass in range_list:
- if idx_ass >= len(assistant):
- break
- if messages[idx_msg]["uid"] == assistant[idx_ass]["uid"]:
- matches.append(cvt(messages[idx_msg], assistant[idx_ass]))
- assis_idx_to_remove.append(idx_ass)
- msg_idx_to_remove.append(idx_msg)
- break
- assis_remain = [i for j, i in enumerate(assistant) if j not in assis_idx_to_remove]
- msg_remain = [i for j, i in enumerate(messages) if j not in msg_idx_to_remove]
- remains = [
- cvt(x, y) for x in msg_remain for y in assis_remain if x["uid"] == y["uid"]
+
+ cvt = lambda msg, ass: {
+ "datetime": msg.get('datetime'),
+ "forwarded-for": msg.get("forwarded-for"),
+ "model": msg.get("model"),
+ "messages": msg.get("messages"),
+ "assistant": ass.get("assistant"),
+ }
+    # Index by uid with plain lookups (not pop) so the caller's records are left unmodified.
+    msg_uid_dict = {m["uid"]: m for m in messages}
+    ass_uid_dict = {a["uid"]: a for a in assistants}
+ matches = [
+ cvt(msg_uid_dict[uid], ass_uid_dict[uid])
+ for uid in msg_uid_dict
+ if uid in ass_uid_dict
]
- matches.extend(remains)
+
ref_len = max(msg_len, ass_len)
if len(matches) != ref_len:
- print(f"存在{ref_len-len(matches)}条未匹配数据")
+ print(f"存在{ref_len - len(matches)}条未匹配数据")
mt.show_interval("计算耗时:")
return matches
-def parse_chat_log(filepath: str):
- with open(filepath, "r", encoding="utf-8") as f:
+def parse_log_to_list(log_path: str):
+ with open(log_path, "r", encoding="utf-8") as f:
messages, assistant = [], []
for line in f.readlines():
content: dict = ast.literal_eval(line)
@@ -112,9 +74,30 @@ def parse_chat_log(filepath: str):
messages.append(content)
else:
assistant.append(content)
- return get_matches(messages, assistant)
+ return messages, assistant
def convert_chatlog_to_jsonl(log_path: str, target_path: str):
- content_list = parse_chat_log(log_path)
+ message_list, assistant_list = parse_log_to_list(log_path)
+ content_list = get_matches(messages=message_list, assistants=assistant_list)
+ json_dump(content_list, target_path, indent_2=True)
+
+
+def sort_logname_by_datetime(log_path: str):
+    return sorted(ls(log_path, "*.log", relp=False))  # NOTE(review): lexical order; assumes rotated names embed a sortable timestamp
+
+
+def convert_folder_to_jsonl(folder_path: str, target_path: str):
+ log_files = sort_logname_by_datetime(folder_path)
+ messages = []
+ assistants = []
+ for log_path in log_files:
+ msg, ass = parse_log_to_list(log_path)
+
+ msg_len, ass_len = len(msg), len(ass)
+ if msg_len != ass_len:
+ print(f"{log_path=} message({msg_len}) 与 assistant({ass_len}) 长度不匹配")
+ messages.extend(msg)
+ assistants.extend(ass)
+ content_list = get_matches(messages=messages, assistants=assistants)
json_dump(content_list, target_path, indent_2=True)
diff --git a/pyproject.toml b/pyproject.toml
index bb73a9e..ff0a5b8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ classifiers = [
dependencies = [
"loguru",
- "sparrow-python>=0.1.3",
+ "sparrow-python>=0.1.5",
"fastapi",
"uvicorn",
"orjson",
@@ -38,8 +38,9 @@ Issues = "https://github.com/beidongjiedeguang/openai-forward/issues"
Source = "https://github.com/beidongjiedeguang/openai-forward"
[project.optional-dependencies]
-tool = [
- "orjsonl"
+test = [
+ "openai",
+ "pytest",
]
[project.scripts]
diff --git a/pytest.ini b/pytest.ini
index 772a48f..a895357 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -5,3 +5,4 @@ markers =
timeout: marks test timeout duration
repeat: marks that test run n times
addopts = --doctest-modules --doctest-glob=README.md --doctest-glob=*.py --ignore=setup.py
+norecursedirs = Examples