# Example: transcribe a local audio file through the openai-forward proxy.
# Note: you need to be using OpenAI Python v0.27.0 for the code below to work
import openai
from sparrow import relp

# Point the client at the forwarding service instead of api.openai.com.
openai.api_base = "https://api.openai-forward.com/v1"
openai.api_key = "sk-******"

# NOTE(review): relp presumably resolves the path relative to this script —
# confirm against the sparrow documentation.
# The context manager guarantees the file handle is closed even when the
# transcription request raises.
with open(relp("../.github/data/whisper.m4a"), "rb") as audio_file:
    transcript = openai.Audio.transcribe("whisper-1", audio_file)
print(transcript)
- 👉Tips - -🎉🎉🎉近期GPT-4 API 已经全面可用! 但它需要付费api账户,也就是需要先绑定信用卡。 -目前比较推荐开源加密钱包[OneKey](https://github.com/OneKeyHQ)的VISA虚拟卡:[https://card.onekey.so](https://card.onekey.so/?i=O163GB) - -
- - ## 功能 **基础功能** @@ -95,27 +85,26 @@ 👉 [部署文档](deploy.md) - 提供以下几种部署方式 **有海外vps方案** -1. [pip 安装部署](deploy.md#pip部署) -2. [Docker部署](deploy.md#docker部署) +1. [pip 安装部署](deploy.md#pip部署) +2. [Docker部署](deploy.md#docker部署) > https://api.openai-forward.com **无vps免费部署方案** + 1. [Railway部署](deploy.md#Railway-一键部署) > https://railway.openai-forward.com 2. [Render一键部署](deploy.md#render-一键部署) > https://render.openai-forward.com - --- 下面的部署仅提供单一转发功能 3. [一键Vercel部署](deploy.md#vercel-一键部署) > https://vercel.openai-forward.com -4. [cloudflare部署](deploy.md#cloudflare-部署) +4. [cloudflare部署](deploy.md#cloudflare-部署) > https://cloudflare.page.openai-forward.com ## 应用 @@ -200,6 +189,7 @@ curl --location 'https://api.openai-forward.com/v1/images/generations' \ 另一种为读取环境变量的方式指定。 ### 命令行参数 + 可通过 `openai-forward run --help` 查看
@@ -217,10 +207,10 @@ curl --location 'https://api.openai-forward.com/v1/images/generations' \ | --route_prefix | 同 ROUTE_PREFIX | `None` | | --log_chat | 同 LOG_CHAT | `False` | -
@staticmethod
def convert(log_folder: str = "./Log/chat", target_path: str = "./Log/chat.json"):
    """Convert the ``*.log`` files of a log folder into one output file.

    Args:
        log_folder: Directory containing the chat log files to convert.
        target_path: Path of the file produced by the conversion.
    """
    # Imported inside the command so the tool module is only loaded when
    # this command is actually invoked.
    from openai_forward.tool import convert_folder_to_jsonl

    announcement = f"Convert {log_folder}/*.log to {target_path}"
    print(announcement)
    convert_folder_to_jsonl(log_folder, target_path)
WhisperSaver from .tool import env2list @@ -36,6 +37,7 @@ class OpenaiBase: if _LOG_CHAT: setting_log(save_file=False) chatsaver = ChatSaver() + whispersaver = WhisperSaver() def validate_request_host(self, ip): if self.IP_WHITELIST and ip not in self.IP_WHITELIST: @@ -56,10 +58,13 @@ async def aiter_bytes(cls, r: httpx.Response, route_path: str, uid: str): bytes_ += chunk yield chunk try: - target_info = cls.chatsaver.parse_bytes_to_content(bytes_, route_path) - cls.chatsaver.add_chat( - {target_info["role"]: target_info["content"], "uid": uid} - ) + if route_path == "/v1/chat/completions": + target_info = cls.chatsaver.parse_bytes_to_content(bytes_, route_path) + cls.chatsaver.add_chat( + {target_info["role"]: target_info["content"], "uid": uid} + ) + elif route_path.startswith("/v1/audio/"): + cls.whispersaver.add_log(bytes_) except Exception as e: logger.debug(f"log chat (not) error:\n{e=}") @@ -71,23 +76,27 @@ async def _reverse_proxy(cls, request: Request): url = httpx.URL(path=url_path, query=request.url.query.encode("utf-8")) headers = dict(request.headers) auth = headers.pop("authorization", "") - auth_headers_dict = {"Content-Type": "application/json", "Authorization": auth} + content_type = headers.pop("content-type", "application/json") + auth_headers_dict = {"Content-Type": content_type, "Authorization": auth} auth_prefix = "Bearer " if cls._no_auth_mode or auth and auth[len(auth_prefix) :] in cls._FWD_KEYS: auth = auth_prefix + next(cls._cycle_api_key) auth_headers_dict["Authorization"] = auth - log_chat_completions = False + if_log = False uid = None if cls._LOG_CHAT and request.method == "POST": try: - chat_info = await cls.chatsaver.parse_payload_to_content( - request, route_path=url_path - ) - if chat_info: - cls.chatsaver.add_chat(chat_info) - uid = chat_info.get("uid") - log_chat_completions = True + if url_path.startswith("/v1/audio/"): + if_log = True + else: + chat_info = await cls.chatsaver.parse_payload_to_content( + request, 
class WhisperSaver:
    """Record response bodies of the audio routes through a dedicated logger.

    The logger is bound with ``whisper=True`` so that a log handler filtering
    on the ``whisper`` extra key can route these records to their own sink.
    """

    def __init__(self):
        self.logger = logger.bind(whisper=True)

    def add_log(self, bytes_: bytes):
        """Decode a raw response body and emit it at DEBUG level.

        Args:
            bytes_: The response body to record.

        Bytes that are not valid UTF-8 are replaced with U+FFFD instead of
        raising ``UnicodeDecodeError``, so a malformed body is still logged
        rather than lost.
        """
        text_content = bytes_.decode("utf-8", errors="replace")
        self.logger.debug(text_content)
def get_matches(messages: List[Dict], assistants: List[Dict]):
    """Pair request records with response records that share a ``uid``.

    Args:
        messages: Request records. The ``uid`` key identifies the record;
            ``datetime``, ``forwarded-for``, ``model`` and ``messages`` are
            copied into the result when present (``None`` otherwise).
        assistants: Response records. The ``uid`` key identifies the record;
            ``assistant`` is copied into the result when present.

    Returns:
        A list with one merged dict per uid found on both sides, in the
        order the uids first appear in ``messages``. When a uid occurs more
        than once on one side, the last record with that uid is used.

    Records without a ``uid`` cannot be paired; they are skipped and show up
    in the printed count of unmatched entries. The input records are left
    unmodified.
    """
    mt = MeasureTime()
    mt.start()
    msg_len, ass_len = len(messages), len(assistants)
    if msg_len != ass_len:
        print(f"message({msg_len}) 与 assistant({ass_len}) 长度不匹配")

    # Index both sides by uid. The key is read, not popped, so the caller's
    # dicts keep their "uid" entry and can be reused after this call.
    msg_by_uid = {m["uid"]: m for m in messages if "uid" in m}
    ass_by_uid = {a["uid"]: a for a in assistants if "uid" in a}

    matches = [
        {
            "datetime": msg.get("datetime"),
            "forwarded-for": msg.get("forwarded-for"),
            "model": msg.get("model"),
            "messages": msg.get("messages"),
            "assistant": ass_by_uid[uid].get("assistant"),
        }
        for uid, msg in msg_by_uid.items()
        if uid in ass_by_uid
    ]

    # Report how many records of the longer side found no partner.
    ref_len = max(msg_len, ass_len)
    if len(matches) != ref_len:
        print(f"存在{ref_len - len(matches)}条未匹配数据")
    mt.show_interval("计算耗时:")
    return matches
def convert_folder_to_jsonl(folder_path: str, target_path: str):
    """Merge every log file of a folder into one matched output file.

    Each file returned by ``sort_logname_by_datetime(folder_path)`` is parsed
    with ``parse_log_to_list``; the request and response records of all files
    are concatenated, paired by ``get_matches`` and written to
    ``target_path`` with ``json_dump``.

    Args:
        folder_path: Directory holding the log files.
        target_path: Destination file for the matched records.
    """
    all_messages, all_assistants = [], []
    for log_path in sort_logname_by_datetime(folder_path):
        parsed_messages, parsed_assistants = parse_log_to_list(log_path)

        # Warn per file when requests and responses do not line up.
        msg_len = len(parsed_messages)
        ass_len = len(parsed_assistants)
        if msg_len != ass_len:
            print(f"{log_path=} message({msg_len}) 与 assistant({ass_len}) 长度不匹配")

        all_messages += parsed_messages
        all_assistants += parsed_assistants

    json_dump(
        get_matches(messages=all_messages, assistants=all_assistants),
        target_path,
        indent_2=True,
    )