From a7c00953a3b3152a47d746c9805bbbe64cf94c86 Mon Sep 17 00:00:00 2001
From: canisminor1990
Date: Wed, 15 Nov 2023 00:13:09 +0800
Subject: [PATCH] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20Refactor=20api?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                                      |  22 +--
 api/azure-speech.ts                            |  28 ----
 api/edge-speech.ts                             |  13 ++
 api/microsoft-speech.ts                        |  12 +-
 api/openai-stt.ts                              |  19 +++
 api/openai-tts.ts                              |  19 +++
 package.json                                   |   3 +-
 src/const/api.ts                               |  32 ++--
 src/data/{nameList.ts => voiceList.ts}         |   0
 src/index.ts                                   |  10 +-
 src/server.ts                                  |   7 +-
 src/server/cors.ts                             | 140 ------------------
 src/server/createEdgeSpeechCompletion.ts       | 103 +++++++++++++
 ...t.ts => createMicrosoftSpeechCompletion.ts} |  28 +++-
 .../createOpenaiAudioSpeechCompletion.ts       |  26 ++++
 ...eateOpenaiAudioTranscriptionsCompletion.ts  |  29 ++++
 src/server/getAllowOrigins.ts                  |  15 --
 src/server/handleAzureSpeechRequest.ts         |  52 -------
 src/server/types.ts                            |  58 ++++++++
 src/services/fetchAzureSpeech.ts               |  39 -----
 src/services/fetchEdgeSpeech.ts                | 109 ++------------
 src/services/fetchMicrosoftSpeech.ts           |  32 ++--
 src/services/fetchOpenaiSTT.ts                 |  45 +++---
 src/services/fetchOpenaiTTS.ts                 |  41 ++---
 src/useAzureSpeech/demos/index.tsx             |  84 -----------
 src/useAzureSpeech/index.md                    |  11 --
 src/useAzureSpeech/index.ts                    |  15 --
 src/useEdgeSpeech/demos/index.tsx              |  19 ++-
 src/useEdgeSpeech/index.ts                     |   2 +-
 src/useMicrosoftSpeech/demos/index.tsx         |  34 +++--
 src/useMicrosoftSpeech/index.md                |   2 +-
 src/useMicrosoftSpeech/index.ts                |   2 +-
 src/useOpenaiSTT/demos/index.tsx               |   3 +-
 src/useOpenaiTTS/demos/index.tsx               |   5 +-
 src/useOpenaiTTS/index.ts                      |   2 +-
 src/useSpeechSynthes/demos/index.tsx           |   8 +-
 src/useSpeechSynthes/index.ts                  |   6 +-
 src/utils/genSSML.ts                           |  34 +++--
 src/utils/getVoiceList.ts                      |   6 +-
 39 files changed, 477 insertions(+), 638 deletions(-)
 delete mode 100644 api/azure-speech.ts
 create mode 100644 api/edge-speech.ts
 create mode 100644 api/openai-stt.ts
 create mode 100644 api/openai-tts.ts
 rename src/data/{nameList.ts => voiceList.ts} (100%)
 delete mode 100644 src/server/cors.ts
 create mode 100644 src/server/createEdgeSpeechCompletion.ts
 rename src/server/{handleMicrosoftSpeechRequest.ts => createMicrosoftSpeechCompletion.ts} (55%)
 create mode 100644 src/server/createOpenaiAudioSpeechCompletion.ts
 create mode 100644 src/server/createOpenaiAudioTranscriptionsCompletion.ts
 delete mode 100644 src/server/getAllowOrigins.ts
 delete mode 100644 src/server/handleAzureSpeechRequest.ts
 create mode 100644 src/server/types.ts
 delete mode 100644 src/services/fetchAzureSpeech.ts
 delete mode 100644 src/useAzureSpeech/demos/index.tsx
 delete mode 100644 src/useAzureSpeech/index.md
 delete mode 100644 src/useAzureSpeech/index.ts

diff --git a/README.md b/README.md
index f1c9683..f01621d 100644
--- a/README.md
+++ b/README.md
@@ -86,17 +86,17 @@ Click button below to deploy your private plugins' gateway.
 This project provides some additional configuration items set with environment variables:
 
-| Environment Variable         | Description                                                                                                                                               | Default                     |
-| ---------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------- |
-| `ALLOW_ORIGINS`              | Allow origins , string or string array                                                                                                                    |                             |
-| `OPENAI_API_KEY`             | This is the API key you apply on the OpenAI account page                                                                                                  | `sk-xxxxxx...xxxxxx`        |
-| `OPENAI_PROXY_URL`           | If you manually configure the OpenAI interface proxy, you can use this configuration item to override the default OpenAI API request base URL            | `https://api.openai.com/v1` |
-| `AZURE_SPEECH_KEY`           | This is the API key of Azure Speech Service                                                                                                               |                             |
-| `AZURE_SPEECH_REGION`        | This is the region of Azure Speech Service                                                                                                                |                             |
-| `AZURE_SPEECH_PROXY_URL`     | If you manually configure the AZURE Speech interface proxy, you can use this configuration item to override the default Speech API request base URL      | `/api/azure-speech`         |
-| `MICROSOFT_SPEECH_PROXY_URL` | If you manually configure the Microsoft Speech interface proxy, you can use this configuration item to override the default Speech API request base URL  | `/api/microsoft-speech`     |
-| `EDDGE_API_TOKEN`            | This is the API key of Edge Speech Service                                                                                                                |                             |
-| `EDDGE_PROXY_URL`            | If you manually configure the Edge interface proxy, you can use this configuration item to override the default Edge wss request base URL                |                             |
+| Environment Variable       | Description                                                                                                                                               | Default                     |
+| -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------- |
+| `ALLOW_ORIGINS`            | Allowed origins, a string or an array of strings                                                                                                          |                             |
+| `OPENAI_API_KEY`           | This is the API key you apply for on the OpenAI account page                                                                                              | `sk-xxxxxx...xxxxxx`        |
+| `OPENAI_PROXY_URL`         | If you manually configure the OpenAI interface proxy, you can use this configuration item to override the default OpenAI API request base URL            | `https://api.openai.com/v1` |
+| `AZURE_SPEECH_KEY`         | This is the API key of Azure Speech Service                                                                                                               |                             |
+| `AZURE_SPEECH_REGION`      | This is the region of Azure Speech Service                                                                                                                |                             |
+| `AZURE_SPEECH_PROXY_URL`   | If you manually configure the Azure Speech interface proxy, you can use this configuration item to override the default Speech API request base URL      | `/api/azure-speech`         |
+| `MICROSOFT_SPEECH_API_URL` | If you manually configure the Microsoft Speech interface proxy, you can use this configuration item to override the default Speech API request base URL  | `/api/microsoft-speech`     |
+| `EDGE_API_TOKEN`           | This is the API key of Edge Speech Service                                                                                                                |                             |
+| `EDGE_SPEECH_API_URL`      | If you manually configure the Edge interface proxy, you can use this configuration item to override the default Edge wss request base URL                |                             |
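The refactored speech routes accept a plain JSON payload over POST and return the synthesized audio as the response body. As a reference, here is a minimal client-side sketch against the default `/api/edge-speech` route; the inline payload type mirrors `EdgeSpeechPayload` from `src/server/types.ts` in this patch, while the helper name and error handling are illustrative assumptions:

```ts
// Hypothetical caller for the refactored edge-speech route (not part of this patch).
// The payload shape mirrors EdgeSpeechPayload from src/server/types.ts.
interface EdgeSpeechPayload {
  input: string;
  options: { voice: string };
}

export const requestEdgeSpeech = async (input: string, voice: string): Promise<ArrayBuffer> => {
  const payload: EdgeSpeechPayload = { input, options: { voice } };
  // '/api/edge-speech' is the default EDGE_SPEECH_API_URL defined in src/const/api.ts.
  const res = await fetch('/api/edge-speech', {
    body: JSON.stringify(payload),
    method: 'POST',
  });
  if (!res.ok) throw new Error(`Edge speech request failed: ${res.status}`);
  return res.arrayBuffer();
};
```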
diff --git a/api/azure-speech.ts b/api/azure-speech.ts
deleted file mode 100644
index f6d90da..0000000
--- a/api/azure-speech.ts
+++ /dev/null
@@ -1,28 +0,0 @@
-// TODO: fix vercel error
-// Error: The Edge Function "api/azure-speech" is referencing unsupported modules:
-//   - https-proxy-agent: net, tls, url
-//   - microsoft-cognitiveservices-speech-sdk: vc-blob-asset:speech-processor.js, fs, net, tls
-
-/*
-import cors from '../src/server/cors';
-import { getAllowOrigins } from '../src/server/getAllowOrigins';
-import { handleAzureSpeechRequest } from '../src/server/handleAzureSpeechRequest';
-
-export const config = {
-  runtime: 'edge',
-};
-
-export default async (req: Request) => {
-  if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 });
-  const origin = getAllowOrigins(req);
-  if (!origin) return new Response('Origin Not Allowed', { status: 403 });
-  const res = await handleAzureSpeechRequest(req);
-  return cors(req, res, { methods: ['POST'], origin });
-};
-*/
-
-export default async (req: Request) => {
-  if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 });
-
-  return new Response('WIP');
-};
diff --git a/api/edge-speech.ts b/api/edge-speech.ts
new file mode 100644
index 0000000..c4f9639
--- /dev/null
+++ b/api/edge-speech.ts
@@ -0,0 +1,13 @@
+import { createEdgeSpeechCompletion } from '../src/server/createEdgeSpeechCompletion';
+import { EdgeSpeechPayload } from '../src/server/types';
+
+export const config = {
+  runtime: 'edge',
+};
+
+export default async (req: Request) => {
+  if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 });
+  const payload = (await req.json()) as EdgeSpeechPayload;
+  const res = await createEdgeSpeechCompletion({ payload });
+  return res;
+};
diff --git a/api/microsoft-speech.ts b/api/microsoft-speech.ts
index 9b62c17..38e3f02 100644
--- a/api/microsoft-speech.ts
+++ b/api/microsoft-speech.ts
@@ -1,6 +1,5 @@
-import cors from '../src/server/cors';
-import { getAllowOrigins } from '../src/server/getAllowOrigins';
-import { handleMicrosoftSpeechRequest } from '../src/server/handleMicrosoftSpeechRequest';
+import { createMicrosoftSpeechCompletion } from '../src/server/createMicrosoftSpeechCompletion';
+import { MicrosoftSpeechPayload } from '../src/server/types';
 
 export const config = {
   runtime: 'edge',
@@ -8,8 +7,7 @@ export const config = {
 
 export default async (req: Request) => {
   if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 });
-  const origin = getAllowOrigins(req);
-  if (!origin) return new Response('Origin Not Allowed', { status: 403 });
-  const res = await handleMicrosoftSpeechRequest(req);
-  return cors(req, new Response(res.body, res), { methods: ['POST'], origin });
+  const payload = (await req.json()) as MicrosoftSpeechPayload;
+  const res = await createMicrosoftSpeechCompletion({ payload });
+  return res;
 };
diff --git a/api/openai-stt.ts b/api/openai-stt.ts
new file mode 100644
index 0000000..32ae42b
--- /dev/null
+++ b/api/openai-stt.ts
@@ -0,0 +1,19 @@
+import OpenAI from 'openai';
+
+import { OPENAI_API_KEY, OPENAI_PROXY_URL } from '@/const/api';
+
+import { createOpenaiAudioTranscriptionsCompletion } from '../src/server/createOpenaiAudioTranscriptionsCompletion';
+import { OpenAISTTPayload } from '../src/server/types';
+
+export const config = {
+  runtime: 'edge',
+};
+
+export default async (req: Request) => {
+  if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 });
+  const payload = (await
req.json()) as OpenAISTTPayload; + if (!OPENAI_API_KEY) return new Response('OPENAI_API_KEY is not set', { status: 500 }); + const openai = new OpenAI({ apiKey: OPENAI_API_KEY, baseURL: OPENAI_PROXY_URL }); + const res = await createOpenaiAudioTranscriptionsCompletion({ openai, payload }); + return res; +}; diff --git a/api/openai-tts.ts b/api/openai-tts.ts new file mode 100644 index 0000000..88ecbfe --- /dev/null +++ b/api/openai-tts.ts @@ -0,0 +1,19 @@ +import OpenAI from 'openai'; + +import { OPENAI_API_KEY, OPENAI_PROXY_URL } from '@/const/api'; + +import { createOpenaiAudioSpeechCompletion } from '../src/server/createOpenaiAudioSpeechCompletion'; +import { OpenAITTSPayload } from '../src/server/types'; + +export const config = { + runtime: 'edge', +}; + +export default async (req: Request) => { + if (req.method !== 'POST') return new Response('Method Not Allowed', { status: 405 }); + const payload = (await req.json()) as OpenAITTSPayload; + if (!OPENAI_API_KEY) return new Response('OPENAI_API_KEY is not set', { status: 500 }); + const openai = new OpenAI({ apiKey: OPENAI_API_KEY, baseURL: OPENAI_PROXY_URL }); + const res = await createOpenaiAudioSpeechCompletion({ openai, payload }); + return res; +}; diff --git a/package.json b/package.json index c8b5aa4..ca9845d 100644 --- a/package.json +++ b/package.json @@ -66,11 +66,10 @@ "antd-style": "^3", "lodash-es": "^4", "lucide-react": "latest", - "microsoft-cognitiveservices-speech-sdk": "^1", + "openai": "^4.17.3", "query-string": "^8", "react-error-boundary": "^4.0.11", "react-layout-kit": "^1", - "ssml-document": "^1", "swr": "^2", "url-join": "^5", "uuid": "^9" diff --git a/src/const/api.ts b/src/const/api.ts index 89afd3c..ca4fbad 100644 --- a/src/const/api.ts +++ b/src/const/api.ts @@ -1,15 +1,14 @@ -import urlJoin from 'url-join'; - -export const MICROSOFT_SPPECH_URL = +export const MICROSOFT_SPEECH_URL = 'https://southeastasia.api.speech.microsoft.com/accfreetrial/texttospeech/acc/v3.0-beta1/vcg/speak'; -export const MICROSOFT_SPEECH_PROXY_URL = - process.env.MICROSOFT_SPEECH_PROXY_URL || - process.env.NEXT_PUBLIC_MICROSOFT_SPEECH_PROXY_URL || - '/api/microsoft-speech'; -export const AZURE_SPEECH_PROXY_URL = - process.env.AZURE_SPEECH_PROXY_URL || - process.env.NEXT_PUBLIC_AZURE_SPEECH_PROXY_URL || - '/api/azure-speech'; +export const EDGE_SPEECH_URL = + 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1'; +export const EDGE_API_TOKEN = '6A5AA1D4EAFF4E9FB37E23D68491D6F4'; + +export const MICROSOFT_SPEECH_API_URL = '/api/microsoft-speech'; +export const EDGE_SPEECH_API_URL = '/api/edge-speech'; +export const OPENAI_TTS_API_URL = '/api/openai-tts'; +export const OPENAI_STT_API_URL = '/api/openai-stt'; + export const AZURE_SPEECH_KEY = process.env.AZURE_SPEECH_KEY || process.env.NEXT_PUBLIC_AZURE_SPEECH_KEY || ''; export const AZURE_SPEECH_REGION = @@ -20,14 +19,3 @@ export const OPENAI_PROXY_URL = process.env.OPENAI_PROXY_URL || process.env.NEXT_PUBLIC_OPENAI_PROXY_URL || 'https://api.openai.com/v1'; -export const OPENAI_TTS_URL = (api?: string) => urlJoin(api || OPENAI_PROXY_URL, 'audio/speech'); -export const OPENAI_STT_URL = (api?: string) => - urlJoin(api || OPENAI_PROXY_URL, 'audio/transcriptions'); -export const EDDGE_PROXY_URL = - process.env.EDDGE_PROXY_URL || - process.env.NEXT_PUBLIC_EDDGE_PROXY_UR || - 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1'; -export const EDDGE_API_TOKEN = - process.env.EDDGE_API_TOKEN || - process.env.NEXT_PUBLIC_EDDGE_API_TOKEN || - 
'6A5AA1D4EAFF4E9FB37E23D68491D6F4';
diff --git a/src/data/nameList.ts b/src/data/voiceList.ts
similarity index 100%
rename from src/data/nameList.ts
rename to src/data/voiceList.ts
diff --git a/src/index.ts b/src/index.ts
index 5f2b637..985450d 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -3,19 +3,17 @@ export { default as AudioVisualizer, type AudioVisualizerProps } from './AudioVi
 export { default as azureVoiceList } from './data/azureVoiceList';
 export { default as edgeVoiceList } from './data/edgeVoiceList';
 export { default as voiceLocale } from './data/locales';
-export { default as nameList } from './data/nameList';
 export { default as openaiVoiceList } from './data/openaiVoiceList';
+export { default as voiceList } from './data/voiceList';
 export { useAudioPlayer } from './hooks/useAudioPlayer';
 export { useAudioVisualizer } from './hooks/useAudioVisualizer';
 export { useBlobUrl } from './hooks/useBlobUrl';
 export { useStreamAudioPlayer } from './hooks/useStreamAudioPlayer';
-export { type AzureSpeechOptions, fetchAzureSpeech } from './services/fetchAzureSpeech';
 export { type EdgeSpeechOptions, fetchEdgeSpeech } from './services/fetchEdgeSpeech';
 export { fetchMicrosoftSpeech, type MicrosoftSpeechOptions } from './services/fetchMicrosoftSpeech';
 export { fetchOpenaiSTT, type OpenaiSttOptions } from './services/fetchOpenaiSTT';
 export { fetchOpenaiTTS, type OpenaiTtsOptions } from './services/fetchOpenaiTTS';
 export { useAudioRecorder } from './useAudioRecorder';
-export { useAzureSpeech } from './useAzureSpeech';
 export { useEdgeSpeech } from './useEdgeSpeech';
 export { useMicrosoftSpeech } from './useMicrosoftSpeech';
 export {
@@ -42,3 +40,9 @@ export {
   getSpeechSynthesVoiceOptions,
   getVoiceLocaleOptions,
 } from './utils/getVoiceList';
+export {
+  EDGE_SPEECH_API_URL,
+  MICROSOFT_SPEECH_API_URL,
+  OPENAI_STT_API_URL,
+  OPENAI_TTS_API_URL,
+} from '@/const/api';
diff --git a/src/server.ts b/src/server.ts
index 4c232a9..114b555 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -1,2 +1,5 @@
-export { handleAzureSpeechRequest } from './server/handleAzureSpeechRequest';
-export { handleMicrosoftSpeechRequest } from './server/handleMicrosoftSpeechRequest';
+export { createEdgeSpeechCompletion } from '@/server/createEdgeSpeechCompletion';
+export { createMicrosoftSpeechCompletion } from '@/server/createMicrosoftSpeechCompletion';
+export { createOpenaiAudioSpeechCompletion } from '@/server/createOpenaiAudioSpeechCompletion';
+export { createOpenaiAudioTranscriptionsCompletion } from '@/server/createOpenaiAudioTranscriptionsCompletion';
+export * from '@/server/types';
diff --git a/src/server/cors.ts b/src/server/cors.ts
deleted file mode 100644
index a8be6c5..0000000
--- a/src/server/cors.ts
+++ /dev/null
@@ -1,140 +0,0 @@
-/**
- * Multi purpose CORS lib.
- * Note: Based on the `cors` package in npm but using only
- * web APIs. Feel free to use it in your own projects.
- */
-
-type StaticOrigin = boolean | string | RegExp | (boolean | string | RegExp)[];
-
-type OriginFn = (origin: string | undefined, req: Request) => StaticOrigin | Promise<StaticOrigin>;
-
-interface CorsOptions {
-  allowedHeaders?: string | string[];
-  credentials?: boolean;
-  exposedHeaders?: string | string[];
-  maxAge?: number;
-  methods?: string | string[];
-  optionsSuccessStatus?: number;
-  origin?: StaticOrigin | OriginFn;
-  preflightContinue?: boolean;
-}
-
-const defaultOptions: CorsOptions = {
-  methods: 'GET,HEAD,PUT,PATCH,POST,DELETE',
-  optionsSuccessStatus: 204,
-  origin: '*',
-  preflightContinue: false,
-};
-
-function isOriginAllowed(origin: string, allowed: StaticOrigin): boolean {
-  return Array.isArray(allowed)
-    ? allowed.some((o) => isOriginAllowed(origin, o))
-    : typeof allowed === 'string'
-    ? origin === allowed
-    : allowed instanceof RegExp
-    ? allowed.test(origin)
-    : !!allowed;
-}
-
-function getOriginHeaders(reqOrigin: string | undefined, origin: StaticOrigin) {
-  const headers = new Headers();
-
-  if (origin === '*') {
-    // Allow any origin
-    headers.set('Access-Control-Allow-Origin', '*');
-  } else if (typeof origin === 'string') {
-    // Fixed origin
-    headers.set('Access-Control-Allow-Origin', origin);
-    headers.append('Vary', 'Origin');
-  } else {
-    const allowed = isOriginAllowed(reqOrigin ?? '', origin);
-
-    if (allowed && reqOrigin) {
-      headers.set('Access-Control-Allow-Origin', reqOrigin);
-    }
-    headers.append('Vary', 'Origin');
-  }
-
-  return headers;
-}
-
-// originHeadersFromReq
-
-async function originHeadersFromReq(req: Request, origin: StaticOrigin | OriginFn) {
-  const reqOrigin = req.headers.get('Origin') || undefined;
-  const value = typeof origin === 'function' ? await origin(reqOrigin, req) : origin;
-
-  if (!value) return;
-  return getOriginHeaders(reqOrigin, value);
-}
-
-function getAllowedHeaders(req: Request, allowed?: string | string[]) {
-  const headers = new Headers();
-
-  if (!allowed) {
-    allowed = req.headers.get('Access-Control-Request-Headers')!;
-    headers.append('Vary', 'Access-Control-Request-Headers');
-  } else if (Array.isArray(allowed)) {
-    // If the allowed headers is an array, turn it into a string
-    allowed = allowed.join(',');
-  }
-  if (allowed) {
-    headers.set('Access-Control-Allow-Headers', allowed);
-  }
-
-  return headers;
-}
-
-export default async function cors(req: Request, res: Response, options?: CorsOptions) {
-  const opts = { ...defaultOptions, ...options };
-  const { headers } = res;
-  const originHeaders = await originHeadersFromReq(req, opts.origin ?? false);
-  const mergeHeaders = (v: string, k: string) => {
-    if (k === 'Vary') headers.append(k, v);
-    else headers.set(k, v);
-  };
-
-  // If there's no origin we won't touch the response
-  if (!originHeaders) return res;
-
-  originHeaders.forEach(mergeHeaders);
-
-  if (opts.credentials) {
-    headers.set('Access-Control-Allow-Credentials', 'true');
-  }
-
-  const exposed = Array.isArray(opts.exposedHeaders)
-    ? opts.exposedHeaders.join(',')
-    : opts.exposedHeaders;
-
-  if (exposed) {
-    headers.set('Access-Control-Expose-Headers', exposed);
-  }
-
-  // Handle the preflight request
-  if (req.method === 'OPTIONS') {
-    if (opts.methods) {
-      const methods = Array.isArray(opts.methods) ? opts.methods.join(',') : opts.methods;
-
-      headers.set('Access-Control-Allow-Methods', methods);
-    }
-
-    getAllowedHeaders(req, opts.allowedHeaders).forEach(mergeHeaders);
-
-    if (typeof opts.maxAge === 'number') {
-      headers.set('Access-Control-Max-Age', String(opts.maxAge));
-    }
-
-    if (opts.preflightContinue) return res;
-
-    headers.set('Content-Length', '0');
-    return new Response(null, { headers, status: opts.optionsSuccessStatus });
-  }
-
-  // If we got here, it's a normal request
-  return res;
-}
-
-export function initCors(options?: CorsOptions) {
-  return (req: Request, res: Response) => cors(req, res, options);
-}
diff --git a/src/server/createEdgeSpeechCompletion.ts b/src/server/createEdgeSpeechCompletion.ts
new file mode 100644
index 0000000..c0a478c
--- /dev/null
+++ b/src/server/createEdgeSpeechCompletion.ts
@@ -0,0 +1,103 @@
+import qs from 'query-string';
+import { v4 as uuidv4 } from 'uuid';
+
+import { EDGE_API_TOKEN, EDGE_SPEECH_URL } from '../const/api';
+import { EdgeSpeechPayload } from '../server/types';
+import { genSSML } from '../utils/genSSML';
+import { genSendContent } from '../utils/genSendContent';
+import { getHeadersAndData } from '../utils/getHeadersAndData';
+
+const configContent = JSON.stringify({
+  context: {
+    synthesis: {
+      audio: {
+        metadataoptions: { sentenceBoundaryEnabled: false, wordBoundaryEnabled: true },
+        outputFormat: 'audio-24khz-48kbitrate-mono-mp3',
+      },
+    },
+  },
+});
+
+const genHeader = (connectId: string) => {
+  const date = new Date().toString();
+  const configHeader = {
+    'Content-Type': 'application/json; charset=utf-8',
+    'Path': 'speech.config',
+    'X-Timestamp': date,
+  };
+  const contentHeader = {
+    'Content-Type': 'application/ssml+xml',
+    'Path': 'ssml',
+    'X-RequestId': connectId,
+    'X-Timestamp': date,
+  };
+  return {
+    configHeader,
+    contentHeader,
+  };
+};
+
+interface CreateEdgeSpeechCompletionOptions {
+  payload: EdgeSpeechPayload;
+}
+
+export const createEdgeSpeechCompletion = async ({
+  payload,
+}: CreateEdgeSpeechCompletionOptions): Promise<Response> => {
+  const { input, options } = payload;
+
+  const connectId = uuidv4().replaceAll('-', '');
+  const url = qs.stringifyUrl({
+    query: {
+      ConnectionId: connectId,
+      TrustedClientToken: EDGE_API_TOKEN,
+    },
+    url: EDGE_SPEECH_URL,
+  });
+
+  const { configHeader, contentHeader } = genHeader(connectId);
+  const config = genSendContent(configHeader, configContent);
+  const content = genSendContent(contentHeader, genSSML(input, options));
+
+  return new Promise((resolve, reject) => {
+    const ws = new WebSocket(url);
+    ws.binaryType = 'arraybuffer';
+    const onOpen = () => {
+      ws.send(config);
+      ws.send(content);
+    };
+    let audioData = new ArrayBuffer(0);
+    const onMessage = async (event: MessageEvent) => {
+      if (typeof event.data === 'string') {
+        const { headers } = getHeadersAndData(event.data);
+        switch (headers['Path']) {
+          case 'turn.end': {
+            ws.close();
+            if (!audioData.byteLength) return;
+            const res = new Response(audioData);
+            resolve(res);
+            break;
+          }
+        }
+      } else if (event.data instanceof ArrayBuffer) {
+        const dataview = new DataView(event.data);
+        const headerLength = dataview.getInt16(0);
+        if (event.data.byteLength > headerLength + 2) {
+          const newBody = event.data.slice(2 + headerLength);
+          const newAudioData = new ArrayBuffer(audioData.byteLength + newBody.byteLength);
+          const mergedUint8Array = new Uint8Array(newAudioData);
+          mergedUint8Array.set(new Uint8Array(audioData), 0);
+          mergedUint8Array.set(new Uint8Array(newBody), audioData.byteLength);
+          audioData = newAudioData;
+        }
+      }
+    };
+    const onError = () => {
+      reject(new Error('WebSocket error occurred.'));
+      ws.close();
+    };
+    ws.addEventListener('open', onOpen);
+    ws.addEventListener('message', onMessage);
+    ws.addEventListener('error', onError);
+  });
+};
diff --git a/src/server/handleMicrosoftSpeechRequest.ts b/src/server/createMicrosoftSpeechCompletion.ts
similarity index 55%
rename from src/server/handleMicrosoftSpeechRequest.ts
rename to src/server/createMicrosoftSpeechCompletion.ts
index 1032970..9b2f854 100644
--- a/src/server/handleMicrosoftSpeechRequest.ts
+++ b/src/server/createMicrosoftSpeechCompletion.ts
@@ -1,8 +1,18 @@
 import { v4 as uuidv4 } from 'uuid';
 
-import { MICROSOFT_SPPECH_URL } from '../const/api';
+import { MICROSOFT_SPEECH_URL } from '../const/api';
+import { MicrosoftSpeechPayload } from '../server/types';
+import { genSSML } from '../utils/genSSML';
+
+interface CreateMicrosoftSpeechCompletionOptions {
+  payload: MicrosoftSpeechPayload;
+}
+
+export const createMicrosoftSpeechCompletion = async ({
+  payload,
+}: CreateMicrosoftSpeechCompletionOptions) => {
+  const { input, options } = payload;
 
-export const handleMicrosoftSpeechRequest = async (req: Request, options?: any) => {
   const DEFAULT_HEADERS = new Headers({
     'accept': '*/*',
     'accept-language': 'zh-CN,zh;q=0.9',
@@ -20,13 +30,21 @@ export const handleMicrosoftSpeechRequest = async (req: Request, options?: any)
       'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
   });
 
-  const res = await fetch(MICROSOFT_SPPECH_URL, {
-    body: req.body,
+  const body = JSON.stringify({
+    offsetInPlainText: 0,
+    properties: {
+      SpeakTriggerSource: 'AccTuningPagePlayButton',
+    },
+    ssml: genSSML(input, options),
+    ttsAudioFormat: 'audio-24khz-160kbitrate-mono-mp3',
+  });
+
+  const res = await fetch(MICROSOFT_SPEECH_URL, {
+    body,
     headers: DEFAULT_HEADERS,
     method: 'POST',
     // @ts-ignore
     responseType: 'arraybuffer',
-    ...options,
   });
 
   return res;
diff --git a/src/server/createOpenaiAudioSpeechCompletion.ts b/src/server/createOpenaiAudioSpeechCompletion.ts
new file mode 100644
index 0000000..8b0a1fe
--- /dev/null
+++ b/src/server/createOpenaiAudioSpeechCompletion.ts
@@ -0,0 +1,26 @@
+import OpenAI from 'openai';
+
+import { OpenAITTSPayload } from './types';
+
+interface CreateOpenaiAudioSpeechCompletionOptions {
+  openai: OpenAI;
+  payload: OpenAITTSPayload;
+}
+
+export const createOpenaiAudioSpeechCompletion = async ({
+  payload,
+  openai,
+}: CreateOpenaiAudioSpeechCompletionOptions) => {
+  const { options, input } = payload;
+
+  const response = await openai.audio.speech.create(
+    {
+      input,
+      model: options.model,
+      voice: options.voice,
+    },
+    { headers: { Accept: '*/*' } },
+  );
+
+  return response;
+};
diff --git a/src/server/createOpenaiAudioTranscriptionsCompletion.ts b/src/server/createOpenaiAudioTranscriptionsCompletion.ts
new file mode 100644
index 0000000..7c3ec30
--- /dev/null
+++ b/src/server/createOpenaiAudioTranscriptionsCompletion.ts
@@ -0,0 +1,29 @@
+import OpenAI from 'openai';
+
+import { OpenAISTTPayload } from './types';
+
+interface CreateOpenaiAudioTranscriptionsOptions {
+  openai: OpenAI;
+  payload: OpenAISTTPayload;
+}
+
+export const createOpenaiAudioTranscriptionsCompletion = async ({
+  payload,
+  openai,
+}: CreateOpenaiAudioTranscriptionsOptions) => {
+  const { blob, options } = payload;
+
+  const file = new File([blob], `${Date.now()}.${options.mineType.extension}`, {
+    type: options.mineType.mineType,
+  });
+
+  const response = await openai.audio.transcriptions.create(
+    {
+      file,
+      model: options.model,
+    },
+    { headers: { Accept: '*/*' } },
+  );
+
+  return response.text;
+};
diff --git a/src/server/getAllowOrigins.ts b/src/server/getAllowOrigins.ts
deleted file mode 100644
index 3fccfdf..0000000
--- a/src/server/getAllowOrigins.ts
+++ /dev/null
@@ -1,15 +0,0 @@
-const ALLOW_ORIGINS = process.env?.ALLOW_ORIGINS?.split(',') || undefined;
-
-export const getAllowOrigins = (req: Request) => {
-  let origin = '*';
-
-  if (ALLOW_ORIGINS) {
-    const reqOrigin = req.headers.get('origin');
-    if (reqOrigin && ALLOW_ORIGINS.includes(reqOrigin)) {
-      origin = reqOrigin;
-    } else {
-      return;
-    }
-  }
-  return origin;
-};
diff --git a/src/server/handleAzureSpeechRequest.ts b/src/server/handleAzureSpeechRequest.ts
deleted file mode 100644
index 9c9033f..0000000
--- a/src/server/handleAzureSpeechRequest.ts
+++ /dev/null
@@ -1,52 +0,0 @@
-import {
-  AudioConfig,
-  PropertyId,
-  ResultReason,
-  SpeechConfig,
-  SpeechSynthesisOutputFormat,
-  SpeechSynthesisResult,
-  SpeechSynthesizer,
-} from 'microsoft-cognitiveservices-speech-sdk';
-
-import { AZURE_SPEECH_KEY, AZURE_SPEECH_REGION } from '../const/api';
-
-const fetchAzureSpeech = async (ssml: string, { api }: any): Promise<ArrayBuffer> => {
-  const key = api.key || AZURE_SPEECH_KEY;
-  const region = api.key || AZURE_SPEECH_REGION;
-  const speechConfig = SpeechConfig.fromSubscription(key, region);
-  speechConfig.setProperty(PropertyId.SpeechServiceResponse_RequestSentenceBoundary, 'true');
-  speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Webm24Khz16BitMonoOpus;
-
-  const audioConfig = AudioConfig.fromDefaultSpeakerOutput();
-  const synthesizer: SpeechSynthesizer | null = new SpeechSynthesizer(speechConfig, audioConfig);
-
-  const completeCb = async (
-    result: SpeechSynthesisResult,
-    resolve: (value: ArrayBuffer) => void,
-  ) => {
-    if (result.reason === ResultReason.SynthesizingAudioCompleted) {
-      const audioData = result.audioData;
-      resolve(audioData);
-    }
-    synthesizer.close();
-  };
-
-  const errCb = (err: string, reject: (err?: any) => void) => {
-    reject(err);
-    synthesizer.close();
-  };
-
-  return new Promise((resolve, reject) => {
-    synthesizer.speakSsmlAsync(
-      ssml,
-      (result) => completeCb(result, resolve),
-      (err) => errCb(err, reject),
-    );
-  });
-};
-
-export const handleAzureSpeechRequest = async (req: Request) => {
-  const { ssml, ...options } = req.body as any;
-  const data = await fetchAzureSpeech(ssml, options);
-  return new Response(data);
-};
diff --git a/src/server/types.ts b/src/server/types.ts
new file mode 100644
index 0000000..4ddfbdf
--- /dev/null
+++ b/src/server/types.ts
@@ -0,0 +1,58 @@
+import { SsmlOptions } from '@/utils/genSSML';
+import { RecordMineType } from '@/utils/getRecordMineType';
+
+export interface MicrosoftSpeechPayload {
+  /**
+   * @title Text to synthesize
+   */
+  input: string;
+  /**
+   * @title SSML options for speech synthesis
+   */
+  options: SsmlOptions;
+}
+
+export interface EdgeSpeechPayload {
+  /**
+   * @title Text to synthesize
+   */
+  input: string;
+  /**
+   * @title SSML options for speech synthesis
+   */
+  options: Pick<SsmlOptions, 'voice'>;
+}
+
+export interface OpenAITTSPayload {
+  /**
+   * @title Text to synthesize
+   */
+  input: string;
+  options: {
+    /**
+     * @title Model used for speech synthesis
+     */
+    model: string;
+    /**
+     * @title Voice used for speech synthesis
+     */
+    voice: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
+  };
+}
+
+export interface OpenAISTTPayload {
+  /**
+   * @title Audio file for speech recognition
+   */
+  blob: Blob;
+  options: {
+    /**
+     * @title Audio file format
+     */
+    mineType: RecordMineType;
+    /**
+     * @title Model used for speech recognition
+     */
+    model: string;
+  };
+}
diff --git a/src/services/fetchAzureSpeech.ts b/src/services/fetchAzureSpeech.ts
deleted file mode 100644
index ea495a3..0000000
--- a/src/services/fetchAzureSpeech.ts
+++ /dev/null
@@ -1,39 +0,0 @@
-import { AZURE_SPEECH_KEY, AZURE_SPEECH_PROXY_URL, AZURE_SPEECH_REGION } from '@/const/api';
-import { arrayBufferConvert } from '@/utils/arrayBufferConvert';
-import { type SsmlOptions, genSSML } from '@/utils/genSSML';
-
-export interface AzureSpeechOptions extends SsmlOptions {
-  api?: {
-    key?: string;
-    proxy?: string;
-    region?: string;
-  };
-}
-
-export const fetchAzureSpeech = async (
-  text: string,
-  { api = {}, ...options }: AzureSpeechOptions,
-): Promise<AudioBuffer> => {
-  const data = JSON.stringify({
-    api: {
-      key: api?.key || AZURE_SPEECH_KEY,
-      region: api?.region || AZURE_SPEECH_REGION,
-    },
-    ssml: genSSML(text, options),
-  });
-  const url = api?.proxy || AZURE_SPEECH_PROXY_URL;
-
-  const response: Response = await fetch(url, {
-    body: data,
-    method: 'POST',
-    // @ts-ignore
-    responseType: 'arraybuffer',
-  });
-
-  if (!response.ok) {
-    throw new Error('Network response was not ok');
-  }
-
-  const arrayBuffer = await response.arrayBuffer();
-  return await arrayBufferConvert(arrayBuffer);
-};
diff --git a/src/services/fetchEdgeSpeech.ts b/src/services/fetchEdgeSpeech.ts
index 7ab2b50..9c18349 100644
--- a/src/services/fetchEdgeSpeech.ts
+++ b/src/services/fetchEdgeSpeech.ts
@@ -1,104 +1,25 @@
-import qs from 'query-string';
-import { v4 as uuidv4 } from 'uuid';
-
-import { EDDGE_API_TOKEN, EDDGE_PROXY_URL } from '@/const/api';
+import { createEdgeSpeechCompletion } from '@/server/createEdgeSpeechCompletion';
+import { EdgeSpeechPayload } from '@/server/types';
 import { arrayBufferConvert } from '@/utils/arrayBufferConvert';
-import { type SsmlOptions, genSSML } from '@/utils/genSSML';
-import { genSendContent } from '@/utils/genSendContent';
-import { getHeadersAndData } from '@/utils/getHeadersAndData';
-
-const configConent = JSON.stringify({
-  context: {
-    synthesis: {
-      audio: {
-        metadataoptions: { sentenceBoundaryEnabled: false, wordBoundaryEnabled: true },
-        outputFormat: 'audio-24khz-48kbitrate-mono-mp3',
-      },
-    },
-  },
-});
-
-const genHeader = (connectId: string) => {
-  const date = new Date().toString();
-  const configHeader = {
-    'Content-Type': 'application/json; charset=utf-8',
-    'Path': 'speech.config',
-    'X-Timestamp': date,
-  };
-  const contentHeader = {
-    'Content-Type': 'application/ssml+xml',
-    'Path': 'ssml',
-    'X-RequestId': connectId,
-    'X-Timestamp': date,
-  };
-  return {
-    configHeader,
-    contentHeader,
-  };
-};
+import { type SsmlOptions } from '@/utils/genSSML';
 
-export interface EdgeSpeechOptions extends Pick<SsmlOptions, 'name'> {
+export interface EdgeSpeechOptions extends Pick<SsmlOptions, 'voice'> {
   api?: {
-    key?: string;
-    proxy?: string;
+    url?: string;
   };
 }
+
 export const fetchEdgeSpeech = async (
-  text: string,
-  { api = {}, ...options }: EdgeSpeechOptions,
+  input: string,
+  { api, ...options }: EdgeSpeechOptions,
 ): Promise<AudioBuffer> => {
-  const connectId = uuidv4().replaceAll('-', '');
-  const url = qs.stringifyUrl({
-    query: {
-      ConnectionId: connectId,
-      TrustedClientToken: api?.key || EDDGE_API_TOKEN,
-    },
-    url: api?.proxy || EDDGE_PROXY_URL,
-  });
+  const payload: EdgeSpeechPayload = { input, options };
 
-  const { configHeader, contentHeader } = genHeader(connectId);
-  const config = genSendContent(configHeader, configConent);
-  const content = genSendContent(contentHeader, genSSML(text, options));
+  const response = await (api?.url
+    ? fetch(api.url, { body: JSON.stringify(payload), method: 'POST' })
+    : await createEdgeSpeechCompletion({ payload }));
 
-  return new Promise((resolve, reject) => {
-    const ws = new WebSocket(url);
-    ws.binaryType = 'arraybuffer';
-    const onOpen = () => {
-      ws.send(config);
-      ws.send(content);
-    };
-    let audioData = new ArrayBuffer(0);
-    const onMessage = async (event: MessageEvent) => {
-      if (typeof event.data === 'string') {
-        const { headers } = getHeadersAndData(event.data);
-        switch (headers['Path']) {
-          case 'turn.end': {
-            ws.close();
-            if (!audioData.byteLength) return;
-            const audioBuffer = await arrayBufferConvert(audioData);
-            resolve(audioBuffer);
-            break;
-          }
-        }
-      } else if (event.data instanceof ArrayBuffer) {
-        const dataview = new DataView(event.data);
-        const headerLength = dataview.getInt16(0);
-        if (event.data.byteLength > headerLength + 2) {
-          const newBody = event.data.slice(2 + headerLength);
-          const newAudioData = new ArrayBuffer(audioData.byteLength + newBody.byteLength);
-          const mergedUint8Array = new Uint8Array(newAudioData);
-          mergedUint8Array.set(new Uint8Array(audioData), 0);
-          mergedUint8Array.set(new Uint8Array(newBody), audioData.byteLength);
-          audioData = newAudioData;
-        }
-      }
-    };
-    const onError = () => {
-      reject(new Error('WebSocket error occurred.'));
-      ws.close();
-    };
-    ws.addEventListener('open', onOpen);
-    ws.addEventListener('message', onMessage);
-    ws.addEventListener('error', onError);
-  });
+  const arrayBuffer = await response.arrayBuffer();
+  const audioBuffer = await arrayBufferConvert(arrayBuffer);
+  return audioBuffer;
 };
diff --git a/src/services/fetchMicrosoftSpeech.ts b/src/services/fetchMicrosoftSpeech.ts
index 323c80f..4a0594a 100644
--- a/src/services/fetchMicrosoftSpeech.ts
+++ b/src/services/fetchMicrosoftSpeech.ts
@@ -1,39 +1,29 @@
-import { MICROSOFT_SPEECH_PROXY_URL } from '@/const/api';
+import { createMicrosoftSpeechCompletion } from '@/server/createMicrosoftSpeechCompletion';
+import { MicrosoftSpeechPayload } from '@/server/types';
 import { arrayBufferConvert } from '@/utils/arrayBufferConvert';
 import { type SsmlOptions } from '@/utils/genSSML';
-import { genSSML } from '@/utils/genSSML';
 
 export interface MicrosoftSpeechOptions extends SsmlOptions {
   api?: {
-    proxy?: string;
+    url?: string;
   };
 }
 
 export const fetchMicrosoftSpeech = async (
-  text: string,
-  { api = {}, ...options }: MicrosoftSpeechOptions,
+  input: string,
+  { api, ...options }: MicrosoftSpeechOptions,
 ): Promise<AudioBuffer> => {
-  const data = JSON.stringify({
-    offsetInPlainText: 0,
-    properties: {
-      SpeakTriggerSource: 'AccTuningPagePlayButton',
-    },
-    ssml: genSSML(text, options),
-    ttsAudioFormat: 'audio-24khz-160kbitrate-mono-mp3',
-  });
-  const url = api?.proxy || MICROSOFT_SPEECH_PROXY_URL;
+  const payload: MicrosoftSpeechPayload = { input, options };
 
-  const response: Response = await fetch(url, {
-    body: data,
-    method: 'POST',
-    // @ts-ignore
-    responseType: 'arraybuffer',
-  });
+  const response = await (api?.url
+    ? fetch(api.url, { body: JSON.stringify(payload), method: 'POST' })
+    : createMicrosoftSpeechCompletion({ payload }));
 
   if (!response.ok) {
     throw new Error('Network response was not ok');
   }
 
   const arrayBuffer = await response.arrayBuffer();
-  return await arrayBufferConvert(arrayBuffer);
+  const audioBuffer = await arrayBufferConvert(arrayBuffer);
+  return audioBuffer;
 };
diff --git a/src/services/fetchOpenaiSTT.ts b/src/services/fetchOpenaiSTT.ts
index d29e643..4f7a6e4 100644
--- a/src/services/fetchOpenaiSTT.ts
+++ b/src/services/fetchOpenaiSTT.ts
@@ -1,12 +1,15 @@
-import { v4 as uuidv4 } from 'uuid';
+import OpenAI from 'openai';
 
-import { OPENAI_API_KEY, OPENAI_STT_URL } from '@/const/api';
+import { OPENAI_API_KEY, OPENAI_PROXY_URL } from '@/const/api';
+import { createOpenaiAudioTranscriptionsCompletion } from '@/server/createOpenaiAudioTranscriptionsCompletion';
+import { OpenAISTTPayload } from '@/server/types';
 import { RecordMineType, getRecordMineType } from '@/utils/getRecordMineType';
 
 export interface OpenaiSttOptions {
   api?: {
     key?: string;
     proxy?: string;
+    url?: string;
   };
   mineType?: RecordMineType;
   model?: 'whisper-1';
@@ -18,28 +21,22 @@ export const fetchOpenaiSTT = async (
   { api = {}, model = 'whisper-1', mineType }: OpenaiSttOptions,
 ): Promise<string> => {
   const key = api?.key || OPENAI_API_KEY;
-  const url = OPENAI_STT_URL(api?.proxy);
-
-  const headers = new Headers({
-    Authorization: `Bearer ${key}`,
-  });
-
-  const filename = `${uuidv4()}.${mineType?.extension || getRecordMineType().extension}`;
-  const file = new File([speech], filename, {
-    type: mineType?.mineType || getRecordMineType().mineType,
-  });
-
-  const body = new FormData();
-  body.append('file', file);
-  body.append('model', model);
-
-  const response: Response = await fetch(url, { body, headers, method: 'POST' });
-
-  if (!response.ok) {
-    throw new Error('Network response was not ok');
-  }
+  const url = api?.proxy || OPENAI_PROXY_URL;
+
+  const payload: OpenAISTTPayload = {
+    blob: speech,
+    options: {
+      mineType: mineType || getRecordMineType(),
+      model,
+    },
+  };
 
-  const json = await response.json();
+  const response = (await (api?.url
+    ? fetch(api.url, { body: JSON.stringify(payload), method: 'POST' })
+    : createOpenaiAudioTranscriptionsCompletion({
+        openai: new OpenAI({ apiKey: key, baseURL: url }),
+        payload,
+      }))) as string;
 
-  return json?.text;
+  return response;
 };
diff --git a/src/services/fetchOpenaiTTS.ts b/src/services/fetchOpenaiTTS.ts
index 75d6218..9007563 100644
--- a/src/services/fetchOpenaiTTS.ts
+++ b/src/services/fetchOpenaiTTS.ts
@@ -1,36 +1,43 @@
-import { OPENAI_API_KEY, OPENAI_TTS_URL } from '@/const/api';
+import OpenAI from 'openai';
+
+import { OPENAI_API_KEY, OPENAI_PROXY_URL } from '@/const/api';
+import { createOpenaiAudioSpeechCompletion } from '@/server/createOpenaiAudioSpeechCompletion';
+import { OpenAITTSPayload } from '@/server/types';
 import { arrayBufferConvert } from '@/utils/arrayBufferConvert';
 import { type SsmlOptions } from '@/utils/genSSML';
 
 export type OpenaiVoice = 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
 
-export interface OpenaiTtsOptions extends Pick<SsmlOptions, 'name'> {
+export interface OpenaiTtsOptions extends Pick<SsmlOptions, 'voice'>, OpenAITTSPayload {
   api: {
     key?: string;
     proxy?: string;
+    url?: string;
   };
   model?: 'tts-1' | 'tts-1-hd';
-  name: OpenaiVoice | string;
+  voice: OpenaiVoice;
 }
 
 export const fetchOpenaiTTS = async (
-  text: string,
-  { api = {}, model = 'tts-1', ...options }: OpenaiTtsOptions,
+  input: string,
+  { api = {}, model = 'tts-1', voice }: OpenaiTtsOptions,
 ): Promise<AudioBuffer> => {
   const key = api?.key || OPENAI_API_KEY;
-  const url = OPENAI_TTS_URL(api?.proxy);
-
-  const headers = new Headers({
-    'Authorization': `Bearer ${key}`,
-    'Content-Type': 'application/json',
-  });
+  const url = api?.proxy || OPENAI_PROXY_URL;
 
-  const body = JSON.stringify({
-    input: text,
-    model,
-    voice: options.name,
-  });
+  const payload: OpenAITTSPayload = {
+    input,
+    options: {
+      model,
+      voice,
+    },
+  };
 
-  const response: Response = await fetch(url, { body, headers, method: 'POST' });
+  const response = await (api?.url
+    ? fetch(api.url, { body: JSON.stringify(payload), method: 'POST' })
+    : await createOpenaiAudioSpeechCompletion({
+        openai: new OpenAI({ apiKey: key, baseURL: url }),
+        payload,
+      }));
 
   if (!response.ok) {
     throw new Error('Network response was not ok');
diff --git a/src/useAzureSpeech/demos/index.tsx b/src/useAzureSpeech/demos/index.tsx
deleted file mode 100644
index 8d5e7b6..0000000
--- a/src/useAzureSpeech/demos/index.tsx
+++ /dev/null
@@ -1,84 +0,0 @@
-import { AudioPlayer, genLevaOptions, getAzureVoiceOptions, useAzureSpeech } from '@lobehub/tts';
-import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui';
-import { Button, Input } from 'antd';
-import { Volume2 } from 'lucide-react';
-import { Flexbox } from 'react-layout-kit';
-
-const defaultText = '这是一段使用 Azure Speech 的语音演示';
-
-export default () => {
-  const store = useCreateStore();
-
-  const api: any = useControls(
-    {
-      key: {
-        label: 'AZURE_SPEECH_KEY',
-        value: '',
-      },
-      region: {
-        label: 'AZURE_SPEECH_REGION',
-        value: '',
-      },
-    },
-    { store },
-  );
-
-  const options: any = useControls(
-    {
-      name: {
-        options: genLevaOptions(getAzureVoiceOptions()),
-        value: 'zh-CN-YunxiaNeural',
-      },
-      pitch: {
-        max: 1,
-        min: -1,
-        step: 0.1,
-        value: 0,
-      },
-      rate: {
-        max: 1,
-        min: -1,
-        step: 0.1,
-        value: 0,
-      },
-      style: {
-        options: [
-          'affectionate',
-          'angry',
-          'calm',
-          'cheerful',
-          'disgruntled',
-          'embarrassed',
-          'fearful',
-          'general',
-          'gentle',
-          'sad',
-          'serious',
-        ],
-        value: 'general',
-      },
-    },
-    { store },
-  );
-  const { setText, isGlobalLoading, audio, start, stop } = useAzureSpeech(defaultText, {
-    api,
-    ...options,
-  });
-  return (
-    <StoryBook levaStore={store}>
-      <Flexbox gap={8}>
-        {isGlobalLoading ? (
-          <Button block loading size={'large'} type={'primary'} />
-        ) : (
-          <Button block icon={<Icon icon={Volume2} />} onClick={start} size={'large'} type={'primary'} />
-        )}
-        <Input.TextArea defaultValue={defaultText} onChange={(e) => setText(e.target.value)} />
-        <AudioPlayer audio={audio} onInitPlay={start} onStop={stop} />
-      </Flexbox>
-    </StoryBook>
-  );
-};
diff --git a/src/useAzureSpeech/index.md b/src/useAzureSpeech/index.md
deleted file mode 100644
index 1bbc032..0000000
--- a/src/useAzureSpeech/index.md
+++ /dev/null
@@ -1,11 +0,0 @@
----
-nav: Components
-group: TTS
-title: useAzureSpeech
----
-
-## hooks
-
-- ENV: `AZURE_SPEECH_KEY` `AZURE_SPEECH_REGION`
-
-<code src="./demos/index.tsx" nopadding></code>
diff --git a/src/useAzureSpeech/index.ts b/src/useAzureSpeech/index.ts
deleted file mode 100644
index 6e29ae5..0000000
--- a/src/useAzureSpeech/index.ts
+++ /dev/null
@@ -1,15 +0,0 @@
-import { useState } from 'react';
-
-import { AzureSpeechOptions, fetchAzureSpeech } from '@/services/fetchAzureSpeech';
-import { useTTS } from '@/useTTS';
-
-export const useAzureSpeech = (defaultText: string, options: AzureSpeechOptions) => {
-  const [text, setText] = useState(defaultText);
-  const rest = useTTS(options.name, text, (segmentText: string) =>
-    fetchAzureSpeech(segmentText, options),
-  );
-  return {
-    setText,
-    ...rest,
-  };
-};
diff --git a/src/useEdgeSpeech/demos/index.tsx b/src/useEdgeSpeech/demos/index.tsx
index dae1ef8..f44e544 100644
--- a/src/useEdgeSpeech/demos/index.tsx
+++ b/src/useEdgeSpeech/demos/index.tsx
@@ -1,4 +1,10 @@
-import { AudioPlayer, genLevaOptions, getEdgeVoiceOptions, useEdgeSpeech } from '@lobehub/tts';
+import {
+  AudioPlayer,
+  EDGE_SPEECH_API_URL,
+  genLevaOptions,
+  getEdgeVoiceOptions,
+  useEdgeSpeech,
+} from '@lobehub/tts';
 import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui';
 import { Button, Input } from 'antd';
 import { Volume2 } from 'lucide-react';
@@ -11,21 +17,14 @@ export default () => {
 
   const api: any = useControls(
     {
-      key: {
-        label: 'EDDGE_API_TOKEN',
-        value: '',
-      },
-      proxy: {
-        label: 'EDDGE_PROXY_URL',
-        value: '',
-      },
+      url: EDGE_SPEECH_API_URL,
     },
     { store },
   );
 
   const options: any = useControls(
     {
-      name: {
+      voice: {
         options: genLevaOptions(getEdgeVoiceOptions()),
         value: 'zh-CN-YunxiaNeural',
       },
diff --git a/src/useEdgeSpeech/index.ts b/src/useEdgeSpeech/index.ts
index ff2f7fc..6909b06 100644
--- a/src/useEdgeSpeech/index.ts
+++ b/src/useEdgeSpeech/index.ts
@@ -5,7 +5,7 @@ import { useTTS } from '@/useTTS';
 
 export const useEdgeSpeech = (defaultText: string, options: EdgeSpeechOptions) => {
   const [text, setText] = useState(defaultText);
-  const rest = useTTS(options.name, text, (segmentText: string) =>
+  const rest = useTTS(options.voice, text, (segmentText: string) =>
     fetchEdgeSpeech(segmentText, options),
   );
   return {
diff --git a/src/useMicrosoftSpeech/demos/index.tsx b/src/useMicrosoftSpeech/demos/index.tsx
index ac31eba..24cce4f 100644
--- a/src/useMicrosoftSpeech/demos/index.tsx
+++ b/src/useMicrosoftSpeech/demos/index.tsx
@@ -1,4 +1,10 @@
-import { AudioPlayer, genLevaOptions, getEdgeVoiceOptions, useMicrosoftSpeech } from '@lobehub/tts';
+import {
+  AudioPlayer,
+  MICROSOFT_SPEECH_API_URL,
+  genLevaOptions,
+  getEdgeVoiceOptions,
+  useMicrosoftSpeech,
+} from '@lobehub/tts';
 import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui';
 import { Button, Input } from 'antd';
 import { Volume2 } from 'lucide-react';
@@ -8,16 +14,17 @@ const defaultText = '这是一段使用 Microsoft Speech 的语音演示';
 
 export default () => {
   const store = useCreateStore();
-  const options: any = useControls(
+  const api: any = useControls(
     {
-      api: {
-        label: 'MICROSOFT_SPEECH_PROXY_URL',
-        value: '',
-      },
-      name: {
-        options: genLevaOptions(getEdgeVoiceOptions()),
-        value: 'zh-CN-YunxiaNeural',
+      url: {
+        label: 'MICROSOFT_SPEECH_API_URL',
+        value: MICROSOFT_SPEECH_API_URL,
       },
+    },
+    { store },
+  );
+  const options: any = useControls(
+    {
       pitch: {
         max: 1,
         min: -1,
@@ -46,10 +53,17 @@ export default () => {
       ],
       value: 'general',
     },
+    voice: {
+      options: genLevaOptions(getEdgeVoiceOptions()),
+      value: 'zh-CN-YunxiaNeural',
+    },
   },
   { store },
 );
-  const { setText, isGlobalLoading, audio, start, stop } = useMicrosoftSpeech(defaultText, options);
+  const { setText, isGlobalLoading, audio, start, stop } = useMicrosoftSpeech(defaultText, {
+    api,
+    ...options,
+  });
   return (
diff --git a/src/useMicrosoftSpeech/index.md b/src/useMicrosoftSpeech/index.md
index 8ab707e..b431580 100644
--- a/src/useMicrosoftSpeech/index.md
+++ b/src/useMicrosoftSpeech/index.md
@@ -6,6 +6,6 @@ title: useMicrosoftSpeech
 
 ## hooks
 
-- ENV: `MICROSOFT_SPEECH_PROXY_URL`
+- ENV: `MICROSOFT_SPEECH_API_URL`
 
 <code src="./demos/index.tsx" nopadding></code>
diff --git a/src/useMicrosoftSpeech/index.ts b/src/useMicrosoftSpeech/index.ts
index 637d2c6..3e2e150 100644
--- a/src/useMicrosoftSpeech/index.ts
+++ b/src/useMicrosoftSpeech/index.ts
@@ -5,7 +5,7 @@ import { useTTS } from '@/useTTS';
 
 export const useMicrosoftSpeech = (defaultText: string, options: MicrosoftSpeechOptions) => {
   const [text, setText] = useState(defaultText);
-  const rest = useTTS(options.name, text, (segmentText: string) =>
+  const rest = useTTS(options.voice, text, (segmentText: string) =>
     fetchMicrosoftSpeech(segmentText, options),
   );
   return {
diff --git a/src/useOpenaiSTT/demos/index.tsx b/src/useOpenaiSTT/demos/index.tsx
index e638a02..3b05f8e 100644
--- a/src/useOpenaiSTT/demos/index.tsx
+++ b/src/useOpenaiSTT/demos/index.tsx
@@ -1,4 +1,4 @@
-import { useOpenaiSTTWithRecord } from '@lobehub/tts';
+import { OPENAI_STT_API_URL, useOpenaiSTTWithRecord } from '@lobehub/tts';
 import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui';
 import { Button, Input } from 'antd';
 import { Mic,
StopCircle } from 'lucide-react'; @@ -16,6 +16,7 @@ export default () => { label: 'OPENAI_PROXY_URL', value: '', }, + url: OPENAI_STT_API_URL, }, { store }, ); diff --git a/src/useOpenaiTTS/demos/index.tsx b/src/useOpenaiTTS/demos/index.tsx index b07e534..cfbc24b 100644 --- a/src/useOpenaiTTS/demos/index.tsx +++ b/src/useOpenaiTTS/demos/index.tsx @@ -1,4 +1,4 @@ -import { AudioPlayer, openaiVoiceList, useOpenaiTTS } from '@lobehub/tts'; +import { AudioPlayer, OPENAI_TTS_API_URL, openaiVoiceList, useOpenaiTTS } from '@lobehub/tts'; import { Icon, StoryBook, useControls, useCreateStore } from '@lobehub/ui'; import { Button, Input } from 'antd'; import { Volume2 } from 'lucide-react'; @@ -19,13 +19,14 @@ export default () => { label: 'OPENAI_PROXY_URL', value: '', }, + url: OPENAI_TTS_API_URL, }, { store }, ); const options: any = useControls( { - name: { + voice: { options: openaiVoiceList, value: 'alloy', }, diff --git a/src/useOpenaiTTS/index.ts b/src/useOpenaiTTS/index.ts index d04cc68..c88a78b 100644 --- a/src/useOpenaiTTS/index.ts +++ b/src/useOpenaiTTS/index.ts @@ -5,7 +5,7 @@ import { useTTS } from '@/useTTS'; export const useOpenaiTTS = (defaultText: string, options: OpenaiTtsOptions) => { const [text, setText] = useState(defaultText); - const rest = useTTS(options.name, text, (segmentText: string) => + const rest = useTTS(options.voice, text, (segmentText: string) => fetchOpenaiTTS(segmentText, options), ); return { diff --git a/src/useSpeechSynthes/demos/index.tsx b/src/useSpeechSynthes/demos/index.tsx index 162c9d6..f1406d5 100644 --- a/src/useSpeechSynthes/demos/index.tsx +++ b/src/useSpeechSynthes/demos/index.tsx @@ -10,10 +10,6 @@ export default () => { const store = useCreateStore(); const options: any = useControls( { - name: { - options: genLevaOptions(getSpeechSynthesVoiceOptions()), - value: '婷婷', - }, pitch: { max: 1, min: -1, @@ -26,6 +22,10 @@ export default () => { step: 0.1, value: 0, }, + voice: { + options: genLevaOptions(getSpeechSynthesVoiceOptions()), + value: '婷婷', + }, }, { store }, ); diff --git a/src/useSpeechSynthes/index.ts b/src/useSpeechSynthes/index.ts index e34cea2..af9b347 100644 --- a/src/useSpeechSynthes/index.ts +++ b/src/useSpeechSynthes/index.ts @@ -2,18 +2,18 @@ import { useCallback, useMemo, useState } from 'react'; import { SsmlOptions } from '@/utils/genSSML'; -export const useSpeechSynthes = (defaultText: string, { name, rate, pitch }: SsmlOptions) => { +export const useSpeechSynthes = (defaultText: string, { voice, rate, pitch }: SsmlOptions) => { const [voiceList, setVoiceList] = useState(speechSynthesis.getVoices()); const [text, setText] = useState(defaultText); const [isLoading, setIsLoading] = useState(false); const speechSynthesisUtterance = useMemo(() => { const utterance = new SpeechSynthesisUtterance(text); - utterance.voice = voiceList.find((item) => item.name === name) as any; + utterance.voice = voiceList.find((item) => item.name === voice) as any; if (pitch) utterance.pitch = pitch * 10; if (rate) utterance.rate = rate * 10; return utterance; - }, [text, voiceList, rate, pitch, name]); + }, [text, voiceList, rate, pitch, voice]); speechSynthesis.onvoiceschanged = () => { setVoiceList(speechSynthesis.getVoices()); diff --git a/src/utils/genSSML.ts b/src/utils/genSSML.ts index 1e0bc1e..57dd52a 100644 --- a/src/utils/genSSML.ts +++ b/src/utils/genSSML.ts @@ -1,5 +1,3 @@ -import { Document, ServiceProvider } from 'ssml-document'; - export type StyleName = | 'affectionate' | 'angry' @@ -14,16 +12,34 @@ export type StyleName = | 
'serious';
 
 export interface SsmlOptions {
-  name: string;
   pitch?: number;
   rate?: number;
   style?: StyleName;
+  voice: string;
 }
 
-export const genSSML = (text: string, options: SsmlOptions) => {
-  let ssml = new Document().voice(options.name);
-  if (options.style) ssml.expressAs({ style: options.style });
-  if (options.pitch || options.rate) ssml.prosody({ pitch: options.pitch, rate: options.rate });
-  const result = ssml.say(text).render({ provider: ServiceProvider.Microsoft });
-  return `${result}`;
+const voiceTemplate = (input: string, { voice }: Pick<SsmlOptions, 'voice'>) =>
+  `<voice name="${voice}">${input}</voice>`;
+
+const styleTemplate = (input: string, { style }: Pick<SsmlOptions, 'style'>) => {
+  if (!style) return input;
+  return `<mstts:express-as style="${style}">${input}</mstts:express-as>`;
+};
+
+const prosodyTemplate = (input: string, { pitch, rate }: Pick<SsmlOptions, 'pitch' | 'rate'>) => {
+  if (!pitch && !rate) return input;
+  return `<prosody pitch="${pitch || 0}" rate="${rate || 0}">${input}</prosody>`;
+};
+const speakTemplate = (input: string) =>
+  `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="en-US">${input}</speak>`;
+
+export const genSSML = (input: string, options: SsmlOptions) => {
+  let ssml = prosodyTemplate(input, options);
+  ssml = styleTemplate(ssml, options);
+  ssml = voiceTemplate(ssml, options);
+  ssml = speakTemplate(ssml);
+
+  return ssml;
 };
diff --git a/src/utils/getVoiceList.ts b/src/utils/getVoiceList.ts
index 841b040..9cd789b 100644
--- a/src/utils/getVoiceList.ts
+++ b/src/utils/getVoiceList.ts
@@ -4,9 +4,9 @@ import { flatten } from 'lodash-es';
 import azureVoiceList from '@/data/azureVoiceList';
 import edgeVoiceList from '@/data/edgeVoiceList';
 import voiceLocale from '@/data/locales';
-import nameList from '@/data/nameList';
 import openaiVoiceList from '@/data/openaiVoiceList';
 import speechSynthesVoiceList from '@/data/speechSynthesVoiceList';
+import voiceList from '@/data/voiceList';
 
 export const genSpeechSynthesVoiceList = () => {
   const data = speechSynthesis.getVoices();
@@ -38,7 +38,7 @@ export const getAzureVoiceOptions = (locale?: string): SelectProps['options'] =>
       ? (azureVoiceList as any)?.[locale] || []
       : flatten(Object.values(azureVoiceList));
 
-  return data.map((voice: any) => ({ label: (nameList as any)?.[voice] || voice, value: voice }));
+  return data.map((voice: any) => ({ label: (voiceList as any)?.[voice] || voice, value: voice }));
 };
 
 export const getEdgeVoiceOptions = (locale?: string): SelectProps['options'] => {
   const data =
     locale && (edgeVoiceList as any)[locale]
       ? (edgeVoiceList as any)[locale] || []
       : flatten(Object.values(edgeVoiceList));
-  return data.map((voice: any) => ({ label: (nameList as any)?.[voice] || voice, value: voice }));
+  return data.map((voice: any) => ({ label: (voiceList as any)?.[voice] || voice, value: voice }));
 };
 
 export const getOpenaiVoiceOptions = (): SelectProps['options'] => {
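For clarity, a short usage sketch of the template-based `genSSML` above. The tag nesting follows the template order in this patch (prosody innermost, then express-as, then voice, then the speak root); the attribute formatting comes from the reconstruction above, so treat the exact output as approximate:

```ts
import { genSSML } from '@/utils/genSSML';

// prosody is applied innermost, then mstts:express-as, then voice, then the speak root.
const ssml = genSSML('你好', { pitch: 0.1, style: 'cheerful', voice: 'zh-CN-YunxiaNeural' });
// => <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"
//      xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="en-US">
//      <voice name="zh-CN-YunxiaNeural"><mstts:express-as style="cheerful">
//        <prosody pitch="0.1" rate="0">你好</prosody>
//      </mstts:express-as></voice></speak>
```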