diff --git a/.gitignore b/.gitignore
index 5fc9aabb..93e8d09f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -88,7 +88,7 @@ ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
-# .python-version
+.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
diff --git a/README.rst b/README.rst
index 337f6873..69beebcf 100644
--- a/README.rst
+++ b/README.rst
@@ -97,7 +97,7 @@ To use all of the functionality of the library, you should have:
* **FLAC encoder** (required only if the system is not x86-based Windows/Linux/OS X)
* **Vosk** (required only if you need to use Vosk API speech recognition ``recognizer_instance.recognize_vosk``)
* **Whisper** (required only if you need to use Whisper ``recognizer_instance.recognize_whisper``)
-* **openai** (required only if you need to use OpenAI Whisper API speech recognition ``recognizer_instance.recognize_whisper_api``)
+* **openai** (required only if you need to use OpenAI Whisper API speech recognition ``recognizer_instance.recognize_openai``)
* **groq** (required only if you need to use Groq Whisper API speech recognition ``recognizer_instance.recognize_groq``)
The following requirements are optional, but can improve or extend functionality in some situations:
@@ -176,9 +176,9 @@ You can install it with ``python3 -m pip install SpeechRecognition[whisper-local
OpenAI Whisper API (for OpenAI Whisper API users)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The library `openai <https://pypi.org/project/openai/>`__ is **required if and only if you want to use OpenAI Whisper API** (``recognizer_instance.recognize_whisper_api``).
+The library `openai <https://pypi.org/project/openai/>`__ is **required if and only if you want to use OpenAI Whisper API** (``recognizer_instance.recognize_openai``).
-If not installed, everything in the library will still work, except calling ``recognizer_instance.recognize_whisper_api`` will raise an ``RequestError``.
+If not installed, everything in the library will still work, except calling ``recognizer_instance.recognize_openai`` will raise a ``RequestError``.
You can install it with ``python3 -m pip install SpeechRecognition[whisper-api]``.
diff --git a/examples/microphone_recognition.py b/examples/microphone_recognition.py
index a5d8a688..38ef95bd 100644
--- a/examples/microphone_recognition.py
+++ b/examples/microphone_recognition.py
@@ -96,6 +96,6 @@
# recognize speech using Whisper API
OPENAI_API_KEY = "INSERT OPENAI API KEY HERE"
try:
- print(f"Whisper API thinks you said {r.recognize_whisper_api(audio, api_key=OPENAI_API_KEY)}")
+ print(f"Whisper API thinks you said {r.recognize_openai(audio, api_key=OPENAI_API_KEY)}")
except sr.RequestError as e:
print(f"Could not request results from Whisper API; {e}")
diff --git a/reference/library-reference.rst b/reference/library-reference.rst
index e8b6c7e0..e245e819 100644
--- a/reference/library-reference.rst
+++ b/reference/library-reference.rst
@@ -314,8 +314,8 @@ You can translate the result to English with Whisper by passing translate=True
Other values are passed directly to whisper. See https://github.com/openai/whisper/blob/main/whisper/transcribe.py for all options
-``recognizer_instance.recognize_whisper_api(audio_data: AudioData, model: str = "whisper-1", api_key: str | None = None)``
---------------------------------------------------------------------------------------------------------------------------
+``recognizer_instance.recognize_openai(audio_data: AudioData, model: str = "whisper-1", api_key: str | None = None)``
+---------------------------------------------------------------------------------------------------------------------
Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the OpenAI Whisper API.
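
For illustration, a minimal call under the new name might look like this (a sketch only; the ``hello.wav`` file name is a hypothetical example, and the optional keyword arguments are the ones declared in ``OpenAIOptionalParameters`` further down in this diff)::

    import speech_recognition as sr

    r = sr.Recognizer()
    with sr.AudioFile("hello.wav") as source:  # hypothetical input file
        audio = r.record(source)

    # Requires the OPENAI_API_KEY environment variable, or pass api_key=...
    # Optional keyword arguments such as language are forwarded to the API.
    print(r.recognize_openai(audio, model="whisper-1", language="en"))
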
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 238d5e50..94345ccb 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1506,12 +1506,13 @@ def flush(self, *args, **kwargs):
# At this time, the dependencies are not yet installed, resulting in a ModuleNotFoundError.
# This is a workaround to resolve this issue
try:
-    from .recognizers import google, groq, whisper
+    from .recognizers import google, groq, openai
except (ModuleNotFoundError, ImportError):
    pass
else:
    Recognizer.recognize_google = google.recognize_legacy
-    Recognizer.recognize_whisper_api = whisper.recognize_whisper_api
+    Recognizer.recognize_openai = openai.recognize
+    Recognizer.recognize_whisper_api = openai.recognize  # Deprecated
    Recognizer.recognize_groq = groq.recognize_groq
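
As a quick sanity check of the rebinding above (a sketch, assuming the package imports cleanly)::

    from speech_recognition import Recognizer

    # Both names resolve to the same function; the old attribute is kept
    # only as a deprecated alias for backward compatibility.
    assert Recognizer.recognize_whisper_api is Recognizer.recognize_openai
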
diff --git a/speech_recognition/recognizers/whisper.py b/speech_recognition/recognizers/openai.py
similarity index 53%
rename from speech_recognition/recognizers/whisper.py
rename to speech_recognition/recognizers/openai.py
index 31a8e43e..79843d69 100644
--- a/speech_recognition/recognizers/whisper.py
+++ b/speech_recognition/recognizers/openai.py
@@ -1,19 +1,42 @@
from __future__ import annotations
import os
-from io import BytesIO
+from typing import Literal
+
+from typing_extensions import TypedDict, Unpack
from speech_recognition.audio import AudioData
from speech_recognition.exceptions import SetupError
+from speech_recognition.recognizers.whisper_api import (
+    OpenAICompatibleRecognizer,
+)
+
+# https://platform.openai.com/docs/api-reference/audio/createTranscription#audio-createtranscription-model
+WhisperModel = Literal["whisper-1"]
+
+
+class OpenAIOptionalParameters(TypedDict, total=False):
+    """OpenAI speech transcription's optional parameters.
+    https://platform.openai.com/docs/api-reference/audio/createTranscription
+    """
+
+    language: str
+    prompt: str
+    # TODO Add support `Literal["text", "srt", "verbose_json", "vtt"]`
+    response_format: Literal["json"]
+    temperature: float
+    # timestamp_granularities  # TODO support
-def recognize_whisper_api(
+
+def recognize(
    recognizer,
    audio_data: "AudioData",
    *,
-    model: str = "whisper-1",
+    model: WhisperModel = "whisper-1",
    api_key: str | None = None,
-):
+    **kwargs: Unpack[OpenAIOptionalParameters],
+) -> str:
"""
Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the OpenAI Whisper API.
@@ -23,8 +46,6 @@ def recognize_whisper_api(
    Raises a ``speech_recognition.exceptions.SetupError`` exception if there are any issues with the openai installation, or the environment variable is missing.
    """
-    if not isinstance(audio_data, AudioData):
-        raise ValueError("``audio_data`` must be an ``AudioData`` instance")
    if api_key is None and os.environ.get("OPENAI_API_KEY") is None:
        raise SetupError("Set environment variable ``OPENAI_API_KEY``")
@@ -35,9 +56,5 @@ def recognize_whisper_api(
"missing openai module: ensure that openai is set up correctly."
)
-    wav_data = BytesIO(audio_data.get_wav_data())
-    wav_data.name = "SpeechRecognition_audio.wav"
-
-    client = openai.OpenAI(api_key=api_key)
-    transcript = client.audio.transcriptions.create(file=wav_data, model=model)
-    return transcript.text
+    recognizer = OpenAICompatibleRecognizer(openai.OpenAI(api_key=api_key))
+    return recognizer.recognize(audio_data, model, **kwargs)
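
``OpenAICompatibleRecognizer`` itself is not part of this diff. Judging from the inline code it replaces, a minimal sketch of the pattern could look like the following (an assumption for illustration, not the actual implementation in ``speech_recognition/recognizers/whisper_api``)::

    from io import BytesIO

    class OpenAICompatibleRecognizer:
        """Sketch: wraps any client exposing the OpenAI transcription API."""

        def __init__(self, client):
            self.client = client

        def recognize(self, audio_data, model, **kwargs):
            # Same mechanics as the removed inline code: wrap the WAV bytes
            # in a named file-like object and post it to the endpoint.
            wav_data = BytesIO(audio_data.get_wav_data())
            wav_data.name = "SpeechRecognition_audio.wav"
            transcript = self.client.audio.transcriptions.create(
                file=wav_data, model=model, **kwargs
            )
            return transcript.text
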
diff --git a/tests/recognizers/test_openai.py b/tests/recognizers/test_openai.py
new file mode 100644
index 00000000..21c2b04e
--- /dev/null
+++ b/tests/recognizers/test_openai.py
@@ -0,0 +1,31 @@
+from unittest.mock import MagicMock
+
+import httpx
+import respx
+
+from speech_recognition import AudioData, Recognizer
+from speech_recognition.recognizers import openai
+
+
+@respx.mock(assert_all_called=True, assert_all_mocked=True)
+def test_transcribe_with_openai_whisper(respx_mock, monkeypatch):
+    monkeypatch.setenv("OPENAI_API_KEY", "sk_openai_api_key")
+
+    respx_mock.post(
+        "https://api.openai.com/v1/audio/transcriptions",
+        headers__contains={"Authorization": "Bearer sk_openai_api_key"},
+        data__contains={"model": "whisper-1"},
+    ).mock(
+        return_value=httpx.Response(
+            200,
+            json={"text": "Transcription by OpenAI Whisper"},
+        )
+    )
+
+    audio_data = MagicMock(spec=AudioData)
+    audio_data.get_wav_data.return_value = b"audio_data"
+
+    actual = openai.recognize(MagicMock(spec=Recognizer), audio_data)
+
+    assert actual == "Transcription by OpenAI Whisper"
+    audio_data.get_wav_data.assert_called_once()
diff --git a/tests/recognizers/test_whisper.py b/tests/recognizers/test_whisper.py
deleted file mode 100644
index e84d0503..00000000
--- a/tests/recognizers/test_whisper.py
+++ /dev/null
@@ -1,42 +0,0 @@
-from unittest import TestCase
-from unittest.mock import MagicMock, patch
-
-from speech_recognition import AudioData, Recognizer
-from speech_recognition.recognizers import whisper
-
-
-@patch("speech_recognition.recognizers.whisper.os.environ")
-@patch("speech_recognition.recognizers.whisper.BytesIO")
-@patch("openai.OpenAI")
-class RecognizeWhisperApiTestCase(TestCase):
-    def test_recognize_default_arguments(self, OpenAI, BytesIO, environ):
-        client = OpenAI.return_value
-        transcript = client.audio.transcriptions.create.return_value
-
-        recognizer = MagicMock(spec=Recognizer)
-        audio_data = MagicMock(spec=AudioData)
-
-        actual = whisper.recognize_whisper_api(recognizer, audio_data)
-
-        self.assertEqual(actual, transcript.text)
-        audio_data.get_wav_data.assert_called_once_with()
-        BytesIO.assert_called_once_with(audio_data.get_wav_data.return_value)
-        OpenAI.assert_called_once_with(api_key=None)
-        client.audio.transcriptions.create.assert_called_once_with(
-            file=BytesIO.return_value, model="whisper-1"
-        )
-
-    def test_recognize_pass_arguments(self, OpenAI, BytesIO, environ):
-        client = OpenAI.return_value
-
-        recognizer = MagicMock(spec=Recognizer)
-        audio_data = MagicMock(spec=AudioData)
-
-        _ = whisper.recognize_whisper_api(
-            recognizer, audio_data, model="x-whisper", api_key="OPENAI_API_KEY"
-        )
-
-        OpenAI.assert_called_once_with(api_key="OPENAI_API_KEY")
-        client.audio.transcriptions.create.assert_called_once_with(
-            file=BytesIO.return_value, model="x-whisper"
-        )