Skip to content

Commit

Permalink
Merge pull request #801 from ftnext/refactor/openai-whisper-api
Browse files Browse the repository at this point in the history
Rename: recognize_whisper_api -> recognize_openai
  • Loading branch information
ftnext authored Dec 7, 2024
2 parents c4cb90f + 81f48a2 commit b417fc9
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 63 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
Expand Down
6 changes: 3 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ To use all of the functionality of the library, you should have:
* **FLAC encoder** (required only if the system is not x86-based Windows/Linux/OS X)
* **Vosk** (required only if you need to use Vosk API speech recognition ``recognizer_instance.recognize_vosk``)
* **Whisper** (required only if you need to use Whisper ``recognizer_instance.recognize_whisper``)
* **openai** (required only if you need to use OpenAI Whisper API speech recognition ``recognizer_instance.recognize_whisper_api``)
* **openai** (required only if you need to use OpenAI Whisper API speech recognition ``recognizer_instance.recognize_openai``)
* **groq** (required only if you need to use Groq Whisper API speech recognition ``recognizer_instance.recognize_groq``)

The following requirements are optional, but can improve or extend functionality in some situations:
Expand Down Expand Up @@ -176,9 +176,9 @@ You can install it with ``python3 -m pip install SpeechRecognition[whisper-local
OpenAI Whisper API (for OpenAI Whisper API users)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The library `openai <https://pypi.org/project/openai/>`__ is **required if and only if you want to use OpenAI Whisper API** (``recognizer_instance.recognize_whisper_api``).
The library `openai <https://pypi.org/project/openai/>`__ is **required if and only if you want to use OpenAI Whisper API** (``recognizer_instance.recognize_openai``).

If not installed, everything in the library will still work, except calling ``recognizer_instance.recognize_whisper_api`` will raise an ``RequestError``.
If not installed, everything in the library will still work, except calling ``recognizer_instance.recognize_openai`` will raise a ``RequestError``.

You can install it with ``python3 -m pip install SpeechRecognition[whisper-api]``.

Expand Down
2 changes: 1 addition & 1 deletion examples/microphone_recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,6 @@
# recognize speech using Whisper API
OPENAI_API_KEY = "INSERT OPENAI API KEY HERE"
try:
print(f"Whisper API thinks you said {r.recognize_whisper_api(audio, api_key=OPENAI_API_KEY)}")
print(f"Whisper API thinks you said {r.recognize_openai(audio, api_key=OPENAI_API_KEY)}")
except sr.RequestError as e:
print(f"Could not request results from Whisper API; {e}")
4 changes: 2 additions & 2 deletions reference/library-reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,8 @@ You can translate the result to english with Whisper by passing translate=True

Other values are passed directly to whisper. See https://github.com/openai/whisper/blob/main/whisper/transcribe.py for all options

``recognizer_instance.recognize_whisper_api(audio_data: AudioData, model: str = "whisper-1", api_key: str | None = None)``
--------------------------------------------------------------------------------------------------------------------------
``recognizer_instance.recognize_openai(audio_data: AudioData, model: str = "whisper-1", api_key: str | None = None)``
---------------------------------------------------------------------------------------------------------------------

Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the OpenAI Whisper API.

Expand Down
5 changes: 3 additions & 2 deletions speech_recognition/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1506,12 +1506,13 @@ def flush(self, *args, **kwargs):
# At this time, the dependencies are not yet installed, resulting in a ModuleNotFoundError.
# This is a workaround to resolve this issue
try:
from .recognizers import google, groq, whisper
from .recognizers import google, openai, groq
except (ModuleNotFoundError, ImportError):
pass
else:
Recognizer.recognize_google = google.recognize_legacy
Recognizer.recognize_whisper_api = whisper.recognize_whisper_api
Recognizer.recognize_openai = openai.recognize
Recognizer.recognize_whisper_api = openai.recognize # Deprecated
Recognizer.recognize_groq = groq.recognize_groq


Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,42 @@
from __future__ import annotations

import os
from io import BytesIO
from typing import Literal

from typing_extensions import TypedDict, Unpack

from speech_recognition.audio import AudioData
from speech_recognition.exceptions import SetupError
from speech_recognition.recognizers.whisper_api import (
    OpenAICompatibleRecognizer,
)

# https://platform.openai.com/docs/api-reference/audio/createTranscription#audio-createtranscription-model
WhisperModel = Literal["whisper-1"]


class OpenAIOptionalParameters(TypedDict, total=False):
    """Optional keyword arguments for OpenAI's speech transcription endpoint.

    Used as ``**kwargs: Unpack[OpenAIOptionalParameters]`` in ``recognize``;
    PEP 692 requires the ``Unpack`` argument to be a ``TypedDict``, and
    ``total=False`` marks every field as optional.

    Field semantics are defined by the API, not by this module:
    https://platform.openai.com/docs/api-reference/audio/createTranscription
    """

    language: str
    prompt: str
    # TODO Add support `Literal["text", "srt", "verbose_json", "vtt"]`
    response_format: Literal["json"]
    temperature: float
    # timestamp_granularities # TODO support

def recognize_whisper_api(

def recognize(
recognizer,
audio_data: "AudioData",
*,
model: str = "whisper-1",
model: WhisperModel = "whisper-1",
api_key: str | None = None,
):
**kwargs: Unpack[OpenAIOptionalParameters],
) -> str:
"""
Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the OpenAI Whisper API.
Expand All @@ -23,8 +46,6 @@ def recognize_whisper_api(
Raises a ``speech_recognition.exceptions.SetupError`` exception if there are any issues with the openai installation, or the environment variable is missing.
"""
if not isinstance(audio_data, AudioData):
raise ValueError("``audio_data`` must be an ``AudioData`` instance")
if api_key is None and os.environ.get("OPENAI_API_KEY") is None:
raise SetupError("Set environment variable ``OPENAI_API_KEY``")

Expand All @@ -35,9 +56,5 @@ def recognize_whisper_api(
"missing openai module: ensure that openai is set up correctly."
)

wav_data = BytesIO(audio_data.get_wav_data())
wav_data.name = "SpeechRecognition_audio.wav"

client = openai.OpenAI(api_key=api_key)
transcript = client.audio.transcriptions.create(file=wav_data, model=model)
return transcript.text
recognizer = OpenAICompatibleRecognizer(openai.OpenAI(api_key=api_key))
return recognizer.recognize(audio_data, model, **kwargs)
31 changes: 31 additions & 0 deletions tests/recognizers/test_openai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from unittest.mock import MagicMock

import httpx
import respx

from speech_recognition import AudioData, Recognizer
from speech_recognition.recognizers import openai


@respx.mock(assert_all_called=True, assert_all_mocked=True)
def test_transcribe_with_openai_whisper(respx_mock, monkeypatch):
    """recognize() posts the WAV payload to OpenAI's transcription endpoint,
    authenticating with OPENAI_API_KEY, and returns the transcript text."""
    monkeypatch.setenv("OPENAI_API_KEY", "sk_openai_api_key")

    # Stub the transcription endpoint; the matcher also verifies the
    # Authorization header and the default "whisper-1" model are sent.
    transcription_response = httpx.Response(
        200,
        json={"text": "Transcription by OpenAI Whisper"},
    )
    respx_mock.post(
        "https://api.openai.com/v1/audio/transcriptions",
        headers__contains={"Authorization": "Bearer sk_openai_api_key"},
        data__contains={"model": "whisper-1"},
    ).mock(return_value=transcription_response)

    fake_audio = MagicMock(spec=AudioData)
    fake_audio.get_wav_data.return_value = b"audio_data"

    result = openai.recognize(MagicMock(spec=Recognizer), fake_audio)

    assert result == "Transcription by OpenAI Whisper"
    fake_audio.get_wav_data.assert_called_once()
42 changes: 0 additions & 42 deletions tests/recognizers/test_whisper.py

This file was deleted.

0 comments on commit b417fc9

Please sign in to comment.