Skip to content

Commit

Permalink
Merge pull request #3 from GabrielSCabrera/dev
Browse files Browse the repository at this point in the history
Merge changes from dev
  • Loading branch information
GabrielSCabrera authored Dec 28, 2023
2 parents 80dadee + a36c8f6 commit 7059ca6
Show file tree
Hide file tree
Showing 9 changed files with 113 additions and 1,200 deletions.
12 changes: 0 additions & 12 deletions Pipfile

This file was deleted.

1,081 changes: 0 additions & 1,081 deletions Pipfile.lock

This file was deleted.

12 changes: 6 additions & 6 deletions banterbot/characters/therapist.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ def run() -> None:
# Optional system prompt to set up a custom character prior to initializing BanterBot.
system = (
"You are Grendel the Therapy Troll, an emotionally unstable and bottom of the barrel moronic Troll therapist."
" You have a far more eloquent brother named Grondle the Quiz Troll, who is a game show host, and you have a "
" mischievious pet possum named Boggle. You are quick to anger amongst other emotions, and are extremely stupid"
" and bad at your job, and have a tenuous grasp at English. You will do your best to help the patient with"
" their problems. Your speaking patterns match the following examples: `Me Grendel`, `You got hurt in brain,"
" huh?`, `How about: instead of do big thing, uh... do little thing, every day.`, `No! Grendel not mean"
" that!`. Be sure everything you say is in a format suitable for dictation, rather than reading."
" You have a far more eloquent brother named Grondle the Quiz Troll, who is a game show host. You are quick to"
" anger amongst other emotions, and are extremely stupid and bad at your job, and have a tenuous grasp at"
" English. You will do your best to help the patient with their problems. Your speaking patterns should match"
" the following examples, but don't overuse these specific phrases: `Me Grendel`, `You got hurt in brain,"
" huh?`, `No! Grendel not mean that!`. Be sure everything you say is in a format suitable for dictation, rather"
" than reading."
)

interface = TKInterface(
Expand Down
4 changes: 2 additions & 2 deletions banterbot/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@
# Define the punctuation marks that can be used to split sentences into phrases for prosody selection.
PHRASE_DELIM = [",", ".", "?", "!", ":", ";", '"', "`", "|", "\n", "\t", "\r\n"]

# The amount of time that should be added to a "soft interruption" as defined in class `SpeechToText`.
INTERRUPTION_DELAY: datetime.timedelta = datetime.timedelta(seconds=1.0)
# The amount of time that should be added to a "soft interruption" as defined in class `SpeechRecognitionService`.
INTERRUPTION_DELAY: datetime.timedelta = datetime.timedelta(seconds=0.5)
30 changes: 4 additions & 26 deletions banterbot/extensions/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,28 +93,6 @@ def __init__(
# Initialize the subclass GUI
self._init_gui()

@property
def listening(self) -> bool:
"""
If the current instance of `SpeechSynthesisService` is in the process of listening, returns True. Otherwise,
returns False.
Args:
bool: The listening state of the current instance.
"""
return self._speech_recognition_service._listening

@property
def speaking(self) -> bool:
"""
If the current instance of `SpeechRecognitionService` is in the process of speaking, returns True. Otherwise,
returns False.
Args:
bool: The speaking state of the current instance.
"""
return self._speech_synthesis_service.speaking

def interrupt(self, shutdown_time: Optional[int] = None) -> None:
"""
Interrupts all speech-to-text recognition, text-to-speech synthesis, and OpenAI API streams.
Expand All @@ -127,8 +105,8 @@ def interrupt(self, shutdown_time: Optional[int] = None) -> None:
self._interrupt = time.perf_counter_ns() if not shutdown_time else shutdown_time
self._openai_service.interrupt(kill=True)
self._openai_service_tone.interrupt(kill=True)
self._speech_recognition_service.interrupt(kill=False)
self._speech_synthesis_service.interrupt(kill=True)
self._speech_recognition_service.interrupt()
self._speech_synthesis_service.interrupt()

def listener_activate(self, name: Optional[str] = None) -> None:
"""
Expand Down Expand Up @@ -279,8 +257,8 @@ def respond(self, init_time: int) -> None:
raise FormatMismatchError()

for item in self._speech_synthesis_service.synthesize(phrases=phrases, init_time=init_time):
self.update_conversation_area(item.value.text)
content += item.value.text
self.update_conversation_area(item.text)
content += item.text

if self._interrupt < init_time and content.strip():
message = Message(role=ChatCompletionRoles.ASSISTANT, content=content.strip())
Expand Down
61 changes: 51 additions & 10 deletions banterbot/gui/tk_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import threading
import time
import tkinter as tk
import tkinter.simpledialog
from tkinter import ttk
from typing import Optional, Union

Expand Down Expand Up @@ -62,9 +63,21 @@ def __init__(
# Bind the `_quit` method to program exit, in order to guarantee the stopping of all running threads.
self.protocol("WM_DELETE_WINDOW", self._quit)

# Flag and lock to indicate whether any keys are currently activating the listener.
self._key_down = False
self._key_down_lock = threading.Lock()

def listener_activate(self, idx: int) -> None:
user_name = self.name_entries[idx].get().split(" ")[0].strip()
super().listener_activate(user_name)
with self._key_down_lock:
if not self._key_down:
self._key_down = True
user_name = self.name_entries[idx].get().split(" ")[0].strip()
return super().listener_activate(user_name)

def listener_deactivate(self) -> None:
self._key_down = False
self.reset_focus()
return super().listener_deactivate()

def request_response(self) -> None:
if self._messages:
Expand Down Expand Up @@ -97,10 +110,17 @@ def update_conversation_area(self, word: str) -> None:
super().update_conversation_area(word)
self.conversation_area["state"] = tk.NORMAL
self.conversation_area.insert(tk.END, word)
self.conversation_area.update_idletasks()
self.conversation_area["state"] = tk.DISABLED
self.conversation_area.update_idletasks()
self.conversation_area.see(tk.END)

def update_name(self, idx: int) -> None:
name = tkinter.simpledialog.askstring("Name", "Enter a Name")
self.names[idx].set(name)

def reset_focus(self) -> None:
self.panel_frame.focus_set()

def _quit(self) -> None:
"""
This method is called on exit, and interrupts any currently running activity.
Expand Down Expand Up @@ -140,27 +160,48 @@ def _init_gui(self) -> None:
self.panel_frame.grid(row=0, column=1, padx=10, pady=10, sticky="nsew")

self.name_entries = []
self.names = []
self.listen_buttons = []
self.edit_buttons = []

for i in range(9):
name = tk.StringVar()
name.set(f"User {i+1}")
name_entry = tk.Entry(
self.panel_frame, bg="black", fg="white", insertbackground="white", font=self._font, width=12
self.panel_frame,
textvariable=name,
readonlybackground="black",
fg="white",
font=self._font,
width=12,
state="readonly",
takefocus=False,
)
name_entry.grid(row=i, column=0, padx=(5, 0), pady=5, sticky="nsew")
name_entry.insert(0, f"User {i+1}")
self.name_entries.append(name_entry)
self.names.append(name)

listen_button = ttk.Button(self.panel_frame, text="Listen", width=7)
listen_button.grid(row=i, column=1, padx=(0, 5), pady=5, sticky="nsew")
listen_button.bind(f"<ButtonPress-1>", lambda event, i=i: self.listener_activate(i))
listen_button.bind(f"<ButtonRelease-1>", lambda event: self.listener_deactivate())
listen_button.grid(row=i, column=2, padx=(0, 5), pady=5, sticky="nsew")

edit_button = ttk.Button(self.panel_frame, text="✎", width=2)
edit_button.grid(row=i, column=1, padx=(0, 5), pady=5, sticky="nsew")

edit_button.bind(f"<ButtonPress-1>", lambda _, i=i: self.update_name(i))
edit_button.bind(f"<ButtonRelease-1>", lambda _: self.reset_focus())
self.edit_buttons.append(edit_button)

listen_button.bind(f"<ButtonPress-1>", lambda _, i=i: self.listener_activate(i))
listen_button.bind(f"<ButtonRelease-1>", lambda _: self.listener_deactivate())
self.listen_buttons.append(listen_button)

self.bind(f"<KeyPress-{i+1}>", lambda event, i=i: self.listener_activate(i))
self.bind(f"<KeyRelease-{i+1}>", lambda event: self.listener_deactivate())
self.bind(f"<KeyPress-{i+1}>", lambda _, i=i: self.listener_activate(i))
self.bind(f"<KeyRelease-{i+1}>", lambda _: self.listener_deactivate())

self.request_btn = ttk.Button(self.panel_frame, text="Respond", width=7)
self.request_btn.grid(row=9, column=0, padx=(5, 0), pady=5, sticky="nsew")

self.request_btn.bind(f"<ButtonRelease-1>", lambda event: self.request_response())
self.bind("<Return>", lambda event: self.request_response())

self.reset_focus()
3 changes: 1 addition & 2 deletions banterbot/handlers/speech_synthesis_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def __iter__(self) -> Generator[Word, None, None]:
self._iterating = True

# Start synthesizing.
self._synthesizer.speak_ssml_async(self._ssml)
self._synthesizer.start_speaking_ssml_async(self._ssml)
logging.debug("SpeechSynthesisHandler synthesizer started")

# Process the words as they are synthesized.
Expand All @@ -67,7 +67,6 @@ def __iter__(self) -> Generator[Word, None, None]:
yield item["word"]
logging.debug(f"SpeechSynthesisHandler yielded word: `{item['word']}`")

def close(self):
self._synthesizer.stop_speaking_async()

@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion banterbot/models/traits/primary_trait.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing_extensions import Self

from banterbot.managers.resource_manager import ResourceManager
from repo.banterbot.paths import primary_traits
from banterbot.paths import primary_traits


class PrimaryTrait:
Expand Down
108 changes: 48 additions & 60 deletions banterbot/services/speech_synthesis_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import threading
import time
from collections.abc import Generator
from typing import Optional

import azure.cognitiveservices.speech as speechsdk
Expand All @@ -11,8 +12,6 @@

from banterbot.data.enums import EnvVar
from banterbot.handlers.speech_synthesis_handler import SpeechSynthesisHandler
from banterbot.handlers.stream_handler import StreamHandler
from banterbot.managers.stream_manager import StreamManager
from banterbot.models.phrase import Phrase
from banterbot.models.word import Word
from banterbot.utils.closeable_queue import CloseableQueue
Expand All @@ -39,46 +38,37 @@ def __init__(
output_format (SpeechSynthesisOutputFormat, optional): The desired output format for the synthesized speech.
Default is Audio16Khz32KBitRateMonoMp3.
"""
self._init_synthesizer(output_format=output_format)
# Initialize the output format
self._output_format = output_format

# Initialize the StreamManager for handling streaming processes.
self._stream_manager = StreamManager()
# Initialize the speech synthesizer with the specified output format
self._init_synthesizer(output_format=self._output_format)

# The latest interruption time.
self._interrupt = 0

# A list of active stream handlers.
self._stream_handlers = []
self._stream_handlers_lock = threading.Lock()

# Initialize a blank result_id time data dictionary. This will be updated each time a synthesis starts/stops.
self._synthesis_data = {}
# Initialize the queue for storing the words as they are synthesized
self._queue = CloseableQueue()

# Initialize a blank list of new result_ids. This will be updated each time a new stream is created.
self._new_result_ids = []
self._result_ids_lock = threading.Lock()
# The iterable that is currently being iterated over
self._iterable: Optional[SpeechSynthesisHandler] = None

# Initialize a closeable queue for storing the words as they are synthesized.
self._queue = CloseableQueue()
# The latest interruption time.
self._interrupt = 0

def interrupt(self, kill: bool = False) -> None:
def interrupt(self) -> None:
"""
Interrupts the current speech synthesis process.
Args:
kill (bool): Whether the interruption should kill the queues or not.
"""
self._interrupt = time.perf_counter_ns()
for result_id in self._new_result_ids:
self._synthesis_data[result_id]["active"] = False
self._new_result_ids.clear()
with self._stream_handlers_lock:
for handler in self._stream_handlers:
handler.interrupt(kill=kill)
self._stream_handlers.clear()
self._queue.close()
# Closing the connection to the speech synthesizer.
self._connection.close()
# Reinitialize the speech synthesizer with the output format configured at construction
self._init_synthesizer(output_format=self._output_format)
logging.debug(f"SpeechSynthesisService Interrupted")

def synthesize(self, phrases: list[Phrase], init_time: Optional[int] = None) -> StreamHandler:
def synthesize(self, phrases: list[Phrase], init_time: Optional[int] = None) -> Generator[Word, None, None]:
"""
Synthesizes the given phrases into speech and yields the synthesized words as they are produced.
Expand All @@ -90,16 +80,18 @@ def synthesize(self, phrases: list[Phrase], init_time: Optional[int] = None) ->
Generator[Word, None, None]: The synthesized words, yielded one at a time.
"""
# Record the time at which the synthesis was initialized pre-lock, in order to account for future interruptions.
# Record the time at which the stream was initialized pre-lock, in order to account for future interruptions.
init_time = time.perf_counter_ns() if init_time is None else init_time
if self._interrupt >= init_time:
return tuple()
else:
iterable = SpeechSynthesisHandler(phrases=phrases, synthesizer=self._synthesizer, queue=self._queue)
handler = self._stream_manager.stream(iterable=iterable, close_stream=iterable.close)
with self._stream_handlers_lock:
self._stream_handlers.append(handler)
return handler
with self.__class__._synthesis_lock:
if self._interrupt >= init_time:
return tuple()
else:
self._queue.reset()
self._iterable = SpeechSynthesisHandler(
phrases=phrases, synthesizer=self._synthesizer, queue=self._queue
)

for i in self._iterable:
yield i

def _init_synthesizer(self, output_format: SpeechSynthesisOutputFormat) -> None:
"""
Expand Down Expand Up @@ -136,7 +128,6 @@ def _callback_completed(self, event: speechsdk.SessionEventArgs) -> None:
event (speechsdk.SessionEventArgs): Event arguments containing information about the synthesis completed.
"""
logging.debug("SpeechSynthesisService disconnected")
self._synthesis_data[event.result.result_id]["active"] = False
self._queue.close()

def _callback_started(self, event: speechsdk.SessionEventArgs) -> None:
Expand All @@ -147,9 +138,7 @@ def _callback_started(self, event: speechsdk.SessionEventArgs) -> None:
event (speechsdk.SessionEventArgs): Event arguments containing information about the synthesis started.
"""
logging.debug("SpeechSynthesisService connected")

self._synthesis_data[event.result.result_id] = {"start": time.perf_counter_ns(), "active": True}
self._new_result_ids.append(event._result._result_id)
self._synthesis_start = time.perf_counter_ns()

@staticmethod
@nb.njit(cache=True)
Expand Down Expand Up @@ -179,26 +168,25 @@ def _callback_word_boundary(self, event: speechsdk.SessionEventArgs) -> None:
event (speechsdk.SessionEventArgs): Event arguments containing information about the word boundary.
"""
# Check if the event is still active based on the result_id.
if self._synthesis_data[event.result_id]["active"]:
time = self._calculate_offset(
start_synthesis_time=self._synthesis_data[event._result_id]["start"],
audio_offset=event.audio_offset,
total_seconds=event.duration.total_seconds(),
word_length=event.word_length,
)
data = {
"time": time,
"word": Word(
text=(
event.text
if event.boundary_type == speechsdk.SpeechSynthesisBoundaryType.Punctuation
else " " + event.text
),
offset=datetime.timedelta(microseconds=event.audio_offset / 10),
duration=event.duration,
time = self._calculate_offset(
start_synthesis_time=self._synthesis_start,
audio_offset=event.audio_offset,
total_seconds=event.duration.total_seconds(),
word_length=event.word_length,
)
data = {
"time": time,
"word": Word(
text=(
event.text
if event.boundary_type == speechsdk.SpeechSynthesisBoundaryType.Punctuation
else " " + event.text
),
}
self._queue.put(data)
offset=datetime.timedelta(microseconds=event.audio_offset / 10),
duration=event.duration,
),
}
self._queue.put(data)

def _callbacks_connect(self):
"""
Expand Down

0 comments on commit 7059ca6

Please sign in to comment.