Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

exotel example with jittery audio #747

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 130 additions & 0 deletions examples/exotel-websocket/bot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import os
import sys

from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.services.deepgram import DeepgramSTTService
from deepgram.clients.listen.v1.websocket.options import LiveOptions
from pipecat.transports.network.fastapi_websocket import (
FastAPIWebsocketTransport,
FastAPIWebsocketParams,
)
from pipecat.serializers.twilio import TwilioFrameSerializer
# from exotel import ExotelFrameSerializer
from pipecat.serializers.livekit import LivekitFrameSerializer
from pipecat.serializers.protobuf import ProtobufFrameSerializer

from loguru import logger

from dotenv import load_dotenv
from custom.transport.ExotelWebsocketTransport import ExotelWebsocketTransport, ExotelWebsocketParams
from custom.serializers.exotel import ExotelFrameSerializer

# Load settings from a .env file; override=True lets values from the file
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# Drop the loguru handler with id 0 (presumably the pre-installed default
# sink -- this raises if that handler was already removed elsewhere) and log
# to stderr at DEBUG level instead.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")


async def run_bot(websocket_client, stream_sid):
    """Run one voice-bot session over a telephony websocket.

    Builds the pipeline websocket input -> Deepgram STT -> OpenAI LLM ->
    ElevenLabs TTS -> websocket output and runs it until the pipeline task
    finishes (an EndFrame is queued when the client disconnects).

    Args:
        websocket_client: The accepted websocket connection; handed to the
            transport as-is.
        stream_sid: Stream identifier forwarded to the frame serializer so
            outgoing messages can be tagged with it.
    """
    # Sample rate of the raw PCM requested from the TTS service. The transport's
    # output rate is set to the same value so outgoing frames are labelled
    # with the rate the audio really has; the serializer is then responsible
    # for converting down to the telephony rate.
    tts_sample_rate = 16000

    # To try the Exotel-specific classes instead, swap in
    # ExotelWebsocketTransport / ExotelWebsocketParams / ExotelFrameSerializer
    # (same keyword arguments).
    transport = FastAPIWebsocketTransport(
        websocket=websocket_client,
        input_name="input.pcm",
        output_name="output.wav",
        params=FastAPIWebsocketParams(
            audio_out_enabled=True,
            # Must stay off for telephony serializers: with the header enabled
            # every outgoing chunk is prefixed with a WAV header before the
            # serializer encodes it, so the header bytes end up encoded as
            # audio samples (audible as periodic clicks/jitter).
            add_wav_header=False,
            audio_in_enabled=True,
            audio_in_channels=1,
            audio_out_sample_rate=tts_sample_rate,
            audio_out_bitrate=128000,
            camera_out_enabled=False,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            vad_audio_passthrough=True,
            # NOTE(review): sample_rate=8000 here sets the rate of the PCM fed
            # into the pipeline; confirm the VAD analyzer and the Deepgram
            # options below agree with it.
            serializer=TwilioFrameSerializer(
                stream_sid=stream_sid,
                params=TwilioFrameSerializer.InputParams(
                    sample_rate=8000,
                ),
            ),
        ),
    )

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

    stt = DeepgramSTTService(
        api_key=os.getenv("DEEPGRAM_API_KEY"),
        live_options=LiveOptions(
            encoding="linear16",
            channels=1,
            model='nova-2-general',
            punctuate=True,
            interim_results=True,
            endpointing=500,
            utterance_end_ms=1000,
        ),
    )

    # For reference, an Exotel 'start' event observed during testing reported:
    #   media_format = {'encoding': 'base64', 'sample_rate': '8000',
    #                   'bit_rate': '128kbps'}

    # The pipeline passes TTS output along as raw PCM and nothing downstream
    # decodes compressed audio, so a PCM output format is required here; an
    # MP3 format would have its compressed bytes treated as samples.
    tts = ElevenLabsTTSService(
        api_key=os.getenv("ELEVENLABS_API_KEY"),
        voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
        model="eleven_multilingual_v2",
        output_format=f"pcm_{tts_sample_rate}",
    )

    messages = [
        {
            "role": "system",
            "content": "You are a helpful LLM in an audio call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
        },
    ]

    context = OpenAILLMContext(messages)
    context_aggregator = llm.create_context_aggregator(context)

    pipeline = Pipeline(
        [
            transport.input(),  # Websocket input from client
            stt,  # Speech-To-Text
            context_aggregator.user(),
            llm,  # LLM
            tts,  # Text-To-Speech
            transport.output(),  # Websocket output to client
            context_aggregator.assistant(),
        ]
    )

    task = PipelineTask(
        pipeline, params=PipelineParams(allow_interruptions=True))

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        # Kick the conversation off: ask the LLM to greet the caller.
        messages.append(
            {"role": "system", "content": "Please introduce yourself to the user."})
        await task.queue_frames([LLMMessagesFrame(messages)])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        # Ending the task lets runner.run() below return.
        await task.queue_frames([EndFrame()])

    # handle_sigint=False: signal handling is left to whatever hosts this
    # coroutine (presumably the web server -- confirm against the caller).
    runner = PipelineRunner(handle_sigint=False)
    await runner.run(task)
53 changes: 53 additions & 0 deletions examples/exotel-websocket/custom/serializers/exotel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from typing import Optional, Dict, Any
import json
import base64
from dataclasses import dataclass

from pydantic import BaseModel
from pipecat.audio.utils import ulaw_to_pcm, pcm_to_ulaw
from pipecat.frames.frames import AudioRawFrame, Frame, StartInterruptionFrame

from pipecat.serializers.base_serializer import FrameSerializer
from pipecat.frames.frames import Frame
from livekit.rtc import AudioFrame


class ExotelFrameSerializer(FrameSerializer):
    """Translate between pipeline frames and Exotel-style JSON stream messages.

    Outgoing audio frames become ``{"event": "media", ...}`` messages with a
    base64 payload; interruption frames become ``{"event": "clear", ...}``.
    Incoming ``media`` messages are decoded back into ``AudioRawFrame``s.
    """

    class InputParams(BaseModel):
        """Serializer settings."""

        # Sample rate (Hz) used for the wire audio and for decoded frames.
        sample_rate: int = 8000

    def __init__(self, stream_sid: str, params: Optional[InputParams] = None):
        """Create a serializer bound to one stream.

        Args:
            stream_sid: Stream identifier placed in every outgoing message.
            params: Serializer settings; a fresh ``InputParams()`` is created
                per instance when omitted, so instances never share one
                mutable settings object.
        """
        self._stream_sid = stream_sid
        self._params = params if params is not None else self.InputParams()

    def serialize(self, frame: Frame) -> str | bytes | None:
        """Encode a frame as a JSON message string.

        Args:
            frame: The frame to send.

        Returns:
            A JSON string for audio and start-interruption frames, or ``None``
            for any other frame type (nothing is sent).
        """
        if isinstance(frame, AudioRawFrame):
            # Convert (and resample) PCM to mu-law at the configured rate.
            # NOTE(review): confirm Exotel really expects mu-law -- a stream
            # advertised as 128kbps at 8 kHz would imply 16-bit linear PCM.
            ulaw_audio = pcm_to_ulaw(
                frame.audio, frame.sample_rate, self._params.sample_rate)
            payload = base64.b64encode(ulaw_audio).decode("utf-8")
            return json.dumps({
                "event": "media",
                "streamSid": self._stream_sid,
                "media": {"payload": payload},
            })

        if isinstance(frame, StartInterruptionFrame):
            # Ask the far end to drop any audio it has buffered.
            return json.dumps(
                {"event": "clear", "streamSid": self._stream_sid})

        return None

    def deserialize(self, data: str | bytes) -> Frame | None:
        """Decode an incoming JSON message into a frame.

        Args:
            data: The raw JSON text (or bytes) received from the websocket.

        Returns:
            An ``AudioRawFrame`` (mono, at the configured sample rate) for
            ``media`` messages carrying a payload; ``None`` for every other
            message, including ones without an ``event`` or payload.

        Raises:
            json.JSONDecodeError: If ``data`` is not valid JSON.
        """
        message = json.loads(data)
        if not isinstance(message, dict) or message.get("event") != "media":
            return None

        media = message.get("media")
        payload_base64 = media.get("payload") if isinstance(media, dict) else None
        if not payload_base64:
            # A media event without audio carries nothing to forward.
            return None

        ulaw_audio = base64.b64decode(payload_base64)
        # Same rate on both sides: decode only, no resampling.
        pcm_audio = ulaw_to_pcm(
            ulaw_audio, self._params.sample_rate, self._params.sample_rate)
        return AudioRawFrame(
            audio=pcm_audio,
            num_channels=1,
            sample_rate=self._params.sample_rate,
        )
Loading