[128] - Toggle /voice-search endpoint on/off #436

Open · wants to merge 7 commits into main
Changes from 3 commits
3 changes: 3 additions & 0 deletions .github/workflows/tests.yaml
@@ -12,6 +12,8 @@ env:
POSTGRES_USER: postgres-test-user
POSTGRES_DB: postgres-test-db
REDIS_HOST: redis://redis:6379
TOGGLE_VOICE: custom

jobs:
container-job:
runs-on: ubuntu-20.04
@@ -55,6 +57,7 @@ jobs:
env:
PROMETHEUS_MULTIPROC_DIR: /tmp
REDIS_HOST: ${{ env.REDIS_HOST }}
TOGGLE_VOICE: ${{ env.TOGGLE_VOICE }}
run: |
cd core_backend
export POSTGRES_HOST=postgres POSTGRES_USER=$POSTGRES_USER \
1 change: 1 addition & 0 deletions Makefile
@@ -1,4 +1,5 @@
#!make
SHELL := /bin/bash

PROJECT_NAME = aaq
CONDA_ACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh ; conda activate ; conda activate
2 changes: 1 addition & 1 deletion core_backend/Makefile
@@ -1,4 +1,5 @@
#!make
SHELL := /bin/bash

.PHONY : tests

@@ -49,4 +50,3 @@ teardown-redis-test:
teardown-test-db:
@docker stop testdb
@docker rm testdb

1 change: 1 addition & 0 deletions core_backend/app/config.py
@@ -83,6 +83,7 @@
BACKEND_ROOT_PATH = os.environ.get("BACKEND_ROOT_PATH", "")

# Speech API
TOGGLE_VOICE = os.environ.get("TOGGLE_VOICE", None)
CUSTOM_SPEECH_ENDPOINT = os.environ.get("CUSTOM_SPEECH_ENDPOINT", None)
# Logging
LANGFUSE = os.environ.get("LANGFUSE", "False")
230 changes: 119 additions & 111 deletions core_backend/app/question_answer/routers.py
@@ -13,7 +13,12 @@
from sqlalchemy.ext.asyncio import AsyncSession

from ..auth.dependencies import authenticate_key, rate_limiter
from ..config import CUSTOM_SPEECH_ENDPOINT, GCS_SPEECH_BUCKET, USE_CROSS_ENCODER
from ..config import (
    CUSTOM_SPEECH_ENDPOINT,
    GCS_SPEECH_BUCKET,
    TOGGLE_VOICE,
    USE_CROSS_ENCODER,
)
from ..contents.models import (
    get_similar_content_async,
    increment_query_count,
@@ -157,134 +162,137 @@ async def search(
)


This hunk wraps the existing /voice-search handler in an `if TOGGLE_VOICE is not None:` guard, so the endpoint is only registered when the variable is set; apart from the added indentation the handler body is unchanged. The resulting code:

if TOGGLE_VOICE is not None:

    @router.post(
        "/voice-search",
        response_model=QueryAudioResponse,
        responses={
            status.HTTP_400_BAD_REQUEST: {
                "model": QueryResponseError,
                "description": "Bad Request",
            },
            status.HTTP_500_INTERNAL_SERVER_ERROR: {
                "model": QueryResponseError,
                "description": "Internal Server Error",
            },
        },
    )
    async def voice_search(
        file_url: str,
        request: Request,
        asession: AsyncSession = Depends(get_async_session),
        user_db: UserDB = Depends(authenticate_key),
    ) -> QueryAudioResponse | JSONResponse:
        """
        Transcribe audio from a provided URL, generate an LLM response
        (generate_tts defaults to True), and return a public random URL of an
        audio file containing the spoken version of the generated response.
        """
        try:
            file_stream, content_type, file_extension = await download_file_from_url(
                file_url
            )

            unique_filename = generate_random_filename(file_extension)
            destination_blob_name = f"stt-voice-notes/{unique_filename}"

            await upload_file_to_gcs(
                GCS_SPEECH_BUCKET, file_stream, destination_blob_name, content_type
            )

            file_path = f"temp/{unique_filename}"
            with open(file_path, "wb") as f:
                file_stream.seek(0)
                f.write(file_stream.read())
            file_stream.seek(0)

            if CUSTOM_SPEECH_ENDPOINT is not None:
                transcription = await post_to_speech(file_path, CUSTOM_SPEECH_ENDPOINT)
                transcription_result = transcription["text"]
            else:
                transcription_result = await transcribe_audio(file_path)

            user_query = QueryBase(
                generate_llm_response=True,
                query_text=transcription_result,
                query_metadata={},
            )

            (
                user_query_db,
                user_query_refined_template,
                response_template,
            ) = await get_user_query_and_response(
                user_id=user_db.user_id,
                user_query=user_query,
                asession=asession,
                generate_tts=True,
            )

            response = await get_search_response(
                query_refined=user_query_refined_template,
                response=response_template,
                user_id=user_db.user_id,
                n_similar=int(N_TOP_CONTENT),
                n_to_crossencoder=int(N_TOP_CONTENT_TO_CROSSENCODER),
                asession=asession,
                exclude_archived=True,
                request=request,
            )

            if user_query.generate_llm_response:
                response = await get_generation_response(
                    query_refined=user_query_refined_template,
                    response=response,
                )

            await save_query_response_to_db(user_query_db, response, asession)
            await increment_query_count(
                user_id=user_db.user_id,
                contents=response.search_results,
                asession=asession,
            )
            await save_content_for_query_to_db(
                user_id=user_db.user_id,
                query_id=response.query_id,
                session_id=user_query.session_id,
                contents=response.search_results,
                asession=asession,
            )

            if os.path.exists(file_path):
                os.remove(file_path)
            file_stream.close()

            if type(response) is QueryAudioResponse:
                return response

            if type(response) is QueryResponseError:
                return JSONResponse(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    content=response.model_dump(),
                )

            return JSONResponse(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                content={"error": "Internal server error"},
            )

        except ValueError as ve:
            logger.error(f"ValueError: {str(ve)}")
            return JSONResponse(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                content={"error": f"Value error: {str(ve)}"},
            )

        except Exception as e:
            logger.error(f"Unexpected error: {str(e)}")
            return JSONResponse(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                content={"error": "Internal server error"},
            )


async def download_file_from_url(file_url: str) -> tuple[BytesIO, str, str]:
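For context, a minimal sketch of how a client might call the endpoint once it is enabled. The base URL, bearer-token header, and audio file location below are illustrative assumptions, not values from this PR:

# Hypothetical client call to the /voice-search endpoint; URL, key, and file
# location are placeholders.
import asyncio

import httpx

async def call_voice_search() -> None:
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        response = await client.post(
            "/voice-search",
            # file_url is a scalar parameter, so FastAPI reads it from the query string
            params={"file_url": "https://example.com/voice-note.mp3"},
            headers={"Authorization": "Bearer <API_KEY>"},
            timeout=60.0,
        )
        print(response.status_code, response.json())

asyncio.run(call_voice_search())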
1 change: 1 addition & 0 deletions core_backend/tests/api/test.env
@@ -12,3 +12,4 @@ ALIGN_SCORE_API="http://localhost:5002/alignscore_base"
# Speech API endpoint
# If you want to try the tests for the external TTS and STT APIs then comment this out
CUSTOM_SPEECH_ENDPOINT="http://localhost:8001/transcribe"
TOGGLE_VOICE=custom
5 changes: 5 additions & 0 deletions deployment/docker-compose/template.core_backend.env
@@ -34,6 +34,11 @@ LITELLM_ENDPOINT="http://localhost:4000"
#PGVECTOR_VECTOR_SIZE=1024

#### Speech APIs ###############################################################
# Set this variable to 'external' or 'custom' to enable the /voice-search endpoint
# By default it is not set, so it defaults to None and the endpoint is disabled
# TOGGLE_VOICE=external
@amiraliemami (Collaborator) commented on Sep 17, 2024:
change to boolean ENABLE_VOICE_SEARCH for clarity? We only use it as a boolean check anyway. I'd suggest changing the description to:

# This variable controls whether the voice search endpoint is active (set to true) or inactive (set to false). Default is false.

# If enabled, we default to using external services unless `CUSTOM_SPEECH_ENDPOINT` is set, in which case the custom hosted APIs will be used.
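
For illustration, a minimal sketch of how the suggested boolean flag might be parsed. The ENABLE_VOICE_SEARCH name and true/false semantics come from this suggestion, not from code in the PR:

# Hypothetical config.py snippet implementing the reviewer's suggestion.
import os

# Active only when set to "true" (case-insensitive); default is false.
ENABLE_VOICE_SEARCH = os.environ.get("ENABLE_VOICE_SEARCH", "false").lower() == "true"
CUSTOM_SPEECH_ENDPOINT = os.environ.get("CUSTOM_SPEECH_ENDPOINT", None)

# If enabled, external services are used unless a custom endpoint is configured.
USE_CUSTOM_SPEECH = ENABLE_VOICE_SEARCH and CUSTOM_SPEECH_ENDPOINT is not None

Under this proposal, the route registration in routers.py would check `if ENABLE_VOICE_SEARCH:` instead of `if TOGGLE_VOICE is not None:`.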


# If TOGGLE_VOICE is set to 'custom', make sure to also set the environment variables mentioned below
Contributor commented:
What is TOGGLE_VOICE? I don't see it anywhere else. Probably that's what turned into ENABLE_VOICE_SEARCH.

Contributor commented:
You should probably change that in the docs. I saw it in docs/components/voice-service/inded.md

# CUSTOM_SPEECH_ENDPOINT=http://speech_service:8001/transcribe

#### Temporary folder for prometheus gunicorn multiprocess ####################