[128] - Toggle /voice-search endpoint on/off #436

Open: wants to merge 7 commits into main
.github/workflows/tests.yaml (3 additions, 0 deletions)
@@ -12,6 +12,8 @@ env:
POSTGRES_USER: postgres-test-user
POSTGRES_DB: postgres-test-db
REDIS_HOST: redis://redis:6379
ENABLE_VOICE_SEARCH: True

jobs:
container-job:
runs-on: ubuntu-20.04
@@ -55,6 +57,7 @@ jobs:
env:
PROMETHEUS_MULTIPROC_DIR: /tmp
REDIS_HOST: ${{ env.REDIS_HOST }}
ENABLE_VOICE_SEARCH: ${{ env.ENABLE_VOICE_SEARCH }}
run: |
cd core_backend
export POSTGRES_HOST=postgres POSTGRES_USER=$POSTGRES_USER \
Makefile (1 addition, 0 deletions)
@@ -1,4 +1,5 @@
#!make
SHELL := /bin/bash

PROJECT_NAME = aaq
CONDA_ACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh ; conda activate ; conda activate
core_backend/Makefile (1 addition, 1 deletion)
@@ -1,4 +1,5 @@
#!make
SHELL := /bin/bash

.PHONY : tests

@@ -49,4 +50,3 @@ teardown-redis-test:
teardown-test-db:
@docker stop testdb
@docker rm testdb

core_backend/app/config.py (2 additions, 0 deletions)
@@ -83,6 +83,8 @@
BACKEND_ROOT_PATH = os.environ.get("BACKEND_ROOT_PATH", "")

# Speech API
ENABLE_VOICE_SEARCH = os.getenv("ENABLE_VOICE_SEARCH", "False").lower() == "true"

CUSTOM_STT_ENDPOINT = os.environ.get("CUSTOM_STT_ENDPOINT", None)
CUSTOM_TTS_ENDPOINT = os.environ.get("CUSTOM_TTS_ENDPOINT", None)

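For clarity, the new flag only turns on when the environment variable is the literal string "true" (any casing). A standalone restatement of that parsing rule, for illustration only and not part of the PR:

# Restates the ENABLE_VOICE_SEARCH parsing above: only a case-insensitive "true" enables it.
def parse_flag(value: str | None) -> bool:
    return (value or "False").lower() == "true"

assert parse_flag("True") is True
assert parse_flag("true") is True
assert parse_flag("1") is False       # "1" or "yes" will NOT enable voice search
assert parse_flag(None) is False      # unset -> disabled by default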
core_backend/app/question_answer/routers.py (118 additions, 110 deletions)
@@ -12,7 +12,12 @@
from sqlalchemy.ext.asyncio import AsyncSession

from ..auth.dependencies import authenticate_key, rate_limiter

Removed:
from ..config import CUSTOM_STT_ENDPOINT, GCS_SPEECH_BUCKET, USE_CROSS_ENCODER

Added:
from ..config import (
    CUSTOM_STT_ENDPOINT,
    ENABLE_VOICE_SEARCH,
    GCS_SPEECH_BUCKET,
    USE_CROSS_ENCODER,
)

from ..contents.models import (
    get_similar_content_async,
    increment_query_count,
@@ -155,134 +160,137 @@ async def search(
    )


Removed: the previous, unconditional registration of the /voice-search route (the same handler body as below, one indentation level shallower; one JSONResponse call is reflowed to fit the deeper indent).

Added: the route is now registered only when the feature flag is on:

if ENABLE_VOICE_SEARCH:

    @router.post(
        "/voice-search",
        response_model=QueryAudioResponse,
        responses={
            status.HTTP_400_BAD_REQUEST: {
                "model": QueryResponseError,
                "description": "Bad Request",
            },
            status.HTTP_500_INTERNAL_SERVER_ERROR: {
                "model": QueryResponseError,
                "description": "Internal Server Error",
            },
        },
    )
    async def voice_search(
        file_url: str,
        request: Request,
        asession: AsyncSession = Depends(get_async_session),
        user_db: UserDB = Depends(authenticate_key),
    ) -> QueryAudioResponse | JSONResponse:
        """
        Endpoint to transcribe audio from a provided URL,
        generate an LLM response, by default generate_tts is
        set to true and return a public random URL of an audio
        file containing the spoken version of the generated response.
        """
        try:
            file_stream, content_type, file_extension = await download_file_from_url(
                file_url
            )

            unique_filename = generate_random_filename(file_extension)
            destination_blob_name = f"stt-voice-notes/{unique_filename}"

            await upload_file_to_gcs(
                GCS_SPEECH_BUCKET, file_stream, destination_blob_name, content_type
            )
            file_path = f"temp/{unique_filename}"
            with open(file_path, "wb") as f:
                file_stream.seek(0)
                f.write(file_stream.read())
            file_stream.seek(0)

            if CUSTOM_STT_ENDPOINT is not None:
                transcription = await post_to_speech_stt(file_path, CUSTOM_STT_ENDPOINT)
                transcription_result = transcription["text"]

            else:
                transcription_result = await transcribe_audio(file_path)

            user_query = QueryBase(
                generate_llm_response=True,
                query_text=transcription_result,
                query_metadata={},
            )

            (
                user_query_db,
                user_query_refined_template,
                response_template,
            ) = await get_user_query_and_response(
                user_id=user_db.user_id,
                user_query=user_query,
                asession=asession,
                generate_tts=True,
            )

            response = await get_search_response(
                query_refined=user_query_refined_template,
                response=response_template,
                user_id=user_db.user_id,
                n_similar=int(N_TOP_CONTENT),
                n_to_crossencoder=int(N_TOP_CONTENT_TO_CROSSENCODER),
                asession=asession,
                exclude_archived=True,
                request=request,
            )

            if user_query.generate_llm_response:
                response = await get_generation_response(
                    query_refined=user_query_refined_template,
                    response=response,
                )

            await save_query_response_to_db(user_query_db, response, asession)
            await increment_query_count(
                user_id=user_db.user_id,
                contents=response.search_results,
                asession=asession,
            )
            await save_content_for_query_to_db(
                user_id=user_db.user_id,
                query_id=response.query_id,
                session_id=user_query.session_id,
                contents=response.search_results,
                asession=asession,
            )

            if os.path.exists(file_path):
                os.remove(file_path)
            file_stream.close()

            if type(response) is QueryAudioResponse:
                return response

            if type(response) is QueryResponseError:
                return JSONResponse(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    content=response.model_dump(),
                )

            return JSONResponse(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                content={"error": "Internal server error"},
            )

        except ValueError as ve:
            logger.error(f"ValueError: {str(ve)}")
            return JSONResponse(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                content={"error": f"Value error: {str(ve)}"},
            )

        except Exception as e:
            logger.error(f"Unexpected error: {str(e)}")
            return JSONResponse(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                content={"error": "Internal server error"},
            )


@identify_language__before
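Because the registration runs at import time, turning the flag off means the route simply never exists and FastAPI answers 404 for it. A minimal self-contained sketch of that pattern, for illustration only (not the project's app; the handler body is a stand-in):

import os

from fastapi import FastAPI
from fastapi.testclient import TestClient

ENABLE_VOICE_SEARCH = os.getenv("ENABLE_VOICE_SEARCH", "False").lower() == "true"

app = FastAPI()

if ENABLE_VOICE_SEARCH:

    @app.post("/voice-search")
    async def voice_search(file_url: str) -> dict:
        # Stand-in handler; the real one transcribes the audio and runs the QA flow.
        return {"transcribed_from": file_url}


client = TestClient(app)
resp = client.post("/voice-search", params={"file_url": "https://example.com/q.mp3"})
print(resp.status_code)  # 200 when ENABLE_VOICE_SEARCH=true in the environment, 404 otherwise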
core_backend/tests/api/test.env (1 addition, 0 deletions)
@@ -13,3 +13,4 @@ ALIGN_SCORE_API="http://localhost:5002/alignscore_base"
# if u want to try the tests for the external TTS and STT apis then comment this out
CUSTOM_STT_ENDPOINT="http://localhost:8001/transcribe"
CUSTOM_TTS_ENDPOINT="http://localhost:8001/synthesize"
ENABLE_VOICE_SEARCH=True
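One side effect worth noting for tests: the guard is evaluated when routers.py is imported, so toggling the flag afterwards has no effect. A hedged sketch of how a test could exercise the disabled path; the module paths and the module-level router object are assumptions based on this diff, not verified against the repo:

import importlib

def test_voice_search_route_absent_when_disabled(monkeypatch):
    # Set the env var *before* (re)importing, because the guard runs at import time.
    monkeypatch.setenv("ENABLE_VOICE_SEARCH", "False")

    import app.config as config                      # assumed module path
    import app.question_answer.routers as routers    # assumed module path

    importlib.reload(config)   # re-evaluates ENABLE_VOICE_SEARCH from the environment
    importlib.reload(routers)  # re-runs the conditional @router.post registration

    assert "/voice-search" not in [route.path for route in routers.router.routes]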
deployment/docker-compose/template.core_backend.env (5 additions, 0 deletions)
@@ -37,6 +37,11 @@ LITELLM_ENDPOINT="http://localhost:4000"
#PGVECTOR_VECTOR_SIZE=1024

#### Speech APIs ###############################################################
# This variable controls whether the voice search endpoint is active (set to true) or inactive (set to false). Default is false.
# If enabled, we default to using external services unless `CUSTOM_SPEECH_ENDPOINT` is set, in which case the custom hosted APIs will be used.
# ENABLE_VOICE_SEARCH=True

# if TOGGLE_VOICE is set to 'Custom' then make sure to also set the Environment variables mentioned below
# CUSTOM_STT_ENDPOINT=http://speech_service:8001/transcribe
# CUSTOM_TTS_ENDPOINT=http://speech_service:8001/synthesize

Review comment (Contributor), on the TOGGLE_VOICE line above: What is TOGGLE_VOICE? I don't see it anywhere else. Probably that's what turned into ENABLE_VOICE_SEARCH.

Review comment (Contributor): You should probably change that in the docs. I saw it in docs/components/voice-service/inded.md.
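Once ENABLE_VOICE_SEARCH=True is set (and the service restarted), the endpoint can be exercised like any other authenticated route. A hypothetical client call; the host, port, any root path prefix, and the bearer-style API key are placeholders rather than values taken from this PR:

import requests

BASE_URL = "http://localhost:8000"   # placeholder; prepend BACKEND_ROOT_PATH if one is configured
API_KEY = "<your-api-key>"           # placeholder; authenticate_key guards the route

resp = requests.post(
    f"{BASE_URL}/voice-search",
    params={"file_url": "https://example.com/question.mp3"},  # query parameter per the handler signature
    headers={"Authorization": f"Bearer {API_KEY}"},
    timeout=60,
)
print(resp.status_code, resp.json())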