From 924c6ecea3cd08361ce001b72a5aa2c78471870c Mon Sep 17 00:00:00 2001 From: DHRUMIL PATEL <123137675+dhrumilp12@users.noreply.github.com> Date: Thu, 13 Jun 2024 13:27:25 -0400 Subject: [PATCH] audio --- client/src/Components/chatComponent.jsx | 27 ++++++++--- server/services/speech_service.py | 60 +++++++++++++++++++++---- 2 files changed, 72 insertions(+), 15 deletions(-) diff --git a/client/src/Components/chatComponent.jsx b/client/src/Components/chatComponent.jsx index fdfd8707..cd58dd12 100644 --- a/client/src/Components/chatComponent.jsx +++ b/client/src/Components/chatComponent.jsx @@ -154,10 +154,15 @@ const ChatComponent = () => { // Function to handle recording start const startRecording = () => { + setAudioChunks([]); navigator.mediaDevices.getUserMedia({ audio: true }) .then(stream => { - const recorder = new MediaRecorder(stream); - recorder.ondataavailable = (e) => setAudioChunks(current => [...current, e.data]); + const options = { mimeType: 'audio/webm' }; + const recorder = new MediaRecorder(stream, options); + recorder.ondataavailable = (e) => { + console.log('Data available:', e.data.size); // Log size to check if data is present + setAudioChunks(current => [...current, e.data]); + }; recorder.onstop = sendAudioToServer; recorder.start(); setMediaRecorder(recorder); @@ -167,13 +172,21 @@ const ChatComponent = () => { // Function to handle recording stop const stopRecording = () => { - mediaRecorder.stop(); - setIsRecording(false); - setMediaRecorder(null); + if (mediaRecorder) { + mediaRecorder.stop(); + setIsRecording(false); + setMediaRecorder(null); + } }; const sendAudioToServer = useCallback(() => { - const audioBlob = new Blob(audioChunks, { 'type': 'audio/wav' }); + console.log('Audio chunks size:', audioChunks.reduce((sum, chunk) => sum + chunk.size, 0)); // Log total size of chunks + const audioBlob = new Blob(audioChunks, { 'type': 'audio/webm' }); + if (audioBlob.size === 0) { + console.error('Audio Blob is empty'); + return; + } + console.log(`Sending audio blob of size: ${audioBlob.size} bytes`); const formData = new FormData(); formData.append('audio', audioBlob); setIsLoading(true); @@ -197,7 +210,7 @@ const ChatComponent = () => { .finally(() => { setIsLoading(false); }); - }, []); + }, [audioChunks]); diff --git a/server/services/speech_service.py b/server/services/speech_service.py index 0cf823cc..c4cc9c77 100644 --- a/server/services/speech_service.py +++ b/server/services/speech_service.py @@ -1,22 +1,60 @@ import azure.cognitiveservices.speech as speechsdk import io +import subprocess +import os + +def check_ffmpeg(): + try: + result = subprocess.run(["ffmpeg", "-version"], capture_output=True, text=True) + print("FFmpeg version:") + print(result.stdout) + except Exception as e: + print("Failed to run FFmpeg:", str(e)) + +check_ffmpeg() + +def convert_audio_to_wav(input_audio_path, output_audio_path): + try: + command = ['ffmpeg', '-i', input_audio_path, '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '16000', output_audio_path] + result = subprocess.run(command, check=True, text=True, capture_output=True) + print(f"FFmpeg output: {result.stdout}") + except subprocess.CalledProcessError as e: + print(f"FFmpeg error: {e.stderr}") + raise Exception("Failed to convert audio") from e + def speech_to_text(audio_file): + + # Save original audio to a temporary file + temp_input_path = 'temp_input.webm' + temp_output_path = 'temp_output.wav' + try: + with open(temp_input_path, 'wb') as f: + f.write(audio_file.read()) + + # Convert to WAV format + convert_audio_to_wav(temp_input_path, temp_output_path) + + # Load converted audio and process + with open(temp_output_path, 'rb') as f: + audio_data = f.read() # Convert the audio file received into a stream - audio_stream = io.BytesIO() - audio_file.save(audio_stream) + audio_stream = io.BytesIO(audio_data) + + print(f"Size of audio file: {audio_stream.getbuffer().nbytes} bytes") # Debugging the size of the file audio_stream.seek(0) - + print(f"Size of audio file: {audio_stream.getbuffer().nbytes} bytes") # Set up the speech config with your subscription details speech_key = "c833c8ef4bb0441b98971cc2d850f462" service_region = "eastus" speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region) - speech_config.set_property(speechsdk.PropertyId.SpeechServiceConnection_InitialSilenceTimeoutMs, "15000") # Timeout in milliseconds + speech_config.set_property(speechsdk.PropertyId.SpeechServiceConnection_InitialSilenceTimeoutMs, "5000") # Timeout in milliseconds # Create a push stream that can be used with the speech recognizer push_stream = speechsdk.audio.PushAudioInputStream() audio_config = speechsdk.audio.AudioConfig(stream=push_stream) - + speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config) + # Read the buffer and push into the push stream data = audio_stream.read(1024) @@ -26,8 +64,6 @@ def speech_to_text(audio_file): data = audio_stream.read(1024) push_stream.close() - # Create a recognizer with the given settings - speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config) print("Speak into your microphone.") result = speech_recognizer.recognize_once() @@ -43,8 +79,9 @@ def speech_to_text(audio_file): cancellation_details = result.cancellation_details print("Speech Recognition canceled: {}".format(cancellation_details.reason)) if cancellation_details.reason == speechsdk.CancellationReason.Error: - print("Error details: {}".for_sessiondetails.error_details) + print("Error details: {}".format(cancellation_details.error_details)) return "Speech Recognition canceled" + else: print("Speech Recognition canceled: {}".format(result.cancellation_details.reason)) if result.cancellation_details.reason == speechsdk.CancellationReason.Error: @@ -55,3 +92,10 @@ def speech_to_text(audio_file): except Exception as e: print(f"Error during speech recognition: {str(e)}") return None + finally: + # Clean up temporary files + if os.path.exists(temp_input_path): + os.remove(temp_input_path) + if os.path.exists(temp_output_path): + os.remove(temp_output_path) + print("Temporary files removed")