Commit: audio
dhrumilp12 committed Jun 13, 2024
1 parent f6b844e commit 924c6ec
Showing 2 changed files with 72 additions and 15 deletions.
27 changes: 20 additions & 7 deletions client/src/Components/chatComponent.jsx
@@ -154,10 +154,15 @@ const ChatComponent = () => {

     // Function to handle recording start
     const startRecording = () => {
+        setAudioChunks([]);
         navigator.mediaDevices.getUserMedia({ audio: true })
             .then(stream => {
-                const recorder = new MediaRecorder(stream);
-                recorder.ondataavailable = (e) => setAudioChunks(current => [...current, e.data]);
+                const options = { mimeType: 'audio/webm' };
+                const recorder = new MediaRecorder(stream, options);
+                recorder.ondataavailable = (e) => {
+                    console.log('Data available:', e.data.size); // Log size to check if data is present
+                    setAudioChunks(current => [...current, e.data]);
+                };
                 recorder.onstop = sendAudioToServer;
                 recorder.start();
                 setMediaRecorder(recorder);
@@ -167,13 +172,21 @@ const ChatComponent = () => {

     // Function to handle recording stop
     const stopRecording = () => {
-        mediaRecorder.stop();
-        setIsRecording(false);
-        setMediaRecorder(null);
+        if (mediaRecorder) {
+            mediaRecorder.stop();
+            setIsRecording(false);
+            setMediaRecorder(null);
+        }
     };

     const sendAudioToServer = useCallback(() => {
-        const audioBlob = new Blob(audioChunks, { 'type': 'audio/wav' });
+        console.log('Audio chunks size:', audioChunks.reduce((sum, chunk) => sum + chunk.size, 0)); // Log total size of chunks
+        const audioBlob = new Blob(audioChunks, { 'type': 'audio/webm' });
+        if (audioBlob.size === 0) {
+            console.error('Audio Blob is empty');
+            return;
+        }
+        console.log(`Sending audio blob of size: ${audioBlob.size} bytes`);
         const formData = new FormData();
         formData.append('audio', audioBlob);
         setIsLoading(true);
@@ -197,7 +210,7 @@ const ChatComponent = () => {
             .finally(() => {
                 setIsLoading(false);
             });
-    }, []);
+    }, [audioChunks]);



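Note: the FormData field is named 'audio', so this upload targets a server route that reads that multipart field. The route itself is not part of this commit; a minimal Flask sketch of what it presumably looks like (endpoint path, function name, and import path are assumptions) is:

# Hypothetical Flask route, not shown in this commit; it only illustrates how the
# uploaded 'audio' field could be handed to speech_to_text() from speech_service.py.
from flask import Flask, request, jsonify
from services.speech_service import speech_to_text

app = Flask(__name__)

@app.route('/api/speech-to-text', methods=['POST'])  # assumed path
def transcribe_audio():
    audio = request.files.get('audio')  # matches formData.append('audio', audioBlob)
    if audio is None:
        return jsonify({'error': 'No audio file uploaded'}), 400
    text = speech_to_text(audio)  # speech_to_text() calls .read() on the file object
    if text is None:
        return jsonify({'error': 'Speech recognition failed'}), 500
    return jsonify({'text': text})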
60 changes: 52 additions & 8 deletions server/services/speech_service.py
@@ -1,22 +1,60 @@
 import azure.cognitiveservices.speech as speechsdk
 import io
+import subprocess
+import os

+def check_ffmpeg():
+    try:
+        result = subprocess.run(["ffmpeg", "-version"], capture_output=True, text=True)
+        print("FFmpeg version:")
+        print(result.stdout)
+    except Exception as e:
+        print("Failed to run FFmpeg:", str(e))
+
+check_ffmpeg()
+
+def convert_audio_to_wav(input_audio_path, output_audio_path):
+    try:
+        command = ['ffmpeg', '-i', input_audio_path, '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '16000', output_audio_path]
+        result = subprocess.run(command, check=True, text=True, capture_output=True)
+        print(f"FFmpeg output: {result.stdout}")
+    except subprocess.CalledProcessError as e:
+        print(f"FFmpeg error: {e.stderr}")
+        raise Exception("Failed to convert audio") from e
+
+
 def speech_to_text(audio_file):
+
+    # Save original audio to a temporary file
+    temp_input_path = 'temp_input.webm'
+    temp_output_path = 'temp_output.wav'
+
     try:
+        with open(temp_input_path, 'wb') as f:
+            f.write(audio_file.read())
+
+        # Convert to WAV format
+        convert_audio_to_wav(temp_input_path, temp_output_path)
+
+        # Load converted audio and process
+        with open(temp_output_path, 'rb') as f:
+            audio_data = f.read()
-        # Convert the audio file received into a stream
-        audio_stream = io.BytesIO()
-        audio_file.save(audio_stream)
+        audio_stream = io.BytesIO(audio_data)
+
-        print(f"Size of audio file: {audio_stream.getbuffer().nbytes} bytes") # Debugging the size of the file
         audio_stream.seek(0)

+        print(f"Size of audio file: {audio_stream.getbuffer().nbytes} bytes")
         # Set up the speech config with your subscription details
         speech_key = "c833c8ef4bb0441b98971cc2d850f462"
         service_region = "eastus"
         speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
-        speech_config.set_property(speechsdk.PropertyId.SpeechServiceConnection_InitialSilenceTimeoutMs, "15000") # Timeout in milliseconds
+        speech_config.set_property(speechsdk.PropertyId.SpeechServiceConnection_InitialSilenceTimeoutMs, "5000") # Timeout in milliseconds
         # Create a push stream that can be used with the speech recognizer
         push_stream = speechsdk.audio.PushAudioInputStream()
         audio_config = speechsdk.audio.AudioConfig(stream=push_stream)

+        speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
+
+
         # Read the buffer and push into the push stream
         data = audio_stream.read(1024)
@@ -26,8 +64,6 @@ def speech_to_text(audio_file):
             data = audio_stream.read(1024)
         push_stream.close()

-        # Create a recognizer with the given settings
-        speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

         print("Speak into your microphone.")
         result = speech_recognizer.recognize_once()
@@ -43,8 +79,9 @@ def speech_to_text(audio_file):
             cancellation_details = result.cancellation_details
             print("Speech Recognition canceled: {}".format(cancellation_details.reason))
             if cancellation_details.reason == speechsdk.CancellationReason.Error:
-                print("Error details: {}".format(cancellation_details.error_details))
+                print("Error details: {}".format(cancellation_details.error_details))
             return "Speech Recognition canceled"
+
         else:
             print("Speech Recognition canceled: {}".format(result.cancellation_details.reason))
             if result.cancellation_details.reason == speechsdk.CancellationReason.Error:
@@ -55,3 +92,10 @@ def speech_to_text(audio_file):
     except Exception as e:
         print(f"Error during speech recognition: {str(e)}")
         return None
+    finally:
+        # Clean up temporary files
+        if os.path.exists(temp_input_path):
+            os.remove(temp_input_path)
+        if os.path.exists(temp_output_path):
+            os.remove(temp_output_path)
+        print("Temporary files removed")
