Skip to content

Commit

Permalink
Merge pull request #17 from CogitoNTNU/speech_integration
Browse files Browse the repository at this point in the history
Speech integration
  • Loading branch information
WilliamMRS authored Nov 11, 2024
2 parents 63a2ea6 + 9187515 commit 37ef343
Show file tree
Hide file tree
Showing 34 changed files with 833 additions and 344 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -147,5 +147,6 @@ dmypy.json
#wav files
*.wav

*.webm
#calendar json
/core/tools/calendarjson
3 changes: 0 additions & 3 deletions core/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@ RUN pip install --no-cache-dir -r requirements.txt
# Copy the current directory contents into the container at /app
COPY . .

# Make port 8000 available to the world outside this container
EXPOSE 3001

# Run main.py when the container launches
CMD ["python","-u", "main.py"]
#CMD ["gunicorn", "--worker-class", "eventlet", "-w", "1", "-b", "0.0.0.0:8000", "app:app"]
59 changes: 56 additions & 3 deletions core/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
import asyncio
from modules.user_data_setup import check_folders
from modules.chat import read_chat
import requests
import logging
log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR) #INFO, DEBUG, WARNING, ERROR, or CRITICAL - config as needed during development.
log.setLevel(logging.ERROR)
from time import sleep
from collections import defaultdict

#
Expand Down Expand Up @@ -50,7 +52,7 @@ def hello_world():
# Route to get metadata like name, id, descriptions of all user chats
@app.route("/chats/metadata")
def get_chats():
return "lmao"
return "lmao" # Why does this return lmao?

@app.route('/vectorize_chat', methods=['POST'])
def summarize_store():
Expand Down Expand Up @@ -133,6 +135,56 @@ async def run_and_store():
print(f'Something very bad happened: {e}')
return jsonify({"status": "error"})

# HTTP endpoint for starting a recording; mirrors the 'start_recording' socket
# event fired when the user clicks the microphone button.
@app.route('/start_recording', methods=['POST'])
def start_recording_route():
    """Ask the speech-to-text service to start recording for a conversation.

    Reads ``conversation_id`` from the JSON request body and forwards the
    payload to the recorder at ``speech-to-text:3001``.

    Returns:
        ``{"status": "recording_started"}`` with HTTP 200 when the recorder
        answers 200; otherwise an error payload with HTTP 500 (recorder
        unreachable, request failed, or non-200 answer).
    """
    # A missing or non-JSON body is treated as an empty payload so the
    # .get() below cannot fail on None.
    data = request.get_json(silent=True) or {}
    conversation_id = data.get('conversation_id')

    print("Starting recording...")

    # Send POST request to the recorder to start recording
    headers = {'Content-Type': 'application/json'}
    try:
        response = requests.post(
            f'http://speech-to-text:3001/start_recording/{conversation_id}',
            headers=headers,
            json=data,
            # Bound only the connect phase: an unreachable recorder fails
            # fast, while a slow answer is still waited for as before.
            timeout=(5, None),
        )
    except requests.RequestException as e:
        print(f'Could not reach the speech-to-text service: {e}')
        return jsonify({"status": "error", "text": "Failed to start recording"}), 500

    if response.status_code != 200:
        return jsonify({"status": "error", "text": "Failed to start recording"}), 500

    return jsonify({"status": "recording_started"}), 200


@socketio.on('start_recording')
def start_recording_socket(data):
    """Handle the 'start_recording' socket event.

    Forwards the event payload to the speech-to-text service and reports the
    outcome over the socket: emits 'recording_started' when the recorder
    answers 200, and 'recording_failed' when it answers anything else or
    cannot be reached.

    Args:
        data: Event payload; ``conversation_id`` is read from it.
    """
    # An event emitted without a payload is treated as an empty dict.
    payload = data or {}
    conversation_id = payload.get('conversation_id')

    print("Starting recording via socket...")

    failure = {"status": "error", "text": "Failed to start recording"}

    # Send POST request to the recorder to start recording
    headers = {'Content-Type': 'application/json'}
    try:
        response = requests.post(
            f'http://speech-to-text:3001/start_recording/{conversation_id}',
            headers=headers,
            json=payload,
            # Bound only the connect phase: an unreachable recorder fails
            # fast, while a slow answer is still waited for as before.
            timeout=(5, None),
        )
    except requests.RequestException as e:
        # Without this, a connection error would escape the handler and the
        # client would never be told the recording did not start.
        print(f'Could not reach the speech-to-text service: {e}')
        socketio.emit('recording_failed', failure)
        return

    if response.status_code != 200:
        socketio.emit('recording_failed', failure)
        return

    socketio.emit('recording_started', {"status": "recording_started"})

@app.route('/recording_completed', methods=['POST'])
def recording_completed():
    """Receive a finished transcription and hand it to Jarvis.

    Expects a JSON body with ``text`` and ``conversation_id`` (both default
    to an empty string). The text is emitted on the "recording" socket event
    and then passed to ``jarvis.run``; the route answers with a success
    status and HTTP 200 once that call has finished.
    """
    body = request.json

    # Push the transcribed text to the UI before anything else.
    transcript = body.get('text', '')
    socketio.emit("recording", transcript)

    conversation_id = body.get('conversation_id', '')
    print(f"Recording completed for conversation ID {conversation_id} with text:", transcript)

    # Process the recorded text; asyncio.run blocks this request until
    # jarvis.run completes.
    jarvis_task = jarvis.run(transcript, socketio)
    asyncio.run(jarvis_task)

    result = {"status": "success"}
    return jsonify(result), 200


@socketio.on('get_chat_history')
def get_chat_history():
session_id = request.sid
Expand All @@ -143,4 +195,5 @@ def get_chat_history():
if __name__ == '__main__':
socketio.run(app, debug=True, host='0.0.0.0', port=PORT, allow_unsafe_werkzeug=True)

# hello
# hello
# TODO say hello back to whoever wrote this
10 changes: 10 additions & 0 deletions core/static/chat.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@ sendMessage = () => {
}
}

// Renders a transcribed voice message as a user message and scrolls the
// chat history to the bottom. Empty messages are ignored.
addRecordedMessage = (message) => {
    const historyPane = document.getElementById("chat_history")
    if (message == "") {
        return
    }
    // The message is run through marked before being added as a user message.
    const rendered = marked.parse(message)
    addUserMessage(rendered)
    historyPane.scrollTop = historyPane.scrollHeight;
}


addStreamedChunk = (messagePart) => {
if(state.activeAIMessage){
state.activeAIMessage.innerHTML += messagePart; // Append to innertext of the message
Expand Down
8 changes: 8 additions & 0 deletions core/static/index.css
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,14 @@ body {
font-size: 24px;
margin-left: 12px;
}
/* Sizing and dark-red background for an element with id "voice_button_recording".
   NOTE(review): recording.js sets the same #673636 color inline on #voice_button
   instead; confirm whether any element actually uses this id. */
#voice_button_recording {
width: 10%;
height: 9vh;
background-color: #673636;
border-radius: 10px;
font-size: 24px;

}

.chat_input_container{
display: flex;
Expand Down
1 change: 1 addition & 0 deletions core/static/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
<script src="/static/ui_elements/settings.js"></script>
<script src="/static/ui_elements/chatHistoryList.js"></script>
<script src="/static/chat.js"></script>
<script src="/static/recording.js"></script>
<script src="/static/socketEvents.js"></script>
<script src="/static/index.js" defer></script>
<script src="/static/eventlisteners.js"></script>
Expand Down
47 changes: 28 additions & 19 deletions core/static/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,23 @@ Main js file for loading the dynamic UI elements.
*/

// Runs on initial startup, after window (html) has finished loading
// Wires up the static UI: click handlers for the send and clear-log buttons,
// the chat history list, and a document-wide click handler that closes the
// settings page.
init = () => {
document.getElementById("send_button").addEventListener("click", sendMessage);
document.getElementById("clear_log").addEventListener("click", clear_log);

// Append the rendered chat history list to the .chatHistory container.
document.querySelector(".chatHistory").innerHTML += chatHistoryList();

// To hide settings page when clicking somewhere else after it's opened.
document.addEventListener("click", function (event) {
const settings = document.getElementById("settingsPage");
const settingsButton = document.getElementById("settingsButton");
// Only react when the click landed outside both the settings page and its
// button while the page is currently displayed.
if (
!settings.contains(event.target) &&
!settingsButton.contains(event.target) &&
settings.style.display == "block"
) {
settingsPage();
}
});
};
// Wires up the static UI once the page has loaded: button click handlers
// (send, clear log, voice), the chat history list, and a document-wide click
// handler that closes the settings page.
init = () => {
    const sendButton = document.getElementById('send_button')
    sendButton.addEventListener('click', sendMessage)

    const clearLogButton = document.getElementById('clear_log')
    clearLogButton.addEventListener('click', clear_log)

    const voiceButton = document.getElementById('voice_button')
    voiceButton.addEventListener('click', startRecording)

    // Append the rendered chat history list to the .chatHistory container.
    document.querySelector(".chatHistory").innerHTML += chatHistoryList()

    // Closes the settings page when it is open and the click landed outside
    // both the page itself and the button that opens it.
    const closeSettingsOnOutsideClick = (event) => {
        const settings = document.getElementById("settingsPage");
        const settingsButton = document.getElementById("settingsButton");
        const clickedInside = settings.contains(event.target) || settingsButton.contains(event.target);
        if (clickedInside || settings.style.display != "block") {
            return;
        }
        settingsPage()
    }
    document.addEventListener('click', closeSettingsOnOutsideClick);
}
window.onload = init;

// global state of the UI
Expand Down Expand Up @@ -107,6 +105,17 @@ async function addToolResponseToProcessContainer(toolResponse) {
let processesContainer = document.querySelector(".processesContainer");
processesContainer.scrollTop = processesContainer.scrollHeight;
}
// Appends a part of a recorded (transcribed) message to the element whose id
// is `uuid`; when no such element exists yet, delegates to addRecordedMessage.
// NOTE(review): addRecordedMessage in chat.js declares a single `message`
// parameter, so the uuid passed here appears to be ignored - confirm that an
// element with this id is actually created.
async function addStreamedRecording(uuid, messagePart) {
    let target = document.getElementById(uuid);

    if (target != null) {
        // Element already exists: concatenate this part onto it.
        target.innerHTML += messagePart;
        return;
    }

    await addRecordedMessage(messagePart, uuid);
    // Re-lookup kept from the original flow; the result is not used afterwards.
    target = document.getElementById(uuid);
}

addUserMessage = (message) => {
let html = /*html*/ `
Expand Down
7 changes: 7 additions & 0 deletions core/static/recording.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Click handler for the voice button: marks the button as recording, locks it
// against repeated clicks and asks the server to start recording for the
// active conversation. The button is released again when the server reports
// either the finished recording ("recording") or a failure ("recording_failed").
startRecording = () => {
    const voiceButton = document.getElementById('voice_button');
    voiceButton.style.backgroundColor = "#673636"; // Change button color to indicate recording
    // Buttons have a `disabled` property, not `enabled`; setting `enabled`
    // only adds an unused expando and leaves the button clickable.
    voiceButton.disabled = true;

    // Restores the button and removes both temporary listeners, so repeated
    // recordings do not accumulate handlers.
    const release = () => {
        voiceButton.disabled = false;
        voiceButton.style.backgroundColor = "";
        socket.off('recording', release);
        socket.off('recording_failed', release);
    };
    socket.on('recording', release);
    socket.on('recording_failed', release);

    const payload = {conversation_id: state.activeConversationId}
    socket.emit('start_recording', payload)
    console.log("Recording started");
}
13 changes: 13 additions & 0 deletions core/static/socketEvents.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,19 @@ socket.on("chunk", async (chunk) => {
await addStreamedMessage(uuid, chunk);
});

// Handles the transcribed text the server emits on "recording". When no AI
// message is active, the voice button is reset and a fresh message element is
// set up under a new uuid; the text is then appended to the element for the
// current uuid.
// NOTE(review): when state.activeAIMessage is already set, `uuid` is whatever
// value was assigned last, and the button is not reset - confirm this is intended.
socket.on("recording", async (recording) => {
    if (!state.activeAIMessage) {
        console.log("RECIVED MESSAGE")
        const voiceButton = document.getElementById('voice_button');
        voiceButton.style.backgroundColor = ""; // Restore the default color: recording is over
        // `disabled` is the real button property; the previous `enabled = true` had no effect.
        voiceButton.disabled = false; // Re-enable the button for the next recording
        uuid = generateUUID();
        await addStreamedRecording(uuid, "");
        ai_message = document.getElementById(uuid)
        state.activeAIMessage = ai_message
    }
    await addStreamedRecording(uuid, recording);
})

socket.on("tokens", async (tokens) => {
state.totalTokensUsed += tokens;
console.log("Total tokens so far:", state.totalTokensUsed);
Expand Down
21 changes: 21 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
version: '2.1'

services:
llm-service:
build: ./core
Expand All @@ -21,7 +23,26 @@ services:
stop_signal: SIGINT
ports:
- "3000:3000"
deploy:
resources:
limits:
cpus: '0.5'
memory: 2048M # Memory limit for the compose

# Speech-to-text service, built from ./speechToText. core/main.py reaches it
# on the compose network as http://speech-to-text:3001.
speech-to-text:
build: ./speechToText
restart: unless-stopped
environment:
FLASK_ENV: ${FLASK_ENV} # Auto-restarts Flask when code changes are detected
OPENAI_API_KEY: ${OPENAI_API_KEY}
PORT_STT: ${PORT_STT}
volumes:
- ./speechToText:/app # Mount the application code to detect live changes
networks:
- backend
stop_signal: SIGINT
# NOTE(review): the published port is fixed at 3001 while PORT_STT is
# configurable - confirm the two are meant to stay in sync.
ports:
- "3001:3001"

networks:
backend:
Expand Down
Binary file removed docs/images/enter_server_url.png
Binary file not shown.
Binary file removed docs/images/select_kernel.png
Binary file not shown.
Empty file removed speech/Dockerfile
Empty file.
Empty file removed speech/requirements.txt
Empty file.
32 changes: 32 additions & 0 deletions speechToText/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Use an official Python runtime as a parent image
FROM python:3.10-bookworm

# Set the working directory in the container
WORKDIR /app

# Audio-related system packages (PortAudio, ALSA, PulseAudio, SoX) and ffmpeg.
# Installed in a single layer so the apt package lists are removed in the same
# layer that created them and do not end up in the image.
RUN apt-get update && apt-get install -y \
    portaudio19-dev \
    libasound2-dev \
    libpulse-dev \
    sox \
    libsox-dev \
    ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# Copy only requirements first so the dependency layer stays cached
# when only application code changes.
COPY requirements.txt requirements.txt

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at /app
COPY . .

# Run main.py when the container launches (-u: unbuffered output)
CMD ["python","-u", "main.py"]
#CMD ["gunicorn", "--worker-class", "eventlet", "-w", "1", "-b", "0.0.0.0:8000", "app:app"]
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit 37ef343

Please sign in to comment.