Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

speech to text to front end #73

Merged
merged 1 commit into from
Sep 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 45 additions & 1 deletion client/src/components/Negotiate/Chat.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import { useNavigate } from 'react-router-dom';
import { LuUserCircle2 } from "react-icons/lu";
import { IoArrowBackOutline } from "react-icons/io5";
import { AudioRecorder } from 'react-audio-voice-recorder';
import { MdCall } from "react-icons/md";
import { IoVideocam } from "react-icons/io5";
import { IoMdMore } from "react-icons/io";
Expand All @@ -30,6 +31,7 @@
const navigate = useNavigate();
const [currperson, setperson] = useState();
const [loading, setLoading] = useState(true);
const [lang, setLang] = useState("kannada");

const chatDisplayRef = useRef(null); // Ref to chat display container
const bottomRef = useRef(null); // Ref to the last message to scroll into view
Expand All @@ -53,7 +55,7 @@
// console.log("disconnected");
newSocket.disconnect();
};
}, [currentUser])

Check warning on line 58 in client/src/components/Negotiate/Chat.jsx

View workflow job for this annotation

GitHub Actions / Lint Client Code

React Hook useEffect has a missing dependency: 'userType'. Either include it or remove the dependency array

useEffect(() => {
if (contacts != null) {
Expand Down Expand Up @@ -96,7 +98,7 @@
switchRooms(socket, prevRoomID, currentRoomID);
loadMessage(currentRoomID).then((result) => { setMessages(result) });
}
}, [currentRoomID])

Check warning on line 101 in client/src/components/Negotiate/Chat.jsx

View workflow job for this annotation

GitHub Actions / Lint Client Code

React Hook useEffect has missing dependencies: 'prevRoomID' and 'socket'. Either include them or remove the dependency array

useEffect(() => {
// console.log("messages array updated");
Expand Down Expand Up @@ -226,7 +228,28 @@
value={message}
onChange={handleChange}
/>
<Recorder />
<div>
<AudioRecorder
onRecordingComplete={(blob)=>handleAudioUpload(blob, lang)} //sending POST request
audioTrackConstraints={{
noiseSuppression: true,
echoCancellation: true,
// autoGainControl,
// channelCount,
// deviceId,
// groupId,
// sampleRate,
// sampleSize,
}}
onNotAllowedOrFound={(err) => console.table(err)}
downloadOnSavePress={false}
downloadFileExtension="webm"
mediaRecorderOptions={{
audioBitsPerSecond: 128000,
}}
showVisualizer={true}
/>
</div>
<button onClick={handleClick}>Send</button>
</div>
</div>
Expand Down Expand Up @@ -294,4 +317,25 @@
console.error("Error fetching messages: ", error);
}

}

/**
 * Uploads a recorded audio clip to the server's /chat endpoint for transcription.
 *
 * The blob is read as a data URL, its Base64 payload is extracted and POSTed as
 * JSON together with the blob's MIME type and the requested language.
 *
 * @param {Blob} blob - Recorded audio handed over by the recorder component.
 * @param {string} lang - Language the transcription should be returned in.
 * @returns {Promise<string|undefined>} The transcription from the server, or
 *   undefined when reading or uploading failed (the failure is logged).
 */
const handleAudioUpload = async (blob, lang) => {
  try {
    // FileReader is callback based; wrap it in a promise so that read failures
    // are caught below instead of throwing inside an event handler.
    const dataUrl = await new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = () => resolve(reader.result);
      reader.onerror = () => reject(reader.error || new Error("Failed to read recorded audio"));
      reader.readAsDataURL(blob);
    });

    const base64data = String(dataUrl).split(',')[1]; // Extract Base64 part
    if (!base64data) {
      throw new Error("Recorded audio is empty");
    }

    const response = await fetch(`http://localhost:${SERVER_PORT}/chat`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json', // Send JSON data
      },
      body: JSON.stringify({
        audio: base64data,
        mimeType: blob.type,
        lang: lang,
      }),
    });

    // fetch only rejects on network errors; HTTP error statuses must be checked explicitly.
    if (!response.ok) {
      throw new Error(`Transcription request failed with status ${response.status}`);
    }

    const result = await response.json();
    console.log(result.transcription);
    return result.transcription;
  } catch (error) {
    // The recorder callback ignores the returned promise, so log here rather
    // than letting the rejection go unhandled.
    console.error("Error uploading audio: ", error);
    return undefined;
  }
};
53 changes: 53 additions & 0 deletions server/audioHandler.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
const { GoogleGenerativeAI } = require("@google/generative-ai");
const mime = require('mime-types');

const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY);
const model = genAI.getGenerativeModel({ model: "gemini-1.5-flash-latest" });

/**
 * Asks the generative model to transcribe an audio clip.
 *
 * @param {Buffer|ArrayBuffer|Uint8Array|string} audioBuffer - Audio bytes; they
 *   are re-encoded to Base64 for the inline-data request part.
 * @param {string} [mimeType] - MIME type of the audio; 'audio/wav' is used when falsy.
 * @param {string} lang - Language named in the prompt for the transcription.
 * @returns {Promise<string>} Text returned by the model's response.
 */
const transcribeAudio = async (audioBuffer, mimeType, lang) => {
  console.log("transcribing audio");

  const encodedAudio = Buffer.from(audioBuffer).toString("base64");
  const audioPart = {
    inlineData: {
      data: encodedAudio,
      mimeType: mimeType ? mimeType : 'audio/wav',
    },
  };

  const instruction = `transcribe the audio and give the transcription in ${lang} without any timestamps and new line characters`;

  // The audio part goes first, followed by the textual instruction.
  const { response } = await model.generateContent([audioPart, instruction]);
  return response.text();
};


/**
 * Express handler for the speech-to-text endpoint.
 *
 * Expects a JSON body of the form { audio, mimeType, lang } where `audio` is a
 * Base64-encoded clip. Responds with:
 *   - 200 { message, transcription } on success,
 *   - 400 { message } when the body is missing or a field is absent/malformed,
 *   - 500 { message, error } when transcription throws.
 *
 * @param {object} req - Express request; `req.body` is read.
 * @param {object} res - Express response used to send the JSON reply.
 * @returns {Promise<void>}
 */
const handleAudio = async (req, res) => {
  console.log("Received POST request");

  // All client-side mistakes are reported the same way.
  const badRequest = (message) => res.status(400).json({ message });

  try {
    // req.body is undefined when no body parser matched the request; treat that
    // as an empty payload so the client gets a 400 rather than a 500.
    const { audio, mimeType, lang } = req.body || {};

    if (!audio) {
      return badRequest("No audio data received");
    }
    if (typeof audio !== 'string') {
      // Buffer.from(<number>, 'base64') throws, which used to surface as a 500.
      return badRequest("Audio data must be a Base64-encoded string");
    }
    if (!lang) {
      return badRequest("No language data received");
    }
    if (typeof lang !== 'string') {
      // lang is interpolated into the model prompt, so only accept plain text.
      return badRequest("Language must be a string");
    }
    if (mimeType != null && typeof mimeType !== 'string') {
      return badRequest("MIME type must be a string");
    }

    // Decode Base64 audio data
    const audioBuffer = Buffer.from(audio, 'base64');
    if (audioBuffer.length === 0) {
      // Input that is not valid Base64 decodes to nothing.
      return badRequest("No audio data received");
    }
    const mimeTypeFromHeader = mimeType || 'audio/wav';

    // Perform transcription
    const transcription = await transcribeAudio(audioBuffer, mimeTypeFromHeader, lang);
    console.log(transcription);
    return res.status(200).json({ message: "Transcription completed", transcription });
  } catch (error) {
    console.error("Error processing audio: ", error);
    return res.status(500).json({ message: "Internal server error", error: error.message });
  }
};


module.exports = {handleAudio};
9 changes: 9 additions & 0 deletions server/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions server/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"license": "ISC",
"description": "",
"dependencies": {
"@google/generative-ai": "^0.19.0",
"body-parser": "^1.20.2",
"cors": "^2.8.5",
"dotenv": "^16.4.5",
Expand Down
8 changes: 8 additions & 0 deletions server/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const http = require('http');
const cors = require('cors');
const app = express();
const {Server} = require('socket.io');
const {handleAudio} = require("./audioHandler");

const SERVER_PORT = process.env.SERVER_PORT;
const CLIENT_PORT = process.env.CLIENT_PORT;
Expand All @@ -14,6 +15,9 @@ const server = http.createServer(app);

//middleware
app.use(cors());
// POST /chat receives recorded audio Base64-encoded inside a JSON body.
// express.json() rejects bodies above its default 100kb limit, which a few
// seconds of audio already exceeds, so raise the limit for JSON payloads.
app.use(express.json({ limit: '10mb' }));
app.use(express.urlencoded({ extended: true }));

//routings
app.get('/states', (req, res) => {
Expand All @@ -40,6 +44,7 @@ app.get('/areas/:state/:district/:subdistrict', (req, res) => {
const areas = states[state][district][subdistrict] || [];
res.json(areas);
});
// Speech-to-text route: POST /chat is served by handleAudio from ./audioHandler.
app.post('/chat', handleAudio);

//socket.io connections
const io = new Server(server, {
Expand All @@ -49,6 +54,8 @@ const io = new Server(server, {
}

})


io.on("connection", (socket)=>{
console.log(socket.id);

Expand All @@ -72,3 +79,4 @@ io.on("connection", (socket)=>{
// Start the HTTP server on SERVER_PORT and log the local URL once it is listening.
server.listen(SERVER_PORT, function onListening() {
  const address = `http://localhost:${SERVER_PORT}`;
  console.log(`Server is running on ${address}`);
});

Loading