Skip to content

Commit

Permalink
speech to text to front end
Browse files Browse the repository at this point in the history
  • Loading branch information
Samyak-SH committed Sep 14, 2024
1 parent 9bbe785 commit c036190
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 1 deletion.
46 changes: 45 additions & 1 deletion client/src/components/Negotiate/Chat.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { AuthContext } from '../context/Authcontext';
import { useNavigate } from 'react-router-dom';
import { LuUserCircle2 } from "react-icons/lu";
import { IoArrowBackOutline } from "react-icons/io5";
import { AudioRecorder } from 'react-audio-voice-recorder';
import { MdCall } from "react-icons/md";
import { IoVideocam } from "react-icons/io5";
import { IoMdMore } from "react-icons/io";
Expand All @@ -30,6 +31,7 @@ export default function Chat() {
const navigate = useNavigate();
const [currperson, setperson] = useState();
const [loading, setLoading] = useState(true);
const [lang, setLang] = useState("kannada");

const chatDisplayRef = useRef(null); // Ref to chat display container
const bottomRef = useRef(null); // Ref to the last message to scroll into view
Expand Down Expand Up @@ -226,7 +228,28 @@ export default function Chat() {
value={message}
onChange={handleChange}
/>
<Recorder />
<div>
<AudioRecorder
onRecordingComplete={(blob)=>handleAudioUpload(blob, lang)} //sending POST request
audioTrackConstraints={{
noiseSuppression: true,
echoCancellation: true,
// autoGainControl,
// channelCount,
// deviceId,
// groupId,
// sampleRate,
// sampleSize,
}}
onNotAllowedOrFound={(err) => console.table(err)}
downloadOnSavePress={false}
downloadFileExtension="webm"
mediaRecorderOptions={{
audioBitsPerSecond: 128000,
}}
showVisualizer={true}
/>
</div>
<button onClick={handleClick}>Send</button>
</div>
</div>
Expand Down Expand Up @@ -294,4 +317,25 @@ const loadMessage = async (currentRoomID) => {
console.error("Error fetching messages: ", error);
}

}

/**
 * Uploads a recorded audio blob to the server's /chat endpoint for transcription.
 *
 * The blob is read as a data URL, the Base64 payload is extracted, and it is
 * POSTed as JSON together with the blob's MIME type and the target language.
 *
 * @param {Blob} blob - Recorded audio as delivered by the recorder component.
 * @param {string} lang - Language the transcription should be returned in.
 * @returns {Promise<string|undefined>} The transcription text, or undefined
 *   when reading, uploading, or parsing fails (the error is logged).
 */
const handleAudioUpload = async (blob, lang) => {
  try {
    // Wrap the callback-based FileReader so read failures surface as rejections
    // instead of being lost inside an event handler.
    const base64data = await new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = () => resolve(String(reader.result).split(',')[1]); // Extract Base64 part
      reader.onerror = () => reject(reader.error || new Error('Failed to read recorded audio'));
      reader.readAsDataURL(blob);
    });

    const response = await fetch(`http://localhost:${SERVER_PORT}/chat`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json', // Send JSON data
      },
      body: JSON.stringify({
        audio: base64data,
        mimeType: blob.type,
        lang: lang,
      }),
    });

    // fetch only rejects on network failure; HTTP error statuses must be checked explicitly.
    if (!response.ok) {
      throw new Error(`Transcription request failed with status ${response.status}`);
    }

    const result = await response.json();
    console.log(result.transcription);
    return result.transcription;
  } catch (error) {
    console.error("Error uploading audio: ", error);
    return undefined;
  }
};
53 changes: 53 additions & 0 deletions server/audioHandler.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
const { GoogleGenerativeAI } = require("@google/generative-ai");
const mime = require('mime-types');

// Generative AI client constructed with the GOOGLE_API_KEY environment variable.
const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY);
// Model handle used by transcribeAudio below for every transcription request.
const model = genAI.getGenerativeModel({ model: "gemini-1.5-flash-latest" });

/**
 * Sends audio to the generative model and returns its transcription text.
 *
 * @param {Buffer|Uint8Array|ArrayBuffer} audioBuffer - Raw audio bytes.
 * @param {string} [mimeType] - MIME type of the audio; 'audio/wav' when falsy.
 * @param {string} lang - Language requested for the transcription.
 * @returns {Promise<string>} Text produced by the model for the prompt.
 */
const transcribeAudio = async (audioBuffer, mimeType, lang) => {
  console.log("transcribing audio");

  // The model expects inline media as a Base64 string plus its MIME type.
  const encodedAudio = Buffer.from(audioBuffer).toString("base64");
  const audioPart = {
    inlineData: { data: encodedAudio, mimeType: mimeType || 'audio/wav' },
  };
  const instruction = `transcribe the audio and give the transcription in ${lang} without any timestamps and new line characters`;

  const { response } = await model.generateContent([audioPart, instruction]);
  return response.text();
};


/**
 * Express handler for audio transcription requests.
 *
 * Expects a JSON body of the form { audio, mimeType, lang } where `audio` is a
 * Base64-encoded recording and `lang` is the language to transcribe into.
 * Responds with:
 *   - 200 { message, transcription } on success,
 *   - 400 { message } when audio/lang are missing, not strings, or the audio
 *     decodes to zero bytes,
 *   - 500 { message, error } when transcription throws.
 *
 * @param {object} req - Express request; `req.body` is the parsed JSON payload.
 * @param {object} res - Express response used to send the JSON reply.
 * @returns {Promise<*>}
 */
const handleAudio = async (req, res) => {
  console.log("Received POST request");
  try {
    // req.body is undefined when no body parser matched the request's content type.
    const { audio, mimeType, lang } = req.body || {};
    if (!audio) {
      return res.status(400).json({ message: "No audio data received" });
    }
    if (!lang) {
      return res.status(400).json({ message: "No language data received" });
    }
    // Client-supplied JSON can carry any type; reject non-strings up front so
    // they produce a 400 instead of a TypeError surfacing as a 500.
    if (typeof audio !== 'string' || typeof lang !== 'string') {
      return res.status(400).json({ message: "Audio and language must be strings" });
    }

    // Decode Base64 audio data
    const audioBuffer = Buffer.from(audio, 'base64');
    if (audioBuffer.length === 0) {
      // Input that is not valid Base64 decodes to an empty buffer.
      return res.status(400).json({ message: "No audio data received" });
    }
    const effectiveMimeType = typeof mimeType === 'string' && mimeType ? mimeType : 'audio/wav';

    // Perform transcription
    const transcription = await transcribeAudio(audioBuffer, effectiveMimeType, lang);
    console.log(transcription);
    res.status(200).json({ message: "Transcription completed", transcription });

  } catch (error) {
    console.error("Error processing audio: ", error);
    res.status(500).json({ message: "Internal server error", error: error.message });
  }
};


module.exports = {handleAudio};
9 changes: 9 additions & 0 deletions server/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions server/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"license": "ISC",
"description": "",
"dependencies": {
"@google/generative-ai": "^0.19.0",
"body-parser": "^1.20.2",
"cors": "^2.8.5",
"dotenv": "^16.4.5",
Expand Down
8 changes: 8 additions & 0 deletions server/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const http = require('http');
const cors = require('cors');
const app = express();
const {Server} = require('socket.io');
const {handleAudio} = require("./audioHandler");

const SERVER_PORT = process.env.SERVER_PORT;
const CLIENT_PORT = process.env.CLIENT_PORT;
Expand All @@ -14,6 +15,9 @@ const server = http.createServer(app);

//middleware
app.use(cors());
// Recorded audio is sent to POST /chat as Base64 inside a JSON body, which
// quickly exceeds the JSON parser's default 100kb limit; raise it so longer
// recordings are not rejected with 413 before reaching the handler.
app.use(express.json({ limit: '10mb' }));
app.use(express.urlencoded({ extended: true }));
// app.use(express.raw({ type: 'audio/wav', limit: '10mb' }));

//routings
app.get('/states', (req, res) => {
Expand All @@ -40,6 +44,7 @@ app.get('/areas/:state/:district/:subdistrict', (req, res) => {
const areas = states[state][district][subdistrict] || [];
res.json(areas);
});
// POST /chat: audio transcription endpoint; handleAudio is imported from ./audioHandler.
app.post('/chat', handleAudio);

//socket.io connections
const io = new Server(server, {
Expand All @@ -49,6 +54,8 @@ const io = new Server(server, {
}

})


io.on("connection", (socket)=>{
console.log(socket.id);

Expand All @@ -72,3 +79,4 @@ io.on("connection", (socket)=>{
// Start the HTTP server on SERVER_PORT (read from process.env) and log its local URL once listening.
server.listen(SERVER_PORT, () => {
console.log(`Server is running on http://localhost:${SERVER_PORT}`);
});

0 comments on commit c036190

Please sign in to comment.