Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added speech to text capability #275

Merged
merged 3 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 113 additions & 4 deletions app/components/chat/BaseChat.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,35 @@ interface BaseChatProps {
enhancePrompt?: () => void;
}

// Microphone toggle for browser speech recognition. Renders a crossed-out
// mic while listening; a single click starts or stops capture depending on
// the current state. Disabled while a response is streaming.
const SpeechRecognitionButton = ({
  isListening,
  onStart,
  onStop,
  disabled,
}: {
  isListening: boolean;
  onStart: () => void;
  onStop: () => void;
  disabled: boolean;
}) => {
  // Pick icon and tooltip from the current listening state up front so the
  // JSX below stays flat.
  const iconClass = isListening ? 'i-ph:microphone-slash' : 'i-ph:microphone';
  const tooltip = isListening ? 'Stop listening' : 'Start speech recognition';

  return (
    <IconButton
      title={tooltip}
      disabled={disabled}
      className={classNames('transition-all', {
        'text-bolt-elements-item-contentAccent': isListening,
      })}
      onClick={isListening ? onStop : onStart}
    >
      <div className={`${iconClass} text-xl`} />
    </IconButton>
  );
};

export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>(
(
{
Expand Down Expand Up @@ -114,6 +143,9 @@ export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>(
const TEXTAREA_MAX_HEIGHT = chatStarted ? 400 : 200;
const [apiKeys, setApiKeys] = useState<Record<string, string>>({});
const [modelList, setModelList] = useState(MODEL_LIST);
const [isListening, setIsListening] = useState(false);
const [recognition, setRecognition] = useState<SpeechRecognition | null>(null);
const [transcript, setTranscript] = useState('');

useEffect(() => {
// Load API keys from cookies on component mount
Expand All @@ -134,8 +166,71 @@ export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>(
initializeModelList().then((modelList) => {
setModelList(modelList);
});
// Feature-detect the Web Speech API (standard or WebKit-prefixed) and, when
// available, build and store a recognizer once on mount.
if (typeof window !== 'undefined' && ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window)) {
  const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
  const recognition = new SpeechRecognition();
  // Keep capturing across pauses and emit partial (interim) results so the
  // textarea updates while the user is still speaking.
  recognition.continuous = true;
  recognition.interimResults = true;

  recognition.onresult = (event) => {
    // Join the top alternative of every result (interim and final) into one
    // running transcript string.
    const transcript = Array.from(event.results)
      .map(result => result[0])
      .map(result => result.transcript)
      .join('');

    setTranscript(transcript);

    // Mirror the transcript into the controlled chat input through a
    // synthetic change event so the normal input pipeline runs.
    if (handleInputChange) {
      const syntheticEvent = {
        target: { value: transcript },
      } as React.ChangeEvent<HTMLTextAreaElement>;
      handleInputChange(syntheticEvent);
    }
  };

  // On engine error, log and drop back to the idle UI state.
  recognition.onerror = (event) => {
    console.error('Speech recognition error:', event.error);
    setIsListening(false);
  };

  // NOTE(review): recognition.onend is not handled — if the engine stops on
  // its own (e.g. after prolonged silence), isListening may stay stuck at
  // true. Worth confirming and wiring an onend handler.
  setRecognition(recognition);
}
}, []);

const startListening = () => {
if (recognition) {
recognition.start();
setIsListening(true);
}
};

// Halt speech capture and return the UI to the idle state.
// (stop() on an inactive recognizer is harmless, so no extra guard needed.)
const stopListening = () => {
  if (!recognition) {
    return;
  }

  recognition.stop();
  setIsListening(false);
};

// Send the current message, cancel any in-flight speech recognition, and
// clear the transcript/input ready for the next message.
//
// Fix: the original nested the input-clearing inside `if (recognition)`,
// so in browsers without Web Speech support the textarea was never cleared
// after sending. Clearing now happens regardless of speech support.
const handleSendMessage = (event: React.UIEvent, messageInput?: string) => {
  if (!sendMessage) {
    return;
  }

  sendMessage(event, messageInput);

  if (recognition) {
    recognition.abort(); // abort (not stop) so no final onresult refills the input
    setIsListening(false);
  }

  setTranscript('');

  // Clear the controlled textarea via a synthetic change event so the
  // normal input pipeline runs.
  if (handleInputChange) {
    const syntheticEvent = {
      target: { value: '' },
    } as React.ChangeEvent<HTMLTextAreaElement>;
    handleInputChange(syntheticEvent);
  }
};

const updateApiKey = (provider: string, key: string) => {
try {
const updatedApiKeys = { ...apiKeys, [provider]: key };
Expand Down Expand Up @@ -229,8 +324,11 @@ export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>(
}

event.preventDefault();

sendMessage?.(event);
if (isStreaming) {
handleStop?.();
return;
}
handleSendMessage?.(event);
}
}}
value={input}
Expand All @@ -255,7 +353,7 @@ export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>(
return;
}

sendMessage?.(event);
handleSendMessage?.(event);
}}
/>
)}
Expand Down Expand Up @@ -284,6 +382,13 @@ export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>(
</>
)}
</IconButton>

<SpeechRecognitionButton
isListening={isListening}
onStart={startListening}
onStop={stopListening}
disabled={isStreaming}
/>
</div>
{input.length > 3 ? (
<div className="text-xs text-bolt-elements-textTertiary">
Expand All @@ -305,7 +410,11 @@ export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>(
<button
key={index}
onClick={(event) => {
sendMessage?.(event, examplePrompt.text);
if (isStreaming) {
handleStop?.();
return;
}
handleSendMessage?.(event, examplePrompt.text);
}}
className="group flex items-center w-full gap-2 justify-center bg-transparent text-bolt-elements-textTertiary hover:text-bolt-elements-textPrimary transition-theme"
>
Expand Down
2 changes: 2 additions & 0 deletions app/types/global.d.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// Global augmentation adding browser APIs missing from the default TS lib.
interface Window {
  // File System Access API directory picker.
  showDirectoryPicker(): Promise<FileSystemDirectoryHandle>;

  // WebKit-prefixed and standard Web Speech API constructors. Declared as
  // always-present so `window.SpeechRecognition || window.webkitSpeechRecognition`
  // type-checks; NOTE(review): not every browser ships these — call sites
  // must feature-detect ('SpeechRecognition' in window) before constructing,
  // as BaseChat's mount effect does.
  webkitSpeechRecognition: typeof SpeechRecognition;
  SpeechRecognition: typeof SpeechRecognition;
}