"""Streamlit app: generate images from a text prompt, optionally dictated by voice.

Reconstructed from a collapsed patch that replaces the old
``textToImageGenerator.py`` with this speech-enabled variant and pins
``PyAudio==0.2.14`` in requirements.txt for microphone capture.
"""

import streamlit as st
from PIL import Image  # noqa: F401  -- kept from the original module; pipe output is already PIL
import torch
from diffusers import StableDiffusionPipeline
import speech_recognition as sr


@st.cache_resource
def load_pipeline():
    """Load and cache the Stable Diffusion v1.4 pipeline.

    Uses half-precision weights on CUDA to save memory; falls back to full
    precision on CPU, where float16 inference is not supported.

    Returns:
        StableDiffusionPipeline: the pipeline moved to the selected device.
    """
    model_id = "CompVis/stable-diffusion-v1-4"
    device = "cuda" if torch.cuda.is_available() else "cpu"

    if device == "cpu":
        pipe = StableDiffusionPipeline.from_pretrained(model_id)
    else:
        # NOTE(review): `revision="fp16"` is deprecated in recent diffusers
        # releases; `variant="fp16"` is the modern spelling — confirm the
        # pinned diffusers==0.30.3 still honors `revision` before upgrading.
        pipe = StableDiffusionPipeline.from_pretrained(
            model_id, revision="fp16", torch_dtype=torch.float16
        )

    pipe.to(device)
    return pipe


def recognize_speech():
    """Capture one utterance from the default microphone and transcribe it.

    Uses the Google Web Speech API via the SpeechRecognition package, with a
    one-second ambient-noise calibration before listening.

    Returns:
        str | None: the recognized text, or None when no microphone is
        available, the audio was unintelligible, or the API request failed.
    """
    recognizer = sr.Recognizer()
    try:
        # Fix: opening the microphone raises OSError when no input device
        # exists (or PyAudio is missing a backend); the original let that
        # exception crash the Streamlit script instead of reporting it.
        with sr.Microphone() as source:
            st.info("Listening for speech...")
            recognizer.adjust_for_ambient_noise(source, duration=1)
            audio = recognizer.listen(source)
    except OSError as e:
        st.error(f"Microphone unavailable: {e}")
        return None

    try:
        st.info("Recognizing speech...")
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        st.error("Could not understand audio")
        return None
    except sr.RequestError as e:
        st.error(f"Could not request results; {e}")
        return None

    st.success(f"Recognized: {text}")
    return text


def speech_textToImageGenerator():
    """Render the page: optional speech capture, prompt form, image output."""
    # The speech button lives OUTSIDE the form: form widgets only deliver
    # values on submit, while this button must trigger an immediate rerun
    # that stores the transcript before the form renders.
    if st.button("Use Speech Recognition"):
        recognized_text = recognize_speech()
        if recognized_text:
            st.session_state.input_text = recognized_text

    # Text input form; pre-filled with the last recognized transcript, if any.
    with st.form("Text_input_form"):
        prompt = st.text_input(
            "Enter a prompt to generate Image:",
            st.session_state.get("input_text", ""),
        )
        submit_button = st.form_submit_button("Generate Image")

    # Generate only when the form was submitted with a non-empty prompt.
    if prompt and submit_button:
        try:
            pipe = load_pipeline()

            # autocast applies only on CUDA; the CPU path runs full precision.
            if torch.cuda.is_available():
                with torch.autocast("cuda"):
                    output = pipe(prompt + " 4k, High Resolution", guidance_scale=8.5)
            else:
                output = pipe(prompt + " 4k, High Resolution", guidance_scale=8.5)

            image = output.images[0]
            st.image(image, caption="Generated Image", use_column_width=True)

            # Optionally, save the image.
            # NOTE(review): path is relative to the CWD the app was launched
            # from — presumably the repo root; TODO confirm.
            image.save('src/apps/pages/programs/ImageGenerators/generated_image.png')
            st.success("Image generated successfully!")
        except Exception as e:
            # Broad catch is deliberate: surface any pipeline/model failure
            # in the UI rather than crashing the Streamlit script.
            st.error(f"An error occurred: {str(e)}")
    elif submit_button:
        st.warning("Please enter a prompt first.")