-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
83 lines (64 loc) · 2.4 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import os
import requests
from dotenv import find_dotenv, load_dotenv
from transformers import pipeline
import streamlit as st
import tensorflow
# Load variables from the .env file located by find_dotenv() into the process
# environment before any of them are read below.
load_dotenv(find_dotenv())

# Hugging Face API token used for the hosted inference request in
# text2speech(); None when the variable is not set.
HUGGINGFACE_KEY = os.environ.get("HUGGINGFACE_KEY")
def img2text(url):
    """Return a generated caption for an image.

    Builds an ``image-to-text`` pipeline backed by the
    ``Salesforce/blip-image-captioning-large`` model, runs it on *url* and
    returns the ``generated_text`` of the first result. The caption is also
    printed to stdout.

    Args:
        url: Image reference handed straight to the pipeline. ``main`` passes
            the name of a file it has just written to disk.

    Returns:
        The caption text produced by the model.
    """
    captioner = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-large",
    )
    predictions = captioner(url)
    caption = predictions[0]["generated_text"]
    print("Result:", caption)
    return caption
def generateStory(scenario):
    """Generate a very short story from an image caption using GPT-2.

    The caption is substituted into a fixed prompt (instructions plus one
    worked example) and passed to a ``text-generation`` pipeline. Only the
    newly generated continuation is returned, not the prompt itself, so the
    result can be shown and spoken as the story. The story is also printed to
    stdout.

    Args:
        scenario: Text describing the scene, e.g. the caption from img2text().

    Returns:
        The generated continuation with surrounding whitespace removed.
    """
    template = """
You are a skilled storyteller. Your task is to create a short, engaging story based on the provided context. The story should be imaginative and concise, not exceeding 30 words.
Context: {scenario}
Example:
Context: They are having a conversation at a table with a cup of coffee.
Story: "And then she whispered a secret that changed everything," he said over coffee, eyes gleaming.
Story:
"""
    prompt = template.format(scenario=scenario)
    story_generator = pipeline("text-generation", model="gpt2")
    outputs = story_generator(
        prompt,
        max_new_tokens=40,
        num_return_sequences=1,
        # By default the pipeline returns prompt + continuation; without this
        # flag the whole instruction prompt would be returned as the "story".
        return_full_text=False,
    )
    story = outputs[0]["generated_text"].strip()
    print("Story:", story)
    return story
def text2speech(message):
    """Synthesize *message* to speech and save it as ``audio.flac``.

    Posts the text to the hosted Hugging Face inference endpoint for the
    ``espnet/kan-bayashi_ljspeech_vits`` model, authenticating with
    ``HUGGINGFACE_KEY``, and writes the response body to ``audio.flac`` in the
    current working directory.

    Args:
        message: Text to convert to speech.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
    headers = {"Authorization": f"Bearer {HUGGINGFACE_KEY}"}
    payloads = {
        "inputs": message,
    }
    # requests has no default timeout; bound the wait so the app cannot hang.
    response = requests.post(API_URL, headers=headers, json=payloads, timeout=120)
    # Fail before touching the file: an error response carries a JSON body,
    # which must not be saved as if it were audio data.
    response.raise_for_status()
    with open("audio.flac", "wb") as file:
        file.write(response.content)
# Manual end-to-end run without the Streamlit UI (kept for reference):
# scenario = img2text("conversation.jpg")
# story = generateStory(scenario)
# text2speech(story)
def main():
    """Render the Streamlit page: upload a JPG, caption it, narrate a story.

    When an image is uploaded it is saved to the working directory, shown on
    the page, captioned with img2text(), turned into a story with
    generateStory() and synthesized to ``audio.flac`` with text2speech(). The
    caption and story are shown in expanders, followed by an audio player.
    """
    st.set_page_config(
        page_title="image", page_icon="🤖"
    )
    st.header("Turn your image to an audio story!")
    uploaded_file = st.file_uploader("Choose an image...", type="jpg")
    if uploaded_file is None:
        return

    print(uploaded_file)
    # The file name is supplied by the client; keep only its final path
    # component so the copy is always written inside the working directory.
    image_path = os.path.basename(uploaded_file.name)
    with open(image_path, "wb") as file:
        file.write(uploaded_file.getvalue())
    st.image(
        uploaded_file,
        caption="Uploaded file",
        use_column_width=True,
    )

    scenario = img2text(image_path)
    story = generateStory(scenario)

    # Call the synthesizer; assigning the story to the name ``text2speech``
    # would only shadow the function and never produce audio.flac.
    audio_error = None
    try:
        text2speech(story)
    except requests.RequestException as err:
        audio_error = err

    with st.expander("Scenario: "):
        st.write(scenario)
    with st.expander("Story"):
        st.write(story)

    if audio_error is None:
        st.audio("audio.flac")
    else:
        # Still show the caption and story when only the audio step failed.
        st.error(f"Could not generate audio: {audio_error}")
# Start the app only when this file is executed as a script (for example via
# ``streamlit run app.py``), not when it is imported as a module.
if __name__ == "__main__":
    main()