Updated the code to date with README #9

Open · wants to merge 22 commits into base: main
27 changes: 26 additions & 1 deletion README.md
@@ -1,2 +1,27 @@
# TextOrVideoSummarizer-G18-PS24
Repo for Text/Video summarizer
This repository is dedicated to the project ***Text/Video Summarizer***.
### Project Overview:
This tool summarizes the data provided by the user. The input can be a document, a video file, a blog link, or a YouTube URL. The app can also run in the background and push a notification whenever a new video is uploaded to the YouTube channels you provide.
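Under the hood, the new-upload check boils down to two YouTube Data API calls, as in the sketch below. It mirrors `WebApp/pages/pe.py` from this PR; the `YOUTUBE_API_KEY` environment variable and the `latest_video_id` helper name are illustrative assumptions, not part of the committed code.
```python
import os
import googleapiclient.discovery

def latest_video_id(channel_handle):
    """Return the videoId of the channel's most recent upload, or None."""
    youtube = googleapiclient.discovery.build(
        "youtube", "v3", developerKey=os.getenv("YOUTUBE_API_KEY"))
    # Resolve the handle to a channel ID.
    channels = youtube.channels().list(
        part="snippet,contentDetails,statistics", forHandle=channel_handle).execute()
    if not channels.get("items"):
        return None
    channel_id = channels["items"][0]["id"]
    # Fetch the newest upload; comparing its ID against the last one seen
    # decides whether a notification should be pushed.
    search = youtube.search().list(
        part="snippet", channelId=channel_id, maxResults=1, order="date").execute()
    return search["items"][0]["id"]["videoId"] if search.get("items") else None
```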

### Steps to use the code:
**Step 1**: Clone the repository into a directory of your choice:
```
git clone https://github.com/kmitofficial/TextOrVideoSummarizer-G18-PS24.git
```
**Step 2**: From the _WebApp_ directory, install the required libraries using the command below:
```
pip install -r requirements.txt
```
**Step 3**: Create a new file named *.env* in the same directory.
**Step 4**: Add your *Gemini API key* to the *.env* file as shown below.
```
"API_KEY" = "your_api_key"
```
A Gemini API key can be created [here](https://aistudio.google.com/app/apikey).
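For reference, the application loads this key with `python-dotenv` and configures the Gemini client roughly as in `WebApp/load.py`:
```python
import os
import google.generativeai as genai
from dotenv import load_dotenv

load_dotenv()                                  # reads the .env file created in Step 3
genai.configure(api_key=os.getenv("API_KEY"))  # the key added in Step 4
model = genai.GenerativeModel("gemini-pro")
```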
**Step 5**: Finally, run the application with the commands below.
```
cd ./WebApp
streamlit run app.py
```
Now you are ready to use the web application.
Upload _documents_, _YouTube links_, or _blog links_, and get a short, crisp summary.
25 changes: 25 additions & 0 deletions WebApp/.gitignore
@@ -0,0 +1,25 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/env
/venv
/node_modules
/.pnp
.pnp.js

# testing
/coverage

# production
/build

# misc
.DS_Store
.env.local
.env.development.local
.env.test.local
.env.production.local

npm-debug.log*
yarn-debug.log*
yarn-error.log*
5 changes: 5 additions & 0 deletions WebApp/Procfile
@@ -0,0 +1,5 @@
web: streamlit run --server.port $PORT app.py
7 changes: 7 additions & 0 deletions WebApp/app.py
@@ -0,0 +1,7 @@
import streamlit as st
st.set_page_config(
    page_title="Summarize EAZY",
    page_icon="fav2.jpg",
    layout="wide",
)
st.header("Welcome to **SUMMARIZE EAZY**")
19 changes: 19 additions & 0 deletions WebApp/document_data.py
@@ -0,0 +1,19 @@
import streamlit as st
from PyPDF2 import PdfReader
import docx2txt
def document_extract():
    uploaded_file = st.file_uploader("Choose a file", accept_multiple_files=True)
    content = ""
    if uploaded_file:
        for x in uploaded_file:
            if x.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                content += docx2txt.process(x)
            elif x.type == "application/pdf":
                pdf_reader = PdfReader(x)
                for page in pdf_reader.pages:
                    content += page.extract_text()
            elif x.type == "text/plain":
                content += x.read().decode("utf-8")
            else:
                st.error("Please provide files of type **.docx**, **.pdf**, or **.txt**")
    return content
11 changes: 11 additions & 0 deletions WebApp/load.py
@@ -0,0 +1,11 @@
import google.generativeai as genai
import os
from dotenv import load_dotenv

# Add a .env file in this directory containing your Gemini API key as API_KEY="your_api_key"
load_dotenv()
GOOGLE_API_KEY = os.getenv("API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('gemini-pro')

def output(text):
    # Ask Gemini for a summary that is not overly short.
    response = model.generate_content("Summarize this and also don't make this too short " + text)
    return response
8 changes: 8 additions & 0 deletions WebApp/pages/document.py
@@ -0,0 +1,8 @@
import streamlit as st
import document_data
import load
data = document_data.document_extract()
if data:
    response = load.output(data)
    for chunk in response:
        st.write(chunk.text)
39 changes: 39 additions & 0 deletions WebApp/pages/pe.py
@@ -0,0 +1,39 @@
import os
import streamlit as st
import googleapiclient.discovery
import googleapiclient.errors

def main():
    # Read the YouTube Data API key from the environment instead of hard-coding it
    # (the YOUTUBE_API_KEY variable name is just one possible choice).
    api_key = os.getenv("YOUTUBE_API_KEY")
    channelHandle = st.text_input("Enter channel handle")

    api_service_name = "youtube"
    api_version = "v3"

    # Create an API client
    if st.button("Enter"):
        youtube = googleapiclient.discovery.build(
            api_service_name, api_version, developerKey=api_key)

        # Resolve the handle to a channel ID.
        request = youtube.channels().list(
            part="snippet,contentDetails,statistics",
            forHandle=channelHandle
        )
        response = request.execute()

        if "items" in response and len(response["items"]) > 0:
            channelID = response["items"][0]["id"]
            # Fetch the most recent upload from that channel.
            request = youtube.search().list(
                part="snippet",
                channelId=channelID,
                maxResults=1,
                order="date"
            )
            response = request.execute()
            st.write(response["items"][0]["id"]["videoId"])
            print(response["items"][0]["id"]["videoId"])
        else:
            st.write("No channel found for the provided handle.")

if __name__ == "__main__":
    main()
46 changes: 46 additions & 0 deletions WebApp/pages/sample2.py
@@ -0,0 +1,46 @@
import os
import streamlit as st
import google_auth_oauthlib.flow
import googleapiclient.discovery
import googleapiclient.errors

scopes = ["https://www.googleapis.com/auth/youtube.readonly"]



def main():
    channelHandle = st.text_input("Enter channel handle")
    # Disable OAuthlib's HTTPS verification when running locally.
    # *DO NOT* leave this option enabled in production.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    api_service_name = "youtube"
    api_version = "v3"
    client_secrets_file = "desktop.json"

    # Get credentials and create an API client
    if st.button("Enter"):
        flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file(
            client_secrets_file, scopes)
        credentials = flow.run_local_server(port=0)
        youtube = googleapiclient.discovery.build(
            api_service_name, api_version, credentials=credentials)

        # Resolve the handle to a channel ID.
        request = youtube.channels().list(
            part="snippet,contentDetails,statistics",
            forHandle=channelHandle
        )
        response = request.execute()
        channelID = response["items"][0]["id"]
        # Fetch the most recent upload from that channel.
        request = youtube.search().list(
            part="snippet",
            channelId=channelID,
            maxResults=1,
            order="date"
        )
        response = request.execute()
        st.write(response["items"][0]["id"]["videoId"])
        print(response["items"][0]["id"]["videoId"])

if __name__ == "__main__":
    main()
7 changes: 7 additions & 0 deletions WebApp/pages/text.py
@@ -0,0 +1,7 @@
import load
import streamlit as st
text = st.text_input("Enter the text you want to summarize..")
if text:
    response = load.output(text)
    for chunk in response:
        st.write(chunk.text)
8 changes: 8 additions & 0 deletions WebApp/pages/url.py
@@ -0,0 +1,8 @@
import streamlit as st
import load
import website_data
url = st.text_input("Enter the url", placeholder="URL....")
if url:
    content = website_data.scrape(url)
    if content:
        response = load.output(content)
        for chunk in response:
            st.write(chunk.text)
    else:
        st.error("Could not extract content from the given URL.")
34 changes: 34 additions & 0 deletions WebApp/pages/user_privacy_policy.py
@@ -0,0 +1,34 @@
import streamlit as st
st.write("""Privacy Assurance Statement:

Hello there, dear visitor! We're delighted to have you here. Before we delve into our shared journey, we want to extend our commitment to your privacy. We understand the value of trust and the importance of safeguarding your personal data. Allow us to illuminate our approach:

Your Data, Your Sanctuary:

Rest assured, any data we collect from you is akin to a sacred treasure trove—kept securely within our fortress of confidentiality. Your privacy isn't just a policy; it's a profound principle we uphold steadfastly.

Guardians of Your Trust:

Think of us as the vigilant guardians of your digital realm. Your data isn't just a string of characters to us; it's a testament to the trust you place in our hands. We honor this trust by implementing robust measures to ensure its safety.

Fortress of Confidentiality:

Within our digital citadel, your data finds refuge amidst layers of encryption and fortified defenses. Access is strictly regulated, limited only to those entrusted with its custodianship.

A Bond of Integrity:

Our commitment to privacy isn't merely a legal obligation; it's a bond forged in the fires of integrity. We pledge to never barter, sell, or share your data with third parties, for it belongs exclusively to you.

Transparency Illuminates Trust:

In our relationship, transparency serves as the beacon guiding our interactions. Should you have any queries or seek clarification regarding your data, we're here to illuminate every aspect, fostering a relationship built on transparency and trust.

Your Consent Matters:

Your journey with us is founded on the bedrock of consent. Every interaction, every byte of data shared, is a testament to your volition. Your permission isn't just sought; it's revered.

In Conclusion:

As you traverse our digital landscape, remember this: your privacy isn't just a priority; it's our sacred promise. Together, let's embark on a journey where trust flourishes, and your data finds solace within the sanctuary of our commitment to privacy.

Thank you for entrusting us with your digital voyage.""")
Empty file added WebApp/pages/video.py
Empty file.
12 changes: 12 additions & 0 deletions WebApp/pages/youtube_video.py
@@ -0,0 +1,12 @@
import streamlit as st
import load
from langchain_community.document_loaders import YoutubeLoader
url = st.text_input("Enter the url", placeholder="URL....")
if url:
    loader = YoutubeLoader.from_youtube_url(
        url, add_video_info=False
    )
    transcript = loader.load()
    response = load.output(str(transcript))
    for chunk in response:
        st.write(chunk.text)
Binary file added WebApp/requirements.txt
Binary file not shown.
26 changes: 26 additions & 0 deletions WebApp/video_c.py
@@ -0,0 +1,26 @@
from moviepy.editor import *
import assemblyai as aai
import os
import tempfile

# Read the AssemblyAI API key from the environment rather than committing it
# (the ASSEMBLYAI_API_KEY variable name is just one possible choice).
aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")

# Load the mp4 file
video = VideoFileClip(r"C:\Users\HP\OneDrive\Documents\app\summarizer\summarizer\video_file.mp4")

# Create a temporary file to write the audio
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio_file:
    # Extract audio from video and write to temporary file
    video.audio.write_audiofile(temp_audio_file.name, codec='mp3')

# Close the file to ensure all data is written
temp_audio_file.close()

# Transcribe audio to text
transcriber = aai.Transcriber()
transcript = transcriber.transcribe(temp_audio_file.name)


text_from_audio = transcript.text

print(text_from_audio)
52 changes: 52 additions & 0 deletions WebApp/website_data.py
@@ -0,0 +1,52 @@
import requests
from bs4 import BeautifulSoup


def scrape(url):
    URL = url
    headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246"}
    # The user agent above is for Edge on Windows 10. You can substitute your own browser's user agent.

    r = requests.get(url=URL, headers=headers)
    if r.status_code == 200:
        soup = BeautifulSoup(r.content, 'html.parser')
        if soup.title and soup.title.string and "404" in soup.title.string:
            print('The page indicates a 404 error')
            return None

        # Common containers for article/blog content, tried in order.
        possible_selectors = [
            {'tag': 'div', 'class': 'post-content'},
            {'tag': 'div', 'class': 'article-content'},
            {'tag': 'article', 'class': None},
            {'tag': 'div', 'class': 'content'},
            {'tag': 'div', 'id': 'content'},
            {'tag': 'main', 'class': None},
            {'tag': 'div', 'class': 'blog-post'},
            {'tag': 'div', 'class': 'entry-content'},
        ]

        content_div = None
        for selector in possible_selectors:
            if selector.get('class'):
                content_div = soup.find(selector['tag'], class_=selector['class'])
            elif selector.get('id'):
                content_div = soup.find(selector['tag'], id=selector['id'])
            else:
                content_div = soup.find(selector['tag'])
            if content_div:
                break

        if content_div:
            # Filter out unwanted tags
            for unwanted in content_div(['aside', 'button', 'footer', 'nav', 'form']):
                unwanted.decompose()
            # Extract text from the div
            blog_content = content_div.get_text(strip=True, separator="\n")
            return blog_content

        print('Content not found using predefined selectors')
        return None

if __name__ == "__main__":
    result = scrape("https://rapidfireart.com/2017/04/06/lesson-1-how-to-sketch/")
    print(result)