Updated the code to date with README #9

Open · wants to merge 22 commits into base: main
27 changes: 26 additions & 1 deletion README.md
@@ -1,2 +1,27 @@
# TextOrVideoSummarizer-G18-PS24
Repo for Text/Video summarizer
This repository is dedicated to the project ***Text/Video Summarizer***.
### Project Overview:
This tool summarizes the data provided by the user. The input can be a document, a video file, a blog link, or a YouTube URL. The app can also run in the background and push a notification whenever a new video is uploaded to the YouTube channels you provide.
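Under the hood, the new-upload check boils down to two YouTube Data API calls, as in the sketch below. It mirrors `WebApp/pages/pe.py` from this PR; the `YOUTUBE_API_KEY` environment variable and the `latest_video_id` helper name are illustrative assumptions, not part of the committed code.
```python
import os
import googleapiclient.discovery

def latest_video_id(channel_handle):
    """Return the videoId of the channel's most recent upload, or None."""
    youtube = googleapiclient.discovery.build(
        "youtube", "v3", developerKey=os.getenv("YOUTUBE_API_KEY"))
    # Resolve the handle to a channel ID.
    channels = youtube.channels().list(
        part="snippet,contentDetails,statistics", forHandle=channel_handle).execute()
    if not channels.get("items"):
        return None
    channel_id = channels["items"][0]["id"]
    # Fetch the newest upload; comparing its ID against the last one seen
    # decides whether a notification should be pushed.
    search = youtube.search().list(
        part="snippet", channelId=channel_id, maxResults=1, order="date").execute()
    return search["items"][0]["id"]["videoId"] if search.get("items") else None
```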

### Steps to use the code:
**Step 1**: Clone the repository into a directory of your choice:
```
git clone https://github.com/kmitofficial/TextOrVideoSummarizer-G18-PS24.git
```
**Step 2**: From the _WebApp_ directory, install the required libraries using the command below:
```
pip install -r requirements.txt
```
**Step 3**: Create a new file named *.env* in the same directory.
**Step 4**: Add your *Gemini API key* to the *.env* file as shown below.
```
"API_KEY" = "your_api_key"
```
A Gemini API key can be created [here](https://aistudio.google.com/app/apikey).
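For reference, the application loads this key with `python-dotenv` and configures the Gemini client roughly as in `WebApp/load.py`:
```python
import os
import google.generativeai as genai
from dotenv import load_dotenv

load_dotenv()                                  # reads the .env file created in Step 3
genai.configure(api_key=os.getenv("API_KEY"))  # the key added in Step 4
model = genai.GenerativeModel("gemini-pro")
```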
**Step 5**: Finally, run the application with the commands below.
```
cd ./WebApp
streamlit run app.py
```
Now you are ready to use the web application.
Upload _documents_, _YouTube links_, or _blog links_, and get a short, crisp summary.
25 changes: 25 additions & 0 deletions WebApp/.gitignore
@@ -0,0 +1,25 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/env
/venv
/node_modules
/.pnp
.pnp.js

# testing
/coverage

# production
/build

# misc
.DS_Store
.env.local
.env.development.local
.env.test.local
.env.production.local

npm-debug.log*
yarn-debug.log*
yarn-error.log*
5 changes: 5 additions & 0 deletions WebApp/Procfile
@@ -0,0 +1,5 @@
web: streamlit run --server.port $PORT app.py
7 changes: 7 additions & 0 deletions WebApp/app.py
@@ -0,0 +1,7 @@
import streamlit as st
st.set_page_config(
    page_title="Summarize EAZY",
    page_icon="fav2.jpg",
    layout="wide",
)
st.header("Welcome to **SUMMARIZE EAZY**")
19 changes: 19 additions & 0 deletions WebApp/document_data.py
@@ -0,0 +1,19 @@
import streamlit as st
from PyPDF2 import PdfReader
import docx2txt
def document_extract():
    uploaded_file = st.file_uploader("Choose a file", accept_multiple_files=True)
    content = ""
    if uploaded_file:
        for x in uploaded_file:
            if x.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                content += docx2txt.process(x)
            elif x.type == "application/pdf":
                pdf_reader = PdfReader(x)
                for page in pdf_reader.pages:
                    content += page.extract_text()
            elif x.type == "text/plain":
                content += x.read().decode("utf-8")
            else:
                st.error("Please provide files of type **.docx**, **.pdf**, or **.txt**")
    return content
11 changes: 11 additions & 0 deletions WebApp/load.py
@@ -0,0 +1,11 @@
import google.generativeai as genai
import os
from dotenv import load_dotenv

# Add a .env file in this directory containing your Gemini API key as API_KEY="your_api_key"
load_dotenv()
GOOGLE_API_KEY = os.getenv("API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('gemini-pro')

def output(text):
    # Ask Gemini for a summary that is not overly short.
    response = model.generate_content("Summarize this and also don't make this too short " + text)
    return response
8 changes: 8 additions & 0 deletions WebApp/pages/document.py
@@ -0,0 +1,8 @@
import streamlit as st
import document_data
import load
data = document_data.document_extract()
if data:
    response = load.output(data)
    for chunk in response:
        st.write(chunk.text)
39 changes: 39 additions & 0 deletions WebApp/pages/pe.py
@@ -0,0 +1,39 @@
import os
import streamlit as st
import googleapiclient.discovery
import googleapiclient.errors

def main():
    # Read the YouTube Data API key from the environment instead of hard-coding it
    # (the YOUTUBE_API_KEY variable name is just one possible choice).
    api_key = os.getenv("YOUTUBE_API_KEY")
    channelHandle = st.text_input("Enter channel handle")

    api_service_name = "youtube"
    api_version = "v3"

    # Create an API client
    if st.button("Enter"):
        youtube = googleapiclient.discovery.build(
            api_service_name, api_version, developerKey=api_key)

        # Resolve the handle to a channel ID.
        request = youtube.channels().list(
            part="snippet,contentDetails,statistics",
            forHandle=channelHandle
        )
        response = request.execute()

        if "items" in response and len(response["items"]) > 0:
            channelID = response["items"][0]["id"]
            # Fetch the most recent upload from that channel.
            request = youtube.search().list(
                part="snippet",
                channelId=channelID,
                maxResults=1,
                order="date"
            )
            response = request.execute()
            st.write(response["items"][0]["id"]["videoId"])
            print(response["items"][0]["id"]["videoId"])
        else:
            st.write("No channel found for the provided handle.")

if __name__ == "__main__":
    main()
46 changes: 46 additions & 0 deletions WebApp/pages/sample2.py
@@ -0,0 +1,46 @@
import os
import streamlit as st
import google_auth_oauthlib.flow
import googleapiclient.discovery
import googleapiclient.errors

scopes = ["https://www.googleapis.com/auth/youtube.readonly"]



def main():
    channelHandle = st.text_input("Enter channel handle")
    # Disable OAuthlib's HTTPS verification when running locally.
    # *DO NOT* leave this option enabled in production.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    api_service_name = "youtube"
    api_version = "v3"
    client_secrets_file = "desktop.json"

    # Get credentials and create an API client
    if st.button("Enter"):
        flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file(
            client_secrets_file, scopes)
        credentials = flow.run_local_server(port=0)
        youtube = googleapiclient.discovery.build(
            api_service_name, api_version, credentials=credentials)

        # Resolve the handle to a channel ID.
        request = youtube.channels().list(
            part="snippet,contentDetails,statistics",
            forHandle=channelHandle
        )
        response = request.execute()
        channelID = response["items"][0]["id"]
        # Fetch the most recent upload from that channel.
        request = youtube.search().list(
            part="snippet",
            channelId=channelID,
            maxResults=1,
            order="date"
        )
        response = request.execute()
        st.write(response["items"][0]["id"]["videoId"])
        print(response["items"][0]["id"]["videoId"])

if __name__ == "__main__":
    main()
7 changes: 7 additions & 0 deletions WebApp/pages/text.py
@@ -0,0 +1,7 @@
import load
import streamlit as st
text = st.text_input("Enter the text you want to summarize..")
if text:
    response = load.output(text)
    for chunk in response:
        st.write(chunk.text)
8 changes: 8 additions & 0 deletions WebApp/pages/url.py
@@ -0,0 +1,8 @@
import streamlit as st
import load
import website_data
url = st.text_input("Enter the url", placeholder="URL....")
if url:
    content = website_data.scrape(url)
    if content:
        response = load.output(content)
        for chunk in response:
            st.write(chunk.text)
    else:
        st.error("Could not extract content from the given URL.")
34 changes: 34 additions & 0 deletions WebApp/pages/user_privacy_policy.py
@@ -0,0 +1,34 @@
import streamlit as st
st.write("""Privacy Assurance Statement:

Hello there, dear visitor! We're delighted to have you here. Before we delve into our shared journey, we want to extend our commitment to your privacy. We understand the value of trust and the importance of safeguarding your personal data. Allow us to illuminate our approach:

Your Data, Your Sanctuary:

Rest assured, any data we collect from you is akin to a sacred treasure trove—kept securely within our fortress of confidentiality. Your privacy isn't just a policy; it's a profound principle we uphold steadfastly.

Guardians of Your Trust:

Think of us as the vigilant guardians of your digital realm. Your data isn't just a string of characters to us; it's a testament to the trust you place in our hands. We honor this trust by implementing robust measures to ensure its safety.

Fortress of Confidentiality:

Within our digital citadel, your data finds refuge amidst layers of encryption and fortified defenses. Access is strictly regulated, limited only to those entrusted with its custodianship.

A Bond of Integrity:

Our commitment to privacy isn't merely a legal obligation; it's a bond forged in the fires of integrity. We pledge to never barter, sell, or share your data with third parties, for it belongs exclusively to you.

Transparency Illuminates Trust:

In our relationship, transparency serves as the beacon guiding our interactions. Should you have any queries or seek clarification regarding your data, we're here to illuminate every aspect, fostering a relationship built on transparency and trust.

Your Consent Matters:

Your journey with us is founded on the bedrock of consent. Every interaction, every byte of data shared, is a testament to your volition. Your permission isn't just sought; it's revered.

In Conclusion:

As you traverse our digital landscape, remember this: your privacy isn't just a priority; it's our sacred promise. Together, let's embark on a journey where trust flourishes, and your data finds solace within the sanctuary of our commitment to privacy.

Thank you for entrusting us with your digital voyage.""")
Empty file added WebApp/pages/video.py
Empty file.
12 changes: 12 additions & 0 deletions WebApp/pages/youtube_video.py
@@ -0,0 +1,12 @@
import streamlit as st
import load
from langchain_community.document_loaders import YoutubeLoader
url = st.text_input("Enter the url", placeholder="URL....")
if url:
    loader = YoutubeLoader.from_youtube_url(
        url, add_video_info=False
    )
    transcript = loader.load()
    response = load.output(str(transcript))
    for chunk in response:
        st.write(chunk.text)
Binary file added WebApp/requirements.txt
Binary file not shown.
26 changes: 26 additions & 0 deletions WebApp/video_c.py
@@ -0,0 +1,26 @@
from moviepy.editor import *
import assemblyai as aai
import os
import tempfile

# Read the AssemblyAI API key from the environment rather than committing it
# (the ASSEMBLYAI_API_KEY variable name is just one possible choice).
aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")

# Load the mp4 file
video = VideoFileClip(r"C:\Users\HP\OneDrive\Documents\app\summarizer\summarizer\video_file.mp4")

# Create a temporary file to write the audio
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio_file:
    # Extract audio from video and write to temporary file
    video.audio.write_audiofile(temp_audio_file.name, codec='mp3')

# Close the file to ensure all data is written
temp_audio_file.close()

# Transcribe audio to text
transcriber = aai.Transcriber()
transcript = transcriber.transcribe(temp_audio_file.name)


text_from_audio = transcript.text

print(text_from_audio)
52 changes: 52 additions & 0 deletions WebApp/website_data.py
@@ -0,0 +1,52 @@
import requests
from bs4 import BeautifulSoup


def scrape(url):
    URL = url
    headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246"}
    # The user agent above is for Edge on Windows 10. You can substitute your own browser's user agent.

    r = requests.get(url=URL, headers=headers)
    if r.status_code == 200:
        soup = BeautifulSoup(r.content, 'html.parser')
        if soup.title and soup.title.string and "404" in soup.title.string:
            print('The page indicates a 404 error')
            return None

        # Common containers for article/blog content, tried in order.
        possible_selectors = [
            {'tag': 'div', 'class': 'post-content'},
            {'tag': 'div', 'class': 'article-content'},
            {'tag': 'article', 'class': None},
            {'tag': 'div', 'class': 'content'},
            {'tag': 'div', 'id': 'content'},
            {'tag': 'main', 'class': None},
            {'tag': 'div', 'class': 'blog-post'},
            {'tag': 'div', 'class': 'entry-content'},
        ]

        content_div = None
        for selector in possible_selectors:
            if selector.get('class'):
                content_div = soup.find(selector['tag'], class_=selector['class'])
            elif selector.get('id'):
                content_div = soup.find(selector['tag'], id=selector['id'])
            else:
                content_div = soup.find(selector['tag'])
            if content_div:
                break

        if content_div:
            # Filter out unwanted tags
            for unwanted in content_div(['aside', 'button', 'footer', 'nav', 'form']):
                unwanted.decompose()
            # Extract text from the div
            blog_content = content_div.get_text(strip=True, separator="\n")
            return blog_content

        print('Content not found using predefined selectors')
        return None

if __name__ == "__main__":
    result = scrape("https://rapidfireart.com/2017/04/06/lesson-1-how-to-sketch/")
    print(result)