From 060f111d3e2c2212ff9ead07269f137408928fde Mon Sep 17 00:00:00 2001
From: Yash Kumar Saini <115717039+yashksaini-coder@users.noreply.github.com>
Date: Fri, 25 Oct 2024 17:26:33 +0000
Subject: [PATCH 1/2] Cold Mail generator completed

---
 .../Cold Email generator/app/app.py           | 240 ++++++++++++++++++
 .../Cold Email generator/app/requirements.txt |   7 +
 2 files changed, 247 insertions(+)
 create mode 100644 Generative Models/Cold Email generator/app/app.py
 create mode 100644 Generative Models/Cold Email generator/app/requirements.txt

diff --git a/Generative Models/Cold Email generator/app/app.py b/Generative Models/Cold Email generator/app/app.py
new file mode 100644
index 00000000..77032f77
--- /dev/null
+++ b/Generative Models/Cold Email generator/app/app.py
@@ -0,0 +1,240 @@
+"""Streamlit app that generates cold emails from a job-posting URL.
+
+The page at the URL is scraped, a Groq-hosted LLM extracts the job postings
+from it, and one email is drafted per posting using the name, description,
+skills and portfolio links entered in the form.
+"""
+import html
+import os
+import re
+
+import streamlit as st
+from dotenv import load_dotenv
+from langchain_community.document_loaders import WebBaseLoader
+from langchain_groq import ChatGroq
+from langchain_core.prompts import PromptTemplate
+from langchain_core.output_parsers import JsonOutputParser
+from langchain_core.exceptions import OutputParserException
+
+
+# Load environment variables
+load_dotenv()
+
+
+def clean_text(text):
+    """Reduce scraped page text to plain words separated by single spaces.
+
+    Args:
+        text: Raw text, possibly containing HTML tags and URLs.
+
+    Returns:
+        The text with tags, URLs and non-alphanumeric characters removed and
+        every run of whitespace collapsed to one space.
+    """
+    # Remove HTML tags
+    text = re.sub(r'<[^>]*?>', '', text)
+    # Remove URLs
+    text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', '', text)
+    # Remove special characters but keep every kind of whitespace: dropping
+    # newlines/tabs here would glue the last word of one line to the first
+    # word of the next.
+    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
+    # Collapse whitespace runs to one space and trim both ends
+    return ' '.join(text.split())
+
+
+# Chain class handling the LLM processing
+class Chain:
+    """Wraps the Groq chat model and the two prompts the app sends to it."""
+
+    def __init__(self):
+        # The API key is read from the GROQ_API_KEY environment variable.
+        self.llm = ChatGroq(temperature=0, groq_api_key=os.getenv("GROQ_API_KEY"), model_name="llama-3.1-70b-versatile")
+
+    def extract_jobs(self, cleaned_text):
+        """Ask the LLM to extract job postings from scraped page text.
+
+        Args:
+            cleaned_text: Page text, e.g. the output of clean_text().
+
+        Returns:
+            A list of parsed JSON values; a non-list result is wrapped in a list.
+
+        Raises:
+            OutputParserException: If the model reply cannot be parsed as JSON.
+        """
+        prompt_extract = PromptTemplate.from_template(
+            """
+            ### SCRAPED TEXT FROM WEBSITE:
+            {page_data}
+            ### INSTRUCTION:
+            The scraped text is from the career's page of a website.
+            Acts as a Senior HR manager and Your job is to extract the job postings and return them in JSON format containing the following keys: `role`, `experience`, `skills` and `description`.
+            Only return the valid JSON.
+            ### VALID JSON (NO PREAMBLE):
+            """
+        )
+        chain_extract = prompt_extract | self.llm
+        res = chain_extract.invoke(input={"page_data": cleaned_text})
+        try:
+            json_parser = JsonOutputParser()
+            res = json_parser.parse(res.content)
+        except OutputParserException as err:
+            # Chain the cause so the parser's own message is not lost.
+            raise OutputParserException("Context too big. Unable to parse jobs.") from err
+        return res if isinstance(res, list) else [res]
+
+    def write_mail(self, job, links, user_name, user_about):
+        """Ask the LLM to draft a cold email for one job posting.
+
+        Args:
+            job: The job posting; inserted into the prompt via str().
+            links: Portfolio links offered to the model for inclusion.
+            user_name: Sender name used in the prompt.
+            user_about: Short sender self-description used in the prompt.
+
+        Returns:
+            The content of the model's reply.
+        """
+        prompt_email = PromptTemplate.from_template(
+            """
+            ### JOB DESCRIPTION:
+            {job_description}
+
+            ### INSTRUCTION:
+            You are {user_name}. {user_about}
+            Your job is to write a cold email to the client regarding the job mentioned above, describing how you can contribute to fulfilling their needs.
+            Also, add the most relevant ones from the following links to showcase portfolio: {link_list}
+            Do not provide a preamble.
+            ### EMAIL (NO PREAMBLE):
+
+
+            """
+        )
+        chain_email = prompt_email | self.llm
+        res = chain_email.invoke({"job_description": str(job), "link_list": links, "user_name": user_name, "user_about": user_about})
+        return res.content
+
+
+# Portfolio class using temporary in-memory storage
+class Portfolio:
+    """Keeps skills/links entries in Streamlit session state."""
+
+    def __init__(self):
+        # Initialize the list that stores skills and portfolio links temporarily
+        if 'portfolio' not in st.session_state:
+            st.session_state['portfolio'] = []
+
+    def add_to_portfolio(self, skills, links):
+        """Add the user's skills and portfolio links to temporary storage.
+
+        Nothing is stored when either argument is empty or when an identical
+        entry is already stored, so submitting the same form twice does not
+        duplicate it.
+        """
+        entry = {"skills": skills, "links": links}
+        if skills and links and entry not in st.session_state['portfolio']:
+            st.session_state['portfolio'].append(entry)
+
+    def query_links(self, required_skills):
+        """Query the temporary storage for relevant links based on provided skills.
+
+        Args:
+            required_skills: Skill names as a list, or one comma-separated
+                string (the LLM's JSON is not guaranteed to use a list).
+
+        Returns:
+            The links of at most two stored entries that share a skill with
+            required_skills; skills are compared case-insensitively.
+        """
+        if not required_skills:
+            return []
+        if isinstance(required_skills, str):
+            required_skills = required_skills.split(",")
+        wanted = {str(skill).strip().lower() for skill in required_skills}
+        wanted.discard("")
+
+        # Find relevant portfolio entries based on skills
+        matched_links = []
+        for entry in st.session_state['portfolio']:
+            owned = {str(skill).strip().lower() for skill in entry['skills']}
+            if wanted & owned:
+                matched_links.append(entry['links'])
+
+        return matched_links[:2]  # Links of up to 2 matching entries
+
+
+# Function to create the Streamlit app interface
+def create_streamlit_app(llm, portfolio, clean_text):
+    """Render the form and, on submit, show one generated email per job.
+
+    Args:
+        llm: Object providing extract_jobs() and write_mail() (see Chain).
+        portfolio: Object providing add_to_portfolio() and query_links().
+        clean_text: Callable that cleans the scraped page text.
+    """
+    st.set_page_config(page_title="Cold Email Generator", page_icon="", layout="wide")
+
+    # NOTE(review): no HTML/CSS is visible inside the st.markdown() strings in
+    # this copy of the patch, only their text; confirm the intended markup.
+    st.markdown("""
+
+    """, unsafe_allow_html=True)
+
+    st.markdown("Cold Email Generator", unsafe_allow_html=True)
+    st.markdown("Effortlessly craft professional cold emails for job applications based on job postings.", unsafe_allow_html=True)
+
+    st.markdown("", unsafe_allow_html=True)
+
+    user_name = st.text_input("Enter your name:", value=" ")
+    user_about = st.text_area(
+        "Enter a brief description about yourself:",
+        value=" "
+    )
+
+    url_input = st.text_input("Enter a Job Post URL:", value=" ")
+
+    st.subheader("Enter Your Skills and Portfolio Links")
+    skills_input = st.text_area("Enter your skills (comma separated):", value="")
+    links_input = st.text_area("Enter your portfolio links (comma separated):", value="")
+
+    submit_button = st.button("Submit", key='submit_button', help="Click to generate the cold email")
+
+    if submit_button:
+        # The text fields default to a single space, so strip before validating.
+        url = url_input.strip()
+        if not url:
+            st.error("Please enter a Job Post URL.")
+        else:
+            try:
+                # Skip empty items, e.g. the [''] produced by splitting "".
+                skills_list = [skill.strip() for skill in skills_input.split(",") if skill.strip()]
+                links_list = [link.strip() for link in links_input.split(",") if link.strip()]
+
+                portfolio.add_to_portfolio(skills_list, links_list)
+
+                loader = WebBaseLoader([url])
+                data = clean_text(loader.load().pop().page_content)
+                jobs = llm.extract_jobs(data)
+
+                for job in jobs:
+                    job_skills = job.get('skills', [])
+                    links = portfolio.query_links(job_skills)
+                    email = llm.write_mail(job, links, user_name.strip(), user_about.strip())
+                    # The email is model output derived from a scraped page, so
+                    # escape it before rendering with unsafe_allow_html=True.
+                    st.markdown(html.escape(email), unsafe_allow_html=True)
+
+            except Exception as e:
+                # UI boundary: report the failure in the page instead of crashing.
+                st.error(f"An Error Occurred: {e}")
+
+    st.markdown("", unsafe_allow_html=True)
+
+
+# Main function to run the app
+if __name__ == "__main__":
+    chain = Chain()
+    portfolio = Portfolio()
+
+    create_streamlit_app(chain, portfolio, clean_text)
\ No newline at end of file
diff --git a/Generative Models/Cold Email generator/app/requirements.txt b/Generative Models/Cold Email generator/app/requirements.txt
new file mode 100644
index 00000000..661f7f0c
--- /dev/null
+++ b/Generative Models/Cold Email generator/app/requirements.txt
@@ -0,0 +1,7 @@
+streamlit==1.35.0
+python-dotenv==1.0.0
+langchain-community==0.2.12
+langchain-groq==0.1.9
+langchain-core==0.2.37
+Gunicorn
+beautifulsoup4==4.12.2
\ No newline at end of file

From 4479b06aa2dd3e6a5679c6f4bbd7c6e0acb15e54 Mon Sep 17 00:00:00 2001
From: Yash Kumar Saini <115717039+yashksaini-coder@users.noreply.github.com>
Date: Fri, 25 Oct 2024 17:29:12 +0000
Subject: [PATCH 2/2] Add README

---
 Generative Models/Cold Email generator/.env   |  1 +
 .../Cold Email generator/README.md            | 31 +++++++++++++++++++
 2 files changed, 32 insertions(+)
 create mode 100644 Generative Models/Cold Email generator/.env
 create mode 100644 Generative Models/Cold Email generator/README.md

diff --git a/Generative Models/Cold Email generator/.env b/Generative Models/Cold Email generator/.env
new file mode 100644
index 00000000..ec0bc5b0
--- /dev/null
+++ b/Generative Models/Cold Email generator/.env
@@ -0,0 +1 @@
+GROQ_API_KEY=your_groq_api_key
\ No newline at end of file
diff --git a/Generative Models/Cold Email generator/README.md b/Generative Models/Cold Email generator/README.md
new file mode 100644
index 00000000..69673157
--- /dev/null
+++ b/Generative Models/Cold Email generator/README.md
@@ -0,0 +1,31 @@
+# 📧 Cold Mail Generator
+
+A cold email generator for service companies using Groq, Langchain, and Streamlit. 
This tool lets users input the URL of a company's careers page, extracts the job listings from it, and generates personalized cold emails that include relevant portfolio links, chosen by matching the skills and links you enter in the app against each job's required skills. + +## Features + +- Input the URL of a company's careers page. +- Extract job listings to generate personalized cold emails. +- Include relevant portfolio links based on job descriptions. + +## Usage Example + +Imagine a scenario where a software development company can provide dedicated engineers to a major company. This tool facilitates outreach via personalized cold emails. + + +## Set-up + +1. **API Key**: Get an API key from [Groq Console](https://console.groq.com/keys) and update the value of `GROQ_API_KEY` in the `.env` file next to this README with your API key. + +2. **Install Dependencies**: Install the required Python packages: + ```bash + pip install -r app/requirements.txt + ``` + +3. **Run the Application**: Start the Streamlit app: + ```bash + streamlit run app/app.py + ``` + + + 