def clean_text(text):
    """Reduce scraped page text to alphanumeric words separated by single spaces.

    Processing order:
      1. strip HTML tags,
      2. strip http/https URLs,
      3. collapse every run of whitespace (newlines, tabs, ...) into one space,
      4. delete every remaining character that is not an ASCII letter,
         digit or space,
      5. trim and squeeze the spaces that are left.

    Args:
        text: Raw text to clean. ``None`` or an empty string is accepted.

    Returns:
        The cleaned string; ``''`` when there is nothing left (or nothing
        was given).
    """
    if not text:
        return ''

    # Remove HTML tags
    text = re.sub(r'<[^>]*?>', '', text)
    # Remove URLs
    text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', '', text)
    # Turn whitespace into plain spaces BEFORE filtering special characters.
    # The filter below keeps only letters, digits and ' ', so a newline or
    # tab that reached it would be deleted and the words on either side of
    # it would be fused together.
    text = re.sub(r'\s+', ' ', text)
    # Remove special characters
    text = re.sub(r'[^a-zA-Z0-9 ]', '', text)
    # Deleting punctuation can leave double spaces and stray edge spaces;
    # split/join squeezes and trims them in one pass.
    return ' '.join(text.split())
# Chain class handling the LLM processing
class Chain:
    """Wraps a Groq-hosted chat model for job extraction and email drafting."""

    def __init__(self):
        """Create the chat model client; the API key is read from GROQ_API_KEY."""
        self.llm = ChatGroq(temperature=0, groq_api_key=os.getenv("GROQ_API_KEY"), model_name="llama-3.1-70b-versatile")

    def extract_jobs(self, cleaned_text):
        """Ask the model to extract job postings from scraped text.

        Args:
            cleaned_text: Text substituted for ``{page_data}`` in the prompt.

        Returns:
            A list of parsed JSON values. If the model returned a single
            value instead of a list, it is wrapped in a one-element list.

        Raises:
            OutputParserException: The model response could not be parsed
                as JSON. The parser's own error is attached as the cause.
        """
        prompt_extract = PromptTemplate.from_template(
            """
            ### SCRAPED TEXT FROM WEBSITE:
            {page_data}
            ### INSTRUCTION:
            The scraped text is from the career's page of a website.
            Acts as a Senior HR manager and Your job is to extract the job postings and return them in JSON format containing the following keys: `role`, `experience`, `skills` and `description`.
            Only return the valid JSON.
            ### VALID JSON (NO PREAMBLE):
            """
        )
        chain_extract = prompt_extract | self.llm
        res = chain_extract.invoke(input={"page_data": cleaned_text})
        try:
            json_parser = JsonOutputParser()
            res = json_parser.parse(res.content)
        except OutputParserException as err:
            # Chain explicitly so the parser's original error stays visible
            # in the traceback as the direct cause of this one.
            raise OutputParserException("Context too big. Unable to parse jobs.") from err
        return res if isinstance(res, list) else [res]

    def write_mail(self, job, links, user_name, user_about):
        """Ask the model to write a cold email for one job posting.

        Args:
            job: The job posting; it is passed to the prompt as ``str(job)``.
            links: Portfolio links substituted for ``{link_list}``.
            user_name: Sender name substituted for ``{user_name}``.
            user_about: Sender self-description substituted for ``{user_about}``.

        Returns:
            The ``content`` of the model's response.
        """
        prompt_email = PromptTemplate.from_template(
            """
            ### JOB DESCRIPTION:
            {job_description}

            ### INSTRUCTION:
            You are {user_name}. {user_about}
            Your job is to write a cold email to the client regarding the job mentioned above, describing how you can contribute to fulfilling their needs.
            Also, add the most relevant ones from the following links to showcase portfolio: {link_list}
            Do not provide a preamble.
            ### EMAIL (NO PREAMBLE):


            """
        )
        chain_email = prompt_email | self.llm
        res = chain_email.invoke({"job_description": str(job), "link_list": links, "user_name": user_name, "user_about": user_about})
        return res.content
# Portfolio class using temporary in-memory storage
class Portfolio:
    """Keeps skill/link entries in ``st.session_state['portfolio']``."""

    def __init__(self):
        # Create the backing list on first use; leave an existing one alone.
        if 'portfolio' not in st.session_state:
            st.session_state['portfolio'] = []

    def add_to_portfolio(self, skills, links):
        """Append one ``{"skills", "links"}`` entry; do nothing if either is falsy."""
        if not (skills and links):
            return
        record = {"skills": skills, "links": links}
        st.session_state['portfolio'].append(record)

    def query_links(self, required_skills):
        """Return the links of at most two stored entries that match a skill.

        An entry matches when any item of ``required_skills`` passes an
        ``in`` test against the entry's stored skills. A falsy
        ``required_skills`` yields an empty list.
        """
        if not required_skills:
            return []

        hits = [
            record['links']
            for record in st.session_state['portfolio']
            if any(wanted in record['skills'] for wanted in required_skills)
        ]
        # Return up to 2 matched links
        return hits[:2]
{email}