-
Notifications
You must be signed in to change notification settings - Fork 0
/
intern_supply.py
105 lines (91 loc) · 4.28 KB
/
intern_supply.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from urllib.request import urlopen
from bs4 import BeautifulSoup
from email.mime.text import MIMEText
import re
import argparse
import time
import datetime
import smtplib
try:
import lxml
PARSER = "lxml" # Faster parser
except ImportError:
PARSER = "html.parser" # Use html.parser if lxml not installed
# Address of the page that is scraped for company names and their status.
URL = "http://www.intern.supply"
# Program description shown at the top of the argparse help output.
INFO = """
Script to track the website Intern Supply ({}). Sends the user an email when a desired internship opens.""".format(URL)
# Epilog shown at the bottom of the argparse help output.
SRC = """
For companies with longer names (more than one word), put doubles quotes (\") around the full name.
Not all companies are available; check {} for the full list.
Written by Jeremiah Fan""".format(URL)
# Email body template. sendMail() fills the first placeholder with the
# comma-separated list of companies and the second with a timestamp.
TXT = """
The following internship applications are now available: {}
This email was automatically generated by a script at {}."""
def checkAvailable(text):
    """Translate the site's status text into a status/availability record.

    Args:
        text: The status string shown next to a company on the page.

    Returns:
        A dict with a printable ``"status"`` label and a boolean
        ``"availability"`` flag. Only the exact text "Not Yet Available"
        counts as closed; anything else (the site shows "Apply") is treated
        as open.
    """
    isOpen = text != "Not Yet Available"
    return {
        "status": "Available" if isOpen else "Not Available",
        "availability": isOpen,
    }
def sendMail(availList):
    """Email a notification listing the internships that just opened.

    Credentials come from the module-level ``fhandle`` (opened in the
    ``__main__`` section): the first line is the Gmail username and the
    second line is the password. The message is sent to ``RECIPIENT``, or
    back to the sender when the -e option was not given.

    Args:
        availList: Company names whose applications are now available.

    Raises:
        smtplib.SMTPException: If TLS negotiation, login, or sending fails.
        OSError: If the connection to the SMTP server cannot be made.
    """
    # Rewind before every read: this function can run more than once per
    # session, and reading an already-consumed handle yields empty strings.
    fhandle.seek(0)
    # readline() keeps the line terminator; drop it so it does not leak into
    # the login credentials or the message headers.
    e_usr = fhandle.readline().rstrip("\r\n")
    e_pwd = fhandle.readline().rstrip("\r\n")

    body = TXT.format(', '.join(availList), datetime.datetime.now().isoformat(sep=' '))
    msg = MIMEText(body)
    msg['Subject'] = "Internship Applications Open"
    msg['From'] = e_usr
    # If -e option not specified, send email to self
    msg['To'] = e_usr if RECIPIENT is None else RECIPIENT

    # The context manager issues QUIT and closes the socket even when login
    # or sending raises.
    with smtplib.SMTP('smtp.gmail.com', 587) as s:  # Send via Gmail's SMTP server
        s.starttls()  # Upgrade to secure connection, requirement by Gmail
        s.login(e_usr, e_pwd)
        s.send_message(msg)
        print("Email successfully sent to {}".format(msg['To'].rstrip()))  # Print out success message
def formatCompanies(inputList):
    """Check that each requested company exists on the site and canonicalize it.

    Each entry of ``inputList`` is whitespace-normalized, matched
    case-insensitively against the text nodes of the page at ``URL``, and
    replaced in place by the matching text node, so later lookups can use
    the site's exact spelling.

    Args:
        inputList: Company names as typed on the command line.

    Returns:
        The same list object, with every entry replaced by the page's
        matching text node.

    Raises:
        SystemExit: With status 1 if a name is not found on the page.
    """
    # Close the HTTP response as soon as the body has been read.
    with urlopen(URL) as response:
        contents = response.read()
    # Slightly inefficient: the first availability check fetches the page
    # again instead of reusing this parse.
    soup = BeautifulSoup(contents, PARSER)
    for index, rawName in enumerate(inputList):
        # Strip leading, trailing, and duplicate whitespaces
        clean = re.sub(' +', ' ', rawName.strip())
        # Escape the name so punctuation in it (".", "+", "(", ...) is matched
        # literally instead of being interpreted as regex syntax.
        pattern = re.compile("^{}$".format(re.escape(clean)), re.I)
        navStr = soup.find(text=pattern)
        if navStr is None:
            print('intern_supply.py: error: {} - No company by this name could be found.'.format(clean))
            exit(1)
        inputList[index] = navStr
    return inputList
def checkCompanies(companyList):
    """Poll the site once and report which tracked companies have opened.

    Prints the status of every tracked company. If any are available, a
    notification email is sent through ``sendMail`` and those companies are
    dropped from the returned list.

    Args:
        companyList: Company names exactly as they appear on the page (the
            output of ``formatCompanies``).

    Returns:
        A new list holding the companies that are still not available and
        should be checked again.
    """
    # Close the HTTP response as soon as the body has been read; this runs
    # on every polling cycle.
    with urlopen(URL) as response:
        contents = response.read()
    soup = BeautifulSoup(contents, PARSER)
    availList = []
    for company in companyList:
        navStr = soup.find(text=company)
        # The page is re-fetched on every poll, so a name validated at
        # start-up can be missing now. Keep it in the query instead of
        # crashing on None.
        if navStr is None or navStr.next is None:
            print("{}: Not found on page, will check again".format(company))
            continue
        # Check the text of the tag following the text containing the company's name
        check = checkAvailable(navStr.next.text)
        if check["availability"]:
            availList.append(navStr)
        print("{}:".format(navStr), check["status"])
    if availList:
        print("\nRemoving {} from query".format(", ".join(availList)))
        sendMail(availList)
    # Remove the available internships from our query
    return [x for x in companyList if x not in availList]
def main():
    """Poll the site until every requested company has opened applications.

    Reads the requested names from the module-level ``args`` and waits
    ``WAITTIME`` minutes between polls. Returns once no companies remain to
    be tracked.
    """
    print("Initiating Intern Supply Detector...\n")
    # Validate the names once, then run the first availability check.
    remaining = checkCompanies(formatCompanies(args.COMPANY))
    # Keep polling while at least one company is still closed.
    while len(remaining) > 0:
        print("Checking again in another {} minutes\n".format(WAITTIME))
        secondsToWait = WAITTIME * 60  # time.sleep() takes seconds
        time.sleep(secondsToWait)
        remaining = checkCompanies(remaining)
if __name__ == "__main__":
    # Script entry point: parse the command line, publish the settings the
    # functions above read as module globals, open the credentials file, and
    # start polling.
    parser = argparse.ArgumentParser(description=INFO, epilog=SRC)
    parser.add_argument('COMPANY', nargs='+', help='Name of company to track')
    parser.add_argument('--interval', '-i', metavar='N', type=float, default=5, help='Interval between queries in minutes')
    parser.add_argument('--email', '-e', metavar='E', help='Email address to send notification message to')
    args = parser.parse_args()
    WAITTIME = args.interval  # Initialize WAITTIME and RECIPIENT here, they should have global scope
    RECIPIENT = args.email
    try:
        # Execution should stop immediately if '.creds' cannot be opened;
        # fhandle has global scope because sendMail() reads from it.
        fhandle = open('.creds')
    except OSError:
        # Only file-access failures are handled here, so Ctrl-C and other
        # unrelated errors are no longer reported as a missing file.
        print("intern_supply.py: error: no file '.creds' containing email username and password was found. Unable to send email.")
        exit(1)
    try:
        main()
    finally:
        # Release the credentials file however the polling loop ends.
        fhandle.close()