This repository has been archived by the owner on Aug 15, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
track_site.py
184 lines (142 loc) · 5.58 KB
/
track_site.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
import shutil
import typing as t
import os
import time
import logging
import filecmp
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from discord_webhook import DiscordWebhook, DiscordEmbed
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment before
# the settings below are read from it.
load_dotenv()

# Page whose news list is watched for changes.
URL_TO_MONITOR: str = "https://jexam.inf.tu-dresden.de/de.jexam.web.v5/spring/welcome"
# Discord webhook URL that receives the notifications; None when the
# PAYLOAD_URL environment variable is not set.
PAYLOAD_URL: str | None = os.getenv("PAYLOAD_URL")
# Path to the chromedriver executable, overridable via the environment.
# The fallback used to be "/user/bin/chromedriver", which is a typo for the
# standard "/usr/bin" directory.
CHROME_PATH: str = os.getenv("CHROME_PATH", "/usr/bin/chromedriver")
# Pause between two consecutive checks of the page.
DELAY_TIME_SECONDS: int = 20

log = logging.getLogger(__name__)
def filter_new_entries(one: t.Iterable[str], other: t.Iterable[str]) -> list[str]:
    """Collect the items of `other` that are absent from `one`.

    Behaves like `set(other) - set(one)` with two differences:

    1. repeated items of `other` are all kept, and
    2. the result follows the order in which items occur in `other`.
    """
    known = set(one)
    fresh: list[str] = []
    for entry in other:
        if entry in known:
            continue
        fresh.append(entry)
    return fresh
def report_new_result(result: str, url: str) -> None:
    """Announce one newly published exam result through a Discord webhook.

    Args:
        result: Text of the new result; it is embedded in the message title.
        url: Webhook URL the message is posted to.
    """
    announcement = DiscordEmbed(
        title=f" <a:bpG:890945228679299082> Prüfungsergebnis {result} ist nun verfügbar.",
        color=2158112,  # same value as 0x20EE20
    )
    hook = DiscordWebhook(url=url)
    hook.add_embed(announcement)
    # Short pause before every post, so back-to-back calls are spaced out.
    time.sleep(1)
    hook.execute()
class Page_Tracker:
    """
    Monitor a single web page and send notifications when its content changes.

    State is kept in two text files in the current working directory:
    ``previous_exams.txt`` (the snapshot that was last reported) and
    ``new_exams.txt`` (the snapshot written by the most recent crawl).
    """

    # Snapshot files, relative to the current working directory.
    PREVIOUS_FILE = "previous_exams.txt"
    NEW_FILE = "new_exams.txt"

    def content_comparison(self, previous: str, new: str) -> bool:
        """
        Compare the content of two files.

        Args:
            previous: Path of the file holding the previous page content.
            new: Path of the file holding the freshly scraped page content.

        Returns:
            True if both files have identical content, False otherwise.
        """
        # shallow=False forces a real content comparison. With the default
        # (shallow=True) two files with the same size and modification time
        # are reported as equal without ever being read.
        return filecmp.cmp(previous, new, shallow=False)

    def write_content_in_new(self, content: str) -> None:
        """
        Write the scraped content into `new_exams.txt`, replacing what was there.

        Args:
            content: The content to be written to the file.
        """
        with open(self.NEW_FILE, "w") as new:
            new.write(content)

    def _fetch_news_text(self) -> str:
        """Load the monitored page in headless Chrome and return the news list text."""
        log.debug("Setting up chrome driver %r", CHROME_PATH)
        options = webdriver.ChromeOptions()
        options.add_argument("--headless")
        # "none" page-load strategy: get() does not wait for the page to finish
        # loading, which is why a short fixed pause follows it below.
        options.page_load_strategy = "none"
        # Service wraps the chromedriver executable located at CHROME_PATH.
        chrome_service = Service(CHROME_PATH)
        driver = webdriver.Chrome(options=options, service=chrome_service)
        try:
            driver.implicitly_wait(2)
            log.debug("Fetching url %r", URL_TO_MONITOR)
            driver.get(URL_TO_MONITOR)
            time.sleep(2)
            news_list = driver.find_element(By.ID, "news-wrapper").find_element(
                By.TAG_NAME, "ul"
            )
            # Read the text while the browser session is still alive.
            return news_list.text
        finally:
            # Always shut the browser down; otherwise every polling cycle
            # leaves a Chrome/chromedriver process behind.
            driver.quit()

    def page_crawler(self) -> bool:
        """
        Scrape the web page and check for updates.

        The scraped text is always written to `new_exams.txt`. An empty
        `previous_exams.txt` is created if it does not exist yet.

        Returns:
            True if `new_exams.txt` did not exist before this call, or if its
            new content differs from `previous_exams.txt`; False otherwise.
        """
        content = self._fetch_news_text()

        # Make sure the baseline file exists so it can be compared and read.
        if not os.path.exists(self.PREVIOUS_FILE):
            with open(self.PREVIOUS_FILE, "w"):
                pass

        # The very first snapshot always counts as a change.
        first_snapshot = not os.path.exists(self.NEW_FILE)
        self.write_content_in_new(content)
        if first_snapshot:
            return True
        return not self.content_comparison(self.PREVIOUS_FILE, self.NEW_FILE)

    def return_new_exams(self, previous: str, new: str) -> list[str]:
        """
        Return the lines of `new` that do not occur in `previous`.

        Args:
            previous: Path of the file with the previously reported content.
            new: Path of the file with the freshly scraped content.

        Returns:
            The new, non-blank lines in reverse file order (the last line of
            the file comes first). Duplicates are kept.
        """
        with open(previous, "r") as f:
            old_lines = f.read().split("\n")
        with open(new, "r") as f:
            new_lines = f.read().split("\n")

        # Blank lines are never exam results; drop them so no empty
        # notification is sent.
        fresh = [
            line
            for line in filter_new_entries(one=old_lines, other=new_lines)
            if line.strip()
        ]
        # NOTE(review): presumably the page lists the newest entry first, so
        # reversing reports the oldest new entry first - confirm.
        fresh.reverse()
        return fresh

    def send_webhook_msg(self) -> None:
        """
        Send one webhook message per new exam result.

        Raises:
            RuntimeError: If there are results to report but PAYLOAD_URL is
                not configured.
        """
        results = self.return_new_exams(self.PREVIOUS_FILE, self.NEW_FILE)
        log.info("Found %d new results", len(results))
        log.debug("new results:\n%s", "\n".join(f" {r}" for r in results))
        if not results:
            return
        # Fail with a clear message instead of an obscure error from the
        # HTTP layer when the webhook URL is missing.
        if not PAYLOAD_URL:
            raise RuntimeError(
                "PAYLOAD_URL is not set; cannot send webhook notifications"
            )
        for result in results:
            report_new_result(result, url=PAYLOAD_URL)

    def overwrite_previous_content(self) -> None:
        """
        Overwrite the previous exams file with the content of the new one.
        """
        shutil.copy(self.NEW_FILE, self.PREVIOUS_FILE)

    def run(self) -> None:
        """
        Run the website monitor continuously.

        Checks the page every DELAY_TIME_SECONDS seconds and never returns.
        Errors raised during a check are logged and the loop carries on.
        """
        logging.basicConfig(
            level=os.environ.get("LOGLEVEL", "INFO"), format="%(asctime)s %(message)s"
        )
        log.info("Starting up Website Monitor")
        while True:
            try:
                log.info("Checking Webpage...")
                if self.page_crawler():
                    log.info("...Webpage has changed!")
                    self.send_webhook_msg()
                    # Only reached when every notification call succeeded, so
                    # a failed cycle is retried on the next iteration.
                    self.overwrite_previous_content()
                else:
                    log.info("...Webpage has not changed.")
            except Exception as e:
                # Top-level boundary: log at ERROR level with the traceback
                # and keep the monitor alive.
                log.exception("Error checking website: %r", e)
            time.sleep(DELAY_TIME_SECONDS)
# Script entry point: start the monitor loop (it runs until the process is stopped).
if __name__ == "__main__":
    Page_Tracker().run()