enamad_scraper.py

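"""Scrape contact emails from sites that display an eNAMAD trust seal.

Reads one URL per line from sites.txt, locates each site's
trustseal.enamad.ir badge link, follows it, and extracts the registered
email address (published as name[at]domain) into emails.txt.
"""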
from bs4 import BeautifulSoup
from requests import get

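# The seal-page fetches below use verify=False, which makes urllib3 emit an
# InsecureRequestWarning for every request; silencing it is optional and
# assumes urllib3 (installed as a dependency of requests) is importable.
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)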

emails = []

with open('sites.txt') as f:
    sites = f.readlines()

for site in sites:
    site = site.strip()  # drop the trailing newline (and any stray \r)
    print('<<< === ', site, ' === >>>')
    try:
        r = get(site)
        if r.status_code != 200:
            continue
        content = r.text
        # Scan each <div> chunk of the page for an eNAMAD trust-seal link.
        for line in content.split('<div'):
            if 'trustseal.enamad.ir' in line:
                # Two badge styles exist: the newer ?id= links and the
                # older Verify.aspx links.
                link_uniq = 'https://trustseal.enamad.ir/?id='
                if 'Verify.aspx' in line:
                    link_uniq = 'https://trustseal.enamad.ir/Verify.aspx'
                i = line.index(link_uniq)
                # The URL ends at the closing quote of the href attribute;
                # fall back to a fixed 80-character window if no quote follows.
                j = 80
                if '"' in line[i:]:
                    j = line[i:].index('"')
                elif "'" in line[i:]:
                    j = line[i:].index("'")
                saman_link = line[i:i + j].replace('&amp;', '&')
                # The seal page appears to expect the owning site as Referer.
                headers = {'Referer': site}
                r2 = get(saman_link, verify=False, headers=headers)
                soup = BeautifulSoup(r2.content, "html.parser")
                if 'Verify.aspx' in saman_link:
                    # Old layout: details are table rows under #subContent1.
                    tbody = soup.find(id='subContent1')
                    items = []
                    for tr in tbody.find_all('tr'):
                        for td in tr.find_all('td'):
                            items.append(td.text)
                else:
                    # New layout: details sit in div.licontent blocks.
                    items = [div.text for div in soup.select("div.licontent")]
                # Collapse runs of whitespace, then pick the obfuscated address.
                items = [" ".join(item.split()) for item in items]
                email = [item for item in items if '[at]' in item][0]
                email = email.replace('[at]', '@')
                print(email)
                emails.append(email)
                break
    except Exception as e:
        print(e)

with open('emails.txt', 'w') as out:
    print('\n'.join(emails), file=out)