-
Notifications
You must be signed in to change notification settings - Fork 1
/
empregaCrawler.py
52 lines (38 loc) · 1.59 KB
/
empregaCrawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen
import csv
# INSIDE FILTROS YOU PUT THE VACANCY KEYWORDS YOU WANT TO SEARCH FOR.
# Titles are matched word-by-word, so keywords must be whole uppercase words.
filtros = ['ASSISTENTE', 'OPERADOR', 'GERAIS', 'SERVIÇOS']
# Cities to match against the vacancy title (same word-by-word matching).
cidades = ['CAMPINAS']
# THIS IS HOW WE PREPARE A FILE TO RECEIVE OUR EXTRACTED DATA.
# newline='' stops the csv module from emitting blank rows on Windows, and an
# explicit UTF-8 encoding keeps accented keywords (e.g. SERVIÇOS) intact.
# The handle is kept in its own name so it can be closed/flushed if needed.
arquivoSaida = open('vagas.csv', 'w', newline='', encoding='utf-8')
planilha = csv.writer(arquivoSaida)
def pegaVagas(paginasMax):
    """Crawl up to *paginasMax* listing pages of empregacampinas.com.br.

    For every vacancy whose title contains both one of the configured
    cities (``cidades``) and one of the keywords (``filtros``), write a
    ``[title, link]`` row via the module-level CSV writer ``planilha``
    and echo the title to stdout.  Finally prints the total match count.
    """
    total = 0
    # The site's pages are 1-indexed, so iterate 1..paginasMax directly
    # instead of maintaining a separate `count` variable.
    for pagina in range(1, paginasMax + 1):
        page = 'http://empregacampinas.com.br/categoria/vaga/page/' + \
            str(pagina)
        # Context manager guarantees the connection is closed even if
        # read() raises.
        with urlopen(page) as pageLoad:
            pageCode = pageLoad.read()
        pageSoup = soup(pageCode, 'html.parser')
        # find_all is the modern bs4 spelling of the legacy findAll alias.
        vagas = pageSoup.find_all('div', {'class': 'col-lg-12'})
        # print(vagas)  # FOR DEBUG ONLY, TO VERIFY THE HTML PAGE YOU'RE GETTING
        for vaga in vagas:
            # Query the anchor once instead of searching the tree twice.
            # Explicit None checks replace the original bare `except:`,
            # which also silently swallowed KeyboardInterrupt and real bugs.
            anchor = vaga.find('a', {'class': 'thumbnail'})
            if anchor is None:
                # Not every col-lg-12 div is a vacancy card; skip it.
                continue
            vagaCargo = anchor.get('title')
            vagaLink = anchor.get('href')
            if vagaCargo is None:
                continue
            listaDinamica = vagaCargo.split()
            # print(listaDinamica)
            # Keep only titles mentioning a configured city AND keyword.
            if any(cidade in cidades for cidade in listaDinamica):
                if any(palavra in filtros for palavra in listaDinamica):
                    planilha.writerow([vagaCargo, vagaLink])
                    total += 1
                    print(vagaCargo)
                    # print(vagaLink + '\n')
    print(total, 'vagas foram encontradas. Boa sorte!')
# INSERT AMOUNT OF PAGES YOU WANT TO EXPLORE
if __name__ == '__main__':
    # Guarding the entry point lets the module be imported (e.g. for
    # testing) without immediately crawling 40 pages; running it as a
    # script behaves exactly as before.
    pegaVagas(40)