-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsearch.py
106 lines (85 loc) · 3.43 KB
/
search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/python
import pandas as pd
import re
from collections import defaultdict
from read_job_desc import job_desc
import bs4
from bs4 import BeautifulSoup
from datetime import date, timedelta, datetime
from random import randint
import requests
from collections import defaultdict
from forms import SearchBar
from flask import Blueprint, render_template, request, flash, redirect, url_for
from config import *
search_api = Blueprint('search_api', __name__)
def get_links(job_data):
    """Flatten a mapping of title -> list of links into [title, link] pairs.

    Preserves the mapping's iteration order: all links for the first title
    come before those of the second, and so on.
    """
    return [[title, link] for title, links in job_data.items() for link in links]
def str_to_bs4(string):
    """Parse an HTML fragment and return every <li> tag found in it."""
    return BeautifulSoup(string, 'html.parser').find_all('li')
def get_job_data(location):
    """Scrape LinkedIn's public job-search page for "software engineer"
    roles in *location*.

    Parameters:
        location (str): free-text location; URL-encoded before the request.

    Returns:
        list: ``[job_data, company_data]`` — two ``defaultdict(list)``
        instances mapping a job title / company name to the list of links
        scraped for it.

    Raises:
        IndexError: if the results <ul> is missing from the response
        (e.g. LinkedIn served a login wall or changed its markup).
    """
    from urllib.parse import quote

    # Example: https://www.linkedin.com/jobs/search?keywords=software%20engineer&position=1&pageNum=0
    # BUG FIX: quote() the location — raw spaces/commas previously produced
    # a malformed query string.
    url = (
        "https://www.linkedin.com/jobs/search?keywords=software%20engineer"
        "&location=" + quote(location)
        + "&trk=homepage-jobseeker_jobs-search-bar_search-submit&position=1&pageNum=0"
    )
    html_content = requests.get(url).text
    soup = BeautifulSoup(html_content, "lxml")
    lists = soup.find_all("ul", {"class": "jobs-search__results-list"})
    listing_html = str(lists[0])
    # Split the result list into individual <li>...</li> fragments.
    job_listings = re.findall(r'<li.*?<\/li>', listing_html, re.DOTALL)
    tags = []
    for fragment in job_listings:
        # BUG FIX: the original called fragment.replace(...) and discarded
        # the result (strings are immutable) — rebind the cleaned value.
        fragment = fragment.replace("\n", " ")
        tags.append(str_to_bs4(fragment))
    job_data = defaultdict(list)
    company_data = defaultdict(list)
    for job in tags:
        job = job[0]
        # LinkedIn has shipped several card markups over time; fall through
        # the known selectors. AttributeError covers a missing tag
        # (find() -> None); TypeError/KeyError cover a missing "href".
        try:
            job_titles = job.find('span', {'class': 'screen-reader-text'}).text.strip()
        except AttributeError:
            job_titles = job.find('h3', {'class': 'base-search-card__title'}).text.strip()
        try:
            job_links = job.find('a', {'class': 'result-card__full-card-link'})["href"]
        except (AttributeError, TypeError, KeyError):
            try:
                job_links = job.find('a', {'class': 'base-card__full-link'})["href"]
            except (AttributeError, TypeError, KeyError):
                job_links = job.find('a', {'data-tracking-control-name': 'public_jobs_jserp-result_search-card'})["href"]
        job_data[job_titles].append(job_links)
        try:
            company_titles = job.find('a', {'class': 'result-card__subtitle-link job-result-card__subtitle-link'}).text.strip()
            company_links = job.find('a', {'class': 'result-card__subtitle-link job-result-card__subtitle-link'})["href"]
        except (AttributeError, TypeError, KeyError):
            try:
                company_titles = job.find('h4', {'class': 'result-card__subtitle job-result-card__subtitle'}).text.strip()
                # No company URL in this markup; fall back to a web search.
                company_links = 'https://www.google.com/search?q=' + company_titles
            except AttributeError:
                company_titles = job.find('a', {'class': 'hidden-nested-link'}).text.strip()
                company_links = 'https://www.google.com/search?q=' + company_titles
        company_data[company_titles].append(company_links)
    return [job_data, company_data]
@search_api.route('/search', methods=['GET', 'POST'])
def search():
    """Render the job-search page and handle its two POST actions.

    * POST with a "job" field  -> scrape LinkedIn for the given location
      and render the results table.
    * POST with apply=Apply    -> run the job-description flow, then
      re-render the search page.
    * GET (or any other POST)  -> just render the search page.
    """
    search_bar = SearchBar()
    if request.method == 'POST':
        if 'job' in request.form:
            location = request.form['location']
            job_data, company_data = get_job_data(location)
            return render_template('search.html',
                                   jobs=get_links(job_data),
                                   company_info=get_links(company_data))
        # BUG FIX: request.form['apply'] raised KeyError (-> HTTP 500)
        # whenever the field was absent; .get() makes the check safe.
        elif request.form.get('apply') == 'Apply':
            job_desc()
    return render_template('search.html', search_bar=search_bar)