This repository has been archived by the owner on Aug 2, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
scrapeProfiles.js
106 lines (90 loc) · 3.51 KB
/
scrapeProfiles.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
const puppeteer = require('puppeteer');
module.exports = async(job) => {
const browser = await puppeteer.launch({
args: [
'--no-sandbox',
'--disable-setuid-sandbox',
]
});
const page = await browser.newPage();
// Search by DPSST ID
await page.goto('http://dpsstnet.state.or.us/PublicInquiry_CJ/smsgoperson.aspx', { waitUntil: 'load' });
await page.waitFor('input[id="RadioButtonList1_1"]');
await page.$eval('input[id="RadioButtonList1_1"]', el => el.checked = '"checked"');
await page.$eval('input[name=txtFindValue]', (el, id) => el.value = id, job.data.id);
await page.click('input[type="submit"]');
// Find and click the first link
await page.waitFor('#DataGridAgcyEmp');
await page.$eval('#DataGridAgcyEmp tr:nth-child(2) a', el => el.click());
// Click Profile Report
await page.$('#TblOrgTitle');
await page.click('input[name="BtnProfile"]');
// Click Profile Report Again
await page.$('#TblOrgTitle');
await page.click('input[name="BtnProfile"]');
// Scrape Employee Info
let nameIDAgencyTable = await page.$('#TableNmAg');
const nameIDAgencyArray = await nameIDAgencyTable.evaluate(element => {
return [...element.querySelectorAll('#TableNmAg td').values()]
.map(node => node.innerText);
});
let statusTable = await page.$('#Table1');
const statusArray = await statusTable.evaluate(element => {
return [...element.querySelectorAll('#Table1 td:nth-child(2)').values()]
.map(node => node.innerText);
});
// Scrape Employment History
let employmentTable = await page.$('#DataGridEmpHst');
const employmentArray = await employmentTable.evaluate(element => {
return [...element.querySelectorAll('#DataGridEmpHst tr').values()]
.map(node => node.innerText)
.map(row => row.split('\t'))
.map(row => ({
date: row[0],
agency: row[1],
action: row[2],
rank: row[3],
classification: row[4] || '',
assignment: row[5] || ''
}));
});
// Scrape Certifications
let certificationTable = await page.$('#DataGridEmpCert');
const certificationArray = await certificationTable.evaluate(element => {
return [...element.querySelectorAll('#DataGridEmpCert tr').values()]
.map(node => node.innerText)
.map(row => row.split('\t'));
});
// Scrape Training
let trainingTable = await page.$('#DataGrid1');
const trainingArray = await trainingTable.evaluate(element => {
return [...element.querySelectorAll('#DataGrid1 tr').values()]
.map(node => node.innerText)
.map(row => row.split('\t'));
});
// Scrape Attributes
let attributesTable = await page.$('#DataGridEmpAttr');
const attributesArray = await attributesTable.evaluate(element => {
return [...element.querySelectorAll('#DataGridEmpAttr tr').values()]
.map(node => node.innerText)
.map(row => row.split('\t'));
});
// Scrape Education
let educationTable = await page.$('#DataGridEmpEduc');
const educationArray = await educationTable.evaluate(element => {
return [...element.querySelectorAll('#DataGridEmpEduc tr').values()]
.map(node => node.innerText)
.map(row => row.split('\t'));
});
await browser.close();
console.log([
...nameIDAgencyArray,
...statusArray,
...employmentArray,
...certificationArray,
...trainingArray,
...attributesArray,
...educationArray
]);
return Promise.resolve({ nameIDAgencyArray, statusArray, employmentArray, certificationArray, trainingArray, attributesArray, educationArray });
};