-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.js
103 lines (93 loc) · 3.51 KB
/
test.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
// To debug a single URL, change `prog.link` inside the async scrape routine below.
const puppeteer = require("puppeteer");
// Maps a record field name to the CSS selector of the element whose innerText
// is stored under that name. Key order is significant: it is the order in
// which the fields are added to the scraped record.
const keySelectorPairs = (() => {
  // Container shared by every selector that lives in the hero header.
  const heroSection =
    "#Hero > div.EssentialInformationWrapper.mdc-layout-grid.GridInContainer > section > div";
  // n-th span of the hero content row.
  const heroContentSpan = (n) =>
    `${heroSection} > div.Content > div > span:nth-child(${n})`;
  // Title container of the n-th quick-fact tile.
  const quickFactTitle = (n) =>
    `#QuickFacts > div > div:nth-child(${n}) > div > div.LabelContainer > div.Title`;
  // n-th link in the organisation header.
  const organisationLink = (n) =>
    `#OrganisationInformation > header > span > div > span > a:nth-child(${n})`;

  return {
    uni: "div.OrganisationName > h2",
    title: `${heroSection} > header > h1`,
    degType: heroContentSpan(1),
    campus: heroContentSpan(2),
    duration: `${quickFactTitle(1)} > span`,
    tuition: `${quickFactTitle(2)} > div > div:nth-child(2) > span`,
    about: "#StudySummary > p",
    uniRank: "#js-worldRankingReadMoreButton > span.ValueAndType > span.Value",
    languages:
      "#StudyKeyFacts > article.FactItem.LanguageFact.js-languageFact > div",
    ects: "#StudyKeyFacts > article:nth-child(5) > div",
    // `disciplines` is deliberately not listed here: it can match several
    // anchors, so the scrape routine collects it separately with `$$eval`.
    city: organisationLink(1),
    country: organisationLink(3),
  };
})();
// Maps a record field name to the CSS selector of the element whose `href`
// is stored under that name.
const hrefPairs = {
  // Link wrapped inside the hero title; written as its descendant path.
  origLink: [
    "#Hero",
    "div.EssentialInformationWrapper.mdc-layout-grid.GridInContainer",
    "section",
    "div",
    "header",
    "h1",
    "a",
  ].join(" > "),
};
// Maps a record field name to the CSS selector of the <time> element whose
// `datetime` attribute is read for that field.
const datePairs = (() => {
  // Visible (not `.Hidden`) <time> inside the quick-fact tile at `position`.
  const visibleTimeInQuickFact = (position) =>
    `#QuickFacts > div > div:nth-child(${position}) > div > div.LabelContainer > div.Title > div > div:not(.Hidden) > time`;

  return {
    applyBy: visibleTimeInQuickFact(3),
    startDate: visibleTimeInQuickFact(4),
  };
})();
/*
 * Scrape routine: opens one programme page in headless Chromium, reads every
 * field described by `keySelectorPairs`, `hrefPairs` and `datePairs` plus the
 * list of disciplines, and prints the resulting record to stdout.
 *
 * Any field whose selector matches nothing is stored as ".".
 * To debug a different programme, change `prog.link` below.
 * On failure the error is printed and the process exit code is set to 1.
 */
(async () => {
  // Placeholder stored for every field that cannot be read from the page.
  const MISSING = ".";
  const DISCIPLINES_SELECTOR =
    "#StudyKeyFacts > article.FactItem.Disciplines > a.TextOnly:not(.LandingPageLink)";

  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    // setViewport returns a promise; await it so the size is applied before
    // navigation and a rejection is not left floating.
    await page.setViewport({ width: 1000, height: 1000 });

    const prog = {
      link: "https://www.mastersportal.com/studies/37198/",
    };
    await page.goto(prog.link);

    /**
     * Runs `pageFunction` on the first element matching `selector`.
     * @param {string} selector - CSS selector to look up on the page.
     * @param {Function} pageFunction - Executed in the page with the element;
     *   it cannot close over Node-side variables.
     * @returns {Promise<*>} The extracted value, or the placeholder when the
     *   element is absent or the page function yields null/undefined.
     */
    const readField = async (selector, pageFunction) => {
      if (!(await page.$(selector))) {
        return MISSING;
      }
      const value = await page.$eval(selector, pageFunction);
      return value == null ? MISSING : value;
    };

    const record = { platformLink: prog.link };

    // Regular innerText values
    for (const [field, selector] of Object.entries(keySelectorPairs)) {
      record[field] = await readField(selector, (el) => el.innerText);
    }

    // Href values
    for (const [field, selector] of Object.entries(hrefPairs)) {
      record[field] = await readField(selector, (el) => el.href);
    }

    // Date values
    for (const [field, selector] of Object.entries(datePairs)) {
      record[field] = await readField(selector, (el) => {
        const raw = el.getAttribute("datetime");
        const parsed = new Date(raw);
        // A missing attribute would otherwise become 1.1.1970 and a malformed
        // one "Invalid Date"; report both as absent instead.
        if (raw === null || Number.isNaN(parsed.getTime())) {
          return null;
        }
        // "de-DE" is the BCP 47 tag for German; "German" is not a known locale
        // and silently falls back to the browser default.
        // NOTE(review): date-only values are parsed as UTC but formatted in the
        // page's local time zone — confirm whether `timeZone: "UTC"` is needed.
        return parsed.toLocaleDateString("de-DE");
      });
    }

    // Misc manual cases: disciplines may match several anchors, so collect
    // them all in a single query and join them on the Node side.
    const disciplineNames = await page.$$eval(DISCIPLINES_SELECTOR, (anchors) =>
      anchors.map((anchor) => anchor.innerText)
    );
    record.disciplines =
      disciplineNames.length > 0 ? disciplineNames.join(", ") : MISSING;

    console.log(record);
  } finally {
    // Always release the Chromium process, even when scraping fails.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});