-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.js
67 lines (56 loc) · 2.13 KB
/
scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
// Import Node dependencies
const axios = require('axios');
// Import local dependencies
const { logger } = require('./logger');
/**
 * Fetch the latest manga chapter from TCBScans asynchronously.
 *
 * Scrape the homepage of TCBScans line by line and use the first line that
 * contains both the given manga name and an anchor tag (`<a href="`).
 * The chapter URL is read from the `href` attribute of that anchor, and the
 * chapter number is the part of the link that follows its last `-`
 * (a trailing `/` is not part of the chapter number).
 *
 * @since 1.0.0
 * @access public
 *
 * @param {string} manga The manga name in Title Case.
 * @param {string} website The URL of the TCBScans homepage.
 *
 * @return {Promise<Object>} A Promise resolving to one of:
 *   - `{ chapter, url }` when a link containing a chapter number was found,
 *   - `{ url }` when the link contains no `-` to derive a chapter number from,
 *   - `{ error: 'NOT_FOUND' }` when no line matches the given manga.
 *   Errors raised by `axios.get` are not caught here, so they reject the
 *   returned Promise.
 */
async function fetchLatestChapter(manga, website) {
  const tagPrefix = '<a href="';

  // Log before the request so that a failing fetch is still traceable
  logger.info('Start scraping scan website', { website, manga });
  const html = (await axios.get(website)).data.split('\n');

  for (const line of html) {
    if (!line.includes(manga) || !line.includes(tagPrefix)) {
      continue;
    }

    // The link is everything between `<a href="` and the closing quote,
    // whether or not other attributes follow the href
    const hrefStart = line.indexOf(tagPrefix) + tagPrefix.length;
    const hrefEnd = line.indexOf('"', hrefStart);
    if (hrefEnd === -1) {
      // Unterminated href attribute: the link cannot be extracted reliably
      continue;
    }
    logger.info('Successfully found manga entry', { website, manga });

    const url = line.slice(hrefStart, hrefEnd);
    // Remove excess slashes from the URL (the `//` after the scheme is kept)
    const fullUrl = `${website}${url}`.replace(/([^:]\/)\/+/g, '$1');

    const chapNumStart = url.lastIndexOf('-');
    if (chapNumStart === -1) {
      // No separator in the link, hence no chapter number to report
      return { url: fullUrl };
    }

    // Extract the chapter number and remove the slash(es) at the end
    const chapNum = url.slice(chapNumStart + 1).replace(/\/+$/, '');
    return { chapter: chapNum, url: fullUrl };
  }

  logger.error('Scraping failed', { website, manga });
  // There is no entry for the given manga on TCB's homepage
  return { error: 'NOT_FOUND' };
}
// Expose the scraper function as this module's public API
module.exports = { fetchLatestChapter };