-
Notifications
You must be signed in to change notification settings - Fork 0
/
thetvdb.js
192 lines (173 loc) · 6.38 KB
/
thetvdb.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
/* eslint-disable eqeqeq */
/*
Yarr is an Electron program used to try and rename files using metadata
scraped from the internet.
Copyright (C) 2020 James Mackie
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
const JSSoup = require('jssoup').default
const r2 = require('r2')
/**
 * Fetches a show page and extracts its episode listing.
 *
 * The page body is downloaded with r2, parsed with JSSoup and handed to
 * processSoup.
 *
 * @param {string} tvShowMetadataURL - URL of the page to download.
 * @returns {Promise<Object|null>} Resolves with whatever processSoup returns
 *   for the downloaded page. Rejects when the request fails, when the body
 *   cannot be read, or when parsing/processing the page throws.
 */
function getMetadataFromSource (tvShowMetadataURL) {
  // Some kind of weird bug with R2 and certain web pages:
  // https://github.com/mikeal/r2/issues/45
  //
  // The chain is returned directly instead of being wrapped in `new Promise`:
  // that way an exception thrown while reading or parsing the page rejects the
  // returned promise rather than leaving it pending forever.
  return r2(tvShowMetadataURL).response
    .then(
      // response.text() is itself a promise for the response body.
      response => response.text(),
      fail => {
        // Log the request failure, then re-throw so the parsing step below is
        // skipped and the caller sees the original error.
        console.error(fail)
        throw fail
      }
    )
    // Convert the response text into JSSoup and pull the episode info out of it.
    .then(responseText => processSoup(new JSSoup(responseText)))
}
/**
 * Builds a lookup of episode ids to episode names from a parsed show page.
 *
 * @param {Object} soup - Parsed page exposing `findAll` and `find`.
 * @returns {Object|null} Dict mapping each matched episode id (text such as
 *   "S01E02") to its name, plus the show name under the special key
 *   `showName`; `null` when no episode span is found.
 */
function processSoup (soup) {
  // Every <span> whose text looks like an episode id (SxxEyy).
  const episodeSpans = soup.findAll('span', undefined, /S(\d{1,2})E(\d{1,2})/)
  if (episodeSpans.length === 0) return null

  // A plain dict is enough for now; a list of dicts may be needed if more
  // metadata per episode is ever required.
  const listing = {}
  episodeSpans.forEach(span => {
    const id = span.text.trim()
    // The episode name is read from the second element after the id span.
    const title = span.nextElement.nextElement.text.trim()
    listing[id] = title
  })

  // The exact show name is read a fixed number of elements after the
  // 'crumbs' div.
  const crumbs = soup.find('div', 'crumbs')
  listing.showName = crumbs.nextElement.nextElement.nextElement.nextElement.nextElement.nextElement.nextElement.text.trim()
  return listing
}
// Expose getMetadataFromSource to modules that require this file.
exports.getMetadataFromSource = getMetadataFromSource
// JSSoup doesn't support regex matching on the text, so that functionality is
// provided here by replacing JSSoup's findAll with one that filters through
// SoupStrainer2 (a copy of the SoupStrainer class with regex support added).
// Extremely experimental.
/**
 * Collects every descendant tag accepted by a SoupStrainer2 built from the
 * given criteria.
 *
 * @param {*} [name] - Tag name criterion passed to SoupStrainer2.
 * @param {*} [attrs] - Attribute criterion passed to SoupStrainer2.
 * @param {*} [string] - Text criterion passed to SoupStrainer2.
 * @returns {Array} The truthy results of `strainer.match` in document order.
 */
JSSoup.prototype.findAll = function (name = undefined, attrs = undefined, string = undefined) {
  const matches = []
  const strainer = new SoupStrainer2(name, attrs, string)
  // The constructor JSSoup inherits from (presumably jssoup's tag class, which
  // the package does not export directly). Only its instances are offered to
  // the strainer.
  const TagClass = Object.getPrototypeOf(JSSoup)
  for (const node of this.descendants) {
    if (!(node instanceof TagClass)) continue
    const hit = strainer.match(node)
    if (hit) matches.push(hit)
  }
  return matches
}
/**
 * Tag filter used by the replacement JSSoup.prototype.findAll.
 *
 * Holds a name, attribute and text criterion and decides whether a tag
 * satisfies them. Name and text criteria may be a plain value, a RegExp or an
 * array of either (any element matching is enough).
 */
class SoupStrainer2 {
  /**
   * @param {*} [name] - Tag name criterion; undefined/null matches any name.
   * @param {string|string[]|Object} [attrs] - Attribute criterion. A string or
   *   an array is shorthand for required class name(s); an object maps
   *   attribute names to required values.
   * @param {*} [string] - Text criterion; undefined/null matches any text.
   */
  constructor (name, attrs, string) {
    let wanted = attrs
    if (typeof attrs === 'string') {
      wanted = { class: [attrs] }
    } else if (Array.isArray(attrs)) {
      wanted = { class: attrs }
    }
    if (wanted && wanted.class) {
      const classes = typeof wanted.class === 'string' ? [wanted.class] : wanted.class
      if (Array.isArray(classes)) {
        // Store a trimmed copy so the object/array passed in by the caller is
        // never modified.
        wanted = Object.assign({}, wanted, { class: classes.map(cls => cls.trim()) })
      }
    }
    this.name = name
    this.attrs = wanted
    this.string = string
  }

  /**
   * Tests a tag against the stored criteria.
   *
   * @param {Object} tag - Object exposing `name`, `string` and `attrs`.
   * @returns {*} `null` when the tag is rejected. For a text-only search
   *   (no name and no attrs) the tag's `string` is returned on a match;
   *   otherwise the tag itself is returned.
   */
  match (tag) {
    // Text-only search.
    if (this.name == null && this.attrs == null) {
      if (this.string) {
        // A tag without a string cannot satisfy a text criterion, so it is
        // rejected (via _matchName) rather than returned.
        return this._matchName(tag.string, this.string) ? tag.string : null
      }
      return tag
    }
    // match tag name
    if (!this._matchName(tag.name, this.name)) return null
    // match string
    if (!this._matchName(tag.string, this.string)) return null
    // match attributes; the null check matters because typeof null is 'object'
    if (this.attrs != null && typeof this.attrs === 'object' && !this._isEmptyObject(this.attrs)) {
      const tagAttrs = tag.attrs || {}
      // One matching attribute is enough for the tag to be accepted.
      const found = Object.getOwnPropertyNames(this.attrs).some(prop =>
        prop in tagAttrs && this._matchAttrs(prop, tagAttrs[prop], this.attrs[prop])
      )
      if (!found) return null
    }
    return tag
  }

  /**
   * Compares one tag item (name or string) with a criterion.
   *
   * @param {*} tagItem - Value taken from the tag.
   * @param {*} name - Criterion: nullish (wildcard), array, RegExp or value.
   * @returns {boolean} Whether the item satisfies the criterion.
   */
  _matchName (tagItem, name) {
    // An undefined or null criterion is a wildcard.
    if (name == null) return true
    // A missing tag item can never satisfy a real criterion.
    if (tagItem == null) return false
    // With an array, matching any element is a match.
    if (Array.isArray(name)) {
      return name.some(candidate => this._matchName(tagItem, candidate))
    }
    // With a RegExp, test the item's string form against it.
    if (name instanceof RegExp) {
      return tagItem.toString().match(name) !== null
    }
    // Loose equality is kept on purpose: it lets an object item be compared
    // with a primitive criterion through the usual == coercion.
    return tagItem == name
  }

  /**
   * Checks that a tag's attribute value contains every required value.
   *
   * @param {string} name - Attribute name; 'class' values are split on
   *   whitespace before comparison.
   * @param {string|Array} candidateAttrs - Attribute value found on the tag.
   * @param {string|Array} attrs - Required value(s). A value without a
   *   `length` yields no iterations, so it passes as soon as the attribute
   *   exists.
   * @returns {boolean} True when no required value is missing.
   */
  _matchAttrs (name, candidateAttrs, attrs) {
    let candidates = candidateAttrs
    if (typeof candidates === 'string') {
      // Split on any run of whitespace so tab- or newline-separated class
      // names are recognised as well.
      candidates = name === 'class' ? candidates.trim().split(/\s+/) : [candidates]
    }
    const required = typeof attrs === 'string' ? [attrs] : attrs
    for (let i = 0; i < required.length; ++i) {
      if (candidates.indexOf(required[i]) < 0) return false
    }
    return true
  }

  /**
   * @param {Object} obj - Object to inspect.
   * @returns {boolean} True when the object has no own enumerable keys.
   */
  _isEmptyObject (obj) {
    return Object.keys(obj).length === 0
  }
}