Skip to content

Commit

Permalink
v1.2.0
Browse files Browse the repository at this point in the history
 - Overall codebase enhancements and performance improvements
 - Compatibility with `[email protected]`
 - Update NPM dependencies
 - Update examples in the docs
  • Loading branch information
dr-dimitru committed Jun 2, 2017
1 parent 69c67c8 commit 013fe63
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 55 deletions.
52 changes: 27 additions & 25 deletions .versions
Original file line number Diff line number Diff line change
@@ -1,49 +1,51 @@
[email protected]
babel-compiler@6.14.1
babel-compiler@6.19.1
[email protected]
[email protected]
[email protected]
blaze@2.1.8
[email protected].9
boilerplate-generator@1.0.11
blaze@2.3.2
[email protected].10
boilerplate-generator@1.1.0
[email protected]
[email protected].4
[email protected].5
[email protected]
[email protected].3
[email protected].4
[email protected]
[email protected].13
[email protected].14
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected].10
[email protected].10
[email protected].11
[email protected].11
[email protected]
[email protected]
local-test:ostrio:spiderable-middleware@1.1.0
local-test:ostrio:spiderable-middleware@1.2.0
[email protected]
[email protected]
minimongo@1.0.20
modules@0.7.9
modules-runtime@0.7.9
[email protected].15
minimongo@1.2.0
modules@0.9.0
modules-runtime@0.8.0
[email protected].18
[email protected]
[email protected].16_1
[email protected].15
[email protected].24
[email protected].16
[email protected]
ostrio:spiderable-middleware@1.1.0
[email protected].8
ostrio:spiderable-middleware@1.2.0
[email protected].9
[email protected]
[email protected]
[email protected]
[email protected]
[email protected].12
spacebars-compiler@1.0.12
[email protected].15
spacebars-compiler@1.1.2
[email protected]
[email protected].2
[email protected].11
[email protected].3
[email protected].13
[email protected]
[email protected].13
[email protected].16
[email protected]
68 changes: 44 additions & 24 deletions lib/index.js
Original file line number Diff line number Diff line change
@@ -1,31 +1,44 @@
var url = require('url');
var request = require('request');
module.exports = (function() {
var url = require('url');
var request = require('request');
// Shared, pre-compiled regular expressions used throughout this module.
var re = {
// Matches a leading `http://` or `https://` (case-insensitive).
proto: /^https?:\/\//i,
// Matches a single `/` at the very end of a string.
trailingSlash: /\/$/,
// Matches a single `/` at the very start of a string.
beginningSlash: /^\//,
// Matches a string ending in `.<ext>` for one of the listed file extensions,
// optionally followed by a `?query` part, case-insensitive.
// Used as the default for `opts.staticExt`.
staticExt: /\.(?:3ds|3g2|3gp|3gpp|7z|a|aac|aaf|adp|ai|aif|aiff|alz|ape|apk|appcache|ar|arj|asf|asx|atom|au|avchd|avi|bak|bbaw|bh|bin|bk|bmp|btif|bz2|bzip2|cab|caf|cco|cgm|class|cmx|cpio|cr2|crt|crx|css|csv|cur|dat|deb|der|dex|djvu|dll|dmg|dng|doc|docm|docx|dot|dotm|dra|drc|DS_Store|dsk|dts|dtshd|dvb|dwg|dxf|ear|ecelp4800|ecelp7470|ecelp9600|egg|eol|eot|eps|epub|exe|f4a|f4b|f4p|f4v|fbs|fh|fla|flac|fli|flv|fpx|fst|fvt|g3|geojson|gif|graffle|gz|gzip|h261|h263|h264|hqx|htc|ico|ief|img|ipa|iso|jad|jar|jardiff|jng|jnlp|jpeg|jpg|jpgv|jpm|js|jxr|key|kml|kmz|ktx|less|lha|lvp|lz|lzh|lzma|lzo|m2v|m3u|m4a|m4p|m4v|map|manifest|mar|markdown|md|mdi|mdown|mdwn|mht|mid|midi|mj2|mka|mkd|mkdn|mkdown|mkv|mml|mmr|mng|mobi|mov|movie|mp2|mp3|mp4|mp4a|mpe|mpeg|mpg|mpga|mpv|msi|msm|msp|mxf|mxu|nef|npx|nsv|numbers|o|oex|oga|ogg|ogv|opus|otf|pages|pbm|pcx|pdb|pdf|pea|pem|pgm|pic|pl|pm|png|pnm|pot|potm|potx|ppa|ppam|ppm|pps|ppsm|ppsx|ppt|pptm|pptx|prc|ps|psd|pya|pyc|pyo|pyv|qt|ra|rar|ras|raw|rdf|rgb|rip|rlc|rm|rmf|rmvb|ron|roq|rpm|rss|rtf|run|rz|s3m|s7z|safariextz|scpt|sea|sgi|shar|sil|sit|slk|smv|so|sub|svg|svgz|svi|swf|tar|tbz|tbz2|tcl|tga|tgz|thmx|tif|tiff|tk|tlz|topojson|torrent|ttc|ttf|txt|txz|udf|uvh|uvi|uvm|uvp|uvs|uvu|vcard|vcf|viv|vob|vtt|war|wav|wax|wbmp|wdp|weba|webapp|webm|webmanifest|webp|whl|wim|wm|wma|wml|wmlc|wmv|wmx|woff|woff2|wvx|xbm|xif|xla|xlam|xloc|xls|xlsb|xlsm|xlsx|xlt|xltm|xltx|xm|xmind|xml|xpi|xpm|xsl|xwd|xz|yuv|z|zip|zipx)(?:\?[a-zA-Z0-9\-\.\_\~\:\/\#\[\]\@\!\$\&\'\(\)\*\+\,\;\=]*)?$/i
};

module.exports = (function () {
function Spiderable(opts) {
if (opts == null) {
if (!opts || Object.prototype.toString.call(opts) !== '[object Object]') {
opts = {};
}

var bots = opts.bots;
this.auth = opts.auth;
var ignore = opts.ignore;
this.botsUA = opts.botsUA || Spiderable.prototype.botsUA;
this.rootURL = opts.rootURL;
this.staticExt = opts.staticExt || re.staticExt;
this.serviceURL = opts.serviceURL;

if (Object.prototype.toString.call(this.staticExt) !== '[object RegExp]') {
console.warn('[Spiderable-Middleware] `opts.staticExt` must be instance of RegExp, falling back to defaults.');
this.staticExt = re.staticExt;
}

// Validate `botsUA`: it is joined with '|' to build the bot-detection RegExp,
// so it must be an Array. Anything else falls back to the built-in list.
if (Object.prototype.toString.call(this.botsUA) !== '[object Array]') {
console.warn('[Spiderable-Middleware] `opts.botsUA` must be instance of Array, falling back to defaults.');
// Instances have no own `prototype` property (`this.prototype` is undefined),
// so the default list must be read from the constructor's prototype.
this.botsUA = Spiderable.prototype.botsUA;
}

if (!this.handler) {
this.handler = this.middleware;
}

if (!this.handle) {
this.handle = this.middleware;
}

var defBots = ['googlebot', 'yahoo', 'bingbot', 'facebookexternalhit', 'twitterbot', 'rogerbot', 'linkedinbot', 'embedly', 'quora link preview', 'showyoubot', 'outbrain', 'pinterest', 'developers.google.com/+/web/snippet', 'slackbot', 'vkShare', 'W3C_Validator', 'redditbot', 'Applebot', 'WhatsApp', 'flipboard', 'yandex', 'google-structured-data-testing-tool', 'MJ12Bot', 'tweetmemeBot', 'baiduSpider', 'Mail\.RU_Bot', 'ahrefsBot', 'SiteLockSpider', 'visionutils', 'tumblr', 'bitlybot', 'SkypeUriPreview', 'nuzzel', 'Discordbot'];

if (bots) {
defBots = defBots.concat(bots);
}

this.botsRE = new RegExp(defBots.join('|'), 'i');
this.botsRE = new RegExp(this.botsUA.join('|'), 'i');
if (!this.auth) {
this.auth = process.env.SPIDERABLE_SERVICE_AUTH || process.env.PRERENDER_SERVICE_AUTH || '';
}
Expand All @@ -50,16 +63,16 @@ module.exports = (function() {
throw new Error('[Spiderable-Middleware] {serviceURL} or env variable SPIDERABLE_SERVICE_URL or PRERENDER_SERVICE_URL is not detected! But must be specified!');
}

if (!/^http(s)?:\/\//i.test(this.rootURL)) {
if (!re.proto.test(this.rootURL)) {
throw new Error('[Spiderable-Middleware] {rootURL} is malformed! Must start with protocol http or https');
}

if (!/^http(s)?:\/\//i.test(this.serviceURL)) {
if (!re.proto.test(this.serviceURL)) {
throw new Error('[Spiderable-Middleware] {serviceURL} is malformed! Must start with protocol http or https');
}

this.rootURL = this.rootURL.replace(/\/$/, '').replace(/^\//, '');
this.serviceURL = this.serviceURL.replace(/\/$/, '').replace(/^\//, '');
this.rootURL = this.rootURL.replace(re.trailingSlash, '').replace(re.beginningSlash, '');
this.serviceURL = this.serviceURL.replace(re.trailingSlash, '').replace(re.beginningSlash, '');

request.defaults({
proxy: this.serviceURL
Expand All @@ -72,7 +85,9 @@ module.exports = (function() {
}
}

Spiderable.prototype.middleware = function(req, res, next) {
Spiderable.prototype.botsUA = ['\\.net crawler', '360spider', '50\\.nu', '8bo crawler bot', 'aboundex', 'accoona', 'adldxbot', 'adsbot-google', 'ahrefsbot', 'altavista', 'appengine-google', 'applebot', 'archiver', 'arielisbot', 'ask jeeves', 'auskunftbot', 'baidumobaider', 'baiduspider', 'becomebot', 'bingbot', 'bingpreview', 'bitbot', 'bitlybot', 'blitzbot', 'blogbridge', 'boardreader', 'botseer', 'catchbot', 'catchpoint bot', 'charlotte', 'checklinks', 'cliqzbot', 'clumboot', 'coccocbot', 'converacrawler', 'crawl-e', 'crawlconvera', 'dataparksearch', 'daum', 'deusu', 'developers\\.google\\.com/+/web/snippet', 'discordbot', 'dotbot', 'duckduckbot', 'elefent', 'embedly', 'evernote', 'exabot', 'facebookbot', 'facebookexternalhit', 'fatbot', 'fdse robot', 'feed seeker bot', 'feedfetcher', 'femtosearchbot', 'findlinks', 'flamingo_searchengine', 'flipboard', 'followsite bot', 'furlbot', 'fyberspider', 'gaisbot', 'galaxybot', 'geniebot', 'genieo', 'gigablast', 'gigabot', 'girafabot', 'gomezagent', 'gonzo1', 'google sketchup', 'google-structured-data-testing-tool', 'googlebot', 'haosouspider', 'heritrix', 'holmes', 'hoowwwer', 'htdig', 'ia_archiver', 'idbot', 'infuzapp', 'innovazion crawler', 'instagram', 'internetarchive', 'iqdb', 'iskanie', 'istellabot', 'izsearch\\.com', 'kaloogabot', 'kaz\\.kz_bot', 'kd bot', 'konqueror', 'kraken', 'kurzor', 'larbin', 'leia', 'lesnikbot', 'linguee bot', 'linkaider', 'linkapediabot', 'linkedinbot', 'lite bot', 'llaut', 'lookseek', 'lycos', 'mail\\.ru_bot', 'masidani_bot', 'masscan', 'mediapartners-google', 'metajobbot', 'mj12bot', 'mnogosearch', 'mogimogi', 'mojeekbot', 'motominerbot', 'mozdex', 'msiecrawler', 'msnbot', 'msrbot', 'netpursual', 'netresearch', 'netvibes', 'newsgator', 'ng-search', 'nicebot', 'nutchcvs', 'nuzzel', 'nymesis', 'objectssearch', 'odklbot', 'omgili', 'oovoo', 'oozbot', 'openfosbot', 'orangebot', 'orbiter', 'org_bot', 'outbrain', 'pagepeeker', 'pagesinventory', 'parsijoobot', 'paxleframework', 'peeplo 
screenshot bot', 'pinterest', 'plantynet_webrobot', 'plukkie', 'pompos', 'psbot', 'quora link preview', 'qwantify', 'read%20later', 'reaper', 'redcarpet', 'redditbot', 'retreiver', 'riddler', 'rival iq', 'rogerbot', 'saucenao', 'scooter', 'scrapy', 'scrubby', 'searchie', 'searchsight', 'seekbot', 'semanticdiscovery', 'seznambot', 'showyoubot', 'simplepie', 'simpy', 'sitelockspider', 'skypeuripreview', 'slack-imgproxy', 'slackbot', 'slurp', 'snappy', 'sogou', 'solofield', 'speedy spider', 'speedyspider', 'sputnikbot', 'stackrambler', 'teeraidbot', 'teoma', 'theusefulbot', 'thumbshots\\.ru', 'thumbshotsbot', 'tineye', 'toweya\\.com', 'toweyabot', 'tumblr', 'tweetedtimes', 'tweetmemebot', 'twitterbot', 'url2png', 'vagabondo', 'vebidoobot', 'viber', 'visionutils', 'vkshare', 'voilabot', 'vortex', 'votay bot', 'voyager', 'w3c_validator', 'wasalive\\.bot', 'web-sniffer', 'websquash\\.com', 'webthumb', 'whatsapp', 'whatweb', 'wire', 'wotbox', 'yacybot', 'yahoo', 'yandex', 'yeti', 'yisouspider', 'yodaobot', 'yooglifetchagent', 'yoozbot', 'yottaamonitor', 'yowedo', 'zao-crawler', 'zebot_www\\.ze\\.bz', 'zooshot', 'zyborg'];

Spiderable.prototype.middleware = function (req, res, next) {
var urlObj = url.parse(req.url, true);
var hasIgnored = false;

Expand All @@ -87,13 +102,18 @@ module.exports = (function() {
if ((urlObj.query._escaped_fragment_ !== void 0 || this.botsRE.test(req.headers['user-agent'])) && !hasIgnored) {
var reqUrl = this.rootURL;

urlObj.path = urlObj.path.replace(/\/$/, '').replace(/^\//, '');
urlObj.path = urlObj.path.replace(re.trailingSlash, '').replace(re.beginningSlash, '');
if (urlObj.query._escaped_fragment_ !== void 0 && urlObj.query._escaped_fragment_.length) {
urlObj.pathname += '/' + urlObj.query._escaped_fragment_.replace(/^\//, '');
urlObj.pathname += '/' + urlObj.query._escaped_fragment_.replace(re.beginningSlash, '');
}

reqUrl += '/' + urlObj.pathname;
reqUrl = reqUrl.replace(/([^:]\/)\/+/g, '$1');

if (this.staticExt.test(req.url)) {
return next();
}

var opts = {
url: (this.serviceURL + '/?url=' + encodeURIComponent(reqUrl)).replace(/([^:]\/)\/+/g, '$1')
};
Expand All @@ -107,26 +127,26 @@ module.exports = (function() {
}

try {
req.on('error', function(error) {
req.on('error', function (error) {
console.warn('[Spiderable-Middleware] [REQ] Unexpected error:', error);
next();
});

res.on('error', function(error) {
res.on('error', function (error) {
console.warn('[Spiderable-Middleware] [RES] Unexpected error:', error);
next();
});

request.get(opts, function(error) {
request.get(opts, function (error) {
if (error) {
console.warn('[Spiderable-Middleware] [request.get] Error while connecting to external service:', error);
next();
}
}).on('response', function(response) {
}).on('response', function (response) {
if (response.statusCode === 401) {
console.warn('[Spiderable-Middleware] Can\'t authenticate! Please check you "auth" parameter and other settings.');
}
}).pipe(res).on('error', function(error) {
}).pipe(res).on('error', function (error) {
console.warn('[Spiderable-Middleware] Unexpected error:', error);
next();
});
Expand Down
8 changes: 4 additions & 4 deletions package.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package.describe({
name: 'ostrio:spiderable-middleware',
version: '1.1.0',
version: '1.2.0',
summary: 'Allow your JavaScript website to be crawled perfectly by search engines.',
git: 'https://github.com/VeliovGroup/spiderable-middleware',
documentation: 'README.md'
Expand All @@ -9,15 +9,15 @@ Package.describe({
Package.onUse(function(api) {
api.versionsFrom('1.4');
api.use('ecmascript', 'server');
api.mainModule('./lib/meteor.js', 'server');
api.mainModule('lib/meteor.js', 'server');
});

Package.onTest(function(api) {
api.use(['tinytest', 'ecmascript']);
api.use(['ostrio:spiderable-middleware', 'underscore'], 'server');
api.addFiles('./lib/meteor-tests.js', 'server');
api.addFiles('lib/meteor-tests.js', 'server');
});

Npm.depends({
'spiderable-middleware': '1.1.0'
'spiderable-middleware': '1.2.0'
});
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "spiderable-middleware",
"version": "1.1.0",
"version": "1.2.0",
"description": "Allow your JavaScript website to be crawled perfectly by search engines.",
"main": "./lib/index.js",
"scripts": {
Expand Down Expand Up @@ -38,6 +38,6 @@
},
"homepage": "https://github.com/VeliovGroup/spiderable-middleware",
"dependencies": {
"request": "^2.80.0"
"request": "^2.81.0"
}
}

0 comments on commit 013fe63

Please sign in to comment.