// _.isBlockedUA()
// This is to block various web spiders from executing our JS and
// sending false capturing data
/**
 * Reports whether a user agent string belongs to a blocked bot or crawler.
 *
 * The user agent is searched for every entry of `DEFAULT_BLOCKED_UA_STRS` and of
 * `customBlockedUserAgents` as a plain substring (entries are not interpreted as
 * regular expressions). Matching ignores case: real crawlers announce themselves
 * with mixed case (e.g. `AhrefsBot`), while the blocked entries are written in
 * lower case.
 *
 * @param ua - user agent string to inspect
 * @param customBlockedUserAgents - additional substrings to block on top of the defaults
 * @returns `true` when `ua` contains any blocked substring; `false` otherwise,
 * including when `ua` is empty or missing
 */
export const _isBlockedUA = function (ua: string, customBlockedUserAgents: string[]): boolean {
    // Some environments expose no user agent at all; nothing to match, and
    // calling string methods on a missing value would throw.
    if (!ua) {
        return false
    }

    // Lower-case once up front instead of once per blocked entry.
    const uaLower = ua.toLowerCase()

    // Tolerate callers that pass no custom list: concat(undefined) would
    // otherwise append an `undefined` entry to the list being searched.
    return DEFAULT_BLOCKED_UA_STRS.concat(customBlockedUserAgents || []).some((blockedUA) => {
        return uaLower.includes(blockedUA.toLowerCase())
    })
}