diff --git a/packages/adblocker-webextension/src/index.ts b/packages/adblocker-webextension/src/index.ts index b1a63c0dfc..d447d62eb8 100644 --- a/packages/adblocker-webextension/src/index.ts +++ b/packages/adblocker-webextension/src/index.ts @@ -12,7 +12,6 @@ import { parse } from 'tldts-experimental'; import { FiltersEngine, HTMLSelector, - HTMLModifier, isUTF8, Request, StreamingHtmlFilter, @@ -136,13 +135,12 @@ export function filterRequestHTML( filterResponseData: Browser['webRequest']['filterResponseData'], { id }: { id: string }, rules: HTMLSelector[], - modifiers: HTMLModifier[], ): void { // Create filter to observe loading of resource const filter = filterResponseData(id) as StreamFilter; const decoder = new TextDecoder(); const encoder = new TextEncoder(); - const htmlFilter = new StreamingHtmlFilter(rules, modifiers); + const htmlFilter = new StreamingHtmlFilter(rules); const teardown = (event: { data?: ArrayBuffer }) => { // Before disconnecting our streaming filter, we need to be extra careful @@ -408,14 +406,8 @@ export class WebExtensionBlocker extends FiltersEngine { typeof TextEncoder !== 'undefined' ) { const htmlFilters = this.getHtmlFilters(request); - const htmlModifiers = this.getHtmlModifiers(request); - if (htmlFilters.length !== 0 && htmlModifiers.length !== 0) { - filterRequestHTML( - browser.webRequest.filterResponseData, - request, - htmlFilters, - htmlModifiers, - ); + if (htmlFilters.length !== 0) { + filterRequestHTML(browser.webRequest.filterResponseData, request, htmlFilters); } } } diff --git a/packages/adblocker/src/engine/bucket/network.ts b/packages/adblocker/src/engine/bucket/network.ts index 5d02ef9106..2dec8df505 100644 --- a/packages/adblocker/src/engine/bucket/network.ts +++ b/packages/adblocker/src/engine/bucket/network.ts @@ -187,7 +187,7 @@ export default class NetworkFilterBucket { return this.badFiltersIds.has(filter.getId()); } - public getHtmlModifiers( + public getHTMLFilters( request: Request, isFilterExcluded?: (filter: NetworkFilter) => boolean, ): NetworkFilter[] { diff --git a/packages/adblocker/src/engine/engine.ts b/packages/adblocker/src/engine/engine.ts index af832cc126..0d386f89f4 100644 --- a/packages/adblocker/src/engine/engine.ts +++ b/packages/adblocker/src/engine/engine.ts @@ -19,7 +19,7 @@ import { fetchResources, fullLists, } from '../fetch.js'; -import { HTMLModifier, HTMLSelector } from '../html-filtering.js'; +import { HTMLSelector } from '../html-filtering.js'; import CosmeticFilter from '../filters/cosmetic.js'; import NetworkFilter from '../filters/network.js'; import { block } from '../filters/dsl.js'; @@ -95,11 +95,16 @@ type NetworkFilterMatchingContext = { filterType: FilterType.NETWORK; }; -type CosmeticFilterMatchingContext = { - url: string; - callerContext: any; // Additional context given from user - filterType: FilterType.COSMETIC; -}; +type CosmeticFilterMatchingContext = + | { + url: string; + callerContext: any; // Additional context given from user + filterType: FilterType.COSMETIC; + } + | { + request: Request; // For HTML Filters + filterType: FilterType.COSMETIC; + }; type NetworkFilterMatchEvent = (request: Request, result: BlockingResponse) => void; @@ -835,81 +840,86 @@ export default class FilterEngine extends EventEmitter { /** * Return a list of HTML filtering rules. */ - public getHtmlFilters({ - // Page information - url, - hostname, - domain, - - callerContext, - }: { - url: string; - hostname: string; - domain: string | null | undefined; - - callerContext?: any | undefined; - }): HTMLSelector[] { + public getHtmlFilters(request: Request): HTMLSelector[] { const htmlSelectors: HTMLSelector[] = []; - if (this.config.enableHtmlFiltering === false || this.config.loadCosmeticFilters === false) { + if (this.config.enableHtmlFiltering === false) { return htmlSelectors; } - domain ||= ''; + if (this.config.loadCosmeticFilters === true) { + const domain = request.domain || ''; - const { filters, unhides } = this.cosmetics.getHtmlFilters({ - domain, - hostname, - isFilterExcluded: this.isFilterExcluded.bind(this), - }); - const exceptions = new Map(unhides.map((unhide) => [unhide.getSelector(), unhide])); + const { filters, unhides } = this.cosmetics.getHtmlFilters({ + domain, + hostname: request.hostname, + isFilterExcluded: this.isFilterExcluded.bind(this), + }); + const exceptions = new Map(unhides.map((unhide) => [unhide.getSelector(), unhide])); - for (const filter of filters) { - const extended = filter.getExtendedSelector(); - if (extended === undefined) { - continue; - } - const exception = exceptions.get(filter.getSelector()); - if (exception !== undefined) { - htmlSelectors.push(extended); + for (const filter of filters) { + const extended = filter.getExtendedSelector(); + if (extended === undefined) { + continue; + } + const exception = exceptions.get(filter.getSelector()); + if (exception !== undefined) { + htmlSelectors.push(extended); + } + this.emit( + 'filter-matched', + { filter, exception }, + { + request, + filterType: FilterType.COSMETIC, + }, + ); } - this.emit( - 'filter-matched', - { filter, exception }, - { - url, - callerContext, - filterType: FilterType.COSMETIC, - }, - ); } - if (htmlSelectors.length !== 0) { - this.emit('html-filtered', htmlSelectors, url); - } + if (this.config.loadNetworkFilters === true) { + const replaceFilters = this.filters.getHTMLFilters( + request, + this.isFilterExcluded.bind(this), + ); - return htmlSelectors; - } + if (replaceFilters.length !== 0) { + const exception = this.exceptions.match(request, this.isFilterExcluded.bind(this)); + let modifiers = []; + for (const filter of replaceFilters) { + const modifier = filter.getHtmlModifier(); - public getHtmlModifiers(request: Request): HTMLModifier[] { - const htmlModifiers: HTMLModifier[] = []; + if (modifier !== null) { + if (!exception) { + modifiers.push(['replace', modifier]); + } - if (this.config.enableHtmlFiltering === false || this.config.loadNetworkFilters === false) { - return htmlModifiers; + this.emit( + 'filter-matched', + { filter, exception }, + { + request, + filterType: FilterType.COSMETIC, + }, + ); + } else { + // Disable all replace modifiers if empty replace modifier found + modifiers = []; + break; + } + } + } + + if (modifiers.length !== 0) { + htmlSelectors.push(...modifiers.map((modifier) => ['replace', modifier])); + } } - const filters = this.filters.getHtmlModifiers(request, this.isFilterExcluded.bind(this)); - - for (const filter of filters) { - const modifier = filter.getHtmlModifier(); - // Disable all replace modifiers if empty replace modifier found - if (modifier === null) { - return []; - } - htmlModifiers.push(modifier); + if (htmlSelectors.length !== 0) { + this.emit('html-filtered', htmlSelectors, request.url); } - return htmlModifiers; + return htmlSelectors; } /** diff --git a/packages/adblocker/src/html-filtering.ts b/packages/adblocker/src/html-filtering.ts index 374603ea40..4331393f67 100644 --- a/packages/adblocker/src/html-filtering.ts +++ b/packages/adblocker/src/html-filtering.ts @@ -10,7 +10,11 @@ // which is able to consume an HTML document over time and filter part of it // using adblocker selectors. -export type HTMLSelector = readonly ['script', readonly string[]]; +export type HTMLModifier = readonly [RegExp, string]; + +export type HTMLSelector = + | readonly ['script', readonly string[]] + | readonly ['replace', HTMLModifier]; export function extractHTMLSelectorFromRule(rule: string): HTMLSelector | undefined { if (rule.startsWith('^script') === false) { @@ -139,7 +143,10 @@ type Patterns = readonly [readonly string[], readonly RegExp[]][]; export function extractSelectorsFromRules(filter: HTMLSelector[]): Patterns { const patterns: [string[], RegExp[]][] = []; - for (const [, selectors] of filter) { + for (const [type, selectors] of filter) { + if (type !== 'script') { + continue; + } const plainPatterns: string[] = []; const regexpPatterns: RegExp[] = []; @@ -220,8 +227,6 @@ export function removeTagsFromHtml(html: string, toRemove: [number, string][]): return filteredHtml; } -export type HTMLModifier = readonly [RegExp, string]; - function applyModifiersToHtml(html: string, modifiers: HTMLModifier[]): string { if (modifiers.length === 0) { return html; @@ -239,9 +244,18 @@ export default class StreamingHtmlFilter { private readonly patterns: Patterns; private readonly modifiers: HTMLModifier[]; - constructor(selectors: HTMLSelector[], modifiers: HTMLModifier[] = []) { + constructor(selectors: HTMLSelector[]) { this.buffer = ''; - this.patterns = extractSelectorsFromRules(selectors); + const modifiers = []; + const rules = []; + for (const selector of selectors) { + if (selector[0] === 'replace') { + modifiers.push(selector[1]); + } else if (selector[0] === 'script') { + rules.push(selector); + } + } + this.patterns = extractSelectorsFromRules(rules); this.modifiers = modifiers; } diff --git a/packages/adblocker/src/index.ts b/packages/adblocker/src/index.ts index 335c4578a9..99f9b0fa7f 100644 --- a/packages/adblocker/src/index.ts +++ b/packages/adblocker/src/index.ts @@ -40,4 +40,4 @@ export { isUTF8 } from './encoding.js'; export { default as Config } from './config.js'; export { default as Resources } from './resources.js'; export { default as StreamingHtmlFilter } from './html-filtering.js'; -export type { HTMLModifier, HTMLSelector } from './html-filtering.js'; +export type { HTMLSelector } from './html-filtering.js'; diff --git a/packages/adblocker/test/html-filtering.test.ts b/packages/adblocker/test/html-filtering.test.ts index 7fb8e72294..c66521a4a9 100644 --- a/packages/adblocker/test/html-filtering.test.ts +++ b/packages/adblocker/test/html-filtering.test.ts @@ -10,7 +10,6 @@ import { expect } from 'chai'; import 'mocha'; import { - HTMLModifier, HTMLSelector, default as StreamingHtmlFilter, extractSelectorsFromRules, @@ -132,12 +131,8 @@ describe('html-filtering', () => { }); describe('#StreamingHtmlFilter', () => { - const filter = ( - html: string, - filters: HTMLSelector[] = [], - modifiers: HTMLModifier[] = [], - ): string => { - const stream = new StreamingHtmlFilter(filters, modifiers); + const filter = (html: string, filters: HTMLSelector[] = []): string => { + const stream = new StreamingHtmlFilter(filters); // Feed `html` at once const res1 = stream.write(html) + stream.flush(); @@ -224,19 +219,17 @@ describe('html-filtering', () => { describe('handles modifiers', () => { it('handles simple forms', () => { expect( - filter( - `{"trackingParam":"a"}`, - [], - [[new RegExp('"trackingParam":"(\\w+)"'), '"$1":""']], - ), + filter(`{"trackingParam":"a"}`, [ + ['replace', [new RegExp('"trackingParam":"(\\w+)"'), '"$1":""']], + ]), ).to.be.eql(`{"a":""}`); }); it('handles html modifiers with global replaces', () => { - expect(filter(doc, [], [[new RegExp("__perfMark\\('.+?'\\);", 'g'), '']])).not.to.include( - "__perfMark('", - ); - expect(filter(doc, [], [[new RegExp('redditstatic\\.com', 'g'), 'domain.tld']])) + expect( + filter(doc, [['replace', [new RegExp("__perfMark\\('.+?'\\);", 'g'), '']]]), + ).not.to.include("__perfMark('"); + expect(filter(doc, [['replace', [new RegExp('redditstatic\\.com', 'g'), 'domain.tld']]])) .to.include('https://www.domain.tld/desktop2x/js/ads.js') .to.include( 'https://www.domain.tld/desktop2x/RedesignContentFonts.509eef5d33306bd3b0d5.js', @@ -249,14 +242,10 @@ describe('html-filtering', () => { it('handles multiple modifiers', () => { expect( - filter( - doc, - [], - [ - [new RegExp('__SUPPORTS_TIMING_API &&'), 'false &&'], - [new RegExp('redditstatic\\.com', 'g'), 'domain.tld'], - ], - ), + filter(doc, [ + ['replace', [new RegExp('__SUPPORTS_TIMING_API &&'), 'false &&']], + ['replace', [new RegExp('redditstatic\\.com', 'g'), 'domain.tld']], + ]), ) .to.include('function __perfMark(name) { false && performance.mark(name); };') .not.to.include('redditstatic.com'); @@ -264,22 +253,29 @@ describe('html-filtering', () => { it('handles html modifiers with html selectors', () => { expect( - filter( - doc, - [['script', ["__perfMark('"]]], - [[new RegExp('(__firstLoaded = )false'), '$1true']], - ), + filter(doc, [ + ['script', ["__perfMark('"]], + ['replace', [new RegExp('(__firstLoaded = )false'), '$1true']], + ]), ) .not.to.include("__perfMark('") .not.to.include('__firstLoaded = false'); // -- html selector should not be inferenced by modifiers - expect(filter(doc, [['script', ["__perfMark('"]]], [[new RegExp('script', 'g'), 'pre']])) + expect( + filter(doc, [ + ['script', ["__perfMark('"]], + ['replace', [new RegExp('script', 'g'), 'pre']], + ]), + ) .not.to.include("__perfMark('") .not.to.include('script'); expect( - filter(doc, [['script', ['app_html_start']]], [[new RegExp('app_html_start', 'g'), '']]), + filter(doc, [ + ['script', ['app_html_start']], + ['replace', [new RegExp('app_html_start', 'g'), '']], + ]), ).not.to.include('app_html_start'); }); @@ -309,8 +305,7 @@ describe('html-filtering', () => { it(`filters: ${filters.join(',')}`, () => { const modified = filter( loadRequestSample(getRequestSamplePath(url)), - [], - filters.map((filter) => replaceOptionValueToRegexp(filter)!), + filters.map((filter) => ['replace', replaceOptionValueToRegexp(filter)!]), ); expect(modified).to.be.eql(loadRequestSample(getRequestSamplePath(url + '.modified'))); });