Skip to content

Commit

Permalink
Simplify html filters usage (#12)
Browse files (browse the repository at this point in the history)
  • Loading branch information
chrmod authored Aug 6, 2024
1 parent 175582a commit fb82f74
Show file tree
Hide file tree
Showing 6 changed files with 127 additions and 116 deletions.
14 changes: 3 additions & 11 deletions packages/adblocker-webextension/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import { parse } from 'tldts-experimental';
import {
FiltersEngine,
HTMLSelector,
HTMLModifier,
isUTF8,
Request,
StreamingHtmlFilter,
Expand Down Expand Up @@ -136,13 +135,12 @@ export function filterRequestHTML(
filterResponseData: Browser['webRequest']['filterResponseData'],
{ id }: { id: string },
rules: HTMLSelector[],
modifiers: HTMLModifier[],
): void {
// Create filter to observe loading of resource
const filter = filterResponseData(id) as StreamFilter;
const decoder = new TextDecoder();
const encoder = new TextEncoder();
const htmlFilter = new StreamingHtmlFilter(rules, modifiers);
const htmlFilter = new StreamingHtmlFilter(rules);

const teardown = (event: { data?: ArrayBuffer }) => {
// Before disconnecting our streaming filter, we need to be extra careful
Expand Down Expand Up @@ -408,14 +406,8 @@ export class WebExtensionBlocker extends FiltersEngine {
typeof TextEncoder !== 'undefined'
) {
const htmlFilters = this.getHtmlFilters(request);
const htmlModifiers = this.getHtmlModifiers(request);
if (htmlFilters.length !== 0 && htmlModifiers.length !== 0) {
filterRequestHTML(
browser.webRequest.filterResponseData,
request,
htmlFilters,
htmlModifiers,
);
if (htmlFilters.length !== 0) {
filterRequestHTML(browser.webRequest.filterResponseData, request, htmlFilters);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion packages/adblocker/src/engine/bucket/network.ts
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ export default class NetworkFilterBucket {
return this.badFiltersIds.has(filter.getId());
}

public getHtmlModifiers(
public getHTMLFilters(
request: Request,
isFilterExcluded?: (filter: NetworkFilter) => boolean,
): NetworkFilter[] {
Expand Down
138 changes: 74 additions & 64 deletions packages/adblocker/src/engine/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import {
fetchResources,
fullLists,
} from '../fetch.js';
import { HTMLModifier, HTMLSelector } from '../html-filtering.js';
import { HTMLSelector } from '../html-filtering.js';
import CosmeticFilter from '../filters/cosmetic.js';
import NetworkFilter from '../filters/network.js';
import { block } from '../filters/dsl.js';
Expand Down Expand Up @@ -95,11 +95,16 @@ type NetworkFilterMatchingContext = {
filterType: FilterType.NETWORK;
};

type CosmeticFilterMatchingContext = {
url: string;
callerContext: any; // Additional context given from user
filterType: FilterType.COSMETIC;
};
type CosmeticFilterMatchingContext =
| {
url: string;
callerContext: any; // Additional context given from user
filterType: FilterType.COSMETIC;
}
| {
request: Request; // For HTML Filters
filterType: FilterType.COSMETIC;
};

type NetworkFilterMatchEvent = (request: Request, result: BlockingResponse) => void;

Expand Down Expand Up @@ -835,81 +840,86 @@ export default class FilterEngine extends EventEmitter<EngineEventHandlers> {
/**
* Return a list of HTML filtering rules.
*/
public getHtmlFilters({
// Page information
url,
hostname,
domain,

callerContext,
}: {
url: string;
hostname: string;
domain: string | null | undefined;

callerContext?: any | undefined;
}): HTMLSelector[] {
public getHtmlFilters(request: Request): HTMLSelector[] {
const htmlSelectors: HTMLSelector[] = [];

if (this.config.enableHtmlFiltering === false || this.config.loadCosmeticFilters === false) {
if (this.config.enableHtmlFiltering === false) {
return htmlSelectors;
}

domain ||= '';
if (this.config.loadCosmeticFilters === true) {
const domain = request.domain || '';

const { filters, unhides } = this.cosmetics.getHtmlFilters({
domain,
hostname,
isFilterExcluded: this.isFilterExcluded.bind(this),
});
const exceptions = new Map(unhides.map((unhide) => [unhide.getSelector(), unhide]));
const { filters, unhides } = this.cosmetics.getHtmlFilters({
domain,
hostname: request.hostname,
isFilterExcluded: this.isFilterExcluded.bind(this),
});
const exceptions = new Map(unhides.map((unhide) => [unhide.getSelector(), unhide]));

for (const filter of filters) {
const extended = filter.getExtendedSelector();
if (extended === undefined) {
continue;
}
const exception = exceptions.get(filter.getSelector());
if (exception !== undefined) {
htmlSelectors.push(extended);
for (const filter of filters) {
const extended = filter.getExtendedSelector();
if (extended === undefined) {
continue;
}
const exception = exceptions.get(filter.getSelector());
if (exception !== undefined) {
htmlSelectors.push(extended);
}
this.emit(
'filter-matched',
{ filter, exception },
{
request,
filterType: FilterType.COSMETIC,
},
);
}
this.emit(
'filter-matched',
{ filter, exception },
{
url,
callerContext,
filterType: FilterType.COSMETIC,
},
);
}

if (htmlSelectors.length !== 0) {
this.emit('html-filtered', htmlSelectors, url);
}
if (this.config.loadNetworkFilters === true) {
const replaceFilters = this.filters.getHTMLFilters(
request,
this.isFilterExcluded.bind(this),
);

return htmlSelectors;
}
if (replaceFilters.length !== 0) {
const exception = this.exceptions.match(request, this.isFilterExcluded.bind(this));
let modifiers = [];
for (const filter of replaceFilters) {
const modifier = filter.getHtmlModifier();

public getHtmlModifiers(request: Request): HTMLModifier[] {
const htmlModifiers: HTMLModifier[] = [];
if (modifier !== null) {
if (!exception) {
modifiers.push(['replace', modifier]);
}

if (this.config.enableHtmlFiltering === false || this.config.loadNetworkFilters === false) {
return htmlModifiers;
this.emit(
'filter-matched',
{ filter, exception },
{
request,
filterType: FilterType.COSMETIC,
},
);
} else {
// Disable all replace modifiers if empty replace modifier found
modifiers = [];
break;
}
}
}

if (modifiers.length !== 0) {
htmlSelectors.push(...modifiers.map((modifier) => ['replace', modifier]));
}
}

const filters = this.filters.getHtmlModifiers(request, this.isFilterExcluded.bind(this));

for (const filter of filters) {
const modifier = filter.getHtmlModifier();
// Disable all replace modifiers if empty replace modifier found
if (modifier === null) {
return [];
}
htmlModifiers.push(modifier);
if (htmlSelectors.length !== 0) {
this.emit('html-filtered', htmlSelectors, request.url);
}

return htmlModifiers;
return htmlSelectors;
}

/**
Expand Down
26 changes: 20 additions & 6 deletions packages/adblocker/src/html-filtering.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
// which is able to consume an HTML document over time and filter part of it
// using adblocker selectors.

export type HTMLSelector = readonly ['script', readonly string[]];
export type HTMLModifier = readonly [RegExp, string];

export type HTMLSelector =
| readonly ['script', readonly string[]]
| readonly ['replace', HTMLModifier];

export function extractHTMLSelectorFromRule(rule: string): HTMLSelector | undefined {
if (rule.startsWith('^script') === false) {
Expand Down Expand Up @@ -139,7 +143,10 @@ type Patterns = readonly [readonly string[], readonly RegExp[]][];
export function extractSelectorsFromRules(filter: HTMLSelector[]): Patterns {
const patterns: [string[], RegExp[]][] = [];

for (const [, selectors] of filter) {
for (const [type, selectors] of filter) {
if (type !== 'script') {
continue;
}
const plainPatterns: string[] = [];
const regexpPatterns: RegExp[] = [];

Expand Down Expand Up @@ -220,8 +227,6 @@ export function removeTagsFromHtml(html: string, toRemove: [number, string][]):
return filteredHtml;
}

export type HTMLModifier = readonly [RegExp, string];

function applyModifiersToHtml(html: string, modifiers: HTMLModifier[]): string {
if (modifiers.length === 0) {
return html;
Expand All @@ -239,9 +244,18 @@ export default class StreamingHtmlFilter {
private readonly patterns: Patterns;
private readonly modifiers: HTMLModifier[];

constructor(selectors: HTMLSelector[], modifiers: HTMLModifier[] = []) {
constructor(selectors: HTMLSelector[]) {
this.buffer = '';
this.patterns = extractSelectorsFromRules(selectors);
const modifiers = [];
const rules = [];
for (const selector of selectors) {
if (selector[0] === 'replace') {
modifiers.push(selector[1]);
} else if (selector[0] === 'script') {
rules.push(selector);
}
}
this.patterns = extractSelectorsFromRules(rules);
this.modifiers = modifiers;
}

Expand Down
2 changes: 1 addition & 1 deletion packages/adblocker/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,4 @@ export { isUTF8 } from './encoding.js';
export { default as Config } from './config.js';
export { default as Resources } from './resources.js';
export { default as StreamingHtmlFilter } from './html-filtering.js';
export type { HTMLModifier, HTMLSelector } from './html-filtering.js';
export type { HTMLSelector } from './html-filtering.js';
Loading

0 comments on commit fb82f74

Please sign in to comment.