Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify html filters usage #12

Merged
merged 6 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 3 additions & 11 deletions packages/adblocker-webextension/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import { parse } from 'tldts-experimental';
import {
FiltersEngine,
HTMLSelector,
HTMLModifier,
isUTF8,
Request,
StreamingHtmlFilter,
Expand Down Expand Up @@ -136,13 +135,12 @@ export function filterRequestHTML(
filterResponseData: Browser['webRequest']['filterResponseData'],
{ id }: { id: string },
rules: HTMLSelector[],
modifiers: HTMLModifier[],
): void {
// Create filter to observe loading of resource
const filter = filterResponseData(id) as StreamFilter;
const decoder = new TextDecoder();
const encoder = new TextEncoder();
const htmlFilter = new StreamingHtmlFilter(rules, modifiers);
const htmlFilter = new StreamingHtmlFilter(rules);

const teardown = (event: { data?: ArrayBuffer }) => {
// Before disconnecting our streaming filter, we need to be extra careful
Expand Down Expand Up @@ -408,14 +406,8 @@ export class WebExtensionBlocker extends FiltersEngine {
typeof TextEncoder !== 'undefined'
) {
const htmlFilters = this.getHtmlFilters(request);
const htmlModifiers = this.getHtmlModifiers(request);
if (htmlFilters.length !== 0 && htmlModifiers.length !== 0) {
filterRequestHTML(
browser.webRequest.filterResponseData,
request,
htmlFilters,
htmlModifiers,
);
if (htmlFilters.length !== 0) {
filterRequestHTML(browser.webRequest.filterResponseData, request, htmlFilters);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion packages/adblocker/src/engine/bucket/network.ts
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ export default class NetworkFilterBucket {
return this.badFiltersIds.has(filter.getId());
}

public getHtmlModifiers(
public getHTMLFilters(
request: Request,
isFilterExcluded?: (filter: NetworkFilter) => boolean,
): NetworkFilter[] {
Expand Down
122 changes: 59 additions & 63 deletions packages/adblocker/src/engine/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,16 @@ type NetworkFilterMatchingContext = {
filterType: FilterType.NETWORK;
};

type CosmeticFilterMatchingContext = {
url: string;
callerContext: any; // Additional context given from user
filterType: FilterType.COSMETIC;
};
type CosmeticFilterMatchingContext =
| {
url: string;
callerContext: any; // Additional context given from user
filterType: FilterType.COSMETIC;
}
| {
request: Request; // For HTML Filters
filterType: FilterType.COSMETIC;
};

type NetworkFilterMatchEvent = (request: Request, result: BlockingResponse) => void;

Expand Down Expand Up @@ -835,79 +840,70 @@ export default class FilterEngine extends EventEmitter<EngineEventHandlers> {
/**
* Return a list of HTML filtering rules.
*/
public getHtmlFilters({
// Page information
url,
hostname,
domain,

callerContext,
}: {
url: string;
hostname: string;
domain: string | null | undefined;

callerContext?: any | undefined;
}): HTMLSelector[] {
public getHtmlFilters(request: Request): HTMLSelector[] {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this is actually a breaking change.

const htmlSelectors: HTMLSelector[] = [];

if (this.config.enableHtmlFiltering === false || this.config.loadCosmeticFilters === false) {
if (this.config.enableHtmlFiltering === false) {
return htmlSelectors;
}

domain ||= '';
if (this.config.loadCosmeticFilters === true) {
const domain = request.domain || '';

const { filters, unhides } = this.cosmetics.getHtmlFilters({
domain,
hostname,
isFilterExcluded: this.isFilterExcluded.bind(this),
});
const exceptions = new Map(unhides.map((unhide) => [unhide.getSelector(), unhide]));
const { filters, unhides } = this.cosmetics.getHtmlFilters({
domain,
hostname: request.hostname,
isFilterExcluded: this.isFilterExcluded.bind(this),
});
const exceptions = new Map(unhides.map((unhide) => [unhide.getSelector(), unhide]));

for (const filter of filters) {
const extended = filter.getExtendedSelector();
if (extended === undefined) {
continue;
}
const exception = exceptions.get(filter.getSelector());
if (exception !== undefined) {
htmlSelectors.push(extended);
for (const filter of filters) {
const extended = filter.getExtendedSelector();
if (extended === undefined) {
continue;
}
const exception = exceptions.get(filter.getSelector());
if (exception !== undefined) {
htmlSelectors.push(extended);
}
this.emit(
'filter-matched',
{ filter, exception },
{
request,
filterType: FilterType.COSMETIC,
},
);
}
this.emit(
'filter-matched',
{ filter, exception },
{
url,
callerContext,
filterType: FilterType.COSMETIC,
},
);
}

if (htmlSelectors.length !== 0) {
this.emit('html-filtered', htmlSelectors, url);
}

return htmlSelectors;
}

public getHtmlModifiers(request: Request): HTMLModifier[] {
const htmlModifiers: HTMLModifier[] = [];
if (this.config.loadNetworkFilters === true) {
const replaceFilters = this.filters.getHTMLFilters(
request,
this.isFilterExcluded.bind(this),
);

if (this.config.enableHtmlFiltering === false || this.config.loadNetworkFilters === false) {
return htmlModifiers;
for (const filter of replaceFilters) {
const modifier = filter.getHtmlModifier();
if (modifier !== null) {
htmlSelectors.push(['replace', modifier]);
this.emit(
'filter-matched',
{ filter, exception: undefined },
{
request,
filterType: FilterType.COSMETIC,
},
);
}
}
}

const filters = this.filters.getHtmlModifiers(request, this.isFilterExcluded.bind(this));

for (const filter of filters) {
const modifier = filter.getHtmlModifier();
if (modifier !== null) {
htmlModifiers.push(modifier);
}
if (htmlSelectors.length !== 0) {
this.emit('html-filtered', htmlSelectors, request.url);
}

return htmlModifiers;
return htmlSelectors;
}

/**
Expand Down
26 changes: 20 additions & 6 deletions packages/adblocker/src/html-filtering.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
// which is able to consume an HTML document over time and filter part of it
// using adblocker selectors.

export type HTMLSelector = readonly ['script', readonly string[]];
export type HTMLModifier = readonly [RegExp, string];

export type HTMLSelector =
| readonly ['script', readonly string[]]
| readonly ['replace', HTMLModifier];
Comment on lines +13 to +17
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about we rename HTMLModifier to HTMLReplacer?


export function extractHTMLSelectorFromRule(rule: string): HTMLSelector | undefined {
if (rule.startsWith('^script') === false) {
Expand Down Expand Up @@ -139,7 +143,10 @@ type Patterns = readonly [readonly string[], readonly RegExp[]][];
export function extractSelectorsFromRules(filter: HTMLSelector[]): Patterns {
const patterns: [string[], RegExp[]][] = [];

for (const [, selectors] of filter) {
for (const [type, selectors] of filter) {
if (type !== 'script') {
continue;
}
const plainPatterns: string[] = [];
const regexpPatterns: RegExp[] = [];

Expand Down Expand Up @@ -220,8 +227,6 @@ export function removeTagsFromHtml(html: string, toRemove: [number, string][]):
return filteredHtml;
}

export type HTMLModifier = readonly [RegExp, string];

function applyModifiersToHtml(html: string, modifiers: HTMLModifier[]): string {
if (modifiers.length === 0) {
return html;
Expand All @@ -239,9 +244,18 @@ export default class StreamingHtmlFilter {
private readonly patterns: Patterns;
private readonly modifiers: HTMLModifier[];

constructor(selectors: HTMLSelector[], modifiers: HTMLModifier[] = []) {
constructor(selectors: HTMLSelector[]) {
this.buffer = '';
this.patterns = extractSelectorsFromRules(selectors);
const modifiers = [];
const rules = [];
for (const selector of selectors) {
if (selector[0] === 'replace') {
modifiers.push(selector[1]);
} else if (selector[0] === 'script') {
rules.push(selector);
}
}
this.patterns = extractSelectorsFromRules(rules);
this.modifiers = modifiers;
}

Expand Down
Loading