Skip to content

Commit

Permalink
HTML bucket (#13)
Browse files Browse the repository at this point in the history
  • Loading branch information
chrmod authored Aug 13, 2024
1 parent 8d480c0 commit a545f4a
Show file tree
Hide file tree
Showing 6 changed files with 313 additions and 231 deletions.
4 changes: 1 addition & 3 deletions packages/adblocker-webextension/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -404,9 +404,7 @@ export class WebExtensionBlocker extends FiltersEngine {
typeof TextDecoder !== 'undefined' &&
typeof TextEncoder !== 'undefined'
) {
const htmlFilters = this.getHtmlFilters(request, {
selectors: request.isMainFrame() === true ? ['script', 'replace'] : undefined,
});
const htmlFilters = this.getHtmlFilters(request);
if (htmlFilters.length !== 0) {
filterRequestHTML(browser.webRequest.filterResponseData, request, htmlFilters);
}
Expand Down
63 changes: 1 addition & 62 deletions packages/adblocker/src/engine/bucket/cosmetic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ function createStylesheetFromRules(rules: CosmeticFilter[]): string {
return createStylesheet(selectors, DEFAULT_HIDDING_STYLE);
}

function createLookupTokens(hostname: string, domain: string): Uint32Array {
export function createLookupTokens(hostname: string, domain: string): Uint32Array {
const hostnamesHashes = getHostnameHashesFromLabelsBackward(hostname, domain);
const entitiesHashes = getEntityHashesFromLabelsBackward(hostname, domain);
const tokens = new Uint32Array(hostnamesHashes.length + entitiesHashes.length);
Expand Down Expand Up @@ -160,13 +160,6 @@ export default class CosmeticFilterBucket {
config,
);

bucket.htmlIndex = ReverseIndex.deserialize(
buffer,
CosmeticFilter.deserialize,
noopOptimizeCosmetic,
config,
);

bucket.idsIndex = ReverseIndex.deserialize(
buffer,
CosmeticFilter.deserialize,
Expand Down Expand Up @@ -198,7 +191,6 @@ export default class CosmeticFilterBucket {
public classesIndex: ReverseIndex<CosmeticFilter>;
public hostnameIndex: ReverseIndex<CosmeticFilter>;
public hrefsIndex: ReverseIndex<CosmeticFilter>;
public htmlIndex: ReverseIndex<CosmeticFilter>;
public idsIndex: ReverseIndex<CosmeticFilter>;
public unhideIndex: ReverseIndex<CosmeticFilter>;

Expand Down Expand Up @@ -234,13 +226,6 @@ export default class CosmeticFilterBucket {
optimize: noopOptimizeCosmetic,
});

this.htmlIndex = new ReverseIndex({
config,
deserialize: CosmeticFilter.deserialize,
filters: [],
optimize: noopOptimizeCosmetic,
});

this.idsIndex = new ReverseIndex({
config,
deserialize: CosmeticFilter.deserialize,
Expand Down Expand Up @@ -271,7 +256,6 @@ export default class CosmeticFilterBucket {
this.classesIndex.getFilters(),
this.hostnameIndex.getFilters(),
this.hrefsIndex.getFilters(),
this.htmlIndex.getFilters(),
this.idsIndex.getFilters(),
this.unhideIndex.getFilters(),
);
Expand All @@ -286,15 +270,12 @@ export default class CosmeticFilterBucket {
const genericHideRules: CosmeticFilter[] = [];
const hostnameSpecificRules: CosmeticFilter[] = [];
const hrefSelectors: CosmeticFilter[] = [];
const htmlRules: CosmeticFilter[] = [];
const idSelectors: CosmeticFilter[] = [];
const unHideRules: CosmeticFilter[] = [];

for (const rule of newFilters) {
if (rule.isUnhide()) {
unHideRules.push(rule);
} else if (rule.isHtmlFiltering()) {
htmlRules.push(rule);
} else if (rule.isGenericHide()) {
if (rule.isClassSelector()) {
classSelectors.push(rule);
Expand All @@ -314,7 +295,6 @@ export default class CosmeticFilterBucket {
this.classesIndex.update(classSelectors, removedFilters);
this.hostnameIndex.update(hostnameSpecificRules, removedFilters);
this.hrefsIndex.update(hrefSelectors, removedFilters);
this.htmlIndex.update(htmlRules, removedFilters);
this.idsIndex.update(idSelectors, removedFilters);
this.unhideIndex.update(unHideRules, removedFilters);
}
Expand All @@ -325,7 +305,6 @@ export default class CosmeticFilterBucket {
this.classesIndex.getSerializedSize() +
this.hostnameIndex.getSerializedSize() +
this.hrefsIndex.getSerializedSize() +
this.htmlIndex.getSerializedSize() +
this.idsIndex.getSerializedSize() +
this.unhideIndex.getSerializedSize()
);
Expand All @@ -336,50 +315,10 @@ export default class CosmeticFilterBucket {
this.classesIndex.serialize(buffer);
this.hostnameIndex.serialize(buffer);
this.hrefsIndex.serialize(buffer);
this.htmlIndex.serialize(buffer);
this.idsIndex.serialize(buffer);
this.unhideIndex.serialize(buffer);
}

/**
 * Look up the filters stored in `htmlIndex` which match a given
 * hostname/domain pair, together with the unhide rules from `unhideIndex`
 * matching the same pair.
 *
 * @param domain - domain passed to `createLookupTokens` and to each rule's `match`.
 * @param hostname - hostname passed to `createLookupTokens` and to each rule's `match`.
 * @param isFilterExcluded - optional predicate; a rule for which it returns a
 * truthy value is left out of the result.
 * @returns the matching `filters` and `unhides`. `unhides` is only populated
 * when at least one filter matched.
 */
public getHtmlFilters({
  domain,
  hostname,

  isFilterExcluded,
}: {
  domain: string;
  hostname: string;

  isFilterExcluded?: (filter: CosmeticFilter) => boolean;
}): { filters: CosmeticFilter[]; unhides: CosmeticFilter[] } {
  const filters: CosmeticFilter[] = [];
  const unhides: CosmeticFilter[] = [];

  // The same lookup tokens, derived from `hostname` and `domain`, are shared
  // by both reverse index lookups below.
  const lookupTokens = createLookupTokens(hostname, domain);

  this.htmlIndex.iterMatchingFilters(lookupTokens, (candidate: CosmeticFilter) => {
    const selected = candidate.match(hostname, domain) && !isFilterExcluded?.(candidate);
    if (selected) {
      filters.push(candidate);
    }
    // Always continue iterating so that every candidate is visited.
    return true;
  });

  // Without any matching filter there is nothing to unhide: skip the lookup.
  if (filters.length === 0) {
    return { filters, unhides };
  }

  this.unhideIndex.iterMatchingFilters(lookupTokens, (candidate: CosmeticFilter) => {
    const selected = candidate.match(hostname, domain) && !isFilterExcluded?.(candidate);
    if (selected) {
      unhides.push(candidate);
    }
    return true;
  });

  return { filters, unhides };
}

/**
* Request cosmetics and scripts to inject in a page.
*/
Expand Down
213 changes: 213 additions & 0 deletions packages/adblocker/src/engine/bucket/html.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
/*!
* Copyright (c) 2017-present Ghostery GmbH. All rights reserved.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
import Config from '../../config.js';
import { StaticDataView } from '../../data-view.js';
import NetworkFilter from '../../filters/network.js';
import CosmeticFilter from '../../filters/cosmetic.js';
import Request from '../../request.js';
import { noopOptimizeNetwork, optimizeNetwork, noopOptimizeCosmetic } from '../optimizer.js';
import ReverseIndex from '../reverse-index.js';
import { createLookupTokens } from './cosmetic.js';

/**
 * Bucket keeping network and cosmetic filters in four separate reverse
 * indices: network filters, network exceptions, cosmetic filters and cosmetic
 * unhide rules.
 */
export default class HTMLBucket {
  public networkIndex: ReverseIndex<NetworkFilter>;
  public exceptionsIndex: ReverseIndex<NetworkFilter>;
  public cosmeticIndex: ReverseIndex<CosmeticFilter>;
  public unhideIndex: ReverseIndex<CosmeticFilter>;

  /**
   * Re-create a bucket from `buffer`. Indices are read in the order network,
   * exceptions, cosmetic, unhide, which is the order used by `serialize`.
   *
   * @param buffer - view the four indices are deserialized from.
   * @param config - configuration forwarded to the bucket and to every index.
   * @returns a bucket whose four indices come from `buffer`.
   */
  public static deserialize(buffer: StaticDataView, config: Config): HTMLBucket {
    const bucket = new HTMLBucket({ config });
    const optimize = config.enableOptimizations ? optimizeNetwork : noopOptimizeNetwork;

    bucket.networkIndex = ReverseIndex.deserialize(
      buffer,
      NetworkFilter.deserialize,
      optimize,
      config,
    );
    bucket.exceptionsIndex = ReverseIndex.deserialize(
      buffer,
      NetworkFilter.deserialize,
      optimize,
      config,
    );
    bucket.cosmeticIndex = ReverseIndex.deserialize(
      buffer,
      CosmeticFilter.deserialize,
      noopOptimizeCosmetic,
      config,
    );
    bucket.unhideIndex = ReverseIndex.deserialize(
      buffer,
      CosmeticFilter.deserialize,
      noopOptimizeCosmetic,
      config,
    );

    return bucket;
  }

  /**
   * Create the four (initially empty) indices, then index `filters` if any
   * were given.
   *
   * @param filters - optional initial filters, dispatched through `update`.
   * @param config - configuration forwarded to every index;
   * `enableOptimizations` selects the optimizer of the two network indices.
   */
  constructor({
    filters = [],
    config,
  }: {
    filters?: (CosmeticFilter | NetworkFilter)[];
    config: Config;
  }) {
    const optimize = config.enableOptimizations ? optimizeNetwork : noopOptimizeNetwork;

    this.networkIndex = new ReverseIndex({
      config,
      deserialize: NetworkFilter.deserialize,
      filters: [],
      optimize,
    });
    this.exceptionsIndex = new ReverseIndex({
      config,
      deserialize: NetworkFilter.deserialize,
      filters: [],
      optimize,
    });
    this.cosmeticIndex = new ReverseIndex({
      config,
      deserialize: CosmeticFilter.deserialize,
      filters: [],
      optimize: noopOptimizeCosmetic,
    });
    this.unhideIndex = new ReverseIndex({
      config,
      deserialize: CosmeticFilter.deserialize,
      filters: [],
      optimize: noopOptimizeCosmetic,
    });

    if (filters.length > 0) {
      this.update(filters, undefined);
    }
  }

  /**
   * Dispatch `newFilters` to the four indices and forward `removedFilters` to
   * each of them. Filters which are neither network nor cosmetic filters are
   * ignored.
   *
   * @param newFilters - filters to add.
   * @param removedFilters - forwarded unchanged to every index `update`.
   */
  public update(
    newFilters: (NetworkFilter | CosmeticFilter)[],
    removedFilters: Set<number> | undefined,
  ): void {
    const blocking: NetworkFilter[] = [];
    const exceptions: NetworkFilter[] = [];
    const cosmetics: CosmeticFilter[] = [];
    const unhides: CosmeticFilter[] = [];

    for (const filter of newFilters) {
      if (filter.isNetworkFilter()) {
        const network = filter as NetworkFilter;
        const target = network.isException() ? exceptions : blocking;
        target.push(network);
        continue;
      }

      if (filter.isCosmeticFilter()) {
        const cosmetic = filter as CosmeticFilter;
        const target = cosmetic.isUnhide() ? unhides : cosmetics;
        target.push(cosmetic);
      }
    }

    this.networkIndex.update(blocking, removedFilters);
    this.exceptionsIndex.update(exceptions, removedFilters);
    this.cosmeticIndex.update(cosmetics, removedFilters);
    this.unhideIndex.update(unhides, removedFilters);
  }

  /**
   * Write the four indices to `buffer` in the order network, exceptions,
   * cosmetic, unhide (the order `deserialize` reads them back).
   *
   * @param buffer - view receiving the serialized indices.
   */
  public serialize(buffer: StaticDataView): void {
    const indices = [
      this.networkIndex,
      this.exceptionsIndex,
      this.cosmeticIndex,
      this.unhideIndex,
    ];
    for (const index of indices) {
      index.serialize(buffer);
    }
  }

  /**
   * @returns the sum of the serialized sizes reported by the four indices.
   */
  public getSerializedSize(): number {
    let size = this.networkIndex.getSerializedSize();
    size += this.exceptionsIndex.getSerializedSize();
    size += this.cosmeticIndex.getSerializedSize();
    size += this.unhideIndex.getSerializedSize();
    return size;
  }

  /**
   * Collect the filters of this bucket which match `request`.
   *
   * Network filters and exceptions are looked up for every request using the
   * request tokens. Cosmetic filters are only looked up when
   * `request.isMainFrame()` is truthy, and unhide rules only when at least one
   * cosmetic filter matched.
   *
   * @param request - request to match filters against.
   * @param isFilterExcluded - optional predicate; a filter for which it
   * returns a truthy value is left out of the result.
   * @returns the matching filters, grouped by kind.
   */
  public getHTMLFilters(
    request: Request,
    isFilterExcluded?: (filter: NetworkFilter | CosmeticFilter) => boolean,
  ): {
    networkFilters: NetworkFilter[];
    cosmeticFilters: CosmeticFilter[];
    exceptions: NetworkFilter[];
    unhides: CosmeticFilter[];
  } {
    const matched: {
      networkFilters: NetworkFilter[];
      cosmeticFilters: CosmeticFilter[];
      exceptions: NetworkFilter[];
      unhides: CosmeticFilter[];
    } = {
      networkFilters: [],
      cosmeticFilters: [],
      unhides: [],
      exceptions: [],
    };

    this.networkIndex.iterMatchingFilters(request.getTokens(), (candidate: NetworkFilter) => {
      const selected = candidate.match(request) && !isFilterExcluded?.(candidate);
      if (selected) {
        matched.networkFilters.push(candidate);
      }
      // Always continue iterating so that every candidate is visited.
      return true;
    });

    this.exceptionsIndex.iterMatchingFilters(request.getTokens(), (candidate: NetworkFilter) => {
      const selected = candidate.match(request) && !isFilterExcluded?.(candidate);
      if (selected) {
        matched.exceptions.push(candidate);
      }
      return true;
    });

    // Cosmetic rules are only considered for main frame requests.
    if (!request.isMainFrame()) {
      return matched;
    }

    // A missing `domain` is replaced by the empty string.
    const { hostname, domain = '' } = request;
    const lookupTokens = createLookupTokens(hostname, domain);

    this.cosmeticIndex.iterMatchingFilters(lookupTokens, (candidate: CosmeticFilter) => {
      const selected = candidate.match(hostname, domain) && !isFilterExcluded?.(candidate);
      if (selected) {
        matched.cosmeticFilters.push(candidate);
      }
      return true;
    });

    // Unhide rules are only relevant if at least one cosmetic filter matched.
    if (matched.cosmeticFilters.length === 0) {
      return matched;
    }

    this.unhideIndex.iterMatchingFilters(lookupTokens, (candidate: CosmeticFilter) => {
      const selected = candidate.match(hostname, domain) && !isFilterExcluded?.(candidate);
      if (selected) {
        matched.unhides.push(candidate);
      }
      return true;
    });

    return matched;
  }

  /**
   * @returns a new array with the filters of all four indices, in the order
   * network, exceptions, cosmetic, unhide.
   */
  public getFilters(): (NetworkFilter | CosmeticFilter)[] {
    return [
      ...this.networkIndex.getFilters(),
      ...this.exceptionsIndex.getFilters(),
      ...this.cosmeticIndex.getFilters(),
      ...this.unhideIndex.getFilters(),
    ];
  }
}
Loading

0 comments on commit a545f4a

Please sign in to comment.