From d76074e86ee54f9de0de30f6c8c2507c3e4d4451 Mon Sep 17 00:00:00 2001 From: surilindur Date: Mon, 12 Feb 2024 17:31:38 +0100 Subject: [PATCH] Extract all query terms, group s p o, use distinct filters --- ...rmediaLinksQueueWrapperMembershipFilter.ts | 63 ++++++++++--------- .../lib/LinkQueueMembershipFilter.ts | 31 ++++++--- .../lib/MembershipFilterStorage.ts | 6 +- 3 files changed, 57 insertions(+), 43 deletions(-) diff --git a/packages/actor-rdf-resolve-hypermedia-links-queue-wrapper-membership-filter/lib/ActorRdfResolveHypermediaLinksQueueWrapperMembershipFilter.ts b/packages/actor-rdf-resolve-hypermedia-links-queue-wrapper-membership-filter/lib/ActorRdfResolveHypermediaLinksQueueWrapperMembershipFilter.ts index 1f12cb0..14209b7 100644 --- a/packages/actor-rdf-resolve-hypermedia-links-queue-wrapper-membership-filter/lib/ActorRdfResolveHypermediaLinksQueueWrapperMembershipFilter.ts +++ b/packages/actor-rdf-resolve-hypermedia-links-queue-wrapper-membership-filter/lib/ActorRdfResolveHypermediaLinksQueueWrapperMembershipFilter.ts @@ -8,7 +8,7 @@ import { import { KeysInitQuery } from '@comunica/context-entries'; import { ActionContextKey, type IActorArgs, type IActorTest } from '@comunica/core'; import type * as RDF from '@rdfjs/types'; -import { Util, type Algebra } from 'sparqlalgebrajs'; +import { Util } from 'sparqlalgebrajs'; import { LinkQueueMembershipFilter } from './LinkQueueMembershipFilter'; /** @@ -58,45 +58,48 @@ export class ActorRdfResolveHypermediaLinksQueueWrapperMembershipFilter extends linkQueue, queryTerms, filterStorage: membershipFilterStorage, - members: this.members, ignorePatterns: this.ignorePatterns, }), }; } - protected extractQueryTermsFromContext(action: IActionRdfResolveHypermediaLinksQueue): Set { - const queryPatterns: Algebra.Pattern[] = []; - const queryTerms = new Set(); + protected extractQueryTermsFromContext(action: IActionRdfResolveHypermediaLinksQueue): Map { + const terms: Map = new Map(); + const registerNode = (member: 
string, term: RDF.Term): void => { + if (this.members.has(member)) { + let members = terms.get(member); + if (!members) { + members = []; + terms.set(member, members); + } + if (!members.some(mem => mem.equals(term))) { + members.push(term); + } + } + }; + + // TODO: Check if there are any other elements in the parsed query that should be extracted Util.recurseOperation(action.context.getSafe(KeysInitQuery.query), { pattern(pattern) { - queryPatterns.push(pattern); - return true; + if (pattern.subject.termType === 'NamedNode') { + registerNode(ActorRdfResolveHypermediaLinksQueueWrapperMembershipFilter.RDF_SUBJECT, pattern.subject); + } + if (pattern.predicate.termType === 'NamedNode') { + registerNode(ActorRdfResolveHypermediaLinksQueueWrapperMembershipFilter.RDF_PREDICATE, pattern.predicate); + } + if (pattern.object.termType === 'NamedNode') { + registerNode(ActorRdfResolveHypermediaLinksQueueWrapperMembershipFilter.RDF_OBJECT, pattern.object); + } + return false; + }, + link(link) { + registerNode(ActorRdfResolveHypermediaLinksQueueWrapperMembershipFilter.RDF_PREDICATE, link.iri); + return false; }, }); - for (const pattern of queryPatterns) { - if ( - pattern.subject.termType === 'NamedNode' && - this.members.has(ActorRdfResolveHypermediaLinksQueueWrapperMembershipFilter.RDF_SUBJECT) - ) { - queryTerms.add(pattern.subject); - } - if ( - pattern.predicate.termType === 'NamedNode' && - this.members.has(ActorRdfResolveHypermediaLinksQueueWrapperMembershipFilter.RDF_SUBJECT) - ) { - queryTerms.add(pattern.predicate); - } - if ( - pattern.object.termType === 'NamedNode' && - this.members.has(ActorRdfResolveHypermediaLinksQueueWrapperMembershipFilter.RDF_OBJECT) - ) { - queryTerms.add(pattern.object); - } - } - - return queryTerms; + return terms; } } @@ -111,7 +114,7 @@ export interface IActorRdfResolveHypermediaLinksQueueWrapperMembershipFilterArgs */ ignorePatterns?: string[]; /** - * The exact set of triple members a filter needs to cover to be used for pruning 
of links. + * The link queue filter will only consider filters for these triple members. */ members: string[]; } diff --git a/packages/actor-rdf-resolve-hypermedia-links-queue-wrapper-membership-filter/lib/LinkQueueMembershipFilter.ts b/packages/actor-rdf-resolve-hypermedia-links-queue-wrapper-membership-filter/lib/LinkQueueMembershipFilter.ts index 17f06f7..0124321 100644 --- a/packages/actor-rdf-resolve-hypermedia-links-queue-wrapper-membership-filter/lib/LinkQueueMembershipFilter.ts +++ b/packages/actor-rdf-resolve-hypermedia-links-queue-wrapper-membership-filter/lib/LinkQueueMembershipFilter.ts @@ -7,17 +7,16 @@ import type * as RDF from '@rdfjs/types'; * A link queue that filters out links based on known membership filters. */ export class LinkQueueMembershipFilter extends LinkQueueWrapper { - private readonly queryTerms: RDF.Term[]; + private readonly queryTerms: Map; private readonly filterStorage: IMembershipFilterStorage; private readonly ignorePatterns: RegExp[] | undefined; - private readonly members: string[]; public constructor(args: ILinkQueueMembershipFilterArgs) { super(args.linkQueue); this.filterStorage = args.filterStorage; this.ignorePatterns = args.ignorePatterns; - this.queryTerms = [ ...args.queryTerms.values() ]; - this.members = [ ...args.members.values() ]; + this.queryTerms = args.queryTerms; + // Console.log(this.queryTerms); } /** @@ -26,11 +25,24 @@ export class LinkQueueMembershipFilter extends LinkQueueWrapper { * @returns Whether the link should be accepted */ private acceptable(link: ILink): boolean { - if (!this.ignorePatterns?.some(pattern => pattern.test(link.url))) { - const filter = this.filterStorage.get(link.url, this.members); - return !filter || this.queryTerms.some(term => filter.test(term)); + if (this.ignorePatterns?.some(pattern => pattern.test(link.url))) { + return true; } - return true; + for (const [ member, terms ] of this.queryTerms) { + const filter = this.filterStorage.get(link.url, member); + if (!filter) { + 
// Console.log(`ACCEPT: <${link.url}> has no filter for <${member}>`); + return true; + } + for (const term of terms) { + if (filter.test(term)) { + // Console.log(`ACCEPT: <${link.url}> includes <${term.value}>`); + return true; + } + } + } + // Console.log(`REJECT: <${link.url}>`); + return false; } public pop(): ILink | undefined { @@ -48,8 +60,7 @@ export class LinkQueueMembershipFilter extends LinkQueueWrapper { export interface ILinkQueueMembershipFilterArgs { linkQueue: ILinkQueue; - queryTerms: Set; + queryTerms: Map; filterStorage: IMembershipFilterStorage; ignorePatterns?: RegExp[]; - members: Set; } diff --git a/packages/bus-rdf-parse-membership-filter/lib/MembershipFilterStorage.ts b/packages/bus-rdf-parse-membership-filter/lib/MembershipFilterStorage.ts index 72e5b35..c708aaa 100644 --- a/packages/bus-rdf-parse-membership-filter/lib/MembershipFilterStorage.ts +++ b/packages/bus-rdf-parse-membership-filter/lib/MembershipFilterStorage.ts @@ -2,7 +2,7 @@ import { ActionContextKey } from '@comunica/core'; import type { IMembershipFilter } from './MembershipFilter'; export interface IMembershipFilterStorage { - get: (uri: string, members?: string[]) => IMembershipFilter | undefined; + get: (uri: string, member: string) => IMembershipFilter | undefined; add: (uriPattern: RegExp, filter: IMembershipFilter) => void; } @@ -13,9 +13,9 @@ export class MembershipFilterStorage implements IMembershipFilterStorage { this.filters = new Map(); } - public get(uri: string, members?: string[]): IMembershipFilter | undefined { + public get(uri: string, member?: string): IMembershipFilter | undefined { for (const [ exp, filter ] of this.filters) { - if (exp.test(uri) && (!members || filter.members.every(mem => members.includes(mem)))) { + if (exp.test(uri) && (!member || filter.members.every(mem => member === mem))) { return filter; } }