Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add link filters and their extraction from VoID descriptions #153

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
"ccqs:config/rdf-metadata-extract/actors.json",
"ccqs:config/rdf-metadata-extract/mediators.json",
"ccqslt:config/rdf-metadata-extract/actors/traverse.json",
"ccqslt:config/rdf-metadata-extract/actors/link-filter-void.json",
"ccqs:config/rdf-parse/actors.json",
"ccqs:config/rdf-parse/mediators.json",
"ccqs:config/rdf-parse-html/actors.json",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"ccqslt:config/extract-links/actors/content-policies-conditional.json",
"ccqslt:config/extract-links/actors/quad-pattern-query.json",
"ccqslt:config/rdf-resolve-hypermedia-links/actors/traverse-replace-conditional.json",
"ccqslt:config/rdf-resolve-hypermedia-links-queue/actors/wrapper-filter.json",
"ccqslt:config/rdf-resolve-hypermedia-links-queue/actors/wrapper-limit-count.json"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"@context": [
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/runner/^4.0.0/components/context.jsonld",
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-metadata-extract-link-filter-void/^0.0.0/components/context.jsonld"
],
"@id": "urn:comunica:default:Runner",
"@type": "Runner",
"actors": [
{
"@id": "urn:comunica:default:rdf-metadata-extract/actors#link-filter-void",
"@type": "ActorRdfMetadataExtractLinkFilterVoid"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"@context": [
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/runner/^4.0.0/components/context.jsonld",
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-resolve-hypermedia-links-queue-wrapper-filter/^0.0.0/components/context.jsonld"
],
"@id": "urn:comunica:default:Runner",
"@type": "Runner",
"actors": [
{
"@id": "urn:comunica:default:rdf-resolve-hypermedia-links-queue/actors#wrapper-filter",
"@type": "ActorRdfResolveHypermediaLinksQueueWrapperFilter",
"beforeActors": { "@id": "urn:comunica:default:rdf-resolve-hypermedia-links-queue/actors#fifo" },
"mediatorRdfResolveHypermediaLinksQueue": { "@id": "urn:comunica:default:rdf-resolve-hypermedia-links-queue/mediators#main" }
}
]
}
2 changes: 2 additions & 0 deletions engines/query-sparql-link-traversal-solid/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@
"@comunica/actor-rdf-metadata-extract-hydra-controls": "^4.0.2",
"@comunica/actor-rdf-metadata-extract-hydra-count": "^4.0.2",
"@comunica/actor-rdf-metadata-extract-hydra-pagesize": "^4.0.2",
"@comunica/actor-rdf-metadata-extract-link-filter-void": "^0.6.0",
"@comunica/actor-rdf-metadata-extract-patch-sparql-update": "^4.0.2",
"@comunica/actor-rdf-metadata-extract-put-accepted": "^4.0.2",
"@comunica/actor-rdf-metadata-extract-request-time": "^4.0.2",
Expand All @@ -282,6 +283,7 @@
"@comunica/actor-rdf-parse-xml-rdfa": "^4.0.2",
"@comunica/actor-rdf-resolve-hypermedia-links-next": "^4.0.2",
"@comunica/actor-rdf-resolve-hypermedia-links-queue-fifo": "^4.0.2",
"@comunica/actor-rdf-resolve-hypermedia-links-queue-wrapper-filter": "^0.6.0",
"@comunica/actor-rdf-resolve-hypermedia-links-traverse": "^0.6.0",
"@comunica/actor-rdf-serialize-jsonld": "^4.0.2",
"@comunica/actor-rdf-serialize-n3": "^4.0.2",
Expand Down
2 changes: 2 additions & 0 deletions engines/query-sparql-link-traversal/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@
"@comunica/actor-rdf-metadata-extract-hydra-controls": "^4.0.2",
"@comunica/actor-rdf-metadata-extract-hydra-count": "^4.0.2",
"@comunica/actor-rdf-metadata-extract-hydra-pagesize": "^4.0.2",
"@comunica/actor-rdf-metadata-extract-link-filter-void": "^0.6.0",
"@comunica/actor-rdf-metadata-extract-patch-sparql-update": "^4.0.2",
"@comunica/actor-rdf-metadata-extract-put-accepted": "^4.0.2",
"@comunica/actor-rdf-metadata-extract-request-time": "^4.0.2",
Expand All @@ -278,6 +279,7 @@
"@comunica/actor-rdf-parse-xml-rdfa": "^4.0.2",
"@comunica/actor-rdf-resolve-hypermedia-links-next": "^4.0.2",
"@comunica/actor-rdf-resolve-hypermedia-links-queue-fifo": "^4.0.2",
"@comunica/actor-rdf-resolve-hypermedia-links-queue-wrapper-filter": "^0.6.0",
"@comunica/actor-rdf-resolve-hypermedia-links-queue-wrapper-limit-count": "^0.6.0",
"@comunica/actor-rdf-resolve-hypermedia-links-traverse": "^0.6.0",
"@comunica/actor-rdf-resolve-hypermedia-links-traverse-replace-conditional": "^0.6.0",
Expand Down
35 changes: 35 additions & 0 deletions packages/actor-rdf-metadata-extract-link-filter-void/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Comunica VoID Link Filter RDF Metadata Extract Actor

[![npm version](https://badge.fury.io/js/%40comunica%2Factor-rdf-metadata-extract-link-filter-void.svg)](https://www.npmjs.com/package/@comunica/actor-rdf-metadata-extract-link-filter-void)

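An [RDF Metadata Extract](https://github.com/comunica/comunica/tree/master/packages/bus-rdf-metadata-extract) actor that
creates link filters based on [VoID descriptions](https://www.w3.org/TR/void/) to filter out redundant links.
A filter is only created for dataset descriptions that declare a `void:sparqlEndpoint`,
and it rejects links whose URL starts with the dataset's `void:uriSpace` or matches its `void:uriRegexPattern`.
The filters are added to the context filter list, and the actor only runs when that list is present in the context.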

This module is part of the [Comunica framework](https://github.com/comunica/comunica),
and should only be used by [developers that want to build their own query engine](https://comunica.dev/docs/modify/).

[Click here if you just want to query with Comunica](https://comunica.dev/docs/query/).

## Install

```bash
$ yarn add @comunica/actor-rdf-metadata-extract-link-filter-void
```

## Configure

After installing, this package can be added to your engine's configuration as follows:
```json
{
"@context": [
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-metadata-extract-link-filter-void/^0.0.0/components/context.jsonld"
],
"actors": [
{
"@id": "urn:comunica:default:rdf-metadata-extract/actors#link-filter-void",
"@type": "ActorRdfMetadataExtractLinkFilterVoid"
}
]
}
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import type { MediatorExtractLinks } from '@comunica/bus-extract-links';
import type {
IActionRdfMetadataExtract,
IActorRdfMetadataExtractOutput,
IActorRdfMetadataExtractArgs,
} from '@comunica/bus-rdf-metadata-extract';
import { ActorRdfMetadataExtract } from '@comunica/bus-rdf-metadata-extract';
import { KeysRdfResolveHypermediaLinks } from '@comunica/context-entries-link-traversal';
import type { IActorTest, TestResult } from '@comunica/core';
import { passTestVoid, failTest } from '@comunica/core';
import type { ILink } from '@comunica/types';
import type { LinkFilterType } from '@comunica/types-link-traversal';
import type * as RDF from '@rdfjs/types';

/**
 * Comunica RDF metadata extract actor to collect link filters from VoID descriptions.
 *
 * The metadata stream is scanned for `void:sparqlEndpoint`, `void:uriSpace` and `void:uriRegexPattern` quads.
 * For every subject that declares a `void:sparqlEndpoint`, each of its URI space or URI pattern declarations
 * becomes a link filter that returns `false` for links whose URL lies in that space or matches that pattern.
 * Discovered filters are appended to the list stored under `KeysRdfResolveHypermediaLinks.linkFilters`.
 */
export class ActorRdfMetadataExtractLinkFilterVoid extends ActorRdfMetadataExtract {
  // NOTE(review): this mediator is never assigned or read within this class, and the constructor
  // arguments type does not declare it. Confirm whether it can be removed together with its import.
  private readonly mediatorExtractLinks: MediatorExtractLinks;

  public constructor(args: IActorRdfMetadataExtractArgs) {
    super(args);
  }

  /**
   * Only pass when the context holds a link filter list, since that list is where `run` stores its results.
   * @param action The metadata extraction action.
   */
  public async test(action: IActionRdfMetadataExtract): Promise<TestResult<IActorTest>> {
    if (!action.context.has(KeysRdfResolveHypermediaLinks.linkFilters)) {
      return failTest('unable to extract link filters without context storage target present');
    }
    return passTestVoid();
  }

  /**
   * Extract link filters from the metadata stream and append them to the link filter list in the context.
   * The list is mutated in place; the returned metadata object is always empty.
   * @param action The metadata extraction action.
   */
  public async run(action: IActionRdfMetadataExtract): Promise<IActorRdfMetadataExtractOutput> {
    const discoveredFilters = await this.extractFilters(action.metadata);
    if (discoveredFilters.length > 0) {
      const linkFilters = action.context.getSafe(KeysRdfResolveHypermediaLinks.linkFilters);
      linkFilters.push(...discoveredFilters);
    }
    return { metadata: {}};
  }

  /**
   * Collect link filters from the VoID quads in the given stream.
   *
   * Every `void:uriSpace` and `void:uriRegexPattern` value of a subject yields its own filter,
   * so subjects with several declarations keep all of them.
   * Only filters of subjects that also have a `void:sparqlEndpoint` are returned,
   * which is why the result can only be assembled once the stream has ended.
   *
   * @param stream The stream of metadata quads.
   * @return A promise resolving to the discovered filters, or rejecting when the stream emits an error.
   */
  public async extractFilters(stream: RDF.Stream): Promise<LinkFilterType[]> {
    return new Promise<LinkFilterType[]>((resolve, reject) => {
      const filtersBySubject = new Map<string, LinkFilterType[]>();
      const subjectsWithEndpoints = new Set<string>();

      const addFilter = (subject: string, filter: LinkFilterType): void => {
        const subjectFilters = filtersBySubject.get(subject);
        if (subjectFilters) {
          subjectFilters.push(filter);
        } else {
          filtersBySubject.set(subject, [ filter ]);
        }
      };

      stream
        .on('error', reject)
        .on('data', (quad: RDF.Quad) => {
          switch (quad.predicate.value) {
            case 'http://rdfs.org/ns/void#sparqlEndpoint':
              subjectsWithEndpoints.add(quad.subject.value);
              break;
            case 'http://rdfs.org/ns/void#uriRegexPattern': {
              // Compile once here instead of on every filter invocation.
              const pattern = ActorRdfMetadataExtractLinkFilterVoid.compileUriPattern(quad.object.value);
              if (pattern) {
                addFilter(quad.subject.value, (link: ILink) => !pattern.test(link.url));
              }
              break;
            }
            case 'http://rdfs.org/ns/void#uriSpace': {
              const uriSpace = quad.object.value;
              addFilter(quad.subject.value, (link: ILink) => !link.url.startsWith(uriSpace));
              break;
            }
          }
        })
        .on('end', () => {
          const output: LinkFilterType[] = [];
          for (const [ subject, subjectFilters ] of filtersBySubject) {
            if (subjectsWithEndpoints.has(subject)) {
              output.push(...subjectFilters);
            }
          }
          resolve(output);
        });
    });
  }

  /**
   * Compile a `void:uriRegexPattern` value into a regular expression.
   * @param pattern The pattern string taken from the VoID description.
   * @return The compiled expression, or undefined when the pattern is not a valid regular expression.
   *         Invalid patterns come from remote data, so they are skipped instead of throwing
   *         later when the filter is applied to a link.
   */
  private static compileUriPattern(pattern: string): RegExp | undefined {
    try {
      return new RegExp(pattern, 'u');
    } catch {
      return undefined;
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from './ActorRdfMetadataExtractLinkFilterVoid';
48 changes: 48 additions & 0 deletions packages/actor-rdf-metadata-extract-link-filter-void/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{
"name": "@comunica/actor-rdf-metadata-extract-link-filter-void",
"version": "0.6.0",
"description": "An actor to extract link filters based on VoID descriptions",
"lsd:module": true,
"license": "MIT",
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/comunica-association"
},
"homepage": "https://comunica.dev/",
"repository": {
"type": "git",
"url": "https://github.com/comunica/comunica-feature-link-traversal.git",
"directory": "packages/actor-rdf-metadata-extract-link-filter-void"
},
"bugs": {
"url": "https://github.com/comunica/comunica-feature-link-traversal/issues"
},
"keywords": [
"comunica",
"actor",
"rdf-metadata-link-filter-void"
],
"sideEffects": false,
"main": "lib/index.js",
"typings": "lib/index",
"publishConfig": {
"access": "public"
},
"files": [
"components",
"lib/**/*.d.ts",
"lib/**/*.js",
"lib/**/*.js.map"
],
"scripts": {
"build": "yarn run build:ts && yarn run build:components",
"build:ts": "node \"../../node_modules/typescript/bin/tsc\"",
"build:components": "componentsjs-generator"
},
"dependencies": {
  "@comunica/bus-extract-links": "^0.6.0",
  "@comunica/bus-rdf-metadata-extract": "^4.0.2",
  "@comunica/context-entries-link-traversal": "^0.6.0",
  "@comunica/core": "^4.0.2",
  "@comunica/types": "^4.0.2",
  "@comunica/types-link-traversal": "^0.6.0",
  "@rdfjs/types": "*"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import { KeysRdfResolveHypermediaLinks } from '@comunica/context-entries-link-traversal';
import { Bus, ActionContext } from '@comunica/core';
import type { LinkFilterType } from '@comunica/types-link-traversal';
import { DataFactory } from 'rdf-data-factory';
import '@comunica/utils-jest';

import { ActorRdfMetadataExtractLinkFilterVoid } from '../lib/ActorRdfMetadataExtractLinkFilterVoid';

// Used below to turn arrays of quads into the streams passed to extractFilters.
const streamifyArray = require('streamify-array');

// Factory for the RDF terms and quads used as test input.
const DF = new DataFactory();

// The VoID predicates that the actor's extractFilters method switches on.
const voidUriSpace = DF.namedNode('http://rdfs.org/ns/void#uriSpace');
const voidUriRegexPattern = DF.namedNode('http://rdfs.org/ns/void#uriRegexPattern');
const voidSparqlEndpoint = DF.namedNode('http://rdfs.org/ns/void#sparqlEndpoint');

// Unit tests for the VoID link filter metadata extract actor.
describe('ActorRdfMetadataExtractLinkFilterVoid', () => {
  let bus: any;
  let actor: ActorRdfMetadataExtractLinkFilterVoid;
  // The list instance placed in the context, so tests can observe what the actor appended to it.
  let linkFilters: LinkFilterType[];

  beforeEach(() => {
    jest.resetAllMocks();
    bus = new Bus({ name: 'bus' });
    actor = new ActorRdfMetadataExtractLinkFilterVoid({ bus, name: 'actor' });
    linkFilters = [];
  });

  describe('test', () => {
    it('should pass with filter storage in context', async() => {
      await expect(actor.test({
        context: new ActionContext({ [KeysRdfResolveHypermediaLinks.linkFilters.name]: linkFilters }),
        metadata: <any>{},
        requestTime: 0,
        url: 'url',
      })).resolves.toPassTestVoid();
    });

    it('should fail without filter storage in context', async() => {
      await expect(actor.test({
        context: new ActionContext(),
        metadata: <any>{},
        requestTime: 0,
        url: 'url',
      })).resolves.toFailTest('unable to extract link filters without context storage target present');
    });
  });

  describe('run', () => {
    it('should register discovered link filters', async() => {
      jest.spyOn(actor, 'extractFilters').mockResolvedValue([ <any>'filter' ]);
      await expect(actor.run({
        context: new ActionContext({ [KeysRdfResolveHypermediaLinks.linkFilters.name]: linkFilters }),
        metadata: <any>{},
        requestTime: 0,
        url: 'url',
      })).resolves.toEqual({ metadata: {}});
      expect(linkFilters).toEqual([ 'filter' ]);
    });

    it('should not register any filters when none are discovered', async() => {
      jest.spyOn(actor, 'extractFilters').mockResolvedValue([]);
      await expect(actor.run({
        context: new ActionContext({ [KeysRdfResolveHypermediaLinks.linkFilters.name]: linkFilters }),
        metadata: <any>{},
        requestTime: 0,
        url: 'url',
      })).resolves.toEqual({ metadata: {}});
      expect(linkFilters).toEqual([]);
    });
  });

  describe('extractFilters', () => {
    it('should parse filters from void:uriSpace', async() => {
      const subjectWithEndpoint = DF.blankNode();
      const subjectWithoutEndpoint = DF.blankNode();
      const stream = streamifyArray([
        DF.quad(subjectWithEndpoint, voidSparqlEndpoint, DF.literal('http://localhost/endpoint')),
        DF.quad(subjectWithEndpoint, voidUriSpace, DF.literal('http://localhost/')),
        DF.quad(subjectWithoutEndpoint, voidUriSpace, DF.literal('http://otherhost/')),
      ]);
      const filters = await actor.extractFilters(stream);
      expect(filters).toHaveLength(1);
      expect(filters[0]({ url: 'http://localhost/some/uri' })).toBeFalsy();
      // Links outside the URI space must be accepted, otherwise a reject-all filter would also pass.
      expect(filters[0]({ url: 'http://otherhost/some/uri' })).toBeTruthy();
    });

    it('should parse filters from void:uriRegexPattern', async() => {
      const subjectWithEndpoint = DF.blankNode();
      const subjectWithoutEndpoint = DF.blankNode();
      const stream = streamifyArray([
        DF.quad(subjectWithEndpoint, voidSparqlEndpoint, DF.literal('http://localhost/endpoint')),
        DF.quad(subjectWithEndpoint, voidUriRegexPattern, DF.literal('^http://localhost/')),
        DF.quad(subjectWithoutEndpoint, voidUriRegexPattern, DF.literal('^http://otherhost/')),
      ]);
      const filters = await actor.extractFilters(stream);
      expect(filters).toHaveLength(1);
      expect(filters[0]({ url: 'http://localhost/some/uri' })).toBeFalsy();
      // Links that do not match the pattern must be accepted.
      expect(filters[0]({ url: 'http://otherhost/some/uri' })).toBeTruthy();
    });

    it('should not produce filters for descriptions without void:sparqlEndpoint', async() => {
      const subjectWithoutEndpoint = DF.blankNode();
      const stream = streamifyArray([
        DF.quad(subjectWithoutEndpoint, voidUriSpace, DF.literal('http://otherhost/')),
        DF.quad(subjectWithoutEndpoint, voidUriRegexPattern, DF.literal('^http://otherhost/')),
      ]);
      await expect(actor.extractFilters(stream)).resolves.toEqual([]);
    });
  });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Comunica Hypermedia Links Queue Wrapper for Link Filtering

[![npm version](https://badge.fury.io/js/%40comunica%2Factor-rdf-resolve-hypermedia-links-queue-wrapper-filter.svg)](https://www.npmjs.com/package/@comunica/actor-rdf-resolve-hypermedia-links-queue-wrapper-filter)

An [RDF Resolve Hypermedia Links Queue](https://github.com/comunica/comunica/tree/master/packages/bus-rdf-resolve-hypermedia-links-queue) actor
that wraps another link queue provided by the bus,
and filters the links that can be taken out of the queue.

This module is part of the [Comunica framework](https://github.com/comunica/comunica),
and should only be used by [developers that want to build their own query engine](https://comunica.dev/docs/modify/).

[Click here if you just want to query with Comunica](https://comunica.dev/docs/query/).

## Install

```bash
$ yarn add @comunica/actor-rdf-resolve-hypermedia-links-queue-wrapper-filter
```

## Configure

After installing, this package can be added to your engine's configuration as follows:
```json
{
"@context": [
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-resolve-hypermedia-links-queue-wrapper-filter/^0.0.0/components/context.jsonld"
],
"actors": [
{
"@id": "urn:comunica:default:rdf-resolve-hypermedia-links-queue/actors#wrapper-filter",
"@type": "ActorRdfResolveHypermediaLinksQueueWrapperFilter",
"beforeActors": { "@id": "urn:comunica:default:rdf-resolve-hypermedia-links-queue/actors#fifo" },
"mediatorRdfResolveHypermediaLinksQueue": { "@id": "urn:comunica:default:rdf-resolve-hypermedia-links-queue/mediators#main" }
}
]
}
```
Loading
Loading