Skip to content

Commit

Permalink
Updates and fixes to components and configs
Browse files Browse the repository at this point in the history
  • Loading branch information
surilindur committed Jan 24, 2024
1 parent 7b0cc1f commit f38ce7a
Show file tree
Hide file tree
Showing 18 changed files with 288 additions and 12,540 deletions.
4 changes: 3 additions & 1 deletion .componentsjs-generator-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
"ignoreComponents": [
"BindingsStreamAdaptiveHeuristics",
"Buffer",
"Map"
"RegExp",
"Map",
"Set"
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
],
"import": [
"ccqsc:config/config-base-adaptive.json",
"ccqsc:config/override/disable-aggregate-store.json",
"ccqsc:config/context-preprocess/actors.json",
"ccqsc:config/rdf-metadata-extract/actors.json",
"ccqsc:config/rdf-metadata-accumulate/actors.json",
"ccqsc:config/rdf-join/actors.json"
"ccqsc:config/rdf-join/actors.json",
"ccqsc:config/rdf-parse/mediators.json",
"ccqsc:config/rdf-parse/actors.json",
"ccqsc:config/rdf-resolve-hypermedia-links-queue/actors.json"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"@context": [
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/runner/^2.0.0/components/context.jsonld",
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-context-preprocess-membership-filter/^0.0.0/components/context.jsonld"
],
"@id": "urn:comunica:default:Runner",
"@type": "Runner",
"actors": [
{
"@id": "urn:comunica:default:context-preprocess/actors#membership-filter",
"@type": "ActorContextPreprocessMembershipFilter"
}
]
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
{
"@context": [
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/runner/^2.0.0/components/context.jsonld",
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-metadata-extract-void-description/^0.0.0/components/context.jsonld"
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-metadata-extract-void-description/^0.0.0/components/context.jsonld",
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-metadata-extract-membership-filter/^0.0.0/components/context.jsonld"
],
"@id": "urn:comunica:default:Runner",
"@type": "Runner",
Expand All @@ -13,6 +14,16 @@
"@id": "urn:comunica:default:dereference-rdf/mediators#main"
},
"datasetSubjectRegex": "^(https?:\\/\\/.*\\/)$"
},
{
"@id": "urn:comunica:default:rdf-metadata-extract/actors#membership-filter",
"@type": "ActorRdfMetadataExtractMembershipFilter",
"mediatorRdfParseMembershipFilter": {
"@id": "urn:comunica:default:rdf-parse-membership-filter/mediators#parse"
},
"membershipFilterTypes": [
"http://semweb.mmlab.be/ns/membership#BloomFilter"
]
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"@context": [
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/runner/^2.0.0/components/context.jsonld",
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-membership-filter-bloom/^0.0.0/components/context.jsonld",
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-parse-membership-filter-gcs/^0.0.0/components/context.jsonld"
],
"@id": "urn:comunica:default:Runner",
"@type": "Runner",
"actors": [
{
"@id": "urn:comunica:default:rdf-parse-membership-filter/actors#bloom",
"@type": "ActorRdfParseMembershipFilterBloom"
},
{
"@id": "urn:comunica:default:rdf-parse-membership-filter/actors#gcs",
"@type": "ActorRdfParseMembershipFilterGcs"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"@context": [
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/bus-rdf-parse-membership-filter/^0.0.0/components/context.jsonld",
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/mediator-race/^2.0.0/components/context.jsonld"
],
"@graph": [
{
"@id": "urn:comunica:default:rdf-parse-membership-filter/mediators#parse",
"@type": "MediatorRace",
"bus": {
"@id": "ActorRdfParseMembershipFilter:_default_bus"
}
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"@context": [
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/runner/^2.0.0/components/context.jsonld",
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-resolve-hypermedia-links-queue-wrapper-membership-filter/^0.0.0/components/context.jsonld"
],
"@id": "urn:comunica:default:Runner",
"@type": "Runner",
"actors": [
{
"@id": "urn:comunica:default:rdf-resolve-hypermedia-links-queue/actors#wrapper-membership-filter",
"@type": "ActorRdfResolveHypermediaLinksQueueWrapperMembershipFilter",
"beforeActors": {
"@id": "urn:comunica:default:rdf-resolve-hypermedia-links-queue/actors#fifo"
},
"mediatorRdfResolveHypermediaLinksQueue": {
"@id": "urn:comunica:default:rdf-resolve-hypermedia-links-queue/mediators#main"
},
"ignorePatterns": [
"^(.*\\/(public|private)TypeIndex)$"
],
"members": [
"http://www.w3.org/1999/02/22-rdf-syntax-ns#subject",
"http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate",
"http://www.w3.org/1999/02/22-rdf-syntax-ns#object"
]
}
]
}
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"@rdfjs/types": "*",
"@rubensworks/eslint-config": "^2.0.0",
"componentsjs-generator": "^3.0.0",
"eslint": "8.0.0",
"eslint": "8.48.0",
"lerna": "^7.0.0",
"rdf-js": "^4.0.0",
"typescript": "^5.0.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,15 @@ import type * as RDF from '@rdfjs/types';
* A comunica Membership RDF Metadata Extract Actor.
*/
export class ActorRdfMetadataExtractMembershipFilter extends ActorRdfMetadataExtract {
public static readonly RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';

private readonly mediatorRdfParseMembershipFilter: MediatorRdfParseMembershipFilter;
protected readonly mediatorRdfParseMembershipFilter: MediatorRdfParseMembershipFilter;
protected readonly membershipFilterTypes: Set<string>;

private readonly membershipFilterTypes: Set<string>;
private readonly membershipFilterPredicates: Set<string>;
public static readonly RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';

public constructor(args: IActorRdfMetadataExtractMembershipFilterArgs) {
super(args);
this.mediatorRdfParseMembershipFilter = args.mediatorRdfParseMembershipFilter;
this.membershipFilterTypes = new Set(args.membershipFilterTypes);
this.membershipFilterPredicates = new Set(args.membershipFilterPredicates);
}

public async test(action: IActionRdfMetadataExtract): Promise<IActorTest> {
Expand All @@ -50,34 +47,33 @@ export class ActorRdfMetadataExtractMembershipFilter extends ActorRdfMetadataExt
* @param stream The RDF metadata stream to process
* @returns The collected membership filter data
*/
private async extractFilters(stream: RDF.Stream): Promise<Map<string, RDF.Quad[]>> {
protected async extractFilters(stream: RDF.Stream): Promise<Map<string, RDF.Quad[]>> {
return new Promise((resolve, reject) => {
const filters: Record<string, RDF.Quad[]> = {};
const quads: Record<string, RDF.Quad[]> = {};
const filters = new Map<string, RDF.Quad[]>();
const quads = new Map<string, RDF.Quad[]>();
stream
.on('data', (quad: RDF.Quad) => {
const subject = quad.subject.value;
if (filters[subject]) {
filters[subject].push(quad);
if (filters.has(quad.subject.value)) {
filters.get(quad.subject.value)!.push(quad);
} else if (
quad.predicate.value === ActorRdfMetadataExtractMembershipFilter.RDF_TYPE &&
quad.object.termType === 'NamedNode' &&
this.membershipFilterTypes.has(quad.object.value)
) {
filters[subject] = quads[subject] ?? [];
filters[subject].push(quad);
delete quads[subject];
} else if (this.membershipFilterPredicates.has(quad.predicate.value)) {
const filterUri = quad.object.value;
filters[filterUri] = quads[filterUri] ?? [];
filters[filterUri].push(quad);
delete quads[filterUri];
} else if (quads[subject]) {
quads[subject].push(quad);
const data = quads.get(quad.subject.value) ?? [];
data.push(quad);
filters.set(quad.subject.value, data);
quads.delete(quad.subject.value);
} else {
quads[subject] = [ quad ];
const data = quads.get(quad.subject.value);
if (data) {
data.push(quad);
} else {
quads.set(quad.subject.value, [ quad ]);
}
}
})
.on('end', () => resolve(new Map(Object.entries(filters))))
.on('end', () => resolve(filters))
.on('error', reject);
});
}
Expand All @@ -88,7 +84,7 @@ export class ActorRdfMetadataExtractMembershipFilter extends ActorRdfMetadataExt
* @param filters Membership filter data.
* @returns The parsed membership filters.
*/
private async parseFilters(
protected async parseFilters(
context: IActionContext,
filters: Map<string, RDF.Quad[]>,
): Promise<Map<RegExp, IMembershipFilter>> {
Expand All @@ -115,10 +111,6 @@ export interface IActorRdfMetadataExtractMembershipFilterArgs extends IActorRdfM
* RDF type IRIs of membership filters for detection from metadata stream
*/
membershipFilterTypes: string[];
/**
* Predicate IRIs that should point as membership filters
*/
membershipFilterPredicates: string[];
/**
* Mediator on the membership filter parse bus
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,30 +10,30 @@ import type { IActorTest } from '@comunica/core';
import type * as RDF from '@rdfjs/types';
import type { IVoIDDescription } from './VoIDDescription';

const VOID = 'http://rdfs.org/ns/void#';
const VOID_TRIPLES = `${VOID}triples`;
const VOID_ENTITIES = `${VOID}entities`;
const VOID_CLASS = `${VOID}class`;
const VOID_CLASSES = `${VOID}classes`;
const VOID_PROPERTY = `${VOID}property`;
const VOID_PROPERTIES = `${VOID}properties`;
const VOID_INDATASET = `${VOID}inDataset`;
const VOID_URISPACE = `${VOID}uriSpace`;
const VOID_DATASET = `${VOID}Dataset`;
const VOID_DSUBJECTS = `${VOID}distinctSubjects`;
const VOID_DOBJECTS = `${VOID}distinctObjects`;
const VOID_PPARTITION = `${VOID}propertyPartition`;
const VOID_CPARTITION = `${VOID}classPartition`;
const RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
const XSD_INTEGER = 'http://www.w3.org/2001/XMLSchema#integer';

/**
* An RDF Metadata Extract Actor that extracts dataset metadata from their VOID descriptions
*/
export class ActorRdfMetadataExtractVoIDDescription extends ActorRdfMetadataExtract {
public readonly mediatorDereferenceRdf: MediatorDereferenceRdf;
private readonly datasetSubjectRegex: RegExp;
private readonly shouldCompletePartialDescriptions: boolean;
protected readonly mediatorDereferenceRdf: MediatorDereferenceRdf;
protected readonly datasetSubjectRegex: RegExp;
protected readonly shouldCompletePartialDescriptions: boolean;

public static readonly VOID_PREFIX = 'http://rdfs.org/ns/void#';
public static readonly VOID_TRIPLES = `${ActorRdfMetadataExtractVoIDDescription.VOID_PREFIX}triples`;
public static readonly VOID_ENTITIES = `${ActorRdfMetadataExtractVoIDDescription.VOID_PREFIX}entities`;
public static readonly VOID_CLASS = `${ActorRdfMetadataExtractVoIDDescription.VOID_PREFIX}class`;
public static readonly VOID_CLASSES = `${ActorRdfMetadataExtractVoIDDescription.VOID_PREFIX}classes`;
public static readonly VOID_PROPERTY = `${ActorRdfMetadataExtractVoIDDescription.VOID_PREFIX}property`;
public static readonly VOID_PROPERTIES = `${ActorRdfMetadataExtractVoIDDescription.VOID_PREFIX}properties`;
public static readonly VOID_INDATASET = `${ActorRdfMetadataExtractVoIDDescription.VOID_PREFIX}inDataset`;
public static readonly VOID_URISPACE = `${ActorRdfMetadataExtractVoIDDescription.VOID_PREFIX}uriSpace`;
public static readonly VOID_DATASET = `${ActorRdfMetadataExtractVoIDDescription.VOID_PREFIX}Dataset`;
public static readonly VOID_DSUBJECTS = `${ActorRdfMetadataExtractVoIDDescription.VOID_PREFIX}distinctSubjects`;
public static readonly VOID_DOBJECTS = `${ActorRdfMetadataExtractVoIDDescription.VOID_PREFIX}distinctObjects`;
public static readonly VOID_PPARTITION = `${ActorRdfMetadataExtractVoIDDescription.VOID_PREFIX}propertyPartition`;
public static readonly VOID_CPARTITION = `${ActorRdfMetadataExtractVoIDDescription.VOID_PREFIX}classPartition`;
public static readonly RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
public static readonly XSD_INTEGER = 'http://www.w3.org/2001/XMLSchema#integer';

public constructor(args: IActorRdfMetadataExtractVoIDDescriptionArgs) {
super(args);
Expand Down Expand Up @@ -91,8 +91,8 @@ export class ActorRdfMetadataExtractVoIDDescription extends ActorRdfMetadataExtr
if (quad.subject.value in descriptions) {
descriptions[quad.subject.value].push(quad);
} else if (
quad.predicate.value === RDF_TYPE &&
quad.object.value === VOID_DATASET &&
quad.predicate.value === ActorRdfMetadataExtractVoIDDescription.RDF_TYPE &&
quad.object.value === ActorRdfMetadataExtractVoIDDescription.VOID_DATASET &&
this.datasetSubjectRegex.test(quad.subject.value)
) {
if (quad.subject.value in data) {
Expand All @@ -101,7 +101,7 @@ export class ActorRdfMetadataExtractVoIDDescription extends ActorRdfMetadataExtr
} else {
descriptions[quad.subject.value] = [ quad ];
}
} else if (quad.predicate.value === VOID_INDATASET) {
} else if (quad.predicate.value === ActorRdfMetadataExtractVoIDDescription.VOID_INDATASET) {
links.push(quad.object.value);
} else if (quad.subject.value in data) {
data[quad.subject.value].push(quad);
Expand All @@ -122,35 +122,38 @@ export class ActorRdfMetadataExtractVoIDDescription extends ActorRdfMetadataExtr
};
for (const quad of quads) {
switch (quad.predicate.value) {
case VOID_DSUBJECTS:
case ActorRdfMetadataExtractVoIDDescription.VOID_DSUBJECTS:
voidDescription.distinctSubjects = Number.parseInt(quad.object.value, 10);
break;
case VOID_DOBJECTS:
case ActorRdfMetadataExtractVoIDDescription.VOID_DOBJECTS:
voidDescription.distinctObjects = Number.parseInt(quad.object.value, 10);
break;
case VOID_TRIPLES:
case ActorRdfMetadataExtractVoIDDescription.VOID_TRIPLES:
voidDescription.triples = Number.parseInt(quad.object.value, 10);
break;
case VOID_URISPACE:
case ActorRdfMetadataExtractVoIDDescription.VOID_URISPACE:
voidDescription.uriSpace = quad.object.value;
break;
case VOID_PROPERTIES:
case ActorRdfMetadataExtractVoIDDescription.VOID_PROPERTIES:
voidDescription.properties = Number.parseInt(quad.object.value, 10);
break;
case VOID_CLASSES:
case ActorRdfMetadataExtractVoIDDescription.VOID_CLASSES:
voidDescription.classes = Number.parseInt(quad.object.value, 10);
break;
case VOID_CPARTITION:
case ActorRdfMetadataExtractVoIDDescription.VOID_CPARTITION:
if (quad.object.value in data) {
let partitionClass: string | undefined;
let partitionEntities: number | undefined;
for (const pq of data[quad.object.value]) {
if (pq.predicate.value === VOID_CLASS && pq.object.termType === 'NamedNode') {
if (
pq.predicate.value === ActorRdfMetadataExtractVoIDDescription.VOID_CLASS &&
pq.object.termType === 'NamedNode'
) {
partitionClass = pq.object.value;
} else if (
pq.predicate.value === VOID_ENTITIES &&
pq.predicate.value === ActorRdfMetadataExtractVoIDDescription.VOID_ENTITIES &&
pq.object.termType === 'Literal' &&
pq.object.datatype.value === XSD_INTEGER
pq.object.datatype.value === ActorRdfMetadataExtractVoIDDescription.XSD_INTEGER
) {
partitionEntities = Number.parseInt(pq.object.value, 10);
}
Expand All @@ -160,24 +163,30 @@ export class ActorRdfMetadataExtractVoIDDescription extends ActorRdfMetadataExtr
}
}
break;
case VOID_PPARTITION:
case ActorRdfMetadataExtractVoIDDescription.VOID_PPARTITION:
if (quad.object.value in data) {
let partitionProperty: string | undefined;
let partitionTriples: number | undefined;
let partitionDistinctSubjects: number | undefined;
let partitionDistinctObjects: number | undefined;
for (const pq of data[quad.object.value]) {
if (pq.object.termType === 'NamedNode' && pq.predicate.value === VOID_PROPERTY) {
if (
pq.object.termType === 'NamedNode' &&
pq.predicate.value === ActorRdfMetadataExtractVoIDDescription.VOID_PROPERTY
) {
partitionProperty = pq.object.value;
} else if (pq.object.termType === 'Literal' && pq.object.datatype.value === XSD_INTEGER) {
} else if (
pq.object.termType === 'Literal' &&
pq.object.datatype.value === ActorRdfMetadataExtractVoIDDescription.XSD_INTEGER
) {
switch (pq.predicate.value) {
case VOID_TRIPLES:
case ActorRdfMetadataExtractVoIDDescription.VOID_TRIPLES:
partitionTriples = Number.parseInt(pq.object.value, 10);
break;
case VOID_DSUBJECTS:
case ActorRdfMetadataExtractVoIDDescription.VOID_DSUBJECTS:
partitionDistinctSubjects = Number.parseInt(pq.object.value, 10);
break;
case VOID_DOBJECTS:
case ActorRdfMetadataExtractVoIDDescription.VOID_DOBJECTS:
partitionDistinctObjects = Number.parseInt(pq.object.value, 10);
break;
}
Expand Down
Loading

0 comments on commit f38ce7a

Please sign in to comment.