Skip to content

Commit

Permalink
Merge main
Browse files Browse the repository at this point in the history
  • Loading branch information
pietercolpaert committed Apr 12, 2024
2 parents f2cec4d + c786d61 commit 41aa922
Show file tree
Hide file tree
Showing 16 changed files with 775 additions and 245 deletions.
8 changes: 4 additions & 4 deletions bin/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ let before: Date | undefined;
let paramPollInterval: number;
let urlIsView = false;
let noShape = false;
let shapeFiles: string[] | undefined;
let shapeFile: string | undefined;
let ordered: Ordered = "none";
let quiet: boolean = false;
let verbose: boolean = false;
Expand All @@ -39,7 +39,7 @@ program
"follow only relations including members before a certain point in time",
)
.option("--poll-interval <number>", "specify poll interval")
.option("--shape-files [shapeFiles...]", "specify a shapefile")
.option("--shape-file <shapeFile>", "specify a shapefile")
.option(
"--no-shape",
"don't extract members with a shape (only use cbd and named graphs)",
Expand Down Expand Up @@ -68,7 +68,7 @@ program
noShape = !program.shape;
save = program.save;
paramURL = url;
shapeFiles = program.shapeFiles;
shapeFile = program.shapeFile;
paramFollow = program.follow;
paramPollInterval = program.pollInterval;
ordered = program.ordered;
Expand Down Expand Up @@ -109,7 +109,7 @@ async function main() {
pollInterval: paramPollInterval,
fetcher: { maxFetched: 2, concurrentRequests: 10 },
urlIsView: urlIsView,
shapeFiles,
shapeFile,
onlyDefaultGraph,
after,
before,
Expand Down
Binary file modified bun.lockb
Binary file not shown.
147 changes: 55 additions & 92 deletions lib/client.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import { Config, intoConfig, ShapeConfig } from "./config";
import { Config, intoConfig } from "./config";
import { Member } from "./page";
import rdfDereference, { RdfDereferencer } from "rdf-dereference";
import { FileStateFactory, NoStateFactory, State, StateFactory } from "./state";
import { CBDShapeExtractor } from "extract-cbd-shape";
import { RdfStore } from "rdf-stores";
import { DataFactory, Writer as NWriter } from "n3";
import { DataFactory } from "rdf-data-factory";
import { Writer as NWriter } from "n3";
import { Quad_Object, Term } from "@rdfjs/types";
import {
extractMainNodeShape,
Expand All @@ -13,20 +14,20 @@ import {
Notifier,
streamToArray,
} from "./utils";
import { LDES, SDS, SHACL, TREE } from "@treecg/types";
import { LDES, SDS, TREE } from "@treecg/types";
import { FetchedPage, Fetcher, longPromise, resetPromise } from "./pageFetcher";
import { Manager } from "./memberManager";
import { OrderedStrategy, StrategyEvents, UnorderedStrategy } from "./strategy";
import debug from "debug";
import type { Writer } from "@ajuvercr/js-runner";

export { intoConfig } from "./config";
export { retry_fetch } from "./utils";
export { retry_fetch, extractMainNodeShape } from "./utils";
export type { Member, Page, Relation } from "./page";
export type { Config, MediatorConfig, ShapeConfig } from "./config";

const log = debug("client");
const { namedNode, blankNode, quad } = DataFactory;
const df = new DataFactory();

type Controller = ReadableStreamDefaultController<Member>;

Expand All @@ -46,7 +47,7 @@ export function replicateLDES(
}

export type LDESInfo = {
shapeMap?: Map<string, Term>;
shape: Term;
extractor: CBDShapeExtractor;
timestampPath?: Term;
isVersionOfPath?: Term;
Expand All @@ -60,40 +61,20 @@ async function getInfo(
): Promise<LDESInfo> {
const logger = log.extend("getShape");

const shapeConfigStore = RdfStore.createDefault();
if (config.shapeFiles && config.shapeFiles.length > 0) {
config.shapes = [];

for (const shapeFile of config.shapeFiles) {
const tempShapeStore = RdfStore.createDefault();
const shapeId = shapeFile.startsWith("http")
? shapeFile
: "file://" + shapeFile;

const resp = await rdfDereference.dereference(shapeFile, {
localFiles: true,
fetch: config.fetch,
});
const quads = await streamToArray(resp.data);
// Add retrieved quads to local stores
quads.forEach((q) => {
tempShapeStore.addQuad(q);
shapeConfigStore.addQuad(q);
});
if (config.shapeFile) {
const shapeId = config.shapeFile.startsWith("http")
? config.shapeFile
: "file://" + config.shapeFile;

if (shapeId.startsWith("file://")) {
// We have to find the actual IRI/Blank Node of the main shape within the file
config.shapes.push({
quads,
shapeId: extractMainNodeShape(tempShapeStore),
});
} else {
config.shapes.push({
quads: quads,
shapeId: namedNode(shapeId),
});
}
}
const resp = await rdfDereference.dereference(config.shapeFile, {
localFiles: true,
fetch: config.fetch
});
const quads = await streamToArray(resp.data);
config.shape = {
quads: quads,
shapeId: df.namedNode(shapeId),
};
}

let shapeIds = config.noShape
Expand Down Expand Up @@ -134,7 +115,11 @@ async function getInfo(
timestampPaths.length,
isVersionOfPaths.length,
);
} catch (ex: any) {}
} catch (ex: any) { }
}

if (shapeIds.length > 1) {
console.error("Expected at most one shape id, found " + shapeIds.length);
}

if (timestampPaths.length > 1) {
Expand All @@ -149,49 +134,27 @@ async function getInfo(
);
}

// Create a map of shapes and member types
const shapeMap = new Map<string, Term>();

if (config.shapes) {
for (const shape of config.shapes) {
const memberType = getObjects(
shapeConfigStore,
shape.shapeId,
SHACL.terms.targetClass,
)[0];
if (memberType) {
shapeMap.set(memberType.value, shape.shapeId);
} else {
console.error(
"Ignoring SHACL shape without a declared sh:targetClass: ",
shape.shapeId,
);
}
const shapeConfigStore = RdfStore.createDefault();
if (config.shape) {
for (const quad of config.shape.quads) {
shapeConfigStore.addQuad(quad);
}
} else {
for (const shapeId of shapeIds) {
const memberType = getObjects(store, shapeId, SHACL.terms.targetClass)[0];
if (memberType) {
shapeMap.set(memberType.value, shapeId);
} else {
console.error(
"Ignoring SHACL shape without a declared sh:targetClass: ",
shapeId,
);
}
// Make sure the shapeId is as defined in the given shape file
if (config.shape.shapeId.value.startsWith("file://")) {
config.shape.shapeId = extractMainNodeShape(shapeConfigStore);
}
}

return {
extractor: new CBDShapeExtractor(
config.shapes && config.shapes.length > 0 ? shapeConfigStore : store,
config.shape ? shapeConfigStore : store,
dereferencer,
{
cbdDefaultGraph: config.onlyDefaultGraph,
fetch: config.fetch,
},
),
shapeMap: config.noShape ? undefined : shapeMap,
shape: config.shape ? config.shape.shapeId : shapeIds[0],
timestampPath: timestampPaths[0],
isVersionOfPath: isVersionOfPaths[0],
};
Expand Down Expand Up @@ -293,7 +256,7 @@ export class Client {
// TODO Choose a view
const viewQuads = root.data.getQuads(null, TREE.terms.view, null, null);

let ldesId: Term = namedNode(this.config.url);
let ldesId: Term = df.namedNode(this.config.url);
if (!this.config.urlIsView) {
if (viewQuads.length === 0) {
console.error(
Expand Down Expand Up @@ -381,22 +344,22 @@ export class Client {
this.strategy =
this.ordered !== "none"
? new OrderedStrategy(
this.memberManager,
this.fetcher,
notifier,
factory,
this.ordered,
this.config.polling,
this.config.pollInterval,
)
this.memberManager,
this.fetcher,
notifier,
factory,
this.ordered,
this.config.polling,
this.config.pollInterval,
)
: new UnorderedStrategy(
this.memberManager,
this.fetcher,
notifier,
factory,
this.config.polling,
this.config.pollInterval,
);
this.memberManager,
this.fetcher,
notifier,
factory,
this.config.polling,
this.config.pollInterval,
);

logger("Found %d views, choosing %s", viewQuads.length, ldesId.value);
this.strategy.start(ldesId.value);
Expand Down Expand Up @@ -464,7 +427,7 @@ export async function processor(
ordered?: string,
follow?: boolean,
pollInterval?: number,
shapes?: string[],
shape?: string,
noShape?: boolean,
save?: string,
loose?: boolean,
Expand All @@ -475,7 +438,7 @@ export async function processor(
intoConfig({
loose,
noShape,
shapeFiles: shapes,
shapeFile: shape,
polling: follow,
url: url,
after,
Expand Down Expand Up @@ -515,11 +478,11 @@ export async function processor(
}
}

const blank = blankNode();
const blank = df.blankNode();
const quads = el.value.quads.slice();
quads.push(
quad(blank, SDS.terms.stream, <Quad_Object>client.streamId!),
quad(blank, SDS.terms.payload, <Quad_Object>el.value.id!),
df.quad(blank, SDS.terms.stream, <Quad_Object>client.streamId!),
df.quad(blank, SDS.terms.payload, <Quad_Object>el.value.id!),
);

await writer.push(new NWriter().quadsToString(quads));
Expand Down
4 changes: 2 additions & 2 deletions lib/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ export interface Config {
fetcher: FetcherConfig;
before?: Date;
after?: Date;
shapes?: ShapeConfig[];
shapeFiles?: string[];
shape?: ShapeConfig;
shapeFile?: string;
onlyDefaultGraph?: boolean;
fetch?: typeof fetch;
basicAuth?: string;
Expand Down
35 changes: 3 additions & 32 deletions lib/memberManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ export class Manager {

private state: Set<string>;
private extractor: CBDShapeExtractor;
private shapeMap?: Map<string, Term>;
private shapeId?: Term;

private timestampPath?: Term;
private isVersionOfPath?: Term;
Expand All @@ -47,7 +47,7 @@ export class Manager {
this.extractor = info.extractor;
this.timestampPath = info.timestampPath;
this.isVersionOfPath = info.isVersionOfPath;
this.shapeMap = info.shapeMap;
this.shapeId = info.shape;

logger("new %s %o", ldesId.value, info);
}
Expand All @@ -70,36 +70,7 @@ export class Manager {
member: Term,
data: RdfStore,
): Promise<Member | undefined> {
let quads: Quad[] = [];

if (this.shapeMap) {
if (this.shapeMap.size === 1) {
// Use the only shape available
quads = await this.extractor.extract(
data,
member,
Array.from(this.shapeMap.values())[0],
);
} else if (this.shapeMap.size > 1) {
// Find what is the proper shape for this member based on its rdf:type
const memberType = getObjects(data, member, RDF.terms.type)[0];
if (memberType) {
const shapeId = this.shapeMap.get(memberType.value);
if (shapeId) {
quads = await this.extractor.extract(data, member, shapeId);
}
} else {
// There is no rdf:type defined for this member. Fallback to CBD extraction
quads = await this.extractor.extract(data, member);
}
} else {
// Do a simple CBD extraction
quads = await this.extractor.extract(data, member);
}
} else {
// Do a simple CBD extraction
quads = await this.extractor.extract(data, member);
}
const quads = await this.extractor.extract(data, member, this.shapeId);

if (this.state.has(member.value)) {
return;
Expand Down
2 changes: 1 addition & 1 deletion lib/page.ts
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ export function extractRelations(
continue;
}
}
} else { /* No filters, everything is allowed*/ }
} else { /* No filters, everything is allowed */ }
}
}

Expand Down
Loading

0 comments on commit 41aa922

Please sign in to comment.