diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2b34bffad5..9856cc893c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,25 @@ The following changes are pending, and will be applied on the next major release
### Patch changes
- `getThing` now supports Blank Node identifiers in addition to IRIs and skolems to refer to a subject.
+- `getThingAll(dataset, { acceptBlankNodes: true })` now returns all Blank Node
+ subjects in the Dataset, in particular including those that are part of a single
+ chain of predicates. For instance, given the following dataset:
+
+ ```
+ @prefix ex: <https://example.org/> .
+ @prefix foaf: <http://xmlns.com/foaf/0.1/> .
+
+ ex:camille
+ foaf:knows [
+ foaf:name "Dominique"@en ;
+ ] .
+
+ ```
+
+ `getThingAll(dataset, { acceptBlankNodes: true })` previously returned a single
+ element for the Named Node (`ex:camille`); it now also includes a second element
+ for the Blank Node. Blank Node identifiers are by definition unstable and shouldn't
+ be relied upon beyond local resolution.
## [2.0.1]
diff --git a/package-lock.json b/package-lock.json
index e3fa5c6d2b..7ae7f30f95 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -9220,9 +9220,9 @@
}
},
"node_modules/typedoc": {
- "version": "0.25.12",
- "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.25.12.tgz",
- "integrity": "sha512-F+qhkK2VoTweDXd1c42GS/By2DvI2uDF4/EpG424dTexSHdtCH52C6IcAvMA6jR3DzAWZjHpUOW+E02kyPNUNw==",
+ "version": "0.25.13",
+ "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.25.13.tgz",
+ "integrity": "sha512-pQqiwiJ+Z4pigfOnnysObszLiU3mVLWAExSPf+Mu06G/qsc3wzbuM56SZQvONhHLncLUhYzOVkjFFpFfL5AzhQ==",
"dev": true,
"dependencies": {
"lunr": "^2.3.9",
@@ -9277,9 +9277,9 @@
}
},
"node_modules/typescript": {
- "version": "5.4.3",
- "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.4.3.tgz",
- "integrity": "sha512-KrPd3PKaCLr78MalgiwJnA25Nm8HAmdwN3mYUYZgG/wizIo9EainNVQI9/yDavtVFRN2h3k8uf3GLHuhDMgEHg==",
+ "version": "5.4.4",
+ "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.4.4.tgz",
+ "integrity": "sha512-dGE2Vv8cpVvw28v8HCPqyb08EzbBURxDpuhJvTrusShUfGnhHBafDsLdS1EhhxyL6BJQE+2cT3dDPAv+MQ6oLw==",
"dev": true,
"bin": {
"tsc": "bin/tsc",
@@ -9318,9 +9318,9 @@
}
},
"node_modules/undici": {
- "version": "5.28.3",
- "resolved": "https://registry.npmjs.org/undici/-/undici-5.28.3.tgz",
- "integrity": "sha512-3ItfzbrhDlINjaP0duwnNsKpDQk3acHI3gVJ1z4fmwMK31k5G9OVIAMLSIaP6w4FaGkaAkN6zaQO9LUvZ1t7VA==",
+ "version": "5.28.4",
+ "resolved": "https://registry.npmjs.org/undici/-/undici-5.28.4.tgz",
+ "integrity": "sha512-72RFADWFqKmUb2hmmvNODKL3p9hcB6Gt2DOQMis1SEBaV6a4MH8soBvzg+95CYhCKPFedut2JY9bMfrDl9D23g==",
"dev": true,
"dependencies": {
"@fastify/busboy": "^2.0.0"
@@ -16692,9 +16692,9 @@
}
},
"typedoc": {
- "version": "0.25.12",
- "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.25.12.tgz",
- "integrity": "sha512-F+qhkK2VoTweDXd1c42GS/By2DvI2uDF4/EpG424dTexSHdtCH52C6IcAvMA6jR3DzAWZjHpUOW+E02kyPNUNw==",
+ "version": "0.25.13",
+ "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.25.13.tgz",
+ "integrity": "sha512-pQqiwiJ+Z4pigfOnnysObszLiU3mVLWAExSPf+Mu06G/qsc3wzbuM56SZQvONhHLncLUhYzOVkjFFpFfL5AzhQ==",
"dev": true,
"requires": {
"lunr": "^2.3.9",
@@ -16733,9 +16733,9 @@
}
},
"typescript": {
- "version": "5.4.3",
- "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.4.3.tgz",
- "integrity": "sha512-KrPd3PKaCLr78MalgiwJnA25Nm8HAmdwN3mYUYZgG/wizIo9EainNVQI9/yDavtVFRN2h3k8uf3GLHuhDMgEHg==",
+ "version": "5.4.4",
+ "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.4.4.tgz",
+ "integrity": "sha512-dGE2Vv8cpVvw28v8HCPqyb08EzbBURxDpuhJvTrusShUfGnhHBafDsLdS1EhhxyL6BJQE+2cT3dDPAv+MQ6oLw==",
"dev": true
},
"uglify-js": {
@@ -16758,9 +16758,9 @@
}
},
"undici": {
- "version": "5.28.3",
- "resolved": "https://registry.npmjs.org/undici/-/undici-5.28.3.tgz",
- "integrity": "sha512-3ItfzbrhDlINjaP0duwnNsKpDQk3acHI3gVJ1z4fmwMK31k5G9OVIAMLSIaP6w4FaGkaAkN6zaQO9LUvZ1t7VA==",
+ "version": "5.28.4",
+ "resolved": "https://registry.npmjs.org/undici/-/undici-5.28.4.tgz",
+ "integrity": "sha512-72RFADWFqKmUb2hmmvNODKL3p9hcB6Gt2DOQMis1SEBaV6a4MH8soBvzg+95CYhCKPFedut2JY9bMfrDl9D23g==",
"dev": true,
"requires": {
"@fastify/busboy": "^2.0.0"
diff --git a/src/formats/solidDatasetAsTurtle.test.ts b/src/formats/solidDatasetAsTurtle.test.ts
index 6ab47e894e..05c90d3d17 100644
--- a/src/formats/solidDatasetAsTurtle.test.ts
+++ b/src/formats/solidDatasetAsTurtle.test.ts
@@ -33,10 +33,11 @@ async function getDataset(ttl: string): Promise {
}
const ttl = `
- prefix : <#>
- prefix ex:
- prefix foaf:
- prefix vcard:
+ @prefix : <#> .
+ @prefix ex: .
+ @prefix foaf: .
+ @prefix vcard: .
+ @base .
<>
a foaf:PersonalProfileDocument ;
diff --git a/src/rdf.test.ts b/src/rdf.test.ts
index 8050f5f545..b4ec342194 100644
--- a/src/rdf.test.ts
+++ b/src/rdf.test.ts
@@ -38,9 +38,11 @@ import {
serializeInteger,
xmlSchemaTypes,
} from "./datatypes";
-import type { ImmutableDataset } from "./rdf.internal";
+import { isBlankNodeId, type ImmutableDataset } from "./rdf.internal";
import { addRdfJsQuadToDataset } from "./rdfjs.internal";
import { fromRdfJsDataset, toRdfJsDataset } from "./rdfjs";
+import { asUrl, getThing, getThingAll } from "./thing/thing";
+import { getTermAll } from "./thing/get";
describe("fromRdfJsDataset", () => {
const fcNamedNode = fc
@@ -212,7 +214,7 @@ describe("fromRdfJsDataset", () => {
expect(fromRdfJsDataset(rdfJsDataset)).toStrictEqual({
type: "Dataset",
graphs: {
- default: {
+ default: expect.objectContaining({
[subject1IriString]: {
url: subject1IriString,
type: "Subject",
@@ -231,41 +233,35 @@ describe("fromRdfJsDataset", () => {
},
},
},
- },
- [acrGraphIriString]: {
+ }),
+ [acrGraphIriString]: expect.objectContaining({
[subject2IriString]: {
url: subject2IriString,
type: "Subject",
predicates: {
[predicate1IriString]: {
blankNodes: [
- {
- [predicate1IriString]: {
- literals: {
- [xmlSchemaTypes.string]: [literalStringValue],
- },
- },
- },
- {
- [predicate1IriString]: {
- literals: {
- [xmlSchemaTypes.string]: [literalStringValue],
- [xmlSchemaTypes.integer]: [literalIntegerValue],
- },
- },
- [predicate2IriString]: {
- literals: {
- [xmlSchemaTypes.integer]: [literalIntegerValue],
- },
- },
- },
+ expect.stringMatching(/_:/),
+ expect.stringMatching(/_:/),
],
},
},
},
- },
+ }),
},
});
+ const subjectsExcludingBlankNodes = getThingAll(
+ fromRdfJsDataset(rdfJsDataset),
+ { scope: acrGraphIriString },
+ );
+ const subjectsIncludingBlankNodes = getThingAll(
+ fromRdfJsDataset(rdfJsDataset),
+ { scope: acrGraphIriString, acceptBlankNodes: true },
+ );
+ // There should be two blank nodes in the resulting dataset.
+ expect(
+ subjectsIncludingBlankNodes.length - subjectsExcludingBlankNodes.length,
+ ).toBe(2);
});
it("can represent lists", () => {
@@ -453,104 +449,6 @@ describe("fromRdfJsDataset", () => {
);
});
- it("throws an error when passed unknown Predicate types with chain Blank Node Subjects", () => {
- const mockDataset: ImmutableDataset = {
- type: "Dataset",
- graphs: { default: {} },
- };
- const chainBlankNode = DF.blankNode();
- const otherQuad = DF.quad(
- DF.namedNode("https://arbitrary.subject"),
- DF.namedNode("https://arbitrary.predicate"),
- chainBlankNode,
- DF.defaultGraph(),
- );
- const mockQuad = DF.quad(
- chainBlankNode,
- { termType: "Unknown term type" } as any,
- DF.namedNode("https://arbitrary.object"),
- DF.defaultGraph(),
- );
- expect(() =>
- addRdfJsQuadToDataset(mockDataset, otherQuad, {
- chainBlankNodes: [chainBlankNode],
- otherQuads: [mockQuad],
- }),
- ).toThrow(
- "Cannot parse Quads with nodes of type [Unknown term type] as their Predicate node.",
- );
- });
-
- it("throws an error when passed unknown Predicate types in connecting Quads for chain Blank Node Objects", () => {
- const mockDataset: ImmutableDataset = {
- type: "Dataset",
- graphs: { default: {} },
- };
- const chainBlankNode1 = DF.blankNode();
- const chainBlankNode2 = DF.blankNode();
- const otherQuad = DF.quad(
- DF.namedNode("https://arbitrary.subject"),
- DF.namedNode("https://arbitrary.predicate"),
- chainBlankNode1,
- DF.defaultGraph(),
- );
- const inBetweenQuad = DF.quad(
- chainBlankNode1,
- { termType: "Unknown term type" } as any,
- chainBlankNode2,
- DF.defaultGraph(),
- );
- const mockQuad = DF.quad(
- chainBlankNode2,
- DF.namedNode("https://arbitrary.predicate"),
- DF.namedNode("https://arbitrary.object"),
- DF.defaultGraph(),
- );
- expect(() =>
- addRdfJsQuadToDataset(mockDataset, otherQuad, {
- chainBlankNodes: [chainBlankNode1, chainBlankNode2],
- otherQuads: [mockQuad, inBetweenQuad],
- }),
- ).toThrow(
- "Cannot parse Quads with nodes of type [Unknown term type] as their Predicate node.",
- );
- });
-
- it("throws an error when passed unknown Predicate types in the terminating Quads for chain Blank Node Objects", () => {
- const mockDataset: ImmutableDataset = {
- type: "Dataset",
- graphs: { default: {} },
- };
- const chainBlankNode1 = DF.blankNode();
- const chainBlankNode2 = DF.blankNode();
- const otherQuad = DF.quad(
- DF.namedNode("https://arbitrary.subject"),
- DF.namedNode("https://arbitrary.predicate"),
- chainBlankNode1,
- DF.defaultGraph(),
- );
- const inBetweenQuad = DF.quad(
- chainBlankNode1,
- DF.namedNode("https://arbitrary.predicate"),
- chainBlankNode2,
- DF.defaultGraph(),
- );
- const mockQuad = DF.quad(
- chainBlankNode2,
- { termType: "Unknown term type" } as any,
- DF.namedNode("https://arbitrary.object"),
- DF.defaultGraph(),
- );
- expect(() =>
- addRdfJsQuadToDataset(mockDataset, otherQuad, {
- chainBlankNodes: [chainBlankNode1, chainBlankNode2],
- otherQuads: [mockQuad, inBetweenQuad],
- }),
- ).toThrow(
- "Cannot parse Quads with nodes of type [Unknown term type] as their Predicate node.",
- );
- });
-
it("throws an error when passed unknown Object types", () => {
const mockDataset: ImmutableDataset = {
type: "Dataset",
@@ -586,33 +484,36 @@ describe("fromRdfJsDataset", () => {
DF.defaultGraph(),
);
- const updatedDataset = addRdfJsQuadToDataset(mockDataset, otherQuad, {
- chainBlankNodes: [chainBlankNode1],
- otherQuads: [mockQuad],
- });
+ const updatedDataset = [mockQuad, otherQuad].reduce(
+ addRdfJsQuadToDataset,
+ mockDataset,
+ );
- expect(updatedDataset).toStrictEqual({
- graphs: {
- default: {
- "https://some.subject": {
- predicates: {
- "https://some.predicate/1": {
- blankNodes: [
- {
- "https://some.predicate/2": {
- blankNodes: ["_:some-blank-node"],
- },
- },
- ],
- },
- },
- type: "Subject",
- url: "https://some.subject",
- },
- },
- },
- type: "Dataset",
+ // There should be one blank node subject.
+ expect(
+ getThingAll(updatedDataset, { acceptBlankNodes: false }),
+ ).toHaveLength(1);
+ expect(
+ getThingAll(updatedDataset, { acceptBlankNodes: true }),
+ ).toHaveLength(2);
+
+ // The blank nodes should be linked
+ const blankNodes = getThingAll(updatedDataset, {
+ acceptBlankNodes: true,
+ }).filter((thing) => isBlankNodeId(asUrl(thing)));
+ let bnAreLinked = false;
+ blankNodes.forEach((bn) => {
+ const candidateObjects = getTermAll(bn, "https://some.predicate/2");
+ bnAreLinked ||=
+ candidateObjects.length > 0 &&
+ candidateObjects.some((obj) => obj.termType === "BlankNode");
});
+
+ // The named node should be linked to a blank node
+ getTermAll(
+ getThing(updatedDataset, "https://some.subject")!,
+ "https://some.predicate/1",
+ ).some((term) => term.termType === "BlankNode");
});
it("can parse chained Blank Nodes that end in a dangling Blank Node", () => {
@@ -640,40 +541,42 @@ describe("fromRdfJsDataset", () => {
DF.blankNode("some-blank-node"),
DF.defaultGraph(),
);
+ const updatedDataset = [mockQuad, inBetweenQuad, otherQuad].reduce(
+ addRdfJsQuadToDataset,
+ mockDataset,
+ );
- const updatedDataset = addRdfJsQuadToDataset(mockDataset, otherQuad, {
- chainBlankNodes: [chainBlankNode1, chainBlankNode2],
- otherQuads: [mockQuad, inBetweenQuad],
- });
-
- expect(updatedDataset).toStrictEqual({
- graphs: {
- default: {
- "https://some.subject": {
- predicates: {
- "https://some.predicate/1": {
- blankNodes: [
- {
- "https://some.predicate/2": {
- blankNodes: [
- {
- "https://some.predicate/3": {
- blankNodes: ["_:some-blank-node"],
- },
- },
- ],
- },
- },
- ],
- },
- },
- type: "Subject",
- url: "https://some.subject",
- },
- },
- },
- type: "Dataset",
- });
+ // There should be 2 blank node subjects
+ expect(
+ getThingAll(updatedDataset, { acceptBlankNodes: false }),
+ ).toHaveLength(1);
+ expect(
+ getThingAll(updatedDataset, { acceptBlankNodes: true }),
+ ).toHaveLength(3);
+
+ // The blank nodes subjects and the blank node object should be linked.
+ const blankNodes = getThingAll(updatedDataset, {
+ acceptBlankNodes: true,
+ }).filter((thing) => isBlankNodeId(asUrl(thing)));
+ // Count the number of links between blank nodes,
+ // based on known predicates.
+ const bnLinks = blankNodes.reduce(
+ (prev, cur) =>
+ prev +
+ [
+ ...getTermAll(cur, "https://some.predicate/2"),
+ ...getTermAll(cur, "https://some.predicate/3"),
+ ].filter((obj) => obj.termType === "BlankNode").length,
+ 0,
+ );
+ // There should be a chain of links between blank nodes.
+ expect(bnLinks).toBe(2);
+
+ // The named node should be linked to a blank node.
+ getTermAll(
+ getThing(updatedDataset, "https://some.subject")!,
+ "https://some.predicate/1",
+ ).some((term) => term.termType === "BlankNode");
});
});
});
diff --git a/src/rdfjs.internal.ts b/src/rdfjs.internal.ts
index 1c3a1dc4f7..a9ccc15c71 100644
--- a/src/rdfjs.internal.ts
+++ b/src/rdfjs.internal.ts
@@ -22,7 +22,6 @@
import { DataFactory } from "n3";
import type * as RdfJs from "@rdfjs/types";
import type {
- BlankNodeId,
Graph,
ImmutableDataset,
Objects,
@@ -41,15 +40,9 @@ import { xmlSchemaTypes } from "./datatypes";
export { DataFactory };
-type QuadParseOptions = Partial<{
- otherQuads: RdfJs.Quad[];
- chainBlankNodes: RdfJs.BlankNode[];
-}>;
-
export function addRdfJsQuadToDataset(
dataset: ImmutableDataset,
quad: RdfJs.Quad,
- quadParseOptions: QuadParseOptions = {},
): ImmutableDataset {
const supportedGraphTypes: Array = [
"NamedNode",
@@ -68,16 +61,12 @@ export function addRdfJsQuadToDataset(
...dataset,
graphs: freeze({
...dataset.graphs,
- [graphId]: addRdfJsQuadToGraph(graph, quad, quadParseOptions),
+ [graphId]: addRdfJsQuadToGraph(graph, quad),
}),
});
}
-function addRdfJsQuadToGraph(
- graph: Graph,
- quad: RdfJs.Quad,
- quadParseOptions: QuadParseOptions,
-): Graph {
+function addRdfJsQuadToGraph(graph: Graph, quad: RdfJs.Quad): Graph {
const supportedSubjectTypes: Array = [
"NamedNode",
"BlankNode",
@@ -100,29 +89,20 @@ function addRdfJsQuadToGraph(
};
return freeze({
...graph,
- [subjectIri]: addRdfJsQuadToSubject(subject, quad, quadParseOptions),
+ [subjectIri]: addRdfJsQuadToSubject(subject, quad),
});
}
-function addRdfJsQuadToSubject(
- subject: Subject,
- quad: RdfJs.Quad,
- quadParseOptions: QuadParseOptions,
-): Subject {
+function addRdfJsQuadToSubject(subject: Subject, quad: RdfJs.Quad): Subject {
return freeze({
...subject,
- predicates: addRdfJsQuadToPredicates(
- subject.predicates,
- quad,
- quadParseOptions,
- ),
+ predicates: addRdfJsQuadToPredicates(subject.predicates, quad),
});
}
function addRdfJsQuadToPredicates(
predicates: Predicates,
quad: RdfJs.Quad,
- quadParseOptions: QuadParseOptions,
): Predicates {
const supportedPredicateTypes: Array = [
"NamedNode",
@@ -136,15 +116,11 @@ function addRdfJsQuadToPredicates(
const objects = predicates[predicateIri] ?? {};
return freeze({
...predicates,
- [predicateIri]: addRdfJsQuadToObjects(objects, quad, quadParseOptions),
+ [predicateIri]: addRdfJsQuadToObjects(objects, quad),
});
}
-function addRdfJsQuadToObjects(
- objects: Objects,
- quad: RdfJs.Quad,
- quadParseOptions: QuadParseOptions,
-): Objects {
+function addRdfJsQuadToObjects(objects: Objects, quad: RdfJs.Quad): Objects {
if (quad.object.termType === "NamedNode") {
const namedNodes = freeze([
...(objects.namedNodes ?? []),
@@ -189,13 +165,9 @@ function addRdfJsQuadToObjects(
}
if (quad.object.termType === "BlankNode") {
- const blankNodePredicates = getPredicatesForBlankNode(
- quad.object,
- quadParseOptions,
- );
const blankNodes = freeze([
...(objects.blankNodes ?? []),
- blankNodePredicates,
+ getBlankNodeId(quad.object),
]);
return freeze({
...objects,
@@ -208,149 +180,6 @@ function addRdfJsQuadToObjects(
);
}
-function getPredicatesForBlankNode(
- node: RdfJs.BlankNode,
- quadParseOptions: QuadParseOptions,
-): Predicates | BlankNodeId {
- const chainBlankNodes = quadParseOptions.chainBlankNodes ?? [];
- if (
- chainBlankNodes.find((chainBlankNode) => chainBlankNode.equals(node)) ===
- undefined
- ) {
- // If this Blank Node is not used to provide nested values for another Subject,
- // just return its identifier.
- // That identifier will also be listed among the Subjects in the Graph.
- return getBlankNodeId(node);
- }
-
- /* istanbul ignore next: If there are chain nodes, there will always be other Quads, so the `?? []` can't be reached: */
- const quads = quadParseOptions.otherQuads ?? [];
- const quadsWithNodeAsSubject = quads.filter((quad) =>
- quad.subject.equals(node),
- );
-
- // First add the Quads with regular Objects
- const predicates = quadsWithNodeAsSubject
- .filter((quad) => !isBlankNode(quad.object))
- .reduce((predicatesAcc, quad) => {
- const supportedPredicateTypes: Array = [
- "NamedNode",
- ];
- if (!supportedPredicateTypes.includes(quad.predicate.termType)) {
- throw new Error(
- `Cannot parse Quads with nodes of type [${quad.predicate.termType}] as their Predicate node.`,
- );
- }
- const objects: Objects = predicatesAcc[quad.predicate.value] ?? {};
- return freeze({
- ...predicatesAcc,
- [quad.predicate.value]: addRdfJsQuadToObjects(
- objects,
- quad,
- quadParseOptions,
- ),
- });
- }, {} as Predicates);
-
- // And then also add the Quads that have another Blank Node as the Object
- // in addition to the Blank Node `node` as the Subject:
- const blankNodeObjectQuads = quadsWithNodeAsSubject.filter((quad) =>
- isBlankNode(quad.object),
- );
- return blankNodeObjectQuads.reduce((predicatesAcc, quad) => {
- const supportedPredicateTypes: Array = [
- "NamedNode",
- ];
- if (!supportedPredicateTypes.includes(quad.predicate.termType)) {
- throw new Error(
- `Cannot parse Quads with nodes of type [${quad.predicate.termType}] as their Predicate node.`,
- );
- }
- /* istanbul ignore next: The `?? {}` doesn't get hit; presumably it's initialised above. */
- const objects: Objects = predicatesAcc[quad.predicate.value] ?? {};
- /* istanbul ignore next: The `?? []` doesn't get hit; presumably it's initialised above. */
- const blankNodes = objects.blankNodes ?? [];
- return freeze({
- ...predicatesAcc,
- // The BlankNode assertions are valid because we filtered on BlankNodes above:
- [quad.predicate.value]: {
- ...objects,
- blankNodes: [
- ...blankNodes,
- getPredicatesForBlankNode(
- quad.object as RdfJs.BlankNode,
- quadParseOptions,
- ),
- ],
- },
- });
- }, predicates);
-}
-
-/**
- * Given an array of Quads, returns all Blank Nodes that are used in a single chain of Nodes.
- *
- * This allows you to obtain which Blank Nodes are involved in e.g. RDF lists.
- * This is useful because those can be represented as nested data that will have
- * a deterministic structure, whereas a representation of Blank Nodes that
- * create a cycle or are re-used will need ad-hoc, non-deterministic identifiers
- * to allow for representation without inifinite nesting.
- */
-export function getChainBlankNodes(quads: RdfJs.Quad[]): RdfJs.BlankNode[] {
- // All Blank Nodes that occur in Subject position:
- const blankNodeSubjects = quads
- .map((quad) => quad.subject)
- .filter(isBlankNode);
- // All Blank Nodes that occur in Object position:
- const blankNodeObjects = quads.map((quad) => quad.object).filter(isBlankNode);
- // Makes sure that all given Nodes are the same,
- // which will be used to verify that a set of Quads all have the same Subject:
- function everyNodeTheSame(nodes: RdfJs.Term[]): boolean {
- // This could potentially be made more performant by mapping every term
- // to their value and using native JS comparisons, assuming every node is
- // either a Blank or a Named Node.
- return nodes.every((otherNode) =>
- nodes.every((anotherNode) => otherNode.equals(anotherNode)),
- );
- }
-
- // Get all Blank Nodes that are part of a cycle in the graph:
- const cycleBlankNodes: RdfJs.BlankNode[] = [];
- blankNodeObjects.forEach((blankNodeObject) => {
- cycleBlankNodes.push(...getCycleBlankNodes(blankNodeObject, quads));
- });
-
- // Get Blank Nodes that are used to provide nested values for a single Subject,
- // which we'll represent as nested values as well
- // (this allows us to avoid generating a non-deterministic, ad-hoc identifier
- // for those Blank Nodes).
- // We'll do this by taking all Blank Nodes in the given Quads...
- const chainBlankNodes = blankNodeSubjects
- .concat(blankNodeObjects)
- .filter((blankNode) => {
- // ....removing those Blank Nodes that are part of a cycle...
- if (
- cycleBlankNodes.some((cycleBlankNode) =>
- cycleBlankNode.equals(blankNode),
- )
- ) {
- return false;
- }
- // ...and then returning only those Blank Nodes that only occur in the
- // Object position for a single Subject, i.e. that are part of a single
- // chain:
- const subjectsWithThisNodeAsObject = quads
- .filter((quad) => quad.object.equals(blankNode))
- .map((quad) => quad.subject);
- return (
- subjectsWithThisNodeAsObject.length > 0 &&
- everyNodeTheSame(subjectsWithThisNodeAsObject)
- );
- });
-
- return chainBlankNodes;
-}
-
export function toRdfJsQuads(
dataset: ImmutableDataset,
options: ToRdfJsOptions = {},
@@ -474,58 +303,3 @@ export function subjectToRdfJsQuads(
return quads;
}
-
-/**
- * A recursive function that finds all Blank Nodes in an array of Quads that create a cycle in the graph.
- *
- * This function will traverse the graph starting from `currentNode`, keeping
- * track of all the Blank Nodes it encounters twice while doing so, and
- * returning those.
- */
-function getCycleBlankNodes(
- currentNode: RdfJs.BlankNode,
- quads: RdfJs.Quad[],
- traversedBlankNodes: RdfJs.BlankNode[] = [],
-): RdfJs.BlankNode[] {
- // If we've encountered `currentNode` before, all the Blank Nodes we've
- // encountered so far are part of a cycle. Return those.
- if (
- traversedBlankNodes.find((traversedBlankNode) =>
- traversedBlankNode.equals(currentNode),
- ) !== undefined
- ) {
- return traversedBlankNodes;
- }
-
- // Find all Blank Nodes that are connected to `currentNode`:
- const blankNodeObjects = quads
- .filter(
- (quad) => quad.subject.equals(currentNode) && isBlankNode(quad.object),
- )
- .map((quad) => quad.object as RdfJs.BlankNode);
-
- // If no Blank Nodes are connected to `currentNode`, and `currentNode` is not
- // part of a cycle, we're done; the currently traversed Nodes do not form a
- // cycle:
- if (blankNodeObjects.length === 0) {
- return [];
- }
-
- // Store that we've traversed `currentNode`, then move on to all the Blank
- // Nodes connected to it, which will then take up the role of `currentNode`:
- const nextTraversedNodes = [...traversedBlankNodes, currentNode];
- const cycleBlankNodeArrays = blankNodeObjects.map((nextNode) =>
- getCycleBlankNodes(nextNode, quads, nextTraversedNodes),
- );
- // Collect all the cycle Blank Nodes found in those traverals,
- // then return them:
- const allCycleBlankNodes: RdfJs.BlankNode[] = [];
- for (const cycleBlankNodes of cycleBlankNodeArrays) {
- allCycleBlankNodes.push(...cycleBlankNodes);
- }
- return allCycleBlankNodes;
-}
-
-function isBlankNode(term: RdfJs.Term): term is RdfJs.BlankNode {
- return term.termType === "BlankNode";
-}
diff --git a/src/rdfjs.ts b/src/rdfjs.ts
index 02d4093f11..be6ecaf73b 100644
--- a/src/rdfjs.ts
+++ b/src/rdfjs.ts
@@ -40,11 +40,7 @@ import type {
DatasetCoreFactory,
} from "@rdfjs/types";
import { rdfJsDataset, type ImmutableDataset } from "./rdf.internal";
-import {
- addRdfJsQuadToDataset,
- getChainBlankNodes,
- toRdfJsQuads,
-} from "./rdfjs.internal";
+import { addRdfJsQuadToDataset, toRdfJsQuads } from "./rdfjs.internal";
/**
* Convert an RDF/JS Dataset into a [[SolidDataset]]
@@ -58,31 +54,14 @@ import {
* @returns A [[SolidDataset]] containing the same data as the given RDF/JS Dataset.
* @since 1.9.0
*/
-export function fromRdfJsDataset(rdfJsDataset: DatasetCore): ImmutableDataset {
- const dataset: ImmutableDataset = {
+export function fromRdfJsDataset(dataset: DatasetCore): ImmutableDataset {
+ const solidDataset: ImmutableDataset = {
graphs: { default: {} },
type: "Dataset",
};
-
- const quads = Array.from(rdfJsDataset);
-
- const chainBlankNodes = getChainBlankNodes(quads);
-
- // Quads with chain Blank Nodes as their Subject will be parsed when those
- // Blank Nodes are referred to in an Object. See `addRdfJsQuadToObjects`.
- const quadsWithoutChainBlankNodeSubjects = quads.filter((quad) =>
- chainBlankNodes.every(
- (chainBlankNode) => !chainBlankNode.equals(quad.subject),
- ),
- );
-
- return quadsWithoutChainBlankNodeSubjects.reduce(
- (datasetAcc, quad) =>
- addRdfJsQuadToDataset(datasetAcc, quad, {
- otherQuads: quads,
- chainBlankNodes,
- }),
- dataset,
+ return Array.from(dataset).reduce(
+ (datasetAcc, quad) => addRdfJsQuadToDataset(datasetAcc, quad),
+ solidDataset,
);
}
diff --git a/src/resource/__snapshots__/solidDataset.test.ts.snap b/src/resource/__snapshots__/solidDataset.test.ts.snap
index 0d28879089..b349d4eaa3 100644
--- a/src/resource/__snapshots__/solidDataset.test.ts.snap
+++ b/src/resource/__snapshots__/solidDataset.test.ts.snap
@@ -105,7 +105,7 @@ exports[`getWellKnownSolid returns the contents of .well-known/solid for the giv
{
"graphs": {
"default": {
- "_:n3-2007": {
+ "_:n3-3": {
"predicates": {
"http://www.w3.org/1999/02/22-rdf-syntax-ns#type": {
"namedNodes": [
@@ -143,7 +143,7 @@ exports[`getWellKnownSolid returns the contents of .well-known/solid for the giv
},
},
"type": "Subject",
- "url": "_:n3-2007",
+ "url": "_:n3-3",
},
},
},
diff --git a/src/resource/solidDataset.test.ts b/src/resource/solidDataset.test.ts
index 94af2c8ce1..7226422db8 100644
--- a/src/resource/solidDataset.test.ts
+++ b/src/resource/solidDataset.test.ts
@@ -133,6 +133,7 @@ describe("createSolidDataset", () => {
describe("responseToSolidDataset", () => {
it("returns a SolidDataset representing the fetched Turtle", async () => {
const turtle = `
+ @base .
@prefix : <#>.
@prefix foaf: .
@prefix vcard: .
@@ -154,99 +155,22 @@ describe("responseToSolidDataset", () => {
"https://some.pod/resource",
);
const solidDataset = await responseToSolidDataset(response);
-
expect(solidDataset).toEqual(
expect.objectContaining({
graphs: {
default: {
- "https://some.pod/resource": {
- predicates: {
- "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": {
- namedNodes: [
- "http://xmlns.com/foaf/0.1/PersonalProfileDocument",
- ],
- },
- "http://xmlns.com/foaf/0.1/maker": {
- namedNodes: ["https://some.pod/resource#me"],
- },
- "http://xmlns.com/foaf/0.1/primaryTopic": {
- namedNodes: ["https://some.pod/resource#me"],
- },
- },
+ // The blank node identifier is by definition unstable.
+ // If this test starts failing, it may be due to the
+ // identifier changing, which is not forbidden.
+ "_:n3-0": {
type: "Subject",
- url: "https://some.pod/resource",
- },
- "https://some.pod/resource#me": {
+ url: "_:n3-0",
predicates: {
- "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": {
- namedNodes: ["http://xmlns.com/foaf/0.1/Person"],
- },
- "http://www.w3.org/2006/vcard/ns#fn": {
- blankNodes: [
- {
- "https://some.pod/resource#predicate": {
- namedNodes: ["for://a.blank/node"],
- },
- },
- ],
- literals: {
- "http://www.w3.org/2001/XMLSchema#string": ["Vincent"],
- },
+ "https://some.pod/resource#predicate": {
+ namedNodes: ["for://a.blank/node"],
},
},
- type: "Subject",
- url: "https://some.pod/resource#me",
},
- },
- },
- internal_resourceInfo: {
- contentType: "text/turtle",
- isRawData: false,
- linkedResources: {},
- sourceIri: "https://some.pod/resource",
- },
- type: "Dataset",
- }),
- );
- });
-
- it("does not include non-deterministic identifiers when it detects non-cyclic chains of Blank Nodes", async () => {
- const turtle = `
- @prefix : <#>.
- @prefix foaf: .
- @prefix vcard: .
- @prefix acl: .
-
- <> a foaf:PersonalProfileDocument; foaf:maker :me; foaf:primaryTopic :me.
-
- :me
- a foaf:Person;
- vcard:fn "Vincent";
- acl:trustedApp
- [
- acl:mode acl:Append, acl:Control, acl:Read, acl:Write;
- acl:origin
- ],
- [
- acl:mode acl:Append, acl:Control, acl:Read, acl:Write;
- acl:origin
- ].
- `;
-
- const response = new Response(turtle, {
- headers: {
- "Content-Type": "text/turtle",
- },
- });
- jest
- .spyOn(response, "url", "get")
- .mockReturnValue("https://some.pod/resource");
- const solidDataset = await responseToSolidDataset(response);
-
- expect(solidDataset).toEqual(
- expect.objectContaining({
- graphs: {
- default: {
"https://some.pod/resource": {
predicates: {
"http://www.w3.org/1999/02/22-rdf-syntax-ns#type": {
@@ -270,40 +194,12 @@ describe("responseToSolidDataset", () => {
namedNodes: ["http://xmlns.com/foaf/0.1/Person"],
},
"http://www.w3.org/2006/vcard/ns#fn": {
+ // Here, the blank node identifier isn't referenced explicitly for resiliency.
+ blankNodes: [expect.stringMatching(/^_:/)],
literals: {
"http://www.w3.org/2001/XMLSchema#string": ["Vincent"],
},
},
- "http://www.w3.org/ns/auth/acl#trustedApp": {
- blankNodes: [
- {
- "http://www.w3.org/ns/auth/acl#mode": {
- namedNodes: [
- "http://www.w3.org/ns/auth/acl#Append",
- "http://www.w3.org/ns/auth/acl#Control",
- "http://www.w3.org/ns/auth/acl#Read",
- "http://www.w3.org/ns/auth/acl#Write",
- ],
- },
- "http://www.w3.org/ns/auth/acl#origin": {
- namedNodes: ["http://localhost:3000"],
- },
- },
- {
- "http://www.w3.org/ns/auth/acl#mode": {
- namedNodes: [
- "http://www.w3.org/ns/auth/acl#Append",
- "http://www.w3.org/ns/auth/acl#Control",
- "http://www.w3.org/ns/auth/acl#Read",
- "http://www.w3.org/ns/auth/acl#Write",
- ],
- },
- "http://www.w3.org/ns/auth/acl#origin": {
- namedNodes: ["https://penny.vincenttunru.com"],
- },
- },
- ],
- },
},
type: "Subject",
url: "https://some.pod/resource#me",
@@ -321,53 +217,6 @@ describe("responseToSolidDataset", () => {
);
});
- it("does not attempt to detect chains when there are many Blank Nodes, to avoid performance bottlenecks", async () => {
- function getChainedBlankNode(iteration: number): string {
- if (iteration === 1000) {
- return ` "Base case"`;
- }
- return ` [${getChainedBlankNode(
- iteration + 1,
- )}]`;
- }
- const turtle = `
- @prefix : <#>.
- @prefix vcard: .
-
- :me vcard:fn [${getChainedBlankNode(0)}].
- `;
-
- // This test uses constructs native to Node 16.
- const t0 = performance.now();
- await responseToSolidDataset(
- mockResponse(turtle, {
- headers: {
- "Content-Type": "text/turtle",
- },
- }),
- );
- const t1 = performance.now();
-
- // Parsing a document with over 1000 statements will always be somewhat slow
- // (hence allowing it to take 1.5 seconds), but if it attempts to detect
- // chains, it will take on the order of >20 seconds.
- // eslint-disable-next-line jest/no-conditional-expect
- expect(t1 - t0).toBeLessThan(1500);
-
- const solidDataset = await responseToSolidDataset(
- mockResponse(turtle, {
- headers: {
- "Content-Type": "text/turtle",
- },
- }),
- );
- // Blank Nodes should be listed explicitly, rather than as properties on
- // https://some.pod/resource#me:
- expect(Object.keys(solidDataset.graphs.default)).not.toStrictEqual([
- "https://some.pod/resource#me",
- ]);
- });
-
it("throws a meaningful error when the server returned a 403", async () => {
const response = new Response("Not allowed", {
status: 403,
diff --git a/src/resource/solidDataset.ts b/src/resource/solidDataset.ts
index 15150d44ae..83c7dfd1f8 100644
--- a/src/resource/solidDataset.ts
+++ b/src/resource/solidDataset.ts
@@ -19,13 +19,9 @@
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
-import type { Quad, NamedNode, Quad_Object } from "@rdfjs/types";
-import {
- addRdfJsQuadToDataset,
- DataFactory,
- getChainBlankNodes,
- toRdfJsQuads,
-} from "../rdfjs.internal";
+import type { Quad, NamedNode, Quad_Object, DatasetCore } from "@rdfjs/types";
+import { Store as N3Store } from "n3";
+import { DataFactory, toRdfJsQuads } from "../rdfjs.internal";
import { ldp, pim } from "../constants";
import { getJsonLdParser } from "../formats/jsonLd";
import { triplesToTurtle, getTurtleParser } from "../formats/turtle";
@@ -65,6 +61,7 @@ import {
import { getIriAll } from "../thing/get";
import { normalizeServerSideIri } from "./iri.internal";
import { freeze, getLocalNodeName, isLocalNodeIri } from "../rdf.internal";
+import { fromRdfJsDataset } from "../rdfjs";
/**
* Initialise a new [[SolidDataset]] in memory.
@@ -156,6 +153,7 @@ type ContentType = string;
export type ParseOptions = {
parsers: Record;
};
+
/**
* @hidden This interface is not exposed yet until we've tried it out in practice.
*/
@@ -200,81 +198,31 @@ export async function responseToSolidDataset(
}
const data = await response.text();
- const parsingPromise = new Promise(
- (resolve, reject) => {
- let solidDataset: SolidDataset = freeze({
- graphs: freeze({ default: freeze({}) }),
- type: "Dataset",
- });
-
- // While Quads without Blank Nodes can be added to the SolidDataset as we
- // encounter them, to parse Quads with Blank Nodes, we'll have to wait until
- // we've seen all the Quads, so that we can reconcile equal Blank Nodes.
- const quadsWithBlankNodes: Quad[] = [];
- const allQuads: Quad[] = [];
-
- parser.onError((error) => {
- reject(
- new Error(
- `Encountered an error parsing the Resource at [${getSourceUrl(
- resourceInfo,
- )}] with content type [${contentType}]: ${error}`,
- ),
- );
- });
- parser.onQuad((quad) => {
- allQuads.push(quad);
- if (
- quad.subject.termType === "BlankNode" ||
- quad.object.termType === "BlankNode"
- ) {
- // Quads with Blank Nodes will be parsed when all Quads are known,
- // so that equal Blank Nodes can be reconciled:
- quadsWithBlankNodes.push(quad);
- } else {
- solidDataset = addRdfJsQuadToDataset(solidDataset, quad);
- }
- });
- parser.onComplete(async () => {
- // If a Resource contains more than this number of Blank Nodes,
- // we consider the detection of chains (O(n^2), I think) to be too
- // expensive, and just incorporate them as regular Blank Nodes with
- // non-deterministic, ad-hoc identifiers into the SolidDataset:
- const maxBlankNodesToDetectChainsFor = 20;
- // Some Blank Nodes only serve to use a set of Quads as the Object for a
- // single Subject. Those Quads will be added to the SolidDataset when
- // their Subject's Blank Node is encountered in the Object position.
- const chainBlankNodes =
- quadsWithBlankNodes.length <= maxBlankNodesToDetectChainsFor
- ? getChainBlankNodes(quadsWithBlankNodes)
- : [];
- const quadsWithoutChainBlankNodeSubjects = quadsWithBlankNodes.filter(
- (quad) =>
- chainBlankNodes.every(
- (chainBlankNode) => !chainBlankNode.equals(quad.subject),
- ),
- );
- solidDataset = quadsWithoutChainBlankNodeSubjects.reduce(
- (datasetAcc, quad) =>
- addRdfJsQuadToDataset(datasetAcc, quad, {
- otherQuads: allQuads,
- chainBlankNodes,
- }),
- solidDataset,
- );
- const solidDatasetWithResourceInfo: SolidDataset &
- WithServerResourceInfo = freeze({
- ...solidDataset,
- ...resourceInfo,
- });
- resolve(solidDatasetWithResourceInfo);
- });
-
- parser.parse(data, resourceInfo);
- },
- );
+ const rdfjsDataset = await new Promise((resolve, reject) => {
+ const store = new N3Store();
+ parser.onError((error) => {
+ reject(
+ new Error(
+ `Encountered an error parsing the Resource at [${getSourceUrl(
+ resourceInfo,
+ )}] with content type [${contentType}]: ${error}`,
+ ),
+ );
+ });
+ parser.onQuad((quad) => {
+ store.add(quad);
+ });
+ parser.onComplete(() => {
+ resolve(store);
+ });
- return parsingPromise;
+ parser.parse(data, resourceInfo);
+ });
+ const solidDataset: SolidDataset = freeze(fromRdfJsDataset(rdfjsDataset));
+ return freeze({
+ ...solidDataset,
+ ...resourceInfo,
+ });
}
/**