diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b34bffad5..9856cc893c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,25 @@ The following changes are pending, and will be applied on the next major release ### Patch changes - `getThing` now supports Blank Node identifiers in addition to IRIs and skolems to refer to a subject. +- `getThingAll(dataset, { acceptBlankNodes: true })` now returns all Blank Node + subjects in the Dataset, in particular including those that are part of a single chain of + predicates. For instance, given the following dataset: + + ``` + @prefix ex: . + @prefix foaf: . + + ex:camille + foaf:knows [ + foaf:name "Dominique"@en ; + ] . + ; + ``` + + `getThingAll(dataset, { acceptBlankNodes: true })` would have previously returned + a single element for the Named Node (`ex:camille`); it will now also include a second + element for the Blank Node. Blank Node identifiers are by definition unstable and shouldn't + be relied upon beyond local resolution. ## [2.0.1] diff --git a/package-lock.json b/package-lock.json index e3fa5c6d2b..7ae7f30f95 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9220,9 +9220,9 @@ } }, "node_modules/typedoc": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.25.12.tgz", - "integrity": "sha512-F+qhkK2VoTweDXd1c42GS/By2DvI2uDF4/EpG424dTexSHdtCH52C6IcAvMA6jR3DzAWZjHpUOW+E02kyPNUNw==", + "version": "0.25.13", + "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.25.13.tgz", + "integrity": "sha512-pQqiwiJ+Z4pigfOnnysObszLiU3mVLWAExSPf+Mu06G/qsc3wzbuM56SZQvONhHLncLUhYzOVkjFFpFfL5AzhQ==", "dev": true, "dependencies": { "lunr": "^2.3.9", @@ -9277,9 +9277,9 @@ } }, "node_modules/typescript": { - "version": "5.4.3", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.4.3.tgz", - "integrity": "sha512-KrPd3PKaCLr78MalgiwJnA25Nm8HAmdwN3mYUYZgG/wizIo9EainNVQI9/yDavtVFRN2h3k8uf3GLHuhDMgEHg==", + "version": "5.4.4", + "resolved": 
"https://registry.npmjs.org/typescript/-/typescript-5.4.4.tgz", + "integrity": "sha512-dGE2Vv8cpVvw28v8HCPqyb08EzbBURxDpuhJvTrusShUfGnhHBafDsLdS1EhhxyL6BJQE+2cT3dDPAv+MQ6oLw==", "dev": true, "bin": { "tsc": "bin/tsc", @@ -9318,9 +9318,9 @@ } }, "node_modules/undici": { - "version": "5.28.3", - "resolved": "https://registry.npmjs.org/undici/-/undici-5.28.3.tgz", - "integrity": "sha512-3ItfzbrhDlINjaP0duwnNsKpDQk3acHI3gVJ1z4fmwMK31k5G9OVIAMLSIaP6w4FaGkaAkN6zaQO9LUvZ1t7VA==", + "version": "5.28.4", + "resolved": "https://registry.npmjs.org/undici/-/undici-5.28.4.tgz", + "integrity": "sha512-72RFADWFqKmUb2hmmvNODKL3p9hcB6Gt2DOQMis1SEBaV6a4MH8soBvzg+95CYhCKPFedut2JY9bMfrDl9D23g==", "dev": true, "dependencies": { "@fastify/busboy": "^2.0.0" @@ -16692,9 +16692,9 @@ } }, "typedoc": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.25.12.tgz", - "integrity": "sha512-F+qhkK2VoTweDXd1c42GS/By2DvI2uDF4/EpG424dTexSHdtCH52C6IcAvMA6jR3DzAWZjHpUOW+E02kyPNUNw==", + "version": "0.25.13", + "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.25.13.tgz", + "integrity": "sha512-pQqiwiJ+Z4pigfOnnysObszLiU3mVLWAExSPf+Mu06G/qsc3wzbuM56SZQvONhHLncLUhYzOVkjFFpFfL5AzhQ==", "dev": true, "requires": { "lunr": "^2.3.9", @@ -16733,9 +16733,9 @@ } }, "typescript": { - "version": "5.4.3", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.4.3.tgz", - "integrity": "sha512-KrPd3PKaCLr78MalgiwJnA25Nm8HAmdwN3mYUYZgG/wizIo9EainNVQI9/yDavtVFRN2h3k8uf3GLHuhDMgEHg==", + "version": "5.4.4", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.4.4.tgz", + "integrity": "sha512-dGE2Vv8cpVvw28v8HCPqyb08EzbBURxDpuhJvTrusShUfGnhHBafDsLdS1EhhxyL6BJQE+2cT3dDPAv+MQ6oLw==", "dev": true }, "uglify-js": { @@ -16758,9 +16758,9 @@ } }, "undici": { - "version": "5.28.3", - "resolved": "https://registry.npmjs.org/undici/-/undici-5.28.3.tgz", - "integrity": 
"sha512-3ItfzbrhDlINjaP0duwnNsKpDQk3acHI3gVJ1z4fmwMK31k5G9OVIAMLSIaP6w4FaGkaAkN6zaQO9LUvZ1t7VA==", + "version": "5.28.4", + "resolved": "https://registry.npmjs.org/undici/-/undici-5.28.4.tgz", + "integrity": "sha512-72RFADWFqKmUb2hmmvNODKL3p9hcB6Gt2DOQMis1SEBaV6a4MH8soBvzg+95CYhCKPFedut2JY9bMfrDl9D23g==", "dev": true, "requires": { "@fastify/busboy": "^2.0.0" diff --git a/src/formats/solidDatasetAsTurtle.test.ts b/src/formats/solidDatasetAsTurtle.test.ts index 6ab47e894e..05c90d3d17 100644 --- a/src/formats/solidDatasetAsTurtle.test.ts +++ b/src/formats/solidDatasetAsTurtle.test.ts @@ -33,10 +33,11 @@ async function getDataset(ttl: string): Promise { } const ttl = ` - prefix : <#> - prefix ex: - prefix foaf: - prefix vcard: + @prefix : <#> . + @prefix ex: . + @prefix foaf: . + @prefix vcard: . + @base . <> a foaf:PersonalProfileDocument ; diff --git a/src/rdf.test.ts b/src/rdf.test.ts index 8050f5f545..b4ec342194 100644 --- a/src/rdf.test.ts +++ b/src/rdf.test.ts @@ -38,9 +38,11 @@ import { serializeInteger, xmlSchemaTypes, } from "./datatypes"; -import type { ImmutableDataset } from "./rdf.internal"; +import { isBlankNodeId, type ImmutableDataset } from "./rdf.internal"; import { addRdfJsQuadToDataset } from "./rdfjs.internal"; import { fromRdfJsDataset, toRdfJsDataset } from "./rdfjs"; +import { asUrl, getThing, getThingAll } from "./thing/thing"; +import { getTermAll } from "./thing/get"; describe("fromRdfJsDataset", () => { const fcNamedNode = fc @@ -212,7 +214,7 @@ describe("fromRdfJsDataset", () => { expect(fromRdfJsDataset(rdfJsDataset)).toStrictEqual({ type: "Dataset", graphs: { - default: { + default: expect.objectContaining({ [subject1IriString]: { url: subject1IriString, type: "Subject", @@ -231,41 +233,35 @@ describe("fromRdfJsDataset", () => { }, }, }, - }, - [acrGraphIriString]: { + }), + [acrGraphIriString]: expect.objectContaining({ [subject2IriString]: { url: subject2IriString, type: "Subject", predicates: { [predicate1IriString]: { blankNodes: [ - 
{ - [predicate1IriString]: { - literals: { - [xmlSchemaTypes.string]: [literalStringValue], - }, - }, - }, - { - [predicate1IriString]: { - literals: { - [xmlSchemaTypes.string]: [literalStringValue], - [xmlSchemaTypes.integer]: [literalIntegerValue], - }, - }, - [predicate2IriString]: { - literals: { - [xmlSchemaTypes.integer]: [literalIntegerValue], - }, - }, - }, + expect.stringMatching(/_:/), + expect.stringMatching(/_:/), ], }, }, }, - }, + }), }, }); + const subjectsExcludingBlankNodes = getThingAll( + fromRdfJsDataset(rdfJsDataset), + { scope: acrGraphIriString }, + ); + const subjectsIncludingBlankNodes = getThingAll( + fromRdfJsDataset(rdfJsDataset), + { scope: acrGraphIriString, acceptBlankNodes: true }, + ); + // There should be two blank nodes in the resulting dataset. + expect( + subjectsIncludingBlankNodes.length - subjectsExcludingBlankNodes.length, + ).toBe(2); }); it("can represent lists", () => { @@ -453,104 +449,6 @@ describe("fromRdfJsDataset", () => { ); }); - it("throws an error when passed unknown Predicate types with chain Blank Node Subjects", () => { - const mockDataset: ImmutableDataset = { - type: "Dataset", - graphs: { default: {} }, - }; - const chainBlankNode = DF.blankNode(); - const otherQuad = DF.quad( - DF.namedNode("https://arbitrary.subject"), - DF.namedNode("https://arbitrary.predicate"), - chainBlankNode, - DF.defaultGraph(), - ); - const mockQuad = DF.quad( - chainBlankNode, - { termType: "Unknown term type" } as any, - DF.namedNode("https://arbitrary.object"), - DF.defaultGraph(), - ); - expect(() => - addRdfJsQuadToDataset(mockDataset, otherQuad, { - chainBlankNodes: [chainBlankNode], - otherQuads: [mockQuad], - }), - ).toThrow( - "Cannot parse Quads with nodes of type [Unknown term type] as their Predicate node.", - ); - }); - - it("throws an error when passed unknown Predicate types in connecting Quads for chain Blank Node Objects", () => { - const mockDataset: ImmutableDataset = { - type: "Dataset", - graphs: { default: 
{} }, - }; - const chainBlankNode1 = DF.blankNode(); - const chainBlankNode2 = DF.blankNode(); - const otherQuad = DF.quad( - DF.namedNode("https://arbitrary.subject"), - DF.namedNode("https://arbitrary.predicate"), - chainBlankNode1, - DF.defaultGraph(), - ); - const inBetweenQuad = DF.quad( - chainBlankNode1, - { termType: "Unknown term type" } as any, - chainBlankNode2, - DF.defaultGraph(), - ); - const mockQuad = DF.quad( - chainBlankNode2, - DF.namedNode("https://arbitrary.predicate"), - DF.namedNode("https://arbitrary.object"), - DF.defaultGraph(), - ); - expect(() => - addRdfJsQuadToDataset(mockDataset, otherQuad, { - chainBlankNodes: [chainBlankNode1, chainBlankNode2], - otherQuads: [mockQuad, inBetweenQuad], - }), - ).toThrow( - "Cannot parse Quads with nodes of type [Unknown term type] as their Predicate node.", - ); - }); - - it("throws an error when passed unknown Predicate types in the terminating Quads for chain Blank Node Objects", () => { - const mockDataset: ImmutableDataset = { - type: "Dataset", - graphs: { default: {} }, - }; - const chainBlankNode1 = DF.blankNode(); - const chainBlankNode2 = DF.blankNode(); - const otherQuad = DF.quad( - DF.namedNode("https://arbitrary.subject"), - DF.namedNode("https://arbitrary.predicate"), - chainBlankNode1, - DF.defaultGraph(), - ); - const inBetweenQuad = DF.quad( - chainBlankNode1, - DF.namedNode("https://arbitrary.predicate"), - chainBlankNode2, - DF.defaultGraph(), - ); - const mockQuad = DF.quad( - chainBlankNode2, - { termType: "Unknown term type" } as any, - DF.namedNode("https://arbitrary.object"), - DF.defaultGraph(), - ); - expect(() => - addRdfJsQuadToDataset(mockDataset, otherQuad, { - chainBlankNodes: [chainBlankNode1, chainBlankNode2], - otherQuads: [mockQuad, inBetweenQuad], - }), - ).toThrow( - "Cannot parse Quads with nodes of type [Unknown term type] as their Predicate node.", - ); - }); - it("throws an error when passed unknown Object types", () => { const mockDataset: ImmutableDataset = 
{ type: "Dataset", @@ -586,33 +484,36 @@ describe("fromRdfJsDataset", () => { DF.defaultGraph(), ); - const updatedDataset = addRdfJsQuadToDataset(mockDataset, otherQuad, { - chainBlankNodes: [chainBlankNode1], - otherQuads: [mockQuad], - }); + const updatedDataset = [mockQuad, otherQuad].reduce( + addRdfJsQuadToDataset, + mockDataset, + ); - expect(updatedDataset).toStrictEqual({ - graphs: { - default: { - "https://some.subject": { - predicates: { - "https://some.predicate/1": { - blankNodes: [ - { - "https://some.predicate/2": { - blankNodes: ["_:some-blank-node"], - }, - }, - ], - }, - }, - type: "Subject", - url: "https://some.subject", - }, - }, - }, - type: "Dataset", + // There should be one blank node subject. + expect( + getThingAll(updatedDataset, { acceptBlankNodes: false }), + ).toHaveLength(1); + expect( + getThingAll(updatedDataset, { acceptBlankNodes: true }), + ).toHaveLength(2); + + // The blank nodes should be linked + const blankNodes = getThingAll(updatedDataset, { + acceptBlankNodes: true, + }).filter((thing) => isBlankNodeId(asUrl(thing))); + let bnAreLinked = false; + blankNodes.forEach((bn) => { + const candidateObjects = getTermAll(bn, "https://some.predicate/2"); + bnAreLinked ||= + candidateObjects.length > 0 && + candidateObjects.some((obj) => obj.termType === "BlankNode"); }); + + // The named node should be linked to a blank node + getTermAll( + getThing(updatedDataset, "https://some.subject")!, + "https://some.predicate/1", + ).some((term) => term.termType === "BlankNode"); }); it("can parse chained Blank Nodes that end in a dangling Blank Node", () => { @@ -640,40 +541,42 @@ describe("fromRdfJsDataset", () => { DF.blankNode("some-blank-node"), DF.defaultGraph(), ); + const updatedDataset = [mockQuad, inBetweenQuad, otherQuad].reduce( + addRdfJsQuadToDataset, + mockDataset, + ); - const updatedDataset = addRdfJsQuadToDataset(mockDataset, otherQuad, { - chainBlankNodes: [chainBlankNode1, chainBlankNode2], - otherQuads: [mockQuad, 
inBetweenQuad], - }); - - expect(updatedDataset).toStrictEqual({ - graphs: { - default: { - "https://some.subject": { - predicates: { - "https://some.predicate/1": { - blankNodes: [ - { - "https://some.predicate/2": { - blankNodes: [ - { - "https://some.predicate/3": { - blankNodes: ["_:some-blank-node"], - }, - }, - ], - }, - }, - ], - }, - }, - type: "Subject", - url: "https://some.subject", - }, - }, - }, - type: "Dataset", - }); + // There should be 2 blank node subjects + expect( + getThingAll(updatedDataset, { acceptBlankNodes: false }), + ).toHaveLength(1); + expect( + getThingAll(updatedDataset, { acceptBlankNodes: true }), + ).toHaveLength(3); + + // The blank nodes subjects and the blank node object should be linked. + const blankNodes = getThingAll(updatedDataset, { + acceptBlankNodes: true, + }).filter((thing) => isBlankNodeId(asUrl(thing))); + // Count the number of links between blank nodes, + // based on known predicates. + const bnLinks = blankNodes.reduce( + (prev, cur) => + prev + + [ + ...getTermAll(cur, "https://some.predicate/2"), + ...getTermAll(cur, "https://some.predicate/3"), + ].filter((obj) => obj.termType === "BlankNode").length, + 0, + ); + // There should be a chain of links between blank nodes. + expect(bnLinks).toBe(2); + + // The named node should be linked to a blank node. 
+ getTermAll( + getThing(updatedDataset, "https://some.subject")!, + "https://some.predicate/1", + ).some((term) => term.termType === "BlankNode"); }); }); }); diff --git a/src/rdfjs.internal.ts b/src/rdfjs.internal.ts index 1c3a1dc4f7..a9ccc15c71 100644 --- a/src/rdfjs.internal.ts +++ b/src/rdfjs.internal.ts @@ -22,7 +22,6 @@ import { DataFactory } from "n3"; import type * as RdfJs from "@rdfjs/types"; import type { - BlankNodeId, Graph, ImmutableDataset, Objects, @@ -41,15 +40,9 @@ import { xmlSchemaTypes } from "./datatypes"; export { DataFactory }; -type QuadParseOptions = Partial<{ - otherQuads: RdfJs.Quad[]; - chainBlankNodes: RdfJs.BlankNode[]; -}>; - export function addRdfJsQuadToDataset( dataset: ImmutableDataset, quad: RdfJs.Quad, - quadParseOptions: QuadParseOptions = {}, ): ImmutableDataset { const supportedGraphTypes: Array = [ "NamedNode", @@ -68,16 +61,12 @@ export function addRdfJsQuadToDataset( ...dataset, graphs: freeze({ ...dataset.graphs, - [graphId]: addRdfJsQuadToGraph(graph, quad, quadParseOptions), + [graphId]: addRdfJsQuadToGraph(graph, quad), }), }); } -function addRdfJsQuadToGraph( - graph: Graph, - quad: RdfJs.Quad, - quadParseOptions: QuadParseOptions, -): Graph { +function addRdfJsQuadToGraph(graph: Graph, quad: RdfJs.Quad): Graph { const supportedSubjectTypes: Array = [ "NamedNode", "BlankNode", @@ -100,29 +89,20 @@ function addRdfJsQuadToGraph( }; return freeze({ ...graph, - [subjectIri]: addRdfJsQuadToSubject(subject, quad, quadParseOptions), + [subjectIri]: addRdfJsQuadToSubject(subject, quad), }); } -function addRdfJsQuadToSubject( - subject: Subject, - quad: RdfJs.Quad, - quadParseOptions: QuadParseOptions, -): Subject { +function addRdfJsQuadToSubject(subject: Subject, quad: RdfJs.Quad): Subject { return freeze({ ...subject, - predicates: addRdfJsQuadToPredicates( - subject.predicates, - quad, - quadParseOptions, - ), + predicates: addRdfJsQuadToPredicates(subject.predicates, quad), }); } function addRdfJsQuadToPredicates( 
predicates: Predicates, quad: RdfJs.Quad, - quadParseOptions: QuadParseOptions, ): Predicates { const supportedPredicateTypes: Array = [ "NamedNode", @@ -136,15 +116,11 @@ function addRdfJsQuadToPredicates( const objects = predicates[predicateIri] ?? {}; return freeze({ ...predicates, - [predicateIri]: addRdfJsQuadToObjects(objects, quad, quadParseOptions), + [predicateIri]: addRdfJsQuadToObjects(objects, quad), }); } -function addRdfJsQuadToObjects( - objects: Objects, - quad: RdfJs.Quad, - quadParseOptions: QuadParseOptions, -): Objects { +function addRdfJsQuadToObjects(objects: Objects, quad: RdfJs.Quad): Objects { if (quad.object.termType === "NamedNode") { const namedNodes = freeze([ ...(objects.namedNodes ?? []), @@ -189,13 +165,9 @@ function addRdfJsQuadToObjects( } if (quad.object.termType === "BlankNode") { - const blankNodePredicates = getPredicatesForBlankNode( - quad.object, - quadParseOptions, - ); const blankNodes = freeze([ ...(objects.blankNodes ?? []), - blankNodePredicates, + getBlankNodeId(quad.object), ]); return freeze({ ...objects, @@ -208,149 +180,6 @@ function addRdfJsQuadToObjects( ); } -function getPredicatesForBlankNode( - node: RdfJs.BlankNode, - quadParseOptions: QuadParseOptions, -): Predicates | BlankNodeId { - const chainBlankNodes = quadParseOptions.chainBlankNodes ?? []; - if ( - chainBlankNodes.find((chainBlankNode) => chainBlankNode.equals(node)) === - undefined - ) { - // If this Blank Node is not used to provide nested values for another Subject, - // just return its identifier. - // That identifier will also be listed among the Subjects in the Graph. - return getBlankNodeId(node); - } - - /* istanbul ignore next: If there are chain nodes, there will always be other Quads, so the `?? []` can't be reached: */ - const quads = quadParseOptions.otherQuads ?? 
[]; - const quadsWithNodeAsSubject = quads.filter((quad) => - quad.subject.equals(node), - ); - - // First add the Quads with regular Objects - const predicates = quadsWithNodeAsSubject - .filter((quad) => !isBlankNode(quad.object)) - .reduce((predicatesAcc, quad) => { - const supportedPredicateTypes: Array = [ - "NamedNode", - ]; - if (!supportedPredicateTypes.includes(quad.predicate.termType)) { - throw new Error( - `Cannot parse Quads with nodes of type [${quad.predicate.termType}] as their Predicate node.`, - ); - } - const objects: Objects = predicatesAcc[quad.predicate.value] ?? {}; - return freeze({ - ...predicatesAcc, - [quad.predicate.value]: addRdfJsQuadToObjects( - objects, - quad, - quadParseOptions, - ), - }); - }, {} as Predicates); - - // And then also add the Quads that have another Blank Node as the Object - // in addition to the Blank Node `node` as the Subject: - const blankNodeObjectQuads = quadsWithNodeAsSubject.filter((quad) => - isBlankNode(quad.object), - ); - return blankNodeObjectQuads.reduce((predicatesAcc, quad) => { - const supportedPredicateTypes: Array = [ - "NamedNode", - ]; - if (!supportedPredicateTypes.includes(quad.predicate.termType)) { - throw new Error( - `Cannot parse Quads with nodes of type [${quad.predicate.termType}] as their Predicate node.`, - ); - } - /* istanbul ignore next: The `?? {}` doesn't get hit; presumably it's initialised above. */ - const objects: Objects = predicatesAcc[quad.predicate.value] ?? {}; - /* istanbul ignore next: The `?? []` doesn't get hit; presumably it's initialised above. */ - const blankNodes = objects.blankNodes ?? 
[]; - return freeze({ - ...predicatesAcc, - // The BlankNode assertions are valid because we filtered on BlankNodes above: - [quad.predicate.value]: { - ...objects, - blankNodes: [ - ...blankNodes, - getPredicatesForBlankNode( - quad.object as RdfJs.BlankNode, - quadParseOptions, - ), - ], - }, - }); - }, predicates); -} - -/** - * Given an array of Quads, returns all Blank Nodes that are used in a single chain of Nodes. - * - * This allows you to obtain which Blank Nodes are involved in e.g. RDF lists. - * This is useful because those can be represented as nested data that will have - * a deterministic structure, whereas a representation of Blank Nodes that - * create a cycle or are re-used will need ad-hoc, non-deterministic identifiers - * to allow for representation without inifinite nesting. - */ -export function getChainBlankNodes(quads: RdfJs.Quad[]): RdfJs.BlankNode[] { - // All Blank Nodes that occur in Subject position: - const blankNodeSubjects = quads - .map((quad) => quad.subject) - .filter(isBlankNode); - // All Blank Nodes that occur in Object position: - const blankNodeObjects = quads.map((quad) => quad.object).filter(isBlankNode); - // Makes sure that all given Nodes are the same, - // which will be used to verify that a set of Quads all have the same Subject: - function everyNodeTheSame(nodes: RdfJs.Term[]): boolean { - // This could potentially be made more performant by mapping every term - // to their value and using native JS comparisons, assuming every node is - // either a Blank or a Named Node. 
- return nodes.every((otherNode) => - nodes.every((anotherNode) => otherNode.equals(anotherNode)), - ); - } - - // Get all Blank Nodes that are part of a cycle in the graph: - const cycleBlankNodes: RdfJs.BlankNode[] = []; - blankNodeObjects.forEach((blankNodeObject) => { - cycleBlankNodes.push(...getCycleBlankNodes(blankNodeObject, quads)); - }); - - // Get Blank Nodes that are used to provide nested values for a single Subject, - // which we'll represent as nested values as well - // (this allows us to avoid generating a non-deterministic, ad-hoc identifier - // for those Blank Nodes). - // We'll do this by taking all Blank Nodes in the given Quads... - const chainBlankNodes = blankNodeSubjects - .concat(blankNodeObjects) - .filter((blankNode) => { - // ....removing those Blank Nodes that are part of a cycle... - if ( - cycleBlankNodes.some((cycleBlankNode) => - cycleBlankNode.equals(blankNode), - ) - ) { - return false; - } - // ...and then returning only those Blank Nodes that only occur in the - // Object position for a single Subject, i.e. that are part of a single - // chain: - const subjectsWithThisNodeAsObject = quads - .filter((quad) => quad.object.equals(blankNode)) - .map((quad) => quad.subject); - return ( - subjectsWithThisNodeAsObject.length > 0 && - everyNodeTheSame(subjectsWithThisNodeAsObject) - ); - }); - - return chainBlankNodes; -} - export function toRdfJsQuads( dataset: ImmutableDataset, options: ToRdfJsOptions = {}, @@ -474,58 +303,3 @@ export function subjectToRdfJsQuads( return quads; } - -/** - * A recursive function that finds all Blank Nodes in an array of Quads that create a cycle in the graph. - * - * This function will traverse the graph starting from `currentNode`, keeping - * track of all the Blank Nodes it encounters twice while doing so, and - * returning those. 
- */ -function getCycleBlankNodes( - currentNode: RdfJs.BlankNode, - quads: RdfJs.Quad[], - traversedBlankNodes: RdfJs.BlankNode[] = [], -): RdfJs.BlankNode[] { - // If we've encountered `currentNode` before, all the Blank Nodes we've - // encountered so far are part of a cycle. Return those. - if ( - traversedBlankNodes.find((traversedBlankNode) => - traversedBlankNode.equals(currentNode), - ) !== undefined - ) { - return traversedBlankNodes; - } - - // Find all Blank Nodes that are connected to `currentNode`: - const blankNodeObjects = quads - .filter( - (quad) => quad.subject.equals(currentNode) && isBlankNode(quad.object), - ) - .map((quad) => quad.object as RdfJs.BlankNode); - - // If no Blank Nodes are connected to `currentNode`, and `currentNode` is not - // part of a cycle, we're done; the currently traversed Nodes do not form a - // cycle: - if (blankNodeObjects.length === 0) { - return []; - } - - // Store that we've traversed `currentNode`, then move on to all the Blank - // Nodes connected to it, which will then take up the role of `currentNode`: - const nextTraversedNodes = [...traversedBlankNodes, currentNode]; - const cycleBlankNodeArrays = blankNodeObjects.map((nextNode) => - getCycleBlankNodes(nextNode, quads, nextTraversedNodes), - ); - // Collect all the cycle Blank Nodes found in those traverals, - // then return them: - const allCycleBlankNodes: RdfJs.BlankNode[] = []; - for (const cycleBlankNodes of cycleBlankNodeArrays) { - allCycleBlankNodes.push(...cycleBlankNodes); - } - return allCycleBlankNodes; -} - -function isBlankNode(term: RdfJs.Term): term is RdfJs.BlankNode { - return term.termType === "BlankNode"; -} diff --git a/src/rdfjs.ts b/src/rdfjs.ts index 02d4093f11..be6ecaf73b 100644 --- a/src/rdfjs.ts +++ b/src/rdfjs.ts @@ -40,11 +40,7 @@ import type { DatasetCoreFactory, } from "@rdfjs/types"; import { rdfJsDataset, type ImmutableDataset } from "./rdf.internal"; -import { - addRdfJsQuadToDataset, - getChainBlankNodes, - toRdfJsQuads, 
-} from "./rdfjs.internal"; +import { addRdfJsQuadToDataset, toRdfJsQuads } from "./rdfjs.internal"; /** * Convert an RDF/JS Dataset into a [[SolidDataset]] @@ -58,31 +54,14 @@ import { * @returns A [[SolidDataset]] containing the same data as the given RDF/JS Dataset. * @since 1.9.0 */ -export function fromRdfJsDataset(rdfJsDataset: DatasetCore): ImmutableDataset { - const dataset: ImmutableDataset = { +export function fromRdfJsDataset(dataset: DatasetCore): ImmutableDataset { + const solidDataset: ImmutableDataset = { graphs: { default: {} }, type: "Dataset", }; - - const quads = Array.from(rdfJsDataset); - - const chainBlankNodes = getChainBlankNodes(quads); - - // Quads with chain Blank Nodes as their Subject will be parsed when those - // Blank Nodes are referred to in an Object. See `addRdfJsQuadToObjects`. - const quadsWithoutChainBlankNodeSubjects = quads.filter((quad) => - chainBlankNodes.every( - (chainBlankNode) => !chainBlankNode.equals(quad.subject), - ), - ); - - return quadsWithoutChainBlankNodeSubjects.reduce( - (datasetAcc, quad) => - addRdfJsQuadToDataset(datasetAcc, quad, { - otherQuads: quads, - chainBlankNodes, - }), - dataset, + return Array.from(dataset).reduce( + (datasetAcc, quad) => addRdfJsQuadToDataset(datasetAcc, quad), + solidDataset, ); } diff --git a/src/resource/__snapshots__/solidDataset.test.ts.snap b/src/resource/__snapshots__/solidDataset.test.ts.snap index 0d28879089..b349d4eaa3 100644 --- a/src/resource/__snapshots__/solidDataset.test.ts.snap +++ b/src/resource/__snapshots__/solidDataset.test.ts.snap @@ -105,7 +105,7 @@ exports[`getWellKnownSolid returns the contents of .well-known/solid for the giv { "graphs": { "default": { - "_:n3-2007": { + "_:n3-3": { "predicates": { "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": { "namedNodes": [ @@ -143,7 +143,7 @@ exports[`getWellKnownSolid returns the contents of .well-known/solid for the giv }, }, "type": "Subject", - "url": "_:n3-2007", + "url": "_:n3-3", }, }, }, diff --git 
a/src/resource/solidDataset.test.ts b/src/resource/solidDataset.test.ts index 94af2c8ce1..7226422db8 100644 --- a/src/resource/solidDataset.test.ts +++ b/src/resource/solidDataset.test.ts @@ -133,6 +133,7 @@ describe("createSolidDataset", () => { describe("responseToSolidDataset", () => { it("returns a SolidDataset representing the fetched Turtle", async () => { const turtle = ` + @base . @prefix : <#>. @prefix foaf: . @prefix vcard: . @@ -154,99 +155,22 @@ describe("responseToSolidDataset", () => { "https://some.pod/resource", ); const solidDataset = await responseToSolidDataset(response); - expect(solidDataset).toEqual( expect.objectContaining({ graphs: { default: { - "https://some.pod/resource": { - predicates: { - "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": { - namedNodes: [ - "http://xmlns.com/foaf/0.1/PersonalProfileDocument", - ], - }, - "http://xmlns.com/foaf/0.1/maker": { - namedNodes: ["https://some.pod/resource#me"], - }, - "http://xmlns.com/foaf/0.1/primaryTopic": { - namedNodes: ["https://some.pod/resource#me"], - }, - }, + // The blank node identifier is by definition unstable. + // If this test starts failing, it may be due to the + // identifier changing, which is not forbidden. 
+ "_:n3-0": { type: "Subject", - url: "https://some.pod/resource", - }, - "https://some.pod/resource#me": { + url: "_:n3-0", predicates: { - "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": { - namedNodes: ["http://xmlns.com/foaf/0.1/Person"], - }, - "http://www.w3.org/2006/vcard/ns#fn": { - blankNodes: [ - { - "https://some.pod/resource#predicate": { - namedNodes: ["for://a.blank/node"], - }, - }, - ], - literals: { - "http://www.w3.org/2001/XMLSchema#string": ["Vincent"], - }, + "https://some.pod/resource#predicate": { + namedNodes: ["for://a.blank/node"], }, }, - type: "Subject", - url: "https://some.pod/resource#me", }, - }, - }, - internal_resourceInfo: { - contentType: "text/turtle", - isRawData: false, - linkedResources: {}, - sourceIri: "https://some.pod/resource", - }, - type: "Dataset", - }), - ); - }); - - it("does not include non-deterministic identifiers when it detects non-cyclic chains of Blank Nodes", async () => { - const turtle = ` - @prefix : <#>. - @prefix foaf: . - @prefix vcard: . - @prefix acl: . - - <> a foaf:PersonalProfileDocument; foaf:maker :me; foaf:primaryTopic :me. - - :me - a foaf:Person; - vcard:fn "Vincent"; - acl:trustedApp - [ - acl:mode acl:Append, acl:Control, acl:Read, acl:Write; - acl:origin - ], - [ - acl:mode acl:Append, acl:Control, acl:Read, acl:Write; - acl:origin - ]. 
- `; - - const response = new Response(turtle, { - headers: { - "Content-Type": "text/turtle", - }, - }); - jest - .spyOn(response, "url", "get") - .mockReturnValue("https://some.pod/resource"); - const solidDataset = await responseToSolidDataset(response); - - expect(solidDataset).toEqual( - expect.objectContaining({ - graphs: { - default: { "https://some.pod/resource": { predicates: { "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": { @@ -270,40 +194,12 @@ describe("responseToSolidDataset", () => { namedNodes: ["http://xmlns.com/foaf/0.1/Person"], }, "http://www.w3.org/2006/vcard/ns#fn": { + // Here, the blank node identifier isn't referenced explicitly for resiliency. + blankNodes: [expect.stringMatching(/^_:/)], literals: { "http://www.w3.org/2001/XMLSchema#string": ["Vincent"], }, }, - "http://www.w3.org/ns/auth/acl#trustedApp": { - blankNodes: [ - { - "http://www.w3.org/ns/auth/acl#mode": { - namedNodes: [ - "http://www.w3.org/ns/auth/acl#Append", - "http://www.w3.org/ns/auth/acl#Control", - "http://www.w3.org/ns/auth/acl#Read", - "http://www.w3.org/ns/auth/acl#Write", - ], - }, - "http://www.w3.org/ns/auth/acl#origin": { - namedNodes: ["http://localhost:3000"], - }, - }, - { - "http://www.w3.org/ns/auth/acl#mode": { - namedNodes: [ - "http://www.w3.org/ns/auth/acl#Append", - "http://www.w3.org/ns/auth/acl#Control", - "http://www.w3.org/ns/auth/acl#Read", - "http://www.w3.org/ns/auth/acl#Write", - ], - }, - "http://www.w3.org/ns/auth/acl#origin": { - namedNodes: ["https://penny.vincenttunru.com"], - }, - }, - ], - }, }, type: "Subject", url: "https://some.pod/resource#me", @@ -321,53 +217,6 @@ describe("responseToSolidDataset", () => { ); }); - it("does not attempt to detect chains when there are many Blank Nodes, to avoid performance bottlenecks", async () => { - function getChainedBlankNode(iteration: number): string { - if (iteration === 1000) { - return ` "Base case"`; - } - return ` [${getChainedBlankNode( - iteration + 1, - )}]`; - } - const turtle = 
` - @prefix : <#>. - @prefix vcard: . - - :me vcard:fn [${getChainedBlankNode(0)}]. - `; - - // This test uses constructs native to Node 16. - const t0 = performance.now(); - await responseToSolidDataset( - mockResponse(turtle, { - headers: { - "Content-Type": "text/turtle", - }, - }), - ); - const t1 = performance.now(); - - // Parsing a document with over 1000 statements will always be somewhat slow - // (hence allowing it to take 1.5 seconds), but if it attempts to detect - // chains, it will take on the order of >20 seconds. - // eslint-disable-next-line jest/no-conditional-expect - expect(t1 - t0).toBeLessThan(1500); - - const solidDataset = await responseToSolidDataset( - mockResponse(turtle, { - headers: { - "Content-Type": "text/turtle", - }, - }), - ); - // Blank Nodes should be listed explicitly, rather than as properties on - // https://some.pod/resource#me: - expect(Object.keys(solidDataset.graphs.default)).not.toStrictEqual([ - "https://some.pod/resource#me", - ]); - }); - it("throws a meaningful error when the server returned a 403", async () => { const response = new Response("Not allowed", { status: 403, diff --git a/src/resource/solidDataset.ts b/src/resource/solidDataset.ts index 15150d44ae..83c7dfd1f8 100644 --- a/src/resource/solidDataset.ts +++ b/src/resource/solidDataset.ts @@ -19,13 +19,9 @@ // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
// -import type { Quad, NamedNode, Quad_Object } from "@rdfjs/types"; -import { - addRdfJsQuadToDataset, - DataFactory, - getChainBlankNodes, - toRdfJsQuads, -} from "../rdfjs.internal"; +import type { Quad, NamedNode, Quad_Object, DatasetCore } from "@rdfjs/types"; +import { Store as N3Store } from "n3"; +import { DataFactory, toRdfJsQuads } from "../rdfjs.internal"; import { ldp, pim } from "../constants"; import { getJsonLdParser } from "../formats/jsonLd"; import { triplesToTurtle, getTurtleParser } from "../formats/turtle"; @@ -65,6 +61,7 @@ import { import { getIriAll } from "../thing/get"; import { normalizeServerSideIri } from "./iri.internal"; import { freeze, getLocalNodeName, isLocalNodeIri } from "../rdf.internal"; +import { fromRdfJsDataset } from "../rdfjs"; /** * Initialise a new [[SolidDataset]] in memory. @@ -156,6 +153,7 @@ type ContentType = string; export type ParseOptions = { parsers: Record; }; + /** * @hidden This interface is not exposed yet until we've tried it out in practice. */ @@ -200,81 +198,31 @@ export async function responseToSolidDataset( } const data = await response.text(); - const parsingPromise = new Promise( - (resolve, reject) => { - let solidDataset: SolidDataset = freeze({ - graphs: freeze({ default: freeze({}) }), - type: "Dataset", - }); - - // While Quads without Blank Nodes can be added to the SolidDataset as we - // encounter them, to parse Quads with Blank Nodes, we'll have to wait until - // we've seen all the Quads, so that we can reconcile equal Blank Nodes. 
- const quadsWithBlankNodes: Quad[] = []; - const allQuads: Quad[] = []; - - parser.onError((error) => { - reject( - new Error( - `Encountered an error parsing the Resource at [${getSourceUrl( - resourceInfo, - )}] with content type [${contentType}]: ${error}`, - ), - ); - }); - parser.onQuad((quad) => { - allQuads.push(quad); - if ( - quad.subject.termType === "BlankNode" || - quad.object.termType === "BlankNode" - ) { - // Quads with Blank Nodes will be parsed when all Quads are known, - // so that equal Blank Nodes can be reconciled: - quadsWithBlankNodes.push(quad); - } else { - solidDataset = addRdfJsQuadToDataset(solidDataset, quad); - } - }); - parser.onComplete(async () => { - // If a Resource contains more than this number of Blank Nodes, - // we consider the detection of chains (O(n^2), I think) to be too - // expensive, and just incorporate them as regular Blank Nodes with - // non-deterministic, ad-hoc identifiers into the SolidDataset: - const maxBlankNodesToDetectChainsFor = 20; - // Some Blank Nodes only serve to use a set of Quads as the Object for a - // single Subject. Those Quads will be added to the SolidDataset when - // their Subject's Blank Node is encountered in the Object position. - const chainBlankNodes = - quadsWithBlankNodes.length <= maxBlankNodesToDetectChainsFor - ? 
getChainBlankNodes(quadsWithBlankNodes) - : []; - const quadsWithoutChainBlankNodeSubjects = quadsWithBlankNodes.filter( - (quad) => - chainBlankNodes.every( - (chainBlankNode) => !chainBlankNode.equals(quad.subject), - ), - ); - solidDataset = quadsWithoutChainBlankNodeSubjects.reduce( - (datasetAcc, quad) => - addRdfJsQuadToDataset(datasetAcc, quad, { - otherQuads: allQuads, - chainBlankNodes, - }), - solidDataset, - ); - const solidDatasetWithResourceInfo: SolidDataset & - WithServerResourceInfo = freeze({ - ...solidDataset, - ...resourceInfo, - }); - resolve(solidDatasetWithResourceInfo); - }); - - parser.parse(data, resourceInfo); - }, - ); + const rdfjsDataset = await new Promise((resolve, reject) => { + const store = new N3Store(); + parser.onError((error) => { + reject( + new Error( + `Encountered an error parsing the Resource at [${getSourceUrl( + resourceInfo, + )}] with content type [${contentType}]: ${error}`, + ), + ); + }); + parser.onQuad((quad) => { + store.add(quad); + }); + parser.onComplete(() => { + resolve(store); + }); - return parsingPromise; + parser.parse(data, resourceInfo); + }); + const solidDataset: SolidDataset = freeze(fromRdfJsDataset(rdfjsDataset)); + return freeze({ + ...solidDataset, + ...resourceInfo, + }); } /**