diff --git a/use-cases/abide/ABIDE_FBIRN_Query.ipynb b/use-cases/abide/ABIDE_FBIRN_Query.ipynb deleted file mode 100644 index e964b9a..0000000 --- a/use-cases/abide/ABIDE_FBIRN_Query.ipynb +++ /dev/null @@ -1,147 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "import os\n", - "import rdflib as rdf\n", - "#import csv for reading csv files\n", - "import csv" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#Read in Turtle files" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "abide_graph = rdf.Graph()\n", - "abide_graph_parse = abide_graph.parse('ABIDE_to_NIDM_RDFLib.ttl', format='turtle')\n", - "\n", - "fbirn_graph = rdf.Graph()\n", - "fbirn_graph_parse = fbirn_graph.parse('FBIRN_to_NIDM_RDFLib.ttl', format='turtle')\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#SPARQL query for FIQ measures across graphs\n", - "##Step 1: Figure out that ABIDE_FIQ and FBIRN FSIQ are the same thing based on looking up the terms\n", - "##Step 2: Create a union graph\n", - "##Step 3: Perform SPARQL Query" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "#Step 2:\n", - "union_graph = abide_graph_parse + fbirn_graph_parse" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "#step 3a: Add namespaces from each graph\n", - "nidm = rdf.URIRef(\"http://nidm.nidash.org/\")\n", - "prov = rdf.URIRef(\"http://www.w3.org/ns/prov#\")\n", - "ncit = rdf.URIRef(\"http://ncitt.ncit.nih.gov/\")\n", - "nidash = rdf.URIRef(\"http://purl.org/nidash/nidm/\")\n", - "fbirn = rdf.URIRef(\"http://www.birncommunity.org/collaborators/function-birn/\")\n", - "abide = rdf.URIRef(\"http://fcon_1000.projects.nitrc.org/indi/abide/\")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "303851049 knows http://purl.org/nidash/nidm/agent_303851049\n", - "CMU_50647 knows http://purl.org/nidash/nidm/agent_CMU_50647\n", - "301882920 knows http://purl.org/nidash/nidm/agent_301882920\n", - "303269784 knows http://purl.org/nidash/nidm/agent_303269784\n", - "CMU_50646 knows http://purl.org/nidash/nidm/agent_CMU_50646\n", - "305738011 knows http://purl.org/nidash/nidm/agent_305738011\n", - "304253859 knows http://purl.org/nidash/nidm/agent_304253859\n", - "CMU_50645 knows http://purl.org/nidash/nidm/agent_CMU_50645\n", - "CMU_50644 knows http://purl.org/nidash/nidm/agent_CMU_50644\n", - "CMU_50643 knows http://purl.org/nidash/nidm/agent_CMU_50643\n" - ] - } - ], - "source": [ - "#Step 3:\n", - "qres = union_graph.query(\n", - " \"\"\"SELECT DISTINCT *\n", - " WHERE {\n", - " ?agent a prov:Agent ;\n", - " ncit:subjectID ?subjectid .\n", - " }\"\"\")\n", - "\n", - "for row in qres:\n", - " print(\"%s knows %s\" % row)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.10" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/use-cases/abide/abide_terms.ttl b/use-cases/abide/abide_terms.ttl index 3eb1d24..4e0cef1 100644 --- a/use-cases/abide/abide_terms.ttl +++ b/use-cases/abide/abide_terms.ttl @@ -8,21 +8,20 @@ @prefix xml: . @prefix xsd: . -nidash:entity_FSIQ a prov:Entity ; +abide:ABIDE_FSIQ a prov:Entity ; abide:form "WASI" ; abide:term "ABIDE_FIQ" ; prov:definition "FIQ Standard Score" ; prov:label "ABIDE vocabulary term" . -nidash:entity_PIQ a prov:Entity ; +abide:ABIDE_PIQ a prov:Entity ; abide:form "WASI" ; abide:term "ABIDE_PIQ" ; prov:definition "PIQ Standard Score" ; prov:label "ABIDE vocabulary term" . -nidash:entity_VIQ a prov:Entity ; +abide:ABIDE_VIQ a prov:Entity ; abide:form "WASI" ; abide:term "ABIDE_VIQ" ; prov:definition "VIQ Standard Score" ; prov:label "ABIDE vocabulary term" . - diff --git a/use-cases/fbirn/fbirn_terms.ttl b/use-cases/fbirn/fbirn_terms.ttl index 623fb5a..bb93d26 100644 --- a/use-cases/fbirn/fbirn_terms.ttl +++ b/use-cases/fbirn/fbirn_terms.ttl @@ -8,21 +8,20 @@ @prefix xml: . @prefix xsd: . -nidash:entity_FSIQ a prov:Entity ; +fbirn:FSIQ a prov:Entity ; fbirn:form "North American Adult Reading Test (NAART)" ; fbirn:term "FSIQ" ; prov:definition "Estimated Full Scale IQ = 127.8 - .78 * errors" ; prov:label "FBIRN vocabulary term" . -nidash:entity_PIQ a prov:Entity ; +fbirn:PIQ a prov:Entity ; fbirn:form "North American Adult Reading Test (NAART)" ; fbirn:term "PIQ" ; prov:definition "Estimated Performance IQ = 119.4 - .42 * errors" ; prov:label "FBIRN vocabulary term" . -nidash:entity_VIQ a prov:Entity ; +fbirn:VIQ a prov:Entity ; fbirn:form "North American Adult Reading Test (NAART)" ; fbirn:term "VIQ" ; prov:definition "Estimated Verbal IQ = 128.7 - .89 * errors" ; prov:label "FBIRN vocabulary term" . - diff --git a/use-cases/integration-queries/ABIDE_FBIRN_Query.ipynb b/use-cases/integration-queries/ABIDE_FBIRN_Query.ipynb new file mode 100644 index 0000000..1b8a5c2 --- /dev/null +++ b/use-cases/integration-queries/ABIDE_FBIRN_Query.ipynb @@ -0,0 +1,474 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# An example of integrating two RDF datasets\n", + "- Read in the original datasets converted to RDF\n", + "- Create a mapping between a few measures in in each file\n", + "- Query the integrated dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import os\n", + "import rdflib as rdf\n", + "#import csv for reading csv files\n", + "import csv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#Read in Datasets Expressed in Turtle Format" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "abide_graph = rdf.Graph()\n", + "abide_graph_parse = abide_graph.parse('../abide/ABIDE_to_NIDM_RDFLib.ttl', format='turtle')\n", + "\n", + "fbirn_graph = rdf.Graph()\n", + "fbirn_graph_parse = fbirn_graph.parse('../fbirn/FBIRN_to_NIDM_RDFLib.ttl', format='turtle')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#SPARQL query for FIQ measures across graphs\n", + "- Step 1: Figure out that ABIDE_FIQ and FBIRN FSIQ are the same thing based on looking up the terms\n", + "- Step 2: Create a union graph\n", + "- Step 3: Perform SPARQL Query" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#Step 2:\n", + "union_graph = abide_graph_parse + fbirn_graph_parse" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#step 3a: Add namespaces from each graph\n", + "nidm = rdf.URIRef(\"http://nidm.nidash.org/\")\n", + "prov = rdf.URIRef(\"http://www.w3.org/ns/prov#\")\n", + "ncit = rdf.URIRef(\"http://ncitt.ncit.nih.gov/\")\n", + "nidash = rdf.URIRef(\"http://purl.org/nidash/nidm/\")\n", + "fbirn = rdf.URIRef(\"http://www.birncommunity.org/collaborators/function-birn/\")\n", + "abide = rdf.URIRef(\"http://fcon_1000.projects.nitrc.org/indi/abide/\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "303851049 has_subject_id http://purl.org/nidash/nidm/agent_303851049\n", + "CMU_50646 has_subject_id http://purl.org/nidash/nidm/agent_CMU_50646\n", + "CMU_50643 has_subject_id http://purl.org/nidash/nidm/agent_CMU_50643\n", + "301882920 has_subject_id http://purl.org/nidash/nidm/agent_301882920\n", + "CMU_50647 has_subject_id http://purl.org/nidash/nidm/agent_CMU_50647\n", + "303269784 has_subject_id http://purl.org/nidash/nidm/agent_303269784\n", + "CMU_50645 has_subject_id http://purl.org/nidash/nidm/agent_CMU_50645\n", + "305738011 has_subject_id http://purl.org/nidash/nidm/agent_305738011\n", + "CMU_50644 has_subject_id http://purl.org/nidash/nidm/agent_CMU_50644\n", + "304253859 has_subject_id http://purl.org/nidash/nidm/agent_304253859\n" + ] + } + ], + "source": [ + "#Step 3:\n", + "qres = union_graph.query(\n", + " \"\"\"SELECT DISTINCT *\n", + " WHERE {\n", + " ?agent a prov:Agent ;\n", + " ncit:subjectID ?subjectid .\n", + " }\"\"\")\n", + "\n", + "for row in qres:\n", + " print(\"%s has_subject_id %s\" % row)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Examine the term definitions\n", + "- look at the definitions for each term and try to figure out if ter is a direct mapping or not\n", + "- if there is a direct mapping, we can create a simple mapping file to query across both datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "@prefix abide: .\n", + "@prefix ncit: .\n", + "@prefix nidash: .\n", + "@prefix nidm: .\n", + "@prefix prov: .\n", + "@prefix rdf: .\n", + "@prefix rdfs: .\n", + "@prefix xml: .\n", + "@prefix xsd: .\n", + "\n", + "abide:ABIDE_FSIQ a prov:Entity ;\n", + " abide:form \"WASI\" ;\n", + " abide:term \"ABIDE_FIQ\" ;\n", + " prov:definition \"FIQ Standard Score\" ;\n", + " prov:label \"ABIDE vocabulary term\" .\n", + "\n", + "abide:ABIDE_PIQ a prov:Entity ;\n", + " abide:form \"WASI\" ;\n", + " abide:term \"ABIDE_PIQ\" ;\n", + " prov:definition \"PIQ Standard Score\" ;\n", + " prov:label \"ABIDE vocabulary term\" .\n", + "\n", + "abide:ABIDE_VIQ a prov:Entity ;\n", + " abide:form \"WASI\" ;\n", + " abide:term \"ABIDE_VIQ\" ;\n", + " prov:definition \"VIQ Standard Score\" ;\n", + " prov:label \"ABIDE vocabulary term\" .\n", + "\n", + "\n" + ] + } + ], + "source": [ + "abide_terms = rdf.Graph()\n", + "abide_terms.parse('../abide/abide_terms.ttl', format='turtle')\n", + "print abide_terms.serialize(format='turtle')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "@prefix fbirn: .\n", + "@prefix ncit: .\n", + "@prefix nidash: .\n", + "@prefix nidm: .\n", + "@prefix prov: .\n", + "@prefix rdf: .\n", + "@prefix rdfs: .\n", + "@prefix xml: .\n", + "@prefix xsd: .\n", + "\n", + "fbirn:FSIQ a prov:Entity ;\n", + " fbirn:form \"North American Adult Reading Test (NAART)\" ;\n", + " fbirn:term \"FSIQ\" ;\n", + " prov:definition \"Estimated Full Scale IQ = 127.8 - .78 * errors\" ;\n", + " prov:label \"FBIRN vocabulary term\" .\n", + "\n", + "fbirn:PIQ a prov:Entity ;\n", + " fbirn:form \"North American Adult Reading Test (NAART)\" ;\n", + " fbirn:term \"PIQ\" ;\n", + " prov:definition \"Estimated Performance IQ = 119.4 - .42 * errors\" ;\n", + " prov:label \"FBIRN vocabulary term\" .\n", + "\n", + "fbirn:VIQ a prov:Entity ;\n", + " fbirn:form \"North American Adult Reading Test (NAART)\" ;\n", + " fbirn:term \"VIQ\" ;\n", + " prov:definition \"Estimated Verbal IQ = 128.7 - .89 * errors\" ;\n", + " prov:label \"FBIRN vocabulary term\" .\n", + "\n", + "\n" + ] + } + ], + "source": [ + "fbirn_terms = rdf.Graph()\n", + "fbirn_terms.parse('../fbirn/fbirn_terms.ttl', format='turtle')\n", + "print fbirn_terms.serialize(format='turtle')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example Mapping\n", + "- We see that the term `ABIDE FSIQ` matches `FBIRN FIQ`\n", + "- Now we can encode this knowlede by creating a parent term that matches the two together" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "mapping = \"\"\"\n", + "@prefix abide: .\n", + "@prefix fbirn: .\n", + "@prefix ncit: .\n", + "@prefix nidash: .\n", + "@prefix nidm: .\n", + "@prefix prov: .\n", + "@prefix rdf: .\n", + "@prefix rdfs: .\n", + "@prefix xml: .\n", + "@prefix xsd: .\n", + "\n", + "# Declare our general term for mapping\n", + "nidm:FSIQ a prov:Entity ;\n", + " nidm:form \"North American Adult Reading Test (NAART)\" ;\n", + " nidm:term \"FSIQ\" ;\n", + " prov:definition \"Estimated Full Scale IQ = 127.8 - .78 * errors\" ;\n", + " prov:label \"Generic vocabulary term.\" .\n", + "\n", + "# Now include that these are subclasses of this \n", + "abide:ABIDE_FIQ a prov:Entity ;\n", + " rdfs:subClassOf nidm:FSIQ ;\n", + " abide:form \"WASI\" ;\n", + " abide:term \"ABIDE_FIQ\" ;\n", + " prov:definition \"FIQ Standard Score\" ;\n", + " prov:label \"ABIDE vocabulary term\" .\n", + " \n", + "fbirn:FSIQ a prov:Entity ;\n", + " rdfs:subClassOf nidm:FSIQ ;\n", + " fbirn:form \"North American Adult Reading Test (NAART)\" ;\n", + " fbirn:term \"FSIQ\" ;\n", + " prov:definition \"Estimated Full Scale IQ = 127.8 - .78 * errors\" ;\n", + " prov:label \"FBIRN vocabulary term\" .\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "gmap = rdf.Graph()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "@prefix abide: .\n", + "@prefix fbirn: .\n", + "@prefix ncit: .\n", + "@prefix nidash: .\n", + "@prefix nidm: .\n", + "@prefix prov: .\n", + "@prefix rdf: .\n", + "@prefix rdfs: .\n", + "@prefix xml: .\n", + "@prefix xsd: .\n", + "\n", + "abide:ABIDE_FIQ a prov:Entity ;\n", + " abide:form \"WASI\" ;\n", + " abide:term \"ABIDE_FIQ\" ;\n", + " rdfs:subClassOf nidm:FSIQ ;\n", + " prov:definition \"FIQ Standard Score\" ;\n", + " prov:label \"ABIDE vocabulary term\" .\n", + "\n", + "fbirn:FSIQ a prov:Entity ;\n", + " fbirn:form \"North American Adult Reading Test (NAART)\" ;\n", + " fbirn:term \"FSIQ\" ;\n", + " rdfs:subClassOf nidm:FSIQ ;\n", + " prov:definition \"Estimated Full Scale IQ = 127.8 - .78 * errors\" ;\n", + " prov:label \"FBIRN vocabulary term\" .\n", + "\n", + "nidm:FSIQ a prov:Entity ;\n", + " nidm:form \"North American Adult Reading Test (NAART)\" ;\n", + " nidm:term \"FSIQ\" ;\n", + " prov:definition \"Estimated Full Scale IQ = 127.8 - .78 * errors\" ;\n", + " prov:label \"Generic vocabulary term.\" .\n", + "\n", + "\n" + ] + } + ], + "source": [ + "gmap.parse(data=mapping, format='turtle')\n", + "print gmap.serialize(format='turtle')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Query for Mapping\n", + "- now that we have created a mapping we can query for the list of mapped terms\n", + "- Secondly, we can use this mapping to filter our query results over actual data" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "subclasses\r\n", + "http://fcon_1000.projects.nitrc.org/indi/abide/ABIDE_FIQ\r\n", + "http://www.birncommunity.org/collaborators/function-birn/FSIQ\r\n", + "\n" + ] + } + ], + "source": [ + "qres = gmap.query(\n", + " \"\"\"SELECT DISTINCT ?subclasses\n", + " WHERE { \n", + " ?subclasses rdfs:subClassOf nidm:FSIQ .\n", + " }\"\"\")\n", + "print qres.serialize(format='csv')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Add the Mappings to the Joinined Datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "mapped_graph = union_graph + gmap" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Now use the mappings to filter the results!" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "s,subclasses,o\r\n", + "http://purl.org/nidash/nidm/entity_CMU_50645,http://fcon_1000.projects.nitrc.org/indi/abide/ABIDE_FIQ,124\r\n", + "http://purl.org/nidash/nidm/entity_CMU_50643,http://fcon_1000.projects.nitrc.org/indi/abide/ABIDE_FIQ,123\r\n", + "http://purl.org/nidash/nidm/entity_CMU_50647,http://fcon_1000.projects.nitrc.org/indi/abide/ABIDE_FIQ,104\r\n", + "http://purl.org/nidash/nidm/entity_CMU_50646,http://fcon_1000.projects.nitrc.org/indi/abide/ABIDE_FIQ,108\r\n", + "http://purl.org/nidash/nidm/entity_CMU_50644,http://fcon_1000.projects.nitrc.org/indi/abide/ABIDE_FIQ,107\r\n", + "http://purl.org/nidash/nidm/entity_303851049,http://www.birncommunity.org/collaborators/function-birn/FSIQ,117.66\r\n", + "http://purl.org/nidash/nidm/entity_301882920,http://www.birncommunity.org/collaborators/function-birn/FSIQ,118.44\r\n", + "http://purl.org/nidash/nidm/entity_304253859,http://www.birncommunity.org/collaborators/function-birn/FSIQ,112.2\r\n", + "http://purl.org/nidash/nidm/entity_303269784,http://www.birncommunity.org/collaborators/function-birn/FSIQ,123.12\r\n", + "http://purl.org/nidash/nidm/entity_305738011,http://www.birncommunity.org/collaborators/function-birn/FSIQ,116.1\r\n", + "\n" + ] + } + ], + "source": [ + "qres = mapped_graph.query(\n", + " \"\"\"SELECT DISTINCT ?s ?subclasses ?o \n", + " WHERE { \n", + " ?subclasses rdfs:subClassOf nidm:FSIQ .\n", + " ?s ?subclasses ?o\n", + " }\"\"\")\n", + "print qres.serialize(format='csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.10" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}