From 210a28d385e9a66471acab22a87899c29a725e0c Mon Sep 17 00:00:00 2001 From: Sarath Menon Date: Tue, 28 Nov 2023 20:25:24 +0100 Subject: [PATCH] add metadata schema --- notebooks/data_read.ipynb | 2 +- notebooks/data_set.ipynb | 2 +- notebooks/metadata_schema.ipynb | 227 ++++++++++++++++++++++++++++++++ notebooks/schema.yml | 51 +++++++ 4 files changed, 280 insertions(+), 2 deletions(-) create mode 100644 notebooks/metadata_schema.ipynb create mode 100644 notebooks/schema.yml diff --git a/notebooks/data_read.ipynb b/notebooks/data_read.ipynb index 9b5bd67..1b38c45 100644 --- a/notebooks/data_read.ipynb +++ b/notebooks/data_read.ipynb @@ -134,7 +134,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/notebooks/data_set.ipynb b/notebooks/data_set.ipynb index a63521c..27c5fe1 100644 --- a/notebooks/data_set.ipynb +++ b/notebooks/data_set.ipynb @@ -153,7 +153,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/notebooks/metadata_schema.ipynb b/notebooks/metadata_schema.ipynb new file mode 100644 index 0000000..6bbe994 --- /dev/null +++ b/notebooks/metadata_schema.ipynb @@ -0,0 +1,227 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 8, + "id": "9058253b-d5f3-4e70-8952-61f7d2992b08", + "metadata": {}, + "outputs": [], + "source": [ + "from pyscal_rdf.network.ontology import read_ontology\n", + "from rdflib import Graph, Literal, Namespace\n", + "import copy" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "4c5d73b6-da02-420c-a223-4eef93c872bc", + "metadata": {}, + "outputs": [], + "source": [ + "onto = read_ontology()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "09f55784-36b9-471d-a766-4a3a017f7696", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'cmso:hasAltName': cmso:hasAltName,\n", + " 'cmso:hasName': cmso:hasName,\n", + " 'cmso:hasAngle_alpha': cmso:hasAngle_alpha,\n", + " 'cmso:hasAngle_beta': cmso:hasAngle_beta,\n", + " 'cmso:hasAngle_gamma': cmso:hasAngle_gamma,\n", + " 'cmso:hasChemicalSymbol': cmso:hasChemicalSymbol,\n", + " 'cmso:hasSymbol': cmso:hasSymbol,\n", + " 'cmso:hasComponent_x': cmso:hasComponent_x,\n", + " 'cmso:hasComponent_y': cmso:hasComponent_y,\n", + " 'cmso:hasComponent_z': cmso:hasComponent_z,\n", + " 'cmso:hasCoordinationNumber': cmso:hasCoordinationNumber,\n", + " 'cmso:hasElementRatio': cmso:hasElementRatio,\n", + " 'cmso:hasLength_x': cmso:hasLength_x,\n", + " 'cmso:hasLength_y': cmso:hasLength_y,\n", + " 'cmso:hasLength_z': cmso:hasLength_z,\n", + " 'cmso:hasNumberOfAtoms': cmso:hasNumberOfAtoms,\n", + " 'cmso:hasReference': cmso:hasReference,\n", + " 'cmso:hasRepetition_x': cmso:hasRepetition_x,\n", + " 'cmso:hasRepetition_y': cmso:hasRepetition_y,\n", + " 'cmso:hasRepetition_z': cmso:hasRepetition_z,\n", + " 'cmso:hasSpaceGroupNumber': cmso:hasSpaceGroupNumber,\n", + " 'cmso:hasSpaceGroupSymbol': cmso:hasSpaceGroupSymbol,\n", + " 'cmso:hasValue': cmso:hasValue,\n", + " 'cmso:hasVolume': cmso:hasVolume,\n", + " 'pldo:geometricalDegreesOfFreedom': pldo:geometricalDegreesOfFreedom,\n", + " 'pldo:hasGBplane': pldo:hasGBplane,\n", + " 'pldo:macroscopicDegreesOfFreedom': pldo:macroscopicDegreesOfFreedom,\n", + " 'pldo:hasMisorientationAngle': pldo:hasMisorientationAngle,\n", + " 'pldo:hasRotationAxis': pldo:hasRotationAxis,\n", + " 'pldo:hasSigmaValue': pldo:hasSigmaValue,\n", + " 'pldo:microscopicDegreesOfFreedom': pldo:microscopicDegreesOfFreedom,\n", + " 'podo:hasNumberOfVacancies': podo:hasNumberOfVacancies,\n", + " 'podo:hasVacancyConcentration': podo:hasVacancyConcentration,\n", + " 'rdfs:label': rdfs:label}" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "onto.attributes['data_property']" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "ce296bb0-8e6a-4ab2-8940-52d6a0121e6f", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['cmso:SimulationCell']" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "onto.attributes['data_property']['cmso:hasVolume'].domain" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8dc0e696-b128-4ebe-9ebe-3312e8b04124", + "metadata": {}, + "outputs": [], + "source": [ + "def create_dict(datadict, path):\n", + " for p in path:\n", + " datadict[str(p[0])] ={}\n", + " datadict = datadict[str(p[0])]\n", + " #exists\n", + " datadict[str(p[0])] = {str(p[-1]): None}" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7d267c94-b4f2-4fe8-be2e-c44f4300c128", + "metadata": {}, + "outputs": [], + "source": [ + "collected_dicts = []" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "c87873b0-62d1-40d1-a6f3-9418e0f8c8d2", + "metadata": {}, + "outputs": [], + "source": [ + "for key in onto.attributes['data_property']:\n", + " try:\n", + " path = onto.get_path_from_sample(key)\n", + " datadict = {}\n", + " create_dict(datadict, path)\n", + " collected_dicts.append(copy.deepcopy(datadict))\n", + " except:\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30d5eae4-d4a4-432d-99cc-7ec8e5944225", + "metadata": {}, + "outputs": [], + "source": [ + "def dict_merge(dct, merge_dct):\n", + " for k, v in merge_dct.items():\n", + " if (k in dct and isinstance(dct[k], dict) and isinstance(merge_dct[k], dict)): #noqa\n", + " dict_merge(dct[k], merge_dct[k])\n", + " else:\n", + " dct[k] = merge_dct[k]\n", + " \n", + " \n", + "collected_dicts = []\n", + "\n", + "for key in o.attributes['data_node']:\n", + " try:\n", + " path = o.get_path_from_sample(key)\n", + " datadict = {}\n", + " create_dict(datadict, path)\n", + " collected_dicts.append(copy.deepcopy(datadict))\n", + " except:\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "f407cffc-c1ba-4d51-ae78-0c580a77dea1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'cmso:ComputationalSample': {'cmso:SimulationCell': {'cmso:Angle': {'cmso:Angle': {'cmso:hasAngle_gamma': None}}}}}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "collected_dicts[3]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "836f088d-dee5-4ebf-8397-9339d0a1362a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/schema.yml b/notebooks/schema.yml new file mode 100644 index 0000000..21b4878 --- /dev/null +++ b/notebooks/schema.yml @@ -0,0 +1,51 @@ +sample: + material: + element_ratio: + element1: float + crystal_structure: + altname: string + spacegroup_symbol: string + spacegroup_number: int + unit_cell: + bravais_lattice: string + lattice_parameter: + x: float + y: float + z: float + angle: + alpha: float + beta: float + gamma: float + defect: + grain_boundary: + type: string + sigma: int + plane: int(3) + rotation_axis: int(3) + misorientation_angle: float + vacancy: + type: string + + simulation_cell: + volume: float + number_of_atoms: int + length: + x: float + y: float + z: float + vector: + x: float(3) + y: float(3) + z: float(3) + angle: + alpha: float + beta: float + gamma: float + vacancy_concentration: float + number_of_vacancies: int + + calculated_property: + - label: string + value: float + unit: string +