Skip to content

Commit

Permalink
Merge pull request #111 from pyscal/add_sample_operations
Browse files Browse the repository at this point in the history
Add sample operations
  • Loading branch information
srmnitc authored May 23, 2024
2 parents 714910f + 4ff1e67 commit 6e57f97
Show file tree
Hide file tree
Showing 5 changed files with 432 additions and 31 deletions.
171 changes: 165 additions & 6 deletions atomrdf/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
# from pyscal3.core import System
from pyscal3.atoms import Atoms

from atomrdf.visualize import visualize_graph
from atomrdf.visualize import visualize_graph, visualize_provenance
from atomrdf.network.network import OntologyNetwork
from atomrdf.network.ontology import read_ontology
from atomrdf.structure import System
Expand All @@ -49,7 +49,7 @@
from atomrdf.workflow.workflow import Workflow
from atomrdf.sample import Sample

from atomrdf.namespace import Namespace, CMSO, PLDO, PODO, ASMO, PROV
from atomrdf.namespace import Namespace, CMSO, PLDO, PODO, ASMO, PROV, MATH, UNSAFECMSO, UNSAFEASMO

# read element data file
file_location = os.path.dirname(__file__).split("/")
Expand Down Expand Up @@ -103,7 +103,12 @@ def _replace_keys(refdict, indict):
def _dummy_log(str):
pass


def _name(term):
try:
return str(term.toPython())
except:
return str(term)

def _prepare_log(file):
logger = logging.getLogger(__name__)
handler = logging.FileHandler(file)
Expand Down Expand Up @@ -1173,10 +1178,13 @@ def get_sample(self, sample, no_atoms=False):
return self.sgraph, na
return self.sgraph

def get_sample_label(self, sample):
label = self.graph.value(sample, RDFS.label)
def get_label(self, item):
label = self.graph.value(item, RDFS.label)
if label is not None:
return label.toPython()

def get_sample_label(self, sample):
label = self.get_label(sample)
return label

def change_label(self, sample, label):
Expand Down Expand Up @@ -1229,6 +1237,7 @@ def get_system_from_sample(self, sample):
sys.atoms = at
sys.sample = sample
sys.graph = self
sys._name = sample.toPython().split('sample:')[-1]
return sys

def to_file(self, sample, filename=None, format="poscar", add_sample_id=True,
Expand Down Expand Up @@ -1278,7 +1287,7 @@ def to_file(self, sample, filename=None, format="poscar", add_sample_id=True,
filename = os.path.join(os.getcwd(), "out")

sys = self.get_system_from_sample(sample)
sys.to_file(filename, format=format, add_sample_id=add_sample_id, input_data=input_data,
sys.to_file(filename=filename, format=format, add_sample_id=add_sample_id, input_data=input_data,
pseudopotentials=pseudopotentials, kspacing=kspacing,
kpts=kpts, koffset=koffset, crystal_coordinates=crystal_coordinates)

Expand All @@ -1293,3 +1302,153 @@ def add_workflow(self, job, workflow_environment=None, workflow_module=None, job
workflow_module=workflow_module,
job_dicts=job_dicts,
add_intermediate_jobs=add_intermediate_jobs)

def find_property(self, label):
prop_list = list(self.graph.triples((None, RDFS.label, label)))
if len(prop_list) == 0:
raise RuntimeError(f'Property {label} not found in the graph')
prop = prop_list[0][0]
return prop

def get_string_label(self, item):
label = self.get_label(item)

if label is None:
try:
label = str(item.toPython())
except:
label = str(item)

if "activity" in label:
method = self.value(item, ASMO.hasComputationalMethod)
if method is not None:
method_name = self.value(method, RDF.type)
if method_name is not None:
label = method_name.toPython().split("/")[-1]
return label

def _add_to_dict(self, prop, indict):
name = _name(prop)
if name not in indict.keys():
indict[name] = {}
indict[name]['found'] = False
indict[name]['label'] = self.get_string_label(prop)

def _get_ancestor(self, prop, prov):
#note that only one operation and parent are present!
if isinstance(prop, str):
prop = URIRef(prop)
propname = _name(prop)

operation = [x[1] for x in self.triples((prop, ASMO.wasCalculatedBy, None))]
if len(operation) > 0:
parent = [x[2] for x in self.triples((prop, ASMO.wasCalculatedBy, None))]
operation = operation[0]
parent = parent[0]
prov[propname]['operation'] = 'output_parameter'
prov[propname]['inputs'] = {}
prov[propname]['inputs']['0'] = _name(parent)
self._add_to_dict(parent, prov)
prov[_name(parent)]['inputs'] = {}
associated_samples = [x[0] for x in self.triples((None, PROV.wasGeneratedBy, parent))]
for count, sample in enumerate(associated_samples):
prov[_name(parent)]['inputs'][str(count)] = _name(sample)
self._add_to_dict(sample, prov)
prov[_name(parent)]['found'] = True
prov[_name(parent)]['operation'] = 'sample_for_activity'

else:
operation = [x[1] for x in self.triples((None, None, prop))]
parent = [list(self.triples((None, op, prop)))[0][0] for op in operation]
if len(operation) == 0:
prov[propname]['found'] = True
return prov
operation = operation[0]
parent = parent[0]

if operation.toPython() == "http://purls.helmholtz-metadaten.de/asmo/hasInputParameter":
#print(f'we ran 1 for {operation.toPython()}')
prov[propname]['operation'] = 'input_parameter'
prov[propname]['inputs'] = {}
prov[propname]['inputs']['0'] = _name(parent)
self._add_to_dict(parent, prov)
prov[_name(parent)]['inputs'] = {}
associated_samples = [x[0] for x in self.triples((None, PROV.wasGeneratedBy, parent))]
for count, sample in enumerate(associated_samples):
prov[_name(parent)]['inputs'][str(count)] = _name(sample)
self._add_to_dict(sample, prov)
prov[_name(parent)]['found'] = True
prov[_name(parent)]['operation'] = 'sample_for_activity'

elif operation.toPython() == "http://purls.helmholtz-metadaten.de/cmso/hasCalculatedProperty":
#print(f'we ran 2 for {operation.toPython()}')
prov[propname]['operation'] = 'output_parameter'
prov[propname]['inputs'] = {}
prov[propname]['inputs']['0'] = _name(parent)
self._add_to_dict(parent, prov)
prov[_name(parent)]['found'] = True
prov[_name(parent)]['operation'] = 'sample_output'

elif operation == MATH.hasSum:
addends = list(x[2] for x in self.triples((parent, MATH.hasAddend, None)))
prov[propname]['operation'] = 'addition'
prov[propname]['inputs'] = {}
for count, term in enumerate(addends):
prov[propname]['inputs'][f'{count}'] = _name(term)
self._add_to_dict(term, prov)

elif operation == MATH.hasDifference:
minuend = self.value(parent, MATH.hasMinuend)
subtrahend = self.value(parent, MATH.hasSubtrahend)
prov[propname]['operation'] = 'subtraction'
prov[propname]['inputs'] = {}
prov[propname]['inputs']['0'] = _name(minuend)
prov[propname]['inputs']['1'] = _name(subtrahend)
self._add_to_dict(minuend, prov)
self._add_to_dict(subtrahend, prov)

elif operation == MATH.hasProduct:
factors = list(x[2] for x in self.triples((parent, MATH.hasFactor, None)))
prov[propname]['operation'] = 'multiplication'
prov[propname]['inputs'] = {}
for count, term in enumerate(factors):
prov[propname]['inputs'][f'{count}'] = _name(term)
self._add_to_dict(term, prov)

elif operation == MATH.hasQuotient:
divisor = self.value(parent, MATH.hasDivisor)
dividend = self.value(parent, MATH.hasDividend)
prov[propname]['operation'] = 'division'
prov[propname]['inputs'] = {}
prov[propname]['inputs']['0'] = _name(divisor)
prov[propname]['inputs']['1'] = _name(dividend)
self._add_to_dict(divisor, prov)
self._add_to_dict(dividend, prov)

prov[propname]['found'] = True
return prov

def generate_provenance(self, prop=None, label=None, visualize=False):
if (prop is None) and (label is None):
raise ValueError('Either prop or label must be provided')

if prop is None:
prop = self.find_property(label)

name = _name(prop)
prov = {}
self._add_to_dict(prop, prov)

done = False
while not done:
done = True
keys = list(prov.keys())
for prop in keys:
if not prov[prop]['found']:
prov = self._get_ancestor(prop, prov)
done = False

if visualize:
return visualize_provenance(prov)

return prov
4 changes: 4 additions & 0 deletions atomrdf/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,7 @@ def __init__(self, infile, delimiter="/"):
ASMO = Namespace(os.path.join(file_location, "data/asmo.owl"))
PROV = RDFLibNamespace("http://www.w3.org/ns/prov#")
MDO = RDFLibNamespace("https://w3id.org/mdo/calculation/")

MATH = RDFLibNamespace("http://purls.helmholtz-metadaten.de/asmo/")
UNSAFECMSO = RDFLibNamespace(os.path.join(file_location, "data/cmso.owl"))
UNSAFEASMO = RDFLibNamespace(os.path.join(file_location, "data/asmo.owl"))
Loading

0 comments on commit 6e57f97

Please sign in to comment.