Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update kb_tests.py #265

Merged
merged 4 commits into from
Jul 26, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 46 additions & 35 deletions src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
from uk.ac.ebi.vfb.neo4j.neo4j_tools import neo4j_connect, results_2_dict_list
from uk.ac.ebi.vfb.neo4j.neo4j_tools import neo4j_connect, results_2_dict_list
import sys
import warnings
import json

# KB tests
nc = neo4j_connect(sys.argv[1], sys.argv[2], sys.argv[3])

# Query for dataSets (would be helpful if these were tagged as image)
silent_mode = False
# When 'silent_fail' is passed as the fourth argument, test failures are still logged but the script does not call sys.exit(1).
if len(sys.argv) > 4 and sys.argv[4] == 'silent_fail':
silent_mode = True

nc = neo4j_connect(sys.argv[1], sys.argv[2], sys.argv[3])

def query(query):
q = nc.commit_list([query])
Expand All @@ -19,6 +21,7 @@ def query(query):
else:
return dc


def query_ind_count(query):
q = nc.commit_list([query])
if not q:
Expand All @@ -32,7 +35,8 @@ def query_ind_count(query):
else:
return dc[0]['ind_count']

def compare(dataset, description, query1, query2, verbose = False):

def compare(dataset: str, description: str, query1: str, query2: str, log: dict, verbose=False):
r1 = query(query1)[0]
r2 = query(query2)[0]
if r1['ind_count'] == r2['ind_count']:
Expand All @@ -44,26 +48,24 @@ def compare(dataset, description, query1, query2, verbose = False):
else:
print("Testing assertion:" + description)
print(query2)
print("Result: inds_in_datset: %d ; Compliant with pattern: %d" % (r1['ind_count'], r2['ind_count']))
# Should probably turn this into a report
bad_inds = list(set(r1['ind_list']) - set(r2['ind_list']))
file = open(dataset + ".report", 'w')
file.write(json.dumps(bad_inds))
file.close()
print("Result: inds_in_datset: %d ; Compliant with pattern: %d" % (r1['ind_count'], r2['ind_count']))
log[description + '. Failing Inds'] = list(set(r1['ind_list']) - set(r2['ind_list']))
return False


datasets = nc.commit_list(["MATCH (ds:DataSet) WHERE ds.schema = 'image' RETURN ds.label"])
datasets = nc.commit_list(
["MATCH (ds:DataSet) RETURN ds.short_form"]) # removed "WHERE ds.schema = 'image'" as not in kb2
dc = results_2_dict_list(datasets)

return_state = True

for d in dc:
ds = d['ds.label']
log = {}
ds = d['ds.short_form']
dataset_status = True
print("\n")
print ("Testing: " + ds)
final_clauses = " WHERE ds.label = '%s' RETURN COUNT (DISTINCT i) as ind_count" \
print("Testing: " + ds)
final_clauses = " WHERE ds.short_form = '%s' RETURN COUNT (DISTINCT i) as ind_count" \
", COLLECT(i.short_form) as ind_list" % ds
base_query = "MATCH (ds:DataSet)<-[:has_source]-(i:Individual)"
new_base_query = "MATCH (ds:DataSet)<-[:Annotation { short_form: 'source'}]-(i:Individual)"
Expand All @@ -75,10 +77,10 @@ def compare(dataset, description, query1, query2, verbose = False):
print("This dataset has no content")
continue
query1 = base_query + final_clauses
extended_base_query = base_query + "<-[:Related { short_form: 'depicts' }]-(j:Individual)"
extended_base_query = base_query + "<-[:depicts]-(j:Individual)"
query2 = extended_base_query + final_clauses
query3 = extended_base_query + "-[{ iri: 'http://purl.obolibrary.org/obo/RO_0002026' }]->(k:Individual)" + final_clauses
query4 = extended_base_query + "-[:Related { label: 'is_specified_output_of'} ]->(:Class)" + final_clauses
query3 = extended_base_query + "-[in_register_with]->(k:Individual)" + final_clauses
query4 = extended_base_query + "-[:is_specified_output_of]->(:Class)" + final_clauses
query5 = extended_base_query + "-[:INSTANCEOF]->(c:Class { label: 'channel'})" + final_clauses
query6 = base_query + "-[:INSTANCEOF]->(c:Class)" + final_clauses

Expand All @@ -87,30 +89,39 @@ def compare(dataset, description, query1, query2, verbose = False):
test_stats.append(compare(dataset=ds,
description="All anatomical individuals in dataset have matching channel individuals.",
query1=query1,
query2=query2))
test_stats.append(compare(description="All anatomical individuals in dataset have matching registered channel individuals.",
dataset=ds,
query1=query1,
query2=query3))
test_stats.append(compare(description="All anatomical individuals in dataset have matching channel individuals with imaging method",
dataset=ds,
query1=query1,
query2=query4))
test_stats.append(compare(description="All anatomical individuals in dataset have matching channel, typed individuals",
dataset=ds,
query1=query1,
query2=query5))
query2=query2,
log=log))
test_stats.append(
compare(description="All anatomical individuals in dataset have matching registered channel individuals.",
dataset=ds,
query1=query1,
query2=query3,
log=log))
test_stats.append(compare(
description="All anatomical individuals in dataset have matching channel individuals with imaging method",
dataset=ds,
query1=query1,
query2=query4,
log=log))
test_stats.append(
compare(description="All anatomical individuals in dataset have matching channel, typed individuals",
dataset=ds,
query1=query1,
query2=query5,
log=log))
test_stats.append(compare(description="All anatomical individuals in dataset are typed",
dataset=ds,
query1=query1,
query2=query4))
query2=query6,
log=log))
if False in test_stats:
return_state = False
with open(ds + ".report", 'w') as report:
report.write(json.dumps(log))
else:
print("Passes!")


if not return_state:
if not return_state and not silent_mode:
sys.exit(1)

# KB <-> prod check numbers
# KB <-> prod check numbers