From 5c2a32d8b6352c6a1f71847740ad6f9f3c5e125f Mon Sep 17 00:00:00 2001 From: dosumis Date: Tue, 12 Jul 2022 14:51:42 +0100 Subject: [PATCH 1/4] Update kb_tests.py Updating schema checking script for KB2. #264 --- src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py b/src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py index 4dab5d55..ba0ac50b 100644 --- a/src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py +++ b/src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py @@ -47,13 +47,13 @@ def compare(dataset, description, query1, query2, verbose = False): print("Result: inds_in_datset: %d ; Compliant with pattern: %d" % (r1['ind_count'], r2['ind_count'])) # Should probably turn this into a report bad_inds = list(set(r1['ind_list']) - set(r2['ind_list'])) - file = open(dataset + ".report", 'w') - file.write(json.dumps(bad_inds)) - file.close() +# file = open(dataset + ".report", 'w') +# file.write(json.dumps(bad_inds)) +# file.close() return False -datasets = nc.commit_list(["MATCH (ds:DataSet) WHERE ds.schema = 'image' RETURN ds.label"]) +datasets = nc.commit_list(["MATCH (ds:DataSet) RETURN ds.label"]) # removed "WHERE ds.schema = 'image'" as not in kb2 dc = results_2_dict_list(datasets) return_state = True @@ -75,10 +75,10 @@ def compare(dataset, description, query1, query2, verbose = False): print("This dataset has no content") continue query1 = base_query + final_clauses - extended_base_query = base_query + "<-[:Related { short_form: 'depicts' }]-(j:Individual)" + extended_base_query = base_query + "<-[:depicts]-(j:Individual)" query2 = extended_base_query + final_clauses - query3 = extended_base_query + "-[{ iri: 'http://purl.obolibrary.org/obo/RO_0002026' }]->(k:Individual)" + final_clauses - query4 = extended_base_query + "-[:Related { label: 'is_specified_output_of'} ]->(:Class)" + final_clauses + query3 = extended_base_query + "-[in_register_with]->(k:Individual)" + 
final_clauses + query4 = extended_base_query + "-[:is_specified_output_of]->(:Class)" + final_clauses query5 = extended_base_query + "-[:INSTANCEOF]->(c:Class { label: 'channel'})" + final_clauses query6 = base_query + "-[:INSTANCEOF]->(c:Class)" + final_clauses @@ -103,7 +103,7 @@ def compare(dataset, description, query1, query2, verbose = False): test_stats.append(compare(description="All anatomical individuals in dataset are typed", dataset=ds, query1=query1, - query2=query4)) + query2=query6)) if False in test_stats: return_state = False else: From f19fcb74efd39cecd04334e3759e83fa7c1d4bc9 Mon Sep 17 00:00:00 2001 From: dosumis Date: Tue, 12 Jul 2022 15:47:01 +0100 Subject: [PATCH 2/4] Update kb_tests.py Adding working logging. --- src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py | 70 +++++++++++---------- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py b/src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py index ba0ac50b..00c5dd63 100644 --- a/src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py +++ b/src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py @@ -1,14 +1,11 @@ -from uk.ac.ebi.vfb.neo4j.neo4j_tools import neo4j_connect, results_2_dict_list +from uk.ac.ebi.vfb.neo4j.neo4j_tools import neo4j_connect, results_2_dict_list import sys import warnings import json -# KB tests - -# Query for dataSets (would be helpful if these were tagged as image) - nc = neo4j_connect(sys.argv[1], sys.argv[2], sys.argv[3]) + def query(query): q = nc.commit_list([query]) if not q: @@ -19,6 +16,7 @@ def query(query): else: return dc + def query_ind_count(query): q = nc.commit_list([query]) if not q: @@ -32,7 +30,8 @@ def query_ind_count(query): else: return dc[0]['ind_count'] -def compare(dataset, description, query1, query2, verbose = False): + +def compare(dataset: str, description: str, query1: str, query2: str, log: dict, verbose=False): r1 = query(query1)[0] r2 = query(query2)[0] if r1['ind_count'] == r2['ind_count']: @@ -44,26 +43,24 @@ def 
compare(dataset, description, query1, query2, verbose = False): else: print("Testing assertion:" + description) print(query2) - print("Result: inds_in_datset: %d ; Compliant with pattern: %d" % (r1['ind_count'], r2['ind_count'])) - # Should probably turn this into a report - bad_inds = list(set(r1['ind_list']) - set(r2['ind_list'])) -# file = open(dataset + ".report", 'w') -# file.write(json.dumps(bad_inds)) -# file.close() + print("Result: inds_in_datset: %d ; Compliant with pattern: %d" % (r1['ind_count'], r2['ind_count'])) + log[description + '. Failing Inds'] = list(set(r1['ind_list']) - set(r2['ind_list'])) return False -datasets = nc.commit_list(["MATCH (ds:DataSet) RETURN ds.label"]) # removed "WHERE ds.schema = 'image'" as not in kb2 +datasets = nc.commit_list( + ["MATCH (ds:DataSet) RETURN ds.short_form"]) # removed "WHERE ds.schema = 'image'" as not in kb2 dc = results_2_dict_list(datasets) return_state = True for d in dc: - ds = d['ds.label'] + log = {} + ds = d['ds.short_form'] dataset_status = True print("\n") - print ("Testing: " + ds) - final_clauses = " WHERE ds.label = '%s' RETURN COUNT (DISTINCT i) as ind_count" \ + print("Testing: " + ds) + final_clauses = " WHERE ds.short_form = '%s' RETURN COUNT (DISTINCT i) as ind_count" \ ", COLLECT(i.short_form) as ind_list" % ds base_query = "MATCH (ds:DataSet)<-[:has_source]-(i:Individual)" new_base_query = "MATCH (ds:DataSet)<-[:Annotation { short_form: 'source'}]-(i:Individual)" @@ -87,30 +84,39 @@ def compare(dataset, description, query1, query2, verbose = False): test_stats.append(compare(dataset=ds, description="All anatomical individuals in dataset have matching channel individuals.", query1=query1, - query2=query2)) - test_stats.append(compare(description="All anatomical individuals in dataset have matching registered channel individuals.", - dataset=ds, - query1=query1, - query2=query3)) - test_stats.append(compare(description="All anatomical individuals in dataset have matching channel individuals 
with imaging method", - dataset=ds, - query1=query1, - query2=query4)) - test_stats.append(compare(description="All anatomical individuals in dataset have matching channel, typed individuals", - dataset=ds, - query1=query1, - query2=query5)) + query2=query2, + log=log)) + test_stats.append( + compare(description="All anatomical individuals in dataset have matching registered channel individuals.", + dataset=ds, + query1=query1, + query2=query3, + log=log)) + test_stats.append(compare( + description="All anatomical individuals in dataset have matching channel individuals with imaging method", + dataset=ds, + query1=query1, + query2=query4, + log=log)) + test_stats.append( + compare(description="All anatomical individuals in dataset have matching channel, typed individuals", + dataset=ds, + query1=query1, + query2=query5, + log=log)) test_stats.append(compare(description="All anatomical individuals in dataset are typed", dataset=ds, query1=query1, - query2=query6)) + query2=query6, + log=log)) if False in test_stats: return_state = False + with open(ds + ".report", 'w') as report: + report.write(json.dumps(log)) else: print("Passes!") - if not return_state: sys.exit(1) -# KB <-> prod check numbers \ No newline at end of file +# KB <-> prod check numbers From 40c7df01d2ed0f28e42b634ab54a7151722fdf26 Mon Sep 17 00:00:00 2001 From: hkir-dev Date: Thu, 14 Jul 2022 13:47:09 +0100 Subject: [PATCH 3/4] added option to disable fail. For emailing the report, it just logs the issues and exits without fail. 
--- src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py b/src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py index 00c5dd63..786b9868 100644 --- a/src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py +++ b/src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py @@ -5,6 +5,11 @@ nc = neo4j_connect(sys.argv[1], sys.argv[2], sys.argv[3]) +silent_mode = False +# prevents sys.exit(1) on failure, just silently logs the result and exits +if len(sys.argv) > 3 and sys.argv[4] == 'silent_fail': + silent_mode = True + def query(query): q = nc.commit_list([query]) @@ -116,7 +121,7 @@ def compare(dataset: str, description: str, query1: str, query2: str, log: dict, else: print("Passes!") -if not return_state: +if not return_state and not silent_mode: sys.exit(1) # KB <-> prod check numbers From cb074bb01dfaf51e3f7e6832e697beceadee025a Mon Sep 17 00:00:00 2001 From: hkir-dev Date: Thu, 14 Jul 2022 13:53:38 +0100 Subject: [PATCH 4/4] added option to disable fail. For emailing the report, it just logs the issues and exits without fail. --- src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py b/src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py index 786b9868..5e5ce47e 100644 --- a/src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py +++ b/src/uk/ac/ebi/vfb/neo4j/neo2neo/kb_tests.py @@ -7,7 +7,7 @@ silent_mode = False # prevents sys.exit(1) on failure, just silently logs the result and exits -if len(sys.argv) > 3 and sys.argv[4] == 'silent_fail': +if len(sys.argv) > 4 and sys.argv[4] == 'silent_fail': silent_mode = True