From bee77791622171590fe18da817b2f245a2f31736 Mon Sep 17 00:00:00 2001 From: Alex Skrenchuk Date: Fri, 26 Jan 2024 20:17:19 -0800 Subject: [PATCH] use max_depth calculated by owlapi_wrapper for UMLS ontologies Get max depth from the metrics.csv file which is already generated by owlapi_wrapper when a new submission of a UMLS ontology is created. Ruby code/SPARQL for calculating max_depth fails for large UMLS ontologies with the AllegroGraph backend Addresses #181 --- lib/ontologies_linked_data/metrics/metrics.rb | 5 ++-- .../models/ontology_submission.rb | 24 ++++++++++--------- test/models/test_ontology_submission.rb | 2 +- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/lib/ontologies_linked_data/metrics/metrics.rb b/lib/ontologies_linked_data/metrics/metrics.rb index 5909db69..6ec9a208 100644 --- a/lib/ontologies_linked_data/metrics/metrics.rb +++ b/lib/ontologies_linked_data/metrics/metrics.rb @@ -35,7 +35,7 @@ def self.metrics_for_submission(submission, logger) logger.flush # re-generate metrics file - submission.generate_metrics_file2(cls_metrics[:classes], indiv_count, prop_count, cls_metrics[:maxDepth]) + submission.generate_metrics_file(cls_metrics[:classes], indiv_count, prop_count, cls_metrics[:maxDepth]) logger.info("generation of metrics file finished") logger.flush @@ -54,10 +54,11 @@ def self.max_depth_fn(submission, logger, is_flat, rdfsSC) if (mx_from_file && mx_from_file.length == 2 && mx_from_file[0].length >= 4) then max_depth = mx_from_file[1][3].to_i + logger.info("Metrics max_depth retrieved #{max_depth} from the metrics csv file.") else logger.info("Unable to find metrics providing max_depth in file for submission #{submission.id.to_s}. 
Using ruby calculation of max_depth.") roots = submission.roots - + unless is_flat depths = [] roots.each do |root| diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb index a0146920..24f7bfb8 100644 --- a/lib/ontologies_linked_data/models/ontology_submission.rb +++ b/lib/ontologies_linked_data/models/ontology_submission.rb @@ -418,14 +418,7 @@ def metrics_from_file(logger=nil) metrics end - def generate_metrics_file(class_count, indiv_count, prop_count) - CSV.open(self.metrics_path, "wb") do |csv| - csv << ["Class Count", "Individual Count", "Property Count"] - csv << [class_count, indiv_count, prop_count] - end - end - - def generate_metrics_file2(class_count, indiv_count, prop_count, max_depth) + def generate_metrics_file(class_count, indiv_count, prop_count, max_depth) CSV.open(self.metrics_path, "wb") do |csv| csv << ["Class Count", "Individual Count", "Property Count", "Max Depth"] csv << [class_count, indiv_count, prop_count, max_depth] @@ -433,10 +426,11 @@ def generate_metrics_file2(class_count, indiv_count, prop_count, max_depth) end def generate_umls_metrics_file(tr_file_path=nil) - tr_file_path ||= self.triples_file_path + tr_file_path ||= triples_file_path class_count = 0 indiv_count = 0 prop_count = 0 + max_depth = 0 File.foreach(tr_file_path) do |line| class_count += 1 if line =~ /owl:Class/ @@ -444,7 +438,15 @@ def generate_umls_metrics_file(tr_file_path=nil) prop_count += 1 if line =~ /owl:ObjectProperty/ prop_count += 1 if line =~ /owl:DatatypeProperty/ end - self.generate_metrics_file(class_count, indiv_count, prop_count) + + # Get max depth from the metrics.csv file which is already generated + # by owlapi_wrapper when new submission of UMLS ontology is created. 
+ # Ruby code/sparql for calculating max_depth fails for large UMLS + # ontologies with AllegroGraph backend + metrics_from_owlapi = metrics_from_file + max_depth = metrics_from_owlapi[1][3] unless metrics_from_owlapi.empty? + + generate_metrics_file(class_count, indiv_count, prop_count, max_depth) end def generate_rdf(logger, reasoning: true) @@ -452,7 +454,7 @@ def generate_rdf(logger, reasoning: true) if self.hasOntologyLanguage.umls? triples_file_path = self.triples_file_path - logger.info("Using UMLS turtle file found, skipping OWLAPI parse") + logger.info("UMLS turtle file found; doing OWLAPI parse to extract metrics") logger.flush mime_type = LinkedData::MediaTypes.media_type_from_base(LinkedData::MediaTypes::TURTLE) generate_umls_metrics_file(triples_file_path) diff --git a/test/models/test_ontology_submission.rb b/test/models/test_ontology_submission.rb index dffab3c5..3049b6f1 100644 --- a/test/models/test_ontology_submission.rb +++ b/test/models/test_ontology_submission.rb @@ -603,7 +603,7 @@ def test_umls_metrics_file metrics = sub.metrics_from_file(Logger.new(sub.parsing_log_path)) assert !metrics.nil?, "Metrics is nil: #{metrics}" assert !metrics.empty?, "Metrics is empty: #{metrics}" - metrics.each { |m| assert_equal 3, m.length } + metrics.each { |m| assert_equal 4, m.length } assert_equal "Individual Count", metrics[0][1] assert_equal 133, metrics[1][0].to_i end