From e4d6e63b64560363d3caef1330ed40c09979dce6 Mon Sep 17 00:00:00 2001 From: Chris Mungall Date: Thu, 7 Mar 2024 18:11:31 -0800 Subject: [PATCH] d3viz writer (#700) * Adding d3viz json tree export * Adding ability to output d3viz json. Also added graph transformers to allow for dynamic generation of SEP-style grouping classes. Fixes #696 * reformat --- src/oaklib/cli.py | 163 +++++++++++++----- .../sqldb/sql_implementation.py | 8 + src/oaklib/transformers/__init__.py | 0 .../chained_ontology_transformer.py | 18 ++ .../transformers/edge_filter_transformer.py | 75 ++++++++ src/oaklib/transformers/graph_transformer.py | 43 +++++ .../transformers/node_filter_transformer.py | 49 ++++++ .../transformers/ontology_transformer.py | 20 +++ src/oaklib/transformers/sep_transformer.py | 163 ++++++++++++++++++ .../transformers/transformers_factory.py | 49 ++++++ src/oaklib/utilities/obograph_utils.py | 124 ++++++++++++- tests/test_cli.py | 49 ++++++ tests/test_utilities/test_obograph_utils.py | 17 +- 13 files changed, 729 insertions(+), 49 deletions(-) create mode 100644 src/oaklib/transformers/__init__.py create mode 100644 src/oaklib/transformers/chained_ontology_transformer.py create mode 100644 src/oaklib/transformers/edge_filter_transformer.py create mode 100644 src/oaklib/transformers/graph_transformer.py create mode 100644 src/oaklib/transformers/node_filter_transformer.py create mode 100644 src/oaklib/transformers/ontology_transformer.py create mode 100644 src/oaklib/transformers/sep_transformer.py create mode 100644 src/oaklib/transformers/transformers_factory.py diff --git a/src/oaklib/cli.py b/src/oaklib/cli.py index a0998494f..fc2bddf6d 100644 --- a/src/oaklib/cli.py +++ b/src/oaklib/cli.py @@ -142,6 +142,10 @@ from oaklib.parsers.association_parser_factory import get_association_parser from oaklib.resource import OntologyResource from oaklib.selector import get_adapter, get_resource_from_shorthand +from oaklib.transformers.transformers_factory import ( + 
apply_ontology_transformation, + get_ontology_transformer, +) from oaklib.types import CURIE, PRED_CURIE from oaklib.utilities import table_filler from oaklib.utilities.apikey_manager import set_apikey_value @@ -180,6 +184,7 @@ from oaklib.utilities.obograph_utils import ( ancestors_with_stats, default_stylemap_path, + graph_to_d3viz_objects, graph_to_image, graph_to_tree_display, shortest_paths, @@ -1977,42 +1982,54 @@ def tree( impl = settings.impl if configure: logging.warning("Configure is not yet supported") - if isinstance(impl, OboGraphInterface): - curies = list(query_terms_iterator(terms, impl)) - if stylemap is None: - stylemap = default_stylemap_path() - actual_predicates = _process_predicates_arg(predicates) - if add_mrcas: - if isinstance(impl, SemanticSimilarityInterface): - curies_to_add = [ - lca - for s, o, lca in impl.multiset_most_recent_common_ancestors( - curies, predicates=actual_predicates - ) - ] - curies = list(set(curies + curies_to_add)) - logging.info(f"Expanded CURIEs = {curies}") - else: - raise NotImplementedError(f"{impl} does not implement SemanticSimilarityInterface") - if down: - graph = impl.subgraph_from_traversal(curies, predicates=actual_predicates) - elif gap_fill: - logging.info("Using gap-fill strategy") - if isinstance(impl, SubsetterInterface): - rels = impl.gap_fill_relationships(curies, predicates=actual_predicates) - if isinstance(impl, OboGraphInterface): - graph = impl.relationships_to_graph(rels) - else: - raise AssertionError(f"{impl} needs to be of type OboGraphInterface") + if not isinstance(impl, OboGraphInterface): + raise NotImplementedError(f"Cannot execute this using {impl} of type {type(impl)}") + curies = list(query_terms_iterator(terms, impl)) + if stylemap is None: + stylemap = default_stylemap_path() + actual_predicates = _process_predicates_arg(predicates) + if add_mrcas: + if isinstance(impl, SemanticSimilarityInterface): + curies_to_add = [ + lca + for s, o, lca in 
impl.multiset_most_recent_common_ancestors( + curies, predicates=actual_predicates + ) + ] + curies = list(set(curies + curies_to_add)) + logging.info(f"Expanded CURIEs = {curies}") + else: + raise NotImplementedError(f"{impl} does not implement SemanticSimilarityInterface") + if down: + graph = impl.subgraph_from_traversal(curies, predicates=actual_predicates) + elif gap_fill: + logging.info("Using gap-fill strategy") + if isinstance(impl, SubsetterInterface): + rels = impl.gap_fill_relationships(curies, predicates=actual_predicates) + if isinstance(impl, OboGraphInterface): + graph = impl.relationships_to_graph(rels) else: - raise NotImplementedError(f"{impl} needs to implement Subsetter for --gap-fill") + raise AssertionError(f"{impl} needs to be of type OboGraphInterface") else: - graph = impl.ancestor_graph(curies, predicates=actual_predicates) - logging.info( - f"Drawing graph with {len(graph.nodes)} nodes seeded from {curies} // {output_type}" + raise NotImplementedError(f"{impl} needs to implement Subsetter for --gap-fill") + else: + graph = impl.ancestor_graph(curies, predicates=actual_predicates) + logging.info( + f"Drawing graph with {len(graph.nodes)} nodes seeded from {curies} // {output_type}" + ) + if max_hops is not None: + graph = trim_graph(graph, curies, distance=max_hops) + if output_type in ["d3viz", "d3viz_relational"]: + trees = graph_to_d3viz_objects( + graph, + predicates=actual_predicates, + start_curies=list(root) if root else None, + relations_as_nodes=output_type == "d3viz_relational", + max_paths=None, ) - if max_hops is not None: - graph = trim_graph(graph, curies, distance=max_hops) + json_dump = json.dumps(trees, indent=2) + output.write(json_dump) + else: graph_to_tree_display( graph, seeds=curies, @@ -2024,8 +2041,6 @@ def tree( display_options=display.split(","), output=output, ) - else: - raise NotImplementedError(f"Cannot execute this using {impl} of type {type(impl)}") @main.command() @@ -2450,11 +2465,6 @@ def dump(terms, 
output, output_type: str, config_file: str = None, **kwargs): """ Exports (dumps) the entire contents of an ontology. - :param terms: A list of terms to dump. If not specified, the entire ontology will be dumped. - :param output: Path to output file - :param output_type: The output format. One of: obo, obojson, ofn, rdf, json, yaml, fhirjson, csv, nl - :param config_file: Path to a configuration JSON file for additional params (which may be required for some formats) - Example: runoak -i pato.obo dump -o pato.json -O json @@ -2485,14 +2495,73 @@ def dump(terms, output, output_type: str, config_file: str = None, **kwargs): if terms: raise NotImplementedError("Currently dump for a subset of terms is not supported") impl = settings.impl - if isinstance(impl, BasicOntologyInterface): - logging.info(f"Out={output} syntax={output_type}") - if config_file: - with open(config_file) as file: - kwargs |= json.load(file) - impl.dump(output, syntax=output_type, **kwargs) - else: + if not isinstance(impl, BasicOntologyInterface): + raise NotImplementedError + logging.info(f"Out={output} syntax={output_type}") + if config_file: + with open(config_file) as file: + kwargs |= json.load(file) + impl.dump(output, syntax=output_type, **kwargs) + + +@main.command() +@click.argument("terms", nargs=-1) +@click.option("-o", "--output", help="Path to output file") +@output_type_option +@click.option( + "-c", + "--config-file", + help="""Config file for additional transform params.""", +) +@click.option( + "-t", + "--transform", + required=True, + help="""Name of transformation to apply.""", +) +def transform(terms, transform, output, output_type: str, config_file: str = None, **kwargs): + """ + Transforms an ontology + + Example: + + runoak -i pato.obo transform -t SEPTransformer -o pato.sep.json -O obojson + + Example: + + runoak -i pato.obo transform -t EdgeFilterTransformer -o pato.filtered.obo -O obo + + You can also pass in a YAML configuration file to parameterize the transformer. 
+ + The keys in the file are passed as keyword arguments when the transformer is created: + + e.g. include_predicates for EdgeFilterTransformer + + Example: + + runoak -i pato.obo transform -t EdgeFilterTransformer -c conf.yaml -o pato.isa.obo -O obo + + Currently each implementation only supports a subset of formats. + + Only graph transformers are currently supported, so the input adapter + must implement the OBO Graph interface. + + Python API: + + https://incatools.github.io/ontology-access-kit/interfaces/basic + """ + if terms: + raise NotImplementedError("Currently transform for a subset of terms is not supported") + impl = settings.impl + if not isinstance(impl, BasicOntologyInterface): raise NotImplementedError + logging.info(f"Out={output} syntax={output_type}") + if config_file: + with open(config_file) as file: + kwargs |= yaml.safe_load(file) or {} + transformer = get_ontology_transformer(transform, **kwargs) + new_impl = apply_ontology_transformation(impl, transformer) + new_impl.dump(output, syntax=output_type) @main.command() diff --git a/src/oaklib/implementations/sqldb/sql_implementation.py b/src/oaklib/implementations/sqldb/sql_implementation.py index 1838a2aa0..2ce355f4b 100644 --- a/src/oaklib/implementations/sqldb/sql_implementation.py +++ b/src/oaklib/implementations/sqldb/sql_implementation.py @@ -247,6 +247,13 @@ def _is_quoted_url(curie: CURIE): return curie.startswith("<") +def _remove_uri_quotes(curie: CURIE): + if _is_quoted_url(curie): + return curie[1:-1] + else: + return curie + + @dataclass class SqlImplementation( RelationGraphInterface, @@ -2689,6 +2696,7 @@ def _filter(select_expr, filter_expr=None): f"Ad-hoc repair of literal value for contributor: {contributor_id}" ) contributor_id = string_as_base64_curie(contributor_id) + contributor_id = _remove_uri_quotes(contributor_id) if contributor_id not in ssc.contributor_summary: ssc.contributor_summary[contributor_id] = ContributorStatistics( 
contributor_id=contributor_id, contributor_name=contributor_name diff --git a/src/oaklib/transformers/__init__.py b/src/oaklib/transformers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/oaklib/transformers/chained_ontology_transformer.py b/src/oaklib/transformers/chained_ontology_transformer.py new file mode 100644 index 000000000..18fb9d44d --- /dev/null +++ b/src/oaklib/transformers/chained_ontology_transformer.py @@ -0,0 +1,18 @@ +from dataclasses import dataclass +from typing import Any, Collection + +from oaklib.transformers.ontology_transformer import OntologyTransformer + + +@dataclass +class ChainedOntologyTransformer(OntologyTransformer): + """ + An ontology graph transformer that chains multiple other transformers + """ + + chained_transformers: Collection[OntologyTransformer] + + def transform(self, source_ontology: Any, **kwargs) -> Any: + for transformer in self.chained_transformers: + source_ontology = transformer.transform(source_ontology, **kwargs) + return source_ontology diff --git a/src/oaklib/transformers/edge_filter_transformer.py b/src/oaklib/transformers/edge_filter_transformer.py new file mode 100644 index 000000000..159f252d9 --- /dev/null +++ b/src/oaklib/transformers/edge_filter_transformer.py @@ -0,0 +1,75 @@ +from dataclasses import dataclass +from typing import Collection, Optional + +from oaklib.datamodels.obograph import Graph +from oaklib.datamodels.vocabulary import IS_A +from oaklib.transformers.graph_transformer import GraphTransformer +from oaklib.types import PRED_CURIE + + +@dataclass +class EdgeFilterTransformer(GraphTransformer): + """ + An ontology graph transformer that filters edges + """ + + include_predicates: Optional[Collection[PRED_CURIE]] = None + """A collection of predicates to include""" + + exclude_predicates: Optional[Collection[PRED_CURIE]] = None + """A collection of predicates to exclude""" + + filter_function: Optional[callable] = None + """A function that takes an edge and 
returns True if it should be included""" + + def transform(self, source_ontology: Graph, **kwargs) -> Graph: + """ + Filters edges from a graph. + + Example: + + >>> from oaklib import get_adapter + >>> from oaklib.transformers.transformers_factory import get_ontology_transformer + >>> from oaklib.datamodels.vocabulary import IS_A + >>> adapter = get_adapter("tests/input/go-nucleus.obo") + >>> graph = adapter.as_obograph() + >>> transformer = get_ontology_transformer("EdgeFilterTransformer", include_predicates=[IS_A]) + >>> filtered_graph = transformer.transform(graph) + >>> set([e.pred for e in filtered_graph.edges]) + {'is_a'} + + :param graph: + :return: + """ + include_predicates = self.include_predicates + exclude_predicates = self.exclude_predicates + + if include_predicates is None and exclude_predicates is None: + return source_ontology + + def _normalize_id(pred: PRED_CURIE) -> PRED_CURIE: + if pred == IS_A: + return "is_a" + else: + return pred + + if include_predicates is not None: + include_predicates = {_normalize_id(pred) for pred in include_predicates} + + if exclude_predicates is not None: + exclude_predicates = {_normalize_id(pred) for pred in exclude_predicates} + + new_edges = [] + for edge in source_ontology.edges: + if include_predicates is not None: + if edge.pred not in include_predicates: + continue + if exclude_predicates is not None: + if edge.pred in exclude_predicates: + continue + if self.filter_function is not None: + if not self.filter_function(edge): + continue + new_edges.append(edge) + new_graph = Graph(id=source_ontology.id, nodes=source_ontology.nodes, edges=new_edges) + return self._post_process(new_graph) diff --git a/src/oaklib/transformers/graph_transformer.py b/src/oaklib/transformers/graph_transformer.py new file mode 100644 index 000000000..99b478758 --- /dev/null +++ b/src/oaklib/transformers/graph_transformer.py @@ -0,0 +1,43 @@ +from dataclasses import dataclass + +from oaklib.datamodels.obograph import Graph +from 
oaklib.transformers.ontology_transformer import OntologyTransformer + + +@dataclass +class GraphTransformer(OntologyTransformer): + """ + An ontology transformer that operates on a graph + """ + + remove_dangling_edges: bool = False + """If true, removes edges that point to nodes that are not in the graph""" + + def transform(self, source_ontology: Graph, **kwargs) -> Graph: + """ + Transforms a graph into an ontology + + :param graph: + :return: + """ + raise NotImplementedError + + def apply_remove_dangling_edges(self, graph: Graph): + """ + Removes edges that point to nodes that are not in the graph. + + :param graph: + :return: + """ + node_ids = {n.id for n in graph.nodes} + new_edges = [] + for edge in graph.edges: + if edge.sub in node_ids and edge.obj in node_ids: + new_edges.append(edge) + return Graph(id=graph.id, nodes=graph.nodes, edges=new_edges) + + def _post_process(self, graph: Graph): + if self.remove_dangling_edges: + return self.apply_remove_dangling_edges(graph) + else: + return graph diff --git a/src/oaklib/transformers/node_filter_transformer.py b/src/oaklib/transformers/node_filter_transformer.py new file mode 100644 index 000000000..c9c0e8bd9 --- /dev/null +++ b/src/oaklib/transformers/node_filter_transformer.py @@ -0,0 +1,49 @@ +from dataclasses import dataclass +from typing import Optional + +from oaklib.datamodels.obograph import Graph +from oaklib.transformers.graph_transformer import GraphTransformer + + +@dataclass +class NodeFilterTransformer(GraphTransformer): + """ + An ontology graph transformer that filters nodes + """ + + filter_function: Optional[callable] = None + """A function that takes an Node and returns True if it should be included""" + + remove_dangling_edges: bool = False + """If true, removes edges that point to nodes that are not in the graph""" + + def transform(self, source_ontology: Graph, **kwargs) -> Graph: + """ + Filters Nodes from a graph. 
+ + Example: + + >>> from oaklib import get_adapter + >>> from oaklib.transformers.node_filter_transformer import NodeFilterTransformer + >>> from oaklib.datamodels.vocabulary import IS_A + >>> adapter = get_adapter("tests/input/go-nucleus.obo") + >>> graph = adapter.as_obograph() + >>> transformer = NodeFilterTransformer( + ... filter_function=lambda node: node.lbl.startswith("nuclear"), + ... remove_dangling_edges=True) + >>> filtered_graph = transformer.transform(graph) + >>> sorted([n.lbl for n in filtered_graph.nodes]) + ['nuclear envelope', 'nuclear membrane', 'nuclear particle'] + + :param graph: + :return: + """ + + new_nodes = [] + for node in source_ontology.nodes: + if self.filter_function is not None: + if not self.filter_function(node): + continue + new_nodes.append(node) + new_graph = Graph(id=source_ontology.id, nodes=new_nodes, edges=source_ontology.edges) + return self._post_process(new_graph) diff --git a/src/oaklib/transformers/ontology_transformer.py b/src/oaklib/transformers/ontology_transformer.py new file mode 100644 index 000000000..729118d11 --- /dev/null +++ b/src/oaklib/transformers/ontology_transformer.py @@ -0,0 +1,20 @@ +from abc import ABC +from dataclasses import dataclass +from typing import Any + + +@dataclass +class OntologyTransformer(ABC): + """ + A class for transforming ontologies + """ + + def transform(self, source_ontology: Any, **kwargs) -> Any: + """ + Transforms an ontology into another ontology + + :param source_ontology: + :param kwargs: additional configuration arguments + :return: + """ + raise NotImplementedError diff --git a/src/oaklib/transformers/sep_transformer.py b/src/oaklib/transformers/sep_transformer.py new file mode 100644 index 000000000..8147e8157 --- /dev/null +++ b/src/oaklib/transformers/sep_transformer.py @@ -0,0 +1,163 @@ +from dataclasses import dataclass, field +from typing import Collection, Dict, Optional + +from oaklib.datamodels.obograph import Edge, Graph, Node +from 
oaklib.transformers.graph_transformer import GraphTransformer +from oaklib.types import PRED_CURIE +from oaklib.utilities.obograph_utils import index_graph_nodes + + +@dataclass +class Labeler: + """ + Generates labels and ids for generated nodes + """ + + code: str + label: str = None + separator: str = field(default="-") + + def generate(self, node: Node) -> Node: + """ + Generates a label for a node + + :param node: + :return: + """ + if self.label is not None: + label = self.label + else: + label = f"{node.lbl} ({self.code})" + sep = self.separator + id = f"{node.id}{sep}{self.code}" + new_node = Node(id=id, lbl=label, type="CLASS") + return new_node + + +@dataclass +class SEPTransformer(GraphTransformer): + """ + An ontology graph transformer that maps an ontology to a generalized SEP pattern. + + The SEP (Structured-Entities-Parts) pattern is used for partonomies and represents each + entity E as a triad of terms (S, E, P): + + - S is the union of E and P + - E is the entity + - P is a grouping for all parts of E + + The result is a diamond shape, where E is the top node, and S and P are the bottom nodes; + all the (proper) parts of E are listed under P, and all the subclasses of E are listed under S. + + This transformer implements a generalization of this that generates R1, R2, ... Rn + nodes for each predicate, in addition to the E node + """ + + structure_labeler: Optional[Labeler] = None + entity_labeler: Optional[Labeler] = None + relationship_labelers: Optional[Dict[PRED_CURIE, Labeler]] = None + + include_predicates: Optional[Collection[PRED_CURIE]] = None + """A collection of predicates to include""" + + make_entity_top_node: Optional[bool] = field(default=True) + """If true, makes the entity node the top node in the graph""" + + def transform(self, source_ontology: Graph, **kwargs) -> Graph: + """ + Filters edges from a graph. 
+ + Example: + + >>> from oaklib import get_adapter + >>> from oaklib.transformers.sep_transformer import SEPTransformer + >>> from oaklib.datamodels.vocabulary import PART_OF + >>> adapter = get_adapter("tests/input/go-nucleus.obo") + >>> graph = adapter.as_obograph() + >>> transformer = SEPTransformer(include_predicates=[PART_OF]) + >>> filtered_graph = transformer.transform(graph) + >>> nucleus = "GO:0005634" + >>> nuc_edges = [(e.sub, e.obj) for e in filtered_graph.edges if nucleus in [e.sub, e.obj]] + >>> for e in sorted(nuc_edges): + ... print(e) + ('GO:0005634', 'GO:0043231-SUB') + ('GO:0005634-BFO:0000050', 'GO:0005634') + ('GO:0005634-SUB', 'GO:0005634') + + :param source_ontology: + :return: + """ + subsumption_pred = "is_a" + make_entity_top_node = self.make_entity_top_node + structure_labeler = self.structure_labeler + entity_labeler = self.entity_labeler + nix = index_graph_nodes(source_ontology) + include_predicates = self.include_predicates + if structure_labeler is None: + structure_labeler = Labeler(code="S") + if entity_labeler is None: + if make_entity_top_node: + code = "SUB" + else: + code = "E" + entity_labeler = Labeler(code=code) + relationship_labelers = self.relationship_labelers + if relationship_labelers is None: + relationship_labelers = {} + for edge in source_ontology.edges: + pred = edge.pred + if pred not in relationship_labelers: + relationship_labelers[pred] = Labeler(code=pred) + new_edges = [] + new_node_map = {} + + def add_node(n: Node): + if n.id not in new_node_map: + new_node_map[n.id] = n + + upper_node_map = {} # diamond parents + lower_node_map = {} # diamond children + for node in source_ontology.nodes: + # ensure all original nodes are preserved + add_node(node) + if node.type != "CLASS": + continue + structure_node = structure_labeler.generate(node) + entity_node = entity_labeler.generate(node) + if make_entity_top_node: + upper_node_map[node.id] = node # e.g. Nucleus + lower_node_map[node.id] = entity_node # e.g. 
NucleusSubtype + else: + upper_node_map[node.id] = structure_node # e.g. NucleusStructure + lower_node_map[node.id] = node # e.g. Nucleus + + for edge in source_ontology.edges: + pred = edge.pred + if include_predicates is not None: + if pred != subsumption_pred and pred not in include_predicates: + continue + orig_parent_node_id = edge.obj + orig_child_node_id = edge.sub + upper_node = upper_node_map.get(orig_parent_node_id, None) + if upper_node is None: + continue + e_node = lower_node_map.get(orig_parent_node_id, None) + if e_node is None: + continue + # E is_a S (e.g. Nucleus is_a NucleusStructure) + new_edges.append((e_node.id, subsumption_pred, upper_node.id)) + orig_parent_node = nix.get(orig_parent_node_id, None) + if orig_parent_node is None: + continue + if pred == subsumption_pred: + new_edges.append((orig_child_node_id, subsumption_pred, e_node.id)) + add_node(e_node) + else: + # e.g. NucleusPart + p_node = relationship_labelers[pred].generate(orig_parent_node) + # P is_a S (e.g. 
NucleusPart is_a NucleusStructure) + new_edges.append((p_node.id, subsumption_pred, upper_node.id)) + new_edges.append((orig_child_node_id, subsumption_pred, p_node.id)) + add_node(p_node) + edges = [Edge(sub=s, pred=p, obj=o) for s, p, o in set(new_edges)] + return Graph(id=source_ontology.id, nodes=list(new_node_map.values()), edges=edges) diff --git a/src/oaklib/transformers/transformers_factory.py b/src/oaklib/transformers/transformers_factory.py new file mode 100644 index 000000000..e9645949b --- /dev/null +++ b/src/oaklib/transformers/transformers_factory.py @@ -0,0 +1,49 @@ +import importlib +import re +from typing import Optional, Type, Union + +from oaklib.datamodels.obograph import GraphDocument +from oaklib.implementations.obograph.obograph_implementation import ( + OboGraphImplementation, +) +from oaklib.interfaces import OboGraphInterface +from oaklib.interfaces.dumper_interface import DumperInterface +from oaklib.transformers.graph_transformer import GraphTransformer +from oaklib.transformers.ontology_transformer import OntologyTransformer + + +def camel_to_snake(name): + name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name) + return re.sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower() + + +def get_ontology_transformer( + name: Union[str, Type], package: Optional[str] = None, **kwargs +) -> OntologyTransformer: + if isinstance(name, str): + if package is None: + snakecase = camel_to_snake(name) + package = f"oaklib.transformers.{snakecase}" + package_obj = importlib.import_module(package) + # instantiate the class + class_obj = getattr(package_obj, name) + else: + class_obj = name + return class_obj(**kwargs) + + +def apply_ontology_transformation( + impl, transformer: Union[str, Type, OntologyTransformer], **kwargs +) -> DumperInterface: + if not isinstance(transformer, OntologyTransformer): + transformer = get_ontology_transformer(transformer, **kwargs) + if isinstance(transformer, GraphTransformer): + if not isinstance(impl, OboGraphInterface): + raise 
NotImplementedError + graph = impl.as_obograph() + new_graph = transformer.transform(graph) + gdoc = GraphDocument(graphs=[new_graph]) + new_impl = OboGraphImplementation(obograph_document=gdoc) + return new_impl + else: + raise NotImplementedError diff --git a/src/oaklib/utilities/obograph_utils.py b/src/oaklib/utilities/obograph_utils.py index 4f646aa39..9be421083 100644 --- a/src/oaklib/utilities/obograph_utils.py +++ b/src/oaklib/utilities/obograph_utils.py @@ -15,7 +15,7 @@ import sys import tempfile from collections import defaultdict -from copy import deepcopy +from copy import copy, deepcopy from enum import Enum from pathlib import Path from typing import Any, Callable, Dict, Iterator, List, Optional, TextIO, Tuple, Union @@ -25,6 +25,7 @@ from curies import Converter from linkml_runtime.dumpers import json_dumper from linkml_runtime.loaders import json_loader +from pydantic import BaseModel # https://stackoverflow.com/questions/6028000/how-to-read-a-static-file-from-inside-a-python-package from oaklib import conf as conf_package @@ -664,6 +665,7 @@ def graph_to_tree_display( :param stylemap: kgviz stylemap (not yet used) :return: """ + # TODO: refactor this to use graph_to_tree_structure if not display_options: display_options = [] show_all = "all" in display_options @@ -742,6 +744,126 @@ def graph_to_tree_display( return output.getvalue() +class TreeNode(BaseModel): + id: Optional[CURIE] = None + lbl: Optional[str] = None + meta: Optional[dict] = None + children: Dict[PRED_CURIE, List["TreeNode"]] = {} + parent_id: Optional[str] = None + parent_relation: Optional[PRED_CURIE] = None + path_to_root: List[CURIE] = [] + + +def graph_to_tree_structure( + graph: Graph, + predicates: List[PRED_CURIE] = None, + skip: List[CURIE] = None, + start_curies: List[CURIE] = None, + predicate_label_map: Dict[PRED_CURIE, str] = None, + max_paths: int = 10, +) -> List[TreeNode]: + """ + Linearizes a graph to a list of trees. 
+ + The list will contain one element for each root + :param graph: + :param predicates: + :param skip: + :param start_curies: + :param max_paths: + :return: + """ + logging.info(f"graph = {graph_info(graph)}") + if not predicate_label_map: + predicate_label_map = { + IS_A: "subtypes", + PART_OF: "parts", + } + nix = index_graph_nodes(graph) + if predicates is not None: + subgraph = filter_by_predicates(graph, predicates) + else: + subgraph = graph + logging.info(f"Subgraph = {graph_info(subgraph)}, filtered by {predicates}") + children_ix = index_graph_edges_by_object(subgraph) + dg = as_multi_digraph(subgraph, filter_reflexive=True) + if start_curies is None: + root_ids = [n for n, d in dg.in_degree if d == 0] + else: + root_ids = start_curies + logging.info(f"Roots={root_ids}") + stack = [TreeNode(id=n) for n in root_ids] + tree_roots = copy(stack) + counts = defaultdict(int) + + pointer = 0 + while len(stack) > pointer: + next_node = stack[pointer] + next_node_id = next_node.id + pointer += 1 + counts[next_node_id] += 1 + logging.debug(f"Visited {next_node_id} {counts[next_node_id]} times (max = {max_paths})") + if max_paths is not None and counts[next_node_id] > max_paths: + logging.info( + f"Reached {counts[next_node_id]} for node {next_node_id};; truncating rest" + ) + break + if next_node_id in nix: + next_node_obj = nix[next_node_id] + next_node.lbl = next_node_obj.lbl + # TODO: meta + child_edges = children_ix.get(next_node_id, []) + for child_edge in child_edges: + pred = child_edge.pred + pred = predicate_label_map.get(pred, pred) + if skip and child_edge.sub in skip: + continue + if not reflexive(child_edge): + if child_edge.sub in next_node.path_to_root: + continue + child_node = TreeNode( + id=child_edge.sub, parent_id=next_node_id, parent_relation=pred + ) + child_node.path_to_root = next_node.path_to_root + [next_node_id] + if pred not in next_node.children: + next_node.children[pred] = [] + next_node.children[pred].append(child_node) + 
stack.append(child_node) + + return tree_roots + + +def graph_to_d3viz_objects( + graph: Graph, + predicates: List[PRED_CURIE] = None, + relations_as_nodes=False, + **kwargs, +) -> List[Dict]: + roots = graph_to_tree_structure(graph, predicates=predicates, **kwargs) + return [ + tree_node_to_d3viz_object(root, relations_as_nodes=relations_as_nodes) for root in roots + ] + + +def tree_node_to_d3viz_object(tree_node: TreeNode, relations_as_nodes=False) -> Dict: + obj = {"name": tree_node.lbl, "parent": tree_node.parent_id} + if tree_node.children: + obj["children"] = [] + if relations_as_nodes: + for pred, children in tree_node.children.items(): + pred_node = { + "name": pred, + "parent": tree_node.id, + "children": [tree_node_to_d3viz_object(child, True) for child in children], + } + obj["children"].append(pred_node) + else: + for children in tree_node.children.values(): + for child in children: + obj["children"].append(tree_node_to_d3viz_object(child)) + return obj + + def expand_all_graph_ids(graph: Union[Graph, GraphDocument], converter: Converter) -> None: def _expand(x): try: diff --git a/tests/test_cli.py b/tests/test_cli.py index 34b97f1dc..11504c6ee 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -563,6 +563,55 @@ def test_dump(self): else: raise AssertionError(f"Unexpected output format: {output_format}") + def test_transform(self): + cases = [ + (TEST_ONT, "obo", None), + (TEST_ONT, "obojson", None), + ] + transformers = [ + ("SEPTransformer", {}, (413, None)), + ("EdgeFilterTransformer", {}, (176, None)), + ("EdgeFilterTransformer", {"include_predicates": [IS_A]}, (176, None)), + ] + for transformer, conf_object, (expected_n_terms, expected_n_edges) in transformers: + for input, output_format, _ in cases: + if conf_object: + conf_path = OUTPUT_DIR / f"{output_format}_conf.yaml" + with open(conf_path, "w", encoding="utf-8") as f: + yaml.dump(conf_object, f) + else: + conf_path = None + output_path = str(OUTPUT_DIR / 
f"test_transform-{output_format}.out") + logging.info(f"input={input}, output_format={output_format}") + cmd = [ + "-i", + str(input), + "transform", + "-t", + transformer, + "-o", + output_path, + "-O", + output_format, + ] + if conf_path: + cmd.extend(["-c", conf_path]) + result = self.runner.invoke(main, cmd) + self.assertEqual( + 0, result.exit_code, f"input={input}, output_format={output_format}" + ) + if output_format == "obo": + output_path = f"simpleobo:{output_path}" + elif output_format == "obojson": + output_path = f"obograph:{output_path}" + adapter = get_adapter(output_path) + terms = list(adapter.entities()) + edges = list(adapter.relationships()) + if expected_n_terms is not None: + assert len(terms) == expected_n_terms + if expected_n_edges is not None: + assert len(edges) == expected_n_edges + def test_extract(self): obojson_input = f"obograph:{TEST_OBOJSON}" cases = [ diff --git a/tests/test_utilities/test_obograph_utils.py b/tests/test_utilities/test_obograph_utils.py index 79725e012..7170d5519 100644 --- a/tests/test_utilities/test_obograph_utils.py +++ b/tests/test_utilities/test_obograph_utils.py @@ -17,7 +17,9 @@ filter_by_predicates, graph_as_dict, graph_ids, + graph_to_d3viz_objects, graph_to_tree_display, + graph_to_tree_structure, induce_graph_prefix_map, shortest_paths, trim_graph, @@ -115,7 +117,7 @@ def test_filter_by_predicates(self): self.assertGreater(len(g.edges), len(g2.edges)) self.assertGreater(len(g2.edges), 100) - def test_as_tree(self): + def test_as_tree_display(self): t = graph_to_tree_display(self.graph, predicates=[IS_A]) lines = t.split("\n") self.assertIn("[i] BFO:0000015 ! process", t) @@ -128,6 +130,19 @@ def test_as_tree(self): self.assertIn("* [p] GO:0019209 ! 
kinase activator activity", t) self.assertGreater(len(lines), 100) + def test_as_tree_structure(self): + ts = graph_to_tree_structure(self.graph, predicates=[IS_A]) + objs = [t.model_dump() for t in ts] + print(json.dumps(objs, indent=2)) + + def test_as_d3viz(self): + for preds in [[IS_A], [IS_A, PART_OF]]: + for relations_as_nodes in [True, False]: + objs = graph_to_d3viz_objects( + self.graph, predicates=preds, relations_as_nodes=relations_as_nodes + ) + print(json.dumps(objs, indent=2)) + def test_trim_ancestors(self): oi = self.oi both = [IS_A, PART_OF]