From e4d6e63b64560363d3caef1330ed40c09979dce6 Mon Sep 17 00:00:00 2001
From: Chris Mungall <cjm@berkeleybop.org>
Date: Thu, 7 Mar 2024 18:11:31 -0800
Subject: [PATCH] d3viz writer (#700)

* Adding d3viz json tree export

* Adding ability to output d3viz json.

Also added graph transformers to allow for dynamic generation of
SEP-style grouping classes.

Fixes #696

* reformat
---
 src/oaklib/cli.py                             | 163 +++++++++++++-----
 .../sqldb/sql_implementation.py               |   8 +
 src/oaklib/transformers/__init__.py           |   0
 .../chained_ontology_transformer.py           |  18 ++
 .../transformers/edge_filter_transformer.py   |  75 ++++++++
 src/oaklib/transformers/graph_transformer.py  |  43 +++++
 .../transformers/node_filter_transformer.py   |  49 ++++++
 .../transformers/ontology_transformer.py      |  20 +++
 src/oaklib/transformers/sep_transformer.py    | 163 ++++++++++++++++++
 .../transformers/transformers_factory.py      |  49 ++++++
 src/oaklib/utilities/obograph_utils.py        | 124 ++++++++++++-
 tests/test_cli.py                             |  49 ++++++
 tests/test_utilities/test_obograph_utils.py   |  17 +-
 13 files changed, 729 insertions(+), 49 deletions(-)
 create mode 100644 src/oaklib/transformers/__init__.py
 create mode 100644 src/oaklib/transformers/chained_ontology_transformer.py
 create mode 100644 src/oaklib/transformers/edge_filter_transformer.py
 create mode 100644 src/oaklib/transformers/graph_transformer.py
 create mode 100644 src/oaklib/transformers/node_filter_transformer.py
 create mode 100644 src/oaklib/transformers/ontology_transformer.py
 create mode 100644 src/oaklib/transformers/sep_transformer.py
 create mode 100644 src/oaklib/transformers/transformers_factory.py

diff --git a/src/oaklib/cli.py b/src/oaklib/cli.py
index a0998494f..fc2bddf6d 100644
--- a/src/oaklib/cli.py
+++ b/src/oaklib/cli.py
@@ -142,6 +142,10 @@
 from oaklib.parsers.association_parser_factory import get_association_parser
 from oaklib.resource import OntologyResource
 from oaklib.selector import get_adapter, get_resource_from_shorthand
+from oaklib.transformers.transformers_factory import (
+    apply_ontology_transformation,
+    get_ontology_transformer,
+)
 from oaklib.types import CURIE, PRED_CURIE
 from oaklib.utilities import table_filler
 from oaklib.utilities.apikey_manager import set_apikey_value
@@ -180,6 +184,7 @@
 from oaklib.utilities.obograph_utils import (
     ancestors_with_stats,
     default_stylemap_path,
+    graph_to_d3viz_objects,
     graph_to_image,
     graph_to_tree_display,
     shortest_paths,
@@ -1977,42 +1982,54 @@ def tree(
     impl = settings.impl
     if configure:
         logging.warning("Configure is not yet supported")
-    if isinstance(impl, OboGraphInterface):
-        curies = list(query_terms_iterator(terms, impl))
-        if stylemap is None:
-            stylemap = default_stylemap_path()
-        actual_predicates = _process_predicates_arg(predicates)
-        if add_mrcas:
-            if isinstance(impl, SemanticSimilarityInterface):
-                curies_to_add = [
-                    lca
-                    for s, o, lca in impl.multiset_most_recent_common_ancestors(
-                        curies, predicates=actual_predicates
-                    )
-                ]
-                curies = list(set(curies + curies_to_add))
-                logging.info(f"Expanded CURIEs = {curies}")
-            else:
-                raise NotImplementedError(f"{impl} does not implement SemanticSimilarityInterface")
-        if down:
-            graph = impl.subgraph_from_traversal(curies, predicates=actual_predicates)
-        elif gap_fill:
-            logging.info("Using gap-fill strategy")
-            if isinstance(impl, SubsetterInterface):
-                rels = impl.gap_fill_relationships(curies, predicates=actual_predicates)
-                if isinstance(impl, OboGraphInterface):
-                    graph = impl.relationships_to_graph(rels)
-                else:
-                    raise AssertionError(f"{impl} needs to be of type OboGraphInterface")
+    if not isinstance(impl, OboGraphInterface):
+        raise NotImplementedError(f"Cannot execute this using {impl} of type {type(impl)}")
+    curies = list(query_terms_iterator(terms, impl))
+    if stylemap is None:
+        stylemap = default_stylemap_path()
+    actual_predicates = _process_predicates_arg(predicates)
+    if add_mrcas:
+        if isinstance(impl, SemanticSimilarityInterface):
+            curies_to_add = [
+                lca
+                for s, o, lca in impl.multiset_most_recent_common_ancestors(
+                    curies, predicates=actual_predicates
+                )
+            ]
+            curies = list(set(curies + curies_to_add))
+            logging.info(f"Expanded CURIEs = {curies}")
+        else:
+            raise NotImplementedError(f"{impl} does not implement SemanticSimilarityInterface")
+    if down:
+        graph = impl.subgraph_from_traversal(curies, predicates=actual_predicates)
+    elif gap_fill:
+        logging.info("Using gap-fill strategy")
+        if isinstance(impl, SubsetterInterface):
+            rels = impl.gap_fill_relationships(curies, predicates=actual_predicates)
+            if isinstance(impl, OboGraphInterface):
+                graph = impl.relationships_to_graph(rels)
             else:
-                raise NotImplementedError(f"{impl} needs to implement Subsetter for --gap-fill")
+                raise AssertionError(f"{impl} needs to be of type OboGraphInterface")
         else:
-            graph = impl.ancestor_graph(curies, predicates=actual_predicates)
-        logging.info(
-            f"Drawing graph with {len(graph.nodes)} nodes seeded from {curies} // {output_type}"
+            raise NotImplementedError(f"{impl} needs to implement Subsetter for --gap-fill")
+    else:
+        graph = impl.ancestor_graph(curies, predicates=actual_predicates)
+    logging.info(
+        f"Drawing graph with {len(graph.nodes)} nodes seeded from {curies} // {output_type}"
+    )
+    if max_hops is not None:
+        graph = trim_graph(graph, curies, distance=max_hops)
+    if output_type in ["d3viz", "d3viz_relational"]:
+        trees = graph_to_d3viz_objects(
+            graph,
+            predicates=actual_predicates,
+            start_curies=list(root) if root else None,
+            relations_as_nodes=output_type == "d3viz_relational",
+            max_paths=None,
         )
-        if max_hops is not None:
-            graph = trim_graph(graph, curies, distance=max_hops)
+        json_dump = json.dumps(trees, indent=2)
+        output.write(json_dump)
+    else:
         graph_to_tree_display(
             graph,
             seeds=curies,
@@ -2024,8 +2041,6 @@ def tree(
             display_options=display.split(","),
             output=output,
         )
-    else:
-        raise NotImplementedError(f"Cannot execute this using {impl} of type {type(impl)}")
 
 
 @main.command()
@@ -2450,11 +2465,6 @@ def dump(terms, output, output_type: str, config_file: str = None, **kwargs):
     """
     Exports (dumps) the entire contents of an ontology.
 
-    :param terms: A list of terms to dump. If not specified, the entire ontology will be dumped.
-    :param output: Path to output file
-    :param output_type: The output format. One of: obo, obojson, ofn, rdf, json, yaml, fhirjson, csv, nl
-    :param config_file: Path to a configuration JSON file for additional params (which may be required for some formats)
-
     Example:
 
         runoak -i pato.obo dump -o pato.json -O json
@@ -2485,14 +2495,73 @@ def dump(terms, output, output_type: str, config_file: str = None, **kwargs):
     if terms:
         raise NotImplementedError("Currently dump for a subset of terms is not supported")
     impl = settings.impl
-    if isinstance(impl, BasicOntologyInterface):
-        logging.info(f"Out={output} syntax={output_type}")
-        if config_file:
-            with open(config_file) as file:
-                kwargs |= json.load(file)
-        impl.dump(output, syntax=output_type, **kwargs)
-    else:
+    if not isinstance(impl, BasicOntologyInterface):
+        raise NotImplementedError
+    logging.info(f"Out={output} syntax={output_type}")
+    if config_file:
+        with open(config_file) as file:
+            kwargs |= json.load(file)
+    impl.dump(output, syntax=output_type, **kwargs)
+
+
+@main.command()
+@click.argument("terms", nargs=-1)
+@click.option("-o", "--output", help="Path to output file")
+@output_type_option
+@click.option(
+    "-c",
+    "--config-file",
+    help="""Config file for additional transform params.""",
+)
+@click.option(
+    "-t",
+    "--transform",
+    required=True,
+    help="""Name of transformation to apply.""",
+)
+def transform(terms, transform, output, output_type: str, config_file: str = None, **kwargs):
+    """
+    Transforms an ontology
+
+    Example:
+
+        runoak -i pato.obo transform -t SEPTransformer -o pato-sep.obo -O obo
+
+    Example:
+
+        runoak -i pato.obo transform -t EdgeFilterTransformer -o pato.json -O obojson
+
+    You can also pass in a YAML configuration file to parameterize the transformation.
+
+    The keys in the file are passed as keyword arguments when creating the transformer:
+
+        include_predicates: ["BFO:0000050"]
+
+    Example:
+
+        runoak -i pato.obo transform -t EdgeFilterTransformer -c conf.yaml -o pato.obo -O obo
+
+    The transformer is looked up by class name in the oaklib.transformers package.
+
+    Currently only graph transformers are supported, and the input adapter
+    must implement the OBO Graph interface.
+
+    Python API:
+
+       https://incatools.github.io/ontology-access-kit/interfaces/basic
+    """
+    if terms:
+        raise NotImplementedError("Currently transform for a subset of terms is not supported")
+    impl = settings.impl
+    if not isinstance(impl, BasicOntologyInterface):
         raise NotImplementedError
+    logging.info(f"Out={output} syntax={output_type}")
+    if config_file:
+        with open(config_file) as file:
+            kwargs |= yaml.safe_load(file) or {}  # an empty YAML file loads as None
+    transformer = get_ontology_transformer(transform, **kwargs)
+    new_impl = apply_ontology_transformation(impl, transformer)
+    new_impl.dump(output, syntax=output_type)
 
 
 @main.command()
diff --git a/src/oaklib/implementations/sqldb/sql_implementation.py b/src/oaklib/implementations/sqldb/sql_implementation.py
index 1838a2aa0..2ce355f4b 100644
--- a/src/oaklib/implementations/sqldb/sql_implementation.py
+++ b/src/oaklib/implementations/sqldb/sql_implementation.py
@@ -247,6 +247,13 @@ def _is_quoted_url(curie: CURIE):
     return curie.startswith("<")
 
 
+def _remove_uri_quotes(curie: CURIE):
+    if _is_quoted_url(curie):
+        return curie[1:-1]
+    else:
+        return curie
+
+
 @dataclass
 class SqlImplementation(
     RelationGraphInterface,
@@ -2689,6 +2696,7 @@ def _filter(select_expr, filter_expr=None):
                         f"Ad-hoc repair of literal value for contributor: {contributor_id}"
                     )
                     contributor_id = string_as_base64_curie(contributor_id)
+            contributor_id = _remove_uri_quotes(contributor_id)
             if contributor_id not in ssc.contributor_summary:
                 ssc.contributor_summary[contributor_id] = ContributorStatistics(
                     contributor_id=contributor_id, contributor_name=contributor_name
diff --git a/src/oaklib/transformers/__init__.py b/src/oaklib/transformers/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/oaklib/transformers/chained_ontology_transformer.py b/src/oaklib/transformers/chained_ontology_transformer.py
new file mode 100644
index 000000000..18fb9d44d
--- /dev/null
+++ b/src/oaklib/transformers/chained_ontology_transformer.py
@@ -0,0 +1,18 @@
+from dataclasses import dataclass
+from typing import Any, Collection
+
+from oaklib.transformers.ontology_transformer import OntologyTransformer
+
+
+@dataclass
+class ChainedOntologyTransformer(OntologyTransformer):
+    """
+    An ontology graph transformer that chains multiple other transformers
+    """
+
+    chained_transformers: Collection[OntologyTransformer]
+
+    def transform(self, source_ontology: Any, **kwargs) -> Any:
+        for transformer in self.chained_transformers:
+            source_ontology = transformer.transform(source_ontology, **kwargs)
+        return source_ontology
diff --git a/src/oaklib/transformers/edge_filter_transformer.py b/src/oaklib/transformers/edge_filter_transformer.py
new file mode 100644
index 000000000..159f252d9
--- /dev/null
+++ b/src/oaklib/transformers/edge_filter_transformer.py
@@ -0,0 +1,75 @@
+from dataclasses import dataclass
+from typing import Collection, Optional
+
+from oaklib.datamodels.obograph import Graph
+from oaklib.datamodels.vocabulary import IS_A
+from oaklib.transformers.graph_transformer import GraphTransformer
+from oaklib.types import PRED_CURIE
+
+
+@dataclass
+class EdgeFilterTransformer(GraphTransformer):
+    """
+    An ontology graph transformer that filters edges
+    """
+
+    include_predicates: Optional[Collection[PRED_CURIE]] = None
+    """A collection of predicates to include"""
+
+    exclude_predicates: Optional[Collection[PRED_CURIE]] = None
+    """A collection of predicates to exclude"""
+
+    filter_function: Optional[callable] = None
+    """A function that takes an edge and returns True if it should be included"""
+
+    def transform(self, source_ontology: Graph, **kwargs) -> Graph:
+        """
+        Filters edges from a graph.
+
+        Example:
+
+        >>> from oaklib import get_adapter
+        >>> from oaklib.transformers.transformers_factory import get_ontology_transformer
+        >>> from oaklib.datamodels.vocabulary import IS_A
+        >>> adapter = get_adapter("tests/input/go-nucleus.obo")
+        >>> graph = adapter.as_obograph()
+        >>> transformer = get_ontology_transformer("EdgeFilterTransformer", include_predicates=[IS_A])
+        >>> filtered_graph = transformer.transform(graph)
+        >>> set([e.pred for e in filtered_graph.edges])
+        {'is_a'}
+
+        :param source_ontology: the graph whose edges are filtered
+        :return: a new graph with the retained edges, or the input graph if no filter is set
+        """
+        include_predicates = self.include_predicates
+        exclude_predicates = self.exclude_predicates
+        # no criteria at all (including filter_function): return the input as-is
+        if all(f is None for f in (include_predicates, exclude_predicates, self.filter_function)):
+            return source_ontology
+
+        def _normalize_id(pred: PRED_CURIE) -> PRED_CURIE:
+            if pred == IS_A:
+                return "is_a"
+            else:
+                return pred
+
+        if include_predicates is not None:
+            include_predicates = {_normalize_id(pred) for pred in include_predicates}
+
+        if exclude_predicates is not None:
+            exclude_predicates = {_normalize_id(pred) for pred in exclude_predicates}
+
+        new_edges = []
+        for edge in source_ontology.edges:
+            if include_predicates is not None:
+                if edge.pred not in include_predicates:
+                    continue
+            if exclude_predicates is not None:
+                if edge.pred in exclude_predicates:
+                    continue
+            if self.filter_function is not None:
+                if not self.filter_function(edge):
+                    continue
+            new_edges.append(edge)
+        new_graph = Graph(id=source_ontology.id, nodes=source_ontology.nodes, edges=new_edges)
+        return self._post_process(new_graph)
diff --git a/src/oaklib/transformers/graph_transformer.py b/src/oaklib/transformers/graph_transformer.py
new file mode 100644
index 000000000..99b478758
--- /dev/null
+++ b/src/oaklib/transformers/graph_transformer.py
@@ -0,0 +1,43 @@
+from dataclasses import dataclass
+
+from oaklib.datamodels.obograph import Graph
+from oaklib.transformers.ontology_transformer import OntologyTransformer
+
+
+@dataclass
+class GraphTransformer(OntologyTransformer):
+    """
+    An ontology transformer that operates on a graph
+    """
+
+    remove_dangling_edges: bool = False
+    """If true, removes edges that point to nodes that are not in the graph"""
+
+    def transform(self, source_ontology: Graph, **kwargs) -> Graph:
+        """
+        Transforms a graph into another graph; this base implementation always raises.
+
+        :param source_ontology: the graph to transform
+        :return: the transformed graph
+        """
+        raise NotImplementedError
+
+    def apply_remove_dangling_edges(self, graph: Graph):
+        """
+        Removes edges that point to nodes that are not in the graph.
+
+        :param graph: graph to clean up; a new Graph is built rather than editing this one
+        :return: new Graph with the same id and nodes and only the non-dangling edges
+        """
+        node_ids = {n.id for n in graph.nodes}
+        new_edges = []
+        for edge in graph.edges:
+            if edge.sub in node_ids and edge.obj in node_ids:
+                new_edges.append(edge)
+        return Graph(id=graph.id, nodes=graph.nodes, edges=new_edges)
+
+    def _post_process(self, graph: Graph):
+        if self.remove_dangling_edges:
+            return self.apply_remove_dangling_edges(graph)
+        else:
+            return graph
diff --git a/src/oaklib/transformers/node_filter_transformer.py b/src/oaklib/transformers/node_filter_transformer.py
new file mode 100644
index 000000000..c9c0e8bd9
--- /dev/null
+++ b/src/oaklib/transformers/node_filter_transformer.py
@@ -0,0 +1,49 @@
+from dataclasses import dataclass
+from typing import Optional
+
+from oaklib.datamodels.obograph import Graph
+from oaklib.transformers.graph_transformer import GraphTransformer
+
+
+@dataclass
+class NodeFilterTransformer(GraphTransformer):
+    """
+    An ontology graph transformer that filters nodes
+    """
+
+    filter_function: Optional[callable] = None
+    """A function that takes an Node and returns True if it should be included"""
+
+    remove_dangling_edges: bool = False
+    """If true, removes edges that point to nodes that are not in the graph"""
+
+    def transform(self, source_ontology: Graph, **kwargs) -> Graph:
+        """
+        Filters Nodes from a graph.
+
+        Example:
+
+        >>> from oaklib import get_adapter
+        >>> from oaklib.transformers.node_filter_transformer import NodeFilterTransformer
+        >>> from oaklib.datamodels.vocabulary import IS_A
+        >>> adapter = get_adapter("tests/input/go-nucleus.obo")
+        >>> graph = adapter.as_obograph()
+        >>> transformer = NodeFilterTransformer(
+        ...                 filter_function=lambda node: node.lbl.startswith("nuclear"),
+        ...                 remove_dangling_edges=True)
+        >>> filtered_graph = transformer.transform(graph)
+        >>> sorted([n.lbl for n in filtered_graph.nodes])
+        ['nuclear envelope', 'nuclear membrane', 'nuclear particle']
+
+        :param source_ontology: the graph whose nodes are filtered
+        :return: new graph with the retained nodes; edges are kept unless dangling removal is on
+        """
+
+        new_nodes = []
+        for node in source_ontology.nodes:
+            if self.filter_function is not None:
+                if not self.filter_function(node):
+                    continue
+            new_nodes.append(node)
+        new_graph = Graph(id=source_ontology.id, nodes=new_nodes, edges=source_ontology.edges)
+        return self._post_process(new_graph)
diff --git a/src/oaklib/transformers/ontology_transformer.py b/src/oaklib/transformers/ontology_transformer.py
new file mode 100644
index 000000000..729118d11
--- /dev/null
+++ b/src/oaklib/transformers/ontology_transformer.py
@@ -0,0 +1,20 @@
+from abc import ABC
+from dataclasses import dataclass
+from typing import Any
+
+
+@dataclass
+class OntologyTransformer(ABC):
+    """
+    A class for transforming ontologies
+    """
+
+    def transform(self, source_ontology: Any, **kwargs) -> Any:
+        """
+        Transforms an ontology into another ontology
+
+        :param source_ontology:
+        :param kwargs: additional configuration arguments
+        :return:
+        """
+        raise NotImplementedError
diff --git a/src/oaklib/transformers/sep_transformer.py b/src/oaklib/transformers/sep_transformer.py
new file mode 100644
index 000000000..8147e8157
--- /dev/null
+++ b/src/oaklib/transformers/sep_transformer.py
@@ -0,0 +1,163 @@
+from dataclasses import dataclass, field
+from typing import Collection, Dict, Optional
+
+from oaklib.datamodels.obograph import Edge, Graph, Node
+from oaklib.transformers.graph_transformer import GraphTransformer
+from oaklib.types import PRED_CURIE
+from oaklib.utilities.obograph_utils import index_graph_nodes
+
+
+@dataclass
+class Labeler:
+    """
+    Generates labels and ids for generated nodes
+    """
+
+    code: str
+    label: str = None
+    separator: str = field(default="-")
+
+    def generate(self, node: Node) -> Node:
+        """
+        Generates a label for a node
+
+        :param node:
+        :return:
+        """
+        if self.label is not None:
+            label = self.label
+        else:
+            label = f"{node.lbl} ({self.code})"
+        sep = self.separator
+        id = f"{node.id}{sep}{self.code}"
+        new_node = Node(id=id, lbl=label, type="CLASS")
+        return new_node
+
+
+@dataclass
+class SEPTransformer(GraphTransformer):
+    """
+    An ontology graph transformer that maps an ontology to a generalized SEP pattern.
+
+    The SEP (Structured-Entities-Parts) pattern is used for partonomies and represents each
+    entity E as a triad of terms (S, E, P):
+
+    - S is the union of E and P
+    - E is the entity
+    - P is a grouping for all parts of E
+
+    The result is a diamond shape, where E is the top node, and S and P are the bottom nodes;
+    all the (proper) parts of E are listed under P, and all the subclasses of E are listed under S.
+
+    This transformer implements a generalization of this that generates R1, R2, ... Rn
+    nodes for each predicate, in addition to the E node
+    """
+
+    structure_labeler: Optional[Labeler] = None
+    entity_labeler: Optional[Labeler] = None
+    relationship_labelers: Optional[Dict[PRED_CURIE, Labeler]] = None
+
+    include_predicates: Optional[Collection[PRED_CURIE]] = None
+    """A collection of predicates to include"""
+
+    make_entity_top_node: Optional[bool] = field(default=True)
+    """If true, makes the entity node the top node in the graph"""
+
+    def transform(self, source_ontology: Graph, **kwargs) -> Graph:
+        """
+        Maps a graph to the generalized SEP pattern.
+
+        Example:
+
+        >>> from oaklib import get_adapter
+        >>> from oaklib.transformers.sep_transformer import SEPTransformer
+        >>> from oaklib.datamodels.vocabulary import PART_OF
+        >>> adapter = get_adapter("tests/input/go-nucleus.obo")
+        >>> graph = adapter.as_obograph()
+        >>> transformer = SEPTransformer(include_predicates=[PART_OF])
+        >>> filtered_graph = transformer.transform(graph)
+        >>> nucleus = "GO:0005634"
+        >>> nuc_edges = [(e.sub, e.obj) for e in filtered_graph.edges if nucleus in [e.sub, e.obj]]
+        >>> for e in sorted(nuc_edges):
+        ...    print(e)
+        ('GO:0005634', 'GO:0043231-SUB')
+        ('GO:0005634-BFO:0000050', 'GO:0005634')
+        ('GO:0005634-SUB', 'GO:0005634')
+
+        :param source_ontology: the graph to restructure
+        :return: a new graph with the original nodes, generated grouping nodes and is_a edges only
+        """
+        subsumption_pred = "is_a"
+        make_entity_top_node = self.make_entity_top_node
+        structure_labeler = self.structure_labeler
+        entity_labeler = self.entity_labeler
+        nix = index_graph_nodes(source_ontology)
+        include_predicates = self.include_predicates
+        if structure_labeler is None:
+            structure_labeler = Labeler(code="S")
+        if entity_labeler is None:
+            if make_entity_top_node:
+                code = "SUB"
+            else:
+                code = "E"
+            entity_labeler = Labeler(code=code)
+        # work on a copy: the defaults added below must not leak into the
+        # caller-supplied mapping, which may be shared between transformers
+        relationship_labelers = dict(self.relationship_labelers or {})
+        for edge in source_ontology.edges:
+            pred = edge.pred
+            if pred not in relationship_labelers:
+                relationship_labelers[pred] = Labeler(code=pred)
+        new_edges = []
+        new_node_map = {}
+
+        def add_node(n: Node):
+            if n.id not in new_node_map:
+                new_node_map[n.id] = n
+
+        upper_node_map = {}  # diamond parents
+        lower_node_map = {}  # diamond children
+        for node in source_ontology.nodes:
+            # ensure all original nodes are preserved
+            add_node(node)
+            if node.type != "CLASS":
+                continue
+            structure_node = structure_labeler.generate(node)
+            entity_node = entity_labeler.generate(node)
+            if make_entity_top_node:
+                upper_node_map[node.id] = node  # e.g. Nucleus
+                lower_node_map[node.id] = entity_node  # e.g. NucleusSubtype
+            else:
+                upper_node_map[node.id] = structure_node  # e.g. NucleusStructure
+                lower_node_map[node.id] = node  # e.g. Nucleus
+
+        for edge in source_ontology.edges:
+            pred = edge.pred
+            if include_predicates is not None:
+                if pred != subsumption_pred and pred not in include_predicates:
+                    continue
+            orig_parent_node_id = edge.obj
+            orig_child_node_id = edge.sub
+            upper_node = upper_node_map.get(orig_parent_node_id, None)
+            if upper_node is None:
+                continue
+            e_node = lower_node_map.get(orig_parent_node_id, None)
+            if e_node is None:
+                continue
+            # E is_a S (e.g. Nucleus is_a NucleusStructure)
+            new_edges.append((e_node.id, subsumption_pred, upper_node.id))
+            orig_parent_node = nix.get(orig_parent_node_id, None)
+            if orig_parent_node is None:
+                continue
+            if pred == subsumption_pred:
+                new_edges.append((orig_child_node_id, subsumption_pred, e_node.id))
+                add_node(e_node)
+            else:
+                # e.g. NucleusPart
+                p_node = relationship_labelers[pred].generate(orig_parent_node)
+                # P is_a S (e.g. NucleusPart is_a NucleusStructure)
+                new_edges.append((p_node.id, subsumption_pred, upper_node.id))
+                new_edges.append((orig_child_node_id, subsumption_pred, p_node.id))
+                add_node(p_node)
+        edges = [Edge(sub=s, pred=p, obj=o) for s, p, o in set(new_edges)]
+        return Graph(id=source_ontology.id, nodes=list(new_node_map.values()), edges=edges)
diff --git a/src/oaklib/transformers/transformers_factory.py b/src/oaklib/transformers/transformers_factory.py
new file mode 100644
index 000000000..e9645949b
--- /dev/null
+++ b/src/oaklib/transformers/transformers_factory.py
@@ -0,0 +1,49 @@
+import importlib
+import re
+from typing import Optional, Type, Union
+
+from oaklib.datamodels.obograph import GraphDocument
+from oaklib.implementations.obograph.obograph_implementation import (
+    OboGraphImplementation,
+)
+from oaklib.interfaces import OboGraphInterface
+from oaklib.interfaces.dumper_interface import DumperInterface
+from oaklib.transformers.graph_transformer import GraphTransformer
+from oaklib.transformers.ontology_transformer import OntologyTransformer
+
+
+def camel_to_snake(name):
+    name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
+    return re.sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower()
+
+
+def get_ontology_transformer(
+    name: Union[str, Type], package: Optional[str] = None, **kwargs
+) -> OntologyTransformer:
+    if isinstance(name, str):
+        if package is None:
+            snakecase = camel_to_snake(name)
+            package = f"oaklib.transformers.{snakecase}"
+        package_obj = importlib.import_module(package)
+        # instantiate the class
+        class_obj = getattr(package_obj, name)
+    else:
+        class_obj = name
+    return class_obj(**kwargs)
+
+
+def apply_ontology_transformation(
+    impl, transformer: Union[str, Type, OntologyTransformer], **kwargs
+) -> DumperInterface:
+    if not isinstance(transformer, OntologyTransformer):
+        transformer = get_ontology_transformer(transformer, **kwargs)
+    if isinstance(transformer, GraphTransformer):
+        if not isinstance(impl, OboGraphInterface):
+            raise NotImplementedError
+        graph = impl.as_obograph()
+        new_graph = transformer.transform(graph)
+        gdoc = GraphDocument(graphs=[new_graph])
+        new_impl = OboGraphImplementation(obograph_document=gdoc)
+        return new_impl
+    else:
+        raise NotImplementedError
diff --git a/src/oaklib/utilities/obograph_utils.py b/src/oaklib/utilities/obograph_utils.py
index 4f646aa39..9be421083 100644
--- a/src/oaklib/utilities/obograph_utils.py
+++ b/src/oaklib/utilities/obograph_utils.py
@@ -15,7 +15,7 @@
 import sys
 import tempfile
 from collections import defaultdict
-from copy import deepcopy
+from copy import copy, deepcopy
 from enum import Enum
 from pathlib import Path
 from typing import Any, Callable, Dict, Iterator, List, Optional, TextIO, Tuple, Union
@@ -25,6 +25,7 @@
 from curies import Converter
 from linkml_runtime.dumpers import json_dumper
 from linkml_runtime.loaders import json_loader
+from pydantic import BaseModel
 
 # https://stackoverflow.com/questions/6028000/how-to-read-a-static-file-from-inside-a-python-package
 from oaklib import conf as conf_package
@@ -664,6 +665,7 @@ def graph_to_tree_display(
     :param stylemap: kgviz stylemap (not yet used)
     :return:
     """
+    # TODO: refactor this to use graph_to_tree_structure
     if not display_options:
         display_options = []
     show_all = "all" in display_options
@@ -742,6 +744,126 @@ def graph_to_tree_display(
         return output.getvalue()
 
 
+class TreeNode(BaseModel):
+    id: Optional[CURIE] = None  # CURIE of the graph node this tree node stands for
+    lbl: Optional[str] = None  # label copied from the graph node, when it is indexed
+    meta: Optional[dict] = None
+    children: Dict[PRED_CURIE, List["TreeNode"]] = {}  # child nodes keyed by relation label
+    parent_id: Optional[str] = None
+    parent_relation: Optional[PRED_CURIE] = None  # relation linking this node to its parent
+    path_to_root: List[CURIE] = []  # ids of the ancestors in this tree, root first
+
+
+def graph_to_tree_structure(
+    graph: Graph,
+    predicates: List[PRED_CURIE] = None,
+    skip: List[CURIE] = None,
+    start_curies: List[CURIE] = None,
+    predicate_label_map: Dict[PRED_CURIE, str] = None,
+    max_paths: int = 10,
+) -> List[TreeNode]:
+    """
+    Linearizes a graph to a list of trees.
+
+    The list will contain one element for each root
+    :param graph:
+    :param predicates:
+    :param skip:
+    :param start_curies:
+    :param max_paths:
+    :return:
+    """
+    logging.info(f"graph = {graph_info(graph)}")
+    if not predicate_label_map:
+        predicate_label_map = {
+            IS_A: "subtypes",
+            PART_OF: "parts",
+        }
+    nix = index_graph_nodes(graph)
+    if predicates is not None:
+        subgraph = filter_by_predicates(graph, predicates)
+    else:
+        subgraph = graph
+    logging.info(f"Subgraph = {graph_info(subgraph)}, filtered by {predicates}")
+    children_ix = index_graph_edges_by_object(subgraph)
+    dg = as_multi_digraph(subgraph, filter_reflexive=True)
+    if start_curies is None:
+        root_ids = [n for n, d in dg.in_degree if d == 0]
+    else:
+        root_ids = start_curies
+    logging.info(f"Roots={root_ids}")
+    stack = [TreeNode(id=n) for n in root_ids]
+    tree_roots = copy(stack)
+    counts = defaultdict(int)
+
+    pointer = 0
+    while len(stack) > pointer:
+        next_node = stack[pointer]
+        next_node_id = next_node.id
+        pointer += 1
+        counts[next_node_id] += 1
+        logging.debug(f"Visited {next_node_id} {counts[next_node_id]} times (max = {max_paths})")
+        if max_paths is not None and counts[next_node_id] > max_paths:
+            logging.info(
+                f"Reached {counts[next_node_id]} for node {next_node_id};; truncating rest"
+            )
+            break
+        if next_node_id in nix:
+            next_node_obj = nix[next_node_id]
+            next_node.lbl = next_node_obj.lbl
+            # TODO: meta
+        child_edges = children_ix.get(next_node_id, [])
+        for child_edge in child_edges:
+            pred = child_edge.pred
+            pred = predicate_label_map.get(pred, pred)
+            if skip and child_edge.sub in skip:
+                continue
+            if not reflexive(child_edge):
+                if child_edge.sub in next_node.path_to_root:
+                    continue
+                child_node = TreeNode(
+                    id=child_edge.sub, parent_id=next_node_id, parent_relation=pred
+                )
+                child_node.path_to_root = next_node.path_to_root + [next_node_id]
+                if pred not in next_node.children:
+                    next_node.children[pred] = []
+                next_node.children[pred].append(child_node)
+                stack.append(child_node)
+
+    return tree_roots
+
+
+def graph_to_d3viz_objects(
+    graph: Graph,
+    predicates: List[PRED_CURIE] = None,
+    relations_as_nodes=False,
+    **kwargs,
+) -> List[Dict]:
+    roots = graph_to_tree_structure(graph, predicates=predicates, **kwargs)
+    return [
+        tree_node_to_d3viz_object(root, relations_as_nodes=relations_as_nodes) for root in roots
+    ]
+
+
+def tree_node_to_d3viz_object(tree_node: TreeNode, relations_as_nodes=False) -> Dict:
+    obj = {"name": tree_node.lbl, "parent": tree_node.parent_id}
+    if tree_node.children:
+        obj["children"] = []
+        if relations_as_nodes:
+            for pred, children in tree_node.children.items():
+                pred_node = {
+                    "name": pred,
+                    "parent": tree_node.id,
+                    "children": [tree_node_to_d3viz_object(child, True) for child in children],
+                }
+                obj["children"].append(pred_node)
+        else:
+            for children in tree_node.children.values():
+                for child in children:
+                    obj["children"].append(tree_node_to_d3viz_object(child))
+    return obj
+
+
 def expand_all_graph_ids(graph: Union[Graph, GraphDocument], converter: Converter) -> None:
     def _expand(x):
         try:
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 34b97f1dc..11504c6ee 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -563,6 +563,55 @@ def test_dump(self):
             else:
                 raise AssertionError(f"Unexpected output format: {output_format}")
 
+    def test_transform(self):
+        """Run `transform` for each transformer and format, then count terms/edges on reload."""
+        # (input ontology, output format, adapter scheme used to re-read the output)
+        cases = [
+            (TEST_ONT, "obo", "simpleobo"),
+            (TEST_ONT, "obojson", "obograph"),
+        ]
+        # (transformer, config object, (expected n terms, expected n edges)); None skips a check
+        transformers = [
+            ("SEPTransformer", {}, (413, None)),
+            ("EdgeFilterTransformer", {}, (176, None)),
+            ("EdgeFilterTransformer", {"include_predicates": [IS_A]}, (176, None)),
+        ]
+        # the output file name depends only on the format, so later runs overwrite earlier ones
+        for transformer, conf_object, (expected_n_terms, expected_n_edges) in transformers:
+            for input_path, output_format, scheme in cases:
+                msg = f"transformer={transformer}, input={input_path}, format={output_format}"
+                logging.info(msg)
+                output_path = str(OUTPUT_DIR / f"test_transform-{output_format}.out")
+                cmd = [
+                    "-i",
+                    str(input_path),
+                    "transform",
+                    "-t",
+                    transformer,
+                    "-o",
+                    output_path,
+                    "-O",
+                    output_format,
+                ]
+                if conf_object:
+                    # the generated config is a test artifact, so it is written to
+                    # OUTPUT_DIR rather than into the input directory
+                    conf_path = OUTPUT_DIR / f"{output_format}_conf.yaml"
+                    with open(conf_path, "w", encoding="utf-8") as f:
+                        yaml.dump(conf_object, f)
+                    # CLI arguments are passed as strings, not Path objects
+                    cmd.extend(["-c", str(conf_path)])
+                result = self.runner.invoke(main, cmd)
+                self.assertEqual(0, result.exit_code, msg)
+                adapter = get_adapter(f"{scheme}:{output_path}")
+                # both listings are always materialized so that each accessor is exercised
+                terms = list(adapter.entities())
+                edges = list(adapter.relationships())
+                if expected_n_terms is not None:
+                    self.assertEqual(expected_n_terms, len(terms), msg)
+                if expected_n_edges is not None:
+                    self.assertEqual(expected_n_edges, len(edges), msg)
+
     def test_extract(self):
         obojson_input = f"obograph:{TEST_OBOJSON}"
         cases = [
diff --git a/tests/test_utilities/test_obograph_utils.py b/tests/test_utilities/test_obograph_utils.py
index 79725e012..7170d5519 100644
--- a/tests/test_utilities/test_obograph_utils.py
+++ b/tests/test_utilities/test_obograph_utils.py
@@ -17,7 +17,9 @@
     filter_by_predicates,
     graph_as_dict,
     graph_ids,
+    graph_to_d3viz_objects,
     graph_to_tree_display,
+    graph_to_tree_structure,
     induce_graph_prefix_map,
     shortest_paths,
     trim_graph,
@@ -115,7 +117,7 @@ def test_filter_by_predicates(self):
         self.assertGreater(len(g.edges), len(g2.edges))
         self.assertGreater(len(g2.edges), 100)
 
-    def test_as_tree(self):
+    def test_as_tree_display(self):
         t = graph_to_tree_display(self.graph, predicates=[IS_A])
         lines = t.split("\n")
         self.assertIn("[i] BFO:0000015 ! process", t)
@@ -128,6 +130,19 @@ def test_as_tree(self):
         self.assertIn("* [p] GO:0019209 ! kinase activator activity", t)
         self.assertGreater(len(lines), 100)
 
+    def test_as_tree_structure(self):
+        """Tree roots are found under IS_A and each one can be serialized as JSON."""
+        objs = [t.model_dump() for t in graph_to_tree_structure(self.graph, predicates=[IS_A])]
+        self.assertGreater(len(json.dumps(objs)), len("[]"))  # "[]" would mean no roots
+
+    def test_as_d3viz(self):
+        """Every root is a JSON-serializable dict with "name"/"parent" keys, in both modes."""
+        for preds in [[IS_A], [IS_A, PART_OF]]:
+            for as_nodes in [True, False]:
+                objs = graph_to_d3viz_objects(self.graph, preds, relations_as_nodes=as_nodes)
+                self.assertGreater(len(json.dumps(objs)), len("[]"), (preds, as_nodes))
+                self.assertTrue(all({"name", "parent"} <= obj.keys() for obj in objs))
+
     def test_trim_ancestors(self):
         oi = self.oi
         both = [IS_A, PART_OF]