Skip to content

Commit

Permalink
d3viz writer (#700)
Browse files Browse the repository at this point in the history
* Adding d3viz json tree export

* Adding ability to output d3viz json.

Also added graph transformers to allow for dynamic generation of
SEP-style grouping classes.

Fixes #696

* reformat
  • Loading branch information
cmungall authored Mar 8, 2024
1 parent 15bf85c commit e4d6e63
Show file tree
Hide file tree
Showing 13 changed files with 729 additions and 49 deletions.
163 changes: 116 additions & 47 deletions src/oaklib/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,10 @@
from oaklib.parsers.association_parser_factory import get_association_parser
from oaklib.resource import OntologyResource
from oaklib.selector import get_adapter, get_resource_from_shorthand
from oaklib.transformers.transformers_factory import (
apply_ontology_transformation,
get_ontology_transformer,
)
from oaklib.types import CURIE, PRED_CURIE
from oaklib.utilities import table_filler
from oaklib.utilities.apikey_manager import set_apikey_value
Expand Down Expand Up @@ -180,6 +184,7 @@
from oaklib.utilities.obograph_utils import (
ancestors_with_stats,
default_stylemap_path,
graph_to_d3viz_objects,
graph_to_image,
graph_to_tree_display,
shortest_paths,
Expand Down Expand Up @@ -1977,42 +1982,54 @@ def tree(
impl = settings.impl
if configure:
logging.warning("Configure is not yet supported")
if isinstance(impl, OboGraphInterface):
curies = list(query_terms_iterator(terms, impl))
if stylemap is None:
stylemap = default_stylemap_path()
actual_predicates = _process_predicates_arg(predicates)
if add_mrcas:
if isinstance(impl, SemanticSimilarityInterface):
curies_to_add = [
lca
for s, o, lca in impl.multiset_most_recent_common_ancestors(
curies, predicates=actual_predicates
)
]
curies = list(set(curies + curies_to_add))
logging.info(f"Expanded CURIEs = {curies}")
else:
raise NotImplementedError(f"{impl} does not implement SemanticSimilarityInterface")
if down:
graph = impl.subgraph_from_traversal(curies, predicates=actual_predicates)
elif gap_fill:
logging.info("Using gap-fill strategy")
if isinstance(impl, SubsetterInterface):
rels = impl.gap_fill_relationships(curies, predicates=actual_predicates)
if isinstance(impl, OboGraphInterface):
graph = impl.relationships_to_graph(rels)
else:
raise AssertionError(f"{impl} needs to be of type OboGraphInterface")
if not isinstance(impl, OboGraphInterface):
raise NotImplementedError(f"Cannot execute this using {impl} of type {type(impl)}")
curies = list(query_terms_iterator(terms, impl))
if stylemap is None:
stylemap = default_stylemap_path()
actual_predicates = _process_predicates_arg(predicates)
if add_mrcas:
if isinstance(impl, SemanticSimilarityInterface):
curies_to_add = [
lca
for s, o, lca in impl.multiset_most_recent_common_ancestors(
curies, predicates=actual_predicates
)
]
curies = list(set(curies + curies_to_add))
logging.info(f"Expanded CURIEs = {curies}")
else:
raise NotImplementedError(f"{impl} does not implement SemanticSimilarityInterface")
if down:
graph = impl.subgraph_from_traversal(curies, predicates=actual_predicates)
elif gap_fill:
logging.info("Using gap-fill strategy")
if isinstance(impl, SubsetterInterface):
rels = impl.gap_fill_relationships(curies, predicates=actual_predicates)
if isinstance(impl, OboGraphInterface):
graph = impl.relationships_to_graph(rels)
else:
raise NotImplementedError(f"{impl} needs to implement Subsetter for --gap-fill")
raise AssertionError(f"{impl} needs to be of type OboGraphInterface")
else:
graph = impl.ancestor_graph(curies, predicates=actual_predicates)
logging.info(
f"Drawing graph with {len(graph.nodes)} nodes seeded from {curies} // {output_type}"
raise NotImplementedError(f"{impl} needs to implement Subsetter for --gap-fill")
else:
graph = impl.ancestor_graph(curies, predicates=actual_predicates)
logging.info(
f"Drawing graph with {len(graph.nodes)} nodes seeded from {curies} // {output_type}"
)
if max_hops is not None:
graph = trim_graph(graph, curies, distance=max_hops)
if output_type in ["d3viz", "d3viz_relational"]:
trees = graph_to_d3viz_objects(
graph,
predicates=actual_predicates,
start_curies=list(root) if root else None,
relations_as_nodes=output_type == "d3viz_relational",
max_paths=None,
)
if max_hops is not None:
graph = trim_graph(graph, curies, distance=max_hops)
json_dump = json.dumps(trees, indent=2)
output.write(json_dump)
else:
graph_to_tree_display(
graph,
seeds=curies,
Expand All @@ -2024,8 +2041,6 @@ def tree(
display_options=display.split(","),
output=output,
)
else:
raise NotImplementedError(f"Cannot execute this using {impl} of type {type(impl)}")


@main.command()
Expand Down Expand Up @@ -2450,11 +2465,6 @@ def dump(terms, output, output_type: str, config_file: str = None, **kwargs):
"""
Exports (dumps) the entire contents of an ontology.
:param terms: A list of terms to dump. If not specified, the entire ontology will be dumped.
:param output: Path to output file
:param output_type: The output format. One of: obo, obojson, ofn, rdf, json, yaml, fhirjson, csv, nl
:param config_file: Path to a configuration JSON file for additional params (which may be required for some formats)
Example:
runoak -i pato.obo dump -o pato.json -O json
Expand Down Expand Up @@ -2485,14 +2495,73 @@ def dump(terms, output, output_type: str, config_file: str = None, **kwargs):
if terms:
raise NotImplementedError("Currently dump for a subset of terms is not supported")
impl = settings.impl
if isinstance(impl, BasicOntologyInterface):
logging.info(f"Out={output} syntax={output_type}")
if config_file:
with open(config_file) as file:
kwargs |= json.load(file)
impl.dump(output, syntax=output_type, **kwargs)
else:
if not isinstance(impl, BasicOntologyInterface):
raise NotImplementedError
logging.info(f"Out={output} syntax={output_type}")
if config_file:
with open(config_file) as file:
kwargs |= json.load(file)
impl.dump(output, syntax=output_type, **kwargs)


@main.command()
@click.argument("terms", nargs=-1)
@click.option("-o", "--output", help="Path to output file")
@output_type_option
@click.option(
    "-c",
    "--config-file",
    help="""Config file for additional transform params.""",
)
@click.option(
    "-t",
    "--transform",
    required=True,
    help="""Name of transformation to apply.""",
)
def transform(terms, transform, output, output_type: str, config_file: str = None, **kwargs):
    """
    Transforms an ontology

    Applies the named transformation to the input ontology and writes the
    result to the output, using the syntax given by the output type option.

    Example:

        runoak -i pato.obo transform -t EdgeFilterTransformer -c config.yaml -o pato.json -O json

    Additional parameters for the transformation can be supplied in a YAML
    configuration file; its top-level keys are passed as keyword arguments
    when the transformer is created.

    Transforming a subset of terms is not currently supported.
    """
    # NOTE(review): the example in the help text above is derived from the
    # options declared on this command and has not been executed — confirm.
    if terms:
        raise NotImplementedError("Currently transform for a subset of terms is not supported")
    impl = settings.impl
    if not isinstance(impl, BasicOntologyInterface):
        raise NotImplementedError(f"Cannot execute this using {impl} of type {type(impl)}")
    logging.info(f"Out={output} syntax={output_type}")
    if config_file:
        with open(config_file) as file:
            # safe_load returns None for an empty document; fall back to an
            # empty mapping so that the merge below does not raise TypeError.
            kwargs |= yaml.safe_load(file) or {}
    transformer = get_ontology_transformer(transform, **kwargs)
    new_impl = apply_ontology_transformation(impl, transformer)
    new_impl.dump(output, syntax=output_type)


@main.command()
Expand Down
8 changes: 8 additions & 0 deletions src/oaklib/implementations/sqldb/sql_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,13 @@ def _is_quoted_url(curie: CURIE):
return curie.startswith("<")


def _remove_uri_quotes(curie: CURIE):
    """
    Strips the enclosing angle brackets from a quoted URI.

    :param curie: an identifier, possibly of the form ``<...>``
    :return: the identifier without its enclosing ``<`` and ``>``; the input
        is returned unchanged unless it both starts with ``<`` and ends with ``>``
    """
    # Require the closing bracket as well: slicing [1:-1] on a value that only
    # has the opening bracket would silently drop a real trailing character.
    if _is_quoted_url(curie) and len(curie) > 1 and curie.endswith(">"):
        return curie[1:-1]
    return curie


@dataclass
class SqlImplementation(
RelationGraphInterface,
Expand Down Expand Up @@ -2689,6 +2696,7 @@ def _filter(select_expr, filter_expr=None):
f"Ad-hoc repair of literal value for contributor: {contributor_id}"
)
contributor_id = string_as_base64_curie(contributor_id)
contributor_id = _remove_uri_quotes(contributor_id)
if contributor_id not in ssc.contributor_summary:
ssc.contributor_summary[contributor_id] = ContributorStatistics(
contributor_id=contributor_id, contributor_name=contributor_name
Expand Down
Empty file.
18 changes: 18 additions & 0 deletions src/oaklib/transformers/chained_ontology_transformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from dataclasses import dataclass
from typing import Any, Collection

from oaklib.transformers.ontology_transformer import OntologyTransformer


@dataclass
class ChainedOntologyTransformer(OntologyTransformer):
    """
    An ontology transformer that runs a series of other transformers,
    feeding the output of each one into the next.
    """

    chained_transformers: Collection[OntologyTransformer]

    def transform(self, source_ontology: Any, **kwargs) -> Any:
        """
        Applies each chained transformer in turn.

        :param source_ontology: the ontology handed to the first transformer
        :param kwargs: forwarded unchanged to every transformer in the chain
        :return: the output of the last transformer, or the input itself
            if the chain is empty
        """
        current = source_ontology
        for step in self.chained_transformers:
            # each step consumes the result produced by the previous one
            current = step.transform(current, **kwargs)
        return current
75 changes: 75 additions & 0 deletions src/oaklib/transformers/edge_filter_transformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from dataclasses import dataclass
from typing import Collection, Optional

from oaklib.datamodels.obograph import Graph
from oaklib.datamodels.vocabulary import IS_A
from oaklib.transformers.graph_transformer import GraphTransformer
from oaklib.types import PRED_CURIE


@dataclass
class EdgeFilterTransformer(GraphTransformer):
    """
    An ontology graph transformer that filters edges

    An edge is retained only if it passes every criterion that is configured
    (``include_predicates``, ``exclude_predicates`` and ``filter_function``).
    """

    include_predicates: Optional[Collection[PRED_CURIE]] = None
    """A collection of predicates to include"""

    exclude_predicates: Optional[Collection[PRED_CURIE]] = None
    """A collection of predicates to exclude"""

    filter_function: Optional[callable] = None
    """A function that takes an edge and returns True if it should be included"""

    def transform(self, source_ontology: Graph, **kwargs) -> Graph:
        """
        Filters edges from a graph.

        Example:

            >>> from oaklib import get_adapter
            >>> from oaklib.transformers.transformers_factory import get_ontology_transformer
            >>> from oaklib.datamodels.vocabulary import IS_A
            >>> adapter = get_adapter("tests/input/go-nucleus.obo")
            >>> graph = adapter.as_obograph()
            >>> transformer = get_ontology_transformer("EdgeFilterTransformer", include_predicates=[IS_A])
            >>> filtered_graph = transformer.transform(graph)
            >>> set([e.pred for e in filtered_graph.edges])
            {'is_a'}

        :param source_ontology: the graph whose edges are to be filtered
        :param kwargs: accepted for interface compatibility; not used here
        :return: the input graph itself when no filter is configured, otherwise
            a new graph with the same id and nodes and only the retained edges
        """
        include_predicates = self.include_predicates
        exclude_predicates = self.exclude_predicates
        filter_function = self.filter_function

        # Nothing to do only when *no* criterion is configured. The filter
        # function must be part of this check, otherwise a transformer that is
        # configured with a filter function alone would never apply it.
        # NOTE(review): this shortcut also bypasses _post_process, so
        # remove_dangling_edges has no effect here — confirm this is intended.
        if (
            include_predicates is None
            and exclude_predicates is None
            and filter_function is None
        ):
            return source_ontology

        def _normalize_id(pred: PRED_CURIE) -> PRED_CURIE:
            # Edge predicates appear as "is_a" in the graph (see the example
            # above), so IS_A is mapped to that form before comparing.
            if pred == IS_A:
                return "is_a"
            else:
                return pred

        if include_predicates is not None:
            include_predicates = {_normalize_id(pred) for pred in include_predicates}

        if exclude_predicates is not None:
            exclude_predicates = {_normalize_id(pred) for pred in exclude_predicates}

        def _keep(edge) -> bool:
            # An edge must pass every configured criterion to be retained.
            if include_predicates is not None and edge.pred not in include_predicates:
                return False
            if exclude_predicates is not None and edge.pred in exclude_predicates:
                return False
            return filter_function is None or bool(filter_function(edge))

        new_edges = [edge for edge in source_ontology.edges if _keep(edge)]
        new_graph = Graph(id=source_ontology.id, nodes=source_ontology.nodes, edges=new_edges)
        return self._post_process(new_graph)
43 changes: 43 additions & 0 deletions src/oaklib/transformers/graph_transformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from dataclasses import dataclass

from oaklib.datamodels.obograph import Graph
from oaklib.transformers.ontology_transformer import OntologyTransformer


@dataclass
class GraphTransformer(OntologyTransformer):
    """
    Base class for ontology transformers that take a graph as input
    """

    remove_dangling_edges: bool = False
    """If true, removes edges that point to nodes that are not in the graph"""

    def transform(self, source_ontology: Graph, **kwargs) -> Graph:
        """
        Transforms a graph.

        This base implementation always raises; subclasses provide the
        actual transformation.

        :param source_ontology: the graph to transform
        :param kwargs: additional transformer-specific arguments
        :return: the transformed graph
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def apply_remove_dangling_edges(self, graph: Graph):
        """
        Drops every edge whose subject or object is not a node of the graph.

        :param graph: the graph to clean up
        :return: a new graph with the same id and nodes, keeping only edges
            whose ``sub`` and ``obj`` are both ids of nodes in the graph
        """
        known_ids = {node.id for node in graph.nodes}
        retained = [
            edge for edge in graph.edges if edge.sub in known_ids and edge.obj in known_ids
        ]
        return Graph(id=graph.id, nodes=graph.nodes, edges=retained)

    def _post_process(self, graph: Graph):
        """
        Applies the optional clean-up steps that are switched on for this transformer.

        :param graph: a freshly transformed graph
        :return: the graph itself, or a copy without dangling edges when
            ``remove_dangling_edges`` is set
        """
        if not self.remove_dangling_edges:
            return graph
        return self.apply_remove_dangling_edges(graph)
Loading

0 comments on commit e4d6e63

Please sign in to comment.