Skip to content

Commit

Permalink
Skip matches on the same branch.
Browse files Browse the repository at this point in the history
Attempt at optimization, #114
  • Loading branch information
nsbgn committed Jul 23, 2023
1 parent 671de1f commit 49a56af
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 24 deletions.
23 changes: 16 additions & 7 deletions tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,9 +223,14 @@ def test_query_specific_aspects(self):
[C, [A, [B]]], by_io=True, by_chronology=False,
results={TEST.wf2}
)

# print(graph.serialize(format="trig"))
# print(make_query(lang, [C, [A, [B]]], by_io=True,
# by_chronology=True).sparql())
# I don't really understand why the below matches with wf2

self.assertQuery(lang, graph,
[C, [A, [B]]], by_io=True, by_chronology=True,
results={}
[C, [A, [B]]], by_io=True, by_chronology=True, results={}
)

def test_multiple_options_accept_union(self):
Expand All @@ -242,7 +247,7 @@ def test_multiple_options_accept_union(self):
TEST.wf3: Source(A)
})

self.assertQuery(lang, workflows, [A],
self.assertQuery(lang, workflows, [A], by_penultimate=False,
results={TEST.wf3})
self.assertQuery(lang, workflows, [B],
results={TEST.wf1, TEST.wf2})
Expand Down Expand Up @@ -325,8 +330,10 @@ def test_tree_unfold(self):
g.add((b, TF.type, TEST.B))
g.add((c, TF.type, TEST.C))
g.add((d, TF.type, TEST.D))
query1 = TransformationQuery(lang, g, unfold_tree=False)
query2 = TransformationQuery(lang, g, unfold_tree=True)
query1 = TransformationQuery(lang, g, unfold_tree=False,
skip_same_branch_matches=False)
query2 = TransformationQuery(lang, g, unfold_tree=True,
skip_same_branch_matches=False)
self.assertQuery(lang, wfgraph, query1, results=set())
self.assertQuery(lang, wfgraph, query2, results={TEST.wf1})

Expand Down Expand Up @@ -406,7 +413,8 @@ def test_multiple_outputs(self):
graph.add((root, TF.output, B))
graph.add((A, TF["from"], C))
graph.add((B, TF["from"], C))
query = TransformationQuery(lang, graph, unfold_tree=False)
query = TransformationQuery(lang, graph, by_penultimate=False,
unfold_tree=False, skip_same_branch_matches=False)
result = list(query.chronology())
self.assertIn(result, [
['?workflow :output ?_0.', '?workflow :output ?_1.',
Expand Down Expand Up @@ -443,7 +451,8 @@ def test_sensible_order(self):
graph.add((A, TF["from"], B))
graph.add((A, TF["from"], C))
graph.add((B, TF["from"], C))
query = TransformationQuery(lang, graph, unfold_tree=False)
query = TransformationQuery(lang, graph, by_penultimate=False,
unfold_tree=False, skip_same_branch_matches=False)
result = list(query.chronology())
self.assertIn(result, [
['?workflow :output ?_0.', '?_0 :from* ?_1.',
Expand Down
37 changes: 20 additions & 17 deletions transforge/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def __init__(self, lang: Language, graph: Graph,
with_noncanonical_types: bool = False, by_io: bool = True,
by_types: bool = True, by_operators: bool = True,
by_chronology: bool = True, by_penultimate: bool = True,
skip_same_branch_matches: bool = True,
unfold_tree: bool = False) -> None:

self.lang = lang
Expand All @@ -90,6 +91,7 @@ def __init__(self, lang: Language, graph: Graph,
self.by_operators = by_operators
self.by_chronology = by_chronology
self.unfold_tree = unfold_tree
self.skip_same_branch_matches = skip_same_branch_matches

# Keep track of the type and operator of each step
self.type: dict[Variable, list[Node]] = dict()
Expand Down Expand Up @@ -324,13 +326,16 @@ def chronology(self) -> Iterator[str]:
if current in visited:
continue

# afters = " ".join(a.n3() for a in self.after[current])
# yield f"{{SELECT DISTINCT {current.n3()} {afters} WHERE {{"
if self.skip_same_branch_matches:
yield f"\n{{SELECT {current.n3()} WHERE {{"

# Connect the initial nodes (ie outputs)
if not self.after[current]:
assert current in self.outputs
yield f"?workflow :output {current.n3()}."
if self.by_penultimate:
yield f"?workflow :output/:from? {current.n3()}."
else:
yield f"?workflow :output {current.n3()}."

# Write connections to previous nodes (ie ones that come after)
for c in self.after[current]:
Expand Down Expand Up @@ -361,20 +366,18 @@ def chronology(self) -> Iterator[str]:

# Make sure as early as possible that there is no earlier match on the
# same branch
# for c in self.after[current]:
# types = self.type.get(current) or []
# if not len(types) == 1:
# continue
# between = next(self.generator)
# yield "FILTER NOT EXISTS {"
# yield f"{current.n3()} ^:to+ {between.n3()}."
# yield f"{c.n3()} :to* {between.n3()}."
# for t in types:
# yield f"{between.n3()} :type/rdfs:subClassOf* {t.n3()}."
# yield "}"

# Make sure we don't check the same node
# yield "}}"
if self.skip_same_branch_matches and self.after[current]:
yield "FILTER NOT EXISTS {"
predecessor = next(self.generator)
for c in self.after[current]:
yield f"{c.n3()} :from+ {predecessor.n3()}."
yield f"{predecessor.n3()} :from+ {current.n3()}."
yield from union(f"{predecessor.n3()} :type/rdfs:subClassOf*",
(self.lang.uri(t) for t in type_set))
yield "}"

if self.skip_same_branch_matches:
yield "}}"

visited.add(current)

Expand Down

0 comments on commit 49a56af

Please sign in to comment.