Skip to content

Commit

Permalink
Merge pull request #219 from singnet/senna-das106-1
Browse files: browse the repository at this point in the history
[das/#106] Change patterns and templates indexes to store handles only rather than handles+targets
  • Loading branch information
andre-senna authored Sep 24, 2024
2 parents (ebd9913 + af10583), commit 638abf9
Show file tree
Hide file tree
Showing 11 changed files with 445 additions and 1,073 deletions.
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
[#214] Added type alias for atom handle
[#216] Removed cursor from api of get_matched*() and get_incoming_links()
[das/#106] Change patterns and templates indexes to store handles only rather than handles+targets
3 changes: 1 addition & 2 deletions hyperon_das_atomdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,12 @@
if sys.version_info < (3, 10):
raise RuntimeError("hyperon-das-atomdb requires Python 3.10 or higher")

from .database import UNORDERED_LINK_TYPES, WILDCARD, AtomDB
from .database import WILDCARD, AtomDB
from .exceptions import AtomDoesNotExist

__all__ = [
"AtomDB",
"WILDCARD",
"UNORDERED_LINK_TYPES",
"AtomDoesNotExist",
]

Expand Down
88 changes: 24 additions & 64 deletions hyperon_das_atomdb/adapters/ram_only.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,19 @@
from typing import Any, Iterable

from hyperon_das_atomdb.database import (
UNORDERED_LINK_TYPES,
WILDCARD,
AtomDB,
AtomT,
FieldIndexType,
FieldNames,
HandleListT,
IncomingLinksT,
LinkParamsT,
LinkT,
MatchedLinksResultT,
MatchedTargetsListT,
MatchedTypesResultT,
NodeParamsT,
NodeT,
)
from hyperon_das_atomdb.exceptions import AtomDoesNotExist, InvalidOperationException
from hyperon_das_atomdb.exceptions import AtomDoesNotExist
from hyperon_das_atomdb.logger import logger
from hyperon_das_atomdb.utils.expression_hasher import ExpressionHasher
from hyperon_das_atomdb.utils.patterns import build_pattern_keys
Expand All @@ -43,10 +40,10 @@ class Database:
atom_type: dict[str, Any] = dc_field(default_factory=dict)
node: dict[str, AtomT] = dc_field(default_factory=dict)
link: dict[str, AtomT] = dc_field(default_factory=dict)
outgoing_set: dict[str, Any] = dc_field(default_factory=dict)
outgoing_set: dict[str, set[str]] = dc_field(default_factory=dict)
incoming_set: dict[str, set[str]] = dc_field(default_factory=dict)
patterns: dict[str, set[tuple[str, tuple[str, ...]]]] = dc_field(default_factory=dict)
templates: dict[str, set[tuple[str, tuple[str, ...]]]] = dc_field(default_factory=dict)
patterns: dict[str, set[str]] = dc_field(default_factory=dict)
templates: dict[str, set[str]] = dc_field(default_factory=dict)


class InMemoryDB(AtomDB):
Expand Down Expand Up @@ -225,7 +222,6 @@ def _add_templates(
composite_type_hash: str,
named_type_hash: str,
key: str,
targets_hash: list[str],
) -> None:
"""
Add templates to the database.
Expand All @@ -234,38 +230,36 @@ def _add_templates(
composite_type_hash (str): The hash of the composite type.
named_type_hash (str): The hash of the named type.
key (str): The key for the template.
targets_hash (list[str]): A list of target hashes to be added to the template.
"""
template_composite_type_hash = self.db.templates.get(composite_type_hash)
template_named_type_hash = self.db.templates.get(named_type_hash)

if template_composite_type_hash is not None:
template_composite_type_hash.add((key, tuple(targets_hash)))
template_composite_type_hash.add(key)
else:
self.db.templates[composite_type_hash] = {(key, tuple(targets_hash))}
self.db.templates[composite_type_hash] = {key}

if template_named_type_hash is not None:
template_named_type_hash.add((key, tuple(targets_hash)))
template_named_type_hash.add(key)
else:
self.db.templates[named_type_hash] = {(key, tuple(targets_hash))}
self.db.templates[named_type_hash] = {key}

def _delete_templates(self, link_document: dict, targets_hash: list[str]) -> None:
def _delete_templates(self, link_document: dict) -> None:
"""
Delete templates from the database.
Args:
link_document (dict): The document of the link whose templates are to be deleted.
targets_hash (list[str]): A list of target hashes associated with the link.
"""
template_composite_type = self.db.templates.get(
link_document[FieldNames.COMPOSITE_TYPE_HASH], set()
)
if len(template_composite_type) > 0:
template_composite_type.remove((link_document[FieldNames.ID_HASH], tuple(targets_hash)))
template_composite_type.remove(link_document[FieldNames.ID_HASH])

template_named_type = self.db.templates.get(link_document[FieldNames.TYPE_NAME_HASH], set())
if len(template_named_type) > 0:
template_named_type.remove((link_document[FieldNames.ID_HASH], tuple(targets_hash)))
template_named_type.remove(link_document[FieldNames.ID_HASH])

def _add_patterns(self, named_type_hash: str, key: str, targets_hash: list[str]) -> None:
"""
Expand All @@ -282,7 +276,7 @@ def _add_patterns(self, named_type_hash: str, key: str, targets_hash: list[str])
self.db.patterns.setdefault(
pattern_key,
set(),
).add((key, tuple(targets_hash)))
).add(key)

def _delete_patterns(self, link_document: dict, targets_hash: list[str]) -> None:
"""
Expand All @@ -295,7 +289,7 @@ def _delete_patterns(self, link_document: dict, targets_hash: list[str]) -> None
pattern_keys = build_pattern_keys([link_document[FieldNames.TYPE_NAME_HASH], *targets_hash])
for pattern_key in pattern_keys:
if pattern := self.db.patterns.get(pattern_key):
pattern.remove((link_document[FieldNames.ID_HASH], tuple(targets_hash)))
pattern.remove(link_document[FieldNames.ID_HASH])

def _delete_link_and_update_index(self, link_handle: str) -> None:
"""
Expand All @@ -307,22 +301,21 @@ def _delete_link_and_update_index(self, link_handle: str) -> None:
if link_document := self._get_and_delete_link(link_handle):
self._update_index(atom=link_document, delete_atom=True)

def _filter_non_toplevel(self, matches: MatchedTargetsListT) -> MatchedTargetsListT:
def _filter_non_toplevel(self, matches: HandleListT) -> HandleListT:
"""
Filter out non-toplevel matches from the provided list.
Args:
matches (MatchedTargetsListT): A list of matches, where each match is a tuple
containing a link handle and a tuple of target handles.
matches (HandleListT): A list of matches
Returns:
MatchedTargetsListT: A list of matches that are toplevel only.
HandleListT: Filtered matches
"""
if not self.db.link:
return matches
return [
(link_handle, matched_targets)
for link_handle, matched_targets in matches
link_handle
for link_handle in matches
if (link := self.db.link.get(link_handle)) and link.get(FieldNames.IS_TOPLEVEL)
]

Expand Down Expand Up @@ -378,9 +371,7 @@ def _delete_atom_index(self, atom: AtomT) -> None:
self._delete_incoming_set(link_handle, outgoing_atoms)

targets_hash = self._build_targets_list(atom)

self._delete_templates(atom, targets_hash)

self._delete_templates(atom)
self._delete_patterns(atom, targets_hash)

def _add_atom_index(self, atom: AtomT) -> None:
Expand All @@ -398,20 +389,14 @@ def _add_atom_index(self, atom: AtomT) -> None:
if FieldNames.NODE_NAME not in atom:
handle = atom[FieldNames.ID_HASH]
targets_hash = self._build_targets_list(atom)
# self._add_atom_type(atom_type_name=atom_type_name) # see 4 ln above - duplicate?
self._add_outgoing_set(handle, targets_hash)
self._add_incoming_set(handle, targets_hash)
self._add_templates(
atom[FieldNames.COMPOSITE_TYPE_HASH],
atom[FieldNames.TYPE_NAME_HASH],
handle,
targets_hash,
)
self._add_patterns(
atom[FieldNames.TYPE_NAME_HASH],
handle,
targets_hash,
)
self._add_patterns(atom[FieldNames.TYPE_NAME_HASH], handle, targets_hash)

def _update_index(self, atom: AtomT, **kwargs) -> None:
"""
Expand Down Expand Up @@ -536,22 +521,7 @@ def get_link_targets(self, link_handle: str) -> list[str]:
details=f"link_handle: {link_handle}",
)

def is_ordered(self, link_handle: str) -> bool:
link = self._get_link(link_handle)
if link is not None:
return True
logger().error(
f"Failed to retrieve document for link handle: {link_handle}. "
f"The link may not exist."
)
raise AtomDoesNotExist(
message="Nonexistent atom",
details=f"link_handle: {link_handle}",
)

def get_matched_links(
self, link_type: str, target_handles: list[str], **kwargs
) -> MatchedLinksResultT:
def get_matched_links(self, link_type: str, target_handles: list[str], **kwargs) -> HandleListT:
if link_type != WILDCARD and WILDCARD not in target_handles:
try:
answer = [self.get_link_handle(link_type, target_handles)]
Expand All @@ -562,16 +532,6 @@ def get_matched_links(
link_type_hash = (
WILDCARD if link_type == WILDCARD else ExpressionHasher.named_type_hash(link_type)
)
# NOTE unreachable
if link_type in UNORDERED_LINK_TYPES: # pragma: no cover
logger().error(
"Failed to get matched links: Queries with unordered links are not implemented. "
f"link_type: {link_type}"
)
raise InvalidOperationException(
message="Queries with unordered links are not implemented",
details=f"link_type: {link_type}",
)

pattern_hash = ExpressionHasher.composite_hash([link_type_hash, *target_handles])

Expand All @@ -588,15 +548,15 @@ def get_incoming_links(self, atom_handle: str, **kwargs) -> IncomingLinksT:
return list(links)
return [self.get_atom(handle, **kwargs) for handle in links]

def get_matched_type_template(self, template: list[Any], **kwargs) -> MatchedTypesResultT:
def get_matched_type_template(self, template: list[Any], **kwargs) -> HandleListT:
hash_base = self._build_named_type_hash_template(template)
template_hash = ExpressionHasher.composite_hash(hash_base)
templates_matched = list(self.db.templates.get(template_hash, set()))
if kwargs.get("toplevel_only", False):
return self._filter_non_toplevel(templates_matched)
return templates_matched

def get_matched_type(self, link_type: str, **kwargs) -> MatchedTypesResultT:
def get_matched_type(self, link_type: str, **kwargs) -> HandleListT:
link_type_hash = ExpressionHasher.named_type_hash(link_type)
templates_matched = list(self.db.templates.get(link_type_hash, set()))
if kwargs.get("toplevel_only", False):
Expand Down
Loading

0 comments on commit 638abf9

Please sign in to comment.