Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Store.groupby enhancements #670

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
10 changes: 6 additions & 4 deletions src/maggma/core/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def query(

Args:
criteria: PyMongo filter for documents to search in
properties: properties to return in grouped documents
properties: fields to include in returned documents. By default, all fields are returned.
sort: Dictionary of sort order for fields. Keys are field names and
values are 1 for ascending or -1 for descending.
skip: number documents to skip
Expand Down Expand Up @@ -161,7 +161,7 @@ def groupby(
Args:
keys: fields to group documents
criteria: PyMongo filter for documents to search in
properties: properties to return in grouped documents
properties: fields to include in grouped documents. By default, only the 'id' field is returned.
sort: Dictionary of sort order for fields. Keys are field names and
values are 1 for ascending or -1 for descending.
skip: number documents to skip
Expand Down Expand Up @@ -190,10 +190,12 @@ def query_one(
Queries the Store for a single document.

Args:
criteria: PyMongo filter for documents to search
properties: properties to return in the document
criteria: PyMongo filter for documents to search in
properties: fields to include in returned documents. By default, all fields are returned.
sort: Dictionary of sort order for fields. Keys are field names and
values are 1 for ascending or -1 for descending.
skip: number documents to skip
limit: limit on total number of documents returned
"""
return next(self.query(criteria=criteria, properties=properties, sort=sort), None)

Expand Down
26 changes: 20 additions & 6 deletions src/maggma/stores/advanced_stores.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@

Args:
criteria: PyMongo filter for documents to search in
properties: properties to return in grouped documents
properties: fields to include in returned documents. By default, all fields are returned.
sort: Dictionary of sort order for fields. Keys are field names and
values are 1 for ascending or -1 for descending.
skip: number documents to skip
Expand Down Expand Up @@ -292,7 +292,7 @@
Args:
keys: fields to group documents
criteria: PyMongo filter for documents to search in
properties: properties to return in grouped documents
properties: fields to include in grouped documents. By default, only the 'id' field is returned.
sort: Dictionary of sort order for fields. Keys are field names and
values are 1 for ascending or -1 for descending.
skip: number documents to skip
Expand All @@ -317,7 +317,14 @@

lazy_substitute(criteria, self.reverse_aliases)

return self.store.groupby(keys=keys, properties=properties, criteria=criteria, skip=skip, limit=limit)
return self.store.groupby(

Check warning on line 320 in src/maggma/stores/advanced_stores.py

View check run for this annotation

Codecov / codecov/patch

src/maggma/stores/advanced_stores.py#L320

Added line #L320 was not covered by tests
keys=keys,
properties=properties,
criteria=criteria,
skip=skip,
limit=limit,
sort=sort,
)

def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = None):
"""
Expand Down Expand Up @@ -443,7 +450,7 @@

Args:
criteria: PyMongo filter for documents to search in
properties: properties to return in grouped documents
properties: fields to include in returned documents. By default, all fields are returned.
sort: Dictionary of sort order for fields. Keys are field names and
values are 1 for ascending or -1 for descending.
skip: number documents to skip
Expand All @@ -468,7 +475,7 @@
Args:
keys: fields to group documents
criteria: PyMongo filter for documents to search in
properties: properties to return in grouped documents
properties: fields to include in grouped documents. By default, only the 'id' field is returned.
sort: Dictionary of sort order for fields. Keys are field names and
values are 1 for ascending or -1 for descending.
skip: number documents to skip
Expand All @@ -479,7 +486,14 @@
"""
criteria = dict(**criteria, **self.sbx_criteria) if criteria else self.sbx_criteria

return self.store.groupby(keys=keys, properties=properties, criteria=criteria, skip=skip, limit=limit)
return self.store.groupby(
keys=keys,
properties=properties,
criteria=criteria,
skip=skip,
limit=limit,
sort=sort,
)

def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = None):
"""
Expand Down
26 changes: 13 additions & 13 deletions src/maggma/stores/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,12 +172,12 @@ def query(
Queries the Store for a set of documents.

Args:
criteria: PyMongo filter for documents to search in.
properties: properties to return in grouped documents.
sort: Dictionary of sort order for fields. Keys are field names and values
are 1 for ascending or -1 for descending.
skip: number documents to skip.
limit: limit on total number of documents returned.
criteria: PyMongo filter for documents to search in
properties: fields to include in returned documents. By default, all fields are returned.
sort: Dictionary of sort order for fields. Keys are field names and
values are 1 for ascending or -1 for descending.
skip: number documents to skip
limit: limit on total number of documents returned

"""
prop_keys = set()
Expand Down Expand Up @@ -265,13 +265,13 @@ def groupby(
Simple grouping function that will group documents by keys.

Args:
keys: fields to group documents.
criteria: PyMongo filter for documents to search in.
properties: properties to return in grouped documents.
sort: Dictionary of sort order for fields. Keys are field names and values
are 1 for ascending or -1 for descending.
skip: number documents to skip.
limit: limit on total number of documents returned.
keys: fields to group documents
criteria: PyMongo filter for documents to search in
properties: fields to include in grouped documents. By default, only the 'id' field is returned.
sort: Dictionary of sort order for fields. Keys are field names and
values are 1 for ascending or -1 for descending.
skip: number documents to skip
limit: limit on total number of documents returned

Returns:
generator returning tuples of (dict, list of docs)
Expand Down
4 changes: 2 additions & 2 deletions src/maggma/stores/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def query(

Args:
criteria: PyMongo filter for documents to search in
properties: properties to return in grouped documents
properties: fields to include in returned documents. By default, all fields are returned.
sort: Dictionary of sort order for fields. Keys are field names and
values are 1 for ascending or -1 for descending.
skip: number documents to skip
Expand Down Expand Up @@ -266,7 +266,7 @@ def groupby(
Args:
keys: fields to group documents
criteria: PyMongo filter for documents to search in
properties: properties to return in grouped documents
properties: fields to include in grouped documents. By default, only the 'id' field is returned.
sort: Dictionary of sort order for fields. Keys are field names and
values are 1 for ascending or -1 for descending.
skip: number documents to skip
Expand Down
9 changes: 5 additions & 4 deletions src/maggma/stores/compound_stores.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ def query(
agg = self._collection.aggregate(pipeline)
yield from agg

# TODO - sort kwarg is not passed anywhere
def groupby(
self,
keys: Union[List[str], str],
Expand Down Expand Up @@ -267,8 +268,8 @@ def query_one(self, criteria=None, properties=None, **kwargs):
Get one document.

Args:
properties: properties to return in query
criteria: filter for matching
criteria: PyMongo filter for documents to search in
properties: fields to include in returned documents. By default, all fields are returned.
kwargs: kwargs for collection.aggregate

Returns:
Expand Down Expand Up @@ -436,7 +437,7 @@ def query(

Args:
criteria: PyMongo filter for documents to search in
properties: properties to return in grouped documents
properties: fields to include in returned documents. By default, all fields are returned.
sort: Dictionary of sort order for fields. Keys are field names and
values are 1 for ascending or -1 for descending.
skip: number documents to skip
Expand All @@ -463,7 +464,7 @@ def groupby(
Args:
keys: fields to group documents
criteria: PyMongo filter for documents to search in
properties: properties to return in grouped documents
properties: fields to include in grouped documents. By default, only the 'id' field is returned.
sort: Dictionary of sort order for fields. Keys are field names and
values are 1 for ascending or -1 for descending.
skip: number documents to skip
Expand Down
2 changes: 1 addition & 1 deletion src/maggma/stores/file_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ def query( # type: ignore

Args:
criteria: PyMongo filter for documents to search in
properties: properties to return in grouped documents
properties: fields to include in returned documents. By default, all fields are returned.
sort: Dictionary of sort order for fields. Keys are field names and
values are 1 for ascending or -1 for descending.
hint: Dictionary of indexes to use as hints for query optimizer.
Expand Down
5 changes: 3 additions & 2 deletions src/maggma/stores/gridfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def query(

Args:
criteria: PyMongo filter for documents to search in
properties: properties to return in grouped documents
properties: fields to include in returned documents. By default, all fields are returned.
sort: Dictionary of sort order for fields. Keys are field names and
values are 1 for ascending or -1 for descending.
skip: number documents to skip
Expand Down Expand Up @@ -289,6 +289,7 @@ def distinct(self, field: str, criteria: Optional[Dict] = None, all_exist: bool

return self._files_store.distinct(field=field, criteria=criteria)

# TODO - sort, skip, limit, and properties are not functional
def groupby(
self,
keys: Union[List[str], str],
Expand All @@ -306,7 +307,7 @@ def groupby(
Args:
keys: fields to group documents
criteria: PyMongo filter for documents to search in
properties: properties to return in grouped documents
properties: fields to include in grouped documents. By default, only the 'id' field is returned.
sort: Dictionary of sort order for fields. Keys are field names and
values are 1 for ascending or -1 for descending.
skip: number documents to skip
Expand Down
96 changes: 49 additions & 47 deletions src/maggma/stores/mongolike.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@

return distinct_vals if distinct_vals is not None else []

# TODO - sort, skip, and limit are not functional
def groupby(
self,
keys: Union[List[str], str],
Expand All @@ -205,7 +206,7 @@
Args:
keys: fields to group documents
criteria: PyMongo filter for documents to search in
properties: properties to return in grouped documents
properties: fields to include in grouped documents. By default, only the 'id' field is returned.
sort: Dictionary of sort order for fields. Keys are field names and
values are 1 for ascending or -1 for descending.
skip: number documents to skip
Expand Down Expand Up @@ -308,7 +309,7 @@

Args:
criteria: PyMongo filter for documents to search in
properties: properties to return in grouped documents
properties: fields to include in returned documents. By default, all fields are returned.
sort: Dictionary of sort order for fields. Keys are field names and
values are 1 for ascending or -1 for descending.
hint: Dictionary of indexes to use as hints for query optimizer.
Expand Down Expand Up @@ -553,51 +554,6 @@
"""Hash for the store."""
return hash((self.name, self.last_updated_field))

def groupby(
    self,
    keys: Union[List[str], str],
    criteria: Optional[Dict] = None,
    properties: Union[Dict, List, None] = None,
    sort: Optional[Dict[str, Union[Sort, int]]] = None,
    skip: int = 0,
    limit: int = 0,
) -> Iterator[Tuple[Dict, List[Dict]]]:
    """
    Group the documents returned by ``self.query`` by the values of ``keys``.

    Args:
        keys: a field name, or a list of field names, to group documents by
        criteria: PyMongo filter, forwarded unchanged to ``self.query``
        properties: additional fields to request from ``self.query`` on top
            of the grouping keys. A dict contributes only its keys.
        sort: accepted for signature compatibility; not used by this
            implementation
        skip: accepted for signature compatibility; not used by this
            implementation
        limit: accepted for signature compatibility; not used by this
            implementation

    Returns:
        generator returning tuples of (key, list of elements)
    """
    if not isinstance(keys, list):
        keys = [keys]

    if properties is None:
        properties = []
    if isinstance(properties, dict):
        properties = list(properties.keys())

    def key_values(document):
        return tuple(get(document, field) for field in keys)

    # Keep only documents for which every grouping key is present.
    candidates = []
    for document in self.query(properties=keys + properties, criteria=criteria):
        if all(has(document, field) for field in keys):
            candidates.append(document)

    # itertools.groupby only merges adjacent items, so order by the same key first.
    candidates.sort(key=key_values)

    for values, members in groupby(candidates, key=key_values):
        group_id = {}  # type: ignore
        for field, value in zip(keys, values):
            set_(group_id, field, value)
        yield group_id, list(members)

def __eq__(self, other: object) -> bool:
"""
Check equality for MemoryStore
Expand Down Expand Up @@ -944,3 +900,49 @@
search_doc = {k: d[k] for k in key} if isinstance(key, list) else {key: d[key]}

self._collection.replace_one(search_doc, d, upsert=True)

# Moved this from MemoryStore because MontyDB does not implement aggregate()
# as of May 2022. See https://github.com/davidlatwe/montydb/issues/66
def groupby(
    self,
    keys: Union[List[str], str],
    criteria: Optional[Dict] = None,
    properties: Union[Dict, List, None] = None,
    sort: Optional[Dict[str, Union[Sort, int]]] = None,
    skip: int = 0,
    limit: int = 0,
) -> Iterator[Tuple[Dict, List[Dict]]]:
    """
    Simple grouping function that groups queried documents by ``keys``.

    Documents are fetched with ``self.query``, filtered to those that have
    every grouping key, sorted by their key values and grouped in Python.

    Args:
        keys: a field name, or a list of field names, to group documents by
        criteria: PyMongo filter, forwarded unchanged to ``self.query``
        properties: additional fields to request from ``self.query`` on top
            of the grouping keys. A dict contributes only its keys.
        sort: accepted for signature compatibility; not used by this
            implementation
        skip: accepted for signature compatibility; not used by this
            implementation
        limit: accepted for signature compatibility; not used by this
            implementation

    Returns:
        generator returning tuples of (key, list of elements)
    """
    group_fields = keys if isinstance(keys, list) else [keys]

    if properties is None:
        extra_fields = []
    else:
        extra_fields = properties
    if isinstance(extra_fields, dict):
        # NOTE(review): Codecov appears to flag this dict branch as not
        # covered by tests - confirm and add a test.
        extra_fields = list(extra_fields.keys())

    def values_of(record):
        return tuple(get(record, name) for name in group_fields)

    def has_all_fields(record):
        return all(has(record, name) for name in group_fields)

    matches = self.query(properties=group_fields + extra_fields, criteria=criteria)
    # itertools.groupby only merges adjacent items, so order by the same key first.
    ordered = sorted(filter(has_all_fields, matches), key=values_of)

    for vals, bucket in groupby(ordered, key=values_of):
        header = {}  # type: Dict[Any,Any]
        for name, val in zip(group_fields, vals):
            set_(header, name, val)
        yield header, list(bucket)
Loading
Loading