diff --git a/src/maggma/core/store.py b/src/maggma/core/store.py index b98a20f7f..43e13b13c 100644 --- a/src/maggma/core/store.py +++ b/src/maggma/core/store.py @@ -111,7 +111,7 @@ def query( Args: criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in returned documents. By default, all fields are returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. skip: number documents to skip @@ -161,7 +161,7 @@ def groupby( Args: keys: fields to group documents criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in grouped documents. By default, only the 'id' field is returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. skip: number documents to skip @@ -190,10 +190,12 @@ def query_one( Queries the Store for a single document. Args: - criteria: PyMongo filter for documents to search - properties: properties to return in the document + criteria: PyMongo filter for documents to search in + properties: fields to include in returned documents. By default, all fields are returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. + skip: number documents to skip + limit: limit on total number of documents returned """ return next(self.query(criteria=criteria, properties=properties, sort=sort), None) diff --git a/src/maggma/stores/advanced_stores.py b/src/maggma/stores/advanced_stores.py index 138aa06a3..8a5acf27b 100644 --- a/src/maggma/stores/advanced_stores.py +++ b/src/maggma/stores/advanced_stores.py @@ -243,7 +243,7 @@ def query( Args: criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in returned documents. By default, all fields are returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. skip: number documents to skip @@ -292,7 +292,7 @@ def groupby( Args: keys: fields to group documents criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in grouped documents. By default, only the 'id' field is returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. skip: number documents to skip @@ -317,7 +317,14 @@ def groupby( lazy_substitute(criteria, self.reverse_aliases) - return self.store.groupby(keys=keys, properties=properties, criteria=criteria, skip=skip, limit=limit) + return self.store.groupby( + keys=keys, + properties=properties, + criteria=criteria, + skip=skip, + limit=limit, + sort=sort, + ) def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = None): """ @@ -443,7 +450,7 @@ def query( Args: criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in returned documents. By default, all fields are returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. skip: number documents to skip @@ -468,7 +475,7 @@ def groupby( Args: keys: fields to group documents criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in grouped documents. By default, only the 'id' field is returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. skip: number documents to skip @@ -479,7 +486,14 @@ def groupby( """ criteria = dict(**criteria, **self.sbx_criteria) if criteria else self.sbx_criteria - return self.store.groupby(keys=keys, properties=properties, criteria=criteria, skip=skip, limit=limit) + return self.store.groupby( + keys=keys, + properties=properties, + criteria=criteria, + skip=skip, + limit=limit, + sort=sort, + ) def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = None): """ diff --git a/src/maggma/stores/aws.py b/src/maggma/stores/aws.py index 6059aec68..c1f975593 100644 --- a/src/maggma/stores/aws.py +++ b/src/maggma/stores/aws.py @@ -172,12 +172,12 @@ def query( Queries the Store for a set of documents. Args: - criteria: PyMongo filter for documents to search in. - properties: properties to return in grouped documents. - sort: Dictionary of sort order for fields. Keys are field names and values - are 1 for ascending or -1 for descending. - skip: number documents to skip. - limit: limit on total number of documents returned. + criteria: PyMongo filter for documents to search in + properties: fields to include in returned documents. By default, all fields are returned. + sort: Dictionary of sort order for fields. Keys are field names and + values are 1 for ascending or -1 for descending. + skip: number documents to skip + limit: limit on total number of documents returned """ prop_keys = set() @@ -265,13 +265,13 @@ def groupby( Simple grouping function that will group documents by keys. Args: - keys: fields to group documents. - criteria: PyMongo filter for documents to search in. - properties: properties to return in grouped documents. - sort: Dictionary of sort order for fields. Keys are field names and values - are 1 for ascending or -1 for descending. - skip: number documents to skip. - limit: limit on total number of documents returned. + keys: fields to group documents + criteria: PyMongo filter for documents to search in + properties: fields to include in grouped documents. By default, only the 'id' field is returned. + sort: Dictionary of sort order for fields. Keys are field names and + values are 1 for ascending or -1 for descending. + skip: number documents to skip + limit: limit on total number of documents returned Returns: generator returning tuples of (dict, list of docs) diff --git a/src/maggma/stores/azure.py b/src/maggma/stores/azure.py index 8136b6daa..250d950d4 100644 --- a/src/maggma/stores/azure.py +++ b/src/maggma/stores/azure.py @@ -192,7 +192,7 @@ def query( Args: criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in returned documents. By default, all fields are returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. skip: number documents to skip @@ -266,7 +266,7 @@ def groupby( Args: keys: fields to group documents criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in grouped documents. By default, only the 'id' field is returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. skip: number documents to skip diff --git a/src/maggma/stores/compound_stores.py b/src/maggma/stores/compound_stores.py index 5a27ca467..585b0590c 100644 --- a/src/maggma/stores/compound_stores.py +++ b/src/maggma/stores/compound_stores.py @@ -240,6 +240,7 @@ def query( agg = self._collection.aggregate(pipeline) yield from agg + # TODO - sort kwarg is not passed anywhere def groupby( self, keys: Union[List[str], str], @@ -267,8 +268,8 @@ def query_one(self, criteria=None, properties=None, **kwargs): Get one document. Args: - properties: properties to return in query - criteria: filter for matching + criteria: PyMongo filter for documents to search in + properties: fields to include in returned documents. By default, all fields are returned. kwargs: kwargs for collection.aggregate Returns: @@ -436,7 +437,7 @@ def query( Args: criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in returned documents. By default, all fields are returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. skip: number documents to skip @@ -463,7 +464,7 @@ def groupby( Args: keys: fields to group documents criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in grouped documents. By default, only the 'id' field is returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. skip: number documents to skip diff --git a/src/maggma/stores/file_store.py b/src/maggma/stores/file_store.py index fa1a3296a..b05ef2541 100644 --- a/src/maggma/stores/file_store.py +++ b/src/maggma/stores/file_store.py @@ -388,7 +388,7 @@ def query( # type: ignore Args: criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in returned documents. By default, all fields are returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. hint: Dictionary of indexes to use as hints for query optimizer. diff --git a/src/maggma/stores/gridfs.py b/src/maggma/stores/gridfs.py index 2350dc14d..af8cdd91e 100644 --- a/src/maggma/stores/gridfs.py +++ b/src/maggma/stores/gridfs.py @@ -224,7 +224,7 @@ def query( Args: criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in returned documents. By default, all fields are returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. skip: number documents to skip @@ -289,6 +289,7 @@ def distinct(self, field: str, criteria: Optional[Dict] = None, all_exist: bool return self._files_store.distinct(field=field, criteria=criteria) + # TODO - sort, skip, limit, and properties are not functional def groupby( self, keys: Union[List[str], str], @@ -306,7 +307,7 @@ def groupby( Args: keys: fields to group documents criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in grouped documents. By default, only the 'id' field is returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. skip: number documents to skip diff --git a/src/maggma/stores/mongolike.py b/src/maggma/stores/mongolike.py index 6d431aa1a..346298d59 100644 --- a/src/maggma/stores/mongolike.py +++ b/src/maggma/stores/mongolike.py @@ -189,6 +189,7 @@ def distinct(self, field: str, criteria: Optional[Dict] = None, all_exist: bool return distinct_vals if distinct_vals is not None else [] + # TODO - sort, skip, and limit are not functional def groupby( self, keys: Union[List[str], str], @@ -205,7 +206,7 @@ def groupby( Args: keys: fields to group documents criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in grouped documents. By default, only the 'id' field is returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. skip: number documents to skip @@ -308,7 +309,7 @@ def query( # type: ignore Args: criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in returned documents. By default, all fields are returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. hint: Dictionary of indexes to use as hints for query optimizer. @@ -553,51 +554,6 @@ def __hash__(self): """Hash for the store.""" return hash((self.name, self.last_updated_field)) - def groupby( - self, - keys: Union[List[str], str], - criteria: Optional[Dict] = None, - properties: Union[Dict, List, None] = None, - sort: Optional[Dict[str, Union[Sort, int]]] = None, - skip: int = 0, - limit: int = 0, - ) -> Iterator[Tuple[Dict, List[Dict]]]: - """ - Simple grouping function that will group documents - by keys. - - Args: - keys: fields to group documents - criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents - sort: Dictionary of sort order for fields. Keys are field names and - values are 1 for ascending or -1 for descending. - skip: number documents to skip - limit: limit on total number of documents returned - - Returns: - generator returning tuples of (key, list of elements) - """ - keys = keys if isinstance(keys, list) else [keys] - - if properties is None: - properties = [] - if isinstance(properties, dict): - properties = list(properties.keys()) - - data = [ - doc for doc in self.query(properties=keys + properties, criteria=criteria) if all(has(doc, k) for k in keys) - ] - - def grouping_keys(doc): - return tuple(get(doc, k) for k in keys) - - for vals, group in groupby(sorted(data, key=grouping_keys), key=grouping_keys): - doc = {} # type: ignore - for k, v in zip(keys, vals): - set_(doc, k, v) - yield doc, list(group) - def __eq__(self, other: object) -> bool: """ Check equality for MemoryStore @@ -944,3 +900,49 @@ def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = No search_doc = {k: d[k] for k in key} if isinstance(key, list) else {key: d[key]} self._collection.replace_one(search_doc, d, upsert=True) + + # Moved this from MemoryStore b/c MontyDB does not implement aggregate() as + # of May 2022. See https://github.com/davidlatwe/montydb/issues/66 + def groupby( + self, + keys: Union[List[str], str], + criteria: Optional[Dict] = None, + properties: Union[Dict, List, None] = None, + sort: Optional[Dict[str, Union[Sort, int]]] = None, + skip: int = 0, + limit: int = 0, + ) -> Iterator[Tuple[Dict, List[Dict]]]: + """ + Simple grouping function that will group documents + by keys. + + Args: + keys: fields to group documents + criteria: PyMongo filter for documents to search in + properties: fields to include in grouped documents. By default, only the 'id' field is returned. + sort: Dictionary of sort order for fields. Keys are field names and + values are 1 for ascending or -1 for descending. + skip: number documents to skip + limit: limit on total number of documents returned + Returns: + generator returning tuples of (key, list of elements) + """ + keys = keys if isinstance(keys, list) else [keys] + + if properties is None: + properties = [] + if isinstance(properties, dict): + properties = list(properties.keys()) + + data = [ + doc for doc in self.query(properties=keys + properties, criteria=criteria) if all(has(doc, k) for k in keys) + ] + + def grouping_keys(doc): + return tuple(get(doc, k) for k in keys) + + for vals, group in groupby(sorted(data, key=grouping_keys), key=grouping_keys): + doc = {} # type: Dict[Any,Any] + for k, v in zip(keys, vals): + set_(doc, k, v) + yield doc, list(group) diff --git a/src/maggma/stores/shared_stores.py b/src/maggma/stores/shared_stores.py index 11babc5bc..d647b1106 100644 --- a/src/maggma/stores/shared_stores.py +++ b/src/maggma/stores/shared_stores.py @@ -109,7 +109,7 @@ def query( Args: criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in returned documents. By default, all fields are returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. skip: number documents to skip @@ -167,7 +167,7 @@ def groupby( Args: keys: fields to group documents criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in grouped documents. By default, only the 'id' field is returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. skip: number documents to skip @@ -200,8 +200,8 @@ def query_one( Queries the Store for a single document. Args: - criteria: PyMongo filter for documents to search - properties: properties to return in the document + criteria: PyMongo filter for documents to search in + properties: fields to include in returned documents. By default, all fields are returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. """ @@ -417,7 +417,7 @@ def query( Args: criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in returned documents. By default, all fields are returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. skip: number documents to skip @@ -477,7 +477,7 @@ def groupby( Args: keys: fields to group documents criteria: PyMongo filter for documents to search in - properties: properties to return in grouped documents + properties: fields to include in grouped documents. By default, only the 'id' field is returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. skip: number documents to skip @@ -513,8 +513,8 @@ def query_one( Queries the Store for a single document. Args: - criteria: PyMongo filter for documents to search - properties: properties to return in the document + criteria: PyMongo filter for documents to search in + properties: fields to include in returned documents. By default, all fields are returned. sort: Dictionary of sort order for fields. Keys are field names and values are 1 for ascending or -1 for descending. """ diff --git a/tests/stores/test_mongolike.py b/tests/stores/test_mongolike.py index 997b7edde..41745e2ea 100644 --- a/tests/stores/test_mongolike.py +++ b/tests/stores/test_mongolike.py @@ -69,11 +69,22 @@ def test_mongostore_connect_reconnect(): def test_mongostore_query(mongostore): mongostore._collection.insert_one({"a": 1, "b": 2, "c": 3}) + mongostore._collection.insert_one({"a": 2, "b": 2, "c": 3}) + mongostore._collection.insert_one({"a": 4, "b": 5, "e": 6, "g": {"h": 1}}) assert mongostore.query_one(properties=["a"])["a"] == 1 assert mongostore.query_one(properties=["a"])["a"] == 1 assert mongostore.query_one(properties=["b"])["b"] == 2 assert mongostore.query_one(properties=["c"])["c"] == 3 + # the whole document should be returned when properties=None + assert all(d.get("a") for d in mongostore.query()) + assert all(d.get("b") for d in mongostore.query()) + + # test sort, skip, limit + assert len(list(mongostore.query(limit=2))) == 2 + assert len(list(mongostore.query(skip=1))) == 2 + assert next(iter(mongostore.query(sort={"g": -1}))).get("e") + def test_mongostore_count(mongostore): mongostore._collection.insert_one({"a": 1, "b": 2, "c": 3})