From 7481bb43a517a2899b5cfc018acbba7374292c44 Mon Sep 17 00:00:00 2001 From: Alessio Izzo Date: Tue, 20 Aug 2024 22:59:57 +0200 Subject: [PATCH 1/3] ruff format and ruff check --- conftest.py | 2 +- docs/conf.py | 43 +- integration/fixtures.py | 120 +- integration/test_accounts.py | 1 - integration/test_backfill.py | 192 +- integration/test_dataset.py | 149 +- integration/test_expressions.py | 422 +- integration/test_folders.py | 261 +- integration/test_projects.py | 41 +- integration/test_recodes.py | 301 +- integration/test_scripts.py | 39 +- integration/test_views.py | 13 +- ruff.toml | 2 + scrunch/__init__.py | 16 +- scrunch/accounts.py | 5 +- scrunch/categories.py | 58 +- scrunch/connections.py | 27 +- scrunch/crunchboxes.py | 63 +- scrunch/cubes.py | 2 +- scrunch/datasets.py | 1761 ++-- scrunch/exceptions.py | 4 +- scrunch/expressions.py | 602 +- scrunch/folders.py | 62 +- scrunch/helpers.py | 119 +- scrunch/mutable_dataset.py | 191 +- scrunch/order.py | 157 +- scrunch/scripts.py | 20 +- scrunch/session.py | 8 +- scrunch/streaming_dataset.py | 18 +- scrunch/subentity.py | 125 +- scrunch/tests/conftest.py | 4 +- .../scrunch_workflow_integration_test.py | 1371 +-- scrunch/tests/mock_session.py | 13 +- scrunch/tests/test_accounts.py | 78 +- scrunch/tests/test_categories.py | 224 +- scrunch/tests/test_cubes.py | 1 - scrunch/tests/test_datasets.py | 8569 ++++++++--------- scrunch/tests/test_expressions.py | 4505 +++------ scrunch/tests/test_folders.py | 319 +- scrunch/tests/test_projects.py | 489 +- scrunch/tests/test_recodes.py | 777 +- scrunch/tests/test_scripts.py | 115 +- scrunch/tests/test_teams.py | 66 +- scrunch/tests/test_utilities.py | 260 +- scrunch/tests/test_views.py | 283 +- scrunch/variables.py | 52 +- scrunch/version.py | 4 +- scrunch/views.py | 4 +- setup.py | 97 +- 49 files changed, 10472 insertions(+), 11583 deletions(-) create mode 100644 ruff.toml diff --git a/conftest.py b/conftest.py index 1c8290e..b6dbb4a 100644 --- a/conftest.py +++ b/conftest.py @@ -1 +1 @@ -collect_ignore = ['integration'] +collect_ignore = ["integration"] diff --git a/docs/conf.py b/docs/conf.py index ce02db5..b716d1a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -3,32 +3,25 @@ # flake8: noqa W191 -extensions = [ - 'sphinx.ext.autodoc', - 'jaraco.packaging.sphinx', - 'rst.linker', -] +extensions = ["sphinx.ext.autodoc", "jaraco.packaging.sphinx", "rst.linker"] -master_doc = 'index' +master_doc = "index" link_files = { - '../CHANGES.rst': dict( - using=dict( - GH='https://github.com', - ), - replace=[ - dict( - pattern=r'(Issue )?#(?P\d+)', - url='{package_url}/issues/{issue}', - ), - dict( - pattern=r'^(?m)((?Pv?\d+(\.\d+){1,2}))\n[-=]+\n', - with_scm='{text}\n{rev[timestamp]:%d %b %Y}\n', - ), - dict( - pattern=r'PEP[- ](?P\d+)', - url='https://www.python.org/dev/peps/pep-{pep_number:0>4}/', - ), - ], - ), + "../CHANGES.rst": dict( + using=dict(GH="https://github.com"), + replace=[ + dict( + pattern=r"(Issue )?#(?P\d+)", url="{package_url}/issues/{issue}" + ), + dict( + pattern=r"^(?m)((?Pv?\d+(\.\d+){1,2}))\n[-=]+\n", + with_scm="{text}\n{rev[timestamp]:%d %b %Y}\n", + ), + dict( + pattern=r"PEP[- ](?P\d+)", + url="https://www.python.org/dev/peps/pep-{pep_number:0>4}/", + ), + ], + ) } diff --git a/integration/fixtures.py b/integration/fixtures.py index 5af06d6..7306442 100644 --- a/integration/fixtures.py +++ b/integration/fixtures.py @@ -12,79 +12,64 @@ def setUp(self): username = os.environ["SCRUNCH_USER"] password = os.environ["SCRUNCH_PASS"] self.site = connect(username, password, 
self.host) - assert (self - .site is not None), "Unable to connect to %s" % self.host + assert self.site is not None, "Unable to connect to %s" % self.host # These are the categories that multiple response use. Selected and Not Selected MR_CATS = [ - {"id": 1, "name": "Selected", "missing": False, "numeric_value": None, "selected": True}, - {"id": 3, "name": "Other Selected", "missing": False, "numeric_value": None, "selected": True}, - {"id": 2, "name": "Not selected", "missing": False, "numeric_value": None, "selected": False} + { + "id": 1, + "name": "Selected", + "missing": False, + "numeric_value": None, + "selected": True, + }, + { + "id": 3, + "name": "Other Selected", + "missing": False, + "numeric_value": None, + "selected": True, + }, + { + "id": 2, + "name": "Not selected", + "missing": False, + "numeric_value": None, + "selected": False, + }, ] NEWS_DATASET = { - "caseid": { - "name": "Case ID", - "type": "numeric" - }, - "age": { - "name": "Age", - "type": 'numeric', - }, + "caseid": {"name": "Case ID", "type": "numeric"}, + "age": {"name": "Age", "type": "numeric"}, "newssource": { "name": "News source", "type": "multiple_response", "categories": MR_CATS, - "subreferences": [{ - "name": "Facebook", - "alias": "newssource_1" - }, { - "name": "Twitter", - "alias": "newssource_2" - }, { - "name": "Google news", - "alias": "newssource_3" - }, { - "name": "Reddit", - "alias": "newssource_4" - }, { - "name": "NY Times (Print)", - "alias": "newssource_5" - }, { - "name": "Washington Post (Print)", - "alias": "newssource_6" - }, { - "name": "NBC News", - "alias": "newssource_7" - }, { - "name": "NPR", - "alias": "newssource_8" - }, { - "name": "Fox", - "alias": "newssource_9" - }, { - "name": "Local radio", - "alias": "newssource_10" - }] + "subreferences": [ + {"name": "Facebook", "alias": "newssource_1"}, + {"name": "Twitter", "alias": "newssource_2"}, + {"name": "Google news", "alias": "newssource_3"}, + {"name": "Reddit", "alias": "newssource_4"}, + {"name": "NY Times (Print)", "alias": "newssource_5"}, + {"name": "Washington Post (Print)", "alias": "newssource_6"}, + {"name": "NBC News", "alias": "newssource_7"}, + {"name": "NPR", "alias": "newssource_8"}, + {"name": "Fox", "alias": "newssource_9"}, + {"name": "Local radio", "alias": "newssource_10"}, + ], }, "socialmedia": { "name": "Accounts in social media", "type": "multiple_response", "categories": MR_CATS, - "subreferences": [{ - "name": "Facebook", - "alias": "socialmedia_1" - }, { - "name": "Twitter", - "alias": "socialmedia_2" - }, { - "name": "Google+", - "alias": "socialmedia_3" - }, { - "name": "VK", - "alias": "socialmedia_4" - }] + "subreferences": [ + {"name": "Facebook", "alias": "socialmedia_1"}, + {"name": "Twitter", "alias": "socialmedia_2"}, + {"name": "Google+", "alias": "socialmedia_3"}, + {"name": "VK", "alias": "socialmedia_4"}, + ], }, "gender": { "name": "Gender", @@ -93,8 +78,8 @@ def setUp(self): {"id": 1, "name": "Female", "missing": False, "numeric_value": None}, {"id": 2, "name": "Male", "missing": False, "numeric_value": None}, {"id": -1, "name": "No Data", "missing": True, "numeric_value": None}, - ] - } + ], + }, } RECODES_CSV_OUTPUT = 
"""newssource_1,newssource_2,newssource_3,newssource_4,newssource_5,newssource_6,newssource_7,newssource_8,newssource_9,newssource_10,age,socialmedia_1,socialmedia_2,socialmedia_3,socialmedia_4,caseid,gender,agerange,origintype_1,origintype_2,origintype_3,origintype_4,origintype_copy_1,origintype_copy_2,origintype_copy_3,origintype_copy_4,onlinenewssource_1,onlinenewssource_2,over35 @@ -122,7 +107,7 @@ def setUp(self): "socialmedia_2": [1, 2, 1, 1, 2, 1, 2], "socialmedia_3": [2, 2, 1, 2, 2, 1, 2], "socialmedia_4": [2, 2, 1, 2, 2, 2, 2], - "gender": [1, 2, 2, 1, 1, 1, 2] + "gender": [1, 2, 2, 1, 1, 1, 2], } @@ -131,14 +116,13 @@ def mr_in(ds, mr_alias, subvars): Temporary helper until scrunch can parse correctly the expression: mr.has_any([sv1, sv2...]) """ - variables = ds.resource.variables.by('alias') + variables = ds.resource.variables.by("alias") mr = variables[mr_alias].entity - subvariables = mr.subvariables.by('alias') + subvariables = mr.subvariables.by("alias") return { - 'function': 'any', - 'args': [{ - 'variable': mr.self - }, { - 'column': [subvariables[subvar_alias(mr_alias, sv)].id for sv in subvars] - }] + "function": "any", + "args": [ + {"variable": mr.self}, + {"column": [subvariables[subvar_alias(mr_alias, sv)].id for sv in subvars]}, + ], } diff --git a/integration/test_accounts.py b/integration/test_accounts.py index 2a26b6d..51c416b 100644 --- a/integration/test_accounts.py +++ b/integration/test_accounts.py @@ -10,7 +10,6 @@ class TestAccount(BaseIntegrationTestCase): - def test_current_account(self): act = Account.current_account(self.site) assert act.resource.self == self.site.account.self diff --git a/integration/test_backfill.py b/integration/test_backfill.py index b84ea80..ba33adf 100644 --- a/integration/test_backfill.py +++ b/integration/test_backfill.py @@ -13,16 +13,12 @@ class TestBackFill(BaseIntegrationTestCase): def _prepare_ds(self, values): ds = self.site.datasets.create( - as_entity({"name": "test_backfill_values"})).refresh() + as_entity({"name": "test_backfill_values"}) + ).refresh() # We need a numeric PK pk = ds.variables.create( as_entity( - { - "name": "pk", - "alias": "pk", - "type": "numeric", - "values": values["pk"], - } + {"name": "pk", "alias": "pk", "type": "numeric", "values": values["pk"]} ) ) @@ -140,17 +136,21 @@ def _prepare_ds(self, values): return ds def test_backfill_values(self): - ds = self._prepare_ds({ - "pk": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - "cat1": [1, 2, 3, -1, -1, -1, 1, 2, 3, 1], - "cat2": [11, 22, 33, -1, -1, -1, 11, 22, 33, 11], - "cat3": [1, 2, 3, -1, -1, -1, 1, 2, 3, 1], - }) - csv_file = StringIO(textwrap.dedent("""pk,cat1,cat2 + ds = self._prepare_ds( + { + "pk": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "cat1": [1, 2, 3, -1, -1, -1, 1, 2, 3, 1], + "cat2": [11, 22, 33, -1, -1, -1, 11, 22, 33, 11], + "cat3": [1, 2, 3, -1, -1, -1, 1, 2, 3, 1], + } + ) + csv_file = StringIO( + textwrap.dedent("""pk,cat1,cat2 4,1,22 5,2,33 6,3,11 - """)) + """) + ) scrunch_dataset = get_mutable_dataset(ds.body.id, self.site) rows_expr = "pk >= 4 and pk <=6" @@ -167,30 +167,37 @@ def test_backfill_values(self): ds.delete() def test_backfill_on_subvars(self): - ds = self._prepare_ds({ - "pk": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - "cat1": [1, 2, 3, -1, -1, -1, 1, 2, 3, 1], - "cat2": [11, 22, 33, -1, -1, -1, 11, 22, 33, 11], - "cat3": [2, 3, 1, -1, -1, -1, 2, 3, 1, 2] - }) + ds = self._prepare_ds( + { + "pk": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "cat1": [1, 2, 3, -1, -1, -1, 1, 2, 3, 1], + "cat2": [11, 22, 33, -1, -1, -1, 11, 22, 33, 11], + "cat3": 
[2, 3, 1, -1, -1, -1, 2, 3, 1, 2], + } + ) vars = ds.variables.by("alias") - array = ds.variables.create(as_entity({ - "name": "array", - "alias": "array", - "type": "categorical_array", - "subvariables": [vars["cat1"].entity_url, vars["cat3"].entity_url], - })).refresh() - - csv_file = StringIO(textwrap.dedent("""pk,cat1,cat3 + array = ds.variables.create( + as_entity( + { + "name": "array", + "alias": "array", + "type": "categorical_array", + "subvariables": [vars["cat1"].entity_url, vars["cat3"].entity_url], + } + ) + ).refresh() + + csv_file = StringIO( + textwrap.dedent("""pk,cat1,cat3 4,1,2 5,2,3 6,3,1 - """)) + """) + ) scrunch_dataset = get_mutable_dataset(ds.body.id, self.site) rows_expr = "pk >= 4 and pk <=6" - scrunch_dataset.backfill_from_csv(["cat1", "cat3"], "pk", csv_file, - rows_expr) + scrunch_dataset.backfill_from_csv(["cat1", "cat3"], "pk", csv_file, rows_expr) data = ds.follow("table", "limit=10")["data"] assert data[array.body["id"]] == [ @@ -203,34 +210,46 @@ def test_backfill_on_subvars(self): [1, 2], [2, 3], [3, 1], - [1, 2] + [1, 2], ] ds.delete() def test_backfill_on_subvars_full_row(self): - ds = self._prepare_ds({ - "pk": [1, 2, 3, 4, 5], - "cat1": [1, 2, 3, -1, -1], - "cat2": [11, 22, 33, -1, -1], - "cat3": [2, 3, 1, -1, -1] - }) + ds = self._prepare_ds( + { + "pk": [1, 2, 3, 4, 5], + "cat1": [1, 2, 3, -1, -1], + "cat2": [11, 22, 33, -1, -1], + "cat3": [2, 3, 1, -1, -1], + } + ) vars = ds.variables.by("alias") - subvars = [vars["cat1"].entity_url, vars["cat2"].entity_url, vars["cat3"].entity_url] - array = ds.variables.create(as_entity({ - "name": "array", - "alias": "array", - "type": "categorical_array", - "subvariables": subvars, - })).refresh() - - csv_file = StringIO(textwrap.dedent("""pk,cat1,cat3 + subvars = [ + vars["cat1"].entity_url, + vars["cat2"].entity_url, + vars["cat3"].entity_url, + ] + array = ds.variables.create( + as_entity( + { + "name": "array", + "alias": "array", + "type": "categorical_array", + "subvariables": subvars, + } + ) + ).refresh() + + csv_file = StringIO( + textwrap.dedent("""pk,cat1,cat3 1,1,2 2,2,3 3,3,1 4,2,3 5,2,1 - """)) + """) + ) scrunch_dataset = get_mutable_dataset(ds.body.id, self.site) # Not including a row_filter, same as passing None @@ -241,27 +260,30 @@ def test_backfill_on_subvars_full_row(self): [2, 2, 3], [3, 3, 1], [2, {"?": -1}, 3], - [2, {"?": -1}, 1] + [2, {"?": -1}, 1], ] ds.delete() def test_backfill_on_non_missing(self): - ds = self._prepare_ds({ - "pk": [1, 2, 3, 4, 5], - "cat1": [1, 2, 3, 3, 3], - "cat2": [11, 22, 33, 11, 22], - "cat3": [1, 1, 1, 1, 1] - }) - csv_file = StringIO(textwrap.dedent("""pk,cat1,cat3 + ds = self._prepare_ds( + { + "pk": [1, 2, 3, 4, 5], + "cat1": [1, 2, 3, 3, 3], + "cat2": [11, 22, 33, 11, 22], + "cat3": [1, 1, 1, 1, 1], + } + ) + csv_file = StringIO( + textwrap.dedent("""pk,cat1,cat3 4,1,2 5,2,3 - """)) + """) + ) scrunch_dataset = get_mutable_dataset(ds.body.id, self.site) rows_expr = "pk >= 4 and pk <=5" - scrunch_dataset.backfill_from_csv(["cat1", "cat3"], "pk", csv_file, - rows_expr) + scrunch_dataset.backfill_from_csv(["cat1", "cat3"], "pk", csv_file, rows_expr) vars = ds.variables.by("alias") data = ds.follow("table", "limit=10")["data"] @@ -278,15 +300,18 @@ def test_bad_csv(self): "cat3": [1, -1, 3, -1], } ds = self._prepare_ds(original_data) - csv_file = StringIO(textwrap.dedent("""pk,BOGUS,BAD + csv_file = StringIO( + textwrap.dedent("""pk,BOGUS,BAD 2,1,22 - """)) + """) + ) scrunch_dataset = get_mutable_dataset(ds.body.id, self.site) rows_expr = "pk == 2" with 
pytest.raises(ValueError) as err: - scrunch_dataset.backfill_from_csv(["cat1", "cat2"], "pk", csv_file, - rows_expr) + scrunch_dataset.backfill_from_csv( + ["cat1", "cat2"], "pk", csv_file, rows_expr + ) assert err.value.args[0].startswith("Invalid data provided: Expected column ") # Verify that the backfill didn't proceed @@ -301,23 +326,26 @@ def test_bad_csv(self): ds.delete() def test_with_exclusion_filter(self): - ds = self._prepare_ds({ - "pk": [1, 2, 3, 4, 5], - "cat1": [1, 2, 3, 3, 3], - "cat2": [11, 11, 11, 11, 11], - "cat3": [1, 1, 1, 1, 1] - }) - csv_file = StringIO(textwrap.dedent("""pk,cat1,cat3 + ds = self._prepare_ds( + { + "pk": [1, 2, 3, 4, 5], + "cat1": [1, 2, 3, 3, 3], + "cat2": [11, 11, 11, 11, 11], + "cat3": [1, 1, 1, 1, 1], + } + ) + csv_file = StringIO( + textwrap.dedent("""pk,cat1,cat3 4,1,2 5,2,3 - """)) + """) + ) scrunch_dataset = get_mutable_dataset(ds.body.id, self.site) excl = "pk == 4" scrunch_dataset.exclude(excl) rows_expr = "pk in [4, 5]" - scrunch_dataset.backfill_from_csv(["cat1", "cat3"], "pk", csv_file, - rows_expr) + scrunch_dataset.backfill_from_csv(["cat1", "cat3"], "pk", csv_file, rows_expr) # Exclusion gets set after backfilling assert scrunch_dataset.get_exclusion() == excl @@ -331,15 +359,17 @@ def test_with_exclusion_filter(self): ds.delete() def test_too_big_file(self): - ds = self._prepare_ds({ - "pk": [1, 2, 3, 4, 5], - "cat1": [1, 2, 3, 3, 3], - "cat2": [11, 11, 11, 11, 11], - "cat3": [1, 1, 1, 1, 1] - }) + ds = self._prepare_ds( + { + "pk": [1, 2, 3, 4, 5], + "cat1": [1, 2, 3, 3, 3], + "cat2": [11, 11, 11, 11, 11], + "cat3": [1, 1, 1, 1, 1], + } + ) scrunch_dataset = get_mutable_dataset(ds.body.id, self.site) - size_200MB = 200 * 2 ** 20 + size_200MB = 200 * 2**20 csv_file = StringIO("x" * size_200MB) with pytest.raises(ValueError) as err: scrunch_dataset.backfill_from_csv(["cat1"], "pk", csv_file, None) diff --git a/integration/test_dataset.py b/integration/test_dataset.py index 54b5558..e6d51e7 100644 --- a/integration/test_dataset.py +++ b/integration/test_dataset.py @@ -11,7 +11,9 @@ class TestDatasetMethods(BaseIntegrationTestCase): def test_replace_values(self): - ds = self.site.datasets.create(as_entity({"name": "test_replace_values"})).refresh() + ds = self.site.datasets.create( + as_entity({"name": "test_replace_values"}) + ).refresh() variable = ds.variables.create( as_entity( { @@ -23,9 +25,7 @@ def test_replace_values(self): ) ).refresh() scrunch_dataset = get_mutable_dataset(ds.body.id, self.site) - resp = scrunch_dataset.replace_values({ - "my_var": 4 - }, filter="missing(my_var)") + resp = scrunch_dataset.replace_values({"my_var": 4}, filter="missing(my_var)") if resp is not None: # We got a 202 response. Scrunch should have waited for the # progress to finish @@ -34,8 +34,8 @@ def test_replace_values(self): progress_status = progress.payload["value"] assert ( # Check for new or old complete task message - progress_status == {'progress': 100, 'message': 'complete'} - or progress_status == {'progress': 100, 'message': 'completed'} + progress_status == {"progress": 100, "message": "complete"} + or progress_status == {"progress": 100, "message": "completed"} ) else: # This means the API handled this synchronously. 
204 response @@ -47,9 +47,14 @@ def test_replace_values(self): finally: ds.delete() - @pytest.mark.skipif(os.environ.get("LOCAL_INTEGRATION") is None, reason="Do not run this test during CI/CD") + @pytest.mark.skipif( + os.environ.get("LOCAL_INTEGRATION") is None, + reason="Do not run this test during CI/CD", + ) def test_append_dataset(self): - ds = self.site.datasets.create(as_entity({"name": "test_scrunch_append_dataset"})).refresh() + ds = self.site.datasets.create( + as_entity({"name": "test_scrunch_append_dataset"}) + ).refresh() ds.variables.create( as_entity( { @@ -77,7 +82,9 @@ def test_append_dataset(self): } ) ).refresh() - ds_to_append = self.site.datasets.create(as_entity({"name": "test_scrunch_dataset_to_append"})).refresh() + ds_to_append = self.site.datasets.create( + as_entity({"name": "test_scrunch_dataset_to_append"}) + ).refresh() ds_to_append.variables.create( as_entity( { @@ -109,7 +116,7 @@ def test_append_dataset(self): scrunch_dataset_to_append = get_mutable_dataset(ds_to_append.body.id, self.site) try: scrunch_dataset.append_dataset(scrunch_dataset_to_append) - data = ds.follow("table", "limit=20")['data'] + data = ds.follow("table", "limit=20")["data"] datetime_values = data[datetime_var.body.id] # We should have 5 (original dataset) + 5 (from the append_dataset) assert len(datetime_values) == 10 @@ -130,9 +137,14 @@ def test_append_dataset(self): ds.delete() ds_to_append.delete() - @pytest.mark.skipif(os.environ.get("LOCAL_INTEGRATION") is None, reason="Do not run this test during CI/CD") + @pytest.mark.skipif( + os.environ.get("LOCAL_INTEGRATION") is None, + reason="Do not run this test during CI/CD", + ) def test_append_dataset_with_filter(self): - ds = self.site.datasets.create(as_entity({"name": "test_scrunch_append_dataset"})).refresh() + ds = self.site.datasets.create( + as_entity({"name": "test_scrunch_append_dataset"}) + ).refresh() ds.variables.create( as_entity( { @@ -160,7 +172,9 @@ def test_append_dataset_with_filter(self): } ) ).refresh() - ds_to_append = self.site.datasets.create(as_entity({"name": "test_scrunch_dataset_to_append"})).refresh() + ds_to_append = self.site.datasets.create( + as_entity({"name": "test_scrunch_dataset_to_append"}) + ).refresh() ds_to_append.variables.create( as_entity( { @@ -197,19 +211,16 @@ def test_append_dataset_with_filter(self): try: resp = scrunch_dataset.append_dataset( scrunch_dataset_to_append, - filter="my_datetime_var > '{}'".format(filter_value) + filter="my_datetime_var > '{}'".format(filter_value), ) - assert resp['body']['filter'] == { - 'args': [ - { - 'variable': datetime_append_var['self'] - }, { - 'value': filter_value - } + assert resp["body"]["filter"] == { + "args": [ + {"variable": datetime_append_var["self"]}, + {"value": filter_value}, ], - 'function': '>' + "function": ">", } - data = ds.follow("table", "limit=20")['data'] + data = ds.follow("table", "limit=20")["data"] datetime_values = data[datetime_var.body.id] # We should have 5 (original dataset) + 2 (filtered in append_dataset) assert len(datetime_values) == 7 @@ -227,9 +238,14 @@ def test_append_dataset_with_filter(self): ds.delete() ds_to_append.delete() - @pytest.mark.skipif(os.environ.get("LOCAL_INTEGRATION") is None, reason="Do not run this test during CI/CD") + @pytest.mark.skipif( + os.environ.get("LOCAL_INTEGRATION") is None, + reason="Do not run this test during CI/CD", + ) def test_append_dataset_with_filter_and_exclusion(self): - ds = self.site.datasets.create(as_entity({"name": 
"test_scrunch_append_dataset_with_filter_exclusion"})).refresh() + ds = self.site.datasets.create( + as_entity({"name": "test_scrunch_append_dataset_with_filter_exclusion"}) + ).refresh() ds.variables.create( as_entity( { @@ -258,11 +274,7 @@ def test_append_dataset_with_filter_and_exclusion(self): ) ).refresh() ds_to_append = self.site.datasets.create( - as_entity( - { - "name": "test_scrunch_dataset_with_filter_exclusion_to_append" - } - ) + as_entity({"name": "test_scrunch_dataset_with_filter_exclusion_to_append"}) ).refresh() ds_to_append.variables.create( as_entity( @@ -306,9 +318,9 @@ def test_append_dataset_with_filter_and_exclusion(self): try: scrunch_dataset.append_dataset( scrunch_dataset_to_append, - filter="my_datetime_var > '{}'".format(filter_value) + filter="my_datetime_var > '{}'".format(filter_value), ) - data = ds.follow("table", "limit=20")['data'] + data = ds.follow("table", "limit=20")["data"] datetime_values = data[datetime_var.body.id] # We should have 5 (original dataset) + 1 (filtered in append_dataset) assert len(datetime_values) == 6 @@ -325,9 +337,14 @@ def test_append_dataset_with_filter_and_exclusion(self): ds.delete() ds_to_append.delete() - @pytest.mark.skipif(os.environ.get("LOCAL_INTEGRATION") is None, reason="Do not run this test during CI/CD") + @pytest.mark.skipif( + os.environ.get("LOCAL_INTEGRATION") is None, + reason="Do not run this test during CI/CD", + ) def test_append_dataset_with_variables_list_and_exclusion(self): - ds = self.site.datasets.create(as_entity({"name": "test_scrunch_append_dataset_with_variable_exclusion"})).refresh() + ds = self.site.datasets.create( + as_entity({"name": "test_scrunch_append_dataset_with_variable_exclusion"}) + ).refresh() ds.variables.create( as_entity( { @@ -356,11 +373,7 @@ def test_append_dataset_with_variables_list_and_exclusion(self): ) ).refresh() ds_to_append = self.site.datasets.create( - as_entity( - { - "name": "test_scrunch_dataset_with_exclusion_to_append" - } - ) + as_entity({"name": "test_scrunch_dataset_with_exclusion_to_append"}) ).refresh() ds_to_append.variables.create( as_entity( @@ -399,10 +412,9 @@ def test_append_dataset_with_variables_list_and_exclusion(self): scrunch_dataset_to_append.exclude(exclusion) try: scrunch_dataset.append_dataset( - scrunch_dataset_to_append, - variables=["my_var", "my_datetime_var"] + scrunch_dataset_to_append, variables=["my_var", "my_datetime_var"] ) - data = ds.follow("table", "limit=20")['data'] + data = ds.follow("table", "limit=20")["data"] datetime_values = data[datetime_var.body.id] # We should have 5 (original dataset) + 4 (filtered by exclusion in append_dataset) assert len(datetime_values) == 9 @@ -422,11 +434,16 @@ def test_append_dataset_with_variables_list_and_exclusion(self): ds.delete() ds_to_append.delete() - @pytest.mark.skipif(os.environ.get("LOCAL_INTEGRATION") is None, reason="Do not run this test during CI/CD") + @pytest.mark.skipif( + os.environ.get("LOCAL_INTEGRATION") is None, + reason="Do not run this test during CI/CD", + ) def test_append_dataset_with_variables_list_filters_and_exclusion(self): - ds = self.site.datasets.create(as_entity({ - "name": "test_scrunch_append_dataset_with_variable_filters_exclusion" - })).refresh() + ds = self.site.datasets.create( + as_entity( + {"name": "test_scrunch_append_dataset_with_variable_filters_exclusion"} + ) + ).refresh() ds.variables.create( as_entity( { @@ -455,11 +472,7 @@ def test_append_dataset_with_variables_list_filters_and_exclusion(self): ) ).refresh() ds_to_append = 
self.site.datasets.create( - as_entity( - { - "name": "test_scrunch_dataset_with_exclusion_to_append" - } - ) + as_entity({"name": "test_scrunch_dataset_with_exclusion_to_append"}) ).refresh() ds_to_append.variables.create( as_entity( @@ -503,9 +516,9 @@ def test_append_dataset_with_variables_list_filters_and_exclusion(self): scrunch_dataset.append_dataset( scrunch_dataset_to_append, variables=["my_var", "my_datetime_var"], - filter="my_datetime_var > '{}'".format(filter_value) + filter="my_datetime_var > '{}'".format(filter_value), ) - data = ds.follow("table", "limit=20")['data'] + data = ds.follow("table", "limit=20")["data"] datetime_values = data[datetime_var.body.id] # We should have 5 (original dataset) + 1 (filtered by exclusion and filter in append_dataset) assert len(datetime_values) == 6 @@ -525,20 +538,26 @@ def test_append_dataset_with_variables_list_filters_and_exclusion(self): class TestCategories(BaseIntegrationTestCase): def test_edit_category(self): - ds = self.site.datasets.create(as_entity({"name": "test_edit_category"})).refresh() + ds = self.site.datasets.create( + as_entity({"name": "test_edit_category"}) + ).refresh() categories = [ {"id": 1, "name": "One", "missing": False, "numeric_value": None}, {"id": 2, "name": "Two", "missing": False, "numeric_value": None}, - {"id": -1, "name": "No Data", "missing": True, "numeric_value": None} + {"id": -1, "name": "No Data", "missing": True, "numeric_value": None}, ] - my_cat = ds.variables.create(as_entity({ - "name": "my_cat", - "alias": "my_cat", - "type": "categorical", - "categories": categories - })) + my_cat = ds.variables.create( + as_entity( + { + "name": "my_cat", + "alias": "my_cat", + "type": "categorical", + "categories": categories, + } + ) + ) scrunch_dataset = get_mutable_dataset(ds.body.id, self.site) my_cat = scrunch_dataset[my_cat.body["alias"]] # Ensure refreshed var @@ -546,7 +565,11 @@ def test_edit_category(self): my_cat_reloaded = scrunch_dataset[my_cat.alias] try: - assert my_cat_reloaded.categories[1].as_dict() == dict(categories[0], numeric_value=1, selected=False) - assert my_cat_reloaded.categories[2].as_dict() == dict(categories[1], selected=False) + assert my_cat_reloaded.categories[1].as_dict() == dict( + categories[0], numeric_value=1, selected=False + ) + assert my_cat_reloaded.categories[2].as_dict() == dict( + categories[1], selected=False + ) finally: ds.delete() diff --git a/integration/test_expressions.py b/integration/test_expressions.py index 1809a1f..a858732 100644 --- a/integration/test_expressions.py +++ b/integration/test_expressions.py @@ -10,37 +10,33 @@ from scrunch.mutable_dataset import get_mutable_dataset -@pytest.mark.skipif(os.environ.get("LOCAL_INTEGRATION") is None, reason="Do not run this test during CI/CD") +@pytest.mark.skipif( + os.environ.get("LOCAL_INTEGRATION") is None, + reason="Do not run this test during CI/CD", +) class TestExpressions(BaseIntegrationTestCase): - def _create_mr_dataset(self, name, rows): _dataset_metadata = { "mr_variable": { "name": "Multiple Response", "type": "multiple_response", "categories": MR_CATS, - "subreferences": [{ - "name": "Response 1", - "alias": "response_1" - }, { - "name": "Response 2", - "alias": "response_2" - }, { - "name": "Response 3", - "alias": "response_3" - }] - }, + "subreferences": [ + {"name": "Response 1", "alias": "response_1"}, + {"name": "Response 2", "alias": "response_2"}, + {"name": "Response 3", "alias": "response_3"}, + ], + } } - ds = self.site.datasets.create({ - 'element': 'shoji:entity', - 'body': { - 
'name': name, - 'table': { - 'element': 'crunch:table', - 'metadata': _dataset_metadata + ds = self.site.datasets.create( + { + "element": "shoji:entity", + "body": { + "name": name, + "table": {"element": "crunch:table", "metadata": _dataset_metadata}, }, } - }).refresh() + ).refresh() Importer().append_rows(ds, rows) scrunch_dataset = get_mutable_dataset(ds.body.id, self.site) return ds, scrunch_dataset @@ -50,18 +46,18 @@ def test_multiple_response_all_add_filter_value(self): ["response_1", "response_2", "response_3"], [2, 2, 1], [1, 2, 2], - [1, 1, 1] + [1, 1, 1], ] - ds, scrunch_dataset = self._create_mr_dataset('test_mr_any', ds_rows) + ds, scrunch_dataset = self._create_mr_dataset("test_mr_any", ds_rows) _filter = "mr_variable.all([1])" try: - resp = scrunch_dataset.add_filter(name='filter_1', expr=_filter) - data = ds.follow("table", "limit=20&filter={}".format(resp.resource.self))['data'] + resp = scrunch_dataset.add_filter(name="filter_1", expr=_filter) + data = ds.follow("table", "limit=20&filter={}".format(resp.resource.self))[ + "data" + ] ds_variables = ds.variables.by("alias") mr_variable_id = ds_variables["mr_variable"].id - assert data[mr_variable_id] == [ - [1, 1, 1] - ] + assert data[mr_variable_id] == [[1, 1, 1]] finally: # cleanup ds.delete() @@ -71,18 +67,18 @@ def test_multiple_response_any_add_filter_single_subvar(self): ["response_1", "response_2", "response_3"], [2, 2, 1], [1, 2, 2], - [2, 1, 1] + [2, 1, 1], ] - ds, scrunch_dataset = self._create_mr_dataset('test_mr_any', ds_rows) + ds, scrunch_dataset = self._create_mr_dataset("test_mr_any", ds_rows) _filter = "mr_variable.any([response_1])" try: - resp = scrunch_dataset.add_filter(name='filter_1', expr=_filter) - data = ds.follow("table", "limit=20&filter={}".format(resp.resource.self))['data'] + resp = scrunch_dataset.add_filter(name="filter_1", expr=_filter) + data = ds.follow("table", "limit=20&filter={}".format(resp.resource.self))[ + "data" + ] ds_variables = ds.variables.by("alias") mr_variable_id = ds_variables["mr_variable"].id - assert data[mr_variable_id] == [ - [1, 2, 2] - ] + assert data[mr_variable_id] == [[1, 2, 2]] finally: # cleanup ds.delete() @@ -92,199 +88,168 @@ def test_multiple_response_any_add_filter_subvar(self): ["response_1", "response_2", "response_3"], [2, 2, 1], [2, 2, 2], - [2, 1, 1] + [2, 1, 1], ] - ds, scrunch_dataset = self._create_mr_dataset('test_mr_any', ds_rows) + ds, scrunch_dataset = self._create_mr_dataset("test_mr_any", ds_rows) _filter = "mr_variable.any([response_1, response_2])" try: - resp = scrunch_dataset.add_filter(name='filter_1', expr=_filter) - data = ds.follow("table", "limit=20&filter={}".format(resp.resource.self))['data'] + resp = scrunch_dataset.add_filter(name="filter_1", expr=_filter) + data = ds.follow("table", "limit=20&filter={}".format(resp.resource.self))[ + "data" + ] ds_variables = ds.variables.by("alias") mr_variable_id = ds_variables["mr_variable"].id - assert data[mr_variable_id] == [ - [2, 1, 1] - ] + assert data[mr_variable_id] == [[2, 1, 1]] finally: # cleanup ds.delete() def test_categorical_array_any_add_filter(self): - ds = self.site.datasets.create(as_entity({"name": "test_any_categorical_add_filter"})).refresh() - ds.variables.create(as_entity({ - "name": "Categorical Var", - "alias": "categorical_var", - "type": "categorical_array", - "categories": [ - { - 'id': 1, - 'missing': False, - 'name': 'Very interested', - 'numeric_value': 1 - }, - { - 'id': 2, - 'missing': False, - 'name': 'Somewhat interested', - 'numeric_value': 2 - }, + ds 
= self.site.datasets.create( + as_entity({"name": "test_any_categorical_add_filter"}) + ).refresh() + ds.variables.create( + as_entity( { - 'id': 3, - 'missing': False, - 'name': 'A little interested', - 'numeric_value': 3 - }, - { - 'id': -1, - 'missing': True, - 'name': 'No Data', - 'numeric_value': None + "name": "Categorical Var", + "alias": "categorical_var", + "type": "categorical_array", + "categories": [ + { + "id": 1, + "missing": False, + "name": "Very interested", + "numeric_value": 1, + }, + { + "id": 2, + "missing": False, + "name": "Somewhat interested", + "numeric_value": 2, + }, + { + "id": 3, + "missing": False, + "name": "A little interested", + "numeric_value": 3, + }, + { + "id": -1, + "missing": True, + "name": "No Data", + "numeric_value": None, + }, + ], + "subvariables": [ + {"alias": "response_1", "name": "Response1"}, + {"alias": "response_2", "name": "Response2"}, + {"alias": "response_3", "name": "Response3"}, + ], + "values": [[1, 3, -1], [2, 1, 1], [2, 3, 2]], } - ], - 'subvariables': [ - { - 'alias': 'response_1', - 'name': 'Response1' - }, - { - 'alias': 'response_2', - 'name': 'Response2' - }, - { - 'alias': 'response_3', - 'name': 'Response3' - }, - ], - "values": [ - [1, 3, -1], - [2, 1, 1], - [2, 3, 2] - ] - })) + ) + ) scrunch_dataset = get_mutable_dataset(ds.body.id, self.site) _filter = "categorical_var.any([1])" try: - resp = scrunch_dataset.add_filter(name='filter_1', expr=_filter) - data = ds.follow("table", "limit=20&filter={}".format(resp.resource.self))['data'] + resp = scrunch_dataset.add_filter(name="filter_1", expr=_filter) + data = ds.follow("table", "limit=20&filter={}".format(resp.resource.self))[ + "data" + ] ds_variables = ds.variables.by("alias") cat_var_variable_id = ds_variables["categorical_var"].id - assert data[cat_var_variable_id] == [ - [1, 3, {"?": -1}], - [2, 1, 1] - ] + assert data[cat_var_variable_id] == [[1, 3, {"?": -1}], [2, 1, 1]] assert resp.resource.body.entity.body.definition == { "function": "or", "args": [ { "function": "in", "args": [ - { - "var": "categorical_var", - "axes": ["response_1"] - }, - { - "value": [1] - } - ] + {"var": "categorical_var", "axes": ["response_1"]}, + {"value": [1]}, + ], }, { "function": "in", - "args": [{ - "var": "categorical_var", - "axes": ["response_2"] - }, - {"value": [1] - } - ] + "args": [ + {"var": "categorical_var", "axes": ["response_2"]}, + {"value": [1]}, + ], }, { "function": "in", - "args": [{ - "var": "categorical_var", - "axes": ["response_3"] - }, - {"value": [1] - } - ] - } - ] + "args": [ + {"var": "categorical_var", "axes": ["response_3"]}, + {"value": [1]}, + ], + }, + ], } finally: # cleanup ds.delete() def test_categorical_array_any_w_bracket_subvar(self): - ds = self.site.datasets.create(as_entity({"name": "test_any_categorical_w_bracket_add_filter"})).refresh() - cat_var = ds.variables.create(as_entity({ - "name": "Categorical Var", - "alias": "categorical_var", - "type": "categorical_array", - "categories": [ - { - 'id': 1, - 'missing': False, - 'name': 'Very interested', - 'numeric_value': 1 - }, - { - 'id': 2, - 'missing': False, - 'name': 'Somewhat interested', - 'numeric_value': 2 - }, - { - 'id': 3, - 'missing': False, - 'name': 'A little interested', - 'numeric_value': 3 - }, + ds = self.site.datasets.create( + as_entity({"name": "test_any_categorical_w_bracket_add_filter"}) + ).refresh() + cat_var = ds.variables.create( + as_entity( { - 'id': -1, - 'missing': True, - 'name': 'No Data', - 'numeric_value': None + "name": "Categorical Var", + "alias": 
"categorical_var", + "type": "categorical_array", + "categories": [ + { + "id": 1, + "missing": False, + "name": "Very interested", + "numeric_value": 1, + }, + { + "id": 2, + "missing": False, + "name": "Somewhat interested", + "numeric_value": 2, + }, + { + "id": 3, + "missing": False, + "name": "A little interested", + "numeric_value": 3, + }, + { + "id": -1, + "missing": True, + "name": "No Data", + "numeric_value": None, + }, + ], + "subvariables": [ + {"alias": "response_1", "name": "Response1"}, + {"alias": "response_2", "name": "Response2"}, + {"alias": "response_3", "name": "Response3"}, + ], + "values": [[1, 3, -1], [2, 1, 1], [2, 3, 2]], } - ], - 'subvariables': [ - { - 'alias': 'response_1', - 'name': 'Response1' - }, - { - 'alias': 'response_2', - 'name': 'Response2' - }, - { - 'alias': 'response_3', - 'name': 'Response3' - }, - ], - "values": [ - [1, 3, -1], - [2, 1, 1], - [2, 3, 2] - ] - })) + ) + ) scrunch_dataset = get_mutable_dataset(ds.body.id, self.site) _filter = "categorical_var[response_1].any([1])" try: - resp = scrunch_dataset.add_filter(name='filter_1', expr=_filter) - data = ds.follow("table", "limit=20&filter={}".format(resp.resource.self))['data'] + resp = scrunch_dataset.add_filter(name="filter_1", expr=_filter) + data = ds.follow("table", "limit=20&filter={}".format(resp.resource.self))[ + "data" + ] ds_variables = ds.variables.by("alias") cat_var_variable_id = ds_variables["categorical_var"].id - assert data[cat_var_variable_id] == [ - [1, 3, {"?": -1}] - ] + assert data[cat_var_variable_id] == [[1, 3, {"?": -1}]] assert resp.resource.body.entity.body.definition == { "function": "in", "args": [ - { - "var": "categorical_var", - "axes": ["response_1"] - }, - { - "value": [1] - } - ] + {"var": "categorical_var", "axes": ["response_1"]}, + {"value": [1]}, + ], } finally: # cleanup @@ -295,18 +260,17 @@ def test_append_dataset_any_filter_multiple_response(self): ["response_1", "response_2", "response_3"], [1, 2, 1], [1, 2, 2], - [1, 1, 1] + [1, 1, 1], ] ds_to_append_rows = [ ["response_1", "response_2", "response_3"], [2, 1, 2], [2, 2, 1], - [2, 2, 1] + [2, 2, 1], ] - ds, scrunch_dataset = self._create_mr_dataset('test_mr_any_subvar', ds_rows) + ds, scrunch_dataset = self._create_mr_dataset("test_mr_any_subvar", ds_rows) ds_to_append, scrunch_dataset_to_append = self._create_mr_dataset( - 'test_mr_any_to_append_subvar', - ds_to_append_rows + "test_mr_any_to_append_subvar", ds_to_append_rows ) # This filter should get only the rows that have the mr_response variable with the value 1 # at the same time for both response_1 and response_2 @@ -315,13 +279,8 @@ def test_append_dataset_any_filter_multiple_response(self): scrunch_dataset.append_dataset(scrunch_dataset_to_append, filter=_filter) ds_variables = ds.variables.by("alias") mr_variable_id = ds_variables["mr_variable"].id - data = ds.follow("table", "limit=20")['data'] - assert data[mr_variable_id] == [ - [1, 2, 1], - [1, 2, 2], - [1, 1, 1], - [2, 1, 2], - ] + data = ds.follow("table", "limit=20")["data"] + assert data[mr_variable_id] == [[1, 2, 1], [1, 2, 2], [1, 1, 1], [2, 1, 2]] finally: # cleanup ds.delete() @@ -332,18 +291,17 @@ def test_append_dataset_any_filter_multiple_response_single_subvar(self): ["response_1", "response_2", "response_3"], [1, 2, 1], [1, 2, 2], - [1, 1, 1] + [1, 1, 1], ] ds_to_append_rows = [ ["response_1", "response_2", "response_3"], [1, 1, 2], [2, 2, 1], - [1, 1, 1] + [1, 1, 1], ] - ds, scrunch_dataset = self._create_mr_dataset('test_mr_any_subvar', ds_rows) + ds, scrunch_dataset = 
self._create_mr_dataset("test_mr_any_subvar", ds_rows) ds_to_append, scrunch_dataset_to_append = self._create_mr_dataset( - 'test_mr_any_to_append_subvar', - ds_to_append_rows + "test_mr_any_to_append_subvar", ds_to_append_rows ) # This filter should get only the rows that have the mr_response variable with the value 1 (selected) # for response_1 (not the 2nd row in this test) @@ -352,13 +310,13 @@ def test_append_dataset_any_filter_multiple_response_single_subvar(self): scrunch_dataset.append_dataset(scrunch_dataset_to_append, filter=_filter) ds_variables = ds.variables.by("alias") mr_variable_id = ds_variables["mr_variable"].id - data = ds.follow("table", "limit=20")['data'] + data = ds.follow("table", "limit=20")["data"] assert data[mr_variable_id] == [ [1, 2, 1], [1, 2, 2], [1, 1, 1], [1, 1, 2], - [1, 1, 1] + [1, 1, 1], ] finally: # cleanup @@ -366,59 +324,73 @@ def test_append_dataset_any_filter_multiple_response_single_subvar(self): ds_to_append.delete() def test_categorical_any_add_filter_value(self): - ds = self.site.datasets.create(as_entity({"name": "test_any_categorical_filter"})).refresh() + ds = self.site.datasets.create( + as_entity({"name": "test_any_categorical_filter"}) + ).refresh() categories = [ {"id": 1, "name": "One", "missing": False, "numeric_value": None}, {"id": 2, "name": "Two", "missing": False, "numeric_value": None}, {"id": 3, "name": "Three", "missing": False, "numeric_value": None}, - {"id": -1, "name": "No Data", "missing": True, "numeric_value": None} + {"id": -1, "name": "No Data", "missing": True, "numeric_value": None}, ] - ds.variables.create(as_entity({ - "name": "my_cat", - "alias": "my_cat", - "type": "categorical", - "categories": categories, - "values": [1, -1, 2, 3, 3, 1] - })) + ds.variables.create( + as_entity( + { + "name": "my_cat", + "alias": "my_cat", + "type": "categorical", + "categories": categories, + "values": [1, -1, 2, 3, 3, 1], + } + ) + ) scrunch_dataset = get_mutable_dataset(ds.body.id, self.site) _filter = "my_cat.any([1])" try: - scrunch_dataset.add_filter(name='filter_1', expr=_filter) + scrunch_dataset.add_filter(name="filter_1", expr=_filter) # Adding the filter as exclusion. 
In this case, we are expecting the opposite # of the filter since it is an exclusion one scrunch_dataset.exclude(_filter) - data = ds.follow("table", "limit=20")['data'] + data = ds.follow("table", "limit=20")["data"] ds_variables = ds.variables.by("alias") variable_id = ds_variables["my_cat"].id - assert data[variable_id] == [{'?': -1}, 2, 3, 3] + assert data[variable_id] == [{"?": -1}, 2, 3, 3] finally: # cleanup ds.delete() def test_categorical_any_add_filter_multiple_values(self): - ds = self.site.datasets.create(as_entity({"name": "test_any_categorical_filter_multiple_values"})).refresh() + ds = self.site.datasets.create( + as_entity({"name": "test_any_categorical_filter_multiple_values"}) + ).refresh() categories = [ {"id": 1, "name": "One", "missing": False, "numeric_value": None}, {"id": 2, "name": "Two", "missing": False, "numeric_value": None}, {"id": 3, "name": "Three", "missing": False, "numeric_value": None}, - {"id": -1, "name": "No Data", "missing": True, "numeric_value": None} + {"id": -1, "name": "No Data", "missing": True, "numeric_value": None}, ] - ds.variables.create(as_entity({ - "name": "my_cat", - "alias": "my_cat", - "type": "categorical", - "categories": categories, - "values": [1, -1, 2, 3, 3, 1] - })) + ds.variables.create( + as_entity( + { + "name": "my_cat", + "alias": "my_cat", + "type": "categorical", + "categories": categories, + "values": [1, -1, 2, 3, 3, 1], + } + ) + ) scrunch_dataset = get_mutable_dataset(ds.body.id, self.site) _filter = "my_cat.any([1, 3])" try: - resp = scrunch_dataset.add_filter(name='filter_1', expr=_filter) - data = ds.follow("table", "limit=20&filter={}".format(resp.resource.self))['data'] + resp = scrunch_dataset.add_filter(name="filter_1", expr=_filter) + data = ds.follow("table", "limit=20&filter={}".format(resp.resource.self))[ + "data" + ] ds_variables = ds.variables.by("alias") cat_var_variable_id = ds_variables["my_cat"].id assert data[cat_var_variable_id] == [1, 3, 3, 1] diff --git a/integration/test_folders.py b/integration/test_folders.py index cbb13e8..21ce8e9 100644 --- a/integration/test_folders.py +++ b/integration/test_folders.py @@ -17,154 +17,136 @@ def setup_folders(ds): sess = ds.session ds.settings.edit(variable_folders=True) - ds.variables.create({ - 'element': 'shoji:entity', - 'body': {'name': 'testvar1', 'type': 'numeric'} - }) - ds.variables.create({ - 'element': 'shoji:entity', - 'body': {'name': 'testvar2', 'type': 'numeric'} - }) - ds.variables.create({ - 'element': 'shoji:entity', - 'body': {'name': 'testvar3', 'type': 'numeric'} - }) - ds.variables.create({ - 'element': 'shoji:entity', - 'body': {'name': 'testvar4', 'type': 'numeric'} - }) - ds.variables.create({ - 'element': 'shoji:entity', - 'body': {'name': 'testvar5', 'type': 'numeric'} - }) - ds.variables.create({ - 'element': 'shoji:entity', - 'body': {'name': 'testvar6', 'type': 'numeric'} - }) + ds.variables.create( + {"element": "shoji:entity", "body": {"name": "testvar1", "type": "numeric"}} + ) + ds.variables.create( + {"element": "shoji:entity", "body": {"name": "testvar2", "type": "numeric"}} + ) + ds.variables.create( + {"element": "shoji:entity", "body": {"name": "testvar3", "type": "numeric"}} + ) + ds.variables.create( + {"element": "shoji:entity", "body": {"name": "testvar4", "type": "numeric"}} + ) + ds.variables.create( + {"element": "shoji:entity", "body": {"name": "testvar5", "type": "numeric"}} + ) + ds.variables.create( + {"element": "shoji:entity", "body": {"name": "testvar6", "type": "numeric"}} + ) ds.refresh() - sf1 = 
Catalog(sess, body={ - 'name': 'Subfolder 1' - }) + sf1 = Catalog(sess, body={"name": "Subfolder 1"}) sf1 = ds.folders.public.create(sf1) - sfa = Catalog(sess, body={ - 'name': 'Subfolder A' - }) + sfa = Catalog(sess, body={"name": "Subfolder A"}) sfa = sf1.create(sfa) - sf2 = Catalog(sess, body={ - 'name': 'Subfolder 2' - }) + sf2 = Catalog(sess, body={"name": "Subfolder 2"}) sf2 = ds.folders.public.create(sf2) - variables = ds.variables.by('alias') - sf1.patch({'index': { - variables['age'].entity_url: {} - }}) - sfa.patch({'index': { - variables['gender'].entity_url: {} - }}) - sf2.patch({'index': { - variables['socialmedia'].entity_url: {} - }}) + variables = ds.variables.by("alias") + sf1.patch({"index": {variables["age"].entity_url: {}}}) + sfa.patch({"index": {variables["gender"].entity_url: {}}}) + sf2.patch({"index": {variables["socialmedia"].entity_url: {}}}) class TestFolders(BaseIntegrationTestCase): def setUp(self): super(TestFolders, self).setUp() - self._ds = self.site.datasets.create({ - 'element': 'shoji:entity', - 'body': { - 'name': 'test_folders', - 'table': { - 'element': 'crunch:table', - 'metadata': NEWS_DATASET + self._ds = self.site.datasets.create( + { + "element": "shoji:entity", + "body": { + "name": "test_folders", + "table": {"element": "crunch:table", "metadata": NEWS_DATASET}, }, } - }).refresh() + ).refresh() ds = self._ds setup_folders(ds) self.ds = get_dataset(ds.body.id) def test_get_folders(self): ds = self.ds - public = ds.folders.get('|') - sf1 = ds.folders.get('| Subfolder 1') - sfa = ds.folders.get('| Subfolder 1 | Subfolder A') + public = ds.folders.get("|") + sf1 = ds.folders.get("| Subfolder 1") + sfa = ds.folders.get("| Subfolder 1 | Subfolder A") # Equivalent ways of fetching Subfolder A - sfa2 = public.get('Subfolder 1 | Subfolder A') - sfa3 = sf1.get('Subfolder A') + sfa2 = public.get("Subfolder 1 | Subfolder A") + sfa3 = sf1.get("Subfolder A") self.assertEqual(sfa.url, sfa2.url) self.assertEqual(sfa.url, sfa3.url) # Fetching a variable by path - variable = ds.folders.get('| Subfolder 1 | Subfolder A | Gender') + variable = ds.folders.get("| Subfolder 1 | Subfolder A | Gender") self.assertTrue(isinstance(sf1, Folder)) self.assertTrue(isinstance(sfa, Folder)) self.assertTrue(isinstance(variable, Variable)) - self.assertEqual(sf1.name, 'Subfolder 1') - self.assertEqual(sfa.name, 'Subfolder A') + self.assertEqual(sf1.name, "Subfolder 1") + self.assertEqual(sfa.name, "Subfolder A") self.assertEqual(sf1.parent, public) self.assertEqual(sfa.parent.name, sf1.name) self.assertEqual(sfa.parent.path, sf1.path) - self.assertEqual(sfa.path, '| Subfolder 1 | Subfolder A') - self.assertEqual(variable.alias, 'gender') - self.assertEqual(variable.type, 'categorical') + self.assertEqual(sfa.path, "| Subfolder 1 | Subfolder A") + self.assertEqual(variable.alias, "gender") + self.assertEqual(variable.type, "categorical") - bad_path = '| bad folder' + bad_path = "| bad folder" with self.assertRaises(InvalidPathError) as err: ds.folders.get(bad_path) self.assertEqual(str(err.exception), "Invalid path: %s" % bad_path) def test_create_folder(self): ds = self.ds - public = ds.folders.get('|') - mit = public.create_folder('Made in test') + public = ds.folders.get("|") + mit = public.create_folder("Made in test") self.assertEqual(mit.path, "| Made in test") mit2 = public.get(mit.name) self.assertEqual(mit2.url, mit.url) - nested = mit.create_folder('nested level') + nested = mit.create_folder("nested level") self.assertEqual(mit.get_child(nested.name).url, nested.url) 
def test_create_folder_with_variables(self): ds = self.ds public = ds.folders.public - sf = public.create_folder("with children", alias=['testvar1', 'testvar2']) + sf = public.create_folder("with children", alias=["testvar1", "testvar2"]) self.assertEqual([Variable, Variable], [type(c) for c in sf.children]) - self.assertEqual(['testvar1', 'testvar2'], - [c.alias for c in sf.children]) + self.assertEqual(["testvar1", "testvar2"], [c.alias for c in sf.children]) def test_reorder_folder(self): ds = self.ds - public = ds.folders.get('|') - folder = public.create_folder('ToReorder') - sf1 = folder.create_folder('1') - sf2 = folder.create_folder('2') - sf3 = folder.create_folder('3') - var = ds['testvar1'] + public = ds.folders.get("|") + folder = public.create_folder("ToReorder") + sf1 = folder.create_folder("1") + sf2 = folder.create_folder("2") + sf3 = folder.create_folder("3") + var = ds["testvar1"] folder.move_here([var]) children = folder.children - self.assertEqual([c.url for c in children], - [c.url for c in [sf1, sf2, sf3, var]]) - self.assertEqual(list(map(type, children)), - [Folder, Folder, Folder, Variable]) + self.assertEqual( + [c.url for c in children], [c.url for c in [sf1, sf2, sf3, var]] + ) + self.assertEqual(list(map(type, children)), [Folder, Folder, Folder, Variable]) # Reorder placing sf1 at the end folder.reorder([sf2, var, sf3, sf1]) children = folder.children - self.assertEqual([c.url for c in children], - [c.url for c in [sf2, var, sf3, sf1]]) + self.assertEqual( + [c.url for c in children], [c.url for c in [sf2, var, sf3, sf1]] + ) # Test reorder providing children names only folder.reorder(var.alias, sf1.name, sf2.name, sf3.name) - self.assertEqual([c.url for c in folder.children], - [c.url for c in [var, sf1, sf2, sf3]]) + self.assertEqual( + [c.url for c in folder.children], [c.url for c in [var, sf1, sf2, sf3]] + ) def test_move_to_folder(self): ds = self.ds sf = ds.folders.public.create_folder("target") - ds['testvar1'].move_to_folder(sf.path) - self.assertEqual(['testvar1'], [c.alias for c in sf.children]) + ds["testvar1"].move_to_folder(sf.path) + self.assertEqual(["testvar1"], [c.alias for c in sf.children]) def test_move_between_folders(self): public = self.ds.folders.public @@ -172,111 +154,112 @@ def test_move_between_folders(self): target2 = public.create_folder("t2") self.assertEqual(target1.children, []) self.assertEqual(target2.children, []) - var1 = self.ds['testvar2'] - var2 = self.ds['testvar3'] + var1 = self.ds["testvar2"] + var2 = self.ds["testvar3"] target1.move_here([var2, var1]) - self.assertEqual([c.url for c in target1.children], - [var2.url, var1.url]) + self.assertEqual([c.url for c in target1.children], [var2.url, var1.url]) - nested = target1.create_folder('nested') - self.assertEqual([c.url for c in target1.children], - [var2.url, var1.url, nested.url]) + nested = target1.create_folder("nested") + self.assertEqual( + [c.url for c in target1.children], [var2.url, var1.url, nested.url] + ) # Move var2 to t2 target2.move_here(var2) # Now, t1 does not have var2, it is under t2 - self.assertEqual([c.url for c in target1.children], - [var1.url, nested.url]) - self.assertEqual([c.url for c in target2.children], - [var2.url]) + self.assertEqual([c.url for c in target1.children], [var1.url, nested.url]) + self.assertEqual([c.url for c in target2.children], [var2.url]) # Move nested and var1 to t2 target2.move_here([nested, var1]) # Now, they are all in t2, t1 is empty - self.assertEqual([c.url for c in target2.children], - [var2.url, nested.url, 
var1.url]) + self.assertEqual( + [c.url for c in target2.children], [var2.url, nested.url, var1.url] + ) self.assertEqual([c.url for c in target1.children], []) # Move var1 to nested nested.move_here(var1) # Now, t2 doesn't have var1, but nested does - self.assertEqual([c.url for c in target2.children], - [var2.url, nested.url]) - self.assertEqual([c.url for c in nested.children], - [var1.url]) + self.assertEqual([c.url for c in target2.children], [var2.url, nested.url]) + self.assertEqual([c.url for c in nested.children], [var1.url]) # Move nested to t1 target1.move_here(nested) # Now t2 has only var2 and t1 has nested in it - self.assertEqual([c.url for c in target2.children], - [var2.url]) - self.assertEqual([c.url for c in target1.children], - [nested.url]) + self.assertEqual([c.url for c in target2.children], [var2.url]) + self.assertEqual([c.url for c in target1.children], [nested.url]) def test_move_here_position(self): public = self.ds.folders.public sf = public.create_folder("here") - sf.move_here(self.ds['testvar1']) - sf.move_here(self.ds['testvar2'], before='testvar1') - self.assertEqual([c.name for c in sf.children], ['testvar2', 'testvar1']) - sf.move_here(self.ds['testvar3'], after='testvar2') - self.assertEqual([c.name for c in sf.children], ['testvar2', 'testvar3', 'testvar1']) - sf.move_here(self.ds['testvar4'], position=1) - self.assertEqual([c.name for c in sf.children], ['testvar2', 'testvar4', 'testvar3', 'testvar1']) + sf.move_here(self.ds["testvar1"]) + sf.move_here(self.ds["testvar2"], before="testvar1") + self.assertEqual([c.name for c in sf.children], ["testvar2", "testvar1"]) + sf.move_here(self.ds["testvar3"], after="testvar2") + self.assertEqual( + [c.name for c in sf.children], ["testvar2", "testvar3", "testvar1"] + ) + sf.move_here(self.ds["testvar4"], position=1) + self.assertEqual( + [c.name for c in sf.children], + ["testvar2", "testvar4", "testvar3", "testvar1"], + ) def test_move_by_alias(self): public = self.ds.folders.public target = public.create_folder("test_move_by_alias") - target.move_here('testvar2') + target.move_here("testvar2") self.assertEqual([c.name for c in target.children], ["testvar2"]) def test_make_folder_in_position(self): - public = self.ds.folders.public.create_folder('testhere') + public = self.ds.folders.public.create_folder("testhere") public.create_folder("p1") public.create_folder("p2") public.create_folder("p3") public.create_folder("A", before="p2") - self.assertEqual([c.name for c in public.children], - ["p1", "A", "p2", "p3"]) + self.assertEqual([c.name for c in public.children], ["p1", "A", "p2", "p3"]) public.create_folder("B", after="p2") - self.assertEqual([c.name for c in public.children], - ["p1", "A", "p2", "B", "p3"]) + self.assertEqual( + [c.name for c in public.children], ["p1", "A", "p2", "B", "p3"] + ) public.create_folder("C", position=3) - self.assertEqual([c.name for c in public.children], - ["p1", "A", "p2", "C", "B", "p3"]) + self.assertEqual( + [c.name for c in public.children], ["p1", "A", "p2", "C", "B", "p3"] + ) def test_hide_variables(self): hidden_folder = self.ds.folders.hidden public = self.ds.folders.public - var1 = self.ds['testvar4'] - var2 = self.ds['testvar5'] + var1 = self.ds["testvar4"] + var2 = self.ds["testvar5"] public.move_here(var1, var2) hidden_folder.move_here(var2) var1_id = var1.resource.body.id var2_id = var2.resource.body.id - self.assertTrue(var2_id in self._ds.folders.hidden.by('id')) - self.assertTrue(var2_id not in self._ds.folders.public.by('id')) - self.assertTrue(var1_id 
in self._ds.folders.public.by('id')) - self.assertTrue(var1_id not in self._ds.folders.hidden.by('id')) + self.assertTrue(var2_id in self._ds.folders.hidden.by("id")) + self.assertTrue(var2_id not in self._ds.folders.public.by("id")) + self.assertTrue(var1_id in self._ds.folders.public.by("id")) + self.assertTrue(var1_id not in self._ds.folders.hidden.by("id")) def test_rename(self): public = self.ds.folders.public - sf = public.create_folder('rename me') + sf = public.create_folder("rename me") sf.rename("renamed") - self.assertTrue('renamed' in [c.name for c in public.children]) - self.assertEqual(sf.name, 'renamed') + self.assertTrue("renamed" in [c.name for c in public.children]) + self.assertEqual(sf.name, "renamed") def test_delete_folder(self): public = self.ds.folders.public - var = self.ds['testvar6'] - sf = public.create_folder('delete me') + var = self.ds["testvar6"] + sf = public.create_folder("delete me") sf.move_here(var) self.assertTrue(var.url in self._ds.variables.index) sf.delete() @@ -301,8 +284,9 @@ def test_dict_protocol(self): # more dict methods self.assertEqual(set(dict(public).keys()), set(public.keys())) - self.assertEqual({c.url for c in dict(public).values()}, - {c.url for c in public.children}) + self.assertEqual( + {c.url for c in dict(public).values()}, {c.url for c in public.children} + ) # The right key corresponds to the correct value self.assertEqual(dict(public)[sf1.name].url, sf1.url) @@ -320,13 +304,12 @@ def test_dict_protocol_on_helper(self): self.assertEqual(top_level[sf1.name].url, sf1.url) # __iter__ method - self.assertEqual([c.url for c in top_level], - [c.url for c in public.children]) + self.assertEqual([c.url for c in top_level], [c.url for c in public.children]) # more dict methods self.assertEqual(set(dict(top_level).keys()), set(public.keys())) - self.assertEqual({c.url for c in dict(top_level).values()}, - {c.url for c in public.children}) + self.assertEqual( + {c.url for c in dict(top_level).values()}, {c.url for c in public.children} + ) # The right key corresponds to the correct value self.assertEqual(dict(top_level)[sf1.name].url, sf1.url) - diff --git a/integration/test_projects.py b/integration/test_projects.py index d5a7d04..8f81b43 100644 --- a/integration/test_projects.py +++ b/integration/test_projects.py @@ -8,8 +8,8 @@ from scrunch import connect, get_project, get_dataset, get_user from fixtures import BaseIntegrationTestCase -UNIQUE_PREFIX = str(datetime.now()).replace(':', '').replace('.', '') -FEATURE_FLAG = 'old_projects_order' +UNIQUE_PREFIX = str(datetime.now()).replace(":", "").replace(".", "") +FEATURE_FLAG = "old_projects_order" class TestProjects(BaseIntegrationTestCase): @@ -22,13 +22,13 @@ def setUp(self): self.site.session.feature_flags[FEATURE_FLAG] = False def new_project(self, name): - res = self.site.projects.create(shoji_entity_wrapper({ - "name": name + UNIQUE_PREFIX - })).refresh() + res = self.site.projects.create( + shoji_entity_wrapper({"name": name + UNIQUE_PREFIX}) + ).refresh() return Project(res) def test_create_subprojects(self): - pa = self.new_project('A') + pa = self.new_project("A") pb = pa.create_project("B") pa.resource.refresh() self.assertTrue(pb.url in pa.resource.index) @@ -57,7 +57,7 @@ def test_rename(self): self.assertEqual(_project.url, project.url) def test_reorder(self): - pa = self.new_project('test_reorder') + pa = self.new_project("test_reorder") p1 = pa.create_project("1") p2 = pa.create_project("2") pa.resource.refresh() @@ -67,24 +67,23 @@ def test_reorder(self): 
self.assertEqual(pa.resource.graph, [p2.url, p1.url]) def test_move_dataset(self): - username_2 = os.environ['SCRUNCH_USER2'] - password_2 = os.environ['SCRUNCH_PASS2'] + username_2 = os.environ["SCRUNCH_USER2"] + password_2 = os.environ["SCRUNCH_PASS2"] fo = get_user(username_2) fo_site = connect(fo.email, password_2, self.host) # These two datasets are created by the default logged user - _ds1 = self.site.datasets.create(shoji_entity_wrapper({ - 'name': 'test_move_dataset1' - })).refresh() - _ds2 = self.site.datasets.create(shoji_entity_wrapper({ - 'name': 'test_move_dataset2' - })).refresh() + _ds1 = self.site.datasets.create( + shoji_entity_wrapper({"name": "test_move_dataset1"}) + ).refresh() + _ds2 = self.site.datasets.create( + shoji_entity_wrapper({"name": "test_move_dataset2"}) + ).refresh() # This dataset is created and owned by the other user - _ds4 = fo_site.datasets.create(shoji_entity_wrapper({ - 'name': 'test_move_dataset4', - 'owner': fo.url - })).refresh() + _ds4 = fo_site.datasets.create( + shoji_entity_wrapper({"name": "test_move_dataset4", "owner": fo.url}) + ).refresh() ds1 = get_dataset(_ds1.body.id) ds2 = get_dataset(_ds2.body.id) @@ -114,7 +113,9 @@ def test_move_dataset(self): pa.place(ds2, path="| %s" % pb.name) pb.resource.refresh() - assert sorted(pb.resource.index.keys()) == sorted([_ds1.self, _ds2.self, _ds4.self]) + assert sorted(pb.resource.index.keys()) == sorted( + [_ds1.self, _ds2.self, _ds4.self] + ) self.assertEqual(ds2.resource.project.self, pb.url) def test_execute_script(self): diff --git a/integration/test_recodes.py b/integration/test_recodes.py index 4be8850..98d22c2 100644 --- a/integration/test_recodes.py +++ b/integration/test_recodes.py @@ -10,7 +10,13 @@ from six import StringIO -from fixtures import NEWS_DATASET, NEWS_DATASET_ROWS, mr_in, RECODES_CSV_OUTPUT, BaseIntegrationTestCase +from fixtures import ( + NEWS_DATASET, + NEWS_DATASET_ROWS, + mr_in, + RECODES_CSV_OUTPUT, + BaseIntegrationTestCase, +) from scrunch.streaming_dataset import get_streaming_dataset from scrunch.mutable_dataset import get_mutable_dataset from pycrunch.importing import Importer @@ -20,17 +26,16 @@ class TestRecodes(BaseIntegrationTestCase): def test_recodes(self): raise self.skipTest("Temporarily disabling for API update") # Create a dataset for usage - ds = self.site.datasets.create({ - 'element': 'shoji:entity', - 'body': { - 'name': 'test_recodes', - 'table': { - 'element': 'crunch:table', - 'metadata': NEWS_DATASET + ds = self.site.datasets.create( + { + "element": "shoji:entity", + "body": { + "name": "test_recodes", + "table": {"element": "crunch:table", "metadata": NEWS_DATASET}, + "streaming": "streaming", }, - 'streaming': 'streaming' } - }).refresh() + ).refresh() dataset = get_streaming_dataset(ds.body.id, self.site) print("Dataset %s created" % dataset.id) @@ -39,73 +44,83 @@ def test_recodes(self): dataset.push_rows(total) # Recode a new single response variable - agerange = dataset.create_categorical([ - {'id': 1, 'name': 'Underage', 'case': 'age < 18'}, - {'id': 2, 'name': 'Millennials', 'case': 'age > 18 and age < 25'}, - {'id': 3, 'name': 'Gen X', 'case': 'age < 35 and age >= 25'}, - {'id': 4, 'name': 'Grown ups', 'case': 'age < 60 and age >= 35'}, - {'id': 5, 'name': '60+', 'case': 'age >= 60'} - ], alias='agerange', name='Age range', multiple=False) + agerange = dataset.create_categorical( + [ + {"id": 1, "name": "Underage", "case": "age < 18"}, + {"id": 2, "name": "Millennials", "case": "age > 18 and age < 25"}, + {"id": 3, "name": "Gen X", 
"case": "age < 35 and age >= 25"}, + {"id": 4, "name": "Grown ups", "case": "age < 60 and age >= 35"}, + {"id": 5, "name": "60+", "case": "age >= 60"}, + ], + alias="agerange", + name="Age range", + multiple=False, + ) print("Variable %s created" % agerange.alias) # Recode a new multiple response variable from an existing multiple # response variable - origintype = dataset.create_categorical([ - {'id': 1, 'name': "Online", - # Mixed support for using "category"(subvariables really) IDs - 'case': mr_in(dataset, 'newssource', [1, 2, 3, 4])}, # Only in the helper - {'id': 2, 'name': "Print", 'case': mr_in(dataset, 'newssource', [5, 6])}, - {'id': 3, 'name': "Tv", 'case': mr_in(dataset, 'newssource', [7, 9])}, - {'id': 4, 'name': "Radio", 'case': mr_in(dataset, 'newssource', [8, 10])}, - ], alias='origintype', name="News source by type", multiple=True) + origintype = dataset.create_categorical( + [ + { + "id": 1, + "name": "Online", + # Mixed support for using "category"(subvariables really) IDs + "case": mr_in(dataset, "newssource", [1, 2, 3, 4]), + }, # Only in the helper + { + "id": 2, + "name": "Print", + "case": mr_in(dataset, "newssource", [5, 6]), + }, + {"id": 3, "name": "Tv", "case": mr_in(dataset, "newssource", [7, 9])}, + { + "id": 4, + "name": "Radio", + "case": mr_in(dataset, "newssource", [8, 10]), + }, + ], + alias="origintype", + name="News source by type", + multiple=True, + ) print("Variable %s created" % origintype.alias) # Add an exclusion filter - dataset.exclude('agerange == 1') # Remove underage + dataset.exclude("agerange == 1") # Remove underage # MAYBE: Create a new numeric expression based on arbitrary rules # Copy a variable origintype_copy = dataset.copy_variable( - origintype, - name='Copy of origintype', - alias='origintype_copy') + origintype, name="Copy of origintype", alias="origintype_copy" + ) print("Variable %s created" % origintype_copy.alias) onlinenewssource = dataset.combine_categories( origintype_copy, - map={ - 1: 1, - 2: [2, 3, 4] - }, - categories={ - 1: "online", - 2: "notonline", - }, + map={1: 1, 2: [2, 3, 4]}, + categories={1: "online", 2: "notonline"}, alias="onlinenewssource", - name='Online or not') - print('Created combination: %s' % onlinenewssource.alias) + name="Online or not", + ) + print("Created combination: %s" % onlinenewssource.alias) # Combine a single categorical - Combine with subvar 3 on the wrong place over35 = dataset.combine_categories( agerange, - map={ - 1: [1, 2, 3], - 2: [4, 5] - }, - categories={ - 1: 'under35', - 2: 'over35' - }, - name='over 35?', - alias='over35') - print('Created combination: %s' % over35.alias) + map={1: [1, 2, 3], 2: [4, 5]}, + categories={1: "under35", 2: "over35"}, + name="over 35?", + alias="over35", + ) + print("Created combination: %s" % over35.alias) self.maxDiff = None # Export some rows - output = tempfile.NamedTemporaryFile('rw', delete=True) + output = tempfile.NamedTemporaryFile("rw", delete=True) dataset.export(output.name) reader = csv.DictReader(output) @@ -127,8 +142,8 @@ def test_recodes(self): writer.writerow([actual[h][i] for h in headers]) actualf.seek(0) - result = [l.strip() for l in actualf.read().strip().split('\n')] - expected = RECODES_CSV_OUTPUT.split('\n') + result = [l.strip() for l in actualf.read().strip().split("\n")] + expected = RECODES_CSV_OUTPUT.split("\n") # Rows are unordered under streaming conditions self.assertEqual(sorted(result), sorted(expected)) @@ -140,46 +155,42 @@ class TestFill(BaseIntegrationTestCase): def prepare_ds(self): cats = [ {"id": 1, "name": 
"Daily", "missing": False, "numeric_value": None}, - {"id": 2, "name": "Weekly", "missing": False, - "numeric_value": None}, - {"id": 3, "name": "Monthly", "missing": False, - "numeric_value": None}, - {"id": -1, "name": "No Data", "missing": True, - "numeric_value": None}, + {"id": 2, "name": "Weekly", "missing": False, "numeric_value": None}, + {"id": 3, "name": "Monthly", "missing": False, "numeric_value": None}, + {"id": -1, "name": "No Data", "missing": True, "numeric_value": None}, ] metadata = { "coke_freq": { "name": "frequency coke", "type": "categorical", - "categories": cats + "categories": cats, }, "pepsi_freq": { "name": "frequency pepsi", "type": "categorical", - "categories": cats + "categories": cats, }, "pop_pref": { "name": "Soda preference", "type": "categorical", "categories": [ - {"id": 1, "name": "Coke", "missing": False, - "numeric_value": None}, - {"id": 2, "name": "Pepsi", "missing": False, - "numeric_value": None}, - {"id": -1, "name": "No Data", "missing": True, - "numeric_value": None}, - ] - } + {"id": 1, "name": "Coke", "missing": False, "numeric_value": None}, + {"id": 2, "name": "Pepsi", "missing": False, "numeric_value": None}, + { + "id": -1, + "name": "No Data", + "missing": True, + "numeric_value": None, + }, + ], + }, } ds_payload = { - 'element': 'shoji:entity', - 'body': { - 'name': 'test_fill', - 'table': { - 'element': 'crunch:table', - 'metadata': metadata - }, - } + "element": "shoji:entity", + "body": { + "name": "test_fill", + "table": {"element": "crunch:table", "metadata": metadata}, + }, } rows = [ @@ -198,10 +209,14 @@ def prepare_ds(self): def test_fill(self): dataset, api_ds = self.prepare_ds() - dataset.create_fill_values([ - {"case": "pop_pref == 1", "variable": "coke_freq"}, - {"case": "pop_pref == 2", "variable": "pepsi_freq"}, - ], alias="pop_freq", name="Pop frequency") + dataset.create_fill_values( + [ + {"case": "pop_pref == 1", "variable": "coke_freq"}, + {"case": "pop_pref == 2", "variable": "pepsi_freq"}, + ], + alias="pop_freq", + name="Pop frequency", + ) variables = api_ds.variables.by("alias") new_id = variables["pop_freq"]["id"] @@ -211,24 +226,31 @@ def test_fill(self): data = api_ds.follow("table", "limit=6") cats = {c["name"]: c["id"] for c in data["metadata"][new_id]["categories"]} - self.assertEqual(data["data"][new_id], [ - # Coke chunk - cats["Daily"], - cats["Weekly"], - cats["Monthly"], - # Pepsi chunk - cats["Monthly"], - cats["Weekly"], - cats["Daily"], - ]) + self.assertEqual( + data["data"][new_id], + [ + # Coke chunk + cats["Daily"], + cats["Weekly"], + cats["Monthly"], + # Pepsi chunk + cats["Monthly"], + cats["Weekly"], + cats["Daily"], + ], + ) api_ds.delete() def test_fill_w_else(self): dataset, ds = self.prepare_ds() - dataset.create_fill_values([ - {"case": "pop_pref == 1", "variable": "coke_freq"}, - {"case": "else", "variable": "pepsi_freq"}, - ], alias="pop_freq", name="Pop frequency") + dataset.create_fill_values( + [ + {"case": "pop_pref == 1", "variable": "coke_freq"}, + {"case": "else", "variable": "pepsi_freq"}, + ], + alias="pop_freq", + name="Pop frequency", + ) variables = ds.variables.by("alias") new_id = variables["pop_freq"]["id"] @@ -238,24 +260,31 @@ def test_fill_w_else(self): data = ds.follow("table", "limit=6") cats = {c["name"]: c["id"] for c in data["metadata"][new_id]["categories"]} - self.assertEqual(data["data"][new_id], [ - # Coke chunk - cats["Daily"], - cats["Weekly"], - cats["Monthly"], - # Pepsi chunk - Default case - cats["Monthly"], - cats["Weekly"], - cats["Daily"], - ]) + 
self.assertEqual( + data["data"][new_id], + [ + # Coke chunk + cats["Daily"], + cats["Weekly"], + cats["Monthly"], + # Pepsi chunk - Default case + cats["Monthly"], + cats["Weekly"], + cats["Daily"], + ], + ) ds.delete() def test_fill_w_else_code(self): dataset, ds = self.prepare_ds() - dataset.create_fill_values([ - {"case": "pop_pref == 1", "variable": "coke_freq"}, - {"case": "else", "name": "Not Asked", "id": 99, "missing": False}, - ], alias="pop_freq", name="Pop frequency") + dataset.create_fill_values( + [ + {"case": "pop_pref == 1", "variable": "coke_freq"}, + {"case": "else", "name": "Not Asked", "id": 99, "missing": False}, + ], + alias="pop_freq", + name="Pop frequency", + ) variables = ds.variables.by("alias") new_id = variables["pop_freq"]["id"] @@ -265,23 +294,28 @@ def test_fill_w_else_code(self): data = ds.follow("table", "limit=6") cats = {c["name"]: c["id"] for c in data["metadata"][new_id]["categories"]} - self.assertEqual(data["data"][new_id], [ - # Coke chunk - cats["Daily"], - cats["Weekly"], - cats["Monthly"], - # Default value - cats["Not Asked"], - cats["Not Asked"], - cats["Not Asked"], - ]) + self.assertEqual( + data["data"][new_id], + [ + # Coke chunk + cats["Daily"], + cats["Weekly"], + cats["Monthly"], + # Default value + cats["Not Asked"], + cats["Not Asked"], + cats["Not Asked"], + ], + ) ds.delete() def test_fill_w_else_default(self): dataset, ds = self.prepare_ds() - dataset.create_fill_values([ - {"case": "pop_pref == 1", "variable": "coke_freq"}, - ], alias="pop_freq", name="Pop frequency") + dataset.create_fill_values( + [{"case": "pop_pref == 1", "variable": "coke_freq"}], + alias="pop_freq", + name="Pop frequency", + ) variables = ds.variables.by("alias") new_id = variables["pop_freq"]["id"] @@ -291,14 +325,17 @@ def test_fill_w_else_default(self): data = ds.follow("table", "limit=6") cats = {c["name"]: c["id"] for c in data["metadata"][new_id]["categories"]} - self.assertEqual(data["data"][new_id], [ - # Coke chunk - cats["Daily"], - cats["Weekly"], - cats["Monthly"], - # Default value - {"?": -1}, - {"?": -1}, - {"?": -1}, - ]) + self.assertEqual( + data["data"][new_id], + [ + # Coke chunk + cats["Daily"], + cats["Weekly"], + cats["Monthly"], + # Default value + {"?": -1}, + {"?": -1}, + {"?": -1}, + ], + ) ds.delete() diff --git a/integration/test_scripts.py b/integration/test_scripts.py index e3b05e6..9116753 100644 --- a/integration/test_scripts.py +++ b/integration/test_scripts.py @@ -13,17 +13,24 @@ from fixtures import BaseIntegrationTestCase -@pytest.mark.skipif(os.environ.get("LOCAL_INTEGRATION") is None, reason="Do not run this test during CI/CD") +@pytest.mark.skipif( + os.environ.get("LOCAL_INTEGRATION") is None, + reason="Do not run this test during CI/CD", +) class TestSystemScripts(BaseIntegrationTestCase): def new_project(self, name): - res = self.site.projects.create(shoji_entity_wrapper({ - "name": name + datetime.now().strftime("%Y%m%d%H%M%S") - })).refresh() + res = self.site.projects.create( + shoji_entity_wrapper( + {"name": name + datetime.now().strftime("%Y%m%d%H%M%S")} + ) + ).refresh() return Project(res) def test_define_view_strict_subvariable_syntax(self): project = self.new_project("test_view_strict_subvariable") - ds = self.site.datasets.create(as_entity({"name": "test_dataset_script"})).refresh() + ds = self.site.datasets.create( + as_entity({"name": "test_dataset_script"}) + ).refresh() categories = [ {"id": 2, "name": "Home"}, {"id": 3, "name": "Work"}, @@ -74,7 +81,9 @@ def 
test_define_view_strict_subvariable_syntax(self): def test_define_view_strict_subvariable_syntax_error(self): project = self.new_project("test_view_strict_subvariable_false") - ds = self.site.datasets.create(as_entity({"name": "test_dataset_script_false"})).refresh() + ds = self.site.datasets.create( + as_entity({"name": "test_dataset_script_false"}) + ).refresh() categories = [ {"id": 2, "name": "Home"}, {"id": 3, "name": "Work"}, @@ -120,12 +129,14 @@ def test_define_view_strict_subvariable_syntax_error(self): scrunch_dataset = get_mutable_dataset(ds.body.id, self.site) project.move_here([scrunch_dataset]) resp = project.execute(script_body) - with pytest.raises(TaskError) as err: + with pytest.raises(TaskError) as err: wait_progress(resp, self.site.session) err_value = err.value[0] err_value["type"] == "script:validation" err_value["description"] == "Errors processing the script" - err_value["resolutions"][0]["message"] == "The following subvariables: bird, cat, dog exist in multiple arrays: pets, pets_2" + err_value["resolutions"][0][ + "message" + ] == "The following subvariables: bird, cat, dog exist in multiple arrays: pets, pets_2" finally: ds.delete() project.delete() @@ -135,13 +146,7 @@ class TestDatasetScripts(BaseIntegrationTestCase): def _create_ds(self): ds = self.site.datasets.create(as_entity({"name": "test_script"})).refresh() variable = ds.variables.create( - as_entity( - { - "name": "pk", - "alias": "pk", - "type": "numeric", - } - ) + as_entity({"name": "pk", "alias": "pk", "type": "numeric"}) ) return ds, variable @@ -179,7 +184,7 @@ def test_handle_error(self): # Script big enough to trigger async validation async_script = ["""BAD-RENAME pk TO varA;"""] * 50000 async_script = "\n".join(async_script) - assert len(async_script) > 2 ** 20 # This is the threshold for async + assert len(async_script) > 2**20 # This is the threshold for async with pytest.raises(ScriptExecutionError) as err: scrunch_dataset.scripts.execute(async_script) assert len(err.value.resolutions) == 50000 # All lines raised error @@ -224,5 +229,5 @@ def test_fetch_all_and_collapse(self): r = scrunch_dataset.scripts.all() assert len(r) == 1 - finally: + finally: ds.delete() diff --git a/integration/test_views.py b/integration/test_views.py index 894fefd..270cc7a 100644 --- a/integration/test_views.py +++ b/integration/test_views.py @@ -7,21 +7,13 @@ class TestViews(BaseIntegrationTestCase): - FIXTURE_VARIABLES = [ - ("var1", "numeric"), - ("var2", "text"), - ("var3", "numeric"), - ] + FIXTURE_VARIABLES = [("var1", "numeric"), ("var2", "text"), ("var3", "numeric")] def _create_ds(self): ds = self.site.datasets.create(as_entity({"name": "test_script"})).refresh() for alias, v_type in self.FIXTURE_VARIABLES: - var_body = as_entity({ - "name": alias, - "alias": alias, - "type": v_type, - }) + var_body = as_entity({"name": alias, "alias": alias, "type": v_type}) ds.variables.create(var_body) return ds @@ -75,4 +67,3 @@ def test_fetch_view_names(self): fetched_view = scrunch_dataset.views.get_by_name("view 2") assert fetched_view.id == view2.id - diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..79b3b87 --- /dev/null +++ b/ruff.toml @@ -0,0 +1,2 @@ +[format] +skip-magic-trailing-comma = true \ No newline at end of file diff --git a/scrunch/__init__.py b/scrunch/__init__.py index 1bebb30..b5268dc 100644 --- a/scrunch/__init__.py +++ b/scrunch/__init__.py @@ -1,13 +1,19 @@ from .session import connect -from .datasets import ( - get_user, get_project, get_dataset, get_team, create_team) 
+from .datasets import get_user, get_project, get_dataset, get_team, create_team from .streaming_dataset import get_streaming_dataset from .mutable_dataset import get_mutable_dataset, create_dataset from .version import __version__ __all__ = [ - 'connect', 'get_user', 'get_project', 'get_dataset', 'get_team', - 'get_streaming_dataset', 'get_mutable_dataset', 'create_team', - 'create_dataset', '__version__' + "connect", + "get_user", + "get_project", + "get_dataset", + "get_team", + "get_streaming_dataset", + "get_mutable_dataset", + "create_team", + "create_dataset", + "__version__", ] diff --git a/scrunch/accounts.py b/scrunch/accounts.py index 4a2fef9..8aaafe0 100644 --- a/scrunch/accounts.py +++ b/scrunch/accounts.py @@ -1,8 +1,6 @@ # coding: utf-8 -import pycrunch -from scrunch.helpers import shoji_view_wrapper -from scrunch.scripts import ScriptExecutionError, SystemScript +from scrunch.scripts import SystemScript from scrunch.connections import _default_connection from scrunch.datasets import Project @@ -60,4 +58,3 @@ def execute(self, script_body, strict_subvariable_syntax=None): def projects(self): act_projects = self.resource.projects return AccountProjects(act_projects) - diff --git a/scrunch/categories.py b/scrunch/categories.py index 56bd1db..5c3ddcc 100644 --- a/scrunch/categories.py +++ b/scrunch/categories.py @@ -6,8 +6,8 @@ class Category(ReadOnly): - _MUTABLE_ATTRIBUTES = {'name', 'numeric_value', 'missing', 'selected', 'date'} - _IMMUTABLE_ATTRIBUTES = {'id'} + _MUTABLE_ATTRIBUTES = {"name", "numeric_value", "missing", "selected", "date"} + _IMMUTABLE_ATTRIBUTES = {"id"} _ENTITY_ATTRIBUTES = _MUTABLE_ATTRIBUTES | _IMMUTABLE_ATTRIBUTES _NULLABLE_ATTRIBUTES = {"date", "numeric_value", "selected"} @@ -17,9 +17,9 @@ def __init__(self, variable_resource, category): def __getattr__(self, item): if item in self._ENTITY_ATTRIBUTES: - if item == 'selected': + if item == "selected": # Default is False; not always present - return self._category.get('selected', False) + return self._category.get("selected", False) try: return self._category[item] except KeyError: @@ -30,62 +30,72 @@ def __getattr__(self, item): # Attribute doesn't exist, must raise an AttributeError raise AttributeError( - 'Category %s has no attribute %s' % (self.category['name'], item) + "Category %s has no attribute %s" % (self.category["name"], item) ) def __repr__(self): attrs = self.as_dict() - return 'Category(%s)' % ', '.join('%s=%s' % c for c in attrs.items()) + return "Category(%s)" % ", ".join("%s=%s" % c for c in attrs.items()) def as_dict(self, **kwargs): - attributes = self._ENTITY_ATTRIBUTES - {'date'} # `date` needs special handling + attributes = self._ENTITY_ATTRIBUTES - {"date"} # `date` needs special handling dct = {attr: getattr(self, attr) for attr in attributes} try: - dct['date'] = getattr(self, 'date') + dct["date"] = getattr(self, "date") except KeyError: # `date` is not there, just move on pass if "date" in dct and dct["date"] is None: del dct["date"] if PY2: - dct['name'] = dct['name'].encode("ascii", "replace") + dct["name"] = dct["name"].encode("ascii", "replace") dct.update(**kwargs or {}) return dct def delete(self): - if self.resource.body.get('derivation'): - raise TypeError("Cannot delete categories on derived variables. Re-derive with the appropriate expression") - - categories = [cat for cat in self.resource.body['categories'] - if cat['id'] != self.id] + if self.resource.body.get("derivation"): + raise TypeError( + "Cannot delete categories on derived variables. 
Re-derive with the appropriate expression" + ) + + categories = [ + cat for cat in self.resource.body["categories"] if cat["id"] != self.id + ] self.resource.edit(categories=categories) self.resource.refresh() def edit(self, **kwargs): - if self.resource.body.get('derivation'): - raise TypeError("Cannot edit categories on derived variables. Re-derive with the appropriate expression") + if self.resource.body.get("derivation"): + raise TypeError( + "Cannot edit categories on derived variables. Re-derive with the appropriate expression" + ) extra_attrs = set(kwargs.keys()) - self._MUTABLE_ATTRIBUTES if extra_attrs: - raise AttributeError("Cannot edit the following attributes: %s" % ', '.join(extra_attrs)) - - categories = [self.as_dict(**kwargs) if cat['id'] == self.id else cat - for cat in self.resource.body['categories']] + raise AttributeError( + "Cannot edit the following attributes: %s" % ", ".join(extra_attrs) + ) + + categories = [ + self.as_dict(**kwargs) if cat["id"] == self.id else cat + for cat in self.resource.body["categories"] + ] self.resource.edit(categories=categories) self.resource.refresh() class CategoryList(OrderedDict): - @classmethod def _from(cls, variable_resource): cls.resource = variable_resource - categories = [(cat['id'], Category(variable_resource, cat)) - for cat in variable_resource.body['categories']] + categories = [ + (cat["id"], Category(variable_resource, cat)) + for cat in variable_resource.body["categories"] + ] return cls(categories) def order(self, *new_order): categories = sorted( - self.resource.body['categories'], key=lambda c: new_order.index(c['id']) + self.resource.body["categories"], key=lambda c: new_order.index(c["id"]) ) self.resource.edit(categories=categories) self.resource.refresh() diff --git a/scrunch/connections.py b/scrunch/connections.py index f6ff760..a809519 100644 --- a/scrunch/connections.py +++ b/scrunch/connections.py @@ -13,7 +13,7 @@ import configparser -LOG = logging.getLogger('scrunch') +LOG = logging.getLogger("scrunch") def _set_debug_log(): @@ -36,7 +36,7 @@ def _set_debug_log(): requests_log.propagate = True -def _get_connection(file_path='crunch.ini'): +def _get_connection(file_path="crunch.ini"): """ Utilitarian function that reads credentials from file or from ENV variables @@ -47,13 +47,13 @@ def _get_connection(file_path='crunch.ini'): connection_kwargs = {} # try to get credentials from environment - site = os.environ.get('CRUNCH_URL') + site = os.environ.get("CRUNCH_URL") if site: connection_kwargs["site_url"] = site - api_key = os.environ.get('CRUNCH_API_KEY') - username = os.environ.get('CRUNCH_USERNAME') - password = os.environ.get('CRUNCH_PASSWORD') + api_key = os.environ.get("CRUNCH_API_KEY") + username = os.environ.get("CRUNCH_USERNAME") + password = os.environ.get("CRUNCH_PASSWORD") if api_key: connection_kwargs["api_key"] = api_key elif username and password: @@ -67,15 +67,14 @@ def _get_connection(file_path='crunch.ini'): config = configparser.ConfigParser() config.read(file_path) try: - site = config.get('DEFAULT', 'CRUNCH_URL') + site = config.get("DEFAULT", "CRUNCH_URL") except Exception: pass # Config not found in .ini file. Do not change env value else: connection_kwargs["site_url"] = site - try: - api_key = config.get('DEFAULT', 'CRUNCH_API_KEY') + api_key = config.get("DEFAULT", "CRUNCH_API_KEY") except Exception: pass # Config not found in .ini file. 
Do not change env value else: @@ -83,8 +82,8 @@ def _get_connection(file_path='crunch.ini'): if not api_key: try: - username = config.get('DEFAULT', 'CRUNCH_USERNAME') - password = config.get('DEFAULT', 'CRUNCH_PASSWORD') + username = config.get("DEFAULT", "CRUNCH_USERNAME") + password = config.get("DEFAULT", "CRUNCH_PASSWORD") except Exception: pass # Config not found in .ini file. Do not change env value else: @@ -97,7 +96,8 @@ def _get_connection(file_path='crunch.ini'): else: raise AuthenticationError( "Unable to find crunch session, crunch.ini file " - "or environment variables.") + "or environment variables." + ) def _default_connection(connection): @@ -106,5 +106,6 @@ def _default_connection(connection): if not connection: raise AttributeError( "Authenticate first with scrunch.connect() or by providing " - "config/environment variables") + "config/environment variables" + ) return connection diff --git a/scrunch/crunchboxes.py b/scrunch/crunchboxes.py index 0e04741..1abfee8 100644 --- a/scrunch/crunchboxes.py +++ b/scrunch/crunchboxes.py @@ -29,16 +29,15 @@ class CrunchBox(object): instance. """ - WIDGET_URL = 'https://s.crunch.io/widget/index.html#/ds/{id}/' + WIDGET_URL = "https://s.crunch.io/widget/index.html#/ds/{id}/" DIMENSIONS = dict(height=480, width=600) # the attributes on entity.body.metadata - _METADATA_ATTRIBUTES = {'title', 'notes', 'header', 'footer'} + _METADATA_ATTRIBUTES = {"title", "notes", "header", "footer"} _MUTABLE_ATTRIBUTES = _METADATA_ATTRIBUTES - _IMMUTABLE_ATTRIBUTES = { - 'id', 'user_id', 'creation_time', 'filters', 'variables'} + _IMMUTABLE_ATTRIBUTES = {"id", "user_id", "creation_time", "filters", "variables"} # removed `dataset` from the set above since it overlaps with the Dataset # instance on self. `boxdata.dataset` simply points to the dataset url @@ -51,36 +50,35 @@ def __init__(self, shoji_tuple, dataset): self.dataset = dataset def __setattr__(self, attr, value): - """ known attributes should be readonly """ + """known attributes should be readonly""" if attr in self._IMMUTABLE_ATTRIBUTES: - raise AttributeError( - "Can't edit attibute '%s'" % attr) + raise AttributeError("Can't edit attibute '%s'" % attr) if attr in self._MUTABLE_ATTRIBUTES: raise AttributeError( "Can't edit '%s' of a CrunchBox. 
Create a new one with " - "the same filters and variables to update its metadata" % attr) + "the same filters and variables to update its metadata" % attr + ) object.__setattr__(self, attr, value) def __getattr__(self, attr): if attr in self._METADATA_ATTRIBUTES: return self.resource.metadata[attr] - if attr == 'filters': + if attr == "filters": # return a list of `Filters` instead of the filters expr on `body` _filters = [] for obj in self.resource.filters: - f_url = obj['filter'] - _filters.append( - Filter(self.dataset.resource.filters.index[f_url])) + f_url = obj["filter"] + _filters.append(Filter(self.dataset.resource.filters.index[f_url])) return _filters - if attr == 'variables': + if attr == "variables": # return a list of `Variables` instead of the where expr on `body` _var_urls = [] _var_map = self.resource.where.args[0].map for v in _var_map: - _var_urls.append(_var_map[v]['variable']) + _var_urls.append(_var_map[v]["variable"]) return [ Variable(entity, self.dataset) @@ -91,11 +89,10 @@ def __getattr__(self, attr): # all other attributes not catched so far if attr in self._ENTITY_ATTRIBUTES: return self.resource[attr] - raise AttributeError('CrunchBox has no attribute %s' % attr) + raise AttributeError("CrunchBox has no attribute %s" % attr) def __repr__(self): - return "".format( - self.title, self.id) + return "".format(self.title, self.id) def __str__(self): return self.title @@ -109,7 +106,7 @@ def widget_url(self): @widget_url.setter def widget_url(self, _): - """ prevent edits to the widget_url """ + """prevent edits to the widget_url""" raise AttributeError("Can't edit 'widget_url' of a CrunchBox") def iframe(self, logo=None, dimensions=None): @@ -117,28 +114,34 @@ def iframe(self, logo=None, dimensions=None): widget_url = self.widget_url if not isinstance(dimensions, dict): - raise TypeError('`dimensions` needs to be a dict') + raise TypeError("`dimensions` needs to be a dict") def _figure(html): - return '
' + ' {}'.format(html) + \ - '
' + return ( + '
' + " {}".format(html) + "
" + ) _iframe = ( '') + "" + ) if logo: - _img = '' + _img = ( + '' + ) _iframe = _figure(_img) + _iframe elif self.title: - _div = '
'\ - ' ' + self.title + ''\ - '
' + _div = ( + '
' + ' ' + self.title + "" + "
" + ) _iframe = _figure(_div) + _iframe - return _iframe.format(**locals()) \ No newline at end of file + return _iframe.format(**locals()) diff --git a/scrunch/cubes.py b/scrunch/cubes.py index d15a6be..0c03d62 100644 --- a/scrunch/cubes.py +++ b/scrunch/cubes.py @@ -45,7 +45,7 @@ def crtabs(dataset, variables, weight=None, filter_=None, transforms=None, **mea count=count(), weight=weight, filter=filter_, - **measures + **measures, ), transforms=transforms, ) diff --git a/scrunch/datasets.py b/scrunch/datasets.py index 9e932d5..ea01c01 100644 --- a/scrunch/datasets.py +++ b/scrunch/datasets.py @@ -34,16 +34,32 @@ from scrunch.folders import DatasetFolders from scrunch.views import DatasetViews from scrunch.scripts import DatasetScripts, SystemScript -from scrunch.helpers import (ReadOnly, _validate_category_rules, abs_url, - case_expr, download_file, shoji_entity_wrapper, - subvar_alias, validate_categories, shoji_catalog_wrapper, - get_else_case, else_case_not_selected, SELECTED_ID, - NOT_SELECTED_ID, NO_DATA_ID, valid_categorical_date, - generate_subvariable_codes) +from scrunch.helpers import ( + ReadOnly, + _validate_category_rules, + abs_url, + case_expr, + download_file, + shoji_entity_wrapper, + subvar_alias, + validate_categories, + shoji_catalog_wrapper, + get_else_case, + else_case_not_selected, + SELECTED_ID, + NOT_SELECTED_ID, + NO_DATA_ID, + valid_categorical_date, + generate_subvariable_codes, +) from scrunch.order import DatasetVariablesOrder, ProjectDatasetsOrder from scrunch.subentity import Deck, Filter, Multitable -from scrunch.variables import (combinations_from_map, combine_categories_expr, - combine_responses_expr, responses_from_map) +from scrunch.variables import ( + combinations_from_map, + combine_categories_expr, + combine_responses_expr, + responses_from_map, +) from scrunch.connections import LOG, _default_connection, _get_connection @@ -54,11 +70,9 @@ from urllib.parse import urljoin -_MR_TYPE = 'multiple_response' -CATEGORICAL_TYPES = { - 'categorical', 'multiple_response', 'categorical_array', -} -RESOLUTION_TYPES = ['Y', 'Q', 'M', 'W', 'D', 'h', 'm', 's', 'ms'] +_MR_TYPE = "multiple_response" +CATEGORICAL_TYPES = {"categorical", "multiple_response", "categorical_array"} +RESOLUTION_TYPES = ["Y", "Q", "M", "W", "D", "h", "m", "s", "ms"] class SavepointRestore: @@ -83,7 +97,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.savepoint.refresh() resp = self.savepoint.revert.post({}) if resp.status_code == 204: - return # Empty response, reverted. + return # Empty response, reverted. pycrunch.shoji.wait_progress(resp, self.dataset.resource.session) @@ -92,6 +106,7 @@ class NoExclusion: Use this context manager to temporarily operate on a dataset ignoring the exclusion filter. This will unset and re-set on exit. 
""" + def __init__(self, dataset): self.dataset = dataset self.exclusion = dataset.get_exclusion() @@ -106,7 +121,6 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.dataset.exclude(self.exclusion) - def _get_dataset(dataset, connection=None, editor=False, project=None): """ Helper method for specific get_dataset and get_streaming_dataset @@ -141,8 +155,7 @@ def _get_dataset(dataset, connection=None, editor=False, project=None): else: try: # search by id on any project - dataset_url = urljoin( - root.catalogs.datasets, '{}/'.format(dataset)) + dataset_url = urljoin(root.catalogs.datasets, "{}/".format(dataset)) shoji_ds = root.session.get(dataset_url).payload except pycrunch.ClientError as e: # it is ok to have a 404, it mean that given dataset reference @@ -151,13 +164,11 @@ def _get_dataset(dataset, connection=None, editor=False, project=None): raise e if shoji_ds is None: - result = root.follow("datasets_by_name", { - "name": dataset - }) + result = root.follow("datasets_by_name", {"name": dataset}) if not result.index: raise KeyError( - "Dataset (name or id: %s) not found in context." - % dataset) + "Dataset (name or id: %s) not found in context." % dataset + ) shoji_ds = result.by("name")[dataset].entity return shoji_ds, root @@ -186,16 +197,16 @@ def get_project(project, connection=None): connection = _default_connection(connection) sub_project = None - if '|' in project: - project_split = project.split('|') + if "|" in project: + project_split = project.split("|") project = project_split.pop(0) - sub_project = '|' + '|'.join(project_split) + sub_project = "|" + "|".join(project_split) try: - ret = connection.projects.by('name')[project].entity + ret = connection.projects.by("name")[project].entity except KeyError: try: - ret = connection.projects.by('id')[project].entity + ret = connection.projects.by("id")[project].entity except KeyError: raise KeyError("Project (name or id: %s) not found." % project) @@ -213,7 +224,8 @@ def get_personal_project(connection=None): if not connection: raise AttributeError( "Authenticate first with scrunch.connect() or by providing " - "config/environment variables") + "config/environment variables" + ) root = connection return Project(root.projects.personal) @@ -226,7 +238,7 @@ def get_user(user, connection=None): """ connection = _default_connection(connection) try: - ret = connection.users.by('email')[user].entity + ret = connection.users.by("email")[user].entity except KeyError: raise KeyError("User email '%s' not found." % user) return User(ret) @@ -240,7 +252,7 @@ def get_team(team, connection=None): """ connection = _default_connection(connection) try: - ret = connection.teams.by('name')[team].entity + ret = connection.teams.by("name")[team].entity except KeyError: raise KeyError("Team name: %s not found." 
% team) return Team(ret) @@ -248,8 +260,7 @@ def get_team(team, connection=None): def create_team(name, connection=None): connection = _default_connection(connection) - shoji_team = connection.teams.create( - shoji_entity_wrapper({'name': name})).refresh() + shoji_team = connection.teams.create(shoji_entity_wrapper({"name": name})).refresh() return Team(shoji_team) @@ -259,7 +270,7 @@ def list_geodata(name=None, connection=None): :return: Dict of geodata objects, keyed by geodata name """ connection = _default_connection(connection) - return connection.geodata.by('name') + return connection.geodata.by("name") def get_geodata(name=None, connection=None): @@ -275,8 +286,8 @@ def get_geodata(name=None, connection=None): class User: - _MUTABLE_ATTRIBUTES = {'name', 'email'} - _IMMUTABLE_ATTRIBUTES = {'id'} + _MUTABLE_ATTRIBUTES = {"name", "email"} + _IMMUTABLE_ATTRIBUTES = {"id"} _ENTITY_ATTRIBUTES = _MUTABLE_ATTRIBUTES | _IMMUTABLE_ATTRIBUTES def __init__(self, user_resource): @@ -288,7 +299,7 @@ def __getattr__(self, item): return self.resource.body[item] # Has to exist # Attribute doesn't exists, must raise an AttributeError - raise AttributeError('User has no attribute %s' % item) + raise AttributeError("User has no attribute %s" % item) def __repr__(self): return "".format(self.email, self.id) @@ -305,8 +316,9 @@ def teams(): if not connection: raise AttributeError( "Authenticate first with scrunch.connect() or by providing " - "config/environment variables") - return list(connection.teams.by('name').keys()) + "config/environment variables" + ) + return list(connection.teams.by("name").keys()) class Members: @@ -323,13 +335,13 @@ def list(self, permissions=False): :return: A list of members of the Entity as strings. A member can be a User or a Team. 
Returns ['user1@example.com', 'Team A'] """ - members = {'edit': [], 'view': []} if permissions else [] - for name, member in six.iteritems(self.resource.members.by('name')): + members = {"edit": [], "view": []} if permissions else [] + for name, member in six.iteritems(self.resource.members.by("name")): # members can be users or teams - user = member.get('email', name) + user = member.get("email", name) if permissions: - edit = member['permissions'][self._EDIT_ATTRIBUTE] - group = 'edit' if edit else 'view' + edit = member["permissions"][self._EDIT_ATTRIBUTE] + group = "edit" if edit else "view" members[group].append(user) else: members.append(user) @@ -347,8 +359,8 @@ def _validate_member(self, member): except KeyError: try: member = get_team(member) - except: - raise KeyError('Member %s is not a Team nor a User' % member) + except KeyError: + raise KeyError("Member %s is not a Team nor a User" % member) return member def remove(self, member): @@ -365,9 +377,9 @@ def add(self, member, edit=False): :return: None """ member = self._validate_member(member) - self.resource.members.patch({member.url: { - 'permissions': {self._EDIT_ATTRIBUTE: edit} - }}) + self.resource.members.patch( + {member.url: {"permissions": {self._EDIT_ATTRIBUTE: edit}}} + ) def edit(self, member, edit): """ @@ -378,22 +390,22 @@ def edit(self, member, edit): project.members.edit('mathias.bustamante@yougov.com', edit=True) """ member = self._validate_member(member) - self.resource.members.patch({member.url: { - 'permissions': {self._EDIT_ATTRIBUTE: edit}} - }) + self.resource.members.patch( + {member.url: {"permissions": {self._EDIT_ATTRIBUTE: edit}}} + ) class ProjectMembers(Members): - _EDIT_ATTRIBUTE = 'edit' + _EDIT_ATTRIBUTE = "edit" class TeamMembers(Members): - _EDIT_ATTRIBUTE = 'team_admin' + _EDIT_ATTRIBUTE = "team_admin" class Team: - _MUTABLE_ATTRIBUTES = {'name'} - _IMMUTABLE_ATTRIBUTES = {'id'} + _MUTABLE_ATTRIBUTES = {"name"} + _IMMUTABLE_ATTRIBUTES = {"id"} _ENTITY_ATTRIBUTES = _MUTABLE_ATTRIBUTES | _IMMUTABLE_ATTRIBUTES def __init__(self, team_resource): @@ -403,7 +415,7 @@ def __init__(self, team_resource): def __getattr__(self, item): if item in self._ENTITY_ATTRIBUTES: return self.resource.body[item] - raise AttributeError('Team has no attribute %s' % item) + raise AttributeError("Team has no attribute %s" % item) def __repr__(self): return "".format(self.name, self.id) @@ -420,10 +432,10 @@ def delete(self): class Project: - _MUTABLE_ATTRIBUTES = {'name', 'description', 'icon'} - _IMMUTABLE_ATTRIBUTES = {'id'} + _MUTABLE_ATTRIBUTES = {"name", "description", "icon"} + _IMMUTABLE_ATTRIBUTES = {"id"} _ENTITY_ATTRIBUTES = _MUTABLE_ATTRIBUTES | _IMMUTABLE_ATTRIBUTES - LAZY_ATTRIBUTES = {'order'} + LAZY_ATTRIBUTES = {"order"} def __init__(self, project_resource): self.resource = project_resource @@ -436,7 +448,7 @@ def __getattr__(self, item): elif item in self.LAZY_ATTRIBUTES: if not self._lazy: - if self.resource.session.feature_flags['old_projects_order']: + if self.resource.session.feature_flags["old_projects_order"]: datasets = self.resource.datasets self.order = ProjectDatasetsOrder(datasets, datasets.order) else: @@ -445,7 +457,7 @@ def __getattr__(self, item): self._lazy = True return getattr(self, item) - raise AttributeError('Project has no attribute %s' % item) + raise AttributeError("Project has no attribute %s" % item) def __repr__(self): return "".format(self.name, self.id) @@ -471,7 +483,6 @@ def users(self): LOG.warning("""This method is legacy and will be deprecated in future releases. 
Please make use of project.members.list() instead""") # noqa: E501 - users = [] return self.members.list() def remove_user(self, user): @@ -499,26 +510,25 @@ def edit_user(self, user, edit): LOG.warning("""This method is legacy and will be deprecated in future releases. Please make use of project.members.edit() instead""") # noqa: E501 - self.members.edit(user, {'permissions': {'edit': edit}}) + self.members.edit(user, {"permissions": {"edit": edit}}) def get_dataset(self, dataset): datasets = self.resource.datasets try: - shoji_ds = datasets.by('name')[dataset].entity + shoji_ds = datasets.by("name")[dataset].entity except KeyError: try: - shoji_ds = datasets.by('id')[dataset].entity + shoji_ds = datasets.by("id")[dataset].entity except KeyError: raise KeyError( - "Dataset (name or id: %s) not found in project." % dataset) + "Dataset (name or id: %s) not found in project." % dataset + ) ds = BaseDataset(shoji_ds) return ds def create_project(self, name): # This should be a method of the Project class - proj_res = self.resource.create(shoji_entity_wrapper({ - 'name': name - })).refresh() + proj_res = self.resource.create(shoji_entity_wrapper({"name": name})).refresh() return Project(proj_res) # Compatibility method to comply with Group API @@ -526,17 +536,18 @@ def create_project(self, name): @property def is_root(self): - return self.resource.catalogs['project'].endswith('/projects/') + return self.resource.catalogs["project"].endswith("/projects/") def get(self, path): from scrunch.order import Path, InvalidPathError + self.resource.refresh() # Always up to date node = self for p_name in Path(path).get_parts(): try: node = node.get_child(p_name) except KeyError: - raise InvalidPathError('Project not found %s' % p_name) + raise InvalidPathError("Project not found %s" % p_name) return node def __getitem__(self, path): @@ -544,16 +555,17 @@ def __getitem__(self, path): def get_child(self, name): from scrunch.order import InvalidPathError - by_name = self.resource.by('name') + + by_name = self.resource.by("name") if name in by_name: # Found by name, if it's not a folder, return the variable tup = by_name[name] - if tup.type == 'project': + if tup.type == "project": return Project(tup.entity) return self.root.dataset[name] - raise InvalidPathError('Invalid path: %s' % name) + raise InvalidPathError("Invalid path: %s" % name) @property def children(self): @@ -562,9 +574,9 @@ def children(self): self.resource.refresh() for child_url in self.resource.graph: tup = self.resource.index[child_url] - if tup['type'] == 'project': + if tup["type"] == "project": yield Project(tup.entity) - elif tup['type'] == 'dataset': + elif tup["type"] == "dataset": yield Dataset(tup.entity) def delete(self): @@ -577,31 +589,31 @@ def move_here(self, items, **kwargs): if not items: return items = items if isinstance(items, (list, tuple)) else [items] - position, before, after = [kwargs.get('position'), - kwargs.get('before'), kwargs.get('after')] - kwargs = { - 'index': { - item.url: {} for item in items - } - } + position, before, after = [ + kwargs.get("position"), + kwargs.get("before"), + kwargs.get("after"), + ] + kwargs = {"index": {item.url: {} for item in items}} if {position, before, after} != {None}: # Some of the positional args was not None graph = self._position_items(items, position, before, after) - kwargs['graph'] = graph + kwargs["graph"] = graph self.resource.patch(shoji_entity_wrapper({}, **kwargs)) self.resource.refresh() for item in items: item.resource.refresh() def _position_items(self, 
new_items, position, before, after): - graph = getattr(self.resource, 'graph', []) + graph = getattr(self.resource, "graph", []) if before is not None or after is not None: # Before and After are strings that map to a Project or Dataset.name target = before or after index = self.resource.index - position = [x for x, _u in enumerate(graph) if index[_u]['name'] == target] + position = [x for x, _u in enumerate(graph) if index[_u]["name"] == target] if not position: from scrunch.order import InvalidPathError + raise InvalidPathError("No project with name %s found" % target) position = position[0] if before is not None: @@ -620,52 +632,51 @@ def _position_items(self, new_items, position, before, after): def place(self, entity, path, position=None, before=None, after=None): from scrunch.order import Path, InvalidPathError + if not Path(path).is_absolute: raise InvalidPathError( - 'Invalid path %s: only absolute paths are allowed.' % path + "Invalid path %s: only absolute paths are allowed." % path ) position = 0 if (before or after) else position target = self.get(path) target.move_here([entity], position=position, before=before, after=after) def reorder(self, items): - name2tup = self.resource.by('name') + name2tup = self.resource.by("name") graph = [ name2tup[c].entity_url if isinstance(c, six.string_types) else c.url for c in items ] - self.resource.patch({ - 'element': 'shoji:entity', - 'body': {}, - 'index': {}, - 'graph': graph - }) + self.resource.patch( + {"element": "shoji:entity", "body": {}, "index": {}, "graph": graph} + ) self.resource.refresh() def append(self, *children): self.move_here(children) def insert(self, *children, **kwargs): - self.move_here(children, position=kwargs.get('position', 0)) + self.move_here(children, position=kwargs.get("position", 0)) def move(self, path, position=-1, before=None, after=None): from scrunch.order import Path, InvalidPathError + ppath = Path(path) if not ppath.is_absolute: raise InvalidPathError( - 'Invalid path %s: only absolute paths are allowed.' % path + "Invalid path %s: only absolute paths are allowed." % path ) parts = ppath.get_parts() top_proj_name, sub_path = parts[0], parts[1:] try: - top_project = self.projects_root().by('name')[top_proj_name].entity + top_project = self.projects_root().by("name")[top_proj_name].entity except KeyError: raise InvalidPathError("Invalid target project: %s" % path) target = top_project for name in sub_path: - target = target.by('name')[name] - if not target['type'] == 'project': + target = target.by("name")[name] + if not target["type"] == "project": raise InvalidPathError("Invalid target project: %s" % path) target = target.entity @@ -675,14 +686,13 @@ def move(self, path, position=-1, before=None, after=None): def projects_root(self): # Hack, because we cannot navigate to the projects catalog from a # single catalog entity. 
- projects_root_url = self.url.rsplit('/', 2)[0] + '/' + projects_root_url = self.url.rsplit("/", 2)[0] + "/" return self.resource.session.get(projects_root_url).payload class DatasetSettings(dict): - def __readonly__(self, *args, **kwargs): - raise RuntimeError('Please use the change_settings() method instead.') + raise RuntimeError("Please use the change_settings() method instead.") __setitem__ = __readonly__ __delitem__ = __readonly__ @@ -705,16 +715,17 @@ def __getitem__(self, item): name or URL """ # Check if the attribute corresponds to a variable alias - variable = self._catalog.by('alias').get(item) + variable = self._catalog.by("alias").get(item) if variable is None: # Not found by alias - variable = self._catalog.by('name').get(item) + variable = self._catalog.by("name").get(item) if variable is None: # Not found by name variable = self._catalog.index.get(item) if variable is None: # Not found by URL # Variable doesn't exists, must raise a ValueError raise ValueError( - 'Entity %s has no (sub)variable with a name or alias %s' - % (self.name, item)) + "Entity %s has no (sub)variable with a name or alias %s" + % (self.name, item) + ) # make sure we pass the parent dataset to subvariables if isinstance(self, Variable): return Variable(variable, self.dataset) @@ -740,7 +751,9 @@ def order(self): """ if "hier" not in self._catalog.orders: - raise TypeError("This dataset does not expose a hierarchical order. Use .folders") + raise TypeError( + "This dataset does not expose a hierarchical order. Use .folders" + ) if self._order is None: order = self._catalog.hier @@ -756,7 +769,7 @@ def _var_create_reload_return(self, payload): # needed to update the variables collection self._reload_variables() # return an instance of Variable - return self[new_var['self']] + return self[new_var["self"]] def __iter__(self): for var in self._vars: @@ -801,14 +814,25 @@ class BaseDataset(ReadOnly, DatasetVariablesMixin): A pycrunch.shoji.Entity wrapper that provides basic dataset methods. """ - _MUTABLE_ATTRIBUTES = {'name', 'notes', 'description', 'is_published', - 'archived', 'end_date', 'start_date', 'streaming'} - _IMMUTABLE_ATTRIBUTES = {'id', 'creation_time', 'modification_time', - 'size'} + _MUTABLE_ATTRIBUTES = { + "name", + "notes", + "description", + "is_published", + "archived", + "end_date", + "start_date", + "streaming", + } + _IMMUTABLE_ATTRIBUTES = {"id", "creation_time", "modification_time", "size"} _ENTITY_ATTRIBUTES = _MUTABLE_ATTRIBUTES | _IMMUTABLE_ATTRIBUTES - _EDITABLE_SETTINGS = {'viewers_can_export', 'viewers_can_change_weight', - 'viewers_can_share', 'dashboard_deck', - 'variable_folders'} + _EDITABLE_SETTINGS = { + "viewers_can_export", + "viewers_can_change_weight", + "viewers_can_share", + "dashboard_deck", + "variable_folders", + } def __init__(self, resource): """ @@ -831,7 +855,8 @@ def __getattr__(self, item): def __repr__(self): return "<{}: name='{}'; id='{}'>".format( - self.__class__.__name__, self.name, self.id) + self.__class__.__name__, self.name, self.id + ) def __str__(self): return self.name @@ -839,14 +864,14 @@ def __str__(self): @property def editor(self): try: - return User(self.resource.follow('editor_url')) + return User(self.resource.follow("editor_url")) except pycrunch.lemonpy.ClientError: return self.resource.body.current_editor @editor.setter def editor(self, _): # Protect the `editor` from external modifications. 
- raise TypeError('Unsupported operation on the editor property') + raise TypeError("Unsupported operation on the editor property") def change_editor(self, user): """ @@ -866,17 +891,19 @@ def change_editor(self, user): # Otherwise, assume the provided argument is a URL user_url = user - payload = shoji_entity_wrapper({'current_editor': user_url}) + payload = shoji_entity_wrapper({"current_editor": user_url}) self.resource.patch(payload) self.resource.refresh() def make_mutable(self): from scrunch.mutable_dataset import MutableDataset + return MutableDataset(self.resource) def make_streaming(self): from scrunch.streaming_dataset import StreamingDataset - self.edit(streaming='streaming') + + self.edit(streaming="streaming") return StreamingDataset(self.resource) @property @@ -888,19 +915,17 @@ def owner(self): warn("Access Dataset.project instead", DeprecationWarning) owner_url = self.resource.body.owner try: - if '/users/' in owner_url: - return User(self.resource.follow('owner_url')) + if "/users/" in owner_url: + return User(self.resource.follow("owner_url")) else: - return Project(self.resource.follow('owner_url')) + return Project(self.resource.follow("owner_url")) except pycrunch.lemonpy.ClientError: return owner_url @owner.setter def owner(self, _): # Protect `owner` from external modifications. - raise TypeError( - 'Unsupported operation on the owner property' - ) + raise TypeError("Unsupported operation on the owner property") def change_owner(self, user=None, project=None): """ @@ -910,16 +935,17 @@ def change_owner(self, user=None, project=None): """ warn("Use Dataset.move() to move datasets between projects", DeprecationWarning) if user and project: - raise AttributeError( - "Must provide user or project. Not both" - ) + raise AttributeError("Must provide user or project. Not both") if user: - warn("Changing owner to users is deprecated. Move to projects", DeprecationWarning) + warn( + "Changing owner to users is deprecated. Move to projects", + DeprecationWarning, + ) if not isinstance(user, User): user = get_user(user) owner_url = user.url - self.resource.patch({'owner': owner_url}) + self.resource.patch({"owner": owner_url}) self.resource.refresh() elif project: if not isinstance(project, Project): @@ -943,7 +969,7 @@ def settings(self): @settings.setter def settings(self, _): # Protect the `settings` property from external modifications. - raise TypeError('Unsupported operation on the settings property') + raise TypeError("Unsupported operation on the settings property") @property def filters(self): @@ -956,7 +982,7 @@ def filters(self): @filters.setter def filters(self, _): # Protect the `filters` property from external modifications. - raise TypeError('Use add_filter method to add filters') + raise TypeError("Use add_filter method to add filters") @property def decks(self): @@ -969,7 +995,7 @@ def decks(self): @decks.setter def decks(self, _): # Protect the `decks` property from external modifications. 
- raise TypeError('Use add_deck method to add a new deck') + raise TypeError("Use add_deck method to add a new deck") @property def multitables(self): @@ -982,11 +1008,12 @@ def multitables(self): @multitables.setter def multitables(self, _): # Protect the `multitables` property from direct modifications - raise TypeError('Use the `create_multitable` method to add one') + raise TypeError("Use the `create_multitable` method to add one") @property def crunchboxes(self): from scrunch.crunchboxes import CrunchBox + _crunchboxes = [] for shoji_tuple in self.resource.boxdata.index.values(): _crunchboxes.append(CrunchBox(shoji_tuple, self)) @@ -995,11 +1022,10 @@ def crunchboxes(self): @crunchboxes.setter def crunchboxes(self, _): # Protect the `crunchboxes` property from direct modifications - raise TypeError('Use the `create_crunchbox` method to add one') + raise TypeError("Use the `create_crunchbox` method to add one") def _load_settings(self): - settings = self.resource.session.get( - self.resource.fragments.settings).payload + settings = self.resource.session.get(self.resource.fragments.settings).payload self._settings = DatasetSettings( (_name, _value) for _name, _value in settings.body.items() ) @@ -1007,27 +1033,23 @@ def _load_settings(self): def change_settings(self, **kwargs): incoming_settings = set(kwargs.keys()) - invalid_settings = incoming_settings.difference( - self._EDITABLE_SETTINGS) + invalid_settings = incoming_settings.difference(self._EDITABLE_SETTINGS) if invalid_settings: raise ValueError( - 'Invalid or read-only settings: %s' - % ','.join(list(invalid_settings)) + "Invalid or read-only settings: %s" % ",".join(list(invalid_settings)) ) - if 'dashboard_deck' in kwargs: - ddeck = kwargs['dashboard_deck'] + if "dashboard_deck" in kwargs: + ddeck = kwargs["dashboard_deck"] if isinstance(ddeck, Deck): - kwargs['dashboard_deck'] = ddeck.resource.self + kwargs["dashboard_deck"] = ddeck.resource.self - settings_payload = { - setting: kwargs[setting] for setting in incoming_settings - } + settings_payload = {setting: kwargs[setting] for setting in incoming_settings} if settings_payload: self.resource.session.patch( self.resource.fragments.settings, json.dumps(settings_payload), - headers={'Content-Type': 'application/json'} + headers={"Content-Type": "application/json"}, ) self._settings = None # After changing settings, reload folders that depend on it @@ -1041,12 +1063,12 @@ def edit(self, **kwargs): for key in kwargs: if key not in self._MUTABLE_ATTRIBUTES: raise AttributeError( - "Can't edit attibute %s of dataset %s" - % (key, self.name)) - if key in ['start_date', 'end_date'] and \ - (isinstance(kwargs[key], datetime.date) or - isinstance(kwargs[key], datetime.datetime) - ): + "Can't edit attibute %s of dataset %s" % (key, self.name) + ) + if key in ["start_date", "end_date"] and ( + isinstance(kwargs[key], datetime.date) + or isinstance(kwargs[key], datetime.datetime) + ): kwargs[key] = kwargs[key].isoformat() return self.resource.edit(**kwargs) @@ -1066,23 +1088,16 @@ def add_user(self, user, edit=False): user = user.email payload = { - 'send_notification': True, - 'message': "", - user: { - 'dataset_permissions': { - 'view': True, - 'edit': edit, - }, - }, - 'url_base': - self.resource.self.split('api')[0] - + 'password/change/${token}/', - 'dataset_url': - self.resource.self.replace('/api/datasets/', '/dataset/'), + "send_notification": True, + "message": "", + user: {"dataset_permissions": {"view": True, "edit": edit}}, + "url_base": self.resource.self.split("api")[0] 
+ + "password/change/${token}/", + "dataset_url": self.resource.self.replace("/api/datasets/", "/dataset/"), } self.resource.permissions.patch(payload) - def create_fill_values(self, variables, name, alias, description=''): + def create_fill_values(self, variables, name, alias, description=""): """ This function is similar to create_single_categorical in the sense that the output is a 1D variable. @@ -1110,7 +1125,7 @@ def create_fill_values(self, variables, name, alias, description=''): :param description: Description of the new variable :return: """ - if not hasattr(self.resource, 'variables'): + if not hasattr(self.resource, "variables"): self.resource.refresh() # Pluck `else` case out. @@ -1128,17 +1143,24 @@ def create_fill_values(self, variables, name, alias, description=''): raise ValueError("All variables must be of type `categorical`") cat_ids = list(range(1, len(variables) + 1)) - args = [{ - "column": cat_ids, - "type": { - "class": "categorical", - "ordinal": False, - "categories": [ - {"id": c, "name": str(c), "missing": False, "numeric_value": None} - for c in cat_ids - ] + args = [ + { + "column": cat_ids, + "type": { + "class": "categorical", + "ordinal": False, + "categories": [ + { + "id": c, + "name": str(c), + "missing": False, + "numeric_value": None, + } + for c in cat_ids + ], + }, } - }] + ] exprs = [parse_expr(c["case"]) for c in variables] exprs = process_expr(exprs, self.resource) args.extend(exprs) @@ -1146,39 +1168,40 @@ def create_fill_values(self, variables, name, alias, description=''): if "name" in else_case: # We are in the else_case of a category. Add there the extra default args[0]["column"].append(else_case["id"]) - args[0]["type"]["categories"].append({ - "name": else_case["name"], - "missing": else_case.get("missing", False), - "id": else_case["id"], - "numeric_value": else_case.get("numeric_value", None), - }) + args[0]["type"]["categories"].append( + { + "name": else_case["name"], + "missing": else_case.get("missing", False), + "id": else_case["id"], + "numeric_value": else_case.get("numeric_value", None), + } + ) expr = {"function": "case", "args": args} - fill_map = {str(cid): {"variable": vars_by_alias[v["variable"]]["id"]} - for cid, v in zip(cat_ids, variables)} + fill_map = { + str(cid): {"variable": vars_by_alias[v["variable"]]["id"]} + for cid, v in zip(cat_ids, variables) + } if "variable" in else_case: # We are in the case of a default fill, replace the -1 with the new # variable fill_map["-1"] = {"variable": vars_by_alias[else_case["variable"]]["id"]} - fill_expr = { - "function": "fill", - "args": [ - expr, - {"map": fill_map} - ] - } - payload = shoji_entity_wrapper({ - "alias": alias, - "name": name, - "description": description, - "derivation": fill_expr - }) + fill_expr = {"function": "fill", "args": [expr, {"map": fill_map}]} + payload = shoji_entity_wrapper( + { + "alias": alias, + "name": name, + "description": description, + "derivation": fill_expr, + } + ) return self._var_create_reload_return(payload) - def create_single_response(self, categories, name, alias, description='', - missing=True, notes=''): + def create_single_response( + self, categories, name, alias, description="", missing=True, notes="" + ): """ Creates a categorical variable deriving from other variables. Uses Crunch's `case` function. 
@@ -1187,36 +1210,31 @@ def create_single_response(self, categories, name, alias, description='',
         # keep a copy of categories because we are gonna mutate it later
         categories_copy = [copy.copy(c) for c in categories]
         for cat in categories:
-            case = cat.pop('case')
+            case = cat.pop("case")
             case = get_else_case(case, categories_copy)
             cases.append(case)
             # append a default numeric_value if not found
-            if 'numeric_value' not in cat:
-                cat['numeric_value'] = None
+            if "numeric_value" not in cat:
+                cat["numeric_value"] = None
 
-        if not hasattr(self.resource, 'variables'):
+        if not hasattr(self.resource, "variables"):
             self.resource.refresh()
 
-        args = [{
-            'column': [c['id'] for c in categories],
-            'type': {
-                'value': {
-                    'class': 'categorical',
-                    'categories': categories
-                }
+        args = [
+            {
+                "column": [c["id"] for c in categories],
+                "type": {"value": {"class": "categorical", "categories": categories}},
             }
-        }]
+        ]
 
-        for cat in args[0]['type']['value']['categories']:
-            cat.setdefault('missing', False)
+        for cat in args[0]["type"]["value"]["categories"]:
+            cat.setdefault("missing", False)
 
         if missing:
-            args[0]['column'].append(-1)
-            args[0]['type']['value']['categories'].append(dict(
-                id=-1,
-                name='No Data',
-                numeric_value=None,
-                missing=True))
+            args[0]["column"].append(-1)
+            args[0]["type"]["value"]["categories"].append(
+                dict(id=-1, name="No Data", numeric_value=None, missing=True)
+            )
 
         more_args = []
         for case in cases:
@@ -1224,18 +1242,16 @@ def create_single_response(self, categories, name, alias, description='',
 
         more_args = process_expr(more_args, self.resource)
 
-        expr = dict(function='case', args=args + more_args)
+        expr = dict(function="case", args=args + more_args)
 
-        payload = shoji_entity_wrapper(dict(
-            alias=alias,
-            name=name,
-            expr=expr,
-            description=description,
-            notes=notes))
+        payload = shoji_entity_wrapper(
+            dict(
+                alias=alias, name=name, expr=expr, description=description, notes=notes
+            )
+        )
 
         return self._var_create_reload_return(payload)
 
-    def rollup(self, variable_alias, name, alias, resolution, description='',
-               notes=''):
+    def rollup(self, variable_alias, name, alias, resolution, description="", notes=""):
         """
         Rolls the source datetime variable into a new derived categorical
         variable.
Available resolutions are: [Y, Q, M, W, D, h, m, s, ms] @@ -1244,38 +1260,39 @@ def rollup(self, variable_alias, name, alias, resolution, description='', :alias: alias for the new derived variable :resolution: one of [Y, Q, M, W, D, h, m, s, ms] """ - assert self[variable_alias].type == 'datetime', \ - 'rollup() is only allowed for datetime variable types' + assert ( + self[variable_alias].type == "datetime" + ), "rollup() is only allowed for datetime variable types" self._validate_vartypes(self[variable_alias].type, resolution) expr = { - 'function': 'rollup', - 'args': [ - { - 'variable': self[variable_alias].url - }, - { - 'value': resolution - } - ] + "function": "rollup", + "args": [{"variable": self[variable_alias].url}, {"value": resolution}], } - payload = shoji_entity_wrapper(dict( - alias=alias, - name=name, - expr=expr, - description=description, - notes=notes)) + payload = shoji_entity_wrapper( + dict( + alias=alias, name=name, expr=expr, description=description, notes=notes + ) + ) new_var = self.resource.variables.create(payload) # needed to update the variables collection self._reload_variables() # return the variable instance - return self[new_var['body']['alias']] - - def derive_multiple_response(self, categories, subvariables, name, alias, - description='', notes='', uniform_basis=False): + return self[new_var["body"]["alias"]] + + def derive_multiple_response( + self, + categories, + subvariables, + name, + alias, + description="", + notes="", + uniform_basis=False, + ): """ This is the generic approach to create_multiple_response but this allows the definition of any set of categories and rules (expressions) @@ -1305,86 +1322,93 @@ def derive_multiple_response(self, categories, subvariables, name, alias, categories = validate_categories(categories) # validate that every subvar defines rules for all categories for subvar in subvariables: - _validate_category_rules(categories, subvar['cases']) + _validate_category_rules(categories, subvar["cases"]) responses_map = collections.OrderedDict() for subvar in subvariables: _cases = [] - for case in subvar['cases'].values(): + for case in subvar["cases"].values(): if isinstance(case, six.string_types): _case = process_expr(parse_expr(case), self.resource) _cases.append(_case) - resp_id = '%04d' % subvar['id'] + resp_id = "%04d" % subvar["id"] responses_map[resp_id] = case_expr( _cases, - name=subvar['name'], - alias='%s_%d' % (alias, subvar['id']), - categories=categories + name=subvar["name"], + alias="%s_%d" % (alias, subvar["id"]), + categories=categories, ) - payload = shoji_entity_wrapper({ - 'name': name, - 'alias': alias, - 'description': description, - 'notes': notes, - 'uniform_basis': uniform_basis, - 'derivation': { - 'function': 'array', - 'args': [{ - 'function': 'make_frame', - 'args': [ - {'map': responses_map}, - {'value': list(responses_map.keys())} - ] - }] + payload = shoji_entity_wrapper( + { + "name": name, + "alias": alias, + "description": description, + "notes": notes, + "uniform_basis": uniform_basis, + "derivation": { + "function": "array", + "args": [ + { + "function": "make_frame", + "args": [ + {"map": responses_map}, + {"value": list(responses_map.keys())}, + ], + } + ], + }, } - }) + ) return self._var_create_reload_return(payload) - def create_multiple_response(self, responses, name, alias, description='', - notes=''): + def create_multiple_response( + self, responses, name, alias, description="", notes="" + ): """ Creates a Multiple response (array) of only 2 categories, selected and not 
selected. """ responses_map = collections.OrderedDict() for resp in responses: - case = resp['case'] + case = resp["case"] case = get_else_case(case, responses) if isinstance(case, six.string_types): case = process_expr(parse_expr(case), self.resource) - resp_id = '%04d' % resp['id'] + resp_id = "%04d" % resp["id"] responses_map[resp_id] = case_expr( - [case,], - name=resp['name'], - alias='%s_%d' % (alias, resp['id']) + [case], name=resp["name"], alias="%s_%d" % (alias, resp["id"]) ) - payload = shoji_entity_wrapper({ - 'name': name, - 'alias': alias, - 'description': description, - 'notes': notes, - 'derivation': { - 'function': 'array', - 'args': [{ - 'function': 'make_frame', - 'args': [ - {'map': responses_map}, - {'value': list(responses_map.keys())} - ] - }] + payload = shoji_entity_wrapper( + { + "name": name, + "alias": alias, + "description": description, + "notes": notes, + "derivation": { + "function": "array", + "args": [ + { + "function": "make_frame", + "args": [ + {"map": responses_map}, + {"value": list(responses_map.keys())}, + ], + } + ], + }, } - }) + ) return self._var_create_reload_return(payload) def variable_aliases(self, include_subvariables=False): existing_aliases = set() # We have to fetch from the `/table/` endpoint because it includes # the subvariables aliases. - variables_metadata = self.resource.table['metadata'] + variables_metadata = self.resource.table["metadata"] for var_tuple in variables_metadata.values(): var_alias = var_tuple["alias"] existing_aliases.add(var_alias) @@ -1398,8 +1422,15 @@ def get_url_by_alias(self, alias): # This helper allows to be mocked for tests rather than __getitem__ return self[alias].url - def bind_categorical_array(self, name, alias, subvariables, description='', - notes='', subvariable_codes=None): + def bind_categorical_array( + self, + name, + alias, + subvariables, + description="", + notes="", + subvariable_codes=None, + ): """ Creates a new categorical_array where subvariables is a subset of categorical variables already existing in the DS. 
@@ -1422,8 +1453,8 @@ def bind_categorical_array(self, name, alias, subvariables, description='', # creates ids if 'id' not present in subvariables list for i, elem in enumerate(subvariables, start=1): - if 'id' not in elem: - elem.update({'id': str(i)}) + if "id" not in elem: + elem.update({"id": str(i)}) if subvariable_codes is None: # The user did not provide the subvariable codes to use in this @@ -1432,7 +1463,8 @@ def bind_categorical_array(self, name, alias, subvariables, description='', if len(subvariable_codes) != len(subvariables): msg = "Should provide {} codes for {} subvariables".format( - len(subvariable_codes), len(subvariables)) + len(subvariable_codes), len(subvariables) + ) raise ValueError(msg) subreferences = [] @@ -1440,44 +1472,58 @@ def bind_categorical_array(self, name, alias, subvariables, description='', subvar_name = subvar.get("name", subvar.get("alias")) or sv_code subreferences.append({"alias": sv_code, "name": subvar_name}) - array_map = {v['id']: {'variable': self.get_url_by_alias(v['alias'])} for v in subvariables} + array_map = { + v["id"]: {"variable": self.get_url_by_alias(v["alias"])} + for v in subvariables + } expression = { - 'function': 'array', - 'args': [{'function': 'make_frame', 'args': [{'map': array_map}]}], - "references": { - "subreferences": subreferences - } + "function": "array", + "args": [{"function": "make_frame", "args": [{"map": array_map}]}], + "references": {"subreferences": subreferences}, } - payload = shoji_entity_wrapper({ - 'name': name, - 'alias': alias, - 'description': description, - 'notes': notes, - 'derivation': expression - }) + payload = shoji_entity_wrapper( + { + "name": name, + "alias": alias, + "description": description, + "notes": notes, + "derivation": expression, + } + ) return self._var_create_reload_return(payload) - def create_numeric(self, alias, name, derivation, description='', notes=''): + def create_numeric(self, alias, name, derivation, description="", notes=""): """ Used to create new numeric variables using Crunch's derived expressions """ expr = process_expr(parse_expr(derivation), self.resource) - if not hasattr(self.resource, 'variables'): + if not hasattr(self.resource, "variables"): self.resource.refresh() - payload = shoji_entity_wrapper(dict( - alias=alias, - name=name, - derivation=expr, - description=description, - notes=notes - )) + payload = shoji_entity_wrapper( + dict( + alias=alias, + name=name, + derivation=expr, + description=description, + notes=notes, + ) + ) return self._var_create_reload_return(payload) - def create_categorical(self, categories, alias, name, multiple, description='', - notes='', missing_case=None, uniform_basis=False): + def create_categorical( + self, + categories, + alias, + name, + multiple, + description="", + notes="", + missing_case=None, + uniform_basis=False, + ): """ Used to create new categorical variables using Crunchs's `case` function @@ -1568,7 +1614,7 @@ def create_categorical(self, categories, alias, name, multiple, description='', multiple=True, missing_case='missing(var_1)' """ - cats_have_missing = any(['missing_case' in c.keys() for c in categories]) + cats_have_missing = any(["missing_case" in c.keys() for c in categories]) # Initially validate that we dont have `missing_case` argument and `missing_case` # in the categories list @@ -1581,78 +1627,108 @@ def create_categorical(self, categories, alias, name, multiple, description='', if missing_case: cats_have_missing = True for cat in categories: - cat['missing_case'] = missing_case + 
cat["missing_case"] = missing_case # In the case of MR and all cases declare a 'missing_case' if multiple and cats_have_missing: _categories = [ - {'id': SELECTED_ID, 'name': 'Selected', 'selected': True}, - {'id': NOT_SELECTED_ID, 'name': 'Not Selected'}, - {'id': NO_DATA_ID, 'name': 'No Data', 'missing': True} + {"id": SELECTED_ID, "name": "Selected", "selected": True}, + {"id": NOT_SELECTED_ID, "name": "Not Selected"}, + {"id": NO_DATA_ID, "name": "No Data", "missing": True}, ] _subvariables = [] for sv in categories: - data = { - 'id': sv['id'], - 'name': sv['name'] - } + data = {"id": sv["id"], "name": sv["name"]} # build special expressions for 'else' case if exist - else_not_selected = else_case_not_selected(sv['case'], categories, sv.get('missing_case')) - sv['case'] = get_else_case(sv['case'], categories) + else_not_selected = else_case_not_selected( + sv["case"], categories, sv.get("missing_case") + ) + sv["case"] = get_else_case(sv["case"], categories) - if 'missing_case' in sv: - selected_case = '({}) and not ({})'.format(sv['case'], sv['missing_case']) - not_selected_case = 'not {}'.format(selected_case) + if "missing_case" in sv: + selected_case = "({}) and not ({})".format( + sv["case"], sv["missing_case"] + ) + not_selected_case = "not {}".format(selected_case) if else_not_selected: not_selected_case = else_not_selected - data.update({ - 'cases': { - SELECTED_ID: selected_case, - NOT_SELECTED_ID: not_selected_case, - NO_DATA_ID: sv['missing_case'] + data.update( + { + "cases": { + SELECTED_ID: selected_case, + NOT_SELECTED_ID: not_selected_case, + NO_DATA_ID: sv["missing_case"], + } } - }) + ) else: - data.update({ - 'cases': { - SELECTED_ID: sv['case'], - NOT_SELECTED_ID: 'not ({})'.format(sv['case']), + data.update( + { + "cases": { + SELECTED_ID: sv["case"], + NOT_SELECTED_ID: "not ({})".format(sv["case"]), + } } - }) + ) _subvariables.append(data) - return self.derive_multiple_response(categories=_categories, - subvariables=_subvariables, name=name, alias=alias, - description=description, notes=notes, uniform_basis=uniform_basis) + return self.derive_multiple_response( + categories=_categories, + subvariables=_subvariables, + name=name, + alias=alias, + description=description, + notes=notes, + uniform_basis=uniform_basis, + ) elif multiple: return self.create_multiple_response( - categories, alias=alias, name=name, description=description, - notes=notes) + categories, alias=alias, name=name, description=description, notes=notes + ) else: return self.create_single_response( - categories, alias=alias, name=name, description=description, - notes=notes) + categories, alias=alias, name=name, description=description, notes=notes + ) - def _validate_vartypes(self, var_type, resolution=None, subvariables=None, - categories=None): - if var_type not in ('text', 'numeric', 'categorical', 'datetime', - 'multiple_response', 'categorical_array'): + def _validate_vartypes( + self, var_type, resolution=None, subvariables=None, categories=None + ): + if var_type not in ( + "text", + "numeric", + "categorical", + "datetime", + "multiple_response", + "categorical_array", + ): raise InvalidVariableTypeError - if var_type == 'datetime' and resolution not in RESOLUTION_TYPES: + if var_type == "datetime" and resolution not in RESOLUTION_TYPES: raise InvalidParamError( - 'Include a valid resolution parameter when creating \ - datetime variables. %s' % RESOLUTION_TYPES) + "Include a valid resolution parameter when creating \ + datetime variables. 
%s" + % RESOLUTION_TYPES + ) - array_types = ('multiple_response', 'categorical_array') + array_types = ("multiple_response", "categorical_array") if var_type in array_types and not isinstance(subvariables, list): raise InvalidParamError( - 'Include subvariables when creating %s variables' % var_type) + "Include subvariables when creating %s variables" % var_type + ) - def create_variable(self, var_type, name, alias=None, description='', - resolution=None, subvariables=None, categories=None, values=None): + def create_variable( + self, + var_type, + name, + alias=None, + description="", + resolution=None, + subvariables=None, + categories=None, + values=None, + ): """ A variable can be of type: text, numeric, categorical, datetime, multiple_response or categorical_array. @@ -1693,35 +1769,44 @@ def create_variable(self, var_type, name, alias=None, description='', values = [1,4,5,2,1,3,1] """ self._validate_vartypes(var_type, resolution, subvariables, categories) - payload = { - 'type': var_type, - 'name': name, - 'description': description, - } + payload = {"type": var_type, "name": name, "description": description} if alias: - payload['alias'] = alias + payload["alias"] = alias if resolution: - payload['resolution'] = resolution - if var_type == 'multiple_response' and categories is None: - payload['categories'] = [ - {'name': 'Not selected', 'id': NOT_SELECTED_ID, 'numeric_value': 2, 'missing': False}, - {'name': 'Selected', 'id': SELECTED_ID, 'numeric_value': 1, 'missing': False, 'selected': True}, + payload["resolution"] = resolution + if var_type == "multiple_response" and categories is None: + payload["categories"] = [ + { + "name": "Not selected", + "id": NOT_SELECTED_ID, + "numeric_value": 2, + "missing": False, + }, + { + "name": "Selected", + "id": SELECTED_ID, + "numeric_value": 1, + "missing": False, + "selected": True, + }, ] if categories: - payload['categories'] = categories + payload["categories"] = categories if subvariables: - payload['subreferences'] = [] + payload["subreferences"] = [] for item in subvariables: - subrefs = {'name': item['name']} - if item.get('alias'): - subrefs['alias'] = item['alias'] - payload['subreferences'].append(subrefs) + subrefs = {"name": item["name"]} + if item.get("alias"): + subrefs["alias"] = item["alias"] + payload["subreferences"].append(subrefs) if values: - payload['values'] = values + payload["values"] = values self._var_create_reload_return(shoji_entity_wrapper(payload)) - def copy_variable(self, variable, name, alias, derived=None, subvariable_codes=None): + def copy_variable( + self, variable, name, alias, derived=None, subvariable_codes=None + ): """ Makes a copy of a Variable using the `copy_variable` function. Strong suggestion to provide the `subvariable_codes` in order to have @@ -1736,7 +1821,7 @@ def copy_variable(self, variable, name, alias, derived=None, subvariable_codes=N calculate on client side unique codes to use by __# algorithm. :return: Variable() instance of new copy """ - _subvar_alias = re.compile(r'.+_(\d+)$') + _subvar_alias = re.compile(r".+_(\d+)$") variable_resource = variable.resource def subrefs(_variable, _alias): @@ -1745,55 +1830,48 @@ def subrefs(_variable, _alias): # parent's are, that is `parent_alias_#`. _subreferences = [] for _, subvar in _variable: - sv_alias = subvar['alias'] + sv_alias = subvar["alias"] match = _subvar_alias.match(sv_alias) if match: # Does this var have the subvar pattern? 
suffix = int(match.groups()[0], 10) # Keep the position sv_alias = subvar_alias(_alias, suffix) - _subreferences.append({ - 'name': subvar['name'], - 'alias': sv_alias - }) + _subreferences.append({"name": subvar["name"], "alias": sv_alias}) return _subreferences if variable.derived: # We are dealing with a derived variable, we want the derivation # to be executed again instead of doing a `copy_variable` - derivation = abs_url(variable.resource.body['derivation'], - variable.resource.self) - derivation.pop('references', None) - payload = shoji_entity_wrapper({ - 'name': name, - 'alias': alias, - 'derivation': derivation}) + derivation = abs_url( + variable.resource.body["derivation"], variable.resource.self + ) + derivation.pop("references", None) + payload = shoji_entity_wrapper( + {"name": name, "alias": alias, "derivation": derivation} + ) if variable.type == _MR_TYPE: # We are re-executing a multiple_response derivation. # We need to update the complex `array` function expression # to contain the new suffixed aliases. Given that the map is # unordered, we have to iterated and find a name match. - _ob = payload['body']['derivation']['args'][0]['args'][0] - subvars = _ob['map'] + _ob = payload["body"]["derivation"]["args"][0]["args"][0] + subvars = _ob["map"] subreferences = subrefs(variable, alias) for subref in subreferences: for subvar_pos in subvars: subvar = subvars[subvar_pos] - if subvar['references']['name'] == subref['name']: - subvar['references']['alias'] = subref['alias'] + if subvar["references"]["name"] == subref["name"]: + subvar["references"]["alias"] = subref["alias"] break else: derivation = { - 'function': 'copy_variable', - 'args': [{ - 'variable': variable_resource.self - }] + "function": "copy_variable", + "args": [{"variable": variable_resource.self}], } - payload = shoji_entity_wrapper({ - 'name': name, - 'alias': alias, - 'derivation': derivation - }) + payload = shoji_entity_wrapper( + {"name": name, "alias": alias, "derivation": derivation} + ) if "subvariables" in variable_resource.body: api_subreferences = variable_resource.body["subreferences"] @@ -1804,7 +1882,8 @@ def subrefs(_variable, _alias): if len(subvariable_codes) != len(subvariables): msg = "Should provide {} codes for {} subvariables".format( - len(subvariable_codes), len(subvariables)) + len(subvariable_codes), len(subvariables) + ) raise ValueError(msg) subreferences = [] @@ -1817,25 +1896,55 @@ def subrefs(_variable, _alias): derivation["references"] = {"subreferences": subreferences} if derived is False or derived: - payload['body']['derived'] = derived + payload["body"]["derived"] = derived return self._var_create_reload_return(payload) - def combine_categories(self, variable, map, categories, missing=None, - default=None, name='', alias='', description=''): + def combine_categories( + self, + variable, + map, + categories, + missing=None, + default=None, + name="", + alias="", + description="", + ): if not alias or not name: raise ValueError("Name and alias are required") if variable.type in _MR_TYPE: return self.combine_multiple_response( - variable, map, categories, name=name, - alias=alias, description=description) + variable, + map, + categories, + name=name, + alias=alias, + description=description, + ) else: return self.combine_categorical( - variable, map, categories, missing, default, - name=name, alias=alias, description=description) + variable, + map, + categories, + missing, + default, + name=name, + alias=alias, + description=description, + ) - def combine_categorical(self, 
variable, map, categories=None, missing=None, - default=None, name='', alias='', description=''): + def combine_categorical( + self, + variable, + map, + categories=None, + missing=None, + default=None, + name="", + alias="", + description="", + ): """ Create a new variable in the given dataset that is a recode of an existing variable @@ -1858,19 +1967,29 @@ def combine_categorical(self, variable, map, categories=None, missing=None, variable = self[variable] # TODO: Implement `default` parameter in Crunch API - combinations = combinations_from_map( - map, categories or {}, missing or []) - payload = shoji_entity_wrapper({ - 'name': name, - 'alias': alias, - 'description': description, - 'derivation': combine_categories_expr( - variable.resource.self, combinations) - }) + combinations = combinations_from_map(map, categories or {}, missing or []) + payload = shoji_entity_wrapper( + { + "name": name, + "alias": alias, + "description": description, + "derivation": combine_categories_expr( + variable.resource.self, combinations + ), + } + ) return self._var_create_reload_return(payload) - def combine_multiple_response(self, variable, map, categories=None, default=None, - name='', alias='', description=''): + def combine_multiple_response( + self, + variable, + map, + categories=None, + default=None, + name="", + alias="", + description="", + ): """ Creates a new variable in the given dataset that combines existing responses into new categorized ones @@ -1890,15 +2009,17 @@ def combine_multiple_response(self, variable, map, categories=None, default=None parent_alias = variable.alias # TODO: Implement `default` parameter in Crunch API - responses = responses_from_map(variable, map, categories or {}, alias, - parent_alias) - payload = shoji_entity_wrapper({ - 'name': name, - 'alias': alias, - 'description': description, - 'derivation': combine_responses_expr( - variable.resource.self, responses) - }) + responses = responses_from_map( + variable, map, categories or {}, alias, parent_alias + ) + payload = shoji_entity_wrapper( + { + "name": name, + "alias": alias, + "description": description, + "derivation": combine_responses_expr(variable.resource.self, responses), + } + ) return self._var_create_reload_return(payload) def cast_summary(self, variable, cast_type): @@ -1910,12 +2031,14 @@ def cast_summary(self, variable, cast_type): """ try: resp = self.resource.session.get( - self[variable].resource.views.cast, - params={'cast_as': cast_type} + self[variable].resource.views.cast, params={"cast_as": cast_type} ) except pycrunch.lemonpy.ClientError as e: return 'Impossible to cast var "%s" to type "%s". 
Error: %s' % ( - variable, cast_type, e) + variable, + cast_type, + e, + ) return resp.content def cast(self, variable, cast_type): @@ -1925,13 +2048,13 @@ def cast(self, variable, cast_type): @param cast_type: one of ['numeric', 'text', 'categorical'] :return: the casted variable or an error """ - allowed_types = 'numeric', 'text', 'categorical' + allowed_types = "numeric", "text", "categorical" assert cast_type in allowed_types, "Cast type not allowed" - payload = {'cast_as': cast_type} + payload = {"cast_as": cast_type} # try casting the variable in place self.resource.session.post( - self[variable].resource.views.cast, - data=json.dumps(payload)) + self[variable].resource.views.cast, data=json.dumps(payload) + ) # make sure to update the dataset variables with the casted one self._reload_variables() return self[variable] @@ -1946,13 +2069,14 @@ def create_savepoint(self, description): description as any other savepoint. """ if len(self.resource.savepoints.index) > 0: - if description in self.savepoint_attributes('description'): + if description in self.savepoint_attributes("description"): raise KeyError( - "A checkpoint with the description '{}' already" - " exists.".format(description) + "A checkpoint with the description '{}' already" " exists.".format( + description + ) ) - sp = shoji_entity_wrapper({'description': description}) + sp = shoji_entity_wrapper({"description": description}) return self.resource.savepoints.create(sp) def load_savepoint(self, description=None): @@ -1965,14 +2089,13 @@ def load_savepoint(self, description=None): the loaded savepoint will be destroyed permanently. """ if description is None: - description = 'initial import' - elif description not in self.savepoint_attributes('description'): + description = "initial import" + elif description not in self.savepoint_attributes("description"): raise KeyError( - "No checkpoint with the description '{}'" - " exists.".format(description) + "No checkpoint with the description '{}'" " exists.".format(description) ) - sp = self.resource.savepoints.by('description').get(description) + sp = self.resource.savepoints.by("description").get(description) self.resource.session.post(sp.revert) self._reload_variables() @@ -1992,16 +2115,23 @@ def savepoint_attributes(self, attrib): """ svpoints = self.resource.savepoints if len(svpoints.index) != 0: - attribs = [ - cp[attrib] - for url, cp in six.iteritems(svpoints.index) - ] + attribs = [cp[attrib] for url, cp in six.iteritems(svpoints.index)] return attribs return [] - def create_crunchbox(self, title='', header='', footer='', notes='', - filters=None, variables=None, force=False, min_base_size=None, - weight=DefaultWeight, palette=None): + def create_crunchbox( + self, + title="", + header="", + footer="", + notes="", + filters=None, + variables=None, + force=False, + min_base_size=None, + weight=DefaultWeight, + palette=None, + ): """ create a new boxdata entity for a CrunchBox. 
@@ -2035,72 +2165,65 @@ def create_crunchbox(self, title='', header='', footer='', notes='',
         if filters:
             if not isinstance(filters, list):
-                raise TypeError('`filters` argument must be of type `list`')
+                raise TypeError("`filters` argument must be of type `list`")
 
             # ensure we only have `Filter` instances
-            filters = [
-                f if isinstance(f, Filter) else self.filters[f]
-                for f in filters
-            ]
+            filters = [f if isinstance(f, Filter) else self.filters[f] for f in filters]
 
-            if any(not f.is_public
-                   for f in filters):
-                raise ValueError('filters need to be public')
+            if any(not f.is_public for f in filters):
+                raise ValueError("filters need to be public")
 
-            filters = [
-                {'filter': f.resource.self}
-                for f in filters
-            ]
+            filters = [{"filter": f.resource.self} for f in filters]
 
         if variables:
             if not isinstance(variables, list):
-                raise TypeError('`variables` argument must be of type `list`')
+                raise TypeError("`variables` argument must be of type `list`")
 
             # ensure we only have `Variable` Tuples
             # NOTE: if we want to check if variables are public we would have
            # to use Variable instances instead of their Tuple representation.
             # This would cause additional GET's
             variables = [
-                var.shoji_tuple if isinstance(var, Variable)
-                else self.resource.variables.by('alias')[var]
+                var.shoji_tuple
+                if isinstance(var, Variable)
+                else self.resource.variables.by("alias")[var]
                 for var in variables
             ]
 
             variables = dict(
-                function='make_frame',
-                args=[
-                    {'map': {
-                        v.id: {'variable': v.entity_url}
-                        for v in variables
-                    }}
-                ])
+                function="make_frame",
+                args=[{"map": {v.id: {"variable": v.entity_url} for v in variables}}],
+            )
 
         # use weight from preferences, remove in #158676482
         if weight is DefaultWeight:
-            preferences = self.resource.session.get(
-                self.resource.fragments.preferences)
+            preferences = self.resource.session.get(self.resource.fragments.preferences)
             weight = preferences.payload.body.weight or None
 
         if not title:
-            title = 'CrunchBox for {}'.format(str(self))
-
-        payload = shoji_entity_wrapper(dict(
-            weight=weight,
-            where=variables,
-            filters=filters,
-            force=force,
-            title=title,
-            notes=notes,
-            header=header,
-            footer=footer)
+            title = "CrunchBox for {}".format(str(self))
+
+        payload = shoji_entity_wrapper(
+            dict(
+                weight=weight,
+                where=variables,
+                filters=filters,
+                force=force,
+                title=title,
+                notes=notes,
+                header=header,
+                footer=footer,
+            )
         )
 
         if min_base_size:
-            payload['body'].setdefault('display_settings', {}).update(
-                dict(minBaseSize=dict(value=min_base_size)))
+            payload["body"].setdefault("display_settings", {}).update(
+                dict(minBaseSize=dict(value=min_base_size))
+            )
         if palette:
-            payload['body'].setdefault('display_settings', {}).update(
-                dict(palette=palette))
+            payload["body"].setdefault("display_settings", {}).update(
+                dict(palette=palette)
+            )
 
         # create the boxdata
         self.resource.boxdata.create(payload)
@@ -2109,12 +2232,13 @@ def create_crunchbox(self, title='', header='', footer='', notes='',
         # others, i.e. no id, no delete method, different entity_url...
# For now, return the shoji_tuple from the index from scrunch.crunchboxes import CrunchBox + for shoji_tuple in self.resource.boxdata.index.values(): if shoji_tuple.metadata.title == title: return CrunchBox(shoji_tuple, self) def delete_crunchbox(self, **kwargs): - """ deletes crunchboxes on matching kwargs """ + """deletes crunchboxes on matching kwargs""" match = False for key in kwargs: if match: @@ -2146,25 +2270,35 @@ def forks_dataframe(self): _forks = pd.DataFrame( [fk for url, fk in six.iteritems(self.resource.forks.index)] ) - _forks = _forks[[ - 'name', - 'description', - 'is_published', - 'owner_name', - 'current_editor_name', - 'creation_time', - 'modification_time', - 'id' - ]] - _forks['creation_time'] = pd.to_datetime(_forks['creation_time']) - _forks['modification_time'] = pd.to_datetime( - _forks['modification_time']) - _forks.sort_values(by=['creation_time'], inplace=True) + _forks = _forks[ + [ + "name", + "description", + "is_published", + "owner_name", + "current_editor_name", + "creation_time", + "modification_time", + "id", + ] + ] + _forks["creation_time"] = pd.to_datetime(_forks["creation_time"]) + _forks["modification_time"] = pd.to_datetime(_forks["modification_time"]) + _forks.sort_values(by=["creation_time"], inplace=True) return _forks - def export(self, path, format='csv', filter=None, variables=None, - hidden=False, options=None, metadata_path=None, timeout=None): + def export( + self, + path, + format="csv", + filter=None, + variables=None, + hidden=False, + options=None, + metadata_path=None, + timeout=None, + ): """ Downloads a dataset as CSV or as SPSS to the given path. This includes hidden variables. @@ -2175,41 +2309,43 @@ def export(self, path, format='csv', filter=None, variables=None, By default, categories in CSV exports are provided as id's. """ - valid_options = ['use_category_ids', 'prefix_subvariables', - 'var_label_field', 'missing_values'] + valid_options = [ + "use_category_ids", + "prefix_subvariables", + "var_label_field", + "missing_values", + ] # Only CSV and SPSS exports are currently supported. - if format not in ('csv', 'spss'): + if format not in ("csv", "spss"): raise ValueError( - 'Invalid format %s. Allowed formats are: "csv" and "spss".' - % format + 'Invalid format %s. Allowed formats are: "csv" and "spss".' % format ) - if format == 'csv': + if format == "csv": # Default options for CSV exports. - export_options = {'use_category_ids': True} + export_options = {"use_category_ids": True} else: # Default options for SPSS exports. export_options = { - 'prefix_subvariables': False, - 'var_label_field': 'description' + "prefix_subvariables": False, + "var_label_field": "description", } # Validate the user-provided export options. options = options or {} if not isinstance(options, dict): - raise ValueError( - 'The options argument must be a dictionary.' - ) + raise ValueError("The options argument must be a dictionary.") for k in options.keys(): if k not in valid_options: raise ValueError( - 'Invalid options for format "%s": %s.' - % (format, ','.join(k)) + 'Invalid options for format "%s": %s.' % (format, ",".join(k)) ) - if 'var_label_field' in options \ - and options['var_label_field'] not in ('name', 'description'): + if "var_label_field" in options and options["var_label_field"] not in ( + "name", + "description", + ): raise ValueError( 'The "var_label_field" export option must be either "name" ' 'or "description".' 
@@ -2219,34 +2355,33 @@ def export(self, path, format='csv', filter=None, variables=None, export_options.update(options) # the payload should include all hidden variables by default - payload = {'options': export_options} + payload = {"options": export_options} # Option for exporting metadata as json if metadata_path is not None: - metadata = self.resource.table['metadata'] + metadata = self.resource.table["metadata"] if variables is not None: if sys.version_info >= (3, 0): metadata = { key: value for key, value in metadata.items() - if value['alias'] in variables + if value["alias"] in variables } else: metadata = { key: value for key, value in metadata.iteritems() - if value['alias'] in variables + if value["alias"] in variables } - with open(metadata_path, 'w+') as f: + with open(metadata_path, "w+") as f: json.dump(metadata, f, sort_keys=True) # add filter to rows if passed if filter: if isinstance(filter, Filter): - payload['filter'] = {'filter': filter.resource.self} + payload["filter"] = {"filter": filter.resource.self} else: - payload['filter'] = process_expr( - parse_expr(filter), self.resource) + payload["filter"] = process_expr(parse_expr(filter), self.resource) # convert variable list to crunch identifiers if variables and isinstance(variables, list): @@ -2255,33 +2390,29 @@ def export(self, path, format='csv', filter=None, variables=None, id_vars.append(self[var].url) if len(id_vars) != len(variables): LOG.debug( - "Variables passed: %s Variables detected: %s" - % (variables, id_vars) + "Variables passed: %s Variables detected: %s" % (variables, id_vars) ) raise AttributeError("At least a variable was not found") # Now build the payload with selected variables - payload['where'] = { - 'function': 'make_frame', - 'args': [{ - 'map': { - x: {'variable': x} for x in id_vars - } - }] + payload["where"] = { + "function": "make_frame", + "args": [{"map": {x: {"variable": x} for x in id_vars}}], } # hidden is mutually exclusive with # variables to include in the download if hidden and not variables: if not self.resource.body.permissions.edit: - raise AttributeError( - "Only Dataset editors can export hidden variables") - payload['where'] = { - 'function': 'make_frame', - 'args': [{ - 'map': { - x: {'variable': x} - for x in self.resource.variables.index.keys() + raise AttributeError("Only Dataset editors can export hidden variables") + payload["where"] = { + "function": "make_frame", + "args": [ + { + "map": { + x: {"variable": x} + for x in self.resource.variables.index.keys() + } } - }] + ], } progress_tracker = pycrunch.progress.DefaultProgressTracking(timeout) @@ -2289,7 +2420,7 @@ def export(self, path, format='csv', filter=None, variables=None, dataset=self.resource, options=payload, format=format, - progress_tracker=progress_tracker + progress_tracker=progress_tracker, ) download_file(url, path) @@ -2327,35 +2458,39 @@ def exclude(self, expr=None): return self.resource.session.patch( self.resource.fragments.exclusion, - data=json.dumps(dict(expression=expr_obj)) + data=json.dumps(dict(expression=expr_obj)), ) def get_exclusion(self): exclusion = self.resource.exclusion - if 'body' not in exclusion: + if "body" not in exclusion: return None - expr = exclusion['body'].get('expression') + expr = exclusion["body"].get("expression") return prettify(expr, self) if expr else None def add_filter(self, name, expr, public=False): expression = process_expr(parse_expr(expr), self.resource) - payload = shoji_entity_wrapper(dict( - name=name, - expression=expression, - is_public=public)) + 
payload = shoji_entity_wrapper( + dict(name=name, expression=expression, is_public=public) + ) new_filter = self.resource.filters.create(payload) - return self.filters[new_filter.body['name']] + return self.filters[new_filter.body["name"]] def add_deck(self, name, description="", public=False): - payload = shoji_entity_wrapper(dict( - name=name, - description=description, - is_public=public)) + payload = shoji_entity_wrapper( + dict(name=name, description=description, is_public=public) + ) new_deck = self.resource.decks.create(payload) - return self.decks[new_deck.self.split('/')[-2]] + return self.decks[new_deck.self.split("/")[-2]] - def fork(self, description=None, name=None, is_published=False, - preserve_owner=True, **kwargs): + def fork( + self, + description=None, + name=None, + is_published=False, + preserve_owner=True, + **kwargs + ): """ Create a fork of ds and add virgin savepoint. @@ -2377,28 +2512,24 @@ def fork(self, description=None, name=None, is_published=False, :returns _fork: scrunch.datasets.BaseDataset """ from scrunch.mutable_dataset import MutableDataset + nforks = len(self.resource.forks.index) if name is None: if six.PY2: name = "FORK #{} of {}".format( - nforks + 1, - self.resource.body.name.encode("ascii", "ignore")) + nforks + 1, self.resource.body.name.encode("ascii", "ignore") + ) else: - name = "FORK #{} of {}".format( - nforks + 1, - self.resource.body.name) + name = "FORK #{} of {}".format(nforks + 1, self.resource.body.name) if description is None: description = self.resource.body.description body = dict( - name=name, - description=description, - is_published=is_published, - **kwargs + name=name, description=description, is_published=is_published, **kwargs ) if preserve_owner: - body['owner'] = self.resource.body.owner + body["owner"] = self.resource.body.owner # not returning a dataset payload = shoji_entity_wrapper(body) _fork = self.resource.forks.create(payload).refresh() @@ -2412,38 +2543,37 @@ def replace_values(self, variables, filter=None, literal_subvar=False, timeout=6 Alows subvariable alias as well :param filter: string, an Scrunch expression, i.e; 'var_alias > 1' """ - payload = { - 'command': 'update', - 'variables': {}, - } + payload = {"command": "update", "variables": {}} for alias, val in variables.items(): if isinstance(val, list): if literal_subvar: - payload['variables'][alias] = {'column': val} + payload["variables"][alias] = {"column": val} else: - payload['variables'][self[alias].id] = {'column': val} + payload["variables"][self[alias].id] = {"column": val} else: if literal_subvar: - payload['variables'][alias] = {'value': val} + payload["variables"][alias] = {"value": val} else: - payload['variables'][self[alias].id] = {'value': val} + payload["variables"][self[alias].id] = {"value": val} if filter: - payload['filter'] = process_expr(parse_expr(filter), self.resource) + payload["filter"] = process_expr(parse_expr(filter), self.resource) # Remove query parameters from table url table = self.resource.table - table.self = table.self[:table.self.find('?')] + table.self = table.self[: table.self.find("?")] resp = self.resource.table.post(json.dumps(payload)) if resp.status_code == 204: - LOG.info('Dataset Updated') + LOG.info("Dataset Updated") return progress_tracker = DefaultProgressTracking(timeout) pycrunch.shoji.wait_progress(resp, self.resource.session, progress_tracker) return resp - def backfill_from_csv(self, aliases, pk_alias, csv_fh, rows_filter=None, timeout=None): + def backfill_from_csv( + self, aliases, pk_alias, csv_fh, 
rows_filter=None, timeout=None + ): """ :param aliases: List of strings for the aliases present in the CSV file @@ -2455,7 +2585,7 @@ def backfill_from_csv(self, aliases, pk_alias, csv_fh, rows_filter=None, timeout :return: """ - MAX_FILE_SIZE = 150 * 2 ** 20 # 150MB + MAX_FILE_SIZE = 150 * 2**20 # 150MB file_size = len(csv_fh.read()) if file_size >= MAX_FILE_SIZE: @@ -2482,21 +2612,18 @@ def replace_from_csv(self, filename, chunksize=1000): """ warnings.warn( "This method is deprecated. Use Dataset.backfill_from_csv", - PendingDeprecationWarning) - streaming_state = self.resource.body.get('streaming', 'no') + PendingDeprecationWarning, + ) + streaming_state = self.resource.body.get("streaming", "no") ds = self - if streaming_state != 'streaming': + if streaming_state != "streaming": ds = self.make_streaming() importer = pycrunch.importing.Importer() - df_chunks = pd.read_csv( - filename, - header=0, - chunksize=chunksize - ) + df_chunks = pd.read_csv(filename, header=0, chunksize=chunksize) for chunk in df_chunks: # This is a trick to get rid of np.int64, which is not # json serializable - stream = chunk.to_json(orient='records') + stream = chunk.to_json(orient="records") stream = json.loads(stream) # trap the timeout and allow it to finish try: @@ -2507,7 +2634,7 @@ def replace_from_csv(self, filename, chunksize=1000): ds.push_rows(chunksize) except TaskProgressTimeoutError as exc: exc.entity.wait_progress(exc.response) - if streaming_state != 'streaming': + if streaming_state != "streaming": ds.edit(streaming=streaming_state) def merge(self, fork_id=None, autorollback=True): @@ -2521,42 +2648,44 @@ def merge(self, fork_id=None, autorollback=True): if False the dataset and fork are beeing left 'dirty' """ if isinstance(fork_id, int) or ( - isinstance(fork_id, six.string_types) and - fork_id.isdigit()): + isinstance(fork_id, six.string_types) and fork_id.isdigit() + ): fork_id = "FORK #{} of {}".format(fork_id, self.resource.body.name) elif fork_id is None: - raise ValueError('fork id, name or number missing') + raise ValueError("fork id, name or number missing") fork_index = self.resource.forks.index - forks = [f for f in fork_index - if fork_index[f].get('name') == fork_id or - fork_index[f].get('id') == fork_id] + forks = [ + f + for f in fork_index + if fork_index[f].get("name") == fork_id + or fork_index[f].get("id") == fork_id + ] if len(forks) == 1: fork_url = forks[0] else: raise ValueError( - "Couldn't find a (unique) fork. " - "Please try again using its id") + "Couldn't find a (unique) fork. " "Please try again using its id" + ) - body = dict( - dataset=fork_url, - autorollback=autorollback) + body = dict(dataset=fork_url, autorollback=autorollback) resp = self.resource.session.post( - self.resource.actions.self, - data=json.dumps(shoji_entity_wrapper(body))) + self.resource.actions.self, data=json.dumps(shoji_entity_wrapper(body)) + ) if resp.status_code == 204: LOG.info("Dataset merged") return elif resp.status_code == 202: - if 'location' in resp.headers: - LOG.info("Dataset merge in progress, see %s" % - resp.headers['location']) + if "location" in resp.headers: + LOG.info("Dataset merge in progress, see %s" % resp.headers["location"]) else: - LOG.info("Dataset merge in progress, but no location header. " - "Content %s" % resp.content) + LOG.info( + "Dataset merge in progress, but no location header. 
" + "Content %s" % resp.content + ) return resp def delete_forks(self): @@ -2605,51 +2734,41 @@ def create_multitable(self, name, template, is_public=False): # the special case of q being a multiple_response variable alias, # we need to build a different payload - if q['query'] in self.keys(): + if q["query"] in self.keys(): # this means is a variable in this dataset - var_alias = q['query'] + var_alias = q["query"] var_url = self[var_alias].resource.self - multi_types = 'multiple_response', 'categorical_array' + multi_types = "multiple_response", "categorical_array" if self[var_alias].type in multi_types: - as_json['query'] = [ - { - 'each': var_url - }, - { - 'function': 'as_selected', - 'args': [ - { - 'variable': var_url - } - ] - } + as_json["query"] = [ + {"each": var_url}, + {"function": "as_selected", "args": [{"variable": var_url}]}, ] processed = True else: - as_json['query'] = [{'variable': var_url}] + as_json["query"] = [{"variable": var_url}] processed = True - elif isinstance(q['query'], Variable): - var_url = q['query'].resource.self - as_json['query'] = [{'variable': var_url}] + elif isinstance(q["query"], Variable): + var_url = q["query"].resource.self + as_json["query"] = [{"variable": var_url}] processed = True if not processed: - parsed_q = process_expr(parse_expr(q['query']), self.resource) + parsed_q = process_expr(parse_expr(q["query"]), self.resource) # wrap the query in a list of one dict element - as_json['query'] = [parsed_q] - if 'transform' in q.keys(): - as_json['transform'] = q['transform'] + as_json["query"] = [parsed_q] + if "transform" in q.keys(): + as_json["transform"] = q["transform"] parsed_template.append(as_json) - payload = shoji_entity_wrapper(dict( - name=name, - is_public=is_public, - template=parsed_template)) + payload = shoji_entity_wrapper( + dict(name=name, is_public=is_public, template=parsed_template) + ) new_multi = self.resource.multitables.create(payload) - return self.multitables[new_multi.body['name']] + return self.multitables[new_multi.body["name"]] def import_multitable(self, name, multi): """ @@ -2658,9 +2777,7 @@ def import_multitable(self, name, multi): :name: Name of the new multitable :multi: Multitable instance to clone into this Dataset """ - payload = shoji_entity_wrapper(dict( - name=name, - multitable=multi.resource.self)) + payload = shoji_entity_wrapper(dict(name=name, multitable=multi.resource.self)) self.resource.multitables.create(payload) return self.multitables[name] @@ -2675,22 +2792,25 @@ def set_weight(self, variables=None): graph = [self[v].url for v in variables] if variables is None: graph = [] - payload = {'graph': graph} + payload = {"graph": graph} return self.resource.variables.weights.patch(json.dumps(payload)) @property def weights(self): weight_urls = self.resource.variables.weights.graph - return [self.resource.variables.index[weight_alias].alias - for weight_alias in weight_urls] + return [ + self.resource.variables.index[weight_alias].alias + for weight_alias in weight_urls + ] def remove_weight(self, variables): """ :param: variables: List of variable aliases or sting of variable alias to remove from weights """ - if not isinstance(variables, six.string_types) and \ - not isinstance(variables, list): + if not isinstance(variables, six.string_types) and not isinstance( + variables, list + ): raise TypeError("variable must be a string or a list") weights = self.weights @@ -2707,7 +2827,7 @@ def remove_weight(self, variables): raise NameError("%s was not found in weights" % variables) graph = 
[self[v].url for v in weights] - payload = {'graph': graph} + payload = {"graph": graph} return self.resource.variables.weights.patch(json.dumps(payload)) def drop_rows(self, filter): @@ -2715,10 +2835,7 @@ def drop_rows(self, filter): :param: filter: An scrunch filter expression that matches rows to drop """ filters = process_expr(parse_expr(filter), self.resource) - payload = { - 'command': 'delete', - 'filter': filters, - } + payload = {"command": "delete", "filter": filters} self.resource.table.post(json.dumps(payload)) @property @@ -2728,7 +2845,7 @@ def size(self): """ return self.resource.body.size - def derive_weight(self, targets, alias, name, description=''): + def derive_weight(self, targets, alias, name, description=""): """ Derives a new variable to be used as raked weight. https://docs.crunch.io/feature-guide/feature-deriving.html?highlight=rake#weights @@ -2757,21 +2874,21 @@ def derive_weight(self, targets, alias, name, description=''): for target in targets: for key, val in target.items(): if fsum(val.values()) != 1.0: - raise ValueError('Weights for target {} need to add up to 1.0'.format(key)) - _targets.append({ - 'variable': self[key].id, - 'targets': list(map(list, val.items())) - }) - - payload = shoji_entity_wrapper({ - 'name': name, - 'alias': alias, - 'description': description, - 'derivation': { - 'function': 'rake', - 'args': _targets + raise ValueError( + "Weights for target {} need to add up to 1.0".format(key) + ) + _targets.append( + {"variable": self[key].id, "targets": list(map(list, val.items()))} + ) + + payload = shoji_entity_wrapper( + { + "name": name, + "alias": alias, + "description": description, + "derivation": {"function": "rake", "args": _targets}, } - }) + ) return self._var_create_reload_return(payload) @property @@ -2788,8 +2905,7 @@ def views(self): class Dataset(BaseDataset): - - _BASE_MUTABLE_ATTRIBUTES = {'streaming'} + _BASE_MUTABLE_ATTRIBUTES = {"streaming"} def __init__(self, resource): LOG.warning("""Dataset is deprecated, instead use now @@ -2797,11 +2913,12 @@ def __init__(self, resource): with it's corresponding get_mutable_dataset and get_streaming_dataset methods""") # noqa: E501 super(Dataset, self).__init__(resource) - self._MUTABLE_ATTRIBUTES = self._BASE_MUTABLE_ATTRIBUTES | self._BASE_MUTABLE_ATTRIBUTES + self._MUTABLE_ATTRIBUTES = ( + self._BASE_MUTABLE_ATTRIBUTES | self._BASE_MUTABLE_ATTRIBUTES + ) class DatasetSubvariablesMixin(DatasetVariablesMixin): - def _reload_variables(self): """ Helper that takes care of updating self._vars on init and @@ -2809,12 +2926,12 @@ def _reload_variables(self): """ self._vars = [] self._catalog = {} - if getattr(self.resource, 'subvariables', None): + if getattr(self.resource, "subvariables", None): self._catalog = self.resource.subvariables self._vars = self._catalog.index.items() def __iter__(self): - if getattr(self.resource, 'subvariables', None): + if getattr(self.resource, "subvariables", None): for var_url in self.subvariables: yield (var_url, dict(self._vars)[var_url]) @@ -2832,20 +2949,20 @@ def __init__(self, resource, *args): # pythonic dict behaviour data = {} for k, v in args[0].items(): - data[k] = v['value'] + data[k] = v["value"] dict.__init__(self, data) def __setitem__(self, key, value): - data = {key: {'value': value}} + data = {key: {"value": value}} for k, v in self.items(): # wrap value in a {'value': value} for crunch - data[k] = {'value': v} + data[k] = {"value": v} if key == k: - data[k]['value'] = value + data[k]["value"] = value # send the json to the 
missing_rules endpoint result = self.resource.session.put( - self.resource.fragments.missing_rules, - json.dumps({'rules': data})) + self.resource.fragments.missing_rules, json.dumps({"rules": data}) + ) assert result.status_code == 204 super(MissingRules, self).__setitem__(key, value) @@ -2854,18 +2971,18 @@ def __delitem__(self, key): data = {} for k, v in self.items(): # wrap value in a {'value': value} for crunch - data[k] = {'value': v} + data[k] = {"value": v} del data[key] result = self.resource.session.put( - self.resource.fragments.missing_rules, - json.dumps({'rules': data})) + self.resource.fragments.missing_rules, json.dumps({"rules": data}) + ) assert result.status_code == 204 super(MissingRules, self).__delitem__(key) def clear(self): self.resource.session.put( - self.resource.fragments.missing_rules, - json.dumps({'rules': {}})) + self.resource.fragments.missing_rules, json.dumps({"rules": {}}) + ) super(MissingRules, self).clear() @@ -2874,13 +2991,21 @@ class Variable(ReadOnly, DatasetSubvariablesMixin): A pycrunch.shoji.Entity wrapper that provides variable-specific methods. DatasetSubvariablesMixin provides for subvariable interactions. """ - _MUTABLE_ATTRIBUTES = {'name', 'description', 'uniform_basis', - 'view', 'notes', 'format', 'derived'} - _IMMUTABLE_ATTRIBUTES = {'id', 'alias', 'type', 'discarded'} + + _MUTABLE_ATTRIBUTES = { + "name", + "description", + "uniform_basis", + "view", + "notes", + "format", + "derived", + } + _IMMUTABLE_ATTRIBUTES = {"id", "alias", "type", "discarded"} # We won't expose owner and private # categories in immutable. IMO it should be handled separately _ENTITY_ATTRIBUTES = _MUTABLE_ATTRIBUTES | _IMMUTABLE_ATTRIBUTES - _OVERRIDDEN_ATTRIBUTES = {'categories'} + _OVERRIDDEN_ATTRIBUTES = {"categories"} def __init__(self, var_tuple, dataset): """ @@ -2894,17 +3019,17 @@ def __init__(self, var_tuple, dataset): self.dataset = dataset self._reload_variables() if self._is_alias_mutable(): - self._MUTABLE_ATTRIBUTES.add('alias') - self._IMMUTABLE_ATTRIBUTES.discard('alias') + self._MUTABLE_ATTRIBUTES.add("alias") + self._IMMUTABLE_ATTRIBUTES.discard("alias") def _is_alias_mutable(self): - if self.dataset.resource.body.get('streaming') == 'no' and not self.derived: + if self.dataset.resource.body.get("streaming") == "no" and not self.derived: return True return False @property def is_subvar(self): - return 'subvariables' in self.url + return "subvariables" in self.url @property def resource(self): @@ -2921,16 +3046,15 @@ def __getattr__(self, item): try: return self.resource.body[item] # Has to exist except KeyError: - raise AttributeError( - "Variable does not have attribute %s" % item) + raise AttributeError("Variable does not have attribute %s" % item) return super(Variable, self).__getattribute__(item) def edit(self, **kwargs): for key in kwargs: if key not in self._MUTABLE_ATTRIBUTES: raise AttributeError( - "Can't edit attribute %s of variable %s" - % (key, self.name)) + "Can't edit attribute %s of variable %s" % (key, self.name) + ) self.dataset._reload_variables() return self.resource.edit(**kwargs) @@ -2942,10 +3066,10 @@ def __str__(self): @property def categories(self): - if self.resource.body['type'] not in CATEGORICAL_TYPES: + if self.resource.body["type"] not in CATEGORICAL_TYPES: raise TypeError( - "Variable of type %s do not have categories" - % self.resource.body.type) + "Variable of type %s do not have categories" % self.resource.body.type + ) return CategoryList._from(self.resource) def delete(self): @@ -2963,21 +3087,25 @@ def 
integrate(self): self.resource.edit(derived=False) self.dataset._reload_variables() - def add_category(self, id, name, numeric_value, missing=False, date=None, before_id=False): - if self.resource.body['type'] not in CATEGORICAL_TYPES: + def add_category( + self, id, name, numeric_value, missing=False, date=None, before_id=False + ): + if self.resource.body["type"] not in CATEGORICAL_TYPES: raise TypeError( - "Variable of type %s do not have categories" - % self.resource.body.type) + "Variable of type %s do not have categories" % self.resource.body.type + ) - if self.resource.body.get('derivation'): - raise TypeError("Cannot add categories on derived variables. Re-derive with the appropriate expression") + if self.resource.body.get("derivation"): + raise TypeError( + "Cannot add categories on derived variables. Re-derive with the appropriate expression" + ) - categories = self.resource.body['categories'] + categories = self.resource.body["categories"] category_data = { - 'id': id, - 'missing': missing, - 'name': name, - 'numeric_value': numeric_value, + "id": id, + "missing": missing, + "name": name, + "numeric_value": numeric_value, } if date is not None: if not isinstance(date, six.string_types): @@ -2993,12 +3121,12 @@ def add_category(self, id, name, numeric_value, missing=False, date=None, before # see if id exist try: self.categories[before_id] - except: - raise AttributeError('before_id not found: {}'.format(before_id)) + except (KeyError, TypeError): + raise AttributeError("before_id not found: {}".format(before_id)) new_categories = [] for category in categories: - if category['id'] == before_id: + if category["id"] == before_id: new_categories.append(category_data) new_categories.append(category) categories = new_categories @@ -3012,12 +3140,12 @@ def add_category(self, id, name, numeric_value, missing=False, date=None, before def edit_categorical(self, categories, rules): # validate rules and categories are same size _validate_category_rules(categories, rules) - args = [{ - 'column': [c['id'] for c in categories], - 'type': { - 'value': { - 'class': 'categorical', - 'categories': categories}}}] + args = [ + { + "column": [c["id"] for c in categories], + "type": {"value": {"class": "categorical", "categories": categories}}, + } + ] # build the expression more_args = [] for rule in rules: @@ -3025,7 +3153,7 @@ def edit_categorical(self, categories, rules): # get dataset and build the expression more_args = process_expr(more_args, self.dataset) # epression value building - expr = dict(function='case', args=args + more_args) + expr = dict(function="case", args=args + more_args) payload = shoji_entity_wrapper(dict(expr=expr)) # patch the variable with the new payload resp = self.resource.patch(payload) @@ -3036,32 +3164,33 @@ def edit_derived(self, variable, mapper): raise NotImplementedError("Use edit_combination") def move(self, path, position=-1, before=None, after=None): - self.dataset.order.place(self, path, position=position, - before=before, after=after) + self.dataset.order.place( + self, path, position=position, before=before, after=after + ) def move_to_folder(self, path, position=None, after=None, before=None): target = self.dataset.folders.get(path) target.move_here(self, position=position, after=after, before=before) def unbind(self): - """ Unbinds all subvariables from the current Array type + """Unbinds all subvariables from the current Array type variable. 
Works only for non-derived material variables """ - payload = json.dumps(shoji_entity_wrapper({'unbind': []})) + payload = json.dumps(shoji_entity_wrapper({"unbind": []})) resp = self.resource.post(payload) return resp @property def missing_rules(self): - if self.resource.body['type'] in CATEGORICAL_TYPES: + if self.resource.body["type"] in CATEGORICAL_TYPES: raise TypeError( "Variable of type %s do not have missing rules" - % self.resource.body.type) + % self.resource.body.type + ) - result = self.resource.session.get( - self.resource.fragments.missing_rules) + result = self.resource.session.get(self.resource.fragments.missing_rules) assert result.status_code == 200 - return MissingRules(self.resource, result.json()['body']['rules']) + return MissingRules(self.resource, result.json()["body"]["rules"]) def set_missing_rules(self, rules): """ @@ -3077,18 +3206,18 @@ def set_missing_rules(self, rules): ds['varname'].set_missing_rules(missing_rules) """ - if self.resource.body['type'] in CATEGORICAL_TYPES: + if self.resource.body["type"] in CATEGORICAL_TYPES: raise TypeError( "Variable of type %s do not have missing rules" - % self.resource.body.type) + % self.resource.body.type + ) data = {} for k, v in rules.items(): # wrap value in a {'value': value} for crunch - data[k] = {'value': v} + data[k] = {"value": v} result = self.resource.session.put( - self.resource.fragments.missing_rules, - json.dumps({'rules': data}) + self.resource.fragments.missing_rules, json.dumps({"rules": data}) ) assert result.status_code == 204 @@ -3104,9 +3233,7 @@ def set_geodata_view(self, geodata, feature_key): # we need the geodata url if isinstance(geodata, six.string_types): - is_url = ( - geodata.startswith('http://') or geodata.startswith('https://') - ) + is_url = geodata.startswith("http://") or geodata.startswith("https://") if not is_url: # is a name, get the url @@ -3115,16 +3242,9 @@ def set_geodata_view(self, geodata, feature_key): if isinstance(geodata, Entity): geodata = geodata.self - self._resource.patch({ - 'view': { - 'geodata': [ - { - 'geodatum': geodata, - 'feature_key': feature_key - } - ] - } - }) + self._resource.patch( + {"view": {"geodata": [{"geodatum": geodata, "feature_key": feature_key}]}} + ) self._resource.refresh() @@ -3135,9 +3255,9 @@ def unset_geodata_view(self): view = self.view - if 'geodata' in view: - view['geodata'] = [] - self._resource.patch({'view': view}) + if "geodata" in view: + view["geodata"] = [] + self._resource.patch({"view": view}) self._resource.refresh() def replace_values(self, value, filter=None): @@ -3150,7 +3270,7 @@ def replace_values(self, value, filter=None): {'variable_id.subvariable_id': value} """ if self.is_subvar: - subvar_reference = '{}.{}'.format(self.resource.variable.body.id, self.id) + subvar_reference = "{}.{}".format(self.resource.variable.body.id, self.id) return self.dataset.replace_values( {subvar_reference: value}, filter=filter, literal_subvar=True ) @@ -3167,15 +3287,18 @@ def reorder_subvariables(self, subvariables): new_var = old_var.reorder_subvariables(['alias1', 'alias2']) """ # verify there is no repeated aliases - assert len(set(subvariables)) == len(subvariables), \ - 'Repeated aliases found in subvariables: {}'.format(subvariables) + assert len(set(subvariables)) == len( + subvariables + ), "Repeated aliases found in subvariables: {}".format(subvariables) # verify there is no missing subvariables - assert sorted(list(self.keys())) == sorted(subvariables), \ - 'Missing subvariables for this Variable. Existing: {}. 
Given: {}'.format( - list(self.keys()), subvariables) + assert sorted(list(self.keys())) == sorted( + subvariables + ), "Missing subvariables for this Variable. Existing: {}. Given: {}".format( + list(self.keys()), subvariables + ) reordered_urls = [self[sv].url for sv in subvariables] - self.resource.patch(json.dumps({'subvariables': reordered_urls})) + self.resource.patch(json.dumps({"subvariables": reordered_urls})) self.dataset._reload_variables() return self.dataset[self.alias] @@ -3211,13 +3334,7 @@ def _subtotal_headings(self, operation, name, categories, anchor, negative=None) insertion["kwargs"] = {"negative": negative} insertions.append(insertion) - payload = { - 'view': { - 'transform': { - 'insertions': insertions - } - } - } + payload = {"view": {"transform": {"insertions": insertions}}} self.resource.patch(payload) self.dataset._reload_variables() return self.dataset[self.alias] @@ -3238,7 +3355,7 @@ def add_subtotal(self, name, categories=None, anchor=None): var = var.add_subtotal('At the bottom', [3], 'bottom') var = ... """ - return self._subtotal_headings('subtotal', name, categories, anchor) + return self._subtotal_headings("subtotal", name, categories, anchor) def add_subtotal_difference(self, name, add, subtract, anchor): """ @@ -3252,7 +3369,7 @@ def add_subtotal_difference(self, name, add, subtract, anchor): Note: to concatenate subtotals the procedure requires to reassign the variable: var.add_subtotal_difference("F - M", add=["Female"], subtract=["Male"], anchor="bottom") """ - return self._subtotal_headings('subtotal', name, add, anchor, subtract) + return self._subtotal_headings("subtotal", name, add, anchor, subtract) def add_heading(self, name, categories=None, anchor=None): """ @@ -3270,10 +3387,10 @@ def add_heading(self, name, categories=None, anchor=None): var = var.add_heading('At the bottom', [3], 'bottom') var = ... """ - return self._subtotal_headings('heading', name, categories, anchor) + return self._subtotal_headings("heading", name, categories, anchor) def transformations(self): - if 'transform' in self.view: + if "transform" in self.view: return self.view.transform.insertions return None @@ -3287,10 +3404,10 @@ def edit_resolution(self, resolution): :usage: edited_var = var.edit_resolution('M') assert editar_var.rollup_resolution == 'M' """ - assert self.type == 'datetime', 'Method only allowed for datetime variables' + assert self.type == "datetime", "Method only allowed for datetime variables" self.dataset._validate_vartypes(self.type, resolution=resolution) view = self.view - view['rollup_resolution'] = resolution + view["rollup_resolution"] = resolution self.resource.edit(view=view) return self @@ -3309,6 +3426,7 @@ class BackfillFromCSV: dataset are deleted. 
""" + TIMEOUT = 60 * 10 # 10 minutes def __init__(self, dataset, pk_alias, aliases, rows_expr, timeout=None): @@ -3318,9 +3436,7 @@ def __init__(self, dataset, pk_alias, aliases, rows_expr, timeout=None): self.pk_alias = pk_alias self.rows_expr = rows_expr self.alias_to_url = self.load_vars_by_alias() - self.tmp_aliases = { - a: "{}-{}".format(dataset.id, a) for a in aliases - } + self.tmp_aliases = {a: "{}-{}".format(dataset.id, a) for a in aliases} self.progress_tracker = DefaultProgressTracking(timeout or self.TIMEOUT) self.timestamp = datetime.datetime.now().strftime("%Y-%m-%d:%H:%M:%S") @@ -3367,18 +3483,20 @@ def create_tmp_ds(self, csv_file): * Renames the variables to disambiguate on the join """ tmp_name = "Scrunch-backfill-{}-{}-{}".format( - self.dataset.name, self.dataset.id, self.timestamp) + self.dataset.name, self.dataset.id, self.timestamp + ) # Create the new tmp dataset with the schema for the variables # from the target dataset. To ensure they are all the same type metadata = self.obtain_schema() - tmp_ds = self.root.datasets.create(shoji_entity_wrapper({ - "name": tmp_name, - "table": { - "element": "crunch:table", - "metadata": metadata - } - })).refresh() + tmp_ds = self.root.datasets.create( + shoji_entity_wrapper( + { + "name": tmp_name, + "table": {"element": "crunch:table", "metadata": metadata}, + } + ) + ).refresh() try: importing.importer.append_csv_string(tmp_ds, csv_file) except TaskError as err: @@ -3388,9 +3506,13 @@ def create_tmp_ds(self, csv_file): tmp_ds.delete() if exc.status_code == 400: # This is a validation error from the server - raise ValueError("Error importing CSV file - Columns should match specified types") + raise ValueError( + "Error importing CSV file - Columns should match specified types" + ) elif exc.status_code == 413: - raise ValueError("Upload failed because the CSV file is too large. Limit is 150MB") + raise ValueError( + "Upload failed because the CSV file is too large. Limit is 150MB" + ) else: # Other kind of error. Probably 413, or other kind. Don'w # swallow it. Expose it. @@ -3398,9 +3520,14 @@ def create_tmp_ds(self, csv_file): # Rename the aliases in the tmp dataset to disambiguate on the join tmp_aliases = tmp_ds.variables.by("alias") - tmp_ds.variables.patch(shoji_catalog_wrapper({ - tmp_aliases[a].entity_url: {"alias": self.tmp_aliases[a]} for a in self.aliases - })) + tmp_ds.variables.patch( + shoji_catalog_wrapper( + { + tmp_aliases[a].entity_url: {"alias": self.tmp_aliases[a]} + for a in self.aliases + } + ) + ) return tmp_ds def join_tmp_ds(self, tmp_ds): @@ -3421,8 +3548,9 @@ def join_tmp_ds(self, tmp_ds): } ) resp = self.dataset.resource.variables.post(join_payload) - pycrunch.shoji.wait_progress(resp, self.dataset.resource.session, - progress_tracker=self.progress_tracker) + pycrunch.shoji.wait_progress( + resp, self.dataset.resource.session, progress_tracker=self.progress_tracker + ) def backfill(self): variables_expr = {} @@ -3442,10 +3570,7 @@ def backfill(self): # We can perform an update command here because we're guaranteed # that the types for each of the variables matches the column we # want to backfill. - update_expr = { - "command": "update", - "variables": variables_expr, - } + update_expr = {"command": "update", "variables": variables_expr} if self.rows_expr: update_expr["filter"] = self.rows_expr with NoExclusion(self.dataset) as ds: @@ -3455,8 +3580,11 @@ def backfill(self): # filter gets re-applied while we wait. if resp.status_code == 202: # If the response was async. 
Wait for it finishing - pycrunch.shoji.wait_progress(resp, self.dataset.resource.session, - progress_tracker=self.progress_tracker) + pycrunch.shoji.wait_progress( + resp, + self.dataset.resource.session, + progress_tracker=self.progress_tracker, + ) def execute(self, csv_file): # Create a new dataset with the CSV file, We want this TMP dataset @@ -3475,4 +3603,3 @@ def execute(self, csv_file): folders_by_name[folder_name].entity.delete() # Always delete the tmp dataset no matter what tmp_ds.delete() - diff --git a/scrunch/exceptions.py b/scrunch/exceptions.py index f53e08f..9a0762b 100644 --- a/scrunch/exceptions.py +++ b/scrunch/exceptions.py @@ -1,8 +1,8 @@ - class AuthenticationError(Exception): - """ An exception to signal there was a problem trying to authenticate + """An exception to signal there was a problem trying to authenticate a user. """ + pass diff --git a/scrunch/expressions.py b/scrunch/expressions.py index a282bef..be7926e 100644 --- a/scrunch/expressions.py +++ b/scrunch/expressions.py @@ -58,58 +58,47 @@ else: from urllib.parse import urlencode -ARRAY_TYPES = ('categorical_array', 'multiple_response', 'numeric_array') +ARRAY_TYPES = ("categorical_array", "multiple_response", "numeric_array") CRUNCH_FUNC_MAP = { - 'valid': 'is_valid', - 'missing': 'is_missing', - 'bin': 'bin', - 'selected': 'selected', - 'not_selected': 'not_selected', + "valid": "is_valid", + "missing": "is_missing", + "bin": "bin", + "selected": "selected", + "not_selected": "not_selected", } CRUNCH_METHOD_MAP = { - 'any': 'any', - 'all': 'all', - 'duplicates': 'duplicates', - 'bin': 'bin', - 'selected': 'selected', - 'not_selected': 'not_selected', + "any": "any", + "all": "all", + "duplicates": "duplicates", + "bin": "bin", + "selected": "selected", + "not_selected": "not_selected", } # according to http://docs.crunch.io/#function-terms -BINARY_FUNC_OPERATORS = [ - '+', - '-', - '*', - '/', - '//', - '^', - '%', - '&', - '|', - '~', -] +BINARY_FUNC_OPERATORS = ["+", "-", "*", "/", "//", "^", "%", "&", "|", "~"] COMPARISSON_OPERATORS = [ - '==', - '!=', - '=><=', - '<', - '>', - '<=', - '>=', - '~=', - 'in', - 'and', - 'or', - 'not', + "==", + "!=", + "=><=", + "<", + ">", + "<=", + ">=", + "~=", + "in", + "and", + "or", + "not", ] COMPARISSON_FUNCS = [ # 'between', - 'all', - 'any', + "all", + "any", ] BUILTIN_FUNCTIONS = [] @@ -129,14 +118,11 @@ def _nest(args, func, concatenator=None): # multiple arguments and nest them concatenator = func # for the moment we are just nesting and & or - if func not in ['or', 'and', 'is_missing', 'is_valid'] or len(args) < 3: - return { - 'function': concatenator, - 'args': args - } + if func not in ["or", "and", "is_missing", "is_valid"] or len(args) < 3: + return {"function": concatenator, "args": args} return { - 'function': concatenator, - 'args': [args[0], _nest(args[1:], func, concatenator)] + "function": concatenator, + "args": [args[0], _nest(args[1:], func, concatenator)], } @@ -146,7 +132,7 @@ def unfold_list(_list): for e in _list: if isinstance(e, ast.Call): name = e.func.id - if name == 'r': + if name == "r": try: lower = e.args[0].n upper = e.args[1].n @@ -154,8 +140,7 @@ def unfold_list(_list): for elem in r_list: new_list.append(ast.Num(elem)) except Exception: - raise AttributeError( - "function 'r' needs 2 integer arguments") + raise AttributeError("function 'r' needs 2 integer arguments") else: return _list else: @@ -184,7 +169,7 @@ def _var_term(_var_id): if platonic: return {"var": _var_id} else: - return {'variable': _var_id} + return 
{"variable": _var_id} def _parse(node, parent=None): obj = {} @@ -200,55 +185,52 @@ def _parse(node, parent=None): _id = fields[0][1] # A function identifier. - if getattr(node, '_func_type', None) == 'function': + if getattr(node, "_func_type", None) == "function": return _id # A variable identifier. return _var_term(_id) elif isinstance(node, ast.Num) or isinstance(node, ast.Str): - if isinstance(parent, ast.Call) \ - and 'func' in parent._fields: + if isinstance(parent, ast.Call) and "func" in parent._fields: _id = fields[0][1] return _var_term(_id) _val = fields[0][1] - return { - 'value': _val - } + return {"value": _val} elif isinstance(node, ast.Add): - return '+' + return "+" elif isinstance(node, ast.Sub): - return '-' + return "-" elif isinstance(node, ast.Mult): - return '*' + return "*" elif isinstance(node, ast.Div): - return '/' + return "/" elif isinstance(node, ast.FloorDiv): - return '//' + return "//" elif isinstance(node, ast.Pow): - return '^' + return "^" elif isinstance(node, ast.Mod): - return '%' + return "%" elif isinstance(node, ast.BitAnd): - return '&' + return "&" elif isinstance(node, ast.BitOr): - return '|' + return "|" elif isinstance(node, ast.Invert): - return '~' + return "~" elif isinstance(node, ast.Eq): - return '==' + return "==" elif isinstance(node, ast.NotEq): - return '!=' + return "!=" elif isinstance(node, ast.Lt): - return '<' + return "<" elif isinstance(node, ast.LtE): - return '<=' + return "<=" elif isinstance(node, ast.Gt): - return '>' + return ">" elif isinstance(node, ast.GtE): - return '>=' + return ">=" elif isinstance(node, ast.In): - return 'in' + return "in" elif isinstance(node, ast.NotIn): return NOT_IN elif isinstance(node, ast.List) or isinstance(node, ast.Tuple): @@ -257,26 +239,28 @@ def _parse(node, parent=None): _list = unfold_list(_list) if all(isinstance(el, ast.Name) for el in _list): # This is the case of `any([subvar_1, subvar_2])] - return {'column': [el.id for el in _list]} - elif not (all(isinstance(el, ast.Str) for el in _list) or - all(isinstance(el, ast.Num) for el in _list)): + return {"column": [el.id for el in _list]} + elif not ( + all(isinstance(el, ast.Str) for el in _list) + or all(isinstance(el, ast.Num) for el in _list) + ): # Only list-of-int or list-of-str are currently supported - raise ValueError('Only list-of-int or list-of-str are currently supported') + raise ValueError( + "Only list-of-int or list-of-str are currently supported" + ) return { - 'value': [ - getattr(el, 's', None) or getattr(el, 'n') - for el in _list + "value": [ + getattr(el, "s", None) or getattr(el, "n") for el in _list ] } - elif isinstance(node, ast.Attribute) \ - and isinstance(parent, ast.Call): + elif isinstance(node, ast.Attribute) and isinstance(parent, ast.Call): # The variable. _id_node = fields[0][1] if not isinstance(_id_node, (ast.Name, ast.Subscript)): msg = ( 'calling methods of "{}" object not allowed, ' - 'variable name expected.' + "variable name expected." 
).format(type(_id_node).__name__) raise SyntaxError(msg) @@ -287,8 +271,7 @@ def _parse(node, parent=None): if method not in CRUNCH_METHOD_MAP.keys(): raise ValueError( 'unknown method "{}", valid methods are: [{}]'.format( - method, - ', '.join(CRUNCH_METHOD_MAP.keys()) + method, ", ".join(CRUNCH_METHOD_MAP.keys()) ) ) @@ -298,7 +281,7 @@ def _parse(node, parent=None): # We will take the subvariable alias bit from the subscript # and return an object with the array and subvariable alias array_alias = dict(ast.iter_fields(fields[0][1]))["id"] - if PY311: + if PY311: name_node = dict(ast.iter_fields(fields[1][1])) subvariable_alias = name_node["id"] else: @@ -310,91 +293,104 @@ def _parse(node, parent=None): else: # For non-platonic expressions, keep track of both the array # and subvariable to make a proper url lookup. - return {"variable": {"array": array_alias, "subvariable": subvariable_alias}} + return { + "variable": { + "array": array_alias, + "subvariable": subvariable_alias, + } + } # "Non-terminal" nodes. else: for _name, _val in fields: if not isinstance(node, ast.UnaryOp) and ( - isinstance(_val, (ast.BoolOp, ast.UnaryOp, ast.Compare, ast.Call))): + isinstance( + _val, (ast.BoolOp, ast.UnaryOp, ast.Compare, ast.Call) + ) + ): # Descend. obj.update(_parse(_val, parent=node)) elif isinstance(_val, ast.And): - op = 'and' + op = "and" elif isinstance(_val, ast.Or): - op = 'or' + op = "or" elif isinstance(_val, ast.Not): - op = 'not' + op = "not" elif isinstance(_val, ast.Mult): - op = '*' + op = "*" elif isinstance(_val, ast.Add): - op = '+' + op = "+" elif isinstance(_val, ast.Div): - op = '/' + op = "/" elif isinstance(_val, ast.Sub): - op = '-' - elif _name == 'left': + op = "-" + elif _name == "left": left = _parse(_val, parent=node) args.append(left) - elif _name == 'right': + elif _name == "right": right = _parse(_val, parent=node) args.append(right) - elif _name == 'func' and isinstance(_val, ast.Attribute): + elif _name == "func" and isinstance(_val, ast.Attribute): # Method-like call. Example: # variable.any([1,2]) - func_type = 'method' - setattr(_val, '_func_type', func_type) + func_type = "method" + setattr(_val, "_func_type", func_type) left, op = _parse(_val, parent=node) args.append(left) - elif _name == 'func' and isinstance(_val, ast.Name): + elif _name == "func" and isinstance(_val, ast.Name): # Function call. Example: # valid(birthyear, birthmonth) - func_type = 'function' - setattr(_val, '_func_type', func_type) + func_type = "function" + setattr(_val, "_func_type", func_type) _id = _parse(_val, parent=node) if _id not in CRUNCH_FUNC_MAP.keys(): raise ValueError( 'unknown method "{}", valid methods are: [{}]'.format( - _id, - ', '.join(CRUNCH_METHOD_MAP.keys()) + _id, ", ".join(CRUNCH_METHOD_MAP.keys()) ) ) op = CRUNCH_FUNC_MAP[_id] - elif _name == 'ops': + elif _name == "ops": if len(_val) != 1: - raise ValueError('only one logical operator at a time') + raise ValueError("only one logical operator at a time") op = _parse(_val[0], parent=node) - elif _name == 'comparators' or _name == 'args': # right + elif _name == "comparators" or _name == "args": # right if len(_val) == 0: continue - if func_type == 'method': + if func_type == "method": if len(_val) > 1: - raise ValueError('1 argument expected, got {}'.format(len(_val))) + raise ValueError( + "1 argument expected, got {}".format(len(_val)) + ) - if op == 'duplicates': + if op == "duplicates": # No parameters allowed for 'duplicates'. 
- raise ValueError('No parameters allowed for "duplicates"') + raise ValueError( + 'No parameters allowed for "duplicates"' + ) for arg in _val: right = _parse(arg, parent=node) # For method calls, we only allow list-of-int # parameters. - if _name == 'args' and func_type == 'method': - right_val = right.get('value', right.get('column')) + if _name == "args" and func_type == "method": + right_val = right.get("value", right.get("column")) if not isinstance(right_val, list): raise ValueError( 'expected list, got "{}"'.format( - type(right.get('value')) + type(right.get("value")) ) ) args.append(right) - elif _name in ('keywords', 'starargs', 'kwargs') and _val: + elif _name in ("keywords", "starargs", "kwargs") and _val: # We don't support these in function/method calls. - raise ValueError('unsupported call with argument "{}"'.format(_name)) - elif _name == 'operand' and isinstance(node, ast.UnaryOp): + raise ValueError( + 'unsupported call with argument "{}"'.format(_name) + ) + elif _name == "operand" and isinstance(node, ast.UnaryOp): right = _parse(_val, parent=node) args.append(right) elif isinstance(_val, list): @@ -409,42 +405,32 @@ def _parse(node, parent=None): if op is NOT_IN: # Special treatment for the `not in` operator. obj = { - 'function': 'not', - 'args': [ - { - 'function': 'in', - 'args': [] - } - ] - } - elif op in CRUNCH_FUNC_MAP.values() \ - and isinstance(args, list) and len(args) > 1: - obj = { - 'function': 'or', - 'args': [] + "function": "not", + "args": [{"function": "in", "args": []}], } + elif ( + op in CRUNCH_FUNC_MAP.values() + and isinstance(args, list) + and len(args) > 1 + ): + obj = {"function": "or", "args": []} else: - obj = { - 'function': op, - 'args': [] - } + obj = {"function": op, "args": []} - if args and 'args' in obj: + if args and "args" in obj: if op is NOT_IN: # Special treatment for the args in a `not in` expr. - obj['args'][0]['args'] = args - elif op in CRUNCH_FUNC_MAP.values() \ - and isinstance(args, list) and len(args) > 1: + obj["args"][0]["args"] = args + elif ( + op in CRUNCH_FUNC_MAP.values() + and isinstance(args, list) + and len(args) > 1 + ): for arg in args: - obj['args'].append( - { - 'function': op, - 'args': [arg] - } - ) + obj["args"].append({"function": op, "args": [arg]}) # concatenate with or when there is more than # 2 arguments in the list - obj = _nest(obj['args'], op, concatenator='or') + obj = _nest(obj["args"], op, concatenator="or") else: obj = _nest(args, op) @@ -458,7 +444,7 @@ def _parse(node, parent=None): if expr is None: return dict() - return _parse(ast.parse(expr, mode='eval')) + return _parse(ast.parse(expr, mode="eval")) def get_dataset_variables(ds): @@ -469,40 +455,38 @@ def get_dataset_variables(ds): :param ds: Dataset() instance :return: Dictionary keyed by alias """ - table = ds.follow("table", urlencode({ - 'limit': 0 - })) + table = ds.follow("table", urlencode({"limit": 0})) # Build the variables dict, using `alias` as the key. 
variables = dict() for _id, var in table.metadata.items(): - var['id'] = _id - variables[var['alias']] = var + var["id"] = _id + variables[var["alias"]] = var - if var['type'] in ARRAY_TYPES: - subreferences = var.get('subreferences', {}) + if var["type"] in ARRAY_TYPES: + subreferences = var.get("subreferences", {}) for subvar_id, subvar in subreferences.items(): - subvar['is_subvar'] = True - subvar['id'] = subvar_id - subvar['parent_id'] = _id - subvar['type'] = 'categorical' - subvar['description'] = '' + subvar["is_subvar"] = True + subvar["id"] = subvar_id + subvar["parent_id"] = _id + subvar["type"] = "categorical" + subvar["description"] = "" if var.get("categories") is not None: # Numeric arrays do not have categories - subvar['categories'] = var.get("categories") + subvar["categories"] = var.get("categories") # TODO: This is a problem when subvariable codes are reused - variables[subvar['alias']] = subvar + variables[subvar["alias"]] = subvar # Poorman's square bracket lookup - variables["%s[%s]" % (var["alias"], subvar['alias'])] = subvar + variables["%s[%s]" % (var["alias"], subvar["alias"])] = subvar return variables def get_subvariables_resource(var_url, var_index): variable = var_index[var_url].entity - sub_variables = variable.subvariables['index'] - return {sv['alias'].strip('#'): sv['id'] for sv in sub_variables.values()} + sub_variables = variable.subvariables["index"] + return {sv["alias"].strip("#"): sv["id"] for sv in sub_variables.values()} def _get_categories_from_var_index(var_index, var_url): @@ -526,21 +510,22 @@ def adapt_multiple_response(var_url, values, var_index): # scenario var.any([subvar1, subvar2]) # in this scenario, we only want category ids that refers to `selected` categories column = [ - cat.get("id") for cat in _get_categories_from_var_index(var_index, var_url) if cat.get("selected") + cat.get("id") + for cat in _get_categories_from_var_index(var_index, var_url) + if cat.get("selected") ] variables = [var_id for alias, var_id in aliases.items() if alias in values] for variable_id in variables: variable_url = "{}subvariables/{}/".format(var_url, variable_id) - result.append({ - "variable": variable_url, - "column": column - }) + result.append({"variable": variable_url, "column": column}) return result, True -def _update_values_for_multiple_response(new_values, values, subitem, var_index, arrays): +def _update_values_for_multiple_response( + new_values, values, subitem, var_index, arrays +): """ - Multiple response does not need the `value` key, but it relies on the `column` key - Remove from `arrays` (subvariable list) the ones that should not be considered @@ -548,11 +533,11 @@ def _update_values_for_multiple_response(new_values, values, subitem, var_index, var_url = subitem.get("variable", "").split("subvariables")[0] column = new_values[0].get("column") value = values[0].get("value") - if var_url and var_index[var_url]['type'] == 'multiple_response': + if var_url and var_index[var_url]["type"] == "multiple_response": if column: - values[0]['column'] = column + values[0]["column"] = column elif value is not None: - values[0]['column'] = value + values[0]["column"] = value values[0].pop("value", None) arrays[0] = [new_value["variable"] for new_value in new_values] @@ -576,7 +561,7 @@ def ensure_category_ids(subitems, values, arrays, variables=variables): _subitems = [] def variable_id(variable_url): - return variable_url.split('/')[-2] + return variable_url.split("/")[-2] def category_ids(var_id, var_value, variables=variables): value = None @@ 
-589,11 +574,11 @@ def category_ids(var_id, var_value, variables=variables): value.append(val) continue for var in variables: - if variables[var]['id'] == var_id: - if 'categories' in variables[var]: - for cat in variables[var]['categories']: - if cat['name'] == val: - value.append(cat['id']) + if variables[var]["id"] == var_id: + if "categories" in variables[var]: + for cat in variables[var]["categories"]: + if cat["name"] == val: + value.append(cat["id"]) else: # variable has no categories, return original # list of values @@ -602,18 +587,24 @@ def category_ids(var_id, var_value, variables=variables): elif isinstance(var_value, str): for var in variables: # if the variable is a date, don't try to process it's categories - if variables[var]['type'] == 'datetime': + if variables[var]["type"] == "datetime": return var_value - if variables[var]['id'] == var_id and 'categories' in variables[var]: + if ( + variables[var]["id"] == var_id + and "categories" in variables[var] + ): found = False - for cat in variables[var]['categories']: - if cat['name'] == var_value: - value = cat['id'] + for cat in variables[var]["categories"]: + if cat["name"] == var_value: + value = cat["id"] found = True break if not found: - raise ValueError("Couldn't find a category id for category %s in filter for variable %s" % (var_value, var)) - elif 'categories' not in variables[var]: + raise ValueError( + "Couldn't find a category id for category %s in filter for variable %s" + % (var_value, var) + ) + elif "categories" not in variables[var]: return var_value else: @@ -623,20 +614,31 @@ def category_ids(var_id, var_value, variables=variables): # special case for multiple_response variables if len(subitems) == 2: _variable, _value = subitems - var_url = _variable.get('variable') + var_url = _variable.get("variable") _value_key = next(iter(_value)) - if _value_key in {'column', "value"} and var_url: - if var_url in var_index and var_index[var_url]['type'] == 'multiple_response': - result = adapt_multiple_response(var_url, _value[_value_key], var_index) + if _value_key in {"column", "value"} and var_url: + if ( + var_url in var_index + and var_index[var_url]["type"] == "multiple_response" + ): + result = adapt_multiple_response( + var_url, _value[_value_key], var_index + ) # handle the multiple response type - _update_values_for_multiple_response(result[0], values, subitems[0], var_index, arrays) + _update_values_for_multiple_response( + result[0], values, subitems[0], var_index, arrays + ) return result for item in subitems: - if isinstance(item, dict) and 'variable' in item and not isinstance(item["variable"], dict): - var_id = variable_id(item['variable']) - elif isinstance(item, dict) and 'value' in item: - item['value'] = category_ids(var_id, item['value']) + if ( + isinstance(item, dict) + and "variable" in item + and not isinstance(item["variable"], dict) + ): + var_id = variable_id(item["variable"]) + elif isinstance(item, dict) and "value" in item: + item["value"] = category_ids(var_id, item["value"]) _subitems.append(item) return _subitems, True @@ -650,10 +652,10 @@ def _process(obj, variables): # inspect function, then inspect variable, if multiple_response, # then change in --> any - if 'function' in obj and 'args' in obj: - if obj['function'] == 'in': - args = obj['args'] - if 'variable' in args[0]: + if "function" in obj and "args" in obj: + if obj["function"] == "in": + args = obj["args"] + if "variable" in args[0]: if isinstance(args[0], dict): # This is the case of a square bracket subvariable # 
array[subvar] in [values] @@ -663,10 +665,15 @@ def _process(obj, variables): pass else: try: - if variables.get(args[0]['variable'])['type'] == 'multiple_response': - obj['function'] = 'any' + if ( + variables.get(args[0]["variable"])["type"] + == "multiple_response" + ): + obj["function"] = "any" except TypeError: - raise ValueError("Invalid variable alias '%s'" % args[0]['variable']) + raise ValueError( + "Invalid variable alias '%s'" % args[0]["variable"] + ) for key, val in obj.items(): if isinstance(val, dict) and "array" not in val: @@ -678,30 +685,32 @@ def _process(obj, variables): for subitem in val: if isinstance(subitem, dict): subitem = _process(subitem, variables) - if 'subvariables' in subitem: - arrays.append(subitem.pop('subvariables')) - elif 'value' in subitem or 'column' in subitem: + if "subvariables" in subitem: + arrays.append(subitem.pop("subvariables")) + elif "value" in subitem or "column" in subitem: values.append(subitem) subitems.append(subitem) has_value = any( - 'value' in item for item in subitems if not is_number(item) + "value" in item for item in subitems if not is_number(item) ) if not has_value: # Since values can be see with `value` or `column` keys # check if `column` is there if not `value` - has_value = any('column' in item for item in subitems if not is_number(item)) + has_value = any( + "column" in item for item in subitems if not is_number(item) + ) has_variable = any( - 'variable' in item for item in subitems if not is_number(item) + "variable" in item for item in subitems if not is_number(item) ) if has_value and has_variable: subitems, needs_wrap = ensure_category_ids(subitems, values, arrays) obj[key] = subitems - elif key == 'variable': + elif key == "variable": if isinstance(val, dict) and "array" in val: # This is a subvariable reference with this shape: # {"variable": {"array": array_alias, "subvariable": subvariable_alias}` @@ -711,13 +720,22 @@ def _process(obj, variables): except KeyError: raise ValueError("Invalid variable alias '%s'" % array_alias) subreferences = array_value["subreferences"] - subvar_map = {sr["alias"]: sv_id for sv_id, sr in subreferences.items()} + subvar_map = { + sr["alias"]: sv_id for sv_id, sr in subreferences.items() + } array_id = array_value["id"] try: subvar_id = subvar_map[subvar_alias] except KeyError: - raise ValueError("Invalid subvariable `%s` for array '%s'" % (subvariables, array_alias)) - subvar_url = "%svariables/%s/subvariables/%s/" % (base_url, array_id, subvar_id) + raise ValueError( + "Invalid subvariable `%s` for array '%s'" + % (subvariables, array_alias) + ) + subvar_url = "%svariables/%s/subvariables/%s/" % ( + base_url, + array_id, + subvar_id, + ) obj[key] = subvar_url else: # Otherwise a regular variable references {"variable": alias} @@ -726,54 +744,57 @@ def _process(obj, variables): raise ValueError("Invalid variable alias '%s'" % val) # TODO: We shouldn't stitch URLs together, use the API - if var.get('is_subvar'): - obj[key] = '%svariables/%s/subvariables/%s/' \ - % (base_url, var['parent_id'], var['id']) + if var.get("is_subvar"): + obj[key] = "%svariables/%s/subvariables/%s/" % ( + base_url, + var["parent_id"], + var["id"], + ) else: - obj[key] = '%svariables/%s/' % (base_url, var['id']) + obj[key] = "%svariables/%s/" % (base_url, var["id"]) - if var['type'] in ARRAY_TYPES: + if var["type"] in ARRAY_TYPES: subvariables = [] - for subvar_id in var.get('subvariables', []): + for subvar_id in var.get("subvariables", []): # In case the subvar_id comes as a subvariable URL # we want 
to only consider the ID bit of the URL subvar_id = subvar_id.strip("/").split("/")[-1] subvariables.append( - '%svariables/%s/subvariables/%s/' - % (base_url, var['id'], subvar_id) + "%svariables/%s/subvariables/%s/" + % (base_url, var["id"], subvar_id) ) - elif key == 'function': + elif key == "function": op = val if subvariables: - obj['subvariables'] = subvariables + obj["subvariables"] = subvariables # support for categorical variables with `any` if not arrays and op == "any": obj["function"] = "in" - if arrays and op in ('any', 'all', 'is_valid', 'is_missing') and needs_wrap: + if arrays and op in ("any", "all", "is_valid", "is_missing") and needs_wrap: # Support for array variables. if len(arrays) != 1: raise ValueError - real_op = 'in' - expansion_op = 'or' - if op == 'all': - real_op = '==' - expansion_op = 'and' - elif op == 'is_valid': - real_op = 'all_valid' - elif op == 'is_missing': - real_op = 'is_missing' - - if op in ('is_valid', 'is_missing'): + real_op = "in" + expansion_op = "or" + if op == "all": + real_op = "==" + expansion_op = "and" + elif op == "is_valid": + real_op = "all_valid" + elif op == "is_missing": + real_op = "is_missing" + + if op in ("is_valid", "is_missing"): if len(values) != 0: raise ValueError # Just swap the op. Yep, that's it. - obj['function'] = real_op + obj["function"] = real_op else: if len(values) != 1: raise ValueError @@ -781,41 +802,30 @@ def _process(obj, variables): subvariables = arrays[0] value = values[0] - if op == 'all': + if op == "all": inner_value = value.get("value", value.get("column", [])) if len(inner_value) != 1: raise ValueError value.pop("column", None) - value['value'] = inner_value[0] + value["value"] = inner_value[0] if len(subvariables) == 1: - obj['function'] = real_op - obj["args"] = [ - {'variable': subvariables[0]}, - value - ] + obj["function"] = real_op + obj["args"] = [{"variable": subvariables[0]}, value] else: - obj = { - 'function': expansion_op, - 'args': [] - } - args_ref = obj['args'] + obj = {"function": expansion_op, "args": []} + args_ref = obj["args"] args_ref.extend( - [{ - 'function': real_op, - 'args': [ - {'variable': subvar}, - value - ] - } for subvar in subvariables] + [ + {"function": real_op, "args": [{"variable": subvar}, value]} + for subvar in subvariables + ] ) return obj if isinstance(obj, list): - return [ - _process(copy.deepcopy(element), variables) for element in obj - ] + return [_process(copy.deepcopy(element), variables) for element in obj] else: return _process(copy.deepcopy(obj), variables) @@ -849,8 +859,8 @@ def _resolve_variable(var): return var elif not isinstance(ds, scrunch.datasets.BaseDataset): raise Exception( - 'Valid Dataset instance is required to resolve variable urls ' - 'in the expression' + "Valid Dataset instance is required to resolve variable urls " + "in the expression" ) var_resource = ds.resource.session.get(var).payload var_alias = var_resource.body["alias"] @@ -859,11 +869,13 @@ def _resolve_variable(var): # subvariable by checking the adjacent resources linked. A subvariable # will point to its parent `/subvariables/` catalog and refer to its # array variable by `.fragments["variable"]`. 
- is_subvariable = 'parent' in var_resource.catalogs and 'variable' in var_resource.fragments + is_subvariable = ( + "parent" in var_resource.catalogs and "variable" in var_resource.fragments + ) if is_subvariable: # Fetch the array variable - array_url = var_resource.fragments['variable'] + array_url = var_resource.fragments["variable"] array_var = ds.resource.session.get(array_url).payload array_alias = array_var.body["alias"] var_alias = "%s[%s]" % (array_alias, var_alias) @@ -871,64 +883,65 @@ def _resolve_variable(var): return var_alias def _resolve_variables(_expr): - new_expr = dict( - function=_expr['function'], - args=[] - ) - for arg in _expr['args']: - if 'function' in arg: + new_expr = dict(function=_expr["function"], args=[]) + for arg in _expr["args"]: + if "function" in arg: # arg is a function, resolve inner variables - new_expr['args'].append(_resolve_variables(arg)) - elif 'variable' in arg: + new_expr["args"].append(_resolve_variables(arg)) + elif "variable" in arg: # arg is a variable, resolve - new_expr['args'].append( - {'variable': _resolve_variable(arg['variable'])} + new_expr["args"].append( + {"variable": _resolve_variable(arg["variable"])} ) else: # arg is neither a variable or function, pass as is - new_expr['args'].append(arg) + new_expr["args"].append(arg) return new_expr def _transform(f, args, nest=False): - result = '' + result = "" if f in operators: if len(args) == 1: - result = '%s %s' % (f, args[0]) + result = "%s %s" % (f, args[0]) else: - op = ' %s ' % f + op = " %s " % f result = op.join(str(x) for x in args) elif f in methods: - if f in ['selected', 'not_selected']: - result = '%s(%s)%s' % (methods[f], args[0], ', '.join(str(x) for x in args[1:])) - else: - result = '%s.%s(%s)' % ( - args[0], methods[f], ', '.join(str(x) for x in args[1:]) + if f in ["selected", "not_selected"]: + result = "%s(%s)%s" % ( + methods[f], + args[0], + ", ".join(str(x) for x in args[1:]), + ) + else: + result = "%s.%s(%s)" % ( + args[0], + methods[f], + ", ".join(str(x) for x in args[1:]), ) elif f in functions: - result = '%s(%s)' % (functions[f], args[0]) + result = "%s(%s)" % (functions[f], args[0]) else: raise Exception('Unknown function "%s"' % f) if nest: - result = '(%s)' % result + result = "(%s)" % result return result def _quote_value(v): # escape the quotes from the string, also escape the backslash - return "'{}'".format( - v.replace("\\", "\\\\").replace("\'", "\\\'") - ) + return "'{}'".format(v.replace("\\", "\\\\").replace("'", "\\'")) def _process(fragment, parent=None): - _func = fragment.get('function') + _func = fragment.get("function") if _func is None: # This is not a function, but a plain argument - if 'value' in fragment: + if "value" in fragment: # This argument is a value, not a variable - value = fragment['value'] + value = fragment["value"] if isinstance(value, six.string_types): # Must escape single-quote from string value @@ -945,16 +958,17 @@ def _process(fragment, parent=None): return list(fragment.values())[0] - args = [_process(arg, _func) for arg in fragment['args']] + args = [_process(arg, _func) for arg in fragment["args"]] child_functions = [ - arg.get('function') - for arg in fragment['args'] if arg.get('function') is not None + arg.get("function") + for arg in fragment["args"] + if arg.get("function") is not None ] - has_child_and_or = 'or' in child_functions + has_child_and_or = "or" in child_functions nest = parent is not None and ( - has_child_and_or or - (parent == 'or' and len(child_functions) > 1) or - _func == 'or' + 
has_child_and_or + or (parent == "or" and len(child_functions) > 1) + or _func == "or" ) return _transform(_func, args, nest=nest) diff --git a/scrunch/folders.py b/scrunch/folders.py index 360e23b..9095291 100644 --- a/scrunch/folders.py +++ b/scrunch/folders.py @@ -15,7 +15,7 @@ def __init__(self, folder_ent, root, parent): self.url = folder_ent.self def __repr__(self): - return '' % self.name + return "" % self.name def get(self, path): self.folder_ent.refresh() # Always up to date @@ -26,7 +26,7 @@ def get(self, path): try: node = node.get_child(p_name) except KeyError: - raise InvalidPathError('Subfolder not found %s' % p) + raise InvalidPathError("Subfolder not found {}".format(p_name)) return node def __getitem__(self, path): @@ -57,8 +57,8 @@ def items(self): return list(self.iteritems()) def get_child(self, name): - by_name = self.folder_ent.by('name') - by_alias = self.folder_ent.by('alias') + by_name = self.folder_ent.by("name") + by_alias = self.folder_ent.by("alias") # If found by alias, then it's a variable, return the variable if name in by_alias: @@ -66,13 +66,13 @@ def get_child(self, name): elif name in by_name: # Found by name, if it's not a folder, return the variable tup = by_name[name] - if tup.type != 'folder': + if tup.type != "folder": return self.root.dataset[name] return Folder(tup.entity, self.root, self) # Not a folder nor a variable path = self.path_pieces() + [name] - raise InvalidPathError('Invalid path: | %s' % ' | '.join(path)) + raise InvalidPathError("Invalid path: | %s" % " | ".join(path)) def path_pieces(self): if self.parent: @@ -81,12 +81,14 @@ def path_pieces(self): @property def path(self): - return '| ' + ' | '.join(self.path_pieces()) - - def create_folder(self, folder_name, position=None, after=None, before=None, alias=None): - new_ent = self.folder_ent.create(Catalog(self.folder_ent.session, body={ - 'name': folder_name - })) + return "| " + " | ".join(self.path_pieces()) + + def create_folder( + self, folder_name, position=None, after=None, before=None, alias=None + ): + new_ent = self.folder_ent.create( + Catalog(self.folder_ent.session, body={"name": folder_name}) + ) new_ent.refresh() subfolder = Folder(new_ent, self.root, self) if position is not None or after is not None or before is not None: @@ -131,12 +133,12 @@ def children(self): if item_url not in index: continue item_tup = index[item_url] - if item_tup['type'] == 'folder': + if item_tup["type"] == "folder": subfolder = Folder(item_tup.entity, self.root, self) _children.append(subfolder) else: # Add the variable - _children.append(ds[item_tup['alias']]) + _children.append(ds[item_tup["alias"]]) return _children def move_here(self, *children, **kwargs): @@ -144,20 +146,20 @@ def move_here(self, *children, **kwargs): return children = children[0] if isinstance(children[0], list) else children children = [ - self.root.dataset[c] if isinstance(c, string_types) else c - for c in children + self.root.dataset[c] if isinstance(c, string_types) else c for c in children ] index = {c.url: {} for c in children} - position, before, after = [kwargs.get('position'), - kwargs.get('before'), kwargs.get('after')] + position, before, after = [ + kwargs.get("position"), + kwargs.get("before"), + kwargs.get("after"), + ] if position is not None or after is not None or before is not None: children = self._position_items(children, position, before, after) graph = self.folder_ent.graph + [c.url for c in children] - self.folder_ent.patch({ - 'element': 'shoji:catalog', - 'index': index, - 'graph': graph - 
}) + self.folder_ent.patch( + {"element": "shoji:catalog", "index": index, "graph": graph} + ) self.folder_ent.refresh() def append(self, *children): @@ -166,7 +168,7 @@ def append(self, *children): def insert(self, *children, **kwargs): """Alias of move_here with a specific position""" - self.move_here(*children, position=kwargs.get('position', 0)) + self.move_here(*children, position=kwargs.get("position", 0)) def move(self, path, position=None, after=None, before=None): """ @@ -178,10 +180,7 @@ def move(self, path, position=None, after=None, before=None): target.move_here(self, position=position, after=after, before=before) def rename(self, new_name): - self.folder_ent.patch({ - 'element': 'shoji:catalog', - 'body': {'name': new_name} - }) + self.folder_ent.patch({"element": "shoji:catalog", "body": {"name": new_name}}) self.name = new_name def delete(self): @@ -189,15 +188,12 @@ def delete(self): def reorder(self, *items): items = items[0] if isinstance(items[0], list) else items - name2tup = self.folder_ent.by('name') + name2tup = self.folder_ent.by("name") graph = [ name2tup[c].entity_url if isinstance(c, string_types) else c.url for c in items ] - self.folder_ent.patch({ - 'element': 'shoji:catalog', - 'graph': graph - }) + self.folder_ent.patch({"element": "shoji:catalog", "graph": graph}) self.folder_ent.refresh() diff --git a/scrunch/helpers.py b/scrunch/helpers.py index e2e8041..dce5f36 100644 --- a/scrunch/helpers.py +++ b/scrunch/helpers.py @@ -15,8 +15,20 @@ DEFAULT_MULTIPLE_RESONSE_CATEGORIES = [ - {'id': SELECTED_ID, 'name': 'Selected', 'missing': False, 'numeric_value': None, 'selected': True}, - {'id': NOT_SELECTED_ID, 'name': 'Not selected', 'missing': False, 'numeric_value': None, 'selected': False}, + { + "id": SELECTED_ID, + "name": "Selected", + "missing": False, + "numeric_value": None, + "selected": True, + }, + { + "id": NOT_SELECTED_ID, + "name": "Not selected", + "missing": False, + "numeric_value": None, + "selected": False, + }, ] @@ -24,6 +36,7 @@ class ReadOnly(object): """ class for protecting undesired writes to attributes """ + def __init__(self, resource): # need to call parent to make sure we call other mixin's __init__ object.__setattr__(self, "resource", resource) @@ -31,16 +44,14 @@ def __init__(self, resource): def __setattr__(self, attr, value): if attr in self._IMMUTABLE_ATTRIBUTES: - raise AttributeError( - "Can't edit attibute '%s'" % attr) + raise AttributeError("Can't edit attibute '%s'" % attr) if attr in self._MUTABLE_ATTRIBUTES: - raise AttributeError('use the edit() method for ' - 'mutating attributes') + raise AttributeError("use the edit() method for " "mutating attributes") object.__setattr__(self, attr, value) def is_relative_url(url): - return url.startswith(('.', '/')) + return url.startswith((".", "/")) def abs_url(expr, base_url): @@ -53,7 +64,7 @@ def abs_url(expr, base_url): """ if isinstance(expr, dict): for k in expr: - if k == 'variable': + if k == "variable": if is_relative_url(expr[k]): expr[k] = urljoin(base_url, expr[k]) elif isinstance(expr[k], dict): @@ -66,19 +77,20 @@ def abs_url(expr, base_url): def subvar_alias(parent_alias, response_id): - return '%s_%d' % (parent_alias, response_id) + return "%s_%d" % (parent_alias, response_id) def download_file(url, filename): - if url.startswith('file://'): + if url.startswith("file://"): # Result is in local filesystem (for local development mostly) import shutil - shutil.copyfile(url.split('file://', 1)[1], filename) + + shutil.copyfile(url.split("file://", 1)[1], filename) 
else: r = requests.get(url, stream=True) - with open(filename, 'wb') as f: + with open(filename, "wb") as f: for chunk in r.iter_content(chunk_size=1024): - if chunk: # filter out keep-alive new chunks + if chunk: # filter out keep-alive new chunks f.write(chunk) return filename @@ -93,9 +105,13 @@ def get_else_case(case, responses): ] build the case for else: 'not (age==30) and not (age==40)' """ - if case == 'else': - case = ' and '.join( - ['not({})'.format(_case['case']) for _case in responses if _case['case'] != 'else'] + if case == "else": + case = " and ".join( + [ + "not({})".format(_case["case"]) + for _case in responses + if _case["case"] != "else" + ] ) return case @@ -125,12 +141,16 @@ def else_case_not_selected(case, responses, missing_case): In this case, the else case needs to be manually built in the form: - not selected: '((q1 in [1]) or (q1 in [2]) and not (missing(screener4)))' """ - if case == 'else' and missing_case: - missing = ' or '.join( - ['({})'.format(_case['case']) for _case in responses if _case['case'] != 'else'] + if case == "else" and missing_case: + missing = " or ".join( + [ + "({})".format(_case["case"]) + for _case in responses + if _case["case"] != "else" + ] ) - missing = '({})'.format(missing) - missing += ' and not ({}) '.format(missing_case) + missing = "({})".format(missing) + missing += " and not ({}) ".format(missing_case) return missing return None @@ -153,17 +173,17 @@ def validate_categories(categories): {'id': 4, 'name': 'Missing', 'missing': True, 'numeric_value': None, 'selected': False} ] """ - defaults = {'missing': False, 'numeric_value': None, 'selected': False} + defaults = {"missing": False, "numeric_value": None, "selected": False} selected_count = 0 for category in categories: - if category.get('selected'): + if category.get("selected"): selected_count += 1 - if not category.get('id'): + if not category.get("id"): raise ValueError('An "id" must be provided to all categories') - if not category.get('name'): + if not category.get("name"): raise ValueError('A "name" must be provided to all categories') if selected_count > 1 or selected_count == 0: - raise ValueError('Categories must define one category as selected') + raise ValueError("Categories must define one category as selected") _categories = [] for category in categories: default = defaults.copy() @@ -178,23 +198,17 @@ def case_expr(rules, name, alias, categories=DEFAULT_MULTIPLE_RESONSE_CATEGORIES variable. 
""" expression = { - 'references': { - 'name': name, - 'alias': alias, - }, - 'function': 'case', - 'args': [{ - 'column': [category['id'] for category in categories], - 'type': { - 'value': { - 'class': 'categorical', - 'categories': categories - } + "references": {"name": name, "alias": alias}, + "function": "case", + "args": [ + { + "column": [category["id"] for category in categories], + "type": {"value": {"class": "categorical", "categories": categories}}, } - }] + ], } for rule in rules: - expression['args'].append(rule) + expression["args"].append(rule) return expression @@ -204,9 +218,7 @@ def _validate_category_rules(categories, rules): """ if not ((len(categories) - 1) <= len(rules) <= len(categories)): - raise ValueError( - 'Amount of rules should match categories (or categories -1)' - ) + raise ValueError("Amount of rules should match categories (or categories -1)") def shoji_view_wrapper(value, **kwargs): @@ -214,10 +226,7 @@ def shoji_view_wrapper(value, **kwargs): receives a dictionary and wraps its content on a body keyed dictionary with the appropriate shoji: attribute """ - payload = { - 'element': 'shoji:view', - 'value': value - } + payload = {"element": "shoji:view", "value": value} payload.update(**kwargs) return payload @@ -227,10 +236,7 @@ def shoji_entity_wrapper(body, **kwargs): receives a dictionary and wraps its content on a body keyed dictionary with the appropriate shoji: attribute """ - payload = { - 'element': 'shoji:entity', - 'body': body - } + payload = {"element": "shoji:entity", "body": body} payload.update(**kwargs) return payload @@ -240,10 +246,7 @@ def shoji_catalog_wrapper(index, **kwargs): receives a dictionary and wraps its content on a body keyed dictionary with the appropriate shoji: attribute """ - payload = { - 'element': 'shoji:catalog', - 'index': index - } + payload = {"element": "shoji:catalog", "index": index} payload.update(**kwargs) return payload @@ -254,11 +257,7 @@ def valid_categorical_date(date_str): In order to keep dependencies reduced (no dateutil) and Python2x support, we will support a limited set of simple date formats. 
""" - valid_date_masks = [ - "%Y", - "%Y-%m", - "%Y-%m-%d", - ] + valid_date_masks = ["%Y", "%Y-%m", "%Y-%m-%d"] for mask in valid_date_masks: try: datetime.strptime(date_str, mask) diff --git a/scrunch/mutable_dataset.py b/scrunch/mutable_dataset.py index 69f9dca..ebe0a6b 100644 --- a/scrunch/mutable_dataset.py +++ b/scrunch/mutable_dataset.py @@ -2,8 +2,13 @@ from pycrunch.shoji import wait_progress from pycrunch.progress import DefaultProgressTracking -from scrunch.datasets import (LOG, BaseDataset, _get_connection, _get_dataset, - CATEGORICAL_TYPES) +from scrunch.datasets import ( + LOG, + BaseDataset, + _get_connection, + _get_dataset, + CATEGORICAL_TYPES, +) from scrunch.exceptions import InvalidDatasetTypeError from scrunch.expressions import parse_expr, process_expr from scrunch.helpers import shoji_entity_wrapper @@ -15,9 +20,12 @@ def get_mutable_dataset(dataset, connection=None, editor=False, project=None): """ shoji_ds, root = _get_dataset(dataset, connection, editor, project) # make sure the Dataset is of type streaming != "streaming" - if shoji_ds['body'].get('streaming') == 'streaming': - raise InvalidDatasetTypeError("Dataset %s is of type 'streaming',\ - use get_streaming_dataset method instead" % dataset) + if shoji_ds["body"].get("streaming") == "streaming": + raise InvalidDatasetTypeError( + "Dataset %s is of type 'streaming',\ + use get_streaming_dataset method instead" + % dataset + ) ds = MutableDataset(shoji_ds) if editor is True: authenticated_url = root.urls["user_url"] @@ -31,14 +39,12 @@ def create_dataset(name, variables, connection=None, **kwargs): if not connection: raise AttributeError( "Authenticate first with scrunch.connect() or by providing " - "config/environment variables") + "config/environment variables" + ) dataset_doc = { - 'name': name, - 'table': { - 'element': 'crunch:table', - 'metadata': variables - } + "name": name, + "table": {"element": "crunch:table", "metadata": variables}, } dataset_doc.update(**kwargs) @@ -58,8 +64,9 @@ def delete(self): """ self.resource.delete() - def join(self, left_var, right_ds, right_var, columns=None, - filter=None, timeout=30): + def join( + self, left_var, right_ds, right_var, columns=None, filter=None, timeout=30 + ): """ Joins a given variable. In crunch joins are left joins, where left is the dataset variable and right is other dataset variable. 
@@ -81,12 +88,12 @@ def join(self, left_var, right_ds, right_var, columns=None, left_var_url = self[left_var].url # this dictionary sets the main part of the join adapter = { - 'function': 'adapt', - 'args': [ - {'dataset': right_ds.url}, - {'variable': right_var_url}, - {'variable': left_var_url} - ] + "function": "adapt", + "args": [ + {"dataset": right_ds.url}, + {"variable": right_var_url}, + {"variable": left_var_url}, + ], } # wrap the adapter method on a shoji and body entity @@ -94,29 +101,32 @@ def join(self, left_var, right_ds, right_var, columns=None, if columns and isinstance(columns, list): # overwrite body to new format - payload['body'] = { - 'frame': adapter, - 'function': 'make_frame', - 'args': [ - {'map': {}} - ] + payload["body"] = { + "frame": adapter, + "function": "make_frame", + "args": [{"map": {}}], } # add the individual variable columns to the payload alias_list = right_ds.resource.variables.by("alias") var_urls = [alias_list[alias].entity_url for alias in columns] var_url_list = {var_url: {"variable": var_url} for var_url in var_urls} - payload['body']['args'][0]['map'] = var_url_list + payload["body"]["args"][0]["map"] = var_url_list if filter: # in the case of a filter, convert it to crunch # and attach the filter to the payload expr = process_expr(parse_expr(filter), right_ds) - payload['body']['filter'] = {'expression': expr} + payload["body"]["filter"] = {"expression": expr} progress = self.resource.variables.post(payload) # poll for progress to finish or return the url to progress progress_tracker = DefaultProgressTracking(timeout) - return wait_progress(r=progress, session=self.resource.session, progress_tracker=progress_tracker, entity=self) + return wait_progress( + r=progress, + session=self.resource.session, + progress_tracker=progress_tracker, + entity=self, + ) def compare_dataset(self, dataset, use_crunch=False): """ @@ -143,20 +153,17 @@ def compare_dataset(self, dataset, use_crunch=False): if use_crunch: resp = self.resource.batches.follow( - 'compare', 'dataset={}'.format(dataset.url)) + "compare", "dataset={}".format(dataset.url) + ) return resp diff = { - 'variables': { - 'by_type': [], - 'by_alias': [], - 'by_missing_rules': [], - }, - 'categories': {}, - 'subvariables': {} + "variables": {"by_type": [], "by_alias": [], "by_missing_rules": []}, + "categories": {}, + "subvariables": {}, } - array_types = ['multiple_response', 'categorical_array'] + array_types = ["multiple_response", "categorical_array"] vars_a = {v.alias: v.type for v in self.values()} vars_b = {v.alias: v.type for v in dataset.values()} @@ -165,10 +172,10 @@ def compare_dataset(self, dataset, use_crunch=False): common_aliases = frozenset(vars_a.keys()) & frozenset(vars_b.keys()) for alias in common_aliases: if vars_a[alias] != vars_b[alias]: - diff['variables']['by_type'].append(dataset[alias].name) + diff["variables"]["by_type"].append(dataset[alias].name) # 3. 
match variable alias and distcint categories names for same id's - if vars_b[alias] == 'categorical' and vars_a[alias] == 'categorical': + if vars_b[alias] == "categorical" and vars_a[alias] == "categorical": a_ids = frozenset([v.id for v in self[alias].categories.values()]) b_ids = frozenset([v.id for v in dataset[alias].categories.values()]) common_ids = a_ids & b_ids @@ -177,51 +184,62 @@ def compare_dataset(self, dataset, use_crunch=False): a_name = self[alias].categories[id].name b_name = dataset[alias].categories[id].name if a_name != b_name: - if diff['categories'].get(dataset[alias].name): - diff['categories'][dataset[alias].name].append(id) + if diff["categories"].get(dataset[alias].name): + diff["categories"][dataset[alias].name].append(id) else: - diff['categories'][dataset[alias].name] = [] - diff['categories'][dataset[alias].name].append(id) + diff["categories"][dataset[alias].name] = [] + diff["categories"][dataset[alias].name].append(id) # 2. match variables by names and compare aliases - common_names = frozenset(self.variable_names()) & frozenset(dataset.variable_names()) + common_names = frozenset(self.variable_names()) & frozenset( + dataset.variable_names() + ) for name in common_names: if self[name].alias != dataset[name].alias: - diff['variables']['by_alias'].append(name) + diff["variables"]["by_alias"].append(name) # 4. array types that match, subvars with same name and != alias - if dataset[name].type == self[name].type and \ - self[name].type in array_types and \ - self[name].type in array_types: - + if ( + dataset[name].type == self[name].type + and self[name].type in array_types + and self[name].type in array_types + ): a_names = frozenset(self[name].variable_names()) b_names = frozenset(dataset[name].variable_names()) common_subnames = a_names & b_names for sv_name in common_subnames: if self[name][sv_name].alias != dataset[name][sv_name].alias: - if diff['subvariables'].get(name): - diff['subvariables'][name].append(dataset[name][sv_name].alias) + if diff["subvariables"].get(name): + diff["subvariables"][name].append( + dataset[name][sv_name].alias + ) else: - diff['subvariables'][name] = [] - diff['subvariables'][name].append(dataset[name][sv_name].alias) + diff["subvariables"][name] = [] + diff["subvariables"][name].append( + dataset[name][sv_name].alias + ) # 6. missing rules mismatch - if self[name].type not in CATEGORICAL_TYPES and dataset[name].type not in CATEGORICAL_TYPES: + if ( + self[name].type not in CATEGORICAL_TYPES + and dataset[name].type not in CATEGORICAL_TYPES + ): if self[name].missing_rules != dataset[name].missing_rules: rules1 = self[name].missing_rules rules2 = dataset[name].missing_rules if len(rules1.keys()) == len(rules2.keys()): for key, value in rules1.items(): if key not in rules2 or rules2[key] != value: - diff['variables']['by_missing_rules'].append(name) + diff["variables"]["by_missing_rules"].append(name) else: - diff['variables']['by_missing_rules'].append(name) + diff["variables"]["by_missing_rules"].append(name) return diff - def append_dataset(self, dataset, filter=None, variables=None, - autorollback=True, delete_pk=True): - """ Append dataset into self. If this operation fails, the + def append_dataset( + self, dataset, filter=None, variables=None, autorollback=True, delete_pk=True + ): + """Append dataset into self. If this operation fails, the append is rolledback. Dataset variables and subvariables are matched on their aliases and categories are matched by name. 
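Before the append_dataset hunk below, a short hedged sketch of how compare_dataset and append_dataset are usually combined; ds_a and ds_b are hypothetical, already-loaded mutable datasets, and the diff dictionary keys match the structure built by compare_dataset above.

# Hypothetical datasets; the diff dict keys follow compare_dataset in this file.
diff = ds_a.compare_dataset(ds_b)  # local comparison (use_crunch=False by default)
mismatches = (
    diff["variables"]["by_type"]
    or diff["variables"]["by_alias"]
    or diff["variables"]["by_missing_rules"]
    or diff["categories"]
    or diff["subvariables"]
)
if not mismatches:
    # Append only when the structures line up; keep the default rollback behaviour.
    ds_a.append_dataset(ds_b, autorollback=True, delete_pk=True)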
@@ -237,33 +255,35 @@ def append_dataset(self, dataset, filter=None, variables=None, raise AttributeError("'variables' must be a list of variable names") if delete_pk: - LOG.info("Any pk's found will be deleted, to avoid these pass delete_pk=False") + LOG.info( + "Any pk's found will be deleted, to avoid these pass delete_pk=False" + ) self.resource.pk.delete() dataset.resource.pk.delete() - payload = shoji_entity_wrapper({'dataset': dataset.url}) - payload['autorollback'] = autorollback + payload = shoji_entity_wrapper({"dataset": dataset.url}) + payload["autorollback"] = autorollback if variables: # This contains a list of variable IDs, not URLs id_vars = [dataset[var].id for var in variables] # build the payload with selected variables - payload['body']['where'] = { - 'function': 'frame_subset', - "args": [ - {"frame": "primary"}, - {"value": id_vars}, - ], + payload["body"]["where"] = { + "function": "frame_subset", + "args": [{"frame": "primary"}, {"value": id_vars}], } if filter: # parse the filter expression - payload['body']['filter'] = process_expr(parse_expr(filter), dataset.resource) + payload["body"]["filter"] = process_expr( + parse_expr(filter), dataset.resource + ) return self.resource.batches.create(payload) def move_to_categorical_array( - self, name, alias, subvariables, description='', notes=''): + self, name, alias, subvariables, description="", notes="" + ): """ This is a dangerous method that allows moving variables (effectively translating them as variables in a dataset) as subvariables in the @@ -278,31 +298,32 @@ def move_to_categorical_array( :param: notes: Notes to attach to the new variable """ payload = { - 'name': name, - 'alias': alias, - 'description': description, - 'notes': notes, - 'type': 'categorical_array', - 'subvariables': [self[v].url for v in subvariables] + "name": name, + "alias": alias, + "description": description, + "notes": notes, + "type": "categorical_array", + "subvariables": [self[v].url for v in subvariables], } self.resource.variables.create(shoji_entity_wrapper(payload)) self._reload_variables() return self[alias] def move_to_multiple_response( - self, name, alias, subvariables, description='', notes=''): + self, name, alias, subvariables, description="", notes="" + ): """ This method is a replication of the method move_to_categorical_array, only this time we are creting a multiple_response variable. Note: the subvariables need to have at least 1 selected catagory. 
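A hedged sketch of the two move_to_* helpers defined in this file; ds and the aliases are hypothetical, and the multiple-response variant additionally requires each subvariable to already carry a selected category, as the docstring above notes.

# Hypothetical dataset and aliases; signatures follow the methods in this file.
grid = ds.move_to_categorical_array(
    name="Hobbies grid",
    alias="hobbies_grid",
    subvariables=["hobbies_1", "hobbies_2", "hobbies_3"],
    description="Existing variables folded into a categorical array",
)
mr = ds.move_to_multiple_response(
    name="News sources",
    alias="newssource_mr",
    subvariables=["newssource_1", "newssource_2"],  # each needs a selected category
)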
""" payload = { - 'name': name, - 'alias': alias, - 'description': description, - 'notes': notes, - 'type': 'multiple_response', - 'subvariables': [self[v].url for v in subvariables] + "name": name, + "alias": alias, + "description": description, + "notes": notes, + "type": "multiple_response", + "subvariables": [self[v].url for v in subvariables], } self.resource.variables.create(shoji_entity_wrapper(payload)) self._reload_variables() @@ -316,5 +337,7 @@ def move_as_subvariable(self, destination, source): :param: destination: The alias of the variable that will receive the subvariable :param: source: Alias of the variable to move into destination as subvariable """ - payload = json.dumps({"element": "shoji:catalog", "index": {self[source].url: {}}}) + payload = json.dumps( + {"element": "shoji:catalog", "index": {self[source].url: {}}} + ) self[destination].resource.subvariables.patch(payload) diff --git a/scrunch/order.py b/scrunch/order.py index f41ff9c..428004b 100644 --- a/scrunch/order.py +++ b/scrunch/order.py @@ -12,33 +12,32 @@ import pycrunch import scrunch.datasets -from scrunch.exceptions import (InvalidPathError, InvalidReferenceError, - OrderUpdateError) +from scrunch.exceptions import InvalidPathError, InvalidReferenceError, OrderUpdateError -NAME_REGEX = re.compile(r'^\|$|^\|?([\w\s,&\(\)\-\/\\]+\|?)+$', re.UNICODE) +NAME_REGEX = re.compile(r"^\|$|^\|?([\w\s,&\(\)\-\/\\]+\|?)+$", re.UNICODE) class Path(object): def __init__(self, path): if not isinstance(path, six.string_types): - raise TypeError('The path must be a string object') + raise TypeError("The path must be a string object") self.path = path @property def is_root(self): - return self.path == '|' + return self.path == "|" @property def is_absolute(self): - return self.path.startswith('|') + return self.path.startswith("|") @property def is_relative(self): return not self.is_absolute def get_parts(self): - return [part.strip() for part in self.path.split('|') if part] + return [part.strip() for part in self.path.split("|") if part] def __str__(self): return self.path @@ -48,7 +47,6 @@ def __repr__(self): class Group(object): - INDENT_SIZE = 4 def __init__(self, obj, order, parent=None): @@ -60,25 +58,25 @@ def __init__(self, obj, order, parent=None): # Load all the elements. for element in obj[self.name]: if isinstance(element, six.string_types): - _id = element.split('/')[-2] + _id = element.split("/")[-2] # NOTE: instantiating Variable/Dataset here seems overkill to # me. While its as simple as `Dataset(dataset.entity)` for the # `dataset` tuple below, for the Variable we would first need # to instantiate it's `Dataset`, going through all this order # machinery again... - if 'datasets' not in element or 'variables' in element: + if "datasets" not in element or "variables" in element: # 1. relative variable URL: ..// # 2. compl variable URL: /api/datasets//variables// var = self.order.vars.get(_id) if var: self.elements[var.alias] = var - elif 'datasets' in element and 'variables' not in element: + elif "datasets" in element and "variables" not in element: # 3. 
it's a dataset URL dataset = self.order.datasets.get(_id) if dataset: self.elements[dataset.id] = dataset else: - refresh_ds = self.order.catalog.refresh()['index'] + refresh_ds = self.order.catalog.refresh()["index"] for key, obj in refresh_ds.items(): if _id in key: self.elements[obj.id] = obj @@ -92,7 +90,7 @@ def __init__(self, obj, order, parent=None): elif isinstance(element, scrunch.datasets.BaseDataset): self.elements[element.id] = element else: - raise TypeError('Invalid OrderObject %s' % element) + raise TypeError("Invalid OrderObject %s" % element) def __str__(self): def _get_elements(group): @@ -101,8 +99,9 @@ def _get_elements(group): if isinstance(obj, Group): elements.append({key: _get_elements(obj)}) # TODO unreached code - elif isinstance(obj, (scrunch.datasets.Variable, - scrunch.datasets.BaseDataset)): + elif isinstance( + obj, (scrunch.datasets.Variable, scrunch.datasets.BaseDataset) + ): elements.append(obj.name) else: elements.append(obj.name) @@ -136,7 +135,7 @@ def items(self): def __getitem__(self, path): if not isinstance(path, six.string_types): - raise TypeError('arg 1 must be a string') + raise TypeError("arg 1 must be a string") path = Path(path) @@ -144,9 +143,7 @@ def __getitem__(self, path): return self if path.is_absolute and not self.is_root: - raise InvalidPathError( - 'Absolute paths can only be used on the root Group.' - ) + raise InvalidPathError("Absolute paths can only be used on the root Group.") group = self for part in path.get_parts(): @@ -154,15 +151,15 @@ def __getitem__(self, path): group = group.elements[part] except KeyError: raise InvalidPathError( - 'Invalid path %s: element %s does not exist.' % (path, part) + "Invalid path %s: element %s does not exist." % (path, part) ) except AttributeError: raise InvalidPathError( - 'Invalid path %s: element %s is not a Group.' % (path, part) + "Invalid path %s: element %s is not a Group." % (path, part) ) if not isinstance(group, Group): raise InvalidPathError( - 'Invalid path %s: element %s is not a Group.' % (path, part) + "Invalid path %s: element %s is not a Group." % (path, part) ) return group @@ -181,7 +178,7 @@ def __delitem__(self, item): @property def is_root(self): - return self.parent is None and self.name == '__root__' + return self.parent is None and self.name == "__root__" @staticmethod def _validate_alias_arg(alias): @@ -189,32 +186,24 @@ def _validate_alias_arg(alias): alias = [alias] if not isinstance(alias, Iterable): raise ValueError( - 'Invalid list of aliases/ids/groups to be inserted' - ' into the Group.' + "Invalid list of aliases/ids/groups to be inserted" " into the Group." ) if not all(isinstance(a, six.string_types) for a in alias): raise ValueError( - 'Only string references to aliases/ids/group names' - ' are allowed.' + "Only string references to aliases/ids/group names" " are allowed." ) return alias def _validate_name_arg(self, name): if not isinstance(name, six.string_types): - raise ValueError( - 'The name argument must be a string object.' - ) - if '|' in name: - raise ValueError( - 'The pipe (|) character is not allowed.' - ) + raise ValueError("The name argument must be a string object.") + if "|" in name: + raise ValueError("The pipe (|) character is not allowed.") if name in self.elements: - raise ValueError( - 'A variable/sub-group named \'%s\' already exists.' % name - ) + raise ValueError("A variable/sub-group named '%s' already exists." 
% name) if six.PY2: - regex_match = re.match(NAME_REGEX, name.decode('utf-8')) + regex_match = re.match(NAME_REGEX, name.decode("utf-8")) else: regex_match = re.match(NAME_REGEX, name) @@ -225,16 +214,14 @@ def _validate_name_arg(self, name): def _validate_reference_arg(self, reference): if not isinstance(reference, six.string_types): - raise TypeError('Invalid reference. It must be a string.') + raise TypeError("Invalid reference. It must be a string.") if reference not in self.elements: raise InvalidReferenceError( - 'Invalid reference %s: it is not part of the current Group.' - % reference + "Invalid reference %s: it is not part of the current Group." % reference ) return reference def find(self, name): - def _find(group): for _name, obj in group.elements.items(): if isinstance(obj, Group): @@ -247,7 +234,6 @@ def _find(group): return _find(self) def find_group(self, name): - def _find(group): if group.name == name: return group @@ -263,9 +249,9 @@ def insert(self, alias, position=0, before=None, after=None): elements = self._validate_alias_arg(alias) if not isinstance(position, int): - raise ValueError('Invalid position. It must be an integer.') + raise ValueError("Invalid position. It must be an integer.") if position < -1 or position > len(self.elements): - raise IndexError('Invalid position %d' % position) + raise IndexError("Invalid position %d" % position) if position == 0 and (before or after): reference = self._validate_reference_arg(before or after) i = 0 @@ -285,23 +271,29 @@ def insert(self, alias, position=0, before=None, after=None): elements_to_move = collections.OrderedDict() for element_name in elements: if element_name in self.elements: - elements_to_move[element_name] = \ - (self.elements[element_name], '__move__') + elements_to_move[element_name] = ( + self.elements[element_name], + "__move__", + ) else: current_group = self.order.group.find(element_name) if current_group: # A variable. - elements_to_move[element_name] = \ - (current_group, '__migrate_element__') + elements_to_move[element_name] = ( + current_group, + "__migrate_element__", + ) else: # Not a variable. A group, maybe? group_to_move = self.order.group.find_group(element_name) if group_to_move: - elements_to_move[element_name] = \ - (group_to_move, '__migrate_group__') + elements_to_move[element_name] = ( + group_to_move, + "__migrate_group__", + ) else: raise ValueError( - 'Invalid alias/id/group name \'%s\'' % element_name + "Invalid alias/id/group name '%s'" % element_name ) # Make all necessary changes to the order structure. 
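As a reference point for the insert hunk below, a sketch of how the Path/Group API in scrunch/order.py is driven. It assumes the dataset exposes its variables order as ds.order (that accessor is not part of this hunk), and the group and alias names are hypothetical.

# Hypothetical order tree; Path/Group behaviour follows scrunch/order.py above.
demographics = ds.order["|Demographics"]          # absolute paths start with "|"
demographics.create_group("Profile", alias=["age", "gender"])
demographics.insert("caseid", before="Profile")   # "before" must name an element of this group
ds.order["|"].find_group("Profile").rename("Respondent profile")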
@@ -321,14 +313,14 @@ def insert(self, alias, position=0, before=None, after=None): for element_name in elements_to_move.keys(): obj, operation = elements_to_move[element_name] - if operation == '__move__': + if operation == "__move__": _elements[element_name] = obj - elif operation == '__migrate_element__': + elif operation == "__migrate_element__": current_group = obj element = current_group.elements[element_name] del current_group.elements[element_name] _elements[element_name] = element - elif operation == '__migrate_group__': + elif operation == "__migrate_group__": group_to_move = obj orig_parent = group_to_move.parent group_to_move.parent = self @@ -350,9 +342,10 @@ def append(self, alias): def reorder(self, items): existing_items = [name for name in self.elements.keys()] - if len(items) != len(existing_items) or \ - not all(i in existing_items for i in items): - raise ValueError('Invalid list of items for the reorder operation.') + if len(items) != len(existing_items) or not all( + i in existing_items for i in items + ): + raise ValueError("Invalid list of items for the reorder operation.") if items == existing_items: # Nothing to do. @@ -365,13 +358,11 @@ def reorder(self, items): self.order.update() - def create_group(self, name, alias=None, position=-1, before=None, - after=None): + def create_group(self, name, alias=None, position=-1, before=None, after=None): name = self._validate_name_arg(name) # when we want to create an empty group if not alias: - self.elements[name] = Group( - {name: []}, order=self.order, parent=self) + self.elements[name] = Group({name: []}, order=self.order, parent=self) self.order.update() return elements = self._validate_alias_arg(alias) @@ -383,20 +374,17 @@ def create_group(self, name, alias=None, position=-1, before=None, # add the new Group to self.elements so that `insert` detects it self.elements[name] = new_group - self.insert(new_group.name, position=position, - before=before, after=after) + self.insert(new_group.name, position=position, before=before, after=after) def rename(self, name): name = self._validate_name_arg(name) if self.is_root: - raise ValueError( - 'Renaming the root Group is not allowed.' - ) + raise ValueError("Renaming the root Group is not allowed.") if name in self.parent.elements: raise ValueError( - 'Parent Group \'%s\' already contains an element named \'%s\'.' + "Parent Group '%s' already contains an element named '%s'." % (self.parent.name, name) ) @@ -422,17 +410,15 @@ def move(self, path, position=-1, before=None, after=None): path = Path(path) if not path.is_absolute: raise InvalidPathError( - 'Invalid path %s: only absolute paths are allowed.' % path + "Invalid path %s: only absolute paths are allowed." % path ) target_group = self.order[str(path)] if target_group == self: raise InvalidPathError( - 'Invalid path %s: cannot move Group into itself.' % path + "Invalid path %s: cannot move Group into itself." 
% path ) - target_group.insert( - self.name, position=position, before=before, after=after - ) + target_group.insert(self.name, position=position, before=before, after=after) class Order(object): @@ -460,7 +446,7 @@ def _load(self, refresh=True): if refresh: self.catalog.refresh() self.order.refresh() - self.group = Group({'__root__': self.order.graph}, order=self) + self.group = Group({"__root__": self.order.graph}, order=self) def place(self, entity, path, position=-1, before=None, after=None): """ @@ -470,7 +456,7 @@ def place(self, entity, path, position=-1, before=None, after=None): path = Path(path) if not path.is_absolute: raise InvalidPathError( - 'Invalid path %s: only absolute paths are allowed.' % path + "Invalid path %s: only absolute paths are allowed." % path ) target_group = self.group[str(path)] if isinstance(entity, scrunch.datasets.Variable): @@ -478,11 +464,9 @@ def place(self, entity, path, position=-1, before=None, after=None): elif isinstance(entity, scrunch.datasets.BaseDataset): element = entity.id else: - raise TypeError('entity must be a `Variable` or `Dataset`') + raise TypeError("entity must be a `Variable` or `Dataset`") - target_group.insert( - element, position=position, - before=before, after=after) + target_group.insert(element, position=position, before=before, after=after) def _prepare_shoji_graph(self): """ @@ -493,9 +477,7 @@ def _get(group): _elements = [] for obj in group.elements.values(): if isinstance(obj, Group): - _elements.append({ - obj.name: _get(obj) - }) + _elements.append({obj.name: _get(obj)}) else: url = obj.entity.self _elements.append(url) @@ -504,10 +486,7 @@ def _get(group): return _get(self.group) def update(self): - updated_order = { - 'element': 'shoji:order', - 'graph': self._prepare_shoji_graph() - } + updated_order = {"element": "shoji:order", "graph": self._prepare_shoji_graph()} try: # NOTE: Order has no Attribute edit self.order.put(updated_order) @@ -547,14 +526,12 @@ def __getitem__(self, item): class DatasetVariablesOrder(Order): - def _load(self, refresh=True): - self.vars = self.catalog.by('id') + self.vars = self.catalog.by("id") super(DatasetVariablesOrder, self)._load(refresh=refresh) class ProjectDatasetsOrder(Order): - def _load(self, refresh=False): - self.datasets = self.catalog.by('id') + self.datasets = self.catalog.by("id") super(ProjectDatasetsOrder, self)._load(refresh=refresh) diff --git a/scrunch/scripts.py b/scrunch/scripts.py index 412e2b4..daf1993 100644 --- a/scrunch/scripts.py +++ b/scrunch/scripts.py @@ -47,16 +47,19 @@ def get_default_syntax_flag(self, strict_subvariable_syntax): if strict_subvariable_syntax is not None: return strict_subvariable_syntax flags = self.resource.session.feature_flags - return flags.get("clients_strict_subvariable_syntax", DEFAULT_SUBVARIABLE_SYNTAX) + return flags.get( + "clients_strict_subvariable_syntax", DEFAULT_SUBVARIABLE_SYNTAX + ) def execute(self, script_body, strict_subvariable_syntax=None): pass class SystemScript(BaseScript): - def format_request_url(self, request_url, strict_subvariable_syntax=None): - strict_subvariable_syntax_flag = self.get_default_syntax_flag(strict_subvariable_syntax) + strict_subvariable_syntax_flag = self.get_default_syntax_flag( + strict_subvariable_syntax + ) if strict_subvariable_syntax_flag: request_url += "?strict_subvariable_syntax=true" return request_url @@ -71,7 +74,9 @@ def execute(self, script_body, strict_subvariable_syntax=None): # The script execution endpoint is a shoji:view payload = shoji_view_wrapper(script_body) try: 
- execute_url = self.format_request_url(self.resource.views['execute'], strict_subvariable_syntax) + execute_url = self.format_request_url( + self.resource.views["execute"], strict_subvariable_syntax + ) return self.resource.session.post(execute_url, json=payload) except pycrunch.ClientError as err: resolutions = err.args[2]["resolutions"] @@ -79,12 +84,13 @@ def execute(self, script_body, strict_subvariable_syntax=None): class DatasetScripts(BaseScript): - def execute(self, script_body, strict_subvariable_syntax=None, dry_run=False): - strict_subvariable_syntax = self.get_default_syntax_flag(strict_subvariable_syntax) + strict_subvariable_syntax = self.get_default_syntax_flag( + strict_subvariable_syntax + ) payload = { "body": script_body, - "strict_subvariable_syntax": strict_subvariable_syntax + "strict_subvariable_syntax": strict_subvariable_syntax, } if dry_run: diff --git a/scrunch/session.py b/scrunch/session.py index be8198e..de6e82a 100644 --- a/scrunch/session.py +++ b/scrunch/session.py @@ -16,6 +16,7 @@ class ScrunchSSLUnsafeSession(ScrunchSession): A subclass of `ScrunchSession` that skips SSL certificate validation when trying to connect to the API server. Useful for local testing. """ + def __init__(self, *args, **kwargs): super(ScrunchSSLUnsafeSession, self).__init__(*args, **kwargs) self.verify = False @@ -26,11 +27,12 @@ def __init__(self, *args, **kwargs): def set_feature_flags(site): feature_flags = { - flag_name: site.follow('feature_flag', - 'feature_name=%s' % flag_name).value['active'] + flag_name: site.follow("feature_flag", "feature_name=%s" % flag_name).value[ + "active" + ] for flag_name in FLAGS_TO_CHECK } - setattr(site.session, 'feature_flags', feature_flags) + setattr(site.session, "feature_flags", feature_flags) return site diff --git a/scrunch/streaming_dataset.py b/scrunch/streaming_dataset.py index c5d3c2d..f39f419 100644 --- a/scrunch/streaming_dataset.py +++ b/scrunch/streaming_dataset.py @@ -10,9 +10,12 @@ def get_streaming_dataset(dataset, connection=None, editor=False, project=None): """ shoji_ds, root = _get_dataset(dataset, connection, editor, project) # make sure the Dataset is of type streaming != "streaming" - if shoji_ds['body'].get('streaming') != 'streaming': - raise InvalidDatasetTypeError("Dataset %s is of type 'mutable',\ - use get_mutable_dataset method instead" % dataset) + if shoji_ds["body"].get("streaming") != "streaming": + raise InvalidDatasetTypeError( + "Dataset %s is of type 'mutable',\ + use get_mutable_dataset method instead" + % dataset + ) ds = StreamingDataset(shoji_ds) if editor is True: authenticated_url = root.urls["user_url"] @@ -37,8 +40,7 @@ def stream_rows(self, columns): importer = Importer() count = len(list(columns.values())[0]) for x in range(count): - importer.stream_rows(self.resource, - {a: columns[a][x] for a in columns}) + importer.stream_rows(self.resource, {a: columns[a][x] for a in columns}) return count def push_rows(self, count=None): @@ -49,7 +51,5 @@ def push_rows(self, count=None): """ if bool(self.resource.stream.body.pending_messages): self.resource.batches.create( - shoji_entity_wrapper({ - 'stream': count, - 'type': 'ldjson'} - )) + shoji_entity_wrapper({"stream": count, "type": "ldjson"}) + ) diff --git a/scrunch/subentity.py b/scrunch/subentity.py index ec20fb4..2bbdde4 100644 --- a/scrunch/subentity.py +++ b/scrunch/subentity.py @@ -15,6 +15,7 @@ class SubEntity: A pycrunch.shoji.Entity directly related to a Dataset. 
For example; filters, decks """ + _MUTABLE_ATTRIBUTES = set() _IMMUTABLE_ATTRIBUTES = set() _ENTITY_ATTRIBUTES = set() @@ -26,15 +27,15 @@ def __getattr__(self, item): if item in self._ENTITY_ATTRIBUTES: return self.resource.body[item] raise AttributeError( - '{} has no attribute {}'.format(self.__class__.__name__, item)) + "{} has no attribute {}".format(self.__class__.__name__, item) + ) def __repr__(self): if PY2: name = self.name.encode("ascii", "replace") else: name = self.name - return u"<{}: name='{}'; id='{}'>".format( - self.__class__.__name__, name, self.id) + return "<{}: name='{}'; id='{}'>".format(self.__class__.__name__, name, self.id) def __str__(self): return self.__repr__() @@ -43,7 +44,8 @@ def edit(self, **kwargs): for key in kwargs: if key not in self._MUTABLE_ATTRIBUTES: raise AttributeError( - "Can't edit attribute {} of {}".format(key, self.name)) + "Can't edit attribute {} of {}".format(key, self.name) + ) return self.resource.edit(**kwargs) def remove(self): @@ -60,8 +62,9 @@ class Filter(SubEntity): """ A pycrunch.shoji.Entity for Dataset filters """ - _MUTABLE_ATTRIBUTES = {'name', 'template', 'is_public', 'owner_id'} - _IMMUTABLE_ATTRIBUTES = {'id', } + + _MUTABLE_ATTRIBUTES = {"name", "template", "is_public", "owner_id"} + _IMMUTABLE_ATTRIBUTES = {"id"} _ENTITY_ATTRIBUTES = _MUTABLE_ATTRIBUTES | _IMMUTABLE_ATTRIBUTES @@ -69,8 +72,9 @@ class Multitable(SubEntity): """ A pycrunch.shoji.Entity for Multitables """ - _MUTABLE_ATTRIBUTES = {'name', 'template', 'is_public'} - _IMMUTABLE_ATTRIBUTES = {'id', } + + _MUTABLE_ATTRIBUTES = {"name", "template", "is_public"} + _IMMUTABLE_ATTRIBUTES = {"id"} _ENTITY_ATTRIBUTES = _MUTABLE_ATTRIBUTES | _IMMUTABLE_ATTRIBUTES def __init__(self, shoji_tuple, ds): @@ -84,8 +88,15 @@ def query_cube(self): """ raise NotImplementedError - def export_tabbook(self, format, progress_tracker=None, filter=None, - where=None, options=None, weight=False): + def export_tabbook( + self, + format, + progress_tracker=None, + filter=None, + where=None, + options=None, + weight=False, + ): """ An adaption of https://github.com/Crunch-io/pycrunch/blob/master/pycrunch/exporting.py to Multitables exports (tabbboks) @@ -95,12 +106,12 @@ def export_tabbook(self, format, progress_tracker=None, filter=None, # add filter to multitable if filter: if isinstance(filter, Filter): - payload['filter'] = [{'filter': filter.resource.self}] + payload["filter"] = [{"filter": filter.resource.self}] else: - raise ValueError('filter param must be a Filter instance') + raise ValueError("filter param must be a Filter instance") if options and isinstance(options, dict): - payload['options'] = options + payload["options"] = options if where: if isinstance(where, list): @@ -108,37 +119,32 @@ def export_tabbook(self, format, progress_tracker=None, filter=None, for var in where: id_vars.append(self.ds[var].url) # Now build the payload with selected variables - payload['where'] = { - 'function': 'make_frame', - 'args': [{ - 'map': { - x: {'variable': x} for x in id_vars - } - }] + payload["where"] = { + "function": "make_frame", + "args": [{"map": {x: {"variable": x} for x in id_vars}}], } else: - raise ValueError('where param must be a list of variable names') + raise ValueError("where param must be a list of variable names") if weight: - payload['weight'] = self.ds[weight].url + payload["weight"] = self.ds[weight].url if weight is None: - payload['weight'] = None + payload["weight"] = None session = self.resource.session - endpoint = self.resource.views['tabbook'] + endpoint 
= self.resource.views["tabbook"] # in case of json format, we need to return the json response - if format == 'json': + if format == "json": r = session.post( - endpoint, - json.dumps(payload), - headers={'Accept': 'application/json'}) + endpoint, json.dumps(payload), headers={"Accept": "application/json"} + ) else: r = session.post(endpoint, json.dumps(payload)) - dest_file = URL(r.headers['Location'], '') + dest_file = URL(r.headers["Location"], "") if r.status_code == 202: try: - r.payload['value'] + r.payload["value"] except Exception: # Not a progress API just return the incomplete entity. # User will refresh it. @@ -148,8 +154,16 @@ def export_tabbook(self, format, progress_tracker=None, filter=None, wait_progress(r, session, progress_tracker) return dest_file - def export(self, path, format='xlsx', timeout=None, filter=None, - where=None, options=None, **kwargs): + def export( + self, + path, + format="xlsx", + timeout=None, + filter=None, + where=None, + options=None, + **kwargs + ): """ A tabbook export: http://docs.crunch.io/#tab-books Exports data as csv to the given path or as a JSON response @@ -160,7 +174,7 @@ def export(self, path, format='xlsx', timeout=None, filter=None, :options: Display options as python dictionary :weight: Name of the weight_variable """ - if format not in ['xlsx', 'json']: + if format not in ["xlsx", "json"]: raise ValueError("Format can only be 'json' or 'xlxs'") progress_tracker = DefaultProgressTracking(timeout) tabbook_args = dict( @@ -170,10 +184,10 @@ def export(self, path, format='xlsx', timeout=None, filter=None, where=where, options=options, ) - if 'weight' in kwargs: - tabbook_args['weight'] = kwargs['weight'] + if "weight" in kwargs: + tabbook_args["weight"] = kwargs["weight"] else: - tabbook_args['weight'] = False + tabbook_args["weight"] = False url = self.export_tabbook(**tabbook_args) download_file(url, path) @@ -182,14 +196,15 @@ class Deck(SubEntity): """ A pycrunch.shoji.Entity for Dataset decks """ - _MUTABLE_ATTRIBUTES = {'name', 'description', 'is_public', - 'owner_id', 'owner_name'} - _IMMUTABLE_ATTRIBUTES = {'id', 'creation_time', 'slides'} + + _MUTABLE_ATTRIBUTES = {"name", "description", "is_public", "owner_id", "owner_name"} + _IMMUTABLE_ATTRIBUTES = {"id", "creation_time", "slides"} _ENTITY_ATTRIBUTES = _MUTABLE_ATTRIBUTES | _IMMUTABLE_ATTRIBUTES def __repr__(self): return "<{}: id='{}'; name='{}'>".format( - self.__class__.__name__, self.id, self.name) + self.__class__.__name__, self.id, self.name + ) @property def slides(self): @@ -202,7 +217,7 @@ def slides(self): @slides.setter def slides(self, _): # Protect `slides` property from direct modifications. 
- raise TypeError('Use add_decks method to add a new deck') + raise TypeError("Use add_decks method to add a new deck") def xlsx_export(self): raise NotImplementedError @@ -212,27 +227,34 @@ class Slide(SubEntity): """ A pycrunch.shoji.Entity for a Slide """ - _MUTABLE_ATTRIBUTES = {'display_settings', 'analysis_url', 'title', - 'subtitle', 'deck_id'} - _IMMUTABLE_ATTRIBUTES = {'id', 'dataset_id', 'version', 'analyses'} + + _MUTABLE_ATTRIBUTES = { + "display_settings", + "analysis_url", + "title", + "subtitle", + "deck_id", + } + _IMMUTABLE_ATTRIBUTES = {"id", "dataset_id", "version", "analyses"} _ENTITY_ATTRIBUTES = _MUTABLE_ATTRIBUTES | _IMMUTABLE_ATTRIBUTES def __repr__(self): return "<{}: id='{}'; title='{}'>".format( - self.__class__.__name__, self.id, self.title) + self.__class__.__name__, self.id, self.title + ) @property def analyses(self): _analyses = {} for url, a in self.resource.analyses.index.items(): - id = url.split('/')[-2] + id = url.split("/")[-2] _analyses[id] = Analysis(a, id) return _analyses @analyses.setter def analyses(self, _): # Protect `analyses` property from direct modifications. - raise TypeError('Use add_decks method to add a new deck') + raise TypeError("Use add_decks method to add a new deck") class Analysis: @@ -249,8 +271,8 @@ def __getitem__(self, item): return self.resource.body[item] except AttributeError: raise AttributeError( - '{} has no attribute {}'.format( - self.__class__.__name__, item)) + "{} has no attribute {}".format(self.__class__.__name__, item) + ) def __repr__(self): return "<{}: id='{}'>".format(self.__class__.__name__, self.id) @@ -261,12 +283,11 @@ def query_cube(self, ds): out of the current instance GET a cube query :return: a shoji:view json instance """ - json_string = self['query'].json + json_string = self["query"].json # this process removes newlines dict_obj = json.loads(json_string) resp = ds.resource.follow( - 'cube', - urllib.parse.urlencode({'query': json.dumps(dict_obj)}) + "cube", urllib.parse.urlencode({"query": json.dumps(dict_obj)}) ) return resp.json diff --git a/scrunch/tests/conftest.py b/scrunch/tests/conftest.py index 70785b5..428a512 100644 --- a/scrunch/tests/conftest.py +++ b/scrunch/tests/conftest.py @@ -2,4 +2,6 @@ import pytest -mark_fail_py2 = pytest.mark.xfail(sys.version_info < (3, 0,), reason="py2 order in args causes tests failures") +mark_fail_py2 = pytest.mark.xfail( + sys.version_info < (3, 0), reason="py2 order in args causes tests failures" +) diff --git a/scrunch/tests/integration/scrunch_workflow_integration_test.py b/scrunch/tests/integration/scrunch_workflow_integration_test.py index ceb592f..f75c9d9 100755 --- a/scrunch/tests/integration/scrunch_workflow_integration_test.py +++ b/scrunch/tests/integration/scrunch_workflow_integration_test.py @@ -6,8 +6,12 @@ import isodate import pycrunch import pytest +from scrunch import connect +from scrunch.datasets import Variable, get_geodata +from scrunch.streaming_dataset import StreamingDataset + -pytest.mark.skip('skip test discovery on this module') +pytest.mark.skip("skip test discovery on this module") try: from pycrunch import pandaslib @@ -15,399 +19,588 @@ # pandas is not installed pandaslib = None -from scrunch import connect -from scrunch.datasets import Variable, get_geodata -from scrunch.streaming_dataset import StreamingDataset - - -CRUNCH_URL = os.environ.get('CRUNCH_TEST_URL') -CRUNCH_USER = os.environ.get('CRUNCH_TEST_USER') -CRUNCH_PASSWORD = os.environ.get('CRUNCH_TEST_PASSWORD') +CRUNCH_URL = os.environ.get("CRUNCH_TEST_URL") 
+CRUNCH_USER = os.environ.get("CRUNCH_TEST_USER") +CRUNCH_PASSWORD = os.environ.get("CRUNCH_TEST_PASSWORD") # Metadata. DATASET_DOC = { - 'body': { - 'name': 'scrunch test dataset', - 'description': 'scrunch integration tests', - 'table': { - 'element': 'crunch:table', - 'metadata': { - 'identity': { - 'alias': 'identity', - 'name': 'ID', - 'type': 'numeric' - }, - 'ip_address': { - 'alias': 'ip_address', - 'name': 'Public IP Address', - 'type': 'text' + "body": { + "name": "scrunch test dataset", + "description": "scrunch integration tests", + "table": { + "element": "crunch:table", + "metadata": { + "identity": {"alias": "identity", "name": "ID", "type": "numeric"}, + "ip_address": { + "alias": "ip_address", + "name": "Public IP Address", + "type": "text", }, - 'operating_system': { - 'alias': 'operating_system', - 'name': 'Operating System', - 'type': 'text' + "operating_system": { + "alias": "operating_system", + "name": "Operating System", + "type": "text", }, - 'registration_time': { - 'alias': 'registration_time', - 'name': 'Registration Time', - 'resolution': 'ms', - 'type': 'datetime' + "registration_time": { + "alias": "registration_time", + "name": "Registration Time", + "resolution": "ms", + "type": "datetime", }, - 'speak_spanish': { - 'alias': 'speak_spanish', - 'categories': [ + "speak_spanish": { + "alias": "speak_spanish", + "categories": [ { - 'id': 1, - 'missing': False, - 'name': 'I speak Spanish primarily', - 'numeric_value': 1 + "id": 1, + "missing": False, + "name": "I speak Spanish primarily", + "numeric_value": 1, }, { - 'id': 2, - 'missing': False, - 'name': 'I speak both Spanish and English equally', - 'numeric_value': 2 + "id": 2, + "missing": False, + "name": "I speak both Spanish and English equally", + "numeric_value": 2, }, { - 'id': 3, - 'missing': False, - 'name': 'I speak English primarily but can speak Spanish', - 'numeric_value': 3 + "id": 3, + "missing": False, + "name": "I speak English primarily but can speak Spanish", + "numeric_value": 3, }, { - 'id': 4, - 'missing': False, - 'name': 'I can not speak Spanish', - 'numeric_value': 4 + "id": 4, + "missing": False, + "name": "I can not speak Spanish", + "numeric_value": 4, }, { - 'id': 32766, - 'missing': True, - 'name': 'skipped', - 'numeric_value': None + "id": 32766, + "missing": True, + "name": "skipped", + "numeric_value": None, }, { - 'id': 32767, - 'missing': True, - 'name': 'not asked', - 'numeric_value': None + "id": 32767, + "missing": True, + "name": "not asked", + "numeric_value": None, }, { - 'id': -1, - 'missing': True, - 'name': 'No Data', - 'numeric_value': None - } + "id": -1, + "missing": True, + "name": "No Data", + "numeric_value": None, + }, ], - 'name': 'Do you speak Spanish?', - 'type': 'categorical' + "name": "Do you speak Spanish?", + "type": "categorical", }, - 'hobbies': { - 'alias': 'hobbies', - 'categories': [ + "hobbies": { + "alias": "hobbies", + "categories": [ { - 'id': 1, - 'missing': False, - 'name': 'Very interested', - 'numeric_value': 1 + "id": 1, + "missing": False, + "name": "Very interested", + "numeric_value": 1, }, { - 'id': 2, - 'missing': False, - 'name': 'Somewhat interested', - 'numeric_value': 2 + "id": 2, + "missing": False, + "name": "Somewhat interested", + "numeric_value": 2, }, { - 'id': 3, - 'missing': False, - 'name': 'A little interested', - 'numeric_value': 3 + "id": 3, + "missing": False, + "name": "A little interested", + "numeric_value": 3, }, { - 'id': 4, - 'missing': False, - 'name': 'Not at all interested', - 'numeric_value': 4 + "id": 4, + 
"missing": False, + "name": "Not at all interested", + "numeric_value": 4, }, { - 'id': 32766, - 'missing': True, - 'name': 'skipped', - 'numeric_value': None + "id": 32766, + "missing": True, + "name": "skipped", + "numeric_value": None, }, { - 'id': 32767, - 'missing': True, - 'name': 'not asked', - 'numeric_value': None + "id": 32767, + "missing": True, + "name": "not asked", + "numeric_value": None, }, { - 'id': -1, - 'missing': True, - 'name': 'No Data', - 'numeric_value': None - } + "id": -1, + "missing": True, + "name": "No Data", + "numeric_value": None, + }, + ], + "name": "Hobbies", + "subvariables": [ + {"alias": "hobbies_1", "name": "Sports"}, + {"alias": "hobbies_2", "name": "Video Games"}, + {"alias": "hobbies_3", "name": "Reading"}, + {"alias": "hobbies_4", "name": "Outdoor Activities"}, ], - 'name': 'Hobbies', - 'subvariables': [ + "type": "categorical_array", + }, + "music": { + "alias": "music", + "categories": [ + { + "id": 1, + "missing": False, + "name": "selected", + "numeric_value": 1, + "selected": True, + }, { - 'alias': 'hobbies_1', - 'name': 'Sports' + "id": 2, + "missing": False, + "name": "not selected", + "numeric_value": 2, + "selected": False, }, { - 'alias': 'hobbies_2', - 'name': 'Video Games' + "id": 32767, + "missing": True, + "name": "not asked", + "numeric_value": None, }, { - 'alias': 'hobbies_3', - 'name': 'Reading' + "id": 32766, + "missing": True, + "name": "skipped", + "numeric_value": None, }, { - 'alias': 'hobbies_4', - 'name': 'Outdoor Activities' - } + "id": -1, + "missing": True, + "name": "No Data", + "numeric_value": None, + }, ], - 'type': 'categorical_array' + "name": "Music", + "subvariables": [ + {"alias": "music_1", "name": "Pop"}, + {"alias": "music_2", "name": "Rock"}, + {"alias": "music_97", "name": "Other"}, + {"alias": "music_98", "name": "Don't know"}, + {"alias": "music_99", "name": "None of these"}, + ], + "type": "multiple_response", }, - 'music': { - 'alias': 'music', - 'categories': [ + "religion": { + "alias": "religion", + "categories": [ { - 'id': 1, - 'missing': False, - 'name': 'selected', - 'numeric_value': 1, - 'selected': True + "id": 1, + "missing": False, + "name": "Protestant", + "numeric_value": 1, }, { - 'id': 2, - 'missing': False, - 'name': 'not selected', - 'numeric_value': 2, - 'selected': False + "id": 2, + "missing": False, + "name": "Catholic", + "numeric_value": 2, }, { - 'id': 32767, - 'missing': True, - 'name': 'not asked', - 'numeric_value': None + "id": 3, + "missing": False, + "name": "Jewish", + "numeric_value": 3, }, { - 'id': 32766, - 'missing': True, - 'name': 'skipped', - 'numeric_value': None + "id": 4, + "missing": False, + "name": "Muslim", + "numeric_value": 4, }, + {"id": 5, "missing": False, "name": "None", "numeric_value": 5}, { - 'id': -1, - 'missing': True, - 'name': 'No Data', - 'numeric_value': None - } - ], - 'name': 'Music', - 'subvariables': [ - { - 'alias': 'music_1', - 'name': 'Pop' + "id": 6, + "missing": False, + "name": "Other", + "numeric_value": 6, }, { - 'alias': 'music_2', - 'name': 'Rock' + "id": 32766, + "missing": True, + "name": "skipped", + "numeric_value": None, }, { - 'alias': 'music_97', - 'name': 'Other' + "id": 32767, + "missing": True, + "name": "not asked", + "numeric_value": None, }, { - 'alias': 'music_98', - 'name': 'Don\'t know' + "id": -1, + "missing": True, + "name": "No Data", + "numeric_value": None, }, - { - 'alias': 'music_99', - 'name': 'None of these' - } ], - 'type': 'multiple_response' + "name": "What is your religious preference?", + "type": 
"categorical", }, - 'religion': { - 'alias': 'religion', - 'categories': [ + "location": { + "alias": "location", + "categories": [ { - 'id': 1, - 'missing': False, - 'name': 'Protestant', - 'numeric_value': 1 + "id": 1, + "missing": False, + "name": "Protestant", + "numeric_value": 1, }, { - 'id': 2, - 'missing': False, - 'name': 'Catholic', - 'numeric_value': 2 + "id": 2, + "missing": False, + "name": "North East", + "numeric_value": 2, }, { - 'id': 3, - 'missing': False, - 'name': 'Jewish', - 'numeric_value': 3 + "id": 3, + "missing": False, + "name": "North West", + "numeric_value": 3, }, { - 'id': 4, - 'missing': False, - 'name': 'Muslim', - 'numeric_value': 4 + "id": 4, + "missing": False, + "name": "Yorkshire and the Humber", + "numeric_value": 4, }, { - 'id': 5, - 'missing': False, - 'name': 'None', - 'numeric_value': 5 + "id": 5, + "missing": False, + "name": "East Midlands", + "numeric_value": 5, }, { - 'id': 6, - 'missing': False, - 'name': 'Other', - 'numeric_value': 6 + "id": 6, + "missing": False, + "name": "West Midlands", + "numeric_value": 6, }, { - 'id': 32766, - 'missing': True, - 'name': 'skipped', - 'numeric_value': None + "id": 7, + "missing": False, + "name": "East of England", + "numeric_value": 7, }, { - 'id': 32767, - 'missing': True, - 'name': 'not asked', - 'numeric_value': None + "id": 8, + "missing": False, + "name": "London", + "numeric_value": 8, }, { - 'id': -1, - 'missing': True, - 'name': 'No Data', - 'numeric_value': None - } - ], - 'name': 'What is your religious preference?', - 'type': 'categorical' - }, - 'location': { - 'alias': 'location', - 'categories': [ + "id": 9, + "missing": False, + "name": "South East", + "numeric_value": 9, + }, + { + "id": 10, + "missing": False, + "name": "South West", + "numeric_value": 10, + }, { - 'id': 1, - 'missing': False, - 'name': 'Protestant', - 'numeric_value': 1 - }, { - 'id': 2, - 'missing': False, - 'name': 'North East', - 'numeric_value': 2 - }, { - 'id': 3, - 'missing': False, - 'name': 'North West', - 'numeric_value': 3 - }, { - 'id': 4, - 'missing': False, - 'name': 'Yorkshire and the Humber', - 'numeric_value': 4 - }, { - 'id': 5, - 'missing': False, - 'name': 'East Midlands', - 'numeric_value': 5 - }, { - 'id': 6, - 'missing': False, - 'name': 'West Midlands', - 'numeric_value': 6 - }, { - 'id': 7, - 'missing': False, - 'name': 'East of England', - 'numeric_value': 7 - }, { - 'id': 8, - 'missing': False, - 'name': 'London', - 'numeric_value': 8 - }, { - 'id': 9, - 'missing': False, - 'name': 'South East', - 'numeric_value': 9 - }, { - 'id': 10, - 'missing': False, - 'name': 'South West', - 'numeric_value': 10 - }, { - 'id': 11, - 'missing': False, - 'name': 'Wales', - 'numeric_value': 11 - }, { - 'id': 12, - 'missing': False, - 'name': 'Scotland', - 'numeric_value': 12 - }, { - 'id': 13, - 'missing': False, - 'name': 'Northern Ireland', - 'numeric_value': 13 + "id": 11, + "missing": False, + "name": "Wales", + "numeric_value": 11, + }, + { + "id": 12, + "missing": False, + "name": "Scotland", + "numeric_value": 12, + }, + { + "id": 13, + "missing": False, + "name": "Northern Ireland", + "numeric_value": 13, }, ], - 'name': 'Where do you live?', - 'type': 'categorical' - } + "name": "Where do you live?", + "type": "categorical", + }, }, - 'order': [ + "order": [ { - 'entities': [ - 'identity', - 'ip_address', - 'operating_system', - 'registration_time', - 'speak_spanish', - 'hobbies', - 'music', - 'religion', - 'location' + "entities": [ + "identity", + "ip_address", + "operating_system", + 
"registration_time", + "speak_spanish", + "hobbies", + "music", + "religion", + "location", ], - 'group': 'ungrouped' + "group": "ungrouped", } - ] - } + ], + }, } } # Data ROWS = [ - ['identity', 'ip_address', 'operating_system', 'registration_time', 'speak_spanish', 'hobbies_1', 'hobbies_2', 'hobbies_3', 'hobbies_4', 'music_1', 'music_2', 'music_97', 'music_98', 'music_99', 'religion', 'location'], - [1, '10.0.0.1', 'Linux', '2014-04-21T10:00:00+00:00', 1, 32767, 32767, 32767, 32767, 2, 2, 1, 2, 2, 1, 1], - [2, '10.0.0.2', 'Solaris', '2014-05-10T00:00:00+00:00', 1, 32766, 1, 1, 4, 1, 1, 1, 2, 2, 2, 2], - [3, '10.0.0.3', 'Linux', '2015-01-01T00:00:00+00:00', 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 3, 3], - [4, '10.0.0.4', 'Windows', '2015-01-02T00:00:00+00:00', 3, 4, 3, 2, 1, 1, 2, 1, 2, 2, 4, 4], - [5, '10.0.0.5', 'Windows', '2015-02-01T00:00:00+00:00', 1, 1, 2, 32766, 4, 1, 1, 1, 2, 2, 5, 5], - [6, '10.0.0.6', 'MacOS', '2015-06-01T00:00:00+00:00', 4, 2, 4, 4, 1, 2, 2, 1, 2, 2, 6, 6], - [7, '10.0.0.7', 'Windows', '2015-12-30T00:00:00+00:00', 32766, 1, 32766, 4, 3, 2, 2, 2, 1, 2, 32766, 7], - [8, '10.0.0.8', 'Minix', '2016-01-01T00:00:00+00:00', 32766, 2, 1, 1, 2, 2, 2, 2, 1, 2, 32767, 8], - [9, '10.0.0.9', 'FreeBSD', '2016-02-01T00:00:00+00:00', 32767, 1, 1, 1, 32766, 1, 2, 1, 2, 2, 1, 9], - [10, '10.0.0.10', 'NetBSD', '2015-03-01T00:00:00+00:00', 2, 4, 3, 4, 1, 2, 2, 1, 2, 2, 2, 10], - [11, '10.0.0.10', 'NetBSD', '2015-03-01T00:01:00+00:00', 2, 4, 3, 4, 1, 1, 1, 1, 1, 1, 3, 11], - [12, '10.0.0.10', 'NetBSD', '2015-03-01T00:02:00+00:00', 2, 4, 3, 4, 1, 2, 2, 2, 2, 2, 4, 12], + [ + "identity", + "ip_address", + "operating_system", + "registration_time", + "speak_spanish", + "hobbies_1", + "hobbies_2", + "hobbies_3", + "hobbies_4", + "music_1", + "music_2", + "music_97", + "music_98", + "music_99", + "religion", + "location", + ], + [ + 1, + "10.0.0.1", + "Linux", + "2014-04-21T10:00:00+00:00", + 1, + 32767, + 32767, + 32767, + 32767, + 2, + 2, + 1, + 2, + 2, + 1, + 1, + ], + [ + 2, + "10.0.0.2", + "Solaris", + "2014-05-10T00:00:00+00:00", + 1, + 32766, + 1, + 1, + 4, + 1, + 1, + 1, + 2, + 2, + 2, + 2, + ], + [ + 3, + "10.0.0.3", + "Linux", + "2015-01-01T00:00:00+00:00", + 2, + 2, + 1, + 2, + 2, + 2, + 2, + 2, + 2, + 1, + 3, + 3, + ], + [ + 4, + "10.0.0.4", + "Windows", + "2015-01-02T00:00:00+00:00", + 3, + 4, + 3, + 2, + 1, + 1, + 2, + 1, + 2, + 2, + 4, + 4, + ], + [ + 5, + "10.0.0.5", + "Windows", + "2015-02-01T00:00:00+00:00", + 1, + 1, + 2, + 32766, + 4, + 1, + 1, + 1, + 2, + 2, + 5, + 5, + ], + [ + 6, + "10.0.0.6", + "MacOS", + "2015-06-01T00:00:00+00:00", + 4, + 2, + 4, + 4, + 1, + 2, + 2, + 1, + 2, + 2, + 6, + 6, + ], + [ + 7, + "10.0.0.7", + "Windows", + "2015-12-30T00:00:00+00:00", + 32766, + 1, + 32766, + 4, + 3, + 2, + 2, + 2, + 1, + 2, + 32766, + 7, + ], + [ + 8, + "10.0.0.8", + "Minix", + "2016-01-01T00:00:00+00:00", + 32766, + 2, + 1, + 1, + 2, + 2, + 2, + 2, + 1, + 2, + 32767, + 8, + ], + [ + 9, + "10.0.0.9", + "FreeBSD", + "2016-02-01T00:00:00+00:00", + 32767, + 1, + 1, + 1, + 32766, + 1, + 2, + 1, + 2, + 2, + 1, + 9, + ], + [ + 10, + "10.0.0.10", + "NetBSD", + "2015-03-01T00:00:00+00:00", + 2, + 4, + 3, + 4, + 1, + 2, + 2, + 1, + 2, + 2, + 2, + 10, + ], + [ + 11, + "10.0.0.10", + "NetBSD", + "2015-03-01T00:01:00+00:00", + 2, + 4, + 3, + 4, + 1, + 1, + 1, + 1, + 1, + 1, + 3, + 11, + ], + [ + 12, + "10.0.0.10", + "NetBSD", + "2015-03-01T00:02:00+00:00", + 2, + 4, + 3, + 4, + 1, + 2, + 2, + 2, + 2, + 2, + 4, + 12, + ], ] def invalid_credentials(): - return any( - item is None - for 
item in (CRUNCH_URL, CRUNCH_USER, CRUNCH_PASSWORD) - ) + return any(item is None for item in (CRUNCH_URL, CRUNCH_USER, CRUNCH_PASSWORD)) def isnan(obj): @@ -418,7 +611,7 @@ def isnan(obj): def main(): assert not invalid_credentials() - assert pandaslib, 'Pandas library not installed' + assert pandaslib, "Pandas library not installed" # Login. site = connect(CRUNCH_USER, CRUNCH_PASSWORD, CRUNCH_URL) @@ -443,423 +636,452 @@ def main(): assert dataset.size.unfiltered_rows == len(df) # Also check number of columns - columns = DATASET_DOC['body']['table']['metadata'].__len__() + columns = DATASET_DOC["body"]["table"]["metadata"].__len__() assert dataset.size.columns == columns # 0. Manipulate metadata # 0.1 Start by updating the missing rules for the `identity` variable - identity_missing_rules = { - "not asked": 9999, - "skipped": 9998 - } + identity_missing_rules = {"not asked": 9999, "skipped": 9998} - assert dataset['identity'].missing_rules == {} - dataset['identity'].set_missing_rules(identity_missing_rules) - assert dataset['identity'].missing_rules == identity_missing_rules + assert dataset["identity"].missing_rules == {} + dataset["identity"].set_missing_rules(identity_missing_rules) + assert dataset["identity"].missing_rules == identity_missing_rules # 0.2 Try setting and unsetting the geodata view - location = dataset['location'] - geodata = get_geodata('UK Regions') - assert 'geodata' not in location.view + location = dataset["location"] + geodata = get_geodata("UK Regions") + assert "geodata" not in location.view # Set geodata using Entity object - location.set_geodata_view(geodata, feature_key='EER13NM') - assert 'geodata' in location.view + location.set_geodata_view(geodata, feature_key="EER13NM") + assert "geodata" in location.view location.unset_geodata_view() - assert 'geodata' not in location.view + assert "geodata" not in location.view # Set geodata using url - location.set_geodata_view(geodata.self, feature_key='EER13NM') - assert 'geodata' in location.view + location.set_geodata_view(geodata.self, feature_key="EER13NM") + assert "geodata" in location.view location.unset_geodata_view() - assert 'geodata' not in location.view + assert "geodata" not in location.view # Set geodata using name - location.set_geodata_view('UK Regions', feature_key='EER13NM') - assert 'geodata' in location.view + location.set_geodata_view("UK Regions", feature_key="EER13NM") + assert "geodata" in location.view location.unset_geodata_view() - assert 'geodata' not in location.view + assert "geodata" not in location.view # 1. Exclusion Filter Integration Tests # 1.1 Set a simple exclusion filter. - dataset.exclude('identity > 5') + dataset.exclude("identity > 5") df = pandaslib.dataframe(dataset.resource) assert len(df) == 5 - assert not any(r['identity'] > 5 for _, r in df.iterrows()) + assert not any(r["identity"] > 5 for _, r in df.iterrows()) # 1.2 More complex exclusion filters involving a categorical variable. 
- expr = 'speak_spanish in [32766]' + expr = "speak_spanish in [32766]" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) - valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and row[4] != 32766 - ] + valid_ids = [row[0] for row in ROWS if row[0] != "identity" and row[4] != 32766] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids + assert row["identity"] in valid_ids - expr = 'speak_spanish in (32766, 32767)' + expr = "speak_spanish in (32766, 32767)" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and row[4] not in (32766, 32767) + row[0] + for row in ROWS + if row[0] != "identity" and row[4] not in (32766, 32767) ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert not isnan(row['speak_spanish']) + assert row["identity"] in valid_ids + assert not isnan(row["speak_spanish"]) expr = 'not (speak_spanish in (1, 2) and operating_system == "Linux")' dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and row[4] in (1, 2) and row[2] == 'Linux' + row[0] + for row in ROWS + if row[0] != "identity" and row[4] in (1, 2) and row[2] == "Linux" ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert row['speak_spanish'] in \ - ('I speak Spanish primarily', - 'I speak both Spanish and English equally') - assert row['operating_system'] == 'Linux' + assert row["identity"] in valid_ids + assert row["speak_spanish"] in ( + "I speak Spanish primarily", + "I speak both Spanish and English equally", + ) + assert row["operating_system"] == "Linux" # 1.3 Exclusion filters with `any`. 
- expr = 'hobbies.any([32766])' + expr = "hobbies.any([32766])" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and 32766 not in row[5:9] + row[0] for row in ROWS if row[0] != "identity" and 32766 not in row[5:9] ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert {'?': 32766} not in row['hobbies'] + assert row["identity"] in valid_ids + assert {"?": 32766} not in row["hobbies"] - expr = 'not hobbies.any([32766])' + expr = "not hobbies.any([32766])" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and 32766 in row[5:9] + row[0] for row in ROWS if row[0] != "identity" and 32766 in row[5:9] ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert {'?': 32766} in row['hobbies'] + assert row["identity"] in valid_ids + assert {"?": 32766} in row["hobbies"] - expr = 'hobbies.any([32766, 32767])' + expr = "hobbies.any([32766, 32767])" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' - and 32766 not in row[5:9] and 32767 not in row[5:9] + row[0] + for row in ROWS + if row[0] != "identity" and 32766 not in row[5:9] and 32767 not in row[5:9] ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert {'?': 32766} not in row['hobbies'] and \ - {'?': 32767} not in row['hobbies'] + assert row["identity"] in valid_ids + assert {"?": 32766} not in row["hobbies"] and {"?": 32767} not in row[ + "hobbies" + ] - expr = 'music.any([32766])' + expr = "music.any([32766])" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and 32766 not in row[9:14] + row[0] for row in ROWS if row[0] != "identity" and 32766 not in row[9:14] ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert {'?': 32766} not in row['music'] + assert row["identity"] in valid_ids + assert {"?": 32766} not in row["music"] - expr = 'music.any([1])' + expr = "music.any([1])" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and 1 not in row[9:14] + row[0] for row in ROWS if row[0] != "identity" and 1 not in row[9:14] ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert 1 not in row['music'] + assert row["identity"] in valid_ids + assert 1 not in row["music"] - expr = 'music.any([1, 2])' + expr = "music.any([1, 2])" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' - and 1 not in row[9:14] and 2 not in row[9:14] + row[0] + for row in ROWS + if row[0] != "identity" and 1 not in row[9:14] and 2 not in row[9:14] ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert 1 not in row['music'] and 2 not in row['music'] + assert row["identity"] in valid_ids + assert 1 not in row["music"] and 2 not in row["music"] # 1.4 Exclusion filters with `all`. 
- expr = 'hobbies.all([32767])' + expr = "hobbies.all([32767])" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and row[5:9] != [32767, 32767, 32767, 32767] + row[0] + for row in ROWS + if row[0] != "identity" and row[5:9] != [32767, 32767, 32767, 32767] ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert row['hobbies'] != [{'?': 32767}, {'?': 32767}, - {'?': 32767}, {'?': 32767}] + assert row["identity"] in valid_ids + assert row["hobbies"] != [ + {"?": 32767}, + {"?": 32767}, + {"?": 32767}, + {"?": 32767}, + ] - expr = 'not hobbies.all([32767])' + expr = "not hobbies.all([32767])" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and row[5:9] == [32767, 32767, 32767, 32767] + row[0] + for row in ROWS + if row[0] != "identity" and row[5:9] == [32767, 32767, 32767, 32767] ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert row['hobbies'] == [{'?': 32767}, {'?': 32767}, - {'?': 32767}, {'?': 32767}] + assert row["identity"] in valid_ids + assert row["hobbies"] == [ + {"?": 32767}, + {"?": 32767}, + {"?": 32767}, + {"?": 32767}, + ] - expr = 'music.all([1])' + expr = "music.all([1])" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and row[9:14] != [1, 1, 1, 1, 1] + row[0] + for row in ROWS + if row[0] != "identity" and row[9:14] != [1, 1, 1, 1, 1] ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert row['music'] != [1, 1, 1, 1, 1] + assert row["identity"] in valid_ids + assert row["music"] != [1, 1, 1, 1, 1] - expr = 'music.all([1]) or music.all([2])' + expr = "music.all([1]) or music.all([2])" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' + row[0] + for row in ROWS + if row[0] != "identity" and (row[9:14] != [1, 1, 1, 1, 1] and row[9:14] != [2, 2, 2, 2, 2]) ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert row['music'] != [1, 1, 1, 1, 1] and \ - row['music'] != [2, 2, 2, 2, 2] + assert row["identity"] in valid_ids + assert row["music"] != [1, 1, 1, 1, 1] and row["music"] != [2, 2, 2, 2, 2] - expr = 'not ( music.all([1]) or music.all([2]) )' + expr = "not ( music.all([1]) or music.all([2]) )" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' + row[0] + for row in ROWS + if row[0] != "identity" and (row[9:14] == [1, 1, 1, 1, 1] or row[9:14] == [2, 2, 2, 2, 2]) ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert row['music'] == [1, 1, 1, 1, 1] or \ - row['music'] == [2, 2, 2, 2, 2] + assert row["identity"] in valid_ids + assert row["music"] == [1, 1, 1, 1, 1] or row["music"] == [2, 2, 2, 2, 2] # 1.5 Exclusion filters with `duplicates`. 
- expr = 'ip_address.duplicates()' + expr = "ip_address.duplicates()" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) seen_ip_addresses = [] for _, row in df.iterrows(): - assert row['ip_address'] not in seen_ip_addresses - seen_ip_addresses.append(row['ip_address']) + assert row["ip_address"] not in seen_ip_addresses + seen_ip_addresses.append(row["ip_address"]) # 1.6 Exclusion filters with `valid` and `missing`. - expr = 'valid(speak_spanish)' + expr = "valid(speak_spanish)" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and row[4] in (32766, 32767) + row[0] for row in ROWS if row[0] != "identity" and row[4] in (32766, 32767) ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert isnan(row['speak_spanish']) + assert row["identity"] in valid_ids + assert isnan(row["speak_spanish"]) - expr = 'not valid(speak_spanish)' + expr = "not valid(speak_spanish)" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and row[4] not in (32766, 32767) + row[0] + for row in ROWS + if row[0] != "identity" and row[4] not in (32766, 32767) ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert not isnan(row['speak_spanish']) + assert row["identity"] in valid_ids + assert not isnan(row["speak_spanish"]) - expr = 'missing(speak_spanish)' + expr = "missing(speak_spanish)" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and row[4] not in (32766, 32767) + row[0] + for row in ROWS + if row[0] != "identity" and row[4] not in (32766, 32767) ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert not isnan(row['speak_spanish']) + assert row["identity"] in valid_ids + assert not isnan(row["speak_spanish"]) - expr = 'missing(hobbies)' + expr = "missing(hobbies)" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' - and (row[5:9] != [32766, 32766, 32766, 32766] - and row[5:9] != [32767, 32767, 32767, 32767]) + row[0] + for row in ROWS + if row[0] != "identity" + and ( + row[5:9] != [32766, 32766, 32766, 32766] + and row[5:9] != [32767, 32767, 32767, 32767] + ) ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert row['hobbies'] != [{'?': 32766}, {'?': 32766}, - {'?': 32766}, {'?': 32766}] \ - and row['hobbies'] != [{'?': 32767}, {'?': 32767}, - {'?': 32767}, {'?': 32767}] + assert row["identity"] in valid_ids + assert row["hobbies"] != [ + {"?": 32766}, + {"?": 32766}, + {"?": 32766}, + {"?": 32766}, + ] and row["hobbies"] != [ + {"?": 32767}, + {"?": 32767}, + {"?": 32767}, + {"?": 32767}, + ] - expr = 'not missing(hobbies)' + expr = "not missing(hobbies)" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' - and (row[5:9] == [32766, 32766, 32766, 32766] - or row[5:9] == [32767, 32767, 32767, 32767]) + row[0] + for row in ROWS + if row[0] != "identity" + and ( + row[5:9] == [32766, 32766, 32766, 32766] + or row[5:9] == [32767, 32767, 32767, 32767] + ) ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert row['hobbies'] == [{'?': 
32766}, {'?': 32766}, - {'?': 32766}, {'?': 32766}] \ - or row['hobbies'] == [{'?': 32767}, {'?': 32767}, - {'?': 32767}, {'?': 32767}] + assert row["identity"] in valid_ids + assert row["hobbies"] == [ + {"?": 32766}, + {"?": 32766}, + {"?": 32766}, + {"?": 32766}, + ] or row["hobbies"] == [ + {"?": 32767}, + {"?": 32767}, + {"?": 32767}, + {"?": 32767}, + ] - expr = 'valid(hobbies)' + expr = "valid(hobbies)" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and (32766 in row[5:9] or 32767 in row[5:9]) + row[0] + for row in ROWS + if row[0] != "identity" and (32766 in row[5:9] or 32767 in row[5:9]) ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert {'?': 32766} in row['hobbies'] or \ - {'?': 32767} in row['hobbies'] + assert row["identity"] in valid_ids + assert {"?": 32766} in row["hobbies"] or {"?": 32767} in row["hobbies"] - expr = 'not valid(hobbies)' + expr = "not valid(hobbies)" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' + row[0] + for row in ROWS + if row[0] != "identity" and (32766 not in row[5:9] and 32767 not in row[5:9]) ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert {'?': 32766} not in row['hobbies'] and \ - {'?': 32767} not in row['hobbies'] + assert row["identity"] in valid_ids + assert {"?": 32766} not in row["hobbies"] and {"?": 32767} not in row[ + "hobbies" + ] # 1.7 Exclusion filter that refers to a subvariable by alias. - expr = 'hobbies_1 == 4' + expr = "hobbies_1 == 4" dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) - valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and row[5] != 4 - ] + valid_ids = [row[0] for row in ROWS if row[0] != "identity" and row[5] != 4] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids - assert row['hobbies'][0] != 4 + assert row["identity"] in valid_ids + assert row["hobbies"][0] != 4 # 1.8 Complex exclusion filters (multiple rules) expr = ( - '(religion != 1 and (not valid(speak_spanish) or speak_spanish >= 1)) ' - 'or (religion == 1 and speak_spanish == 2) ' - 'or (religion == 3 and speak_spanish == 4)' + "(religion != 1 and (not valid(speak_spanish) or speak_spanish >= 1)) " + "or (religion == 1 and speak_spanish == 2) " + "or (religion == 3 and speak_spanish == 4)" ) dataset.exclude(expr) # 1.9 Exclusion filters using date variables. 
- dt_str = '2014-12-30T00:00:00+00:00' + dt_str = "2014-12-30T00:00:00+00:00" dt = isodate.parse_datetime(dt_str) expr = 'registration_time < "%s"' % dt_str dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and isodate.parse_datetime(row[3]) >= dt + row[0] + for row in ROWS + if row[0] != "identity" and isodate.parse_datetime(row[3]) >= dt ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids + assert row["identity"] in valid_ids - dt_str = '2015-01-01T00:00:00+00:00' + dt_str = "2015-01-01T00:00:00+00:00" dt = isodate.parse_datetime(dt_str) expr = 'registration_time >= "%s"' % dt_str dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and isodate.parse_datetime(row[3]) < dt + row[0] + for row in ROWS + if row[0] != "identity" and isodate.parse_datetime(row[3]) < dt ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids + assert row["identity"] in valid_ids - dt_str = '2014-05-10T00:00:00+00:00' + dt_str = "2014-05-10T00:00:00+00:00" dt = isodate.parse_datetime(dt_str) expr = 'registration_time == "%s"' % dt_str dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and isodate.parse_datetime(row[3]) != dt + row[0] + for row in ROWS + if row[0] != "identity" and isodate.parse_datetime(row[3]) != dt ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids + assert row["identity"] in valid_ids - dt_str = '2014-05-10T00:00:00+00:00' + dt_str = "2014-05-10T00:00:00+00:00" dt = isodate.parse_datetime(dt_str) expr = 'not(registration_time == "%s")' % dt_str dataset.exclude(expr) df = pandaslib.dataframe(dataset.resource) valid_ids = [ - row[0] for row in ROWS - if row[0] != 'identity' and isodate.parse_datetime(row[3]) == dt + row[0] + for row in ROWS + if row[0] != "identity" and isodate.parse_datetime(row[3]) == dt ] assert len(df) == len(valid_ids) for _, row in df.iterrows(): - assert row['identity'] in valid_ids + assert row["identity"] in valid_ids # 1.10 Clear the exclusion filter. 
dataset.exclude() @@ -870,164 +1092,151 @@ def main(): categories = [ { - 'id': 1, - 'name': 'Nerds', - 'numeric_value': 1, - 'missing': False, - 'case': 'operating_system in ("Linux", "Solaris", "Minix", "FreeBSD", "NetBSD")', + "id": 1, + "name": "Nerds", + "numeric_value": 1, + "missing": False, + "case": 'operating_system in ("Linux", "Solaris", "Minix", "FreeBSD", "NetBSD")', }, { - 'id': 2, - 'name': 'Normal Users', - 'numeric_value': 2, - 'missing': False, - 'case': 'operating_system == "Windows"', + "id": 2, + "name": "Normal Users", + "numeric_value": 2, + "missing": False, + "case": 'operating_system == "Windows"', }, { - 'id': 3, - 'name': 'Hipsters', - 'numeric_value': 3, - 'missing': False, - 'case': 'operating_system == "MacOS"', + "id": 3, + "name": "Hipsters", + "numeric_value": 3, + "missing": False, + "case": 'operating_system == "MacOS"', }, { - 'id': 32767, - 'name': 'Unknown', - 'numeric_value': None, - 'missing': True, - 'case': 'missing(operating_system)' - } + "id": 32767, + "name": "Unknown", + "numeric_value": None, + "missing": True, + "case": "missing(operating_system)", + }, ] new_var = dataset.create_single_response( categories=categories, - name='Operating System Users', - alias='operating_system_users', - description='Type of Operating System Users' + name="Operating System Users", + alias="operating_system_users", + description="Type of Operating System Users", ) assert isinstance(new_var, Variable) - assert new_var.type == 'categorical' + assert new_var.type == "categorical" # Check the data on the new variable. df = pandaslib.dataframe(dataset.resource) - assert 'operating_system_users' in df + assert "operating_system_users" in df # Check the nerds. assert set( - r['operating_system'] - for _, r in df[df['operating_system_users'] == 'Nerds'].iterrows() - ) == {'Linux', 'Solaris', 'Minix', 'FreeBSD', 'NetBSD'} + r["operating_system"] + for _, r in df[df["operating_system_users"] == "Nerds"].iterrows() + ) == {"Linux", "Solaris", "Minix", "FreeBSD", "NetBSD"} # Check the hipsters. assert set( - r['operating_system'] - for _, r in df[df['operating_system_users'] == 'Hipsters'].iterrows() - ) == {'MacOS'} + r["operating_system"] + for _, r in df[df["operating_system_users"] == "Hipsters"].iterrows() + ) == {"MacOS"} # Check normal users. assert set( - r['operating_system'] - for _, r in df[df['operating_system_users'] == 'Normal Users'].iterrows() - ) == {'Windows'} + r["operating_system"] + for _, r in df[df["operating_system_users"] == "Normal Users"].iterrows() + ) == {"Windows"} # 3. Integration Tests for "Recodes". # 3.1 combine_categories. # On a 'categorical' variable. - cat_map = { - 1: [2, 3], - 2: [1, 4], - 99: [32766, 32767] - } - - cat_names = { - 1: 'Bilingual', - 2: 'Not Bilingual', - 99: 'Unknown' - } + cat_map = {1: [2, 3], 2: [1, 4], 99: [32766, 32767]} + + cat_names = {1: "Bilingual", 2: "Not Bilingual", 99: "Unknown"} new_var = dataset.combine_categorical( - 'speak_spanish', + "speak_spanish", map=cat_map, categories=cat_names, - name='Bilingual Person', - alias='bilingual', - missing=[99] + name="Bilingual Person", + alias="bilingual", + missing=[99], ) assert isinstance(new_var, Variable) - assert new_var.type == 'categorical' + assert new_var.type == "categorical" df = pandaslib.dataframe(dataset.resource) - assert 'bilingual' in df + assert "bilingual" in df # Check the data in the recoded variable. 
bilingual_ids = set( - row[0] for row in ROWS - if row[0] != 'identity' and row[4] in (2, 3) + row[0] for row in ROWS if row[0] != "identity" and row[4] in (2, 3) + ) + assert ( + set( + int(r["identity"]) + for _, r in df[df["bilingual"] == "Bilingual"].iterrows() + ) + == bilingual_ids ) - assert set( - int(r['identity']) - for _, r in df[df['bilingual'] == 'Bilingual'].iterrows() - ) == bilingual_ids non_bilingual_ids = set( - row[0] for row in ROWS - if row[0] != 'identity' and row[4] in (1, 4) + row[0] for row in ROWS if row[0] != "identity" and row[4] in (1, 4) + ) + assert ( + set( + int(r["identity"]) + for _, r in df[df["bilingual"] == "Not Bilingual"].iterrows() + ) + == non_bilingual_ids ) - assert set( - int(r['identity']) - for _, r in df[df['bilingual'] == 'Not Bilingual'].iterrows() - ) == non_bilingual_ids bilingual_null_ids = set( - row[0] for row in ROWS - if row[0] != 'identity' and row[4] in (32766, 32767) + row[0] for row in ROWS if row[0] != "identity" and row[4] in (32766, 32767) + ) + assert ( + set(int(r["identity"]) for _, r in df[df["bilingual"].isnull()].iterrows()) + == bilingual_null_ids ) - assert set( - int(r['identity']) - for _, r in df[df['bilingual'].isnull()].iterrows() - ) == bilingual_null_ids # On a 'categorical_array' variable. - cat_map = { - 1: [1, 2], - 2: [3, 4], - 99: [32766, 32767] - } + cat_map = {1: [1, 2], 2: [3, 4], 99: [32766, 32767]} - cat_names = { - 1: 'Interested', - 2: 'Not interested', - 99: 'Unknown', - } + cat_names = {1: "Interested", 2: "Not interested", 99: "Unknown"} new_var = dataset.combine_categorical( - 'hobbies', + "hobbies", map=cat_map, categories=cat_names, - name='Hobbies (recoded)', - alias='hobbies_recoded', - missing=[99] + name="Hobbies (recoded)", + alias="hobbies_recoded", + missing=[99], ) assert isinstance(new_var, Variable) - assert new_var.type == 'categorical_array' + assert new_var.type == "categorical_array" df = pandaslib.dataframe(dataset.resource) - assert 'hobbies_recoded' in df + assert "hobbies_recoded" in df # Check the data in the recoded variable. for _, row in df.iterrows(): - hobbies = row['hobbies'] - hobbies_rec = row['hobbies_recoded'] + hobbies = row["hobbies"] + hobbies_rec = row["hobbies_recoded"] assert len(hobbies) == len(hobbies_rec) for i, value in enumerate(hobbies): - if value in ({'?': 32766}, {'?': 32767}): - assert hobbies_rec[i] == {'?': 99} + if value in ({"?": 32766}, {"?": 32767}): + assert hobbies_rec[i] == {"?": 99} elif value in (1, 2): assert hobbies_rec[i] == 1 elif value in (3, 4): @@ -1035,29 +1244,21 @@ def main(): # 3.2 combine_responses. - cat_map = { - 1: [1, 2], - 2: [97], - 3: [98, 99] - } - - cat_names = { - 1: 'music_recoded_1', - 2: 'music_recoded_2', - 3: 'music_recoded_3' - } + cat_map = {1: [1, 2], 2: [97], 3: [98, 99]} + + cat_names = {1: "music_recoded_1", 2: "music_recoded_2", 3: "music_recoded_3"} new_var = dataset.combine_multiple_response( - 'music', + "music", map=cat_map, categories=cat_names, - name='Music (alt)', - alias='music_recoded' + name="Music (alt)", + alias="music_recoded", ) assert isinstance(new_var, Variable) - assert new_var.type == 'multiple_response' + assert new_var.type == "multiple_response" df = pandaslib.dataframe(dataset.resource) - assert 'music_recoded' in df + assert "music_recoded" in df # TODO: Test the data in the recoded variable. Unsure of its meaning. 
@@ -1065,6 +1266,6 @@ def main(): dataset.resource.delete() -if __name__ == '__main__': +if __name__ == "__main__": main() exit(0) diff --git a/scrunch/tests/mock_session.py b/scrunch/tests/mock_session.py index 7297c86..18e0b2b 100644 --- a/scrunch/tests/mock_session.py +++ b/scrunch/tests/mock_session.py @@ -30,19 +30,18 @@ def add_patch_response(self, response): def send(self, request, **kwargs): self.requests.append(request) - if request.method == 'POST': + if request.method == "POST": response = self.post_responses.pop(0) elif request.method == "PATCH": response = self.patch_responses.pop(0) else: url = request.url if url not in self.fixtures: - raise NotImplementedError( - "URL: %s does not have a fixture" % url) + raise NotImplementedError("URL: %s does not have a fixture" % url) response = Response() response.status_code = 200 - response.headers['Content-Type'] = 'application/json' - response.raw = BytesIO(json.dumps(self.fixtures[url]).encode('utf-8')) + response.headers["Content-Type"] = "application/json" + response.raw = BytesIO(json.dumps(self.fixtures[url]).encode("utf-8")) return response @@ -54,8 +53,8 @@ def __init__(self, site_url=None): ScrunchSession.__init__(self, token=self.token, site_url=self.site_url) self.feature_flags = {} self.adapter = FixtureAdapter() - self.adapters['http://'] = self.adapter - self.adapters['https://'] = self.adapter + self.adapters["http://"] = self.adapter + self.adapters["https://"] = self.adapter def add_fixture(self, url, fixture): self.adapter.add_fixture(url, fixture) diff --git a/scrunch/tests/test_accounts.py b/scrunch/tests/test_accounts.py index 5158da4..a125ba3 100644 --- a/scrunch/tests/test_accounts.py +++ b/scrunch/tests/test_accounts.py @@ -17,32 +17,25 @@ def make_session(self): execute_url = "http://host/api/account/run/" session = MockSession(site_url=root_url) - root_resource = Catalog(session, **{ - "element": "shoji:catalog", - "self": root_url, - "index": {}, - "catalogs": {}, - "views": {"account": account_url} - }) - act_resource = Entity(session, **{ - "element": "shoji:entity", - "self": account_url, - "body": { - "name": "test account", - "id": "00001", - }, - "catalogs": { - "projects": projects_url - }, - "views": { - "execute": execute_url - } - }) - execute_resource = View(session, **{ - "element": "shoji:view", - "self": execute_url, - "value": {} - }) + root_resource = Catalog( + session, + element="shoji:catalog", + self=root_url, + index={}, + catalogs={}, + views={"account": account_url}, + ) + act_resource = Entity( + session, + element="shoji:entity", + self=account_url, + body={"name": "test account", "id": "00001"}, + catalogs={"projects": projects_url}, + views={"execute": execute_url}, + ) + execute_resource = View( + session, element="shoji:view", self=execute_url, value={} + ) session.add_fixture(account_url, act_resource) session.add_fixture(root_url, root_resource) session.add_fixture(execute_url, execute_resource) @@ -68,7 +61,7 @@ def test_execute(self): post_request = session.requests[-1] assert json.loads(post_request.body) == { "element": "shoji:view", - "value": "NOOP;" + "value": "NOOP;", } def test_execute_script_with_syntax_subvariable_flag(self): @@ -84,7 +77,7 @@ def test_execute_script_with_syntax_subvariable_flag(self): post_request = session.requests[-1] assert json.loads(post_request.body) == { "element": "shoji:view", - "value": "NOOP;" + "value": "NOOP;", } assert "?strict_subvariable_syntax=true" in post_request.url @@ -93,25 +86,24 @@ def test_projects(self): projects_url = 
"http://host/api/account/projects/" project_url = "http://host/api/projects/abc/" - project_resource = Entity(session, **{ - "element": "shoji:entity", - "self": project_url, - "body": { - "name": "my project", - "id": "abc" - } - }) - - act_projects_res = Catalog(session, **{ - "element": "shoji:catalog", - "self": projects_url, - "index": { + project_resource = Entity( + session, + element="shoji:entity", + self=project_url, + body={"name": "my project", "id": "abc"}, + ) + + act_projects_res = Catalog( + session, + element="shoji:catalog", + self=projects_url, + index={ project_url: { "name": project_resource["body"]["name"], "id": project_resource["body"]["id"], } - } - }) + }, + ) session.add_fixture(projects_url, act_projects_res) session.add_fixture(project_url, project_resource) current_act = Account.current_account(session.root) diff --git a/scrunch/tests/test_categories.py b/scrunch/tests/test_categories.py index a7cbe59..b037f6b 100644 --- a/scrunch/tests/test_categories.py +++ b/scrunch/tests/test_categories.py @@ -9,15 +9,27 @@ def TEST_CATEGORIES(): return [ {"id": 1, "name": "Female", "missing": False, "numeric_value": None}, {"id": 2, "name": "Male", "missing": False, "numeric_value": None}, - {"id": -1, "name": "No Data", "missing": True, "numeric_value": None} + {"id": -1, "name": "No Data", "missing": True, "numeric_value": None}, ] def TEST_CATEGORIES_WITH_DATE(): return [ - {"id": 1, "name": "Female", "missing": False, "numeric_value": None, "date": "2020-01-01"}, - {"id": 2, "name": "Male", "missing": False, "numeric_value": None, "date": "2020-02-02"}, - {"id": -1, "name": "No Data", "missing": True, "numeric_value": None} + { + "id": 1, + "name": "Female", + "missing": False, + "numeric_value": None, + "date": "2020-01-01", + }, + { + "id": 2, + "name": "Male", + "missing": False, + "numeric_value": None, + "date": "2020-02-02", + }, + {"id": -1, "name": "No Data", "missing": True, "numeric_value": None}, ] @@ -30,95 +42,104 @@ def edit(self, **kwargs): class TestCategories(TestCase): def test_instance_is_reused(self): resource = EditableMock() - resource.entity.body = dict( - categories=TEST_CATEGORIES(), - type='categorical' - ) + resource.entity.body = dict(categories=TEST_CATEGORIES(), type="categorical") variable = Variable(resource, MagicMock()) cat_list = variable.categories self.assertTrue(isinstance(cat_list, CategoryList)) def test_category_dict_attribytes(self): resource = EditableMock() - resource.entity.body = dict(categories=TEST_CATEGORIES(), type='categorical') + resource.entity.body = dict(categories=TEST_CATEGORIES(), type="categorical") variable = Variable(resource, MagicMock()) # Does not have `date` unnecessarily assert variable.categories[1].as_dict() == { - 'id': 1, - 'missing': False, - 'name': 'Female', - 'numeric_value': None, - 'selected': False + "id": 1, + "missing": False, + "name": "Female", + "numeric_value": None, + "selected": False, } variable.categories[1].edit(date="1990-02-04") # Contains .date if needed assert variable.categories[1].as_dict() == { - 'id': 1, - 'missing': False, - 'name': 'Female', - 'numeric_value': None, - 'selected': False, - "date": "1990-02-04" + "id": 1, + "missing": False, + "name": "Female", + "numeric_value": None, + "selected": False, + "date": "1990-02-04", } def test_edit_category(self): resource = EditableMock() - resource.entity.body = dict( - categories=TEST_CATEGORIES(), - type='categorical' - ) + resource.entity.body = dict(categories=TEST_CATEGORIES(), type="categorical") variable = 
Variable(resource, MagicMock()) - variable.categories[1].edit(name='Mujer') - resource.entity._edit.assert_called_with(categories=[ - {'numeric_value': None, 'selected': False, 'id': 1, 'missing': False, 'name': 'Mujer'}, - # These two don't have selected yet because it is reusing the - # API categories still, only replacing the modified one - {'numeric_value': None, 'missing': False, 'id': 2, 'name': 'Male'}, - {'numeric_value': None, 'missing': True, 'id': -1, 'name': 'No Data'} - ]) + variable.categories[1].edit(name="Mujer") + resource.entity._edit.assert_called_with( + categories=[ + { + "numeric_value": None, + "selected": False, + "id": 1, + "missing": False, + "name": "Mujer", + }, + # These two don't have selected yet because it is reusing the + # API categories still, only replacing the modified one + {"numeric_value": None, "missing": False, "id": 2, "name": "Male"}, + {"numeric_value": None, "missing": True, "id": -1, "name": "No Data"}, + ] + ) resource.entity.refresh.assert_called_once() - self.assertEqual(variable.categories[1].name, 'Mujer') + self.assertEqual(variable.categories[1].name, "Mujer") # Editing Male - variable.categories[2].edit(name='Hombre') - resource.entity._edit.assert_called_with(categories=[ - {'numeric_value': None, 'selected': False, 'id': 1, 'missing': False, 'name': 'Mujer'}, - {'numeric_value': None, 'selected': False, 'missing': False, 'id': 2, 'name': 'Hombre'}, - # Same as above, reusing the existing value from API still - {'numeric_value': None, 'missing': True, 'id': -1, 'name': 'No Data'} - ]) + variable.categories[2].edit(name="Hombre") + resource.entity._edit.assert_called_with( + categories=[ + { + "numeric_value": None, + "selected": False, + "id": 1, + "missing": False, + "name": "Mujer", + }, + { + "numeric_value": None, + "selected": False, + "missing": False, + "id": 2, + "name": "Hombre", + }, + # Same as above, reusing the existing value from API still + {"numeric_value": None, "missing": True, "id": -1, "name": "No Data"}, + ] + ) # Try to change the ID with self.assertRaises(AttributeError) as err: variable.categories[2].edit(id=100) - self.assertEqual( - str(err.exception), - 'Cannot edit the following attributes: id' - ) + self.assertEqual(str(err.exception), "Cannot edit the following attributes: id") # Nothing changed self.assertEqual(set(variable.categories.keys()), {1, 2, -1}) def test_delete_category(self): resource = EditableMock() - resource.entity.body = dict( - categories=TEST_CATEGORIES(), - type='categorical' - ) + resource.entity.body = dict(categories=TEST_CATEGORIES(), type="categorical") variable = Variable(resource, MagicMock()) variable.categories[1].delete() # Calling edit without the one that we wanted to delete - resource.entity._edit.assert_called_with(categories=[ - {'numeric_value': None, 'missing': False, 'id': 2, 'name': 'Male'}, - {'numeric_value': None, 'missing': True, 'id': -1, 'name': 'No Data'} - ]) + resource.entity._edit.assert_called_with( + categories=[ + {"numeric_value": None, "missing": False, "id": 2, "name": "Male"}, + {"numeric_value": None, "missing": True, "id": -1, "name": "No Data"}, + ] + ) def test_category_attribute_writes(self): resource = EditableMock() - resource.entity.body = dict( - categories=TEST_CATEGORIES(), - type='categorical', - ) + resource.entity.body = dict(categories=TEST_CATEGORIES(), type="categorical") variable = Variable(resource, MagicMock()) error_msg = "use the edit() method for mutating attributes" @@ -130,9 +151,9 @@ def test_category_attribute_writes(self): 
assert str(excinfo.value) == "Can't edit attibute 'id'" with pytest.raises(AttributeError) as excinfo: - variable.categories[1].name = 'forbidden' + variable.categories[1].name = "forbidden" # nothing has changed - assert variable.categories[1].name == 'Female' + assert variable.categories[1].name == "Female" assert str(excinfo.value) == error_msg with pytest.raises(AttributeError) as excinfo: @@ -157,40 +178,46 @@ def test_edit_derived(self): resource = EditableMock() resource.entity.body = dict( categories=TEST_CATEGORIES(), - type='categorical', - derivation={'function': 'derivation_function'} + type="categorical", + derivation={"function": "derivation_function"}, ) variable = Variable(resource, MagicMock()) error_msg = "Cannot edit categories on derived variables. Re-derive with the appropriate expression" with pytest.raises(TypeError, match=error_msg): - variable.categories[1].edit(name='Mujer') + variable.categories[1].edit(name="Mujer") # Try again with an empty derivation resource = EditableMock() resource.entity.body = dict( categories=TEST_CATEGORIES(), - type='categorical', - derivation={} # Empty - + type="categorical", + derivation={}, # Empty ) variable = Variable(resource, MagicMock()) - variable.categories[1].edit(name='Mujer') - resource.entity._edit.assert_called_with(categories=[ - {'numeric_value': None, 'selected': False, 'id': 1, 'missing': False, 'name': 'Mujer'}, - {'numeric_value': None, 'missing': False, 'id': 2, 'name': 'Male'}, - {'numeric_value': None, 'missing': True, 'id': -1, 'name': 'No Data'} - ]) + variable.categories[1].edit(name="Mujer") + resource.entity._edit.assert_called_with( + categories=[ + { + "numeric_value": None, + "selected": False, + "id": 1, + "missing": False, + "name": "Mujer", + }, + {"numeric_value": None, "missing": False, "id": 2, "name": "Male"}, + {"numeric_value": None, "missing": True, "id": -1, "name": "No Data"}, + ] + ) def test_read_category_date(self): resource = EditableMock() resource.entity.body = dict( - categories=TEST_CATEGORIES_WITH_DATE(), - type='categorical' + categories=TEST_CATEGORIES_WITH_DATE(), type="categorical" ) variable = Variable(resource, MagicMock()) - self.assertEqual(variable.categories[1].date, '2020-01-01') - self.assertEqual(variable.categories[2].date, '2020-02-02') + self.assertEqual(variable.categories[1].date, "2020-01-01") + self.assertEqual(variable.categories[2].date, "2020-02-02") with self.assertRaises(KeyError): # The `No Data` category doesn't provide a `date field _ = variable.categories[3].date @@ -198,34 +225,47 @@ def test_read_category_date(self): def test_edit_category_date(self): resource = EditableMock() resource.entity.body = dict( - categories=TEST_CATEGORIES_WITH_DATE(), - type='categorical' + categories=TEST_CATEGORIES_WITH_DATE(), type="categorical" ) variable = Variable(resource, MagicMock()) - variable.categories[1].edit(date='2021-01-01') - resource.entity._edit.assert_called_with(categories=[ - {'numeric_value': None, 'selected': False, 'id': 1, 'missing': False, 'name': 'Female', 'date': '2021-01-01'}, - {'numeric_value': None, 'missing': False, 'id': 2, 'name': 'Male', 'date': '2020-02-02'}, - {'numeric_value': None, 'missing': True, 'id': -1, 'name': 'No Data'} - ]) + variable.categories[1].edit(date="2021-01-01") + resource.entity._edit.assert_called_with( + categories=[ + { + "numeric_value": None, + "selected": False, + "id": 1, + "missing": False, + "name": "Female", + "date": "2021-01-01", + }, + { + "numeric_value": None, + "missing": False, + "id": 2, + "name": 
"Male", + "date": "2020-02-02", + }, + {"numeric_value": None, "missing": True, "id": -1, "name": "No Data"}, + ] + ) resource.entity.refresh.assert_called_once() - self.assertEqual(variable.categories[1].date, '2021-01-01') + self.assertEqual(variable.categories[1].date, "2021-01-01") class TestCategoryList(TestCase): def test_reorder(self): resource = EditableMock() - resource.entity.body = dict( - categories=TEST_CATEGORIES(), - type='categorical' - ) + resource.entity.body = dict(categories=TEST_CATEGORIES(), type="categorical") variable = Variable(resource, MagicMock()) variable.categories.order(2, -1, 1) # Reordered values - resource.entity._edit.assert_called_with(categories=[ - {'numeric_value': None, 'missing': False, 'id': 2, 'name': 'Male'}, - {'numeric_value': None, 'missing': True, 'id': -1, 'name': 'No Data'}, - {'numeric_value': None, 'missing': False, 'id': 1, 'name': 'Female'} - ]) + resource.entity._edit.assert_called_with( + categories=[ + {"numeric_value": None, "missing": False, "id": 2, "name": "Male"}, + {"numeric_value": None, "missing": True, "id": -1, "name": "No Data"}, + {"numeric_value": None, "missing": False, "id": 1, "name": "Female"}, + ] + ) resource.entity.refresh.assert_called_once() diff --git a/scrunch/tests/test_cubes.py b/scrunch/tests/test_cubes.py index c264b7d..427176f 100644 --- a/scrunch/tests/test_cubes.py +++ b/scrunch/tests/test_cubes.py @@ -56,4 +56,3 @@ def test_pass_filter_expression(self, mock_fetch_cube): mock_fetch_cube.assert_called_once_with( ds.resource, urls, count=count(), filter=processed_filter, weight=None ) - diff --git a/scrunch/tests/test_datasets.py b/scrunch/tests/test_datasets.py index 4256e3b..f83729f 100644 --- a/scrunch/tests/test_datasets.py +++ b/scrunch/tests/test_datasets.py @@ -10,7 +10,7 @@ try: import pandas from pandas import DataFrame -except: +except ImportError: # pandas is not installed pandas = None @@ -47,9 +47,11 @@ class dict_to_obj(object): def __init__(self, d): for a, b in d.items(): if isinstance(b, (list, tuple)): - setattr(self, a, [dict_to_obj(x) if isinstance(x, dict) else x for x in b]) + setattr( + self, a, [dict_to_obj(x) if isinstance(x, dict) else x for x in b] + ) else: - setattr(self, a, dict_to_obj(b) if isinstance(b, dict) else b) + setattr(self, a, dict_to_obj(b) if isinstance(b, dict) else b) class _CrunchPayload(dict): @@ -60,7 +62,7 @@ def __init__(self, *args, **kwargs): self.patch = MagicMock() def __getattr__(self, item): - if item == 'payload': + if item == "payload": return self else: return self[item] @@ -77,106 +79,106 @@ def _get(*args): class TestDatasetBase(object): - api = 'https://test.crunch.io/api/' + api = "https://test.crunch.io/api/" ds_shoji = { - 'element': 'shoji:entity', - 'body': { - 'id': '123456', - 'name': 'test_dataset_name', - 'notes': '', - 'description': '', - 'is_published': False, - 'streaming': '', - 'archived': False, - 'end_date': None, - 'start_date': None, + "element": "shoji:entity", + "body": { + "id": "123456", + "name": "test_dataset_name", + "notes": "", + "description": "", + "is_published": False, + "streaming": "", + "archived": False, + "end_date": None, + "start_date": None, }, } variables = { - '0001': dict( - id='0001', - alias='var1_alias', - name='var1_name', - description='', - notes='', + "0001": dict( + id="0001", + alias="var1_alias", + name="var1_name", + description="", + notes="", format=None, view=None, - type='numeric', + type="numeric", is_subvar=False, - derived=False + derived=False, ), - '0002': dict( - id='0002', - 
alias='var2_alias', - name='var2_name', - description='', - notes='', + "0002": dict( + id="0002", + alias="var2_alias", + name="var2_name", + description="", + notes="", format=None, view=None, - type='text', + type="text", is_subvar=False, - derived=False + derived=False, ), - '0003': dict( - id='0003', - alias='var3_alias', - name='var3_name', + "0003": dict( + id="0003", + alias="var3_alias", + name="var3_name", description=None, notes=None, format=None, view=None, - type='categorical', + type="categorical", categories=TEST_CATEGORIES(), is_subvar=False, - derived=False + derived=False, ), - '0004': dict( - id='0004', - alias='var4_alias', - name='var4_name', + "0004": dict( + id="0004", + alias="var4_alias", + name="var4_name", description=None, notes=None, format=None, view=None, - type='categorical', + type="categorical", categories=TEST_CATEGORIES(), is_subvar=False, - derived=False - ) + derived=False, + ), } def _dataset_mock(self, ds_shoji=None, variables=None): ds_shoji = ds_shoji or copy.deepcopy(self.ds_shoji) - ds_url = '%sdatasets/%s/' % (self.api, ds_shoji['body']['id']) + ds_url = "%sdatasets/%s/" % (self.api, ds_shoji["body"]["id"]) variables = variables or copy.deepcopy(self.variables) table, _variables = self._build_test_meta(ds_shoji, variables) - ds_shoji['body']['table'] = table + ds_shoji["body"]["table"] = table var_mock_attributes = { - 'by.side_effect': self._variables_by_side_effect(_variables), - 'index.get.side_effect': self._variables_by_side_effect(_variables) + "by.side_effect": self._variables_by_side_effect(_variables), + "index.get.side_effect": self._variables_by_side_effect(_variables), } ds_mock_attributes = { - 'body': ds_shoji['body'], - 'variables': MagicMock(**var_mock_attributes), - 'session': MagicMock(spec=ElementSession), - 'fragments.exclusion': '%sexclusion/' % ds_url + "body": ds_shoji["body"], + "variables": MagicMock(**var_mock_attributes), + "session": MagicMock(spec=ElementSession), + "fragments.exclusion": "%sexclusion/" % ds_url, } _ds_mock = EditableMock(**ds_mock_attributes) _ds_mock.self = ds_url table_mock = MagicMock(metadata=variables) - table_mock.self = table.get('self') + table_mock.self = table.get("self") _ds_mock.follow.return_value = table_mock return _ds_mock def _variable_mock(self, ds_url, variable=None): - variable = variable or self.variables['0001'] - var_url = '%svariables/%s/' % (ds_url, variable['id']) + variable = variable or self.variables["0001"] + var_url = "%svariables/%s/" % (ds_url, variable["id"]) # set attrs outside of entity _var_mock = MagicMock(variable) _var_mock.entity = EditableMock(body=variable) @@ -186,20 +188,20 @@ def _variable_mock(self, ds_url, variable=None): return _var_mock def _build_test_meta(self, ds_shoji, variables): - ds_url = '%sdatasets/%s/' % (self.api, ds_shoji['body']['id']) + ds_url = "%sdatasets/%s/" % (self.api, ds_shoji["body"]["id"]) table = dict( - element='crunch:table', - self='%stable/' % ds_url, - metadata=collections.OrderedDict() + element="crunch:table", + self="%stable/" % ds_url, + metadata=collections.OrderedDict(), ) _variables = dict(id=dict(), name=dict(), alias=dict()) for var in variables: _var_mock = self._variable_mock(ds_url, variables[var]) - _variables['id'].update({variables[var]['id']: _var_mock}) - _variables['name'].update({variables[var]['name']: _var_mock}) - _variables['alias'].update({variables[var]['alias']: _var_mock}) - table['metadata'][variables[var]['id']] = _var_mock + _variables["id"].update({variables[var]["id"]: _var_mock}) + 
_variables["name"].update({variables[var]["name"]: _var_mock}) + _variables["alias"].update({variables[var]["alias"]: _var_mock}) + table["metadata"][variables[var]["id"]] = _var_mock return table, _variables @@ -208,34 +210,34 @@ def _variables_by_side_effect(self, variables): def _get(*args): return _variables.get(args[0]) + return _get class TestDatasets(TestDatasetBase, TestCase): - class Table(Document): - element = 'crunch:table' + element = "crunch:table" def test_edit_dataset(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - assert ds.name == 'test_dataset_name' - changes = dict(name='changed') + assert ds.name == "test_dataset_name" + changes = dict(name="changed") ds.edit(**changes) - assert ds.name == 'changed' + assert ds.name == "changed" ds.resource._edit.assert_called_with(**changes) - assert ds.description == '' - changes = dict(description='changed') + assert ds.description == "" + changes = dict(description="changed") ds.edit(**changes) - assert ds.description == 'changed' + assert ds.description == "changed" ds.resource._edit.assert_called_with(**changes) - assert ds.notes == '' - changes = dict(notes='changed') + assert ds.notes == "" + changes = dict(notes="changed") ds.edit(**changes) - assert ds.notes == 'changed' + assert ds.notes == "changed" ds.resource._edit.assert_called_with(**changes) assert ds.is_published is False @@ -251,163 +253,157 @@ def test_edit_dataset(self): ds.resource._edit.assert_called_with(**changes) assert ds.end_date is None - changes = dict(end_date='2017-01-01') + changes = dict(end_date="2017-01-01") ds.edit(**changes) - assert ds.end_date == '2017-01-01' + assert ds.end_date == "2017-01-01" ds.resource._edit.assert_called_with(**changes) assert ds.start_date is None - changes = dict(start_date='2017-01-01') + changes = dict(start_date="2017-01-01") ds.edit(**changes) - assert ds.start_date == '2017-01-01' + assert ds.start_date == "2017-01-01" ds.resource._edit.assert_called_with(**changes) def process_expr_side_effect(self, expr, ds): return expr - @pytest.mark.skipif(pandas is None, reason='pandas is not installed') - @mock.patch('scrunch.streaming_dataset.StreamingDataset.push_rows') - @mock.patch('pycrunch.importing.Importer.stream_rows') + @pytest.mark.skipif(pandas is None, reason="pandas is not installed") + @mock.patch("scrunch.streaming_dataset.StreamingDataset.push_rows") + @mock.patch("pycrunch.importing.Importer.stream_rows") def test_replace_from_csv(self, mocked_stream_rows, mocked_push_rows): ds_shoji = copy.deepcopy(self.ds_shoji) - ds_shoji['body']['streaming'] = 'negative' + ds_shoji["body"]["streaming"] = "negative" ds_mock = self._dataset_mock(ds_shoji=ds_shoji) ds = MutableDataset(ds_mock) - assert ds.resource.body.get('streaming') == 'negative' + assert ds.resource.body.get("streaming") == "negative" file = StringIO() file.write("id, age\n1, 15") file.seek(0) ds.replace_from_csv(file, chunksize=5) - mocked_stream_rows.assert_called_with(ds_mock, [{'id': 1, ' age': 15}]) + mocked_stream_rows.assert_called_with(ds_mock, [{"id": 1, " age": 15}]) mocked_push_rows.assert_called_with(5) - assert ds.resource.body.get('streaming') == 'negative' + assert ds.resource.body.get("streaming") == "negative" - @mock.patch('scrunch.datasets.process_expr') + @mock.patch("scrunch.datasets.process_expr") def test_replace_values_sync(self, mocked_process): mocked_process.side_effect = self.process_expr_side_effect variables = { - '001': { - 'id': '001', - 'alias': 'birthyr', - 'name': 'Birthyear', - 'type': 'numeric' 
+ "001": { + "id": "001", + "alias": "birthyr", + "name": "Birthyear", + "type": "numeric", }, - '002': { - 'id': '002', - 'alias': 'level', - 'name': 'Level', - 'type': 'numeric' - } + "002": {"id": "002", "alias": "level", "name": "Level", "type": "numeric"}, } ds_mock = self._dataset_mock(variables=variables) ds = MutableDataset(ds_mock) ds.resource = MagicMock() ds.resource.table.post.side_effect = [MagicMock(status_code=204)] - ds.replace_values({'birthyr': 9, 'level': 8}) + ds.replace_values({"birthyr": 9, "level": 8}) call = json.loads(ds.resource.table.post.call_args[0][0]) - assert 'command' in call - assert call['command'] == 'update' - assert 'variables' in call - assert '001' in call['variables'] - assert 'value' in call['variables']['001'] - assert call['variables']['001']['value'] == 9 - assert '002' in call['variables'] - assert 'value' in call['variables']['002'] - assert call['variables']['002']['value'] == 8 - - @mock.patch('scrunch.datasets.process_expr') + assert "command" in call + assert call["command"] == "update" + assert "variables" in call + assert "001" in call["variables"] + assert "value" in call["variables"]["001"] + assert call["variables"]["001"]["value"] == 9 + assert "002" in call["variables"] + assert "value" in call["variables"]["002"] + assert call["variables"]["002"]["value"] == 8 + + @mock.patch("scrunch.datasets.process_expr") def test_replace_values_filter(self, mocked_process): mocked_process.side_effect = self.process_expr_side_effect ds_mock = self._dataset_mock() ds = MutableDataset(ds_mock) ds.resource = MagicMock() - ds.resource.table = self.Table(session=MagicMock(), self='http://a/?b=c') + ds.resource.table = self.Table(session=MagicMock(), self="http://a/?b=c") post_mock = MagicMock() post_mock.side_effect = [MagicMock(status_code=204)] setattr(ds.resource.table, "post", post_mock) - ds.replace_values({'var3_alias': 1}, filter='var4_alias == 2') - assert ds.resource.table.self == 'http://a/' + ds.replace_values({"var3_alias": 1}, filter="var4_alias == 2") + assert ds.resource.table.self == "http://a/" - @mock.patch('scrunch.datasets.process_expr') + @mock.patch("scrunch.datasets.process_expr") def test_create_numeric(self, mocked_process): mocked_process.side_effect = self.process_expr_side_effect variables = { - '001': { - 'id': '001', - 'alias': 'weekly_rent', - 'name': 'Week rent', - 'type': 'numeric', - 'is_subvar': False - }, + "001": { + "id": "001", + "alias": "weekly_rent", + "name": "Week rent", + "type": "numeric", + "is_subvar": False, + } } ds_mock = self._dataset_mock(variables=variables) ds = MutableDataset(ds_mock) ds.resource = mock.MagicMock() ds.create_numeric( - alias='monthly_rent', - name='Monthly rent', - description='Rent paid per month', - notes='All UK adults', - derivation='(weekly_rent * 52) / 12' + alias="monthly_rent", + name="Monthly rent", + description="Rent paid per month", + notes="All UK adults", + derivation="(weekly_rent * 52) / 12", ) ds.resource.variables.create.assert_called_with( { - 'element': 'shoji:entity', - 'body': { - 'alias': 'monthly_rent', - 'name': 'Monthly rent', - 'derivation': { - 'function': '/', - 'args': [ + "element": "shoji:entity", + "body": { + "alias": "monthly_rent", + "name": "Monthly rent", + "derivation": { + "function": "/", + "args": [ { - 'function': '*', - 'args': [ - {'variable': 'weekly_rent'}, - {'value': 52} - ] + "function": "*", + "args": [{"variable": "weekly_rent"}, {"value": 52}], }, - {'value': 12} - ] + {"value": 12}, + ], }, - 'description': 'Rent paid per 
month', - 'notes': 'All UK adults' - } + "description": "Rent paid per month", + "notes": "All UK adults", + }, } ) - @mock.patch('scrunch.datasets.process_expr') + @mock.patch("scrunch.datasets.process_expr") def test_rollup(self, mocked_process): mocked_process.side_effect = self.process_expr_side_effect variables = { - '001': { - 'id': '001', - 'alias': 'datetime_var', - 'name': 'Datetime Variable', - 'type': 'datetime', - 'is_subvar': False - }, + "001": { + "id": "001", + "alias": "datetime_var", + "name": "Datetime Variable", + "type": "datetime", + "is_subvar": False, + } } ds_mock = self._dataset_mock(variables=variables) ds = MutableDataset(ds_mock) ds.resource = mock.MagicMock() - ds.rollup('datetime_var', 'new_rolledup_var', 'new_rolledup_var', 'Y') + ds.rollup("datetime_var", "new_rolledup_var", "new_rolledup_var", "Y") ds.resource.variables.create.assert_called_with( { - 'element': 'shoji:entity', - 'body': { - 'alias': 'new_rolledup_var', - 'name': 'new_rolledup_var', - 'expr': { - 'function': 'rollup', - 'args': [ - {'variable': 'https://test.crunch.io/api/datasets/123456/variables/001/'}, - {'value': 'Y'} - ] + "element": "shoji:entity", + "body": { + "alias": "new_rolledup_var", + "name": "new_rolledup_var", + "expr": { + "function": "rollup", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/001/" + }, + {"value": "Y"}, + ], }, - 'description': '', - 'notes': '' - } + "description": "", + "notes": "", + }, } ) @@ -416,37 +412,39 @@ def test_create_crunchbox_full(self): ds = StreamingDataset(ds_mock) call_params = dict( - title='my title', - header='my header', - footer='my footer', - notes='my notes', + title="my title", + header="my header", + footer="my footer", + notes="my notes", min_base_size=50, - weight='001', + weight="001", palette={ "brand": ["#111111", "#222222", "#333333"], "static_colors": ["#444444", "#555555", "#666666"], "base": ["#777777", "#888888", "#999999"], - } + }, ) expected_payload = { - 'element': 'shoji:entity', - 'body': { - 'header': 'my header', - 'footer': 'my footer', - 'title': 'my title', - 'display_settings': {'palette': { - 'base': ['#777777', '#888888', '#999999'], - 'brand': ['#111111', '#222222', '#333333'], - 'static_colors': ['#444444', '#555555', '#666666']}, - 'minBaseSize': {'value': 50} + "element": "shoji:entity", + "body": { + "header": "my header", + "footer": "my footer", + "title": "my title", + "display_settings": { + "palette": { + "base": ["#777777", "#888888", "#999999"], + "brand": ["#111111", "#222222", "#333333"], + "static_colors": ["#444444", "#555555", "#666666"], + }, + "minBaseSize": {"value": 50}, }, - 'filters': None, - 'notes': 'my notes', - 'force': False, - 'where': None, - 'weight': '001' - } + "filters": None, + "notes": "my notes", + "force": False, + "where": None, + "weight": "001", + }, } ds.create_crunchbox(**call_params) @@ -455,19 +453,17 @@ def test_create_crunchbox_full(self): def test_create_crunchbox_defaults(self): def mock_ds_preferences(mock): preferences = { - 'element': 'shoji:entity', - 'self': '%spreferences/' % mock.ds_url, - 'body': dict_to_obj({ - 'weight': '0001' - }) + "element": "shoji:entity", + "self": "%spreferences/" % mock.ds_url, + "body": dict_to_obj({"weight": "0001"}), } def _session_get(*args): - if args[0] == '{}preferences/'.format(mock.ds_url): + if args[0] == "{}preferences/".format(mock.ds_url): return _CrunchPayload(preferences) return _CrunchPayload() - mock.fragments.preferences = '%spreferences/' % mock.ds_url + 
mock.fragments.preferences = "%spreferences/" % mock.ds_url mock.session.get.side_effect = _session_get ds_mock = self._dataset_mock() @@ -476,17 +472,17 @@ def _session_get(*args): ds = StreamingDataset(ds_mock) expected_payload = { - 'element': 'shoji:entity', - 'body': { - 'header': '', - 'footer': '', - 'title': 'CrunchBox for test_dataset_name', - 'filters': None, - 'notes': '', - 'force': False, - 'where': None, - 'weight': '0001' - } + "element": "shoji:entity", + "body": { + "header": "", + "footer": "", + "title": "CrunchBox for test_dataset_name", + "filters": None, + "notes": "", + "force": False, + "where": None, + "weight": "0001", + }, } ds.create_crunchbox() @@ -494,12 +490,7 @@ def _session_get(*args): def test_derive_weight(self): variables = { - '001': { - 'id': '001', - 'alias': 'foo', - 'name': 'bar', - 'type': 'numeric', - } + "001": {"id": "001", "alias": "foo", "name": "bar", "type": "numeric"} } ds_mock = self._dataset_mock(variables=variables) ds = MutableDataset(ds_mock) @@ -538,28 +529,29 @@ def test_derive_weight(self): 29: 0.019, 30: 0.002, 31: 0.004, - 32: 0.011 + 32: 0.011, } } ] def _test_sum(dct, use_fsum=False): from math import fsum + func = fsum if use_fsum else sum res = func(dct.values()) if not use_fsum and res != 1.0: - raise Exception('not 1.0') + raise Exception("not 1.0") if use_fsum and res == 1.0: - raise Exception('1.0') + raise Exception("1.0") # `sum` fails - with pytest.raises(Exception, match='not 1.0'): - _test_sum(targets[0]['foo']) + with pytest.raises(Exception, match="not 1.0"): + _test_sum(targets[0]["foo"]) # `fsum` does not - with pytest.raises(Exception, match='1.0'): - _test_sum(targets[0]['foo'], use_fsum=True) + with pytest.raises(Exception, match="1.0"): + _test_sum(targets[0]["foo"], use_fsum=True) # Now test that we don't fail on the above targets in `derive_weight` # Using sum, this would raise: @@ -567,11 +559,10 @@ def _test_sum(dct, use_fsum=False): # mock the variable creation ds._var_create_reload_return = MagicMock() - ds.derive_weight(targets=targets, alias='weight', name='Weight') + ds.derive_weight(targets=targets, alias="weight", name="Weight") class TestExclusionFilters(TestDatasetBase, TestCase): - def test_apply_exclusion(self): """ Tests that the proper PATCH request is sent to Crunch in order to @@ -579,10 +570,10 @@ def test_apply_exclusion(self): """ ds_res = self._dataset_mock() ds = StreamingDataset(ds_res) - var = ds['var1_alias'] + var = ds["var1_alias"] # Action! - exclusion_filter = 'var1_alias != 0' + exclusion_filter = "var1_alias != 0" ds.exclude(exclusion_filter) # Ensure .patch was called the right way. @@ -592,15 +583,15 @@ def test_apply_exclusion(self): assert call[0][0] == ds.resource.fragments.exclusion expected_expr_obj = { - 'expression': { - 'function': '!=', - 'args': [ - {'variable': var.url}, # Crunch needs variable URLs! - {'value': 0} - ] + "expression": { + "function": "!=", + "args": [ + {"variable": var.url}, # Crunch needs variable URLs! 
+ {"value": 0}, + ], } } - assert json.loads(call[1]['data']) == expected_expr_obj + assert json.loads(call[1]["data"]) == expected_expr_obj def test_remove_exclusion(self): """ @@ -612,28 +603,24 @@ def test_remove_exclusion(self): ds.exclude() ds.resource.session.patch.assert_called_once_with( - ds.resource.fragments.exclusion, - data=json.dumps({'expression': {}}) + ds.resource.fragments.exclusion, data=json.dumps({"expression": {}}) ) def _exclude_payload(self, ds, expr): ds.exclude(expr) call = ds.resource.session.patch.call_args_list[0] - return json.loads(call[1]['data']) + return json.loads(call[1]["data"]) def test_gt(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] - data = self._exclude_payload(ds, 'var1_alias > 5') + data = self._exclude_payload(ds, "var1_alias > 5") expected_expr_obj = { - 'expression': { - 'function': '>', - 'args': [ - {'variable': var.url}, - {'value': 5} - ] + "expression": { + "function": ">", + "args": [{"variable": var.url}, {"value": 5}], } } assert data == expected_expr_obj @@ -641,16 +628,13 @@ def test_gt(self): def test_in(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] - data = self._exclude_payload(ds, 'var1_alias in [32766]') + data = self._exclude_payload(ds, "var1_alias in [32766]") expected_expr_obj = { "expression": { "function": "in", - "args": [ - {"variable": var.url}, - {"value": [32766]} - ] + "args": [{"variable": var.url}, {"value": [32766]}], } } @@ -659,16 +643,13 @@ def test_in(self): def test_in_multiple(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] - data = self._exclude_payload(ds, 'var1_alias in (32766, 32767)') + data = self._exclude_payload(ds, "var1_alias in (32766, 32767)") expected_expr_obj = { "expression": { "function": "in", - "args": [ - {"variable": var.url}, - {"value": [32766, 32767]} - ] + "args": [{"variable": var.url}, {"value": [32766, 32767]}], } } @@ -676,26 +657,20 @@ def test_in_multiple(self): def test_not_and(self): variables = { - '0001': dict( - id='0001', - alias='disposition', - name='Disposition', - type='numeric' + "0001": dict( + id="0001", alias="disposition", name="Disposition", type="numeric" ), - '0002': dict( - id='0002', - alias='exit_status', - name='Exit', - type='numeric' - ) + "0002": dict(id="0002", alias="exit_status", name="Exit", type="numeric"), } ds_mock = self._dataset_mock(variables=variables) ds = StreamingDataset(ds_mock) - var1 = ds['disposition'] - var2 = ds['exit_status'] + var1 = ds["disposition"] + var2 = ds["exit_status"] - data = self._exclude_payload(ds, 'not (disposition in (1, 2) and exit_status == 0)') + data = self._exclude_payload( + ds, "not (disposition in (1, 2) and exit_status == 0)" + ) expected_expr_obj = { "expression": { "function": "not", @@ -705,32 +680,15 @@ def test_not_and(self): "args": [ { "function": "in", - "args": [ - { - "variable": var1.url - }, - { - "value": [ - 1, - 2 - ] - } - ] + "args": [{"variable": var1.url}, {"value": [1, 2]}], }, { "function": "==", - "args": [ - { - "variable": var2.url - }, - { - "value": 0 - } - ] - } - ] + "args": [{"variable": var2.url}, {"value": 0}], + }, + ], } - ] + ], } } @@ -739,22 +697,13 @@ def test_not_and(self): def test_any(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] - data = self._exclude_payload(ds, 
'var1_alias.any([32766])') + data = self._exclude_payload(ds, "var1_alias.any([32766])") expected_expr_obj = { "expression": { "function": "in", - "args": [ - { - "variable": var.url - }, - { - "value": [ - 32766 - ] - } - ] + "args": [{"variable": var.url}, {"value": [32766]}], } } @@ -763,27 +712,18 @@ def test_any(self): def test_not_any(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] - data = self._exclude_payload(ds, 'not var1_alias.any([32766])') + data = self._exclude_payload(ds, "not var1_alias.any([32766])") expected_expr_obj = { "expression": { "function": "not", "args": [ { "function": "in", - "args": [ - { - "variable": var.url - }, - { - "value": [ - 32766 - ] - } - ] + "args": [{"variable": var.url}, {"value": [32766]}], } - ] + ], } } @@ -792,23 +732,13 @@ def test_not_any(self): def test_any_multiple(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] - data = self._exclude_payload(ds, 'var1_alias.any([32766, 32767])') + data = self._exclude_payload(ds, "var1_alias.any([32766, 32767])") expected_expr_obj = { "expression": { "function": "in", - "args": [ - { - "variable": var.url - }, - { - "value": [ - 32766, - 32767 - ] - } - ] + "args": [{"variable": var.url}, {"value": [32766, 32767]}], } } @@ -817,20 +747,13 @@ def test_any_multiple(self): def test_all(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] - data = self._exclude_payload(ds, 'var1_alias.all([32767])') + data = self._exclude_payload(ds, "var1_alias.all([32767])") expected_expr_obj = { "expression": { - "args": [ - { - "variable": var.url - }, - { - "value": [32767] - } - ], - "function": "all" + "args": [{"variable": var.url}, {"value": [32767]}], + "function": "all", } } @@ -839,27 +762,18 @@ def test_all(self): def test_not_all(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] - data = self._exclude_payload(ds, 'not var1_alias.all([32767])') + data = self._exclude_payload(ds, "not var1_alias.all([32767])") expected_expr_obj = { "expression": { "function": "not", "args": [ { "function": "all", - "args": [ - { - "variable": var.url - }, - { - "value": [ - 32767 - ] - } - ] + "args": [{"variable": var.url}, {"value": [32767]}], } - ] + ], } } @@ -868,40 +782,22 @@ def test_not_all(self): def test_all_or_all(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] - data = self._exclude_payload(ds, 'var1_alias.all([1]) or var1_alias.all([2])') + data = self._exclude_payload(ds, "var1_alias.all([1]) or var1_alias.all([2])") expected_expr_obj = { "expression": { "args": [ { - "args": [ - { - "variable": var.url - }, - { - "value": [ - 1 - ] - } - ], - "function": "all" + "args": [{"variable": var.url}, {"value": [1]}], + "function": "all", }, { - "args": [ - { - "variable": var.url - }, - { - "value": [ - 2 - ] - } - ], - "function": "all" - } + "args": [{"variable": var.url}, {"value": [2]}], + "function": "all", + }, ], - "function": "or" + "function": "or", } } @@ -910,9 +806,11 @@ def test_all_or_all(self): def test_not_all_or_all(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] - data = self._exclude_payload(ds, 'not(var1_alias.all([1]) or var1_alias.all([2]))') + data = self._exclude_payload( + ds, 
"not(var1_alias.all([1]) or var1_alias.all([2]))" + ) expected_expr_obj = { "expression": { "function": "not", @@ -920,35 +818,17 @@ def test_not_all_or_all(self): { "args": [ { - "args": [ - { - "variable": var.url - }, - { - "value": [ - 1 - ] - } - ], - "function": "all" + "args": [{"variable": var.url}, {"value": [1]}], + "function": "all", }, { - "args": [ - { - "variable": var.url - }, - { - "value": [ - 2 - ] - } - ], - "function": "all" - } + "args": [{"variable": var.url}, {"value": [2]}], + "function": "all", + }, ], - "function": "or" + "function": "or", } - ] + ], } } @@ -957,18 +837,11 @@ def test_not_all_or_all(self): def test_duplicates(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] - data = self._exclude_payload(ds, 'var1_alias.duplicates()') + data = self._exclude_payload(ds, "var1_alias.duplicates()") expected_expr_obj = { - "expression": { - "function": "duplicates", - "args": [ - { - "variable": var.url - } - ] - } + "expression": {"function": "duplicates", "args": [{"variable": var.url}]} } assert data == expected_expr_obj @@ -976,18 +849,11 @@ def test_duplicates(self): def test_valid(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] - data = self._exclude_payload(ds, 'valid(var1_alias)') + data = self._exclude_payload(ds, "valid(var1_alias)") expected_expr_obj = { - "expression": { - "function": "is_valid", - "args": [ - { - "variable": var.url - } - ] - } + "expression": {"function": "is_valid", "args": [{"variable": var.url}]} } assert data == expected_expr_obj @@ -995,22 +861,13 @@ def test_valid(self): def test_not_valid(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] - data = self._exclude_payload(ds, 'not valid(var1_alias)') + data = self._exclude_payload(ds, "not valid(var1_alias)") expected_expr_obj = { "expression": { - "args": [ - { - "args": [ - { - "variable": var.url - } - ], - "function": "is_valid" - } - ], - "function": "not" + "args": [{"args": [{"variable": var.url}], "function": "is_valid"}], + "function": "not", } } @@ -1019,18 +876,11 @@ def test_not_valid(self): def test_missing(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] - data = self._exclude_payload(ds, 'missing(var1_alias)') + data = self._exclude_payload(ds, "missing(var1_alias)") expected_expr_obj = { - "expression": { - "args": [ - { - "variable": var.url - } - ], - "function": "is_missing" - } + "expression": {"args": [{"variable": var.url}], "function": "is_missing"} } assert data == expected_expr_obj @@ -1038,22 +888,13 @@ def test_missing(self): def test_not_missing(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] - data = self._exclude_payload(ds, 'not missing(var1_alias)') + data = self._exclude_payload(ds, "not missing(var1_alias)") expected_expr_obj = { "expression": { "function": "not", - "args": [ - { - "function": "is_missing", - "args": [ - { - "variable": var.url - } - ] - } - ] + "args": [{"function": "is_missing", "args": [{"variable": var.url}]}], } } @@ -1062,20 +903,13 @@ def test_not_missing(self): def test_equal(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] - data = self._exclude_payload(ds, 'var1_alias == 1') + data = self._exclude_payload(ds, 
"var1_alias == 1") expected_expr_obj = { "expression": { - "args": [ - { - "variable": var.url - }, - { - "value": 1 - } - ], - "function": "==" + "args": [{"variable": var.url}, {"value": 1}], + "function": "==", } } @@ -1083,134 +917,83 @@ def test_equal(self): def test_nested(self): variables = { - '0001': dict( - id='0001', - alias='disposition', - name='Disposition', - type='numeric' + "0001": dict( + id="0001", alias="disposition", name="Disposition", type="numeric" ), - '0002': dict( - id='0002', - alias='exit_status', - name='Exit', - type='numeric' - ) + "0002": dict(id="0002", alias="exit_status", name="Exit", type="numeric"), } ds_mock = self._dataset_mock(variables=variables) ds = StreamingDataset(ds_mock) - var1 = ds['disposition'] - var2 = ds['exit_status'] + var1 = ds["disposition"] + var2 = ds["exit_status"] - data = self._exclude_payload(ds, '(disposition != 1 and (not valid(exit_status) or exit_status >= 1)) or (disposition == 0 and exit_status == 0) or (disposition == 0 and exit_status == 1)') + data = self._exclude_payload( + ds, + "(disposition != 1 and (not valid(exit_status) or exit_status >= 1)) or (disposition == 0 and exit_status == 0) or (disposition == 0 and exit_status == 1)", + ) expected_expr_obj = { "expression": { "args": [ { "args": [ { - "args": [ - { - "variable": var1.url - }, - { - "value": 1 - } - ], - "function": "!=" + "args": [{"variable": var1.url}, {"value": 1}], + "function": "!=", }, { "args": [ { "args": [ { - "args": [ - { - "variable": var2.url - } - ], - "function": "is_valid" + "args": [{"variable": var2.url}], + "function": "is_valid", } ], - "function": "not" + "function": "not", }, { - "args": [ - { - "variable": var2.url - }, - { - "value": 1 - } - ], - "function": ">=" - } + "args": [{"variable": var2.url}, {"value": 1}], + "function": ">=", + }, ], - "function": "or" - } + "function": "or", + }, ], - "function": "and" + "function": "and", }, { "args": [ { "args": [ { - "args": [ - { - "variable": var1.url - }, - { - "value": 0 - } - ], - "function": "==" + "args": [{"variable": var1.url}, {"value": 0}], + "function": "==", }, { - "args": [ - { - "variable": var2.url - }, - { - "value": 0 - } - ], - "function": "==" - } + "args": [{"variable": var2.url}, {"value": 0}], + "function": "==", + }, ], - "function": "and" + "function": "and", }, { "args": [ { - "args": [ - { - "variable": var1.url - }, - { - "value": 0 - } - ], - "function": "==" + "args": [{"variable": var1.url}, {"value": 0}], + "function": "==", }, { - "args": [ - { - "variable": var2.url - }, - { - "value": 1 - } - ], - "function": "==" - } + "args": [{"variable": var2.url}, {"value": 1}], + "function": "==", + }, ], - "function": "and" - } + "function": "and", + }, ], - "function": "or" - } + "function": "or", + }, ], - "function": "or" + "function": "or", } } @@ -1222,41 +1005,37 @@ def test_dict_expr(self): expr = { "args": [ - { - "variable": "http://test.crunch.io/api/datasets/123/variables/0002/" - }, - { - "value": 1 - } + {"variable": "http://test.crunch.io/api/datasets/123/variables/0002/"}, + {"value": 1}, ], - "function": "==" + "function": "==", } data = self._exclude_payload(ds, expr) - expected_expr_obj = {'expression': expr} + expected_expr_obj = {"expression": expr} assert data == expected_expr_obj class TestProtectAttributes(TestDatasetBase, TestCase): - error_msg = 'use the edit() method for mutating attributes' + error_msg = "use the edit() method for mutating attributes" def test_Dataset_attribute_writes(self): ds_mock = self._dataset_mock() ds = 
StreamingDataset(ds_mock) - assert ds.name == 'test_dataset_name' + assert ds.name == "test_dataset_name" with pytest.raises(AttributeError) as excinfo: - ds.name = 'forbidden' - assert ds.name == 'test_dataset_name' + ds.name = "forbidden" + assert ds.name == "test_dataset_name" assert str(excinfo.value) == self.error_msg with pytest.raises(AttributeError) as excinfo: - ds.notes = 'forbidden' - assert ds.notes == '' + ds.notes = "forbidden" + assert ds.notes == "" assert str(excinfo.value) == self.error_msg with pytest.raises(AttributeError) as excinfo: - ds.description = 'forbidden' - assert ds.description == '' + ds.description = "forbidden" + assert ds.description == "" assert str(excinfo.value) == self.error_msg with pytest.raises(AttributeError) as excinfo: @@ -1270,42 +1049,42 @@ def test_Dataset_attribute_writes(self): assert str(excinfo.value) == self.error_msg with pytest.raises(AttributeError) as excinfo: - ds.end_date = 'forbidden' + ds.end_date = "forbidden" assert ds.end_date is None assert str(excinfo.value) == self.error_msg with pytest.raises(AttributeError) as excinfo: - ds.start_date = 'forbidden' + ds.start_date = "forbidden" assert ds.start_date is None assert str(excinfo.value) == self.error_msg def test_Variable_attribute_writes(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] with pytest.raises(AttributeError) as excinfo: - var.name = 'forbidden' - assert var.name == 'var1_name' + var.name = "forbidden" + assert var.name == "var1_name" assert str(excinfo.value) == self.error_msg with pytest.raises(AttributeError) as excinfo: - var.description = 'forbidden' - assert var.description == '' + var.description = "forbidden" + assert var.description == "" assert str(excinfo.value) == self.error_msg with pytest.raises(AttributeError) as excinfo: - var.notes = 'forbidden' - assert var.notes == '' + var.notes = "forbidden" + assert var.notes == "" assert str(excinfo.value) == self.error_msg with pytest.raises(AttributeError) as excinfo: - var.format = 'forbidden' + var.format = "forbidden" assert var.format is None assert str(excinfo.value) == self.error_msg with pytest.raises(AttributeError) as excinfo: - var.view = 'forbidden' + var.view = "forbidden" assert var.view is None assert str(excinfo.value) == self.error_msg @@ -1314,60 +1093,65 @@ class TestVariables(TestDatasetBase, TestCase): def test_variable_as_member(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - assert ds.name == self.ds_shoji['body']['name'] - assert ds.id == self.ds_shoji['body']['id'] + assert ds.name == self.ds_shoji["body"]["name"] + assert ds.id == self.ds_shoji["body"]["id"] - assert isinstance(ds['var1_alias'], Variable) + assert isinstance(ds["var1_alias"], Variable) with pytest.raises(ValueError) as err: - ds['some_variable'] - assert str(err.value) == \ - 'Entity %s has no (sub)variable with a name or alias some_variable' % ds.name + ds["some_variable"] + assert ( + str(err.value) + == "Entity %s has no (sub)variable with a name or alias some_variable" + % ds.name + ) with pytest.raises(AttributeError) as err: ds.some_variable - assert str(err.value) == \ - "'StreamingDataset' object has no attribute 'some_variable'" + assert ( + str(err.value) + == "'StreamingDataset' object has no attribute 'some_variable'" + ) def test_variable_cast(self): variable = MagicMock() cast( variable, - type='numeric', - offset='offset', - resolution='resolution', - format='format' + type="numeric", + offset="offset", + 
resolution="resolution", + format="format", ) call = variable.cast.post.call_args_list[0] - data = json.loads(call[1]['data']) + data = json.loads(call[1]["data"]) assert data == { - 'cast_as': 'numeric', - 'resolution': 'resolution', - 'offset': 'offset', - 'format': 'format' + "cast_as": "numeric", + "resolution": "resolution", + "offset": "offset", + "format": "format", } def test_edit_Variables(self): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] - assert var.name == 'var1_name' - changes = dict(name='changed') + assert var.name == "var1_name" + changes = dict(name="changed") var.edit(**changes) - assert var.name == 'changed' + assert var.name == "changed" var.resource._edit.assert_called_with(**changes) - assert var.description == '' - changes = dict(description='changed') + assert var.description == "" + changes = dict(description="changed") var.edit(**changes) - assert var.description == 'changed' + assert var.description == "changed" var.resource._edit.assert_called_with(**changes) - assert var.notes == '' - changes = dict(notes='changed') + assert var.notes == "" + changes = dict(notes="changed") var.edit(**changes) - assert var.notes == 'changed' + assert var.notes == "changed" var.resource._edit.assert_called_with(**changes) assert var.format is None @@ -1385,86 +1169,85 @@ def test_edit_Variables(self): def test_edit_alias(self): ds_mock = self._dataset_mock() ds = BaseDataset(ds_mock) - var = ds['var1_alias'] - with pytest.raises(AttributeError) as e: - var.edit(alias='test1') - ds.resource.body['streaming'] = 'no' - var = ds['var1_alias'] - var.edit(alias='test1') - var2 = ds['var2_alias'] + var = ds["var1_alias"] + with pytest.raises(AttributeError): + var.edit(alias="test1") + ds.resource.body["streaming"] = "no" + var = ds["var1_alias"] + var.edit(alias="test1") def test_edit_resolution(self): variables = { - '001': { - 'id': '001', - 'alias': 'datetime_var', - 'name': 'Datetime Variable', - 'type': 'datetime', - 'is_subvar': False, - 'view': { - 'rollup_resolution': 'ms', - 'width': 10, - } - }, + "001": { + "id": "001", + "alias": "datetime_var", + "name": "Datetime Variable", + "type": "datetime", + "is_subvar": False, + "view": {"rollup_resolution": "ms", "width": 10}, + } } ds_mock = self._dataset_mock(variables=variables) ds = MutableDataset(ds_mock) ds.resource = mock.MagicMock() - var = ds['datetime_var'] - updated_var = var.edit_resolution('M') + var = ds["datetime_var"] + updated_var = var.edit_resolution("M") var.resource._edit.assert_called_with( - view={ - 'rollup_resolution': 'M', - 'width': 10 - } + view={"rollup_resolution": "M", "width": 10} ) - assert updated_var.view['width'] == 10 - assert updated_var.view['rollup_resolution'] == 'M' + assert updated_var.view["width"] == 10 + assert updated_var.view["rollup_resolution"] == "M" def test_add_category(self): ds_mock = self._dataset_mock() ds = BaseDataset(ds_mock) - var = ds['var4_alias'] - var.resource.body['type'] = 'categorical' - var.resource.body['categories'] = [ + var = ds["var4_alias"] + var.resource.body["type"] = "categorical" + var.resource.body["categories"] = [ {"id": 1, "name": "Female", "missing": False, "numeric_value": 1}, {"id": 8, "name": "Male", "missing": False, "numeric_value": 8}, - {"id": 9, "name": "No Data", "missing": True, "numeric_value": 9} + {"id": 9, "name": "No Data", "missing": True, "numeric_value": 9}, ] var.CATEGORICAL_TYPES = { - 'categorical', 'multiple_response', 'categorical_array', + "categorical", + 
"multiple_response", + "categorical_array", } - var.add_category(2, 'New category', 2, before_id=9) - var.resource._edit.assert_called_with(categories=var.resource.body['categories']) + var.add_category(2, "New category", 2, before_id=9) + var.resource._edit.assert_called_with( + categories=var.resource.body["categories"] + ) def test_add_category_date(self): ds_mock = self._dataset_mock() ds = BaseDataset(ds_mock) - var = ds['var4_alias'] - var.resource.body['type'] = 'categorical' + var = ds["var4_alias"] + var.resource.body["type"] = "categorical" categories = [ {"id": 1, "name": "Female", "missing": False, "numeric_value": 1}, {"id": 8, "name": "Male", "missing": False, "numeric_value": 8}, - {"id": 9, "name": "No Data", "missing": True, "numeric_value": 9} + {"id": 9, "name": "No Data", "missing": True, "numeric_value": 9}, ] - var.resource.body['categories'] = categories[:] + var.resource.body["categories"] = categories[:] with pytest.raises(ValueError) as err: - var.add_category(2, 'New category', 2, date=object()) + var.add_category(2, "New category", 2, date=object()) assert str(err.value) == "Date must be a string" with pytest.raises(ValueError) as err: - var.add_category(2, 'New category', 2, date="invalid date") + var.add_category(2, "New category", 2, date="invalid date") assert str(err.value) == "Date must conform to Y-m-d format" - var.add_category(2, 'New category', 2, date="2021-12-12") - new_categories = categories[:] + [{ - "id": 2, - "name": "New category", - "date": "2021-12-12", - "numeric_value": 2, - "missing": False - }] + var.add_category(2, "New category", 2, date="2021-12-12") + new_categories = categories[:] + [ + { + "id": 2, + "name": "New category", + "date": "2021-12-12", + "numeric_value": 2, + "missing": False, + } + ] var.resource._edit.assert_called_with(categories=new_categories) def test_integrate_variables(self): @@ -1483,19 +1266,20 @@ def getitem(key): assert not var_tuple.entity.edit.called # check we call `edit` for derived variables - body['derived'] = True + body["derived"] = True var.integrate() var_tuple.entity.edit.assert_called_once_with(derived=False) def test_update_missing_rules(self): ds_mock = self._dataset_mock() ds = BaseDataset(ds_mock) - var = ds['var1_alias'] + var = ds["var1_alias"] - assert var.name == 'var1_name' + assert var.name == "var1_name" def put_side_effect(*x, **y): - return AttributeDict({'status_code': 204}) + return AttributeDict({"status_code": 204}) + var._resource.session.put.side_effect = put_side_effect var.set_missing_rules({"skipped": 9, "not asked": 8}) var._resource.session.put.assert_called_once() @@ -1505,18 +1289,15 @@ def put_side_effect(*x, **y): assert kwargs == {} assert args[0] == var._resource.fragments.missing_rules assert json.loads(args[1]) == { - "rules": { - "skipped": {'value': 9}, - "not asked": {'value': 8} - } + "rules": {"skipped": {"value": 9}, "not asked": {"value": 8}} } class TestCurrentEditor(TestDatasetBase, TestCase): - ds_url = 'https://test.crunch.io/api/datasets/123456/' - user_url = 'https://test.crunch.io/api/users/12345/' + ds_url = "https://test.crunch.io/api/datasets/123456/" + user_url = "https://test.crunch.io/api/users/12345/" - @mock.patch('scrunch.datasets.get_user') + @mock.patch("scrunch.datasets.get_user") def test_change_editor_email(self, mocked_get_user): sess = MagicMock() response = MagicMock() @@ -1524,13 +1305,7 @@ def test_change_editor_email(self, mocked_get_user): user.resource.self = self.user_url user.url = self.user_url mocked_get_user.return_value = 
user - response.payload = { - 'index': { - self.user_url: { - 'email': 'jane.doe@crunch.io' - } - } - } + response.payload = {"index": {self.user_url: {"email": "jane.doe@crunch.io"}}} def _get(*args, **kwargs): return response @@ -1542,25 +1317,23 @@ def _get(*args, **kwargs): ds = StreamingDataset(ds_res) # Change sending user email - ds.change_editor('jane.doe@crunch.io') - ds_res.patch.assert_called_with({ - "element": "shoji:entity", - "body": {'current_editor': self.user_url} - }) + ds.change_editor("jane.doe@crunch.io") + ds_res.patch.assert_called_with( + {"element": "shoji:entity", "body": {"current_editor": self.user_url}} + ) # Change sending user URL ds_res.patch = MagicMock() ds.change_editor(self.user_url) - ds_res.patch.assert_called_with({ - "element": "shoji:entity", - "body": {'current_editor': self.user_url} - }) + ds_res.patch.assert_called_with( + {"element": "shoji:entity", "body": {"current_editor": self.user_url}} + ) class TestCurrentOwner(TestDatasetBase, TestCase): - user_url = 'https://test.crunch.io/api/users/12345/' - user_email = 'test@crunch.com' - project_url = 'https://test.crunch.io/api/projects/12345/' + user_url = "https://test.crunch.io/api/users/12345/" + user_email = "test@crunch.com" + project_url = "https://test.crunch.io/api/projects/12345/" def test_change_owner_exception(self): ds_mock = self._dataset_mock() @@ -1569,7 +1342,7 @@ def test_change_owner_exception(self): ds.change_owner(user=self.user_url, project=self.project_url) assert e.message == "Must provide user or project. Not both" - @mock.patch('scrunch.datasets.get_user') + @mock.patch("scrunch.datasets.get_user") def test_change_owner(self, mocked_get_user): user = MagicMock() user.resource.self = self.user_url @@ -1578,9 +1351,7 @@ def test_change_owner(self, mocked_get_user): ds_mock = self._dataset_mock() ds = StreamingDataset(ds_mock) ds.change_owner(user=user) - ds_mock.patch.assert_called_with({ - 'owner': self.user_url - }) + ds_mock.patch.assert_called_with({"owner": self.user_url}) def _load_dataset(self, dataset_resource): dataset_resource.variables = MagicMock() @@ -1589,30 +1360,32 @@ def _load_dataset(self, dataset_resource): dataset_resource.refresh = MagicMock() def _project(self, session, project_url): - project = Project(Entity(session, **{ - "self": project_url, - "element": "shoji:entity", - "body": { - "name": "Targer project" - } - })) + project = Project( + Entity( + session, + **{ + "self": project_url, + "element": "shoji:entity", + "body": {"name": "Targer project"}, + }, + ) + ) project.move_here = MagicMock() return project def test_move_to_project(self): session = MockSession() - project_url = 'http://host/api/projects/abc/' - dataset_url = 'http://host/api/projects/abc/' - dataset_resource = Entity(session, **{ - "element": "shoji:entity", - "self": dataset_url, - "body": { - "name": "test_dataset_project" + project_url = "http://host/api/projects/abc/" + dataset_url = "http://host/api/projects/abc/" + dataset_resource = Entity( + session, + **{ + "element": "shoji:entity", + "self": dataset_url, + "body": {"name": "test_dataset_project"}, + "catalogs": {"project": project_url}, }, - "catalogs": { - "project": project_url, - } - }) + ) self._load_dataset(dataset_resource) project = self._project(session, project_url) @@ -1622,18 +1395,17 @@ def test_move_to_project(self): def test_owner_to_project(self): session = MockSession() - project_url = 'http://host/api/projects/abc/' - dataset_url = 'http://host/api/projects/abc/' - dataset_resource = 
Entity(session, **{ - "element": "shoji:entity", - "self": dataset_url, - "body": { - "name": "test_dataset_project" + project_url = "http://host/api/projects/abc/" + dataset_url = "http://host/api/projects/abc/" + dataset_resource = Entity( + session, + **{ + "element": "shoji:entity", + "self": dataset_url, + "body": {"name": "test_dataset_project"}, + "catalogs": {"project": project_url}, }, - "catalogs": { - "project": project_url, - } - }) + ) self._load_dataset(dataset_resource) project = self._project(session, project_url) @@ -1641,34 +1413,34 @@ def test_owner_to_project(self): with mock.patch("scrunch.datasets.warn") as mock_warn: dataset.change_owner(project=project) - mock_warn.assert_has_calls([ - mock.call("Use Dataset.move() to move datasets between projects", DeprecationWarning) - ]) + mock_warn.assert_has_calls( + [ + mock.call( + "Use Dataset.move() to move datasets between projects", + DeprecationWarning, + ) + ] + ) project.move_here.assert_called_once_with([dataset]) def test_dataset_project(self): session = MockSession() - project_url = 'http://host/api/projects/abc/' - dataset_url = 'http://host/api/projects/abc/' - dataset_resource = Entity(session, **{ - "element": "shoji:entity", - "self": dataset_url, - "body": { - "name": "test_dataset_project" - }, - "catalogs": { - "project": project_url, - } - }) + project_url = "http://host/api/projects/abc/" + dataset_url = "http://host/api/projects/abc/" + dataset_resource = Entity( + session, + element="shoji:entity", + self=dataset_url, + body={"name": "test_dataset_project"}, + catalogs={"project": project_url}, + ) self._load_dataset(dataset_resource) project_payload = { "element": "shoji:entity", "self": project_url, - "body": { - "name": "My Project" - } + "body": {"name": "My Project"}, } session.add_fixture(project_url, project_payload) dataset = StreamingDataset(dataset_resource) @@ -1678,8 +1450,7 @@ def test_dataset_project(self): class TestCast(TestCase): - - ds_url = 'http://test.crunch.io/api/datasets/123/' + ds_url = "http://test.crunch.io/api/datasets/123/" def test_cast_not_allowed(self): sess = MagicMock() @@ -1687,54 +1458,44 @@ def test_cast_not_allowed(self): ds_res.views.cast = MagicMock() ds = StreamingDataset(ds_res) with pytest.raises(AssertionError) as excinfo: - ds.cast('var_a', 'not_allowed') - ds_res.resource.session.post.assert_called_with( - {'cast_as': 'not_allowed'}) + ds.cast("var_a", "not_allowed") + ds_res.resource.session.post.assert_called_with({"cast_as": "not_allowed"}) assert str(excinfo.value) == "Cast type not allowed" class TestSavepoints(TestCase): - - ds_url = 'http://test.crunch.io/api/datasets/123/' + ds_url = "http://test.crunch.io/api/datasets/123/" def test_create_savepoint(self): sess = MagicMock() ds_res = MagicMock(session=sess) ds_res.savepoints = MagicMock() ds = StreamingDataset(ds_res) - ds.create_savepoint('savepoint description') - ds_res.savepoints.create.assert_called_with({ - 'element': 'shoji:entity', - 'body': { - 'description': 'savepoint description' + ds.create_savepoint("savepoint description") + ds_res.savepoints.create.assert_called_with( + { + "element": "shoji:entity", + "body": {"description": "savepoint description"}, } - }) + ) def test_create_savepoint_keyerror(self): sess = MagicMock() ds_res = MagicMock(session=sess) ds_res.savepoints = MagicMock() - ds_res.savepoints.index = { - 1: { - 'description': 'savepoint description' - } - } + ds_res.savepoints.index = {1: {"description": "savepoint description"}} ds = StreamingDataset(ds_res) with 
pytest.raises(KeyError): - ds.create_savepoint('savepoint description') + ds.create_savepoint("savepoint description") def test_load_initial_savepoint(self): sess = MagicMock() ds_res = MagicMock(session=sess) ds_res.savepoints = MagicMock() - ds_res.savepoints.index = { - 1: { - 'description': 'savepoint description' - } - } + ds_res.savepoints.index = {1: {"description": "savepoint description"}} ds = StreamingDataset(ds_res) with pytest.raises(KeyError): - ds.create_savepoint('savepoint description') + ds.create_savepoint("savepoint description") def test_load_empty_savepoint(self): sess = MagicMock() @@ -1743,22 +1504,23 @@ def test_load_empty_savepoint(self): ds_res.savepoints.index = {} ds = StreamingDataset(ds_res) with pytest.raises(KeyError): - ds.load_savepoint('savepoint') + ds.load_savepoint("savepoint") class TestForks(TestCase): - ds_url = 'http://test.crunch.io/api/datasets/123/' - user_url = 'https://test.crunch.io/api/users/12345/' + ds_url = "http://test.crunch.io/api/datasets/123/" + user_url = "https://test.crunch.io/api/users/12345/" def test_fork(self): - user_url = "some_user_url" sess = MagicMock() - body = JSONObject({ - 'name': 'ds name', - 'description': 'ds description', - 'owner': 'http://test.crunch.io/api/users/123/', - 'streaming': 'yes', - }) + body = JSONObject( + { + "name": "ds name", + "description": "ds description", + "owner": "http://test.crunch.io/api/users/123/", + "streaming": "yes", + } + ) fork_res = MagicMock() def _create(*args): @@ -1771,34 +1533,38 @@ def _create(*args): ds = StreamingDataset(ds_res) forked_ds = ds.fork(preserve_owner=False) assert isinstance(forked_ds, MutableDataset) - ds_res.forks.create.assert_called_with(as_entity({ - 'name': 'FORK #1 of ds name', - 'description': 'ds description', - 'is_published': False, - })) + ds_res.forks.create.assert_called_with( + as_entity( + { + "name": "FORK #1 of ds name", + "description": "ds description", + "is_published": False, + } + ) + ) def test_fork_preserve_owner(self): - user_id = 'http://test.crunch.io/api/users/123/' + user_id = "http://test.crunch.io/api/users/123/" sess = MagicMock() - body = JSONObject({ - 'name': 'ds name', - 'description': 'ds description', - 'owner': user_id, - }) + body = JSONObject( + {"name": "ds name", "description": "ds description", "owner": user_id} + ) ds_res = MagicMock(session=sess, body=body) ds_res.forks = MagicMock() ds_res.forks.index = {} ds = BaseDataset(ds_res) ds.fork(preserve_owner=True) - ds_res.forks.create.assert_called_with({ - 'element': 'shoji:entity', - 'body': { - 'name': 'FORK #1 of ds name', - 'description': 'ds description', - 'owner': user_id, # Owner preserved - 'is_published': False, + ds_res.forks.create.assert_called_with( + { + "element": "shoji:entity", + "body": { + "name": "FORK #1 of ds name", + "description": "ds description", + "owner": user_id, # Owner preserved + "is_published": False, + }, } - }) + ) def test_delete_forks(self): f1 = MagicMock() @@ -1807,11 +1573,7 @@ def test_delete_forks(self): sess = MagicMock() ds_res = MagicMock(session=sess) ds_res.forks = MagicMock() - ds_res.forks.index = { - 'abc1': f1, - 'abc2': f2, - 'abc3': f3 - } + ds_res.forks.index = {"abc1": f1, "abc2": f2, "abc3": f3} ds = BaseDataset(ds_res) ds.delete_forks() @@ -1820,37 +1582,39 @@ def test_delete_forks(self): assert f2.entity.delete.call_count == 1 assert f3.entity.delete.call_count == 1 - @pytest.mark.skipif(pandas is None, - reason='pandas is not installed') + @pytest.mark.skipif(pandas is None, reason="pandas is not 
installed") def test_forks_dataframe(self): f1 = dict( - name='name', - description='description', + name="name", + description="description", is_published=True, - owner_name='Jane Doe', - current_editor_name='John Doe', - creation_time='2016-01-01T00:00Z', - modification_time='2016-01-01T00:00Z', - id='abc123', + owner_name="Jane Doe", + current_editor_name="John Doe", + creation_time="2016-01-01T00:00Z", + modification_time="2016-01-01T00:00Z", + id="abc123", ) sess = MagicMock() ds_res = MagicMock(session=sess) ds_res.forks = MagicMock() - ds_res.forks.index = { - 'abc1': f1 - } + ds_res.forks.index = {"abc1": f1} ds = BaseDataset(ds_res) df = ds.forks_dataframe() assert isinstance(df, DataFrame) keys = [k for k in df.keys()] assert keys == [ - 'name', 'description', 'is_published', 'owner_name', - 'current_editor_name', 'creation_time', 'modification_time', 'id' + "name", + "description", + "is_published", + "owner_name", + "current_editor_name", + "creation_time", + "modification_time", + "id", ] - @pytest.mark.skipif(pandas is None, - reason='pandas is not installed') + @pytest.mark.skipif(pandas is None, reason="pandas is not installed") def test_forks_dataframe_empty(self): sess = MagicMock() ds_res = MagicMock(session=sess) @@ -1862,8 +1626,7 @@ def test_forks_dataframe_empty(self): assert df is None - @pytest.mark.skipif(pandas is not None, - reason='pandas is installed') + @pytest.mark.skipif(pandas is not None, reason="pandas is installed") def test_forks_no_pandas(self): sess = MagicMock() ds_res = MagicMock(session=sess) @@ -1871,45 +1634,35 @@ def test_forks_no_pandas(self): ds_res.forks.index = {} ds = BaseDataset(ds_res) - with pytest.raises(ImportError) as err: + with pytest.raises(ImportError): ds.forks_dataframe() def test_merge_fork(self): - fork1_url = 'http://test.crunch.io/api/datasets/abc/' - fork2_url = 'http://test.crunch.io/api/datasets/def/' - fork3_url = 'http://test.crunch.io/api/datasets/ghi/' + fork1_url = "http://test.crunch.io/api/datasets/abc/" + fork2_url = "http://test.crunch.io/api/datasets/def/" + fork3_url = "http://test.crunch.io/api/datasets/ghi/" sess = MagicMock() - body = JSONObject({ - 'name': 'ds name', - 'id': 'xyz', - 'description': 'ds description', - 'owner': 'http://test.crunch.io/api/users/123/' - }) + body = JSONObject( + { + "name": "ds name", + "id": "xyz", + "description": "ds description", + "owner": "http://test.crunch.io/api/users/123/", + } + ) ds_res = MagicMock(session=sess, body=body) ds_res.forks.index = { - fork1_url: { - 'name': 'FORK #1 of ds name', - 'id': 'abc' - }, - fork2_url: { - 'name': 'myFork', - 'id': 'def', - }, - fork3_url: { - 'name': 'myFork', - 'id': 'ghi', - } + fork1_url: {"name": "FORK #1 of ds name", "id": "abc"}, + fork2_url: {"name": "myFork", "id": "def"}, + fork3_url: {"name": "myFork", "id": "ghi"}, } - fork_url = 'http://test.crunch.io/api/datasets/123/actions/' + fork_url = "http://test.crunch.io/api/datasets/123/actions/" ds_res.actions.self = fork_url ds = BaseDataset(ds_res) expected_call = { - 'element': 'shoji:entity', - 'body': { - 'dataset': fork1_url, - 'autorollback': True, - } + "element": "shoji:entity", + "body": {"dataset": fork1_url, "autorollback": True}, } ds.merge(1) # number as int @@ -1922,107 +1675,88 @@ def assert_expected(call): arbitrary ordering of dict keys. See #201. 
""" args, kwargs = call[1:3] - url, = args - data = kwargs['data'] + (url,) = args + data = kwargs["data"] assert url == fork_url assert json.loads(data) == expected_call assert_expected(ds_res.session.post.mock_calls[0]) ds_res.reset_mock() - ds.merge('1') # number as str + ds.merge("1") # number as str assert_expected(ds_res.session.post.mock_calls[0]) ds_res.reset_mock() - ds.merge('FORK #1 of ds name') # name + ds.merge("FORK #1 of ds name") # name assert_expected(ds_res.session.post.mock_calls[0]) ds_res.reset_mock() - ds.merge('abc') # id + ds.merge("abc") # id assert_expected(ds_res.session.post.mock_calls[0]) ds_res.reset_mock() # test autorollback=False - expected_call['body']['autorollback'] = False + expected_call["body"]["autorollback"] = False ds.merge(1, autorollback=False) # number as int assert_expected(ds_res.session.post.mock_calls[0]) ds_res.reset_mock() # ValueError if no unique fork could be found - error_msg = "Couldn't find a (unique) fork. " \ - "Please try again using its id" + error_msg = "Couldn't find a (unique) fork. " "Please try again using its id" with pytest.raises(ValueError) as excinfo: - ds.merge('myFork') + ds.merge("myFork") assert str(excinfo.value) == error_msg - expected_call['body']['dataset'] = fork2_url - expected_call['body']['autorollback'] = True + expected_call["body"]["dataset"] = fork2_url + expected_call["body"]["autorollback"] = True - ds.merge('def') + ds.merge("def") assert_expected(ds_res.session.post.mock_calls[0]) ds_res.reset_mock() - expected_call['body']['dataset'] = fork3_url - ds.merge('ghi') + expected_call["body"]["dataset"] = fork3_url + ds.merge("ghi") assert_expected(ds_res.session.post.mock_calls[0]) class TestFillVariables(TestCase): def prepare_ds(self): session = MockSession() - dataset_url = 'http://host/api/projects/abc/' + dataset_url = "http://host/api/projects/abc/" variables_url = "http://host/api/projects/abc/variables/" new_var_url = "http://host/api/projects/abc/variables/123/" table_url = "http://host/api/projects/abc/table/?limit=0" - dataset_resource = Entity(session, **{ - "element": "shoji:entity", - "self": dataset_url, - "body": { - "name": "test_dataset_project" + dataset_resource = Entity( + session, + **{ + "element": "shoji:entity", + "self": dataset_url, + "body": {"name": "test_dataset_project"}, + "catalogs": {"variables": variables_url, "table": table_url}, }, - "catalogs": { - "variables": variables_url, - "table": table_url - } - }) + ) variables_payload = { "element": "shoji:entity", "self": variables_url, "index": { - "001": { - "alias": "var_a", - "type": "categorical", - "id": "001", - }, - "002": { - "alias": "var_b", - "type": "categorical", - "id": "002", - }, - new_var_url: { - "alias": "filled", - "type": "categorical", - "id": "123" - } - } + "001": {"alias": "var_a", "type": "categorical", "id": "001"}, + "002": {"alias": "var_b", "type": "categorical", "id": "002"}, + new_var_url: {"alias": "filled", "type": "categorical", "id": "123"}, + }, } table_payload = { "element": "crunch:table", "self": table_url, "metadata": variables_payload["index"], - "data": {} + "data": {}, } new_var_payload = { "element": "shoji:entity", "self": new_var_url, - "body": { - "name": "Filled var", - "alias": "filled", - "type": "categorical" - } + "body": {"name": "Filled var", "alias": "filled", "type": "categorical"}, } session.add_fixture(variables_url, variables_payload) session.add_fixture(table_url, table_payload) @@ -2030,9 +1764,7 @@ def prepare_ds(self): response = Response() 
response.status_code = 201 - response.headers = { - "Location": new_var_url - } + response.headers = {"Location": new_var_url} session.add_post_response(response) # Mocks that aren't relevant for the test @@ -2045,7 +1777,7 @@ def test_recode_w_fill(self): ds, session = self.prepare_ds() responses = [ {"case": "var_a == 1", "variable": "var_a"}, - {"case": "var_b == 1", "variable": "var_b"} + {"case": "var_b == 1", "variable": "var_b"}, ] # This is what we want to test for! @@ -2064,36 +1796,42 @@ def test_recode_w_fill(self): "class": "categorical", "ordinal": False, "categories": [ - {"id": 1, "missing": False, "name": "1", "numeric_value": None}, - {"id": 2, "missing": False, "name": "2", "numeric_value": None} - ]} + { + "id": 1, + "missing": False, + "name": "1", + "numeric_value": None, + }, + { + "id": 2, + "missing": False, + "name": "2", + "numeric_value": None, + }, + ], + }, }, { "function": "==", "args": [ {"variable": "http://host/api/projects/abc/variables/001/"}, - {"value": 1} - ] + {"value": 1}, + ], }, { "function": "==", "args": [ {"variable": "http://host/api/projects/abc/variables/002/"}, - {"value": 1} - ] - } - ] + {"value": 1}, + ], + }, + ], } fill_expr = { "function": "fill", "args": [ case_expr, - { - "map": { - "1": {"variable": "001"}, - "2": {"variable": "002"} - } - } + {"map": {"1": {"variable": "001"}, "2": {"variable": "002"}}}, ], } result = json.loads(post_request.body) @@ -2103,7 +1841,7 @@ def test_recode_w_fill(self): "alias": "filled", "derivation": fill_expr, "name": "Filled var", - "description": "" + "description": "", }, } self.assertEqual(result, expected) @@ -2112,7 +1850,7 @@ def test_else_code(self): ds, session = self.prepare_ds() responses = [ {"case": "var_a == 1", "variable": "var_a"}, - {"case": "else", "missing": True, "name": "Not Asked", "id": 99} + {"case": "else", "missing": True, "name": "Not Asked", "id": 99}, ] # This is what we want to test for! @@ -2131,45 +1869,52 @@ def test_else_code(self): "class": "categorical", "ordinal": False, "categories": [ - {"id": 1, "missing": False, "name": "1", "numeric_value": None}, - {"id": 99, "missing": True, "name": "Not Asked", "numeric_value": None} - ]} + { + "id": 1, + "missing": False, + "name": "1", + "numeric_value": None, + }, + { + "id": 99, + "missing": True, + "name": "Not Asked", + "numeric_value": None, + }, + ], + }, }, { "function": "==", "args": [ {"variable": "http://host/api/projects/abc/variables/001/"}, - {"value": 1} - ] + {"value": 1}, + ], }, - ] + ], } fill_expr = { "function": "fill", - "args": [ - case_expr, - { - "map": { - "1": {"variable": "001"}, - } - } - ], + "args": [case_expr, {"map": {"1": {"variable": "001"}}}], } - self.assertEqual(json.loads(post_request.body), { - "element": "shoji:entity", - "body": { - "alias": "filled", - "derivation": fill_expr, - "name": "Filled var", - "description": "" + self.assertEqual( + json.loads(post_request.body), + { + "element": "shoji:entity", + "body": { + "alias": "filled", + "derivation": fill_expr, + "name": "Filled var", + "description": "", + }, }, - }) + ) def test_else_var(self): ds, session = self.prepare_ds() responses = [ {"case": "var_a == 1", "variable": "var_a"}, - {"case": "else", "variable": "var_b"} + {"case": "else", "variable": "var_b"}, ] # This is what we want to test for! 
@@ -2188,505 +1933,664 @@ def test_else_var(self): "class": "categorical", "ordinal": False, "categories": [ - {"id": 1, "missing": False, "name": "1", - "numeric_value": None}, - ]} + { + "id": 1, + "missing": False, + "name": "1", + "numeric_value": None, + } + ], + }, }, { "function": "==", "args": [ {"variable": "http://host/api/projects/abc/variables/001/"}, - {"value": 1} - ] + {"value": 1}, + ], }, - ] + ], } fill_expr = { "function": "fill", "args": [ case_expr, - { - "map": { - "1": {"variable": "001"}, - "-1": {"variable": "002"} - } - } + {"map": {"1": {"variable": "001"}, "-1": {"variable": "002"}}}, ], } - self.assertEqual(json.loads(post_request.body), { - "element": "shoji:entity", - "body": { - "alias": "filled", - "derivation": fill_expr, - "name": "Filled var", - "description": "" + self.assertEqual( + json.loads(post_request.body), + { + "element": "shoji:entity", + "body": { + "alias": "filled", + "derivation": fill_expr, + "name": "Filled var", + "description": "", + }, }, - }) + ) class TestRecode(TestDatasetBase): def test_recode_single_categorical(self): variables = { - '001': { - 'id': '001', - 'alias': 'var_a', - 'name': 'Variable A', - 'type': 'numeric', - 'is_subvar': False + "001": { + "id": "001", + "alias": "var_a", + "name": "Variable A", + "type": "numeric", + "is_subvar": False, }, - '002': { - 'id': '002', - 'alias': 'var_b', - 'name': 'Variable B', - 'type': 'categorical', - 'is_subvar': False + "002": { + "id": "002", + "alias": "var_b", + "name": "Variable B", + "type": "categorical", + "is_subvar": False, }, - '003': { - 'id': '003', - 'alias': 'var_c', - 'name': 'Variable C', - 'type': 'categorical', - 'is_subvar': False + "003": { + "id": "003", + "alias": "var_c", + "name": "Variable C", + "type": "categorical", + "is_subvar": False, }, - '004': { - 'id': '004', - 'alias': 'gender', - 'name': 'Gender', - 'type': 'categorical', - 'is_subvar': False + "004": { + "id": "004", + "alias": "gender", + "name": "Gender", + "type": "categorical", + "is_subvar": False, }, - '005': { - 'id': '005', - 'alias': 'age', - 'name': 'Age', - 'type': 'categorical', - 'is_subvar': False + "005": { + "id": "005", + "alias": "age", + "name": "Age", + "type": "categorical", + "is_subvar": False, }, } ds_mock = self._dataset_mock(variables=variables) ds = StreamingDataset(ds_mock) responses = [ - {'id': 1, 'name': 'Facebook', 'case': 'var_a > 5'}, - {'id': 2, 'name': 'Twitter', - 'case': 'var_b < 10 and var_c in (1, 2, 3)'}, - {'id': 3, 'name': 'Google+', - 'case': '(gender == 1) and (age >= 16 and age <= 24)'}, + {"id": 1, "name": "Facebook", "case": "var_a > 5"}, + {"id": 2, "name": "Twitter", "case": "var_b < 10 and var_c in (1, 2, 3)"}, + { + "id": 3, + "name": "Google+", + "case": "(gender == 1) and (age >= 16 and age <= 24)", + }, ] with pytest.raises(ValueError) as err: - ds.create_categorical(responses, alias='cat', name='My cat', multiple=False) - assert 'Entity test_dataset_name has no (sub)variable' in str(err.value) - ds.resource.variables.create.assert_called_with({ - 'element': 'shoji:entity', - 'body': { - 'description': '', - 'notes': '', - 'alias': 'cat', - 'name': 'My cat', - 'expr': { - 'function': 'case', - 'args': [{ - 'column': [1, 2, 3, -1], - 'type': { - 'value': { - 'class': 'categorical', - 'categories': [ - {'missing': False, 'id': 1, 'name': 'Facebook', 'numeric_value': None}, - {'missing': False, 'id': 2, 'name': 'Twitter', 'numeric_value': None}, - {'missing': False, 'id': 3, 'name': 'Google+', 'numeric_value': None}, - {'numeric_value': 
None, 'missing': True, 'id': -1, 'name': 'No Data'} - ] - } - } - }, { - 'function': '>', - 'args': [ - {'variable': 'https://test.crunch.io/api/datasets/123456/variables/001/'}, - {'value': 5} - ] - }, { - 'function': 'and', - 'args': [{ - 'function': '<', - 'args': [ - {'variable': 'https://test.crunch.io/api/datasets/123456/variables/002/'}, - {'value': 10} - ]}, { - 'function': 'in', - 'args': [ - {'variable': 'https://test.crunch.io/api/datasets/123456/variables/003/'}, - {'value': [1, 2, 3]} - ] - }] - }, { - 'function': 'and', - 'args': [{ - 'function': '==', - 'args': [ - {'variable': 'https://test.crunch.io/api/datasets/123456/variables/004/'}, - {'value': 1} - ] - }, { - 'function': 'and', - 'args': [{ - 'function': '>=', - 'args': [ - {'variable': 'https://test.crunch.io/api/datasets/123456/variables/005/'}, - {'value': 16} - ] - }, { - 'function': '<=', - 'args': [ - {'variable': 'https://test.crunch.io/api/datasets/123456/variables/005/'}, - {'value': 24} - ] - }] - }] - }] - } - }, - }) + ds.create_categorical(responses, alias="cat", name="My cat", multiple=False) + assert "Entity test_dataset_name has no (sub)variable" in str(err.value) + ds.resource.variables.create.assert_called_with( + { + "element": "shoji:entity", + "body": { + "description": "", + "notes": "", + "alias": "cat", + "name": "My cat", + "expr": { + "function": "case", + "args": [ + { + "column": [1, 2, 3, -1], + "type": { + "value": { + "class": "categorical", + "categories": [ + { + "missing": False, + "id": 1, + "name": "Facebook", + "numeric_value": None, + }, + { + "missing": False, + "id": 2, + "name": "Twitter", + "numeric_value": None, + }, + { + "missing": False, + "id": 3, + "name": "Google+", + "numeric_value": None, + }, + { + "numeric_value": None, + "missing": True, + "id": -1, + "name": "No Data", + }, + ], + } + }, + }, + { + "function": ">", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/001/" + }, + {"value": 5}, + ], + }, + { + "function": "and", + "args": [ + { + "function": "<", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/002/" + }, + {"value": 10}, + ], + }, + { + "function": "in", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/003/" + }, + {"value": [1, 2, 3]}, + ], + }, + ], + }, + { + "function": "and", + "args": [ + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/004/" + }, + {"value": 1}, + ], + }, + { + "function": "and", + "args": [ + { + "function": ">=", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/005/" + }, + {"value": 16}, + ], + }, + { + "function": "<=", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/005/" + }, + {"value": 24}, + ], + }, + ], + }, + ], + }, + ], + }, + }, + } + ) def test_recode_multiple_response(self): variables = { - 'var_a': { - 'id': '001', - 'alias': 'var_a', - 'name': 'Variable A', - 'type': 'numeric', - 'is_subvar': False + "var_a": { + "id": "001", + "alias": "var_a", + "name": "Variable A", + "type": "numeric", + "is_subvar": False, }, - 'var_b': { - 'id': '002', - 'alias': 'var_b', - 'name': 'Variable B', - 'type': 'categorical', - 'is_subvar': False + "var_b": { + "id": "002", + "alias": "var_b", + "name": "Variable B", + "type": "categorical", + "is_subvar": False, }, - 'var_c': { - 'id': '003', - 'alias': 'var_c', - 'name': 'Variable C', - 'type': 'categorical', - 'is_subvar': False + "var_c": { + 
"id": "003", + "alias": "var_c", + "name": "Variable C", + "type": "categorical", + "is_subvar": False, }, - 'gender': { - 'id': '004', - 'alias': 'gender', - 'name': 'Gender', - 'type': 'categorical', - 'is_subvar': False + "gender": { + "id": "004", + "alias": "gender", + "name": "Gender", + "type": "categorical", + "is_subvar": False, }, - 'age': { - 'id': '005', - 'alias': 'age', - 'name': 'Age', - 'type': 'categorical', - 'is_subvar': False + "age": { + "id": "005", + "alias": "age", + "name": "Age", + "type": "categorical", + "is_subvar": False, }, } ds_mock = self._dataset_mock(variables=variables) ds = StreamingDataset(ds_mock) responses = [ - {'id': 1, 'name': 'Facebook', 'case': 'var_a > 5'}, - {'id': 2, 'name': 'Twitter', 'case': 'var_b < 10 and var_c in (1, 2, 3)'}, - {'id': 3, 'name': 'Google+', 'case': '(gender == 1) and (age >= 16 and age <= 24)'}, + {"id": 1, "name": "Facebook", "case": "var_a > 5"}, + {"id": 2, "name": "Twitter", "case": "var_b < 10 and var_c in (1, 2, 3)"}, + { + "id": 3, + "name": "Google+", + "case": "(gender == 1) and (age >= 16 and age <= 24)", + }, ] with pytest.raises(ValueError) as err: - ds.create_categorical(responses, alias='mr', name='my mr', multiple=True) - assert 'Entity test_dataset_name has no (sub)variable' in str(err.value) - ds.resource.variables.create.assert_called_with({ - 'element': 'shoji:entity', - 'body': { - 'alias': 'mr', - 'description': '', - 'notes': '', - 'name': 'my mr', - 'derivation': { - 'function': 'array', - 'args': [{ - 'function': 'make_frame', - 'args': [{ - 'map': { - '0001': { - 'function': 'case', - 'references': { - 'name': 'Facebook', - 'alias': 'mr_1', - }, - 'args': [{ - 'column': [1, 2], - 'type': { - 'value': { - 'class': 'categorical', - 'categories': [ - {'numeric_value': None, 'selected': True, 'id': 1, 'name': 'Selected', 'missing': False}, - {'numeric_value': None, 'selected': False, 'id': 2, 'name': 'Not selected', 'missing': False} - ] - } - } - }, { - # 'var_a > 5' - 'function': '>', - 'args': [ - {'variable': 'https://test.crunch.io/api/datasets/123456/variables/%s/' % variables['var_a']['id']}, - {'value': 5} - ] - }] - }, - '0002': { - 'function': 'case', - 'references': { - 'alias': 'mr_2', - 'name': 'Twitter', - }, - 'args': [{ - 'column': [1, 2], - 'type': { - 'value': { - 'class': 'categorical', - 'categories': [ - {'numeric_value': None, 'selected': True, 'id': 1, 'name': 'Selected', 'missing': False}, - {'numeric_value': None, 'selected': False, 'id': 2, 'name': 'Not selected', 'missing': False} - ] - } - } - }, { - # 'var_b < 10 and var_c in (1, 2, 3)'} - 'function': 'and', - 'args': [{ - 'function': '<', - 'args': [ - {'variable': 'https://test.crunch.io/api/datasets/123456/variables/%s/' % variables['var_b']['id']}, - {'value': 10} - ] - }, { - 'function': 'in', - 'args': [ - {'variable': 'https://test.crunch.io/api/datasets/123456/variables/%s/' % variables['var_c']['id']}, - {'value': [1, 2, 3]} - ] - }] - }] - }, - '0003': { - 'function': 'case', - 'references': { - 'alias': 'mr_3', - 'name': 'Google+', - }, - 'args': [{ - 'column': [1, 2], - 'type': { - 'value': { - 'class': 'categorical', - 'categories': [ - {'numeric_value': None, 'selected': True, 'id': 1, 'name': 'Selected', 'missing': False}, - {'numeric_value': None, 'selected': False, 'id': 2, 'name': 'Not selected', 'missing': False} - ] - } - } - }, { - # '(gender == 1) and (age >= 16 and age <= 24)' - 'function': 'and', - 'args': [{ - 'function': '==', - 'args': [{'variable': 
'https://test.crunch.io/api/datasets/123456/variables/%s/' % variables['gender']['id']}, {'value': 1}] - }, { - 'function': 'and', - 'args': [{ - 'function': '>=', - 'args': [{'variable': 'https://test.crunch.io/api/datasets/123456/variables/%s/' % variables['age']['id']}, {'value': 16}] - }, { - 'function': '<=', - 'args': [{'variable': 'https://test.crunch.io/api/datasets/123456/variables/%s/' % variables['age']['id']}, {'value': 24}] - }] - }] - }] - } - } - }, { - 'value': [ - '0001', - '0002', - '0003' - ] - }] - }] - } - } - }) - - def test_create_categorical_missing_case_dups(self): - ds_mock = self._dataset_mock() - ds = StreamingDataset(ds_mock) + ds.create_categorical(responses, alias="mr", name="my mr", multiple=True) + assert "Entity test_dataset_name has no (sub)variable" in str(err.value) + ds.resource.variables.create.assert_called_with( + { + "element": "shoji:entity", + "body": { + "alias": "mr", + "description": "", + "notes": "", + "name": "my mr", + "derivation": { + "function": "array", + "args": [ + { + "function": "make_frame", + "args": [ + { + "map": { + "0001": { + "function": "case", + "references": { + "name": "Facebook", + "alias": "mr_1", + }, + "args": [ + { + "column": [1, 2], + "type": { + "value": { + "class": "categorical", + "categories": [ + { + "numeric_value": None, + "selected": True, + "id": 1, + "name": "Selected", + "missing": False, + }, + { + "numeric_value": None, + "selected": False, + "id": 2, + "name": "Not selected", + "missing": False, + }, + ], + } + }, + }, + { + # 'var_a > 5' + "function": ">", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/%s/" + % variables["var_a"][ + "id" + ] + }, + {"value": 5}, + ], + }, + ], + }, + "0002": { + "function": "case", + "references": { + "alias": "mr_2", + "name": "Twitter", + }, + "args": [ + { + "column": [1, 2], + "type": { + "value": { + "class": "categorical", + "categories": [ + { + "numeric_value": None, + "selected": True, + "id": 1, + "name": "Selected", + "missing": False, + }, + { + "numeric_value": None, + "selected": False, + "id": 2, + "name": "Not selected", + "missing": False, + }, + ], + } + }, + }, + { + # 'var_b < 10 and var_c in (1, 2, 3)'} + "function": "and", + "args": [ + { + "function": "<", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/%s/" + % variables[ + "var_b" + ]["id"] + }, + {"value": 10}, + ], + }, + { + "function": "in", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/%s/" + % variables[ + "var_c" + ]["id"] + }, + { + "value": [ + 1, + 2, + 3, + ] + }, + ], + }, + ], + }, + ], + }, + "0003": { + "function": "case", + "references": { + "alias": "mr_3", + "name": "Google+", + }, + "args": [ + { + "column": [1, 2], + "type": { + "value": { + "class": "categorical", + "categories": [ + { + "numeric_value": None, + "selected": True, + "id": 1, + "name": "Selected", + "missing": False, + }, + { + "numeric_value": None, + "selected": False, + "id": 2, + "name": "Not selected", + "missing": False, + }, + ], + } + }, + }, + { + # '(gender == 1) and (age >= 16 and age <= 24)' + "function": "and", + "args": [ + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/%s/" + % variables[ + "gender" + ]["id"] + }, + {"value": 1}, + ], + }, + { + "function": "and", + "args": [ + { + "function": ">=", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/%s/" + % variables[ + "age" + ]["id"] + }, 
+ { + "value": 16 + }, + ], + }, + { + "function": "<=", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/%s/" + % variables[ + "age" + ]["id"] + }, + { + "value": 24 + }, + ], + }, + ], + }, + ], + }, + ], + }, + } + }, + {"value": ["0001", "0002", "0003"]}, + ], + } + ], + }, + }, + } + ) + + def test_create_categorical_missing_case_dups(self): + ds_mock = self._dataset_mock() + ds = StreamingDataset(ds_mock) kwargs = { - 'name': 'my mr', - 'alias': 'mr', - 'multiple': True, - 'missing_case': 'var_b == 0', - 'categories': [ + "name": "my mr", + "alias": "mr", + "multiple": True, + "missing_case": "var_b == 0", + "categories": [ { - 'id': 1, - 'name': 'Facebook', - 'case': 'var_a == 1', - 'missing_case': 'var_b == 0', + "id": 1, + "name": "Facebook", + "case": "var_a == 1", + "missing_case": "var_b == 0", } - ] + ], } with pytest.raises(ValueError) as err: ds.create_categorical(**kwargs) - assert 'missing_case as an argument and as element of "categories" is not allowed' in str(err.value) + assert ( + 'missing_case as an argument and as element of "categories" is not allowed' + in str(err.value) + ) def test_create_categorical_else_case(self): variables = { - 'var_a': { - 'id': '001', - 'alias': 'var_a', - 'name': 'Variable A', - 'type': 'numeric', - 'is_subvar': False + "var_a": { + "id": "001", + "alias": "var_a", + "name": "Variable A", + "type": "numeric", + "is_subvar": False, + }, + "var_b": { + "id": "002", + "alias": "var_b", + "name": "Variable B", + "type": "numeric", + "is_subvar": False, }, - 'var_b': { - 'id': '002', - 'alias': 'var_b', - 'name': 'Variable B', - 'type': 'numeric', - 'is_subvar': False - } } ds_mock = self._dataset_mock(variables=variables) ds = StreamingDataset(ds_mock) kwargs = { - 'name': 'Age Range', - 'alias': 'agerange', - 'multiple': False, - 'categories': [ - {'id': 1, 'name': 'Facebook', 'case': 'var_a == 1'}, - {'id': 2, 'name': 'Twitter', 'case': 'var_b == 1'}, - {'id': 3, 'name': 'The rest', 'case': 'else'}, - ] + "name": "Age Range", + "alias": "agerange", + "multiple": False, + "categories": [ + {"id": 1, "name": "Facebook", "case": "var_a == 1"}, + {"id": 2, "name": "Twitter", "case": "var_b == 1"}, + {"id": 3, "name": "The rest", "case": "else"}, + ], } with pytest.raises(ValueError) as err: ds.create_categorical(**kwargs) - assert 'Entity test_dataset_name has no (sub)variable' in str(err.value) - ds.resource.variables.create.assert_called_with({ - "element": "shoji:entity", - "body": { - "alias": "agerange", - "name": "Age Range", - "expr": { - "function": "case", - "args": [ - { - "column": [1, 2, 3, -1], - "type": { - "value": { - "class": "categorical", - "categories": [ - { - "id": 1, - "name": "Facebook", - "numeric_value": None, - "missing": False - }, - { - "id": 2, - "name": "Twitter", - "numeric_value": None, - "missing": False - }, - { - "id": 3, - "name": "The rest", - "numeric_value": None, - "missing": False - }, - { - "id": -1, - "name": "No Data", - "numeric_value": None, - "missing": True - } - ] - } - } - }, - { - "function": "==", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/var_a/" - }, - { - "value": 1 - } - ] - }, - { - "function": "==", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/var_b/" - }, - { - "value": 1 - } - ] - }, - { - "function": "and", - "args": [ - { - "function": "not", - "args": [ - { - "function": "==", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/var_a/" 
- }, - { - "value": 1 - } - ] - } - ] + assert "Entity test_dataset_name has no (sub)variable" in str(err.value) + ds.resource.variables.create.assert_called_with( + { + "element": "shoji:entity", + "body": { + "alias": "agerange", + "name": "Age Range", + "expr": { + "function": "case", + "args": [ + { + "column": [1, 2, 3, -1], + "type": { + "value": { + "class": "categorical", + "categories": [ + { + "id": 1, + "name": "Facebook", + "numeric_value": None, + "missing": False, + }, + { + "id": 2, + "name": "Twitter", + "numeric_value": None, + "missing": False, + }, + { + "id": 3, + "name": "The rest", + "numeric_value": None, + "missing": False, + }, + { + "id": -1, + "name": "No Data", + "numeric_value": None, + "missing": True, + }, + ], + } }, - { - "function": "not", - "args": [ - { - "function": "==", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/var_b/" - }, - { - "value": 1 - } - ] - } - ] - } - ] - } - ] + }, + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/var_a/" + }, + {"value": 1}, + ], + }, + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/var_b/" + }, + {"value": 1}, + ], + }, + { + "function": "and", + "args": [ + { + "function": "not", + "args": [ + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/var_a/" + }, + {"value": 1}, + ], + } + ], + }, + { + "function": "not", + "args": [ + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/var_b/" + }, + {"value": 1}, + ], + } + ], + }, + ], + }, + ], + }, + "description": "", + "notes": "", }, - "description": "", - "notes": "" } - }) + ) def test_create_2_multiple_response_else_case(self): variables = { - 'age': { - 'id': '001', - 'alias': 'age', - 'name': 'Age', - 'type': 'numeric', - 'is_subvar': False + "age": { + "id": "001", + "alias": "age", + "name": "Age", + "type": "numeric", + "is_subvar": False, } } ds_mock = self._dataset_mock(variables=variables) @@ -2694,15 +2598,15 @@ def test_create_2_multiple_response_else_case(self): with pytest.raises(ValueError) as err: ds.create_categorical( categories=[ - {'id': 1, 'name': '20', 'case': 'age == 20'}, - {'id': 2, 'name': '50', 'case': 'age == 50'}, - {'id': 3, 'name': 'The rest', 'case': 'else'}, + {"id": 1, "name": "20", "case": "age == 20"}, + {"id": 2, "name": "50", "case": "age == 50"}, + {"id": 3, "name": "The rest", "case": "else"}, ], - alias='agerange_multi', - name='Age range multi', - multiple=True + alias="agerange_multi", + name="Age range multi", + multiple=True, ) - assert 'Entity test_dataset_name has no (sub)variable' in str(err.value) + assert "Entity test_dataset_name has no (sub)variable" in str(err.value) categories_arg = { "column": [1, 2], "type": { @@ -2714,174 +2618,170 @@ def test_create_2_multiple_response_else_case(self): "name": "Selected", "missing": False, "numeric_value": None, - "selected": True + "selected": True, }, { "id": 2, "name": "Not selected", "missing": False, "numeric_value": None, - "selected": False - } - ] + "selected": False, + }, + ], } - } + }, } - ds.resource.variables.create.assert_called_with({ - "element": "shoji:entity", - "body": { - "name": "Age range multi", - "alias": "agerange_multi", - "description": "", - "notes": "", - "derivation": { - "function": "array", - "args": [ - { - "function": "make_frame", - "args": [ - { - "map": { - "0001": { - 
"references": { - "name": "20", - "alias": "agerange_multi_1" + ds.resource.variables.create.assert_called_with( + { + "element": "shoji:entity", + "body": { + "name": "Age range multi", + "alias": "agerange_multi", + "description": "", + "notes": "", + "derivation": { + "function": "array", + "args": [ + { + "function": "make_frame", + "args": [ + { + "map": { + "0001": { + "references": { + "name": "20", + "alias": "agerange_multi_1", + }, + "function": "case", + "args": [ + categories_arg, + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + {"value": 20}, + ], + }, + ], }, - "function": "case", - "args": [ - categories_arg, - { - "function": "==", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 20 - } - ] - } - ] - }, - "0002": { - "references": { - "name": "50", - "alias": "agerange_multi_2" + "0002": { + "references": { + "name": "50", + "alias": "agerange_multi_2", + }, + "function": "case", + "args": [ + categories_arg, + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + {"value": 50}, + ], + }, + ], }, - "function": "case", - "args": [ - categories_arg, - { - "function": "==", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 50 - } - ] - } - ] - }, - "0003": { - "references": { - "name": "The rest", - "alias": "agerange_multi_3" + "0003": { + "references": { + "name": "The rest", + "alias": "agerange_multi_3", + }, + "function": "case", + "args": [ + categories_arg, + { + "function": "and", + "args": [ + { + "function": "not", + "args": [ + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + { + "value": 20 + }, + ], + } + ], + }, + { + "function": "not", + "args": [ + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + { + "value": 50 + }, + ], + } + ], + }, + ], + }, + ], }, - "function": "case", - "args": [ - categories_arg, - { - "function": "and", - "args": [ - { - "function": "not", - "args": [ - { - "function": "==", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 20 - } - ] - } - ] - }, - { - "function": "not", - "args": [ - { - "function": "==", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 50 - } - ] - } - ] - } - ] - } - ] } - } - }, - { - "value": ["0001", "0002", "0003"] - } - ] - } - ] - } + }, + {"value": ["0001", "0002", "0003"]}, + ], + } + ], + }, + }, } - }) + ) def test_create_3_multiple_response_else_case(self): variables = { - 'age': { - 'id': '001', - 'alias': 'age', - 'name': 'Age', - 'type': 'numeric', - 'is_subvar': False + "age": { + "id": "001", + "alias": "age", + "name": "Age", + "type": "numeric", + "is_subvar": False, } } ds_mock = self._dataset_mock(variables=variables) ds = StreamingDataset(ds_mock) with pytest.raises(ValueError) as err: ds.create_categorical( - categories = [ + categories=[ { - 'id': 1, - 'name': '21', - 'case': 'age == 21', - 'missing_case': 'missing(age)' + "id": 1, + "name": "21", + "case": "age == 21", + "missing_case": "missing(age)", }, { - 'id': 2, - 'name': '51', - 'case': 'age == 51', - 'missing_case': 'missing(age)' + "id": 2, + "name": "51", + "case": "age == 51", + "missing_case": "missing(age)", }, { - 
'id': 3, - 'name': 'The rest', - 'case': 'else', - 'missing_case': 'age > 100' + "id": 3, + "name": "The rest", + "case": "else", + "missing_case": "age > 100", }, ], - alias='agerange_multi3', - name='Age range multi3', - multiple=True + alias="agerange_multi3", + name="Age range multi3", + multiple=True, ) - assert 'Entity test_dataset_name has no (sub)variable' in str(err.value) + assert "Entity test_dataset_name has no (sub)variable" in str(err.value) column_args = { "column": [1, 2, -1], "type": { @@ -2893,376 +2793,362 @@ def test_create_3_multiple_response_else_case(self): "numeric_value": None, "selected": True, "id": 1, - "name": "Selected" + "name": "Selected", }, { "missing": False, "numeric_value": None, "selected": False, "id": 2, - "name": "Not Selected" + "name": "Not Selected", }, { "missing": True, "numeric_value": None, "selected": False, "id": -1, - "name": "No Data" - } - ] + "name": "No Data", + }, + ], } - } + }, } - ds.resource.variables.create.assert_called_with({ - "element": "shoji:entity", - "body": { - "name": "Age range multi3", - "alias": "agerange_multi3", - "description": "", - "notes": "", - "uniform_basis": False, - "derivation": { - "function": "array", - "args": [ - { - "function": "make_frame", - "args": [ - { - "map": { - "0001": { - "references": { - "name": "21", - "alias": "agerange_multi3_1" - }, - "function": "case", - "args": [ - column_args, - { - "function": "and", - "args": [ - { - "function": "==", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 21 - } - ] - }, - { - "function": "not", - "args": [ - { - "function": "is_missing", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - } - ] - } - ] - } - ] - }, - { - "function": "and", - "args": [ - { - "function": "not", - "args": [ - { - "function": "==", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 21 - } - ] - } - ] - }, - { - "function": "not", - "args": [ - { - "function": "is_missing", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - } - ] - } - ] - } - ] + ds.resource.variables.create.assert_called_with( + { + "element": "shoji:entity", + "body": { + "name": "Age range multi3", + "alias": "agerange_multi3", + "description": "", + "notes": "", + "uniform_basis": False, + "derivation": { + "function": "array", + "args": [ + { + "function": "make_frame", + "args": [ + { + "map": { + "0001": { + "references": { + "name": "21", + "alias": "agerange_multi3_1", }, - { - "function": "is_missing", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - } - ] - } - ] - }, - "0002": { - "references": { - "name": "51", - "alias": "agerange_multi3_2" + "function": "case", + "args": [ + column_args, + { + "function": "and", + "args": [ + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + {"value": 21}, + ], + }, + { + "function": "not", + "args": [ + { + "function": "is_missing", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + } + ], + } + ], + }, + ], + }, + { + "function": "and", + "args": [ + { + "function": "not", + "args": [ + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + { + "value": 21 + }, + ], + } + ], + }, + { + "function": "not", + "args": [ + { + "function": 
"is_missing", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + } + ], + } + ], + }, + ], + }, + { + "function": "is_missing", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + } + ], + }, + ], }, - "function": "case", - "args": [ - column_args, - { - "function": "and", - "args": [ - { - "function": "==", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 51 - } - ] - }, - { - "function": "not", - "args": [ - { - "function": "is_missing", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - } - ] - } - ] - } - ] - }, - { - "function": "and", - "args": [ - { - "function": "not", - "args": [ - { - "function": "==", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 51 - } - ] - } - ] - }, - { - "function": "not", - "args": [ - { - "function": "is_missing", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - } - ] - } - ] - } - ] + "0002": { + "references": { + "name": "51", + "alias": "agerange_multi3_2", }, - { - "function": "is_missing", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - } - ] - } - ] - }, - "0003": { - "references": { - "name": "The rest", - "alias": "agerange_multi3_3" + "function": "case", + "args": [ + column_args, + { + "function": "and", + "args": [ + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + {"value": 51}, + ], + }, + { + "function": "not", + "args": [ + { + "function": "is_missing", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + } + ], + } + ], + }, + ], + }, + { + "function": "and", + "args": [ + { + "function": "not", + "args": [ + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + { + "value": 51 + }, + ], + } + ], + }, + { + "function": "not", + "args": [ + { + "function": "is_missing", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + } + ], + } + ], + }, + ], + }, + { + "function": "is_missing", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + } + ], + }, + ], }, - "function": "case", - "args": [ - column_args, - { - "function": "and", - "args": [ - { - "function": "and", - "args": [ - { - "function": "not", - "args": [ - { - "function": "==", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 21 - } - ] - } - ] - }, - { - "function": "not", - "args": [ - { - "function": "==", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 51 - } - ] - } - ] - } - ] - }, - { - "function": "not", - "args": [ - { - "function": ">", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 100 - } - ] - } - ] - } - ] - }, - { - "function": "and", - "args": [ - { - "function": "or", - "args": [ - { - "function": "==", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 21 - } - ] - }, - { - "function": "==", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 51 - } - ] - } - ] - }, - { - "function": "not", - 
"args": [ - { - "function": ">", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 100 - } - ] - } - ] - } - ] + "0003": { + "references": { + "name": "The rest", + "alias": "agerange_multi3_3", }, - { - "function": ">", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 100 - } - ] - } - ] + "function": "case", + "args": [ + column_args, + { + "function": "and", + "args": [ + { + "function": "and", + "args": [ + { + "function": "not", + "args": [ + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + { + "value": 21 + }, + ], + } + ], + }, + { + "function": "not", + "args": [ + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + { + "value": 51 + }, + ], + } + ], + }, + ], + }, + { + "function": "not", + "args": [ + { + "function": ">", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + { + "value": 100 + }, + ], + } + ], + }, + ], + }, + { + "function": "and", + "args": [ + { + "function": "or", + "args": [ + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + { + "value": 21 + }, + ], + }, + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + { + "value": 51 + }, + ], + }, + ], + }, + { + "function": "not", + "args": [ + { + "function": ">", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + { + "value": 100 + }, + ], + } + ], + }, + ], + }, + { + "function": ">", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + {"value": 100}, + ], + }, + ], + }, } - } - }, - { - "value": ["0001", "0002", "0003"] - } - ] - } - ] - } + }, + {"value": ["0001", "0002", "0003"]}, + ], + } + ], + }, + }, } - }) + ) def test_create_categorical_missing_case(self): variables = { - 'age': { - 'id': '001', - 'alias': 'age', - 'name': 'Age', - 'type': 'numeric', - 'is_subvar': False + "age": { + "id": "001", + "alias": "age", + "name": "Age", + "type": "numeric", + "is_subvar": False, } } ds_mock = self._dataset_mock(variables=variables) ds = StreamingDataset(ds_mock) kwargs = { - 'name': 'age2', - 'alias': 'age2', - 'multiple': True, - 'missing_case': 'missing(age)', - 'categories': [ - { - 'id': 1, - 'name': '40+', - 'case': 'age > 40', - }, - { - 'id': 2, - 'name': '40-', - 'case': 'age <= 40', - } - ] + "name": "age2", + "alias": "age2", + "multiple": True, + "missing_case": "missing(age)", + "categories": [ + {"id": 1, "name": "40+", "case": "age > 40"}, + {"id": 2, "name": "40-", "case": "age <= 40"}, + ], } with pytest.raises(ValueError) as err: ds.create_categorical(**kwargs) - assert 'Entity test_dataset_name has no (sub)variable' in str(err.value) + assert "Entity test_dataset_name has no (sub)variable" in str(err.value) column_args = { "column": [1, 2, -1], "type": { @@ -3274,447 +3160,516 @@ def test_create_categorical_missing_case(self): "numeric_value": None, "selected": True, "id": 1, - "name": "Selected" + "name": "Selected", }, { "missing": False, "numeric_value": None, "selected": False, "id": 2, - "name": "Not Selected" + "name": "Not Selected", }, { "missing": True, "numeric_value": None, "selected": False, "id": -1, - "name": "No Data" - } - ] + "name": "No Data", + }, + ], } 
- } + }, } - ds.resource.variables.create.assert_called_with({ - "element": "shoji:entity", - "body": { - "name": "age2", - "alias": "age2", - "description": "", - "notes": "", - "uniform_basis": False, - "derivation": { - "function": "array", - "args": [ - { - "function": "make_frame", - "args": [ - { - "map": { - "0001": { - "references": { - "name": "40+", - "alias": "age2_1" - }, - "function": "case", - "args": [ - column_args, - { - "function": "and", - "args": [ - { - "function": ">", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 40 - } - ] - }, - { - "function": "not", - "args": [ - { - "function": "is_missing", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - } - ] - } - ] - } - ] - }, - { - "function": "and", - "args": [ - { - "function": "not", - "args": [ - { - "function": ">", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 40 - } - ] - } - ] - }, - { - "function": "not", - "args": [ - { - "function": "is_missing", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - } - ] - } - ] - } - ] + ds.resource.variables.create.assert_called_with( + { + "element": "shoji:entity", + "body": { + "name": "age2", + "alias": "age2", + "description": "", + "notes": "", + "uniform_basis": False, + "derivation": { + "function": "array", + "args": [ + { + "function": "make_frame", + "args": [ + { + "map": { + "0001": { + "references": { + "name": "40+", + "alias": "age2_1", }, - { - "function": "is_missing", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - } - ] - } - ] - }, - "0002": { - "references": { - "name": "40-", - "alias": "age2_2" + "function": "case", + "args": [ + column_args, + { + "function": "and", + "args": [ + { + "function": ">", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + {"value": 40}, + ], + }, + { + "function": "not", + "args": [ + { + "function": "is_missing", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + } + ], + } + ], + }, + ], + }, + { + "function": "and", + "args": [ + { + "function": "not", + "args": [ + { + "function": ">", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + { + "value": 40 + }, + ], + } + ], + }, + { + "function": "not", + "args": [ + { + "function": "is_missing", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + } + ], + } + ], + }, + ], + }, + { + "function": "is_missing", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + } + ], + }, + ], }, - "function": "case", - "args": [ - column_args, - { - "function": "and", - "args": [ - { - "function": "<=", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 40 - } - ] - }, - { - "function": "not", - "args": [ - { - "function": "is_missing", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - } - ] - } - ] - } - ] + "0002": { + "references": { + "name": "40-", + "alias": "age2_2", }, - { - "function": "and", - "args": [ - { - "function": "not", - "args": [ - { - "function": "<=", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - }, - { - "value": 40 - } - ] - } - ] - }, - { - "function": "not", - 
"args": [ - { - "function": "is_missing", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - } - ] - } - ] - } - ] - }, - { - "function": "is_missing", - "args": [ - { - "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" - } - ] - } - ] + "function": "case", + "args": [ + column_args, + { + "function": "and", + "args": [ + { + "function": "<=", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + {"value": 40}, + ], + }, + { + "function": "not", + "args": [ + { + "function": "is_missing", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + } + ], + } + ], + }, + ], + }, + { + "function": "and", + "args": [ + { + "function": "not", + "args": [ + { + "function": "<=", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + }, + { + "value": 40 + }, + ], + } + ], + }, + { + "function": "not", + "args": [ + { + "function": "is_missing", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + } + ], + } + ], + }, + ], + }, + { + "function": "is_missing", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/age/" + } + ], + }, + ], + }, } - } - }, - { - "value": ["0001", "0002"] - } - ] - } - ] - } + }, + {"value": ["0001", "0002"]}, + ], + } + ], + }, + }, } - }) + ) def test_derive_multiple_response(self): variables = { - 'var_a': { - 'id': '001', - 'alias': 'var_a', - 'name': 'Variable A', - 'type': 'numeric', - 'is_subvar': False + "var_a": { + "id": "001", + "alias": "var_a", + "name": "Variable A", + "type": "numeric", + "is_subvar": False, }, - 'var_b': { - 'id': '002', - 'alias': 'var_b', - 'name': 'Variable B', - 'type': 'categorical', - 'is_subvar': False + "var_b": { + "id": "002", + "alias": "var_b", + "name": "Variable B", + "type": "categorical", + "is_subvar": False, + }, + "var_c": { + "id": "003", + "alias": "var_c", + "name": "Variable C", + "type": "categorical", + "is_subvar": False, }, - 'var_c': { - 'id': '003', - 'alias': 'var_c', - 'name': 'Variable C', - 'type': 'categorical', - 'is_subvar': False - } } ds_mock = self._dataset_mock(variables=variables) ds = StreamingDataset(ds_mock) kwargs = { - 'name': 'my mr', - 'alias': 'mr', - 'categories': [ - {'id': 1, 'name': 'Yes', 'selected': True}, - {'id': 2, 'name': 'No'}, + "name": "my mr", + "alias": "mr", + "categories": [ + {"id": 1, "name": "Yes", "selected": True}, + {"id": 2, "name": "No"}, + ], + "subvariables": [ + { + "id": 1, + "name": "Facebook", + "cases": {1: "var_a == 1", 2: "var_a == 2"}, + }, + { + "id": 2, + "name": "Twitter", + "cases": {1: "var_b == 1", 2: "var_b == 2"}, + }, + { + "id": 3, + "name": "Google+", + "cases": {1: "var_c == 1", 2: "var_c == 2"}, + }, ], - 'subvariables': [ - {'id': 1, 'name': 'Facebook', 'cases': {1: 'var_a == 1', 2: 'var_a == 2'}}, - {'id': 2, 'name': 'Twitter', 'cases': {1: 'var_b == 1', 2: 'var_b == 2'}}, - {'id': 3, 'name': 'Google+', 'cases': {1: 'var_c == 1', 2: 'var_c == 2'}}, - ] } with pytest.raises(ValueError) as err: ds.derive_multiple_response(**kwargs) - assert 'Entity test_dataset_name has no (sub)variable' in str(err.value) + assert "Entity test_dataset_name has no (sub)variable" in str(err.value) categories_arg = { - 'column': [1, 2], - 'type': { - 'value': { - 'class': 'categorical', - 'categories': [ - {'missing': False, 'numeric_value': None, 'selected': True, 'id': 1, 'name': 'Yes'}, - {'missing': False, 
'numeric_value': None, 'selected': False, 'id': 2, 'name': 'No'} - ] + "column": [1, 2], + "type": { + "value": { + "class": "categorical", + "categories": [ + { + "missing": False, + "numeric_value": None, + "selected": True, + "id": 1, + "name": "Yes", + }, + { + "missing": False, + "numeric_value": None, + "selected": False, + "id": 2, + "name": "No", + }, + ], } - } + }, } - ds.resource.variables.create.assert_called_with({ - 'element': 'shoji:entity', - 'body': { - 'name': 'my mr', - 'alias': 'mr', - 'description': '', - 'notes': '', - 'uniform_basis': False, - 'derivation': { - 'function': 'array', - 'args': [{ - 'function': 'make_frame', - 'args': [{ - 'map': { - '0001': { - 'references': { - 'name': 'Facebook', - 'alias': 'mr_1', - }, - 'function': 'case', - 'args': [ - categories_arg, - { - 'function': '==', - 'args': [ - {'variable': 'https://test.crunch.io/api/datasets/123456/variables/%s/' % variables['var_a']['id']}, - {'value': 1} - ] - }, - { - 'function': '==', - 'args': [ - {'variable': 'https://test.crunch.io/api/datasets/123456/variables/%s/' % variables['var_a']['id']}, - {'value': 2} - ] - } - ] - }, - '0002': { - 'references': { - 'alias': 'mr_2', - 'name': 'Twitter', - }, - 'function': 'case', - 'args': [ - categories_arg, - { - 'function': '==', - 'args': [ - {'variable': 'https://test.crunch.io/api/datasets/123456/variables/%s/' % variables['var_b']['id']}, - {'value': 1} - ] - }, - { - 'function': '==', - 'args': [ - {'variable': 'https://test.crunch.io/api/datasets/123456/variables/%s/' % variables['var_b']['id']}, - {'value': 2} - ] + ds.resource.variables.create.assert_called_with( + { + "element": "shoji:entity", + "body": { + "name": "my mr", + "alias": "mr", + "description": "", + "notes": "", + "uniform_basis": False, + "derivation": { + "function": "array", + "args": [ + { + "function": "make_frame", + "args": [ + { + "map": { + "0001": { + "references": { + "name": "Facebook", + "alias": "mr_1", + }, + "function": "case", + "args": [ + categories_arg, + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/%s/" + % variables["var_a"][ + "id" + ] + }, + {"value": 1}, + ], + }, + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/%s/" + % variables["var_a"][ + "id" + ] + }, + {"value": 2}, + ], + }, + ], + }, + "0002": { + "references": { + "alias": "mr_2", + "name": "Twitter", + }, + "function": "case", + "args": [ + categories_arg, + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/%s/" + % variables["var_b"][ + "id" + ] + }, + {"value": 1}, + ], + }, + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/%s/" + % variables["var_b"][ + "id" + ] + }, + {"value": 2}, + ], + }, + ], + }, + "0003": { + "references": { + "alias": "mr_3", + "name": "Google+", + }, + "function": "case", + "args": [ + categories_arg, + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/%s/" + % variables["var_c"][ + "id" + ] + }, + {"value": 1}, + ], + }, + { + "function": "==", + "args": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/%s/" + % variables["var_c"][ + "id" + ] + }, + {"value": 2}, + ], + }, + ], + }, } - ] - }, - '0003': { - 'references': { - 'alias': 'mr_3', - 'name': 'Google+', }, - 'function': 'case', - 'args': [ - categories_arg, - { - 'function': '==', - 'args': [ - 
{'variable': 'https://test.crunch.io/api/datasets/123456/variables/%s/' % variables['var_c']['id']}, - {'value': 1} - ] - }, - { - 'function': '==', - 'args': [ - {'variable': 'https://test.crunch.io/api/datasets/123456/variables/%s/' % variables['var_c']['id']}, - {'value': 2} - ] - } - ] - } + {"value": ["0001", "0002", "0003"]}, + ], } - }, { - 'value': ['0001','0002','0003'] - }] - }] - } + ], + }, + }, } - }) + ) class TestCopyVariable(TestCase): - def test_base_variable(self): ds_res = mock.MagicMock() var_res = mock.MagicMock() - var_res.entity.body = {'type': 'numeric'} + var_res.entity.body = {"type": "numeric"} def getitem(key): - if key == 'derived': + if key == "derived": return False + var_res.__getitem__.side_effect = getitem - var_res.entity.self = '/variable/url/' + var_res.entity.self = "/variable/url/" ds = StreamingDataset(ds_res) var = Variable(var_res, ds_res) - ds.copy_variable(var, name='copy', alias='copy') - ds_res.variables.create.assert_called_with({ - 'element': 'shoji:entity', - 'body': { - 'alias': 'copy', - 'name': 'copy', - 'derivation': { - 'function': 'copy_variable', - 'args': [{'variable': '/variable/url/'}] - } + ds.copy_variable(var, name="copy", alias="copy") + ds_res.variables.create.assert_called_with( + { + "element": "shoji:entity", + "body": { + "alias": "copy", + "name": "copy", + "derivation": { + "function": "copy_variable", + "args": [{"variable": "/variable/url/"}], + }, + }, } - }) + ) def test_derived_variable(self): ds_res = mock.MagicMock() var_res = mock.MagicMock() def getitem(key): - if key == 'derived': + if key == "derived": return True + var_res.__getitem__.side_effect = getitem var_res.entity.body = { - 'type': 'multiple_response', - 'derivation': { - 'function': 'array', - 'args': [{ - 'function': 'make_frame', - 'args': [{ - 'map': { - '00001': { - 'function': 'combine_responses', - 'args': [ - {'variable': '../original_variable'} - ] + "type": "multiple_response", + "derivation": { + "function": "array", + "args": [ + { + "function": "make_frame", + "args": [ + { + "map": { + "00001": { + "function": "combine_responses", + "args": [{"variable": "../original_variable"}], + } + } } - } - }] - }] - }} - var_res.entity.self = '/variable/url/' + ], + } + ], + }, + } + var_res.entity.self = "/variable/url/" ds = StreamingDataset(ds_res) var = Variable(var_res, ds_res) - ds.copy_variable(var, name='copy', alias='copy') - ds_res.variables.create.assert_called_with({ - 'element': 'shoji:entity', - 'body': { - 'alias': 'copy', - 'name': 'copy', - 'derivation': { - 'function': 'array', - 'args': [{ - 'function': 'make_frame', - 'args': [{ - 'map': { - '00001': { - 'function': 'combine_responses', - 'args': [ - # Look how the variable url got abs()ed - {'variable': '/variable/original_variable'} - ] - } + ds.copy_variable(var, name="copy", alias="copy") + ds_res.variables.create.assert_called_with( + { + "element": "shoji:entity", + "body": { + "alias": "copy", + "name": "copy", + "derivation": { + "function": "array", + "args": [ + { + "function": "make_frame", + "args": [ + { + "map": { + "00001": { + "function": "combine_responses", + "args": [ + # Look how the variable url got abs()ed + { + "variable": "/variable/original_variable" + } + ], + } + } + } + ], } - }] - }] - } + ], + }, + }, } - }) + ) def test_hide_unhide(): @@ -3728,8 +3683,7 @@ def test_hide_unhide(): class TestProjectsHierarchicalOrder(TestCase): - - ds_base = 'http://test.crunch.local/api/datasets/' + ds_base = "http://test.crunch.local/api/datasets/" @staticmethod 
def _get_update_payload(project): @@ -3741,45 +3695,43 @@ def _get_update_payload(project): def setUp(self): dataset_defs = [ AttributeDict( - id='12345', - name='test_dataset1', - notes='', - description='', + id="12345", + name="test_dataset1", + notes="", + description="", is_published=False, archived=False, end_date=None, start_date=None, ), AttributeDict( - id='67890', - name='test_dataset2', - notes='', - description='', + id="67890", + name="test_dataset2", + notes="", + description="", is_published=False, archived=False, end_date=None, start_date=None, - ) + ), ] hier_order = AttributeDict() hier_order.put = MagicMock() - hier_order.element = 'shoji:order' - hier_order.self = '%svariables/hier/' % self.ds_base + hier_order.element = "shoji:order" + hier_order.self = "%svariables/hier/" % self.ds_base hier_order.graph = [ - { - 'GroupA': [] - }, - '{}{}/'.format(self.ds_base, dataset_defs[0].id), - '{}{}/'.format(self.ds_base, dataset_defs[1].id) + {"GroupA": []}, + "{}{}/".format(self.ds_base, dataset_defs[0].id), + "{}{}/".format(self.ds_base, dataset_defs[1].id), ] _datasets = AttributeDict() for ds in dataset_defs: ds.entity = AttributeDict() - ds.self = '{}{}/'.format(self.ds_base, ds.id) - ds.entity.self = '{}{}/'.format(self.ds_base, ds.id) - ds.entity_url = '{}{}/'.format(self.ds_base, ds.id) + ds.self = "{}{}/".format(self.ds_base, ds.id) + ds.entity.self = "{}{}/".format(self.ds_base, ds.id) + ds.entity_url = "{}{}/".format(self.ds_base, ds.id) ds.body = ds ds.folders = MagicMock() ds.settings = MagicMock() @@ -3789,14 +3741,14 @@ def setUp(self): _datasets[ds.name] = ds # we only need one Dataset to move around - self.ds = StreamingDataset(_datasets['12345']) + self.ds = StreamingDataset(_datasets["12345"]) datasets = AttributeDict() datasets.by = MagicMock(return_value=_datasets) datasets.order = hier_order proj_resource = MagicMock() - proj_resource.self = 'http://test.crunch.local/api/projects/42/' + proj_resource.self = "http://test.crunch.local/api/projects/42/" proj_resource.datasets = datasets self.proj = Project(proj_resource) @@ -3805,300 +3757,254 @@ def test_datasets_project_order_loaded_correctly(self): assert isinstance(proj.order, scrunch.order.ProjectDatasetsOrder) assert isinstance(proj.order.group, scrunch.order.Group) - assert proj.order.group.name == '__root__' + assert proj.order.group.name == "__root__" def test_update_hierarchy_order(self): - """ this marks the initial order of all tests """ + """this marks the initial order of all tests""" proj = self.proj proj.order.update() assert self._get_update_payload(proj) == { - 'element': 'shoji:order', - 'graph': [ - { - 'GroupA': [] - }, - 'http://test.crunch.local/api/datasets/12345/', - 'http://test.crunch.local/api/datasets/67890/' - ] + "element": "shoji:order", + "graph": [ + {"GroupA": []}, + "http://test.crunch.local/api/datasets/12345/", + "http://test.crunch.local/api/datasets/67890/", + ], } def test_access_with_absolute_paths(self): proj = self.proj - root_group = proj.order['|'] + root_group = proj.order["|"] assert isinstance(root_group, scrunch.order.Group) assert root_group.is_root - a_group = proj.order['|GroupA'] + a_group = proj.order["|GroupA"] assert isinstance(a_group, scrunch.order.Group) - assert a_group.name == 'GroupA' + assert a_group.name == "GroupA" def test_access_with_relative_paths(self): proj = self.proj - a_group = proj.order['GroupA'] + a_group = proj.order["GroupA"] assert isinstance(a_group, scrunch.order.Group) - assert a_group.name == 'GroupA' + assert a_group.name == 
"GroupA" def test_access_with_the_in_operator(self): proj = self.proj - assert 'GroupA' in proj.order['|'] - assert 'Invalid Group' not in proj.order['|'] + assert "GroupA" in proj.order["|"] + assert "Invalid Group" not in proj.order["|"] def test_element_str_representation(self): proj = self.proj # Test first-level str representation. assert str(proj.order) == json.dumps( - [ - { - 'GroupA': [] - }, - 'test_dataset1', - 'test_dataset2' - ], - indent=scrunch.order.Group.INDENT_SIZE + [{"GroupA": []}, "test_dataset1", "test_dataset2"], + indent=scrunch.order.Group.INDENT_SIZE, ) def test_datasets_movements(self): proj = self.proj - proj.order['|'].append('12345') + proj.order["|"].append("12345") assert self._get_update_payload(proj) == { - 'element': 'shoji:order', - 'graph': [ - { - 'GroupA': [] - }, - 'http://test.crunch.local/api/datasets/67890/', - 'http://test.crunch.local/api/datasets/12345/' - ] + "element": "shoji:order", + "graph": [ + {"GroupA": []}, + "http://test.crunch.local/api/datasets/67890/", + "http://test.crunch.local/api/datasets/12345/", + ], } - proj.order.place(self.ds, '|GroupA') + proj.order.place(self.ds, "|GroupA") assert self._get_update_payload(proj) == { - 'element': 'shoji:order', - 'graph': [ - { - 'GroupA': ['http://test.crunch.local/api/datasets/12345/'] - }, - 'http://test.crunch.local/api/datasets/67890/' - ] + "element": "shoji:order", + "graph": [ + {"GroupA": ["http://test.crunch.local/api/datasets/12345/"]}, + "http://test.crunch.local/api/datasets/67890/", + ], } - proj.order.place(self.ds, '|', before='67890') + proj.order.place(self.ds, "|", before="67890") assert self._get_update_payload(proj) == { - 'element': 'shoji:order', - 'graph': [ - { - 'GroupA': [] - }, - 'http://test.crunch.local/api/datasets/12345/', - 'http://test.crunch.local/api/datasets/67890/' - ] + "element": "shoji:order", + "graph": [ + {"GroupA": []}, + "http://test.crunch.local/api/datasets/12345/", + "http://test.crunch.local/api/datasets/67890/", + ], } - proj.order.place(self.ds, '|', after='67890') + proj.order.place(self.ds, "|", after="67890") assert self._get_update_payload(proj) == { - 'element': 'shoji:order', - 'graph': [ - { - 'GroupA': [] - }, - 'http://test.crunch.local/api/datasets/67890/', - 'http://test.crunch.local/api/datasets/12345/' - ] + "element": "shoji:order", + "graph": [ + {"GroupA": []}, + "http://test.crunch.local/api/datasets/67890/", + "http://test.crunch.local/api/datasets/12345/", + ], } - proj.order.place(self.ds, '|', position=0) + proj.order.place(self.ds, "|", position=0) assert self._get_update_payload(proj) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/12345/', - { - 'GroupA': [] - }, - 'http://test.crunch.local/api/datasets/67890/' - ] + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/12345/", + {"GroupA": []}, + "http://test.crunch.local/api/datasets/67890/", + ], } - proj.order.place(self.ds, '|', position=1) + proj.order.place(self.ds, "|", position=1) assert self._get_update_payload(proj) == { - 'element': 'shoji:order', - 'graph': [ - { - 'GroupA': [] - }, - 'http://test.crunch.local/api/datasets/12345/', - 'http://test.crunch.local/api/datasets/67890/' - ] + "element": "shoji:order", + "graph": [ + {"GroupA": []}, + "http://test.crunch.local/api/datasets/12345/", + "http://test.crunch.local/api/datasets/67890/", + ], } def test_group_renaming(self): proj = self.proj - proj.order['|GroupA'].rename('GroupB') + proj.order["|GroupA"].rename("GroupB") assert 
self._get_update_payload(proj) == { - 'element': 'shoji:order', - 'graph': [ - { - 'GroupB': [] - }, - 'http://test.crunch.local/api/datasets/12345/', - 'http://test.crunch.local/api/datasets/67890/' - ] + "element": "shoji:order", + "graph": [ + {"GroupB": []}, + "http://test.crunch.local/api/datasets/12345/", + "http://test.crunch.local/api/datasets/67890/", + ], } with pytest.raises(ValueError): - proj.order['|'].rename('Root') + proj.order["|"].rename("Root") with pytest.raises(ValueError): - proj.order['|Account'].rename('id') + proj.order["|Account"].rename("id") with pytest.raises(ValueError): - proj.order['|Account'].rename('My new|Group') + proj.order["|Account"].rename("My new|Group") def test_create_group(self): proj = self.proj - proj.order['|'].create_group('GroupB') + proj.order["|"].create_group("GroupB") assert self._get_update_payload(proj) == { - 'element': 'shoji:order', - 'graph': [ - { - 'GroupA': [] - }, - 'http://test.crunch.local/api/datasets/12345/', - 'http://test.crunch.local/api/datasets/67890/', - { - 'GroupB': [] - } - ] + "element": "shoji:order", + "graph": [ + {"GroupA": []}, + "http://test.crunch.local/api/datasets/12345/", + "http://test.crunch.local/api/datasets/67890/", + {"GroupB": []}, + ], } - proj.order['|GroupB'].create_group('GroupC', alias='12345') + proj.order["|GroupB"].create_group("GroupC", alias="12345") assert self._get_update_payload(proj) == { - 'element': 'shoji:order', - 'graph': [ - { - 'GroupA': [] - }, - 'http://test.crunch.local/api/datasets/67890/', + "element": "shoji:order", + "graph": [ + {"GroupA": []}, + "http://test.crunch.local/api/datasets/67890/", { - 'GroupB': [ - { - 'GroupC': ['http://test.crunch.local/api/datasets/12345/'] - } + "GroupB": [ + {"GroupC": ["http://test.crunch.local/api/datasets/12345/"]} ] - } - ] + }, + ], } @pytest.mark.xfail def test_create_group_before_group(self): proj = self.proj - proj.order['|'].create_group('GroupB', before='GroupA') + proj.order["|"].create_group("GroupB", before="GroupA") assert self._get_update_payload(proj) == { - 'element': 'shoji:order', - 'graph': [ - { - 'GroupB': [] - }, - { - 'GroupA': [] - }, - 'http://test.crunch.local/api/datasets/12345/', - 'http://test.crunch.local/api/datasets/67890/' - ] + "element": "shoji:order", + "graph": [ + {"GroupB": []}, + {"GroupA": []}, + "http://test.crunch.local/api/datasets/12345/", + "http://test.crunch.local/api/datasets/67890/", + ], } def test_move_group(self): proj = self.proj - group = proj.order['|GroupA'] - group.move('|', after='12345') + group = proj.order["|GroupA"] + group.move("|", after="12345") assert self._get_update_payload(proj) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/12345/', - { - 'GroupA': [] - }, - 'http://test.crunch.local/api/datasets/67890/' - ] + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/12345/", + {"GroupA": []}, + "http://test.crunch.local/api/datasets/67890/", + ], } with pytest.raises(scrunch.exceptions.InvalidPathError): - proj.order['|Account|Location'].move('|Invalid Group|') + proj.order["|Account|Location"].move("|Invalid Group|") with pytest.raises(scrunch.exceptions.InvalidPathError): - proj.order['|Account|Location'].move('|Account|Location') + proj.order["|Account|Location"].move("|Account|Location") def test_cross_group_movements(self): proj = self.proj # prepare initial state - proj.order['|'].create_group('GroupB') - proj.order.place(self.ds, '|GroupA') + proj.order["|"].create_group("GroupB") + 
proj.order.place(self.ds, "|GroupA") assert self._get_update_payload(proj) == { - 'element': 'shoji:order', - 'graph': [ - { - 'GroupA': ['http://test.crunch.local/api/datasets/12345/'] - }, - 'http://test.crunch.local/api/datasets/67890/', - { - 'GroupB': [] - } - ] + "element": "shoji:order", + "graph": [ + {"GroupA": ["http://test.crunch.local/api/datasets/12345/"]}, + "http://test.crunch.local/api/datasets/67890/", + {"GroupB": []}, + ], } # test - proj.order.place(self.ds, '|GroupB') + proj.order.place(self.ds, "|GroupB") assert self._get_update_payload(proj) == { - 'element': 'shoji:order', - 'graph': [ - { - 'GroupA': [] - }, - 'http://test.crunch.local/api/datasets/67890/', - { - 'GroupB': ['http://test.crunch.local/api/datasets/12345/'] - } - ] + "element": "shoji:order", + "graph": [ + {"GroupA": []}, + "http://test.crunch.local/api/datasets/67890/", + {"GroupB": ["http://test.crunch.local/api/datasets/12345/"]}, + ], } def test_group_level_reordering(self): proj = self.proj - proj.order['|'].reorder(['12345', '67890', 'GroupA']) + proj.order["|"].reorder(["12345", "67890", "GroupA"]) assert self._get_update_payload(proj) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/12345/', - 'http://test.crunch.local/api/datasets/67890/', - { - 'GroupA': [] - } - ] + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/12345/", + "http://test.crunch.local/api/datasets/67890/", + {"GroupA": []}, + ], } def test_find_group(self): proj = self.proj - group = proj.order['|'].find_group('GroupA') + group = proj.order["|"].find_group("GroupA") assert isinstance(group, scrunch.order.Group) - assert group.name == 'GroupA' + assert group.name == "GroupA" def test_find_group_of_dataset(self): proj = self.proj - group = proj.order['|'].find('12345') + group = proj.order["|"].find("12345") assert isinstance(group, scrunch.order.Group) - assert group.name == '__root__' + assert group.name == "__root__" def test_order_iteration(self): proj = self.proj @@ -4107,36 +4013,36 @@ def test_order_iteration(self): items = [item for item in proj.order] assert isinstance(items[0], scrunch.order.Group) # GroupA - assert items[1].name == 'test_dataset1' - assert items[2].name == 'test_dataset2' + assert items[1].name == "test_dataset1" + assert items[2].name == "test_dataset2" def test_order_iteration_values(self): proj = self.proj items = proj.order.values() assert isinstance(items[0], scrunch.order.Group) # GroupA - assert items[1].name == 'test_dataset1' - assert items[2].name == 'test_dataset2' + assert items[1].name == "test_dataset1" + assert items[2].name == "test_dataset2" def test_order_iteration_itervalues(self): proj = self.proj items = [item for item in proj.order.itervalues()] assert isinstance(items[0], scrunch.order.Group) # GroupA - assert items[1].name == 'test_dataset1' - assert items[2].name == 'test_dataset2' + assert items[1].name == "test_dataset1" + assert items[2].name == "test_dataset2" def test_order_iteration_keys(self): proj = self.proj keys = proj.order.keys() - assert keys == ['GroupA', '12345', '67890'] + assert keys == ["GroupA", "12345", "67890"] def test_order_iteration_iterkeys(self): proj = self.proj keys = [k for k in proj.order.iterkeys()] - assert keys == ['GroupA', '12345', '67890'] + assert keys == ["GroupA", "12345", "67890"] def test_order_iteration_items(self): proj = self.proj @@ -4147,15 +4053,14 @@ def test_order_iteration_items(self): keys.append(k) items.append(v) - assert keys == ['GroupA', '12345', 
'67890'] + assert keys == ["GroupA", "12345", "67890"] assert isinstance(items[0], scrunch.order.Group) # GroupA - assert items[1].name == 'test_dataset1' - assert items[2].name == 'test_dataset2' + assert items[1].name == "test_dataset1" + assert items[2].name == "test_dataset2" class TestDatasetsHierarchicalOrder(TestCase): - - ds_url = 'http://test.crunch.local/api/datasets/123/' + ds_url = "http://test.crunch.local/api/datasets/123/" @staticmethod def _get_update_payload(ds): @@ -4167,149 +4072,132 @@ def _get_update_payload(ds): def setUp(self): variable_defs = [ AttributeDict( - id='000001', - alias='id', - name='ID', - type='numeric', - is_subvar=False + id="000001", alias="id", name="ID", type="numeric", is_subvar=False ), AttributeDict( - id='000002', - alias='hobbies', - name='Hobbies', - type='text', - is_subvar=False + id="000002", + alias="hobbies", + name="Hobbies", + type="text", + is_subvar=False, ), AttributeDict( - id='000003', - alias='registration_time', - name='Registration Time', - type='numeric', - is_subvar=False + id="000003", + alias="registration_time", + name="Registration Time", + type="numeric", + is_subvar=False, ), AttributeDict( - id='000004', - alias='last_login_time', - name='Last Login Time', - type='numeric', - is_subvar=False + id="000004", + alias="last_login_time", + name="Last Login Time", + type="numeric", + is_subvar=False, ), AttributeDict( - id='000005', - alias='first_name', - name='First Name', - type='text', - is_subvar=False + id="000005", + alias="first_name", + name="First Name", + type="text", + is_subvar=False, ), AttributeDict( - id='000006', - alias='last_name', - name='Last Name', - type='text', - is_subvar=False + id="000006", + alias="last_name", + name="Last Name", + type="text", + is_subvar=False, ), AttributeDict( - id='000007', - alias='gender', - name='Gender', - type='text', - is_subvar=False + id="000007", alias="gender", name="Gender", type="text", is_subvar=False ), AttributeDict( - id='000008', - alias='country', - name='Country', - type='text', - is_subvar=False + id="000008", + alias="country", + name="Country", + type="text", + is_subvar=False, ), AttributeDict( - id='000009', - alias='city', - name='City', - type='text', - is_subvar=False + id="000009", alias="city", name="City", type="text", is_subvar=False ), AttributeDict( - id='000010', - alias='zip_code', - name='Zip Code', - type='text', - is_subvar=False + id="000010", + alias="zip_code", + name="Zip Code", + type="text", + is_subvar=False, ), AttributeDict( - id='000011', - alias='address', - name='Address', - type='text', - is_subvar=False + id="000011", + alias="address", + name="Address", + type="text", + is_subvar=False, ), AttributeDict( - id='000012', - alias='music', - name='Music', - type='text', - is_subvar=False + id="000012", alias="music", name="Music", type="text", is_subvar=False ), AttributeDict( - id='000013', - alias='religion', - name='Religion', - type='text', - is_subvar=False - ) + id="000013", + alias="religion", + name="Religion", + type="text", + is_subvar=False, + ), ] table = { - 'element': 'crunch:table', - 'self': '%stable/' % self.ds_url, - 'metadata': collections.OrderedDict() + "element": "crunch:table", + "self": "%stable/" % self.ds_url, + "metadata": collections.OrderedDict(), } hier_order = AttributeDict() hier_order.put = MagicMock() - hier_order.element = 'shoji:order' - hier_order.self = '%svariables/hier/' % self.ds_url + hier_order.element = "shoji:order" + hier_order.self = "%svariables/hier/" % self.ds_url 
hier_order.graph = [ - '../000001/', # id - '../000002/', # hobbies + "../000001/", # id + "../000002/", # hobbies { - 'Account': [ - '../000003/', # registration_time - '../000004/', # last_login_time + "Account": [ + "../000003/", # registration_time + "../000004/", # last_login_time { - 'User Information': [ - '../000005/', # first_name - '../000006/', # last_name - '../000007/' # gender + "User Information": [ + "../000005/", # first_name + "../000006/", # last_name + "../000007/", # gender ] }, { - 'Location': [ - '../000008/', # country - '../000009/', # city - '../000010/', # zip_code - '../000011/' # address + "Location": [ + "../000008/", # country + "../000009/", # city + "../000010/", # zip_code + "../000011/", # address ] - } + }, ] }, - '../000012/', # music - '../000013/' # religion + "../000012/", # music + "../000013/", # religion ] index = { - '%svariables/%s/' % (self.ds_url, var['id']): var - for var in variable_defs + "%svariables/%s/" % (self.ds_url, var["id"]): var for var in variable_defs } _variables = AttributeDict() for var in variable_defs: var.entity = AttributeDict() - var.entity.self = '{}variables/{}/'.format(self.ds_url, var.id) - var.entity_url = '{}variables/{}/'.format(self.ds_url, var.id) - table['metadata'][var['id']] = var - _variables[var['id']] = var # for .variables.by('id') - _variables[var['alias']] = var # for .variables.by('alias') + var.entity.self = "{}variables/{}/".format(self.ds_url, var.id) + var.entity_url = "{}variables/{}/".format(self.ds_url, var.id) + table["metadata"][var["id"]] = var + _variables[var["id"]] = var # for .variables.by('id') + _variables[var["alias"]] = var # for .variables.by('alias') variables = AttributeDict() variables.by = MagicMock(return_value=_variables) @@ -4328,58 +4216,58 @@ def test_order_property_is_loaded_correctly(self): assert isinstance(ds.order, scrunch.order.DatasetVariablesOrder) assert isinstance(ds.order.group, scrunch.order.Group) # root group - assert ds.order.group.name == '__root__' + assert ds.order.group.name == "__root__" def test_access_with_absolute_paths(self): ds = self.ds # The "root" Group. 
- root_group = ds.order['|'] + root_group = ds.order["|"] assert isinstance(root_group, scrunch.order.Group) assert root_group.is_root # Sub-groups - group = ds.order['|Account'] + group = ds.order["|Account"] assert isinstance(group, scrunch.order.Group) - assert group.name == 'Account' + assert group.name == "Account" assert group.parent == ds.order.group - group = ds.order['|Account|User Information|'] + group = ds.order["|Account|User Information|"] assert isinstance(group, scrunch.order.Group) - assert group.name == 'User Information' - assert group.parent == ds.order['|Account'] + assert group.name == "User Information" + assert group.parent == ds.order["|Account"] with pytest.raises(scrunch.exceptions.InvalidPathError): - ds.order['|Account|Invalid Group|'] + ds.order["|Account|Invalid Group|"] with pytest.raises(scrunch.exceptions.InvalidPathError): - ds.order['|Invalid Group|'] + ds.order["|Invalid Group|"] def test_access_with_relative_paths(self): ds = self.ds - acct_group = ds.order['Account'] + acct_group = ds.order["Account"] assert isinstance(acct_group, scrunch.order.Group) - assert acct_group.name == 'Account' + assert acct_group.name == "Account" assert acct_group.parent == ds.order.group - usr_info_group = acct_group['User Information'] + usr_info_group = acct_group["User Information"] assert isinstance(usr_info_group, scrunch.order.Group) - assert usr_info_group.name == 'User Information' + assert usr_info_group.name == "User Information" assert usr_info_group.parent == acct_group with pytest.raises(scrunch.exceptions.InvalidPathError): - ds.order['Invalid Group'] + ds.order["Invalid Group"] with pytest.raises(scrunch.exceptions.InvalidPathError): - acct_group['Another Invalid Group'] + acct_group["Another Invalid Group"] def test_access_with_the_in_operator(self): ds = self.ds - assert 'Account' in ds.order['|'] - assert 'Invalid Group' not in ds.order['|'] - assert 'User Information' in ds.order['|Account'] + assert "Account" in ds.order["|"] + assert "Invalid Group" not in ds.order["|"] + assert "User Information" in ds.order["|Account"] def test_element_str_representation(self): ds = self.ds @@ -4387,1431 +4275,1510 @@ def test_element_str_representation(self): # Test first-level str representation. assert str(ds.order) == json.dumps( [ - 'ID', - 'Hobbies', + "ID", + "Hobbies", { - 'Account': [ - 'Registration Time', - 'Last Login Time', - { - 'User Information': [ - 'First Name', - 'Last Name', - 'Gender' - ] - }, - { - 'Location': [ - 'Country', - 'City', - 'Zip Code', - 'Address' - ] - } + "Account": [ + "Registration Time", + "Last Login Time", + {"User Information": ["First Name", "Last Name", "Gender"]}, + {"Location": ["Country", "City", "Zip Code", "Address"]}, ] }, - 'Music', - 'Religion' + "Music", + "Religion", ], - indent=scrunch.order.Group.INDENT_SIZE + indent=scrunch.order.Group.INDENT_SIZE, ) # Test sub-group str representation. 
- assert str(ds.order['|Account']) == json.dumps( + assert str(ds.order["|Account"]) == json.dumps( [ - 'Registration Time', - 'Last Login Time', - { - 'User Information': [ - 'First Name', - 'Last Name', - 'Gender' - ] - }, - { - 'Location': [ - 'Country', - 'City', - 'Zip Code', - 'Address' - ] - } + "Registration Time", + "Last Login Time", + {"User Information": ["First Name", "Last Name", "Gender"]}, + {"Location": ["Country", "City", "Zip Code", "Address"]}, ], - indent=scrunch.order.Group.INDENT_SIZE + indent=scrunch.order.Group.INDENT_SIZE, ) - assert str(ds.order['|Account|User Information']) == json.dumps( - [ - 'First Name', - 'Last Name', - 'Gender' - ], - indent=scrunch.order.Group.INDENT_SIZE + assert str(ds.order["|Account|User Information"]) == json.dumps( + ["First Name", "Last Name", "Gender"], + indent=scrunch.order.Group.INDENT_SIZE, ) def test_update_hierarchy_order(self): ds = self.ds ds.order.update() - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/' # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/' # address - ] - } - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - ] - } + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + ], + } + ) def test_local_movements(self): ds = self.ds - ds.order['|'].append('id') - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - 
'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/' # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/' # address - ] - } - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - ] - } + ds.order["|"].append("id") + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + ], + } + ) - ds.order['|'].insert('music', position=1) - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/' # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/' # address - ] - } - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - ] - } + ds.order["|"].insert("music", position=1) + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": 
[ + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + ], + } + ) - ds.order['|'].insert(alias=['id', 'Account'], position=3) - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/' # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/' # address - ] - } - ] - }, - ] - } + ds.order["|"].insert(alias=["id", "Account"], position=3) + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, 
+ ] + }, + ], + } + ) - ds.order['|'].insert(['Account', 'id']) - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/' # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/' # address - ] - } - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - ] - } + ds.order["|"].insert(["Account", "id"]) + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + ], + } + ) - ds.order['|'].append('hobbies') - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/' # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/' # address - ] - } - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 
'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - ] - } + ds.order["|"].append("hobbies") + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + ], + } + ) - ds.order['|'].insert(['hobbies', 'religion'], before='music') - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/' # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/' # address - ] - } - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - ] - } + ds.order["|"].insert(["hobbies", "religion"], before="music") + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + ] + 
}, + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + ], + } + ) - ds.order['|'].insert('Account', after='id') - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/' # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/' # address - ] - } - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - ] - } + ds.order["|"].insert("Account", after="id") + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + ], + } + ) def test_local_movements_using_paths(self): ds = self.ds - ds.order['|Account'].append('registration_time') - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/' # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # 
country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/' # address - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - ] - } - - ds.order['|Account|User Information'].insert('gender', position=1) - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/' # address - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - ] - } + ds.order["|Account"].append("registration_time") + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + ], + } + ) - ds.order['Account|Location'].insert(['country', 'zip_code'], 2) - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - 
'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - ] - } + ds.order["|Account|User Information"].insert("gender", position=1) + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + ], + } + ) - ds.order['|Account|Location'].insert('address') - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - ] - } + ds.order["Account|Location"].insert(["country", "zip_code"], 2) + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + 
"User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + ], + } + ) - ds.order['|Account'].append('User Information') - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - ] - }, - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - ] - } + ds.order["|Account|Location"].insert("address") + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + ], + } + ) - ds.order['|Account'].insert('Location', before='last_login_time') - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 
'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - ] - }, - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - ] - } + ds.order["|Account"].append("User Information") + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + ], + } + ) - ds.order['|Account|Location'].insert('city', after='country') - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - ] - }, - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - ] - } + ds.order["|Account"].insert("Location", 
before="last_login_time") + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + ], + } + ) + + ds.order["|Account|Location"].insert("city", after="country") + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + ], + } + ) def test_cross_group_movements(self): ds = self.ds - ds.order['|'].append('gender') - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - ] - } - ] - }, - 
'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - ] - } + ds.order["|"].append("gender") + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ], + } + ) - ds.order['|'].insert('address', position=1) - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - ] - } - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - ] - } + ds.order["|"].insert("address", position=1) + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + 
"http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ], + } + ) - ds.order['|'].insert(alias=['last_login_time', 'Location'], position=3) - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - ] - }, - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - ] - }, - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - ] - } + ds.order["|"].insert(alias=["last_login_time", "Location"], position=3) + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + ] + }, + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ], + } + ) - ds.order['|'].insert(['User Information', 'country']) - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - 
'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - ] - }, - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - ] - } + ds.order["|"].insert(["User Information", "country"]) + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + ] + }, + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/" # registration_time + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ], + } + ) - ds.order['|'].append('zip_code') - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - ] - }, - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - ] - } + ds.order["|"].append("zip_code") + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + 
"http://test.crunch.local/api/datasets/123/variables/000011/", # address + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000009/" # city + ] + }, + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/" # registration_time + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + ], + } + ) - ds.order['Account'].append('last_login_time') - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - ] - }, - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - ] - } + ds.order["Account"].append("last_login_time") + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000009/" # city + ] + }, + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + ], + } + ) - ds.order['|User Information'].append('gender') - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 
'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - ] - }, - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - ] - } + ds.order["|User Information"].append("gender") + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000009/" # city + ] + }, + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + ], + } + ) - ds.order['|Account'].append(['User Information', 'Location']) - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - ] - }, - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - ] - } + ds.order["|Account"].append(["User Information", "Location"]) + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + 
"http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000009/" # city + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + ], + } + ) - ds.order['|Account|Location'].insert('address') - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - ] - }, - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - ] - } + ds.order["|Account|Location"].insert("address") + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + ], + } + ) 
- ds.order['|Account|Location|'].insert('country', after='city') - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city, - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - ] - }, - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - ] - } + ds.order["|Account|Location|"].insert("country", after="city") + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + "http://test.crunch.local/api/datasets/123/variables/000009/", # city, + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + ], + } + ) def test_group_level_reordering(self): ds = self.ds - ds.order['|'].reorder(['id', 'hobbies', 'music', 'religion', 'Account']) - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/' # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 
'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/' # address - ] - } - ] - }, - ] - } + ds.order["|"].reorder(["id", "hobbies", "music", "religion", "Account"]) + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + ] + }, + ], + } + ) - ds.order['|Account'].reorder([ - 'User Information', - 'Location', - 'registration_time', - 'last_login_time' - ]) - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - { - 'Account': [ - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/' # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/' # address - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - ] - }, - ] - } + ds.order["|Account"].reorder( + ["User Information", "Location", "registration_time", "last_login_time"] + ) + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + { + "Account": [ + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + 
"http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + ] + }, + ], + } + ) def test_movement_errors(self): ds = self.ds with pytest.raises(ValueError): - ds.order['|Account|User Information'].append('invalid_alias') + ds.order["|Account|User Information"].append("invalid_alias") with pytest.raises(ValueError): - ds.order['|Account'].insert(alias=['id', 'invalid_alias']) + ds.order["|Account"].insert(alias=["id", "invalid_alias"]) with pytest.raises(IndexError): - ds.order['|Account'].insert('gender', position=999) + ds.order["|Account"].insert("gender", position=999) with pytest.raises(TypeError): - ds.order['|Account'].insert('id', before=1) + ds.order["|Account"].insert("id", before=1) with pytest.raises(scrunch.exceptions.InvalidReferenceError): - ds.order['|Account'].insert('id', before='unknown') + ds.order["|Account"].insert("id", before="unknown") def test_group_creation(self): ds = self.ds - ds.order['|Account'].create_group( - 'Login Details', alias=['registration_time', 'last_login_time'] + ds.order["|Account"].create_group( + "Login Details", alias=["registration_time", "last_login_time"] + ) + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + { + "Login Details": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + ], + } ) - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 
'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - ] - }, - { - 'Login Details': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - ] - }, - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - ] - } with pytest.raises(ValueError): - ds.order['|'].create_group('Account') + ds.order["|"].create_group("Account") with pytest.raises(ValueError): - ds.order['|'].create_group('My new|Group') + ds.order["|"].create_group("My new|Group") - ds.order['|Account'].create_group('New empty') + ds.order["|Account"].create_group("New empty") - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - ] - }, - { - 'Login Details': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - ] - }, - { - 'New empty': [] # empty group - }, - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - ] - } + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + { + "Login Details": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + ] + }, + { + "New empty": [] # empty group + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + ], + } + ) - ds.order['|Account'].create_group('Gewürze / Inhaltsstoffe', alias=[ - 'music', 'religion']) + ds.order["|Account"].create_group( + "Gewürze / Inhaltsstoffe", alias=["music", 
"religion"] + ) - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/' # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/' # address - ] - }, - { - 'Login Details': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/' # last_login_time - ] - }, - { - 'New empty': [] # empty group - }, - { - 'Gewürze / Inhaltsstoffe': [ - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/' # religion - ] - } - ] - } - ] - } + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + { + "Login Details": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + ] + }, + { + "New empty": [] # empty group + }, + { + "Gewürze / Inhaltsstoffe": [ + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + ] + }, + ] + }, + ], + } + ) - ds.order['|Account|User Information'].create_group('PII', alias=[ - 'first_name', 'last_name'], after='gender') + ds.order["|Account|User Information"].create_group( + "PII", alias=["first_name", "last_name"], after="gender" + ) - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - { - 'PII': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - ] - }, - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 
'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - ] - }, - { - 'Login Details': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/' # last_login_time - ] - }, - { - 'New empty': [] # empty group - }, - { - 'Gewürze / Inhaltsstoffe': [ - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/' # religion - ] - } - ] - } - ] - } + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + { + "PII": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + ] + }, + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + { + "Login Details": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + ] + }, + { + "New empty": [] # empty group + }, + { + "Gewürze / Inhaltsstoffe": [ + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + ] + }, + ] + }, + ], + } + ) - ds.order['|Account|Location'].create_group('PII', alias=['address'], - before='zip_code') + ds.order["|Account|Location"].create_group( + "PII", alias=["address"], before="zip_code" + ) - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - { - 'PII': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - ] - }, - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - { - 'PII': [ - 'http://test.crunch.local/api/datasets/123/variables/000011/' # address - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - ] - }, - { - 'Login Details': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/' # last_login_time - ] - }, - { - 'New empty': [] # empty group - }, - { - 'Gewürze / Inhaltsstoffe': [ - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/' # religion - ] - } - ] - } - ] - } + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + 
"http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + { + "PII": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + ] + }, + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + { + "PII": [ + "http://test.crunch.local/api/datasets/123/variables/000011/" # address + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + ] + }, + { + "Login Details": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + ] + }, + { + "New empty": [] # empty group + }, + { + "Gewürze / Inhaltsstoffe": [ + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + ] + }, + ] + }, + ], + } + ) @pytest.mark.xfail def test_create_group_before_group(self): ds = self.ds - ds.order['|Account'].create_group('Login Details', before='User Information') + ds.order["|Account"].create_group("Login Details", before="User Information") assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', - 'http://test.crunch.local/api/datasets/123/variables/000002/', + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", + "http://test.crunch.local/api/datasets/123/variables/000002/", { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', - 'http://test.crunch.local/api/datasets/123/variables/000004/', - { - 'Login Details': [] - }, + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", + "http://test.crunch.local/api/datasets/123/variables/000004/", + {"Login Details": []}, { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', - 'http://test.crunch.local/api/datasets/123/variables/000006/', - 'http://test.crunch.local/api/datasets/123/variables/000007/' + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", + "http://test.crunch.local/api/datasets/123/variables/000006/", + "http://test.crunch.local/api/datasets/123/variables/000007/", ] }, { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', - 'http://test.crunch.local/api/datasets/123/variables/000009/', - 'http://test.crunch.local/api/datasets/123/variables/000010/', - 'http://test.crunch.local/api/datasets/123/variables/000011/' + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", + "http://test.crunch.local/api/datasets/123/variables/000009/", + "http://test.crunch.local/api/datasets/123/variables/000010/", + "http://test.crunch.local/api/datasets/123/variables/000011/", ] - } + }, ] }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', - 'http://test.crunch.local/api/datasets/123/variables/000013/' - ] + "http://test.crunch.local/api/datasets/123/variables/000012/", + "http://test.crunch.local/api/datasets/123/variables/000013/", + ], } def test_group_renaming(self): ds = self.ds - 
ds.order['|Account|User Information'].rename('User Info') - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Info': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/' # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/' # address - ] - } - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/' # religion - ] - } + ds.order["|Account|User Information"].rename("User Info") + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Info": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + ], + } + ) with pytest.raises(ValueError): - ds.order['|'].rename('Root') + ds.order["|"].rename("Root") with pytest.raises(ValueError): - ds.order['|Account'].rename('id') + ds.order["|Account"].rename("id") with pytest.raises(ValueError): - ds.order['|Account'].rename('My new|Group') + ds.order["|Account"].rename("My new|Group") def test_move_group(self): ds = self.ds - group = ds.order['|Account|User Information'] - assert group.name == 'User Information' - group.move('|') - assert self._get_update_payload(ds) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 
'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/', # address - ] - } - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/', # religion - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - ] - }, - ] - } + group = ds.order["|Account|User Information"] + assert group.name == "User Information" + group.move("|") + assert ( + self._get_update_payload(ds) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + ], + } + ) with pytest.raises(scrunch.exceptions.InvalidPathError): - ds.order['|Account|Location'].move('|Invalid Group|') + ds.order["|Account|Location"].move("|Invalid Group|") with pytest.raises(scrunch.exceptions.InvalidPathError): - ds.order['|Account|Location'].move('|Account|Location') + ds.order["|Account|Location"].move("|Account|Location") def test_move_variable(self): ds = self.ds - var = ds['id'] - assert var.name == 'ID' - var.move('|Account|User Information') - assert self._get_update_payload(var.dataset) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/' # address - ] - } - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/' # religion - ] - } + var = ds["id"] + 
assert var.name == "ID" + var.move("|Account|User Information") + assert ( + self._get_update_payload(var.dataset) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + ], + } + ) with pytest.raises(scrunch.exceptions.InvalidPathError): - var.move('|Account|Invalid Group') - - var.move('|Account', before='registration_time') - assert self._get_update_payload(var.dataset) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/' # address - ] - } - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000013/' # religion - ] - } + var.move("|Account|Invalid Group") + + var.move("|Account", before="registration_time") + assert ( + self._get_update_payload(var.dataset) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + 
"http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + ], + } + ) - var.move('|', after='music') - assert self._get_update_payload(var.dataset) == { - 'element': 'shoji:order', - 'graph': [ - 'http://test.crunch.local/api/datasets/123/variables/000002/', # hobbies - { - 'Account': [ - 'http://test.crunch.local/api/datasets/123/variables/000003/', # registration_time - 'http://test.crunch.local/api/datasets/123/variables/000004/', # last_login_time - { - 'User Information': [ - 'http://test.crunch.local/api/datasets/123/variables/000005/', # first_name - 'http://test.crunch.local/api/datasets/123/variables/000006/', # last_name - 'http://test.crunch.local/api/datasets/123/variables/000007/', # gender - ] - }, - { - 'Location': [ - 'http://test.crunch.local/api/datasets/123/variables/000008/', # country - 'http://test.crunch.local/api/datasets/123/variables/000009/', # city - 'http://test.crunch.local/api/datasets/123/variables/000010/', # zip_code - 'http://test.crunch.local/api/datasets/123/variables/000011/' # address - ] - } - ] - }, - 'http://test.crunch.local/api/datasets/123/variables/000012/', # music - 'http://test.crunch.local/api/datasets/123/variables/000001/', # id - 'http://test.crunch.local/api/datasets/123/variables/000013/' # religion - ] - } + var.move("|", after="music") + assert ( + self._get_update_payload(var.dataset) + == { + "element": "shoji:order", + "graph": [ + "http://test.crunch.local/api/datasets/123/variables/000002/", # hobbies + { + "Account": [ + "http://test.crunch.local/api/datasets/123/variables/000003/", # registration_time + "http://test.crunch.local/api/datasets/123/variables/000004/", # last_login_time + { + "User Information": [ + "http://test.crunch.local/api/datasets/123/variables/000005/", # first_name + "http://test.crunch.local/api/datasets/123/variables/000006/", # last_name + "http://test.crunch.local/api/datasets/123/variables/000007/", # gender + ] + }, + { + "Location": [ + "http://test.crunch.local/api/datasets/123/variables/000008/", # country + "http://test.crunch.local/api/datasets/123/variables/000009/", # city + "http://test.crunch.local/api/datasets/123/variables/000010/", # zip_code + "http://test.crunch.local/api/datasets/123/variables/000011/", # address + ] + }, + ] + }, + "http://test.crunch.local/api/datasets/123/variables/000012/", # music + "http://test.crunch.local/api/datasets/123/variables/000001/", # id + "http://test.crunch.local/api/datasets/123/variables/000013/", # religion + ], + } + ) def test_order_iteration(self): ds = self.ds @@ -5819,48 +5786,48 @@ def test_order_iteration(self): # consume all items in the dataset order items = [item for item in ds.order] - assert items[0].name == 'ID' - assert items[1].name == 'Hobbies' + assert items[0].name == "ID" + assert items[1].name == "Hobbies" assert isinstance(items[2], scrunch.order.Group) # Account - assert items[2].name == 'Account' - assert items[3].name == 'Music' - assert items[4].name == 'Religion' + assert items[2].name == "Account" + assert items[3].name == "Music" + assert items[4].name == "Religion" def test_order_iteration_values(self): ds = self.ds items = ds.order.values() - assert items[0].name == 'ID' - assert items[1].name == 'Hobbies' + assert items[0].name == "ID" + assert items[1].name == 
"Hobbies" assert isinstance(items[2], scrunch.order.Group) # Account - assert items[2].name == 'Account' - assert items[3].name == 'Music' - assert items[4].name == 'Religion' + assert items[2].name == "Account" + assert items[3].name == "Music" + assert items[4].name == "Religion" def test_order_iteration_itervalues(self): ds = self.ds items = [item for item in ds.order.itervalues()] - assert items[0].name == 'ID' - assert items[1].name == 'Hobbies' + assert items[0].name == "ID" + assert items[1].name == "Hobbies" assert isinstance(items[2], scrunch.order.Group) # Account - assert items[2].name == 'Account' - assert items[3].name == 'Music' - assert items[4].name == 'Religion' + assert items[2].name == "Account" + assert items[3].name == "Music" + assert items[4].name == "Religion" def test_order_iteration_keys(self): ds = self.ds keys = ds.order.keys() - assert keys == ['id', 'hobbies', 'Account', 'music', 'religion'] + assert keys == ["id", "hobbies", "Account", "music", "religion"] def test_order_iteration_iterkeys(self): ds = self.ds keys = [k for k in ds.order.iterkeys()] - assert keys == ['id', 'hobbies', 'Account', 'music', 'religion'] + assert keys == ["id", "hobbies", "Account", "music", "religion"] def test_order_iteration_items(self): ds = self.ds @@ -5871,39 +5838,38 @@ def test_order_iteration_items(self): keys.append(k) items.append(v) - assert keys == ['id', 'hobbies', 'Account', 'music', 'religion'] - assert items[0].name == 'ID' - assert items[1].name == 'Hobbies' + assert keys == ["id", "hobbies", "Account", "music", "religion"] + assert items[0].name == "ID" + assert items[1].name == "Hobbies" assert isinstance(items[2], scrunch.order.Group) # Account - assert items[2].name == 'Account' - assert items[3].name == 'Music' - assert items[4].name == 'Religion' + assert items[2].name == "Account" + assert items[3].name == "Music" + assert items[4].name == "Religion" class TestDatasetSettings(TestCase): - - ds_url = 'http://test.crunch.local/api/datasets/123/' + ds_url = "http://test.crunch.local/api/datasets/123/" def setUp(self): settings = { - 'element': 'shoji:entity', - 'self': '%ssettings/' % self.ds_url, - 'body': { - 'viewers_can_export': False, - 'min_base_size': 0, - 'weight': None, - 'viewers_can_change_weight': False - } + "element": "shoji:entity", + "self": "%ssettings/" % self.ds_url, + "body": { + "viewers_can_export": False, + "min_base_size": 0, + "weight": None, + "viewers_can_change_weight": False, + }, } def _session_get(*args): - if args[0] == '{}settings/'.format(self.ds_url): + if args[0] == "{}settings/".format(self.ds_url): return _CrunchPayload(settings) return _CrunchPayload() ds_resource = MagicMock() ds_resource.self = self.ds_url - ds_resource.fragments.settings = '%ssettings/' % self.ds_url + ds_resource.fragments.settings = "%ssettings/" % self.ds_url ds_resource.session.get.side_effect = _session_get self.ds = StreamingDataset(ds_resource) @@ -5912,10 +5878,10 @@ def test_settings_are_displayed_as_dict_obj(self): assert isinstance(ds.settings, dict) assert ds.settings == { - 'viewers_can_export': False, - 'min_base_size': 0, - 'weight': None, - 'viewers_can_change_weight': False + "viewers_can_export": False, + "min_base_size": 0, + "weight": None, + "viewers_can_change_weight": False, } def test_settings_obj_is_protected_from_modifications(self): @@ -5929,16 +5895,16 @@ def test_settings_dict_obj_is_read_only(self): ds = self.ds with pytest.raises(RuntimeError): - ds.settings['viewers_can_export'] = 'invalid' + 
ds.settings["viewers_can_export"] = "invalid" with pytest.raises(RuntimeError): - del ds.settings['viewers_can_export'] + del ds.settings["viewers_can_export"] with pytest.raises(RuntimeError): ds.settings.pop() with pytest.raises(RuntimeError): - ds.settings.update({'viewers_can_export': 'invalid'}) + ds.settings.update({"viewers_can_export": "invalid"}) with pytest.raises(RuntimeError): ds.settings.clear() @@ -5950,28 +5916,28 @@ def test_change_settings(self): ds.change_settings(viewers_can_export=True) _url = ds.resource.session.patch.call_args_list[-1][0][0] _payload = json.loads(ds.resource.session.patch.call_args_list[-1][0][1]) - _headers = ds.resource.session.patch.call_args_list[-1][1].get('headers', {}) - assert _url == self.ds_url + 'settings/' - assert _payload == {'viewers_can_export': True} - assert _headers == {'Content-Type': 'application/json'} + _headers = ds.resource.session.patch.call_args_list[-1][1].get("headers", {}) + assert _url == self.ds_url + "settings/" + assert _payload == {"viewers_can_export": True} + assert _headers == {"Content-Type": "application/json"} ds.change_settings( viewers_can_export=True, viewers_can_change_weight=True, viewers_can_share=False, - dashboard_deck='https://test.crunch.io/datasets/123/decks/123' + dashboard_deck="https://test.crunch.io/datasets/123/decks/123", ) _url = ds.resource.session.patch.call_args_list[-1][0][0] _payload = json.loads(ds.resource.session.patch.call_args_list[-1][0][1]) - _headers = ds.resource.session.patch.call_args_list[-1][1].get('headers', {}) - assert _url == self.ds_url + 'settings/' + _headers = ds.resource.session.patch.call_args_list[-1][1].get("headers", {}) + assert _url == self.ds_url + "settings/" assert _payload == { - 'viewers_can_export': True, - 'viewers_can_change_weight': True, - 'viewers_can_share': False, - 'dashboard_deck': 'https://test.crunch.io/datasets/123/decks/123' + "viewers_can_export": True, + "viewers_can_change_weight": True, + "viewers_can_share": False, + "dashboard_deck": "https://test.crunch.io/datasets/123/decks/123", } - assert _headers == {'Content-Type': 'application/json'} + assert _headers == {"Content-Type": "application/json"} # Test that trying to edit invalid or read-only settings is forbidden. 
with pytest.raises(ValueError): @@ -5981,11 +5947,11 @@ def test_change_settings(self): class TestDatasetJoins(TestCase): - left_ds_url = 'https://test.crunch.io/api/datasets/123/' - right_ds_url = 'https://test.crunch.io/api/datasets/456/' + left_ds_url = "https://test.crunch.io/api/datasets/123/" + right_ds_url = "https://test.crunch.io/api/datasets/456/" def _variable_mock(self, ds_url, var): - var_url = '%svariables/%s/' % (ds_url, var['id']) + var_url = "%svariables/%s/" % (ds_url, var["id"]) _get_func = _build_get_func(var) _var_mock = MagicMock() _var_mock.__getitem__.side_effect = _get_func @@ -5998,17 +5964,17 @@ def _variable_mock(self, ds_url, var): def setUp(self): var = { - 'id': '000001', - 'alias': 'id', - 'name': 'ID', - 'type': 'numeric', - 'is_subvar': False + "id": "000001", + "alias": "id", + "name": "ID", + "type": "numeric", + "is_subvar": False, } # setup for left dataset _left_var_mock = self._variable_mock(self.left_ds_url, var) left_variable = collections.OrderedDict() - left_variable[var['alias']] = _left_var_mock + left_variable[var["alias"]] = _left_var_mock left_ds_res = MagicMock() left_ds_res.self = self.left_ds_url left_ds_res.variables.by.return_value = left_variable @@ -6017,7 +5983,7 @@ def setUp(self): # setup for right dataset _right_var_mock = self._variable_mock(self.right_ds_url, var) right_variable = collections.OrderedDict() - right_variable[var['alias']] = _right_var_mock + right_variable[var["alias"]] = _right_var_mock right_ds_res = MagicMock() right_ds_res.self = self.right_ds_url right_ds_res.variables.by.return_value = right_variable @@ -6026,52 +5992,46 @@ def setUp(self): def test_dataset_joins(self): left_ds = self.left_ds right_ds = self.right_ds - left_var = left_ds['id'] - right_var = right_ds['id'] + left_var = left_ds["id"] + right_var = right_ds["id"] with mock.patch("scrunch.mutable_dataset.wait_progress") as mock_wait: - left_ds.join('id', right_ds, 'id', timeout=666) + left_ds.join("id", right_ds, "id", timeout=666) assert mock_wait.call_args[1]["progress_tracker"].timeout == 666 call_payload = left_ds.resource.variables.post.call_args[0][0] expected_payload = { - 'element': 'shoji:entity', - 'body': { - 'function': 'adapt', - 'args': [ - {'dataset': right_ds.url}, - {'variable': '%svariables/%s/' % (right_ds.url, right_var.id)}, - {'variable': '%svariables/%s/' % (left_ds.url, left_var.id)} - ] - } + "element": "shoji:entity", + "body": { + "function": "adapt", + "args": [ + {"dataset": right_ds.url}, + {"variable": "%svariables/%s/" % (right_ds.url, right_var.id)}, + {"variable": "%svariables/%s/" % (left_ds.url, left_var.id)}, + ], + }, } assert call_payload == expected_payload - left_ds.resource.variables.post.assert_called_once_with( - expected_payload) + left_ds.resource.variables.post.assert_called_once_with(expected_payload) def test_dataset_joins_column_urls(self): left_ds = self.left_ds right_ds = self.right_ds - right_var = right_ds['id'] + right_var = right_ds["id"] - with mock.patch("scrunch.mutable_dataset.wait_progress") as mock_wait: - left_ds.join('id', right_ds, 'id', ['id']) + with mock.patch("scrunch.mutable_dataset.wait_progress"): + left_ds.join("id", right_ds, "id", ["id"]) call_payload = left_ds.resource.variables.post.call_args[0][0] - expected_payload = { - 'map': { - right_var.url: {'variable': right_var.url} - } - } - assert call_payload['body']['args'][0] == expected_payload + expected_payload = {"map": {right_var.url: {"variable": right_var.url}}} + assert call_payload["body"]["args"][0] == 
expected_payload -@mock.patch('scrunch.datasets.download_file') -@mock.patch('scrunch.datasets.export_dataset') +@mock.patch("scrunch.datasets.download_file") +@mock.patch("scrunch.datasets.export_dataset") class TestDatasetExport(TestCase): - - ds_url = 'http://test.crunch.local/api/datasets/123/' - file_download_url = 'http://test.crunch.local/download-file' + ds_url = "http://test.crunch.local/api/datasets/123/" + file_download_url = "http://test.crunch.local/download-file" def setUp(self): ds_resource = mock.MagicMock() @@ -6082,133 +6042,116 @@ def test_basic_csv_export(self, export_ds_mock, dl_file_mock): ds = self.ds export_ds_mock.return_value = self.file_download_url - ds.export('export.csv') + ds.export("export.csv") - export_format = export_ds_mock.call_args_list[0][1].get('format') - export_options = export_ds_mock.call_args_list[0][1].get('options', {}) + export_format = export_ds_mock.call_args_list[0][1].get("format") + export_options = export_ds_mock.call_args_list[0][1].get("options", {}) - assert export_format == 'csv' - assert export_options == { - 'options': { - 'use_category_ids': True - }} + assert export_format == "csv" + assert export_options == {"options": {"use_category_ids": True}} - dl_file_mock.assert_called_with(self.file_download_url, 'export.csv') + dl_file_mock.assert_called_with(self.file_download_url, "export.csv") def test_basic_json_export(self, export_ds_mock, dl_file_mock): ds = self.ds - ds.resource.table.__getitem__.return_value = 'json serializable' - ds.export('export.csv', metadata_path='metadata.json') + ds.resource.table.__getitem__.return_value = "json serializable" + ds.export("export.csv", metadata_path="metadata.json") - ds.resource.table.__getitem__.assert_called_with('metadata') + ds.resource.table.__getitem__.assert_called_with("metadata") def test_csv_export_options(self, export_ds_mock, dl_file_mock): ds = self.ds export_ds_mock.return_value = self.file_download_url - ds.export('export.csv', options={'use_category_ids': False}) + ds.export("export.csv", options={"use_category_ids": False}) - export_format = export_ds_mock.call_args_list[0][1].get('format') - export_options = export_ds_mock.call_args_list[0][1].get('options', {}) + export_format = export_ds_mock.call_args_list[0][1].get("format") + export_options = export_ds_mock.call_args_list[0][1].get("options", {}) - assert export_format == 'csv' - assert export_options == { - 'options': { - 'use_category_ids': False - }} + assert export_format == "csv" + assert export_options == {"options": {"use_category_ids": False}} - dl_file_mock.assert_called_with(self.file_download_url, 'export.csv') + dl_file_mock.assert_called_with(self.file_download_url, "export.csv") def test_invalid_csv_export_options(self, export_ds_mock, _): ds = self.ds export_ds_mock.return_value = self.file_download_url with pytest.raises(ValueError): - ds.export('export.csv', options={'invalid_option': False}) + ds.export("export.csv", options={"invalid_option": False}) def test_basic_spss_export(self, export_ds_mock, dl_file_mock): ds = self.ds export_ds_mock.return_value = self.file_download_url - ds.export('export.sav', format='spss') + ds.export("export.sav", format="spss") - export_format = export_ds_mock.call_args_list[0][1].get('format') - export_options = export_ds_mock.call_args_list[0][1].get('options', {}) + export_format = export_ds_mock.call_args_list[0][1].get("format") + export_options = export_ds_mock.call_args_list[0][1].get("options", {}) - assert export_format == 'spss' + assert export_format == 
"spss" assert export_options == { - 'options': { - 'prefix_subvariables': False, - 'var_label_field': 'description' - }} + "options": {"prefix_subvariables": False, "var_label_field": "description"} + } - dl_file_mock.assert_called_with(self.file_download_url, 'export.sav') + dl_file_mock.assert_called_with(self.file_download_url, "export.sav") def test_spss_export_options(self, export_ds_mock, dl_file_mock): ds = self.ds export_ds_mock.return_value = self.file_download_url ds.export( - 'export.sav', - format='spss', - options={ - 'prefix_subvariables': True, - 'var_label_field': 'name' - } + "export.sav", + format="spss", + options={"prefix_subvariables": True, "var_label_field": "name"}, ) - export_format = export_ds_mock.call_args_list[0][1].get('format') - export_options = export_ds_mock.call_args_list[0][1].get('options', {}) + export_format = export_ds_mock.call_args_list[0][1].get("format") + export_options = export_ds_mock.call_args_list[0][1].get("options", {}) - assert export_format == 'spss' + assert export_format == "spss" assert export_options == { - 'options': { - 'var_label_field': 'name', - 'prefix_subvariables': True - }} + "options": {"var_label_field": "name", "prefix_subvariables": True} + } - dl_file_mock.assert_called_with(self.file_download_url, 'export.sav') + dl_file_mock.assert_called_with(self.file_download_url, "export.sav") def test_invalid_spss_export_options(self, export_ds_mock, _): ds = self.ds export_ds_mock.return_value = self.file_download_url with pytest.raises(ValueError): - ds.export( - 'export.csv', format='spss', options={'invalid_option': False} - ) + ds.export("export.csv", format="spss", options={"invalid_option": False}) with pytest.raises(ValueError): ds.export( - 'export.csv', format='spss', - options={'var_label_field': 'invalid'} + "export.csv", format="spss", options={"var_label_field": "invalid"} ) class TestVariableIterator(TestDatasetBase): - variables = { - 'var_a': { - 'id': '001', - 'alias': 'var_a', - 'name': 'Variable A', - 'type': 'numeric', - 'is_subvar': False + "var_a": { + "id": "001", + "alias": "var_a", + "name": "Variable A", + "type": "numeric", + "is_subvar": False, + }, + "var_b": { + "id": "002", + "alias": "var_b", + "name": "Variable B", + "type": "categorical", + "is_subvar": False, }, - 'var_b': { - 'id': '002', - 'alias': 'var_b', - 'name': 'Variable B', - 'type': 'categorical', - 'is_subvar': False + "var_c": { + "id": "003", + "alias": "var_c", + "name": "Variable C", + "type": "categorical", + "is_subvar": False, }, - 'var_c': { - 'id': '003', - 'alias': 'var_c', - 'name': 'Variable C', - 'type': 'categorical', - 'is_subvar': False - } } def test_ds_keys(self): @@ -6222,37 +6165,20 @@ def test_ds_values(self): assert isinstance(ds.values(), list) def test_subvar_order(self): - subvars_order = [ - '0001', - '0002', - '0003', - '0004' - ] + subvars_order = ["0001", "0002", "0003", "0004"] subvars = { # Intentionally unordered - '0003': { - 'id': '0003', - 'alias': 'subvar_3' - }, - '0001': { - 'id': '0001', - 'alias': 'subvar_1' - }, - '0004': { - 'id': '0004', - 'alias': 'subvar_4' - }, - '0002': { - 'id': '0002', - 'alias': 'subvar_2' - }, + "0003": {"id": "0003", "alias": "subvar_3"}, + "0001": {"id": "0001", "alias": "subvar_1"}, + "0004": {"id": "0004", "alias": "subvar_4"}, + "0002": {"id": "0002", "alias": "subvar_2"}, } body = dict(subvariables=subvars_order) def getitem(key): - if key == 'body': + if key == "body": return body - if key == 'subvariables': + if key == "subvariables": return subvars_order ds 
= mock.MagicMock() @@ -6263,12 +6189,11 @@ def getitem(key): v = Variable(var_tuple=var_tuple, dataset=ds) - all_ids = [sv[1]['id'] for sv in v] - assert all_ids == ['0001', '0002', '0003', '0004'] + all_ids = [sv[1]["id"] for sv in v] + assert all_ids == ["0001", "0002", "0003", "0004"] class TestFilter(TestDatasetBase, TestCase): - _filter = { "element": "shoji:entity", "self": "https://alpha.crunch.io/api/datasets/1/filters/1/", @@ -6279,39 +6204,32 @@ class TestFilter(TestDatasetBase, TestCase): "is_public": True, "expression": { "function": "in", - "args": [ - { - "variable": "https://alpha.crunch.io/api/datasets/1/variables/1/" - }, - { - "value": 1 - } - ], - } - } + "args": [ + {"variable": "https://alpha.crunch.io/api/datasets/1/variables/1/"}, + {"value": 1}, + ], + }, + }, } - @mock.patch('scrunch.streaming_dataset.StreamingDataset.filters') + @mock.patch("scrunch.streaming_dataset.StreamingDataset.filters") def test_add_filter(self, filters): ds_res = self._dataset_mock() ds = StreamingDataset(ds_res) - var = ds['var1_alias'] + var = ds["var1_alias"] - ds.add_filter(name='filter', expr='var1_alias != 0') + ds.add_filter(name="filter", expr="var1_alias != 0") expected_payload = { - 'element': 'shoji:entity', - 'body': { - 'name': 'filter', - 'is_public': False, - 'expression': { - 'function': '!=', - 'args': [ - {'variable': var.url}, - {'value': 0} - ] - } - } + "element": "shoji:entity", + "body": { + "name": "filter", + "is_public": False, + "expression": { + "function": "!=", + "args": [{"variable": var.url}, {"value": 0}], + }, + }, } ds.resource.filters.create.assert_called_with(expected_payload) @@ -6319,8 +6237,8 @@ def test_edit_filter(self): filter = EditableMock(entity=self._filter) mockfilter = Filter(filter) with pytest.raises(AttributeError): - mockfilter.edit(name='edited') - mockfilter.resource.edit.assert_called_with({'name': 'edited'}) + mockfilter.edit(name="edited") + mockfilter.resource.edit.assert_called_with({"name": "edited"}) def test_filter_class(self): filter = MagicMock(entity=self._filter) @@ -6329,7 +6247,6 @@ def test_filter_class(self): class TestDeck(TestDatasetBase, TestCase): - _deck = { "element": "shoji:entity", "self": "https://alpha.crunch.io/api/datasets/abc/decks/1/", @@ -6338,24 +6255,24 @@ class TestDeck(TestDatasetBase, TestCase): "id": "326d5db5a40f4189a8a4cddfe06bb19b", "name": "The deck", "is_public": True, - "description": "description" - } + "description": "description", + }, } - @mock.patch('scrunch.streaming_dataset.StreamingDataset.decks') + @mock.patch("scrunch.streaming_dataset.StreamingDataset.decks") def test_add_deck(self, decks): ds_res = self._dataset_mock() ds = StreamingDataset(ds_res) - ds.add_deck(name='mydeck', description='description') + ds.add_deck(name="mydeck", description="description") expected_payload = { - 'element': 'shoji:entity', - 'body': { - 'name': 'mydeck', - 'is_public': False, - 'description': 'description' - } + "element": "shoji:entity", + "body": { + "name": "mydeck", + "is_public": False, + "description": "description", + }, } ds.resource.decks.create.assert_called_with(expected_payload) @@ -6371,8 +6288,8 @@ def test_edit_deck(self): deck = EditableMock(entity=self._deck) mockdeck = Deck(deck) with pytest.raises(AttributeError): - mockdeck.edit(name='edited') - mockdeck.resource.edit.assert_called_with({'name': 'edited'}) + mockdeck.edit(name="edited") + mockdeck.resource.edit.assert_called_with({"name": "edited"}) def test_deck_class(self): deck = MagicMock(entity=self._deck) @@ -6381,45 
+6298,49 @@ def test_deck_class(self): class TestMultitable(TestDatasetBase, TestCase): - _multitable = { "element": "shoji:entity", "self": "https://test.crunch.io/api/datasets/123456/multitables/0001/", "session": MagicMock(spec=ElementSession), "views": { - "tabbook": "https://test.crunch.io/api/datasets/123456/multitables/0001/tabbook/"}, + "tabbook": "https://test.crunch.io/api/datasets/123456/multitables/0001/tabbook/" + }, "body": { "id": "326d5db5a40f4189a8a4cddfe06bb19b", "name": "Test multitable", "is_public": True, - 'template': [ + "template": [ { - 'query': [ - {'variable': 'https://test.crunch.io/api/datasets/123456/variables/0001/'} + "query": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/0001/" + } ] } - ] - } + ], + }, } - @mock.patch('scrunch.streaming_dataset.StreamingDataset.multitables') + @mock.patch("scrunch.streaming_dataset.StreamingDataset.multitables") def test_add_multitable(self, multitables): ds_res = self._dataset_mock() ds = StreamingDataset(ds_res) - ds.create_multitable(name='mymulti', template=['var1_alias']) + ds.create_multitable(name="mymulti", template=["var1_alias"]) expected_payload = { - 'element': 'shoji:entity', - 'body': { - 'name': 'mymulti', - 'is_public': False, - 'template': [ + "element": "shoji:entity", + "body": { + "name": "mymulti", + "is_public": False, + "template": [ { - 'query': [ - {'variable': 'https://test.crunch.io/api/datasets/123456/variables/0001/'} + "query": [ + { + "variable": "https://test.crunch.io/api/datasets/123456/variables/0001/" + } ] } - ] - } + ], + }, } ds.resource.multitables.create.assert_called_with(expected_payload) @@ -6428,7 +6349,7 @@ def test_multitable_accessor(self): ds = StreamingDataset(ds_res) mt = EditableMock(entity=self._multitable) Multitable(mt, ds) - assert type(ds.multitables) == dict + assert isinstance(ds.multitables, dict) def test_edit_multitable(self): ds_res = self._dataset_mock() @@ -6436,8 +6357,8 @@ def test_edit_multitable(self): mt = EditableMock(entity=self._multitable) mockmulti = Multitable(mt, ds) with pytest.raises(AttributeError): - mockmulti.edit(name='edited') - mockmulti.resource.edit.assert_called_with({'name': 'edited'}) + mockmulti.edit(name="edited") + mockmulti.resource.edit.assert_called_with({"name": "edited"}) def test_multitable_class(self): ds_res = self._dataset_mock() @@ -6453,14 +6374,14 @@ def test_multitable_import(self): mockmulti = Multitable(mt, ds) ds_2 = StreamingDataset(ds_res) expected_payload = { - 'element': 'shoji:entity', - 'body': { - 'name': 'copied', - 'multitable': 'https://test.crunch.io/api/datasets/123456/multitables/0001/' - } + "element": "shoji:entity", + "body": { + "name": "copied", + "multitable": "https://test.crunch.io/api/datasets/123456/multitables/0001/", + }, } with pytest.raises(AttributeError): - ds_2.import_multitable('copied', mockmulti) + ds_2.import_multitable("copied", mockmulti) ds_2.resource.multitables.create.assert_called_with(expected_payload) def test_export_tabbook(self): @@ -6469,84 +6390,86 @@ def test_export_tabbook(self): mt = EditableMock(entity=self._multitable) mockmulti = Multitable(mt, ds) expected_payload = { - 'weight': 'https://test.crunch.io/api/datasets/123456/variables/0001/', - 'where': { - 'args': [ + "weight": "https://test.crunch.io/api/datasets/123456/variables/0001/", + "where": { + "args": [ { - 'map': { - 'https://test.crunch.io/api/datasets/123456/variables/0001/': { - 'variable': 'https://test.crunch.io/api/datasets/123456/variables/0001/'} + "map": { + 
"https://test.crunch.io/api/datasets/123456/variables/0001/": { + "variable": "https://test.crunch.io/api/datasets/123456/variables/0001/" + } } } ], - 'function': 'make_frame' - } + "function": "make_frame", + }, } with pytest.raises(AttributeError): - mockmulti.export_tabbook(format='xlsx', where=['var1_alias'], weight='var1_alias') + mockmulti.export_tabbook( + format="xlsx", where=["var1_alias"], weight="var1_alias" + ) ds.resource.session.post.assert_called_once_with( - 'https://test.crunch.io/api/datasets/123456/multitables/0001/tabbooks/', - data=json.dumps(expected_payload) + "https://test.crunch.io/api/datasets/123456/multitables/0001/tabbooks/", + data=json.dumps(expected_payload), ) class TestMutableMixin(TestDatasetBase): - variables = { - 'var_a': { - 'id': '001', - 'alias': 'var_a', - 'name': 'Variable A', - 'type': 'numeric', - 'is_subvar': False + "var_a": { + "id": "001", + "alias": "var_a", + "name": "Variable A", + "type": "numeric", + "is_subvar": False, + }, + "var_b": { + "id": "002", + "alias": "var_b", + "name": "Variable B", + "type": "categorical", + "is_subvar": False, }, - 'var_b': { - 'id': '002', - 'alias': 'var_b', - 'name': 'Variable B', - 'type': 'categorical', - 'is_subvar': False - } } variables_b = { - 'var_a': { - 'id': '003', - 'alias': 'var_a', - 'name': 'Variable A', - 'type': 'numeric', - 'is_subvar': False + "var_a": { + "id": "003", + "alias": "var_a", + "name": "Variable A", + "type": "numeric", + "is_subvar": False, + }, + "var_b": { + "id": "004", + "alias": "var_b", + "name": "Variable B", + "type": "categorical", + "is_subvar": False, }, - 'var_b': { - 'id': '004', - 'alias': 'var_b', - 'name': 'Variable B', - 'type': 'categorical', - 'is_subvar': False - } } variables_with_datetime = { - 'var_a': { - 'id': '003', - 'alias': 'var_a', - 'name': 'Variable A', - 'type': 'numeric', - 'is_subvar': False + "var_a": { + "id": "003", + "alias": "var_a", + "name": "Variable A", + "type": "numeric", + "is_subvar": False, }, - 'var_b': { - 'id': '004', - 'alias': 'var_b', - 'name': 'Variable B', - 'type': 'categorical', - 'is_subvar': False + "var_b": { + "id": "004", + "alias": "var_b", + "name": "Variable B", + "type": "categorical", + "is_subvar": False, + }, + "var_d": { + "id": "005", + "alias": "endtime", + "name": "Endtime", + "type": "datetime", + "is_subvar": False, }, - 'var_d': { - 'id': '005', - 'alias': 'endtime', - 'name': 'Endtime', - 'type': 'datetime', - 'is_subvar': False - } } def test_compare_datasets(self): @@ -6556,13 +6479,9 @@ def test_compare_datasets(self): ds_b = MutableDataset(ds_b_mock) diff = ds_b.compare_dataset(ds_a) expected_diff = { - 'variables': { - 'by_type': [], - 'by_alias': [], - 'by_missing_rules': [] - }, - 'categories': {}, - 'subvariables': {} + "variables": {"by_type": [], "by_alias": [], "by_missing_rules": []}, + "categories": {}, + "subvariables": {}, } assert diff == expected_diff @@ -6574,16 +6493,15 @@ def test_append_dataset(self): with pytest.raises(ValueError) as e: ds_b.append_dataset(ds_a) - assert e.message == 'Cannot append dataset to self' + assert e.message == "Cannot append dataset to self" # edit url - ds_a.url = 'http://test.crunch.io/api/datasets/123/' + ds_a.url = "http://test.crunch.io/api/datasets/123/" expected_payload = { "element": "shoji:entity", "autorollback": True, - "body": { - 'dataset': ds_a.url - }} + "body": {"dataset": ds_a.url}, + } ds_b.append_dataset(ds_a) ds_b.resource.batches.create.assert_called_with(expected_payload) @@ -6592,16 +6510,16 @@ def 
test_append_with_variables(self): ds_a = MutableDataset(ds_a_mock) ds_b_mock = self._dataset_mock(variables=self.variables_b) ds_b = MutableDataset(ds_b_mock) - ds_a.url = 'http://test.crunch.io/api/datasets/123/' + ds_a.url = "http://test.crunch.io/api/datasets/123/" expected_payload = { "element": "shoji:entity", "autorollback": True, "body": { - 'dataset': ds_a.url, - 'where': { - 'function': 'frame_subset', - 'args': [{'frame': 'primary'}, {'value': ['001', '002']}] - } + "dataset": ds_a.url, + "where": { + "function": "frame_subset", + "args": [{"frame": "primary"}, {"value": ["001", "002"]}], + }, }, } ds_b.append_dataset(ds_a, variables=["var_a", "var_b"]) @@ -6612,22 +6530,20 @@ def test_append_with_filter(self): ds_a = MutableDataset(ds_a_mock) ds_b_mock = self._dataset_mock(variables=self.variables_b) ds_b = MutableDataset(ds_b_mock) - ds_a.url = 'http://test.crunch.io/api/datasets/123/' + ds_a.url = "http://test.crunch.io/api/datasets/123/" expected_payload = { "element": "shoji:entity", "autorollback": True, "body": { "dataset": ds_a.url, "filter": { - "function": ">", - "args": [ + "function": ">", + "args": [ { - 'variable': 'https://test.crunch.io/api/datasets/123456/variables/var_d/' + "variable": "https://test.crunch.io/api/datasets/123456/variables/var_d/" }, - { - "value": "2024-06-03T22:53:52.393" - } - ] + {"value": "2024-06-03T22:53:52.393"}, + ], }, }, } @@ -6637,62 +6553,62 @@ def test_append_with_filter(self): class TestHeadingSubtotals(TestDatasetBase): variables = { - 'var_a': { - 'id': '001', - 'alias': 'var_a', - 'name': 'Variable A', - 'type': 'categorical', - 'categories': TEST_CATEGORIES(), - 'is_subvar': False, - 'view': {}, - 'derived': False, - }, + "var_a": { + "id": "001", + "alias": "var_a", + "name": "Variable A", + "type": "categorical", + "categories": TEST_CATEGORIES(), + "is_subvar": False, + "view": {}, + "derived": False, + } } def test_categories_as_int(self): ds_mock = self._dataset_mock(variables=self.variables) ds = StreamingDataset(ds_mock) - var = ds['var_a'] + var = ds["var_a"] expected_payload = { - 'view': { - 'transform': { - 'insertions': [ + "view": { + "transform": { + "insertions": [ { - 'anchor': 'top', - 'name': 'Test', - 'function': 'heading', - 'args': [1] + "anchor": "top", + "name": "Test", + "function": "heading", + "args": [1], } ] } } } - var.add_heading('Test', categories=1, anchor='top') + var.add_heading("Test", categories=1, anchor="top") var.resource.patch.assert_called_once_with(expected_payload) def test_categories_as_string(self): ds_mock = self._dataset_mock(variables=self.variables) ds = StreamingDataset(ds_mock) - var = ds['var_a'] + var = ds["var_a"] expected_payload = { - 'view': { - 'transform': { - 'insertions': [ + "view": { + "transform": { + "insertions": [ { - 'anchor': 'top', - 'name': 'Test', - 'function': 'heading', - 'args': [2] + "anchor": "top", + "name": "Test", + "function": "heading", + "args": [2], } ] } } } - var.add_heading('Test', categories=["Male"], anchor='top') + var.add_heading("Test", categories=["Male"], anchor="top") var.resource.patch.assert_called_once_with(expected_payload) def test_add_subtotal_difference(self): @@ -6709,14 +6625,16 @@ def test_add_subtotal_difference(self): "name": "F - M", "function": "subtotal", "args": [1], - "kwargs": {"negative": [2]} + "kwargs": {"negative": [2]}, } ] } } } - var.add_subtotal_difference("F - M", add=["Female"], subtract=["Male"], anchor="bottom") + var.add_subtotal_difference( + "F - M", add=["Female"], subtract=["Male"], anchor="bottom" + 
) var.resource.patch.assert_called_once_with(expected_payload) def test_add_multiple_subtotal_difference(self): @@ -6725,19 +6643,13 @@ def test_add_multiple_subtotal_difference(self): vars_url = "https://example.com/datasets/id/variables/" var_url = "https://example.com/datasets/id/variable/id/" - var_tup = Tuple(session, var_url, **{ - "alias": "my_var", - "name": "my_var", - "type": "categorical", - "view": {} - }) + var_tup = Tuple( + session, + var_url, + **{"alias": "my_var", "name": "my_var", "type": "categorical", "view": {}}, + ) - vars_cat = Catalog(session, **{ - "self": vars_url, - "index": { - var_url: var_tup - } - }) + vars_cat = Catalog(session, **{"self": vars_url, "index": {var_url: var_tup}}) vars_cat.hier = MagicMock() existing_subtotal = { @@ -6745,7 +6657,7 @@ def test_add_multiple_subtotal_difference(self): "name": "F - M", "function": "subtotal", "args": [1], - "kwargs": {"negative": [2]} + "kwargs": {"negative": [2]}, } variable_body = { "self": var_url, @@ -6753,26 +6665,19 @@ def test_add_multiple_subtotal_difference(self): "alias": "my_var", "name": "my_var", "type": "categorical", - "categories": [ - {"id": 1, "name": "Male"}, - {"id": 2, "name": "Female"}, - ], + "categories": [{"id": 1, "name": "Male"}, {"id": 2, "name": "Female"}], "view": { "transform": { # One subtotal already exists "insertions": [existing_subtotal] } - } - } + }, + }, } var_res = Entity(session, **variable_body) var_tup._entity = var_res - ds_res = Entity(session, **{ - "self": ds_url, - "body": {}, - "catalogs": {} - }) + ds_res = Entity(session, **{"self": ds_url, "body": {}, "catalogs": {}}) ds_res.variables = vars_cat ds_res.settings = MagicMock() ds_res.folders = MagicMock() @@ -6786,7 +6691,9 @@ def test_add_multiple_subtotal_difference(self): session.add_fixture(var_url, variable_body) variable = Variable(var_tup, dataset) - variable.add_subtotal_difference("M - F", add=["Male"], subtract=["Female"], anchor="top") + variable.add_subtotal_difference( + "M - F", add=["Male"], subtract=["Female"], anchor="top" + ) # Assert that the last PATCH made contains paylod including BOTH # transforms. 
The existing and the new one @@ -6794,13 +6701,15 @@ def test_add_multiple_subtotal_difference(self): payload = json.loads(final_patch.body) assert final_patch.method == "PATCH" assert final_patch.url == var_url - assert payload["view"]["transform"]["insertions"] == [existing_subtotal] + [{ - "anchor": "top", - "args": [1], - "function": "subtotal", - "kwargs": {"negative": [2]}, - "name": "M - F" - }] + assert payload["view"]["transform"]["insertions"] == [existing_subtotal] + [ + { + "anchor": "top", + "args": [1], + "function": "subtotal", + "kwargs": {"negative": [2]}, + "name": "M - F", + } + ] class TestSubvariableCodes: @@ -6833,32 +6742,34 @@ def test_bind_categorical_array_without_codes(self): dataset.bind_categorical_array("My Array", "my_array", subvariables) array_map = { - '1': {'variable': '/variables/var_1/'}, - '2': {'variable': '/variables/var_2/'}, - '3': {'variable': '/variables/var_3/'} + "1": {"variable": "/variables/var_1/"}, + "2": {"variable": "/variables/var_2/"}, + "3": {"variable": "/variables/var_3/"}, } subreferences = [ # See how var_1 and var_2 have been disambiguated because # they exist in the return of dataset.variable_aliases - {'alias': 'var_1__1', 'name': 'Variable 1'}, - {'alias': 'var_2__1', 'name': 'Variable 2'}, - {'alias': 'var_3', 'name': 'Variable 3'} + {"alias": "var_1__1", "name": "Variable 1"}, + {"alias": "var_2__1", "name": "Variable 2"}, + {"alias": "var_3", "name": "Variable 3"}, ] expression = { "function": "array", "args": [{"function": "make_frame", "args": [{"map": array_map}]}], - "references": {"subreferences": subreferences} + "references": {"subreferences": subreferences}, } - assert args == [{ - "element": "shoji:entity", - "body": { - "name": "My Array", - "alias": "my_array", - "notes": "", - "description": "", - "derivation": expression, + assert args == [ + { + "element": "shoji:entity", + "body": { + "name": "My Array", + "alias": "my_array", + "notes": "", + "description": "", + "derivation": expression, + }, } - }] + ] def test_bind_categorical_array_with_codes(self): dataset, args = self.prepare_dataset() @@ -6868,56 +6779,49 @@ def test_bind_categorical_array_with_codes(self): {"alias": "var_3", "name": "Variable 3"}, ] subvariable_codes = ["code_1", "code_2", "code_3"] - dataset.bind_categorical_array("My Array", "my_array", subvariables, subvariable_codes=subvariable_codes) + dataset.bind_categorical_array( + "My Array", "my_array", subvariables, subvariable_codes=subvariable_codes + ) array_map = { - '1': {'variable': '/variables/var_1/'}, - '2': {'variable': '/variables/var_2/'}, - '3': {'variable': '/variables/var_3/'} + "1": {"variable": "/variables/var_1/"}, + "2": {"variable": "/variables/var_2/"}, + "3": {"variable": "/variables/var_3/"}, } subreferences = [ - {'alias': 'code_1', 'name': 'Variable 1'}, - {'alias': 'code_2', 'name': 'Variable 2'}, - {'alias': 'code_3', 'name': 'Variable 3'} + {"alias": "code_1", "name": "Variable 1"}, + {"alias": "code_2", "name": "Variable 2"}, + {"alias": "code_3", "name": "Variable 3"}, ] expression = { "function": "array", "args": [{"function": "make_frame", "args": [{"map": array_map}]}], - "references": {"subreferences": subreferences} + "references": {"subreferences": subreferences}, } - assert args == [{ - "element": "shoji:entity", - "body": { - "name": "My Array", - "alias": "my_array", - "notes": "", - "description": "", - "derivation": expression, + assert args == [ + { + "element": "shoji:entity", + "body": { + "name": "My Array", + "alias": "my_array", + "notes": "", 
+ "description": "", + "derivation": expression, + }, } - }] + ] def test_copy_variable_no_codes(self): dataset, args = self.prepare_dataset() var_url = "/variable/some_id/" subvars_index = { - "%s001/" % var_url: { - "alias": "var_1", - "name": "Subvariable 1", - }, - "%s002/" % var_url: { - "alias": "var_2", - "name": "Subvariable 2", - }, - "%s003/" % var_url: { - "alias": "var_3", - "name": "Subvariable 3", - }, + "%s001/" % var_url: {"alias": "var_1", "name": "Subvariable 1"}, + "%s002/" % var_url: {"alias": "var_2", "name": "Subvariable 2"}, + "%s003/" % var_url: {"alias": "var_3", "name": "Subvariable 3"}, } subvariables_catalog = MagicMock(index=subvars_index) mock_session = MockSession() - tuple_members = { - "derived": False - } + tuple_members = {"derived": False} var_members = { "self": var_url, "body": { @@ -6928,7 +6832,7 @@ def test_copy_variable_no_codes(self): "%s003/" % var_url, ], "subreferences": subvars_index, - } + }, } var_tuple_res = Tuple(mock_session, var_url, **tuple_members) variable_shoji = Entity(mock_session, **var_members) @@ -6939,46 +6843,33 @@ def test_copy_variable_no_codes(self): _ = dataset.copy_variable(variable, "copied", "copied") subreferences = [ - {'alias': 'var_1__1', 'name': 'Subvariable 1'}, - {'alias': 'var_2__1', 'name': 'Subvariable 2'}, - {'alias': 'var_3', 'name': 'Subvariable 3'} + {"alias": "var_1__1", "name": "Subvariable 1"}, + {"alias": "var_2__1", "name": "Subvariable 2"}, + {"alias": "var_3", "name": "Subvariable 3"}, ] expression = { "function": "copy_variable", "args": [{"variable": var_url}], - "references": {"subreferences": subreferences} + "references": {"subreferences": subreferences}, } - assert args == [{ - "element": "shoji:entity", - "body": { - "name": "copied", - "alias": "copied", - "derivation": expression, + assert args == [ + { + "element": "shoji:entity", + "body": {"name": "copied", "alias": "copied", "derivation": expression}, } - }] + ] def test_copy_variable_with_codes(self): dataset, args = self.prepare_dataset() var_url = "/variable/some_id/" subvars_index = { - "%s001/" % var_url: { - "alias": "var_1", - "name": "Subvariable 1", - }, - "%s002/" % var_url: { - "alias": "var_2", - "name": "Subvariable 2", - }, - "%s003/" % var_url: { - "alias": "var_3", - "name": "Subvariable 3", - }, + "%s001/" % var_url: {"alias": "var_1", "name": "Subvariable 1"}, + "%s002/" % var_url: {"alias": "var_2", "name": "Subvariable 2"}, + "%s003/" % var_url: {"alias": "var_3", "name": "Subvariable 3"}, } subvariables_catalog = MagicMock(index=subvars_index) mock_session = MockSession() - tuple_members = { - "derived": False - } + tuple_members = {"derived": False} var_members = { "self": var_url, "body": { @@ -6989,7 +6880,7 @@ def test_copy_variable_with_codes(self): "%s003/" % var_url, ], "subreferences": subvars_index, - } + }, } var_tuple_res = Tuple(mock_session, var_url, **tuple_members) variable_shoji = Entity(mock_session, **var_members) @@ -6999,24 +6890,22 @@ def test_copy_variable_with_codes(self): variable = Variable(var_tuple_res, dataset) subvariable_codes = ["code_1", "code_2", "code_3"] - _ = dataset.copy_variable(variable, "copied", "copied", - subvariable_codes=subvariable_codes) + _ = dataset.copy_variable( + variable, "copied", "copied", subvariable_codes=subvariable_codes + ) subreferences = [ - {'alias': 'code_1', 'name': 'Subvariable 1'}, - {'alias': 'code_2', 'name': 'Subvariable 2'}, - {'alias': 'code_3', 'name': 'Subvariable 3'} + {"alias": "code_1", "name": "Subvariable 1"}, + {"alias": "code_2", 
"name": "Subvariable 2"}, + {"alias": "code_3", "name": "Subvariable 3"}, ] expression = { "function": "copy_variable", "args": [{"variable": var_url}], - "references": {"subreferences": subreferences} + "references": {"subreferences": subreferences}, } - assert args == [{ - "element": "shoji:entity", - "body": { - "name": "copied", - "alias": "copied", - "derivation": expression, + assert args == [ + { + "element": "shoji:entity", + "body": {"name": "copied", "alias": "copied", "derivation": expression}, } - }] - + ] diff --git a/scrunch/tests/test_expressions.py b/scrunch/tests/test_expressions.py index 7698e9e..b3ccaf0 100644 --- a/scrunch/tests/test_expressions.py +++ b/scrunch/tests/test_expressions.py @@ -1,5 +1,3 @@ -import sys - import pytest import mock from unittest import TestCase @@ -12,7 +10,6 @@ class TestExpressionParsing(TestCase): - def test_any_of_str(self): expr = '"age".any(1,2)' @@ -75,78 +72,71 @@ def test_expr_none(self): assert result == {} def test_process_nested(self): - expr = '(identity == 1 and caseid <= surveyid) or identity >= 2' + expr = "(identity == 1 and caseid <= surveyid) or identity >= 2" variables = { - '0001': { - 'alias': 'identity', - 'type': 'numeric' - }, - '0002': { - 'alias': 'caseid', - 'type': 'numeric' - }, - '0003': { - 'alias': 'surveyid', - 'type': 'numeric' - }, + "0001": {"alias": "identity", "type": "numeric"}, + "0002": {"alias": "caseid", "type": "numeric"}, + "0003": {"alias": "surveyid", "type": "numeric"}, } obj = parse_expr(expr) table_mock = mock.MagicMock(metadata=variables) ds = mock.MagicMock() - ds.self = 'http://host.com/api/datasets/abc123/' + ds.self = "http://host.com/api/datasets/abc123/" ds.follow.return_value = table_mock result = process_expr(obj, ds) assert result == { - 'function': 'or', - 'args': [ + "function": "or", + "args": [ { - 'function': 'and', - 'args': [ + "function": "and", + "args": [ { - 'function': '==', - 'args': [ - {'variable': 'http://host.com/api/datasets/abc123/variables/0001/'}, - {'value': 1} - ] + "function": "==", + "args": [ + { + "variable": "http://host.com/api/datasets/abc123/variables/0001/" + }, + {"value": 1}, + ], }, { - 'function': '<=', - 'args': [ - {'variable': 'http://host.com/api/datasets/abc123/variables/0002/'}, - {'variable': 'http://host.com/api/datasets/abc123/variables/0003/'} - ] - } - ] + "function": "<=", + "args": [ + { + "variable": "http://host.com/api/datasets/abc123/variables/0002/" + }, + { + "variable": "http://host.com/api/datasets/abc123/variables/0003/" + }, + ], + }, + ], }, { - 'function': '>=', - 'args': [ - {'variable': 'http://host.com/api/datasets/abc123/variables/0001/'}, - {'value': 2} - ] - } - ] + "function": ">=", + "args": [ + { + "variable": "http://host.com/api/datasets/abc123/variables/0001/" + }, + {"value": 2}, + ], + }, + ], } def test_process_invalid_variable(self): - expr = '(identity == 1 and caseid <= surveyid) or identity >= 2' + expr = "(identity == 1 and caseid <= surveyid) or identity >= 2" variables = { - '0001': { - 'alias': 'identity', - 'type': 'numeric' - }, - '0002': { - 'alias': 'caseid', - 'type': 'numeric' - } + "0001": {"alias": "identity", "type": "numeric"}, + "0002": {"alias": "caseid", "type": "numeric"}, } obj = parse_expr(expr) table_mock = mock.MagicMock(metadata=variables) ds = mock.MagicMock() - ds.self = 'http://host.com/api/datasets/abc123/' + ds.self = "http://host.com/api/datasets/abc123/" ds.follow.return_value = table_mock with pytest.raises(ValueError) as err: process_expr(obj, ds) @@ -157,416 +147,217 @@ 
def test_parse_equal_int(self): expr = "age == 1" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '==', - 'args': [ - { - 'variable': 'age' - }, - { - 'value': 1 - } - ] + "function": "==", + "args": [{"variable": "age"}, {"value": 1}], } # Reversed. expr = "1 == age" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '==', - 'args': [ - { - 'value': 1 - }, - { - 'variable': 'age' - } - ] + "function": "==", + "args": [{"value": 1}, {"variable": "age"}], } def test_parse_equal_string(self): expr = "name == 'John Doe'" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '==', - 'args': [ - { - 'variable': 'name' - }, - { - 'value': 'John Doe' - } - ] + "function": "==", + "args": [{"variable": "name"}, {"value": "John Doe"}], } # Reversed. expr = "'John Doe' == name" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '==', - 'args': [ - { - 'value': 'John Doe' - }, - { - 'variable': 'name' - } - ] + "function": "==", + "args": [{"value": "John Doe"}, {"variable": "name"}], } def test_parse_notequal_int(self): expr = "age != 1" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '!=', - 'args': [ - { - 'variable': 'age' - }, - { - 'value': 1 - } - ] + "function": "!=", + "args": [{"variable": "age"}, {"value": 1}], } # Reversed. expr = "1 != age" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '!=', - 'args': [ - { - 'value': 1 - }, - { - 'variable': 'age' - } - ] + "function": "!=", + "args": [{"value": 1}, {"variable": "age"}], } def test_parse_notequal_string(self): expr = "name != 'John Doe'" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '!=', - 'args': [ - { - 'variable': 'name' - }, - { - 'value': 'John Doe' - } - ] + "function": "!=", + "args": [{"variable": "name"}, {"value": "John Doe"}], } # Reversed. expr = "'John Doe' != name" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '!=', - 'args': [ - { - 'value': 'John Doe' - }, - { - 'variable': 'name' - } - ] + "function": "!=", + "args": [{"value": "John Doe"}, {"variable": "name"}], } def test_parse_less_than(self): expr = "caseid < 1234" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '<', - 'args': [ - { - 'variable': 'caseid' - }, - { - 'value': 1234 - } - ] + "function": "<", + "args": [{"variable": "caseid"}, {"value": 1234}], } # Reversed. expr = "1234 < caseid" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '<', - 'args': [ - { - 'value': 1234 - }, - { - 'variable': 'caseid' - } - ] + "function": "<", + "args": [{"value": 1234}, {"variable": "caseid"}], } def test_parse_less_than_equal(self): expr = "caseid <= 1234" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '<=', - 'args': [ - { - 'variable': 'caseid' - }, - { - 'value': 1234 - } - ] + "function": "<=", + "args": [{"variable": "caseid"}, {"value": 1234}], } # Reversed. expr = "1234 <= caseid" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '<=', - 'args': [ - { - 'value': 1234 - }, - { - 'variable': 'caseid' - } - ] + "function": "<=", + "args": [{"value": 1234}, {"variable": "caseid"}], } def test_parse_greater_than(self): expr = "caseid > 1234" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '>', - 'args': [ - { - 'variable': 'caseid' - }, - { - 'value': 1234 - } - ] + "function": ">", + "args": [{"variable": "caseid"}, {"value": 1234}], } # Reversed. 
expr = "1234 > caseid" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '>', - 'args': [ - { - 'value': 1234 - }, - { - 'variable': 'caseid' - } - ] + "function": ">", + "args": [{"value": 1234}, {"variable": "caseid"}], } def test_parse_greater_than_equal(self): expr = "caseid >= 1234" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '>=', - 'args': [ - { - 'variable': 'caseid' - }, - { - 'value': 1234 - } - ] + "function": ">=", + "args": [{"variable": "caseid"}, {"value": 1234}], } # Reversed. expr = "1234 >= caseid" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '>=', - 'args': [ - { - 'value': 1234 - }, - { - 'variable': 'caseid' - } - ] + "function": ">=", + "args": [{"value": 1234}, {"variable": "caseid"}], } def test_parse_compare_variable_against_another_variable(self): expr = "starttdate == arrivedate" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '==', - 'args': [ - { - 'variable': 'starttdate' - }, - { - 'variable': 'arrivedate' - } - ] + "function": "==", + "args": [{"variable": "starttdate"}, {"variable": "arrivedate"}], } expr = "starttdate != arrivedate" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '!=', - 'args': [ - { - 'variable': 'starttdate' - }, - { - 'variable': 'arrivedate' - } - ] + "function": "!=", + "args": [{"variable": "starttdate"}, {"variable": "arrivedate"}], } expr = "starttdate < arrivedate" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '<', - 'args': [ - { - 'variable': 'starttdate' - }, - { - 'variable': 'arrivedate' - } - ] + "function": "<", + "args": [{"variable": "starttdate"}, {"variable": "arrivedate"}], } expr = "starttdate <= arrivedate" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '<=', - 'args': [ - { - 'variable': 'starttdate' - }, - { - 'variable': 'arrivedate' - } - ] + "function": "<=", + "args": [{"variable": "starttdate"}, {"variable": "arrivedate"}], } expr = "starttdate > arrivedate" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '>', - 'args': [ - { - 'variable': 'starttdate' - }, - { - 'variable': 'arrivedate' - } - ] + "function": ">", + "args": [{"variable": "starttdate"}, {"variable": "arrivedate"}], } expr = "starttdate >= arrivedate" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '>=', - 'args': [ - { - 'variable': 'starttdate' - }, - { - 'variable': 'arrivedate' - } - ] + "function": ">=", + "args": [{"variable": "starttdate"}, {"variable": "arrivedate"}], } def test_parse_multiple_boolean_conditions(self): - expr = '(identity == 1 and caseid <= surveyid) or identity >= 2' + expr = "(identity == 1 and caseid <= surveyid) or identity >= 2" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'or', - 'args': [ + "function": "or", + "args": [ { - 'function': 'and', - 'args': [ + "function": "and", + "args": [ { - 'function': '==', - 'args': [ - { - 'variable': 'identity' - }, - { - 'value': 1 - } - ] + "function": "==", + "args": [{"variable": "identity"}, {"value": 1}], }, { - 'function': '<=', - 'args': [ - { - 'variable': 'caseid' - }, - { - 'variable': 'surveyid' - } - ] - } - ] - }, - { - 'function': '>=', - 'args': [ - { - 'variable': 'identity' + "function": "<=", + "args": [{"variable": "caseid"}, {"variable": "surveyid"}], }, - { - 'value': 2 - } - ] - } - ] + ], + }, + {"function": ">=", "args": [{"variable": "identity"}, {"value": 2}]}, + ], } def test_parse_value_in_list(self): expr = "web_browser in ['abc', 'dfg', 'hij']" expr_obj = parse_expr(expr) assert expr_obj 
== { - 'function': 'in', - 'args': [ - { - 'variable': 'web_browser' - }, - { - 'value': ['abc', 'dfg', 'hij'] - } - ] + "function": "in", + "args": [{"variable": "web_browser"}, {"value": ["abc", "dfg", "hij"]}], } # Tuples should also be supported. expr = "web_browser in ('abc', 'dfg', 'hij')" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'in', - 'args': [ - { - 'variable': 'web_browser' - }, - { - 'value': ['abc', 'dfg', 'hij'] - } - ] + "function": "in", + "args": [{"variable": "web_browser"}, {"value": ["abc", "dfg", "hij"]}], } def test_parse_float_value_in_list(self): expr = "country_cat in [1.0]" expected = { - 'function': 'in', - 'args': [ - { - 'variable': 'country_cat' - }, - { - 'value': [1.0] - } - ] + "function": "in", + "args": [{"variable": "country_cat"}, {"value": [1.0]}], } expr_obj = parse_expr(expr) assert expr_obj == expected @@ -574,15 +365,8 @@ def test_parse_float_value_in_list(self): def test_parse_integer_value_in_list(self): expr = "country_cat in [1]" expected = { - 'function': 'in', - 'args': [ - { - 'variable': 'country_cat' - }, - { - 'value': [1] - } - ] + "function": "in", + "args": [{"variable": "country_cat"}, {"value": [1]}], } expr_obj = parse_expr(expr) assert expr_obj == expected @@ -590,13 +374,8 @@ def test_parse_integer_value_in_list(self): def test_r_in(self): expr = "q1 in [1, 2, r(4,7), r(10, 12)]" expected_expr_obj = { - 'args': - [ - {'variable': 'q1'}, - {'value': [1, 2, 4, 5, 6, 7, 10, 11, 12]} - ], - 'function': 'in' - + "args": [{"variable": "q1"}, {"value": [1, 2, 4, 5, 6, 7, 10, 11, 12]}], + "function": "in", } expr_obj = parse_expr(expr) assert expr_obj == expected_expr_obj @@ -607,43 +386,29 @@ def test_r_raise(self): assert "function 'r' needs 2 integer arguments" in str(excinfo.value) def test_parse_value_not_in_list(self): - expr = 'country not in [1, 2, 3]' + expr = "country not in [1, 2, 3]" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'not', - 'args': [ + "function": "not", + "args": [ { - 'function': 'in', - 'args': [ - { - 'variable': 'country' - }, - { - 'value': [1, 2, 3] - } - ] + "function": "in", + "args": [{"variable": "country"}, {"value": [1, 2, 3]}], } - ] + ], } # Tuples should also be supported. 
- expr = 'country not in (1, 2, 3)' + expr = "country not in (1, 2, 3)" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'not', - 'args': [ + "function": "not", + "args": [ { - 'function': 'in', - 'args': [ - { - 'variable': 'country' - }, - { - 'value': [1, 2, 3] - } - ] + "function": "in", + "args": [{"variable": "country"}, {"value": [1, 2, 3]}], } - ] + ], } def test_parse_sample_rule_1(self): @@ -653,104 +418,56 @@ def test_parse_sample_rule_1(self): expr = "disposition == 0 and exit_status == 0" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'and', - 'args': [ - { - 'function': '==', - 'args': [ - { - 'variable': 'disposition' - }, - { - 'value': 0 - } - ] - }, - { - 'function': '==', - 'args': [ - { - 'variable': 'exit_status' - }, - { - 'value': 0 - } - ] - } - ] + "function": "and", + "args": [ + {"function": "==", "args": [{"variable": "disposition"}, {"value": 0}]}, + {"function": "==", "args": [{"variable": "exit_status"}, {"value": 0}]}, + ], } def test_parse_any(self): - expr = 'Q2.any([1, 2, 3])' + expr = "Q2.any([1, 2, 3])" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'any', - 'args': [ - { - 'variable': 'Q2' - }, - { - 'value': [1, 2, 3] - } - ] + "function": "any", + "args": [{"variable": "Q2"}, {"value": [1, 2, 3]}], } - expr = 'Q2.any((1, 2, 3))' + expr = "Q2.any((1, 2, 3))" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'any', - 'args': [ - { - 'variable': 'Q2' - }, - { - 'value': [1, 2, 3] - } - ] + "function": "any", + "args": [{"variable": "Q2"}, {"value": [1, 2, 3]}], } - expr = 'Q2.any(1)' + expr = "Q2.any(1)" with pytest.raises(ValueError): parse_expr(expr) - expr = 'Q2.any(Q3)' + expr = "Q2.any(Q3)" with pytest.raises(ValueError): parse_expr(expr) def test_parse_all(self): - expr = 'Q2.all([1, 2, 3])' + expr = "Q2.all([1, 2, 3])" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'all', - 'args': [ - { - 'variable': 'Q2' - }, - { - 'value': [1, 2, 3] - } - ] + "function": "all", + "args": [{"variable": "Q2"}, {"value": [1, 2, 3]}], } - expr = 'Q2.all((1, 2, 3))' + expr = "Q2.all((1, 2, 3))" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'all', - 'args': [ - { - 'variable': 'Q2' - }, - { - 'value': [1, 2, 3] - } - ] + "function": "all", + "args": [{"variable": "Q2"}, {"value": [1, 2, 3]}], } - expr = 'Q2.all(1)' + expr = "Q2.all(1)" with pytest.raises(ValueError): parse_expr(expr) - expr = 'Q2.all(Q3)' + expr = "Q2.all(Q3)" with pytest.raises(ValueError): parse_expr(expr) @@ -759,97 +476,65 @@ def test_parse_sample_rule_2_complex(self): # 'text': 'diposition code 0 (quotafull)', # 'index_mapper': intersection( # [{'disposition': [0]}, {'exit_status': [1]}]) - expr = "(disposition == 0 and exit_status == 1) or " \ - "(disposition == 0 and exit_status == 0)" + expr = ( + "(disposition == 0 and exit_status == 1) or " + "(disposition == 0 and exit_status == 0)" + ) expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'or', - 'args': [{ - 'function': 'and', - 'args': [ - { - 'function': '==', - 'args': [ - { - 'variable': 'disposition' - }, - { - 'value': 0 - } - ] - }, - { - 'function': '==', - 'args': [ - { - 'variable': 'exit_status' - }, - { - 'value': 1 - } - ] - } - ] - }, { - 'function': 'and', - 'args': [ - { - 'function': '==', - 'args': [ - { - 'variable': 'disposition' - }, - { - 'value': 0 - } - ] - }, - { - 'function': '==', - 'args': [ - { - 'variable': 'exit_status' - }, - { - 'value': 0 - } - ] - } - ] - } - ]} - - def test_mr_any_subvar(self): - expr 
= "MyMrVar.any([subvar1, subvar2])" - parsed_zcl_expr = parse_expr(expr) - assert parsed_zcl_expr == { - 'function': 'any', - 'args': [ - {'variable': 'MyMrVar'}, - {'column': ['subvar1', 'subvar2']} - ] - } - - def test_mr_all_subvar(self): - expr = "MyMrVar.all([subvar1, subvar2])" + "function": "or", + "args": [ + { + "function": "and", + "args": [ + { + "function": "==", + "args": [{"variable": "disposition"}, {"value": 0}], + }, + { + "function": "==", + "args": [{"variable": "exit_status"}, {"value": 1}], + }, + ], + }, + { + "function": "and", + "args": [ + { + "function": "==", + "args": [{"variable": "disposition"}, {"value": 0}], + }, + { + "function": "==", + "args": [{"variable": "exit_status"}, {"value": 0}], + }, + ], + }, + ], + } + + def test_mr_any_subvar(self): + expr = "MyMrVar.any([subvar1, subvar2])" + parsed_zcl_expr = parse_expr(expr) + assert parsed_zcl_expr == { + "function": "any", + "args": [{"variable": "MyMrVar"}, {"column": ["subvar1", "subvar2"]}], + } + + def test_mr_all_subvar(self): + expr = "MyMrVar.all([subvar1, subvar2])" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'all', - 'args': [ - {'variable': 'MyMrVar'}, - {'column': ['subvar1', 'subvar2']} - ] + "function": "all", + "args": [{"variable": "MyMrVar"}, {"column": ["subvar1", "subvar2"]}], } def test_mr_in_subvar(self): expr = "MyMrVar in [subvar1, subvar2]" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'in', - 'args': [ - {'variable': 'MyMrVar'}, - {'column': ['subvar1', 'subvar2']} - ] + "function": "in", + "args": [{"variable": "MyMrVar"}, {"column": ["subvar1", "subvar2"]}], } def test_parse_sample_any(self): @@ -861,117 +546,58 @@ def test_parse_sample_any(self): expr = "CompanyTurnover.any([99])" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'any', - 'args': [ - { - 'variable': 'CompanyTurnover' - }, - { - 'value': [99] - } - ] + "function": "any", + "args": [{"variable": "CompanyTurnover"}, {"value": [99]}], } expr = "sector.any([2, 3, 98, 99])" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'any', - 'args': [ - { - 'variable': 'sector' - }, - { - 'value': [2, 3, 98, 99] - } - ] + "function": "any", + "args": [{"variable": "sector"}, {"value": [2, 3, 98, 99]}], } def test_parse_negated_expr(self): expr = "not (age == 1)" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'not', - 'args': [ - { - 'function': '==', - 'args': [ - { - 'variable': 'age' - }, - { - 'value': 1 - } - ] - } - ] + "function": "not", + "args": [{"function": "==", "args": [{"variable": "age"}, {"value": 1}]}], } def test_parse_negated_method_call(self): - expr = 'not Q2.any([1, 2, 3])' + expr = "not Q2.any([1, 2, 3])" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'not', - 'args': [ - { - 'function': 'any', - 'args': [ - { - 'variable': 'Q2' - }, - { - 'value': [1, 2, 3] - } - ] - } - ] + "function": "not", + "args": [ + {"function": "any", "args": [{"variable": "Q2"}, {"value": [1, 2, 3]}]} + ], } - expr = 'not Q2.all([1, 2, 3])' + expr = "not Q2.all([1, 2, 3])" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'not', - 'args': [ - { - 'function': 'all', - 'args': [ - { - 'variable': 'Q2' - }, - { - 'value': [1, 2, 3] - } - ] - } - ] + "function": "not", + "args": [ + {"function": "all", "args": [{"variable": "Q2"}, {"value": [1, 2, 3]}]} + ], } def test_parse_duplicates_method(self): expr = "identity.duplicates()" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'duplicates', - 'args': [ - { - 
'variable': 'identity' - } - ] + "function": "duplicates", + "args": [{"variable": "identity"}], } # Negated. expr = "not identity.duplicates()" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'not', - 'args': [ - { - 'function': 'duplicates', - 'args': [ - { - 'variable': 'identity' - } - ] - } - ] + "function": "not", + "args": [{"function": "duplicates", "args": [{"variable": "identity"}]}], } # Parameters not allowed. @@ -990,35 +616,23 @@ def test_parse_duplicates_method(self): def test_multiple_and_or(self): expr = 'age == 1 and test == 3 and myop == "age"' expected = { - 'args': [ - { - 'args': [ - {'variable': 'age'}, - {'value': 1} - ], - 'function': '==' - }, + "args": [ + {"args": [{"variable": "age"}, {"value": 1}], "function": "=="}, { - 'args': [ + "args": [ { - 'args': [ - {'variable': 'test'}, - {'value': 3} - ], - 'function': '==' + "args": [{"variable": "test"}, {"value": 3}], + "function": "==", }, { - 'args': [ - {'variable': 'myop'}, - {'value': 'age'} - ], - 'function': '==' - } + "args": [{"variable": "myop"}, {"value": "age"}], + "function": "==", + }, ], - 'function': 'and' - } + "function": "and", + }, ], - 'function': 'and' + "function": "and", } expr_obj = parse_expr(expr) assert expr_obj == expected @@ -1026,45 +640,26 @@ def test_multiple_and_or(self): def test_arithmetic_operations(self): expr = "var1 + 3 == var2 - 2 and var3 / 1 == var4 * 10" expected = { - 'args': [ + "args": [ { - 'args': [ - { - 'args': [ - {'variable': 'var1'}, - {'value': 3} - ], - 'function': '+' - }, - { - 'args': [ - {'variable': 'var2'}, - {'value': 2} - ], - 'function': '-'} + "args": [ + {"args": [{"variable": "var1"}, {"value": 3}], "function": "+"}, + {"args": [{"variable": "var2"}, {"value": 2}], "function": "-"}, ], - 'function': '==' + "function": "==", }, { - 'args': [ + "args": [ + {"args": [{"variable": "var3"}, {"value": 1}], "function": "/"}, { - 'args': [ - {'variable': 'var3'}, - {'value': 1} - ], - 'function': '/' + "args": [{"variable": "var4"}, {"value": 10}], + "function": "*", }, - { - 'args': [ - {'variable': 'var4'}, - {'value': 10} - ], - 'function': '*'} ], - 'function': '==' - } + "function": "==", + }, ], - 'function': 'and' + "function": "and", } expr_obj = parse_expr(expr) assert expr_obj == expected @@ -1072,29 +667,20 @@ def test_arithmetic_operations(self): def test_arithmetic_operator_presedence(self): expr = "var1 * 10 + 3 / 2 == var2" expected = { - 'args': [ + "args": [ { - 'args': [ + "args": [ { - 'args': [ - {'variable': 'var1'}, - {'value': 10} - ], - 'function': '*' + "args": [{"variable": "var1"}, {"value": 10}], + "function": "*", }, - { - 'args': [ - {'value': 3}, - {'value': 2} - ], - 'function': '/' - } + {"args": [{"value": 3}, {"value": 2}], "function": "/"}, ], - 'function': '+' + "function": "+", }, - {'variable': 'var2'} + {"variable": "var2"}, ], - 'function': '==' + "function": "==", } expr_obj = parse_expr(expr) assert expr_obj == expected @@ -1104,16 +690,11 @@ def test_multiple_arithmetic_operations(self): expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '+', 'args': [ - {'value': 1}, - { - 'function': '*', - 'args': [ - {'value': 2}, - {'value': 3} - ] - }, - ] + "function": "+", + "args": [ + {"value": 1}, + {"function": "*", "args": [{"value": 2}, {"value": 3}]}, + ], } def test_multiple_arithmetic_operations_precedence(self): @@ -1121,28 +702,17 @@ def test_multiple_arithmetic_operations_precedence(self): expr_obj = parse_expr(expr) assert expr_obj == { - 'function': '-', - 'args': [ + "function": 
"-", + "args": [ { - 'function': '+', - 'args': [ - {'value': 1}, - { - 'function': '/', - 'args': [ - {'value': 2}, - {'value': 3}] - } - ] + "function": "+", + "args": [ + {"value": 1}, + {"function": "/", "args": [{"value": 2}, {"value": 3}]}, + ], }, - { - 'function': '*', - 'args': [ - {'value': 4}, - {'value': 5} - ] - } - ] + {"function": "*", "args": [{"value": 4}, {"value": 5}]}, + ], } def test_multiple_arithmetic_operations_with_variable(self): @@ -1152,227 +722,112 @@ def test_multiple_arithmetic_operations_with_variable(self): assert expr_obj == { "function": "+", "args": [ - { - "function": "*", - "args": [ - { - "variable": "weekly_rent" - }, - { - "value": 52 - } - ] - }, - { - "value": 12 - } - ] + {"function": "*", "args": [{"variable": "weekly_rent"}, {"value": 52}]}, + {"value": 12}, + ], } def test_parse_helper_functions(self): # One variable. expr = "valid(birthyear)" expr_obj = parse_expr(expr) - assert expr_obj == { - 'function': 'is_valid', - 'args': [ - { - 'variable': 'birthyear' - } - ] - } + assert expr_obj == {"function": "is_valid", "args": [{"variable": "birthyear"}]} expr = "missing(birthyear)" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'is_missing', - 'args': [ - { - 'variable': 'birthyear' - } - ] + "function": "is_missing", + "args": [{"variable": "birthyear"}], } # One variable, negated. expr = "not valid(birthyear)" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'not', - 'args': [ - { - 'function': 'is_valid', - 'args': [ - { - 'variable': 'birthyear' - } - ] - } - ] + "function": "not", + "args": [{"function": "is_valid", "args": [{"variable": "birthyear"}]}], } expr = "not missing(birthyear)" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'not', - 'args': [ - { - 'function': 'is_missing', - 'args': [ - { - 'variable': 'birthyear' - } - ] - } - ] + "function": "not", + "args": [{"function": "is_missing", "args": [{"variable": "birthyear"}]}], } # Multiple variables. expr = "valid(birthyear, birthmonth)" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'or', - 'args': [ - { - 'function': 'is_valid', - 'args': [ - { - 'variable': 'birthyear' - } - ] - }, - { - 'function': 'is_valid', - 'args': [ - { - 'variable': 'birthmonth' - } - ] - } - ] + "function": "or", + "args": [ + {"function": "is_valid", "args": [{"variable": "birthyear"}]}, + {"function": "is_valid", "args": [{"variable": "birthmonth"}]}, + ], } expr = "missing(birthyear, birthmonth)" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'or', - 'args': [ - { - 'function': 'is_missing', - 'args': [ - { - 'variable': 'birthyear' - } - ] - }, - { - 'function': 'is_missing', - 'args': [ - { - 'variable': 'birthmonth' - } - ] - } - ] + "function": "or", + "args": [ + {"function": "is_missing", "args": [{"variable": "birthyear"}]}, + {"function": "is_missing", "args": [{"variable": "birthmonth"}]}, + ], } # Multiple variables, negated. 
expr = "not valid(birthyear, birthmonth)" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'not', - 'args': [ + "function": "not", + "args": [ { - 'function': 'or', - 'args': [ - { - 'function': 'is_valid', - 'args': [ - { - 'variable': 'birthyear' - } - ] - }, - { - 'function': 'is_valid', - 'args': [ - { - 'variable': 'birthmonth' - } - ] - } - ] + "function": "or", + "args": [ + {"function": "is_valid", "args": [{"variable": "birthyear"}]}, + {"function": "is_valid", "args": [{"variable": "birthmonth"}]}, + ], } - ] + ], } expr = "not missing(birthyear, birthmonth)" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'not', - 'args': [ + "function": "not", + "args": [ { - 'function': 'or', - 'args': [ + "function": "or", + "args": [ + {"function": "is_missing", "args": [{"variable": "birthyear"}]}, { - 'function': 'is_missing', - 'args': [ - { - 'variable': 'birthyear' - } - ] + "function": "is_missing", + "args": [{"variable": "birthmonth"}], }, - { - 'function': 'is_missing', - 'args': [ - { - 'variable': 'birthmonth' - } - ] - } - ] + ], } - ] + ], } # More advanced combinations. expr = "caseid < 12345 and missing(birthyear, birthmonth)" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'and', - 'args': [ + "function": "and", + "args": [ + {"function": "<", "args": [{"variable": "caseid"}, {"value": 12345}]}, { - 'function': '<', - 'args': [ + "function": "or", + "args": [ + {"function": "is_missing", "args": [{"variable": "birthyear"}]}, { - 'variable': 'caseid' + "function": "is_missing", + "args": [{"variable": "birthmonth"}], }, - { - 'value': 12345 - } - ] + ], }, - { - 'function': 'or', - 'args': [ - { - 'function': 'is_missing', - 'args': [ - { - 'variable': 'birthyear' - } - ] - }, - { - 'function': 'is_missing', - 'args': [ - { - 'variable': 'birthmonth' - } - ] - } - ] - } - ] + ], } def test_multiple_missing_valid(self): @@ -1380,175 +835,129 @@ def test_multiple_missing_valid(self): expr = "missing(year, month, age)" expr_obj = parse_expr(expr) assert expr_obj == { - 'args': [ - { - 'args': [{'variable': 'year'}], - 'function': 'is_missing' - }, + "args": [ + {"args": [{"variable": "year"}], "function": "is_missing"}, { - 'args': [ - { - 'args': [{'variable': 'month'}], - 'function': 'is_missing' - }, - { - 'args': [{'variable': 'age'}], - 'function': 'is_missing' - } + "args": [ + {"args": [{"variable": "month"}], "function": "is_missing"}, + {"args": [{"variable": "age"}], "function": "is_missing"}, ], - 'function': 'or' - } + "function": "or", + }, ], - 'function': 'or' + "function": "or", } expr = "valid(year, month, age, gender)" expr_obj = parse_expr(expr) assert expr_obj == { - 'args': [ - { - 'args': [{'variable': 'year'}], - 'function': 'is_valid' - }, + "args": [ + {"args": [{"variable": "year"}], "function": "is_valid"}, { - 'args': [ - { - 'args': [{'variable': 'month'}], - 'function': 'is_valid' - }, + "args": [ + {"args": [{"variable": "month"}], "function": "is_valid"}, { - 'args': [ + "args": [ + {"args": [{"variable": "age"}], "function": "is_valid"}, { - 'args': [{'variable': 'age'}], - 'function': 'is_valid' + "args": [{"variable": "gender"}], + "function": "is_valid", }, - { - 'args': [{'variable': 'gender'}], - 'function': 'is_valid' - } ], - 'function': 'or' - } + "function": "or", + }, ], - 'function': 'or' - } + "function": "or", + }, ], - 'function': 'or' + "function": "or", } def test_parse_not_a_in_b(self): expr = "a not in [1, 2, 3]" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'not', - 
'args': [ - { - 'function': 'in', - 'args': [ - { - 'variable': 'a' - }, - { - 'value': [1, 2, 3] - } - ] - } - ] + "function": "not", + "args": [ + {"function": "in", "args": [{"variable": "a"}, {"value": [1, 2, 3]}]} + ], } expr = "not a in [1, 2, 3]" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'not', - 'args': [ - { - 'function': 'in', - 'args': [ - { - 'variable': 'a' - }, - { - 'value': [1, 2, 3] - } - ] - } - ] + "function": "not", + "args": [ + {"function": "in", "args": [{"variable": "a"}, {"value": [1, 2, 3]}]} + ], } expr = "not (a in [1, 2, 3])" expr_obj = parse_expr(expr) assert expr_obj == { - 'function': 'not', - 'args': [ - { - 'function': 'in', - 'args': [ - { - 'variable': 'a' - }, - { - 'value': [1, 2, 3] - } - ] - } - ] + "function": "not", + "args": [ + {"function": "in", "args": [{"variable": "a"}, {"value": [1, 2, 3]}]} + ], } def test_parse_subvariable_brackets(self): expr = "array_alias[subvariable_alias] in [1, 2, 3]" expr_obj = parse_expr(expr, platonic=False) assert expr_obj == { - 'function': 'in', - 'args': [ + "function": "in", + "args": [ # Note how instead of storing a variable string as identifier # this is a temporary intern format, so we can use this later # on to convert to URLs appropriately discovering first the # array and then the subvariable - {'variable': {"array": "array_alias", "subvariable": "subvariable_alias"}}, - {'value': [1, 2, 3]} - ] + { + "variable": { + "array": "array_alias", + "subvariable": "subvariable_alias", + } + }, + {"value": [1, 2, 3]}, + ], } expr_obj = parse_expr(expr, platonic=True) assert expr_obj == { - 'function': 'in', - 'args': [ + "function": "in", + "args": [ # Note how instead of storing a variable string as identifier # this is a temporary intern format, so we can use this later # on to convert to URLs appropriately discovering first the # array and then the subvariable - {'var': "array_alias", "axes": ["subvariable_alias"]}, - {'value': [1, 2, 3]} - ] + {"var": "array_alias", "axes": ["subvariable_alias"]}, + {"value": [1, 2, 3]}, + ], } def test_parse_platonic_expr(self): expr = """not (array[subvar] or num_val) and other[dimension] and not logical""" parsed = parse_expr(expr, platonic=True) assert parsed == { - 'function': 'and', - 'args': [ + "function": "and", + "args": [ { - 'function': 'not', - 'args': [ + "function": "not", + "args": [ { - 'function': 'or', - 'args': [ - {'var': 'array', 'axes': ['subvar']}, - {'var': 'num_val'} + "function": "or", + "args": [ + {"var": "array", "axes": ["subvar"]}, + {"var": "num_val"}, ], } ], }, { - 'function': 'and', - 'args': [ - {'var': 'other', 'axes': ['dimension']}, - { - 'function': 'not', - 'args': [{'var': 'logical'}], - } + "function": "and", + "args": [ + {"var": "other", "axes": ["dimension"]}, + {"function": "not", "args": [{"var": "logical"}]}, ], - } + }, ], } @@ -1607,12 +1016,11 @@ def test_parse_platonic_expr(self): class TestExpressionProcessing(TestCase): - - ds_url = 'http://test.crunch.io/api/datasets/123/' + ds_url = "http://test.crunch.io/api/datasets/123/" class CrunchPayload(dict): def __getattr__(self, item): - if item == 'payload': + if item == "payload": return self else: return self[item] @@ -1628,66 +1036,57 @@ def _get(*args): return _get def test_transform_alias_to_var_id(self): - var_id = '0001' - var_alias = 'age' - var_type = 'numeric' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type - } - }) + var_id 
= "0001" + var_alias = "age" + var_type = "numeric" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + + table_mock = mock.MagicMock( + metadata={var_id: {"id": var_id, "alias": var_alias, "type": var_type}} + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock - expr_obj = process_expr(parse_expr('age == 1'), ds) + expr_obj = process_expr(parse_expr("age == 1"), ds) assert expr_obj == { - 'function': '==', - 'args': [ - { - 'variable': var_url - }, - { - 'value': 1 - } - ] + "function": "==", + "args": [{"variable": var_url}, {"value": 1}], } @mark_fail_py2 def test_adapt_multiple_response_any_subvar(self): - var_id = '0001' - var_alias = 'MyMrVar' - var_type = 'multiple_response' - var_url = '{}variables/{}/'.format(self.ds_url, var_id) + var_id = "0001" + var_alias = "MyMrVar" + var_type = "multiple_response" + var_url = "{}variables/{}/".format(self.ds_url, var_id) var_categories = [ - {"id": 1, "name": "cat1", "selected": True}, - {"id": 2, "name": "cat2", "selected": True}, - {"id": 3, "name": "cat3", "selected": False}, - ] - - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - "subvariables": [ - "%ssubvariables/001/" % var_url, - "%ssubvariables/002/" % var_url, - "%ssubvariables/003/" % var_url, - ], - "subreferences": { - "%ssubvariables/001/" % var_url: {"alias": "subvar1"}, - "%ssubvariables/002/" % var_url: {"alias": "subvar2"}, - "%ssubvariables/003/" % var_url: {"alias": "subvar3"}, - }, - "categories": var_categories + {"id": 1, "name": "cat1", "selected": True}, + {"id": 2, "name": "cat2", "selected": True}, + {"id": 3, "name": "cat3", "selected": False}, + ] + + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "subvariables": [ + "%ssubvariables/001/" % var_url, + "%ssubvariables/002/" % var_url, + "%ssubvariables/003/" % var_url, + ], + "subreferences": { + "%ssubvariables/001/" % var_url: {"alias": "subvar1"}, + "%ssubvariables/002/" % var_url: {"alias": "subvar2"}, + "%ssubvariables/003/" % var_url: {"alias": "subvar3"}, + }, + "categories": var_categories, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.variables.index = { @@ -1708,45 +1107,55 @@ def test_adapt_multiple_response_any_subvar(self): ds.follow.return_value = table_mock values = ["subvar1", "subvar2"] - with mock.patch("scrunch.expressions.get_subvariables_resource") as mock_subvars, mock.patch("scrunch.expressions._get_categories_from_var_index") as categories: + with mock.patch( + "scrunch.expressions.get_subvariables_resource" + ) as mock_subvars, mock.patch( + "scrunch.expressions._get_categories_from_var_index" + ) as categories: categories.return_value = var_categories - mock_subvars.return_value = dict(sorted({"subvar1": "001", "subvar2": "002", "subvar3": "003"}.items())) - result, need_wrap = adapt_multiple_response(var_url, values, ds.variables.index) + mock_subvars.return_value = dict( + sorted({"subvar1": "001", "subvar2": "002", "subvar3": "003"}.items()) + ) + result, need_wrap = adapt_multiple_response( + var_url, values, ds.variables.index + ) assert result == [ - {'variable': "{}subvariables/001/".format(var_url), 'column': [1, 2]}, - {'variable': "{}subvariables/002/".format(var_url), 'column': [1, 2]} + {"variable": "{}subvariables/001/".format(var_url), "column": [1, 2]}, + {"variable": "{}subvariables/002/".format(var_url), "column": [1, 2]}, ] assert need_wrap is True def test_process_all_multiple_response(self): 
- var_id = '0001' - var_alias = 'MyMrVar' - var_type = 'multiple_response' - var_url = '%svariables/%s/' % (self.ds_url, var_id) + var_id = "0001" + var_alias = "MyMrVar" + var_type = "multiple_response" + var_url = "%svariables/%s/" % (self.ds_url, var_id) var_categories = [ {"id": 1, "name": "cat1", "selected": True}, {"id": 2, "name": "cat2", "selected": True}, {"id": 3, "name": "cat3", "selected": False}, ] - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - "subvariables": [ - "%ssubvariables/001/" % var_url, - "%ssubvariables/002/" % var_url, - "%ssubvariables/003/" % var_url, - ], - "subreferences": { - "%ssubvariables/001/" % var_url: {"alias": "subvar1"}, - "%ssubvariables/002/" % var_url: {"alias": "subvar2"}, - "%ssubvariables/003/" % var_url: {"alias": "subvar3"}, - }, - "categories": var_categories + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "subvariables": [ + "%ssubvariables/001/" % var_url, + "%ssubvariables/002/" % var_url, + "%ssubvariables/003/" % var_url, + ], + "subreferences": { + "%ssubvariables/001/" % var_url: {"alias": "subvar1"}, + "%ssubvariables/002/" % var_url: {"alias": "subvar2"}, + "%ssubvariables/003/" % var_url: {"alias": "subvar3"}, + }, + "categories": var_categories, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.variables.index = { @@ -1767,70 +1176,79 @@ def test_process_all_multiple_response(self): ds.follow.return_value = table_mock expr = "MyMrVar.all([1])" - with mock.patch("scrunch.expressions.get_subvariables_resource") as mock_subvars, mock.patch( - "scrunch.expressions._get_categories_from_var_index") as categories: + with mock.patch( + "scrunch.expressions.get_subvariables_resource" + ) as mock_subvars, mock.patch( + "scrunch.expressions._get_categories_from_var_index" + ) as categories: categories.return_value = var_categories - mock_subvars.return_value = dict(sorted({"subvar1": "001", "subvar2": "002", "subvar3": "003"}.items())) + mock_subvars.return_value = dict( + sorted({"subvar1": "001", "subvar2": "002", "subvar3": "003"}.items()) + ) parsed_expr = parse_expr(expr) processed_zcl_expr = process_expr(parsed_expr, ds) - assert sorted(processed_zcl_expr) == sorted({ - 'function': 'and', - 'args': [ - { - 'function': '==', - 'args': [ - {'variable': "{}subvariables/001/".format(var_url)}, - {'value': 1} - ], - }, - { - 'function': '==', - 'args': [ - {'variable': "{}subvariables/002/".format(var_url)}, - {'value': 1} - ], - }, - { - 'function': '==', - 'args': [ - {'variable': "{}subvariables/003/".format(var_url)}, - {'value': 1} - ], - } - ], - }) + assert sorted(processed_zcl_expr) == sorted( + { + "function": "and", + "args": [ + { + "function": "==", + "args": [ + {"variable": "{}subvariables/001/".format(var_url)}, + {"value": 1}, + ], + }, + { + "function": "==", + "args": [ + {"variable": "{}subvariables/002/".format(var_url)}, + {"value": 1}, + ], + }, + { + "function": "==", + "args": [ + {"variable": "{}subvariables/003/".format(var_url)}, + {"value": 1}, + ], + }, + ], + } + ) @pytest.mark.xfail(reason="multiple response with `in` is not yet supported") def test_process_in_multiple_response(self): # TODO: check how to handle this scenario in future releases. 
This should work as .any - var_id = '0001' - var_alias = 'MyMrVar' - var_type = 'multiple_response' - var_url = '%svariables/%s/' % (self.ds_url, var_id) + var_id = "0001" + var_alias = "MyMrVar" + var_type = "multiple_response" + var_url = "%svariables/%s/" % (self.ds_url, var_id) var_categories = [ {"id": 1, "name": "cat1", "selected": True}, {"id": 2, "name": "cat2", "selected": True}, {"id": 3, "name": "cat3", "selected": False}, ] - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - "subvariables": [ - "%ssubvariables/001/" % var_url, - "%ssubvariables/002/" % var_url, - "%ssubvariables/003/" % var_url, - ], - "subreferences": { - "%ssubvariables/001/" % var_url: {"alias": "subvar1"}, - "%ssubvariables/002/" % var_url: {"alias": "subvar2"}, - "%ssubvariables/003/" % var_url: {"alias": "subvar3"}, - }, - "categories": var_categories + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "subvariables": [ + "%ssubvariables/001/" % var_url, + "%ssubvariables/002/" % var_url, + "%ssubvariables/003/" % var_url, + ], + "subreferences": { + "%ssubvariables/001/" % var_url: {"alias": "subvar1"}, + "%ssubvariables/002/" % var_url: {"alias": "subvar2"}, + "%ssubvariables/003/" % var_url: {"alias": "subvar3"}, + }, + "categories": var_categories, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.variables.index = { @@ -1851,68 +1269,71 @@ def test_process_in_multiple_response(self): ds.follow.return_value = table_mock expr = "MyMrVar in [1]" - with mock.patch("scrunch.expressions.get_subvariables_resource") as mock_subvars, mock.patch( - "scrunch.expressions._get_categories_from_var_index") as categories: + with mock.patch( + "scrunch.expressions.get_subvariables_resource" + ) as mock_subvars, mock.patch( + "scrunch.expressions._get_categories_from_var_index" + ) as categories: categories.return_value = var_categories - mock_subvars.return_value = dict(sorted({"subvar1": "001", "subvar2": "002", "subvar3": "003"}.items())) + mock_subvars.return_value = dict( + sorted({"subvar1": "001", "subvar2": "002", "subvar3": "003"}.items()) + ) parsed_expr = parse_expr(expr) processed_zcl_expr = process_expr(parsed_expr, ds) assert processed_zcl_expr == { - 'function': 'or', - 'args': [ + "function": "or", + "args": [ { - 'function': 'in', - 'args': [ - {'variable': "{}subvariables/001/".format(var_url)}, - {'column': [1]} + "function": "in", + "args": [ + {"variable": "{}subvariables/001/".format(var_url)}, + {"column": [1]}, ], }, { - 'function': 'in', - 'args': [ - {'variable': "{}subvariables/002/".format(var_url)}, - {'column': [1]} + "function": "in", + "args": [ + {"variable": "{}subvariables/002/".format(var_url)}, + {"column": [1]}, ], }, { - 'function': 'in', - 'args': [ - {'variable': "{}subvariables/003/".format(var_url)}, - {'column': [1]} + "function": "in", + "args": [ + {"variable": "{}subvariables/003/".format(var_url)}, + {"column": [1]}, ], - } + }, ], } def test_multiple_response_any_process_single_subvariables(self): - var_id = '0001' - var_alias = 'MyMrVar' - var_type = 'multiple_response' - var_url = '{}variables/{}/'.format(self.ds_url, var_id) + var_id = "0001" + var_alias = "MyMrVar" + var_type = "multiple_response" + var_url = "{}variables/{}/".format(self.ds_url, var_id) var_categories = [ {"id": 1, "name": "cat1", "selected": True}, {"id": 2, "name": "cat2", "selected": False}, {"id": 3, "name": "cat3", "selected": False}, ] - table_mock = 
mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - "subvariables": [ - "001", - "002", - "003", - ], - "subreferences": { - "001": {"alias": "subvar1"}, - "002": {"alias": "subvar2"}, - "003": {"alias": "subvar3"}, - }, - "categories": var_categories + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "subvariables": ["001", "002", "003"], + "subreferences": { + "001": {"alias": "subvar1"}, + "002": {"alias": "subvar2"}, + "003": {"alias": "subvar3"}, + }, + "categories": var_categories, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.variables.index = { @@ -1923,81 +1344,69 @@ def test_multiple_response_any_process_single_subvariables(self): "alias": "mr_variable", "id": "{}".format(var_id), "type": "multiple_response", - "subvariables": [ - "001".format(var_url), - "002".format(var_url), - "003".format(var_url), - ], + "subvariables": ["001".format(), "002".format(), "003".format()], "entity": { "subvariables": { "index": { - "001": { - "id": "001", - "alias": "subvar1" - }, - "002": { - "id": "002", - "alias": "subvar2" - }, - "003": { - "id": "003", - "alias": "subvar3" - } + "001": {"id": "001", "alias": "subvar1"}, + "002": {"id": "002", "alias": "subvar2"}, + "003": {"id": "003", "alias": "subvar3"}, } } - } + }, } } ds.follow.return_value = table_mock expr = "MyMrVar.any([subvar1])" parsed_expr = parse_expr(expr) - with mock.patch("scrunch.expressions.get_subvariables_resource") as mock_subvars, mock.patch( - "scrunch.expressions._get_categories_from_var_index") as categories: + with mock.patch( + "scrunch.expressions.get_subvariables_resource" + ) as mock_subvars, mock.patch( + "scrunch.expressions._get_categories_from_var_index" + ) as categories: categories.return_value = var_categories - mock_subvars.return_value = dict(sorted({"subvar1": "001", "subvar2": "002", "subvar3": "003"}.items())) + mock_subvars.return_value = dict( + sorted({"subvar1": "001", "subvar2": "002", "subvar3": "003"}.items()) + ) processed_zcl_expr = process_expr(parsed_expr, ds) assert processed_zcl_expr == { - 'function': 'in', - 'args': [ + "function": "in", + "args": [ { - 'variable': 'http://test.crunch.io/api/datasets/123/variables/0001/subvariables/001/' + "variable": "http://test.crunch.io/api/datasets/123/variables/0001/subvariables/001/" }, - { - 'column': [1] - } + {"column": [1]}, ], } - @mark_fail_py2 + @mark_fail_py2 def test_multiple_response_any_process_two_subvariables(self): - var_id = '0001' - var_alias = 'MyMrVar' - var_type = 'multiple_response' - var_url = '{}variables/{}/'.format(self.ds_url, var_id) + var_id = "0001" + var_alias = "MyMrVar" + var_type = "multiple_response" + var_url = "{}variables/{}/".format(self.ds_url, var_id) var_categories = [ {"id": 1, "name": "cat1", "selected": True}, {"id": 2, "name": "cat2", "selected": False}, {"id": 3, "name": "cat3", "selected": False}, ] - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - "subvariables": [ - "001", - "002", - "003", - ], - "subreferences": { - "001": {"alias": "subvar1"}, - "002": {"alias": "subvar2"}, - "003": {"alias": "subvar3"}, - }, - "categories": var_categories + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "subvariables": ["001", "002", "003"], + "subreferences": { + "001": {"alias": "subvar1"}, + "002": {"alias": "subvar2"}, + "003": {"alias": "subvar3"}, + }, 
+ "categories": var_categories, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.variables.index = { @@ -2008,95 +1417,82 @@ def test_multiple_response_any_process_two_subvariables(self): "alias": "mr_variable", "id": "{}".format(var_id), "type": "multiple_response", - "subvariables": [ - "001", - "002".format(var_url), - "003".format(var_url), - ], + "subvariables": ["001", "002".format(), "003".format()], "entity": { "subvariables": { "index": { - "001": { - "id": "001", - "alias": "subvar1" - }, - "002": { - "id": "002", - "alias": "subvar2" - }, - "003": { - "id": "003", - "alias": "subvar3" - } + "001": {"id": "001", "alias": "subvar1"}, + "002": {"id": "002", "alias": "subvar2"}, + "003": {"id": "003", "alias": "subvar3"}, } } - } + }, } } ds.follow.return_value = table_mock expr = "MyMrVar.any([subvar1, subvar2])" parsed_expr = parse_expr(expr) - with mock.patch("scrunch.expressions.get_subvariables_resource") as mock_subvars, mock.patch("scrunch.expressions._get_categories_from_var_index") as categories: + with mock.patch( + "scrunch.expressions.get_subvariables_resource" + ) as mock_subvars, mock.patch( + "scrunch.expressions._get_categories_from_var_index" + ) as categories: categories.return_value = var_categories - mock_subvars.return_value = dict(sorted({"subvar1": "001", "subvar2": "002", "subvar3": "003"}.items())) + mock_subvars.return_value = dict( + sorted({"subvar1": "001", "subvar2": "002", "subvar3": "003"}.items()) + ) processed_zcl_expr = process_expr(parsed_expr, ds) assert processed_zcl_expr == { - 'function': 'or', - 'args': [ + "function": "or", + "args": [ { - 'function': 'in', - 'args': [ + "function": "in", + "args": [ { - 'variable': 'http://test.crunch.io/api/datasets/123/variables/0001/subvariables/001/' + "variable": "http://test.crunch.io/api/datasets/123/variables/0001/subvariables/001/" }, - { - 'column': [1] - } + {"column": [1]}, ], }, { - 'function': 'in', - 'args': [ + "function": "in", + "args": [ { - 'variable': 'http://test.crunch.io/api/datasets/123/variables/0001/subvariables/002/' + "variable": "http://test.crunch.io/api/datasets/123/variables/0001/subvariables/002/" }, - { - 'column': [1] - } + {"column": [1]}, ], }, - ] + ], } def test_multiple_response_subvar_equality(self): - var_id = '0001' - var_alias = 'MyMrVar' - var_type = 'multiple_response' - var_url = '{}variables/{}/'.format(self.ds_url, var_id) + var_id = "0001" + var_alias = "MyMrVar" + var_type = "multiple_response" + var_url = "{}variables/{}/".format(self.ds_url, var_id) var_categories = [ {"id": 1, "name": "cat1", "selected": True}, {"id": 2, "name": "cat2", "selected": False}, {"id": 3, "name": "cat3", "selected": False}, ] - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - "subvariables": [ - "001", - "002", - "003", - ], - "subreferences": { - "001": {"alias": "subvar1"}, - "002": {"alias": "subvar2"}, - "003": {"alias": "subvar3"}, - }, - "categories": var_categories + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "subvariables": ["001", "002", "003"], + "subreferences": { + "001": {"alias": "subvar1"}, + "002": {"alias": "subvar2"}, + "003": {"alias": "subvar3"}, + }, + "categories": var_categories, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.variables.index = { @@ -2115,1126 +1511,956 @@ def test_multiple_response_subvar_equality(self): "entity": { "subvariables": { "index": { - "001": { - "id": "001", - 
"alias": "subvar1" - }, - "002": { - "id": "002", - "alias": "subvar2" - }, - "003": { - "id": "003", - "alias": "subvar3" - } + "001": {"id": "001", "alias": "subvar1"}, + "002": {"id": "002", "alias": "subvar2"}, + "003": {"id": "003", "alias": "subvar3"}, } } - } + }, } } ds.follow.return_value = table_mock - expr = 'subvar1 == 1' + expr = "subvar1 == 1" parsed_expr = parse_expr(expr) expr_obj = process_expr(parsed_expr, ds) assert expr_obj == { - 'function': '==', - 'args': [ - { - 'variable': "{}subvariables/001/".format(var_url), - }, - { - 'value': 1 - } - ] + "function": "==", + "args": [{"variable": "{}subvariables/001/".format(var_url)}, {"value": 1}], } def test_transform_subvar_alias_to_subvar_id(self): - var_id = '0001' - var_alias = 'hobbies' - var_type = 'categorical_array' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - subvariables = [ - '0001', - '0002' - ] + var_id = "0001" + var_alias = "hobbies" + var_type = "categorical_array" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + subvariables = ["0001", "0002"] - subreferences = { - '0001': {'alias': 'hobbies_1'}, - '0002': {'alias': 'hobbies_2'}, - } - - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': [], - 'subvariables': subvariables, - 'subreferences': subreferences + subreferences = {"0001": {"alias": "hobbies_1"}, "0002": {"alias": "hobbies_2"}} + + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": [], + "subvariables": subvariables, + "subreferences": subreferences, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock - expr = 'hobbies_1 == 4' + expr = "hobbies_1 == 4" expr_obj = process_expr(parse_expr(expr), ds) assert expr_obj == { - 'function': '==', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[0]) - }, - { - 'value': 4 - } - ] + "function": "==", + "args": [ + {"variable": "%ssubvariables/%s/" % (var_url, subvariables[0])}, + {"value": 4}, + ], } def test_transform_subvar_alias_w_brackets_to_subvar_id(self): - var_id = '0001' - var_alias = 'hobbies_array' - var_type = 'categorical_array' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - subvariables = ['0001', '0002'] - subreferences = {'0001': {'alias': 'hobbies_1'}, '0002': {'alias': 'hobbies_2'}} - - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': [], - 'subvariables': subvariables, - 'subreferences': subreferences + var_id = "0001" + var_alias = "hobbies_array" + var_type = "categorical_array" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + subvariables = ["0001", "0002"] + subreferences = {"0001": {"alias": "hobbies_1"}, "0002": {"alias": "hobbies_2"}} + + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": [], + "subvariables": subvariables, + "subreferences": subreferences, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock - subvariable_url = '%ssubvariables/%s/' % (var_url, subvariables[0]) + subvariable_url = "%ssubvariables/%s/" % (var_url, subvariables[0]) # Expression with subvariable indicated by bracket syntax expr = "hobbies_array[hobbies_1] == 4" parsed_platonic = parse_expr(expr, platonic=True) assert parsed_platonic == { - 'function': '==', - 'args': [ + "function": "==", + "args": [ # Keeps the 
platonic reference to the subvariable - {'var': 'hobbies_array', 'axes': ['hobbies_1']}, - {'value': 4} - ] + {"var": "hobbies_array", "axes": ["hobbies_1"]}, + {"value": 4}, + ], } parsed = parse_expr(expr) assert parsed == { - 'function': '==', - 'args': [ + "function": "==", + "args": [ # Stores a reference to the pieces of the array/subvariable - {"variable": {"array": 'hobbies_array', "subvariable": 'hobbies_1'}}, - {'value': 4} - ] + {"variable": {"array": "hobbies_array", "subvariable": "hobbies_1"}}, + {"value": 4}, + ], } expr_obj = process_expr(parse_expr(expr), ds) assert expr_obj == { - 'function': '==', - 'args': [ + "function": "==", + "args": [ # Correctly translates into the subvariable URL - {'variable': subvariable_url}, - {'value': 4} - ] + {"variable": subvariable_url}, + {"value": 4}, + ], } # Expression with subvariable indicated by bracket syntax expr = "hobbies_array[hobbies_1].any([1, 2])" parsed_platonic = parse_expr(expr, platonic=True) assert parsed_platonic == { - 'function': "any", - 'args': [ + "function": "any", + "args": [ # Platonic parsing keeps the var/axes reference - {'var': 'hobbies_array', 'axes': ['hobbies_1']}, - {'value': [1, 2]} - ] + {"var": "hobbies_array", "axes": ["hobbies_1"]}, + {"value": [1, 2]}, + ], } parsed = parse_expr(expr) assert parsed == { - 'function': "any", - 'args': [ + "function": "any", + "args": [ # Stores a reference to the array/subvairable - {"variable": {"array": 'hobbies_array', "subvariable": 'hobbies_1'}}, - {'value': [1, 2]} - ] + {"variable": {"array": "hobbies_array", "subvariable": "hobbies_1"}}, + {"value": [1, 2]}, + ], } expr_obj = process_expr(parsed, ds) assert expr_obj == { - 'function': "in", - 'args': [ - # Still finds the correct subvariable ID under the array URL - {'variable': subvariable_url}, - {'value': [1, 2]} - ] + "function": "in", + "args": [ + # Still finds the correct subvariable ID under the array URL + {"variable": subvariable_url}, + {"value": [1, 2]}, + ], } # `IN` functions have a bit of a special treatment. 
expr = "hobbies_array[hobbies_1] in [1]" parsed_platonic = parse_expr(expr, platonic=True) assert parsed_platonic == { - 'function': 'in', - 'args': [ + "function": "in", + "args": [ # Keeps the platonic reference to the subvariable - {'var': 'hobbies_array', 'axes': ['hobbies_1']}, - {'value': [1]} - ] + {"var": "hobbies_array", "axes": ["hobbies_1"]}, + {"value": [1]}, + ], } parsed = parse_expr(expr) assert parsed == { - 'function': 'in', - 'args': [ + "function": "in", + "args": [ # Stores a reference to the pieces of the array/subvariable - {"variable": {"array": 'hobbies_array', "subvariable": 'hobbies_1'}}, - {'value': [1]} - ] + {"variable": {"array": "hobbies_array", "subvariable": "hobbies_1"}}, + {"value": [1]}, + ], } expr_obj = process_expr(parse_expr(expr), ds) assert expr_obj == { - 'function': 'in', - 'args': [ + "function": "in", + "args": [ # Correctly translates into the subvariable URL - {'variable': subvariable_url}, - {'value': [1]} - ] + {"variable": subvariable_url}, + {"value": [1]}, + ], } def test_platonic_filter(self): - var_id = '0001' - var_alias = 'hobbies_array' - var_type = 'categorical_array' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - subvariables = ['0001', '0002'] - subreferences = {'0001': {'alias': 'hobbies_1'}, '0002': {'alias': 'hobbies_2'}} - - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': [], - 'subvariables': subvariables, - 'subreferences': subreferences + var_id = "0001" + var_alias = "hobbies_array" + var_type = "categorical_array" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + subvariables = ["0001", "0002"] + subreferences = {"0001": {"alias": "hobbies_1"}, "0002": {"alias": "hobbies_2"}} + + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": [], + "subvariables": subvariables, + "subreferences": subreferences, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock - subvariable_url = '%ssubvariables/%s/' % (var_url, subvariables[0]) + subvariable_url = "%ssubvariables/%s/" % (var_url, subvariables[0]) # Expression with subvariable indicated by bracket syntax expr = "hobbies_array[hobbies_1] == 4" parsed = parse_expr(expr, platonic=True) assert parsed == { - 'function': '==', - 'args': [ + "function": "==", + "args": [ # Keeps the platonic reference to the subvariable - {'var': 'hobbies_array', 'axes': ['hobbies_1']}, - {'value': 4} - ] + {"var": "hobbies_array", "axes": ["hobbies_1"]}, + {"value": 4}, + ], } expr_obj = process_expr(parsed, ds) assert expr_obj == parsed parsed = parse_expr(expr, platonic=False) assert parsed == { - 'function': '==', - 'args': [ + "function": "==", + "args": [ # Keeps the platonic reference to the subvariable - {"variable": {"array": 'hobbies_array', "subvariable": 'hobbies_1'}}, - {'value': 4} - ] + {"variable": {"array": "hobbies_array", "subvariable": "hobbies_1"}}, + {"value": 4}, + ], } expr_obj = process_expr(parsed, ds) assert expr_obj == { - 'function': '==', - 'args': [ + "function": "==", + "args": [ # Keeps the platonic reference to the subvariable {"variable": subvariable_url}, - {'value': 4} - ] + {"value": 4}, + ], } def test_array_expansion_single_subvariable_any(self): - var_id = '0001' - var_alias = 'hobbies' - var_type = 'categorical_array' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - subvariables = [ - '0001' - ] + var_id = "0001" + var_alias = "hobbies" + var_type = 
"categorical_array" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + subvariables = ["0001"] - subreferences = { - '0001': {'alias': 'hobbies_1'}, - } + subreferences = {"0001": {"alias": "hobbies_1"}} - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': [], - 'subvariables': subvariables, - 'subreferences': subreferences + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": [], + "subvariables": subvariables, + "subreferences": subreferences, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock # Single value. - expr_obj = process_expr(parse_expr('hobbies.any([32766])'), ds) + expr_obj = process_expr(parse_expr("hobbies.any([32766])"), ds) assert expr_obj == { - 'function': 'in', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[0]) - }, - { - 'value': [32766] - } - ] + "function": "in", + "args": [ + {"variable": "%ssubvariables/%s/" % (var_url, subvariables[0])}, + {"value": [32766]}, + ], } def test_array_expansion_single_subvariable_all(self): - var_id = '0001' - var_alias = 'hobbies' - var_type = 'categorical_array' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - subvariables = [ - '0001' - ] + var_id = "0001" + var_alias = "hobbies" + var_type = "categorical_array" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + subvariables = ["0001"] - subreferences = { - '0001': {'alias': 'hobbies_1'}, - } + subreferences = {"0001": {"alias": "hobbies_1"}} - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': [], - 'subvariables': subvariables, - 'subreferences': subreferences + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": [], + "subvariables": subvariables, + "subreferences": subreferences, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock - expr_obj = process_expr(parse_expr('hobbies.all([32766])'), ds) + expr_obj = process_expr(parse_expr("hobbies.all([32766])"), ds) assert expr_obj == { - 'function': '==', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[0]) - }, - { - 'value': 32766 - } - ] + "function": "==", + "args": [ + {"variable": "%ssubvariables/%s/" % (var_url, subvariables[0])}, + {"value": 32766}, + ], } def test_array_expansion_single_subvariable_not_any(self): - var_id = '0001' - var_alias = 'hobbies' - var_type = 'categorical_array' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - subvariables = [ - '0001' - ] + var_id = "0001" + var_alias = "hobbies" + var_type = "categorical_array" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + subvariables = ["0001"] - subreferences = { - '0001': {'alias': 'hobbies_1'}, - } + subreferences = {"0001": {"alias": "hobbies_1"}} - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': [], - 'subvariables': subvariables, - 'subreferences': subreferences + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": [], + "subvariables": subvariables, + "subreferences": subreferences, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock # Negated. 
- expr_obj = process_expr(parse_expr('not hobbies.any([32766])'), ds) + expr_obj = process_expr(parse_expr("not hobbies.any([32766])"), ds) assert expr_obj == { - 'function': 'not', - 'args': [ + "function": "not", + "args": [ { - 'function': 'in', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[0]) - }, - { - 'value': [32766] - } - ] + "function": "in", + "args": [ + {"variable": "%ssubvariables/%s/" % (var_url, subvariables[0])}, + {"value": [32766]}, + ], } - - ] + ], } def test_array_expansion_single_subvariable_not_all(self): - var_id = '0001' - var_alias = 'hobbies' - var_type = 'categorical_array' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - subvariables = [ - '0001' - ] + var_id = "0001" + var_alias = "hobbies" + var_type = "categorical_array" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + subvariables = ["0001"] - subreferences = { - '0001': {'alias': 'hobbies_1'}, - } + subreferences = {"0001": {"alias": "hobbies_1"}} - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': [], - 'subvariables': subvariables, - 'subreferences': subreferences + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": [], + "subvariables": subvariables, + "subreferences": subreferences, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock - expr_obj = process_expr(parse_expr('not hobbies.all([32766])'), ds) + expr_obj = process_expr(parse_expr("not hobbies.all([32766])"), ds) assert expr_obj == { - 'function': 'not', - 'args': [ + "function": "not", + "args": [ { - 'function': '==', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[0]) - }, - { - 'value': 32766 - } - ] + "function": "==", + "args": [ + {"variable": "%ssubvariables/%s/" % (var_url, subvariables[0])}, + {"value": 32766}, + ], } - - ] + ], } def test_array_expansion_single_subvariable_multiple_any(self): - var_id = '0001' - var_alias = 'hobbies' - var_type = 'categorical_array' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - subvariables = [ - '0001' - ] + var_id = "0001" + var_alias = "hobbies" + var_type = "categorical_array" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + subvariables = ["0001"] - subreferences = { - '0001': {'alias': 'hobbies_1'}, - } + subreferences = {"0001": {"alias": "hobbies_1"}} - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': [], - 'subvariables': subvariables, - 'subreferences': subreferences + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": [], + "subvariables": subvariables, + "subreferences": subreferences, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock # Multiple values. 
- expr_obj = process_expr(parse_expr('hobbies.any([32766, 32767])'), ds) + expr_obj = process_expr(parse_expr("hobbies.any([32766, 32767])"), ds) assert expr_obj == { - 'function': 'in', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[0]) - }, - { - 'value': [32766, 32767] - } - ] + "function": "in", + "args": [ + {"variable": "%ssubvariables/%s/" % (var_url, subvariables[0])}, + {"value": [32766, 32767]}, + ], } def test_array_expansion_single_subvariable_multiple_all(self): - var_id = '0001' - var_alias = 'hobbies' - var_type = 'categorical_array' - subvariables = [ - '0001' - ] + var_id = "0001" + var_alias = "hobbies" + var_type = "categorical_array" + subvariables = ["0001"] - subreferences = { - '0001': {'alias': 'hobbies_1'}, - } + subreferences = {"0001": {"alias": "hobbies_1"}} - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': [], - 'subvariables': subvariables, - 'subreferences': subreferences + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": [], + "subvariables": subvariables, + "subreferences": subreferences, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock with pytest.raises(ValueError): - process_expr(parse_expr('hobbies.all([32766, 32767])'), ds) + process_expr(parse_expr("hobbies.all([32766, 32767])"), ds) def test_categorical_array_any_expansion_multiple_subvariables(self): - var_id = '0001' - var_alias = 'hobbies' - var_type = 'categorical_array' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - subvariables = [ - '0001', - '0002', - '0003', - '0004' - ] + var_id = "0001" + var_alias = "hobbies" + var_type = "categorical_array" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + subvariables = ["0001", "0002", "0003", "0004"] subreferences = { - '0001': {'alias': 'hobbies_1'}, - '0002': {'alias': 'hobbies_2'}, - '0003': {'alias': 'hobbies_3'}, - '0004': {'alias': 'hobbies_4'} - } - - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': [], - 'subvariables': subvariables, - 'subreferences': subreferences + "0001": {"alias": "hobbies_1"}, + "0002": {"alias": "hobbies_2"}, + "0003": {"alias": "hobbies_3"}, + "0004": {"alias": "hobbies_4"}, + } + + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": [], + "subvariables": subvariables, + "subreferences": subreferences, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock # Single values. 
- expr = 'hobbies.any([32766])' + expr = "hobbies.any([32766])" expr_obj = process_expr(parse_expr(expr), ds) assert expr_obj == { - 'function': 'or', - 'args': [ + "function": "or", + "args": [ { - 'function': 'in', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[0]) - }, - { - 'value': [32766] - } - ] + "function": "in", + "args": [ + {"variable": "%ssubvariables/%s/" % (var_url, subvariables[0])}, + {"value": [32766]}, + ], }, { - 'function': 'in', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[1]) - }, - { - 'value': [32766] - } - ] + "function": "in", + "args": [ + {"variable": "%ssubvariables/%s/" % (var_url, subvariables[1])}, + {"value": [32766]}, + ], }, { - 'function': 'in', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[2]) - }, - { - 'value': [32766] - } - ] + "function": "in", + "args": [ + {"variable": "%ssubvariables/%s/" % (var_url, subvariables[2])}, + {"value": [32766]}, + ], }, { - 'function': 'in', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[3]) - }, - { - 'value': [32766] - } - ] - } - ] + "function": "in", + "args": [ + {"variable": "%ssubvariables/%s/" % (var_url, subvariables[3])}, + {"value": [32766]}, + ], + }, + ], } def test_categorical_array_all_process_expression(self): - var_id = '0001' - var_alias = 'hobbies' - var_type = 'categorical_array' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - subvariables = [ - '0001', - '0002', - '0003', - '0004' - ] + var_id = "0001" + var_alias = "hobbies" + var_type = "categorical_array" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + subvariables = ["0001", "0002", "0003", "0004"] subreferences = { - '0001': {'alias': 'hobbies_1'}, - '0002': {'alias': 'hobbies_2'}, - '0003': {'alias': 'hobbies_3'}, - '0004': {'alias': 'hobbies_4'} - } - - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': [], - 'subvariables': subvariables, - 'subreferences': subreferences + "0001": {"alias": "hobbies_1"}, + "0002": {"alias": "hobbies_2"}, + "0003": {"alias": "hobbies_3"}, + "0004": {"alias": "hobbies_4"}, + } + + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": [], + "subvariables": subvariables, + "subreferences": subreferences, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock - expr = 'hobbies.all([32766])' + expr = "hobbies.all([32766])" expr_obj = process_expr(parse_expr(expr), ds) assert expr_obj == { - 'function': 'and', - 'args': [ + "function": "and", + "args": [ { - 'function': '==', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[0]) - }, - { - 'value': 32766 - } - ] + "function": "==", + "args": [ + {"variable": "%ssubvariables/%s/" % (var_url, subvariables[0])}, + {"value": 32766}, + ], }, { - 'function': '==', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[1]) - }, - { - 'value': 32766 - } - ] + "function": "==", + "args": [ + {"variable": "%ssubvariables/%s/" % (var_url, subvariables[1])}, + {"value": 32766}, + ], }, { - 'function': '==', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[2]) - }, - { - 'value': 32766 - } - ] + "function": "==", + "args": [ + {"variable": "%ssubvariables/%s/" % (var_url, subvariables[2])}, + {"value": 32766}, + ], }, { - 'function': '==', - 'args': [ - { - 'variable': 
'%ssubvariables/%s/' % (var_url, subvariables[3]) - }, - { - 'value': 32766 - } - ] - } - ] + "function": "==", + "args": [ + {"variable": "%ssubvariables/%s/" % (var_url, subvariables[3])}, + {"value": 32766}, + ], + }, + ], } def test_categorical_array_not_any_process_expression(self): - var_id = '0001' - var_alias = 'hobbies' - var_type = 'categorical_array' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - subvariables = [ - '0001', - '0002', - '0003', - '0004' - ] + var_id = "0001" + var_alias = "hobbies" + var_type = "categorical_array" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + subvariables = ["0001", "0002", "0003", "0004"] subreferences = { - '0001': {'alias': 'hobbies_1'}, - '0002': {'alias': 'hobbies_2'}, - '0003': {'alias': 'hobbies_3'}, - '0004': {'alias': 'hobbies_4'} - } - - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': [], - 'subvariables': subvariables, - 'subreferences': subreferences + "0001": {"alias": "hobbies_1"}, + "0002": {"alias": "hobbies_2"}, + "0003": {"alias": "hobbies_3"}, + "0004": {"alias": "hobbies_4"}, + } + + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": [], + "subvariables": subvariables, + "subreferences": subreferences, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock # Negated. - expr = 'not hobbies.any([32766])' + expr = "not hobbies.any([32766])" expr_obj = process_expr(parse_expr(expr), ds) assert expr_obj == { - 'function': 'not', - 'args': [ + "function": "not", + "args": [ { - 'function': 'or', - 'args': [ + "function": "or", + "args": [ { - 'function': 'in', - 'args': [ + "function": "in", + "args": [ { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[0]) - }, - { - 'value': [32766] - } - ] + "variable": "%ssubvariables/%s/" + % (var_url, subvariables[0]) }, + {"value": [32766]}, + ], + }, { - 'function': 'in', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[1]) - }, - { - 'value': [32766] - } - ] + "function": "in", + "args": [ + { + "variable": "%ssubvariables/%s/" + % (var_url, subvariables[1]) + }, + {"value": [32766]}, + ], }, { - 'function': 'in', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[2]) - }, - { - 'value': [32766] - } - ] + "function": "in", + "args": [ + { + "variable": "%ssubvariables/%s/" + % (var_url, subvariables[2]) + }, + {"value": [32766]}, + ], }, { - 'function': 'in', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[3]) - }, - { - 'value': [32766] - } - ] - } - ] + "function": "in", + "args": [ + { + "variable": "%ssubvariables/%s/" + % (var_url, subvariables[3]) + }, + {"value": [32766]}, + ], + }, + ], } - ] + ], } def test_categorical_array_not_all_process_expression(self): - var_id = '0001' - var_alias = 'hobbies' - var_type = 'categorical_array' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - subvariables = [ - '0001', - '0002', - '0003', - '0004' - ] + var_id = "0001" + var_alias = "hobbies" + var_type = "categorical_array" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + subvariables = ["0001", "0002", "0003", "0004"] subreferences = { - '0001': {'alias': 'hobbies_1'}, - '0002': {'alias': 'hobbies_2'}, - '0003': {'alias': 'hobbies_3'}, - '0004': {'alias': 'hobbies_4'} - } - - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': 
var_type, - 'categories': [], - 'subvariables': subvariables, - 'subreferences': subreferences + "0001": {"alias": "hobbies_1"}, + "0002": {"alias": "hobbies_2"}, + "0003": {"alias": "hobbies_3"}, + "0004": {"alias": "hobbies_4"}, + } + + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": [], + "subvariables": subvariables, + "subreferences": subreferences, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock - expr = 'not hobbies.all([32766])' + expr = "not hobbies.all([32766])" expr_obj = process_expr(parse_expr(expr), ds) assert expr_obj == { - 'function': 'not', - 'args': [ + "function": "not", + "args": [ { - 'function': 'and', - 'args': [ + "function": "and", + "args": [ { - 'function': '==', - 'args': [ + "function": "==", + "args": [ { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[0]) + "variable": "%ssubvariables/%s/" + % (var_url, subvariables[0]) }, - { - 'value': 32766 - } - ] + {"value": 32766}, + ], }, { - 'function': '==', - 'args': [ + "function": "==", + "args": [ { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[1]) + "variable": "%ssubvariables/%s/" + % (var_url, subvariables[1]) }, - { - 'value': 32766 - } - ] + {"value": 32766}, + ], }, { - 'function': '==', - 'args': [ + "function": "==", + "args": [ { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[2]) + "variable": "%ssubvariables/%s/" + % (var_url, subvariables[2]) }, - { - 'value': 32766 - } - ] + {"value": 32766}, + ], }, { - 'function': '==', - 'args': [ + "function": "==", + "args": [ { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[3]) + "variable": "%ssubvariables/%s/" + % (var_url, subvariables[3]) }, - { - 'value': 32766 - } - ] - } - ] + {"value": 32766}, + ], + }, + ], } - ] + ], } def test_categorical_array_any_multiple_selection_process_expression(self): - var_id = '0001' - var_alias = 'hobbies' - var_type = 'categorical_array' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - subvariables = [ - '0001', - '0002', - '0003', - '0004' - ] + var_id = "0001" + var_alias = "hobbies" + var_type = "categorical_array" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + subvariables = ["0001", "0002", "0003", "0004"] subreferences = { - '0001': {'alias': 'hobbies_1'}, - '0002': {'alias': 'hobbies_2'}, - '0003': {'alias': 'hobbies_3'}, - '0004': {'alias': 'hobbies_4'} - } - - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': [], - 'subvariables': subvariables, - 'subreferences': subreferences + "0001": {"alias": "hobbies_1"}, + "0002": {"alias": "hobbies_2"}, + "0003": {"alias": "hobbies_3"}, + "0004": {"alias": "hobbies_4"}, + } + + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": [], + "subvariables": subvariables, + "subreferences": subreferences, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock # Multiple values. 
- expr = 'hobbies.any([32766, 32767])' + expr = "hobbies.any([32766, 32767])" expr_obj = process_expr(parse_expr(expr), ds) assert expr_obj == { - 'function': 'or', - 'args': [ + "function": "or", + "args": [ { - 'function': 'in', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[0]) - }, - { - 'value': [32766, 32767] - } - ] + "function": "in", + "args": [ + {"variable": "%ssubvariables/%s/" % (var_url, subvariables[0])}, + {"value": [32766, 32767]}, + ], }, { - 'function': 'in', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[1]) - }, - { - 'value': [32766, 32767] - } - ] + "function": "in", + "args": [ + {"variable": "%ssubvariables/%s/" % (var_url, subvariables[1])}, + {"value": [32766, 32767]}, + ], }, { - 'function': 'in', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[2]) - }, - { - 'value': [32766, 32767] - } - ] + "function": "in", + "args": [ + {"variable": "%ssubvariables/%s/" % (var_url, subvariables[2])}, + {"value": [32766, 32767]}, + ], }, { - 'function': 'in', - 'args': [ - { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[3]) - }, - { - 'value': [32766, 32767] - } - ] - } - ] + "function": "in", + "args": [ + {"variable": "%ssubvariables/%s/" % (var_url, subvariables[3])}, + {"value": [32766, 32767]}, + ], + }, + ], } def test_categorical_array_not_any_multiple_selection_process_expression(self): - var_id = '0001' - var_alias = 'hobbies' - var_type = 'categorical_array' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - subvariables = [ - '0001', - '0002', - '0003', - '0004' - ] + var_id = "0001" + var_alias = "hobbies" + var_type = "categorical_array" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + subvariables = ["0001", "0002", "0003", "0004"] subreferences = { - '0001': {'alias': 'hobbies_1'}, - '0002': {'alias': 'hobbies_2'}, - '0003': {'alias': 'hobbies_3'}, - '0004': {'alias': 'hobbies_4'} - } - - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': [], - 'subvariables': subvariables, - 'subreferences': subreferences + "0001": {"alias": "hobbies_1"}, + "0002": {"alias": "hobbies_2"}, + "0003": {"alias": "hobbies_3"}, + "0004": {"alias": "hobbies_4"}, + } + + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": [], + "subvariables": subvariables, + "subreferences": subreferences, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock # Multiple values, negated - expr = 'not hobbies.any([32766, 32767])' + expr = "not hobbies.any([32766, 32767])" expr_obj = process_expr(parse_expr(expr), ds) assert expr_obj == { - 'function': 'not', - 'args': [ + "function": "not", + "args": [ { - 'function': 'or', - 'args': [ + "function": "or", + "args": [ { - 'function': 'in', - 'args': [ + "function": "in", + "args": [ { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[0]) + "variable": "%ssubvariables/%s/" + % (var_url, subvariables[0]) }, - { - 'value': [32766, 32767] - } - ] + {"value": [32766, 32767]}, + ], }, { - 'function': 'in', - 'args': [ + "function": "in", + "args": [ { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[1]) + "variable": "%ssubvariables/%s/" + % (var_url, subvariables[1]) }, - { - 'value': [32766, 32767] - } - ] + {"value": [32766, 32767]}, + ], }, { - 'function': 'in', - 'args': [ + "function": "in", + "args": [ { - 'variable': 
'%ssubvariables/%s/' % (var_url, subvariables[2]) + "variable": "%ssubvariables/%s/" + % (var_url, subvariables[2]) }, - { - 'value': [32766, 32767] - } - ] + {"value": [32766, 32767]}, + ], }, { - 'function': 'in', - 'args': [ + "function": "in", + "args": [ { - 'variable': '%ssubvariables/%s/' % (var_url, subvariables[3]) + "variable": "%ssubvariables/%s/" + % (var_url, subvariables[3]) }, - { - 'value': [32766, 32767] - } - ] - } - ] + {"value": [32766, 32767]}, + ], + }, + ], } - ] + ], } def test_valid_and_missing_funcs_for_arrays(self): - var_id = '0001' - var_alias = 'hobbies' - var_type = 'categorical_array' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - subvariables = [ - '0001', - '0002', - '0003', - '0004' - ] + var_id = "0001" + var_alias = "hobbies" + var_type = "categorical_array" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + subvariables = ["0001", "0002", "0003", "0004"] subreferences = { - '0001': {'alias': 'hobbies_1'}, - '0002': {'alias': 'hobbies_2'}, - '0003': {'alias': 'hobbies_3'}, - '0004': {'alias': 'hobbies_4'} - } - - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': [], - 'subvariables': subvariables, - 'subreferences': subreferences + "0001": {"alias": "hobbies_1"}, + "0002": {"alias": "hobbies_2"}, + "0003": {"alias": "hobbies_3"}, + "0004": {"alias": "hobbies_4"}, + } + + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": [], + "subvariables": subvariables, + "subreferences": subreferences, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock - expr = 'valid(hobbies)' + expr = "valid(hobbies)" expr_obj = process_expr(parse_expr(expr), ds) - assert expr_obj == { - 'function': 'all_valid', - 'args': [ - { - 'variable': var_url - } - ] - } + assert expr_obj == {"function": "all_valid", "args": [{"variable": var_url}]} - expr = 'not valid(hobbies)' + expr = "not valid(hobbies)" expr_obj = process_expr(parse_expr(expr), ds) assert expr_obj == { - 'function': 'not', - 'args': [ - { - 'function': 'all_valid', - 'args': [ - { - 'variable': var_url - } - ] - } - ] + "function": "not", + "args": [{"function": "all_valid", "args": [{"variable": var_url}]}], } - expr = 'missing(hobbies)' + expr = "missing(hobbies)" expr_obj = process_expr(parse_expr(expr), ds) - assert expr_obj == { - 'function': 'is_missing', - 'args': [ - { - 'variable': var_url - } - ] - } + assert expr_obj == {"function": "is_missing", "args": [{"variable": var_url}]} - expr = 'not missing(hobbies)' + expr = "not missing(hobbies)" expr_obj = process_expr(parse_expr(expr), ds) assert expr_obj == { - 'function': 'not', - 'args': [ - { - 'function': 'is_missing', - 'args': [ - { - 'variable': var_url - } - ] - } - ] + "function": "not", + "args": [{"function": "is_missing", "args": [{"variable": var_url}]}], } def test_label_expression_single(self): - var_id = '0001' - var_alias = 'hobbies' - var_type = 'categorical' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - categories = [ - { - 'name': 'mocking', - 'id': 1 - } - ] + var_id = "0001" + var_alias = "hobbies" + var_type = "categorical" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + categories = [{"name": "mocking", "id": 1}] - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': categories, + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": 
var_id, + "alias": var_alias, + "type": var_type, + "categories": categories, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock @@ -3242,41 +2468,27 @@ def test_label_expression_single(self): expr = "hobbies == 'mocking'" expr_obj = process_expr(parse_expr(expr), ds) assert expr_obj == { - 'function': '==', - 'args': [ - { - 'variable': var_url - }, - { - 'value': 1 - } - ] + "function": "==", + "args": [{"variable": var_url}, {"value": 1}], } def test_label_expression_list(self): - var_id = '0001' - var_alias = 'hobbies' - var_type = 'categorical' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - categories = [ - { - 'name': 'mocking', - 'id': 1 - }, - { - 'name': 'coding', - 'id': 2 - }, - ] + var_id = "0001" + var_alias = "hobbies" + var_type = "categorical" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + categories = [{"name": "mocking", "id": 1}, {"name": "coding", "id": 2}] - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': categories, + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": categories, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock @@ -3284,41 +2496,27 @@ def test_label_expression_list(self): expr = "hobbies in ['mocking', 'coding']" expr_obj = process_expr(parse_expr(expr), ds) assert expr_obj == { - 'function': 'in', - 'args': [ - { - 'variable': var_url - }, - { - 'value': [1, 2] - } - ] + "function": "in", + "args": [{"variable": var_url}, {"value": [1, 2]}], } def test_label_expression_tuple(self): - var_id = '0001' - var_alias = 'hobbies' - var_type = 'categorical' - var_url = '%svariables/%s/' % (self.ds_url, var_id) - categories = [ - { - 'name': 'mocking', - 'id': 1 - }, - { - 'name': 'coding', - 'id': 2 - }, - ] + var_id = "0001" + var_alias = "hobbies" + var_type = "categorical" + var_url = "%svariables/%s/" % (self.ds_url, var_id) + categories = [{"name": "mocking", "id": 1}, {"name": "coding", "id": 2}] - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': categories, + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": categories, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock @@ -3326,41 +2524,27 @@ def test_label_expression_tuple(self): expr = "hobbies in ('mocking', 'coding')" expr_obj = process_expr(parse_expr(expr), ds) assert expr_obj == { - 'function': 'in', - 'args': [ - { - 'variable': var_url - }, - { - 'value': [1, 2] - } - ] + "function": "in", + "args": [{"variable": var_url}, {"value": [1, 2]}], } def test_any_categorical_var(self): - var_id = '0001' - var_alias = 'my_categorical' - var_type = 'categorical' - var_url = '{}variables/{}/'.format(self.ds_url, var_id) - categories = [ - { - 'name': 'mocking', - 'id': 1 - }, - { - 'name': 'coding', - 'id': 2 - }, - ] + var_id = "0001" + var_alias = "my_categorical" + var_type = "categorical" + var_url = "{}variables/{}/".format(self.ds_url, var_id) + categories = [{"name": "mocking", "id": 1}, {"name": "coding", "id": 2}] - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': categories, + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": 
var_alias, + "type": var_type, + "categories": categories, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock @@ -3369,41 +2553,27 @@ def test_any_categorical_var(self): parsed_expr = parse_expr(expr) expr_obj = process_expr(parsed_expr, ds) assert expr_obj == { - 'function': 'in', - 'args': [ - { - 'variable': var_url - }, - { - 'value': [1] - } - ] + "function": "in", + "args": [{"variable": var_url}, {"value": [1]}], } def test_in_expression_list_integer(self): - var_id = '0001' - var_alias = 'country_cat' - var_type = 'categorical' - var_url = '{}variables/{}/'.format(self.ds_url, var_id) - categories = [ - { - 'name': 'argentina', - 'id': 1 - }, - { - 'name': 'australia', - 'id': 2 - }, - ] + var_id = "0001" + var_alias = "country_cat" + var_type = "categorical" + var_url = "{}variables/{}/".format(self.ds_url, var_id) + categories = [{"name": "argentina", "id": 1}, {"name": "australia", "id": 2}] - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': categories, + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": categories, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock @@ -3411,41 +2581,27 @@ def test_in_expression_list_integer(self): expr = "country_cat in [1]" expr_obj = process_expr(parse_expr(expr), ds) assert expr_obj == { - 'function': 'in', - 'args': [ - { - 'variable': var_url - }, - { - 'value': [1] - } - ] + "function": "in", + "args": [{"variable": var_url}, {"value": [1]}], } def test_in_expression_list_floats(self): - var_id = '0001' - var_alias = 'country_cat' - var_type = 'categorical' - var_url = '{}variables/{}/'.format(self.ds_url, var_id) - categories = [ - { - 'name': 'argentina', - 'id': 1 - }, - { - 'name': 'australia', - 'id': 2 - }, - ] + var_id = "0001" + var_alias = "country_cat" + var_type = "categorical" + var_url = "{}variables/{}/".format(self.ds_url, var_id) + categories = [{"name": "argentina", "id": 1}, {"name": "australia", "id": 2}] - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': categories, + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": categories, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock @@ -3454,244 +2610,118 @@ def test_in_expression_list_floats(self): parsed_expr = parse_expr(expr) expr_obj = process_expr(parsed_expr, ds) assert expr_obj == { - 'function': 'in', - 'args': [ - { - 'variable': var_url - }, - { - 'value': [1.0] - } - ] + "function": "in", + "args": [{"variable": var_url}, {"value": [1.0]}], } class TestExpressionPrettify(TestCase): - def test_simple_eq(self): - expr = { - 'function': '==', - 'args': [ - { - 'variable': 'age' - }, - { - 'value': 1 - } - ] - } + expr = {"function": "==", "args": [{"variable": "age"}, {"value": 1}]} - expected = 'age == 1' + expected = "age == 1" cel = prettify(expr) assert expected == cel def test_float_conversion_to_integer(self): - expr = { - 'function': '==', - 'args': [ - { - 'variable': 'age' - }, - { - 'value': 25.0 - } - ] - } + expr = {"function": "==", "args": [{"variable": "age"}, {"value": 25.0}]} - expected = 'age == 25' + expected = "age == 25" cel = prettify(expr) assert expected == cel def test_float_conversion_integer_in_list(self): 
expr = { "function": "in", - "args": [ - { - "variable": "my_var" - }, - { - "value": [ - 1.0, 2.0 - ] - } - ] + "args": [{"variable": "my_var"}, {"value": [1.0, 2.0]}], } assert prettify(expr) == "my_var in [1, 2]" def test_string_no_need_conversion_in_list(self): - expr = { - "function": "in", - "args": [ - { - "variable": "my_var" - }, - { - "value": [ - "test" - ] - } - ] - } + expr = {"function": "in", "args": [{"variable": "my_var"}, {"value": ["test"]}]} assert prettify(expr) == "my_var in ['test']" def test_and(self): expr = { - 'function': 'and', - 'args': [ - { - 'function': '>', - 'args': [ - { - 'variable': 'age' - }, - { - 'value': 1 - } - ] - }, - { - 'function': '==', - 'args': [ - { - 'variable': 'favcolor' - }, - { - 'value': 2 - } - ] - } - ] + "function": "and", + "args": [ + {"function": ">", "args": [{"variable": "age"}, {"value": 1}]}, + {"function": "==", "args": [{"variable": "favcolor"}, {"value": 2}]}, + ], } - expected = 'age > 1 and favcolor == 2' + expected = "age > 1 and favcolor == 2" cel = prettify(expr) assert expected == cel def test_nested_or(self): expr = { - 'function': 'and', - 'args': [ + "function": "and", + "args": [ + {"function": ">", "args": [{"variable": "age"}, {"value": 1}]}, { - 'function': '>', - 'args': [ + "function": "or", + "args": [ { - 'variable': 'age' + "function": "==", + "args": [{"variable": "favcolor"}, {"value": 2}], }, { - 'value': 1 - } - ] - }, - { - 'function': 'or', - 'args': [ - { - 'function': '==', - 'args': [ - { - 'variable': 'favcolor' - }, - { - 'value': 2 - } - ] + "function": "==", + "args": [{"variable": "genre"}, {"value": 1}], }, - { - 'function': '==', - 'args': [ - { - 'variable': 'genre' - }, - { - 'value': 1 - } - ] - } - ] - } - ] + ], + }, + ], } - expected = 'age > 1 and (favcolor == 2 or genre == 1)' + expected = "age > 1 and (favcolor == 2 or genre == 1)" cel = prettify(expr) assert expected == cel def test_complex(self): expr = { - 'function': 'and', - 'args': [ - { - 'function': '>', - 'args': [ - { - 'variable': 'age' - }, - { - 'value': 55 - } - ] - }, + "function": "and", + "args": [ + {"function": ">", "args": [{"variable": "age"}, {"value": 55}]}, { - 'function': 'or', - 'args': [ + "function": "or", + "args": [ { - 'function': 'and', - 'args': [ + "function": "and", + "args": [ { - 'function': '==', - 'args': [ - { - 'variable': 'genre' - }, - { - 'value': 1 - } - ] + "function": "==", + "args": [{"variable": "genre"}, {"value": 1}], }, { - 'function': '==', - 'args': [ - { - 'variable': 'favfruit' - }, - { - 'value': 9 - } - ] - } - ] + "function": "==", + "args": [{"variable": "favfruit"}, {"value": 9}], + }, + ], }, { - 'function': 'in', - 'args': [ - { - 'variable': 'favcolor' - }, - { - 'value': [3, 4, 5] - } - ] - } - ] - } - ] + "function": "in", + "args": [{"variable": "favcolor"}, {"value": [3, 4, 5]}], + }, + ], + }, + ], } - expected = 'age > 55 and ((genre == 1 and favfruit == 9) or favcolor in [3, 4, 5])' + expected = ( + "age > 55 and ((genre == 1 and favfruit == 9) or favcolor in [3, 4, 5])" + ) cel = prettify(expr) assert expected == cel def test_variable_url(self): expr = { - 'function': '==', - 'args': [ - { - 'variable': 'https://host.com/api/datasets/123/variables/001/' - }, - { - 'value': 1 - } - ] + "function": "==", + "args": [ + {"variable": "https://host.com/api/datasets/123/variables/001/"}, + {"value": 1}, + ], } ds = mock.MagicMock() @@ -3701,24 +2731,18 @@ def test_variable_url(self): ds.resource.session.get.side_effect = lambda *arg: response - expected = 'age == 1' 
+ expected = "age == 1" cel = prettify(expr, ds) assert expected == cel - ds.resource.session.get.assert_called_with('https://host.com/api/datasets/123/variables/001/') + ds.resource.session.get.assert_called_with( + "https://host.com/api/datasets/123/variables/001/" + ) def test_square_bracket_subvariables(self): - subvariable_url = 'https://host.com/api/datasets/123/variables/001/subvariables/abc/' - expr = { - 'function': '==', - 'args': [ - { - 'variable': subvariable_url - }, - { - 'value': 1 - } - ] - } + subvariable_url = ( + "https://host.com/api/datasets/123/variables/001/subvariables/abc/" + ) + expr = {"function": "==", "args": [{"variable": subvariable_url}, {"value": 1}]} ds = mock.MagicMock() ds.__class__ = scrunch.mutable_dataset.MutableDataset @@ -3734,424 +2758,199 @@ def test_square_bracket_subvariables(self): # Prepare array response2 = mock.MagicMock() - response2.payload.body = {"alias": 'array_variable'} + response2.payload.body = {"alias": "array_variable"} ds.resource.session.get.side_effect = [response1, response2] - expected = 'array_variable[subvar_1] == 1' + expected = "array_variable[subvar_1] == 1" assert prettify(expr, ds) == expected def test_variable_url_no_dataset(self): expr = { - 'function': '==', - 'args': [ - { - 'variable': 'https://host.com/api/datasets/123/variables/001/' - }, - { - 'value': 1 - } - ] + "function": "==", + "args": [ + {"variable": "https://host.com/api/datasets/123/variables/001/"}, + {"value": 1}, + ], } with pytest.raises(Exception) as err: prettify(expr) assert str(err.value) == ( - 'Valid Dataset instance is required to resolve variable urls ' - 'in the expression' + "Valid Dataset instance is required to resolve variable urls " + "in the expression" ) def test_parse_equal_string(self): expr_obj = { - 'function': '==', - 'args': [ - { - 'variable': 'name' - }, - { - 'value': 'John Doe' - } - ] + "function": "==", + "args": [{"variable": "name"}, {"value": "John Doe"}], } cel = prettify(expr_obj) assert cel == "name == 'John Doe'" # Reversed. expr_obj = { - 'function': '==', - 'args': [ - { - 'value': 'John Doe' - }, - { - 'variable': 'address' - } - ] + "function": "==", + "args": [{"value": "John Doe"}, {"variable": "address"}], } cel = prettify(expr_obj) assert cel == "'John Doe' == address" def test_parse_equal_string_escape_quote(self): expr_obj = { - 'function': '==', - 'args': [ - { - 'value': '''John's Name''' - }, - { - 'variable': 'address' - } - ] + "function": "==", + "args": [{"value": """John's Name"""}, {"variable": "address"}], } cel = prettify(expr_obj) # Actually is a single backslash escaping the quote, # but we need to escape the actual backslash and quote # for this comparisson - assert cel == "'John\\\'s Name' == address" + assert cel == "'John\\'s Name' == address" def test_parse_notequal_int(self): - expr = { - 'function': '!=', - 'args': [ - { - 'variable': 'age' - }, - { - 'value': 1 - } - ] - } + expr = {"function": "!=", "args": [{"variable": "age"}, {"value": 1}]} cel = prettify(expr) assert cel == "age != 1" # Reversed. 
- expr = { - 'function': '!=', - 'args': [ - { - 'value': 1 - }, - { - 'variable': 'age' - } - ] - } + expr = {"function": "!=", "args": [{"value": 1}, {"variable": "age"}]} cel = prettify(expr) assert cel == "1 != age" def test_parse_notequal_string(self): - expr = { - 'function': '!=', - 'args': [ - { - 'variable': 'name' - }, - { - 'value': 'John Doe' - } - ] - } + expr = {"function": "!=", "args": [{"variable": "name"}, {"value": "John Doe"}]} cel = prettify(expr) assert cel == "name != 'John Doe'" # Reversed. - expr = { - 'function': '!=', - 'args': [ - { - 'value': 'John Doe' - }, - { - 'variable': 'name' - } - ] - } + expr = {"function": "!=", "args": [{"value": "John Doe"}, {"variable": "name"}]} cel = prettify(expr) assert cel == "'John Doe' != name" def test_parse_less_than(self): - expr = { - 'function': '<', - 'args': [ - { - 'variable': 'caseid' - }, - { - 'value': 1234 - } - ] - } + expr = {"function": "<", "args": [{"variable": "caseid"}, {"value": 1234}]} cel = prettify(expr) assert cel == "caseid < 1234" # Reversed. - expr = { - 'function': '<', - 'args': [ - { - 'value': 1234 - }, - { - 'variable': 'caseid' - } - ] - } + expr = {"function": "<", "args": [{"value": 1234}, {"variable": "caseid"}]} cel = prettify(expr) assert cel == "1234 < caseid" def test_parse_less_than_equal(self): - expr = { - 'function': '<=', - 'args': [ - { - 'variable': 'caseid' - }, - { - 'value': 1234 - } - ] - } + expr = {"function": "<=", "args": [{"variable": "caseid"}, {"value": 1234}]} cel = prettify(expr) assert cel == "caseid <= 1234" # Reversed. - expr = { - 'function': '<=', - 'args': [ - { - 'value': 1234 - }, - { - 'variable': 'caseid' - } - ] - } + expr = {"function": "<=", "args": [{"value": 1234}, {"variable": "caseid"}]} cel = prettify(expr) assert cel == "1234 <= caseid" def test_parse_greater_than(self): - expr = { - 'function': '>', - 'args': [ - { - 'variable': 'caseid' - }, - { - 'value': 1234 - } - ] - } + expr = {"function": ">", "args": [{"variable": "caseid"}, {"value": 1234}]} cel = prettify(expr) assert cel == "caseid > 1234" # Reversed. - expr = { - 'function': '>', - 'args': [ - { - 'value': 1234 - }, - { - 'variable': 'caseid' - } - ] - } - cel = prettify(expr) - assert cel == "1234 > caseid" - - def test_parse_greater_than_equal(self): - expr = { - 'function': '>=', - 'args': [ - { - 'variable': 'caseid' - }, - { - 'value': 1234 - } - ] - } + expr = {"function": ">", "args": [{"value": 1234}, {"variable": "caseid"}]} + cel = prettify(expr) + assert cel == "1234 > caseid" + + def test_parse_greater_than_equal(self): + expr = {"function": ">=", "args": [{"variable": "caseid"}, {"value": 1234}]} cel = prettify(expr) assert cel == "caseid >= 1234" # Reversed. 
- expr = { - 'function': '>=', - 'args': [ - { - 'value': 1234 - }, - { - 'variable': 'caseid' - } - ] - } + expr = {"function": ">=", "args": [{"value": 1234}, {"variable": "caseid"}]} cel = prettify(expr) assert cel == "1234 >= caseid" def test_parse_compare_variable_against_another_variable(self): expr = { - 'function': '==', - 'args': [ - { - 'variable': 'starttdate' - }, - { - 'variable': 'arrivedate' - } - ] + "function": "==", + "args": [{"variable": "starttdate"}, {"variable": "arrivedate"}], } cel = prettify(expr) assert cel == "starttdate == arrivedate" expr = { - 'function': '!=', - 'args': [ - { - 'variable': 'starttdate' - }, - { - 'variable': 'arrivedate' - } - ] + "function": "!=", + "args": [{"variable": "starttdate"}, {"variable": "arrivedate"}], } cel = prettify(expr) assert cel == "starttdate != arrivedate" expr = { - 'function': '<', - 'args': [ - { - 'variable': 'starttdate' - }, - { - 'variable': 'arrivedate' - } - ] + "function": "<", + "args": [{"variable": "starttdate"}, {"variable": "arrivedate"}], } cel = prettify(expr) assert cel == "starttdate < arrivedate" expr = { - 'function': '<=', - 'args': [ - { - 'variable': 'starttdate' - }, - { - 'variable': 'arrivedate' - } - ] + "function": "<=", + "args": [{"variable": "starttdate"}, {"variable": "arrivedate"}], } cel = prettify(expr) assert cel == "starttdate <= arrivedate" expr = { - 'function': '>', - 'args': [ - { - 'variable': 'starttdate' - }, - { - 'variable': 'arrivedate' - } - ] + "function": ">", + "args": [{"variable": "starttdate"}, {"variable": "arrivedate"}], } cel = prettify(expr) assert cel == "starttdate > arrivedate" expr = { - 'function': '>=', - 'args': [ - { - 'variable': 'starttdate' - }, - { - 'variable': 'arrivedate' - } - ] + "function": ">=", + "args": [{"variable": "starttdate"}, {"variable": "arrivedate"}], } cel = prettify(expr) assert cel == "starttdate >= arrivedate" def test_parse_multiple_boolean_conditions(self): expr = { - 'function': 'or', - 'args': [ + "function": "or", + "args": [ { - 'function': 'and', - 'args': [ + "function": "and", + "args": [ { - 'function': '==', - 'args': [ - { - 'variable': 'identity' - }, - { - 'value': 1 - } - ] + "function": "==", + "args": [{"variable": "identity"}, {"value": 1}], }, { - 'function': '<=', - 'args': [ - { - 'variable': 'caseid' - }, - { - 'variable': 'surveyid' - } - ] - } - ] - }, - { - 'function': '>=', - 'args': [ - { - 'variable': 'identity' + "function": "<=", + "args": [{"variable": "caseid"}, {"variable": "surveyid"}], }, - { - 'value': 2 - } - ] - } - ] + ], + }, + {"function": ">=", "args": [{"variable": "identity"}, {"value": 2}]}, + ], } cel = prettify(expr) - assert cel == '(identity == 1 and caseid <= surveyid) or identity >= 2' + assert cel == "(identity == 1 and caseid <= surveyid) or identity >= 2" def test_parse_value_in_list(self): expr = { - 'function': 'in', - 'args': [ - { - 'variable': 'web_browser' - }, - { - 'value': ['abc', 'dfg', 'hij'] - } - ] + "function": "in", + "args": [{"variable": "web_browser"}, {"value": ["abc", "dfg", "hij"]}], } cel = prettify(expr) assert cel == "web_browser in ['abc', 'dfg', 'hij']" def test_parse_value_not_in_list(self): expr = { - 'function': 'not', - 'args': [ + "function": "not", + "args": [ { - 'function': 'in', - 'args': [ - { - 'variable': 'country' - }, - { - 'value': [1, 2, 3] - } - ] + "function": "in", + "args": [{"variable": "country"}, {"value": [1, 2, 3]}], } - ] + ], } cel = prettify(expr) @@ -4159,270 +2958,134 @@ def test_parse_value_not_in_list(self): # despite 
it is valid, seems better to have # `x not in y` than `not x in y` # assert cel == 'country not in [1, 2, 3]' - assert cel == 'not country in [1, 2, 3]' + assert cel == "not country in [1, 2, 3]" def test_parse_sample_rule_1(self): - expr = { - 'function': 'and', - 'args': [ - { - 'function': '==', - 'args': [ - { - 'variable': 'disposition' - }, - { - 'value': 0 - } - ] - }, - { - 'function': '==', - 'args': [ - { - 'variable': 'exit_status' - }, - { - 'value': 0 - } - ] - } - ] + "function": "and", + "args": [ + {"function": "==", "args": [{"variable": "disposition"}, {"value": 0}]}, + {"function": "==", "args": [{"variable": "exit_status"}, {"value": 0}]}, + ], } cel = prettify(expr) assert cel == "disposition == 0 and exit_status == 0" def test_parse_any(self): - expr = { - 'function': 'any', - 'args': [ - { - 'variable': 'Q2' - }, - { - 'value': [1, 2, 3] - } - ] - } + expr = {"function": "any", "args": [{"variable": "Q2"}, {"value": [1, 2, 3]}]} cel = prettify(expr) - assert cel == 'Q2.any([1, 2, 3])' + assert cel == "Q2.any([1, 2, 3])" # Works with subvariable aliases expr = { - 'function': 'any', - 'args': [ - {'variable': 'Q2'}, - {'value': ["subvar1", "subvar2"]} - ] + "function": "any", + "args": [{"variable": "Q2"}, {"value": ["subvar1", "subvar2"]}], } cel = prettify(expr) assert cel == "Q2.any(['subvar1', 'subvar2'])" def test_parse_all(self): + expr = {"function": "all", "args": [{"variable": "Q2"}, {"value": [1, 2, 3]}]} + cel = prettify(expr) + assert cel == "Q2.all([1, 2, 3])" + + def test_parse_sample_rule_2_complex(self): expr = { - 'function': 'all', - 'args': [ + "function": "or", + "args": [ { - 'variable': 'Q2' + "function": "and", + "args": [ + { + "function": "==", + "args": [{"variable": "disposition"}, {"value": 0}], + }, + { + "function": "==", + "args": [{"variable": "exit_status"}, {"value": 1}], + }, + ], }, { - 'value': [1, 2, 3] - } - ] + "function": "and", + "args": [ + { + "function": "==", + "args": [{"variable": "disposition"}, {"value": 0}], + }, + { + "function": "==", + "args": [{"variable": "exit_status"}, {"value": 0}], + }, + ], + }, + ], } cel = prettify(expr) - assert cel == 'Q2.all([1, 2, 3])' - - def test_parse_sample_rule_2_complex(self): - expr = { - 'function': 'or', - 'args': [{ - 'function': 'and', - 'args': [ - { - 'function': '==', - 'args': [ - { - 'variable': 'disposition' - }, - { - 'value': 0 - } - ] - }, - { - 'function': '==', - 'args': [ - { - 'variable': 'exit_status' - }, - { - 'value': 1 - } - ] - } - ] - }, { - 'function': 'and', - 'args': [ - { - 'function': '==', - 'args': [ - { - 'variable': 'disposition' - }, - { - 'value': 0 - } - ] - }, - { - 'function': '==', - 'args': [ - { - 'variable': 'exit_status' - }, - { - 'value': 0 - } - ] - } - ] - } - ]} - cel = prettify(expr) - assert cel == "(disposition == 0 and exit_status == 1) or " \ - "(disposition == 0 and exit_status == 0)" + assert ( + cel == "(disposition == 0 and exit_status == 1) or " + "(disposition == 0 and exit_status == 0)" + ) def test_parse_sample_any(self): expr = { - 'function': 'any', - 'args': [ - { - 'variable': 'CompanyTurnover' - }, - { - 'value': [99] - } - ] + "function": "any", + "args": [{"variable": "CompanyTurnover"}, {"value": [99]}], } cel = prettify(expr) assert cel == "CompanyTurnover.any([99])" expr = { - 'function': 'any', - 'args': [ - { - 'variable': 'sector' - }, - { - 'value': [2, 3, 98, 99] - } - ] + "function": "any", + "args": [{"variable": "sector"}, {"value": [2, 3, 98, 99]}], } cel = prettify(expr) assert cel == 
"sector.any([2, 3, 98, 99])" def test_parse_negated_expr(self): expr = { - 'function': 'not', - 'args': [ - { - 'function': '==', - 'args': [ - { - 'variable': 'age' - }, - { - 'value': 1 - } - ] - } - ] + "function": "not", + "args": [{"function": "==", "args": [{"variable": "age"}, {"value": 1}]}], } cel = prettify(expr) assert cel == "not age == 1" def test_parse_negated_method_call(self): expr = { - 'function': 'not', - 'args': [ - { - 'function': 'any', - 'args': [ - { - 'variable': 'Q2' - }, - { - 'value': [1, 2, 3] - } - ] - } - ] + "function": "not", + "args": [ + {"function": "any", "args": [{"variable": "Q2"}, {"value": [1, 2, 3]}]} + ], } cel = prettify(expr) - assert cel == 'not Q2.any([1, 2, 3])' + assert cel == "not Q2.any([1, 2, 3])" expr = { - 'function': 'not', - 'args': [ - { - 'function': 'all', - 'args': [ - { - 'variable': 'Q2' - }, - { - 'value': [1, 2, 3] - } - ] - } - ] + "function": "not", + "args": [ + {"function": "all", "args": [{"variable": "Q2"}, {"value": [1, 2, 3]}]} + ], } cel = prettify(expr) - assert cel == 'not Q2.all([1, 2, 3])' + assert cel == "not Q2.all([1, 2, 3])" def test_parse_duplicates_method(self): - expr = { - 'function': 'duplicates', - 'args': [ - { - 'variable': 'identity' - } - ] - } + expr = {"function": "duplicates", "args": [{"variable": "identity"}]} cel = prettify(expr) assert cel == "identity.duplicates()" # Negated. expr = { - 'function': 'not', - 'args': [ - { - 'function': 'duplicates', - 'args': [ - { - 'variable': 'identity' - } - ] - } - ] + "function": "not", + "args": [{"function": "duplicates", "args": [{"variable": "identity"}]}], } cel = prettify(expr) assert cel == "not identity.duplicates()" def test_unknown_function(self): expr = { - 'function': '>>', # Assuming this is a typo - 'args': [ - { - 'variable': 'identity' - }, - { - 'value': 1 - } - ] + "function": ">>", # Assuming this is a typo + "args": [{"variable": "identity"}, {"value": 1}], } with pytest.raises(Exception) as err: prettify(expr) @@ -4431,117 +3094,60 @@ def test_unknown_function(self): class TestDatetimeStrings(TestCase): - def test_iso8601_complete(self): expr = "starttime < '2016-12-21T12:00:00+00:00'" assert parse_expr(expr) == { "function": "<", - "args": [ - { - "variable": "starttime" - }, - { - "value": "2016-12-21T12:00:00+00:00" - } - ] + "args": [{"variable": "starttime"}, {"value": "2016-12-21T12:00:00+00:00"}], } def test_iso8601_wo_tzinfo(self): expr = "starttime < '2016-12-21T12:00:00'" assert parse_expr(expr) == { "function": "<", - "args": [ - { - "variable": "starttime" - }, - { - "value": "2016-12-21T12:00:00" - } - ] + "args": [{"variable": "starttime"}, {"value": "2016-12-21T12:00:00"}], } def test_iso8601_day_hour_minute_sec(self): expr = "starttime < '2016-12-21T12:00:00'" assert parse_expr(expr) == { "function": "<", - "args": [ - { - "variable": "starttime" - }, - { - "value": "2016-12-21T12:00:00" - } - ] + "args": [{"variable": "starttime"}, {"value": "2016-12-21T12:00:00"}], } def test_iso8601_day_hour_minute(self): expr = "starttime < '2016-12-21T12:00'" assert parse_expr(expr) == { "function": "<", - "args": [ - { - "variable": "starttime" - }, - { - "value": "2016-12-21T12:00" - } - ] + "args": [{"variable": "starttime"}, {"value": "2016-12-21T12:00"}], } def test_iso8601_day_hour(self): expr = "starttime < '2016-12-21T12'" assert parse_expr(expr) == { "function": "<", - "args": [ - { - "variable": "starttime" - }, - { - "value": "2016-12-21T12" - } - ] + "args": [{"variable": "starttime"}, {"value": "2016-12-21T12"}], } 
def test_iso8601_day(self): expr = "starttime < '2016-12-21'" assert parse_expr(expr) == { "function": "<", - "args": [ - { - "variable": "starttime" - }, - { - "value": "2016-12-21" - } - ] + "args": [{"variable": "starttime"}, {"value": "2016-12-21"}], } def test_iso8601_month(self): expr = "starttime < '2016-12'" assert parse_expr(expr) == { "function": "<", - "args": [ - { - "variable": "starttime" - }, - { - "value": "2016-12" - } - ] + "args": [{"variable": "starttime"}, {"value": "2016-12"}], } def test_iso8601_year(self): expr = "starttime < '2016'" assert parse_expr(expr) == { "function": "<", - "args": [ - { - "variable": "starttime" - }, - { - "value": "2016" - } - ] + "args": [{"variable": "starttime"}, {"value": "2016"}], } @@ -4550,7 +3156,7 @@ class TestDateTimeExpression(TestCase): Test for datetimes being correctly interpreted as values """ - ds_url = 'http://test.crunch.io/api/datasets/12345/' + ds_url = "http://test.crunch.io/api/datasets/12345/" def mock_dataset(self, var_id, var_alias, var_type, categories=None): """ @@ -4558,57 +3164,46 @@ def mock_dataset(self, var_id, var_alias, var_type, categories=None): """ if not categories: categories = [] - table_mock = mock.MagicMock(metadata={ - var_id: { - 'id': var_id, - 'alias': var_alias, - 'type': var_type, - 'categories': categories + table_mock = mock.MagicMock( + metadata={ + var_id: { + "id": var_id, + "alias": var_alias, + "type": var_type, + "categories": categories, + } } - }) + ) ds = mock.MagicMock() ds.self = self.ds_url ds.follow.return_value = table_mock return ds def test_process_expression(self): - var_id = '0001' - var_alias = 'starttime' - var_type = 'datetime' - var_url = '%svariables/%s/' % (self.ds_url, var_id) + var_id = "0001" + var_alias = "starttime" + var_type = "datetime" + var_url = "%svariables/%s/" % (self.ds_url, var_id) ds = self.mock_dataset(var_id, var_alias, var_type) expr = "starttime < '2016-12-21'" parsed = parse_expr(expr) expr_obj = process_expr(parsed, ds) assert expr_obj == { - 'function': '<', - 'args': [ - { - 'variable': var_url - }, - { - 'value': '2016-12-21' - } - ] + "function": "<", + "args": [{"variable": var_url}, {"value": "2016-12-21"}], } def test_datetime_as_value(self): - ds = self.mock_dataset(None, '', '') + ds = self.mock_dataset(None, "", "") expr = "'2016-12-21T12' == 5" parsed = parse_expr(expr) expr_obj = process_expr(parsed, ds) assert expr_obj == { - 'function': '==', - 'args': [ - { - 'value': '2016-12-21T12' - }, - { - 'value': 5 - } - ] + "function": "==", + "args": [{"value": "2016-12-21T12"}, {"value": 5}], } + class TestGetDatasetVariables(TestCase): ds_url = "http://mock.crunch.io/api/datasets/123/" @@ -5009,10 +3604,7 @@ def test_get_dataset_variables_numeric_arrays(self): "{}subvariables/002/".format(var_url): {"alias": "subvar2"}, "{}subvariables/003/".format(var_url): {"alias": "subvar3"}, }, - "values": [ - [1, 3, 1], - [2, 1, 1], - ], + "values": [[1, 3, 1], [2, 1, 1]], } } ) @@ -5112,10 +3704,7 @@ def test_get_dataset_variables_categorical_variable(self): var_id = "0001" var_alias = "my_categorical" var_type = "categorical" - categories = [ - {"name": "mocking", "id": 1}, - {"name": "coding", "id": 2}, - ] + categories = [{"name": "mocking", "id": 1}, {"name": "coding", "id": 2}] table_mock = mock.MagicMock( metadata={ @@ -5135,6 +3724,6 @@ def test_get_dataset_variables_categorical_variable(self): "alias": var_alias, "type": var_type, "id": var_id, - "categories": categories + "categories": categories, } - } \ No newline at end of file + } 
diff --git a/scrunch/tests/test_folders.py b/scrunch/tests/test_folders.py index 152bd4a..5919950 100644 --- a/scrunch/tests/test_folders.py +++ b/scrunch/tests/test_folders.py @@ -9,80 +9,83 @@ def _getitem(var): variables = { - 'foo': AttributeDict(url='http://example.foo/'), - 'bar': AttributeDict(url='http://example.bar/'), + "foo": AttributeDict(url="http://example.foo/"), + "bar": AttributeDict(url="http://example.bar/"), } return variables[var] def test_move_here_simple(): - """ basic test assuring `Folder.move_here` doesn't throw a basestring + """basic test assuring `Folder.move_here` doesn't throw a basestring exception '""" entity_mock = MagicMock() root_mock = MagicMock() root_mock.dataset.__getitem__.side_effect = _getitem folder = Folder(entity_mock, root_mock, MagicMock()) - folder.move_here(['foo', 'bar']) + folder.move_here(["foo", "bar"]) entity_mock.patch.assert_called() def test_unique_folders(): session = MockSession() - dataset_url = 'http://host/api/datasets/abc/' - folders_url = 'http://host/api/datasets/abc/folders/' - public_url = 'http://host/api/datasets/abc/folders/public/' - hidden_url = 'http://host/api/datasets/abc/folders/hidden/' - secure_url = 'http://host/api/datasets/abc/folders/secure/' - dataset_resource = Entity(session, **{ - "element": "shoji:entity", - "self": dataset_url, - "body": { - "name": "test_dataset_project" + dataset_url = "http://host/api/datasets/abc/" + folders_url = "http://host/api/datasets/abc/folders/" + public_url = "http://host/api/datasets/abc/folders/public/" + hidden_url = "http://host/api/datasets/abc/folders/hidden/" + secure_url = "http://host/api/datasets/abc/folders/secure/" + dataset_resource = Entity( + session, + **{ + "element": "shoji:entity", + "self": dataset_url, + "body": {"name": "test_dataset_project"}, + "catalogs": {"folders": folders_url}, }, - "catalogs": { - "folders": folders_url, - } - }) + ) dataset_resource.variables = MagicMock() dataset_resource.settings = MagicMock() - folders_resource = Catalog(session, **{ - "element": "shoji:catalog", - "self": folders_url, - "index": {}, - "body": { - "name": "Root" + folders_resource = Catalog( + session, + **{ + "element": "shoji:catalog", + "self": folders_url, + "index": {}, + "body": {"name": "Root"}, + "catalogs": { + "public": public_url, + "hidden": hidden_url, + "secure": secure_url, + }, }, - "catalogs": { - "public": public_url, - "hidden": hidden_url, - "secure": secure_url, - } - }) - public_resource = Catalog(session, **{ - "element": "shoji:catalog", - "self": public_url, - "index": {}, - "body": { - "name": "Public" + ) + public_resource = Catalog( + session, + **{ + "element": "shoji:catalog", + "self": public_url, + "index": {}, + "body": {"name": "Public"}, }, - }) - hidden_resource = Catalog(session, **{ - "element": "shoji:catalog", - "self": hidden_url, - "index": {}, - "body": { - "name": "Hidden" + ) + hidden_resource = Catalog( + session, + **{ + "element": "shoji:catalog", + "self": hidden_url, + "index": {}, + "body": {"name": "Hidden"}, }, - }) - secure_resource = Catalog(session, **{ - "element": "shoji:catalog", - "self": secure_url, - "index": {}, - "body": { - "name": "Secure" + ) + secure_resource = Catalog( + session, + **{ + "element": "shoji:catalog", + "self": secure_url, + "index": {}, + "body": {"name": "Secure"}, }, - }) + ) session.add_fixture(folders_url, folders_resource) session.add_fixture(public_url, public_resource) session.add_fixture(hidden_url, hidden_resource) @@ -96,41 +99,40 @@ def test_unique_folders(): def 
test_legacy_without_public(): session = MockSession() - dataset_url = 'http://host/api/datasets/abc/' - folders_url = 'http://host/api/datasets/abc/folders/' - public_url = 'http://host/api/datasets/abc/folders/public/' - dataset_resource = Entity(session, **{ - "element": "shoji:entity", - "self": dataset_url, - "body": { - "name": "test_dataset_project" + dataset_url = "http://host/api/datasets/abc/" + folders_url = "http://host/api/datasets/abc/folders/" + public_url = "http://host/api/datasets/abc/folders/public/" + dataset_resource = Entity( + session, + **{ + "element": "shoji:entity", + "self": dataset_url, + "body": {"name": "test_dataset_project"}, + "catalogs": {"folders": folders_url}, }, - "catalogs": { - "folders": folders_url, - } - }) + ) dataset_resource.variables = MagicMock() dataset_resource.settings = MagicMock() - folders_resource = Catalog(session, **{ - "element": "shoji:catalog", - "self": folders_url, - "index": {}, - "catalogs": { - "public": public_url - } - }) - public_resource = Catalog(session, **{ - "element": "shoji:catalog", - "self": public_url, - "index": {}, - "body": { - "name": "Root" + folders_resource = Catalog( + session, + **{ + "element": "shoji:catalog", + "self": folders_url, + "index": {}, + "catalogs": {"public": public_url}, }, - "catalogs": { - "public": public_url - } - }) + ) + public_resource = Catalog( + session, + **{ + "element": "shoji:catalog", + "self": public_url, + "index": {}, + "body": {"name": "Root"}, + "catalogs": {"public": public_url}, + }, + ) session.add_fixture(folders_url, folders_resource) session.add_fixture(public_url, public_resource) dataset = MutableDataset(dataset_resource) @@ -142,52 +144,54 @@ def test_legacy_without_public(): def test_unique_folders_no_secure(): session = MockSession() - dataset_url = 'http://host/api/datasets/abc/' - folders_url = 'http://host/api/datasets/abc/folders/' - public_url = 'http://host/api/datasets/abc/folders/public/' - hidden_url = 'http://host/api/datasets/abc/folders/hidden/' - dataset_resource = Entity(session, **{ - "element": "shoji:entity", - "self": dataset_url, - "body": { - "name": "test_dataset_project" + dataset_url = "http://host/api/datasets/abc/" + folders_url = "http://host/api/datasets/abc/folders/" + public_url = "http://host/api/datasets/abc/folders/public/" + hidden_url = "http://host/api/datasets/abc/folders/hidden/" + dataset_resource = Entity( + session, + **{ + "element": "shoji:entity", + "self": dataset_url, + "body": {"name": "test_dataset_project"}, + "catalogs": {"folders": folders_url}, }, - "catalogs": { - "folders": folders_url, - } - }) + ) dataset_resource.variables = MagicMock() dataset_resource.settings = MagicMock() - folders_resource = Catalog(session, **{ - "element": "shoji:catalog", - "self": folders_url, - "index": {}, - "body": { - "name": "Root" + folders_resource = Catalog( + session, + **{ + "element": "shoji:catalog", + "self": folders_url, + "index": {}, + "body": {"name": "Root"}, + "catalogs": { + "public": public_url, + "hidden": hidden_url, + # Viewer users don't have the secure folder available + # "secure": secure_url, + }, }, - "catalogs": { - "public": public_url, - "hidden": hidden_url, - # Viewer users don't have the secure folder available - # "secure": secure_url, - } - }) - public_resource = Catalog(session, **{ - "element": "shoji:catalog", - "self": hidden_url, - "index": {}, - "body": { - "name": "Public" + ) + public_resource = Catalog( + session, + **{ + "element": "shoji:catalog", + "self": hidden_url, + "index": 
{}, + "body": {"name": "Public"}, }, - }) - hidden_resource = Catalog(session, **{ - "element": "shoji:catalog", - "self": hidden_url, - "index": {}, - "body": { - "name": "Hidden" + ) + hidden_resource = Catalog( + session, + **{ + "element": "shoji:catalog", + "self": hidden_url, + "index": {}, + "body": {"name": "Hidden"}, }, - }) + ) session.add_fixture(folders_url, folders_resource) session.add_fixture(public_url, public_resource) session.add_fixture(hidden_url, hidden_resource) @@ -200,47 +204,48 @@ def test_unique_folders_no_secure(): def test_unique_folders_no_hidden(): session = MockSession() - dataset_url = 'http://host/api/datasets/abc/' - folders_url = 'http://host/api/datasets/abc/folders/' - public_url = 'http://host/api/datasets/abc/folders/public/' - dataset_resource = Entity(session, **{ - "element": "shoji:entity", - "self": dataset_url, - "body": { - "name": "test_dataset_project" + dataset_url = "http://host/api/datasets/abc/" + folders_url = "http://host/api/datasets/abc/folders/" + public_url = "http://host/api/datasets/abc/folders/public/" + dataset_resource = Entity( + session, + **{ + "element": "shoji:entity", + "self": dataset_url, + "body": {"name": "test_dataset_project"}, + "catalogs": {"folders": folders_url}, }, - "catalogs": { - "folders": folders_url, - } - }) + ) dataset_resource.variables = MagicMock() dataset_resource.settings = MagicMock() - folders_resource = Catalog(session, **{ - "element": "shoji:catalog", - "self": folders_url, - "index": {}, - "body": { - "name": "Root" + folders_resource = Catalog( + session, + **{ + "element": "shoji:catalog", + "self": folders_url, + "index": {}, + "body": {"name": "Root"}, + "catalogs": { + # Standard exposed catalogs + "public": public_url, + "personal": "./personal/", + "parents": "./parents/", + # Viewer users don't have the secure folder available + # "secure": secure_url, + # Viewers also don't get the hidden folder exposed + # "hidden": hidden_url, + }, }, - "catalogs": { - # Standard exposed catalogs - "public": public_url, - "personal": "./personal/", - "parents": "./parents/", - # Viewer users don't have the secure folder available - # "secure": secure_url, - # Viewers also don't get the hidden folder exposed - # "hidden": hidden_url, - } - }) - public_resource = Catalog(session, **{ - "element": "shoji:catalog", - "self": public_url, - "index": {}, - "body": { - "name": "Public" + ) + public_resource = Catalog( + session, + **{ + "element": "shoji:catalog", + "self": public_url, + "index": {}, + "body": {"name": "Public"}, }, - }) + ) session.add_fixture(folders_url, folders_resource) session.add_fixture(public_url, public_resource) dataset = MutableDataset(dataset_resource) diff --git a/scrunch/tests/test_projects.py b/scrunch/tests/test_projects.py index cb39d00..018fe82 100644 --- a/scrunch/tests/test_projects.py +++ b/scrunch/tests/test_projects.py @@ -17,62 +17,40 @@ class TestProjectNesting(TestCase): def test_detect_correct_handler(self): - session = Mock( - feature_flags={'old_projects_order': True} + session = Mock(feature_flags={"old_projects_order": True}) + dataset_order = Order(session, graph=[]) + datasets_catalog = Catalog(session, index={}, order=dataset_order) + shoji_resource = Entity( + session, self="/project/url/", body={}, index={}, datasets=datasets_catalog ) - dataset_order = Order(session, **{ - 'graph': [] - }) - datasets_catalog = Catalog(session, **{ - 'index': {}, - 'order': dataset_order - }) - shoji_resource = Entity(session, **{ - 'self': '/project/url/', - 'body': 
{}, - 'index': {}, - 'datasets': datasets_catalog - }) project = Project(shoji_resource) self.assertTrue(isinstance(project.order, ProjectDatasetsOrder)) - session = Mock( - feature_flags={'old_projects_order': False} + session = Mock(feature_flags={"old_projects_order": False}) + shoji_resource = Entity( + session, self="/project/url/", body={}, index={}, graph=[] ) - shoji_resource = Entity(session, **{ - 'self': '/project/url/', - 'body': {}, - 'index': {}, - 'graph': [] - }) project = Project(shoji_resource) self.assertTrue(isinstance(project.order, Project)) def test_create_subproject(self): session = MockSession() - session.feature_flags = {'old_projects_order': False} - shoji_resource = Entity(session, **{ - 'self': 'http://example.com/project/url/', - 'body': {}, - 'index': {}, - 'graph': [], - }) + session.feature_flags = {"old_projects_order": False} + shoji_resource = Entity( + session, self="http://example.com/project/url/", body={}, index={}, graph=[] + ) # Setup the POST request and the fixture for the GET that happens after # the .refresh() - created_project_url = 'http://example.com/project/2/' + created_project_url = "http://example.com/project/2/" response = Response() response.status_code = 201 - response.headers = { - 'Location': created_project_url - } + response.headers = {"Location": created_project_url} session.add_post_response(response) - session.add_fixture(created_project_url, { - 'self': created_project_url, - 'body': {}, - 'index': {}, - 'graph': [], - }) + session.add_fixture( + created_project_url, + {"self": created_project_url, "body": {}, "index": {}, "graph": []}, + ) project = Project(shoji_resource) # Create a new project @@ -83,103 +61,87 @@ def test_create_subproject(self): self.assertEqual(pa.url, created_project_url) post_request = session.requests[-2] refresh_request = session.requests[-1] - self.assertEqual(refresh_request.method, 'GET') - self.assertEqual(post_request.method, 'POST') + self.assertEqual(refresh_request.method, "GET") + self.assertEqual(post_request.method, "POST") self.assertEqual(post_request.url, project.url) - self.assertEqual(json.loads(post_request.body), { - 'element': 'shoji:entity', - 'body': { - 'name': 'Project A' - } - }) + self.assertEqual( + json.loads(post_request.body), + {"element": "shoji:entity", "body": {"name": "Project A"}}, + ) def make_tree(self): session = MockSession() - session.feature_flags = {'old_projects_order': False} + session.feature_flags = {"old_projects_order": False} # A # / \ # B C # | # D - projects_res_url = 'http://example.com/api/projects/' - a_res_url = 'http://example.com/api/projects/A/' - b_res_url = 'http://example.com/api/projects/B/' - c_res_url = 'http://example.com/api/projects/C/' - d_res_url = 'http://example.com/api/projects/D/' + projects_res_url = "http://example.com/api/projects/" + a_res_url = "http://example.com/api/projects/A/" + b_res_url = "http://example.com/api/projects/B/" + c_res_url = "http://example.com/api/projects/C/" + d_res_url = "http://example.com/api/projects/D/" a_payload = { - 'element': 'shoji:entity', - 'self': a_res_url, - 'catalogs': { - 'project': 'http://example.com/api/projects/' - }, - 'body': { - 'name': 'project A' - }, - 'index': { + "element": "shoji:entity", + "self": a_res_url, + "catalogs": {"project": "http://example.com/api/projects/"}, + "body": {"name": "project A"}, + "index": { b_res_url: { - 'id': 'idB', - 'name': 'project B', - 'icon': None, - 'description': '', - 'type': 'project' + "id": "idB", + "name": "project B", + "icon": 
None, + "description": "", + "type": "project", }, c_res_url: { - 'id': 'idC', - 'name': 'project C', - 'icon': None, - 'description': '', - 'type': 'project' - } + "id": "idC", + "name": "project C", + "icon": None, + "description": "", + "type": "project", + }, }, - 'graph': [c_res_url, b_res_url] + "graph": [c_res_url, b_res_url], } b_payload = { - 'element': 'shoji:entity', - 'self': b_res_url, - 'catalogs': { - 'project': a_res_url - }, - 'body': {'name': 'project B'}, - 'index': { + "element": "shoji:entity", + "self": b_res_url, + "catalogs": {"project": a_res_url}, + "body": {"name": "project B"}, + "index": { d_res_url: { - 'id': 'idD', - 'name': 'project D', - 'icon': None, - 'description': '', - 'type': 'project' + "id": "idD", + "name": "project D", + "icon": None, + "description": "", + "type": "project", } }, - 'graph': [d_res_url] + "graph": [d_res_url], } c_payload = { - 'element': 'shoji:entity', - 'self': c_res_url, - 'catalogs': { - 'project': a_res_url - }, - 'body': {'name': 'project C'}, - 'index': {}, - 'graph': [] + "element": "shoji:entity", + "self": c_res_url, + "catalogs": {"project": a_res_url}, + "body": {"name": "project C"}, + "index": {}, + "graph": [], } d_payload = { - 'element': 'shoji:entity', - 'self': d_res_url, - 'catalogs': { - 'project': b_res_url - }, - 'body': {'name': 'project D'}, - 'index': {}, - 'graph': [] + "element": "shoji:entity", + "self": d_res_url, + "catalogs": {"project": b_res_url}, + "body": {"name": "project D"}, + "index": {}, + "graph": [], } projects_catalog = { - 'element': 'shoji:catalog', - 'self': projects_res_url, - 'index': { - a_res_url: { - 'name': 'project A' - } - } + "element": "shoji:catalog", + "self": projects_res_url, + "index": {a_res_url: {"name": "project A"}}, } session.add_fixture(a_res_url, a_payload) session.add_fixture(b_res_url, b_payload) @@ -194,41 +156,40 @@ def make_tree(self): return session def test_follow_path(self): - a_res_url = 'http://example.com/api/projects/A/' - d_res_url = 'http://example.com/api/projects/D/' + a_res_url = "http://example.com/api/projects/A/" + d_res_url = "http://example.com/api/projects/D/" session = self.make_tree() a_res = session.get(a_res_url).payload project_a = Project(a_res) - project_c = project_a.order['| project C '] - project_d = project_a.order['| project B | project D'] + project_d = project_a.order["| project B | project D"] self.assertTrue(isinstance(project_d, Project)) self.assertEqual(project_d.resource.self, d_res_url) with self.assertRaises(InvalidPathError): - project_a.order['| project B | Invalid'] + project_a.order["| project B | Invalid"] def test_rename(self): - a_res_url = 'http://example.com/api/projects/A/' + a_res_url = "http://example.com/api/projects/A/" session = self.make_tree() a_res = session.get(a_res_url).payload project_a = Project(a_res) - project_d = project_a.order['| project B | project D'] - project_d.rename('Renamed Project D') + project_d = project_a.order["| project B | project D"] + project_d.rename("Renamed Project D") # This works because .rename() implementation calls shoji Entity.edit # which will make the request an update the resource's internal payload # as well. If this passes it means that Scrunch is correct and pycrunch # did its thing. 
- self.assertEqual(project_d.resource.body.name, 'Renamed Project D') - self.assertEqual(project_d.name, 'Renamed Project D') + self.assertEqual(project_d.resource.body.name, "Renamed Project D") + self.assertEqual(project_d.name, "Renamed Project D") def test_move_things(self): - a_res_url = 'http://example.com/api/projects/A/' - dataset_url = 'http://example.com/api/datasets/1/' + a_res_url = "http://example.com/api/projects/A/" + dataset_url = "http://example.com/api/datasets/1/" session = self.make_tree() project_a = Project(session.get(a_res_url).payload) - project_c = project_a.order['| project C '] - project_d = project_a.order['| project B | project D'] + project_c = project_a.order["| project C "] + project_d = project_a.order["| project B | project D"] dataset = Mock(url=dataset_url) # Moving project C under project D @@ -241,24 +202,24 @@ def test_move_things(self): dataset.resource.refresh.assert_called_once() patch_request = session.requests[-3] refresh_request = session.requests[-2] - self.assertEqual(refresh_request.method, 'GET') + self.assertEqual(refresh_request.method, "GET") self.assertEqual(refresh_request.url, project_d.url) - self.assertEqual(patch_request.method, 'PATCH') + self.assertEqual(patch_request.method, "PATCH") self.assertEqual(patch_request.url, project_d.url) - self.assertEqual(json.loads(patch_request.body), { - 'element': 'shoji:entity', - 'body': {}, - 'index': { - project_c.url: {}, - dataset.url: {} + self.assertEqual( + json.loads(patch_request.body), + { + "element": "shoji:entity", + "body": {}, + "index": {project_c.url: {}, dataset.url: {}}, + # 'graph': [project_c.url, dataset.url] }, - # 'graph': [project_c.url, dataset.url] - }) + ) def test_move_project(self): - catalog_url = 'http://example.com/api/projects/' - a_res_url = 'http://example.com/api/projects/A/' - d_res_url = 'http://example.com/api/projects/D/' + catalog_url = "http://example.com/api/projects/" + a_res_url = "http://example.com/api/projects/A/" + d_res_url = "http://example.com/api/projects/D/" session = self.make_tree() project_a = Project(session.get(a_res_url).payload) project_d = Project(session.get(d_res_url).payload) @@ -278,124 +239,129 @@ def test_move_project(self): self.assertEqual(request2.url, a_res_url) patch_request = session.requests[-3] - self.assertEqual(patch_request.method, 'PATCH') + self.assertEqual(patch_request.method, "PATCH") self.assertEqual(patch_request.url, project_a.url) - index = json.loads(patch_request.body)['index'] - self.assertEqual(index, { - project_d.url: {}, - }) + index = json.loads(patch_request.body)["index"] + self.assertEqual(index, {project_d.url: {}}) def test_place(self): - a_res_url = 'http://example.com/api/projects/A/' - dataset1_url = 'http://example.com/api/datasets/1/' - dataset2_url = 'http://example.com/api/datasets/2/' + a_res_url = "http://example.com/api/projects/A/" + dataset1_url = "http://example.com/api/datasets/1/" + dataset2_url = "http://example.com/api/datasets/2/" session = self.make_tree() project_a = Project(session.get(a_res_url).payload) - project_b = project_a.order['| project B'] - project_d = project_a.order['| project B | project D'] + project_b = project_a.order["| project B"] + project_d = project_a.order["| project B | project D"] dataset1 = Mock(url=dataset1_url) dataset2 = Mock(url=dataset2_url) - dataset1.name = 'Dataset 1' - dataset2.name = 'Dataset 2' + dataset1.name = "Dataset 1" + dataset2.name = "Dataset 2" # Do a .place call - project_a.place(dataset1, '| project B', before='project D') + 
project_a.place(dataset1, "| project B", before="project D") # After a move_here there is a PATCH and a GET # the PATCH performs the changes and the GET is a resource.refresh() patch_request = session.requests[-2] - self.assertEqual(patch_request.method, 'PATCH') + self.assertEqual(patch_request.method, "PATCH") # Note the patch is to project B even though we did `.place` on # project A, but the target path pointed to B self.assertEqual(patch_request.url, project_b.url) - self.assertEqual(json.loads(patch_request.body), { - 'element': 'shoji:entity', - 'body': {}, - 'index': { - dataset1.url: {} + self.assertEqual( + json.loads(patch_request.body), + { + "element": "shoji:entity", + "body": {}, + "index": {dataset1.url: {}}, + # Note how the graph sent includes dataset1.url before project D + "graph": [dataset1.url, project_d.url], }, - # Note how the graph sent includes dataset1.url before project D - 'graph': [dataset1.url, project_d.url] - }) + ) # Since the PATCH did not really update the server or the session # test fixtures, we need to update the fixtures to reflect the fact # that they've been modified by the recent PATCH request - session.adapter.fixtures[project_b.url]['index'][dataset1.url] = { - 'name': dataset1.name, - 'type': 'dataset' + session.adapter.fixtures[project_b.url]["index"][dataset1.url] = { + "name": dataset1.name, + "type": "dataset", } - session.adapter.fixtures[project_b.url]['graph'] = [dataset1.url, project_d.url] - project_a.place(dataset2, '| project B', after='Dataset 1') + session.adapter.fixtures[project_b.url]["graph"] = [dataset1.url, project_d.url] + project_a.place(dataset2, "| project B", after="Dataset 1") patch_request = session.requests[-2] - self.assertEqual(patch_request.method, 'PATCH') + self.assertEqual(patch_request.method, "PATCH") self.assertEqual(patch_request.url, project_b.url) - self.assertEqual(json.loads(patch_request.body), { - 'element': 'shoji:entity', - 'body': {}, - 'index': { - dataset2.url: {} + self.assertEqual( + json.loads(patch_request.body), + { + "element": "shoji:entity", + "body": {}, + "index": {dataset2.url: {}}, + # Dataset 2 got placed after dataset 1 :) + "graph": [dataset1.url, dataset2.url, project_d.url], }, - # Dataset 2 got placed after dataset 1 :) - 'graph': [dataset1.url, dataset2.url, project_d.url] - }) + ) def test_reorder(self): session = self.make_tree() - a_res_url = 'http://example.com/api/projects/A/' - b_res_url = 'http://example.com/api/projects/B/' - c_res_url = 'http://example.com/api/projects/C/' + a_res_url = "http://example.com/api/projects/A/" + b_res_url = "http://example.com/api/projects/B/" + c_res_url = "http://example.com/api/projects/C/" project_a = Project(session.get(a_res_url).payload) project_a.reorder(["project C", "project B"]) patch_request = session.requests[-2] - self.assertEqual(patch_request.method, 'PATCH') + self.assertEqual(patch_request.method, "PATCH") self.assertEqual(patch_request.url, project_a.url) - self.assertEqual(json.loads(patch_request.body), { - 'element': 'shoji:entity', - 'body': {}, - 'index': {}, - 'graph': [c_res_url, b_res_url] - }) + self.assertEqual( + json.loads(patch_request.body), + { + "element": "shoji:entity", + "body": {}, + "index": {}, + "graph": [c_res_url, b_res_url], + }, + ) def test_move(self): session = self.make_tree() - a_res_url = 'http://example.com/api/projects/A/' - b_res_url = 'http://example.com/api/projects/B/' - c_res_url = 'http://example.com/api/projects/C/' + a_res_url = "http://example.com/api/projects/A/" + b_res_url = 
"http://example.com/api/projects/B/" + c_res_url = "http://example.com/api/projects/C/" project_a = Project(session.get(a_res_url).payload) project_a.reorder(["project C", "project B"]) patch_request = session.requests[-2] - self.assertEqual(patch_request.method, 'PATCH') + self.assertEqual(patch_request.method, "PATCH") self.assertEqual(patch_request.url, project_a.url) - self.assertEqual(json.loads(patch_request.body), { - 'element': 'shoji:entity', - 'body': {}, - 'index': {}, - 'graph': [c_res_url, b_res_url] - }) + self.assertEqual( + json.loads(patch_request.body), + { + "element": "shoji:entity", + "body": {}, + "index": {}, + "graph": [c_res_url, b_res_url], + }, + ) def test_is_root(self): - a_res_url = 'http://example.com/api/projects/A/' + a_res_url = "http://example.com/api/projects/A/" session = self.make_tree() project_a = Project(session.get(a_res_url).payload) - project_b = project_a.order['| project B'] + project_b = project_a.order["| project B"] self.assertTrue(project_a.is_root) self.assertFalse(project_b.is_root) def test_children(self): session = self.make_tree() - a_res_url = 'http://example.com/api/projects/A/' + a_res_url = "http://example.com/api/projects/A/" project_a = Project(session.get(a_res_url).payload) # Get instantiated correctly self.assertTrue(all(isinstance(c, Project) for c in project_a.children)) # Get iterated on the right order - self.assertEqual([c.url for c in project_a.children], - project_a.resource.graph) + self.assertEqual([c.url for c in project_a.children], project_a.resource.graph) # First request before .child should .refresh() the resource to # ensure fresh data refresh_request = session.requests[0] - self.assertEqual(refresh_request.method, 'GET') + self.assertEqual(refresh_request.method, "GET") self.assertEqual(refresh_request.url, a_res_url) def test_delete_project(self): @@ -412,27 +378,28 @@ def _get_root(self): session = MockSession() root_url = "http://example.com/api/" projects_url = "http://example.com/api/projects/" - session.add_fixture(root_url, { - "self": root_url, - "element": "shoji:catalog", - "catalogs": { - "projects": projects_url + session.add_fixture( + root_url, + { + "self": root_url, + "element": "shoji:catalog", + "catalogs": {"projects": projects_url}, + "index": {}, }, - "index": {} - }) - session.add_fixture(projects_url, { - "self": projects_url, - "element": "shoji:catalog", - "catalogs": { - "personal": self.PERSONAL_URL + ) + session.add_fixture( + projects_url, + { + "self": projects_url, + "element": "shoji:catalog", + "catalogs": {"personal": self.PERSONAL_URL}, + "index": {}, }, - "index": {} - }) - session.add_fixture(self.PERSONAL_URL, { - "self": self.PERSONAL_URL, - "element": "shoji:catalog", - "index": {} - }) + ) + session.add_fixture( + self.PERSONAL_URL, + {"self": self.PERSONAL_URL, "element": "shoji:catalog", "index": {}}, + ) response = Response() response.status_code = 204 session.add_patch_response(response) @@ -445,33 +412,30 @@ def test_get_personal(self): def test_move_to_personal(self): root = self._get_root() - a_res_url = 'http://example.com/api/projects/A/' - root.session.add_fixture(a_res_url, { - 'self': a_res_url, - 'element': 'shoji:entity', - 'catalogs': { - 'project': 'http://example.com/api/projects/' - }, - 'body': { - 'name': 'project A' + a_res_url = "http://example.com/api/projects/A/" + root.session.add_fixture( + a_res_url, + { + "self": a_res_url, + "element": "shoji:entity", + "catalogs": {"project": "http://example.com/api/projects/"}, + "body": {"name": "project 
A"}, + "index": {}, + "graph": [], }, - 'index': {}, - 'graph': [] - }) + ) personal = get_personal_project(root) project_a = Project(root.session.get(a_res_url).payload) personal.move_here(project_a) patch_request = root.session.requests[-3] self_refresh_request = root.session.requests[-2] child_refresh_request = root.session.requests[-1] - self.assertEqual(child_refresh_request.method, 'GET') - self.assertEqual(self_refresh_request.method, 'GET') + self.assertEqual(child_refresh_request.method, "GET") + self.assertEqual(self_refresh_request.method, "GET") self.assertEqual(self_refresh_request.url, self.PERSONAL_URL) self.assertEqual(patch_request.url, self.PERSONAL_URL) - self.assertEqual(patch_request.method, 'PATCH') - self.assertEqual(json.loads(patch_request.body)['index'], { - project_a.url: {} - }) + self.assertEqual(patch_request.method, "PATCH") + self.assertEqual(json.loads(patch_request.body)["index"], {project_a.url: {}}) class TestProjectScripts(TestCase): @@ -479,19 +443,16 @@ class TestProjectScripts(TestCase): def test_running_script(self): session = MockSession() - shoji_resource = Entity(session, **{ - 'self': 'http://example.com/project/id/', - 'body': {}, - 'index': {}, - 'graph': [], - "views": { - "execute": self.project_execute_url - } - }) - - exexute_resource = Catalog(session, **{ - 'self': self.project_execute_url, - }) + shoji_resource = Entity( + session, + self="http://example.com/project/id/", + body={}, + index={}, + graph=[], + views={"execute": self.project_execute_url}, + ) + + exexute_resource = Catalog(session, **{"self": self.project_execute_url}) response = Response() response.status_code = 204 @@ -508,25 +469,22 @@ def test_running_script(self): assert execution_request.method == "POST" assert execution_request.url == self.project_execute_url assert json.loads(execution_request.body) == { - 'element': 'shoji:view', - 'value': script_body, + "element": "shoji:view", + "value": script_body, } def test_error_handling(self): session = MockSession() - shoji_resource = Entity(session, **{ - 'self': 'http://example.com/project/id/', - 'body': {}, - 'index': {}, - 'graph': [], - "views": { - "execute": self.project_execute_url - } - }) - - execute_resource = Catalog(session, **{ - 'self': self.project_execute_url - }) + shoji_resource = Entity( + session, + self="http://example.com/project/id/", + body={}, + index={}, + graph=[], + views={"execute": self.project_execute_url}, + ) + + execute_resource = Catalog(session, **{"self": self.project_execute_url}) resolutions = [{"line": 100}] resolutions_out = json.dumps({"resolutions": resolutions}) @@ -548,4 +506,3 @@ def test_error_handling(self): project.execute("Bad script") assert err.exception.resolutions == [{"line": 100}] assert err.exception.client_error.status_code == 400 - diff --git a/scrunch/tests/test_recodes.py b/scrunch/tests/test_recodes.py index e1189d9..0d7bbba 100644 --- a/scrunch/tests/test_recodes.py +++ b/scrunch/tests/test_recodes.py @@ -11,165 +11,165 @@ def _any(self, column): - return { - 'function': 'any', - 'args': [{ - 'variable': self - }, { - 'column': column - }] - } + return {"function": "any", "args": [{"variable": self}, {"column": column}]} def mr_in(mr_self, mr_alias, groups, parent_subvariables): """ Similar helper as in examples.py. Has to return an `any` function. 
""" - return _any(mr_self, [parent_subvariables[subvar_alias(mr_alias, sv)].id - for sv in groups]) + return _any( + mr_self, [parent_subvariables[subvar_alias(mr_alias, sv)].id for sv in groups] + ) -dataset_url = 'http://test.crunch.io/api/datasets/123/' -var_url = 'http://test.crunch.io/api/datasets/123/variables/0001/' -subvar1_url = 'http://test.crunch.io/api/datasets/123/variables/0001/subvariables/00001/' -subvar2_url = 'http://test.crunch.io/api/datasets/123/variables/0001/subvariables/00002/' +dataset_url = "http://test.crunch.io/api/datasets/123/" +var_url = "http://test.crunch.io/api/datasets/123/variables/0001/" +subvar1_url = ( + "http://test.crunch.io/api/datasets/123/variables/0001/subvariables/00001/" +) +subvar2_url = ( + "http://test.crunch.io/api/datasets/123/variables/0001/subvariables/00002/" +) -CATEGORY_MAP = { - 1: [2, 3], - 2: 1 -} -CATEGORY_NAMES = { - 1: 'China', - 2: 'Other' -} +CATEGORY_MAP = {1: [2, 3], 2: 1} +CATEGORY_NAMES = {1: "China", 2: "Other"} -RESPONSE_MAP = { - 1: [1, 2] -} -RESPONSE_NAMES = { - 1: 'online' -} +RESPONSE_MAP = {1: [1, 2]} +RESPONSE_NAMES = {1: "online"} RECODES_PAYLOAD = { - 'element': 'shoji:entity', - 'body': { - 'alias': 'alias', - 'derivation': { - 'function': 'combine_categories', - 'args': [ - {'variable': var_url}, - {'value': [ - {'combined_ids': [2, 3], 'missing': False, 'id': 1, 'name': 'China'}, - {'combined_ids': [1], 'missing': False, 'id': 2, 'name': 'Other'} - ]} - ] + "element": "shoji:entity", + "body": { + "alias": "alias", + "derivation": { + "function": "combine_categories", + "args": [ + {"variable": var_url}, + { + "value": [ + { + "combined_ids": [2, 3], + "missing": False, + "id": 1, + "name": "China", + }, + { + "combined_ids": [1], + "missing": False, + "id": 2, + "name": "Other", + }, + ] + }, + ], }, - 'name': 'name', - 'description': '' - } + "name": "name", + "description": "", + }, } COMBINE_RESPONSES_PAYLOAD = { - 'element': 'shoji:entity', - 'body': { - 'name': 'name', - 'description': '', - 'alias': 'alias', - 'derivation': { - 'function': 'combine_responses', - 'args': [ - {'variable': var_url}, - {'value': [ - {'alias': 'alias_1', 'combined_ids': [subvar1_url, subvar2_url], 'name': 'online'} - ]} - ] - } - } + "element": "shoji:entity", + "body": { + "name": "name", + "description": "", + "alias": "alias", + "derivation": { + "function": "combine_responses", + "args": [ + {"variable": var_url}, + { + "value": [ + { + "alias": "alias_1", + "combined_ids": [subvar1_url, subvar2_url], + "name": "online", + } + ] + }, + ], + }, + }, } class TestCombine(TestCase): - def test_validate_range_expression(self): - test_map = { - 1: range(1, 5) - } - test_cats = { - 1: "China" - } + test_map = {1: range(1, 5)} + test_cats = {1: "China"} ds_res_mock = mock.MagicMock() variable_mock = mock.MagicMock() subvar_mock = mock.MagicMock(entity_url=subvar1_url) # mock the call to entity, this will happen on Variable.resource variable_mock.entity.subvariables.by.return_value = { - 'parent_1': subvar_mock, - 'parent_2': subvar_mock, - 'parent_3': subvar_mock, - 'parent_4': subvar_mock, + "parent_1": subvar_mock, + "parent_2": subvar_mock, + "parent_3": subvar_mock, + "parent_4": subvar_mock, } parent_var = Variable(variable_mock, ds_res_mock) - modified_map = responses_from_map(parent_var, test_map, test_cats, 'test', 'parent') + modified_map = responses_from_map( + parent_var, test_map, test_cats, "test", "parent" + ) # subvar_url * 4 because we used the same mock for all subvars - assert modified_map[0]['combined_ids'] 
== [subvar1_url] * 4 + assert modified_map[0]["combined_ids"] == [subvar1_url] * 4 def test_validate_integer(self): - test_map = { - 1: 1 - } - test_cats = { - 1: "China" - } + test_map = {1: 1} + test_cats = {1: "China"} ds_res_mock = mock.MagicMock() variable_mock = mock.MagicMock() subvar_mock = mock.MagicMock(entity_url=subvar1_url) # mock the call to entity, this will happen on Variable.resource - variable_mock.entity.subvariables.by.return_value = { - 'parent_1': subvar_mock - } + variable_mock.entity.subvariables.by.return_value = {"parent_1": subvar_mock} parent_var = Variable(variable_mock, ds_res_mock) - modified_map = responses_from_map(parent_var, test_map, test_cats, 'test', 'parent') + modified_map = responses_from_map( + parent_var, test_map, test_cats, "test", "parent" + ) # subvar_url * 4 because we used the same mock for all subvars - assert modified_map[0]['combined_ids'] == [subvar1_url] + assert modified_map[0]["combined_ids"] == [subvar1_url] def test_combine_categories_unknown_alias(self): resource = mock.MagicMock() - resource.body = {'name': 'mocked_dataset'} + resource.body = {"name": "mocked_dataset"} entity_mock = mock.MagicMock(entity_url=var_url) - resource.variables.by.return_value = { - 'test': entity_mock - } + resource.variables.by.return_value = {"test": entity_mock} resource.variables.index = {} # Var not present ds = MutableDataset(resource) with pytest.raises(ValueError) as err: - ds.combine_categorical('unknown', CATEGORY_MAP, CATEGORY_NAMES, name='name', alias='alias') + ds.combine_categorical( + "unknown", CATEGORY_MAP, CATEGORY_NAMES, name="name", alias="alias" + ) - assert 'Entity mocked_dataset has no (sub)variable with a name or alias unknown' in str(err.value) + assert ( + "Entity mocked_dataset has no (sub)variable with a name or alias unknown" + in str(err.value) + ) def test_combine_categories_from_alias(self): resource = mock.MagicMock() - resource.body = {'name': 'mocked_dataset'} + resource.body = {"name": "mocked_dataset"} entity_mock = mock.MagicMock() entity_mock.entity.self = var_url - resource.variables.by.return_value = { - 'test': entity_mock, - } + resource.variables.by.return_value = {"test": entity_mock} resource.variables.index = {} ds = MutableDataset(resource) with pytest.raises(ValueError) as err: - ds.combine_categorical('test', CATEGORY_MAP, CATEGORY_NAMES, name='name', alias='alias') + ds.combine_categorical( + "test", CATEGORY_MAP, CATEGORY_NAMES, name="name", alias="alias" + ) ds.resource.variables.create.assert_called_with(RECODES_PAYLOAD) - assert 'Entity mocked_dataset has no (sub)variable' in str(err.value) + assert "Entity mocked_dataset has no (sub)variable" in str(err.value) def test_combine_categories_from_entity(self): resource = mock.MagicMock() - resource.body = {'name': 'mocked_dataset'} + resource.body = {"name": "mocked_dataset"} entity_mock = mock.MagicMock() entity_mock.entity.self = var_url - resource.variables.by.return_value = { - 'test': entity_mock - } + resource.variables.by.return_value = {"test": entity_mock} resource.variables.index = {} # Var not present # mock a Tuple object @@ -179,9 +179,11 @@ def test_combine_categories_from_entity(self): entity = Variable(tuple_mock, resource) ds = MutableDataset(resource) with pytest.raises(ValueError) as err: - ds.combine_categorical(entity, CATEGORY_MAP, CATEGORY_NAMES, name='name', alias='alias') + ds.combine_categorical( + entity, CATEGORY_MAP, CATEGORY_NAMES, name="name", alias="alias" + ) 
ds.resource.variables.create.assert_called_with(RECODES_PAYLOAD) - assert 'Entity mocked_dataset has no (sub)variable' in str(err.value) + assert "Entity mocked_dataset has no (sub)variable" in str(err.value) def test_combine_responses_unknown_alias(self): resource = mock.MagicMock() @@ -199,23 +201,23 @@ def test_combine_responses_unknown_alias(self): # add dictionaries return to by functions entity_mock.entity.subvariables.by.return_value = { - 'test_1': subvar_mock, - 'test_x': subvar2_mock + "test_1": subvar_mock, + "test_x": subvar2_mock, } - resource.variables.by.return_value = { - 'test': entity_mock - } + resource.variables.by.return_value = {"test": entity_mock} ds = MutableDataset(resource) with pytest.raises(ValueError) as err: - ds.combine_multiple_response('test', RESPONSE_MAP, RESPONSE_NAMES, name='name', alias='alias') + ds.combine_multiple_response( + "test", RESPONSE_MAP, RESPONSE_NAMES, name="name", alias="alias" + ) - assert 'Unknown subvariables for variable' in str(err.value) + assert "Unknown subvariables for variable" in str(err.value) def test_combine_responses_by_alias(self): resource = mock.MagicMock() - resource.body = {'name': 'mocked_dataset'} + resource.body = {"name": "mocked_dataset"} resource.entity.self = dataset_url resource.variables.index = {} # Var not present # mock subvariables @@ -228,24 +230,24 @@ def test_combine_responses_by_alias(self): # add dictionaries return to by functions entity_mock.entity.subvariables.by.return_value = { - 'test_1': subvar_mock, - 'test_2': subvar2_mock + "test_1": subvar_mock, + "test_2": subvar2_mock, } - resource.variables.by.return_value = { - 'test': entity_mock - } + resource.variables.by.return_value = {"test": entity_mock} # make the actual response call ds = MutableDataset(resource) with pytest.raises(ValueError) as err: - ds.combine_multiple_response('test', RESPONSE_MAP, RESPONSE_NAMES, name='name', alias='alias') + ds.combine_multiple_response( + "test", RESPONSE_MAP, RESPONSE_NAMES, name="name", alias="alias" + ) resource.variables.create.assert_called_with(COMBINE_RESPONSES_PAYLOAD) - assert 'Entity mocked_dataset has no (sub)variable' in str(err.value) + assert "Entity mocked_dataset has no (sub)variable" in str(err.value) def test_combine_responses_by_entity(self): resource = mock.MagicMock() - resource.body = {'name': 'mocked_dataset'} + resource.body = {"name": "mocked_dataset"} resource.entity.self = dataset_url resource.variables.index = {} # Var not present @@ -257,304 +259,339 @@ def test_combine_responses_by_entity(self): # mock parent variable entity_mock = mock.MagicMock() # need to name the var to actually build subvar names - entity_mock.alias = 'test' + entity_mock.alias = "test" entity_mock.resource.self = var_url # add dictionaries return to by functions entity_mock.resource.subvariables.by.return_value = { - 'test_1': subvar_mock, - 'test_2': subvar2_mock + "test_1": subvar_mock, + "test_2": subvar2_mock, } - resource.variables.by.return_value = { - 'test': entity_mock - } + resource.variables.by.return_value = {"test": entity_mock} ds = MutableDataset(resource) with pytest.raises(ValueError) as err: - ds.combine_multiple_response(entity_mock, RESPONSE_MAP, RESPONSE_NAMES, name='name', alias='alias') + ds.combine_multiple_response( + entity_mock, RESPONSE_MAP, RESPONSE_NAMES, name="name", alias="alias" + ) resource.variables.create.assert_called_with(COMBINE_RESPONSES_PAYLOAD) - assert 'Entity mocked_dataset has no (sub)variable' in str(err.value) + assert "Entity mocked_dataset has no 
(sub)variable" in str(err.value) class TestRecode(TestCase): - - @mock.patch('scrunch.mutable_dataset.get_mutable_dataset') + @mock.patch("scrunch.mutable_dataset.get_mutable_dataset") def test_recode_categoricals(self, get_dataset_mock): categories = [ + {"missing": False, "name": "Heterosexual", "numeric_value": 1, "id": 1}, + {"missing": False, "name": "Gay or lesbian", "numeric_value": 2, "id": 2}, + {"missing": False, "name": "Bisexual", "numeric_value": 3, "id": 3}, + {"missing": False, "name": "Other", "numeric_value": 4, "id": 4}, { - 'missing': False, - 'name': 'Heterosexual', - 'numeric_value': 1, - 'id': 1 - }, - { - 'missing': False, - 'name': 'Gay or lesbian', - 'numeric_value': 2, - 'id': 2 - }, - { - 'missing': False, - 'name': 'Bisexual', - 'numeric_value': 3, - 'id': 3 - }, - { - 'missing': False, - 'name': 'Other', - 'numeric_value': 4, - 'id': 4 - }, - { - 'missing': False, - 'name': 'Prefer not to say', - 'numeric_value': 5, - 'id': 5 - }, - { - 'missing': True, - 'name': 'skipped', - 'numeric_value': None, - 'id': 8 + "missing": False, + "name": "Prefer not to say", + "numeric_value": 5, + "id": 5, }, - { - 'missing': True, - 'name': 'not asked', - 'numeric_value': None, - 'id': 9 - } + {"missing": True, "name": "skipped", "numeric_value": None, "id": 8}, + {"missing": True, "name": "not asked", "numeric_value": None, "id": 9}, ] - table_mock = mock.MagicMock(metadata={ - '00001': { - 'id': '00001', - 'alias': 'sexuality', - 'type': 'categorical', - 'categories': categories + table_mock = mock.MagicMock( + metadata={ + "00001": { + "id": "00001", + "alias": "sexuality", + "type": "categorical", + "categories": categories, + } } - }) + ) ds_res = mock.MagicMock() ds_res.self = dataset_url ds_res.follow.return_value = table_mock dataset = MutableDataset(ds_res) - dataset.create_categorical([ - {'id': 1, 'name': 'Straight', 'case': 'sexuality.any([1])'}, - {'id': 2, 'name': 'LGBTQ+', 'case': 'sexuality.any([2, 3, 4, 5])'} - ], name='Sexuality 2', alias='sexuality2', multiple=False) - - ds_res.variables.create.assert_called_with({ - 'element': 'shoji:entity', - 'body': { - 'name': 'Sexuality 2', - 'alias': 'sexuality2', - 'description': '', - 'notes': '', - 'expr': { - 'function': 'case', - 'args': [{ - 'column': [1, 2, -1], - 'type': { - 'value': { - 'class': 'categorical', - 'categories': [ - {'missing': False, 'id': 1, 'name': 'Straight', 'numeric_value': None}, - {'missing': False, 'id': 2, 'name': 'LGBTQ+', 'numeric_value': None}, - {'numeric_value': None, 'missing': True, 'id': -1, 'name': 'No Data'} - ] - } - } - }, { - 'function': 'in', - 'args': [ - {'variable': 'http://test.crunch.io/api/datasets/123/variables/00001/'}, - {'value': [1]} - ] - }, { - 'function': 'in', - 'args': [ - {'variable': 'http://test.crunch.io/api/datasets/123/variables/00001/'}, - {'value': [2, 3, 4, 5]} - ] - }] - } + dataset.create_categorical( + [ + {"id": 1, "name": "Straight", "case": "sexuality.any([1])"}, + {"id": 2, "name": "LGBTQ+", "case": "sexuality.any([2, 3, 4, 5])"}, + ], + name="Sexuality 2", + alias="sexuality2", + multiple=False, + ) + + ds_res.variables.create.assert_called_with( + { + "element": "shoji:entity", + "body": { + "name": "Sexuality 2", + "alias": "sexuality2", + "description": "", + "notes": "", + "expr": { + "function": "case", + "args": [ + { + "column": [1, 2, -1], + "type": { + "value": { + "class": "categorical", + "categories": [ + { + "missing": False, + "id": 1, + "name": "Straight", + "numeric_value": None, + }, + { + "missing": False, + "id": 2, + 
"name": "LGBTQ+", + "numeric_value": None, + }, + { + "numeric_value": None, + "missing": True, + "id": -1, + "name": "No Data", + }, + ], + } + }, + }, + { + "function": "in", + "args": [ + { + "variable": "http://test.crunch.io/api/datasets/123/variables/00001/" + }, + {"value": [1]}, + ], + }, + { + "function": "in", + "args": [ + { + "variable": "http://test.crunch.io/api/datasets/123/variables/00001/" + }, + {"value": [2, 3, 4, 5]}, + ], + }, + ], + }, + }, } - }) + ) - @mock.patch('scrunch.mutable_dataset.get_mutable_dataset') + @mock.patch("scrunch.mutable_dataset.get_mutable_dataset") def test_recode_multiple_responses(self, get_dataset_mock): - dataset_id = '123' + dataset_id = "123" categories = [ { - 'numeric_value': 1, - 'selected': True, - 'id': 1, - 'name': 'selected', - 'missing': False - }, - { - 'numeric_value': 2, - 'selected': False, - 'id': 2, - 'name': 'not selected', - 'missing': False + "numeric_value": 1, + "selected": True, + "id": 1, + "name": "selected", + "missing": False, }, { - 'numeric_value': 9, - 'missing': True, - 'id': 9, - 'name': 'not asked' + "numeric_value": 2, + "selected": False, + "id": 2, + "name": "not selected", + "missing": False, }, - { - 'numeric_value': 8, - 'missing': True, - 'id': 8, - 'name': 'skipped' - } + {"numeric_value": 9, "missing": True, "id": 9, "name": "not asked"}, + {"numeric_value": 8, "missing": True, "id": 8, "name": "skipped"}, ] - Entity(mock.MagicMock(), **{ - 'element': 'shoji:entity', - 'self': 'http://test.crunch.io/api/datasets/%s/variables/0001/' % dataset_id, - # needed in order to simulate a Tuple, now Variable is inited with Tuple - 'entity_url': 'http://test.crunch.io/api/datasets/%s/variables/0001/' % dataset_id, - 'body': { - 'name': 'Q1', - 'subreferences': [ - { - 'alias': 'Q1_1', - 'is_subvar': True, - 'name': 'One' - }, - { - 'alias': 'Q1_2', - 'is_subvar': True, - 'name': 'Two' - }, - { - 'alias': 'Q1_3', - 'is_subvar': True, - 'name': 'Three' - } - ], - 'missing_reasons': { - 'skipped': 8, - 'not asked': 9 + Entity( + mock.MagicMock(), + **{ + "element": "shoji:entity", + "self": "http://test.crunch.io/api/datasets/%s/variables/0001/" + % dataset_id, + # needed in order to simulate a Tuple, now Variable is inited with Tuple + "entity_url": "http://test.crunch.io/api/datasets/%s/variables/0001/" + % dataset_id, + "body": { + "name": "Q1", + "subreferences": [ + {"alias": "Q1_1", "is_subvar": True, "name": "One"}, + {"alias": "Q1_2", "is_subvar": True, "name": "Two"}, + {"alias": "Q1_3", "is_subvar": True, "name": "Three"}, + ], + "missing_reasons": {"skipped": 8, "not asked": 9}, + "alias": "Q1", + "subvariables": [ + "http://test.crunch.io/api/datasets/%s/variables/0001/subvariables/000a/" + % dataset_id, + "http://test.crunch.io/api/datasets/%s/variables/0001/subvariables/000b/" + % dataset_id, + "http://test.crunch.io/api/datasets/%s/variables/0001/subvariables/000c/" + % dataset_id, + ], + "dataset_id": dataset_id, + "type": "multiple_response", + "id": "0001", + "categories": categories, + "description": "Multiple Response Example", + "notes": "", }, - 'alias': 'Q1', - 'subvariables': [ - 'http://test.crunch.io/api/datasets/%s/variables/0001/subvariables/000a/' % dataset_id, - 'http://test.crunch.io/api/datasets/%s/variables/0001/subvariables/000b/' % dataset_id, - 'http://test.crunch.io/api/datasets/%s/variables/0001/subvariables/000c/' % dataset_id - ], - 'dataset_id': dataset_id, - 'type': 'multiple_response', - 'id': '0001', - 'categories': categories, - 'description': 'Multiple Response 
Example', - 'notes': '', - } - - }) - table_mock = mock.MagicMock(metadata={ - '00001': { - 'id': '00001', - 'alias': 'sexuality', - 'type': 'categorical', - 'notes': '', - 'categories': categories + }, + ) + table_mock = mock.MagicMock( + metadata={ + "00001": { + "id": "00001", + "alias": "sexuality", + "type": "categorical", + "notes": "", + "categories": categories, + } } - }) + ) ds_res = mock.MagicMock() ds_res.self = dataset_url ds_res.follow.return_value = table_mock dataset = MutableDataset(ds_res) subvar_mock = mock.MagicMock() subvar_mock.self = var_url - subvar_mock.id = 'subvar' - subvariables = { - 'Q1_1': subvar_mock, - 'Q1_2': subvar_mock, - 'Q1_3': subvar_mock, - } - - dataset.create_categorical([ - {'id': 1, 'name': 'Q1_recoded_1', 'case': mr_in(var_url, 'Q1', [1, 2], subvariables)}, - {'id': 2, 'name': 'Q1_recoded_2', 'case': mr_in(var_url, 'Q1', [3], subvariables)} - ], alias='Q1_recoded', name='Q1_recoded', multiple=True) + subvar_mock.id = "subvar" + subvariables = {"Q1_1": subvar_mock, "Q1_2": subvar_mock, "Q1_3": subvar_mock} + + dataset.create_categorical( + [ + { + "id": 1, + "name": "Q1_recoded_1", + "case": mr_in(var_url, "Q1", [1, 2], subvariables), + }, + { + "id": 2, + "name": "Q1_recoded_2", + "case": mr_in(var_url, "Q1", [3], subvariables), + }, + ], + alias="Q1_recoded", + name="Q1_recoded", + multiple=True, + ) # Test how the recoded var was created. - ds_res.variables.create.assert_called_with({ - 'element': 'shoji:entity', - 'body': { - 'name': 'Q1_recoded', - 'description': '', - 'notes': '', - 'alias': 'Q1_recoded', - 'derivation': { - 'function': 'array', - 'args': [{ - 'function': 'make_frame', - 'args': [{ - 'map': { - '0001': { - 'function': 'case', - 'args': [{ - 'column': [1, 2], - 'type': { - 'value': { - 'class': 'categorical', - 'categories': [ - {'selected': True, 'numeric_value': None, 'missing': False, 'id': 1, 'name': 'Selected'}, - {'selected': False, 'numeric_value': None, 'missing': False, 'id': 2, 'name': 'Not selected'} - ] - } - } - }, { - 'function': 'any', - 'args': [ - {'variable': 'http://test.crunch.io/api/datasets/123/variables/0001/'}, - {'column': ['subvar', 'subvar']} - ] - }], - 'references': { - 'alias': 'Q1_recoded_1', - 'name': 'Q1_recoded_1' - } - }, - '0002': { - 'function': 'case', - 'args': [{ - 'column': [1, 2], - 'type': { - 'value': { - 'class': 'categorical', - 'categories': [ - {'selected': True, 'numeric_value': None, 'missing': False, 'id': 1, 'name': 'Selected'}, - {'selected': False, 'numeric_value': None, 'missing': False, 'id': 2, 'name': 'Not selected'} - ] - } + ds_res.variables.create.assert_called_with( + { + "element": "shoji:entity", + "body": { + "name": "Q1_recoded", + "description": "", + "notes": "", + "alias": "Q1_recoded", + "derivation": { + "function": "array", + "args": [ + { + "function": "make_frame", + "args": [ + { + "map": { + "0001": { + "function": "case", + "args": [ + { + "column": [1, 2], + "type": { + "value": { + "class": "categorical", + "categories": [ + { + "selected": True, + "numeric_value": None, + "missing": False, + "id": 1, + "name": "Selected", + }, + { + "selected": False, + "numeric_value": None, + "missing": False, + "id": 2, + "name": "Not selected", + }, + ], + } + }, + }, + { + "function": "any", + "args": [ + { + "variable": "http://test.crunch.io/api/datasets/123/variables/0001/" + }, + { + "column": [ + "subvar", + "subvar", + ] + }, + ], + }, + ], + "references": { + "alias": "Q1_recoded_1", + "name": "Q1_recoded_1", + }, + }, + "0002": { + "function": 
"case", + "args": [ + { + "column": [1, 2], + "type": { + "value": { + "class": "categorical", + "categories": [ + { + "selected": True, + "numeric_value": None, + "missing": False, + "id": 1, + "name": "Selected", + }, + { + "selected": False, + "numeric_value": None, + "missing": False, + "id": 2, + "name": "Not selected", + }, + ], + } + }, + }, + { + "function": "any", + "args": [ + { + "variable": "http://test.crunch.io/api/datasets/123/variables/0001/" + }, + {"column": ["subvar"]}, + ], + }, + ], + "references": { + "alias": "Q1_recoded_2", + "name": "Q1_recoded_2", + }, + }, } - }, { - 'function': 'any', - 'args': [ - {'variable': 'http://test.crunch.io/api/datasets/123/variables/0001/'}, - {'column': ['subvar']} - ] - }], - 'references': { - 'alias': 'Q1_recoded_2', - 'name': 'Q1_recoded_2' - } - } + }, + {"value": ["0001", "0002"]}, + ], } - }, { - 'value': [ - '0001', - '0002' - ] - }] - }] - } + ], + }, + }, } - }) + ) diff --git a/scrunch/tests/test_scripts.py b/scrunch/tests/test_scripts.py index 70a63d9..dbd7dc0 100644 --- a/scrunch/tests/test_scripts.py +++ b/scrunch/tests/test_scripts.py @@ -16,20 +16,19 @@ def test_create_script(self): session = MockSession() scripts_url = "https://example.com/dataset/url/scripts/" - shoji_resource = Entity(session, **{ - 'self': 'https://example.com/dataset/url/', - 'body': {}, - "catalogs": { - "scripts": scripts_url - } - }) - - created_script_url = 'https://example.com/script/2/' + shoji_resource = Entity( + session, + **{ + "self": "https://example.com/dataset/url/", + "body": {}, + "catalogs": {"scripts": scripts_url}, + }, + ) + + created_script_url = "https://example.com/script/2/" response = Response() response.status_code = 201 - response.headers = { - 'Location': created_script_url - } + response.headers = {"Location": created_script_url} dry_run_response = Response() dry_run_response.status_code = 204 @@ -37,75 +36,77 @@ def test_create_script(self): session.add_post_response(response) session.add_post_response(dry_run_response) - session.add_fixture(scripts_url, { - "element": "shoji:catalog", - "self": scripts_url, - "index": {} - }) - session.add_fixture(created_script_url, { - 'self': created_script_url, - 'body': {}, - }) + session.add_fixture( + scripts_url, {"element": "shoji:catalog", "self": scripts_url, "index": {}} + ) + session.add_fixture( + created_script_url, {"self": created_script_url, "body": {}} + ) scripts = DatasetScripts(shoji_resource) scripts.execute("