From b70e2e38d4502fcdf637fa800cd76a250d107c22 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 8 Oct 2024 14:58:51 -0400 Subject: [PATCH 001/115] Group ResourceInstance fields together --- arches/app/models/models.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index ed885b73ae..68360917b2 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1226,15 +1226,6 @@ class ResourceInstance(models.Model): to="models.ResourceInstanceLifecycleState", related_name="resource_instances", ) - - def get_initial_resource_instance_lifecycle_state(self, *args, **kwargs): - try: - return ( - self.graph.resource_instance_lifecycle.get_initial_resource_instance_lifecycle_state() - ) - except (ObjectDoesNotExist, AttributeError): - return None - # This could be used as a lock, but primarily addresses the issue that a creating user # may not yet match the criteria to edit a ResourceInstance (via Set/LogicalSet) simply # because the details may not yet be complete. 
Only one user can create, as it is an @@ -1246,6 +1237,14 @@ def get_initial_resource_instance_lifecycle_state(self, *args, **kwargs): User, on_delete=models.SET_NULL, blank=True, null=True ) + def get_initial_resource_instance_lifecycle_state(self, *args, **kwargs): + try: + return ( + self.graph.resource_instance_lifecycle.get_initial_resource_instance_lifecycle_state() + ) + except (ObjectDoesNotExist, AttributeError): + return None + def get_instance_creator_and_edit_permissions(self, user=None): creatorid = None can_edit = None From f3f7ca06aea9e3cb41f125092dec9bcf44afc9ca Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 8 Oct 2024 18:02:11 -0400 Subject: [PATCH 002/115] Initial commit of PythonicModelQuerySet --- arches/app/models/models.py | 7 ++++ arches/app/models/querysets.py | 59 ++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 arches/app/models/querysets.py diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 68360917b2..c311ff6d36 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -21,6 +21,7 @@ from arches.app.utils.module_importer import get_class_from_modulename from arches.app.utils.thumbnail_factory import ThumbnailGeneratorInstance from arches.app.models.fields.i18n import I18n_TextField, I18n_JSONField +from arches.app.models.querysets import PythonicModelQuerySet from arches.app.models.utils import add_to_update_fields from arches.app.utils.betterJSONSerializer import JSONSerializer from arches.app.utils import import_class_from_string @@ -1237,6 +1238,12 @@ class ResourceInstance(models.Model): User, on_delete=models.SET_NULL, blank=True, null=True ) + objects = PythonicModelQuerySet.as_manager() + + @classmethod + def as_model(cls, *args, **kwargs): + return cls.objects.with_unpacked_tiles(*args, **kwargs) + def get_initial_resource_instance_lifecycle_state(self, *args, **kwargs): try: return ( diff --git a/arches/app/models/querysets.py 
b/arches/app/models/querysets.py new file mode 100644 index 0000000000..244eeff160 --- /dev/null +++ b/arches/app/models/querysets.py @@ -0,0 +1,59 @@ +from django.db import models + + +class PythonicModelQuerySet(models.QuerySet): + def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): + """Annotates a ResourceInstance QuerySet with tile data unpacked + and mapped onto node aliases, e.g.: + + ResourceInstance.objects.with_unpacked_tiles("mymodel") + + With slightly fewer keystrokes: + + ResourceInstance.as_model("mymodel") + + Or with defer/only as in the QuerySet interface: + + ResourceInstance.as_model("mymodel", only="my_node_alias") + + ...although this is a pessimization if you will end up + manipulating other node data besides "my_node_alias". + + Use it like: + MyModel = ResourceInstance.as_model("mymodel") + MyModel.filter(my_node_alias="some tile value") + """ + from arches.app.models.models import GraphModel + + try: + source_graph = ( + GraphModel.objects.filter( + slug=graph_slug, + # TODO: Verify that source_identifier=None is really what I want? + source_identifier=None, + ) + .prefetch_related("node_set") + .get() + ) + except GraphModel.DoesNotExist as e: + e.add_note(f"No graph found with slug: {graph_slug}") + raise + + node_alias_annotations = {} + for node in source_graph.node_set.prefetch_related("nodegroup"): + if defer and node.alias in defer: + continue + if only and node.alias not in only: + continue + # TODO: unwrap with datatype-aware transforms + # TODO: don't worry about name collisions for now, e.g. "name" + # TODO: how to group cardinality N tiles? 
+ node_alias_annotations[node.alias] = models.F(f"tilemodel__data__{node.pk}") + + return ( + self.filter(graph=source_graph) + .prefetch_related("tilemodel_set") + .annotate( + **node_alias_annotations, + ) + ) From 3c7bdafd1738d8840ed671581b71b284d1999846 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 9 Oct 2024 11:38:03 -0400 Subject: [PATCH 003/115] Handle cardinality N tiledata --- arches/app/models/querysets.py | 39 ++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 244eeff160..c571bd8524 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -1,3 +1,4 @@ +from django.contrib.postgres.aggregates import ArrayAgg from django.db import models @@ -6,25 +7,30 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): """Annotates a ResourceInstance QuerySet with tile data unpacked and mapped onto node aliases, e.g.: - ResourceInstance.objects.with_unpacked_tiles("mymodel") + >>> ResourceInstance.objects.with_unpacked_tiles("mymodel") With slightly fewer keystrokes: - ResourceInstance.as_model("mymodel") + >>> ResourceInstance.as_model("mymodel") Or with defer/only as in the QuerySet interface: - ResourceInstance.as_model("mymodel", only="my_node_alias") + >>> ResourceInstance.as_model("mymodel", only=["alias1", "alias2"]) ...although this is a pessimization if you will end up manipulating other node data besides "my_node_alias". 
- Use it like: - MyModel = ResourceInstance.as_model("mymodel") - MyModel.filter(my_node_alias="some tile value") + Example: + + >>> MyModel = ResourceInstance.as_model("mymodel") + >>> result = MyModel.filter(my_node_alias="some tile value") + >>> result.first().my_node_alias + "some tile value" """ from arches.app.models.models import GraphModel + if defer and only and (overlap := set(defer).intersection(set(only))): + raise ValueError(f"Got intersecting defer/only args: {overlap}") try: source_graph = ( GraphModel.objects.filter( @@ -40,15 +46,26 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): raise node_alias_annotations = {} - for node in source_graph.node_set.prefetch_related("nodegroup"): - if defer and node.alias in defer: + for node in source_graph.node_set.all(): + if node.datatype == "semantic": + continue + if node.nodegroup_id is None: continue - if only and node.alias not in only: + if (defer and node.alias in defer) or (only and node.alias not in only): continue # TODO: unwrap with datatype-aware transforms # TODO: don't worry about name collisions for now, e.g. "name" - # TODO: how to group cardinality N tiles? 
- node_alias_annotations[node.alias] = models.F(f"tilemodel__data__{node.pk}") + tile_lookup = models.F(f"tilemodel__data__{node.pk}") + + if node.nodegroup.cardinality == "n": + # TODO: May produce duplicates until we add unique constraint + # on TileModel.resourceinstance_id, nodegroup_id, sortorder + tile_lookup = ArrayAgg( + tile_lookup, + filter=models.Q(tilemodel__nodegroup_id=node.nodegroup.pk), + ordering="tilemodel__sortorder", + ) + node_alias_annotations[node.alias] = tile_lookup return ( self.filter(graph=source_graph) From d5fd6fce0835c31709cff0ef56c1849a466bc7d3 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 9 Oct 2024 12:45:58 -0400 Subject: [PATCH 004/115] Stub out save/clean/refresh_from_db --- arches/app/models/models.py | 51 ++++++++++++++++++++++++---------- arches/app/models/querysets.py | 11 ++++++++ 2 files changed, 47 insertions(+), 15 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index c311ff6d36..443a7cd46b 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1,21 +1,9 @@ -# This is an auto-generated Django model module. -# You'll have to do the following manually to clean this up: -# * Rearrange models' order -# * Make sure each model has one field with primary_key=True -# * Remove `managed = False` lines if you wish to allow Django to create, modify, and delete the table -# Feel free to rename the models, but don't rename db_table values or field names. -# -# Also note: You'll have to insert the output of 'django-admin sqlcustom [app_label]' -# into your database. 
- - import sys import json import uuid import datetime import logging import traceback -import django.utils.timezone from arches.app.const import ExtensionType from arches.app.utils.module_importer import get_class_from_modulename @@ -34,9 +22,10 @@ from django.core.exceptions import ValidationError from django.core.serializers.json import DjangoJSONEncoder from django.core.validators import RegexValidator, validate_slug +from django.db import transaction from django.db.models import JSONField, Max, Q from django.db.models.constraints import UniqueConstraint -from django.utils import translation +from django.utils import timezone, translation from django.utils.translation import gettext_lazy as _ from django.contrib.auth.models import User from django.contrib.auth.models import Group @@ -1292,7 +1281,39 @@ def save(self, *args, **kwargs): add_to_update_fields(kwargs, "resource_instance_lifecycle_state") add_to_update_fields(kwargs, "graph_publication") - super(ResourceInstance, self).save(*args, **kwargs) + + if getattr(self, "_pythonic_model", False): + self.save_tiles_for_pythonic_model(*args, **kwargs) + else: + super().save(*args, **kwargs) + + def clean(self): + if getattr(self, "_pythonic_model", False): + pass # run datatype validation on tile data + + def save_tiles_for_pythonic_model(self, *args, **kwargs): + with transaction.atomic(): + # for nodeid, alias in self._pythonic_model_fields.items(): + # new_val = getattr(self, alias) + # TODO: handle x-list, currently assuming list ~ cardinality N. 
+ # if isinstance(new_val, list): + + # for tile_to_update in self._prefetched_objects_cache["tilemodel_set"]: + + super().save(*args, **kwargs) + self.refresh_from_db( + using=kwargs.get("using", None), + fields=kwargs.get("update_fields", None), + ) + + def refresh_from_db(self, using=None, fields=None, from_queryset=None): + if not from_queryset and ( + field_map := getattr(self, "_pythonic_model_fields", []) + ): + from_queryset = self.__class__.as_model( + self.graph.slug, only=field_map.values() + ) + super().refresh_from_db(using, fields, from_queryset) def __init__(self, *args, **kwargs): super(ResourceInstance, self).__init__(*args, **kwargs) @@ -1997,7 +2018,7 @@ class WorkflowHistory(models.Model): stepdata = JSONField(null=False, default=dict) componentdata = JSONField(null=False, default=dict) # `auto_now_add` marks the field as non-editable, which prevents the field from being serialized, so updating to use `default` instead - created = models.DateTimeField(default=django.utils.timezone.now, null=False) + created = models.DateTimeField(default=timezone.now, null=False) user = models.ForeignKey( db_column="userid", null=True, diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index c571bd8524..9884873246 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -46,6 +46,7 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): raise node_alias_annotations = {} + node_aliases_by_node_id = {} for node in source_graph.node_set.all(): if node.datatype == "semantic": continue @@ -66,6 +67,10 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): ordering="tilemodel__sortorder", ) node_alias_annotations[node.alias] = tile_lookup + node_aliases_by_node_id[str(node.pk)] = node.alias + + if not node_alias_annotations: + raise ValueError("All fields were excluded.") return ( self.filter(graph=source_graph) @@ -73,4 +78,10 @@ def with_unpacked_tiles(self, graph_slug, *, 
defer=None, only=None): .annotate( **node_alias_annotations, ) + .annotate( + _pythonic_model_fields=models.Value( + node_aliases_by_node_id, + output_field=models.JSONField(), + ) + ) ) From 8e851fdc924bc9d4ab26bbc8f62e89e3a5d1fc8c Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 9 Oct 2024 16:57:46 -0400 Subject: [PATCH 005/115] Initial commit of updating and deleting on pythonic models --- arches/app/models/models.py | 93 ++++++++++++++++++++++++++-------- arches/app/models/querysets.py | 13 +++-- 2 files changed, 79 insertions(+), 27 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 443a7cd46b..2afd0a96e5 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -4,6 +4,7 @@ import datetime import logging import traceback +from collections import defaultdict from arches.app.const import ExtensionType from arches.app.utils.module_importer import get_class_from_modulename @@ -1229,6 +1230,16 @@ class ResourceInstance(models.Model): objects = PythonicModelQuerySet.as_manager() + class Meta: + managed = True + db_table = "resource_instances" + permissions = (("no_access_to_resourceinstance", "No Access"),) + + def __init__(self, *args, **kwargs): + super(ResourceInstance, self).__init__(*args, **kwargs) + if not self.resourceinstanceid: + self.resourceinstanceid = uuid.uuid4() + @classmethod def as_model(cls, *args, **kwargs): return cls.objects.with_unpacked_tiles(*args, **kwargs) @@ -1282,29 +1293,77 @@ def save(self, *args, **kwargs): add_to_update_fields(kwargs, "resource_instance_lifecycle_state") add_to_update_fields(kwargs, "graph_publication") - if getattr(self, "_pythonic_model", False): + if getattr(self, "_pythonic_model_fields", False): self.save_tiles_for_pythonic_model(*args, **kwargs) else: super().save(*args, **kwargs) def clean(self): - if getattr(self, "_pythonic_model", False): + if getattr(self, "_pythonic_model_fields", False): pass # run datatype validation on tile data def 
save_tiles_for_pythonic_model(self, *args, **kwargs): - with transaction.atomic(): - # for nodeid, alias in self._pythonic_model_fields.items(): - # new_val = getattr(self, alias) - # TODO: handle x-list, currently assuming list ~ cardinality N. - # if isinstance(new_val, list): + from arches.app.models.tile import Tile + + tiles_by_node_id = self._map_prefetched_tiles_to_node_ids() + for_insert = [] + for_update = [] + for_delete = [] + + for node in self.graph.node_set.all(): + node_id_str = str(node.pk) + if attribute_name := self._pythonic_model_fields.get(node_id_str, ""): + db_tiles = tiles_by_node_id[node_id_str] + new_val = getattr(self, attribute_name) + # TODO: handle x-list, currently assuming list ~ cardinality N. + if not isinstance(new_val, list): + new_val = (new_val,) + for i, inner_val in enumerate(new_val): + try: + tile = db_tiles[i] + except IndexError: + # parenttile? use Tile.get_blank_tile() instead? + # Does unnecessary node queries--fix. + tile = Tile.get_blank_tile_from_nodegroup_id( + node.nodegroup_id, resourceid=self.pk + ) + if db_tiles: + # TODO: small risk of race condition--fix. + tile.sortorder = max(t.sortorder or 0 for t in db_tiles) + 1 + tiles_by_node_id[node_id_str].append(tile) + for_insert.append(tile) + else: + for_update.append(tile) + # skipping validation... + # skipping transform_value_for_tile + tile.data[node_id_str] = inner_val + + for_delete.extend(db_tiles[: len(new_val)]) - # for tile_to_update in self._prefetched_objects_cache["tilemodel_set"]: + with transaction.atomic(): + # TODO: indexing, editlog, etc. (use/adapt proxy model methods?) 
+ if for_insert: + TileModel.objects.bulk_create(for_insert) + if for_update: + TileModel.objects.bulk_update(for_update, {"data"}) + if for_delete: + TileModel.objects.filter(pk__in=[t.pk for t in for_delete]).delete() super().save(*args, **kwargs) - self.refresh_from_db( - using=kwargs.get("using", None), - fields=kwargs.get("update_fields", None), - ) + + del self._pythonic_model_fields + # TODO: add unique constraint for TileModel re: sortorder + self.refresh_from_db( + using=kwargs.get("using", None), + fields=kwargs.get("update_fields", None), + ) + + def _map_prefetched_tiles_to_node_ids(self): + tiles_by_node = defaultdict(list) + for tile_to_update in self._prefetched_objects_cache["tilemodel_set"]: + for node_id in tile_to_update.data: + tiles_by_node[node_id].append(tile_to_update) + return tiles_by_node def refresh_from_db(self, using=None, fields=None, from_queryset=None): if not from_queryset and ( @@ -1315,16 +1374,6 @@ def refresh_from_db(self, using=None, fields=None, from_queryset=None): ) super().refresh_from_db(using, fields, from_queryset) - def __init__(self, *args, **kwargs): - super(ResourceInstance, self).__init__(*args, **kwargs) - if not self.resourceinstanceid: - self.resourceinstanceid = uuid.uuid4() - - class Meta: - managed = True - db_table = "resource_instances" - permissions = (("no_access_to_resourceinstance", "No Access"),) - class ResourceInstanceLifecycle(models.Model): id = models.UUIDField(primary_key=True, serialize=False, default=uuid.uuid4) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 9884873246..5822260ff3 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -17,9 +17,6 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): >>> ResourceInstance.as_model("mymodel", only=["alias1", "alias2"]) - ...although this is a pessimization if you will end up - manipulating other node data besides "my_node_alias". 
- Example: >>> MyModel = ResourceInstance.as_model("mymodel") @@ -27,7 +24,7 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): >>> result.first().my_node_alias "some tile value" """ - from arches.app.models.models import GraphModel + from arches.app.models.models import GraphModel, TileModel if defer and only and (overlap := set(defer).intersection(set(only))): raise ValueError(f"Got intersecting defer/only args: {overlap}") @@ -74,7 +71,13 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): return ( self.filter(graph=source_graph) - .prefetch_related("tilemodel_set") + .prefetch_related( + "graph__node_set", + models.Prefetch( + "tilemodel_set", + queryset=TileModel.objects.order_by("sortorder"), + ), + ) .annotate( **node_alias_annotations, ) From 04a7638b12a83cc9decc3ea36d588d2490083133 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 10 Oct 2024 12:25:07 -0400 Subject: [PATCH 006/115] Orient around nodegroups to help with jagged data, blank tiles --- arches/app/models/models.py | 139 +++++++++++++++++++++++---------- arches/app/models/querysets.py | 1 + 2 files changed, 97 insertions(+), 43 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 2afd0a96e5..d07d784bca 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1294,7 +1294,7 @@ def save(self, *args, **kwargs): add_to_update_fields(kwargs, "graph_publication") if getattr(self, "_pythonic_model_fields", False): - self.save_tiles_for_pythonic_model(*args, **kwargs) + self._save_tiles_for_pythonic_model(*args, **kwargs) else: super().save(*args, **kwargs) @@ -1302,43 +1302,26 @@ def clean(self): if getattr(self, "_pythonic_model_fields", False): pass # run datatype validation on tile data - def save_tiles_for_pythonic_model(self, *args, **kwargs): - from arches.app.models.tile import Tile + def _save_tiles_for_pythonic_model(self, *args, **kwargs): + tiles_by_nodegroup_id = 
self._map_prefetched_tiles_to_nodegroup_ids() + for_insert = set() + for_update = set() + for_delete = set() - tiles_by_node_id = self._map_prefetched_tiles_to_node_ids() - for_insert = [] - for_update = [] - for_delete = [] - - for node in self.graph.node_set.all(): - node_id_str = str(node.pk) - if attribute_name := self._pythonic_model_fields.get(node_id_str, ""): - db_tiles = tiles_by_node_id[node_id_str] - new_val = getattr(self, attribute_name) - # TODO: handle x-list, currently assuming list ~ cardinality N. - if not isinstance(new_val, list): - new_val = (new_val,) - for i, inner_val in enumerate(new_val): - try: - tile = db_tiles[i] - except IndexError: - # parenttile? use Tile.get_blank_tile() instead? - # Does unnecessary node queries--fix. - tile = Tile.get_blank_tile_from_nodegroup_id( - node.nodegroup_id, resourceid=self.pk - ) - if db_tiles: - # TODO: small risk of race condition--fix. - tile.sortorder = max(t.sortorder or 0 for t in db_tiles) + 1 - tiles_by_node_id[node_id_str].append(tile) - for_insert.append(tile) - else: - for_update.append(tile) - # skipping validation... - # skipping transform_value_for_tile - tile.data[node_id_str] = inner_val - - for_delete.extend(db_tiles[: len(new_val)]) + nodegroups = ( + NodeGroup.objects.filter(node__graph=self.graph) + .distinct() + .prefetch_related("node_set") + ) + for nodegroup in nodegroups: + db_tiles = tiles_by_nodegroup_id[nodegroup.pk] + self._update_tiles_from_pythonic_model_values( + nodegroup, + db_tiles, + for_insert=for_insert, + for_update=for_update, + for_delete=for_delete, + ) with transaction.atomic(): # TODO: indexing, editlog, etc. (use/adapt proxy model methods?) 
@@ -1358,12 +1341,65 @@ def save_tiles_for_pythonic_model(self, *args, **kwargs): fields=kwargs.get("update_fields", None), ) - def _map_prefetched_tiles_to_node_ids(self): - tiles_by_node = defaultdict(list) - for tile_to_update in self._prefetched_objects_cache["tilemodel_set"]: - for node_id in tile_to_update.data: - tiles_by_node[node_id].append(tile_to_update) - return tiles_by_node + def _map_prefetched_tiles_to_nodegroup_ids(self): + tiles_by_nodegroup = defaultdict(list) + for tile_to_update in self.sorted_tiles: + tiles_by_nodegroup[tile_to_update.nodegroup_id].append(tile_to_update) + return tiles_by_nodegroup + + def _update_tiles_from_pythonic_model_values( + self, nodegroup, db_tiles, for_insert, for_update, for_delete + ): + working_tiles = [] # self.get_working_tiles + max_tile_length = 0 + for attribute_name in self._pythonic_model_fields.values(): + new_val = getattr(self, attribute_name) + # TODO: handle x-list, currently assuming list ~ cardinality N. + if not isinstance(new_val, list): + new_val = [new_val] + max_tile_length = max(max_tile_length, len(new_val)) + + for i in range(max(max_tile_length, len(db_tiles))): + try: + tile = db_tiles[i] + except IndexError: + tile = TileModel.get_blank_tile_from_nodegroup( + nodegroup, + resourceid=self.pk, + # parenttile? + ) + if db_tiles: + # TODO: small risk of race condition--fix. + tile.sortorder = max(t.sortorder or 0 for t in db_tiles) + 1 + for_insert.add(tile) + else: + for_update.add(tile) + working_tiles.append(tile) + + for node in nodegroup.node_set.all(): + node_id_str = str(node.pk) + if not (attribute_name := self._pythonic_model_fields.get(node_id_str, "")): + continue + + new_val = getattr(self, attribute_name) + if nodegroup.cardinality == "1": + new_val = [new_val] + + for tile, inner_val in zip(working_tiles, new_val, strict=False): + # skipping validation... 
+ # skipping transform_value_for_tile + tile.data[node_id_str] = inner_val + for extra_tile in working_tiles[len(new_val) :]: + extra_tile.data[node_id_str] = None + + for tile in working_tiles: + # TODO: preserve if child tiles? + if not any(tile.data.values()): + if tile._state.adding: + for_insert.remove(tile) + else: + for_update.remove(tile) + for_delete.add(tile) def refresh_from_db(self, using=None, fields=None, from_queryset=None): if not from_queryset and ( @@ -1668,6 +1704,23 @@ def serialize(self, fields=None, exclude=["nodegroup"], **kwargs): self, fields=fields, exclude=exclude, **kwargs ) + @staticmethod + def get_blank_tile_from_nodegroup( + nodegroup: NodeGroup, resourceid=None, parenttile=None + ): + tile = TileModel( + nodegroup_id=nodegroup.pk, + resourceinstance_id=resourceid, + parenttile=parenttile, + data={}, + ) + + for node in nodegroup.node_set.all(): + tile.data[str(node.nodeid)] = None + + tile.full_clean() + return tile + class Value(models.Model): valueid = models.UUIDField(primary_key=True) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 5822260ff3..2bec117534 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -76,6 +76,7 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): models.Prefetch( "tilemodel_set", queryset=TileModel.objects.order_by("sortorder"), + to_attr="sorted_tiles", ), ) .annotate( From eaa9c2321a1dfadf442eaaf9d18607fa0f7b4995 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 10 Oct 2024 13:23:22 -0400 Subject: [PATCH 007/115] Fix tile sortorder calculation --- arches/app/models/models.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index d07d784bca..be155f7474 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1350,7 +1350,7 @@ def _map_prefetched_tiles_to_nodegroup_ids(self): def 
_update_tiles_from_pythonic_model_values( self, nodegroup, db_tiles, for_insert, for_update, for_delete ): - working_tiles = [] # self.get_working_tiles + working_tiles = [] max_tile_length = 0 for attribute_name in self._pythonic_model_fields.values(): new_val = getattr(self, attribute_name) @@ -1369,8 +1369,7 @@ def _update_tiles_from_pythonic_model_values( # parenttile? ) if db_tiles: - # TODO: small risk of race condition--fix. - tile.sortorder = max(t.sortorder or 0 for t in db_tiles) + 1 + tile.sortorder = max(t.sortorder or 0 for t in working_tiles) + 1 for_insert.add(tile) else: for_update.add(tile) From 462fa9303da67c8c54b8b9030318242be976c311 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 10 Oct 2024 13:36:53 -0400 Subject: [PATCH 008/115] Fix refreshing --- arches/app/models/models.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index be155f7474..61f051da44 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1,4 +1,5 @@ import sys +import itertools import json import uuid import datetime @@ -1334,7 +1335,6 @@ def _save_tiles_for_pythonic_model(self, *args, **kwargs): super().save(*args, **kwargs) - del self._pythonic_model_fields # TODO: add unique constraint for TileModel re: sortorder self.refresh_from_db( using=kwargs.get("using", None), @@ -1407,7 +1407,15 @@ def refresh_from_db(self, using=None, fields=None, from_queryset=None): from_queryset = self.__class__.as_model( self.graph.slug, only=field_map.values() ) - super().refresh_from_db(using, fields, from_queryset) + super().refresh_from_db(using, fields, from_queryset) + # Copy over annotations. 
+ refreshed_resource = from_queryset[0] + for field in itertools.chain( + field_map.values(), ("_pythonic_model_fields", "sorted_tiles") + ): + setattr(self, field, getattr(refreshed_resource, field)) + else: + super().refresh_from_db(using, fields, from_queryset) class ResourceInstanceLifecycle(models.Model): From 1416a2e9eca9e8b8fbebd904b0f3b2cfd3adddf8 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 10 Oct 2024 15:45:28 -0400 Subject: [PATCH 009/115] Add datatype validation --- arches/app/models/models.py | 181 ++++++++++++++++++++++----------- arches/app/models/querysets.py | 2 + 2 files changed, 121 insertions(+), 62 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 61f051da44..39db7cac7a 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1300,38 +1300,29 @@ def save(self, *args, **kwargs): super().save(*args, **kwargs) def clean(self): + """Raises a compound ValidationError with any failing tile values.""" if getattr(self, "_pythonic_model_fields", False): - pass # run datatype validation on tile data + self._update_tiles_from_pythonic_model_values(run_triggers=False) - def _save_tiles_for_pythonic_model(self, *args, **kwargs): - tiles_by_nodegroup_id = self._map_prefetched_tiles_to_nodegroup_ids() - for_insert = set() - for_update = set() - for_delete = set() + def _save_tiles_for_pythonic_model(self, index=False, **kwargs): + """Raises a compound ValidationError with any failing tile values. - nodegroups = ( - NodeGroup.objects.filter(node__graph=self.graph) - .distinct() - .prefetch_related("node_set") + (It's not exactly idiomatic for a Django project to clean() + values during a save(), but the "pythonic models" interface + is basically a form/serializer, so that's why we're validating.) 
+ """ + to_insert, to_update, to_delete = ( + self._update_tiles_from_pythonic_model_values() ) - for nodegroup in nodegroups: - db_tiles = tiles_by_nodegroup_id[nodegroup.pk] - self._update_tiles_from_pythonic_model_values( - nodegroup, - db_tiles, - for_insert=for_insert, - for_update=for_update, - for_delete=for_delete, - ) with transaction.atomic(): # TODO: indexing, editlog, etc. (use/adapt proxy model methods?) - if for_insert: - TileModel.objects.bulk_create(for_insert) - if for_update: - TileModel.objects.bulk_update(for_update, {"data"}) - if for_delete: - TileModel.objects.filter(pk__in=[t.pk for t in for_delete]).delete() + if to_insert: + TileModel.objects.bulk_create(to_insert) + if to_update: + TileModel.objects.bulk_update(to_update, {"data"}) + if to_delete: + TileModel.objects.filter(pk__in=[t.pk for t in to_delete]).delete() super().save(*args, **kwargs) @@ -1347,59 +1338,125 @@ def _map_prefetched_tiles_to_nodegroup_ids(self): tiles_by_nodegroup[tile_to_update.nodegroup_id].append(tile_to_update) return tiles_by_nodegroup - def _update_tiles_from_pythonic_model_values( - self, nodegroup, db_tiles, for_insert, for_update, for_delete - ): - working_tiles = [] - max_tile_length = 0 - for attribute_name in self._pythonic_model_fields.values(): - new_val = getattr(self, attribute_name) - # TODO: handle x-list, currently assuming list ~ cardinality N. - if not isinstance(new_val, list): - new_val = [new_val] - max_tile_length = max(max_tile_length, len(new_val)) + def _update_tiles_from_pythonic_model_values(self): + """Move values from model instance to prefetched tiles, and validate. + Raises ValidationError if new data fails datatype validation (and + thus may leave prefetched tiles in a partially consistent state.) 
+ """ + db_tiles_by_nodegroup_id = self._map_prefetched_tiles_to_nodegroup_ids() + errors_by_node_alias = defaultdict(list) + to_insert = set() + to_update = set() + to_delete = set() - for i in range(max(max_tile_length, len(db_tiles))): - try: - tile = db_tiles[i] - except IndexError: - tile = TileModel.get_blank_tile_from_nodegroup( - nodegroup, - resourceid=self.pk, - # parenttile? - ) - if db_tiles: - tile.sortorder = max(t.sortorder or 0 for t in working_tiles) + 1 - for_insert.add(tile) - else: - for_update.add(tile) - working_tiles.append(tile) + nodegroups = ( + NodeGroup.objects.filter(node__graph=self.graph) + .distinct() + .prefetch_related("node_set") + ) + for nodegroup in nodegroups: + node_aliases = [n.alias for n in nodegroup.node_set.all()] + db_tiles = db_tiles_by_nodegroup_id[nodegroup.pk] + working_tiles = [] + max_tile_length = 0 + for attribute_name in self._pythonic_model_fields.values(): + if attribute_name not in node_aliases: + continue + new_val = getattr(self, attribute_name) + if nodegroup.cardinality == "1": + new_val = [new_val] + max_tile_length = max(max_tile_length, len(new_val)) + + for i in range(max(max_tile_length, len(db_tiles))): + try: + tile = db_tiles[i] + except IndexError: + tile = TileModel.get_blank_tile_from_nodegroup( + nodegroup, + resourceid=self.pk, + # parenttile? + ) + if db_tiles: + tile.sortorder = ( + max(t.sortorder or 0 for t in working_tiles) + 1 + ) + to_insert.add(tile) + else: + to_update.add(tile) + working_tiles.append(tile) + + self._update_tile_values(nodegroup, working_tiles, errors_by_node_alias) + + for tile in working_tiles: + # TODO: preserve if child tiles? + if not any(tile.data.values()): + if tile._state.adding: + to_insert.remove(tile) + else: + to_update.remove(tile) + to_delete.add(tile) + + if errors_by_node_alias: + raise ValidationError( + # TODO: Django/DRF minds if this is not an actual field? 
+ { + alias: ValidationError("\n".join(e["message"] for e in errors)) + for alias, errors in errors_by_node_alias.items() + } + ) + + return to_insert, to_update, to_delete + def _update_tile_values(self, nodegroup, working_tiles, errors_by_node_alias): + from arches.app.datatypes.datatypes import DataTypeFactory + + datatype_factory = DataTypeFactory() for node in nodegroup.node_set.all(): node_id_str = str(node.pk) if not (attribute_name := self._pythonic_model_fields.get(node_id_str, "")): continue + datatype_instance = datatype_factory.get_instance(node.datatype) new_val = getattr(self, attribute_name) if nodegroup.cardinality == "1": new_val = [new_val] for tile, inner_val in zip(working_tiles, new_val, strict=False): - # skipping validation... - # skipping transform_value_for_tile + # TODO: move this all somewhere else + # 1. transform_value_for_tile() + # 2. clean() TODO: swap order with 3? + # 3. pre_tile_save() + # 4. validate() + + transformed = inner_val + if inner_val is not None: + # TODO: do all datatypes treat None the same way? + try: + transformed = datatype_instance.transform_value_for_tile( + inner_val, **node.config + ) + except ValueError: + pass # BooleanDataType + except: # TODO: fix and remove + pass + + datatype_instance.clean(tile, node_id_str) + + # Does pre_tile_save call transform_value_for_tile and therefore raise? + # https://github.com/archesproject/arches/issues/10851 + # try: + datatype_instance.pre_tile_save(tile, node_id_str) + + if errors := datatype_instance.validate(transformed, node=node): + errors_by_node_alias[node.alias].extend(errors) + + # TODO: call tile lifecycle triggers + # update data... tile.data[node_id_str] = inner_val + for extra_tile in working_tiles[len(new_val) :]: extra_tile.data[node_id_str] = None - for tile in working_tiles: - # TODO: preserve if child tiles? 
- if not any(tile.data.values()): - if tile._state.adding: - for_insert.remove(tile) - else: - for_update.remove(tile) - for_delete.add(tile) - def refresh_from_db(self, using=None, fields=None, from_queryset=None): if not from_queryset and ( field_map := getattr(self, "_pythonic_model_fields", []) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 2bec117534..979891f7d2 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -23,6 +23,8 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): >>> result = MyModel.filter(my_node_alias="some tile value") >>> result.first().my_node_alias "some tile value" + + Provisional edits are completely ignored. """ from arches.app.models.models import GraphModel, TileModel From 877814c5c40d478cda2003439e0c4a129807e122 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 10 Oct 2024 17:20:29 -0400 Subject: [PATCH 010/115] Stub out function triggers --- arches/app/models/models.py | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 39db7cac7a..3cdb428ae3 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1302,7 +1302,7 @@ def save(self, *args, **kwargs): def clean(self): """Raises a compound ValidationError with any failing tile values.""" if getattr(self, "_pythonic_model_fields", False): - self._update_tiles_from_pythonic_model_values(run_triggers=False) + self._update_tiles_from_pythonic_model_values() def _save_tiles_for_pythonic_model(self, index=False, **kwargs): """Raises a compound ValidationError with any failing tile values. @@ -1311,12 +1311,30 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): values during a save(), but the "pythonic models" interface is basically a form/serializer, so that's why we're validating.) 
""" + from arches.app.models.tile import Tile + to_insert, to_update, to_delete = ( self._update_tiles_from_pythonic_model_values() ) + run_functions = kwargs.get("run_functions", False) + if run_functions: + # Instantiate proxy models (for now). + upsert_proxies = [ + Tile.objects.get(pk=tile.pk) for tile in to_insert + to_update + ] + delete_proxies = [Tile.objects.get(pk=tile.pk) for tile in to_delete] + with transaction.atomic(): - # TODO: indexing, editlog, etc. (use/adapt proxy model methods?) + if run_functions: + for proxy_instance in upsert_proxies: + proxy_instance.__preSave() + for proxy_instance in delete_proxies: + proxy_instance.__preDelete() + + # TODO: more side effects, e.g. indexing, editlog + # (use/adapt proxy model methods?) + # datatype_post_save_actions? if to_insert: TileModel.objects.bulk_create(to_insert) if to_update: @@ -1325,6 +1343,10 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): TileModel.objects.filter(pk__in=[t.pk for t in to_delete]).delete() super().save(*args, **kwargs) + if run_functions: + for proxy_instance in upsert_proxies: + proxy_instance.refresh_from_db() + proxy_instance.__postSave() # TODO: add unique constraint for TileModel re: sortorder self.refresh_from_db( @@ -1385,7 +1407,9 @@ def _update_tiles_from_pythonic_model_values(self): to_update.add(tile) working_tiles.append(tile) - self._update_tile_values(nodegroup, working_tiles, errors_by_node_alias) + self._validate_and_patch_from_pythonic_model_values( + nodegroup, working_tiles, errors_by_node_alias + ) for tile in working_tiles: # TODO: preserve if child tiles? 
@@ -1407,7 +1431,9 @@ def _update_tiles_from_pythonic_model_values(self): return to_insert, to_update, to_delete - def _update_tile_values(self, nodegroup, working_tiles, errors_by_node_alias): + def _validate_and_patch_from_pythonic_model_values( + self, nodegroup, working_tiles, errors_by_node_alias + ): from arches.app.datatypes.datatypes import DataTypeFactory datatype_factory = DataTypeFactory() @@ -1450,8 +1476,7 @@ def _update_tile_values(self, nodegroup, working_tiles, errors_by_node_alias): if errors := datatype_instance.validate(transformed, node=node): errors_by_node_alias[node.alias].extend(errors) - # TODO: call tile lifecycle triggers - # update data... + # Patch the validated data into the working tiles. tile.data[node_id_str] = inner_val for extra_tile in working_tiles[len(new_val) :]: From 341b6495491e07c6339e8f3f84dec5370aee5d02 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Fri, 11 Oct 2024 08:28:46 -0400 Subject: [PATCH 011/115] Move ORM lookup to datatype --- arches/app/datatypes/base.py | 16 +++++++++++++--- arches/app/models/querysets.py | 12 +++++++++--- arches/app/models/utils.py | 4 ++-- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/arches/app/datatypes/base.py b/arches/app/datatypes/base.py index bdc87d96cd..7c2af581f7 100644 --- a/arches/app/datatypes/base.py +++ b/arches/app/datatypes/base.py @@ -1,9 +1,13 @@ -import json, urllib +import json +import logging +import urllib + +from django.db.models import F from django.urls import reverse +from django.utils.translation import gettext as _ + from arches.app.models import models -from arches.app.models.system_settings import settings from arches.app.search.elasticsearch_dsl_builder import Dsl, Bool, Terms, Exists, Nested -from django.utils.translation import gettext as _ import logging logger = logging.getLogger(__name__) @@ -532,3 +536,9 @@ def validate_node(self, node): a GraphValidationError """ pass + + def get_orm_lookup(self, node, from_resource=True): + lookup = 
f"data__{node.pk}" + if from_resource: + lookup = "tilemodel__" + lookup + return F(lookup) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 979891f7d2..ea23af47d3 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -1,6 +1,8 @@ from django.contrib.postgres.aggregates import ArrayAgg from django.db import models +from arches.app.models.utils import field_names + class PythonicModelQuerySet(models.QuerySet): def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): @@ -26,6 +28,7 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): Provisional edits are completely ignored. """ + from arches.app.datatypes.datatypes import DataTypeFactory from arches.app.models.models import GraphModel, TileModel if defer and only and (overlap := set(defer).intersection(set(only))): @@ -44,6 +47,8 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): e.add_note(f"No graph found with slug: {graph_slug}") raise + invalid_names = field_names(self.model) + datatype_factory = DataTypeFactory() node_alias_annotations = {} node_aliases_by_node_id = {} for node in source_graph.node_set.all(): @@ -53,9 +58,10 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): continue if (defer and node.alias in defer) or (only and node.alias not in only): continue - # TODO: unwrap with datatype-aware transforms - # TODO: don't worry about name collisions for now, e.g. 
"name" - tile_lookup = models.F(f"tilemodel__data__{node.pk}") + if node in invalid_names: + raise ValueError(f'"{node.alias}" clashes with a model field name.') + datatype_instance = datatype_factory.get_instance(node.datatype) + tile_lookup = datatype_instance.get_orm_lookup(node) if node.nodegroup.cardinality == "n": # TODO: May produce duplicates until we add unique constraint diff --git a/arches/app/models/utils.py b/arches/app/models/utils.py index ccfe06daff..546d278e65 100644 --- a/arches/app/models/utils.py +++ b/arches/app/models/utils.py @@ -14,5 +14,5 @@ def add_to_update_fields(kwargs, field_name): kwargs["update_fields"] = new -def field_names(instance): - return {f.name for f in instance._meta.fields} +def field_names(instance_or_class): + return {f.name for f in instance_or_class._meta.fields} From de72e263ab8464e27e26deeda7c8b64592d223e6 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Fri, 11 Oct 2024 16:11:54 -0400 Subject: [PATCH 012/115] Unwrap resource instance datatypes to string id --- arches/app/datatypes/base.py | 26 +++++++++++++++++++++++--- arches/app/datatypes/datatypes.py | 3 +++ arches/app/models/querysets.py | 13 ++----------- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/arches/app/datatypes/base.py b/arches/app/datatypes/base.py index 7c2af581f7..53119148b8 100644 --- a/arches/app/datatypes/base.py +++ b/arches/app/datatypes/base.py @@ -2,7 +2,8 @@ import logging import urllib -from django.db.models import F +from django.contrib.postgres.expressions import ArraySubquery +from django.db.models import F, OuterRef from django.urls import reverse from django.utils.translation import gettext as _ @@ -538,7 +539,26 @@ def validate_node(self, node): pass def get_orm_lookup(self, node, from_resource=True): - lookup = f"data__{node.pk}" + base_lookup = self._get_base_orm_lookup(node) + + if node.nodegroup.cardinality == "n": + # TODO: May produce duplicates until we add unique constraint + # on 
TileModel.resourceinstance_id, nodegroup_id, sortorder + if from_resource: + tile_query = models.TileModel.objects.filter( + nodegroup_id=node.nodegroup.pk, resourceinstance_id=OuterRef("pk") + ) + else: + tile_query = models.TileModel.objects.filter( + nodegroup_id=node.nodegroup.pk + ) + return ArraySubquery(tile_query.order_by("sortorder").values(base_lookup)) + if from_resource: - lookup = "tilemodel__" + lookup + lookup = "tilemodel__" + base_lookup + else: + lookup = base_lookup return F(lookup) + + def _get_base_orm_lookup(self, node): + return f"data__{node.pk}" diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 87dc366c03..32fd2f396e 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2368,6 +2368,9 @@ def default_es_mapping(self): } return mapping + def _get_base_orm_lookup(self, node): + return f"data__{node.pk}__0__resourceId" + class ResourceInstanceListDataType(ResourceInstanceDataType): def to_json(self, tile, node): diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index ea23af47d3..11c78fac27 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -1,4 +1,3 @@ -from django.contrib.postgres.aggregates import ArrayAgg from django.db import models from arches.app.models.utils import field_names @@ -58,19 +57,11 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): continue if (defer and node.alias in defer) or (only and node.alias not in only): continue - if node in invalid_names: + if node.alias in invalid_names: raise ValueError(f'"{node.alias}" clashes with a model field name.') + datatype_instance = datatype_factory.get_instance(node.datatype) tile_lookup = datatype_instance.get_orm_lookup(node) - - if node.nodegroup.cardinality == "n": - # TODO: May produce duplicates until we add unique constraint - # on TileModel.resourceinstance_id, nodegroup_id, sortorder - tile_lookup = ArrayAgg( - tile_lookup, 
- filter=models.Q(tilemodel__nodegroup_id=node.nodegroup.pk), - ordering="tilemodel__sortorder", - ) node_alias_annotations[node.alias] = tile_lookup node_aliases_by_node_id[str(node.pk)] = node.alias From cbe575113ab4eda4593684dd5936e93bd2a9e925 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Fri, 11 Oct 2024 17:09:01 -0400 Subject: [PATCH 013/115] Straighten out cardinality N interaction with list datatypes --- arches/app/datatypes/base.py | 32 +++++++++++++++++++------- arches/app/datatypes/datatypes.py | 14 ++++++++++- arches/app/models/query_expressions.py | 7 ++++++ 3 files changed, 44 insertions(+), 9 deletions(-) create mode 100644 arches/app/models/query_expressions.py diff --git a/arches/app/datatypes/base.py b/arches/app/datatypes/base.py index 53119148b8..e7be321420 100644 --- a/arches/app/datatypes/base.py +++ b/arches/app/datatypes/base.py @@ -4,10 +4,12 @@ from django.contrib.postgres.expressions import ArraySubquery from django.db.models import F, OuterRef +from django.db.models.expressions import BaseExpression from django.urls import reverse from django.utils.translation import gettext as _ from arches.app.models import models +from arches.app.models.query_expressions import JsonbArrayElements from arches.app.search.elasticsearch_dsl_builder import Dsl, Bool, Terms, Exists, Nested import logging @@ -538,27 +540,41 @@ def validate_node(self, node): """ pass - def get_orm_lookup(self, node, from_resource=True): + def get_orm_lookup(self, node, from_resource=True) -> BaseExpression: base_lookup = self._get_base_orm_lookup(node) if node.nodegroup.cardinality == "n": # TODO: May produce duplicates until we add unique constraint # on TileModel.resourceinstance_id, nodegroup_id, sortorder + tile_query = models.TileModel.objects.filter(nodegroup_id=node.nodegroup.pk) if from_resource: - tile_query = models.TileModel.objects.filter( - nodegroup_id=node.nodegroup.pk, resourceinstance_id=OuterRef("pk") + tile_query = tile_query.filter( + 
resourceinstance_id=OuterRef("resourceinstanceid") ) - else: - tile_query = models.TileModel.objects.filter( - nodegroup_id=node.nodegroup.pk + tile_query = tile_query.order_by("sortorder") + if self.collects_multiple_values(): + array_transform = self._get_orm_array_transform(base_lookup) + tile_query = tile_query.annotate( + array_transform=array_transform + ).values( + "array_transform" # TODO: name clash or OK? ) - return ArraySubquery(tile_query.order_by("sortorder").values(base_lookup)) + else: + tile_query = tile_query.values(base_lookup) + return ArraySubquery(tile_query) if from_resource: lookup = "tilemodel__" + base_lookup else: lookup = base_lookup - return F(lookup) + + if self.collects_multiple_values(): + return self._get_orm_array_transform(lookup) + else: + return F(lookup) def _get_base_orm_lookup(self, node): return f"data__{node.pk}" + + def _get_orm_array_transform(self, lookup): + return JsonbArrayElements(F(lookup)) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 32fd2f396e..0eeccd12c9 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -16,7 +16,8 @@ from mimetypes import MimeTypes from django.core.files.images import get_image_dimensions -from django.db.models import fields +from django.db.models import fields, Value +from django.db.models.expressions import CombinedExpression from arches.app.const import ExtensionType from arches.app.datatypes.base import BaseDataType @@ -2410,6 +2411,17 @@ def to_json(self, tile, node): def collects_multiple_values(self): return True + def _get_base_orm_lookup(self, node): + return f"data__{node.pk}" # TODO: UUIDField? 
+ + def _get_orm_array_transform(self, lookup): + return CombinedExpression( + super()._get_orm_array_transform(lookup), + "->>", + Value("resourceId"), + output_field=fields.UUIDField(), + ) + class NodeValueDataType(BaseDataType): def validate( diff --git a/arches/app/models/query_expressions.py b/arches/app/models/query_expressions.py new file mode 100644 index 0000000000..633f9846ed --- /dev/null +++ b/arches/app/models/query_expressions.py @@ -0,0 +1,7 @@ +from django.db.models.expressions import Func + + +class JsonbArrayElements(Func): + arity = 1 + contains_subquery = True # TODO(Django 5.2) change -> set_returning = True + function = "JSONB_ARRAY_ELEMENTS" From 39b2d25fcd4b49298686b79d0b947481325f6139 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Sat, 12 Oct 2024 10:38:57 -0400 Subject: [PATCH 014/115] Check for invalid defer/only values --- arches/app/models/querysets.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 11c78fac27..1507a25cc4 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -67,6 +67,9 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): if not node_alias_annotations: raise ValueError("All fields were excluded.") + for given_alias in only or []: + if given_alias not in node_alias_annotations: + raise ValueError(f'"{given_alias}" is not a valid node alias.') return ( self.filter(graph=source_graph) From 7e3fe067aa65946217fc1ca81ad2bdb7c8b957fb Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Sat, 12 Oct 2024 12:14:26 -0400 Subject: [PATCH 015/115] Handle concept-list datatype --- arches/app/datatypes/base.py | 27 ++++++++++++++++----------- arches/app/datatypes/datatypes.py | 3 ++- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/arches/app/datatypes/base.py b/arches/app/datatypes/base.py index e7be321420..3635948c97 100644 --- a/arches/app/datatypes/base.py +++ b/arches/app/datatypes/base.py @@ 
-9,7 +9,6 @@ from django.utils.translation import gettext as _ from arches.app.models import models -from arches.app.models.query_expressions import JsonbArrayElements from arches.app.search.elasticsearch_dsl_builder import Dsl, Bool, Terms, Exists, Nested import logging @@ -553,12 +552,15 @@ def get_orm_lookup(self, node, from_resource=True) -> BaseExpression: ) tile_query = tile_query.order_by("sortorder") if self.collects_multiple_values(): - array_transform = self._get_orm_array_transform(base_lookup) - tile_query = tile_query.annotate( - array_transform=array_transform - ).values( - "array_transform" # TODO: name clash or OK? - ) + try: + array_transform = self._get_orm_array_transform(base_lookup) + except NotImplementedError: + tile_query = tile_query.values(base_lookup) + else: + # TODO: name clash or OK? + tile_query = tile_query.annotate( + array_transform=array_transform + ).values("array_transform") else: tile_query = tile_query.values(base_lookup) return ArraySubquery(tile_query) @@ -569,12 +571,15 @@ def get_orm_lookup(self, node, from_resource=True) -> BaseExpression: lookup = base_lookup if self.collects_multiple_values(): - return self._get_orm_array_transform(lookup) - else: - return F(lookup) + try: + return self._get_orm_array_transform(lookup) + except NotImplementedError: + pass + + return F(lookup) def _get_base_orm_lookup(self, node): return f"data__{node.pk}" def _get_orm_array_transform(self, lookup): - return JsonbArrayElements(F(lookup)) + raise NotImplementedError diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 0eeccd12c9..45adfe83e7 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -25,6 +25,7 @@ from arches.app.models.concept import get_preflabel_from_valueid from arches.app.models.system_settings import settings from arches.app.models.fields.i18n import I18n_JSONField, I18n_String +from arches.app.models.query_expressions import JsonbArrayElements from 
arches.app.utils.date_utils import ExtendedDateFormat from arches.app.utils.module_importer import get_class_from_modulename from arches.app.utils.permission_backend import user_is_resource_reviewer @@ -2416,7 +2417,7 @@ def _get_base_orm_lookup(self, node): def _get_orm_array_transform(self, lookup): return CombinedExpression( - super()._get_orm_array_transform(lookup), + JsonbArrayElements(super()._get_orm_array_transform(lookup)), "->>", Value("resourceId"), output_field=fields.UUIDField(), From bc9040a953e03b6d22b378a5e4fe462abbb58ce1 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Sat, 12 Oct 2024 13:48:22 -0400 Subject: [PATCH 016/115] Handle JSON null in resource instance list dt transform --- arches/app/datatypes/datatypes.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 45adfe83e7..a93008be41 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -16,8 +16,8 @@ from mimetypes import MimeTypes from django.core.files.images import get_image_dimensions -from django.db.models import fields, Value -from django.db.models.expressions import CombinedExpression +from django.db.models import CharField, F, JSONField, Q, Value +from django.db.models.expressions import Case, CombinedExpression, When from arches.app.const import ExtensionType from arches.app.datatypes.base import BaseDataType @@ -2417,10 +2417,16 @@ def _get_base_orm_lookup(self, node): def _get_orm_array_transform(self, lookup): return CombinedExpression( - JsonbArrayElements(super()._get_orm_array_transform(lookup)), + JsonbArrayElements( + Case( + When(~Q(**{lookup: None}), then=F(lookup)), + default=Value('[{"resourceId": null}]'), + output_field=JSONField(), + ), + ), "->>", Value("resourceId"), - output_field=fields.UUIDField(), + output_field=CharField(), # TODO: UUIDField? 
) From ccc1f6924f08f7d19ccf038bb6d57f98e41f96a1 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Sat, 12 Oct 2024 14:27:18 -0400 Subject: [PATCH 017/115] Factor out _get_orm_lookup_cardinality_n() --- arches/app/datatypes/base.py | 43 ++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/arches/app/datatypes/base.py b/arches/app/datatypes/base.py index 3635948c97..146d23f537 100644 --- a/arches/app/datatypes/base.py +++ b/arches/app/datatypes/base.py @@ -543,27 +543,7 @@ def get_orm_lookup(self, node, from_resource=True) -> BaseExpression: base_lookup = self._get_base_orm_lookup(node) if node.nodegroup.cardinality == "n": - # TODO: May produce duplicates until we add unique constraint - # on TileModel.resourceinstance_id, nodegroup_id, sortorder - tile_query = models.TileModel.objects.filter(nodegroup_id=node.nodegroup.pk) - if from_resource: - tile_query = tile_query.filter( - resourceinstance_id=OuterRef("resourceinstanceid") - ) - tile_query = tile_query.order_by("sortorder") - if self.collects_multiple_values(): - try: - array_transform = self._get_orm_array_transform(base_lookup) - except NotImplementedError: - tile_query = tile_query.values(base_lookup) - else: - # TODO: name clash or OK? 
- tile_query = tile_query.annotate( - array_transform=array_transform - ).values("array_transform") - else: - tile_query = tile_query.values(base_lookup) - return ArraySubquery(tile_query) + return self._get_orm_lookup_cardinality_n(node, base_lookup, from_resource) if from_resource: lookup = "tilemodel__" + base_lookup @@ -578,6 +558,27 @@ def get_orm_lookup(self, node, from_resource=True) -> BaseExpression: return F(lookup) + def _get_orm_lookup_cardinality_n(self, node, base_lookup, from_resource=True): + # TODO: May produce duplicates until we add unique constraint + # on TileModel.resourceinstance_id, nodegroup_id, sortorder + tile_query = models.TileModel.objects.filter(nodegroup_id=node.nodegroup.pk) + if from_resource: + tile_query = tile_query.filter( + resourceinstance_id=OuterRef("resourceinstanceid") + ) + tile_query = tile_query.order_by("sortorder") + if self.collects_multiple_values(): + try: + as_array = self._get_orm_array_transform(base_lookup) + except NotImplementedError: + tile_query = tile_query.values(base_lookup) + else: + # TODO: name clash or OK? 
+ tile_query = tile_query.annotate(as_array=as_array).values("as_array") + else: + tile_query = tile_query.values(base_lookup) + return ArraySubquery(tile_query) + def _get_base_orm_lookup(self, node): return f"data__{node.pk}" From 878ec53e2ab8417a99420184134b3bf82b4d3fa2 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 16 Oct 2024 11:41:46 -0700 Subject: [PATCH 018/115] Remove run_functions switch, fix set union, respect name mangling --- arches/app/models/models.py | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 3cdb428ae3..8bfadf158c 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1317,24 +1317,21 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): self._update_tiles_from_pythonic_model_values() ) - run_functions = kwargs.get("run_functions", False) - if run_functions: - # Instantiate proxy models (for now). - upsert_proxies = [ - Tile.objects.get(pk=tile.pk) for tile in to_insert + to_update - ] - delete_proxies = [Tile.objects.get(pk=tile.pk) for tile in to_delete] + # Instantiate proxy models for now, but find a way to expose this + # functionality on vanilla models, and in bulk. + upsert_proxies = [ + Tile.objects.get(pk=tile.pk) for tile in to_insert.union(to_update) + ] + delete_proxies = [Tile.objects.get(pk=tile.pk) for tile in to_delete] with transaction.atomic(): - if run_functions: - for proxy_instance in upsert_proxies: - proxy_instance.__preSave() - for proxy_instance in delete_proxies: - proxy_instance.__preDelete() + for proxy_instance in upsert_proxies: + proxy_instance._Tile__preSave() + for proxy_instance in delete_proxies: + proxy_instance._Tile__preDelete() # TODO: more side effects, e.g. indexing, editlog # (use/adapt proxy model methods?) - # datatype_post_save_actions? 
if to_insert: TileModel.objects.bulk_create(to_insert) if to_update: @@ -1343,10 +1340,10 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): TileModel.objects.filter(pk__in=[t.pk for t in to_delete]).delete() super().save(*args, **kwargs) - if run_functions: - for proxy_instance in upsert_proxies: - proxy_instance.refresh_from_db() - proxy_instance.__postSave() + + for proxy_instance in upsert_proxies: + proxy_instance.refresh_from_db() + proxy_instance._Tile__postSave() # TODO: add unique constraint for TileModel re: sortorder self.refresh_from_db( From 8e45b36020997691757b48762b9808c85edbbbd0 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 16 Oct 2024 12:09:32 -0700 Subject: [PATCH 019/115] Fetch only relevant tiles --- arches/app/models/models.py | 5 +++-- arches/app/models/querysets.py | 10 +++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 8bfadf158c..17ae21bf9c 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1353,7 +1353,7 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): def _map_prefetched_tiles_to_nodegroup_ids(self): tiles_by_nodegroup = defaultdict(list) - for tile_to_update in self.sorted_tiles: + for tile_to_update in self._sorted_tiles_for_pythonic_model_fields: tiles_by_nodegroup[tile_to_update.nodegroup_id].append(tile_to_update) return tiles_by_nodegroup @@ -1490,7 +1490,8 @@ def refresh_from_db(self, using=None, fields=None, from_queryset=None): # Copy over annotations. 
refreshed_resource = from_queryset[0] for field in itertools.chain( - field_map.values(), ("_pythonic_model_fields", "sorted_tiles") + field_map.values(), + ("_pythonic_model_fields", "_sorted_tiles_for_pythonic_model_fields"), ): setattr(self, field, getattr(refreshed_resource, field)) else: diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 1507a25cc4..bc86b37cde 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -28,7 +28,7 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): Provisional edits are completely ignored. """ from arches.app.datatypes.datatypes import DataTypeFactory - from arches.app.models.models import GraphModel, TileModel + from arches.app.models.models import GraphModel, NodeGroup, TileModel if defer and only and (overlap := set(defer).intersection(set(only))): raise ValueError(f"Got intersecting defer/only args: {overlap}") @@ -77,8 +77,12 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): "graph__node_set", models.Prefetch( "tilemodel_set", - queryset=TileModel.objects.order_by("sortorder"), - to_attr="sorted_tiles", + queryset=TileModel.objects.filter( + nodegroup_id__in=NodeGroup.objects.filter( + node__alias__in=node_alias_annotations + ) + ).order_by("sortorder"), + to_attr="_sorted_tiles_for_pythonic_model_fields", ), ) .annotate( From d0424a9ee64c1746fa37db9c252a8a1f790bf552 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 16 Oct 2024 12:09:48 -0700 Subject: [PATCH 020/115] Remove typos in ResourceInstanceDataType validation --- arches/app/datatypes/datatypes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index a93008be41..fc41dcec71 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2076,14 +2076,14 @@ def validate( raise ObjectDoesNotExist() except ObjectDoesNotExist: message = _( - "The 
related resource with id '{0}' is not in the system.".format( + "The related resource with id '{0}' is not in the system".format( resourceid ) ) errors.append({"type": "ERROR", "message": message}) except (ValueError, TypeError): message = _( - "The related resource with id '{0}' is not a valid uuid.".format( + "The related resource with id '{0}' is not a valid uuid".format( str(value) ) ) From 4d28f45ae448cc39df485e7c215fcfd2200ba1e9 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 16 Oct 2024 12:49:25 -0700 Subject: [PATCH 021/115] Remove unwanted outer join --- arches/app/datatypes/base.py | 39 +++++++++++------------------------- 1 file changed, 12 insertions(+), 27 deletions(-) diff --git a/arches/app/datatypes/base.py b/arches/app/datatypes/base.py index 146d23f537..a4d1e01ec1 100644 --- a/arches/app/datatypes/base.py +++ b/arches/app/datatypes/base.py @@ -3,7 +3,7 @@ import urllib from django.contrib.postgres.expressions import ArraySubquery -from django.db.models import F, OuterRef +from django.db.models import OuterRef, Subquery from django.db.models.expressions import BaseExpression from django.urls import reverse from django.utils.translation import gettext as _ @@ -539,45 +539,30 @@ def validate_node(self, node): """ pass - def get_orm_lookup(self, node, from_resource=True) -> BaseExpression: + def get_orm_lookup(self, node) -> BaseExpression: + """Return a tile subquery expression for use in a ResourceInstance QuerySet.""" base_lookup = self._get_base_orm_lookup(node) + tile_query = models.TileModel.objects.filter( + nodegroup_id=node.nodegroup.pk + ).filter(resourceinstance_id=OuterRef("resourceinstanceid")) if node.nodegroup.cardinality == "n": - return self._get_orm_lookup_cardinality_n(node, base_lookup, from_resource) - - if from_resource: - lookup = "tilemodel__" + base_lookup - else: - lookup = base_lookup + tile_query = tile_query.order_by("sortorder") - if self.collects_multiple_values(): - try: - return 
self._get_orm_array_transform(lookup) - except NotImplementedError: - pass - - return F(lookup) - - def _get_orm_lookup_cardinality_n(self, node, base_lookup, from_resource=True): - # TODO: May produce duplicates until we add unique constraint - # on TileModel.resourceinstance_id, nodegroup_id, sortorder - tile_query = models.TileModel.objects.filter(nodegroup_id=node.nodegroup.pk) - if from_resource: - tile_query = tile_query.filter( - resourceinstance_id=OuterRef("resourceinstanceid") - ) - tile_query = tile_query.order_by("sortorder") if self.collects_multiple_values(): try: as_array = self._get_orm_array_transform(base_lookup) except NotImplementedError: tile_query = tile_query.values(base_lookup) else: - # TODO: name clash or OK? tile_query = tile_query.annotate(as_array=as_array).values("as_array") else: tile_query = tile_query.values(base_lookup) - return ArraySubquery(tile_query) + + if node.nodegroup.cardinality == "n": + return ArraySubquery(tile_query) + else: + return Subquery(tile_query) def _get_base_orm_lookup(self, node): return f"data__{node.pk}" From e4fe62a9466037ff37d02d4535f1fdba67b971a5 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 16 Oct 2024 20:01:43 -0400 Subject: [PATCH 022/115] Return model instances for RI datatypes --- arches/app/datatypes/base.py | 3 ++ arches/app/datatypes/datatypes.py | 24 +++++++-- arches/app/models/models.py | 22 ++++---- arches/app/models/querysets.py | 87 +++++++++++++++++++++++++------ 4 files changed, 108 insertions(+), 28 deletions(-) diff --git a/arches/app/datatypes/base.py b/arches/app/datatypes/base.py index a4d1e01ec1..addb86c3cd 100644 --- a/arches/app/datatypes/base.py +++ b/arches/app/datatypes/base.py @@ -569,3 +569,6 @@ def _get_base_orm_lookup(self, node): def _get_orm_array_transform(self, lookup): raise NotImplementedError + + def to_python(self, tile_val): + return tile_val diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index fc41dcec71..a8b7eea3b4 
100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2233,6 +2233,14 @@ def get_search_terms(self, nodevalue, nodeid=None): return terms def transform_value_for_tile(self, value, **kwargs): + def from_instance(instance): + nonlocal kwargs + return { + "resourceId": str(instance.pk), + "inverseOntology": kwargs.get("inverseOntology"), + "inverseOntologyProperty": kwargs.get("inverseOntologyProperty"), + } + try: return json.loads(value) except ValueError: @@ -2244,7 +2252,12 @@ def transform_value_for_tile(self, value, **kwargs): except TypeError: # data should come in as json but python list is accepted as well if isinstance(value, list): - return value + if all(isinstance(inner, models.ResourceInstance) for inner in value): + return [from_instance(instance) for instance in value] + else: + return value + if isinstance(value, models.ResourceInstance): + return [from_instance(value)] def transform_export_values(self, value, *args, **kwargs): return json.dumps(value) @@ -2373,6 +2386,11 @@ def default_es_mapping(self): def _get_base_orm_lookup(self, node): return f"data__{node.pk}__0__resourceId" + def to_python(self, tile_val): + if tile_val is None: + return tile_val + return models.ResourceInstance.objects.as_resource(tile_val) + class ResourceInstanceListDataType(ResourceInstanceDataType): def to_json(self, tile, node): @@ -2413,7 +2431,7 @@ def collects_multiple_values(self): return True def _get_base_orm_lookup(self, node): - return f"data__{node.pk}" # TODO: UUIDField? + return f"data__{node.pk}" def _get_orm_array_transform(self, lookup): return CombinedExpression( @@ -2426,7 +2444,7 @@ def _get_orm_array_transform(self, lookup): ), "->>", Value("resourceId"), - output_field=CharField(), # TODO: UUIDField? 
+ output_field=CharField(), ) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 17ae21bf9c..fc1f056b93 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1447,9 +1447,9 @@ def _validate_and_patch_from_pythonic_model_values( for tile, inner_val in zip(working_tiles, new_val, strict=False): # TODO: move this all somewhere else # 1. transform_value_for_tile() - # 2. clean() TODO: swap order with 3? - # 3. pre_tile_save() - # 4. validate() + # 2. clean() + # 3. validate() + # 4. pre_tile_save() transformed = inner_val if inner_val is not None: @@ -1463,18 +1463,20 @@ def _validate_and_patch_from_pythonic_model_values( except: # TODO: fix and remove pass - datatype_instance.clean(tile, node_id_str) + # Patch the transformed data into the working tiles. + tile.data[node_id_str] = transformed - # Does pre_tile_save call transform_value_for_tile and therefore raise? - # https://github.com/archesproject/arches/issues/10851 - # try: - datatype_instance.pre_tile_save(tile, node_id_str) + datatype_instance.clean(tile, node_id_str) if errors := datatype_instance.validate(transformed, node=node): errors_by_node_alias[node.alias].extend(errors) - # Patch the validated data into the working tiles. - tile.data[node_id_str] = inner_val + # Does pre_tile_save call transform_value_for_tile and therefore raise? 
+ # https://github.com/archesproject/arches/issues/10851 + # try: + if transformed: + # TODO: determine None handling + datatype_instance.pre_tile_save(tile, node_id_str) for extra_tile in working_tiles[len(new_val) :]: extra_tile.data[node_id_str] = None diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index bc86b37cde..e46a5b8910 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -4,7 +4,9 @@ class PythonicModelQuerySet(models.QuerySet): - def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): + def with_unpacked_tiles( + self, graph_slug=None, *, resource=None, defer=None, only=None + ): """Annotates a ResourceInstance QuerySet with tile data unpacked and mapped onto node aliases, e.g.: @@ -32,19 +34,22 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): if defer and only and (overlap := set(defer).intersection(set(only))): raise ValueError(f"Got intersecting defer/only args: {overlap}") - try: - source_graph = ( - GraphModel.objects.filter( - slug=graph_slug, - # TODO: Verify that source_identifier=None is really what I want? - source_identifier=None, + if resource: + source_graph = resource.graph + else: + try: + source_graph = ( + GraphModel.objects.filter( + slug=graph_slug, + # TODO: Verify that source_identifier=None is really what I want? 
+ source_identifier=None, + ) + .prefetch_related("node_set") + .get() ) - .prefetch_related("node_set") - .get() - ) - except GraphModel.DoesNotExist as e: - e.add_note(f"No graph found with slug: {graph_slug}") - raise + except GraphModel.DoesNotExist as e: + e.add_note(f"No graph found with slug: {graph_slug}") + raise invalid_names = field_names(self.model) datatype_factory = DataTypeFactory() @@ -71,9 +76,12 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): if given_alias not in node_alias_annotations: raise ValueError(f'"{given_alias}" is not a valid node alias.') + if resource: + qs = self.filter(pk=resource.pk) + else: + qs = self.filter(graph=source_graph) return ( - self.filter(graph=source_graph) - .prefetch_related( + qs.prefetch_related( "graph__node_set", models.Prefetch( "tilemodel_set", @@ -95,3 +103,52 @@ def with_unpacked_tiles(self, graph_slug, *, defer=None, only=None): ) ) ) + + def as_resource(self, resource_id): + from arches.app.models.models import ResourceInstance + + resource = ( + ResourceInstance.objects.filter(pk=resource_id) + .prefetch_related("graph__node_set") + .get() + ) + return self.with_unpacked_tiles(resource=resource).first() + + def _fetch_all(self): + """Call datatype to_python() methods when evaluating queryset.""" + from arches.app.datatypes.datatypes import DataTypeFactory + + super()._fetch_all() + + datatype_factory = DataTypeFactory() + datatypes_by_nodeid = {} + + try: + first_resource = self._result_cache[0] + except IndexError: + return + for node in first_resource.graph.node_set.all(): + datatypes_by_nodeid[str(node.pk)] = datatype_factory.get_instance( + node.datatype + ) + + for resource in self._result_cache: + if not hasattr(resource, "_pythonic_model_fields"): + # On the first fetch, annotations haven't been applied yet. 
+ continue + for nodeid, alias in resource._pythonic_model_fields.items(): + tile_val = getattr(resource, alias) + if tile_val is None: + continue + datatype_instance = datatypes_by_nodeid[nodeid] + if not datatype_instance.collects_multiple_values(): + tile_val = list(tile_val) + python_val = [] + for inner_tile_val in tile_val: + # TODO: add prefetching/lazy for RI-list? + python_val.append(datatype_instance.to_python(inner_tile_val)) + if tile_val != python_val: + if datatype_instance.collects_multiple_values(): + setattr(resource, alias, python_val) + else: + setattr(resource, alias, python_val[0]) From 5f21334f94763e3b30021b80e7eb33e8c54ebddd Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 16 Oct 2024 21:08:40 -0400 Subject: [PATCH 023/115] Make `request` optional in Tile.__preDelete --- arches/app/models/tile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arches/app/models/tile.py b/arches/app/models/tile.py index 8c61bd8e74..20429e2463 100644 --- a/arches/app/models/tile.py +++ b/arches/app/models/tile.py @@ -808,7 +808,7 @@ def __preSave(self, request=None, context=None): ) logger.warning(e) - def __preDelete(self, request): + def __preDelete(self, request=None): try: for function in self._getFunctionClassInstances(): try: From cfbe6b2e8c11f4872ba3e8bed4d5eae94b8b2481 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Fri, 18 Oct 2024 18:40:38 -0400 Subject: [PATCH 024/115] Skip no-op tile updates --- arches/app/models/models.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index fc1f056b93..c61a896822 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1386,6 +1386,8 @@ def _update_tiles_from_pythonic_model_values(self): new_val = [new_val] max_tile_length = max(max_tile_length, len(new_val)) + # TODO: handle saving related objects? 
+ original_tile_data_by_tile_id = {} for i in range(max(max_tile_length, len(db_tiles))): try: tile = db_tiles[i] @@ -1402,6 +1404,7 @@ def _update_tiles_from_pythonic_model_values(self): to_insert.add(tile) else: to_update.add(tile) + original_tile_data_by_tile_id[tile.pk] = {**tile.data} working_tiles.append(tile) self._validate_and_patch_from_pythonic_model_values( @@ -1410,12 +1413,18 @@ def _update_tiles_from_pythonic_model_values(self): for tile in working_tiles: # TODO: preserve if child tiles? + # Remove blank tiles. if not any(tile.data.values()): if tile._state.adding: to_insert.remove(tile) else: to_update.remove(tile) to_delete.add(tile) + # Skip no-op updates. + if ( + original_data := original_tile_data_by_tile_id.pop(tile.pk, None) + ) and original_data == tile.data: + to_update.remove(tile) if errors_by_node_alias: raise ValidationError( From 77638cc87beadc185851ae785af88a89f29b576d Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Fri, 18 Oct 2024 18:53:22 -0400 Subject: [PATCH 025/115] Improve prefetching --- arches/app/models/querysets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index e46a5b8910..83a740e057 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -44,7 +44,7 @@ def with_unpacked_tiles( # TODO: Verify that source_identifier=None is really what I want? 
source_identifier=None, ) - .prefetch_related("node_set") + .prefetch_related("node_set__nodegroup") .get() ) except GraphModel.DoesNotExist as e: From 4b4268c59d1593e0d810d4a85258289ac41e6386 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Fri, 18 Oct 2024 19:40:36 -0400 Subject: [PATCH 026/115] Implement RI datatype values_match --- arches/app/datatypes/datatypes.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index a8b7eea3b4..4b5e86c6d9 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -1,4 +1,5 @@ import copy +import itertools import uuid import json import decimal @@ -2391,6 +2392,15 @@ def to_python(self, tile_val): return tile_val return models.ResourceInstance.objects.as_resource(tile_val) + def values_match(self, value1, value2): + if not isinstance(value1, list) or not isinstance(value2, list): + return value1 == value2 + copy1 = [{**inner_val} for inner_val in value1] + copy2 = [{**inner_val} for inner_val in value2] + for inner_val in itertools.chain(copy1, copy2): + inner_val.pop("resourceXresourceId", None) + return copy1 == copy2 + class ResourceInstanceListDataType(ResourceInstanceDataType): def to_json(self, tile, node): From 56bd61e26362dd8226aa820509a325559679ace6 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Fri, 18 Oct 2024 19:41:12 -0400 Subject: [PATCH 027/115] Implement datatype post save actions --- arches/app/models/models.py | 53 +++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index c61a896822..dc1b93b0ae 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1302,7 +1302,12 @@ def save(self, *args, **kwargs): def clean(self): """Raises a compound ValidationError with any failing tile values.""" if getattr(self, "_pythonic_model_fields", False): - 
self._update_tiles_from_pythonic_model_values() + nodegroups = ( + NodeGroup.objects.filter(node__graph=self.graph) + .distinct() + .prefetch_related("node_set") + ) + self._update_tiles_from_pythonic_model_values(nodegroups) def _save_tiles_for_pythonic_model(self, index=False, **kwargs): """Raises a compound ValidationError with any failing tile values. @@ -1311,10 +1316,17 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): values during a save(), but the "pythonic models" interface is basically a form/serializer, so that's why we're validating.) """ + from arches.app.datatypes.datatypes import DataTypeFactory from arches.app.models.tile import Tile - to_insert, to_update, to_delete = ( - self._update_tiles_from_pythonic_model_values() + datatype_factory = DataTypeFactory() + nodegroups = ( + NodeGroup.objects.filter(node__graph=self.graph) + .distinct() + .prefetch_related("node_set") + ) + to_insert, to_update, to_delete = self._update_tiles_from_pythonic_model_values( + nodegroups ) # Instantiate proxy models for now, but find a way to expose this @@ -1345,6 +1357,14 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): proxy_instance.refresh_from_db() proxy_instance._Tile__postSave() + for to_update_tile in to_update: + for nodegroup in nodegroups: + if to_update_tile.nodegroup_id == nodegroup.pk: + for node in nodegroup.node_set.all(): + datatype = datatype_factory.get_instance(node.datatype) + datatype.post_tile_save(to_update_tile, str(node.pk)) + break + # TODO: add unique constraint for TileModel re: sortorder self.refresh_from_db( using=kwargs.get("using", None), @@ -1357,22 +1377,20 @@ def _map_prefetched_tiles_to_nodegroup_ids(self): tiles_by_nodegroup[tile_to_update.nodegroup_id].append(tile_to_update) return tiles_by_nodegroup - def _update_tiles_from_pythonic_model_values(self): + def _update_tiles_from_pythonic_model_values(self, nodegroups): """Move values from model instance to prefetched tiles, and validate. 
Raises ValidationError if new data fails datatype validation (and thus may leave prefetched tiles in a partially consistent state.) """ + from arches.app.datatypes.datatypes import DataTypeFactory + + datatype_factory = DataTypeFactory() db_tiles_by_nodegroup_id = self._map_prefetched_tiles_to_nodegroup_ids() errors_by_node_alias = defaultdict(list) to_insert = set() to_update = set() to_delete = set() - nodegroups = ( - NodeGroup.objects.filter(node__graph=self.graph) - .distinct() - .prefetch_related("node_set") - ) for nodegroup in nodegroups: node_aliases = [n.alias for n in nodegroup.node_set.all()] db_tiles = db_tiles_by_nodegroup_id[nodegroup.pk] @@ -1382,7 +1400,7 @@ def _update_tiles_from_pythonic_model_values(self): if attribute_name not in node_aliases: continue new_val = getattr(self, attribute_name) - if nodegroup.cardinality == "1": + if nodegroup.cardinality == "1" or new_val is None: new_val = [new_val] max_tile_length = max(max_tile_length, len(new_val)) @@ -1421,10 +1439,17 @@ def _update_tiles_from_pythonic_model_values(self): to_update.remove(tile) to_delete.add(tile) # Skip no-op updates. 
- if ( - original_data := original_tile_data_by_tile_id.pop(tile.pk, None) - ) and original_data == tile.data: - to_update.remove(tile) + if original_data := original_tile_data_by_tile_id.pop(tile.pk, None): + for node in nodegroup.node_set.all(): + if node.datatype == "semantic": + continue + old = original_data[str(node.nodeid)] + datatype_instance = datatype_factory.get_instance(node.datatype) + new = tile.data[str(node.nodeid)] + if not datatype_instance.values_match(old, new): + break + else: + to_update.remove(tile) if errors_by_node_alias: raise ValidationError( From a50acaf055868ff097903a4bd5d0a56e2c096323 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Mon, 21 Oct 2024 11:06:46 -0400 Subject: [PATCH 028/115] Fix fallback value for RI ontology properties --- arches/app/datatypes/datatypes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 4b5e86c6d9..6b8c9e2f4f 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2238,8 +2238,8 @@ def from_instance(instance): nonlocal kwargs return { "resourceId": str(instance.pk), - "inverseOntology": kwargs.get("inverseOntology"), - "inverseOntologyProperty": kwargs.get("inverseOntologyProperty"), + "inverseOntology": kwargs.get("inverseOntology", ""), + "inverseOntologyProperty": kwargs.get("inverseOntologyProperty", ""), } try: From fc18c91df71d02ff49a6b3b7fb109094a7c2e45a Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Mon, 21 Oct 2024 11:09:21 -0400 Subject: [PATCH 029/115] Make post_tile_save request kwarg optional --- arches/app/datatypes/base.py | 2 +- arches/app/datatypes/datatypes.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arches/app/datatypes/base.py b/arches/app/datatypes/base.py index addb86c3cd..2b3ee80f3c 100644 --- a/arches/app/datatypes/base.py +++ b/arches/app/datatypes/base.py @@ -342,7 +342,7 @@ def 
get_default_language_value_from_localized_node(self, tile, nodeid): """ return tile.data[str(nodeid)] - def post_tile_save(self, tile, nodeid, request): + def post_tile_save(self, tile, nodeid, request=None): """ Called after the tile is saved to the database diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 6b8c9e2f4f..c73480d9ac 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -1275,7 +1275,7 @@ def to_json(self, tile, node): if data: return self.compile_json(tile, node, file_details=data[str(node.nodeid)]) - def post_tile_save(self, tile, nodeid, request): + def post_tile_save(self, tile, nodeid, request=None): if request is not None: # this does not get called when saving data from the mobile app previously_saved_tile = models.TileModel.objects.filter(pk=tile.tileid) @@ -2107,7 +2107,7 @@ def pre_tile_save(self, tile, nodeid): for relationship in relationships: relationship["resourceXresourceId"] = str(uuid.uuid4()) - def post_tile_save(self, tile, nodeid, request): + def post_tile_save(self, tile, nodeid, request=None): ret = False sql = """ SELECT * FROM __arches_create_resource_x_resource_relationships('%s') as t; From c16f55d9dd46c8004af6095d09d8c3fe56ee3157 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Mon, 21 Oct 2024 11:11:10 -0400 Subject: [PATCH 030/115] Implement indexing --- arches/app/models/models.py | 21 +++++++++++++++++---- arches/app/models/resource.py | 5 +++-- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index dc1b93b0ae..ce9f613827 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1280,7 +1280,7 @@ def get_instance_creator(self) -> int: return creatorid - def save(self, *args, **kwargs): + def save(self, index=False, **kwargs): try: self.graph_publication = self.graph.publication except ResourceInstance.graph.RelatedObjectDoesNotExist: @@ -1295,9 +1295,9 @@ 
def save(self, *args, **kwargs): add_to_update_fields(kwargs, "graph_publication") if getattr(self, "_pythonic_model_fields", False): - self._save_tiles_for_pythonic_model(*args, **kwargs) + self._save_tiles_for_pythonic_model(index=index, **kwargs) else: - super().save(*args, **kwargs) + super().save(**kwargs) def clean(self): """Raises a compound ValidationError with any failing tile values.""" @@ -1317,6 +1317,7 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): is basically a form/serializer, so that's why we're validating.) """ from arches.app.datatypes.datatypes import DataTypeFactory + from arches.app.models.resource import Resource from arches.app.models.tile import Tile datatype_factory = DataTypeFactory() @@ -1351,7 +1352,7 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): if to_delete: TileModel.objects.filter(pk__in=[t.pk for t in to_delete]).delete() - super().save(*args, **kwargs) + super().save(**kwargs) for proxy_instance in upsert_proxies: proxy_instance.refresh_from_db() @@ -1371,6 +1372,18 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): fields=kwargs.get("update_fields", None), ) + # Instantiate proxy model for now, but refactor & expose this on vanilla model. + if index: + node_datatypes = {} + for node in self.graph.node_set.all(): + node_datatypes[str(node.pk)] = node.datatype + + proxy = Resource.objects.get(pk=self.pk) + # Stick the data we already have onto the proxy instance. 
+ proxy.tiles = self._sorted_tiles_for_pythonic_model_fields + proxy.set_node_datatypes(node_datatypes) + proxy.index(fetchTiles=False) + def _map_prefetched_tiles_to_nodegroup_ids(self): tiles_by_nodegroup = defaultdict(list) for tile_to_update in self._sorted_tiles_for_pythonic_model_fields: diff --git a/arches/app/models/resource.py b/arches/app/models/resource.py index f9ea3aa0f8..c3477a90ff 100644 --- a/arches/app/models/resource.py +++ b/arches/app/models/resource.py @@ -421,9 +421,9 @@ def bulk_save(resources, transaction_id=None): se.bulk_index(documents) se.bulk_index(term_list) - def index(self, context=None): + def index(self, context=None, fetchTiles=True): """ - Indexes all the nessesary items values of a resource to support search + Indexes all the necessary items values of a resource to support search Keyword Arguments: context -- a string such as "copy" to indicate conditions under which a document is indexed @@ -440,6 +440,7 @@ def index(self, context=None): ) } document, terms = self.get_documents_to_index( + fetchTiles=fetchTiles, datatype_factory=datatype_factory, node_datatypes=node_datatypes, context=context, From 2cd897125ad7fd9dcc4b00b2edd2c845aac49906 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Mon, 21 Oct 2024 11:57:57 -0400 Subject: [PATCH 031/115] Implement edit log saves --- arches/app/models/models.py | 16 +++++++++++++++- arches/app/models/resource.py | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index ce9f613827..bcfa039aea 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1280,7 +1280,20 @@ def get_instance_creator(self) -> int: return creatorid - def save(self, index=False, **kwargs): + def save_edit(self, user=None): + """Intended to replace proxy model method eventually.""" + from arches.app.models.resource import Resource + + edit_type = "update" + if self._state.adding: + edit_type = "create" + # Until save_edit() 
is a static method, work around it. + ephemeral_proxy_instance = Resource() + ephemeral_proxy_instance.graphid = self.graph_id + ephemeral_proxy_instance.resourceinstanceid = str(self.pk) + ephemeral_proxy_instance.save_edit(user=user, edit_type=edit_type) + + def save(self, index=False, user=None, **kwargs): try: self.graph_publication = self.graph.publication except ResourceInstance.graph.RelatedObjectDoesNotExist: @@ -1296,6 +1309,7 @@ def save(self, index=False, **kwargs): if getattr(self, "_pythonic_model_fields", False): self._save_tiles_for_pythonic_model(index=index, **kwargs) + self.save_edit(user=user) else: super().save(**kwargs) diff --git a/arches/app/models/resource.py b/arches/app/models/resource.py index c3477a90ff..f8a5fbadf0 100644 --- a/arches/app/models/resource.py +++ b/arches/app/models/resource.py @@ -246,7 +246,7 @@ def save_edit( if transaction_id is not None: edit.transactionid = transaction_id edit.edittype = edit_type - edit.save() + edit.save(force_insert=True) def save(self, *args, **kwargs): """ From d3698b79545dadaeb476642af9b799fc6afd8a17 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Mon, 21 Oct 2024 13:29:18 -0400 Subject: [PATCH 032/115] Improve None handling --- arches/app/datatypes/datatypes.py | 2 ++ arches/app/models/models.py | 27 +++++++++++---------------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index c73480d9ac..6d075e67b9 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -910,6 +910,8 @@ def transform_value_for_tile(self, value, **kwargs): return str(transformed_value.edtf) def pre_tile_save(self, tile, nodeid): + # TODO: This is likely to be duplicative once we clean this up: + # https://github.com/archesproject/arches/issues/10851#issuecomment-2427305853 tile.data[nodeid] = self.transform_value_for_tile(tile.data[nodeid]) def validate( diff --git a/arches/app/models/models.py 
b/arches/app/models/models.py index bcfa039aea..3372adaeaa 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1506,23 +1506,16 @@ def _validate_and_patch_from_pythonic_model_values( new_val = [new_val] for tile, inner_val in zip(working_tiles, new_val, strict=False): - # TODO: move this all somewhere else - # 1. transform_value_for_tile() - # 2. clean() - # 3. validate() - # 4. pre_tile_save() - + # TODO: move this to Tile.full_clean()? + # https://github.com/archesproject/arches/issues/10851#issuecomment-2427305853 transformed = inner_val if inner_val is not None: - # TODO: do all datatypes treat None the same way? try: transformed = datatype_instance.transform_value_for_tile( inner_val, **node.config ) - except ValueError: - pass # BooleanDataType - except: # TODO: fix and remove - pass + except ValueError: # BooleanDataType raises. + pass # validate() will handle. # Patch the transformed data into the working tiles. tile.data[node_id_str] = transformed @@ -1532,12 +1525,14 @@ def _validate_and_patch_from_pythonic_model_values( if errors := datatype_instance.validate(transformed, node=node): errors_by_node_alias[node.alias].extend(errors) - # Does pre_tile_save call transform_value_for_tile and therefore raise? - # https://github.com/archesproject/arches/issues/10851 - # try: - if transformed: - # TODO: determine None handling + try: datatype_instance.pre_tile_save(tile, node_id_str) + except TypeError: # GeoJSONDataType raises. 
+ errors_by_node_alias[node.alias].append( + datatype_instance.create_error_message( + tile.data[node_id_str], None, None, None + ) + ) for extra_tile in working_tiles[len(new_val) :]: extra_tile.data[node_id_str] = None From c44f2969d33412c1dde5f794a0d0dce6e78e81f7 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Mon, 21 Oct 2024 14:38:48 -0400 Subject: [PATCH 033/115] Add name to ResourceInstance.__repr__() --- arches/app/models/models.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 3372adaeaa..29f05f4cd0 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1241,6 +1241,9 @@ def __init__(self, *args, **kwargs): if not self.resourceinstanceid: self.resourceinstanceid = uuid.uuid4() + def __repr__(self): + return f"<{self.__class__.__qualname__}: {self.name} ({self.pk})>" + @classmethod def as_model(cls, *args, **kwargs): return cls.objects.with_unpacked_tiles(*args, **kwargs) From 8f566de6f0bf656b45bba6858c426a1dddf3a166 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Mon, 21 Oct 2024 16:25:59 -0400 Subject: [PATCH 034/115] Harden ConceptListDataType.transform_value_for_tile against lists --- arches/app/datatypes/concept_types.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arches/app/datatypes/concept_types.py b/arches/app/datatypes/concept_types.py index 54ea077edd..3db5c7aa2d 100644 --- a/arches/app/datatypes/concept_types.py +++ b/arches/app/datatypes/concept_types.py @@ -431,7 +431,9 @@ def validate( def transform_value_for_tile(self, value, **kwargs): ret = [] - for val in csv.reader([value], delimiter=",", quotechar='"'): + if not isinstance(value, list): + value = [value] + for val in csv.reader(value, delimiter=",", quotechar='"'): lines = [line for line in val] for v in lines: try: From 231f7f96a0463cc97843731c9a916a738890e539 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Mon, 21 Oct 2024 19:00:34 -0400 Subject: [PATCH 035/115] 
Remove orm_array_transform in favor of to_python() --- arches/app/datatypes/base.py | 13 +---------- arches/app/datatypes/datatypes.py | 31 +++++++------------------- arches/app/models/query_expressions.py | 7 ------ 3 files changed, 9 insertions(+), 42 deletions(-) delete mode 100644 arches/app/models/query_expressions.py diff --git a/arches/app/datatypes/base.py b/arches/app/datatypes/base.py index 2b3ee80f3c..2cfcf1fb60 100644 --- a/arches/app/datatypes/base.py +++ b/arches/app/datatypes/base.py @@ -549,15 +549,7 @@ def get_orm_lookup(self, node) -> BaseExpression: if node.nodegroup.cardinality == "n": tile_query = tile_query.order_by("sortorder") - if self.collects_multiple_values(): - try: - as_array = self._get_orm_array_transform(base_lookup) - except NotImplementedError: - tile_query = tile_query.values(base_lookup) - else: - tile_query = tile_query.annotate(as_array=as_array).values("as_array") - else: - tile_query = tile_query.values(base_lookup) + tile_query = tile_query.values(base_lookup) if node.nodegroup.cardinality == "n": return ArraySubquery(tile_query) @@ -567,8 +559,5 @@ def get_orm_lookup(self, node) -> BaseExpression: def _get_base_orm_lookup(self, node): return f"data__{node.pk}" - def _get_orm_array_transform(self, lookup): - raise NotImplementedError - def to_python(self, tile_val): return tile_val diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 6d075e67b9..9a93255663 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -17,8 +17,6 @@ from mimetypes import MimeTypes from django.core.files.images import get_image_dimensions -from django.db.models import CharField, F, JSONField, Q, Value -from django.db.models.expressions import Case, CombinedExpression, When from arches.app.const import ExtensionType from arches.app.datatypes.base import BaseDataType @@ -26,7 +24,6 @@ from arches.app.models.concept import get_preflabel_from_valueid from 
arches.app.models.system_settings import settings from arches.app.models.fields.i18n import I18n_JSONField, I18n_String -from arches.app.models.query_expressions import JsonbArrayElements from arches.app.utils.date_utils import ExtendedDateFormat from arches.app.utils.module_importer import get_class_from_modulename from arches.app.utils.permission_backend import user_is_resource_reviewer @@ -2386,13 +2383,10 @@ def default_es_mapping(self): } return mapping - def _get_base_orm_lookup(self, node): - return f"data__{node.pk}__0__resourceId" - def to_python(self, tile_val): if tile_val is None: return tile_val - return models.ResourceInstance.objects.as_resource(tile_val) + return models.ResourceInstance.objects.as_resource(tile_val[0]["resourceId"]) def values_match(self, value1, value2): if not isinstance(value1, list) or not isinstance(value2, list): @@ -2442,22 +2436,13 @@ def to_json(self, tile, node): def collects_multiple_values(self): return True - def _get_base_orm_lookup(self, node): - return f"data__{node.pk}" - - def _get_orm_array_transform(self, lookup): - return CombinedExpression( - JsonbArrayElements( - Case( - When(~Q(**{lookup: None}), then=F(lookup)), - default=Value('[{"resourceId": null}]'), - output_field=JSONField(), - ), - ), - "->>", - Value("resourceId"), - output_field=CharField(), - ) + def to_python(self, tile_val): + if tile_val is None: + return tile_val + return [ + models.ResourceInstance.objects.as_resource(inner["resourceId"]) + for inner in tile_val + ] class NodeValueDataType(BaseDataType): diff --git a/arches/app/models/query_expressions.py b/arches/app/models/query_expressions.py deleted file mode 100644 index 633f9846ed..0000000000 --- a/arches/app/models/query_expressions.py +++ /dev/null @@ -1,7 +0,0 @@ -from django.db.models.expressions import Func - - -class JsonbArrayElements(Func): - arity = 1 - contains_subquery = True # TODO(Django 5.2) change -> set_returning = True - function = "JSONB_ARRAY_ELEMENTS" From 
c7e4929e0ce10fbdc1f76f475366def02cfa17a2 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 22 Oct 2024 11:31:27 -0400 Subject: [PATCH 036/115] Move principal user fallback logic --- arches/app/models/models.py | 10 +++++++--- arches/app/models/resource.py | 7 ++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 29f05f4cd0..f77a752def 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1301,14 +1301,18 @@ def save(self, index=False, user=None, **kwargs): self.graph_publication = self.graph.publication except ResourceInstance.graph.RelatedObjectDoesNotExist: pass + else: + add_to_update_fields(kwargs, "graph_publication") + + if not self.principaluser_id and user: + self.principaluser = user + add_to_update_fields(kwargs, "principaluser") if not hasattr(self, "resource_instance_lifecycle_state"): self.resource_instance_lifecycle_state = ( self.get_initial_resource_instance_lifecycle_state() ) - - add_to_update_fields(kwargs, "resource_instance_lifecycle_state") - add_to_update_fields(kwargs, "graph_publication") + add_to_update_fields(kwargs, "resource_instance_lifecycle_state") if getattr(self, "_pythonic_model_fields", False): self._save_tiles_for_pythonic_model(index=index, **kwargs) diff --git a/arches/app/models/resource.py b/arches/app/models/resource.py index f8a5fbadf0..b64ffe1784 100644 --- a/arches/app/models/resource.py +++ b/arches/app/models/resource.py @@ -248,7 +248,7 @@ def save_edit( edit.edittype = edit_type edit.save(force_insert=True) - def save(self, *args, **kwargs): + def save(self, **kwargs): """ Saves and indexes a single resource @@ -280,10 +280,7 @@ def save(self, *args, **kwargs): else: user = request.user - if not self.principaluser_id and user: - self.principaluser_id = user.id - - super(Resource, self).save(*args, **kwargs) + super(Resource, self).save(user=user, **kwargs) if should_update_resource_instance_lifecycle_state: 
self.save_edit( From 1c34d6ddb046040dc734aaf33c00cd3614e8c7ff Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 22 Oct 2024 15:58:07 -0400 Subject: [PATCH 037/115] Improve cardinality n vs. n-squared stuff --- arches/app/datatypes/datatypes.py | 11 +++++- arches/app/models/querysets.py | 65 +++++++++++++------------------ 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 9a93255663..1d81fd6779 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2383,10 +2383,16 @@ def default_es_mapping(self): } return mapping + def _get_base_orm_lookup(self, node): + """Immediately unwrap to a single value so that we can depend + on datatypes that do not collect multiple values not being a list. + """ + return f"data__{node.pk}__0__resourceId" + def to_python(self, tile_val): if tile_val is None: return tile_val - return models.ResourceInstance.objects.as_resource(tile_val[0]["resourceId"]) + return models.ResourceInstance.objects.as_resource(tile_val) def values_match(self, value1, value2): if not isinstance(value1, list) or not isinstance(value2, list): @@ -2436,6 +2442,9 @@ def to_json(self, tile, node): def collects_multiple_values(self): return True + def _get_base_orm_lookup(self, node): + return f"data__{node.pk}" + def to_python(self, tile_val): if tile_val is None: return tile_val diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 83a740e057..198c8a222b 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -5,7 +5,7 @@ class PythonicModelQuerySet(models.QuerySet): def with_unpacked_tiles( - self, graph_slug=None, *, resource=None, defer=None, only=None + self, graph_slug=None, *, resource_id=None, defer=None, only=None ): """Annotates a ResourceInstance QuerySet with tile data unpacked and mapped onto node aliases, e.g.: @@ -34,22 +34,17 @@ def with_unpacked_tiles( if defer and 
only and (overlap := set(defer).intersection(set(only))): raise ValueError(f"Got intersecting defer/only args: {overlap}") - if resource: - source_graph = resource.graph + if resource_id and not graph_slug: + graph_query = GraphModel.objects.filter(resourceinstance=resource_id) else: - try: - source_graph = ( - GraphModel.objects.filter( - slug=graph_slug, - # TODO: Verify that source_identifier=None is really what I want? - source_identifier=None, - ) - .prefetch_related("node_set__nodegroup") - .get() - ) - except GraphModel.DoesNotExist as e: - e.add_note(f"No graph found with slug: {graph_slug}") - raise + graph_query = GraphModel.objects.filter( + slug=graph_slug, source_identifier=None + ) + try: + source_graph = graph_query.prefetch_related("node_set__nodegroup").get() + except GraphModel.DoesNotExist as e: + e.add_note(f"No graph found with slug: {graph_slug}") + raise invalid_names = field_names(self.model) datatype_factory = DataTypeFactory() @@ -76,13 +71,13 @@ def with_unpacked_tiles( if given_alias not in node_alias_annotations: raise ValueError(f'"{given_alias}" is not a valid node alias.') - if resource: - qs = self.filter(pk=resource.pk) + if resource_id: + qs = self.filter(pk=resource_id) else: qs = self.filter(graph=source_graph) return ( qs.prefetch_related( - "graph__node_set", + "graph__node_set__nodegroup", models.Prefetch( "tilemodel_set", queryset=TileModel.objects.filter( @@ -105,14 +100,7 @@ def with_unpacked_tiles( ) def as_resource(self, resource_id): - from arches.app.models.models import ResourceInstance - - resource = ( - ResourceInstance.objects.filter(pk=resource_id) - .prefetch_related("graph__node_set") - .get() - ) - return self.with_unpacked_tiles(resource=resource).first() + return self.with_unpacked_tiles(resource_id=resource_id).get() def _fetch_all(self): """Call datatype to_python() methods when evaluating queryset.""" @@ -122,6 +110,7 @@ def _fetch_all(self): datatype_factory = DataTypeFactory() datatypes_by_nodeid = {} + 
nodegroups_by_nodeid = {} try: first_resource = self._result_cache[0] @@ -131,24 +120,26 @@ def _fetch_all(self): datatypes_by_nodeid[str(node.pk)] = datatype_factory.get_instance( node.datatype ) + nodegroups_by_nodeid[str(node.pk)] = node.nodegroup for resource in self._result_cache: if not hasattr(resource, "_pythonic_model_fields"): # On the first fetch, annotations haven't been applied yet. continue for nodeid, alias in resource._pythonic_model_fields.items(): - tile_val = getattr(resource, alias) - if tile_val is None: + python_val = [] + all_tile_values = getattr(resource, alias) + if all_tile_values is None: continue datatype_instance = datatypes_by_nodeid[nodeid] - if not datatype_instance.collects_multiple_values(): - tile_val = list(tile_val) - python_val = [] - for inner_tile_val in tile_val: + nodegroup = nodegroups_by_nodeid[nodeid] + if nodegroup.cardinality == "1": + all_tile_values = list(all_tile_values) + for inner_tile_val in all_tile_values: # TODO: add prefetching/lazy for RI-list? 
python_val.append(datatype_instance.to_python(inner_tile_val)) - if tile_val != python_val: - if datatype_instance.collects_multiple_values(): - setattr(resource, alias, python_val) - else: + if all_tile_values != python_val: + if nodegroup.cardinality == "1": setattr(resource, alias, python_val[0]) + else: + setattr(resource, alias, python_val) From 6f9358bb1e9081d6d36db0ff05e99ba810821c76 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 22 Oct 2024 16:11:54 -0400 Subject: [PATCH 038/115] Remove as_resource() --- arches/app/datatypes/datatypes.py | 14 +++++++++----- arches/app/models/querysets.py | 13 +++++-------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 1d81fd6779..833d6fdad0 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2392,7 +2392,9 @@ def _get_base_orm_lookup(self, node): def to_python(self, tile_val): if tile_val is None: return tile_val - return models.ResourceInstance.objects.as_resource(tile_val) + return models.ResourceInstance.objects.with_unpacked_tiles( + resource_ids=[tile_val] + ).get() def values_match(self, value1, value2): if not isinstance(value1, list) or not isinstance(value2, list): @@ -2448,10 +2450,12 @@ def _get_base_orm_lookup(self, node): def to_python(self, tile_val): if tile_val is None: return tile_val - return [ - models.ResourceInstance.objects.as_resource(inner["resourceId"]) - for inner in tile_val - ] + resource_ids = [inner["resourceId"] for inner in tile_val] + return list( + models.ResourceInstance.objects.with_unpacked_tiles( + resource_ids=resource_ids + ) + ) class NodeValueDataType(BaseDataType): diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 198c8a222b..be8fc88fe1 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -5,7 +5,7 @@ class PythonicModelQuerySet(models.QuerySet): def with_unpacked_tiles( - self, 
graph_slug=None, *, resource_id=None, defer=None, only=None + self, graph_slug=None, *, resource_ids=None, defer=None, only=None ): """Annotates a ResourceInstance QuerySet with tile data unpacked and mapped onto node aliases, e.g.: @@ -34,8 +34,8 @@ def with_unpacked_tiles( if defer and only and (overlap := set(defer).intersection(set(only))): raise ValueError(f"Got intersecting defer/only args: {overlap}") - if resource_id and not graph_slug: - graph_query = GraphModel.objects.filter(resourceinstance=resource_id) + if resource_ids and not graph_slug: + graph_query = GraphModel.objects.filter(resourceinstance__in=resource_ids) else: graph_query = GraphModel.objects.filter( slug=graph_slug, source_identifier=None @@ -71,8 +71,8 @@ def with_unpacked_tiles( if given_alias not in node_alias_annotations: raise ValueError(f'"{given_alias}" is not a valid node alias.') - if resource_id: - qs = self.filter(pk=resource_id) + if resource_ids: + qs = self.filter(pk__in=resource_ids) else: qs = self.filter(graph=source_graph) return ( @@ -99,9 +99,6 @@ def with_unpacked_tiles( ) ) - def as_resource(self, resource_id): - return self.with_unpacked_tiles(resource_id=resource_id).get() - def _fetch_all(self): """Call datatype to_python() methods when evaluating queryset.""" from arches.app.datatypes.datatypes import DataTypeFactory From ce8e103a6a56073b212d99ba5cc735184491f6d4 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 22 Oct 2024 16:19:01 -0400 Subject: [PATCH 039/115] Rename queryset --- arches/app/models/models.py | 4 ++-- arches/app/models/querysets.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index f77a752def..3bd8ba838e 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -11,7 +11,7 @@ from arches.app.utils.module_importer import get_class_from_modulename from arches.app.utils.thumbnail_factory import ThumbnailGeneratorInstance from 
arches.app.models.fields.i18n import I18n_TextField, I18n_JSONField -from arches.app.models.querysets import PythonicModelQuerySet +from arches.app.models.querysets import ResourceInstanceQuerySet from arches.app.models.utils import add_to_update_fields from arches.app.utils.betterJSONSerializer import JSONSerializer from arches.app.utils import import_class_from_string @@ -1229,7 +1229,7 @@ class ResourceInstance(models.Model): User, on_delete=models.SET_NULL, blank=True, null=True ) - objects = PythonicModelQuerySet.as_manager() + objects = ResourceInstanceQuerySet.as_manager() class Meta: managed = True diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index be8fc88fe1..327e62bdd2 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -3,7 +3,7 @@ from arches.app.models.utils import field_names -class PythonicModelQuerySet(models.QuerySet): +class ResourceInstanceQuerySet(models.QuerySet): def with_unpacked_tiles( self, graph_slug=None, *, resource_ids=None, defer=None, only=None ): From 1369ebbd5bfe119ed4d3b84289089ceef9b08477 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 22 Oct 2024 17:13:47 -0400 Subject: [PATCH 040/115] Simplify error reporting --- arches/app/models/models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 3bd8ba838e..cf7b2a6ad6 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1487,9 +1487,8 @@ def _update_tiles_from_pythonic_model_values(self, nodegroups): if errors_by_node_alias: raise ValidationError( - # TODO: Django/DRF minds if this is not an actual field? 
{ - alias: ValidationError("\n".join(e["message"] for e in errors)) + alias: ValidationError([e["message"] for e in errors]) for alias, errors in errors_by_node_alias.items() } ) From 2b4119127e78b99d8a955920cb3032d7e1434dae Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 23 Oct 2024 16:44:09 -0400 Subject: [PATCH 041/115] Add TileQuerySet --- arches/app/datatypes/base.py | 5 +- arches/app/datatypes/datatypes.py | 8 +-- arches/app/models/models.py | 33 +++++---- arches/app/models/querysets.py | 111 ++++++++++++++++++++---------- 4 files changed, 102 insertions(+), 55 deletions(-) diff --git a/arches/app/datatypes/base.py b/arches/app/datatypes/base.py index 2cfcf1fb60..870d3cde1e 100644 --- a/arches/app/datatypes/base.py +++ b/arches/app/datatypes/base.py @@ -539,13 +539,14 @@ def validate_node(self, node): """ pass - def get_orm_lookup(self, node) -> BaseExpression: + def get_orm_lookup(self, node, *, for_resource=True) -> BaseExpression: """Return a tile subquery expression for use in a ResourceInstance QuerySet.""" base_lookup = self._get_base_orm_lookup(node) + outer_ref = "resourceinstanceid" if for_resource else "resourceinstance_id" tile_query = models.TileModel.objects.filter( nodegroup_id=node.nodegroup.pk - ).filter(resourceinstance_id=OuterRef("resourceinstanceid")) + ).filter(resourceinstance_id=OuterRef(outer_ref)) if node.nodegroup.cardinality == "n": tile_query = tile_query.order_by("sortorder") diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 833d6fdad0..2c0f1a2c20 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2392,9 +2392,7 @@ def _get_base_orm_lookup(self, node): def to_python(self, tile_val): if tile_val is None: return tile_val - return models.ResourceInstance.objects.with_unpacked_tiles( - resource_ids=[tile_val] - ).get() + return models.ResourceInstance.objects.with_tiles(resource_ids=[tile_val]).get() def values_match(self, value1, value2): if not 
isinstance(value1, list) or not isinstance(value2, list): @@ -2452,9 +2450,7 @@ def to_python(self, tile_val): return tile_val resource_ids = [inner["resourceId"] for inner in tile_val] return list( - models.ResourceInstance.objects.with_unpacked_tiles( - resource_ids=resource_ids - ) + models.ResourceInstance.objects.with_tiles(resource_ids=resource_ids) ) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index cf7b2a6ad6..d308873ec8 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -11,7 +11,7 @@ from arches.app.utils.module_importer import get_class_from_modulename from arches.app.utils.thumbnail_factory import ThumbnailGeneratorInstance from arches.app.models.fields.i18n import I18n_TextField, I18n_JSONField -from arches.app.models.querysets import ResourceInstanceQuerySet +from arches.app.models.querysets import ResourceInstanceQuerySet, TileQuerySet from arches.app.models.utils import add_to_update_fields from arches.app.utils.betterJSONSerializer import JSONSerializer from arches.app.utils import import_class_from_string @@ -1246,7 +1246,7 @@ def __repr__(self): @classmethod def as_model(cls, *args, **kwargs): - return cls.objects.with_unpacked_tiles(*args, **kwargs) + return cls.objects.with_tiles(*args, **kwargs) def get_initial_resource_instance_lifecycle_state(self, *args, **kwargs): try: @@ -1314,7 +1314,7 @@ def save(self, index=False, user=None, **kwargs): ) add_to_update_fields(kwargs, "resource_instance_lifecycle_state") - if getattr(self, "_pythonic_model_fields", False): + if getattr(self, "_fetched_nodes", False): self._save_tiles_for_pythonic_model(index=index, **kwargs) self.save_edit(user=user) else: @@ -1322,7 +1322,7 @@ def save(self, index=False, user=None, **kwargs): def clean(self): """Raises a compound ValidationError with any failing tile values.""" - if getattr(self, "_pythonic_model_fields", False): + if getattr(self, "_fetched_nodes", False): nodegroups = ( 
NodeGroup.objects.filter(node__graph=self.graph) .distinct() @@ -1401,13 +1401,13 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): proxy = Resource.objects.get(pk=self.pk) # Stick the data we already have onto the proxy instance. - proxy.tiles = self._sorted_tiles_for_pythonic_model_fields + proxy.tiles = self._sorted_tiles_for_fetched_nodes proxy.set_node_datatypes(node_datatypes) proxy.index(fetchTiles=False) def _map_prefetched_tiles_to_nodegroup_ids(self): tiles_by_nodegroup = defaultdict(list) - for tile_to_update in self._sorted_tiles_for_pythonic_model_fields: + for tile_to_update in self._sorted_tiles_for_fetched_nodes: tiles_by_nodegroup[tile_to_update.nodegroup_id].append(tile_to_update) return tiles_by_nodegroup @@ -1430,7 +1430,7 @@ def _update_tiles_from_pythonic_model_values(self, nodegroups): db_tiles = db_tiles_by_nodegroup_id[nodegroup.pk] working_tiles = [] max_tile_length = 0 - for attribute_name in self._pythonic_model_fields.values(): + for attribute_name in self._fetched_nodes.values(): if attribute_name not in node_aliases: continue new_val = getattr(self, attribute_name) @@ -1503,7 +1503,7 @@ def _validate_and_patch_from_pythonic_model_values( datatype_factory = DataTypeFactory() for node in nodegroup.node_set.all(): node_id_str = str(node.pk) - if not (attribute_name := self._pythonic_model_fields.get(node_id_str, "")): + if not (attribute_name := self._fetched_nodes.get(node_id_str, "")): continue datatype_instance = datatype_factory.get_instance(node.datatype) @@ -1544,9 +1544,7 @@ def _validate_and_patch_from_pythonic_model_values( extra_tile.data[node_id_str] = None def refresh_from_db(self, using=None, fields=None, from_queryset=None): - if not from_queryset and ( - field_map := getattr(self, "_pythonic_model_fields", []) - ): + if not from_queryset and (field_map := getattr(self, "_fetched_nodes", [])): from_queryset = self.__class__.as_model( self.graph.slug, only=field_map.values() ) @@ -1555,7 +1553,7 @@ def 
refresh_from_db(self, using=None, fields=None, from_queryset=None): refreshed_resource = from_queryset[0] for field in itertools.chain( field_map.values(), - ("_pythonic_model_fields", "_sorted_tiles_for_pythonic_model_fields"), + ("_fetched_nodes", "_sorted_tiles_for_fetched_nodes"), ): setattr(self, field, getattr(refreshed_resource, field)) else: @@ -1815,6 +1813,8 @@ class TileModel(models.Model): # Tile sortorder = models.IntegerField(blank=True, null=True, default=0) provisionaledits = JSONField(blank=True, null=True, db_column="provisionaledits") + objects = TileQuerySet.as_manager() + class Meta: managed = True db_table = "tiles" @@ -1824,10 +1824,19 @@ def __init__(self, *args, **kwargs): if not self.tileid: self.tileid = uuid.uuid4() + def __repr__(self): + return f"<{self.__class__.__qualname__}: {self.nodegroup_alias} ({self.pk})>" + @property def nodegroup(self): return NodeGroup.objects.filter(pk=self.nodegroup_id).first() + @property + def nodegroup_alias(self): + if node_for_nodegroup := Node.objects.filter(pk=self.nodegroup_id).first(): + return node_for_nodegroup.alias + return None + def is_fully_provisional(self): return bool(self.provisionaledits and not any(self.data.values())) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 327e62bdd2..b920637b3e 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -3,14 +3,75 @@ from arches.app.models.utils import field_names +# TODO: figure out best layer for reuse +def _generate_annotation_maps(nodes, defer, only, invalid_names, for_resource=True): + from arches.app.datatypes.datatypes import DataTypeFactory + + if defer and only and (overlap := set(defer).intersection(set(only))): + raise ValueError(f"Got intersecting defer/only args: {overlap}") + datatype_factory = DataTypeFactory() + node_alias_annotations = {} + node_aliases_by_node_id = {} + for node in nodes: + if node.datatype == "semantic": + continue + if node.nodegroup_id is None: 
+ continue + if (defer and node.alias in defer) or (only and node.alias not in only): + continue + if node.alias in invalid_names: + raise ValueError(f'"{node.alias}" clashes with a model field name.') + + datatype_instance = datatype_factory.get_instance(node.datatype) + tile_lookup = datatype_instance.get_orm_lookup(node, for_resource=for_resource) + node_alias_annotations[node.alias] = tile_lookup + node_aliases_by_node_id[str(node.pk)] = node.alias + + if not node_alias_annotations: + raise ValueError("All fields were excluded.") + for given_alias in only or []: + if given_alias not in node_alias_annotations: + raise ValueError(f'"{given_alias}" is not a valid node alias.') + + return node_alias_annotations, node_aliases_by_node_id + + +class TileQuerySet(models.QuerySet): + def with_node_values(self, top_node_alias, *, graph_slug, defer=None, only=None): + from arches.app.models.models import Node + + # TODO: avoidable if I already have a node_set? + qs = ( + Node.objects.filter(graph__slug=graph_slug, alias=top_node_alias) + .select_related("nodegroup") + .prefetch_related("nodegroup__node_set") + ) + # TODO: make deterministic by checking source_identifier + # https://github.com/archesproject/arches/issues/11565 + top_node_for_group = qs.last() + node_alias_annotations, node_aliases_by_node_id = _generate_annotation_maps( + top_node_for_group.nodegroup.node_set.all(), + defer=defer, + only=only, + invalid_names=field_names(self.model), + for_resource=False, + ) + return self.annotate( + **node_alias_annotations, + ).annotate( + _fetched_nodes=models.Value( + node_aliases_by_node_id, + output_field=models.JSONField(), + ) + ) + + class ResourceInstanceQuerySet(models.QuerySet): - def with_unpacked_tiles( - self, graph_slug=None, *, resource_ids=None, defer=None, only=None - ): + def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=None): """Annotates a ResourceInstance QuerySet with tile data unpacked and mapped onto node aliases, 
e.g.: - >>> ResourceInstance.objects.with_unpacked_tiles("mymodel") + >>> ResourceInstance.objects.with_tiles("mymodel") With slightly fewer keystrokes: @@ -29,11 +90,8 @@ def with_unpacked_tiles( Provisional edits are completely ignored. """ - from arches.app.datatypes.datatypes import DataTypeFactory from arches.app.models.models import GraphModel, NodeGroup, TileModel - if defer and only and (overlap := set(defer).intersection(set(only))): - raise ValueError(f"Got intersecting defer/only args: {overlap}") if resource_ids and not graph_slug: graph_query = GraphModel.objects.filter(resourceinstance__in=resource_ids) else: @@ -46,30 +104,13 @@ def with_unpacked_tiles( e.add_note(f"No graph found with slug: {graph_slug}") raise - invalid_names = field_names(self.model) - datatype_factory = DataTypeFactory() - node_alias_annotations = {} - node_aliases_by_node_id = {} - for node in source_graph.node_set.all(): - if node.datatype == "semantic": - continue - if node.nodegroup_id is None: - continue - if (defer and node.alias in defer) or (only and node.alias not in only): - continue - if node.alias in invalid_names: - raise ValueError(f'"{node.alias}" clashes with a model field name.') - - datatype_instance = datatype_factory.get_instance(node.datatype) - tile_lookup = datatype_instance.get_orm_lookup(node) - node_alias_annotations[node.alias] = tile_lookup - node_aliases_by_node_id[str(node.pk)] = node.alias - - if not node_alias_annotations: - raise ValueError("All fields were excluded.") - for given_alias in only or []: - if given_alias not in node_alias_annotations: - raise ValueError(f'"{given_alias}" is not a valid node alias.') + node_alias_annotations, node_aliases_by_node_id = _generate_annotation_maps( + source_graph.node_set.all(), + defer=defer, + only=only, + invalid_names=field_names(self.model), + for_resource=True, + ) if resource_ids: qs = self.filter(pk__in=resource_ids) @@ -85,14 +126,14 @@ def with_unpacked_tiles( 
node__alias__in=node_alias_annotations ) ).order_by("sortorder"), - to_attr="_sorted_tiles_for_pythonic_model_fields", + to_attr="_sorted_tiles_for_fetched_nodes", ), ) .annotate( **node_alias_annotations, ) .annotate( - _pythonic_model_fields=models.Value( + _fetched_nodes=models.Value( node_aliases_by_node_id, output_field=models.JSONField(), ) @@ -120,10 +161,10 @@ def _fetch_all(self): nodegroups_by_nodeid[str(node.pk)] = node.nodegroup for resource in self._result_cache: - if not hasattr(resource, "_pythonic_model_fields"): + if not hasattr(resource, "_fetched_nodes"): # On the first fetch, annotations haven't been applied yet. continue - for nodeid, alias in resource._pythonic_model_fields.items(): + for nodeid, alias in resource._fetched_nodes.items(): python_val = [] all_tile_values = getattr(resource, alias) if all_tile_values is None: From 2c0bb06dadcdbdee3378fc643b6e51db2cccf23a Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 23 Oct 2024 17:55:46 -0400 Subject: [PATCH 042/115] Remove eager materialization of RI instances for now --- arches/app/datatypes/datatypes.py | 9 +-------- arches/app/models/querysets.py | 10 +++++----- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 2c0f1a2c20..16f74bfc72 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2389,11 +2389,6 @@ def _get_base_orm_lookup(self, node): """ return f"data__{node.pk}__0__resourceId" - def to_python(self, tile_val): - if tile_val is None: - return tile_val - return models.ResourceInstance.objects.with_tiles(resource_ids=[tile_val]).get() - def values_match(self, value1, value2): if not isinstance(value1, list) or not isinstance(value2, list): return value1 == value2 @@ -2449,9 +2444,7 @@ def to_python(self, tile_val): if tile_val is None: return tile_val resource_ids = [inner["resourceId"] for inner in tile_val] - return list( - 
models.ResourceInstance.objects.with_tiles(resource_ids=resource_ids) - ) + return resource_ids class NodeValueDataType(BaseDataType): diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index b920637b3e..b9ff2f0f82 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -71,20 +71,20 @@ def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=Non """Annotates a ResourceInstance QuerySet with tile data unpacked and mapped onto node aliases, e.g.: - >>> ResourceInstance.objects.with_tiles("mymodel") + >>> ResourceInstance.objects.with_tiles("concept") With slightly fewer keystrokes: - >>> ResourceInstance.as_model("mymodel") + >>> ResourceInstance.as_model("concept") Or with defer/only as in the QuerySet interface: - >>> ResourceInstance.as_model("mymodel", only=["alias1", "alias2"]) + >>> ResourceInstance.as_model("concept", only=["alias1", "alias2"]) Example: - >>> MyModel = ResourceInstance.as_model("mymodel") - >>> result = MyModel.filter(my_node_alias="some tile value") + >>> concepts = ResourceInstance.as_model("concepts") + >>> result = concepts.filter(my_node_alias="some tile value") >>> result.first().my_node_alias "some tile value" From 95ec768b0c4d2bf5c3ee84b119846be67af74b31 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 24 Oct 2024 08:18:10 -0400 Subject: [PATCH 043/115] Add as_nodegroup transform --- arches/app/models/querysets.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index b9ff2f0f82..71036c48bb 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -37,7 +37,8 @@ def _generate_annotation_maps(nodes, defer, only, invalid_names, for_resource=Tr class TileQuerySet(models.QuerySet): - def with_node_values(self, top_node_alias, *, graph_slug, defer=None, only=None): + @staticmethod + def _top_node_for_nodegroup(graph_slug, 
top_node_alias): from arches.app.models.models import Node # TODO: avoidable if I already have a node_set? @@ -48,9 +49,18 @@ def with_node_values(self, top_node_alias, *, graph_slug, defer=None, only=None) ) # TODO: make deterministic by checking source_identifier # https://github.com/archesproject/arches/issues/11565 - top_node_for_group = qs.last() + ret = qs.last() + if ret is None: + raise Node.DoesNotExist + return ret + + def with_node_values(self, top_node_or_alias, *, graph_slug, defer=None, only=None): + if isinstance(top_node_or_alias, str): + node_for_group = self._top_node_for_nodegroup(graph_slug, top_node_or_alias) + else: + node_for_group = top_node_or_alias node_alias_annotations, node_aliases_by_node_id = _generate_annotation_maps( - top_node_for_group.nodegroup.node_set.all(), + node_for_group.nodegroup.node_set.all(), defer=defer, only=only, invalid_names=field_names(self.model), @@ -65,6 +75,12 @@ def with_node_values(self, top_node_alias, *, graph_slug, defer=None, only=None) ) ) + def as_nodegroup(self, top_node_alias, *, graph_slug, defer=None, only=None): + node_for_group = self._top_node_for_nodegroup(graph_slug, top_node_alias) + return self.filter(nodegroup_id=node_for_group.pk).with_node_values( + node_for_group, graph_slug=graph_slug, defer=defer, only=only + ) + class ResourceInstanceQuerySet(models.QuerySet): def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=None): From cb6f1dc41fa6dc346f7f54181fce484eb8906549 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 24 Oct 2024 09:59:47 -0400 Subject: [PATCH 044/115] Attach nodegroups to resources --- arches/app/models/models.py | 15 +++- arches/app/models/querysets.py | 137 ++++++++++++++++++--------------- 2 files changed, 87 insertions(+), 65 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index d308873ec8..765655fcd5 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1244,6 +1244,9 @@ def 
__init__(self, *args, **kwargs): def __repr__(self): return f"<{self.__class__.__qualname__}: {self.name} ({self.pk})>" + def __str__(self): + return repr(self) + @classmethod def as_model(cls, *args, **kwargs): return cls.objects.with_tiles(*args, **kwargs) @@ -1553,7 +1556,12 @@ def refresh_from_db(self, using=None, fields=None, from_queryset=None): refreshed_resource = from_queryset[0] for field in itertools.chain( field_map.values(), - ("_fetched_nodes", "_sorted_tiles_for_fetched_nodes"), + # TODO: move to constant + ( + "_fetched_nodes", + "_pythonic_nodegroups", + "_sorted_tiles_for_fetched_nodes", + ), ): setattr(self, field, getattr(refreshed_resource, field)) else: @@ -1827,12 +1835,17 @@ def __init__(self, *args, **kwargs): def __repr__(self): return f"<{self.__class__.__qualname__}: {self.nodegroup_alias} ({self.pk})>" + def __str__(self): + return repr(self) + @property def nodegroup(self): return NodeGroup.objects.filter(pk=self.nodegroup_id).first() @property def nodegroup_alias(self): + if nodegroup_alias := getattr(self, "_nodegroup_alias", None): + return nodegroup_alias if node_for_nodegroup := Node.objects.filter(pk=self.nodegroup_id).first(): return node_for_nodegroup.alias return None diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 71036c48bb..d23e09f40e 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -51,36 +51,75 @@ def _top_node_for_nodegroup(graph_slug, top_node_alias): # https://github.com/archesproject/arches/issues/11565 ret = qs.last() if ret is None: - raise Node.DoesNotExist + raise Node.DoesNotExist(f"graph: {graph_slug} node: {top_node_alias}") return ret - def with_node_values(self, top_node_or_alias, *, graph_slug, defer=None, only=None): - if isinstance(top_node_or_alias, str): - node_for_group = self._top_node_for_nodegroup(graph_slug, top_node_or_alias) - else: - node_for_group = top_node_or_alias + def with_node_values(self, nodes, *, defer=None, 
only=None): node_alias_annotations, node_aliases_by_node_id = _generate_annotation_maps( - node_for_group.nodegroup.node_set.all(), + nodes, defer=defer, only=only, invalid_names=field_names(self.model), for_resource=False, ) - return self.annotate( - **node_alias_annotations, - ).annotate( - _fetched_nodes=models.Value( - node_aliases_by_node_id, - output_field=models.JSONField(), + return ( + self.prefetch_related( + "resourceinstance__graph__node_set", + ) + .annotate( + **node_alias_annotations, + ) + .annotate( + _fetched_nodes=models.Value( + node_aliases_by_node_id, + output_field=models.JSONField(), + ) ) + .order_by("sortorder") ) def as_nodegroup(self, top_node_alias, *, graph_slug, defer=None, only=None): node_for_group = self._top_node_for_nodegroup(graph_slug, top_node_alias) - return self.filter(nodegroup_id=node_for_group.pk).with_node_values( - node_for_group, graph_slug=graph_slug, defer=defer, only=only + return ( + self.filter(nodegroup_id=node_for_group.pk) + .with_node_values([node_for_group], defer=defer, only=only) + .annotate(_nodegroup_alias=models.Value(top_node_alias)) ) + def _fetch_all(self): + """Call datatype to_python() methods when materializing the QuerySet.""" + from arches.app.datatypes.datatypes import DataTypeFactory + + super()._fetch_all() + + datatype_factory = DataTypeFactory() + datatypes_by_nodeid = {} + nodegroups_by_nodeid = {} + + try: + first_tile = self._result_cache[0] + except IndexError: + return + for node in first_tile.resourceinstance.graph.node_set.all(): + datatypes_by_nodeid[str(node.pk)] = datatype_factory.get_instance( + node.datatype + ) + nodegroups_by_nodeid[str(node.pk)] = node.nodegroup + + NOT_PROVIDED = object() + for tile in self._result_cache: + for nodeid, alias in getattr(tile, "_fetched_nodes", {}).items(): + tile_val = getattr(tile, alias, NOT_PROVIDED) + if tile_val is not NOT_PROVIDED: + datatype_instance = datatypes_by_nodeid[nodeid] + try: + python_val = 
datatype_instance.to_python(tile_val) + except: + # TODO: some things break because datatype orm lookups + # need to be reoriented around nodegroups (next) + continue + setattr(tile, alias, python_val) + class ResourceInstanceQuerySet(models.QuerySet): def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=None): @@ -106,7 +145,7 @@ def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=Non Provisional edits are completely ignored. """ - from arches.app.models.models import GraphModel, NodeGroup, TileModel + from arches.app.models.models import GraphModel, TileModel if resource_ids and not graph_slug: graph_query = GraphModel.objects.filter(resourceinstance__in=resource_ids) @@ -120,8 +159,9 @@ def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=Non e.add_note(f"No graph found with slug: {graph_slug}") raise + nodes = source_graph.node_set.all() node_alias_annotations, node_aliases_by_node_id = _generate_annotation_maps( - source_graph.node_set.all(), + nodes, defer=defer, only=only, invalid_names=field_names(self.model), @@ -135,20 +175,20 @@ def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=Non return ( qs.prefetch_related( "graph__node_set__nodegroup", + # TODO: reuse this for child tiles. models.Prefetch( "tilemodel_set", - queryset=TileModel.objects.filter( - nodegroup_id__in=NodeGroup.objects.filter( - node__alias__in=node_alias_annotations - ) - ).order_by("sortorder"), - to_attr="_sorted_tiles_for_fetched_nodes", + queryset=TileModel.objects.with_node_values( + nodes, defer=defer, only=only + ), + to_attr="_pythonic_nodegroups", ), ) + # still wanted? 
+ # won't work if there are node-for-nodegroups that are data collecting .annotate( **node_alias_annotations, - ) - .annotate( + ).annotate( _fetched_nodes=models.Value( node_aliases_by_node_id, output_field=models.JSONField(), @@ -156,44 +196,13 @@ def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=Non ) ) - def _fetch_all(self): - """Call datatype to_python() methods when evaluating queryset.""" - from arches.app.datatypes.datatypes import DataTypeFactory - - super()._fetch_all() - - datatype_factory = DataTypeFactory() - datatypes_by_nodeid = {} - nodegroups_by_nodeid = {} - - try: - first_resource = self._result_cache[0] - except IndexError: - return - for node in first_resource.graph.node_set.all(): - datatypes_by_nodeid[str(node.pk)] = datatype_factory.get_instance( - node.datatype - ) - nodegroups_by_nodeid[str(node.pk)] = node.nodegroup + def _prefetch_related_objects(self): + """Attach nodegroups to resource instances.""" + super()._prefetch_related_objects() for resource in self._result_cache: - if not hasattr(resource, "_fetched_nodes"): - # On the first fetch, annotations haven't been applied yet. - continue - for nodeid, alias in resource._fetched_nodes.items(): - python_val = [] - all_tile_values = getattr(resource, alias) - if all_tile_values is None: - continue - datatype_instance = datatypes_by_nodeid[nodeid] - nodegroup = nodegroups_by_nodeid[nodeid] - if nodegroup.cardinality == "1": - all_tile_values = list(all_tile_values) - for inner_tile_val in all_tile_values: - # TODO: add prefetching/lazy for RI-list? - python_val.append(datatype_instance.to_python(inner_tile_val)) - if all_tile_values != python_val: - if nodegroup.cardinality == "1": - setattr(resource, alias, python_val[0]) - else: - setattr(resource, alias, python_val) + annotated_tiles = getattr(resource, "_pythonic_nodegroups", []) + for annotated_tile in annotated_tiles: + # TODO: move responsibility for cardinality N compilation to here. 
+ # TODO: remove queries as part of filtering in with_node_values(). + setattr(resource, annotated_tile.nodegroup_alias, annotated_tile) From 659231e3e0baf2be2e1cfa46365b671b8544b368 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 24 Oct 2024 13:43:35 -0400 Subject: [PATCH 045/115] Attach child tiles --- arches/app/models/querysets.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index d23e09f40e..8e0903751c 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -54,7 +54,9 @@ def _top_node_for_nodegroup(graph_slug, top_node_alias): raise Node.DoesNotExist(f"graph: {graph_slug} node: {top_node_alias}") return ret - def with_node_values(self, nodes, *, defer=None, only=None): + def with_node_values(self, nodes, *, defer=None, only=None, depth=1): + from arches.app.models.models import TileModel + node_alias_annotations, node_aliases_by_node_id = _generate_annotation_maps( nodes, defer=defer, @@ -62,10 +64,19 @@ def with_node_values(self, nodes, *, defer=None, only=None): invalid_names=field_names(self.model), for_resource=False, ) - return ( - self.prefetch_related( - "resourceinstance__graph__node_set", + + prefetches = ["resourceinstance__graph__node_set"] + if depth: + prefetches.append( + models.Prefetch( + "parenttile", + queryset=TileModel.objects.with_node_values( + nodes, defer=defer, only=only, depth=depth - 1 + ), + ) ) + return ( + self.prefetch_related(*prefetches) .annotate( **node_alias_annotations, ) @@ -94,7 +105,6 @@ def _fetch_all(self): datatype_factory = DataTypeFactory() datatypes_by_nodeid = {} - nodegroups_by_nodeid = {} try: first_tile = self._result_cache[0] @@ -104,7 +114,6 @@ def _fetch_all(self): datatypes_by_nodeid[str(node.pk)] = datatype_factory.get_instance( node.datatype ) - nodegroups_by_nodeid[str(node.pk)] = node.nodegroup NOT_PROVIDED = object() for tile in self._result_cache: @@ 
-175,7 +184,6 @@ def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=Non return ( qs.prefetch_related( "graph__node_set__nodegroup", - # TODO: reuse this for child tiles. models.Prefetch( "tilemodel_set", queryset=TileModel.objects.with_node_values( @@ -206,3 +214,12 @@ def _prefetch_related_objects(self): # TODO: move responsibility for cardinality N compilation to here. # TODO: remove queries as part of filtering in with_node_values(). setattr(resource, annotated_tile.nodegroup_alias, annotated_tile) + if ( + annotated_tile.parenttile + and annotated_tile.parenttile.nodegroup_alias + ): + setattr( + annotated_tile, + annotated_tile.parenttile.nodegroup_alias, + annotated_tile.parenttile, + ) From c0ac899a15088953d20fbd32334e4acd59faffd2 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 24 Oct 2024 15:11:04 -0400 Subject: [PATCH 046/115] Improve child tile attachment --- arches/app/models/querysets.py | 42 +++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 8e0903751c..05dfbbf17a 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -4,7 +4,7 @@ # TODO: figure out best layer for reuse -def _generate_annotation_maps(nodes, defer, only, invalid_names, for_resource=True): +def _generate_annotations(nodes, defer, only, invalid_names, for_resource=True): from arches.app.datatypes.datatypes import DataTypeFactory if defer and only and (overlap := set(defer).intersection(set(only))): @@ -38,26 +38,28 @@ def _generate_annotation_maps(nodes, defer, only, invalid_names, for_resource=Tr class TileQuerySet(models.QuerySet): @staticmethod - def _top_node_for_nodegroup(graph_slug, top_node_alias): + def _root_node_for_nodegroup(graph_slug, root_node_alias): from arches.app.models.models import Node - # TODO: avoidable if I already have a node_set? 
qs = ( - Node.objects.filter(graph__slug=graph_slug, alias=top_node_alias) + Node.objects.filter(graph__slug=graph_slug, alias=root_node_alias) .select_related("nodegroup") .prefetch_related("nodegroup__node_set") + # Prefetching to a depth of 2 seems like a good trade-off for now. + .prefetch_related("nodegroup__nodegroup_set") + .prefetch_related("nodegroup__nodegroup_set__nodegroup_set") ) # TODO: make deterministic by checking source_identifier # https://github.com/archesproject/arches/issues/11565 ret = qs.last() if ret is None: - raise Node.DoesNotExist(f"graph: {graph_slug} node: {top_node_alias}") + raise Node.DoesNotExist(f"graph: {graph_slug} node: {root_node_alias}") return ret def with_node_values(self, nodes, *, defer=None, only=None, depth=1): from arches.app.models.models import TileModel - node_alias_annotations, node_aliases_by_node_id = _generate_annotation_maps( + node_alias_annotations, node_aliases_by_node_id = _generate_annotations( nodes, defer=defer, only=only, @@ -89,12 +91,21 @@ def with_node_values(self, nodes, *, defer=None, only=None, depth=1): .order_by("sortorder") ) - def as_nodegroup(self, top_node_alias, *, graph_slug, defer=None, only=None): - node_for_group = self._top_node_for_nodegroup(graph_slug, top_node_alias) + def as_nodegroup(self, root_node_alias, *, graph_slug, defer=None, only=None): + root_node = self._root_node_for_nodegroup(graph_slug, root_node_alias) + + def accumulate_nodes_below(nodegroup, acc): + acc.extend(list(nodegroup.node_set.all())) + for child_nodegroup in nodegroup.nodegroup_set.all(): + accumulate_nodes_below(child_nodegroup, acc) + + branch_nodes = [] + accumulate_nodes_below(root_node.nodegroup, acc=branch_nodes) + return ( - self.filter(nodegroup_id=node_for_group.pk) - .with_node_values([node_for_group], defer=defer, only=only) - .annotate(_nodegroup_alias=models.Value(top_node_alias)) + self.filter(nodegroup_id=root_node.pk) + .with_node_values(branch_nodes, defer=defer, only=only) + 
.annotate(_nodegroup_alias=models.Value(root_node_alias)) ) def _fetch_all(self): @@ -169,7 +180,7 @@ def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=Non raise nodes = source_graph.node_set.all() - node_alias_annotations, node_aliases_by_node_id = _generate_annotation_maps( + node_alias_annotations, node_aliases_by_node_id = _generate_annotations( nodes, defer=defer, only=only, @@ -192,11 +203,10 @@ def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=Non to_attr="_pythonic_nodegroups", ), ) - # still wanted? - # won't work if there are node-for-nodegroups that are data collecting .annotate( **node_alias_annotations, - ).annotate( + ) + .annotate( _fetched_nodes=models.Value( node_aliases_by_node_id, output_field=models.JSONField(), @@ -205,7 +215,7 @@ def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=Non ) def _prefetch_related_objects(self): - """Attach nodegroups to resource instances.""" + """Attach annotated tiles to resource instances.""" super()._prefetch_related_objects() for resource in self._result_cache: From 00388e84e408924fb88e6394fcaa8fec0acff4c1 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 24 Oct 2024 16:39:19 -0400 Subject: [PATCH 047/115] Shave off some data, improve performance --- arches/app/models/models.py | 1 + arches/app/models/querysets.py | 64 +++++++++++++++++++++------------- 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 765655fcd5..3c342f1ef6 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1847,6 +1847,7 @@ def nodegroup_alias(self): if nodegroup_alias := getattr(self, "_nodegroup_alias", None): return nodegroup_alias if node_for_nodegroup := Node.objects.filter(pk=self.nodegroup_id).first(): + self._nodegroup_alias = node_for_nodegroup.alias return node_for_nodegroup.alias return None diff --git a/arches/app/models/querysets.py 
b/arches/app/models/querysets.py index 05dfbbf17a..04ec5fdf56 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -67,7 +67,8 @@ def with_node_values(self, nodes, *, defer=None, only=None, depth=1): for_resource=False, ) - prefetches = ["resourceinstance__graph__node_set"] + # Prefetch sibling nodes. + prefetches = ["resourceinstance__graph__node_set__nodegroup__node_set"] if depth: prefetches.append( models.Prefetch( @@ -108,37 +109,44 @@ def accumulate_nodes_below(nodegroup, acc): .annotate(_nodegroup_alias=models.Value(root_node_alias)) ) - def _fetch_all(self): - """Call datatype to_python() methods when materializing the QuerySet.""" + def _prefetch_related_objects(self): + """Call datatype to_python() methods when materializing the QuerySet. + Discard annotations that do not pertain to this tile. + TODO: determine if having these is useful for shallow filtering, + or if we can shave off in some upper layer. + """ from arches.app.datatypes.datatypes import DataTypeFactory - super()._fetch_all() + super()._prefetch_related_objects() datatype_factory = DataTypeFactory() - datatypes_by_nodeid = {} - - try: - first_tile = self._result_cache[0] - except IndexError: - return - for node in first_tile.resourceinstance.graph.node_set.all(): - datatypes_by_nodeid[str(node.pk)] = datatype_factory.get_instance( - node.datatype - ) - NOT_PROVIDED = object() for tile in self._result_cache: + root_node = None + for node in tile.resourceinstance.graph.node_set.all(): + if node.alias == tile.nodegroup_alias: + root_node = node + if not root_node: + continue + for nodeid, alias in getattr(tile, "_fetched_nodes", {}).items(): - tile_val = getattr(tile, alias, NOT_PROVIDED) - if tile_val is not NOT_PROVIDED: - datatype_instance = datatypes_by_nodeid[nodeid] - try: - python_val = datatype_instance.to_python(tile_val) - except: - # TODO: some things break because datatype orm lookups - # need to be reoriented around nodegroups (next) + # TODO: 
evaluate for efficiency re: reshaping _fetched_nodes map + for node in root_node.nodegroup.node_set.all(): + if str(node.pk) != nodeid: continue - setattr(tile, alias, python_val) + # TODO: debug and remove + assert node.nodegroup_id == tile.nodegroup_id + tile_val = getattr(tile, alias, NOT_PROVIDED) + if tile_val is not NOT_PROVIDED: + datatype_instance = datatype_factory.get_instance(node.datatype) + try: + python_val = datatype_instance.to_python(tile_val) + except: + # TODO: some things break because datatype orm lookups + # need to be reoriented around nodegroups (next) + continue + setattr(tile, alias, python_val) + break class ResourceInstanceQuerySet(models.QuerySet): @@ -215,10 +223,16 @@ def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=Non ) def _prefetch_related_objects(self): - """Attach annotated tiles to resource instances.""" + """Attach annotated tiles to resource instances. + Discard annotations only used for shallow filtering. + """ super()._prefetch_related_objects() for resource in self._result_cache: + fetched_nodes = getattr(resource, "_fetched_nodes", {}) + for fetched_alias in fetched_nodes.values(): + delattr(resource, fetched_alias) + annotated_tiles = getattr(resource, "_pythonic_nodegroups", []) for annotated_tile in annotated_tiles: # TODO: move responsibility for cardinality N compilation to here. 
From ef6f5ab9ced8aa2abc234ef6054fcc3d7bc3f71e Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Fri, 25 Oct 2024 16:39:19 -0400 Subject: [PATCH 048/115] Finish reorienting around nodegroups, add minimal docs --- arches/app/datatypes/base.py | 22 +- .../11043_tile_nodegroup_add_related_names.py | 40 +++ arches/app/models/models.py | 10 +- arches/app/models/querysets.py | 250 +++++++++++------- 4 files changed, 216 insertions(+), 106 deletions(-) create mode 100644 arches/app/models/migrations/11043_tile_nodegroup_add_related_names.py diff --git a/arches/app/datatypes/base.py b/arches/app/datatypes/base.py index 870d3cde1e..62861f7335 100644 --- a/arches/app/datatypes/base.py +++ b/arches/app/datatypes/base.py @@ -539,14 +539,20 @@ def validate_node(self, node): """ pass - def get_orm_lookup(self, node, *, for_resource=True) -> BaseExpression: - """Return a tile subquery expression for use in a ResourceInstance QuerySet.""" + def get_values_query(self, node, *, outer_ref=None) -> BaseExpression: + """Return a tile values query expression for use in a + ResourceInstanceQuerySet or TileQuerySet. + + The outer_ref names the resource instance field for use in the + subquery. It is spelled slightly differently when annotating + a Tile or a ResourceInstance. For resource instances, it's + "resourceinstanceid", otherwise "resourceinstance_id". 
+ """ base_lookup = self._get_base_orm_lookup(node) - outer_ref = "resourceinstanceid" if for_resource else "resourceinstance_id" - tile_query = models.TileModel.objects.filter( - nodegroup_id=node.nodegroup.pk - ).filter(resourceinstance_id=OuterRef(outer_ref)) + tile_query = models.TileModel.objects.filter(nodegroup_id=node.nodegroup.pk) + if outer_ref: + tile_query = tile_query.filter(resourceinstance_id=OuterRef(outer_ref)) if node.nodegroup.cardinality == "n": tile_query = tile_query.order_by("sortorder") @@ -554,8 +560,10 @@ def get_orm_lookup(self, node, *, for_resource=True) -> BaseExpression: if node.nodegroup.cardinality == "n": return ArraySubquery(tile_query) - else: + elif outer_ref: return Subquery(tile_query) + else: + return tile_query def _get_base_orm_lookup(self, node): return f"data__{node.pk}" diff --git a/arches/app/models/migrations/11043_tile_nodegroup_add_related_names.py b/arches/app/models/migrations/11043_tile_nodegroup_add_related_names.py new file mode 100644 index 0000000000..c55144cc4d --- /dev/null +++ b/arches/app/models/migrations/11043_tile_nodegroup_add_related_names.py @@ -0,0 +1,40 @@ +# Generated by Django 5.1.2 on 2024-10-25 12:44 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("models", "11042_update__arches_staging_to_tile"), + ] + + operations = [ + migrations.AlterField( + model_name="nodegroup", + name="parentnodegroup", + field=models.ForeignKey( + blank=True, + db_column="parentnodegroupid", + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="children", + related_query_name="child", + to="models.nodegroup", + ), + ), + migrations.AlterField( + model_name="tilemodel", + name="parenttile", + field=models.ForeignKey( + blank=True, + db_column="parenttileid", + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="children", + related_query_name="child", + to="models.tilemodel", + 
), + ), + ] diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 3c342f1ef6..47f4057310 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -708,6 +708,8 @@ class NodeGroup(models.Model): blank=True, null=True, on_delete=models.CASCADE, + related_name="children", + related_query_name="child", ) # Allows nodegroups within nodegroups def __init__(self, *args, **kwargs): @@ -1242,7 +1244,7 @@ def __init__(self, *args, **kwargs): self.resourceinstanceid = uuid.uuid4() def __repr__(self): - return f"<{self.__class__.__qualname__}: {self.name} ({self.pk})>" + return f"<{self.graph.name}: {self.name} ({self.pk})>" def __str__(self): return repr(self) @@ -1559,7 +1561,7 @@ def refresh_from_db(self, using=None, fields=None, from_queryset=None): # TODO: move to constant ( "_fetched_nodes", - "_pythonic_nodegroups", + "_annotated_tiles", "_sorted_tiles_for_fetched_nodes", ), ): @@ -1815,6 +1817,8 @@ class TileModel(models.Model): # Tile blank=True, null=True, on_delete=models.CASCADE, + related_name="children", + related_query_name="child", ) data = JSONField(blank=True, null=True, db_column="tiledata") nodegroup_id = models.UUIDField(db_column="nodegroupid", null=True) @@ -1833,7 +1837,7 @@ def __init__(self, *args, **kwargs): self.tileid = uuid.uuid4() def __repr__(self): - return f"<{self.__class__.__qualname__}: {self.nodegroup_alias} ({self.pk})>" + return f"<{self.nodegroup_alias} ({self.pk})>" def __str__(self): return repr(self) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 04ec5fdf56..b9d455e198 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -3,15 +3,13 @@ from arches.app.models.utils import field_names -# TODO: figure out best layer for reuse -def _generate_annotations(nodes, defer, only, invalid_names, for_resource=True): +def _generate_tile_annotations(nodes, defer, only, invalid_names, outer_ref=None): from arches.app.datatypes.datatypes 
import DataTypeFactory if defer and only and (overlap := set(defer).intersection(set(only))): raise ValueError(f"Got intersecting defer/only args: {overlap}") datatype_factory = DataTypeFactory() node_alias_annotations = {} - node_aliases_by_node_id = {} for node in nodes: if node.datatype == "semantic": continue @@ -23,9 +21,10 @@ def _generate_annotations(nodes, defer, only, invalid_names, for_resource=True): raise ValueError(f'"{node.alias}" clashes with a model field name.') datatype_instance = datatype_factory.get_instance(node.datatype) - tile_lookup = datatype_instance.get_orm_lookup(node, for_resource=for_resource) - node_alias_annotations[node.alias] = tile_lookup - node_aliases_by_node_id[str(node.pk)] = node.alias + tile_values_query = datatype_instance.get_values_query( + node, outer_ref=outer_ref + ) + node_alias_annotations[node.alias] = tile_values_query if not node_alias_annotations: raise ValueError("All fields were excluded.") @@ -33,7 +32,7 @@ def _generate_annotations(nodes, defer, only, invalid_names, for_resource=True): if given_alias not in node_alias_annotations: raise ValueError(f'"{given_alias}" is not a valid node alias.') - return node_alias_annotations, node_aliases_by_node_id + return node_alias_annotations class TileQuerySet(models.QuerySet): @@ -46,8 +45,8 @@ def _root_node_for_nodegroup(graph_slug, root_node_alias): .select_related("nodegroup") .prefetch_related("nodegroup__node_set") # Prefetching to a depth of 2 seems like a good trade-off for now. 
- .prefetch_related("nodegroup__nodegroup_set") - .prefetch_related("nodegroup__nodegroup_set__nodegroup_set") + .prefetch_related("nodegroup__children") + .prefetch_related("nodegroup__children__children") ) # TODO: make deterministic by checking source_identifier # https://github.com/archesproject/arches/issues/11565 @@ -56,48 +55,63 @@ def _root_node_for_nodegroup(graph_slug, root_node_alias): raise Node.DoesNotExist(f"graph: {graph_slug} node: {root_node_alias}") return ret - def with_node_values(self, nodes, *, defer=None, only=None, depth=1): - from arches.app.models.models import TileModel + def with_node_values( + self, nodes, *, defer=None, only=None, outer_ref=None, depth=1 + ): + # from arches.app.models.models import TileModel - node_alias_annotations, node_aliases_by_node_id = _generate_annotations( + node_alias_annotations = _generate_tile_annotations( nodes, defer=defer, only=only, invalid_names=field_names(self.model), - for_resource=False, + outer_ref=outer_ref, ) - # Prefetch sibling nodes. - prefetches = ["resourceinstance__graph__node_set__nodegroup__node_set"] - if depth: - prefetches.append( - models.Prefetch( - "parenttile", - queryset=TileModel.objects.with_node_values( - nodes, defer=defer, only=only, depth=depth - 1 - ), - ) - ) + prefetches = [] + # TODO: debug this. 
+ # if depth: + # prefetches.append( + # models.Prefetch( + # "parenttile", + # queryset=TileModel.objects.with_node_values( + # nodes, defer=defer, only=only, depth=depth - 1 + # ), + # ) + # ) + + self._fetched_nodes = [n for n in nodes if n.alias in node_alias_annotations] return ( - self.prefetch_related(*prefetches) + self.filter(data__has_any_keys=[n.pk for n in self._fetched_nodes]) + .prefetch_related(*prefetches) .annotate( **node_alias_annotations, ) - .annotate( - _fetched_nodes=models.Value( - node_aliases_by_node_id, - output_field=models.JSONField(), - ) - ) .order_by("sortorder") ) def as_nodegroup(self, root_node_alias, *, graph_slug, defer=None, only=None): + """ + Entry point for filtering arches data by nodegroups (instead of grouping by + resource.) + + >>> statements = TileModel.objects.as_nodegroup("statement", graph_slug="concept") + >>> results = statements.filter(statement_content__0__en__value__startswith="F") # todo: make more ergonomic, remove limitation of 0 + >>> for result in results: + print(result.resourceinstance) + print("\t", result.statement_content[0]["en"]["value"]) # TODO: unwrap/string viewmodel + + + Fluorescence stimulated by x-rays; ... + + Fine-quality calf or lamb parchment ... 
+ """ + root_node = self._root_node_for_nodegroup(graph_slug, root_node_alias) def accumulate_nodes_below(nodegroup, acc): acc.extend(list(nodegroup.node_set.all())) - for child_nodegroup in nodegroup.nodegroup_set.all(): + for child_nodegroup in nodegroup.children.all(): accumulate_nodes_below(child_nodegroup, acc) branch_nodes = [] @@ -105,15 +119,15 @@ def accumulate_nodes_below(nodegroup, acc): return ( self.filter(nodegroup_id=root_node.pk) - .with_node_values(branch_nodes, defer=defer, only=only) + .with_node_values( + branch_nodes, defer=defer, only=only, outer_ref="resourceinstance_id" + ) .annotate(_nodegroup_alias=models.Value(root_node_alias)) ) def _prefetch_related_objects(self): """Call datatype to_python() methods when materializing the QuerySet. Discard annotations that do not pertain to this tile. - TODO: determine if having these is useful for shallow filtering, - or if we can shave off in some upper layer. """ from arches.app.datatypes.datatypes import DataTypeFactory @@ -122,58 +136,69 @@ def _prefetch_related_objects(self): datatype_factory = DataTypeFactory() NOT_PROVIDED = object() for tile in self._result_cache: - root_node = None - for node in tile.resourceinstance.graph.node_set.all(): - if node.alias == tile.nodegroup_alias: - root_node = node - if not root_node: - continue - - for nodeid, alias in getattr(tile, "_fetched_nodes", {}).items(): - # TODO: evaluate for efficiency re: reshaping _fetched_nodes map - for node in root_node.nodegroup.node_set.all(): - if str(node.pk) != nodeid: - continue - # TODO: debug and remove - assert node.nodegroup_id == tile.nodegroup_id - tile_val = getattr(tile, alias, NOT_PROVIDED) + for node in self._fetched_nodes: + if node.nodegroup_id == tile.nodegroup_id: + tile_val = getattr(tile, node.alias, NOT_PROVIDED) if tile_val is not NOT_PROVIDED: datatype_instance = datatype_factory.get_instance(node.datatype) - try: - python_val = datatype_instance.to_python(tile_val) - except: - # TODO: some things 
break because datatype orm lookups - # need to be reoriented around nodegroups (next) - continue - setattr(tile, alias, python_val) + # Immediately coalesce [None] (from ArraySubquery) to []. + if tile_val == [None]: + tile_val = [] + python_val = datatype_instance.to_python(tile_val) + setattr(tile, node.alias, python_val) break + def _clone(self): + ret = super()._clone() + if hasattr(self, "_fetched_nodes"): + ret._fetched_nodes = self._fetched_nodes + return ret + class ResourceInstanceQuerySet(models.QuerySet): def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=None): """Annotates a ResourceInstance QuerySet with tile data unpacked and mapped onto node aliases, e.g.: - >>> ResourceInstance.objects.with_tiles("concept") + >>> concepts = ResourceInstance.objects.with_tiles("concept") With slightly fewer keystrokes: - >>> ResourceInstance.as_model("concept") + >>> concepts = ResourceInstance.as_model("concept") Or with defer/only as in the QuerySet interface: - >>> ResourceInstance.as_model("concept", only=["alias1", "alias2"]) + >>> partial_concepts = ResourceInstance.as_model("concept", only=["n1", "n2"]) Example: - >>> concepts = ResourceInstance.as_model("concepts") - >>> result = concepts.filter(my_node_alias="some tile value") - >>> result.first().my_node_alias - "some tile value" + >>> from arches.app.models.models import * + >>> concepts = ResourceInstance.as_model("concept") + + Django QuerySet methods are available for efficient queries: + >>> concepts.count() + 785 + + Filter on any nested node at the top level ("shallow query") + + >>> subset = concepts.filter(statement_content__isnull=False)[:4] + + Access through nodegroup names: + + >>> for concept in subset: + print(concept) + for stmt in concept.statement: # TODO: should name with _set (?) + print("\t", stmt) + print("\t\t", stmt.statement_content) + + + + [{'en': {'value': 'Method of acquiring property ... + ... Provisional edits are completely ignored. 
""" - from arches.app.models.models import GraphModel, TileModel + from arches.app.models.models import GraphModel, NodeGroup, TileModel if resource_ids and not graph_slug: graph_query = GraphModel.objects.filter(resourceinstance__in=resource_ids) @@ -182,62 +207,89 @@ def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=Non slug=graph_slug, source_identifier=None ) try: - source_graph = graph_query.prefetch_related("node_set__nodegroup").get() + # Prefetch sibling nodes for use in _prefetch_related_objects() + source_graph = graph_query.prefetch_related( + "node_set__nodegroup__node_set" + ).get() except GraphModel.DoesNotExist as e: e.add_note(f"No graph found with slug: {graph_slug}") raise nodes = source_graph.node_set.all() - node_alias_annotations, node_aliases_by_node_id = _generate_annotations( + node_alias_annotations = _generate_tile_annotations( nodes, defer=defer, only=only, invalid_names=field_names(self.model), - for_resource=True, + outer_ref="resourceinstanceid", ) + self._fetched_nodes = [n for n in nodes if n.alias in node_alias_annotations] if resource_ids: qs = self.filter(pk__in=resource_ids) else: qs = self.filter(graph=source_graph) - return ( - qs.prefetch_related( - "graph__node_set__nodegroup", - models.Prefetch( - "tilemodel_set", - queryset=TileModel.objects.with_node_values( - nodes, defer=defer, only=only - ), - to_attr="_pythonic_nodegroups", + return qs.prefetch_related( + "graph__node_set__nodegroup", + models.Prefetch( + "tilemodel_set", + queryset=TileModel.objects.with_node_values( + self._fetched_nodes, + defer=defer, + only=only, + outer_ref="resourceinstance_id", + ).annotate( + cardinality=NodeGroup.objects.filter( + pk=models.OuterRef("nodegroup_id") + ).values("cardinality") ), - ) - .annotate( - **node_alias_annotations, - ) - .annotate( - _fetched_nodes=models.Value( - node_aliases_by_node_id, - output_field=models.JSONField(), - ) - ) + to_attr="_annotated_tiles", + ), + ).annotate( + 
**node_alias_annotations, ) def _prefetch_related_objects(self): - """Attach annotated tiles to resource instances. + """Attach annotated tiles to resource instances, at the root, by + nodegroup alias. TODO: consider building as a nested structure. Discard annotations only used for shallow filtering. """ super()._prefetch_related_objects() - for resource in self._result_cache: - fetched_nodes = getattr(resource, "_fetched_nodes", {}) - for fetched_alias in fetched_nodes.values(): - delattr(resource, fetched_alias) + root_nodes = [] + for node in self._fetched_nodes: + # TODO: less roundabout lookup, see earlier siblings prefetch. + root_node = None + for sibling_node in node.nodegroup.node_set.all(): + if sibling_node.pk == node.nodegroup_id: + root_node = sibling_node + break + root_nodes.append(root_node) - annotated_tiles = getattr(resource, "_pythonic_nodegroups", []) + for resource in self._result_cache: + for node in self._fetched_nodes: + delattr(resource, node.alias) + for root_node in root_nodes: + setattr( + resource, + root_node.alias, + None if root_node.nodegroup.cardinality == "1" else [], + ) + annotated_tiles = getattr(resource, "_annotated_tiles", []) for annotated_tile in annotated_tiles: - # TODO: move responsibility for cardinality N compilation to here. - # TODO: remove queries as part of filtering in with_node_values(). 
- setattr(resource, annotated_tile.nodegroup_alias, annotated_tile) + for root_node in root_nodes: + if root_node.pk == annotated_tile.nodegroup_id: + ng_alias = root_node.alias + break + else: + raise RuntimeError("missing root node for annotated tile") + + if annotated_tile.cardinality == "n": + tile_array = getattr(resource, ng_alias) + tile_array.append(annotated_tile) + else: + setattr(resource, ng_alias, annotated_tile) + if ( annotated_tile.parenttile and annotated_tile.parenttile.nodegroup_alias @@ -247,3 +299,9 @@ def _prefetch_related_objects(self): annotated_tile.parenttile.nodegroup_alias, annotated_tile.parenttile, ) + + def _clone(self): + ret = super()._clone() + if hasattr(self, "_fetched_nodes"): + ret._fetched_nodes = self._fetched_nodes + return ret From 5e1182d64d65e9d8dab7f8381b2ef5d5785c0b00 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Mon, 28 Oct 2024 18:08:33 -0400 Subject: [PATCH 049/115] Fix subquery bugs --- arches/app/datatypes/base.py | 31 +----- arches/app/datatypes/datatypes.py | 2 +- arches/app/models/models.py | 4 +- arches/app/models/querysets.py | 168 ++++++++++++++++++------------ 4 files changed, 104 insertions(+), 101 deletions(-) diff --git a/arches/app/datatypes/base.py b/arches/app/datatypes/base.py index 62861f7335..664bddd510 100644 --- a/arches/app/datatypes/base.py +++ b/arches/app/datatypes/base.py @@ -2,9 +2,6 @@ import logging import urllib -from django.contrib.postgres.expressions import ArraySubquery -from django.db.models import OuterRef, Subquery -from django.db.models.expressions import BaseExpression from django.urls import reverse from django.utils.translation import gettext as _ @@ -539,33 +536,7 @@ def validate_node(self, node): """ pass - def get_values_query(self, node, *, outer_ref=None) -> BaseExpression: - """Return a tile values query expression for use in a - ResourceInstanceQuerySet or TileQuerySet. - - The outer_ref names the resource instance field for use in the - subquery. 
It is spelled slightly differently when annotating - a Tile or a ResourceInstance. For resource instances, it's - "resourceinstanceid", otherwise "resourceinstance_id". - """ - base_lookup = self._get_base_orm_lookup(node) - - tile_query = models.TileModel.objects.filter(nodegroup_id=node.nodegroup.pk) - if outer_ref: - tile_query = tile_query.filter(resourceinstance_id=OuterRef(outer_ref)) - if node.nodegroup.cardinality == "n": - tile_query = tile_query.order_by("sortorder") - - tile_query = tile_query.values(base_lookup) - - if node.nodegroup.cardinality == "n": - return ArraySubquery(tile_query) - elif outer_ref: - return Subquery(tile_query) - else: - return tile_query - - def _get_base_orm_lookup(self, node): + def get_base_orm_lookup(self, node): return f"data__{node.pk}" def to_python(self, tile_val): diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 16f74bfc72..083c08df79 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2443,7 +2443,7 @@ def _get_base_orm_lookup(self, node): def to_python(self, tile_val): if tile_val is None: return tile_val - resource_ids = [inner["resourceId"] for inner in tile_val] + resource_ids = [inner["resourceId"] if inner else None for inner in tile_val] return resource_ids diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 47f4057310..dc881cbc76 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1319,7 +1319,7 @@ def save(self, index=False, user=None, **kwargs): ) add_to_update_fields(kwargs, "resource_instance_lifecycle_state") - if getattr(self, "_fetched_nodes", False): + if getattr(self, "_annotated_tiles", False): self._save_tiles_for_pythonic_model(index=index, **kwargs) self.save_edit(user=user) else: @@ -1327,7 +1327,7 @@ def save(self, index=False, user=None, **kwargs): def clean(self): """Raises a compound ValidationError with any failing tile values.""" - if getattr(self, 
"_fetched_nodes", False): + if getattr(self, "_annotated_tiles", False): nodegroups = ( NodeGroup.objects.filter(node__graph=self.graph) .distinct() diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index b9d455e198..ad31874c98 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -1,41 +1,11 @@ -from django.db import models +from django.contrib.postgres.expressions import ArraySubquery +from django.db.models import OuterRef, Prefetch, QuerySet, Subquery, Value +from django.db.models.expressions import BaseExpression from arches.app.models.utils import field_names -def _generate_tile_annotations(nodes, defer, only, invalid_names, outer_ref=None): - from arches.app.datatypes.datatypes import DataTypeFactory - - if defer and only and (overlap := set(defer).intersection(set(only))): - raise ValueError(f"Got intersecting defer/only args: {overlap}") - datatype_factory = DataTypeFactory() - node_alias_annotations = {} - for node in nodes: - if node.datatype == "semantic": - continue - if node.nodegroup_id is None: - continue - if (defer and node.alias in defer) or (only and node.alias not in only): - continue - if node.alias in invalid_names: - raise ValueError(f'"{node.alias}" clashes with a model field name.') - - datatype_instance = datatype_factory.get_instance(node.datatype) - tile_values_query = datatype_instance.get_values_query( - node, outer_ref=outer_ref - ) - node_alias_annotations[node.alias] = tile_values_query - - if not node_alias_annotations: - raise ValueError("All fields were excluded.") - for given_alias in only or []: - if given_alias not in node_alias_annotations: - raise ValueError(f'"{given_alias}" is not a valid node alias.') - - return node_alias_annotations - - -class TileQuerySet(models.QuerySet): +class TileQuerySet(QuerySet): @staticmethod def _root_node_for_nodegroup(graph_slug, root_node_alias): from arches.app.models.models import Node @@ -48,37 +18,42 @@ def 
_root_node_for_nodegroup(graph_slug, root_node_alias): .prefetch_related("nodegroup__children") .prefetch_related("nodegroup__children__children") ) - # TODO: make deterministic by checking source_identifier + # TODO: get last # https://github.com/archesproject/arches/issues/11565 - ret = qs.last() + ret = qs.filter(source_identifier=None).first() if ret is None: raise Node.DoesNotExist(f"graph: {graph_slug} node: {root_node_alias}") return ret def with_node_values( - self, nodes, *, defer=None, only=None, outer_ref=None, depth=1 + self, nodes, *, defer=None, only=None, lhs=None, outer_ref, depth=1 ): - # from arches.app.models.models import TileModel + from arches.app.models.models import TileModel node_alias_annotations = _generate_tile_annotations( nodes, defer=defer, only=only, invalid_names=field_names(self.model), + lhs=lhs, outer_ref=outer_ref, ) prefetches = [] - # TODO: debug this. - # if depth: - # prefetches.append( - # models.Prefetch( - # "parenttile", - # queryset=TileModel.objects.with_node_values( - # nodes, defer=defer, only=only, depth=depth - 1 - # ), - # ) - # ) + if depth: + prefetches.append( + Prefetch( + "children", + queryset=TileModel.objects.with_node_values( + nodes, + defer=defer, + only=only, + depth=depth - 1, + lhs="parenttile", + outer_ref="tileid", + ), + ) + ) self._fetched_nodes = [n for n in nodes if n.alias in node_alias_annotations] return ( @@ -120,14 +95,14 @@ def accumulate_nodes_below(nodegroup, acc): return ( self.filter(nodegroup_id=root_node.pk) .with_node_values( - branch_nodes, defer=defer, only=only, outer_ref="resourceinstance_id" + branch_nodes, defer=defer, only=only, lhs="pk", outer_ref="tileid" ) - .annotate(_nodegroup_alias=models.Value(root_node_alias)) + .annotate(_nodegroup_alias=Value(root_node_alias)) ) def _prefetch_related_objects(self): """Call datatype to_python() methods when materializing the QuerySet. - Discard annotations that do not pertain to this tile. 
+ Discard annotations that do not pertain to this nodegroup. """ from arches.app.datatypes.datatypes import DataTypeFactory @@ -141,12 +116,10 @@ def _prefetch_related_objects(self): tile_val = getattr(tile, node.alias, NOT_PROVIDED) if tile_val is not NOT_PROVIDED: datatype_instance = datatype_factory.get_instance(node.datatype) - # Immediately coalesce [None] (from ArraySubquery) to []. - if tile_val == [None]: - tile_val = [] python_val = datatype_instance.to_python(tile_val) setattr(tile, node.alias, python_val) - break + else: + delattr(tile, node.alias) def _clone(self): ret = super()._clone() @@ -155,7 +128,7 @@ def _clone(self): return ret -class ResourceInstanceQuerySet(models.QuerySet): +class ResourceInstanceQuerySet(QuerySet): def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=None): """Annotates a ResourceInstance QuerySet with tile data unpacked and mapped onto node aliases, e.g.: @@ -221,9 +194,11 @@ def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=Non defer=defer, only=only, invalid_names=field_names(self.model), + lhs=None, # TODO: AWKWARD outer_ref="resourceinstanceid", ) self._fetched_nodes = [n for n in nodes if n.alias in node_alias_annotations] + # TODO: there might be some way to prune unused annotations. 
if resource_ids: qs = self.filter(pk__in=resource_ids) @@ -231,16 +206,17 @@ def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=Non qs = self.filter(graph=source_graph) return qs.prefetch_related( "graph__node_set__nodegroup", - models.Prefetch( + Prefetch( "tilemodel_set", queryset=TileModel.objects.with_node_values( self._fetched_nodes, defer=defer, only=only, - outer_ref="resourceinstance_id", + lhs="pk", + outer_ref="tileid", ).annotate( cardinality=NodeGroup.objects.filter( - pk=models.OuterRef("nodegroup_id") + pk=OuterRef("nodegroup_id") ).values("cardinality") ), to_attr="_annotated_tiles", @@ -290,18 +266,74 @@ def _prefetch_related_objects(self): else: setattr(resource, ng_alias, annotated_tile) - if ( - annotated_tile.parenttile - and annotated_tile.parenttile.nodegroup_alias - ): - setattr( - annotated_tile, - annotated_tile.parenttile.nodegroup_alias, - annotated_tile.parenttile, - ) + for child_tile in annotated_tile.children.all(): + setattr(child_tile, ng_alias, annotated_tile.parenttile) def _clone(self): ret = super()._clone() if hasattr(self, "_fetched_nodes"): ret._fetched_nodes = self._fetched_nodes return ret + + +def _generate_tile_annotations(nodes, *, defer, only, invalid_names, lhs, outer_ref): + from arches.app.datatypes.datatypes import DataTypeFactory + + if defer and only and (overlap := set(defer).intersection(set(only))): + raise ValueError(f"Got intersecting defer/only args: {overlap}") + datatype_factory = DataTypeFactory() + node_alias_annotations = {} + for node in nodes: + if node.datatype == "semantic": + continue + if node.nodegroup_id is None: + continue + if (defer and node.alias in defer) or (only and node.alias not in only): + continue + if node.alias in invalid_names: + raise ValueError(f'"{node.alias}" clashes with a model field name.') + + datatype_instance = datatype_factory.get_instance(node.datatype) + tile_values_query = _get_values_query( + nodegroup=node.nodegroup, + 
base_lookup=datatype_instance.get_base_orm_lookup(node), + lhs=lhs, + outer_ref=outer_ref, + ) + node_alias_annotations[node.alias] = tile_values_query + + if not node_alias_annotations: + raise ValueError("All fields were excluded.") + for given_alias in only or []: + if given_alias not in node_alias_annotations: + raise ValueError(f'"{given_alias}" is not a valid node alias.') + + return node_alias_annotations + + +def _get_values_query( + nodegroup, base_lookup, *, lhs=None, outer_ref=None +) -> BaseExpression: + """Return a tile values query expression for use in a + ResourceInstanceQuerySet or TileQuerySet. + """ + from arches.app.models.models import TileModel + + # TODO: make this a little less fragile. + if lhs is None: + tile_query = TileModel.objects.filter( + nodegroup_id=nodegroup.pk, resourceinstance_id=OuterRef(outer_ref) + ) + elif lhs and outer_ref: + tile_query = TileModel.objects.filter(**{lhs: OuterRef(outer_ref)}) + else: + tile_query = TileModel.objects.filter(nodegroup_id=nodegroup.pk) + if nodegroup.cardinality == "n": + tile_query = tile_query.order_by("sortorder") + + tile_query = tile_query.values(base_lookup) + + if outer_ref == "tileid": + return Subquery(tile_query) + else: + return ArraySubquery(tile_query) From 633d9471e1a0fd3e59fe26f3277303fe8cbc7005 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Mon, 28 Oct 2024 18:08:45 -0400 Subject: [PATCH 050/115] Initial commit of ArchesModelSerializer TODO: finish re-orienting around nodegroups --- arches/app/models/serializers.py | 134 +++++++++++++++++++++++++++++++ arches/app/views/api/mixins.py | 87 ++++++++++++++++++++ arches/settings.py | 10 +++ pyproject.toml | 1 + releases/8.0.0.md | 17 ++-- 5 files changed, 243 insertions(+), 6 deletions(-) create mode 100644 arches/app/models/serializers.py create mode 100644 arches/app/views/api/mixins.py diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py new file mode 100644 index 0000000000..2cf0ceff00 --- /dev/null 
+++ b/arches/app/models/serializers.py @@ -0,0 +1,134 @@ +from copy import deepcopy + +from django.contrib.postgres.fields import ArrayField +from django.db.models import fields +from rest_framework import renderers +from rest_framework import serializers + +# from rest_framework.utils import model_meta + +from arches.app.models.models import Node, ResourceInstance, TileModel +from arches.app.utils.betterJSONSerializer import JSONSerializer + + +# Workaround for I18n_string fields +renderers.JSONRenderer.encoder_class = JSONSerializer +renderers.JSONOpenAPIRenderer.encoder_class = JSONSerializer + + +class ArchesTileSerializer(serializers.ModelSerializer): + DATATYPE_FIELD_MAPPING = { + "string": fields.CharField(null=True), # XXX + "number": fields.FloatField(null=True), + "concept": fields.CharField(null=True), + "concept-list": ArrayField(base_field=fields.CharField(), null=True), + "date": fields.CharField(null=True), # XXX + "node-value": fields.CharField(null=True), # XXX + "edtf": fields.CharField(null=True), # XXX + "annotation": fields.CharField(null=True), # XXX + "url": fields.URLField(null=True), + # "resource-instance": ForeignKey(to="self", on_delete=DO_NOTHING, null=True), + "resource-instance": fields.UUIDField(null=True), + "resource-instance-list": ArrayField(base_field=fields.UUIDField(), null=True), + "boolean": fields.BooleanField(null=True), + "domain-value": ArrayField(base_field=fields.CharField(), null=True), + "domain-value-list": ArrayField(base_field=fields.CharField(), null=True), + "non-localized-string": fields.CharField(null=True), + "geojson-feature-collection": fields.CharField(null=True), # XXX + "file-list": ArrayField(base_field=fields.CharField(), null=True), # XXX + # "reference" + } + + def get_default_field_names(self, declared_fields, model_info): + field_names = super().get_default_field_names(declared_fields, model_info) + aliases = self.__class__.Meta.fields + if aliases == "__all__": + aliases = ( + Node.objects.filter( 
+ graph__slug=self.__class__.Meta.graph_slug, + graph__source_identifier=None, + ) + .exclude(nodegroup=None) + .exclude(datatype="semantic") + .values_list("alias", flat=True) + ) + field_names.extend(aliases) + return field_names + + def build_unknown_field(self, field_name, model_class): + graph_slug = self.__class__.Meta.graph_slug + node = ( + Node.objects.filter( + graph__slug=graph_slug, + graph__source_identifier=None, + alias=field_name, + ) + .select_related() + .get() + ) + model_field = deepcopy(self.DATATYPE_FIELD_MAPPING[node.datatype]) + model_field.model = ResourceInstance + model_field.blank = not node.isrequired + + # if isinstance(model_field, ForeignKey) and node.nodegroup.cardinality == "1": + # relation_info = model_meta.RelationInfo( + # model_field=ForeignKey( + # "ResourceInstance", on_delete=DO_NOTHING, blank=model_field.blank, null=True + # ), + # related_model=ResourceInstance, + # to_many=node.datatype == "resource-instance-list", + # to_field=None, + # has_through_model=False, + # reverse=False, + # ) + # return self.build_relational_field(field_name, relation_info) + + if node.nodegroup.cardinality == "n": + model_field = ArrayField( + base_field=model_field, null=True, blank=model_field.blank + ) + model_field.model = ResourceInstance + + return self.build_standard_field(field_name, model_field) + + +class ArchesModelSerializer(serializers.ModelSerializer): + def get_default_field_names(self, declared_fields, model_info): + field_names = super().get_default_field_names(declared_fields, model_info) + aliases = self.__class__.Meta.fields + if aliases == "__all__": + aliases = ( + Node.objects.filter( + graph__slug=self.__class__.Meta.graph_slug, + # TODO: latest + graph__source_identifier=None, + ) + .exclude(nodegroup=None) + .exclude(datatype="semantic") + .values_list("alias", flat=True) + ) + field_names.extend(aliases) + return field_names + + def build_unknown_field(self, field_name, model_class): + graph_slug = 
self.__class__.Meta.graph_slug + node = ( + Node.objects.filter( + graph__slug=graph_slug, + # TODO: latest + graph__source_identifier=None, + alias=field_name, + ) + .select_related() + .get() + ) + model_field = deepcopy(self.DATATYPE_FIELD_MAPPING[node.datatype]) + model_field.model = TileModel + + if node.nodegroup.cardinality == "n": + model_field = ArrayField( + base_field=model_field, null=True, blank=model_field.blank + ) + model_field.model = ResourceInstance + + return self.build_standard_field(field_name, model_field) diff --git a/arches/app/views/api/mixins.py b/arches/app/views/api/mixins.py new file mode 100644 index 0000000000..95771b1c70 --- /dev/null +++ b/arches/app/views/api/mixins.py @@ -0,0 +1,87 @@ +from functools import partial + +from django.core.exceptions import ValidationError as DjangoValidationError +from rest_framework.exceptions import NotFound, ValidationError + +from arches.app.models.models import ResourceInstance +from arches.app.utils.permission_backend import ( + user_can_delete_resource, + user_can_edit_resource, + user_can_read_resource, +) + + +class ArchesModelAPIMixin: + def get_queryset(self): + fields = self.serializer_class.Meta.fields + if fields == "__all__": + fields = None + return ResourceInstance.as_model( + self.serializer_class.Meta.graph_slug, only=fields + ) + + def get_object(self, user=None, permission_callable=None): + ret = super().get_object() + if permission_callable and not permission_callable(user=user, resource=ret): + raise NotFound + ret.save = partial(ret.save, user=user) + return ret + + def create(self, request, *args, **kwargs): + self.get_object = partial( + self.get_object, + user=request.user, + permission_callable=user_can_edit_resource, + ) + return super().create(request, *args, **kwargs) + + def retrieve(self, request, *args, **kwargs): + self.get_object = partial( + self.get_object, + user=request.user, + permission_callable=user_can_read_resource, + ) + return super().retrieve(request, 
*args, **kwargs) + + def update(self, request, *args, **kwargs): + self.get_object = partial( + self.get_object, + user=request.user, + permission_callable=user_can_edit_resource, + ) + return super().update(request, *args, **kwargs) + + def destroy(self, request, *args, **kwargs): + self.get_object = partial( + self.get_object, + user=request.user, + permission_callable=user_can_delete_resource, + ) + return super().destroy(request, *args, **kwargs) + + def validate_tile_data_and_save(self, serializer): + """Re-raise ValidationError as DRF ValidationError. + + In 3.0 (2014), DRF decided to stop full_clean()'ing before save(), + which divorces DRF validation needs from model logic needing to + support the Django admin or similar ModelFormish patterns. + The stated reasons were: + - to avoid calling into big & scary full_clean(). + - to force expressing validation logic outside of models. + but adhering to that second point would be difficult in light of + how dynamically these fields are constructed. + + Discussion: + https://github.com/encode/django-rest-framework/discussions/7850 + """ + try: + serializer.save() + except DjangoValidationError as django_error: + # TODO: doesn't handle well inner lists, stringifies them + raise ValidationError(detail=django_error.error_dict) from django_error + + def perform_create(self, serializer): + self.validate_tile_data_and_save(serializer) + + def perform_update(self, serializer): + self.validate_tile_data_and_save(serializer) diff --git a/arches/settings.py b/arches/settings.py index 4645a5dbbc..5a8b239622 100644 --- a/arches/settings.py +++ b/arches/settings.py @@ -373,6 +373,7 @@ "guardian", "captcha", "revproxy", + "rest_framework", "corsheaders", "oauth2_provider", "django_celery_results", @@ -406,6 +407,15 @@ "django_hosts.middleware.HostsResponseMiddleware" ) +# TODO: choose most appropriate default. 
+REST_FRAMEWORK = { + # Use Django's standard `django.contrib.auth` permissions, + # or allow read-only access for unauthenticated users. + "DEFAULT_PERMISSION_CLASSES": [ + "rest_framework.permissions.DjangoModelPermissionsOrAnonReadOnly" + ] +} + WEBPACK_LOADER = { "DEFAULT": { "STATS_FILE": os.path.join(ROOT_DIR, "..", "webpack/webpack-stats.json"), diff --git a/pyproject.toml b/pyproject.toml index d1ab7a1730..9a4dc98361 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ dependencies = [ "django-recaptcha==3.0.0", "django-revproxy==0.12.0", "django-webpack-loader==2.0.1", + "djangorestframework==3.15.2", "edtf==4.0.1", "elasticsearch>=8.3.1,<9.0.0", "filetype==1.2.0", diff --git a/releases/8.0.0.md b/releases/8.0.0.md index 534dd729c2..9d3379cc64 100644 --- a/releases/8.0.0.md +++ b/releases/8.0.0.md @@ -5,6 +5,8 @@ Arches 8.0.0 Release Notes - 9613 Adds editable_future_graphs and the ability to update Graphs without unpublishing. - 11042 Adds `ResourceInstanceLifecycle`s and `ResourceInstanceLifecycleState`s - Add token-based CSS theming [#11262](https://github.com/archesproject/arches/issues/11262) +- Add `ArchesModelSerializer` for generating human-friendly REST APIs for tile creates, updates, and deletes []() +- Add querying interface for tile data by node alias []() - Support Python 3.13 [#11550](https://github.com/archesproject/arches/pull/11550) ### Performance improvements @@ -39,6 +41,7 @@ Python: openpyxl: 3.1.5 Added: + djangorestframework: 3.15.2 Removed: tomli @@ -80,12 +83,14 @@ JavaScript: 1. Remove "3.10" from the `python-version` matrix in `.github/workflows/main.yml`. -1. In settings.py, add the following key to `DATABASES` to [improve indexing performance](https://github.com/archesproject/arches/issues/11382): - ``` - "OPTIONS": { - "options": "-c cursor_tuple_fraction=1", - }, - ``` +1. 
In settings.py: + - add the following key to `DATABASES` to [improve indexing performance](https://github.com/archesproject/arches/issues/11382): + ``` + "OPTIONS": { + "options": "-c cursor_tuple_fraction=1", + }, + ``` + - add `rest_framework` to `INSTALLED_APPS` if you wish to use an [ArchesModelSerializer]() to build REST APIs for your resource models. 1. Update your frontend dependencies: ``` From a420ed0c2968e904bb778a47ebcdd72573c8e81c Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 29 Oct 2024 09:34:55 -0400 Subject: [PATCH 051/115] Move as_nodegroup() --- arches/app/models/models.py | 63 +++++++++++++++++++++++++++++--- arches/app/models/querysets.py | 67 +++------------------------------- 2 files changed, 63 insertions(+), 67 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index dc881cbc76..43ad911457 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -20,19 +20,15 @@ from django.core import checks from django.core.exceptions import ObjectDoesNotExist from django.db import connection -from django.db.models import JSONField from django.core.exceptions import ValidationError from django.core.serializers.json import DjangoJSONEncoder from django.core.validators import RegexValidator, validate_slug from django.db import transaction from django.db.models import JSONField, Max, Q +from django.db.models import Value as ORMValue from django.db.models.constraints import UniqueConstraint from django.utils import timezone, translation from django.utils.translation import gettext_lazy as _ -from django.contrib.auth.models import User -from django.contrib.auth.models import Group -from django.core.validators import validate_slug -from django.core.exceptions import ValidationError # can't use "arches.app.models.system_settings.SystemSettings" because of circular refernce issue # so make sure the only settings we use in this file are ones that are static (fixed at run time) @@ -1251,7 +1247,7 @@ def 
__str__(self): @classmethod def as_model(cls, *args, **kwargs): - return cls.objects.with_tiles(*args, **kwargs) + return cls.objects.with_nodegroups(*args, **kwargs) def get_initial_resource_instance_lifecycle_state(self, *args, **kwargs): try: @@ -1855,6 +1851,61 @@ def nodegroup_alias(self): return node_for_nodegroup.alias return None + @classmethod + def as_nodegroup(cls, root_node_alias, *, graph_slug, defer=None, only=None): + """ + Entry point for filtering arches data by nodegroups (instead of grouping by + resource.) + + >>> statements = TileModel.as_nodegroup("statement", graph_slug="concept") + >>> results = statements.filter(statement_content__0__en__value__startswith="F") # todo: make more ergonomic, remove limitation of 0 + >>> for result in results: + print(result.resourceinstance) + print("\t", result.statement_content[0]["en"]["value"]) # TODO: unwrap/string viewmodel + + + Fluorescence stimulated by x-rays; ... + + Fine-quality calf or lamb parchment ... + """ + + root_node = cls._root_node_for_nodegroup(graph_slug, root_node_alias) + + def accumulate_nodes_below(nodegroup, acc): + acc.extend(list(nodegroup.node_set.all())) + for child_nodegroup in nodegroup.children.all(): + accumulate_nodes_below(child_nodegroup, acc) + + branch_nodes = [] + accumulate_nodes_below(root_node.nodegroup, acc=branch_nodes) + + return ( + cls.objects.filter(nodegroup_id=root_node.pk) + .with_node_values( + branch_nodes, defer=defer, only=only, lhs="pk", outer_ref="tileid" + ) + .annotate(_nodegroup_alias=ORMValue(root_node_alias)) + ) + + @staticmethod + def _root_node_for_nodegroup(graph_slug, root_node_alias): + from arches.app.models.models import Node + + qs = ( + Node.objects.filter(graph__slug=graph_slug, alias=root_node_alias) + .select_related("nodegroup") + .prefetch_related("nodegroup__node_set") + # Prefetching to a depth of 2 seems like a good trade-off for now. 
+ .prefetch_related("nodegroup__children") + .prefetch_related("nodegroup__children__children") + ) + # TODO: get last + # https://github.com/archesproject/arches/issues/11565 + ret = qs.filter(source_identifier=None).first() + if ret is None: + raise Node.DoesNotExist(f"graph: {graph_slug} node: {root_node_alias}") + return ret + def is_fully_provisional(self): return bool(self.provisionaledits and not any(self.data.values())) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index ad31874c98..7aff5f3841 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -1,30 +1,11 @@ from django.contrib.postgres.expressions import ArraySubquery -from django.db.models import OuterRef, Prefetch, QuerySet, Subquery, Value +from django.db.models import OuterRef, Prefetch, QuerySet, Subquery from django.db.models.expressions import BaseExpression from arches.app.models.utils import field_names class TileQuerySet(QuerySet): - @staticmethod - def _root_node_for_nodegroup(graph_slug, root_node_alias): - from arches.app.models.models import Node - - qs = ( - Node.objects.filter(graph__slug=graph_slug, alias=root_node_alias) - .select_related("nodegroup") - .prefetch_related("nodegroup__node_set") - # Prefetching to a depth of 2 seems like a good trade-off for now. - .prefetch_related("nodegroup__children") - .prefetch_related("nodegroup__children__children") - ) - # TODO: get last - # https://github.com/archesproject/arches/issues/11565 - ret = qs.filter(source_identifier=None).first() - if ret is None: - raise Node.DoesNotExist(f"graph: {graph_slug} node: {root_node_alias}") - return ret - def with_node_values( self, nodes, *, defer=None, only=None, lhs=None, outer_ref, depth=1 ): @@ -65,41 +46,6 @@ def with_node_values( .order_by("sortorder") ) - def as_nodegroup(self, root_node_alias, *, graph_slug, defer=None, only=None): - """ - Entry point for filtering arches data by nodegroups (instead of grouping by - resource.) 
- - >>> statements = TileModel.objects.as_nodegroup("statement", graph_slug="concept") - >>> results = statements.filter(statement_content__0__en__value__startswith="F") # todo: make more ergonomic, remove limitation of 0 - >>> for result in results: - print(result.resourceinstance) - print("\t", result.statement_content[0]["en"]["value"]) # TODO: unwrap/string viewmodel - - - Fluorescence stimulated by x-rays; ... - - Fine-quality calf or lamb parchment ... - """ - - root_node = self._root_node_for_nodegroup(graph_slug, root_node_alias) - - def accumulate_nodes_below(nodegroup, acc): - acc.extend(list(nodegroup.node_set.all())) - for child_nodegroup in nodegroup.children.all(): - accumulate_nodes_below(child_nodegroup, acc) - - branch_nodes = [] - accumulate_nodes_below(root_node.nodegroup, acc=branch_nodes) - - return ( - self.filter(nodegroup_id=root_node.pk) - .with_node_values( - branch_nodes, defer=defer, only=only, lhs="pk", outer_ref="tileid" - ) - .annotate(_nodegroup_alias=Value(root_node_alias)) - ) - def _prefetch_related_objects(self): """Call datatype to_python() methods when materializing the QuerySet. Discard annotations that do not pertain to this nodegroup. 
@@ -129,11 +75,13 @@ def _clone(self): class ResourceInstanceQuerySet(QuerySet): - def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=None): + def with_nodegroups( + self, graph_slug=None, *, resource_ids=None, defer=None, only=None + ): """Annotates a ResourceInstance QuerySet with tile data unpacked - and mapped onto node aliases, e.g.: + and mapped onto nodegroup aliases, e.g.: - >>> concepts = ResourceInstance.objects.with_tiles("concept") + >>> concepts = ResourceInstance.objects.with_nodegroups("concept") With slightly fewer keystrokes: @@ -155,9 +103,6 @@ def with_tiles(self, graph_slug=None, *, resource_ids=None, defer=None, only=Non Filter on any nested node at the top level ("shallow query") >>> subset = concepts.filter(statement_content__isnull=False)[:4] - - Access through nodegroup names: - >>> for concept in subset: print(concept) for stmt in concept.statement: # TODO: should name with _set (?) From df9224279262a2ab66bd636c214aaa0da90c5fb0 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 29 Oct 2024 10:07:13 -0400 Subject: [PATCH 052/115] Continue fleshing out tile/instance serializers --- arches/app/models/querysets.py | 55 +++++++++++++------- arches/app/models/serializers.py | 87 ++++++++------------------------ arches/app/views/api/mixins.py | 15 ++++-- 3 files changed, 70 insertions(+), 87 deletions(-) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 7aff5f3841..d69a2ff353 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -15,7 +15,7 @@ def with_node_values( nodes, defer=defer, only=only, - invalid_names=field_names(self.model), + model=self.model, lhs=lhs, outer_ref=outer_ref, ) @@ -87,9 +87,9 @@ def with_nodegroups( >>> concepts = ResourceInstance.as_model("concept") - Or with defer/only as in the QuerySet interface: + Or direct certain nodegroups with defer/only as in the QuerySet interface: - >>> partial_concepts = 
ResourceInstance.as_model("concept", only=["n1", "n2"]) + >>> partial_concepts = ResourceInstance.as_model("concept", only=["ng1", "ng2"]) Example: @@ -126,6 +126,7 @@ def with_nodegroups( ) try: # Prefetch sibling nodes for use in _prefetch_related_objects() + # and _generate_tile_annotations(). source_graph = graph_query.prefetch_related( "node_set__nodegroup__node_set" ).get() @@ -138,7 +139,7 @@ def with_nodegroups( nodes, defer=defer, only=only, - invalid_names=field_names(self.model), + model=self.model, lhs=None, # TODO: AWKWARD outer_ref="resourceinstanceid", ) @@ -155,8 +156,7 @@ def with_nodegroups( "tilemodel_set", queryset=TileModel.objects.with_node_values( self._fetched_nodes, - defer=defer, - only=only, + only=[n.alias for n in self._fetched_nodes], lhs="pk", outer_ref="tileid", ).annotate( @@ -179,12 +179,9 @@ def _prefetch_related_objects(self): root_nodes = [] for node in self._fetched_nodes: - # TODO: less roundabout lookup, see earlier siblings prefetch. - root_node = None - for sibling_node in node.nodegroup.node_set.all(): - if sibling_node.pk == node.nodegroup_id: - root_node = sibling_node - break + root_node = _find_root_node( + node.nodegroup.node_set.all(), node.nodegroup_id + ) root_nodes.append(root_node) for resource in self._result_cache: @@ -221,20 +218,34 @@ def _clone(self): return ret -def _generate_tile_annotations(nodes, *, defer, only, invalid_names, lhs, outer_ref): +def _generate_tile_annotations(nodes, *, defer, only, model, lhs, outer_ref): from arches.app.datatypes.datatypes import DataTypeFactory + from arches.app.models.models import ResourceInstance, TileModel if defer and only and (overlap := set(defer).intersection(set(only))): raise ValueError(f"Got intersecting defer/only args: {overlap}") datatype_factory = DataTypeFactory() node_alias_annotations = {} + invalid_names = field_names(model) + is_resource = True + if ResourceInstance in model.mro(): + is_resource = True + elif TileModel in model.mro(): + is_resource = 
False + else: + raise ValueError(model) for node in nodes: if node.datatype == "semantic": continue if node.nodegroup_id is None: continue - if (defer and node.alias in defer) or (only and node.alias not in only): - continue + if is_resource: + root = _find_root_node(node.nodegroup.node_set.all(), node.nodegroup_id) + if (defer and root.alias in defer) or (only and root.alias not in only): + continue + else: + if (defer and node.alias in defer) or (only and node.alias not in only): + continue if node.alias in invalid_names: raise ValueError(f'"{node.alias}" clashes with a model field name.') @@ -249,13 +260,21 @@ def _generate_tile_annotations(nodes, *, defer, only, invalid_names, lhs, outer_ if not node_alias_annotations: raise ValueError("All fields were excluded.") - for given_alias in only or []: - if given_alias not in node_alias_annotations: - raise ValueError(f'"{given_alias}" is not a valid node alias.') + # TODO: also add some safety around bad nodegroups. + if not is_resource: + for given_alias in only or []: + if given_alias not in node_alias_annotations: + raise ValueError(f'"{given_alias}" is not a valid node alias.') return node_alias_annotations +def _find_root_node(prefetched_siblings, nodegroup_id): + for sibling_node in prefetched_siblings: + if sibling_node.pk == nodegroup_id: + return sibling_node + + def _get_values_query( nodegroup, base_lookup, *, lhs=None, outer_ref=None ) -> BaseExpression: diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py index 2cf0ceff00..fcfb438e8f 100644 --- a/arches/app/models/serializers.py +++ b/arches/app/models/serializers.py @@ -5,9 +5,7 @@ from rest_framework import renderers from rest_framework import serializers -# from rest_framework.utils import model_meta - -from arches.app.models.models import Node, ResourceInstance, TileModel +from arches.app.models.models import Node from arches.app.utils.betterJSONSerializer import JSONSerializer @@ -20,19 +18,18 @@ class 
ArchesTileSerializer(serializers.ModelSerializer): DATATYPE_FIELD_MAPPING = { "string": fields.CharField(null=True), # XXX "number": fields.FloatField(null=True), - "concept": fields.CharField(null=True), - "concept-list": ArrayField(base_field=fields.CharField(), null=True), + "concept": fields.UUIDField(null=True), + "concept-list": ArrayField(base_field=fields.UUIDField(), null=True), "date": fields.CharField(null=True), # XXX "node-value": fields.CharField(null=True), # XXX "edtf": fields.CharField(null=True), # XXX "annotation": fields.CharField(null=True), # XXX "url": fields.URLField(null=True), - # "resource-instance": ForeignKey(to="self", on_delete=DO_NOTHING, null=True), "resource-instance": fields.UUIDField(null=True), "resource-instance-list": ArrayField(base_field=fields.UUIDField(), null=True), "boolean": fields.BooleanField(null=True), - "domain-value": ArrayField(base_field=fields.CharField(), null=True), - "domain-value-list": ArrayField(base_field=fields.CharField(), null=True), + "domain-value": ArrayField(base_field=fields.UUIDField(), null=True), + "domain-value-list": ArrayField(base_field=fields.UUIDField(), null=True), "non-localized-string": fields.CharField(null=True), "geojson-feature-collection": fields.CharField(null=True), # XXX "file-list": ArrayField(base_field=fields.CharField(), null=True), # XXX @@ -41,14 +38,25 @@ class ArchesTileSerializer(serializers.ModelSerializer): def get_default_field_names(self, declared_fields, model_info): field_names = super().get_default_field_names(declared_fields, model_info) + try: + field_names.remove("data") + except ValueError: + pass aliases = self.__class__.Meta.fields if aliases == "__all__": - aliases = ( + # TODO: latest graph + root_node = ( Node.objects.filter( graph__slug=self.__class__.Meta.graph_slug, + alias=self.__class__.Meta.root_node, graph__source_identifier=None, ) - .exclude(nodegroup=None) + .select_related("nodegroup") + .prefetch_related("nodegroup__node_set") + .get() + ) 
+ aliases = ( + root_node.nodegroup.node_set.exclude(nodegroup=None) .exclude(datatype="semantic") .values_list("alias", flat=True) ) @@ -67,28 +75,9 @@ def build_unknown_field(self, field_name, model_class): .get() ) model_field = deepcopy(self.DATATYPE_FIELD_MAPPING[node.datatype]) - model_field.model = ResourceInstance + model_field.model = model_class model_field.blank = not node.isrequired - # if isinstance(model_field, ForeignKey) and node.nodegroup.cardinality == "1": - # relation_info = model_meta.RelationInfo( - # model_field=ForeignKey( - # "ResourceInstance", on_delete=DO_NOTHING, blank=model_field.blank, null=True - # ), - # related_model=ResourceInstance, - # to_many=node.datatype == "resource-instance-list", - # to_field=None, - # has_through_model=False, - # reverse=False, - # ) - # return self.build_relational_field(field_name, relation_info) - - if node.nodegroup.cardinality == "n": - model_field = ArrayField( - base_field=model_field, null=True, blank=model_field.blank - ) - model_field.model = ResourceInstance - return self.build_standard_field(field_name, model_field) @@ -96,39 +85,7 @@ class ArchesModelSerializer(serializers.ModelSerializer): def get_default_field_names(self, declared_fields, model_info): field_names = super().get_default_field_names(declared_fields, model_info) aliases = self.__class__.Meta.fields - if aliases == "__all__": - aliases = ( - Node.objects.filter( - graph__slug=self.__class__.Meta.graph_slug, - # TODO: latest - graph__source_identifier=None, - ) - .exclude(nodegroup=None) - .exclude(datatype="semantic") - .values_list("alias", flat=True) - ) - field_names.extend(aliases) + if aliases != "__all__": + raise NotImplementedError # TODO... 
+ field_names.extend(self.__class__.Meta.nodegroups) return field_names - - def build_unknown_field(self, field_name, model_class): - graph_slug = self.__class__.Meta.graph_slug - node = ( - Node.objects.filter( - graph__slug=graph_slug, - # TODO: latest - graph__source_identifier=None, - alias=field_name, - ) - .select_related() - .get() - ) - model_field = deepcopy(self.DATATYPE_FIELD_MAPPING[node.datatype]) - model_field.model = TileModel - - if node.nodegroup.cardinality == "n": - model_field = ArrayField( - base_field=model_field, null=True, blank=model_field.blank - ) - model_field.model = ResourceInstance - - return self.build_standard_field(field_name, model_field) diff --git a/arches/app/views/api/mixins.py b/arches/app/views/api/mixins.py index 95771b1c70..b9094fb8bb 100644 --- a/arches/app/views/api/mixins.py +++ b/arches/app/views/api/mixins.py @@ -3,7 +3,7 @@ from django.core.exceptions import ValidationError as DjangoValidationError from rest_framework.exceptions import NotFound, ValidationError -from arches.app.models.models import ResourceInstance +from arches.app.models.models import ResourceInstance, TileModel from arches.app.utils.permission_backend import ( user_can_delete_resource, user_can_edit_resource, @@ -16,9 +16,16 @@ def get_queryset(self): fields = self.serializer_class.Meta.fields if fields == "__all__": fields = None - return ResourceInstance.as_model( - self.serializer_class.Meta.graph_slug, only=fields - ) + else: + raise NotImplementedError + meta = self.serializer_class.Meta + if ResourceInstance in meta.model.mro(): + return meta.model.as_model(meta.graph_slug, only=meta.nodegroups) + elif TileModel in meta.model.mro(): + return meta.model.as_nodegroup( + meta.root_node, graph_slug=meta.graph_slug, only=fields + ) + raise NotImplementedError def get_object(self, user=None, permission_callable=None): ret = super().get_object() From 5eb6ec9a7511c5587431ab9a583ce14416d729cd Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 29 
Oct 2024 10:22:19 -0400 Subject: [PATCH 053/115] Move some helpers to utils --- arches/app/models/querysets.py | 101 ++------------------------------- arches/app/models/utils.py | 90 +++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 95 deletions(-) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index d69a2ff353..444e6db810 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -1,8 +1,6 @@ -from django.contrib.postgres.expressions import ArraySubquery -from django.db.models import OuterRef, Prefetch, QuerySet, Subquery -from django.db.models.expressions import BaseExpression +from django.db.models import OuterRef, Prefetch, QuerySet -from arches.app.models.utils import field_names +from arches.app.models.utils import find_root_node, generate_tile_annotations class TileQuerySet(QuerySet): @@ -11,7 +9,7 @@ def with_node_values( ): from arches.app.models.models import TileModel - node_alias_annotations = _generate_tile_annotations( + node_alias_annotations = generate_tile_annotations( nodes, defer=defer, only=only, @@ -126,7 +124,7 @@ def with_nodegroups( ) try: # Prefetch sibling nodes for use in _prefetch_related_objects() - # and _generate_tile_annotations(). + # and generate_tile_annotations(). 
source_graph = graph_query.prefetch_related( "node_set__nodegroup__node_set" ).get() @@ -135,7 +133,7 @@ def with_nodegroups( raise nodes = source_graph.node_set.all() - node_alias_annotations = _generate_tile_annotations( + node_alias_annotations = generate_tile_annotations( nodes, defer=defer, only=only, @@ -179,9 +177,7 @@ def _prefetch_related_objects(self): root_nodes = [] for node in self._fetched_nodes: - root_node = _find_root_node( - node.nodegroup.node_set.all(), node.nodegroup_id - ) + root_node = find_root_node(node.nodegroup.node_set.all(), node.nodegroup_id) root_nodes.append(root_node) for resource in self._result_cache: @@ -216,88 +212,3 @@ def _clone(self): if hasattr(self, "_fetched_nodes"): ret._fetched_nodes = self._fetched_nodes return ret - - -def _generate_tile_annotations(nodes, *, defer, only, model, lhs, outer_ref): - from arches.app.datatypes.datatypes import DataTypeFactory - from arches.app.models.models import ResourceInstance, TileModel - - if defer and only and (overlap := set(defer).intersection(set(only))): - raise ValueError(f"Got intersecting defer/only args: {overlap}") - datatype_factory = DataTypeFactory() - node_alias_annotations = {} - invalid_names = field_names(model) - is_resource = True - if ResourceInstance in model.mro(): - is_resource = True - elif TileModel in model.mro(): - is_resource = False - else: - raise ValueError(model) - for node in nodes: - if node.datatype == "semantic": - continue - if node.nodegroup_id is None: - continue - if is_resource: - root = _find_root_node(node.nodegroup.node_set.all(), node.nodegroup_id) - if (defer and root.alias in defer) or (only and root.alias not in only): - continue - else: - if (defer and node.alias in defer) or (only and node.alias not in only): - continue - if node.alias in invalid_names: - raise ValueError(f'"{node.alias}" clashes with a model field name.') - - datatype_instance = datatype_factory.get_instance(node.datatype) - tile_values_query = _get_values_query( - 
nodegroup=node.nodegroup, - base_lookup=datatype_instance.get_base_orm_lookup(node), - lhs=lhs, - outer_ref=outer_ref, - ) - node_alias_annotations[node.alias] = tile_values_query - - if not node_alias_annotations: - raise ValueError("All fields were excluded.") - # TODO: also add some safety around bad nodegroups. - if not is_resource: - for given_alias in only or []: - if given_alias not in node_alias_annotations: - raise ValueError(f'"{given_alias}" is not a valid node alias.') - - return node_alias_annotations - - -def _find_root_node(prefetched_siblings, nodegroup_id): - for sibling_node in prefetched_siblings: - if sibling_node.pk == nodegroup_id: - return sibling_node - - -def _get_values_query( - nodegroup, base_lookup, *, lhs=None, outer_ref=None -) -> BaseExpression: - """Return a tile values query expression for use in a - ResourceInstanceQuerySet or TileQuerySet. - """ - from arches.app.models.models import TileModel - - # TODO: make this a little less fragile. - if lhs is None: - tile_query = TileModel.objects.filter( - nodegroup_id=nodegroup.pk, resourceinstance_id=OuterRef(outer_ref) - ) - elif lhs and outer_ref: - tile_query = TileModel.objects.filter(**{lhs: OuterRef(outer_ref)}) - else: - tile_query = TileModel.objects.filter(nodegroup_id=nodegroup.pk) - if nodegroup.cardinality == "n": - tile_query = tile_query.order_by("sortorder") - - tile_query = tile_query.values(base_lookup) - - if outer_ref == "tileid": - return Subquery(tile_query) - else: - return ArraySubquery(tile_query) diff --git a/arches/app/models/utils.py b/arches/app/models/utils.py index 546d278e65..1d419aabd4 100644 --- a/arches/app/models/utils.py +++ b/arches/app/models/utils.py @@ -1,3 +1,8 @@ +from django.contrib.postgres.expressions import ArraySubquery +from django.db.models import OuterRef, Subquery +from django.db.models.expressions import BaseExpression + + def add_to_update_fields(kwargs, field_name): """ Update the `update_field` arg inside `kwargs` (if present) 
in-place @@ -16,3 +21,88 @@ def add_to_update_fields(kwargs, field_name): def field_names(instance_or_class): return {f.name for f in instance_or_class._meta.fields} + + +def generate_tile_annotations(nodes, *, defer, only, model, lhs, outer_ref): + from arches.app.datatypes.datatypes import DataTypeFactory + from arches.app.models.models import ResourceInstance, TileModel + + if defer and only and (overlap := set(defer).intersection(set(only))): + raise ValueError(f"Got intersecting defer/only args: {overlap}") + datatype_factory = DataTypeFactory() + node_alias_annotations = {} + invalid_names = field_names(model) + is_resource = True + if ResourceInstance in model.mro(): + is_resource = True + elif TileModel in model.mro(): + is_resource = False + else: + raise ValueError(model) + for node in nodes: + if node.datatype == "semantic": + continue + if node.nodegroup_id is None: + continue + if is_resource: + root = find_root_node(node.nodegroup.node_set.all(), node.nodegroup_id) + if (defer and root.alias in defer) or (only and root.alias not in only): + continue + else: + if (defer and node.alias in defer) or (only and node.alias not in only): + continue + if node.alias in invalid_names: + raise ValueError(f'"{node.alias}" clashes with a model field name.') + + datatype_instance = datatype_factory.get_instance(node.datatype) + tile_values_query = get_values_query( + nodegroup=node.nodegroup, + base_lookup=datatype_instance.get_base_orm_lookup(node), + lhs=lhs, + outer_ref=outer_ref, + ) + node_alias_annotations[node.alias] = tile_values_query + + if not node_alias_annotations: + raise ValueError("All fields were excluded.") + # TODO: also add some safety around bad nodegroups. 
+ if not is_resource: + for given_alias in only or []: + if given_alias not in node_alias_annotations: + raise ValueError(f'"{given_alias}" is not a valid node alias.') + + return node_alias_annotations + + +def find_root_node(prefetched_siblings, nodegroup_id): + for sibling_node in prefetched_siblings: + if sibling_node.pk == nodegroup_id: + return sibling_node + + +def get_values_query( + nodegroup, base_lookup, *, lhs=None, outer_ref=None +) -> BaseExpression: + """Return a tile values query expression for use in a + ResourceInstanceQuerySet or TileQuerySet. + """ + from arches.app.models.models import TileModel + + # TODO: make this a little less fragile. + if lhs is None: + tile_query = TileModel.objects.filter( + nodegroup_id=nodegroup.pk, resourceinstance_id=OuterRef(outer_ref) + ) + elif lhs and outer_ref: + tile_query = TileModel.objects.filter(**{lhs: OuterRef(outer_ref)}) + else: + tile_query = TileModel.objects.filter(nodegroup_id=nodegroup.pk) + if nodegroup.cardinality == "n": + tile_query = tile_query.order_by("sortorder") + + tile_query = tile_query.values(base_lookup) + + if outer_ref == "tileid": + return Subquery(tile_query) + else: + return ArraySubquery(tile_query) From dd5e8074a2598d56a27f58d526145b58ed1d16c4 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 29 Oct 2024 14:20:07 -0400 Subject: [PATCH 054/115] Make further fields blank --- .../11044_make_further_fields_blank.py | 23 +++++++++++++++++++ arches/app/models/models.py | 4 ++-- 2 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 arches/app/models/migrations/11044_make_further_fields_blank.py diff --git a/arches/app/models/migrations/11044_make_further_fields_blank.py b/arches/app/models/migrations/11044_make_further_fields_blank.py new file mode 100644 index 0000000000..054022cca4 --- /dev/null +++ b/arches/app/models/migrations/11044_make_further_fields_blank.py @@ -0,0 +1,23 @@ +# Generated by Django 5.1.2 on 2024-10-29 11:46 + +from django.db import 
migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("models", "11043_tile_nodegroup_add_related_names"), + ] + + operations = [ + migrations.AlterField( + model_name="resourceinstance", + name="resourceinstanceid", + field=models.UUIDField(blank=True, primary_key=True, serialize=False), + ), + migrations.AlterField( + model_name="tilemodel", + name="tileid", + field=models.UUIDField(blank=True, primary_key=True, serialize=False), + ), + ] diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 43ad911457..a3e131fb9e 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1199,7 +1199,7 @@ class Meta: class ResourceInstance(models.Model): - resourceinstanceid = models.UUIDField(primary_key=True) + resourceinstanceid = models.UUIDField(primary_key=True, blank=True) graph = models.ForeignKey(GraphModel, db_column="graphid", on_delete=models.CASCADE) graph_publication = models.ForeignKey( GraphXPublishedGraph, @@ -1803,7 +1803,7 @@ class TileModel(models.Model): # Tile """ - tileid = models.UUIDField(primary_key=True) + tileid = models.UUIDField(primary_key=True, blank=True) resourceinstance = models.ForeignKey( ResourceInstance, db_column="resourceinstanceid", on_delete=models.CASCADE ) From 786334bedffc225c6004641664b44ca079173946 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 29 Oct 2024 14:41:35 -0400 Subject: [PATCH 055/115] Fix additional subquery bugs --- arches/app/models/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index a3e131fb9e..54f072f800 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1858,10 +1858,10 @@ def as_nodegroup(cls, root_node_alias, *, graph_slug, defer=None, only=None): resource.) 
>>> statements = TileModel.as_nodegroup("statement", graph_slug="concept") - >>> results = statements.filter(statement_content__0__en__value__startswith="F") # todo: make more ergonomic, remove limitation of 0 + >>> results = statements.filter(statement_content__en__value__startswith="F") # todo: make more ergonomic >>> for result in results: print(result.resourceinstance) - print("\t", result.statement_content[0]["en"]["value"]) # TODO: unwrap/string viewmodel + print("\t", result.statement_content["en"]["value"]) # TODO: unwrap/string viewmodel Fluorescence stimulated by x-rays; ... From 6b58ec93ac2a07a9c123323c4909038412a7223a Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 29 Oct 2024 17:58:41 -0400 Subject: [PATCH 056/115] Harden concept{list} dt validation against UUIDs --- arches/app/datatypes/concept_types.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arches/app/datatypes/concept_types.py b/arches/app/datatypes/concept_types.py index 3db5c7aa2d..35b0c40467 100644 --- a/arches/app/datatypes/concept_types.py +++ b/arches/app/datatypes/concept_types.py @@ -253,6 +253,8 @@ def validate( return errors def transform_value_for_tile(self, value, **kwargs): + if isinstance(value, uuid.UUID): + return str(value) try: stripped = value.strip() uuid.UUID(stripped) @@ -425,7 +427,10 @@ def validate( if value is not None: validate_concept = DataTypeFactory().get_instance("concept") for v in value: - val = v.strip() + if isinstance(v, uuid.UUID): + val = str(v) + else: + val = v.strip() errors += validate_concept.validate(val, row_number) return errors @@ -433,6 +438,8 @@ def transform_value_for_tile(self, value, **kwargs): ret = [] if not isinstance(value, list): value = [value] + if all(isinstance(inner, uuid.UUID) for inner in value): + return [str(inner) for inner in value] for val in csv.reader(value, delimiter=",", quotechar='"'): lines = [line for line in val] for v in lines: From 0d7a9315f48eceee97d14b0ff03290eaaef52d8c Mon Sep 
17 00:00:00 2001 From: Jacob Walls Date: Tue, 29 Oct 2024 18:12:58 -0400 Subject: [PATCH 057/115] Update save machinery --- arches/app/models/models.py | 222 ++++++++++++++----------------- arches/app/models/querysets.py | 3 + arches/app/models/serializers.py | 2 + arches/app/views/api/mixins.py | 1 + 4 files changed, 105 insertions(+), 123 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 54f072f800..9cbab40085 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -6,6 +6,8 @@ import logging import traceback from collections import defaultdict +from itertools import zip_longest +from operator import itemgetter from arches.app.const import ExtensionType from arches.app.utils.module_importer import get_class_from_modulename @@ -1315,7 +1317,7 @@ def save(self, index=False, user=None, **kwargs): ) add_to_update_fields(kwargs, "resource_instance_lifecycle_state") - if getattr(self, "_annotated_tiles", False): + if getattr(self, "_fetched_root_nodes", False): self._save_tiles_for_pythonic_model(index=index, **kwargs) self.save_edit(user=user) else: @@ -1323,13 +1325,8 @@ def save(self, index=False, user=None, **kwargs): def clean(self): """Raises a compound ValidationError with any failing tile values.""" - if getattr(self, "_annotated_tiles", False): - nodegroups = ( - NodeGroup.objects.filter(node__graph=self.graph) - .distinct() - .prefetch_related("node_set") - ) - self._update_tiles_from_pythonic_model_values(nodegroups) + if getattr(self, "_fetched_root_nodes", False): + self._update_tiles_from_pythonic_model_values() def _save_tiles_for_pythonic_model(self, index=False, **kwargs): """Raises a compound ValidationError with any failing tile values. 
@@ -1343,13 +1340,8 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): from arches.app.models.tile import Tile datatype_factory = DataTypeFactory() - nodegroups = ( - NodeGroup.objects.filter(node__graph=self.graph) - .distinct() - .prefetch_related("node_set") - ) - to_insert, to_update, to_delete = self._update_tiles_from_pythonic_model_values( - nodegroups + to_insert, to_update, to_delete = ( + self._update_tiles_from_pythonic_model_values() ) # Instantiate proxy models for now, but find a way to expose this @@ -1381,9 +1373,9 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): proxy_instance._Tile__postSave() for to_update_tile in to_update: - for nodegroup in nodegroups: - if to_update_tile.nodegroup_id == nodegroup.pk: - for node in nodegroup.node_set.all(): + for root_node in self._fetched_root_nodes: + if to_update_tile.nodegroup_id == root_node.nodegroup_id: + for node in root_node.nodegroup.node_set.all(): datatype = datatype_factory.get_instance(node.datatype) datatype.post_tile_save(to_update_tile, str(node.pk)) break @@ -1394,77 +1386,67 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): fields=kwargs.get("update_fields", None), ) - # Instantiate proxy model for now, but refactor & expose this on vanilla model. + # Instantiate proxy model for now, but refactor & expose this on vanilla model + proxy_resource = Resource.objects.get(pk=self.pk) + proxy_resource.save_descriptors() if index: - node_datatypes = {} - for node in self.graph.node_set.all(): - node_datatypes[str(node.pk)] = node.datatype - - proxy = Resource.objects.get(pk=self.pk) - # Stick the data we already have onto the proxy instance. 
- proxy.tiles = self._sorted_tiles_for_fetched_nodes - proxy.set_node_datatypes(node_datatypes) - proxy.index(fetchTiles=False) - - def _map_prefetched_tiles_to_nodegroup_ids(self): - tiles_by_nodegroup = defaultdict(list) - for tile_to_update in self._sorted_tiles_for_fetched_nodes: - tiles_by_nodegroup[tile_to_update.nodegroup_id].append(tile_to_update) - return tiles_by_nodegroup - - def _update_tiles_from_pythonic_model_values(self, nodegroups): + proxy_resource.index() + + def _update_tiles_from_pythonic_model_values(self): """Move values from model instance to prefetched tiles, and validate. - Raises ValidationError if new data fails datatype validation (and - thus may leave prefetched tiles in a partially consistent state.) + Raises ValidationError if new data fails datatype validation. """ from arches.app.datatypes.datatypes import DataTypeFactory datatype_factory = DataTypeFactory() - db_tiles_by_nodegroup_id = self._map_prefetched_tiles_to_nodegroup_ids() errors_by_node_alias = defaultdict(list) to_insert = set() to_update = set() to_delete = set() - for nodegroup in nodegroups: - node_aliases = [n.alias for n in nodegroup.node_set.all()] - db_tiles = db_tiles_by_nodegroup_id[nodegroup.pk] - working_tiles = [] - max_tile_length = 0 - for attribute_name in self._fetched_nodes.values(): - if attribute_name not in node_aliases: + NOT_PROVIDED = object() + original_tile_data_by_tile_id = {} + errors_by_node_alias = {} + for root_node in self._fetched_root_nodes: + new_tiles = getattr(self, root_node.alias, NOT_PROVIDED) + if new_tiles is NOT_PROVIDED: + continue + if root_node.nodegroup.cardinality == "1": + new_tiles = [new_tiles] + new_tiles.sort(key=itemgetter("sortorder")) + db_tiles = [ + t for t in self._annotated_tiles if t.nodegroup_alias == root_node.alias + ] + for db_tile, new_tile in zip_longest( + db_tiles, new_tiles, fillvalue=NOT_PROVIDED + ): + if new_tile is NOT_PROVIDED: + to_delete.add(db_tile) continue - new_val = getattr(self, 
attribute_name) - if nodegroup.cardinality == "1" or new_val is None: - new_val = [new_val] - max_tile_length = max(max_tile_length, len(new_val)) - - # TODO: handle saving related objects? - original_tile_data_by_tile_id = {} - for i in range(max(max_tile_length, len(db_tiles))): - try: - tile = db_tiles[i] - except IndexError: - tile = TileModel.get_blank_tile_from_nodegroup( - nodegroup, + if db_tile is NOT_PROVIDED: + new_tile_obj = TileModel.get_blank_tile_from_nodegroup( + nodegroup=root_node.nodegroup, resourceid=self.pk, - # parenttile? + # TODO: ensure this deserializes correctly. + parenttile=getattr(new_tile, "parenttile", None), ) + new_tile_obj._nodegroup_alias = root_node.nodegroup.alias if db_tiles: - tile.sortorder = ( - max(t.sortorder or 0 for t in working_tiles) + 1 - ) - to_insert.add(tile) + db_tile.sortorder = max(t.sortorder or 0 for t in db_tiles) + 1 + new_tile_obj._incoming_tile = new_tile + to_insert.add(new_tile_obj) else: - to_update.add(tile) - original_tile_data_by_tile_id[tile.pk] = {**tile.data} - working_tiles.append(tile) - - self._validate_and_patch_from_pythonic_model_values( - nodegroup, working_tiles, errors_by_node_alias - ) + original_tile_data_by_tile_id[db_tile.pk] = {**db_tile.data} + db_tile._incoming_tile = new_tile + to_update.add(db_tile) + + upserts = to_insert | to_update + for tile in upserts: + self._validate_and_patch_from_tile_values( + tile, root_node, errors_by_node_alias + ) - for tile in working_tiles: + for tile in upserts: # TODO: preserve if child tiles? # Remove blank tiles. if not any(tile.data.values()): @@ -1475,7 +1457,7 @@ def _update_tiles_from_pythonic_model_values(self, nodegroups): to_delete.add(tile) # Skip no-op updates. 
if original_data := original_tile_data_by_tile_id.pop(tile.pk, None): - for node in nodegroup.node_set.all(): + for node in root_node.nodegroup.node_set.all(): if node.datatype == "semantic": continue old = original_data[str(node.nodeid)] @@ -1487,6 +1469,7 @@ def _update_tiles_from_pythonic_model_values(self, nodegroups): to_update.remove(tile) if errors_by_node_alias: + del self._annotated_tiles raise ValidationError( { alias: ValidationError([e["message"] for e in errors]) @@ -1496,70 +1479,63 @@ def _update_tiles_from_pythonic_model_values(self, nodegroups): return to_insert, to_update, to_delete - def _validate_and_patch_from_pythonic_model_values( - self, nodegroup, working_tiles, errors_by_node_alias + def _validate_and_patch_from_tile_values( + self, tile, root_node, errors_by_node_alias ): + """Validate data found on ._incoming_data and move it to .data. + Update errors_by_node_alias in place.""" from arches.app.datatypes.datatypes import DataTypeFactory + NOT_PROVIDED = object() datatype_factory = DataTypeFactory() - for node in nodegroup.node_set.all(): + for node in root_node.nodegroup.node_set.all(): node_id_str = str(node.pk) - if not (attribute_name := self._fetched_nodes.get(node_id_str, "")): + value_to_validate = tile._incoming_tile.get(node.alias, NOT_PROVIDED) + if value_to_validate is NOT_PROVIDED: continue - datatype_instance = datatype_factory.get_instance(node.datatype) - new_val = getattr(self, attribute_name) - if nodegroup.cardinality == "1": - new_val = [new_val] - - for tile, inner_val in zip(working_tiles, new_val, strict=False): - # TODO: move this to Tile.full_clean()? - # https://github.com/archesproject/arches/issues/10851#issuecomment-2427305853 - transformed = inner_val - if inner_val is not None: - try: - transformed = datatype_instance.transform_value_for_tile( - inner_val, **node.config - ) - except ValueError: # BooleanDataType raises. - pass # validate() will handle. - - # Patch the transformed data into the working tiles. 
- tile.data[node_id_str] = transformed - - datatype_instance.clean(tile, node_id_str) - - if errors := datatype_instance.validate(transformed, node=node): - errors_by_node_alias[node.alias].extend(errors) - - try: - datatype_instance.pre_tile_save(tile, node_id_str) - except TypeError: # GeoJSONDataType raises. - errors_by_node_alias[node.alias].append( - datatype_instance.create_error_message( - tile.data[node_id_str], None, None, None - ) - ) + # TODO: move this to Tile.full_clean()? + # https://github.com/archesproject/arches/issues/10851#issuecomment-2427305853 + if value_to_validate is None: + tile.data[node_id_str] = None + continue + try: + transformed = datatype_instance.transform_value_for_tile( + value_to_validate, **node.config + ) + except ValueError: # BooleanDataType raises. + # validate() will handle. + transformed = value_to_validate + + # Patch the transformed data into the working tiles. + tile.data[node_id_str] = transformed + + datatype_instance.clean(tile, node_id_str) + + if errors := datatype_instance.validate(transformed, node=node): + errors_by_node_alias[node.alias].extend(errors) - for extra_tile in working_tiles[len(new_val) :]: - extra_tile.data[node_id_str] = None + try: + datatype_instance.pre_tile_save(tile, node_id_str) + except TypeError: # GeoJSONDataType raises. + errors_by_node_alias[node.alias].append( + datatype_instance.create_error_message( + tile.data[node_id_str], None, None, None + ) + ) def refresh_from_db(self, using=None, fields=None, from_queryset=None): - if not from_queryset and (field_map := getattr(self, "_fetched_nodes", [])): - from_queryset = self.__class__.as_model( - self.graph.slug, only=field_map.values() - ) + if not from_queryset and ( + root_nodes := getattr(self, "_fetched_root_nodes", set()) + ): + aliases = [n.alias for n in root_nodes] + from_queryset = self.__class__.as_model(self.graph.slug, only=aliases) super().refresh_from_db(using, fields, from_queryset) # Copy over annotations. 
refreshed_resource = from_queryset[0] for field in itertools.chain( - field_map.values(), - # TODO: move to constant - ( - "_fetched_nodes", - "_annotated_tiles", - "_sorted_tiles_for_fetched_nodes", - ), + aliases, + ("_fetched_root_nodes", "_annotated_tiles"), ): setattr(self, field, getattr(refreshed_resource, field)) else: diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 444e6db810..fbd658617b 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -172,6 +172,7 @@ def _prefetch_related_objects(self): """Attach annotated tiles to resource instances, at the root, by nodegroup alias. TODO: consider building as a nested structure. Discard annotations only used for shallow filtering. + Memoize fetched root node aliases. """ super()._prefetch_related_objects() @@ -181,6 +182,7 @@ def _prefetch_related_objects(self): root_nodes.append(root_node) for resource in self._result_cache: + resource._fetched_root_nodes = set() for node in self._fetched_nodes: delattr(resource, node.alias) for root_node in root_nodes: @@ -189,6 +191,7 @@ def _prefetch_related_objects(self): root_node.alias, None if root_node.nodegroup.cardinality == "1" else [], ) + resource._fetched_root_nodes.add(root_node) annotated_tiles = getattr(resource, "_annotated_tiles", []) for annotated_tile in annotated_tiles: for root_node in root_nodes: diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py index fcfb438e8f..cffd86dda4 100644 --- a/arches/app/models/serializers.py +++ b/arches/app/models/serializers.py @@ -36,6 +36,8 @@ class ArchesTileSerializer(serializers.ModelSerializer): # "reference" } + tileid = serializers.UUIDField(validators=[]) + def get_default_field_names(self, declared_fields, model_info): field_names = super().get_default_field_names(declared_fields, model_info) try: diff --git a/arches/app/views/api/mixins.py b/arches/app/views/api/mixins.py index b9094fb8bb..8afedbb179 100644 --- 
a/arches/app/views/api/mixins.py +++ b/arches/app/views/api/mixins.py @@ -56,6 +56,7 @@ def update(self, request, *args, **kwargs): user=request.user, permission_callable=user_can_edit_resource, ) + # TODO: return correct response with updated object. return super().update(request, *args, **kwargs) def destroy(self, request, *args, **kwargs): From ebbd7289ac2f182afccdb14e518c504ffbd4cbf8 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 30 Oct 2024 11:48:20 -0400 Subject: [PATCH 058/115] Move ersatz model fields to datatype classes --- arches/app/datatypes/base.py | 4 ++- arches/app/datatypes/concept_types.py | 12 ++++++-- .../datatypes/core/non_localized_string.py | 5 +++- arches/app/datatypes/datatypes.py | 25 +++++++++++++--- arches/app/datatypes/url.py | 3 ++ arches/app/models/serializers.py | 29 ++++--------------- 6 files changed, 45 insertions(+), 33 deletions(-) diff --git a/arches/app/datatypes/base.py b/arches/app/datatypes/base.py index 664bddd510..f59122d07b 100644 --- a/arches/app/datatypes/base.py +++ b/arches/app/datatypes/base.py @@ -7,12 +7,14 @@ from arches.app.models import models from arches.app.search.elasticsearch_dsl_builder import Dsl, Bool, Terms, Exists, Nested -import logging logger = logging.getLogger(__name__) class BaseDataType(object): + _rest_framework_model_field = None + """Django model field if the datatype were to be a real table column.""" + def __init__(self, model=None): self.datatype_model = model self.datatype_name = model.datatype if model else None diff --git a/arches/app/datatypes/concept_types.py b/arches/app/datatypes/concept_types.py index 35b0c40467..3cbf9840c1 100644 --- a/arches/app/datatypes/concept_types.py +++ b/arches/app/datatypes/concept_types.py @@ -2,11 +2,14 @@ import uuid import csv import logging + +from django.contrib.postgres.fields import ArrayField from django.core.exceptions import ObjectDoesNotExist +from django.db.models import fields from django.utils.translation import gettext as _ -from 
arches.app.models import models -from arches.app.models import concept from django.core.cache import cache + +from arches.app.models import models from arches.app.models.system_settings import settings from arches.app.datatypes.base import BaseDataType from arches.app.datatypes.datatypes import DataTypeFactory, get_value_from_jsonld @@ -32,7 +35,6 @@ from rdflib.namespace import RDF, RDFS, XSD, DC, DCTERMS, SKOS from arches.app.models.concept import ConceptValue from arches.app.models.concept import Concept -from io import StringIO archesproject = Namespace(settings.ARCHES_NAMESPACE_FOR_DATA_EXPORT) cidoc_nm = Namespace("http://www.cidoc-crm.org/cidoc-crm/") @@ -41,6 +43,8 @@ class BaseConceptDataType(BaseDataType): + _rest_framework_model_field = fields.UUIDField(null=True) + def __init__(self, model=None): super(BaseConceptDataType, self).__init__(model=model) self.value_lookup = {} @@ -411,6 +415,8 @@ def ignore_keys(self): class ConceptListDataType(BaseConceptDataType): + _rest_framework_model_field = ArrayField(base_field=fields.UUIDField(), null=True) + def validate( self, value, diff --git a/arches/app/datatypes/core/non_localized_string.py b/arches/app/datatypes/core/non_localized_string.py index d77e209a6a..0ea50528af 100644 --- a/arches/app/datatypes/core/non_localized_string.py +++ b/arches/app/datatypes/core/non_localized_string.py @@ -1,10 +1,11 @@ +from django.conf import settings +from django.db.models import fields from django.utils.translation import gettext as _ from rdflib import URIRef, Literal, ConjunctiveGraph as Graph from rdflib.namespace import RDF from arches.app.datatypes.base import BaseDataType from arches.app.datatypes.core.util import get_value_from_jsonld -from django.conf import settings from arches.app.search.elasticsearch_dsl_builder import ( Bool, Exists, @@ -18,6 +19,8 @@ class NonLocalizedStringDataType(BaseDataType): + _rest_framework_model_field = fields.CharField(null=True) + def validate( self, value, diff --git 
a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 083c08df79..5688b1fa20 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -47,13 +47,14 @@ from arches.app.search.search_engine_factory import SearchEngineInstance as se from arches.app.search.search_term import SearchTerm from arches.app.search.mappings import RESOURCES_INDEX + +from django.contrib.postgres.fields import ArrayField from django.core.cache import cache from django.core.files import File -from django.core.files.base import ContentFile -from django.core.files.storage import FileSystemStorage, default_storage +from django.core.files.storage import default_storage from django.core.exceptions import ObjectDoesNotExist -from django.core.exceptions import ValidationError -from django.db import connection, transaction +from django.db import connection +from django.db.models import fields from django.utils.translation import get_language, gettext as _ from elasticsearch import Elasticsearch @@ -118,6 +119,8 @@ def get_instance(self, datatype): class StringDataType(BaseDataType): + _rest_framework_model_field = fields.CharField(null=True) + def validate( self, value, @@ -458,6 +461,8 @@ def pre_structure_tile_data(self, tile, nodeid, **kwargs): class NumberDataType(BaseDataType): + _rest_framework_model_field = fields.FloatField(null=True) + def validate( self, value, @@ -588,6 +593,8 @@ def get_search_terms(self, nodevalue, nodeid=None): class BooleanDataType(BaseDataType): + _rest_framework_model_field = fields.BooleanField(null=True) + def validate( self, value, @@ -689,6 +696,8 @@ def default_es_mapping(self): class DateDataType(BaseDataType): + _rest_framework_model_field = fields.DateField(null=True) + def validate( self, value, @@ -900,6 +909,8 @@ def get_display_value(self, tile, node, **kwargs): class EDTFDataType(BaseDataType): + _rest_framework_model_field = fields.CharField(null=True) + def transform_value_for_tile(self, value, 
**kwargs): transformed_value = ExtendedDateFormat(value) if transformed_value.edtf is None: @@ -1073,6 +1084,8 @@ def default_es_mapping(self): class FileListDataType(BaseDataType): + _rest_framework_model_field = ArrayField(base_field=fields.CharField(), null=True) + def __init__(self, model=None): super(FileListDataType, self).__init__(model=model) self.node_lookup = {} @@ -2029,6 +2042,8 @@ class ResourceInstanceDataType(BaseDataType): """ + _rest_framework_model_field = fields.UUIDField(null=True) + def validate( self, value, @@ -2400,6 +2415,8 @@ def values_match(self, value1, value2): class ResourceInstanceListDataType(ResourceInstanceDataType): + _rest_framework_model_field = ArrayField(base_field=fields.UUIDField(), null=True) + def to_json(self, tile, node): from arches.app.models.resource import ( Resource, diff --git a/arches/app/datatypes/url.py b/arches/app/datatypes/url.py index b027d88af2..818f95bfa1 100644 --- a/arches/app/datatypes/url.py +++ b/arches/app/datatypes/url.py @@ -27,6 +27,7 @@ from rdflib import ConjunctiveGraph as Graph from rdflib import URIRef, Literal, Namespace from rdflib.namespace import RDF, RDFS, XSD, DC, DCTERMS +from django.db.models import fields from django.utils.translation import gettext as _ archesproject = Namespace(settings.ARCHES_NAMESPACE_FOR_DATA_EXPORT) @@ -70,6 +71,8 @@ class URLDataType(BaseDataType): URL Datatype to store an optionally labelled hyperlink to a (typically) external resource """ + _rest_framework_model_field = fields.URLField(null=True) + URL_REGEX = re.compile( r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)" ) diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py index cffd86dda4..74e2a79b38 100644 --- a/arches/app/models/serializers.py +++ b/arches/app/models/serializers.py @@ -1,10 +1,9 @@ from copy import deepcopy -from django.contrib.postgres.fields import ArrayField -from django.db.models import fields from 
rest_framework import renderers from rest_framework import serializers +from arches.app.datatypes.datatypes import DataTypeFactory from arches.app.models.models import Node from arches.app.utils.betterJSONSerializer import JSONSerializer @@ -15,27 +14,6 @@ class ArchesTileSerializer(serializers.ModelSerializer): - DATATYPE_FIELD_MAPPING = { - "string": fields.CharField(null=True), # XXX - "number": fields.FloatField(null=True), - "concept": fields.UUIDField(null=True), - "concept-list": ArrayField(base_field=fields.UUIDField(), null=True), - "date": fields.CharField(null=True), # XXX - "node-value": fields.CharField(null=True), # XXX - "edtf": fields.CharField(null=True), # XXX - "annotation": fields.CharField(null=True), # XXX - "url": fields.URLField(null=True), - "resource-instance": fields.UUIDField(null=True), - "resource-instance-list": ArrayField(base_field=fields.UUIDField(), null=True), - "boolean": fields.BooleanField(null=True), - "domain-value": ArrayField(base_field=fields.UUIDField(), null=True), - "domain-value-list": ArrayField(base_field=fields.UUIDField(), null=True), - "non-localized-string": fields.CharField(null=True), - "geojson-feature-collection": fields.CharField(null=True), # XXX - "file-list": ArrayField(base_field=fields.CharField(), null=True), # XXX - # "reference" - } - tileid = serializers.UUIDField(validators=[]) def get_default_field_names(self, declared_fields, model_info): @@ -76,7 +54,10 @@ def build_unknown_field(self, field_name, model_class): .select_related() .get() ) - model_field = deepcopy(self.DATATYPE_FIELD_MAPPING[node.datatype]) + datatype = DataTypeFactory().get_instance(node.datatype) + model_field = deepcopy(datatype._rest_framework_model_field) + if model_field is None: + raise NotImplementedError(f"Field missing for datatype: {node.datatype}") model_field.model = model_class model_field.blank = not node.isrequired From d0bfcf008a792ed1d74dc15039b660e4a906ef29 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 
30 Oct 2024 13:26:55 -0400 Subject: [PATCH 059/115] Implement single tile updates --- arches/app/models/models.py | 146 ++++++++++++++++++++++++++------- arches/app/models/querysets.py | 1 + 2 files changed, 116 insertions(+), 31 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 9cbab40085..0f9ebb0fe6 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -14,7 +14,7 @@ from arches.app.utils.thumbnail_factory import ThumbnailGeneratorInstance from arches.app.models.fields.i18n import I18n_TextField, I18n_JSONField from arches.app.models.querysets import ResourceInstanceQuerySet, TileQuerySet -from arches.app.models.utils import add_to_update_fields +from arches.app.models.utils import add_to_update_fields, field_names from arches.app.utils.betterJSONSerializer import JSONSerializer from arches.app.utils import import_class_from_string from django.contrib.auth.models import Group, User @@ -1346,13 +1346,16 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): # Instantiate proxy models for now, but find a way to expose this # functionality on vanilla models, and in bulk. 
- upsert_proxies = [ - Tile.objects.get(pk=tile.pk) for tile in to_insert.union(to_update) - ] + upserts = to_insert | to_update + upsert_proxies = [Tile.objects.get(pk=tile.pk) for tile in upserts] delete_proxies = [Tile.objects.get(pk=tile.pk) for tile in to_delete] with transaction.atomic(): - for proxy_instance in upsert_proxies: + for proxy_instance, vanilla_instance in zip( + upsert_proxies, upserts, strict=True + ): + for field in field_names(vanilla_instance): + setattr(proxy_instance, field, getattr(vanilla_instance, field)) proxy_instance._Tile__preSave() for proxy_instance in delete_proxies: proxy_instance._Tile__preDelete() @@ -1362,22 +1365,22 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): if to_insert: TileModel.objects.bulk_create(to_insert) if to_update: - TileModel.objects.bulk_update(to_update, {"data"}) + TileModel.objects.bulk_update(to_update, {"data", "parenttile"}) if to_delete: TileModel.objects.filter(pk__in=[t.pk for t in to_delete]).delete() super().save(**kwargs) for proxy_instance in upsert_proxies: - proxy_instance.refresh_from_db() + # TODO: determine if needed. proxy_instance.refresh_from_db() proxy_instance._Tile__postSave() - for to_update_tile in to_update: + for upsert_tile in upserts: for root_node in self._fetched_root_nodes: - if to_update_tile.nodegroup_id == root_node.nodegroup_id: + if upsert_tile.nodegroup_id == root_node.nodegroup_id: for node in root_node.nodegroup.node_set.all(): datatype = datatype_factory.get_instance(node.datatype) - datatype.post_tile_save(to_update_tile, str(node.pk)) + datatype.post_tile_save(upsert_tile, str(node.pk)) break # TODO: add unique constraint for TileModel re: sortorder @@ -1396,9 +1399,6 @@ def _update_tiles_from_pythonic_model_values(self): """Move values from model instance to prefetched tiles, and validate. Raises ValidationError if new data fails datatype validation. 
""" - from arches.app.datatypes.datatypes import DataTypeFactory - - datatype_factory = DataTypeFactory() errors_by_node_alias = defaultdict(list) to_insert = set() to_update = set() @@ -1455,18 +1455,10 @@ def _update_tiles_from_pythonic_model_values(self): else: to_update.remove(tile) to_delete.add(tile) - # Skip no-op updates. - if original_data := original_tile_data_by_tile_id.pop(tile.pk, None): - for node in root_node.nodegroup.node_set.all(): - if node.datatype == "semantic": - continue - old = original_data[str(node.nodeid)] - datatype_instance = datatype_factory.get_instance(node.datatype) - new = tile.data[str(node.nodeid)] - if not datatype_instance.values_match(old, new): - break - else: - to_update.remove(tile) + if ( + original_data := original_tile_data_by_tile_id.pop(tile.pk, None) + ) and tile._tile_update_is_noop(original_data): + to_update.remove(tile) if errors_by_node_alias: del self._annotated_tiles @@ -1479,10 +1471,9 @@ def _update_tiles_from_pythonic_model_values(self): return to_insert, to_update, to_delete - def _validate_and_patch_from_tile_values( - self, tile, root_node, errors_by_node_alias - ): - """Validate data found on ._incoming_data and move it to .data. + @staticmethod + def _validate_and_patch_from_tile_values(tile, root_node, errors_by_node_alias): + """Validate data found on ._incoming_tile and move it to .data. 
Update errors_by_node_alias in place.""" from arches.app.datatypes.datatypes import DataTypeFactory @@ -1885,7 +1876,7 @@ def _root_node_for_nodegroup(graph_slug, root_node_alias): def is_fully_provisional(self): return bool(self.provisionaledits and not any(self.data.values())) - def save(self, *args, **kwargs): + def save(self, index=False, user=None, **kwargs): if self.sortorder is None or self.is_fully_provisional(): for node in Node.objects.filter(nodegroup_id=self.nodegroup_id).exclude( datatype="semantic" @@ -1902,7 +1893,100 @@ def save(self, *args, **kwargs): if not self.tileid: self.tileid = uuid.uuid4() add_to_update_fields(kwargs, "tileid") - super(TileModel, self).save(*args, **kwargs) # Call the "real" save() method. + + # TODO: check user? + # TOOD: index side effects? + + if getattr(self, "_root_node", False): + self._save_from_pythonic_model_values(**kwargs) + else: + super().save(**kwargs) + + def _save_from_pythonic_model_values(self, index=False, **kwargs): + from arches.app.datatypes.datatypes import DataTypeFactory + from arches.app.models.resource import Resource + from arches.app.models.tile import Tile + + should_save = self._update_tile_from_pythonic_model_values() + if not should_save: + return + + # Instantiate a proxy model and sync data to it, to run all side effects. + # TODO: expose on vanilla model. 
+ proxy = Tile.objects.get(pk=self.pk) + for field in field_names(self): + setattr(proxy, field, getattr(self, field)) + + datatype_factory = DataTypeFactory() + with transaction.atomic(): + proxy._Tile__preSave() + super().save(**kwargs) + proxy._Tile__postSave() + for node in self._root_node.nodegroup.node_set.all(): + datatype = datatype_factory.get_instance(node.datatype) + datatype.post_tile_save(self, str(node.pk)) + + # TODO: add unique constraint for TileModel re: sortorder + self.refresh_from_db( + using=kwargs.get("using", None), + fields=kwargs.get("update_fields", None), + ) + + # TODO: refactor & expose this on vanilla model + proxy_resource = Resource.objects.get(pk=self.resourceinstance_id) + proxy_resource.save_descriptors() + if index: + proxy_resource.index() + + def _update_tile_from_pythonic_model_values(self): + original_data = {**self.data} + + # TODO: this will look different when moving _validate_and_patch_from_tile_values? + self._incoming_tile = {} + model_fields = field_names(self) + for tile_attr, tile_value in vars(self).items(): + if tile_attr.startswith("_") or tile_attr in model_fields: + continue + self._incoming_tile[tile_attr] = tile_value + + errors_by_alias = defaultdict(list) + # TODO: move this somewhere else. 
+ ResourceInstance._validate_and_patch_from_tile_values( + self, self._root_node, errors_by_alias + ) + if not any(self.data.values()): + raise ValidationError(_("Tile is blank.")) + if self._tile_update_is_noop(original_data): + return False + if errors_by_alias: + raise ValidationError( + { + alias: ValidationError([e["message"] for e in errors]) + for alias, errors in errors_by_alias.items() + } + ) + return True + + def _tile_update_is_noop(self, original_data): + """Skipping no-op tile saves avoids regenerating RxR rows, at least + given the current implementation that doesn't serialize them.""" + from arches.app.datatypes.datatypes import DataTypeFactory + + # TODO: this currently prevents you from being able to *only* + # change parenttile and sortorder, but at least for sortorder + # that's probably good. Determine DX here. + + datatype_factory = DataTypeFactory() + for node in self._root_node.nodegroup.node_set.all(): + if node.datatype == "semantic": + continue + old = original_data[str(node.nodeid)] + datatype_instance = datatype_factory.get_instance(node.datatype) + new = self.data[str(node.nodeid)] + if not datatype_instance.values_match(old, new): + return False + + return True def serialize(self, fields=None, exclude=["nodegroup"], **kwargs): return JSONSerializer().handle_model( diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index fbd658617b..3f7dfe84e0 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -57,6 +57,7 @@ def _prefetch_related_objects(self): for tile in self._result_cache: for node in self._fetched_nodes: if node.nodegroup_id == tile.nodegroup_id: + tile._root_node = node tile_val = getattr(tile, node.alias, NOT_PROVIDED) if tile_val is not NOT_PROVIDED: datatype_instance = datatype_factory.get_instance(node.datatype) From 78bb3d534215d076c2cb289a8b251d88cc9aa220 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 30 Oct 2024 14:45:20 -0400 Subject: [PATCH 060/115] Make 
StringDataType a JSONField --- arches/app/datatypes/datatypes.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 5688b1fa20..7edcad33a1 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -16,8 +16,6 @@ from datetime import datetime from mimetypes import MimeTypes -from django.core.files.images import get_image_dimensions - from arches.app.const import ExtensionType from arches.app.datatypes.base import BaseDataType from arches.app.models import models @@ -51,15 +49,14 @@ from django.contrib.postgres.fields import ArrayField from django.core.cache import cache from django.core.files import File +from django.core.files.images import get_image_dimensions from django.core.files.storage import default_storage from django.core.exceptions import ObjectDoesNotExist from django.db import connection from django.db.models import fields +from django.db.models.fields.json import JSONField from django.utils.translation import get_language, gettext as _ -from elasticsearch import Elasticsearch -from elasticsearch.exceptions import NotFoundError - # One benefit of shifting to python3.x would be to use # importlib.util.LazyLoader to load rdflib (and other lesser # used but memory soaking libs) @@ -119,7 +116,7 @@ def get_instance(self, datatype): class StringDataType(BaseDataType): - _rest_framework_model_field = fields.CharField(null=True) + _rest_framework_model_field = JSONField(null=True) def validate( self, From b30e6615364b28bf8aef2ef783b206628a987d48 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 30 Oct 2024 16:36:39 -0400 Subject: [PATCH 061/115] Fix example query --- arches/app/models/querysets.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 3f7dfe84e0..1e94432647 100644 --- a/arches/app/models/querysets.py +++ 
b/arches/app/models/querysets.py @@ -99,12 +99,14 @@ def with_nodegroups( >>> concepts.count() 785 - Filter on any nested node at the top level ("shallow query") + Filter on any nested node at the top level ("shallow query"). + In this example, statement_content is a cardinality-N node, thus an array. + # TODO: should name with _set (?) - >>> subset = concepts.filter(statement_content__isnull=False)[:4] + >>> subset = concepts.filter(statement_content__len__gt=0)[:4] >>> for concept in subset: print(concept) - for stmt in concept.statement: # TODO: should name with _set (?) + for stmt in concept.statement: print("\t", stmt) print("\t\t", stmt.statement_content) From 6c444e250af444e424abe8ace9ce2a0bd5d1346d Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 31 Oct 2024 09:38:27 -0400 Subject: [PATCH 062/115] Fix tests --- arches/app/models/models.py | 4 +++- tests/models/resource_test.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 0f9ebb0fe6..a81a3bac9b 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1988,7 +1988,9 @@ def _tile_update_is_noop(self, original_data): return True - def serialize(self, fields=None, exclude=["nodegroup"], **kwargs): + def serialize( + self, fields=None, exclude=("nodegroup", "nodegroup_alias"), **kwargs + ): return JSONSerializer().handle_model( self, fields=fields, exclude=exclude, **kwargs ) diff --git a/tests/models/resource_test.py b/tests/models/resource_test.py index 30df220784..2c3e8af4d3 100644 --- a/tests/models/resource_test.py +++ b/tests/models/resource_test.py @@ -352,7 +352,8 @@ def test_delete_acts_on_custom_indices(self, mock): other_resource = Resource(pk=uuid.uuid4()) with sync_overridden_test_settings_to_arches(): self.test_resource.delete_index(other_resource.pk) - self.assertIn(str(other_resource.pk), str(mock._mock_call_args)) + # delete_resources() was called with the correct resource id. 
+ self.assertEqual(other_resource.pk, mock._mock_call_args[1]["resources"].pk) def test_publication_restored_on_save(self): """ From ca9af65148a93d6077acd0d28c9e3e2fa389316d Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 31 Oct 2024 10:08:51 -0400 Subject: [PATCH 063/115] Quiet output from tests that expect transactions to fail --- tests/models/tile_model_tests.py | 3 ++- tests/views/workflow_tests.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/models/tile_model_tests.py b/tests/models/tile_model_tests.py index e56ad1ba94..f6f55d939a 100644 --- a/tests/models/tile_model_tests.py +++ b/tests/models/tile_model_tests.py @@ -23,6 +23,7 @@ from django.contrib.auth.models import User from django.db.utils import ProgrammingError from django.http import HttpRequest +from django.test.utils import captured_stdout from arches.app.models.graph import Graph from arches.app.models.tile import Tile, TileValidationError from arches.app.models.resource import Resource @@ -394,7 +395,7 @@ def test_tile_cardinality(self): } second_tile = Tile(second_json) - with self.assertRaises(ProgrammingError): + with self.assertRaises(ProgrammingError), captured_stdout(): second_tile.save(index=False, request=request) def test_apply_provisional_edit(self): diff --git a/tests/views/workflow_tests.py b/tests/views/workflow_tests.py index a2b93b29da..5e75935e76 100644 --- a/tests/views/workflow_tests.py +++ b/tests/views/workflow_tests.py @@ -4,6 +4,7 @@ from django.contrib.auth.models import Group, User from django.urls import reverse from django.test.client import Client +from django.test.utils import captured_stdout from arches.app.models.models import WorkflowHistory from tests.base_test import ArchesTestCase @@ -138,7 +139,7 @@ def test_post_workflow_history(self): # Non-superuser cannot update someone else's workflow. 
self.client.force_login(self.editor) - with self.assertLogs("django.request", level="WARNING"): + with self.assertLogs("django.request", level="WARNING"), captured_stdout(): response = self.client.post( reverse( "workflow_history", From 2a5a7bdd54cb261f598024cbd99a74a20ba98b56 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 31 Oct 2024 10:39:41 -0400 Subject: [PATCH 064/115] Avoid saving resource update edit log entries --- arches/app/models/models.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index a81a3bac9b..2825e921d0 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1293,6 +1293,9 @@ def save_edit(self, user=None): edit_type = "update" if self._state.adding: edit_type = "create" + else: + return # TODO: should we save an edit log update? + # Until save_edit() is a static method, work around it. ephemeral_proxy_instance = Resource() ephemeral_proxy_instance.graphid = self.graph_id From 3e73bb125fa4b1bf600f868cc63d523422b0ec05 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 31 Oct 2024 10:59:15 -0400 Subject: [PATCH 065/115] Fix parent/child tile attachment --- arches/app/models/querysets.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 1e94432647..90df530e15 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -65,6 +65,11 @@ def _prefetch_related_objects(self): setattr(tile, node.alias, python_val) else: delattr(tile, node.alias) + for child_tile in tile.children.all(): + setattr(child_tile, tile.nodegroup_alias, child_tile.parenttile) + children = getattr(tile, child_tile.nodegroup_alias, []) + children.append(child_tile) + setattr(tile, child_tile.nodegroup_alias, children) def _clone(self): ret = super()._clone() @@ -101,7 +106,7 @@ def with_nodegroups( Filter on any nested node at the top level 
("shallow query"). In this example, statement_content is a cardinality-N node, thus an array. - # TODO: should name with _set (?) + # TODO: should name with `_set`? But then would need to check for clashes. >>> subset = concepts.filter(statement_content__len__gt=0)[:4] >>> for concept in subset: @@ -115,6 +120,17 @@ def with_nodegroups( [{'en': {'value': 'Method of acquiring property ... ... + Access child and parent tiles by nodegroup aliases: + + >>> has_child = concepts.filter(statement_data_assignment_statement_content__len__gt=0).first() + >>> has_child + (751614c0-de7a-47d7-8e87-a4d18c7337ff)> + >>> has_child.statement_data_assignment_statement + + >>> parent = has_child.statement[0] + >>> parent.statement_data_assignment_statement[0].statement is parent + True + Provisional edits are completely ignored. """ from arches.app.models.models import GraphModel, NodeGroup, TileModel @@ -122,6 +138,8 @@ def with_nodegroups( if resource_ids and not graph_slug: graph_query = GraphModel.objects.filter(resourceinstance__in=resource_ids) else: + # TODO: get latest graph. 
+ # https://github.com/archesproject/arches/issues/11565 graph_query = GraphModel.objects.filter( slug=graph_slug, source_identifier=None ) @@ -207,11 +225,15 @@ def _prefetch_related_objects(self): if annotated_tile.cardinality == "n": tile_array = getattr(resource, ng_alias) tile_array.append(annotated_tile) - else: + elif root_node.nodegroup.parentnodegroup_id is None: setattr(resource, ng_alias, annotated_tile) for child_tile in annotated_tile.children.all(): - setattr(child_tile, ng_alias, annotated_tile.parenttile) + setattr(child_tile, ng_alias, child_tile.parenttile) + children = getattr(annotated_tile, child_tile.nodegroup_alias, []) + if child_tile not in children: + children.append(child_tile) + setattr(annotated_tile, child_tile.nodegroup_alias, children) def _clone(self): ret = super()._clone() From 4f7ec0a9c145719cd6d7b72a0503ef6057e20351 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 31 Oct 2024 11:18:04 -0400 Subject: [PATCH 066/115] Update changelog --- releases/8.0.0.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/releases/8.0.0.md b/releases/8.0.0.md index 9d3379cc64..6cd8e4fc84 100644 --- a/releases/8.0.0.md +++ b/releases/8.0.0.md @@ -6,7 +6,7 @@ Arches 8.0.0 Release Notes - 11042 Adds `ResourceInstanceLifecycle`s and `ResourceInstanceLifecycleState`s - Add token-based CSS theming [#11262](https://github.com/archesproject/arches/issues/11262) - Add `ArchesModelSerializer` for generating human-friendly REST APIs for tile creates, updates, and deletes []() -- Add querying interface for tile data by node alias []() +- Add interface, REST serializers for querying & updating tile data by node alias [#11595](https://github.com/archesproject/arches/issues/11595) - Support Python 3.13 [#11550](https://github.com/archesproject/arches/pull/11550) ### Performance improvements From f96c07013de364b1c6bdf4a743f4ab8d07fb5bc5 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 31 Oct 2024 13:11:55 -0400 Subject: [PATCH 067/115] 
Add documentation, reduce queries --- arches/app/datatypes/base.py | 4 + arches/app/datatypes/datatypes.py | 7 +- arches/app/models/models.py | 170 +++++++++++++++++------------- 3 files changed, 103 insertions(+), 78 deletions(-) diff --git a/arches/app/datatypes/base.py b/arches/app/datatypes/base.py index f59122d07b..9b33194514 100644 --- a/arches/app/datatypes/base.py +++ b/arches/app/datatypes/base.py @@ -539,6 +539,10 @@ def validate_node(self, node): pass def get_base_orm_lookup(self, node): + """This expression gets the tile data for a specific node. It can be + overridden to extract something more specific, especially where the + node value is JSON and only certain k/v pairs are useful to query. + """ return f"data__{node.pk}" def to_python(self, tile_val): diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 7edcad33a1..04f10867e7 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2396,9 +2396,7 @@ def default_es_mapping(self): return mapping def _get_base_orm_lookup(self, node): - """Immediately unwrap to a single value so that we can depend - on datatypes that do not collect multiple values not being a list. - """ + """Filter down to the resourceId.""" return f"data__{node.pk}__0__resourceId" def values_match(self, value1, value2): @@ -2452,6 +2450,9 @@ def collects_multiple_values(self): return True def _get_base_orm_lookup(self, node): + """Undo the override in ResourceInstanceDataType. TODO: write a better lookup. 
+ Currently the unpacking into UUID[] is done in to_python(), but this isn't + useful for querying.""" return f"data__{node.pk}" def to_python(self, tile_val): diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 2825e921d0..b4bf191291 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1248,8 +1248,12 @@ def __str__(self): return repr(self) @classmethod - def as_model(cls, *args, **kwargs): - return cls.objects.with_nodegroups(*args, **kwargs) + def as_model(cls, graph_slug=None, *, resource_ids=None, defer=None, only=None): + """Return a chainable QuerySet for a requested graph's instances, + with tile data annotated onto node and nodegroup aliases.""" + return cls.objects.with_nodegroups( + graph_slug, resource_ids=resource_ids, defer=defer, only=only + ) def get_initial_resource_instance_lifecycle_state(self, *args, **kwargs): try: @@ -1334,9 +1338,11 @@ def clean(self): def _save_tiles_for_pythonic_model(self, index=False, **kwargs): """Raises a compound ValidationError with any failing tile values. - (It's not exactly idiomatic for a Django project to clean() - values during a save(), but the "pythonic models" interface - is basically a form/serializer, so that's why we're validating.) + It's not exactly idiomatic for a Django project to clean() + values during a save(), but we can't easily express this logic + in a "pure" DRF field validator, because: + - the node values are phantom fields. + - we have other entry points besides DRF. """ from arches.app.datatypes.datatypes import DataTypeFactory from arches.app.models.resource import Resource @@ -1347,11 +1353,11 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): self._update_tiles_from_pythonic_model_values() ) - # Instantiate proxy models for now, but find a way to expose this + # Instantiate proxy models for now, but TODO: expose this # functionality on vanilla models, and in bulk. 
upserts = to_insert | to_update - upsert_proxies = [Tile.objects.get(pk=tile.pk) for tile in upserts] - delete_proxies = [Tile.objects.get(pk=tile.pk) for tile in to_delete] + upsert_proxies = Tile.objects.filter(pk__in=[tile.pk for tile in upserts]) + delete_proxies = Tile.objects.filter(pk__in=[tile.pk for tile in to_delete]) with transaction.atomic(): for proxy_instance, vanilla_instance in zip( @@ -1363,10 +1369,13 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): for proxy_instance in delete_proxies: proxy_instance._Tile__preDelete() - # TODO: more side effects, e.g. indexing, editlog + # TODO: determine appropriate effects, e.g. indexing, editlog # (use/adapt proxy model methods?) + insert_proxies = TileModel.objects.none() if to_insert: - TileModel.objects.bulk_create(to_insert) + inserted = TileModel.objects.bulk_create(to_insert) + # Pay the cost of TileModel -> Tile transformation until this is moved. + insert_proxies = Tile.objects.filter(pk__in=[t.pk for t in inserted]) if to_update: TileModel.objects.bulk_update(to_update, {"data", "parenttile"}) if to_delete: @@ -1374,8 +1383,7 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): super().save(**kwargs) - for proxy_instance in upsert_proxies: - # TODO: determine if needed. 
proxy_instance.refresh_from_db() + for proxy_instance in upsert_proxies.difference(insert_proxies): proxy_instance._Tile__postSave() for upsert_tile in upserts: @@ -1386,12 +1394,6 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): datatype.post_tile_save(upsert_tile, str(node.pk)) break - # TODO: add unique constraint for TileModel re: sortorder - self.refresh_from_db( - using=kwargs.get("using", None), - fields=kwargs.get("update_fields", None), - ) - # Instantiate proxy model for now, but refactor & expose this on vanilla model proxy_resource = Resource.objects.get(pk=self.pk) proxy_resource.save_descriptors() @@ -1402,66 +1404,22 @@ def _update_tiles_from_pythonic_model_values(self): """Move values from model instance to prefetched tiles, and validate. Raises ValidationError if new data fails datatype validation. """ + # TODO: put all this state in a helper dataclass to ease passing it around. errors_by_node_alias = defaultdict(list) to_insert = set() to_update = set() to_delete = set() - NOT_PROVIDED = object() original_tile_data_by_tile_id = {} - errors_by_node_alias = {} for root_node in self._fetched_root_nodes: - new_tiles = getattr(self, root_node.alias, NOT_PROVIDED) - if new_tiles is NOT_PROVIDED: - continue - if root_node.nodegroup.cardinality == "1": - new_tiles = [new_tiles] - new_tiles.sort(key=itemgetter("sortorder")) - db_tiles = [ - t for t in self._annotated_tiles if t.nodegroup_alias == root_node.alias - ] - for db_tile, new_tile in zip_longest( - db_tiles, new_tiles, fillvalue=NOT_PROVIDED - ): - if new_tile is NOT_PROVIDED: - to_delete.add(db_tile) - continue - if db_tile is NOT_PROVIDED: - new_tile_obj = TileModel.get_blank_tile_from_nodegroup( - nodegroup=root_node.nodegroup, - resourceid=self.pk, - # TODO: ensure this deserializes correctly. 
- parenttile=getattr(new_tile, "parenttile", None), - ) - new_tile_obj._nodegroup_alias = root_node.nodegroup.alias - if db_tiles: - db_tile.sortorder = max(t.sortorder or 0 for t in db_tiles) + 1 - new_tile_obj._incoming_tile = new_tile - to_insert.add(new_tile_obj) - else: - original_tile_data_by_tile_id[db_tile.pk] = {**db_tile.data} - db_tile._incoming_tile = new_tile - to_update.add(db_tile) - - upserts = to_insert | to_update - for tile in upserts: - self._validate_and_patch_from_tile_values( - tile, root_node, errors_by_node_alias - ) - - for tile in upserts: - # TODO: preserve if child tiles? - # Remove blank tiles. - if not any(tile.data.values()): - if tile._state.adding: - to_insert.remove(tile) - else: - to_update.remove(tile) - to_delete.add(tile) - if ( - original_data := original_tile_data_by_tile_id.pop(tile.pk, None) - ) and tile._tile_update_is_noop(original_data): - to_update.remove(tile) + self._update_tile_for_single_node( + root_node, + original_tile_data_by_tile_id, + to_insert, + to_update, + to_delete, + errors_by_node_alias, + ) if errors_by_node_alias: del self._annotated_tiles @@ -1474,6 +1432,68 @@ def _update_tiles_from_pythonic_model_values(self): return to_insert, to_update, to_delete + def _update_tile_for_single_node( + self, + root_node, + original_tile_data_by_tile_id, + to_insert, + to_update, + to_delete, + errors_by_node_alias, + ): + NOT_PROVIDED = object() + + new_tiles = getattr(self, root_node.alias, NOT_PROVIDED) + if new_tiles is NOT_PROVIDED: + return + if root_node.nodegroup.cardinality == "1": + new_tiles = [new_tiles] + new_tiles.sort(key=itemgetter("sortorder")) + db_tiles = [ + t for t in self._annotated_tiles if t.nodegroup_alias == root_node.alias + ] + for db_tile, new_tile in zip_longest( + db_tiles, new_tiles, fillvalue=NOT_PROVIDED + ): + if new_tile is NOT_PROVIDED: + to_delete.add(db_tile) + continue + if db_tile is NOT_PROVIDED: + new_tile_obj = TileModel.get_blank_tile_from_nodegroup( + 
nodegroup=root_node.nodegroup, + resourceid=self.pk, + # TODO: ensure this deserializes correctly. + parenttile=getattr(new_tile, "parenttile", None), + ) + new_tile_obj._nodegroup_alias = root_node.alias + if db_tiles: + new_tile_obj.sortorder = max(t.sortorder or 0 for t in db_tiles) + 1 + new_tile_obj._incoming_tile = new_tile + to_insert.add(new_tile_obj) + else: + original_tile_data_by_tile_id[db_tile.pk] = {**db_tile.data} + db_tile._incoming_tile = new_tile + to_update.add(db_tile) + + upserts = to_insert | to_update + for tile in upserts: + self._validate_and_patch_from_tile_values( + tile, root_node, errors_by_node_alias + ) + + for tile in upserts: + # Remove blank tiles. + if not any(tile.data.values()) and not tile.children.count(): + if tile._state.adding: + to_insert.remove(tile) + else: + to_update.remove(tile) + to_delete.add(tile) + if ( + original_data := original_tile_data_by_tile_id.pop(tile.pk, None) + ) and tile._tile_update_is_noop(original_data): + to_update.remove(tile) + @staticmethod def _validate_and_patch_from_tile_values(tile, root_node, errors_by_node_alias): """Validate data found on ._incoming_tile and move it to .data. @@ -1828,10 +1848,10 @@ def as_nodegroup(cls, root_node_alias, *, graph_slug, defer=None, only=None): resource.) >>> statements = TileModel.as_nodegroup("statement", graph_slug="concept") - >>> results = statements.filter(statement_content__en__value__startswith="F") # todo: make more ergonomic + >>> results = statements.filter(statement_content__en__value__startswith="F") # TODO: make more ergonomic >>> for result in results: print(result.resourceinstance) - print("\t", result.statement_content["en"]["value"]) # TODO: unwrap/string viewmodel + print("\t", result.statement_content["en"]["value"]) # TODO: unwrap? Fluorescence stimulated by x-rays; ... 
@@ -1935,7 +1955,8 @@ def _save_from_pythonic_model_values(self, index=False, **kwargs): fields=kwargs.get("update_fields", None), ) - # TODO: refactor & expose this on vanilla model + # TODO: refactor & expose this on vanilla model, at which point + # we may want to refresh_from_db() here. proxy_resource = Resource.objects.get(pk=self.resourceinstance_id) proxy_resource.save_descriptors() if index: @@ -1944,7 +1965,6 @@ def _save_from_pythonic_model_values(self, index=False, **kwargs): def _update_tile_from_pythonic_model_values(self): original_data = {**self.data} - # TODO: this will look different when moving _validate_and_patch_from_tile_values? self._incoming_tile = {} model_fields = field_names(self) for tile_attr, tile_value in vars(self).items(): From e143921a8f1c7a19d7b2b16dee5139eb0ffa3d3c Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 31 Oct 2024 13:42:25 -0400 Subject: [PATCH 068/115] Point to other docstring --- arches/app/models/querysets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 90df530e15..7c071e56c8 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -7,6 +7,7 @@ class TileQuerySet(QuerySet): def with_node_values( self, nodes, *, defer=None, only=None, lhs=None, outer_ref, depth=1 ): + """TileModel.as_nodegroup() is the better entrypoint, see docs there.""" from arches.app.models.models import TileModel node_alias_annotations = generate_tile_annotations( From fba5031a4783e3d3fe0e6fccc359959442429cd2 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 31 Oct 2024 13:57:37 -0400 Subject: [PATCH 069/115] Remove N+1 queries in build_unknown_field() --- arches/app/models/serializers.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py index 74e2a79b38..cbe45e537b 100644 --- a/arches/app/models/serializers.py +++ 
b/arches/app/models/serializers.py @@ -16,6 +16,8 @@ class ArchesTileSerializer(serializers.ModelSerializer): tileid = serializers.UUIDField(validators=[]) + _nodes = Node.objects.none() + def get_default_field_names(self, declared_fields, model_info): field_names = super().get_default_field_names(declared_fields, model_info) try: @@ -45,15 +47,21 @@ def get_default_field_names(self, declared_fields, model_info): def build_unknown_field(self, field_name, model_class): graph_slug = self.__class__.Meta.graph_slug - node = ( - Node.objects.filter( + if not self._nodes: + self._nodes = Node.objects.filter( graph__slug=graph_slug, + # TODO: latest graph__source_identifier=None, - alias=field_name, ) - .select_related() - .get() - ) + + for node in self._nodes: + if node.alias == field_name: + break + else: + raise Node.DoesNotExist( + f"Node with alias {field_name} not found in graph {graph_slug}" + ) + datatype = DataTypeFactory().get_instance(node.datatype) model_field = deepcopy(datatype._rest_framework_model_field) if model_field is None: From eaf3b27ec22aae2629c40c34e69f2d6e435e0dac Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 31 Oct 2024 15:57:40 -0400 Subject: [PATCH 070/115] Improve ResourceInstance deserialization --- arches/app/datatypes/datatypes.py | 10 ++++++---- arches/app/models/querysets.py | 8 ++------ releases/8.0.0.md | 3 +-- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 04f10867e7..df64574bf1 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2245,10 +2245,10 @@ def get_search_terms(self, nodevalue, nodeid=None): return terms def transform_value_for_tile(self, value, **kwargs): - def from_instance(instance): + def from_id_string(uuid_string): nonlocal kwargs return { - "resourceId": str(instance.pk), + "resourceId": uuid_string, "inverseOntology": kwargs.get("inverseOntology", ""), "inverseOntologyProperty": 
kwargs.get("inverseOntologyProperty", ""), } @@ -2265,11 +2265,13 @@ def from_instance(instance): # data should come in as json but python list is accepted as well if isinstance(value, list): if all(isinstance(inner, models.ResourceInstance) for inner in value): - return [from_instance(instance) for instance in value] + return [from_id_string(str(instance.pk)) for instance in value] + elif all(isinstance(inner, uuid.UUID) for inner in value): + return [from_id_string(str(uid)) for uid in value] else: return value if isinstance(value, models.ResourceInstance): - return [from_instance(value)] + return [from_id_string(str(value.pk))] def transform_export_values(self, value, *args, **kwargs): return json.dumps(value) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 7c071e56c8..f28522c462 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -39,9 +39,7 @@ def with_node_values( return ( self.filter(data__has_any_keys=[n.pk for n in self._fetched_nodes]) .prefetch_related(*prefetches) - .annotate( - **node_alias_annotations, - ) + .annotate(**node_alias_annotations) .order_by("sortorder") ) @@ -186,9 +184,7 @@ def with_nodegroups( ), to_attr="_annotated_tiles", ), - ).annotate( - **node_alias_annotations, - ) + ).annotate(**node_alias_annotations) def _prefetch_related_objects(self): """Attach annotated tiles to resource instances, at the root, by diff --git a/releases/8.0.0.md b/releases/8.0.0.md index 6cd8e4fc84..d770e0ae88 100644 --- a/releases/8.0.0.md +++ b/releases/8.0.0.md @@ -5,8 +5,7 @@ Arches 8.0.0 Release Notes - 9613 Adds editable_future_graphs and the ability to update Graphs without unpublishing. 
- 11042 Adds `ResourceInstanceLifecycle`s and `ResourceInstanceLifecycleState`s - Add token-based CSS theming [#11262](https://github.com/archesproject/arches/issues/11262) -- Add `ArchesModelSerializer` for generating human-friendly REST APIs for tile creates, updates, and deletes []() -- Add interface, REST serializers for querying & updating tile data by node alias [#11595](https://github.com/archesproject/arches/issues/11595) +- Add query interface, REST serializers for retrieving & updating tile data by node alias [#11595](https://github.com/archesproject/arches/issues/11595) - Support Python 3.13 [#11550](https://github.com/archesproject/arches/pull/11550) ### Performance improvements From 3587f3b8cbd5fe7376d3a538f3f20fc093859cab Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Fri, 1 Nov 2024 12:58:14 -0400 Subject: [PATCH 071/115] Improve names --- arches/app/datatypes/base.py | 2 +- arches/app/datatypes/concept_types.py | 4 ++-- .../datatypes/core/non_localized_string.py | 2 +- arches/app/datatypes/datatypes.py | 16 +++++++------- arches/app/datatypes/url.py | 2 +- arches/app/models/models.py | 21 +++++++++---------- arches/app/models/serializers.py | 2 +- 7 files changed, 24 insertions(+), 25 deletions(-) diff --git a/arches/app/datatypes/base.py b/arches/app/datatypes/base.py index 9b33194514..30381ff8fe 100644 --- a/arches/app/datatypes/base.py +++ b/arches/app/datatypes/base.py @@ -12,7 +12,7 @@ class BaseDataType(object): - _rest_framework_model_field = None + rest_framework_model_field = None """Django model field if the datatype were to be a real table column.""" def __init__(self, model=None): diff --git a/arches/app/datatypes/concept_types.py b/arches/app/datatypes/concept_types.py index 3cbf9840c1..2bc2fc8b75 100644 --- a/arches/app/datatypes/concept_types.py +++ b/arches/app/datatypes/concept_types.py @@ -43,7 +43,7 @@ class BaseConceptDataType(BaseDataType): - _rest_framework_model_field = fields.UUIDField(null=True) + 
rest_framework_model_field = fields.UUIDField(null=True) def __init__(self, model=None): super(BaseConceptDataType, self).__init__(model=model) @@ -415,7 +415,7 @@ def ignore_keys(self): class ConceptListDataType(BaseConceptDataType): - _rest_framework_model_field = ArrayField(base_field=fields.UUIDField(), null=True) + rest_framework_model_field = ArrayField(base_field=fields.UUIDField(), null=True) def validate( self, diff --git a/arches/app/datatypes/core/non_localized_string.py b/arches/app/datatypes/core/non_localized_string.py index 0ea50528af..eb5350f1be 100644 --- a/arches/app/datatypes/core/non_localized_string.py +++ b/arches/app/datatypes/core/non_localized_string.py @@ -19,7 +19,7 @@ class NonLocalizedStringDataType(BaseDataType): - _rest_framework_model_field = fields.CharField(null=True) + rest_framework_model_field = fields.CharField(null=True) def validate( self, diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index df64574bf1..4e3033813a 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -116,7 +116,7 @@ def get_instance(self, datatype): class StringDataType(BaseDataType): - _rest_framework_model_field = JSONField(null=True) + rest_framework_model_field = JSONField(null=True) def validate( self, @@ -458,7 +458,7 @@ def pre_structure_tile_data(self, tile, nodeid, **kwargs): class NumberDataType(BaseDataType): - _rest_framework_model_field = fields.FloatField(null=True) + rest_framework_model_field = fields.FloatField(null=True) def validate( self, @@ -590,7 +590,7 @@ def get_search_terms(self, nodevalue, nodeid=None): class BooleanDataType(BaseDataType): - _rest_framework_model_field = fields.BooleanField(null=True) + rest_framework_model_field = fields.BooleanField(null=True) def validate( self, @@ -693,7 +693,7 @@ def default_es_mapping(self): class DateDataType(BaseDataType): - _rest_framework_model_field = fields.DateField(null=True) + rest_framework_model_field = 
fields.DateField(null=True) def validate( self, @@ -906,7 +906,7 @@ def get_display_value(self, tile, node, **kwargs): class EDTFDataType(BaseDataType): - _rest_framework_model_field = fields.CharField(null=True) + rest_framework_model_field = fields.CharField(null=True) def transform_value_for_tile(self, value, **kwargs): transformed_value = ExtendedDateFormat(value) @@ -1081,7 +1081,7 @@ def default_es_mapping(self): class FileListDataType(BaseDataType): - _rest_framework_model_field = ArrayField(base_field=fields.CharField(), null=True) + rest_framework_model_field = ArrayField(base_field=fields.CharField(), null=True) def __init__(self, model=None): super(FileListDataType, self).__init__(model=model) @@ -2039,7 +2039,7 @@ class ResourceInstanceDataType(BaseDataType): """ - _rest_framework_model_field = fields.UUIDField(null=True) + rest_framework_model_field = fields.UUIDField(null=True) def validate( self, @@ -2412,7 +2412,7 @@ def values_match(self, value1, value2): class ResourceInstanceListDataType(ResourceInstanceDataType): - _rest_framework_model_field = ArrayField(base_field=fields.UUIDField(), null=True) + rest_framework_model_field = ArrayField(base_field=fields.UUIDField(), null=True) def to_json(self, tile, node): from arches.app.models.resource import ( diff --git a/arches/app/datatypes/url.py b/arches/app/datatypes/url.py index 818f95bfa1..71936d287e 100644 --- a/arches/app/datatypes/url.py +++ b/arches/app/datatypes/url.py @@ -71,7 +71,7 @@ class URLDataType(BaseDataType): URL Datatype to store an optionally labelled hyperlink to a (typically) external resource """ - _rest_framework_model_field = fields.URLField(null=True) + rest_framework_model_field = fields.URLField(null=True) URL_REGEX = re.compile( r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)" diff --git a/arches/app/models/models.py b/arches/app/models/models.py index b4bf191291..1064c68b46 100644 --- a/arches/app/models/models.py 
+++ b/arches/app/models/models.py @@ -1412,7 +1412,7 @@ def _update_tiles_from_pythonic_model_values(self): original_tile_data_by_tile_id = {} for root_node in self._fetched_root_nodes: - self._update_tile_for_single_node( + self._update_tile_for_root_node( root_node, original_tile_data_by_tile_id, to_insert, @@ -1432,7 +1432,7 @@ def _update_tiles_from_pythonic_model_values(self): return to_insert, to_update, to_delete - def _update_tile_for_single_node( + def _update_tile_for_root_node( self, root_node, original_tile_data_by_tile_id, @@ -1480,9 +1480,8 @@ def _update_tile_for_single_node( self._validate_and_patch_from_tile_values( tile, root_node, errors_by_node_alias ) - - for tile in upserts: # Remove blank tiles. + # TODO: also check for unsaved children? if not any(tile.data.values()) and not tile.children.count(): if tile._state.adding: to_insert.remove(tile) @@ -1521,7 +1520,7 @@ def _validate_and_patch_from_tile_values(tile, root_node, errors_by_node_alias): # validate() will handle. transformed = value_to_validate - # Patch the transformed data into the working tiles. + # Patch the transformed data into the tile.data. tile.data[node_id_str] = transformed datatype_instance.clean(tile, node_id_str) @@ -1836,9 +1835,9 @@ def nodegroup(self): def nodegroup_alias(self): if nodegroup_alias := getattr(self, "_nodegroup_alias", None): return nodegroup_alias - if node_for_nodegroup := Node.objects.filter(pk=self.nodegroup_id).first(): - self._nodegroup_alias = node_for_nodegroup.alias - return node_for_nodegroup.alias + if root_node := Node.objects.filter(pk=self.nodegroup_id).first(): + self._nodegroup_alias = root_node.alias + return root_node.alias return None @classmethod @@ -1859,7 +1858,7 @@ def as_nodegroup(cls, root_node_alias, *, graph_slug, defer=None, only=None): Fine-quality calf or lamb parchment ... 
""" - root_node = cls._root_node_for_nodegroup(graph_slug, root_node_alias) + root_node = cls._root_node(graph_slug, root_node_alias) def accumulate_nodes_below(nodegroup, acc): acc.extend(list(nodegroup.node_set.all())) @@ -1878,7 +1877,7 @@ def accumulate_nodes_below(nodegroup, acc): ) @staticmethod - def _root_node_for_nodegroup(graph_slug, root_node_alias): + def _root_node(graph_slug, root_node_alias): from arches.app.models.models import Node qs = ( @@ -1920,7 +1919,7 @@ def save(self, index=False, user=None, **kwargs): # TODO: check user? # TOOD: index side effects? - if getattr(self, "_root_node", False): + if getattr(self, "_fetched_nodes", False): self._save_from_pythonic_model_values(**kwargs) else: super().save(**kwargs) diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py index cbe45e537b..1d455d0adf 100644 --- a/arches/app/models/serializers.py +++ b/arches/app/models/serializers.py @@ -63,7 +63,7 @@ def build_unknown_field(self, field_name, model_class): ) datatype = DataTypeFactory().get_instance(node.datatype) - model_field = deepcopy(datatype._rest_framework_model_field) + model_field = deepcopy(datatype.rest_framework_model_field) if model_field is None: raise NotImplementedError(f"Field missing for datatype: {node.datatype}") model_field.model = model_class From 2840e4a37c8b6667c2eadac07e34b6b3e1378f1b Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 5 Nov 2024 08:53:04 -0500 Subject: [PATCH 072/115] Support nodegroups = "__all__" in serializers --- arches/app/models/serializers.py | 38 ++++++++++++++++++++++++++++++-- arches/app/views/api/mixins.py | 3 ++- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py index 1d455d0adf..7bea958093 100644 --- a/arches/app/models/serializers.py +++ b/arches/app/models/serializers.py @@ -1,10 +1,11 @@ from copy import deepcopy +from django.db.models import F from rest_framework import renderers from 
rest_framework import serializers from arches.app.datatypes.datatypes import DataTypeFactory -from arches.app.models.models import Node +from arches.app.models.models import Node, TileModel from arches.app.utils.betterJSONSerializer import JSONSerializer @@ -73,10 +74,43 @@ def build_unknown_field(self, field_name, model_class): class ArchesModelSerializer(serializers.ModelSerializer): + _root_nodes = Node.objects.none() + + def get_fields(self): + graph_slug = self.__class__.Meta.graph_slug + + if self.__class__.Meta.nodegroups == "__all__": + if not self._root_nodes: + self._root_nodes = Node.objects.filter( + graph__slug=graph_slug, + # TODO: latest + graph__source_identifier=None, + nodegroup_id=F("nodeid"), + ).select_related("nodegroup") + for root in self._root_nodes: + if root.alias not in self._declared_fields: + + class TileSerializer(ArchesTileSerializer): + class Meta: + model = TileModel + graph_slug = self.__class__.Meta.graph_slug + root_node = root.alias + fields = self.__class__.Meta.fields + + self._declared_fields[root.alias] = TileSerializer( + many=root.nodegroup.cardinality == "n", required=False + ) + + return super().get_fields() + def get_default_field_names(self, declared_fields, model_info): field_names = super().get_default_field_names(declared_fields, model_info) aliases = self.__class__.Meta.fields if aliases != "__all__": raise NotImplementedError # TODO... 
- field_names.extend(self.__class__.Meta.nodegroups) + nodegroups = self.__class__.Meta.nodegroups + if nodegroups == "__all__": + field_names.extend(self._root_nodes.values_list("alias", flat=True)) + else: + field_names.extend(self.__class__.Meta.nodegroups) return field_names diff --git a/arches/app/views/api/mixins.py b/arches/app/views/api/mixins.py index 8afedbb179..99fc831443 100644 --- a/arches/app/views/api/mixins.py +++ b/arches/app/views/api/mixins.py @@ -20,7 +20,8 @@ def get_queryset(self): raise NotImplementedError meta = self.serializer_class.Meta if ResourceInstance in meta.model.mro(): - return meta.model.as_model(meta.graph_slug, only=meta.nodegroups) + only = None if meta.nodegroups == "__all__" else meta.nodegroups + return meta.model.as_model(meta.graph_slug, only=only) elif TileModel in meta.model.mro(): return meta.model.as_nodegroup( meta.root_node, graph_slug=meta.graph_slug, only=fields From 5bff63b027446422f33c4c12d3c54597c68bb75f Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 5 Nov 2024 12:13:58 -0500 Subject: [PATCH 073/115] Allow creation with tile data --- arches/app/models/models.py | 22 +++++++++++++++++++--- arches/app/models/serializers.py | 2 +- arches/app/models/utils.py | 9 +++++++++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 1064c68b46..615ac5d26c 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -14,7 +14,11 @@ from arches.app.utils.thumbnail_factory import ThumbnailGeneratorInstance from arches.app.models.fields.i18n import I18n_TextField, I18n_JSONField from arches.app.models.querysets import ResourceInstanceQuerySet, TileQuerySet -from arches.app.models.utils import add_to_update_fields, field_names +from arches.app.models.utils import ( + add_to_update_fields, + field_names, + pop_arches_model_kwargs, +) from arches.app.utils.betterJSONSerializer import JSONSerializer from arches.app.utils import 
import_class_from_string from django.contrib.auth.models import Group, User @@ -1237,7 +1241,13 @@ class Meta: permissions = (("no_access_to_resourceinstance", "No Access"),) def __init__(self, *args, **kwargs): - super(ResourceInstance, self).__init__(*args, **kwargs) + arches_model_kwargs, other_kwargs = pop_arches_model_kwargs( + kwargs, self._meta.get_fields() + ) + super().__init__(*args, **other_kwargs) + + for kwarg, value in arches_model_kwargs.items(): + setattr(self, kwarg, value) if not self.resourceinstanceid: self.resourceinstanceid = uuid.uuid4() @@ -1817,7 +1827,13 @@ class Meta: db_table = "tiles" def __init__(self, *args, **kwargs): - super(TileModel, self).__init__(*args, **kwargs) + arches_model_kwargs, other_kwargs = pop_arches_model_kwargs( + kwargs, self._meta.get_fields() + ) + super().__init__(*args, **other_kwargs) + + for kwarg, value in arches_model_kwargs.items(): + setattr(self, kwarg, value) if not self.tileid: self.tileid = uuid.uuid4() diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py index 7bea958093..cd53263aa5 100644 --- a/arches/app/models/serializers.py +++ b/arches/app/models/serializers.py @@ -15,7 +15,7 @@ class ArchesTileSerializer(serializers.ModelSerializer): - tileid = serializers.UUIDField(validators=[]) + tileid = serializers.UUIDField(validators=[], required=False) _nodes = Node.objects.none() diff --git a/arches/app/models/utils.py b/arches/app/models/utils.py index 1d419aabd4..25dbaa3ffb 100644 --- a/arches/app/models/utils.py +++ b/arches/app/models/utils.py @@ -74,6 +74,15 @@ def generate_tile_annotations(nodes, *, defer, only, model, lhs, outer_ref): return node_alias_annotations +def pop_arches_model_kwargs(kwargs, model_fields): + arches_model_data = {} + for kwarg, value in kwargs.items(): + if kwarg not in model_fields: + arches_model_data[kwarg] = value + without_model_data = {k: v for k, v in kwargs.items() if k not in arches_model_data} + return arches_model_data, 
without_model_data + + def find_root_node(prefetched_siblings, nodegroup_id): for sibling_node in prefetched_siblings: if sibling_node.pk == nodegroup_id: From feb177a3b4f5cc97dcccd5c75f8b5bf15109ddee Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 5 Nov 2024 16:18:04 -0500 Subject: [PATCH 074/115] Check correct private attribute --- arches/app/models/models.py | 2 +- arches/app/models/querysets.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 615ac5d26c..23c02d6b63 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1935,7 +1935,7 @@ def save(self, index=False, user=None, **kwargs): # TODO: check user? # TOOD: index side effects? - if getattr(self, "_fetched_nodes", False): + if getattr(self, "_fetched_root_nodes", False): self._save_from_pythonic_model_values(**kwargs) else: super().save(**kwargs) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index f28522c462..daaad9f408 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -54,9 +54,11 @@ def _prefetch_related_objects(self): datatype_factory = DataTypeFactory() NOT_PROVIDED = object() for tile in self._result_cache: + tile._fetched_root_nodes = set() for node in self._fetched_nodes: if node.nodegroup_id == tile.nodegroup_id: tile._root_node = node + tile._fetched_root_nodes.add(node) tile_val = getattr(tile, node.alias, NOT_PROVIDED) if tile_val is not NOT_PROVIDED: datatype_instance = datatype_factory.get_instance(node.datatype) From bd44aad575b4897f408c4d1257a448e61052a78b Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 5 Nov 2024 17:40:35 -0500 Subject: [PATCH 075/115] Harden RI transform_value_for_tile --- arches/app/datatypes/datatypes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 4e3033813a..77585baf1c 100644 --- 
a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2268,6 +2268,8 @@ def from_id_string(uuid_string): return [from_id_string(str(instance.pk)) for instance in value] elif all(isinstance(inner, uuid.UUID) for inner in value): return [from_id_string(str(uid)) for uid in value] + elif all(isinstance(inner, str) for inner in value): + return [from_id_string(uid) for uid in value] else: return value if isinstance(value, models.ResourceInstance): From a705331a4a30033b1df8310f005086484b4584d0 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 6 Nov 2024 11:36:36 -0500 Subject: [PATCH 076/115] Implement provisional edits --- arches/app/models/models.py | 189 +++++++++++++++++++++++++++++---- arches/app/models/querysets.py | 2 +- 2 files changed, 167 insertions(+), 24 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 23c02d6b63..4ff57ef42b 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1335,7 +1335,7 @@ def save(self, index=False, user=None, **kwargs): add_to_update_fields(kwargs, "resource_instance_lifecycle_state") if getattr(self, "_fetched_root_nodes", False): - self._save_tiles_for_pythonic_model(index=index, **kwargs) + self._save_tiles_for_pythonic_model(user=user, index=index, **kwargs) self.save_edit(user=user) else: super().save(**kwargs) @@ -1345,7 +1345,7 @@ def clean(self): if getattr(self, "_fetched_root_nodes", False): self._update_tiles_from_pythonic_model_values() - def _save_tiles_for_pythonic_model(self, index=False, **kwargs): + def _save_tiles_for_pythonic_model(self, user=None, index=False, **kwargs): """Raises a compound ValidationError with any failing tile values. 
It's not exactly idiomatic for a Django project to clean() @@ -1370,32 +1370,60 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): delete_proxies = Tile.objects.filter(pk__in=[tile.pk for tile in to_delete]) with transaction.atomic(): - for proxy_instance, vanilla_instance in zip( + # Interact with the database in bulk as much as possible, but + # run certain side effects from Tile.save() one-at-a-time until + # proxy model methods can be refactored. Then run in bulk. + for upsert_proxy, vanilla_instance in zip( upsert_proxies, upserts, strict=True ): + upsert_proxy._existing_data = upsert_proxy.data + upsert_proxy._existing_provisionaledits = upsert_proxy.provisionaledits + + # Sync proxy instance fields. for field in field_names(vanilla_instance): - setattr(proxy_instance, field, getattr(vanilla_instance, field)) - proxy_instance._Tile__preSave() - for proxy_instance in delete_proxies: - proxy_instance._Tile__preDelete() + setattr(upsert_proxy, field, getattr(vanilla_instance, field)) + + # Run tile lifecycle updates on proxy instance. + upsert_proxy._Tile__preSave() + upsert_proxy.check_for_missing_nodes() + upsert_proxy.check_for_constraint_violation() + ( + oldprovisionalvalue, + newprovisionalvalue, + provisional_edit_log_details, + ) = vanilla_instance._apply_provisional_edit( + upsert_proxy, + upsert_proxy._existing_data, + upsert_proxy._existing_provisionaledits, + user=user, + ) + # Remember the values needed for the edit log updates later. + upsert_proxy._oldprovisionalvalue = oldprovisionalvalue + upsert_proxy._newprovisionalvalue = newprovisionalvalue + upsert_proxy._provisional_edit_log_details = ( + provisional_edit_log_details + ) + upsert_proxy._existing_data = vanilla_instance.data + + for upsert_proxy in delete_proxies: + upsert_proxy._Tile__preDelete() - # TODO: determine appropriate effects, e.g. indexing, editlog - # (use/adapt proxy model methods?) 
insert_proxies = TileModel.objects.none() if to_insert: inserted = TileModel.objects.bulk_create(to_insert) - # Pay the cost of TileModel -> Tile transformation until this is moved. + # Pay the cost of a second TileModel -> Tile transform until refactored. + update_proxies = upsert_proxies.difference(insert_proxies) insert_proxies = Tile.objects.filter(pk__in=[t.pk for t in inserted]) + upsert_proxies = update_proxies | insert_proxies if to_update: - TileModel.objects.bulk_update(to_update, {"data", "parenttile"}) + TileModel.objects.bulk_update( + to_update, {"data", "parenttile", "provisionaledits"} + ) if to_delete: TileModel.objects.filter(pk__in=[t.pk for t in to_delete]).delete() super().save(**kwargs) - for proxy_instance in upsert_proxies.difference(insert_proxies): - proxy_instance._Tile__postSave() - for upsert_tile in upserts: for root_node in self._fetched_root_nodes: if upsert_tile.nodegroup_id == root_node.nodegroup_id: @@ -1404,6 +1432,36 @@ def _save_tiles_for_pythonic_model(self, index=False, **kwargs): datatype.post_tile_save(upsert_tile, str(node.pk)) break + for upsert_proxy in upsert_proxies: + upsert_proxy._Tile__postSave() + + # Save edits: could be done in bulk once above side effects are un-proxied. + for upsert_proxy in upsert_proxies: + if self._state.adding: + upsert_proxy.save_edit( + user=user, + edit_type="tile create", + old_value={}, + new_value=upsert_proxy.data, + newprovisionalvalue=upsert_proxy._newprovisionalvalue, + provisional_edit_log_details=upsert_proxy._provisional_edit_log_details, + transaction_id=None, + # TODO: get this information upstream somewhere. 
+ new_resource_created=False, + note=None, + ) + else: + upsert_proxy.save_edit( + user=user, + edit_type="tile edit", + old_value=upsert_proxy._existing_data, + new_value=upsert_proxy.data, + newprovisionalvalue=upsert_proxy._newprovisionalvalue, + oldprovisionalvalue=upsert_proxy._oldprovisionalvalue, + provisional_edit_log_details=upsert_proxy._provisional_edit_log_details, + transaction_id=None, + ) + # Instantiate proxy model for now, but refactor & expose this on vanilla model proxy_resource = Resource.objects.get(pk=self.pk) proxy_resource.save_descriptors() @@ -1488,7 +1546,7 @@ def _update_tile_for_root_node( upserts = to_insert | to_update for tile in upserts: self._validate_and_patch_from_tile_values( - tile, root_node, errors_by_node_alias + tile, root_node=root_node, errors_by_node_alias=errors_by_node_alias ) # Remove blank tiles. # TODO: also check for unsaved children? @@ -1504,7 +1562,7 @@ def _update_tile_for_root_node( to_update.remove(tile) @staticmethod - def _validate_and_patch_from_tile_values(tile, root_node, errors_by_node_alias): + def _validate_and_patch_from_tile_values(tile, *, root_node, errors_by_node_alias): """Validate data found on ._incoming_tile and move it to .data. Update errors_by_node_alias in place.""" from arches.app.datatypes.datatypes import DataTypeFactory @@ -1932,15 +1990,12 @@ def save(self, index=False, user=None, **kwargs): self.tileid = uuid.uuid4() add_to_update_fields(kwargs, "tileid") - # TODO: check user? - # TOOD: index side effects? 
- if getattr(self, "_fetched_root_nodes", False): - self._save_from_pythonic_model_values(**kwargs) + self._save_from_pythonic_model_values(user=user, index=index, **kwargs) else: super().save(**kwargs) - def _save_from_pythonic_model_values(self, index=False, **kwargs): + def _save_from_pythonic_model_values(self, *, user=None, index=False, **kwargs): from arches.app.datatypes.datatypes import DataTypeFactory from arches.app.models.resource import Resource from arches.app.models.tile import Tile @@ -1950,21 +2005,62 @@ def _save_from_pythonic_model_values(self, index=False, **kwargs): return # Instantiate a proxy model and sync data to it, to run all side effects. + # Explanation: this is basically Tile.save() but with the serialized + # graph and tile fetching skipped. Hence why we might # TODO: expose on vanilla model. proxy = Tile.objects.get(pk=self.pk) + # TODO: handle create. + # Capture these to avoid re-querying in _apply_provisional_edit(). + existing_data = proxy.data + existing_provisional_edits = proxy.provisionaledits for field in field_names(self): setattr(proxy, field, getattr(self, field)) datatype_factory = DataTypeFactory() with transaction.atomic(): proxy._Tile__preSave() + proxy.check_for_missing_nodes() + proxy.check_for_constraint_violation() + oldprovisionalvalue, newprovisionalvalue, provisional_edit_log_details = ( + self._apply_provisional_edit( + proxy, existing_data, existing_provisional_edits, user=user + ) + ) + super().save(**kwargs) - proxy._Tile__postSave() + for node in self._root_node.nodegroup.node_set.all(): datatype = datatype_factory.get_instance(node.datatype) datatype.post_tile_save(self, str(node.pk)) + proxy._Tile__postSave() + + if self._state.adding: + proxy.save_edit( + user=user, + edit_type="tile create", + old_value={}, + new_value=self.data, + newprovisionalvalue=newprovisionalvalue, + provisional_edit_log_details=provisional_edit_log_details, + transaction_id=None, + # TODO: get this information upstream 
somewhere. + new_resource_created=False, + note=None, + ) + else: + proxy.save_edit( + user=user, + edit_type="tile edit", + old_value=existing_data, + new_value=self.data, + newprovisionalvalue=newprovisionalvalue, + oldprovisionalvalue=oldprovisionalvalue, + provisional_edit_log_details=provisional_edit_log_details, + transaction_id=None, + ) # TODO: add unique constraint for TileModel re: sortorder + # TODO: determine whether this should be skippable, and how. self.refresh_from_db( using=kwargs.get("using", None), fields=kwargs.get("update_fields", None), @@ -1990,7 +2086,7 @@ def _update_tile_from_pythonic_model_values(self): errors_by_alias = defaultdict(list) # TODO: move this somewhere else. ResourceInstance._validate_and_patch_from_tile_values( - self, self._root_node, errors_by_alias + self, root_node=self._root_node, errors_by_node_alias=errors_by_alias ) if not any(self.data.values()): raise ValidationError(_("Tile is blank.")) @@ -2026,6 +2122,53 @@ def _tile_update_is_noop(self, original_data): return True + def _apply_provisional_edit( + self, proxy, existing_data, existing_provisional_edits, *, user=None + ): + # TODO: decompose this out of Tile.save() and call *that*. 
+ # this section moves the data over from self.data to self.provisionaledits if certain users permissions are in force + # then self.data is restored from the previously saved tile data + from arches.app.models.tile import Tile + from arches.app.utils.permission_backend import user_is_resource_reviewer + + oldprovisionalvalue = None + newprovisionalvalue = None + provisional_edit_log_details = None + creating_new_tile = self._state.adding + existing_instance = Tile( + data={**existing_data} if existing_data else None, + provisional_edits=( + {**existing_provisional_edits} if existing_provisional_edits else None + ), + ) + existing_instance._state.adding = creating_new_tile + if user is not None and not user_is_resource_reviewer(user): + if creating_new_tile: + # the user has previously edited this tile + proxy.apply_provisional_edit( + user, self.data, action="update", existing_model=existing_instance + ) + oldprovisional = proxy.get_provisional_edit(existing_instance, user) + if oldprovisional is not None: + oldprovisionalvalue = oldprovisional["value"] + else: + proxy.apply_provisional_edit(user, data=self.data, action="create") + + newprovisionalvalue = self.data + self.provisionaledits = proxy.provisionaledits + self.data = existing_data + # Also update proxy, which will be used to run further side effects. 
+ proxy.provisionaledits = proxy.provisionaledits + proxy.data = existing_data + + provisional_edit_log_details = { + "user": user, + "provisional_editor": user, + "action": "create tile" if creating_new_tile else "add edit", + } + + return oldprovisionalvalue, newprovisionalvalue, provisional_edit_log_details + def serialize( self, fields=None, exclude=("nodegroup", "nodegroup_alias"), **kwargs ): diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index daaad9f408..2c4ca24d5a 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -132,7 +132,7 @@ def with_nodegroups( >>> parent.statement_data_assignment_statement[0].statement is parent True - Provisional edits are completely ignored. + Provisional edits are completely ignored for the purposes of querying. """ from arches.app.models.models import GraphModel, NodeGroup, TileModel From 7c36facb5074db814423a685a5fae25c7ed866bd Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 6 Nov 2024 11:37:17 -0500 Subject: [PATCH 077/115] Temporarily workaround tile deserialization issues --- arches/app/models/models.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 4ff57ef42b..ba68c07816 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -7,7 +7,7 @@ import traceback from collections import defaultdict from itertools import zip_longest -from operator import itemgetter +from operator import attrgetter, itemgetter from arches.app.const import ExtensionType from arches.app.utils.module_importer import get_class_from_modulename @@ -1515,8 +1515,16 @@ def _update_tile_for_root_node( if new_tiles is NOT_PROVIDED: return if root_node.nodegroup.cardinality == "1": - new_tiles = [new_tiles] - new_tiles.sort(key=itemgetter("sortorder")) + if new_tiles is None: + new_tiles = [] + else: + new_tiles = [new_tiles] + if all(isinstance(tile, TileModel) for 
tile in new_tiles): + new_tiles.sort(key=attrgetter("sortorder")) + else: + # TODO: figure out best layer for this and remove if/else. + # TODO: nullguard or make not nullable. + new_tiles.sort(key=itemgetter("sortorder")) db_tiles = [ t for t in self._annotated_tiles if t.nodegroup_alias == root_node.alias ] @@ -1571,7 +1579,13 @@ def _validate_and_patch_from_tile_values(tile, *, root_node, errors_by_node_alia datatype_factory = DataTypeFactory() for node in root_node.nodegroup.node_set.all(): node_id_str = str(node.pk) - value_to_validate = tile._incoming_tile.get(node.alias, NOT_PROVIDED) + # TODO: remove this switch and deserialize this in DRF. + if isinstance(tile._incoming_tile, TileModel): + value_to_validate = getattr( + tile._incoming_tile, node.alias, NOT_PROVIDED + ) + else: + value_to_validate = tile._incoming_tile.get(node.alias, NOT_PROVIDED) if value_to_validate is NOT_PROVIDED: continue datatype_instance = datatype_factory.get_instance(node.datatype) From 0137088f2ae393c38536eca75904ce105445cd96 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 6 Nov 2024 11:38:01 -0500 Subject: [PATCH 078/115] Override TileModel.refresh_from_db() --- arches/app/models/models.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index ba68c07816..e147e440e5 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1626,12 +1626,9 @@ def refresh_from_db(self, using=None, fields=None, from_queryset=None): aliases = [n.alias for n in root_nodes] from_queryset = self.__class__.as_model(self.graph.slug, only=aliases) super().refresh_from_db(using, fields, from_queryset) - # Copy over annotations. + # Copy over annotations and annotated tiles. 
refreshed_resource = from_queryset[0] - for field in itertools.chain( - aliases, - ("_fetched_root_nodes", "_annotated_tiles"), - ): + for field in itertools.chain(aliases, ["_annotated_tiles"]): setattr(self, field, getattr(refreshed_resource, field)) else: super().refresh_from_db(using, fields, from_queryset) @@ -2190,6 +2187,26 @@ def serialize( self, fields=fields, exclude=exclude, **kwargs ) + def refresh_from_db(self, using=None, fields=None, from_queryset=None): + if ( + not from_queryset + and (root_nodes := getattr(self, "_fetched_root_nodes", set())) + and self.resourceinstance.graph.slug + ): + aliases = [n.alias for n in root_nodes] + from_queryset = self.__class__.as_nodegroup( + root_node_alias=self._root_node.alias, + graph_slug=self.resourceinstance.graph.slug, + only=aliases, + ) + super().refresh_from_db(using, fields, from_queryset) + # Copy over annotations. + refreshed_tile = from_queryset[0] + for field in aliases: + setattr(self, field, getattr(refreshed_tile, field)) + else: + super().refresh_from_db(using, fields, from_queryset) + @staticmethod def get_blank_tile_from_nodegroup( nodegroup: NodeGroup, resourceid=None, parenttile=None From 449ef943a194d8f36af629f6e14e1018d656226c Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 6 Nov 2024 14:03:30 -0500 Subject: [PATCH 079/115] Disallow empty strings for legacyid --- arches/app/models/serializers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py index cd53263aa5..07a2b4855f 100644 --- a/arches/app/models/serializers.py +++ b/arches/app/models/serializers.py @@ -74,6 +74,8 @@ def build_unknown_field(self, field_name, model_class): class ArchesModelSerializer(serializers.ModelSerializer): + legacyid = serializers.CharField(max_length=255, required=False, allow_null=True) + _root_nodes = Node.objects.none() def get_fields(self): From 4011a4838fe6b35065d6c4b502b32b8d61e1f2d4 Mon Sep 17 00:00:00 2001 From: Jacob Walls 
Date: Wed, 6 Nov 2024 14:04:50 -0500 Subject: [PATCH 080/115] Proof of concept of db_default uuid4() #10958 --- arches/app/models/functions.py | 7 ++++++ .../migrations/11045_generate_resource_ids.py | 24 +++++++++++++++++++ arches/app/models/models.py | 7 +++--- 3 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 arches/app/models/functions.py create mode 100644 arches/app/models/migrations/11045_generate_resource_ids.py diff --git a/arches/app/models/functions.py b/arches/app/models/functions.py new file mode 100644 index 0000000000..0b9444eb61 --- /dev/null +++ b/arches/app/models/functions.py @@ -0,0 +1,7 @@ +from django.db import models + + +class UUID4(models.Func): + function = "uuid_generate_v4" + arity = 0 + output_field = models.UUIDField() diff --git a/arches/app/models/migrations/11045_generate_resource_ids.py b/arches/app/models/migrations/11045_generate_resource_ids.py new file mode 100644 index 0000000000..0a98b2e44f --- /dev/null +++ b/arches/app/models/migrations/11045_generate_resource_ids.py @@ -0,0 +1,24 @@ +# Generated by Django 5.1.3 on 2024-11-06 12:10 + +import arches.app.models.functions +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("models", "11044_make_further_fields_blank"), + ] + + operations = [ + migrations.AlterField( + model_name="resourceinstance", + name="resourceinstanceid", + field=models.UUIDField( + blank=True, + db_default=arches.app.models.functions.UUID4(), + primary_key=True, + serialize=False, + ), + ), + ] diff --git a/arches/app/models/models.py b/arches/app/models/models.py index e147e440e5..115025c4e0 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -13,6 +13,7 @@ from arches.app.utils.module_importer import get_class_from_modulename from arches.app.utils.thumbnail_factory import ThumbnailGeneratorInstance from arches.app.models.fields.i18n import I18n_TextField, I18n_JSONField +from 
arches.app.models.functions import UUID4 from arches.app.models.querysets import ResourceInstanceQuerySet, TileQuerySet from arches.app.models.utils import ( add_to_update_fields, @@ -1205,7 +1206,9 @@ class Meta: class ResourceInstance(models.Model): - resourceinstanceid = models.UUIDField(primary_key=True, blank=True) + resourceinstanceid = models.UUIDField( + primary_key=True, blank=True, db_default=UUID4() + ) graph = models.ForeignKey(GraphModel, db_column="graphid", on_delete=models.CASCADE) graph_publication = models.ForeignKey( GraphXPublishedGraph, @@ -1248,8 +1251,6 @@ def __init__(self, *args, **kwargs): for kwarg, value in arches_model_kwargs.items(): setattr(self, kwarg, value) - if not self.resourceinstanceid: - self.resourceinstanceid = uuid.uuid4() def __repr__(self): return f"<{self.graph.name}: {self.name} ({self.pk})>" From 2fb035f6ada8f99dfb0383e707f3ab0ecdd44fdd Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 7 Nov 2024 08:36:12 -0500 Subject: [PATCH 081/115] Improve performance of browsable API --- ...make_tile_resourceinstanceid_uneditable.py | 24 +++++++++++++++++++ arches/app/models/models.py | 5 +++- arches/app/models/serializers.py | 18 +++++++++++--- 3 files changed, 43 insertions(+), 4 deletions(-) create mode 100644 arches/app/models/migrations/11046_make_tile_resourceinstanceid_uneditable.py diff --git a/arches/app/models/migrations/11046_make_tile_resourceinstanceid_uneditable.py b/arches/app/models/migrations/11046_make_tile_resourceinstanceid_uneditable.py new file mode 100644 index 0000000000..3df2e29752 --- /dev/null +++ b/arches/app/models/migrations/11046_make_tile_resourceinstanceid_uneditable.py @@ -0,0 +1,24 @@ +# Generated by Django 5.1.3 on 2024-11-07 07:35 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("models", "11045_generate_resource_ids"), + ] + + operations = [ + migrations.AlterField( + model_name="tilemodel",
name="resourceinstance", + field=models.ForeignKey( + db_column="resourceinstanceid", + editable=False, + on_delete=django.db.models.deletion.CASCADE, + to="models.resourceinstance", + ), + ), + ] diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 115025c4e0..5169b2dba7 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1874,7 +1874,10 @@ class TileModel(models.Model): # Tile tileid = models.UUIDField(primary_key=True, blank=True) resourceinstance = models.ForeignKey( - ResourceInstance, db_column="resourceinstanceid", on_delete=models.CASCADE + ResourceInstance, + db_column="resourceinstanceid", + on_delete=models.CASCADE, + editable=False, ) parenttile = models.ForeignKey( "self", diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py index 07a2b4855f..36cabdf891 100644 --- a/arches/app/models/serializers.py +++ b/arches/app/models/serializers.py @@ -1,6 +1,7 @@ from copy import deepcopy from django.db.models import F +from rest_framework import fields from rest_framework import renderers from rest_framework import serializers @@ -17,7 +18,10 @@ class ArchesTileSerializer(serializers.ModelSerializer): tileid = serializers.UUIDField(validators=[], required=False) - _nodes = Node.objects.none() + def __init__(self, instance=None, data=fields.empty, **kwargs): + super().__init__(instance, data, **kwargs) + self._nodes = Node.objects.none() + self._root_node = None def get_default_field_names(self, declared_fields, model_info): field_names = super().get_default_field_names(declared_fields, model_info) @@ -28,7 +32,7 @@ def get_default_field_names(self, declared_fields, model_info): aliases = self.__class__.Meta.fields if aliases == "__all__": # TODO: latest graph - root_node = ( + self._root_node = ( Node.objects.filter( graph__slug=self.__class__.Meta.graph_slug, alias=self.__class__.Meta.root_node, @@ -39,7 +43,7 @@ def get_default_field_names(self, declared_fields, model_info): .get() 
) aliases = ( - root_node.nodegroup.node_set.exclude(nodegroup=None) + self._root_node.nodegroup.node_set.exclude(nodegroup=None) .exclude(datatype="semantic") .values_list("alias", flat=True) ) @@ -72,6 +76,14 @@ def build_unknown_field(self, field_name, model_class): return self.build_standard_field(field_name, model_field) + def build_relational_field(self, field_name, relation_info): + ret = super().build_relational_field(field_name, relation_info) + if field_name == "parenttile": + ret[1]["queryset"] = ret[1]["queryset"].filter( + nodegroup_id=self._root_node.nodegroup.parentnodegroup_id + ) + return ret + class ArchesModelSerializer(serializers.ModelSerializer): legacyid = serializers.CharField(max_length=255, required=False, allow_null=True) From 9b025c1c3f1cbc40777e954dde217ed6c685efd9 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 7 Nov 2024 09:05:16 -0500 Subject: [PATCH 082/115] Improve types handling in ResourceInstance dt transform_value_for_tile --- arches/app/datatypes/datatypes.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 77585baf1c..0e9dc23f2e 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2264,14 +2264,18 @@ def from_id_string(uuid_string): except TypeError: # data should come in as json but python list is accepted as well if isinstance(value, list): - if all(isinstance(inner, models.ResourceInstance) for inner in value): - return [from_id_string(str(instance.pk)) for instance in value] - elif all(isinstance(inner, uuid.UUID) for inner in value): - return [from_id_string(str(uid)) for uid in value] - elif all(isinstance(inner, str) for inner in value): - return [from_id_string(uid) for uid in value] - else: - return value + transformed = [] + for inner in value: + match inner: + case models.ResourceInstance(): + transformed.append(from_id_string(str(inner.pk))) + case 
uuid.UUID(): + transformed.append(from_id_string(str(inner))) + case str(): + transformed.append(from_id_string(inner)) + case _: + transformed.append(inner) + return transformed if isinstance(value, models.ResourceInstance): return [from_id_string(str(value.pk))] From 61b5213c443840cf4cc955554f2f833372a720fb Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 7 Nov 2024 09:06:57 -0500 Subject: [PATCH 083/115] Add edit log update on ResourceInstance.save() --- arches/app/models/models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 5169b2dba7..3d60da0b57 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1305,11 +1305,10 @@ def save_edit(self, user=None): """Intended to replace proxy model method eventually.""" from arches.app.models.resource import Resource - edit_type = "update" if self._state.adding: edit_type = "create" else: - return # TODO: should we save an edit log update? + edit_type = "update" # Until save_edit() is a static method, work around it. ephemeral_proxy_instance = Resource() From ff187d0b12b855d82ce96788ff459dce3cb90cda Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 7 Nov 2024 09:22:10 -0500 Subject: [PATCH 084/115] Refactor get_values_query() --- arches/app/models/querysets.py | 2 -- arches/app/models/utils.py | 19 ++++++++----------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 2c4ca24d5a..22db9a3279 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -160,11 +160,9 @@ def with_nodegroups( defer=defer, only=only, model=self.model, - lhs=None, # TODO: AWKWARD outer_ref="resourceinstanceid", ) self._fetched_nodes = [n for n in nodes if n.alias in node_alias_annotations] - # TODO: there might be some way to prune unused annotations. 
if resource_ids: qs = self.filter(pk__in=resource_ids) diff --git a/arches/app/models/utils.py b/arches/app/models/utils.py index 25dbaa3ffb..c49040fc3c 100644 --- a/arches/app/models/utils.py +++ b/arches/app/models/utils.py @@ -23,7 +23,7 @@ def field_names(instance_or_class): return {f.name for f in instance_or_class._meta.fields} -def generate_tile_annotations(nodes, *, defer, only, model, lhs, outer_ref): +def generate_tile_annotations(nodes, *, defer, only, model, lhs=None, outer_ref): from arches.app.datatypes.datatypes import DataTypeFactory from arches.app.models.models import ResourceInstance, TileModel @@ -65,7 +65,6 @@ def generate_tile_annotations(nodes, *, defer, only, model, lhs, outer_ref): if not node_alias_annotations: raise ValueError("All fields were excluded.") - # TODO: also add some safety around bad nodegroups. if not is_resource: for given_alias in only or []: if given_alias not in node_alias_annotations: @@ -89,23 +88,21 @@ def find_root_node(prefetched_siblings, nodegroup_id): return sibling_node -def get_values_query( - nodegroup, base_lookup, *, lhs=None, outer_ref=None -) -> BaseExpression: +def get_values_query(*, nodegroup, base_lookup, lhs=None, outer_ref) -> BaseExpression: """Return a tile values query expression for use in a ResourceInstanceQuerySet or TileQuerySet. + + lhs: the left-hand side (field_name) of the tile query. + If absent, the query will be filtered by nodegroup and resourceinstance. """ from arches.app.models.models import TileModel - # TODO: make this a little less fragile. 
- if lhs is None: + if lhs: + tile_query = TileModel.objects.filter(**{lhs: OuterRef(outer_ref)}) + else: tile_query = TileModel.objects.filter( nodegroup_id=nodegroup.pk, resourceinstance_id=OuterRef(outer_ref) ) - elif lhs and outer_ref: - tile_query = TileModel.objects.filter(**{lhs: OuterRef(outer_ref)}) - else: - tile_query = TileModel.objects.filter(nodegroup_id=nodegroup.pk) if nodegroup.cardinality == "n": tile_query = tile_query.order_by("sortorder") From 87fb3284e3de9df58bb3167be358cec8cb565a58 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 7 Nov 2024 09:45:34 -0500 Subject: [PATCH 085/115] Add default pagination --- arches/settings.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/arches/settings.py b/arches/settings.py index 5a8b239622..2523cda7bc 100644 --- a/arches/settings.py +++ b/arches/settings.py @@ -407,15 +407,6 @@ "django_hosts.middleware.HostsResponseMiddleware" ) -# TODO: choose most appropriate default. -REST_FRAMEWORK = { - # Use Django's standard `django.contrib.auth` permissions, - # or allow read-only access for unauthenticated users. - "DEFAULT_PERMISSION_CLASSES": [ - "rest_framework.permissions.DjangoModelPermissionsOrAnonReadOnly" - ] -} - WEBPACK_LOADER = { "DEFAULT": { "STATS_FILE": os.path.join(ROOT_DIR, "..", "webpack/webpack-stats.json"), @@ -613,6 +604,17 @@ API_MAX_PAGE_SIZE = 500 +REST_FRAMEWORK = { + # TODO: choose most appropriate default. + # Use Django's standard `django.contrib.auth` permissions, + # or allow read-only access for unauthenticated users. 
+ "DEFAULT_PERMISSION_CLASSES": [ + "rest_framework.permissions.DjangoModelPermissionsOrAnonReadOnly" + ], + "DEFAULT_PAGINATION_CLASS": "rest_framework.pagination.LimitOffsetPagination", + "PAGE_SIZE": API_MAX_PAGE_SIZE, +} + UUID_REGEX = ( "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}" ) From a991613e111605ea5de425bb3a19262a8f9b1584 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 7 Nov 2024 10:14:41 -0500 Subject: [PATCH 086/115] Limit graph choices in browsable API --- arches/app/models/serializers.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py index 36cabdf891..b8ce8f0410 100644 --- a/arches/app/models/serializers.py +++ b/arches/app/models/serializers.py @@ -128,3 +128,11 @@ def get_default_field_names(self, declared_fields, model_info): else: field_names.extend(self.__class__.Meta.nodegroups) return field_names + + def build_relational_field(self, field_name, relation_info): + ret = super().build_relational_field(field_name, relation_info) + if field_name == "graph": + ret[1]["queryset"] = ret[1]["queryset"].filter( + graphmodel__slug=self.__class__.Meta.graph_slug + ) + return ret From 1da0afa99aa5dd6156bbc5539af47f67bf4344f9 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 7 Nov 2024 10:26:13 -0500 Subject: [PATCH 087/115] Avoid 404 for permission denied --- arches/app/views/api/mixins.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arches/app/views/api/mixins.py b/arches/app/views/api/mixins.py index 99fc831443..5477afd2e7 100644 --- a/arches/app/views/api/mixins.py +++ b/arches/app/views/api/mixins.py @@ -1,7 +1,7 @@ from functools import partial from django.core.exceptions import ValidationError as DjangoValidationError -from rest_framework.exceptions import NotFound, ValidationError +from rest_framework.exceptions import PermissionDenied, ValidationError from arches.app.models.models import 
ResourceInstance, TileModel from arches.app.utils.permission_backend import ( @@ -31,7 +31,8 @@ def get_queryset(self): def get_object(self, user=None, permission_callable=None): ret = super().get_object() if permission_callable and not permission_callable(user=user, resource=ret): - raise NotFound + # Not 404, see https://github.com/archesproject/arches/issues/11563 + raise PermissionDenied ret.save = partial(ret.save, user=user) return ret From b7c6892695922c29dc4defa65ea33238692ea3f1 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 7 Nov 2024 10:59:34 -0500 Subject: [PATCH 088/115] Allow null for tile serializer fields --- arches/app/models/serializers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py index b8ce8f0410..e3d35c42db 100644 --- a/arches/app/models/serializers.py +++ b/arches/app/models/serializers.py @@ -112,7 +112,9 @@ class Meta: fields = self.__class__.Meta.fields self._declared_fields[root.alias] = TileSerializer( - many=root.nodegroup.cardinality == "n", required=False + many=root.nodegroup.cardinality == "n", + required=False, + allow_null=True, ) return super().get_fields() From 49ca86dfb8dd564d6eb9de301219ea1966c3e67c Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 7 Nov 2024 11:00:46 -0500 Subject: [PATCH 089/115] Factor out _make_tile_serializer() --- arches/app/models/serializers.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py index e3d35c42db..424b3c480c 100644 --- a/arches/app/models/serializers.py +++ b/arches/app/models/serializers.py @@ -103,19 +103,7 @@ def get_fields(self): ).select_related("nodegroup") for root in self._root_nodes: if root.alias not in self._declared_fields: - - class TileSerializer(ArchesTileSerializer): - class Meta: - model = TileModel - graph_slug = self.__class__.Meta.graph_slug - root_node = 
root.alias - fields = self.__class__.Meta.fields - - self._declared_fields[root.alias] = TileSerializer( - many=root.nodegroup.cardinality == "n", - required=False, - allow_null=True, - ) + self._make_tile_serializer(root) return super().get_fields() @@ -138,3 +126,17 @@ def build_relational_field(self, field_name, relation_info): graphmodel__slug=self.__class__.Meta.graph_slug ) return ret + + def _make_tile_serializer(self, root): + class TileSerializer(ArchesTileSerializer): + class Meta: + model = TileModel + graph_slug = self.__class__.Meta.graph_slug + root_node = root.alias + fields = self.__class__.Meta.fields + + self._declared_fields[root.alias] = TileSerializer( + many=root.nodegroup.cardinality == "n", + required=False, + allow_null=True, + ) From c83b44d0b1079d75f60abcafb453d85e6b8c92dc Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 7 Nov 2024 11:10:30 -0500 Subject: [PATCH 090/115] Fix nodegroup declaration on serializers --- arches/app/models/serializers.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py index 424b3c480c..a2c47adca0 100644 --- a/arches/app/models/serializers.py +++ b/arches/app/models/serializers.py @@ -94,17 +94,23 @@ def get_fields(self): graph_slug = self.__class__.Meta.graph_slug if self.__class__.Meta.nodegroups == "__all__": - if not self._root_nodes: - self._root_nodes = Node.objects.filter( - graph__slug=graph_slug, - # TODO: latest - graph__source_identifier=None, - nodegroup_id=F("nodeid"), - ).select_related("nodegroup") - for root in self._root_nodes: - if root.alias not in self._declared_fields: - self._make_tile_serializer(root) - + self._root_nodes = Node.objects.filter( + graph__slug=graph_slug, + # TODO: latest + graph__source_identifier=None, + nodegroup_id=F("nodeid"), + ).select_related("nodegroup") + else: + self._root_nodes = Node.objects.filter( + graph__slug=graph_slug, + # TODO: latest + 
graph__source_identifier=None, + nodegroup_id=F("nodeid"), + node__alias__in=self.__class__.Meta.nodegroups, + ).select_related("nodegroup") + for root in self._root_nodes: + if root.alias not in self._declared_fields: + self._make_tile_serializer(root) return super().get_fields() def get_default_field_names(self, declared_fields, model_info): From 0a0b698eee00df62aac11033c3dfd0e2fe43b742 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 7 Nov 2024 11:29:07 -0500 Subject: [PATCH 091/115] Implement ResourceInstance dt to_python() --- arches/app/datatypes/datatypes.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 0e9dc23f2e..5a1065d67e 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2407,6 +2407,11 @@ def _get_base_orm_lookup(self, node): """Filter down to the resourceId.""" return f"data__{node.pk}__0__resourceId" + def to_python(self, tile_val): + if tile_val is None or len(tile_val) != 1: + return tile_val + return tile_val[0]["resourceId"] + def values_match(self, value1, value2): if not isinstance(value1, list) or not isinstance(value2, list): return value1 == value2 From 50155a3cc54d981f1d55cfc3965f3e2950e51064 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 7 Nov 2024 13:52:30 -0500 Subject: [PATCH 092/115] Calculate display value for RI/RI list datatypes --- arches/app/datatypes/datatypes.py | 14 ++---- arches/app/models/models.py | 52 ++++++++++++++-------- arches/app/models/querysets.py | 74 +++++++++++++++++++++++++++---- arches/app/views/api/mixins.py | 9 +++- 4 files changed, 110 insertions(+), 39 deletions(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 5a1065d67e..402a0d3e0a 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -443,7 +443,7 @@ def get_column_header(self, node, **kwargs): def to_json(self, tile, node): data = 
self.get_tile_data(tile) if data: - return self.compile_json(tile, node, **data.get(str(node.nodeid))) + return self.compile_json(tile, node, **data.get(str(node.nodeid)) or {}) def pre_structure_tile_data(self, tile, nodeid, **kwargs): all_language_codes = {lang.code for lang in kwargs["languages"]} @@ -2039,7 +2039,7 @@ class ResourceInstanceDataType(BaseDataType): """ - rest_framework_model_field = fields.UUIDField(null=True) + rest_framework_model_field = JSONField(null=True) def validate( self, @@ -2410,7 +2410,7 @@ def _get_base_orm_lookup(self, node): def to_python(self, tile_val): if tile_val is None or len(tile_val) != 1: return tile_val - return tile_val[0]["resourceId"] + return tile_val[0] def values_match(self, value1, value2): if not isinstance(value1, list) or not isinstance(value2, list): @@ -2423,7 +2423,7 @@ def values_match(self, value1, value2): class ResourceInstanceListDataType(ResourceInstanceDataType): - rest_framework_model_field = ArrayField(base_field=fields.UUIDField(), null=True) + rest_framework_model_field = ArrayField(base_field=JSONField(), null=True) def to_json(self, tile, node): from arches.app.models.resource import ( @@ -2468,12 +2468,6 @@ def _get_base_orm_lookup(self, node): useful for querying.""" return f"data__{node.pk}" - def to_python(self, tile_val): - if tile_val is None: - return tile_val - resource_ids = [inner["resourceId"] if inner else None for inner in tile_val] - return resource_ids - class NodeValueDataType(BaseDataType): def validate( diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 3d60da0b57..1a3dc7a289 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1259,11 +1259,26 @@ def __str__(self): return repr(self) @classmethod - def as_model(cls, graph_slug=None, *, resource_ids=None, defer=None, only=None): + def as_model( + cls, + graph_slug=None, + *, + resource_ids=None, + defer=None, + only=None, + as_representation=False, + ): """Return a chainable 
QuerySet for a requested graph's instances, - with tile data annotated onto node and nodegroup aliases.""" + with tile data annotated onto node and nodegroup aliases. + + See `arches.app.models.querysets.ResourceInstanceQuerySet.with_nodegroups`. + """ return cls.objects.with_nodegroups( - graph_slug, resource_ids=resource_ids, defer=defer, only=only + graph_slug, + resource_ids=resource_ids, + defer=defer, + only=only, + as_representation=as_representation, ) def get_initial_resource_instance_lifecycle_state(self, *args, **kwargs): @@ -1929,21 +1944,17 @@ def nodegroup_alias(self): return None @classmethod - def as_nodegroup(cls, root_node_alias, *, graph_slug, defer=None, only=None): + def as_nodegroup( + cls, + root_node_alias, + *, + graph_slug, + defer=None, + only=None, + as_representation=False, + ): """ - Entry point for filtering arches data by nodegroups (instead of grouping by - resource.) - - >>> statements = TileModel.as_nodegroup("statement", graph_slug="concept") - >>> results = statements.filter(statement_content__en__value__startswith="F") # TODO: make more ergonomic - >>> for result in results: - print(result.resourceinstance) - print("\t", result.statement_content["en"]["value"]) # TODO: unwrap? - - - Fluorescence stimulated by x-rays; ... - - Fine-quality calf or lamb parchment ... + See `arches.app.models.querysets.TileModelQuerySet.with_tile_values`. 
""" root_node = cls._root_node(graph_slug, root_node_alias) @@ -1959,7 +1970,12 @@ def accumulate_nodes_below(nodegroup, acc): return ( cls.objects.filter(nodegroup_id=root_node.pk) .with_node_values( - branch_nodes, defer=defer, only=only, lhs="pk", outer_ref="tileid" + branch_nodes, + defer=defer, + only=only, + lhs="pk", + outer_ref="tileid", + as_representation=as_representation, ) .annotate(_nodegroup_alias=ORMValue(root_node_alias)) ) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 22db9a3279..7c78f0dbb7 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -4,12 +4,44 @@ class TileQuerySet(QuerySet): + def __init__(self, model=None, query=None, using=None, hints=None): + super().__init__(model, query, using, hints) + self._as_representation = False + self._fetched_nodes = [] + def with_node_values( - self, nodes, *, defer=None, only=None, lhs=None, outer_ref, depth=1 + self, + nodes, + *, + defer=None, + only=None, + lhs=None, + outer_ref, + depth=1, + as_representation=False, ): - """TileModel.as_nodegroup() is the better entrypoint, see docs there.""" + """ + Entry point for filtering arches data by nodegroups (instead of grouping by + resource.) + + >>> statements = TileModel.as_nodegroup("statement", graph_slug="concept") + >>> results = statements.filter(statement_content__en__value__startswith="F") # TODO: make more ergonomic + >>> for result in results: + print(result.resourceinstance) + print("\t", result.statement_content["en"]["value"]) # TODO: unwrap? + + + Fluorescence stimulated by x-rays; ... + + Fine-quality calf or lamb parchment ... + + as_representation = True skips calling to_python datatype methods and calls + as_json() instead. 
+ """ from arches.app.models.models import TileModel + self._as_representation = as_representation + node_alias_annotations = generate_tile_annotations( nodes, defer=defer, @@ -48,6 +80,7 @@ def _prefetch_related_objects(self): Discard annotations that do not pertain to this nodegroup. """ from arches.app.datatypes.datatypes import DataTypeFactory + from arches.app.models.models import TileModel super()._prefetch_related_objects() @@ -62,8 +95,14 @@ def _prefetch_related_objects(self): tile_val = getattr(tile, node.alias, NOT_PROVIDED) if tile_val is not NOT_PROVIDED: datatype_instance = datatype_factory.get_instance(node.datatype) - python_val = datatype_instance.to_python(tile_val) - setattr(tile, node.alias, python_val) + dummy_tile = TileModel( + data={str(node.pk): tile_val}, + provisionaledits=tile.provisionaledits, + ) + datatype_instance.to_json(dummy_tile, node) + if not self._as_representation: + tile_val = datatype_instance.to_python(tile_val) + setattr(tile, node.alias, tile_val) else: delattr(tile, node.alias) for child_tile in tile.children.all(): @@ -74,14 +113,25 @@ def _prefetch_related_objects(self): def _clone(self): ret = super()._clone() - if hasattr(self, "_fetched_nodes"): - ret._fetched_nodes = self._fetched_nodes + ret._fetched_nodes = self._fetched_nodes + ret._as_representation = self._as_representation return ret class ResourceInstanceQuerySet(QuerySet): + def __init__(self, model=None, query=None, using=None, hints=None): + super().__init__(model, query, using, hints) + self._as_representation = False + self._fetched_nodes = [] + def with_nodegroups( - self, graph_slug=None, *, resource_ids=None, defer=None, only=None + self, + graph_slug=None, + *, + resource_ids=None, + defer=None, + only=None, + as_representation=False, ): """Annotates a ResourceInstance QuerySet with tile data unpacked and mapped onto nodegroup aliases, e.g.: @@ -133,9 +183,14 @@ def with_nodegroups( True Provisional edits are completely ignored for the purposes of 
querying. + + as_representation = True skips calling to_python datatype methods and calls + as_json() instead. """ from arches.app.models.models import GraphModel, NodeGroup, TileModel + self._as_representation = as_representation + if resource_ids and not graph_slug: graph_query = GraphModel.objects.filter(resourceinstance__in=resource_ids) else: @@ -177,6 +232,7 @@ def with_nodegroups( only=[n.alias for n in self._fetched_nodes], lhs="pk", outer_ref="tileid", + as_representation=as_representation, ).annotate( cardinality=NodeGroup.objects.filter( pk=OuterRef("nodegroup_id") @@ -234,6 +290,6 @@ def _prefetch_related_objects(self): def _clone(self): ret = super()._clone() - if hasattr(self, "_fetched_nodes"): - ret._fetched_nodes = self._fetched_nodes + ret._fetched_nodes = self._fetched_nodes + ret._as_representation = self._as_representation return ret diff --git a/arches/app/views/api/mixins.py b/arches/app/views/api/mixins.py index 5477afd2e7..889ba7f7c1 100644 --- a/arches/app/views/api/mixins.py +++ b/arches/app/views/api/mixins.py @@ -21,10 +21,15 @@ def get_queryset(self): meta = self.serializer_class.Meta if ResourceInstance in meta.model.mro(): only = None if meta.nodegroups == "__all__" else meta.nodegroups - return meta.model.as_model(meta.graph_slug, only=only) + return meta.model.as_model( + meta.graph_slug, only=only, as_representation=True + ) elif TileModel in meta.model.mro(): return meta.model.as_nodegroup( - meta.root_node, graph_slug=meta.graph_slug, only=fields + meta.root_node, + graph_slug=meta.graph_slug, + only=fields, + as_representation=True, ) raise NotImplementedError From bee6a2405778c5f67549ff206b5b682fd7a0e38e Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 7 Nov 2024 14:14:37 -0500 Subject: [PATCH 093/115] Harden ConceptListDataType.to_json() --- arches/app/datatypes/concept_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arches/app/datatypes/concept_types.py b/arches/app/datatypes/concept_types.py 
index 2bc2fc8b75..83b932d16d 100644 --- a/arches/app/datatypes/concept_types.py +++ b/arches/app/datatypes/concept_types.py @@ -488,7 +488,7 @@ def to_json(self, tile, node): new_values = [] data = self.get_tile_data(tile) if data: - for val in data[str(node.nodeid)]: + for val in data[str(node.nodeid)] or []: new_val = self.get_value(uuid.UUID(val)) new_values.append(new_val) return self.compile_json(tile, node, concept_details=new_values) From fe260d87626faea28c9bafa30cced09a3ae181b2 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 7 Nov 2024 15:58:35 -0500 Subject: [PATCH 094/115] Add lookups for searching localized strings in any language --- arches/app/models/__init__.py | 1 + arches/app/models/lookups.py | 66 ++++++++++++++++++++++++++++++++++ arches/app/models/querysets.py | 2 +- 3 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 arches/app/models/lookups.py diff --git a/arches/app/models/__init__.py b/arches/app/models/__init__.py index e69de29bb2..fefa5c6284 100644 --- a/arches/app/models/__init__.py +++ b/arches/app/models/__init__.py @@ -0,0 +1 @@ +from arches.app.models.lookups import * diff --git a/arches/app/models/lookups.py b/arches/app/models/lookups.py new file mode 100644 index 0000000000..f3940b13e7 --- /dev/null +++ b/arches/app/models/lookups.py @@ -0,0 +1,66 @@ +from django.db.models import JSONField, Lookup + + +# TODO: manually merging parameters is usually a no-no, +# but are these already safe via the \" quotes? +# Look into sql.Identifier or AsIs(). + +# Seems like a Django bug that I need to override get_db_prep_lookup(). TODO: Ask. + + +@JSONField.register_lookup +class AnyLanguageEquals(Lookup): + lookup_name = "any_lang" + + def get_db_prep_lookup(self, value, connection): + return ("%s", (value,)) + + def as_sql(self, compiler, connection): + lhs, lhs_params = self.process_lhs(compiler, connection) + rhs, rhs_params = self.process_rhs(compiler, connection) + placeholder = "%s @? '$.*.value ? 
(@ == \"" + rhs_params[0] + "\")'" + return placeholder % (lhs,), lhs_params + + +@JSONField.register_lookup +class AnyLanguageContains(Lookup): + lookup_name = "any_lang_contains" + + def get_db_prep_lookup(self, value, connection): + return ("%s", (value,)) + + def as_sql(self, compiler, connection): + lhs, lhs_params = self.process_lhs(compiler, connection) + rhs, rhs_params = self.process_rhs(compiler, connection) + placeholder = "%s @? '$.*.value ? (@ like_regex \"" + rhs_params[0] + "\")'" + return placeholder % (lhs,), lhs_params + + +@JSONField.register_lookup +class AnyLanguageIContains(Lookup): + lookup_name = "any_lang_icontains" + + def get_db_prep_lookup(self, value, connection): + return ("%s", (value,)) + + def as_sql(self, compiler, connection): + lhs, lhs_params = self.process_lhs(compiler, connection) + rhs, rhs_params = self.process_rhs(compiler, connection) + placeholder = ( + "%s @? '$.*.value ? (@ like_regex \"" + rhs_params[0] + '" flag "i")\'' + ) + return placeholder % (lhs,), lhs_params + + +@JSONField.register_lookup +class AnyLanguageStartsWith(Lookup): + lookup_name = "any_lang_startswith" + + def get_db_prep_lookup(self, value, connection): + return ("%s", (value,)) + + def as_sql(self, compiler, connection): + lhs, lhs_params = self.process_lhs(compiler, connection) + rhs, rhs_params = self.process_rhs(compiler, connection) + placeholder = "%s @? '$.*.value ? (@ starts with \"" + rhs_params[0] + "\")'" + return placeholder % (lhs,), lhs_params diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 7c78f0dbb7..9eac798118 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -25,7 +25,7 @@ def with_node_values( resource.) 
>>> statements = TileModel.as_nodegroup("statement", graph_slug="concept") - >>> results = statements.filter(statement_content__en__value__startswith="F") # TODO: make more ergonomic + >>> results = statements.filter(statement_content__any_lang_startswith="F") >>> for result in results: print(result.resourceinstance) print("\t", result.statement_content["en"]["value"]) # TODO: unwrap? From 2ac22d6c6ab8506075f7713c5ed8c0d7f777d345 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 7 Nov 2024 16:18:33 -0500 Subject: [PATCH 095/115] Pop extra RI keys --- arches/app/datatypes/datatypes.py | 2 ++ arches/app/views/api/mixins.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 402a0d3e0a..11033d6590 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2274,6 +2274,8 @@ def from_id_string(uuid_string): case str(): transformed.append(from_id_string(inner)) case _: + # TODO: move this to validate? + inner.pop("display_value", None) transformed.append(inner) return transformed if isinstance(value, models.ResourceInstance): diff --git a/arches/app/views/api/mixins.py b/arches/app/views/api/mixins.py index 889ba7f7c1..dd404fd4c7 100644 --- a/arches/app/views/api/mixins.py +++ b/arches/app/views/api/mixins.py @@ -63,7 +63,6 @@ def update(self, request, *args, **kwargs): user=request.user, permission_callable=user_can_edit_resource, ) - # TODO: return correct response with updated object. 
return super().update(request, *args, **kwargs) def destroy(self, request, *args, **kwargs): From 65b488e4ca4cf09fd9b3e03024d0691d680cacd4 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 7 Nov 2024 16:22:10 -0500 Subject: [PATCH 096/115] Improve todo --- arches/app/views/api/mixins.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arches/app/views/api/mixins.py b/arches/app/views/api/mixins.py index dd404fd4c7..83c668e8ea 100644 --- a/arches/app/views/api/mixins.py +++ b/arches/app/views/api/mixins.py @@ -63,6 +63,8 @@ def update(self, request, *args, **kwargs): user=request.user, permission_callable=user_can_edit_resource, ) + # TODO: returned object is pretty close, but currently lacks + # recalculated display_value on RI datatypes. return super().update(request, *args, **kwargs) def destroy(self, request, *args, **kwargs): From 4443aa61002a6095548898e8c696f1ab83ffc9f6 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Mon, 11 Nov 2024 08:01:10 -0500 Subject: [PATCH 097/115] Improve SQL safety --- arches/app/models/lookups.py | 49 ++++++++++++++---------------------- 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/arches/app/models/lookups.py b/arches/app/models/lookups.py index f3940b13e7..5d4f543f6a 100644 --- a/arches/app/models/lookups.py +++ b/arches/app/models/lookups.py @@ -1,66 +1,55 @@ from django.db.models import JSONField, Lookup +from psycopg2.extensions import AsIs, QuotedString -# TODO: manually merging parameters is usually a no-no, -# but are these already safe via the \" quotes? -# Look into sql.Identifier or AsIs(). +class JSONPathFilter: + """Handle the double-quoting and escaping for JSONPath filters.""" -# Seems like a Django bug that I need to override get_db_prep_lookup(). TODO: Ask. 
+ def process_rhs(self, compiler, connection): + rhs, params = super().process_rhs(compiler, connection) + escaped = AsIs(QuotedString(params[0]).getquoted().decode()[1:-1]) + return rhs, (escaped,) @JSONField.register_lookup -class AnyLanguageEquals(Lookup): +class AnyLanguageEquals(JSONPathFilter, Lookup): lookup_name = "any_lang" - def get_db_prep_lookup(self, value, connection): - return ("%s", (value,)) - def as_sql(self, compiler, connection): lhs, lhs_params = self.process_lhs(compiler, connection) rhs, rhs_params = self.process_rhs(compiler, connection) - placeholder = "%s @? '$.*.value ? (@ == \"" + rhs_params[0] + "\")'" - return placeholder % (lhs,), lhs_params + params = lhs_params + rhs_params + return "%s @? '$.*.value ? (@ == \"%s\")'" % (lhs, rhs), params @JSONField.register_lookup -class AnyLanguageContains(Lookup): +class AnyLanguageContains(JSONPathFilter, Lookup): lookup_name = "any_lang_contains" - def get_db_prep_lookup(self, value, connection): - return ("%s", (value,)) - def as_sql(self, compiler, connection): lhs, lhs_params = self.process_lhs(compiler, connection) rhs, rhs_params = self.process_rhs(compiler, connection) - placeholder = "%s @? '$.*.value ? (@ like_regex \"" + rhs_params[0] + "\")'" - return placeholder % (lhs,), lhs_params + params = lhs_params + rhs_params + return "%s @? '$.*.value ? (@ like_regex \"%s\")'" % (lhs, rhs), params @JSONField.register_lookup -class AnyLanguageIContains(Lookup): +class AnyLanguageIContains(JSONPathFilter, Lookup): lookup_name = "any_lang_icontains" - def get_db_prep_lookup(self, value, connection): - return ("%s", (value,)) - def as_sql(self, compiler, connection): lhs, lhs_params = self.process_lhs(compiler, connection) rhs, rhs_params = self.process_rhs(compiler, connection) - placeholder = ( - "%s @? '$.*.value ? (@ like_regex \"" + rhs_params[0] + '" flag "i")\'' - ) - return placeholder % (lhs,), lhs_params + params = lhs_params + rhs_params + return '%s @? \'$.*.value ? 
(@ like_regex "%s" flag "i")\'' % (lhs, rhs), params @JSONField.register_lookup -class AnyLanguageStartsWith(Lookup): +class AnyLanguageStartsWith(JSONPathFilter, Lookup): lookup_name = "any_lang_startswith" - def get_db_prep_lookup(self, value, connection): - return ("%s", (value,)) - def as_sql(self, compiler, connection): lhs, lhs_params = self.process_lhs(compiler, connection) rhs, rhs_params = self.process_rhs(compiler, connection) - placeholder = "%s @? '$.*.value ? (@ starts with \"" + rhs_params[0] + "\")'" - return placeholder % (lhs,), lhs_params + params = lhs_params + rhs_params + return "%s @? '$.*.value ? (@ starts with \"%s\")'" % (lhs, rhs), params From b55edd84edf5793a953ba85d5d21b8530f7ecede Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Mon, 11 Nov 2024 16:38:50 -0500 Subject: [PATCH 098/115] Fix migration history --- .../models/migrations/11043_tile_nodegroup_add_related_names.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arches/app/models/migrations/11043_tile_nodegroup_add_related_names.py b/arches/app/models/migrations/11043_tile_nodegroup_add_related_names.py index c55144cc4d..b41872d108 100644 --- a/arches/app/models/migrations/11043_tile_nodegroup_add_related_names.py +++ b/arches/app/models/migrations/11043_tile_nodegroup_add_related_names.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("models", "11042_update__arches_staging_to_tile"), + ("models", "10437_node_alias_not_null"), ] operations = [ From 24376cba6f6c6324b47982e11c6f4a94cc1f27af Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Mon, 11 Nov 2024 18:01:13 -0500 Subject: [PATCH 099/115] Add ontologyProperty, inverseOntologyProperty --- arches/app/datatypes/datatypes.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 11033d6590..2c8e6e5e52 100644 --- a/arches/app/datatypes/datatypes.py +++ 
b/arches/app/datatypes/datatypes.py @@ -2245,12 +2245,17 @@ def get_search_terms(self, nodevalue, nodeid=None): return terms def transform_value_for_tile(self, value, **kwargs): - def from_id_string(uuid_string): + def from_id_string(uuid_string, graph_id=None): nonlocal kwargs + for graph_config in kwargs.get("graphs", []): + if graph_id is None or str(graph_id) == graph_config["graphid"]: + break + else: + graph_config = {"ontologyProperty": {}, "inverseOntologyProperty": {}} return { "resourceId": uuid_string, - "inverseOntology": kwargs.get("inverseOntology", ""), - "inverseOntologyProperty": kwargs.get("inverseOntologyProperty", ""), + "ontologyProperty": graph_config["ontologyProperty"], + "inverseOntologyProperty": graph_config["inverseOntologyProperty"], } try: @@ -2268,10 +2273,14 @@ def from_id_string(uuid_string): for inner in value: match inner: case models.ResourceInstance(): - transformed.append(from_id_string(str(inner.pk))) + transformed.append( + from_id_string(str(inner.pk), inner.graph_id) + ) case uuid.UUID(): + # TODO: handle multiple graph configs, requires db? transformed.append(from_id_string(str(inner))) case str(): + # TODO: handle multiple graph configs, requires db? transformed.append(from_id_string(inner)) case _: # TODO: move this to validate? 
@@ -2279,7 +2288,7 @@ def from_id_string(uuid_string): transformed.append(inner) return transformed if isinstance(value, models.ResourceInstance): - return [from_id_string(str(value.pk))] + return [from_id_string(str(value.pk), value.graph_id)] def transform_export_values(self, value, *args, **kwargs): return json.dumps(value) From 14bd35fa85c1ab8d7d6aab626f1f6f109d78cb50 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Mon, 11 Nov 2024 18:05:02 -0500 Subject: [PATCH 100/115] Remove incorrect TODOs --- arches/app/models/models.py | 2 -- arches/app/models/querysets.py | 2 -- arches/app/models/serializers.py | 4 ---- 3 files changed, 8 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 1a3dc7a289..d302de341b 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1992,8 +1992,6 @@ def _root_node(graph_slug, root_node_alias): .prefetch_related("nodegroup__children") .prefetch_related("nodegroup__children__children") ) - # TODO: get last - # https://github.com/archesproject/arches/issues/11565 ret = qs.filter(source_identifier=None).first() if ret is None: raise Node.DoesNotExist(f"graph: {graph_slug} node: {root_node_alias}") diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 9eac798118..6c7deb8f8a 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -194,8 +194,6 @@ def with_nodegroups( if resource_ids and not graph_slug: graph_query = GraphModel.objects.filter(resourceinstance__in=resource_ids) else: - # TODO: get latest graph. 
- # https://github.com/archesproject/arches/issues/11565 graph_query = GraphModel.objects.filter( slug=graph_slug, source_identifier=None ) diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py index a2c47adca0..86ae3d7b78 100644 --- a/arches/app/models/serializers.py +++ b/arches/app/models/serializers.py @@ -31,7 +31,6 @@ def get_default_field_names(self, declared_fields, model_info): pass aliases = self.__class__.Meta.fields if aliases == "__all__": - # TODO: latest graph self._root_node = ( Node.objects.filter( graph__slug=self.__class__.Meta.graph_slug, @@ -55,7 +54,6 @@ def build_unknown_field(self, field_name, model_class): if not self._nodes: self._nodes = Node.objects.filter( graph__slug=graph_slug, - # TODO: latest graph__source_identifier=None, ) @@ -96,14 +94,12 @@ def get_fields(self): if self.__class__.Meta.nodegroups == "__all__": self._root_nodes = Node.objects.filter( graph__slug=graph_slug, - # TODO: latest graph__source_identifier=None, nodegroup_id=F("nodeid"), ).select_related("nodegroup") else: self._root_nodes = Node.objects.filter( graph__slug=graph_slug, - # TODO: latest graph__source_identifier=None, nodegroup_id=F("nodeid"), node__alias__in=self.__class__.Meta.nodegroups, From 7ddbc49272fe8e03bfa9f81fd52ff51cda352256 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 12 Nov 2024 11:01:18 -0500 Subject: [PATCH 101/115] DRY out x-list serializer fields --- arches/app/datatypes/concept_types.py | 4 +++- arches/app/datatypes/datatypes.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arches/app/datatypes/concept_types.py b/arches/app/datatypes/concept_types.py index 83b932d16d..e9714d9281 100644 --- a/arches/app/datatypes/concept_types.py +++ b/arches/app/datatypes/concept_types.py @@ -415,7 +415,9 @@ def ignore_keys(self): class ConceptListDataType(BaseConceptDataType): - rest_framework_model_field = ArrayField(base_field=fields.UUIDField(), null=True) + rest_framework_model_field = 
ArrayField( + base_field=BaseConceptDataType.rest_framework_model_field, null=True + ) def validate( self, diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 2c8e6e5e52..72004b1688 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2434,7 +2434,9 @@ def values_match(self, value1, value2): class ResourceInstanceListDataType(ResourceInstanceDataType): - rest_framework_model_field = ArrayField(base_field=JSONField(), null=True) + rest_framework_model_field = ArrayField( + base_field=ResourceInstanceDataType.rest_framework_model_field, null=True + ) def to_json(self, tile, node): from arches.app.models.resource import ( From 361aa33a2b0bde02c3ab6da5bf5ce1f13e9fa1a3 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 12 Nov 2024 11:25:04 -0500 Subject: [PATCH 102/115] Add RDMAdministrator permission class --- arches/app/permissions/rest_framework.py | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 arches/app/permissions/rest_framework.py diff --git a/arches/app/permissions/rest_framework.py b/arches/app/permissions/rest_framework.py new file mode 100644 index 0000000000..4cc995338f --- /dev/null +++ b/arches/app/permissions/rest_framework.py @@ -0,0 +1,8 @@ +from rest_framework import permissions + +from arches.app.utils.permission_backend import group_required + + +class RDMAdministrator(permissions.BasePermission): + def has_permission(self, request, view): + return group_required(request.user, ["RDM Administrator"]) From ff1063a5f4359c3dc38749721e03baf389c385d0 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 12 Nov 2024 12:00:55 -0500 Subject: [PATCH 103/115] Catch KeyError in ResourceInstanceDataType.validate() --- arches/app/datatypes/datatypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 72004b1688..98bfd707c9 100644 --- a/arches/app/datatypes/datatypes.py +++ 
b/arches/app/datatypes/datatypes.py @@ -2093,7 +2093,7 @@ def validate( ) ) errors.append({"type": "ERROR", "message": message}) - except (ValueError, TypeError): + except (KeyError, ValueError, TypeError): message = _( "The related resource with id '{0}' is not a valid uuid".format( str(value) From 1d987f67402025b581a386a5557b8875df6a18b5 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 12 Nov 2024 12:29:59 -0500 Subject: [PATCH 104/115] Re-serialize creates and updates with new data from backend --- arches/app/views/api/mixins.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arches/app/views/api/mixins.py b/arches/app/views/api/mixins.py index 83c668e8ea..8290a47069 100644 --- a/arches/app/views/api/mixins.py +++ b/arches/app/views/api/mixins.py @@ -63,8 +63,6 @@ def update(self, request, *args, **kwargs): user=request.user, permission_callable=user_can_edit_resource, ) - # TODO: returned object is pretty close, but currently lacks - # recalculated display_value on RI datatypes. return super().update(request, *args, **kwargs) def destroy(self, request, *args, **kwargs): @@ -95,6 +93,13 @@ def validate_tile_data_and_save(self, serializer): except DjangoValidationError as django_error: # TODO: doesn't handle well inner lists, stringifies them raise ValidationError(detail=django_error.error_dict) from django_error + # The backend hydrates additional data, so make sure to use it. + # We could avoid this by only validating data during clean(), + # not save(), but we do graph/node queries during each phase. 
+ # Having to fight so hard against DRF here is a good encouragement + # to separate clean() and save() in a performant way when working on: + # https://github.com/archesproject/arches/issues/10851#issuecomment-2427305853 + serializer._data = self.get_serializer(serializer.instance).data def perform_create(self, serializer): self.validate_tile_data_and_save(serializer) From da6a02eb8ab89b1644431a0da3d87c8d9547b706 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 12 Nov 2024 14:01:24 -0500 Subject: [PATCH 105/115] Harden against calling .values() --- arches/app/models/querysets.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 6c7deb8f8a..73403c0607 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -254,6 +254,9 @@ def _prefetch_related_objects(self): root_nodes.append(root_node) for resource in self._result_cache: + if not isinstance(resource, self.model): + # For a .values() query, we will lack instances. + continue resource._fetched_root_nodes = set() for node in self._fetched_nodes: delattr(resource, node.alias) From e45b88e5856e86d2ba2dd03024e6817bf11cb58c Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 12 Nov 2024 17:03:43 -0500 Subject: [PATCH 106/115] Allow creation of resource with new tiles --- arches/app/models/models.py | 80 ++++++++++++++++++-------------- arches/app/models/serializers.py | 14 +++++- arches/app/views/resource.py | 1 + 3 files changed, 59 insertions(+), 36 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index d302de341b..22dba287f4 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1381,7 +1381,9 @@ def _save_tiles_for_pythonic_model(self, user=None, index=False, **kwargs): # Instantiate proxy models for now, but TODO: expose this # functionality on vanilla models, and in bulk. 
upserts = to_insert | to_update - upsert_proxies = Tile.objects.filter(pk__in=[tile.pk for tile in upserts]) + insert_proxies = [Tile(**vars(insert)) for insert in to_insert] + update_proxies = Tile.objects.filter(pk__in=[tile.pk for tile in to_update]) + upsert_proxies = itertools.chain(insert_proxies, update_proxies) delete_proxies = Tile.objects.filter(pk__in=[tile.pk for tile in to_delete]) with transaction.atomic(): @@ -1391,6 +1393,7 @@ def _save_tiles_for_pythonic_model(self, user=None, index=False, **kwargs): for upsert_proxy, vanilla_instance in zip( upsert_proxies, upserts, strict=True ): + assert upsert_proxy.pk == vanilla_instance.pk upsert_proxy._existing_data = upsert_proxy.data upsert_proxy._existing_provisionaledits = upsert_proxy.provisionaledits @@ -1420,16 +1423,26 @@ def _save_tiles_for_pythonic_model(self, user=None, index=False, **kwargs): ) upsert_proxy._existing_data = vanilla_instance.data - for upsert_proxy in delete_proxies: - upsert_proxy._Tile__preDelete() + for delete_proxy in delete_proxies: + delete_proxy._Tile__preDelete() - insert_proxies = TileModel.objects.none() if to_insert: inserted = TileModel.objects.bulk_create(to_insert) # Pay the cost of a second TileModel -> Tile transform until refactored. 
- update_proxies = upsert_proxies.difference(insert_proxies) - insert_proxies = Tile.objects.filter(pk__in=[t.pk for t in inserted]) - upsert_proxies = update_proxies | insert_proxies + refreshed_insert_proxies = Tile.objects.filter( + pk__in=[t.pk for t in inserted] + ) + for before, after in zip( + insert_proxies, refreshed_insert_proxies, strict=True + ): + assert before.pk == after.pk + after._newprovisionalvalue = before._newprovisionalvalue + after._provisional_edit_log_details = ( + before._provisional_edit_log_details + ) + upsert_proxies = refreshed_insert_proxies | update_proxies + else: + insert_proxies = TileModel.objects.none() if to_update: TileModel.objects.bulk_update( to_update, {"data", "parenttile", "provisionaledits"} @@ -1451,31 +1464,30 @@ def _save_tiles_for_pythonic_model(self, user=None, index=False, **kwargs): upsert_proxy._Tile__postSave() # Save edits: could be done in bulk once above side effects are un-proxied. - for upsert_proxy in upsert_proxies: - if self._state.adding: - upsert_proxy.save_edit( - user=user, - edit_type="tile create", - old_value={}, - new_value=upsert_proxy.data, - newprovisionalvalue=upsert_proxy._newprovisionalvalue, - provisional_edit_log_details=upsert_proxy._provisional_edit_log_details, - transaction_id=None, - # TODO: get this information upstream somewhere. 
- new_resource_created=False, - note=None, - ) - else: - upsert_proxy.save_edit( - user=user, - edit_type="tile edit", - old_value=upsert_proxy._existing_data, - new_value=upsert_proxy.data, - newprovisionalvalue=upsert_proxy._newprovisionalvalue, - oldprovisionalvalue=upsert_proxy._oldprovisionalvalue, - provisional_edit_log_details=upsert_proxy._provisional_edit_log_details, - transaction_id=None, - ) + for insert_proxy in insert_proxies: + insert_proxy.save_edit( + user=user, + edit_type="tile create", + old_value={}, + new_value=insert_proxy.data, + newprovisionalvalue=insert_proxy._newprovisionalvalue, + provisional_edit_log_details=insert_proxy._provisional_edit_log_details, + transaction_id=None, + # TODO: get this information upstream somewhere. + new_resource_created=False, + note=None, + ) + for update_proxy in update_proxies: + update_proxy.save_edit( + user=user, + edit_type="tile edit", + old_value=update_proxy._existing_data, + new_value=update_proxy.data, + newprovisionalvalue=update_proxy._newprovisionalvalue, + oldprovisionalvalue=update_proxy._oldprovisionalvalue, + provisional_edit_log_details=update_proxy._provisional_edit_log_details, + transaction_id=None, + ) # Instantiate proxy model for now, but refactor & expose this on vanilla model proxy_resource = Resource.objects.get(pk=self.pk) @@ -1537,9 +1549,9 @@ def _update_tile_for_root_node( if all(isinstance(tile, TileModel) for tile in new_tiles): new_tiles.sort(key=attrgetter("sortorder")) else: - # TODO: figure out best layer for this and remove if/else. + # TODO: figure out best layer for deserializing and remove if/else. # TODO: nullguard or make not nullable. 
- new_tiles.sort(key=itemgetter("sortorder")) + pass db_tiles = [ t for t in self._annotated_tiles if t.nodegroup_alias == root_node.alias ] diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py index 86ae3d7b78..f379e2095e 100644 --- a/arches/app/models/serializers.py +++ b/arches/app/models/serializers.py @@ -130,15 +130,25 @@ def build_relational_field(self, field_name, relation_info): return ret def _make_tile_serializer(self, root): - class TileSerializer(ArchesTileSerializer): + class DynamicTileSerializer(ArchesTileSerializer): class Meta: model = TileModel graph_slug = self.__class__.Meta.graph_slug root_node = root.alias fields = self.__class__.Meta.fields - self._declared_fields[root.alias] = TileSerializer( + self._declared_fields[root.alias] = DynamicTileSerializer( many=root.nodegroup.cardinality == "n", required=False, allow_null=True, ) + + def create(self, validated_data): + meta = self.__class__.Meta + instance_without_tile_data = super().create(validated_data) + instance_from_factory = meta.model.as_model( + graph_slug=self.__class__.Meta.graph_slug, + only=None if meta.nodegroups == "__all__" else meta.nodegroups, + ).get(pk=instance_without_tile_data.pk) + # TODO: fullest/hydrated version of tile data not yet appearing? 
+ return self.update(instance_from_factory, validated_data) diff --git a/arches/app/views/resource.py b/arches/app/views/resource.py index ba373c0fed..1478bd3317 100644 --- a/arches/app/views/resource.py +++ b/arches/app/views/resource.py @@ -626,6 +626,7 @@ def get( edit_type_lookup = { "create": _("Resource Created"), "delete": _("Resource Deleted"), + "update": _("Resource Updated"), "tile delete": _("Tile Deleted"), "tile create": _("Tile Created"), "tile edit": _("Tile Updated"), From 554a055c172c47e2d6f4fc56744850dd9b429480 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 12 Nov 2024 17:07:24 -0500 Subject: [PATCH 107/115] Explicit is None comparison --- arches/app/models/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 22dba287f4..5e07dcdc55 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1647,7 +1647,7 @@ def _validate_and_patch_from_tile_values(tile, *, root_node, errors_by_node_alia ) def refresh_from_db(self, using=None, fields=None, from_queryset=None): - if not from_queryset and ( + if from_queryset is None and ( root_nodes := getattr(self, "_fetched_root_nodes", set()) ): aliases = [n.alias for n in root_nodes] @@ -2218,7 +2218,7 @@ def serialize( def refresh_from_db(self, using=None, fields=None, from_queryset=None): if ( - not from_queryset + from_queryset is None and (root_nodes := getattr(self, "_fetched_root_nodes", set())) and self.resourceinstance.graph.slug ): From 96cc1ef6ba215342448fcf6607d1c74a85c2645b Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 13 Nov 2024 11:49:45 -0500 Subject: [PATCH 108/115] Fix root node detection --- arches/app/models/querysets.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index 73403c0607..f25425f18a 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ 
-90,8 +90,9 @@ def _prefetch_related_objects(self): tile._fetched_root_nodes = set() for node in self._fetched_nodes: if node.nodegroup_id == tile.nodegroup_id: - tile._root_node = node - tile._fetched_root_nodes.add(node) + if node.pk == tile.nodegroup_id: + tile._root_node = node + tile._fetched_root_nodes.add(node) tile_val = getattr(tile, node.alias, NOT_PROVIDED) if tile_val is not NOT_PROVIDED: datatype_instance = datatype_factory.get_instance(node.datatype) From dbdb1b955a92bf8f9c994390790644d037d5e36d Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 13 Nov 2024 16:34:32 -0500 Subject: [PATCH 109/115] Fix regression with tile saves; revisit after grouping node field merges --- arches/app/models/models.py | 31 ++++++++++++++++++++++--------- arches/app/models/querysets.py | 11 +++++++---- arches/app/models/utils.py | 6 ++++++ 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 5e07dcdc55..3ce6350e06 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -7,7 +7,7 @@ import traceback from collections import defaultdict from itertools import zip_longest -from operator import attrgetter, itemgetter +from operator import attrgetter from arches.app.const import ExtensionType from arches.app.utils.module_importer import get_class_from_modulename @@ -18,6 +18,7 @@ from arches.app.models.utils import ( add_to_update_fields, field_names, + find_root_node_from_fetched_root_nodes, pop_arches_model_kwargs, ) from arches.app.utils.betterJSONSerializer import JSONSerializer @@ -2068,8 +2069,8 @@ def _save_from_pythonic_model_values(self, *, user=None, index=False, **kwargs): ) super().save(**kwargs) - - for node in self._root_node.nodegroup.node_set.all(): + # TODO: address performance. 
+ for node in self.nodegroup.node_set.all(): datatype = datatype_factory.get_instance(node.datatype) datatype.post_tile_save(self, str(node.pk)) proxy._Tile__postSave() @@ -2126,7 +2127,11 @@ def _update_tile_from_pythonic_model_values(self): errors_by_alias = defaultdict(list) # TODO: move this somewhere else. ResourceInstance._validate_and_patch_from_tile_values( - self, root_node=self._root_node, errors_by_node_alias=errors_by_alias + self, + root_node=find_root_node_from_fetched_root_nodes( + self._fetched_root_nodes, self.nodegroup_id + ), + errors_by_node_alias=errors_by_alias, ) if not any(self.data.values()): raise ValidationError(_("Tile is blank.")) @@ -2151,7 +2156,8 @@ def _tile_update_is_noop(self, original_data): # that's probably good. Determine DX here. datatype_factory = DataTypeFactory() - for node in self._root_node.nodegroup.node_set.all(): + # TODO: address performance + for node in self.nodegroup.node_set.all(): if node.datatype == "semantic": continue old = original_data[str(node.nodeid)] @@ -2219,12 +2225,15 @@ def serialize( def refresh_from_db(self, using=None, fields=None, from_queryset=None): if ( from_queryset is None - and (root_nodes := getattr(self, "_fetched_root_nodes", set())) + and (fetched_nodes := getattr(self, "_fetched_nodes", set())) and self.resourceinstance.graph.slug ): - aliases = [n.alias for n in root_nodes] + NOT_PROVIDED = object() + aliases = [n.alias for n in fetched_nodes] from_queryset = self.__class__.as_nodegroup( - root_node_alias=self._root_node.alias, + root_node_alias=find_root_node_from_fetched_root_nodes( + self._fetched_root_nodes, self.nodegroup_id + ).alias, graph_slug=self.resourceinstance.graph.slug, only=aliases, ) @@ -2232,7 +2241,11 @@ def refresh_from_db(self, using=None, fields=None, from_queryset=None): # Copy over annotations. refreshed_tile = from_queryset[0] for field in aliases: - setattr(self, field, getattr(refreshed_tile, field)) + # TODO: why is this if needed? 
+ if ( + annotation := getattr(refreshed_tile, field, NOT_PROVIDED) + ) is not NOT_PROVIDED: + setattr(self, field, annotation) else: super().refresh_from_db(using, fields, from_queryset) diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py index f25425f18a..7296e8c5bd 100644 --- a/arches/app/models/querysets.py +++ b/arches/app/models/querysets.py @@ -80,19 +80,22 @@ def _prefetch_related_objects(self): Discard annotations that do not pertain to this nodegroup. """ from arches.app.datatypes.datatypes import DataTypeFactory - from arches.app.models.models import TileModel + from arches.app.models.models import Node, TileModel super()._prefetch_related_objects() datatype_factory = DataTypeFactory() NOT_PROVIDED = object() for tile in self._result_cache: + tile._fetched_nodes = self._fetched_nodes tile._fetched_root_nodes = set() for node in self._fetched_nodes: if node.nodegroup_id == tile.nodegroup_id: - if node.pk == tile.nodegroup_id: - tile._root_node = node - tile._fetched_root_nodes.add(node) + # Replace with new v8 root/grouping node lookup. 
+ if Node(pk=tile.nodegroup_id) not in tile._fetched_root_nodes: + tile._fetched_root_nodes.add( + Node.objects.get(pk=tile.nodegroup_id) + ) tile_val = getattr(tile, node.alias, NOT_PROVIDED) if tile_val is not NOT_PROVIDED: datatype_instance = datatype_factory.get_instance(node.datatype) diff --git a/arches/app/models/utils.py b/arches/app/models/utils.py index c49040fc3c..1a53445dcb 100644 --- a/arches/app/models/utils.py +++ b/arches/app/models/utils.py @@ -88,6 +88,12 @@ def find_root_node(prefetched_siblings, nodegroup_id): return sibling_node +def find_root_node_from_fetched_root_nodes(fetched_root_nodes, nodegroup_id): + for candidate in fetched_root_nodes: + if candidate.pk == nodegroup_id: + return candidate + + def get_values_query(*, nodegroup, base_lookup, lhs=None, outer_ref) -> BaseExpression: """Return a tile values query expression for use in a ResourceInstanceQuerySet or TileQuerySet. From b45793162669263de76afc176e86cd1bbd5cefb6 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 20 Nov 2024 11:50:18 -0500 Subject: [PATCH 110/115] Set as_representation=True in more cases --- arches/app/models/models.py | 6 +++++- arches/app/models/serializers.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arches/app/models/models.py b/arches/app/models/models.py index 3ce6350e06..ca37b477d9 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1652,7 +1652,11 @@ def refresh_from_db(self, using=None, fields=None, from_queryset=None): root_nodes := getattr(self, "_fetched_root_nodes", set()) ): aliases = [n.alias for n in root_nodes] - from_queryset = self.__class__.as_model(self.graph.slug, only=aliases) + from_queryset = self.__class__.as_model( + self.graph.slug, + only=aliases, + as_representation=getattr(self, "_as_representation", False), + ) super().refresh_from_db(using, fields, from_queryset) # Copy over annotations and annotated tiles. 
refreshed_resource = from_queryset[0] diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py index f379e2095e..54746e8ddd 100644 --- a/arches/app/models/serializers.py +++ b/arches/app/models/serializers.py @@ -150,5 +150,5 @@ def create(self, validated_data): graph_slug=self.__class__.Meta.graph_slug, only=None if meta.nodegroups == "__all__" else meta.nodegroups, ).get(pk=instance_without_tile_data.pk) - # TODO: fullest/hydrated version of tile data not yet appearing? + instance_from_factory._as_representation = True return self.update(instance_from_factory, validated_data) From bca5cc7a1119cff192c085a03bd7c885c6afd38e Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 3 Dec 2024 15:10:02 -0500 Subject: [PATCH 111/115] Linearize migrations --- .../models/migrations/11043_tile_nodegroup_add_related_names.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arches/app/models/migrations/11043_tile_nodegroup_add_related_names.py b/arches/app/models/migrations/11043_tile_nodegroup_add_related_names.py index b41872d108..114862e13d 100644 --- a/arches/app/models/migrations/11043_tile_nodegroup_add_related_names.py +++ b/arches/app/models/migrations/11043_tile_nodegroup_add_related_names.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("models", "10437_node_alias_not_null"), + ("models", "11408_loadstaging_sortorder"), ] operations = [ From 80543f967dcd731f3d65b020e9e9e76700c575d4 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 4 Dec 2024 09:40:09 -0500 Subject: [PATCH 112/115] Forbid double quotes in JSONPathFilter --- arches/app/models/lookups.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arches/app/models/lookups.py b/arches/app/models/lookups.py index 5d4f543f6a..b90ac7203a 100644 --- a/arches/app/models/lookups.py +++ b/arches/app/models/lookups.py @@ -3,12 +3,12 @@ class JSONPathFilter: - """Handle the double-quoting and escaping for JSONPath filters.""" - 
def process_rhs(self, compiler, connection): rhs, params = super().process_rhs(compiler, connection) - escaped = AsIs(QuotedString(params[0]).getquoted().decode()[1:-1]) - return rhs, (escaped,) + if '"' in params[0]: + raise ValueError("Double quotes are not allowed in JSONPath filters.") + quoted = AsIs(QuotedString(params[0]).getquoted().decode()[1:-1]) + return rhs, (quoted,) @JSONField.register_lookup From 8a7e5da1815bc8058c46970675601bcc48eacf55 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Mon, 9 Dec 2024 14:50:11 -0500 Subject: [PATCH 113/115] fixup! Improve types handling --- arches/app/datatypes/datatypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index 98bfd707c9..06b284628b 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -2286,7 +2286,7 @@ def from_id_string(uuid_string, graph_id=None): # TODO: move this to validate? inner.pop("display_value", None) transformed.append(inner) - return transformed + return transformed if isinstance(value, models.ResourceInstance): return [from_id_string(str(value.pk), value.graph_id)] From 59dad0fd69535e53677e974f386fd605d3b49dbf Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 10 Dec 2024 14:48:09 -0500 Subject: [PATCH 114/115] fixup! 
Add RDMAdministrator permission class --- arches/app/permissions/rest_framework.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arches/app/permissions/rest_framework.py b/arches/app/permissions/rest_framework.py index 4cc995338f..53b8626a1f 100644 --- a/arches/app/permissions/rest_framework.py +++ b/arches/app/permissions/rest_framework.py @@ -5,4 +5,4 @@ class RDMAdministrator(permissions.BasePermission): def has_permission(self, request, view): - return group_required(request.user, ["RDM Administrator"]) + return group_required(request.user, "RDM Administrator") From 7b9fb524f8a48a0b6ed076ffbf5285e1930dfdcc Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 10 Dec 2024 15:33:04 -0500 Subject: [PATCH 115/115] Reject unknown keys --- arches/app/models/serializers.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py index 54746e8ddd..87e4f0d0e4 100644 --- a/arches/app/models/serializers.py +++ b/arches/app/models/serializers.py @@ -1,6 +1,7 @@ from copy import deepcopy from django.db.models import F +from rest_framework.exceptions import ValidationError from rest_framework import fields from rest_framework import renderers from rest_framework import serializers @@ -82,6 +83,13 @@ def build_relational_field(self, field_name, relation_info): ) return ret + def validate(self, data): + if hasattr(self, "initial_data") and ( + unknown_keys := set(self.initial_data) - set(self.fields) + ): + raise ValidationError({unknown_keys.pop(): "Unexpected field"}) + return data + class ArchesModelSerializer(serializers.ModelSerializer): legacyid = serializers.CharField(max_length=255, required=False, allow_null=True) @@ -143,6 +151,13 @@ class Meta: allow_null=True, ) + def validate(self, data): + if hasattr(self, "initial_data") and ( + unknown_keys := set(self.initial_data) - set(self.fields) + ): + raise ValidationError({unknown_keys.pop(): "Unexpected field"}) + return 
data + def create(self, validated_data): meta = self.__class__.Meta instance_without_tile_data = super().create(validated_data)