From 22673d61a130baa5597966c0ee0b7c03faaebb36 Mon Sep 17 00:00:00 2001 From: Galen Date: Fri, 15 Nov 2024 11:13:26 -0800 Subject: [PATCH 1/9] create a view for a search_layer to return mvt, re #10502 --- arches/app/views/search_layer.py | 179 +++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 arches/app/views/search_layer.py diff --git a/arches/app/views/search_layer.py b/arches/app/views/search_layer.py new file mode 100644 index 0000000000..4c5e04d4ac --- /dev/null +++ b/arches/app/views/search_layer.py @@ -0,0 +1,179 @@ +from django.views import View + +# from django.http import JsonResponse +import json +from django.core.cache import caches +from arches.app.models.system_settings import settings +from django.utils.translation import gettext as _ + +# from arches.app.search.search_engine_factory import SearchEngineFactory +from django.db import connection +from django.http import Http404, HttpResponse + + +class SearchLayer(View): + def get(self, request, zoom, x, y): + # se = SearchEngineFactory().create() + searchid = request.GET.get("searchid", None) + if not searchid: + raise Http404(_("Missing 'searchid' query parameter.")) + EARTHCIRCUM = 40075016.6856 + PIXELSPERTILE = 256 + cache = caches["default"] + resource_ids = cache.get(searchid) + if resource_ids: + resource_ids = json.loads(resource_ids) + else: + print(f"no resourceids found in cache for searchid: {searchid}") + raise Http404(_("Missing resourceids from search cache.")) + + search_geom_count = 0 + cache_key = create_searchlayer_mvt_cache_key(searchid, zoom, x, y, request.user) + tile = cache.get(cache_key) + if tile is None: + with connection.cursor() as cursor: + if len(resource_ids) == 0: + resource_ids.append( + "10000000-0000-0000-0000-000000000001" + ) # This must have a uuid that will never be a resource id. + resource_ids = tuple(resource_ids) + + if int(zoom) < 14: + arc = EARTHCIRCUM / ((1 << int(zoom)) * PIXELSPERTILE) + distance = arc * float(1000) + min_points = 3 + distance = ( + settings.CLUSTER_DISTANCE_MAX + if distance > settings.CLUSTER_DISTANCE_MAX + else distance + ) + + count_query = """ + SELECT count(*) FROM geojson_geometries + WHERE + ST_Intersects(geom, TileBBox(%s, %s, %s, 3857)) + AND + resourceinstanceid in %s + """ + + # get the count of matching geometries + cursor.execute( + count_query, + [ + zoom, + x, + y, + resource_ids, + ], + ) + search_geom_count = cursor.fetchone()[0] + + if search_geom_count >= min_points: + cursor.execute( + """WITH clusters(tileid, resourceinstanceid, nodeid, geom, cid) + AS ( + SELECT m.*, + ST_ClusterDBSCAN(geom, eps := %s, minpoints := %s) over () AS cid + FROM ( + SELECT tileid, + resourceinstanceid, + nodeid, + geom + FROM geojson_geometries + WHERE + ST_Intersects(geom, TileBBox(%s, %s, %s, 3857)) + AND + resourceinstanceid in %s + ) m + ) + SELECT ST_AsMVT( + tile, + 'search_layer', + 4096, + 'geom', + 'id' + ) FROM ( + SELECT resourceinstanceid::text, + row_number() over () as id, + 1 as point_count, + ST_AsMVTGeom( + geom, + TileBBox(%s, %s, %s, 3857) + ) AS geom, + '' AS extent + FROM clusters + WHERE cid is NULL + UNION + SELECT NULL as resourceinstanceid, + row_number() over () as id, + count(*) as point_count, + ST_AsMVTGeom( + ST_Centroid( + ST_Collect(geom) + ), + TileBBox(%s, %s, %s, 3857) + ) AS geom, + ST_AsGeoJSON( + ST_Extent(geom) + ) AS extent + FROM clusters + WHERE cid IS NOT NULL + GROUP BY cid + ) as tile;""", + [ + distance, + min_points, + zoom, + x, + y, + resource_ids, + zoom, + x, + y, + zoom, + x, + y, + ], + ) + elif search_geom_count: + cursor.execute( + """SELECT ST_AsMVT(tile, 'search_layer', 4096, 'geom', 'id') FROM (SELECT tileid, + id, + resourceinstanceid, + nodeid, + featureid::text AS featureid, + ST_AsMVTGeom( + geom, + TileBBox(%s, %s, %s, 3857) + ) AS geom, + 1 AS point_count + FROM geojson_geometries + WHERE resourceinstanceid in %s and (geom && ST_TileEnvelope(%s, %s, %s))) AS tile;""", + [zoom, x, y, resource_ids, zoom, x, y], + ) + else: + tile = "" + + cursor.execute( + """SELECT ST_AsMVT(tile, 'search_layer', 4096, 'geom', 'id') FROM (SELECT tileid, + id, + resourceinstanceid, + nodeid, + featureid::text AS featureid, + ST_AsMVTGeom( + geom, + TileBBox(%s, %s, %s, 3857) + ) AS geom, + 1 AS point_count + FROM geojson_geometries + WHERE resourceinstanceid in %s and (geom && ST_TileEnvelope(%s, %s, %s))) AS tile;""", + [zoom, x, y, resource_ids, zoom, x, y], + ) + tile = bytes(cursor.fetchone()[0]) if tile is None else tile + cache.set(cache_key, tile, settings.TILE_CACHE_TIMEOUT) + + return HttpResponse(tile, content_type="application/x-protobuf") + + +def create_searchlayer_mvt_cache_key(searchid_hash, zoom, x, y, user): + return f"searchlayer_mvt_{searchid_hash}_{zoom}_{x}_{y}_{user}" From 5e07bb396cb88ab537a387915d76eaaa61cbeebf Mon Sep 17 00:00:00 2001 From: Galen Date: Fri, 15 Nov 2024 11:14:00 -0800 Subject: [PATCH 2/9] create a url for search_layer, re #10502 --- arches/urls.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arches/urls.py b/arches/urls.py index e2772c1999..29fb817f88 100644 --- a/arches/urls.py +++ b/arches/urls.py @@ -56,6 +56,7 @@ ResourceActivityStreamPageView, ResourceActivityStreamCollectionView, ) +from arches.app.views.search_layer import SearchLayer from arches.app.views.plugin import PluginView from arches.app.views.workflow_history import WorkflowHistoryView from arches.app.views.concept import RDMView @@ -675,6 +676,11 @@ api.MVT.as_view(), name="mvt", ), + path( + "search-layer///.pbf", + SearchLayer.as_view(), + name="search_layer", + ), re_path(r"^images$", api.Images.as_view(), name="images"), re_path( r"^ontology_properties$", From 1a70a818f2ec535fba66f00c687958f05cfd7477 Mon Sep 17 00:00:00 2001 From: Galen Date: Fri, 15 Nov 2024 11:16:18 -0800 Subject: [PATCH 3/9] rm updateSearchResultsLayer calls in subscriptions, re #10502 --- arches/app/media/js/views/components/search/map-filter.js | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arches/app/media/js/views/components/search/map-filter.js b/arches/app/media/js/views/components/search/map-filter.js index 2f00cce545..122bbdea62 100644 --- a/arches/app/media/js/views/components/search/map-filter.js +++ b/arches/app/media/js/views/components/search/map-filter.js @@ -369,14 +369,6 @@ define([ this.updateFilter(); }, this); - this.searchAggregations.subscribe(this.updateSearchResultsLayers, this); - if (ko.isObservable(bins)) { - bins.subscribe(this.updateSearchResultsLayers, this); - } - if (this.searchAggregations()) { - this.updateSearchResultsLayers(); - } - this.mouseoverInstanceId.subscribe(updateSearchResultPointLayer); }, this); }, From 5f6c426f0568e3617dacbef6bdd97bdf264bc095 Mon Sep 17 00:00:00 2001 From: Galen Date: Fri, 15 Nov 2024 11:17:12 -0800 Subject: [PATCH 4/9] pass searchQueryId to MapViewModel, re #10502 --- arches/app/media/js/views/components/search/map-filter.js | 1 + 1 file changed, 1 insertion(+) diff --git a/arches/app/media/js/views/components/search/map-filter.js b/arches/app/media/js/views/components/search/map-filter.js index 122bbdea62..d3ca1e2001 100644 --- a/arches/app/media/js/views/components/search/map-filter.js +++ b/arches/app/media/js/views/components/search/map-filter.js @@ -40,6 +40,7 @@ define([ options.name = "Map Filter"; BaseFilter.prototype.initialize.call(this, options); + options.searchQueryId = this.searchQueryId; options.sources = { "geojson-search-buffer-data": { "type": "geojson", From 5ea684a1ce01ae23b948f4e37ab18cdb6b14074f Mon Sep 17 00:00:00 2001 From: Galen Date: Fri, 15 Nov 2024 11:19:48 -0800 Subject: [PATCH 5/9] stub out initial logic for adding, removing search layers in map vm, re #10502 --- arches/app/media/js/viewmodels/map.js | 255 ++++++++++++++++++++++++++ 1 file changed, 255 insertions(+) diff --git a/arches/app/media/js/viewmodels/map.js b/arches/app/media/js/viewmodels/map.js index eac6659c0a..e6678ecdae 100644 --- a/arches/app/media/js/viewmodels/map.js +++ b/arches/app/media/js/viewmodels/map.js @@ -12,6 +12,261 @@ define([ const viewModel = function(params) { var self = this; + const searchLayerIds = [ + 'searchtiles-unclustered-polygon-fill', + 'searchtiles-unclustered-point', + 'searchtiles-clusters', + 'searchtiles-clusters-halo', + 'searchtiles-cluster-count', + 'searchtiles-unclustered-polypoint' + ]; + const searchLayerDefinitions = [ + { + "id": "searchtiles-unclustered-polygon-fill", + "type": "fill", + "paint": { + "fill-color": "#fa6003", + "fill-opacity": 0.3, + "fill-outline-color": "#fa6003" + }, + "filter": [ + "==", + "$type", + "Polygon" + ], + "source": "search-layer-source", + "source-layer": "search_layer", + "minzoom": 10, + "tolerance": 0.75 + }, + { + "id": "searchtiles-unclustered-point", + "type": "circle", + "paint": { + "circle-color": "#fa6003", + "circle-radius": 6, + "circle-opacity": 1 + }, + "filter": [ + "!", + [ + "has", + "point_count" + ] + ], + "source": "search-layer-source", + "source-layer": "search_layer" + }, + { + "id": "searchtiles-clusters", + "type": "circle", + "paint": { + "circle-color": "#fa6003", + "circle-radius": [ + "step", + [ + "get", + "point_count" + ], + 10, + 100, + 20, + 750, + 30, + 1500, + 40, + 2500, + 50, + 5000, + 65 + ], + "circle-opacity": [ + "case", + [ + "boolean", + [ + "has", + "point_count" + ], + true + ], + 1, + 0 + ] + }, + "filter": [ + "all", + [ + "==", + "$type", + "Point" + ], + [ + "!=", + "highlight", + true + ] + ], + "source": "search-layer-source", + "source-layer": "search_layer" + }, + { + "id": "searchtiles-clusters-halo", + "type": "circle", + "paint": { + "circle-color": "#fa6003", + "circle-radius": [ + "step", + [ + "get", + "point_count" + ], + 20, + 100, + 30, + 750, + 40, + 1500, + 50, + 2500, + 60, + 5000, + 75 + ], + "circle-opacity": [ + "case", + [ + "boolean", + [ + "has", + "point_count" + ], + true + ], + 0.5, + 0 + ] + }, + "filter": [ + "all", + [ + "==", + "$type", + "Point" + ], + [ + "!=", + "highlight", + true + ] + ], + "maxzoom": 14, + "source": "search-layer-source", + "source-layer": "search_layer" + }, + { + "id": "searchtiles-cluster-count", + "type": "symbol", + "paint": { + "text-color": "#fff" + }, + "filter": [ + "has", + "point_count" + ], + "layout": { + "text-font": [ + "DIN Offc Pro Medium", + "Arial Unicode MS Bold" + ], + "text-size": 14, + "text-field": "{point_count}" + }, + "maxzoom": 14, + "source": "search-layer-source", + "source-layer": "search_layer" + }, + { + "id": "searchtiles-unclustered-polypoint", + "type": "circle", + "paint": { + "circle-color": "#fa6003", + "circle-radius": 0, + "circle-opacity": 0, + "circle-stroke-color": "#fff", + "circle-stroke-width": 0 + }, + "filter": [ + "!", + [ + "has", + "point_count" + ] + ], + "layout": { + "visibility": "none" + }, + "source": "search-layer-source", + "source-layer": "search_layer" + } + ]; + this.searchQueryId = params.searchQueryId; + this.searchQueryId.subscribe(function (searchId) { + if (searchId) { + self.addSearchLayer(searchId); + } else { + // optionally, remove the search layer if searchId becomes undefined + self.removeSearchLayer(); + } + }); + + this.addSearchLayer = function (searchId) { + console.log(searchId); + if (!self.map()) + return; + const tileUrlTemplate = `http://localhost:8000/search-layer/{z}/{x}/{y}.pbf?searchid=${encodeURIComponent(searchId)}`; + + // Remove existing source and layer if they exist + searchLayerIds.forEach(layerId => { + if (self.map().getLayer(layerId)) { + self.map().removeLayer(layerId); + } + if (self.map().getSource(layerId)) { + self.map().removeSource(layerId); + } + }); + if (self.map().getSource('search-layer-source')) { + self.map().removeSource('search-layer-source'); + } + + // Add the vector tile source + self.map().addSource('search-layer-source', { + type: 'vector', + tiles: [tileUrlTemplate], + minzoom: 0, + maxzoom: 22, + }); + + // Add the layer to display the data + searchLayerDefinitions.forEach(mapLayer => { + self.map().addLayer(mapLayer); + }); + + // Optionally, fit the map to the data bounds + // self.fitMapToDataBounds(searchId); + }; + + this.removeSearchLayer = function () { + searchLayerDefinitions.forEach(mapLayer => { + if (self.map().getLayer(mapLayer.id)) { + self.map().removeLayer(mapLayer.id); + } + }); + if (self.map().getSource('search-layer-source')) { + self.map().removeSource('search-layer-source'); + } + }; + var geojsonSourceFactory = function() { return { From 920a3fb34d951218684bffcbe1159500c9e70435 Mon Sep 17 00:00:00 2001 From: Galen Date: Fri, 15 Nov 2024 11:21:02 -0800 Subject: [PATCH 6/9] add hook for addSearchLayer in map subscription, re #10502 --- arches/app/media/js/viewmodels/map.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arches/app/media/js/viewmodels/map.js b/arches/app/media/js/viewmodels/map.js index e6678ecdae..d3f6dbf953 100644 --- a/arches/app/media/js/viewmodels/map.js +++ b/arches/app/media/js/viewmodels/map.js @@ -317,6 +317,10 @@ define([ map.fitBounds(ko.unwrap(params.bounds), boundingOptions); } + // If searchQueryId is already available, add the search layer + if (self.searchQueryId()) { + self.addSearchLayer(self.searchQueryId()); + } }); this.bounds = ko.observable(ko.unwrap(params.bounds) || arches.hexBinBounds); From 0bdb2d4c0b7d375875a32e3a3abb83b6380516d2 Mon Sep 17 00:00:00 2001 From: Galen Date: Fri, 15 Nov 2024 11:32:51 -0800 Subject: [PATCH 7/9] define searchQueryId, override doQuery in standard-search-view, re #10502 --- .../components/search/standard-search-view.js | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/arches/app/media/js/views/components/search/standard-search-view.js b/arches/app/media/js/views/components/search/standard-search-view.js index e311bcf438..7134caf193 100644 --- a/arches/app/media/js/views/components/search/standard-search-view.js +++ b/arches/app/media/js/views/components/search/standard-search-view.js @@ -15,6 +15,8 @@ define([ this.selectedPopup = ko.observable(''); this.sharedStateObject.selectedPopup = this.selectedPopup; + this.searchQueryId = ko.observable(null); + this.sharedStateObject.searchQueryId = this.searchQueryId; var firstEnabledFilter = _.find(this.sharedStateObject.searchFilterConfigs, function(filter) { return filter.config.layoutType === 'tabbed'; }, this); @@ -51,6 +53,47 @@ define([ this.searchFilterVms[componentName](this); }, + doQuery: function() { + const queryObj = JSON.parse(this.queryString()); + if (self.updateRequest) { self.updateRequest.abort(); } + self.updateRequest = $.ajax({ + type: "GET", + url: arches.urls.search_results, + data: queryObj, + context: this, + success: function(response) { + _.each(this.sharedStateObject.searchResults, function(value, key, results) { + if (key !== 'timestamp') { + delete this.sharedStateObject.searchResults[key]; + } + }, this); + _.each(response, function(value, key, response) { + if (key !== 'timestamp') { + this.sharedStateObject.searchResults[key] = value; + } + }, this); + this.sharedStateObject.searchResults.timestamp(response.timestamp); + this.searchQueryId(this.sharedStateObject.searchResults.searchqueryid); + this.sharedStateObject.userIsReviewer(response.reviewer); + this.sharedStateObject.userid(response.userid); + this.sharedStateObject.total(response.total_results); + this.sharedStateObject.hits(response.results.hits.hits.length); + this.sharedStateObject.alert(false); + }, + error: function(response, status, error) { + const alert = new AlertViewModel('ep-alert-red', arches.translations.requestFailed.title, response.responseJSON?.message); + if(self.updateRequest.statusText !== 'abort'){ + this.alert(alert); + } + this.sharedStateObject.loading(false); + }, + complete: function(request, status) { + self.updateRequest = undefined; + window.history.pushState({}, '', '?' + $.param(queryObj).split('+').join('%20')); + this.sharedStateObject.loading(false); + } + }); + }, }); return ko.components.register(componentName, { From 3d1349a8886bbbafe96ebfa319ae0f0c48a25dea Mon Sep 17 00:00:00 2001 From: Galen Date: Fri, 15 Nov 2024 12:59:18 -0800 Subject: [PATCH 8/9] hold - draft pit implementation in search, re #10502 --- .../search/components/standard_search_view.py | 128 ++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/arches/app/search/components/standard_search_view.py b/arches/app/search/components/standard_search_view.py index 6af783b1af..59ac59f5fb 100644 --- a/arches/app/search/components/standard_search_view.py +++ b/arches/app/search/components/standard_search_view.py @@ -19,6 +19,7 @@ from django.utils.translation import gettext as _ from datetime import datetime import logging +import json details = { @@ -136,6 +137,86 @@ def append_dsl(self, search_query_object, **kwargs): if load_tiles: search_query_object["query"].include("tiles") + def set_search_pit(self, search_query_object, se, cache, **kwargs): + query_obj = kwargs.get("search_request_object", self.request.GET) + resourceids_only_query_hash_key = create_searchresults_cache_key( + self.request, query_obj, resourceids_only=True + ) + pit_response = se.es.open_point_in_time( + index=RESOURCES_INDEX, keep_alive="2m" # Adjust as needed + ) + pit_id = pit_response["pit_id"] + + # Perform the search + search_params = { + # Your search query parameters + } + + search_response = search_query_object["query"].search( + index=RESOURCES_INDEX, + body=search_params, + pit={"id": pit_id, "keep_alive": "2m"}, + size=1000, # Adjust as needed + ) + # TODO: how can I cache the search query itself? The QueryObject is really hard to serialize + # could just re-instantiate the filters from the search_layer to regenerate the QueryObject from scratch + + # Cache the pit_id and search parameters + cache.set( + resourceids_only_query_hash_key, + json.dumps({"pit_id": pit_id, "search_params": search_params}), + timeout=120, + ) + return resourceids_only_query_hash_key + + def execute_resourceids_only_query( + self, search_query_object, response_object, cache, **kwargs + ): + # cached_response_json = cache.get(cache_key) + query_obj = kwargs.get("search_request_object", self.request.GET) + resourceids_only_query_hash_key = create_searchresults_cache_key( + self.request, query_obj, resourceids_only=True + ) + # did we already cache result resourceids for this query under this query hash? + cached_result_resourceids = cache.get(resourceids_only_query_hash_key) + if ( + cached_result_resourceids + ): # we already did the work here; we'll return the hash key + return resourceids_only_query_hash_key + else: + print( + f"no cached resourceids for hashkey {resourceids_only_query_hash_key}" + ) + + if resourceinstanceid is None: + results = search_query_object["query"].search( + index=RESOURCES_INDEX, limit=10000, scroll="1m" + ) + scroll_id = results["_scroll_id"] + scroll_size = results["hits"]["total"]["value"] + total_results = results["hits"]["total"]["value"] + if query_obj.get("paging-filter", None) is None: + while scroll_size > 0: + page = search_query_object["query"].se.es.scroll( + scroll_id=scroll_id, scroll="3m" + ) + scroll_size = len(page["hits"]["hits"]) + results["hits"]["hits"] += page["hits"]["hits"] + else: + results = search_query_object["query"].search( + index=RESOURCES_INDEX, id=resourceinstanceid + ) + total_results = 1 + + if results is not None: + all_resourceids = [hit["_id"] for hit in results["hits"]["hits"]] + cache.set( + resourceids_only_query_hash_key, + json.dumps(all_resourceids), + settings.SEARCH_RESULTS_CACHE_TIMEOUT, + ) + return resourceids_only_query_hash_key + def execute_query(self, search_query_object, response_object, **kwargs): for_export = get_str_kwarg_as_bool("export", self.request.GET) pages = self.request.GET.get("pages", None) @@ -232,6 +313,53 @@ def handle_search_results_query( if returnDsl: return response_object, search_query_object + # at this point we want to FIRST do an unlimited query to get all resourceids + # of the results + # THEN SECOND we want to do a second query to get a rich set of results only for the page + unpaged_query = None + search_query_object["query"].include("tiles") + for_export = get_str_kwarg_as_bool("export", sorted_query_obj) + if not for_export: + resourceids_only_query_hash_key = self.execute_resourceids_only_query( + search_query_object, + response_object, + cache, + search_request_object=sorted_query_obj, + resourceinstanceid=resourceinstanceid, + ) + + # now I know the resourceids have been cached under the resourceids_only_query_hash_key + # I should set a start/end limit for the second query + paging_filter = search_filter_factory.get_filter("paging-filter") + if paging_filter: + paging_filter.append_dsl( + search_query_object, + permitted_nodegroups=permitted_nodegroups, + include_provisional=include_provisional, + load_tiles=load_tiles, + for_export=for_export, + querystring=sorted_query_obj.get("paging-filter", "{}"), + search_request_object=sorted_query_obj, + ) + + search_query_object["query"].include("graph_id") + # if geom_only or for_export or map_manager or load_tiles: + search_query_object["query"].include("geometries") + search_query_object["query"].include("points") + # if not geom_only: + for prop in essential_result_properties: + search_query_object["query"].include(prop) + # if load_tiles: + # search_query_object["query"].include("tiles") + search_query_object["query"].include("resourceinstanceid") + + self.execute_paged_query( + search_query_object, + response_object, + search_request_object=sorted_query_obj, + resourceinstanceid=resourceinstanceid, + ) + for filter_type, querystring in list(sorted_query_obj.items()): search_filter = search_filter_factory.get_filter(filter_type) if search_filter: From 675fb32b2fb879ad701c3fe59f4f8c147e21c271 Mon Sep 17 00:00:00 2001 From: Galen Date: Sun, 17 Nov 2024 17:40:47 -0800 Subject: [PATCH 9/9] first iteration using geotilegrid in search_layer, re #10502 --- arches/app/views/search_layer.py | 380 ++++++++++++++++++------------- 1 file changed, 225 insertions(+), 155 deletions(-) diff --git a/arches/app/views/search_layer.py b/arches/app/views/search_layer.py index 4c5e04d4ac..2db577a43e 100644 --- a/arches/app/views/search_layer.py +++ b/arches/app/views/search_layer.py @@ -1,178 +1,248 @@ +import math from django.views import View -# from django.http import JsonResponse -import json from django.core.cache import caches from arches.app.models.system_settings import settings from django.utils.translation import gettext as _ -# from arches.app.search.search_engine_factory import SearchEngineFactory -from django.db import connection +from arches.app.search.search_engine_factory import SearchEngineFactory +from arches.app.search.elasticsearch_dsl_builder import ( + Query, + Bool, + GeoShape, + Nested, + GeoTileGridAgg, + NestedAgg, + Aggregation, +) + +# from django.db import connection from django.http import Http404, HttpResponse +from arches.app.utils.betterJSONSerializer import JSONDeserializer +from pprint import pprint + +# from django.contrib.gis.geos import Polygon +from datetime import datetime, timedelta +from time import time +import mercantile +import mapbox_vector_tile + +ZOOM_THRESHOLD = 14 +EXTENT = 4096 class SearchLayer(View): def get(self, request, zoom, x, y): - # se = SearchEngineFactory().create() + start = time() + print(f"ZOOM: {zoom}") searchid = request.GET.get("searchid", None) if not searchid: + print("NO SEARCHID FOUND ON REQUEST") raise Http404(_("Missing 'searchid' query parameter.")) + EARTHCIRCUM = 40075016.6856 PIXELSPERTILE = 256 cache = caches["default"] - resource_ids = cache.get(searchid) - if resource_ids: - resource_ids = json.loads(resource_ids) - else: + pit_id = cache.get(searchid + "_pit") + query_dsl = cache.get(searchid + "_dsl") + # pprint(query_dsl) + # {"pit_id": pit_id, "dsl": query.dsl} + if pit_id is None or query_dsl is None: print(f"no resourceids found in cache for searchid: {searchid}") raise Http404(_("Missing resourceids from search cache.")) - search_geom_count = 0 - cache_key = create_searchlayer_mvt_cache_key(searchid, zoom, x, y, request.user) - tile = cache.get(cache_key) - if tile is None: - with connection.cursor() as cursor: - if len(resource_ids) == 0: - resource_ids.append( - "10000000-0000-0000-0000-000000000001" - ) # This must have a uuid that will never be a resource id. - resource_ids = tuple(resource_ids) - - if int(zoom) < 14: - arc = EARTHCIRCUM / ((1 << int(zoom)) * PIXELSPERTILE) - distance = arc * float(1000) - min_points = 3 - distance = ( - settings.CLUSTER_DISTANCE_MAX - if distance > settings.CLUSTER_DISTANCE_MAX - else distance - ) - - count_query = """ - SELECT count(*) FROM geojson_geometries - WHERE - ST_Intersects(geom, TileBBox(%s, %s, %s, 3857)) - AND - resourceinstanceid in %s - """ - - # get the count of matching geometries - cursor.execute( - count_query, - [ - zoom, - x, - y, - resource_ids, - ], - ) - search_geom_count = cursor.fetchone()[0] - - if search_geom_count >= min_points: - cursor.execute( - """WITH clusters(tileid, resourceinstanceid, nodeid, geom, cid) - AS ( - SELECT m.*, - ST_ClusterDBSCAN(geom, eps := %s, minpoints := %s) over () AS cid - FROM ( - SELECT tileid, - resourceinstanceid, - nodeid, - geom - FROM geojson_geometries - WHERE - ST_Intersects(geom, TileBBox(%s, %s, %s, 3857)) - AND - resourceinstanceid in %s - ) m - ) - SELECT ST_AsMVT( - tile, - 'search_layer', - 4096, - 'geom', - 'id' - ) FROM ( - SELECT resourceinstanceid::text, - row_number() over () as id, - 1 as point_count, - ST_AsMVTGeom( - geom, - TileBBox(%s, %s, %s, 3857) - ) AS geom, - '' AS extent - FROM clusters - WHERE cid is NULL - UNION - SELECT NULL as resourceinstanceid, - row_number() over () as id, - count(*) as point_count, - ST_AsMVTGeom( - ST_Centroid( - ST_Collect(geom) - ), - TileBBox(%s, %s, %s, 3857) - ) AS geom, - ST_AsGeoJSON( - ST_Extent(geom) - ) AS extent - FROM clusters - WHERE cid IS NOT NULL - GROUP BY cid - ) as tile;""", - [ - distance, - min_points, - zoom, - x, - y, - resource_ids, - zoom, - x, - y, - zoom, - x, - y, - ], - ) - elif search_geom_count: - cursor.execute( - """SELECT ST_AsMVT(tile, 'search_layer', 4096, 'geom', 'id') FROM (SELECT tileid, - id, - resourceinstanceid, - nodeid, - featureid::text AS featureid, - ST_AsMVTGeom( - geom, - TileBBox(%s, %s, %s, 3857) - ) AS geom, - 1 AS point_count - FROM geojson_geometries - WHERE resourceinstanceid in %s and (geom && ST_TileEnvelope(%s, %s, %s))) AS tile;""", - [zoom, x, y, resource_ids, zoom, x, y], - ) - else: - tile = "" - - cursor.execute( - """SELECT ST_AsMVT(tile, 'search_layer', 4096, 'geom', 'id') FROM (SELECT tileid, - id, - resourceinstanceid, - nodeid, - featureid::text AS featureid, - ST_AsMVTGeom( - geom, - TileBBox(%s, %s, %s, 3857) - ) AS geom, - 1 AS point_count - FROM geojson_geometries - WHERE resourceinstanceid in %s and (geom && ST_TileEnvelope(%s, %s, %s))) AS tile;""", - [zoom, x, y, resource_ids, zoom, x, y], - ) - tile = bytes(cursor.fetchone()[0]) if tile is None else tile - cache.set(cache_key, tile, settings.TILE_CACHE_TIMEOUT) - - return HttpResponse(tile, content_type="application/x-protobuf") + se = SearchEngineFactory().create() + query_dsl = JSONDeserializer().deserialize(query_dsl, indent=4) + new_query = Query(se, limit=0) + new_query.prepare() + new_query.dsl = query_dsl + # spatial_query = Bool() + # if int(y) == 203: + # print("\n\n\nwhats my new query\n\n\n") + # pprint(new_query.__str__()) + tile_x = int(x) + tile_y = int(y) + tile_z = int(zoom) + tile_bounds = mercantile.bounds(tile_x, tile_y, tile_z) + bbox = ( + tile_bounds.west, + tile_bounds.south, + tile_bounds.east, + tile_bounds.north, + ) + geo_bbox_query = { + "geo_bounding_box": { + "points.point": { + "top_left": {"lat": tile_bounds.north, "lon": tile_bounds.west}, + "bottom_right": {"lat": tile_bounds.south, "lon": tile_bounds.east}, + } + } + } + + if int(zoom) < ZOOM_THRESHOLD: + + geotile_agg = GeoTileGridAgg( + precision=int(zoom), field="points.point", size=10000 + ) + centroid_agg = Aggregation( + type="geo_centroid", name="centroid", field="points.point" + ) + geotile_agg.add_aggregation(centroid_agg) + nested_agg = NestedAgg(path="points", name="geo_aggs") + nested_agg.add_aggregation(geotile_agg) + + # Build the filter aggregation + geo_filter_agg = Aggregation( + type="filter", + name="geo_filter", + filter=Nested(path="points", query=geo_bbox_query).dsl, + ) + + # Add the geotile_grid aggregation under the filter aggregation + geo_filter_agg.add_aggregation(geotile_agg) + + # Update the nested aggregation + nested_agg = NestedAgg(path="points", name="geo_aggs") + nested_agg.add_aggregation(geo_filter_agg) + new_query.add_aggregation(nested_agg) + + # pit doesn't allow scroll context or index + new_query.dsl["source_includes"] = [] + new_query.dsl["size"] = 0 + # if int(y) == 203: + # pprint(new_query.dsl) + results = se.es.search( + pit={"id": pit_id, "keep_alive": "2m"}, _source=False, **new_query.dsl + ) + elapsed = time() - start + # print( + # "_______Time to finish search_layer search 1 (total: {0}) = {1}".format(results["hits"]["total"]["value"], timedelta(seconds=elapsed)) + # ) + # print("search done") + # print(results["hits"]["total"]) + # pprint(results) + features = [] + buckets = results["aggregations"]["geo_aggs"]["geo_filter"]["zoomed_grid"][ + "buckets" + ] + # print(f"Number of buckets: {len(buckets)}") + + for bucket in buckets: + centroid = bucket["centroid"]["location"] + lon = centroid["lon"] + lat = centroid["lat"] + doc_count = bucket["doc_count"] + # px, py = lnglat_to_tile_px(lon, lat, tile_x, tile_y, tile_z, EXTENT) + + feature = { + "geometry": {"type": "Point", "coordinates": [lon, lat]}, + "properties": {"count": doc_count}, + } + + features.append(feature) + + layers = [ + { + "name": "clusters", # Layer name + "features": features, + "version": 2, + "extent": EXTENT, + } + ] + else: + # Fetch individual features + # Add the spatial filter to the query + points_spatial_query = Nested(path="points", query=geo_bbox_query) + # new_query.add_query(spatial_query) + + geometries_spatial_query = Nested(path="geometries", query=geo_bbox_query) + spatial_bool_query = Bool() + spatial_bool_query.should(points_spatial_query) + spatial_bool_query.should(geometries_spatial_query) + new_query.add_query(spatial_bool_query) + + new_query.dsl["size"] = 10000 + + new_query.include("points.point") + new_query.include("geometries.geom") + # new_query.include("resourceinstanceid") + # Add other fields if needed + + # Execute the search + results = se.es.search( + pit={"id": pit_id, "keep_alive": "2m"}, **new_query.dsl + ) + + # Process the hits to generate features + features = [] + point_features = [] + geometry_features = [] + + for hit in results["hits"]["hits"]: + source = hit["_source"] + resource_id = hit.get("_id") + + # Handle points + points = source.get("points", []) + for point in points: + point_geom = point.get("point") + if point_geom: + lon = point_geom.get("lon") + lat = point_geom.get("lat") + if lon and lat: + feature = { + "geometry": { + "type": "Point", + "coordinates": [lon, lat], + }, + "properties": { + "resourceinstanceid": resource_id, + "count": 1, + }, + } + point_features.append(feature) + geometries = source.get("geometries", []) + for geometry in geometries: + geom = geometry.get("geom") + if geom: + geom_type = geom.get("type") + coordinates = geom.get("coordinates") + if coordinates: + feature = { + "geometry": { + "type": geom_type, + "coordinates": coordinates, + }, + "properties": {"resourceinstanceid": resource_id}, + } + pprint(feature) + geometry_features.append(feature) + + # Build layers + layers = [] + + if point_features: + point_layer = { + "name": "points", + "features": point_features, + "version": 2, + "extent": EXTENT, + } + layers.append(point_layer) + + if geometry_features: + geometry_layer = { + "name": "geometries", + "features": geometry_features, + "version": 2, + "extent": EXTENT, + } + layers.append(geometry_layer) + + tile = mapbox_vector_tile.encode( + layers, quantize_bounds=bbox, y_coord_down=True, extents=EXTENT + ) + return HttpResponse(tile, content_type="application/vnd.mapbox-vector-tile") def create_searchlayer_mvt_cache_key(searchid_hash, zoom, x, y, user):