diff --git a/opensearch-search-quality-evaluation-plugin/scripts/create-judgments-now.sh b/opensearch-search-quality-evaluation-plugin/scripts/create-judgments-now.sh index 94757ea..c8744c6 100755 --- a/opensearch-search-quality-evaluation-plugin/scripts/create-judgments-now.sh +++ b/opensearch-search-quality-evaluation-plugin/scripts/create-judgments-now.sh @@ -18,4 +18,4 @@ echo "Creating judgments index..." # }' echo "Creating judgments..." -curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/judgments?click_model=coec&max_rank=1" +curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/judgments?click_model=coec&max_rank=20" diff --git a/opensearch-search-quality-evaluation-plugin/scripts/sample-events.sh b/opensearch-search-quality-evaluation-plugin/scripts/index-sample-events.sh similarity index 93% rename from opensearch-search-quality-evaluation-plugin/scripts/sample-events.sh rename to opensearch-search-quality-evaluation-plugin/scripts/index-sample-events.sh index 65c4b21..e50c828 100755 --- a/opensearch-search-quality-evaluation-plugin/scripts/sample-events.sh +++ b/opensearch-search-quality-evaluation-plugin/scripts/index-sample-events.sh @@ -5,7 +5,7 @@ curl -s -X POST http://localhost:9200/_plugins/ubi/initialize curl -s -X PUT http://localhost:9200/ubi_events/_doc/1 -H "Content-Type: application/json" -d' { "application": "esci_ubi_sample", - "action_name": "view", + "action_name": "impression", "query_id": "bd43b377-67ff-4165-8753-58bbdb3392c5", "session_id": "fdb13692-d42c-4d1d-950b-b8814c963de2", "client_id": "28ccfb32-fbd7-4514-9051-cea719db42de", @@ -19,7 +19,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/1 -H "Content-Type: applica "object_id_field": "product_id" }, "position": { - "index": 1 + "ordinal": 1 } } }' | jq @@ -27,7 +27,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/1 -H "Content-Type: applica curl -s -X PUT http://localhost:9200/ubi_events/_doc/2 -H "Content-Type: application/json" 
-d' { "application": "esci_ubi_sample", - "action_name": "view", + "action_name": "impression", "query_id": "bd43b377-67ff-4165-8753-58bbdb3392c5", "session_id": "fdb13692-d42c-4d1d-950b-b8814c963de2", "client_id": "28ccfb32-fbd7-4514-9051-cea719db42de", @@ -41,7 +41,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/2 -H "Content-Type: applica "object_id_field": "product_id" }, "position": { - "index": 1 + "ordinal": 1 } } }' | jq @@ -49,7 +49,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/2 -H "Content-Type: applica curl -s -X PUT http://localhost:9200/ubi_events/_doc/3 -H "Content-Type: application/json" -d' { "application": "esci_ubi_sample", - "action_name": "view", + "action_name": "impression", "query_id": "bd43b377-67ff-4165-8753-58bbdb3392c5", "session_id": "fdb13692-d42c-4d1d-950b-b8814c963de2", "client_id": "28ccfb32-fbd7-4514-9051-cea719db42de", @@ -63,7 +63,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/3 -H "Content-Type: applica "object_id_field": "product_id" }, "position": { - "index": 1 + "ordinal": 1 } } }' | jq @@ -71,7 +71,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/3 -H "Content-Type: applica curl -s -X PUT http://localhost:9200/ubi_events/_doc/4 -H "Content-Type: application/json" -d' { "application": "esci_ubi_sample", - "action_name": "view", + "action_name": "impression", "query_id": "bd43b377-67ff-4165-8753-58bbdb3392c5", "session_id": "fdb13692-d42c-4d1d-950b-b8814c963de2", "client_id": "28ccfb32-fbd7-4514-9051-cea719db42de", @@ -85,7 +85,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/4 -H "Content-Type: applica "object_id_field": "product_id" }, "position": { - "index": 1 + "ordinal": 1 } } }' | jq @@ -107,7 +107,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/5 -H "Content-Type: applica "object_id_field": "product_id" }, "position": { - "index": 1 + "ordinal": 1 } } }' | jq @@ -115,7 +115,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/5 -H "Content-Type: applica 
curl -s -X PUT http://localhost:9200/ubi_events/_doc/6 -H "Content-Type: application/json" -d' { "application": "esci_ubi_sample", - "action_name": "view", + "action_name": "impression", "query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73", "session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89", "client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb", @@ -129,7 +129,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/6 -H "Content-Type: applica "object_id_field": "product_id" }, "position": { - "index": 1 + "ordinal": 1 } } }' | jq @@ -137,7 +137,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/6 -H "Content-Type: applica curl -s -X PUT http://localhost:9200/ubi_events/_doc/7 -H "Content-Type: application/json" -d' { "application": "esci_ubi_sample", - "action_name": "view", + "action_name": "impression", "query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73", "session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89", "client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb", @@ -151,7 +151,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/7 -H "Content-Type: applica "object_id_field": "product_id" }, "position": { - "index": 1 + "ordinal": 1 } } }' | jq @@ -159,7 +159,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/7 -H "Content-Type: applica curl -s -X PUT http://localhost:9200/ubi_events/_doc/8 -H "Content-Type: application/json" -d' { "application": "esci_ubi_sample", - "action_name": "view", + "action_name": "impression", "query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73", "session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89", "client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb", @@ -173,7 +173,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/8 -H "Content-Type: applica "object_id_field": "product_id" }, "position": { - "index": 1 + "ordinal": 1 } } }' | jq @@ -181,7 +181,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/8 -H "Content-Type: applica curl -s -X PUT http://localhost:9200/ubi_events/_doc/9 -H "Content-Type: 
application/json" -d' { "application": "esci_ubi_sample", - "action_name": "view", + "action_name": "impression", "query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73", "session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89", "client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb", @@ -195,7 +195,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/9 -H "Content-Type: applica "object_id_field": "product_id" }, "position": { - "index": 1 + "ordinal": 1 } } }' | jq @@ -217,7 +217,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/10 -H "Content-Type: applic "object_id_field": "product_id" }, "position": { - "index": 1 + "ordinal": 1 } } }' | jq @@ -239,7 +239,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/11 -H "Content-Type: applic "object_id_field": "product_id" }, "position": { - "index": 1 + "ordinal": 1 } } }' | jq @@ -261,7 +261,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/12 -H "Content-Type: applic "object_id_field": "product_id" }, "position": { - "index": 1 + "ordinal": 1 } } }' | jq \ No newline at end of file diff --git a/opensearch-search-quality-evaluation-plugin/scripts/initialize-ubi.sh b/opensearch-search-quality-evaluation-plugin/scripts/initialize-ubi.sh new file mode 100755 index 0000000..37883fa --- /dev/null +++ b/opensearch-search-quality-evaluation-plugin/scripts/initialize-ubi.sh @@ -0,0 +1,5 @@ +#!/bin/bash -e + +curl -s -X DELETE http://localhost:9200/ubi_queries,ubi_events + +curl -s -X POST http://localhost:9200/_plugins/ubi/initialize diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModel.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModel.java index 58f8395..3427bd6 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModel.java +++ 
b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModel.java @@ -237,25 +237,31 @@ private Map> getClickthroughRate(final int maxRank // We need to the hash of the query_id because two users can both search // for "computer" and those searches will have different query IDs, but they are the same search. final String userQuery = openSearchHelper.getUserQuery(ubiEvent.getQueryId()); - // LOGGER.debug("user_query = {}", userQuery); - // Get the clicks for this queryId from the map, or an empty list if this is a new query. - final Set clickthroughRates = queriesToClickthroughRates.getOrDefault(userQuery, new LinkedHashSet<>()); + // userQuery will be null if there is not a query for this event in ubi_queries. + if(userQuery != null) { - // Get the ClickthroughRate object for the object that was interacted with. - final ClickthroughRate clickthroughRate = clickthroughRates.stream().filter(p -> p.getObjectId().equals(ubiEvent.getEventAttributes().getObject().getObjectId())).findFirst().orElse(new ClickthroughRate(ubiEvent.getEventAttributes().getObject().getObjectId())); + // LOGGER.debug("user_query = {}", userQuery); - if (EVENT_CLICK.equalsIgnoreCase(ubiEvent.getActionName())) { - //LOGGER.info("Logging a CLICK on " + ubiEvent.getEventAttributes().getObject().getObjectId()); - clickthroughRate.logClick(); - } else { - //LOGGER.info("Logging a VIEW on " + ubiEvent.getEventAttributes().getObject().getObjectId()); - clickthroughRate.logEvent(); - } + // Get the clicks for this queryId from the map, or an empty list if this is a new query. + final Set clickthroughRates = queriesToClickthroughRates.getOrDefault(userQuery, new LinkedHashSet<>()); + + // Get the ClickthroughRate object for the object that was interacted with. 
+ final ClickthroughRate clickthroughRate = clickthroughRates.stream().filter(p -> p.getObjectId().equals(ubiEvent.getEventAttributes().getObject().getObjectId())).findFirst().orElse(new ClickthroughRate(ubiEvent.getEventAttributes().getObject().getObjectId())); + + if (EVENT_CLICK.equalsIgnoreCase(ubiEvent.getActionName())) { + //LOGGER.info("Logging a CLICK on " + ubiEvent.getEventAttributes().getObject().getObjectId()); + clickthroughRate.logClick(); + } else { + //LOGGER.info("Logging a VIEW on " + ubiEvent.getEventAttributes().getObject().getObjectId()); + clickthroughRate.logEvent(); + } - clickthroughRates.add(clickthroughRate); - queriesToClickthroughRates.put(userQuery, clickthroughRates); - // LOGGER.debug("clickthroughRate = {}", queriesToClickthroughRates.size()); + clickthroughRates.add(clickthroughRate); + queriesToClickthroughRates.put(userQuery, clickthroughRates); + // LOGGER.debug("clickthroughRate = {}", queriesToClickthroughRates.size()); + + } } @@ -310,7 +316,7 @@ public Map getRankAggregatedClickThrough() throws Exception { final SearchResponse searchResponse = client.search(searchRequest).get(); final Map clickCounts = new HashMap<>(); - final Map viewCounts = new HashMap<>(); + final Map impressionCounts = new HashMap<>(); final Terms actionTerms = searchResponse.getAggregations().get("By_Action"); final Collection actionBuckets = actionTerms.getBuckets(); @@ -324,6 +330,7 @@ public Map getRankAggregatedClickThrough() throws Exception { final Collection positionBuckets = positionTerms.getBuckets(); for(final Terms.Bucket positionBucket : positionBuckets) { + LOGGER.info("Inserting client event from position {} with click count {}", positionBucket.getKey(), (double) positionBucket.getDocCount()); clickCounts.put(Integer.valueOf(positionBucket.getKey().toString()), (double) positionBucket.getDocCount()); } @@ -336,7 +343,8 @@ public Map getRankAggregatedClickThrough() throws Exception { final Collection positionBuckets = 
positionTerms.getBuckets(); for(final Terms.Bucket positionBucket : positionBuckets) { - viewCounts.put(Integer.valueOf(positionBucket.getKey().toString()), (double) positionBucket.getDocCount()); + LOGGER.info("Inserting client event from position {} with impression count {}", positionBucket.getKey(), (double) positionBucket.getDocCount()); + impressionCounts.put(Integer.valueOf(positionBucket.getKey().toString()), (double) positionBucket.getDocCount()); } } @@ -344,8 +352,19 @@ public Map getRankAggregatedClickThrough() throws Exception { } for(final Integer x : clickCounts.keySet()) { - //System.out.println("Position = " + x + ", Click Count = " + clickCounts.get(x) + ", Event Count = " + viewCounts.get(x)); - rankAggregatedClickThrough.put(x, clickCounts.get(x) / viewCounts.get(x)); + + if(!(impressionCounts.get(x) == null)) { + + // Calculate the CTR by dividing the number of clicks by the number of impressions. + LOGGER.info("Position = {}, Click Count = {}, Event Count = {}", x, clickCounts.get(x), impressionCounts.get(x)); + rankAggregatedClickThrough.put(x, clickCounts.get(x) / impressionCounts.get(x)); + + } else { + // This will happen in the case where a document has a "click" event but not an "impression." This + // likely should not happen, but we will protect against an NPE anyway by setting the CTR to zero. 
+ rankAggregatedClickThrough.put(x, (double) 0); + } + } if(parameters.isPersist()) { diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/opensearch/OpenSearchHelper.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/opensearch/OpenSearchHelper.java index 21204ba..6441236 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/opensearch/OpenSearchHelper.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/opensearch/OpenSearchHelper.java @@ -75,9 +75,18 @@ public String getUserQuery(final String queryId) throws Exception { // Cache it and return it. final UbiQuery ubiQuery = getQueryFromQueryId(queryId); - userQueryCache.put(queryId, ubiQuery.getUserQuery()); - return ubiQuery.getUserQuery(); + // ubiQuery will be null if the query does not exist. + if(ubiQuery != null) { + + userQueryCache.put(queryId, ubiQuery.getUserQuery()); + return ubiQuery.getUserQuery(); + + } else { + + return null; + + } } @@ -89,7 +98,7 @@ public String getUserQuery(final String queryId) throws Exception { */ public UbiQuery getQueryFromQueryId(final String queryId) throws Exception { - //LOGGER.info("Getting query from query ID {}", queryId); + LOGGER.info("Getting query from query ID {}", queryId); final String query = "{\"match\": {\"query_id\": \"" + queryId + "\" }}"; final WrapperQueryBuilder qb = QueryBuilders.wrapperQuery(query); @@ -105,12 +114,20 @@ public UbiQuery getQueryFromQueryId(final String queryId) throws Exception { final SearchRequest searchRequest = new SearchRequest(indexes, searchSourceBuilder); final SearchResponse response = client.search(searchRequest).get(); - // Will only be a single result. - final SearchHit hit = response.getHits().getHits()[0]; + // If this does not return a query then we cannot calculate the judgments. Each event should have a query associated with it. 
+ if(response.getHits().getHits() != null && response.getHits().getHits().length > 0) { - //LOGGER.info("Retrieved query from query ID {}", queryId); + final SearchHit hit = response.getHits().getHits()[0]; - return AccessController.doPrivileged((PrivilegedAction) () -> gson.fromJson(hit.getSourceAsString(), UbiQuery.class)); + //LOGGER.info("Retrieved query from query ID {}", queryId); + return AccessController.doPrivileged((PrivilegedAction) () -> gson.fromJson(hit.getSourceAsString(), UbiQuery.class)); + + } else { + + LOGGER.warn("No query exists for query ID {} to calculate judgments.", queryId); + return null; + + } }