From df58c5ad9c515f8b843cd52af1339d6e1056aca8 Mon Sep 17 00:00:00 2001 From: jzonthemtn Date: Thu, 19 Sep 2024 11:49:53 -0400 Subject: [PATCH] #3 and #4 Working on clickthrough rates. Signed-off-by: jzonthemtn --- .../data/sample-data/ubi_events.json | 10 ++ .../searchevaluationframework/App.java | 9 ++ .../OpenSearchEvaluationFramework.java | 92 ++++++++++++++++++- .../model/ClickthroughRate.java | 43 +++++++++ .../model/QueryResponse.java | 47 ++++++++++ .../model/UbiEvent.java | 3 +- .../model/UbiSearch.java | 58 ++++++++++++ .../model/UbiTransformed.java | 51 ---------- 8 files changed, 259 insertions(+), 54 deletions(-) create mode 100644 search-evaluation-framework/data/sample-data/ubi_events.json create mode 100644 search-evaluation-framework/opensearch-search-quality-implicit-judgments/src/main/java/org/opensearch/searchevaluationframework/model/ClickthroughRate.java create mode 100644 search-evaluation-framework/opensearch-search-quality-implicit-judgments/src/main/java/org/opensearch/searchevaluationframework/model/QueryResponse.java create mode 100644 search-evaluation-framework/opensearch-search-quality-implicit-judgments/src/main/java/org/opensearch/searchevaluationframework/model/UbiSearch.java delete mode 100644 search-evaluation-framework/opensearch-search-quality-implicit-judgments/src/main/java/org/opensearch/searchevaluationframework/model/UbiTransformed.java diff --git a/search-evaluation-framework/data/sample-data/ubi_events.json b/search-evaluation-framework/data/sample-data/ubi_events.json new file mode 100644 index 0000000..79daeaf --- /dev/null +++ b/search-evaluation-framework/data/sample-data/ubi_events.json @@ -0,0 +1,10 @@ +{"_index":"ubi_events","_id":"3a5928f1-5cfc-4219-b897-dafaf18c14df","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"HP Pavilion dv7-1211ea Notebook Silver 43.2 cm (17\") 1440 x 900 pixels Intel® Core™2 
Duo 4 GB DDR2-SDRAM 250 GB HDD NVIDIA® GeForce® 9200M GS Windows Vista Home Premium (0884420588030)","timestamp":1726759565517,"event_attributes":{"object":{"object_id_field":"product","object_id":"2101957","description":"HP Pavilion dv7-1211ea Notebook Silver 43.2 cm (17\") 1440 x 900 pixels Intel® Core™2 Duo 4 GB DDR2-SDRAM 250 GB HDD NVIDIA® GeForce® 9200M GS Windows Vista Home Premium","object_detail":null,"key_value":"0884420588030"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} +{"_index":"ubi_events","_id":"be87c694-c1ab-4b75-a5e1-b6a793b8c318","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"HP Pavilion dv7-1211ea Notebook Silver 43.2 cm (17\") 1440 x 900 pixels Intel® Core™2 Duo 4 GB DDR2-SDRAM 250 GB HDD NVIDIA® GeForce® 9200M GS Windows Vista Home Premium (0884420588030)","timestamp":1726759565565,"event_attributes":{"object":{"object_id_field":"product","object_id":"2101957","description":"HP Pavilion dv7-1211ea Notebook Silver 43.2 cm (17\") 1440 x 900 pixels Intel® Core™2 Duo 4 GB DDR2-SDRAM 250 GB HDD NVIDIA® GeForce® 9200M GS Windows Vista Home Premium","object_detail":null,"key_value":"0884420588030"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} +{"_index":"ubi_events","_id":"34e212bf-87e0-4ea9-b241-e6cc8e8d797a","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"StarTech.com DB9 RS232 Serial Null Modem Adapter - M/F (0065030815772)","timestamp":1726759565715,"event_attributes":{"object":{"object_id_field":"product","object_id":"1128895","description":"StarTech.com DB9 RS232 Serial Null Modem Adapter - M/F","object_detail":null,"key_value":"0065030815772"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} 
+{"_index":"ubi_events","_id":"995bcf2b-d79a-4370-801b-57de3fcd63aa","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"Xerox 006R90321 toner cartridge Original Black 6 pc(s) (0095205603217)","timestamp":1726759565581,"event_attributes":{"object":{"object_id_field":"product","object_id":"3920564","description":"Xerox 006R90321 toner cartridge Original Black 6 pc(s)","object_detail":null,"key_value":"0095205603217"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} +{"_index":"ubi_events","_id":"a18f8755-9942-4b34-95c1-9bb0201c5090","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"HP ProCurve 420 Wireless Access Point (0882780985407)","timestamp":1726759565748,"event_attributes":{"object":{"object_id_field":"product","object_id":"1449722","description":"HP ProCurve 420 Wireless Access Point","object_detail":null,"key_value":"0882780985407"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} +{"_index":"ubi_events","_id":"cc4957f0-9bad-4e39-9cdf-fd7e8e79ba00","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"Integral 2GB SD Card memory card (5055288404878)","timestamp":1726759565781,"event_attributes":{"object":{"object_id_field":"product","object_id":"1625640","description":"Integral 2GB SD Card memory card","object_detail":null,"key_value":"5055288404878"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} 
+{"_index":"ubi_events","_id":"85254e72-7a2e-4f32-bdb9-ece49c7e02d9","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"Integral 2GB SD Card memory card (5055288404878)","timestamp":1726759566065,"event_attributes":{"object":{"object_id_field":"product","object_id":"1625640","description":"Integral 2GB SD Card memory card","object_detail":null,"key_value":"5055288404878"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} +{"_index":"ubi_events","_id":"04d71fbe-226b-4501-b3b0-07e112f056ef","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"APC Emergency Power Off (EPO) (0731304213239)","timestamp":1726759565864,"event_attributes":{"object":{"object_id_field":"product","object_id":"636678","description":"APC Emergency Power Off (EPO)","object_detail":null,"key_value":"0731304213239"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} +{"_index":"ubi_events","_id":"fccadeda-6ea5-48c0-8c07-ce21064c1003","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"Brother LC51C Original Cyan (0012502615620)","timestamp":1726759565815,"event_attributes":{"object":{"object_id_field":"product","object_id":"2067378","description":"Brother LC51C Original Cyan","object_detail":null,"key_value":"0012502615620"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} +{"_index":"ubi_events","_id":"a6f5b060-aaf0-425e-a5d3-ab0fdcd0fbae","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"Brother LC51C Original Cyan 
(0012502615620)","timestamp":1726759566032,"event_attributes":{"object":{"object_id_field":"product","object_id":"2067378","description":"Brother LC51C Original Cyan","object_detail":null,"key_value":"0012502615620"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} \ No newline at end of file diff --git a/search-evaluation-framework/opensearch-search-quality-implicit-judgments/src/main/java/org/opensearch/searchevaluationframework/App.java b/search-evaluation-framework/opensearch-search-quality-implicit-judgments/src/main/java/org/opensearch/searchevaluationframework/App.java index 8a3950e..30edf5b 100644 --- a/search-evaluation-framework/opensearch-search-quality-implicit-judgments/src/main/java/org/opensearch/searchevaluationframework/App.java +++ b/search-evaluation-framework/opensearch-search-quality-implicit-judgments/src/main/java/org/opensearch/searchevaluationframework/App.java @@ -2,7 +2,9 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.opensearch.searchevaluationframework.model.ClickthroughRate; +import java.util.Collection; import java.util.Map; public class App { @@ -12,7 +14,14 @@ public class App { public static void main(String[] args) throws Exception { final OpenSearchEvaluationFramework openSearchEvaluationFramework = new OpenSearchEvaluationFramework(); + + // Calculate the rank-aggregated click-through. final Map rankAggregatedClickThrough = openSearchEvaluationFramework.getRankAggregatedClickThrough(); + // TODO: Index the pairs in rankAggregatedClickThrough. + + // Calculate the click-through rate for query/doc pairs. + final Collection clickthroughRates = openSearchEvaluationFramework.getClickthroughRate(); + // TODO: Index the properties in each ClickthroughRate object. 
} diff --git a/search-evaluation-framework/opensearch-search-quality-implicit-judgments/src/main/java/org/opensearch/searchevaluationframework/OpenSearchEvaluationFramework.java b/search-evaluation-framework/opensearch-search-quality-implicit-judgments/src/main/java/org/opensearch/searchevaluationframework/OpenSearchEvaluationFramework.java index e7c0917..4c4f531 100644 --- a/search-evaluation-framework/opensearch-search-quality-implicit-judgments/src/main/java/org/opensearch/searchevaluationframework/OpenSearchEvaluationFramework.java +++ b/search-evaluation-framework/opensearch-search-quality-implicit-judgments/src/main/java/org/opensearch/searchevaluationframework/OpenSearchEvaluationFramework.java @@ -1,11 +1,18 @@ package org.opensearch.searchevaluationframework; import org.apache.commons.lang3.StringUtils; +import org.opensearch.action.bulk.BulkRequest; +import org.opensearch.action.bulk.BulkRequestBuilder; +import org.opensearch.action.bulk.BulkResponse; +import org.opensearch.action.index.IndexRequest; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.searchevaluationframework.model.ClickthroughRate; import org.opensearch.searchevaluationframework.model.UbiEvent; import java.io.IOException; import java.util.Collection; import java.util.HashMap; +import java.util.LinkedList; import java.util.Map; import org.apache.commons.lang3.StringUtils; @@ -26,16 +33,20 @@ import org.opensearch.search.SearchHit; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.searchevaluationframework.model.UbiEvent; +import org.opensearch.searchevaluationframework.model.UbiSearch; import java.io.IOException; import java.util.HashMap; import java.util.Map; +import java.util.UUID; public class OpenSearchEvaluationFramework { public static final String UBI_EVENTS_INDEX = "ubi_events"; public static final String UBI_QUERIES_INDEX = "ubi_queries"; + public static final String CLICK_EVENT = "click"; + private final RestHighLevelClient 
client; public OpenSearchEvaluationFramework() { @@ -45,6 +56,62 @@ public OpenSearchEvaluationFramework() { } + public Collection getClickthroughRate() throws IOException { + + // For each query: + // - Get each document returned in that query (in the QueryResponse object). + // - Calculate the clickthrough rate for the document. (clicks/impressions) + + final String query = "{\"match_all\":{}}"; + final BoolQueryBuilder queryBuilder = new BoolQueryBuilder().must(new WrapperQueryBuilder(query)); + final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder).size(1000); + final Scroll scroll = new Scroll(TimeValue.timeValueMinutes(10L)); + + final SearchRequest searchRequest = Requests + .searchRequest(UBI_EVENTS_INDEX) + .source(searchSourceBuilder) + .scroll(scroll); + + SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); + String scrollId = searchResponse.getScrollId(); + SearchHit[] searchHits = searchResponse.getHits().getHits(); + + final Collection clickthroughRates = new LinkedList<>(); + + while (searchHits != null && searchHits.length > 0) { + + for (final SearchHit hit : searchHits) { + + final UbiEvent ubiEvent = new UbiEvent(hit); + final ClickthroughRate clickthroughRate = new ClickthroughRate(ubiEvent.getQueryId()); + + if (StringUtils.equalsIgnoreCase(ubiEvent.getActionName(), CLICK_EVENT)) { + clickthroughRate.logClick(); + } else { + clickthroughRate.logEvent(); + } + + clickthroughRates.add(clickthroughRate); + System.out.println(clickthroughRate.toString()); + + } + + final SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId); + scrollRequest.scroll(scroll); + + searchResponse = client.scroll(scrollRequest, RequestOptions.DEFAULT); + scrollId = searchResponse.getScrollId(); + + searchHits = searchResponse.getHits().getHits(); + + } + + index(clickthroughRates); + + return clickthroughRates; + + } + /** * Calculate the rank-aggregated click through from the UBI 
events. * @return The rank-aggregated click through. @@ -77,7 +144,7 @@ public Map getRankAggregatedClickThrough() throws IOException { final UbiEvent ubiEvent = new UbiEvent(hit); // Increment the number of clicks for the position. - if (StringUtils.equalsIgnoreCase(ubiEvent.getActionName(), "click")) { + if (StringUtils.equalsIgnoreCase(ubiEvent.getActionName(), CLICK_EVENT)) { rankAggregatedClickThrough.merge(ubiEvent.getPosition(), 1.0, Double::sum); } @@ -93,7 +160,6 @@ public Map getRankAggregatedClickThrough() throws IOException { scrollId = searchResponse.getScrollId(); searchHits = searchResponse.getHits().getHits(); - System.out.println("hits: " + searchHits.length); } @@ -114,4 +180,26 @@ public Map getRankAggregatedClickThrough() throws IOException { } + private void index(final Collection clickthroughRates) throws IOException { + + final BulkRequest request = new BulkRequest(); + + for(final ClickthroughRate clickthroughRate : clickthroughRates) { + + final Map jsonMap = new HashMap<>(); + jsonMap.put("query_id", clickthroughRate.getQueryId()); + jsonMap.put("clicks", clickthroughRate.getClicks()); + jsonMap.put("events", clickthroughRate.getEvents()); + jsonMap.put("ctr", clickthroughRate.getClickthroughRate()); + + final IndexRequest indexRequest = new IndexRequest("click_through_rates").id(UUID.randomUUID().toString()).source(jsonMap); + + request.add(indexRequest); + + } + + client.bulk(request, RequestOptions.DEFAULT); + + } + } diff --git a/search-evaluation-framework/opensearch-search-quality-implicit-judgments/src/main/java/org/opensearch/searchevaluationframework/model/ClickthroughRate.java b/search-evaluation-framework/opensearch-search-quality-implicit-judgments/src/main/java/org/opensearch/searchevaluationframework/model/ClickthroughRate.java new file mode 100644 index 0000000..68fa94b --- /dev/null +++ 
/**
 * Accumulates the number of clicks and the total number of events observed for a single
 * query, and derives the click-through rate (clicks divided by events) from them.
 */
public class ClickthroughRate {

    private final String queryId;
    private int clicks;
    private int events;

    /**
     * Creates an accumulator with zero clicks and zero events.
     * @param queryId The ID of the query the counts belong to.
     */
    public ClickthroughRate(String queryId) {
        this.queryId = queryId;
    }

    /**
     * Records a click. A click is also an event, so both counters are incremented.
     */
    public void logClick() {
        clicks++;
        events++;
    }

    /**
     * Records an event that is not a click. Only the event counter is incremented.
     */
    public void logEvent() {
        events++;
    }

    /**
     * Gets the click-through rate.
     * @return The number of clicks divided by the number of events, or {@code 0.0} when no
     *         events have been recorded (instead of the {@code NaN} that 0/0 would produce).
     */
    public double getClickthroughRate() {
        if (events == 0) {
            return 0.0;
        }
        return (double) clicks / events;
    }

    @Override
    public String toString() {
        return "queryId: " + queryId + ", clicks: " + clicks + ", events: " + events + ", ctr: " + getClickthroughRate();
    }

    /**
     * Gets the query ID.
     * @return The query ID.
     */
    public String getQueryId() {
        return queryId;
    }

    /**
     * Gets the number of clicks recorded so far.
     * @return The click count.
     */
    public int getClicks() {
        return clicks;
    }

    /**
     * Gets the number of events recorded so far, clicks included.
     * @return The event count.
     */
    public int getEvents() {
        return events;
    }

}
/**
 * An immutable description of the response to a query: the query it answers, the ID of the
 * response itself, and the IDs of the hits it contained.
 */
public class QueryResponse {

    private final String queryId;
    private final String queryResponseId;
    private final List<String> queryResponseObjectIds;

    /**
     * Creates a query response.
     * @param queryId The ID of the query.
     * @param queryResponseId The ID of the query response.
     * @param queryResponseObjectIds A list of IDs for the hits in the query. The list is copied,
     *                               so later changes to it do not affect this object. A
     *                               {@code null} list is treated as empty.
     */
    public QueryResponse(final String queryId, final String queryResponseId, final List<String> queryResponseObjectIds) {
        this.queryId = queryId;
        this.queryResponseId = queryResponseId;
        // Snapshot the caller's list so this object really is immutable.
        this.queryResponseObjectIds = queryResponseObjectIds == null
                ? java.util.Collections.<String>emptyList()
                : java.util.Collections.unmodifiableList(new java.util.ArrayList<>(queryResponseObjectIds));
    }

    /**
     * Gets the query ID.
     * @return The query ID.
     */
    public String getQueryId() {
        return queryId;
    }

    /**
     * Gets the query response ID.
     * @return The query response ID.
     */
    public String getQueryResponseId() {
        return queryResponseId;
    }

    /**
     * Gets the list of query response hit IDs.
     * @return An unmodifiable list of query response hit IDs; never {@code null}.
     */
    public List<String> getQueryResponseObjectIds() {
        return queryResponseObjectIds;
    }

}
0000000..4b281a7 --- /dev/null +++ b/search-evaluation-framework/opensearch-search-quality-implicit-judgments/src/main/java/org/opensearch/searchevaluationframework/model/UbiSearch.java @@ -0,0 +1,58 @@ +package org.opensearch.searchevaluationframework.model; + +import org.opensearch.search.SearchHit; + +import java.util.List; +import java.util.Map; + +public class UbiSearch { + + private final long timestamp; + private final String queryId; + private final String clientId; + private final String userQuery; + private final String query; + private Map queryAttributes; + private QueryResponse queryResponse; + + public UbiSearch(final SearchHit hit) { + + this.timestamp = Long.parseLong(hit.getSourceAsMap().get("timestamp").toString()); + this.queryId = hit.getSourceAsMap().get("query_id").toString(); + this.clientId = hit.getSourceAsMap().get("client_id").toString(); + this.userQuery = hit.getSourceAsMap().get("user_query").toString(); + this.query = hit.getSourceAsMap().get("query").toString(); + + //this.queryResponse = + + } + + public long getTimestamp() { + return timestamp; + } + + public String getQueryId() { + return queryId; + } + + public String getClientId() { + return clientId; + } + + public String getUserQuery() { + return userQuery; + } + + public String getQuery() { + return query; + } + + public Map getQueryAttributes() { + return queryAttributes; + } + + public QueryResponse getQueryResponse() { + return queryResponse; + } + +} diff --git a/search-evaluation-framework/opensearch-search-quality-implicit-judgments/src/main/java/org/opensearch/searchevaluationframework/model/UbiTransformed.java b/search-evaluation-framework/opensearch-search-quality-implicit-judgments/src/main/java/org/opensearch/searchevaluationframework/model/UbiTransformed.java deleted file mode 100644 index 345e4ae..0000000 --- a/search-evaluation-framework/opensearch-search-quality-implicit-judgments/src/main/java/org/opensearch/searchevaluationframework/model/UbiTransformed.java 
+++ /dev/null @@ -1,51 +0,0 @@ -package org.opensearch.searchevaluationframework.model; - -public class UbiTransformed { - - private String queryId; - private int position; - private int numResults; - private int clicked; - private String documentId; - - public String getQueryId() { - return queryId; - } - - public void setQueryId(String queryId) { - this.queryId = queryId; - } - - public int getPosition() { - return position; - } - - public void setPosition(int position) { - this.position = position; - } - - public int getNumResults() { - return numResults; - } - - public void setNumResults(int numResults) { - this.numResults = numResults; - } - - public int getClicked() { - return clicked; - } - - public void setClicked(int clicked) { - this.clicked = clicked; - } - - public String getDocumentId() { - return documentId; - } - - public void setDocumentId(String documentId) { - this.documentId = documentId; - } - -}