Skip to content

Commit

Permalink
Merge pull request #57 from o19s/adding-safeguards
Browse files Browse the repository at this point in the history
Adding safeguards for NPEs and certain conditions
  • Loading branch information
jzonthemtn authored Dec 9, 2024
2 parents afc496d + feb8805 commit ecdcb60
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ echo "Creating judgments index..."
# }'

echo "Creating judgments..."
curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/judgments?click_model=coec&max_rank=1"
curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/judgments?click_model=coec&max_rank=20"
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ curl -s -X POST http://localhost:9200/_plugins/ubi/initialize
curl -s -X PUT http://localhost:9200/ubi_events/_doc/1 -H "Content-Type: application/json" -d'
{
"application": "esci_ubi_sample",
"action_name": "view",
"action_name": "impression",
"query_id": "bd43b377-67ff-4165-8753-58bbdb3392c5",
"session_id": "fdb13692-d42c-4d1d-950b-b8814c963de2",
"client_id": "28ccfb32-fbd7-4514-9051-cea719db42de",
Expand All @@ -19,15 +19,15 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/1 -H "Content-Type: applica
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq

curl -s -X PUT http://localhost:9200/ubi_events/_doc/2 -H "Content-Type: application/json" -d'
{
"application": "esci_ubi_sample",
"action_name": "view",
"action_name": "impression",
"query_id": "bd43b377-67ff-4165-8753-58bbdb3392c5",
"session_id": "fdb13692-d42c-4d1d-950b-b8814c963de2",
"client_id": "28ccfb32-fbd7-4514-9051-cea719db42de",
Expand All @@ -41,15 +41,15 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/2 -H "Content-Type: applica
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq

curl -s -X PUT http://localhost:9200/ubi_events/_doc/3 -H "Content-Type: application/json" -d'
{
"application": "esci_ubi_sample",
"action_name": "view",
"action_name": "impression",
"query_id": "bd43b377-67ff-4165-8753-58bbdb3392c5",
"session_id": "fdb13692-d42c-4d1d-950b-b8814c963de2",
"client_id": "28ccfb32-fbd7-4514-9051-cea719db42de",
Expand All @@ -63,15 +63,15 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/3 -H "Content-Type: applica
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq

curl -s -X PUT http://localhost:9200/ubi_events/_doc/4 -H "Content-Type: application/json" -d'
{
"application": "esci_ubi_sample",
"action_name": "view",
"action_name": "impression",
"query_id": "bd43b377-67ff-4165-8753-58bbdb3392c5",
"session_id": "fdb13692-d42c-4d1d-950b-b8814c963de2",
"client_id": "28ccfb32-fbd7-4514-9051-cea719db42de",
Expand All @@ -85,7 +85,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/4 -H "Content-Type: applica
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq
Expand All @@ -107,15 +107,15 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/5 -H "Content-Type: applica
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq

curl -s -X PUT http://localhost:9200/ubi_events/_doc/6 -H "Content-Type: application/json" -d'
{
"application": "esci_ubi_sample",
"action_name": "view",
"action_name": "impression",
"query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73",
"session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89",
"client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb",
Expand All @@ -129,15 +129,15 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/6 -H "Content-Type: applica
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq

curl -s -X PUT http://localhost:9200/ubi_events/_doc/7 -H "Content-Type: application/json" -d'
{
"application": "esci_ubi_sample",
"action_name": "view",
"action_name": "impression",
"query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73",
"session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89",
"client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb",
Expand All @@ -151,15 +151,15 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/7 -H "Content-Type: applica
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq

curl -s -X PUT http://localhost:9200/ubi_events/_doc/8 -H "Content-Type: application/json" -d'
{
"application": "esci_ubi_sample",
"action_name": "view",
"action_name": "impression",
"query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73",
"session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89",
"client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb",
Expand All @@ -173,15 +173,15 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/8 -H "Content-Type: applica
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq

curl -s -X PUT http://localhost:9200/ubi_events/_doc/9 -H "Content-Type: application/json" -d'
{
"application": "esci_ubi_sample",
"action_name": "view",
"action_name": "impression",
"query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73",
"session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89",
"client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb",
Expand All @@ -195,7 +195,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/9 -H "Content-Type: applica
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq
Expand All @@ -217,7 +217,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/10 -H "Content-Type: applic
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq
Expand All @@ -239,7 +239,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/11 -H "Content-Type: applic
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq
Expand All @@ -261,7 +261,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/12 -H "Content-Type: applic
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash -e

# Reset the UBI state on the local OpenSearch node: delete the ubi_queries and
# ubi_events indices, then call the UBI plugin's initialize endpoint.
base_url="http://localhost:9200"

curl -s -X DELETE "${base_url}/ubi_queries,ubi_events"

curl -s -X POST "${base_url}/_plugins/ubi/initialize"
Original file line number Diff line number Diff line change
Expand Up @@ -237,25 +237,31 @@ private Map<String, Set<ClickthroughRate>> getClickthroughRate(final int maxRank
// We need to look up the user query for the query_id because two users can both search
// for "computer" and those searches will have different query IDs, but they are the same search.
final String userQuery = openSearchHelper.getUserQuery(ubiEvent.getQueryId());
// LOGGER.debug("user_query = {}", userQuery);

// Get the clickthrough rates for this user query from the map, or an empty set if this is a new query.
final Set<ClickthroughRate> clickthroughRates = queriesToClickthroughRates.getOrDefault(userQuery, new LinkedHashSet<>());
// userQuery will be null if there is not a query for this event in ubi_queries.
if(userQuery != null) {

// Get the ClickthroughRate object for the object that was interacted with.
final ClickthroughRate clickthroughRate = clickthroughRates.stream().filter(p -> p.getObjectId().equals(ubiEvent.getEventAttributes().getObject().getObjectId())).findFirst().orElse(new ClickthroughRate(ubiEvent.getEventAttributes().getObject().getObjectId()));
// LOGGER.debug("user_query = {}", userQuery);

if (EVENT_CLICK.equalsIgnoreCase(ubiEvent.getActionName())) {
//LOGGER.info("Logging a CLICK on " + ubiEvent.getEventAttributes().getObject().getObjectId());
clickthroughRate.logClick();
} else {
//LOGGER.info("Logging a VIEW on " + ubiEvent.getEventAttributes().getObject().getObjectId());
clickthroughRate.logEvent();
}
// Get the clickthrough rates for this user query from the map, or an empty set if this is a new query.
final Set<ClickthroughRate> clickthroughRates = queriesToClickthroughRates.getOrDefault(userQuery, new LinkedHashSet<>());

// Get the ClickthroughRate object for the object that was interacted with.
final ClickthroughRate clickthroughRate = clickthroughRates.stream().filter(p -> p.getObjectId().equals(ubiEvent.getEventAttributes().getObject().getObjectId())).findFirst().orElse(new ClickthroughRate(ubiEvent.getEventAttributes().getObject().getObjectId()));

if (EVENT_CLICK.equalsIgnoreCase(ubiEvent.getActionName())) {
//LOGGER.info("Logging a CLICK on " + ubiEvent.getEventAttributes().getObject().getObjectId());
clickthroughRate.logClick();
} else {
//LOGGER.info("Logging a VIEW on " + ubiEvent.getEventAttributes().getObject().getObjectId());
clickthroughRate.logEvent();
}

clickthroughRates.add(clickthroughRate);
queriesToClickthroughRates.put(userQuery, clickthroughRates);
// LOGGER.debug("clickthroughRate = {}", queriesToClickthroughRates.size());
clickthroughRates.add(clickthroughRate);
queriesToClickthroughRates.put(userQuery, clickthroughRates);
// LOGGER.debug("clickthroughRate = {}", queriesToClickthroughRates.size());

}

}

Expand Down Expand Up @@ -310,7 +316,7 @@ public Map<Integer, Double> getRankAggregatedClickThrough() throws Exception {
final SearchResponse searchResponse = client.search(searchRequest).get();

final Map<Integer, Double> clickCounts = new HashMap<>();
final Map<Integer, Double> viewCounts = new HashMap<>();
final Map<Integer, Double> impressionCounts = new HashMap<>();

final Terms actionTerms = searchResponse.getAggregations().get("By_Action");
final Collection<? extends Terms.Bucket> actionBuckets = actionTerms.getBuckets();
Expand All @@ -324,6 +330,7 @@ public Map<Integer, Double> getRankAggregatedClickThrough() throws Exception {
final Collection<? extends Terms.Bucket> positionBuckets = positionTerms.getBuckets();

for(final Terms.Bucket positionBucket : positionBuckets) {
LOGGER.info("Inserting client event from position {} with click count {}", positionBucket.getKey(), (double) positionBucket.getDocCount());
clickCounts.put(Integer.valueOf(positionBucket.getKey().toString()), (double) positionBucket.getDocCount());
}

Expand All @@ -336,16 +343,28 @@ public Map<Integer, Double> getRankAggregatedClickThrough() throws Exception {
final Collection<? extends Terms.Bucket> positionBuckets = positionTerms.getBuckets();

for(final Terms.Bucket positionBucket : positionBuckets) {
viewCounts.put(Integer.valueOf(positionBucket.getKey().toString()), (double) positionBucket.getDocCount());
LOGGER.info("Inserting client event from position {} with click count {}", positionBucket.getKey(), (double) positionBucket.getDocCount());
impressionCounts.put(Integer.valueOf(positionBucket.getKey().toString()), (double) positionBucket.getDocCount());
}

}

}

for(final Integer x : clickCounts.keySet()) {
//System.out.println("Position = " + x + ", Click Count = " + clickCounts.get(x) + ", Event Count = " + viewCounts.get(x));
rankAggregatedClickThrough.put(x, clickCounts.get(x) / viewCounts.get(x));

if(!(impressionCounts.get(x) == null)) {

// Calculate the CTR by dividing the number of clicks by the number of impressions.
LOGGER.info("Position = {}, Click Count = {}, Event Count = {}", x, clickCounts.get(x), impressionCounts.get(x));
rankAggregatedClickThrough.put(x, clickCounts.get(x) / impressionCounts.get(x));

} else {
// This will happen in the case where a document has a "click" event but not an "impression." This
// likely should not happen, but we will protect against an NPE anyway by setting the CTR to zero.
rankAggregatedClickThrough.put(x, (double) 0);
}

}

if(parameters.isPersist()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,18 @@ public String getUserQuery(final String queryId) throws Exception {

// Cache it and return it.
final UbiQuery ubiQuery = getQueryFromQueryId(queryId);
userQueryCache.put(queryId, ubiQuery.getUserQuery());

return ubiQuery.getUserQuery();
// ubiQuery will be null if the query does not exist.
if(ubiQuery != null) {

userQueryCache.put(queryId, ubiQuery.getUserQuery());
return ubiQuery.getUserQuery();

} else {

return null;

}

}

Expand All @@ -89,7 +98,7 @@ public String getUserQuery(final String queryId) throws Exception {
*/
public UbiQuery getQueryFromQueryId(final String queryId) throws Exception {

//LOGGER.info("Getting query from query ID {}", queryId);
LOGGER.info("Getting query from query ID {}", queryId);

final String query = "{\"match\": {\"query_id\": \"" + queryId + "\" }}";
final WrapperQueryBuilder qb = QueryBuilders.wrapperQuery(query);
Expand All @@ -105,12 +114,20 @@ public UbiQuery getQueryFromQueryId(final String queryId) throws Exception {
final SearchRequest searchRequest = new SearchRequest(indexes, searchSourceBuilder);
final SearchResponse response = client.search(searchRequest).get();

// Will only be a single result.
final SearchHit hit = response.getHits().getHits()[0];
// If this does not return a query then we cannot calculate the judgments. Each event should have a query associated with it.
if(response.getHits().getHits() != null & response.getHits().getHits().length > 0) {

//LOGGER.info("Retrieved query from query ID {}", queryId);
final SearchHit hit = response.getHits().getHits()[0];

return AccessController.doPrivileged((PrivilegedAction<UbiQuery>) () -> gson.fromJson(hit.getSourceAsString(), UbiQuery.class));
//LOGGER.info("Retrieved query from query ID {}", queryId);
return AccessController.doPrivileged((PrivilegedAction<UbiQuery>) () -> gson.fromJson(hit.getSourceAsString(), UbiQuery.class));

} else {

LOGGER.warn("No query exists for query ID {} to calculate judgments.", queryId);
return null;

}

}

Expand Down

0 comments on commit ecdcb60

Please sign in to comment.