Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding safeguards for NPEs and certain conditions #57

Merged
merged 1 commit into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ echo "Creating judgments index..."
# }'

echo "Creating judgments..."
curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/judgments?click_model=coec&max_rank=1"
curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/judgments?click_model=coec&max_rank=20"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so this is to go a lot deeper....?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, 20 was the original. 1 was for testing.

Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ curl -s -X POST http://localhost:9200/_plugins/ubi/initialize
curl -s -X PUT http://localhost:9200/ubi_events/_doc/1 -H "Content-Type: application/json" -d'
{
"application": "esci_ubi_sample",
"action_name": "view",
"action_name": "impression",
"query_id": "bd43b377-67ff-4165-8753-58bbdb3392c5",
"session_id": "fdb13692-d42c-4d1d-950b-b8814c963de2",
"client_id": "28ccfb32-fbd7-4514-9051-cea719db42de",
Expand All @@ -19,15 +19,15 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/1 -H "Content-Type: applica
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq

curl -s -X PUT http://localhost:9200/ubi_events/_doc/2 -H "Content-Type: application/json" -d'
{
"application": "esci_ubi_sample",
"action_name": "view",
"action_name": "impression",
"query_id": "bd43b377-67ff-4165-8753-58bbdb3392c5",
"session_id": "fdb13692-d42c-4d1d-950b-b8814c963de2",
"client_id": "28ccfb32-fbd7-4514-9051-cea719db42de",
Expand All @@ -41,15 +41,15 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/2 -H "Content-Type: applica
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq

curl -s -X PUT http://localhost:9200/ubi_events/_doc/3 -H "Content-Type: application/json" -d'
{
"application": "esci_ubi_sample",
"action_name": "view",
"action_name": "impression",
"query_id": "bd43b377-67ff-4165-8753-58bbdb3392c5",
"session_id": "fdb13692-d42c-4d1d-950b-b8814c963de2",
"client_id": "28ccfb32-fbd7-4514-9051-cea719db42de",
Expand All @@ -63,15 +63,15 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/3 -H "Content-Type: applica
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq

curl -s -X PUT http://localhost:9200/ubi_events/_doc/4 -H "Content-Type: application/json" -d'
{
"application": "esci_ubi_sample",
"action_name": "view",
"action_name": "impression",
"query_id": "bd43b377-67ff-4165-8753-58bbdb3392c5",
"session_id": "fdb13692-d42c-4d1d-950b-b8814c963de2",
"client_id": "28ccfb32-fbd7-4514-9051-cea719db42de",
Expand All @@ -85,7 +85,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/4 -H "Content-Type: applica
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq
Expand All @@ -107,15 +107,15 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/5 -H "Content-Type: applica
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq

curl -s -X PUT http://localhost:9200/ubi_events/_doc/6 -H "Content-Type: application/json" -d'
{
"application": "esci_ubi_sample",
"action_name": "view",
"action_name": "impression",
"query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73",
"session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89",
"client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb",
Expand All @@ -129,15 +129,15 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/6 -H "Content-Type: applica
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq

curl -s -X PUT http://localhost:9200/ubi_events/_doc/7 -H "Content-Type: application/json" -d'
{
"application": "esci_ubi_sample",
"action_name": "view",
"action_name": "impression",
"query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73",
"session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89",
"client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb",
Expand All @@ -151,15 +151,15 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/7 -H "Content-Type: applica
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq

curl -s -X PUT http://localhost:9200/ubi_events/_doc/8 -H "Content-Type: application/json" -d'
{
"application": "esci_ubi_sample",
"action_name": "view",
"action_name": "impression",
"query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73",
"session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89",
"client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb",
Expand All @@ -173,15 +173,15 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/8 -H "Content-Type: applica
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq

curl -s -X PUT http://localhost:9200/ubi_events/_doc/9 -H "Content-Type: application/json" -d'
{
"application": "esci_ubi_sample",
"action_name": "view",
"action_name": "impression",
"query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73",
"session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89",
"client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb",
Expand All @@ -195,7 +195,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/9 -H "Content-Type: applica
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq
Expand All @@ -217,7 +217,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/10 -H "Content-Type: applic
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq
Expand All @@ -239,7 +239,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/11 -H "Content-Type: applic
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq
Expand All @@ -261,7 +261,7 @@ curl -s -X PUT http://localhost:9200/ubi_events/_doc/12 -H "Content-Type: applic
"object_id_field": "product_id"
},
"position": {
"index": 1
"ordinal": 1
}
}
}' | jq
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash -e

# Resets the UBI data on the local OpenSearch node: deletes the ubi_queries and
# ubi_events indexes, then calls the UBI plugin's initialize endpoint
# (presumably recreating the indexes -- confirm against the plugin docs).

OPENSEARCH_URL="http://localhost:9200"

# Remove both UBI indexes with a single request.
curl -s -X DELETE "${OPENSEARCH_URL}/ubi_queries,ubi_events"

# Ask the UBI plugin to initialize itself again.
curl -s -X POST "${OPENSEARCH_URL}/_plugins/ubi/initialize"
Original file line number Diff line number Diff line change
Expand Up @@ -237,25 +237,31 @@ private Map<String, Set<ClickthroughRate>> getClickthroughRate(final int maxRank
// We need the hash of the query_id because two users can both search
// for "computer" and those searches will have different query IDs, but they are the same search.
final String userQuery = openSearchHelper.getUserQuery(ubiEvent.getQueryId());
// LOGGER.debug("user_query = {}", userQuery);

// Get the clicks for this queryId from the map, or an empty list if this is a new query.
final Set<ClickthroughRate> clickthroughRates = queriesToClickthroughRates.getOrDefault(userQuery, new LinkedHashSet<>());
// userQuery will be null if there is not a query for this event in ubi_queries.
if(userQuery != null) {

// Get the ClickthroughRate object for the object that was interacted with.
final ClickthroughRate clickthroughRate = clickthroughRates.stream().filter(p -> p.getObjectId().equals(ubiEvent.getEventAttributes().getObject().getObjectId())).findFirst().orElse(new ClickthroughRate(ubiEvent.getEventAttributes().getObject().getObjectId()));
// LOGGER.debug("user_query = {}", userQuery);

if (EVENT_CLICK.equalsIgnoreCase(ubiEvent.getActionName())) {
//LOGGER.info("Logging a CLICK on " + ubiEvent.getEventAttributes().getObject().getObjectId());
clickthroughRate.logClick();
} else {
//LOGGER.info("Logging a VIEW on " + ubiEvent.getEventAttributes().getObject().getObjectId());
clickthroughRate.logEvent();
}
// Get the clickthrough rates for this user query from the map, or an empty set if this is a new query.
final Set<ClickthroughRate> clickthroughRates = queriesToClickthroughRates.getOrDefault(userQuery, new LinkedHashSet<>());

// Get the ClickthroughRate object for the object that was interacted with.
final ClickthroughRate clickthroughRate = clickthroughRates.stream().filter(p -> p.getObjectId().equals(ubiEvent.getEventAttributes().getObject().getObjectId())).findFirst().orElse(new ClickthroughRate(ubiEvent.getEventAttributes().getObject().getObjectId()));

if (EVENT_CLICK.equalsIgnoreCase(ubiEvent.getActionName())) {
//LOGGER.info("Logging a CLICK on " + ubiEvent.getEventAttributes().getObject().getObjectId());
clickthroughRate.logClick();
} else {
//LOGGER.info("Logging a VIEW on " + ubiEvent.getEventAttributes().getObject().getObjectId());
clickthroughRate.logEvent();
}

clickthroughRates.add(clickthroughRate);
queriesToClickthroughRates.put(userQuery, clickthroughRates);
// LOGGER.debug("clickthroughRate = {}", queriesToClickthroughRates.size());
clickthroughRates.add(clickthroughRate);
queriesToClickthroughRates.put(userQuery, clickthroughRates);
// LOGGER.debug("clickthroughRate = {}", queriesToClickthroughRates.size());

}

}

Expand Down Expand Up @@ -310,7 +316,7 @@ public Map<Integer, Double> getRankAggregatedClickThrough() throws Exception {
final SearchResponse searchResponse = client.search(searchRequest).get();

final Map<Integer, Double> clickCounts = new HashMap<>();
final Map<Integer, Double> viewCounts = new HashMap<>();
final Map<Integer, Double> impressionCounts = new HashMap<>();

final Terms actionTerms = searchResponse.getAggregations().get("By_Action");
final Collection<? extends Terms.Bucket> actionBuckets = actionTerms.getBuckets();
Expand All @@ -324,6 +330,7 @@ public Map<Integer, Double> getRankAggregatedClickThrough() throws Exception {
final Collection<? extends Terms.Bucket> positionBuckets = positionTerms.getBuckets();

for(final Terms.Bucket positionBucket : positionBuckets) {
LOGGER.info("Inserting client event from position {} with click count {}", positionBucket.getKey(), (double) positionBucket.getDocCount());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will this be crazy verbose?

clickCounts.put(Integer.valueOf(positionBucket.getKey().toString()), (double) positionBucket.getDocCount());
}

Expand All @@ -336,16 +343,28 @@ public Map<Integer, Double> getRankAggregatedClickThrough() throws Exception {
final Collection<? extends Terms.Bucket> positionBuckets = positionTerms.getBuckets();

for(final Terms.Bucket positionBucket : positionBuckets) {
viewCounts.put(Integer.valueOf(positionBucket.getKey().toString()), (double) positionBucket.getDocCount());
LOGGER.info("Inserting client event from position {} with click count {}", positionBucket.getKey(), (double) positionBucket.getDocCount());
impressionCounts.put(Integer.valueOf(positionBucket.getKey().toString()), (double) positionBucket.getDocCount());
}

}

}

for(final Integer x : clickCounts.keySet()) {
//System.out.println("Position = " + x + ", Click Count = " + clickCounts.get(x) + ", Event Count = " + viewCounts.get(x));
rankAggregatedClickThrough.put(x, clickCounts.get(x) / viewCounts.get(x));

if(!(impressionCounts.get(x) == null)) {

// Calculate the CTR by dividing the number of clicks by the number of impressions.
LOGGER.info("Position = {}, Click Count = {}, Event Count = {}", x, clickCounts.get(x), impressionCounts.get(x));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just wondering if these .info are really .debug?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, they are. Probably could use a good going-over to make sure the levels are appropriate.

rankAggregatedClickThrough.put(x, clickCounts.get(x) / impressionCounts.get(x));

} else {
// This will happen in the case where a document has a "click" event but not an "impression." This
// likely should not happen, but we will protect against an NPE anyway by setting the CTR to zero.
rankAggregatedClickThrough.put(x, (double) 0);
}

}

if(parameters.isPersist()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,18 @@ public String getUserQuery(final String queryId) throws Exception {

// Cache it and return it.
final UbiQuery ubiQuery = getQueryFromQueryId(queryId);
userQueryCache.put(queryId, ubiQuery.getUserQuery());

return ubiQuery.getUserQuery();
// ubiQuery will be null if the query does not exist.
if(ubiQuery != null) {

userQueryCache.put(queryId, ubiQuery.getUserQuery());
return ubiQuery.getUserQuery();

} else {

return null;

}

}

Expand All @@ -89,7 +98,7 @@ public String getUserQuery(final String queryId) throws Exception {
*/
public UbiQuery getQueryFromQueryId(final String queryId) throws Exception {

//LOGGER.info("Getting query from query ID {}", queryId);
LOGGER.info("Getting query from query ID {}", queryId);

final String query = "{\"match\": {\"query_id\": \"" + queryId + "\" }}";
final WrapperQueryBuilder qb = QueryBuilders.wrapperQuery(query);
Expand All @@ -105,12 +114,20 @@ public UbiQuery getQueryFromQueryId(final String queryId) throws Exception {
final SearchRequest searchRequest = new SearchRequest(indexes, searchSourceBuilder);
final SearchResponse response = client.search(searchRequest).get();

// Will only be a single result.
final SearchHit hit = response.getHits().getHits()[0];
// If this does not return a query then we cannot calculate the judgments. Each event should have a query associated with it.
if(response.getHits().getHits() != null & response.getHits().getHits().length > 0) {

//LOGGER.info("Retrieved query from query ID {}", queryId);
final SearchHit hit = response.getHits().getHits()[0];

return AccessController.doPrivileged((PrivilegedAction<UbiQuery>) () -> gson.fromJson(hit.getSourceAsString(), UbiQuery.class));
//LOGGER.info("Retrieved query from query ID {}", queryId);
return AccessController.doPrivileged((PrivilegedAction<UbiQuery>) () -> gson.fromJson(hit.getSourceAsString(), UbiQuery.class));

} else {

LOGGER.warn("No query exists for query ID {} to calculate judgments.", queryId);
return null;

}

}

Expand Down
Loading