From 2ab0a91137b0a76571b6fb401425384567535f74 Mon Sep 17 00:00:00 2001
From: alyssachvasta <achvasta@google.com>
Date: Tue, 17 Dec 2024 19:44:34 +0000
Subject: [PATCH] Use commas for numbers and don't ground the stats in the
 intro

GitOrigin-RevId: 01ccac3f20b2347951d77cb9cb4870228fd1ac42
---
 src/sensemaker.ts          | 30 +++++++++++++++++++++---------
 src/tasks/grounding.ts     |  2 ++
 src/tasks/stats_checker.ts | 10 ++++++----
 src/tasks/summarization.ts |  5 ++++-
 4 files changed, 33 insertions(+), 14 deletions(-)
diff --git a/src/sensemaker.ts b/src/sensemaker.ts
index 54a3145..ea8e15e 100644
--- a/src/sensemaker.ts
+++ b/src/sensemaker.ts
@@ -42,6 +42,7 @@ import { summaryContainsStats } from "./tasks/stats_checker";
  * @param errorMsg the error message to throw
- * @param retryDelayMS how long to wait in miliseconds between calls
- * @param funcArgs the args for func and isValid
+ * @param retryDelayMS how long to wait in milliseconds between calls
+ * @param funcArgs the args for func
+ * @param isValidArgs the args for isValid
  * @returns the valid response from func
  */
 /* eslint-disable  @typescript-eslint/no-explicit-any */
@@ -51,13 +52,14 @@ async function retryCall<T>(
   maxRetries: number,
   errorMsg: string,
   retryDelayMS: number = RETRY_DELAY_MS,
-  ...funcArgs: any[]
+  funcArgs: any[],
+  isValidArgs: any[]
 ) {
   /* eslint-enable  @typescript-eslint/no-explicit-any */
   for (let attempt = 1; attempt <= maxRetries; attempt++) {
     try {
       const response = await func(...funcArgs);
-      if (isValid(response, ...funcArgs)) {
+      if (isValid(response, ...isValidArgs)) {
         return response;
       }
       console.error(`Attempt ${attempt} failed. Invalid response:`, response);
@@ -146,18 +148,27 @@ export class Sensemaker {
       }
       comments = await this.categorizeComments(comments, true, topics, additionalInstructions);
     }
+    const summaryStats = new SummaryStats(comments);
     const summary = await retryCall(
-      async function (model: Model, summaryStats: SummaryStats): Promise<string> {
+      async function (
+        model: Model,
+        summaryStats: SummaryStats,
+        summarizationType: SummarizationType
+      ): Promise<string> {
         return summarizeByType(model, summaryStats, summarizationType, additionalInstructions);
       },
-      function (summary: string, summaryStats: SummaryStats): boolean {
+      function (
+        summary: string,
+        summaryStats: SummaryStats,
+        summarizationType: SummarizationType
+      ): boolean {
         return summaryContainsStats(summary, summaryStats, summarizationType);
       },
       MAX_RETRIES,
       "The statistics don't match what's in the summary.",
       undefined,
-      this.getModel("summarizationModel"),
-      new SummaryStats(comments)
+      [this.getModel("summarizationModel"), summaryStats, summarizationType],
+      [summaryStats, summarizationType]
     );
 
     return groundSummary(this.getModel("groundingModel"), summary, comments);
@@ -199,7 +210,8 @@ export class Sensemaker {
       MAX_RETRIES,
       "Topic modeling failed.",
       undefined,
-      this.getModel("categorizationModel")
+      [this.getModel("categorizationModel")],
+      []
     );
   }
 
diff --git a/src/tasks/grounding.ts b/src/tasks/grounding.ts
index 385b90f..12f5b0e 100644
--- a/src/tasks/grounding.ts
+++ b/src/tasks/grounding.ts
@@ -45,6 +45,8 @@ However, prefer marking segments of texts that identify atomic or singular claim
 
 Do not ground topic and subtopics names (like "Infrastructure (5 comments)").
 
+Do not ground claims about the number of total comments in the conversation or the number of total votes.
+
 THIS IS IMPORTANT! Leave any portion of text from the original summary that does not need to be grounded alone. The overall structure of the summary text should not change, and all text, punctuation, indentation and aspects of markdown notation should be left as is. The only changes to the original text you should make are in the addition of brackets as described above.
 
 Here is the summary for grounding:
diff --git a/src/tasks/stats_checker.ts b/src/tasks/stats_checker.ts
index ff366a6..756f1bc 100644
--- a/src/tasks/stats_checker.ts
+++ b/src/tasks/stats_checker.ts
@@ -29,18 +29,20 @@ export function summaryContainsStats(
   summaryStats: SummaryStats,
   summarizationType: SummarizationType
 ): boolean {
-  if (!summary.includes(`${summaryStats.commentCount} comments`)) {
+  const commentCount = summaryStats.commentCount.toLocaleString();
+  if (!summary.includes(`${commentCount} comments`)) {
     console.error(`Summary does not contain the correct number of total comments from the
-        deliberation. commentCount=${summaryStats.commentCount} and summary=${summary}`);
+        deliberation. commentCount=${commentCount} and summary=${summary}`);
     return false;
   }
 
+  const voteCount = summaryStats.voteCount.toLocaleString();
   if (
     summarizationType == SummarizationType.VOTE_TALLY &&
-    !summary.includes(`${summaryStats.voteCount} votes`)
+    !summary.includes(`${voteCount} votes`)
   ) {
     console.error(`Summary does not contain the correct number of total votes from the
-        deliberation. voteCount=${summaryStats.voteCount} and summary=${summary}`);
+        deliberation. voteCount=${voteCount} and summary=${summary}`);
     return false;
   }
 
diff --git a/src/tasks/summarization.ts b/src/tasks/summarization.ts
index 312c724..3451c0a 100644
--- a/src/tasks/summarization.ts
+++ b/src/tasks/summarization.ts
@@ -29,6 +29,9 @@ export function getSummarizationInstructions(
   const sortedTopics = _sortTopicsByComments(topicStats);
   const quantifiedTopics = _quantifyTopicNames(sortedTopics);
 
+  const commentCount = summaryStats.commentCount.toLocaleString();
+  const voteCount = summaryStats.voteCount.toLocaleString();
+
   return `You’re analyzing the results of a public deliberation on a topic. It contains comments and associated votes.
 You will summarize with the summary having all of the following categories and subcategories:
 
@@ -68,7 +71,7 @@ ${includeGroups ? "## Description of Groups" : ""}
 ## Conclusion
 
 The introduction should be one paragraph long and contain ${includeGroups ? "five" : "four"} sentences.
-The first sentence should include the information that there were ${summaryStats.commentCount} comments ${includeGroups ? `that had ${summaryStats.voteCount} votes` : ""}.
+The first sentence should include the information that there were ${commentCount} comments ${includeGroups ? `that had ${voteCount} votes` : ""}.
 The second sentence should include what topics were discussed. 
 ${
   includeGroups