
Commit 055cbb9

llm: add an 8k limited GPT4 Turbo variant, to avoid committing to too much money
haraldschilly committed Mar 17, 2024
1 parent 1e9def5 commit 055cbb9
Showing 6 changed files with 56 additions and 6 deletions.
15 changes: 15 additions & 0 deletions src/packages/frontend/editors/markdown-input/mentionable-users.tsx
@@ -157,6 +157,21 @@ function mentionableUsers({
});
}
}

if (USER_SELECTABLE_LANGUAGE_MODELS.includes("gpt-4-turbo-preview-8k")) {
if (!search || "chatgpt4turbo".includes(search)) {
v.push({
value: "openai-gpt-4-turbo-preview-8k",
label: (
<span>
<OpenAIAvatar size={24} />{" "}
{LLM_USERNAMES["gpt-4-turbo-preview-8k"]}
</span>
),
search: "chatgpt4turbo",
});
}
}
}

if (enabledLLMs.google) {
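
The search gate in the block above is an inverted substring test: the mention entry is offered when whatever the user has typed after "@" is itself contained in the fixed key "chatgpt4turbo". A tiny sketch of that behavior, assuming the typed fragment is already lower-cased (the variable and function names here are illustrative only):

// The entry is shown when nothing has been typed yet, or when the typed
// fragment is a substring of the entry's search key.
const searchKey = "chatgpt4turbo";
const shouldShow = (typed: string): boolean =>
  !typed || searchKey.includes(typed);

// shouldShow("")       -> true   (no filter yet)
// shouldShow("gpt4")   -> true   ("chatgpt4turbo" contains "gpt4")
// shouldShow("turbo")  -> true
// shouldShow("claude") -> false
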
5 changes: 5 additions & 0 deletions src/packages/frontend/frame-editors/llm/model-switch.tsx
@@ -112,6 +112,11 @@ export default function ModelSwitch({
makeLLMOption(ret, "gpt-3.5-turbo", LLM_DESCR["gpt-3.5-turbo"]);
makeLLMOption(ret, "gpt-3.5-turbo-16k", LLM_DESCR["gpt-3.5-turbo-16k"]);
makeLLMOption(ret, "gpt-4", LLM_DESCR["gpt-4"]);
makeLLMOption(
ret,
"gpt-4-turbo-preview-8k",
LLM_DESCR["gpt-4-turbo-preview-8k"],
);
makeLLMOption(ret, "gpt-4-turbo-preview", LLM_DESCR["gpt-4-turbo-preview"]);
}

13 changes: 11 additions & 2 deletions src/packages/frontend/purchases/purchases.tsx
@@ -599,7 +599,16 @@ function Description({ description, period_end, service }) {
// service should be DescriptionType["type"]
return null;
}
if (service === "openai-gpt-4" || service === "openai-gpt-4-turbo-preview") {
if (
service === "openai-gpt-4" ||
service === "openai-gpt-4-turbo-preview" ||
service === "openai-gpt-4-turbo-preview-8k"
) {
// Only the -8k service name carries a size marker; any other "turbo" service
// is the 128k preview model.
const extra = service.includes("turbo")
  ? service.includes("-8k")
    ? "Turbo 8k"
    : "Turbo 128k"
  : "";
return (
<Tooltip
title={() => (
@@ -610,7 +619,7 @@
</div>
)}
>
GPT-4 {service === "openai-gpt-4-turbo-preview" ? " Turbo" : ""}
GPT-4 {extra}
</Tooltip>
);
}
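
Only the -8k service name carries a context-size marker, so the display suffix can be derived directly from the service string. A minimal sketch of that mapping with a hypothetical helper name (turboSuffix), covering the three services handled above:

// Derive the human-readable suffix for the GPT-4 purchase description.
function turboSuffix(service: string): string {
  if (!service.includes("turbo")) return ""; // plain "openai-gpt-4"
  return service.includes("-8k") ? "Turbo 8k" : "Turbo 128k";
}

// turboSuffix("openai-gpt-4")                  === ""
// turboSuffix("openai-gpt-4-turbo-preview")    === "Turbo 128k"
// turboSuffix("openai-gpt-4-turbo-preview-8k") === "Turbo 8k"
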
6 changes: 6 additions & 0 deletions src/packages/server/llm/index.ts
@@ -19,6 +19,7 @@ import { getServerSettings } from "@cocalc/database/settings/server-settings";
import createPurchase from "@cocalc/server/purchases/create-purchase";
import {
DEFAULT_MODEL,
LLM_COST,
LLM_USERNAMES,
LanguageModel,
OpenAIMessages,
@@ -285,6 +286,11 @@ async function evaluateOpenAI({
maxTokens,
stream,
}): Promise<ChatOutput> {
// the *-8k variant is artificial – the input is already limited/truncated to 8k
if (model === "gpt-4-turbo-preview-8k") {
model = "gpt-4-turbo-preview";
}

const messages: OpenAIMessages = [];
if (system) {
messages.push({ role: "system", content: system });
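
The -8k service is purely a front-end and billing construct: before the OpenAI API is called, the alias is mapped back onto the real model name, and the prompt has already been truncated elsewhere to fit the smaller budget. A minimal, self-contained sketch of that idea (the helper names resolveApiModel and truncateToBudget are hypothetical, not CoCalc's actual functions):

// Map an artificial, size-limited variant onto the model name the API understands.
function resolveApiModel(model: string): string {
  return model === "gpt-4-turbo-preview-8k" ? "gpt-4-turbo-preview" : model;
}

// Keep only as much of the input as fits the advertised context budget.
// A real implementation would count tokens with a tokenizer; characters are
// used here only to keep the sketch dependency-free.
function truncateToBudget(input: string, maxTokens: number, charsPerToken = 4): string {
  const maxChars = maxTokens * charsPerToken;
  return input.length > maxChars ? input.slice(-maxChars) : input;
}

// Usage: a user-facing "gpt-4-turbo-preview-8k" request becomes a regular
// "gpt-4-turbo-preview" call with a bounded prompt.
const longUserInput = "..."; // stands in for the accumulated chat history
const apiModel = resolveApiModel("gpt-4-turbo-preview-8k"); // "gpt-4-turbo-preview"
const prompt = truncateToBudget(longUserInput, 8192);
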
17 changes: 14 additions & 3 deletions src/packages/util/db-schema/llm-utils.ts
@@ -11,6 +11,7 @@ const MODELS_OPENAI = [
"gpt-4",
"gpt-4-32k",
"gpt-4-turbo-preview",
"gpt-4-turbo-preview-8k", // like above, but artificially limited to 8k tokens
] as const;

export type ModelOpenAI = (typeof MODELS_OPENAI)[number];
@@ -49,6 +50,7 @@ export const USER_SELECTABLE_LANGUAGE_MODELS = [
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-4-turbo-preview",
"gpt-4-turbo-preview-8k", // like above, but artificially limited to 8k tokens
"gpt-4",
"gemini-pro",
...MISTRAL_MODELS,
@@ -133,6 +135,7 @@ export type LanguageService =
| "openai-gpt-4"
| "openai-gpt-4-32k"
| "openai-gpt-4-turbo-preview"
| "openai-gpt-4-turbo-preview-8k"
| "openai-text-embedding-ada-002"
| "google-text-bison-001"
| "google-chat-bison-001"
@@ -285,7 +288,8 @@ export const LLM_USERNAMES: LLM2String = {
"gpt-4-32k": "GPT-4-32k",
"gpt-3.5-turbo": "GPT-3.5",
"gpt-3.5-turbo-16k": "GPT-3.5-16k",
"gpt-4-turbo-preview": "GPT-4 Turbo",
"gpt-4-turbo-preview": "GPT-4 Turbo 128k",
"gpt-4-turbo-preview-8k": "GPT-4 Turbo 8k",
"text-bison-001": "PaLM 2",
"chat-bison-001": "PaLM 2",
"gemini-pro": "Gemini Pro",
@@ -306,8 +310,9 @@ export const LLM_DESCR: LLM2String = {
"gpt-4-32k": "",
"gpt-3.5-turbo": "Fast, great for everyday tasks. (OpenAI, 4k token context)",
"gpt-3.5-turbo-16k": `Same as ${LLM_USERNAMES["gpt-3.5-turbo"]} but with larger 16k token context`,
"gpt-4-turbo-preview":
"More powerful, fresher knowledge, and lower price than GPT-4. (OpenAI, 128k token context)",
"gpt-4-turbo-preview-8k":
"More powerful, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)",
"gpt-4-turbo-preview": "Like GPT-4 Turob 8k, but with up to 128k token context",
"text-bison-001": "",
"chat-bison-001": "",
"gemini-pro": "Google's Gemini Pro Generative AI model (30k token context)",
@@ -413,6 +418,12 @@ export const LLM_COST: { [name in string]: Cost } = {
completion_tokens: 0.03 / 1000, // $30.00 / 1M tokens
max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
},
// like above, but we limit the tokens to reduce how much money user has to commit to
"gpt-4-turbo-preview-8k": {
prompt_tokens: 0.01 / 1000, // $10.00 / 1M tokens
completion_tokens: 0.03 / 1000, // $30.00 / 1M tokens
max_tokens: 8192, // the actual reply is 8k, and we use this to truncate the input prompt!
},
"text-embedding-ada-002": {
prompt_tokens: 0.0001 / 1000,
completion_tokens: 0.0001 / 1000, // NOTE: this isn't a thing with embeddings
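
The only reason the -8k cost entry exists is the max-cost calculation mentioned in the comment above: a smaller max_tokens shrinks the worst-case charge a user must be able to cover before a call is allowed. A rough back-of-the-envelope sketch, assuming the worst case is approximated as max_tokens priced at the completion rate (the exact formula CoCalc uses may differ):

// Worst-case cost per call if the whole token budget were billed at the
// completion rate of $30 per 1M tokens.
const completionRate = 0.03 / 1000; // $ per token
const worstCase128k = 128000 * completionRate; // ≈ $3.84
const worstCase8k = 8192 * completionRate;     // ≈ $0.25
// Capping the variant at 8k tokens therefore cuts the balance a user has to
// commit up front by more than an order of magnitude.
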
6 changes: 5 additions & 1 deletion src/packages/util/db-schema/purchase-quotas.ts
@@ -38,7 +38,11 @@ export const QUOTA_SPEC: QuotaSpec = {
noSet: true, // because this is not user visible yet
},
"openai-gpt-4-turbo-preview": {
display: "OpenAI GPT-4 Turbo",
display: "OpenAI GPT-4 Turbo 128k",
color: "#10a37f",
},
"openai-gpt-4-turbo-preview-8k": {
display: "OpenAI GPT-4 Turbo 8k",
color: "#10a37f",
},
"mistralai-mistral-large-latest": {
