
Commit 055cbb9

llm: add an 8k limited GPT4 Turbo variant, to avoid committing to too much money
haraldschilly committed Mar 17, 2024
1 parent 1e9def5 commit 055cbb9
Showing 6 changed files with 56 additions and 6 deletions.
15 changes: 15 additions & 0 deletions src/packages/frontend/editors/markdown-input/mentionable-users.tsx
@@ -157,6 +157,21 @@ function mentionableUsers({
});
}
}

if (USER_SELECTABLE_LANGUAGE_MODELS.includes("gpt-4-turbo-preview-8k")) {
if (!search || "chatgpt4turbo".includes(search)) {
v.push({
value: "openai-gpt-4-turbo-preview-8k",
label: (
<span>
<OpenAIAvatar size={24} />{" "}
{LLM_USERNAMES["gpt-4-turbo-preview-8k"]}
</span>
),
search: "chatgpt4turbo",
});
}
}
}

if (enabledLLMs.google) {
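
The search gate in the block above is an inverted substring test: the mention entry is offered when whatever the user has typed after "@" is itself contained in the fixed key "chatgpt4turbo". A tiny sketch of that behavior, assuming the typed fragment is already lower-cased (the variable and function names here are illustrative only):

// The entry is shown when nothing has been typed yet, or when the typed
// fragment is a substring of the entry's search key.
const searchKey = "chatgpt4turbo";
const shouldShow = (typed: string): boolean =>
  !typed || searchKey.includes(typed);

// shouldShow("")       -> true   (no filter yet)
// shouldShow("gpt4")   -> true   ("chatgpt4turbo" contains "gpt4")
// shouldShow("turbo")  -> true
// shouldShow("claude") -> false
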
5 changes: 5 additions & 0 deletions src/packages/frontend/frame-editors/llm/model-switch.tsx
@@ -112,6 +112,11 @@ export default function ModelSwitch({
makeLLMOption(ret, "gpt-3.5-turbo", LLM_DESCR["gpt-3.5-turbo"]);
makeLLMOption(ret, "gpt-3.5-turbo-16k", LLM_DESCR["gpt-3.5-turbo-16k"]);
makeLLMOption(ret, "gpt-4", LLM_DESCR["gpt-4"]);
makeLLMOption(
ret,
"gpt-4-turbo-preview-8k",
LLM_DESCR["gpt-4-turbo-preview-8k"],
);
makeLLMOption(ret, "gpt-4-turbo-preview", LLM_DESCR["gpt-4-turbo-preview"]);
}

13 changes: 11 additions & 2 deletions src/packages/frontend/purchases/purchases.tsx
@@ -599,7 +599,16 @@ function Description({ description, period_end, service }) {
// service should be DescriptionType["type"]
return null;
}
if (service === "openai-gpt-4" || service === "openai-gpt-4-turbo-preview") {
if (
service === "openai-gpt-4" ||
service === "openai-gpt-4-turbo-preview" ||
service === "openai-gpt-4-turbo-preview-8k"
) {
// Only the -8k service name carries a size marker; any other "turbo" service
// is the 128k preview model.
const extra = service.includes("turbo")
  ? service.includes("-8k")
    ? "Turbo 8k"
    : "Turbo 128k"
  : "";
return (
<Tooltip
title={() => (
@@ -610,7 +619,7 @@
</div>
)}
>
GPT-4 {service === "openai-gpt-4-turbo-preview" ? " Turbo" : ""}
GPT-4 {extra}
</Tooltip>
);
}
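
Only the -8k service name carries a context-size marker, so the display suffix can be derived directly from the service string. A minimal sketch of that mapping with a hypothetical helper name (turboSuffix), covering the three services handled above:

// Derive the human-readable suffix for the GPT-4 purchase description.
function turboSuffix(service: string): string {
  if (!service.includes("turbo")) return ""; // plain "openai-gpt-4"
  return service.includes("-8k") ? "Turbo 8k" : "Turbo 128k";
}

// turboSuffix("openai-gpt-4")                  === ""
// turboSuffix("openai-gpt-4-turbo-preview")    === "Turbo 128k"
// turboSuffix("openai-gpt-4-turbo-preview-8k") === "Turbo 8k"
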
6 changes: 6 additions & 0 deletions src/packages/server/llm/index.ts
@@ -19,6 +19,7 @@ import { getServerSettings } from "@cocalc/database/settings/server-settings";
import createPurchase from "@cocalc/server/purchases/create-purchase";
import {
DEFAULT_MODEL,
LLM_COST,
LLM_USERNAMES,
LanguageModel,
OpenAIMessages,
@@ -285,6 +286,11 @@ async function evaluateOpenAI({
maxTokens,
stream,
}): Promise<ChatOutput> {
// the *-8k variant is artificial – the input is already limited/truncated to 8k
if (model === "gpt-4-turbo-preview-8k") {
model = "gpt-4-turbo-preview";
}

const messages: OpenAIMessages = [];
if (system) {
messages.push({ role: "system", content: system });
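
The -8k service is purely a front-end and billing construct: before the OpenAI API is called, the alias is mapped back onto the real model name, and the prompt has already been truncated elsewhere to fit the smaller budget. A minimal, self-contained sketch of that idea (the helper names resolveApiModel and truncateToBudget are hypothetical, not CoCalc's actual functions):

// Map an artificial, size-limited variant onto the model name the API understands.
function resolveApiModel(model: string): string {
  return model === "gpt-4-turbo-preview-8k" ? "gpt-4-turbo-preview" : model;
}

// Keep only as much of the input as fits the advertised context budget.
// A real implementation would count tokens with a tokenizer; characters are
// used here only to keep the sketch dependency-free.
function truncateToBudget(input: string, maxTokens: number, charsPerToken = 4): string {
  const maxChars = maxTokens * charsPerToken;
  return input.length > maxChars ? input.slice(-maxChars) : input;
}

// Usage: a user-facing "gpt-4-turbo-preview-8k" request becomes a regular
// "gpt-4-turbo-preview" call with a bounded prompt.
const longUserInput = "..."; // stands in for the accumulated chat history
const apiModel = resolveApiModel("gpt-4-turbo-preview-8k"); // "gpt-4-turbo-preview"
const prompt = truncateToBudget(longUserInput, 8192);
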
17 changes: 14 additions & 3 deletions src/packages/util/db-schema/llm-utils.ts
@@ -11,6 +11,7 @@ const MODELS_OPENAI = [
"gpt-4",
"gpt-4-32k",
"gpt-4-turbo-preview",
"gpt-4-turbo-preview-8k", // like above, but artificially limited to 8k tokens
] as const;

export type ModelOpenAI = (typeof MODELS_OPENAI)[number];
@@ -49,6 +50,7 @@ export const USER_SELECTABLE_LANGUAGE_MODELS = [
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-4-turbo-preview",
"gpt-4-turbo-preview-8k", // like above, but artificially limited to 8k tokens
"gpt-4",
"gemini-pro",
...MISTRAL_MODELS,
@@ -133,6 +135,7 @@ export type LanguageService =
| "openai-gpt-4"
| "openai-gpt-4-32k"
| "openai-gpt-4-turbo-preview"
| "openai-gpt-4-turbo-preview-8k"
| "openai-text-embedding-ada-002"
| "google-text-bison-001"
| "google-chat-bison-001"
@@ -285,7 +288,8 @@ export const LLM_USERNAMES: LLM2String = {
"gpt-4-32k": "GPT-4-32k",
"gpt-3.5-turbo": "GPT-3.5",
"gpt-3.5-turbo-16k": "GPT-3.5-16k",
"gpt-4-turbo-preview": "GPT-4 Turbo",
"gpt-4-turbo-preview": "GPT-4 Turbo 128k",
"gpt-4-turbo-preview-8k": "GPT-4 Turbo 8k",
"text-bison-001": "PaLM 2",
"chat-bison-001": "PaLM 2",
"gemini-pro": "Gemini Pro",
@@ -306,8 +310,9 @@ export const LLM_DESCR: LLM2String = {
"gpt-4-32k": "",
"gpt-3.5-turbo": "Fast, great for everyday tasks. (OpenAI, 4k token context)",
"gpt-3.5-turbo-16k": `Same as ${LLM_USERNAMES["gpt-3.5-turbo"]} but with larger 16k token context`,
"gpt-4-turbo-preview":
"More powerful, fresher knowledge, and lower price than GPT-4. (OpenAI, 128k token context)",
"gpt-4-turbo-preview-8k":
"More powerful, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)",
"gpt-4-turbo-preview": "Like GPT-4 Turob 8k, but with up to 128k token context",
"text-bison-001": "",
"chat-bison-001": "",
"gemini-pro": "Google's Gemini Pro Generative AI model (30k token context)",
@@ -413,6 +418,12 @@ export const LLM_COST: { [name in string]: Cost } = {
completion_tokens: 0.03 / 1000, // $30.00 / 1M tokens
max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit
},
// like above, but we limit the tokens to reduce how much money user has to commit to
"gpt-4-turbo-preview-8k": {
prompt_tokens: 0.01 / 1000, // $10.00 / 1M tokens
completion_tokens: 0.03 / 1000, // $30.00 / 1M tokens
max_tokens: 8192, // the actual reply is 8k, and we use this to truncate the input prompt!
},
"text-embedding-ada-002": {
prompt_tokens: 0.0001 / 1000,
completion_tokens: 0.0001 / 1000, // NOTE: this isn't a thing with embeddings
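
The only reason the -8k cost entry exists is the max-cost calculation mentioned in the comment above: a smaller max_tokens shrinks the worst-case charge a user must be able to cover before a call is allowed. A rough back-of-the-envelope sketch, assuming the worst case is approximated as max_tokens priced at the completion rate (the exact formula CoCalc uses may differ):

// Worst-case cost per call if the whole token budget were billed at the
// completion rate of $30 per 1M tokens.
const completionRate = 0.03 / 1000; // $ per token
const worstCase128k = 128000 * completionRate; // ≈ $3.84
const worstCase8k = 8192 * completionRate;     // ≈ $0.25
// Capping the variant at 8k tokens therefore cuts the balance a user has to
// commit up front by more than an order of magnitude.
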
6 changes: 5 additions & 1 deletion src/packages/util/db-schema/purchase-quotas.ts
@@ -38,7 +38,11 @@ export const QUOTA_SPEC: QuotaSpec = {
noSet: true, // because this is not user visible yet
},
"openai-gpt-4-turbo-preview": {
display: "OpenAI GPT-4 Turbo",
display: "OpenAI GPT-4 Turbo 128k",
color: "#10a37f",
},
"openai-gpt-4-turbo-preview-8k": {
display: "OpenAI GPT-4 Turbo 8k",
color: "#10a37f",
},
"mistralai-mistral-large-latest": {
