diff --git a/src/packages/frontend/account/avatar/avatar.tsx b/src/packages/frontend/account/avatar/avatar.tsx
index 089634f57f4..dde3aa35e68 100644
--- a/src/packages/frontend/account/avatar/avatar.tsx
+++ b/src/packages/frontend/account/avatar/avatar.tsx
@@ -18,7 +18,7 @@ import { LanguageModelVendorAvatar } from "@cocalc/frontend/components/language-
 import { ProjectTitle } from "@cocalc/frontend/projects/project-title";
 import { DEFAULT_COLOR } from "@cocalc/frontend/users/store";
 import { webapp_client } from "@cocalc/frontend/webapp-client";
-import { service2model } from "@cocalc/util/db-schema/openai";
+import { service2model } from "@cocalc/util/db-schema/llm";
 import { ensure_bound, startswith, trunc_middle } from "@cocalc/util/misc";
 import { avatar_fontcolor } from "./font-color";
 
diff --git a/src/packages/frontend/account/chatbot.ts b/src/packages/frontend/account/chatbot.ts
index d3f10280dd8..4c237ec7fdb 100644
--- a/src/packages/frontend/account/chatbot.ts
+++ b/src/packages/frontend/account/chatbot.ts
@@ -13,7 +13,7 @@ import {
   MODELS,
   Vendor,
   model2vendor,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
 
 // we either check if the prefix is one of the known ones (used in some circumstances)
 // or if the account id is exactly one of the language models (more precise)
diff --git a/src/packages/frontend/account/other-settings.tsx b/src/packages/frontend/account/other-settings.tsx
index e01d3ddbc92..27a77341792 100644
--- a/src/packages/frontend/account/other-settings.tsx
+++ b/src/packages/frontend/account/other-settings.tsx
@@ -29,7 +29,7 @@ import {
   getValidLanguageModelName,
   isFreeModel,
   model2vendor,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
 import {
   VBAR_EXPLANATION,
   VBAR_KEY,
diff --git a/src/packages/frontend/account/useLanguageModelSetting.tsx b/src/packages/frontend/account/useLanguageModelSetting.tsx
index d6719d3627d..e3c431c9eb9 100644
--- a/src/packages/frontend/account/useLanguageModelSetting.tsx
+++ b/src/packages/frontend/account/useLanguageModelSetting.tsx
@@ -5,7 +5,7 @@ import {
   fromOllamaModel,
   getValidLanguageModelName,
   isOllamaLLM,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
 
 export const SETTINGS_LANGUAGE_MODEL_KEY = "language_model";
 
diff --git a/src/packages/frontend/chat/actions.ts b/src/packages/frontend/chat/actions.ts
index fb1a8f786aa..bdbe2413e73 100644
--- a/src/packages/frontend/chat/actions.ts
+++ b/src/packages/frontend/chat/actions.ts
@@ -22,7 +22,7 @@ import {
   model2vendor,
   type LanguageModel,
   LANGUAGE_MODEL_PREFIXES,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
 import { cmp, isValidUUID, parse_hashtags, uuid } from "@cocalc/util/misc";
 import { getSortedDates } from "./chat-log";
 import { message_to_markdown } from "./message";
diff --git a/src/packages/frontend/client/openai.ts b/src/packages/frontend/client/openai.ts
index aff1bc4dd41..70b70bd9350 100644
--- a/src/packages/frontend/client/openai.ts
+++ b/src/packages/frontend/client/openai.ts
@@ -8,20 +8,17 @@ import { EventEmitter } from "events";
 
 import { redux } from "@cocalc/frontend/app-framework";
 import type { History } from "@cocalc/frontend/misc/openai"; // do not import until needed -- it is HUGE!
-import type {
-  EmbeddingData,
-  LanguageModel,
-} from "@cocalc/util/db-schema/openai";
+import type { EmbeddingData } from "@cocalc/util/db-schema/openai";
 import {
   MAX_EMBEDDINGS_TOKENS,
   MAX_REMOVE_LIMIT,
   MAX_SAVE_LIMIT,
   MAX_SEARCH_LIMIT,
-  isFreeModel,
-  model2service,
 } from "@cocalc/util/db-schema/openai";
 import * as message from "@cocalc/util/message";
 import type { WebappClient } from "./client";
+import { LanguageModel, LanguageService } from "@cocalc/util/db-schema/llm";
+import { isFreeModel, model2service } from "@cocalc/util/db-schema/llm";
 
 const DEFAULT_SYSTEM_PROMPT =
   "ASSUME THAT I HAVE FULL ACCESS TO COCALC AND I AM USING COCALC RIGHT NOW. ENCLOSE ALL MATH IN $. INCLUDE THE LANGUAGE DIRECTLY AFTER THE TRIPLE BACKTICKS IN ALL MARKDOWN CODE BLOCKS. BE BRIEF.";
 
@@ -98,7 +95,8 @@ export class LLMClient {
     }
 
     if (!isFreeModel(model)) {
-      const service = model2service(model);
+      // Ollama and others are treated as "free", so they never reach this cast
+      const service = model2service(model) as LanguageService;
       // when client gets non-free openai model request, check if allowed. If not, show quota modal.
       const { allowed, reason } =
         await this.client.purchases_client.isPurchaseAllowed(service);
diff --git a/src/packages/frontend/codemirror/extensions/ai-formula.tsx b/src/packages/frontend/codemirror/extensions/ai-formula.tsx
index 71fc33ce5e1..9f69441d060 100644
--- a/src/packages/frontend/codemirror/extensions/ai-formula.tsx
+++ b/src/packages/frontend/codemirror/extensions/ai-formula.tsx
@@ -21,7 +21,7 @@ import ModelSwitch, {
 import { show_react_modal } from "@cocalc/frontend/misc";
 import track from "@cocalc/frontend/user-tracking";
 import { webapp_client } from "@cocalc/frontend/webapp-client";
-import { isFreeModel, isLanguageModel } from "@cocalc/util/db-schema/openai";
+import { isFreeModel, isLanguageModel } from "@cocalc/util/db-schema/llm";
 import { unreachable } from "@cocalc/util/misc";
 
 type Mode = "tex" | "md";
diff --git a/src/packages/frontend/components/language-model-icon.tsx b/src/packages/frontend/components/language-model-icon.tsx
index bef2211d432..80954715c92 100644
--- a/src/packages/frontend/components/language-model-icon.tsx
+++ b/src/packages/frontend/components/language-model-icon.tsx
@@ -1,6 +1,5 @@
-import { isLanguageModel, model2vendor } from "@cocalc/util/db-schema/openai";
-
 import { CSS } from "@cocalc/frontend/app-framework";
+import { isLanguageModel, model2vendor } from "@cocalc/util/db-schema/llm";
 import { unreachable } from "@cocalc/util/misc";
 import AIAvatar from "./ai-avatar";
 import GoogleGeminiLogo from "./google-gemini-avatar";
diff --git a/src/packages/frontend/editors/markdown-input/mentionable-users.tsx b/src/packages/frontend/editors/markdown-input/mentionable-users.tsx
index 30f52936197..badbffa1489 100644
--- a/src/packages/frontend/editors/markdown-input/mentionable-users.tsx
+++ b/src/packages/frontend/editors/markdown-input/mentionable-users.tsx
@@ -12,7 +12,7 @@ import {
   LLM_USERNAMES,
   USER_SELECTABLE_LANGUAGE_MODELS,
   model2service,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
 import { cmp, timestamp_cmp, trunc_middle } from "@cocalc/util/misc";
 import { Item } from "./complete";
 
diff --git a/src/packages/frontend/frame-editors/llm/create-chat.ts b/src/packages/frontend/frame-editors/llm/create-chat.ts
index 0c3b66d5bde..8bc58472440 100644
--- a/src/packages/frontend/frame-editors/llm/create-chat.ts
+++ b/src/packages/frontend/frame-editors/llm/create-chat.ts
@@ -9,7 +9,7 @@ export interface Options {
   command: string;
   allowEmpty?: boolean;
   tag?: string;
-  model: LanguageModel;
+  model: LanguageModel | string;
 }
 
 export default async function createChat({
diff --git a/src/packages/frontend/frame-editors/llm/model-switch.tsx b/src/packages/frontend/frame-editors/llm/model-switch.tsx
index a99d6f66217..5743ad309c6 100644
--- a/src/packages/frontend/frame-editors/llm/model-switch.tsx
+++ b/src/packages/frontend/frame-editors/llm/model-switch.tsx
@@ -6,10 +6,12 @@ import {
   LLM_USERNAMES,
   LanguageModel,
   USER_SELECTABLE_LANGUAGE_MODELS,
+  fromOllamaModel,
   isFreeModel,
+  isOllamaLLM,
   model2service,
   toOllamaModel,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
 
 export { DEFAULT_MODEL };
 export type { LanguageModel };
 
@@ -139,11 +141,18 @@ export default function ModelSwitch({
   );
 }
 
-export function modelToName(model: LanguageModel): string {
+export function modelToName(model: LanguageModel | string): string {
+  if (isOllamaLLM(model)) {
+    const ollama = redux.getStore("customize").get("ollama")?.toJS() ?? {};
+    const om = ollama[fromOllamaModel(model)];
+    if (om) {
+      return om.display ?? `Ollama ${model}`;
+    }
+  }
   return LLM_USERNAMES[model] ?? model;
 }
 
-export function modelToMention(model: LanguageModel): string {
+export function modelToMention(model: LanguageModel | string): string {
   return `@${modelToName(model)}`;
diff --git a/src/packages/frontend/frame-editors/llm/title-bar-button.tsx b/src/packages/frontend/frame-editors/llm/title-bar-button.tsx
index 0c46b7b2b3b..638a81d3588 100644
--- a/src/packages/frontend/frame-editors/llm/title-bar-button.tsx
+++ b/src/packages/frontend/frame-editors/llm/title-bar-button.tsx
@@ -434,7 +434,7 @@ async function updateInput(
   actions: Actions,
   id,
   scope,
-  model: LanguageModel,
+  model: LanguageModel | string,
 ): Promise<{ input: string; inputOrig: string }> {
   if (scope == "none") {
     return { input: "", inputOrig: "" };
diff --git a/src/packages/frontend/jupyter/chatgpt/explain.tsx b/src/packages/frontend/jupyter/chatgpt/explain.tsx
index 40a388bc3db..f53f8a6563a 100644
--- a/src/packages/frontend/jupyter/chatgpt/explain.tsx
+++ b/src/packages/frontend/jupyter/chatgpt/explain.tsx
@@ -140,7 +140,7 @@ async function getExplanation({
   actions: JupyterActions;
   project_id: string;
   path: string;
-  model: LanguageModel;
+  model: LanguageModel | string;
 }) {
   const message = createMessage({ id, actions, model, open: false });
   if (!message) {
diff --git a/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx b/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx
index b20fa35bb76..15ac5ef97b6 100644
--- a/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx
+++ b/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx
@@ -20,7 +20,7 @@ import {
   LanguageModel,
   getVendorStatusCheckMD,
   model2vendor,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
 import { COLORS } from "@cocalc/util/theme";
 import { JupyterActions } from "../browser-actions";
 import { insertCell } from "./util";
@@ -168,7 +168,7 @@ interface QueryLanguageModelProps {
   actions: JupyterActions;
   frameActions: React.MutableRefObject;
   id: string;
-  model: LanguageModel;
+  model: LanguageModel | string;
   path: string;
   position: "above" | "below";
   project_id: string;
@@ -316,7 +316,7 @@ interface GetInputProps {
   actions: JupyterActions;
   frameActions: React.MutableRefObject;
   id: string;
-  model: LanguageModel;
+  model: LanguageModel | string;
   position: "above" | "below";
   prompt: string;
 }
diff --git a/src/packages/frontend/misc/openai.ts b/src/packages/frontend/misc/openai.ts
index b51688602e3..07dee278482 100644
--- a/src/packages/frontend/misc/openai.ts
+++ b/src/packages/frontend/misc/openai.ts
@@ -1,8 +1,8 @@
 // NOTE! This gpt-3-tokenizer is LARGE, e.g., 1.6MB, so be
 // sure to async load it by clients of this code.
 import GPT3Tokenizer from "gpt3-tokenizer";
-import type { Model } from "@cocalc/util/db-schema/openai";
-import { getMaxTokens } from "@cocalc/util/db-schema/openai";
+import type { Model } from "@cocalc/util/db-schema/llm";
+import { getMaxTokens } from "@cocalc/util/db-schema/llm";
 
 export { getMaxTokens };
 
@@ -25,7 +25,7 @@ const tokenizer = new GPT3Tokenizer({ type: "gpt3" });
 
 export function numTokensUpperBound(
   content: string,
-  maxTokens: number
+  maxTokens: number,
 ): number {
   return (
     tokenizer.encode(content.slice(0, maxTokens * APPROX_CHARACTERS_PER_TOKEN))
@@ -64,7 +64,7 @@ export function truncateMessage(content: string, maxTokens: number): string {
 export function truncateHistory(
   history: History,
   maxTokens: number,
-  model: Model
+  model: Model,
 ): History {
   if (maxTokens <= 0) {
     return [];
@@ -101,7 +101,7 @@ export function truncateHistory(
     const before = tokens[largestIndex].length;
     const toRemove = Math.max(
       1,
-      Math.min(maxTokens - total, Math.ceil(tokens[largestIndex].length / 5))
+      Math.min(maxTokens - total, Math.ceil(tokens[largestIndex].length / 5)),
     );
     tokens[largestIndex] = tokens[largestIndex].slice(0, -toRemove);
     const after = tokens[largestIndex].length;
diff --git a/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx b/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx
index 04016fcb5e7..af1a2bd4134 100644
--- a/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx
+++ b/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx
@@ -50,7 +50,7 @@ import { once } from "@cocalc/util/async-utils";
 import {
   getVendorStatusCheckMD,
   model2vendor,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
 import { field_cmp, to_iso_path } from "@cocalc/util/misc";
 import { COLORS } from "@cocalc/util/theme";
 import { ensure_project_running } from "../../project-start-warning";
diff --git a/src/packages/frontend/sagews/chatgpt.ts b/src/packages/frontend/sagews/chatgpt.ts
index 84313e3bc31..33b3a72e998 100644
--- a/src/packages/frontend/sagews/chatgpt.ts
+++ b/src/packages/frontend/sagews/chatgpt.ts
@@ -1,6 +1,6 @@
 import { redux } from "@cocalc/frontend/app-framework";
 import { getHelp } from "@cocalc/frontend/frame-editors/llm/help-me-fix";
-import { getValidLanguageModelName } from "@cocalc/util/db-schema/openai";
+import { getValidLanguageModelName } from "@cocalc/util/db-schema/llm";
 import { MARKERS } from "@cocalc/util/sagews";
 import { SETTINGS_LANGUAGE_MODEL_KEY } from "../account/useLanguageModelSetting";
 
diff --git a/src/packages/next/components/openai/vendor-status-check.tsx b/src/packages/next/components/openai/vendor-status-check.tsx
index 67a3ab01396..7b0192802c3 100644
--- a/src/packages/next/components/openai/vendor-status-check.tsx
+++ b/src/packages/next/components/openai/vendor-status-check.tsx
@@ -1,4 +1,4 @@
-import { Vendor } from "@cocalc/util/db-schema/openai";
+import { Vendor } from "@cocalc/util/db-schema/llm";
 import { unreachable } from "@cocalc/util/misc";
 import A from "components/misc/A";
 
diff --git a/src/packages/server/llm/abuse.ts b/src/packages/server/llm/abuse.ts
index 6b32d9583af..ae89cac4dae 100644
--- a/src/packages/server/llm/abuse.ts
a/src/packages/server/llm/abuse.ts +++ b/src/packages/server/llm/abuse.ts @@ -27,9 +27,10 @@ import { assertPurchaseAllowed } from "@cocalc/server/purchases/is-purchase-allo import { isFreeModel, LanguageModel, + LanguageService, model2service, MODELS, -} from "@cocalc/util/db-schema/openai"; +} from "@cocalc/util/db-schema/llm"; import { isValidUUID } from "@cocalc/util/misc"; const QUOTAS = { @@ -73,7 +74,7 @@ export async function checkForAbuse({ // This is a for-pay product, so let's make sure user can purchase it. await assertPurchaseAllowed({ account_id, - service: model2service(model), + service: model2service(model) as LanguageService, }); // We always allow usage of for pay models, since the user is paying for // them. Only free models need to be throttled. @@ -113,7 +114,7 @@ export async function checkForAbuse({ // This is a for-pay product, so let's make sure user can purchase it. await assertPurchaseAllowed({ account_id, - service: model2service(model), + service: model2service(model) as LanguageService, }); } } diff --git a/src/packages/server/llm/call-llm.ts b/src/packages/server/llm/call-llm.ts index a807d26237a..f59ebdfe26a 100644 --- a/src/packages/server/llm/call-llm.ts +++ b/src/packages/server/llm/call-llm.ts @@ -2,7 +2,7 @@ import { delay } from "awaiting"; import type OpenAI from "openai"; import getLogger from "@cocalc/backend/logger"; -import { ModelOpenAI, OpenAIMessages } from "@cocalc/util/db-schema/openai"; +import { ModelOpenAI, OpenAIMessages } from "@cocalc/util/db-schema/llm"; import { ChatOutput } from "@cocalc/util/types/llm"; import { Stream } from "openai/streaming"; import { totalNumTokens } from "./chatgpt-numtokens"; diff --git a/src/packages/server/llm/client.ts b/src/packages/server/llm/client.ts index 79a0bff122b..4f0fa454ea6 100644 --- a/src/packages/server/llm/client.ts +++ b/src/packages/server/llm/client.ts @@ -4,14 +4,14 @@ Get the client for the given LanguageModel. 
 You do not have to worry too much about throwing an exception, because they're caught in ./index::evaluate
 */
 
-import OpenAI from "openai";
-import jsonStable from "json-stable-stringify";
 import { Ollama } from "@langchain/community/llms/ollama";
+import jsonStable from "json-stable-stringify";
 import * as _ from "lodash";
+import OpenAI from "openai";
 
 import getLogger from "@cocalc/backend/logger";
 import { getServerSettings } from "@cocalc/database/settings/server-settings";
-import { LanguageModel, model2vendor } from "@cocalc/util/db-schema/openai";
+import { LanguageModel, model2vendor } from "@cocalc/util/db-schema/llm";
 import { unreachable } from "@cocalc/util/misc";
 
 import { VertexAIClient } from "./vertex-ai-client";
diff --git a/src/packages/server/llm/index.ts b/src/packages/server/llm/index.ts
index dcd4bbbc834..53bdb8aecf1 100644
--- a/src/packages/server/llm/index.ts
+++ b/src/packages/server/llm/index.ts
@@ -21,13 +21,14 @@ import {
   DEFAULT_MODEL,
   LLM_USERNAMES,
   LanguageModel,
+  LanguageService,
   OpenAIMessages,
   getLLMCost,
   isFreeModel,
   isValidModel,
   model2service,
   model2vendor,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
 import { ChatOptions, ChatOutput, History } from "@cocalc/util/types/llm";
 import { checkForAbuse } from "./abuse";
 import { callChatGPTAPI } from "./call-llm";
@@ -136,9 +137,9 @@ async function evaluateImpl({
       account_id,
       project_id,
       cost,
-      service: model2service(model),
+      service: model2service(model) as LanguageService,
       description: {
-        type: model2service(model),
+        type: model2service(model) as LanguageService,
         prompt_tokens,
         completion_tokens,
       },
diff --git a/src/packages/server/llm/vertex-ai-client.ts b/src/packages/server/llm/vertex-ai-client.ts
index c80730af509..c7a8cf360c2 100644
--- a/src/packages/server/llm/vertex-ai-client.ts
+++ b/src/packages/server/llm/vertex-ai-client.ts
@@ -5,7 +5,7 @@
 */
 
 import getLogger from "@cocalc/backend/logger";
-import { LanguageModel } from "@cocalc/util/db-schema/openai";
+import { LanguageModel } from "@cocalc/util/db-schema/llm";
 import { ChatOutput, History } from "@cocalc/util/types/llm";
 import {
   DiscussServiceClient,
diff --git a/src/packages/server/purchases/get-service-cost.ts b/src/packages/server/purchases/get-service-cost.ts
index f87432e1348..5efb7daad3a 100644
--- a/src/packages/server/purchases/get-service-cost.ts
+++ b/src/packages/server/purchases/get-service-cost.ts
@@ -9,7 +9,7 @@ import {
   getLLMCost,
   isLanguageModelService,
   service2model,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
 import type { Service } from "@cocalc/util/db-schema/purchases";
 import { unreachable } from "@cocalc/util/misc";
 
diff --git a/src/packages/server/purchases/is-purchase-allowed.ts b/src/packages/server/purchases/is-purchase-allowed.ts
index a469637ea07..26f576f74e6 100644
--- a/src/packages/server/purchases/is-purchase-allowed.ts
+++ b/src/packages/server/purchases/is-purchase-allowed.ts
@@ -5,7 +5,7 @@ import {
   getMaxCost,
   isLanguageModelService,
   service2model,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
 import { QUOTA_SPEC, Service } from "@cocalc/util/db-schema/purchase-quotas";
 import { MAX_COST } from "@cocalc/util/db-schema/purchases";
 import { currency, round2up, round2down } from "@cocalc/util/misc";
diff --git a/src/packages/util/db-schema/llm.test.ts b/src/packages/util/db-schema/llm.test.ts
new file mode 100644
index 00000000000..f77084ceee0
--- /dev/null
+++ b/src/packages/util/db-schema/llm.test.ts
@@ -0,0 +1,13 @@
+// this tests llm.ts, i.e., the code that moved here from the wrongly named openai.ts file
+
+import { isFreeModel } from "./llm";
+
+describe("openai/llm", () => {
+  test("isFreeModel", () => {
+    expect(isFreeModel("gpt-3")).toBe(true);
+    expect(isFreeModel("gpt-4")).toBe(false);
+    // WARNING: if the following breaks, and ollama becomes non-free, then a couple of assumptions are broken as well.
+    // search for model2service(...) as LanguageService in the codebase!
+    expect(isFreeModel("ollama-1")).toBe(true);
+  });
+});
diff --git a/src/packages/util/db-schema/llm.ts b/src/packages/util/db-schema/llm.ts
new file mode 100644
index 00000000000..73a5e4cf554
--- /dev/null
+++ b/src/packages/util/db-schema/llm.ts
@@ -0,0 +1,352 @@
+// this contains bits and pieces from the wrongly named openai.ts file
+
+import type { LLMService, Service } from "@cocalc/util/db-schema/purchases";
+import { unreachable } from "../misc";
+
+export const LANGUAGE_MODELS = [
+  "gpt-3.5-turbo",
+  "gpt-3.5-turbo-16k",
+  "gpt-4",
+  "gpt-4-32k",
+  // google's are taken from here – we use the generative AI client lib
+  // https://developers.generativeai.google/models/language
+  "text-bison-001",
+  "chat-bison-001",
+  "embedding-gecko-001",
+  "text-embedding-ada-002",
+  "gemini-pro",
+] as const;
+
+// This hardcodes which models can be selected by users.
+// Make sure to update this when adding new models.
+// This is used in e.g. mentionable-users.tsx, model-switch.tsx and other-settings.tsx
+export const USER_SELECTABLE_LANGUAGE_MODELS: Readonly<LanguageModel[]> = [
+  "gpt-3.5-turbo",
+  "gpt-3.5-turbo-16k",
+  "gpt-4",
+  // "chat-bison-001", // PaLM2 is not good, replies with no response too often
+  "gemini-pro",
+] as const;
+
+export type LanguageModel = (typeof LANGUAGE_MODELS)[number];
+
+export function isLanguageModel(model?: string): model is LanguageModel {
+  return LANGUAGE_MODELS.includes(model as LanguageModel);
+}
+
+export function getValidLanguageModelName(
+  model: string | undefined,
+  filter: { google: boolean; openai: boolean; ollama: boolean } = {
+    google: true,
+    openai: true,
+    ollama: false,
+  },
+  ollama: string[] = [], // keys of ollama models
+): LanguageModel | string {
+  const dftl =
+    filter.openai === true
+      ? DEFAULT_MODEL
+      : filter.ollama && ollama?.length > 0
+        ? toOllamaModel(ollama[0])
+        : "chat-bison-001";
+  console.log("getValidLanguageModelName", model, filter, ollama, dftl);
+  if (model == null) {
+    return dftl;
+  }
+  if (LANGUAGE_MODELS.includes(model as LanguageModel)) {
+    return model;
+  }
+  if (isOllamaLLM(model) && ollama.includes(fromOllamaModel(model))) {
+    return model;
+  }
+  return dftl;
+}
+
+export interface OpenAIMessage {
+  role: "system" | "user" | "assistant";
+  content: string;
+}
+export type OpenAIMessages = OpenAIMessage[];
+
+export type LanguageService =
+  | "openai-gpt-3.5-turbo"
+  | "openai-gpt-3.5-turbo-16k"
+  | "openai-gpt-4"
+  | "openai-gpt-4-32k"
+  | "openai-text-embedding-ada-002"
+  | "google-text-bison-001"
+  | "google-chat-bison-001"
+  | "google-embedding-gecko-001"
+  | "google-gemini-pro";
+
+const LANGUAGE_MODEL_VENDORS = ["openai", "google", "ollama"] as const;
+export type Vendor = (typeof LANGUAGE_MODEL_VENDORS)[number];
+
+// used e.g. for checking "account-id={string}" and other things like that
for checking "account-id={string}" and other things like that +export const LANGUAGE_MODEL_PREFIXES = [ + "chatgpt", + ...LANGUAGE_MODEL_VENDORS.map((v) => `${v}-`), +] as const; + +export function model2service( + model: LanguageModel | string, +): LanguageService | string { + if (model === "text-embedding-ada-002") { + return `openai-${model}`; + } + if (isLanguageModel(model)) { + if ( + model === "text-bison-001" || + model === "chat-bison-001" || + model === "embedding-gecko-001" || + model === "gemini-pro" + ) { + return `google-${model}`; + } else { + return `openai-${model}`; + } + } + if (isOllamaLLM(model)) { + return toOllamaModel(model); + } + throw new Error(`unknown model: ${model}`); +} + +// inverse of model2service, but robust for chat avatars, which might not have a prefix +// TODO: fix the mess +export function service2model( + service: LanguageService | "chatgpt", +): LanguageModel { + if (service === "chatgpt") { + return "gpt-3.5-turbo"; + } + // split off the first part of service, e.g., "openai-" or "google-" + const s = service.split("-")[0]; + const hasPrefix = s === "openai" || s === "google"; + const m = hasPrefix ? service.split("-").slice(1).join("-") : service; + if (!LANGUAGE_MODELS.includes(m as LanguageModel)) { + // We don't throw an error, since the frontend would crash + // throw new Error(`unknown service: ${service}`); + console.warn(`service2model: unknown service: ${service}`); + return "gpt-3.5-turbo"; + } + return m as LanguageModel; +} + +// Note: this must be an OpenAI model – otherwise change the getValidLanguageModelName function +export const DEFAULT_MODEL: LanguageModel = "gpt-3.5-turbo"; + +export function model2vendor(model: LanguageModel | string): Vendor { + if (model.startsWith("gpt-")) { + return "openai"; + } else if (model.startsWith("ollama-")) { + return "ollama"; + } else { + return "google"; + } +} + +export function toOllamaModel(model: string) { + return `ollama-${model}`; +} + +export function fromOllamaModel(model: string) { + return model.replace(/^ollama-/, ""); +} + +export function isOllamaLLM(model: string) { + return model.startsWith("ollama-"); +} + +const MODELS_OPENAI = [ + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-4", + "gpt-4-32k", +] as const; + +export const MODELS = [ + ...MODELS_OPENAI, + "text-embedding-ada-002", + "text-bison-001", + "chat-bison-001", + "embedding-gecko-001", + "gemini-pro", +] as const; + +export type Model = (typeof MODELS)[number]; + +export type ModelOpenAI = (typeof MODELS_OPENAI)[number]; + +// Map from psuedo account_id to what should be displayed to user. +// This is used in various places in the frontend. 
+// Google PaLM: https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
+export const LLM_USERNAMES = {
+  chatgpt: "GPT-3.5",
+  chatgpt3: "GPT-3.5",
+  chatgpt4: "GPT-4",
+  "gpt-4": "GPT-4",
+  "gpt-4-32k": "GPT-4-32k",
+  "gpt-3.5-turbo": "GPT-3.5",
+  "gpt-3.5-turbo-16k": "GPT-3.5-16k",
+  "text-bison-001": "PaLM 2",
+  "chat-bison-001": "PaLM 2",
+  "embedding-gecko-001": "PaLM 2",
+  "gemini-pro": "Gemini Pro",
+} as const;
+
+export function isFreeModel(model: string) {
+  if (isOllamaLLM(model)) return true;
+  if (LANGUAGE_MODELS.includes(model as LanguageModel)) {
+    // of these models, the following are free
+    return (
+      (model as Model) == "gpt-3.5-turbo" ||
+      (model as Model) == "text-bison-001" ||
+      (model as Model) == "chat-bison-001" ||
+      (model as Model) == "embedding-gecko-001" ||
+      (model as Model) == "gemini-pro"
+    );
+  }
+  // all other models (i.e., ones not in LANGUAGE_MODELS) are free
+  return true;
+}
+
+// this is used in purchases/get-service-cost
+// we only need to check for the vendor prefixes, no special cases!
+export function isLanguageModelService(
+  service: Service,
+): service is LLMService {
+  for (const v of LANGUAGE_MODEL_VENDORS) {
+    if (service.startsWith(`${v}-`)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+export function getVendorStatusCheckMD(vendor: Vendor): string {
+  switch (vendor) {
+    case "openai":
+      return `OpenAI [status](https://status.openai.com) and [downdetector](https://downdetector.com/status/openai).`;
+    case "google":
+      return `Google [status](https://status.cloud.google.com) and [downdetector](https://downdetector.com/status/google-cloud).`;
+    case "ollama":
+      return `No status information for Ollama available – you have to check with the particular backend for the model.`;
+    default:
+      unreachable(vendor);
+  }
+  return "";
+}
+
+export function llmSupportsStreaming(model: LanguageModel): boolean {
+  return model2vendor(model) === "openai" || model === "gemini-pro";
+}
+
+interface Cost {
+  prompt_tokens: number;
+  completion_tokens: number;
+  max_tokens: number;
+}
+
+// This is the official published cost that openai charges.
+// It changes over time, so this will sometimes need to be updated.
+// Our cost is a configurable multiple of this.
+// https://openai.com/pricing#language-models
+// There appears to be no api that provides the prices, unfortunately.
+const LLM_COST: { [name in LanguageModel]: Cost } = {
+  "gpt-4": {
+    prompt_tokens: 0.03 / 1000,
+    completion_tokens: 0.06 / 1000,
+    max_tokens: 8192,
+  },
+  "gpt-4-32k": {
+    prompt_tokens: 0.06 / 1000,
+    completion_tokens: 0.12 / 1000,
+    max_tokens: 32768,
+  },
+  "gpt-3.5-turbo": {
+    prompt_tokens: 0.0015 / 1000,
+    completion_tokens: 0.002 / 1000,
+    max_tokens: 4096,
+  },
+  "gpt-3.5-turbo-16k": {
+    prompt_tokens: 0.003 / 1000,
+    completion_tokens: 0.004 / 1000,
+    max_tokens: 16384,
+  },
+  "text-embedding-ada-002": {
+    prompt_tokens: 0.0001 / 1000,
+    completion_tokens: 0.0001 / 1000, // NOTE: this isn't a thing with embeddings
+    max_tokens: 8191,
+  },
+  // https://developers.generativeai.google/models/language
+  "text-bison-001": {
+    // we assume 5 characters is 1 token on average
+    prompt_tokens: (5 * 0.0005) / 1000,
+    completion_tokens: (5 * 0.0005) / 1000,
+    max_tokens: 8196,
+  },
+  "chat-bison-001": {
+    // we assume 5 characters is 1 token on average
+    prompt_tokens: (5 * 0.0005) / 1000,
+    completion_tokens: (5 * 0.0005) / 1000,
+    max_tokens: 8196,
+  },
+  "embedding-gecko-001": {
+    prompt_tokens: (5 * 0.0001) / 1000,
+    completion_tokens: 0,
+    max_tokens: 8196, // ???
+  },
+  "gemini-pro": {
+    // https://ai.google.dev/models/gemini
+    prompt_tokens: (5 * 0.0001) / 1000,
+    completion_tokens: 0,
+    max_tokens: 30720,
+  },
+} as const;
+
+export function isValidModel(model?: string): boolean {
+  if (model == null) return false;
+  if (model.startsWith("ollama-")) return true;
+  return LLM_COST[model ?? ""] != null;
+}
+
+export function getMaxTokens(model?: Model | string): number {
+  return LLM_COST[model ?? ""]?.max_tokens ?? 4096;
+}
+
+export interface LLMCost {
+  prompt_tokens: number;
+  completion_tokens: number;
+}
+
+export function getLLMCost(
+  model: Model,
+  markup_percentage: number, // a number like "30" would mean that we increase the wholesale price by multiplying by 1.3
+): LLMCost {
+  const x = LLM_COST[model];
+  if (x == null) {
+    throw Error(`unknown model "${model}"`);
+  }
+  const { prompt_tokens, completion_tokens } = x;
+  if (markup_percentage < 0) {
+    throw Error("markup percentage can't be negative");
+  }
+  const f = 1 + markup_percentage / 100;
+  return {
+    prompt_tokens: prompt_tokens * f,
+    completion_tokens: completion_tokens * f,
+  };
+}
+
+// The maximum cost for one single call using the given model.
+// We can't know the cost until after it happens, so this bound is useful for
+// ensuring user can afford to make a call.
+export function getMaxCost(model: Model, markup_percentage: number): number {
+  const { prompt_tokens, completion_tokens } = getLLMCost(
+    model,
+    markup_percentage,
+  );
+  const { max_tokens } = LLM_COST[model];
+  return Math.max(prompt_tokens, completion_tokens) * max_tokens;
+}
diff --git a/src/packages/util/db-schema/openai.ts b/src/packages/util/db-schema/openai.ts
index bd2b64ab328..8a1b8e5b3d7 100644
--- a/src/packages/util/db-schema/openai.ts
+++ b/src/packages/util/db-schema/openai.ts
@@ -2,347 +2,12 @@
 // Mentally, just ignore "openai" and instead focus on "gpt-*" or "codey" or whatever they are called.
 // TODO: refactor this, the names of the tables, etc. to be more generic.
 
-import type { LLMService, Service } from "@cocalc/util/db-schema/purchases";
-
 import { History } from "@cocalc/util/types/llm";
-import { unreachable } from "../misc";
 import { CREATED_BY, ID } from "./crm";
 import { SCHEMA as schema } from "./index";
+import { LanguageModel } from "./llm";
 import { Table } from "./types";
 
-export const LANGUAGE_MODELS = [
-  "gpt-3.5-turbo",
-  "gpt-3.5-turbo-16k",
-  "gpt-4",
-  "gpt-4-32k",
-  // google's are taken from here – we use the generative AI client lib
-  // https://developers.generativeai.google/models/language
-  "text-bison-001",
-  "chat-bison-001",
-  "embedding-gecko-001",
-  "text-embedding-ada-002",
-  "gemini-pro",
-] as const;
-
-// This hardcodes which models can be selected by users.
-// Make sure to update this when adding new models.
-// This is used in e.g. mentionable-users.tsx, model-switch.tsx and other-settings.tsx
-export const USER_SELECTABLE_LANGUAGE_MODELS: Readonly<LanguageModel[]> = [
-  "gpt-3.5-turbo",
-  "gpt-3.5-turbo-16k",
-  "gpt-4",
-  // "chat-bison-001", // PaLM2 is not good, replies with no response too often
-  "gemini-pro",
-] as const;
-
-export type LanguageModel = (typeof LANGUAGE_MODELS)[number];
-
-export function isLanguageModel(model?: string): model is LanguageModel {
-  return LANGUAGE_MODELS.includes(model as LanguageModel);
-}
-
-export function getValidLanguageModelName(
-  model: string | undefined,
-  filter: { google: boolean; openai: boolean; ollama: boolean } = {
-    google: true,
-    openai: true,
-    ollama: false,
-  },
-  ollama: string[] = [], // keys of ollama models
-): LanguageModel | string {
-  const dftl =
-    filter.openai === true
-      ? DEFAULT_MODEL
-      : filter.ollama && ollama?.length > 0
-        ? toOllamaModel(ollama[0])
-        : "chat-bison-001";
-  console.log("getValidLanguageModelName", model, filter, ollama, dftl);
-  if (model == null) {
-    return dftl;
-  }
-  if (LANGUAGE_MODELS.includes(model as LanguageModel)) {
-    return model;
-  }
-  if (isOllamaLLM(model) && ollama.includes(fromOllamaModel(model))) {
-    return model;
-  }
-  return dftl;
-}
-
-export interface OpenAIMessage {
-  role: "system" | "user" | "assistant";
-  content: string;
-}
-export type OpenAIMessages = OpenAIMessage[];
-
-export type LanguageService =
-  | "openai-gpt-3.5-turbo"
-  | "openai-gpt-3.5-turbo-16k"
-  | "openai-gpt-4"
-  | "openai-gpt-4-32k"
-  | "openai-text-embedding-ada-002"
-  | "google-text-bison-001"
-  | "google-chat-bison-001"
-  | "google-embedding-gecko-001"
-  | "google-gemini-pro";
-
-const LANGUAGE_MODEL_VENDORS = ["openai", "google", "ollama"] as const;
-export type Vendor = (typeof LANGUAGE_MODEL_VENDORS)[number];
-
-// used e.g. for checking "account-id={string}" and other things like that
-export const LANGUAGE_MODEL_PREFIXES = [
-  "chatgpt",
-  ...LANGUAGE_MODEL_VENDORS.map((v) => `${v}-`),
-] as const;
-
-export function model2service(model: LanguageModel): LanguageService {
-  if (model === "text-embedding-ada-002") return `openai-${model}`;
-  if (
-    model === "text-bison-001" ||
-    model === "chat-bison-001" ||
-    model === "embedding-gecko-001" ||
-    model === "gemini-pro"
-  ) {
-    return `google-${model}`;
-  } else {
-    return `openai-${model}`;
-  }
-}
-
-// inverse of model2service, but robust for chat avatars, which might not have a prefix
-// TODO: fix the mess
-export function service2model(
-  service: LanguageService | "chatgpt",
-): LanguageModel {
-  if (service === "chatgpt") {
-    return "gpt-3.5-turbo";
-  }
-  // split off the first part of service, e.g., "openai-" or "google-"
-  const s = service.split("-")[0];
-  const hasPrefix = s === "openai" || s === "google";
-  const m = hasPrefix ? service.split("-").slice(1).join("-") : service;
service.split("-").slice(1).join("-") : service; - if (!LANGUAGE_MODELS.includes(m as LanguageModel)) { - // We don't throw an error, since the frontend would crash - // throw new Error(`unknown service: ${service}`); - console.warn(`service2model: unknown service: ${service}`); - return "gpt-3.5-turbo"; - } - return m as LanguageModel; -} - -// Note: this must be an OpenAI model – otherwise change the getValidLanguageModelName function -export const DEFAULT_MODEL: LanguageModel = "gpt-3.5-turbo"; - -export function model2vendor(model: LanguageModel): Vendor { - if (model.startsWith("gpt-")) { - return "openai"; - } else if (model.startsWith("ollama-")) { - return "ollama"; - } else { - return "google"; - } -} - -export function toOllamaModel(model: string) { - return `ollama-${model}`; -} - -export function fromOllamaModel(model: string) { - return model.replace(/^ollama-/, ""); -} - -export function isOllamaLLM(model: string) { - return model.startsWith("ollama-"); -} - -const MODELS_OPENAI = [ - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-4", - "gpt-4-32k", -] as const; - -export const MODELS = [ - ...MODELS_OPENAI, - "text-embedding-ada-002", - "text-bison-001", - "chat-bison-001", - "embedding-gecko-001", - "gemini-pro", -] as const; - -export type Model = (typeof MODELS)[number]; - -export type ModelOpenAI = (typeof MODELS_OPENAI)[number]; - -// Map from psuedo account_id to what should be displayed to user. -// This is used in various places in the frontend. -// Google PaLM: https://cloud.google.com/vertex-ai/docs/generative-ai/pricing -export const LLM_USERNAMES = { - chatgpt: "GPT-3.5", - chatgpt3: "GPT-3.5", - chatgpt4: "GPT-4", - "gpt-4": "GPT-4", - "gpt-4-32k": "GPT-4-32k", - "gpt-3.5-turbo": "GPT-3.5", - "gpt-3.5-turbo-16k": "GPT-3.5-16k", - "text-bison-001": "PaLM 2", - "chat-bison-001": "PaLM 2", - "embedding-gecko-001": "PaLM 2", - "gemini-pro": "Gemini Pro", -} as const; - -export function isFreeModel(model: string) { - if (!LANGUAGE_MODELS.includes(model as LanguageModel)) return false; - return ( - (model as Model) == "gpt-3.5-turbo" || - (model as Model) == "text-bison-001" || - (model as Model) == "chat-bison-001" || - (model as Model) == "embedding-gecko-001" || - (model as Model) == "gemini-pro" - ); -} - -// this is used in purchases/get-service-cost -// we only need to check for the vendor prefixes, no special cases! -export function isLanguageModelService( - service: Service, -): service is LLMService { - for (const v of LANGUAGE_MODEL_VENDORS) { - if (service.startsWith(`${v}-`)) { - return true; - } - } - return false; -} - -export function getVendorStatusCheckMD(vendor: Vendor): string { - switch (vendor) { - case "openai": - return `OpenAI [status](https://status.openai.com) and [downdetector](https://downdetector.com/status/openai).`; - case "google": - return `Google [status](https://status.cloud.google.com) and [downdetector](https://downdetector.com/status/google-cloud).`; - case "ollama": - return `No status information for Ollama available – you have to check with the particular backend for the model.`; - default: - unreachable(vendor); - } - return ""; -} - -export function llmSupportsStreaming(model: LanguageModel): boolean { - return model2vendor(model) === "openai" || model === "gemini-pro"; -} - -interface Cost { - prompt_tokens: number; - completion_tokens: number; - max_tokens: number; -} - -// This is the official published cost that openai charges. -// It changes over time, so this will sometimes need to be updated. 
-// Our cost is a configurable multiple of this.
-// https://openai.com/pricing#language-models
-// There appears to be no api that provides the prices, unfortunately.
-const LLM_COST: { [name in LanguageModel]: Cost } = {
-  "gpt-4": {
-    prompt_tokens: 0.03 / 1000,
-    completion_tokens: 0.06 / 1000,
-    max_tokens: 8192,
-  },
-  "gpt-4-32k": {
-    prompt_tokens: 0.06 / 1000,
-    completion_tokens: 0.12 / 1000,
-    max_tokens: 32768,
-  },
-  "gpt-3.5-turbo": {
-    prompt_tokens: 0.0015 / 1000,
-    completion_tokens: 0.002 / 1000,
-    max_tokens: 4096,
-  },
-  "gpt-3.5-turbo-16k": {
-    prompt_tokens: 0.003 / 1000,
-    completion_tokens: 0.004 / 1000,
-    max_tokens: 16384,
-  },
-  "text-embedding-ada-002": {
-    prompt_tokens: 0.0001 / 1000,
-    completion_tokens: 0.0001 / 1000, // NOTE: this isn't a thing with embeddings
-    max_tokens: 8191,
-  },
-  // https://developers.generativeai.google/models/language
-  "text-bison-001": {
-    // we assume 5 characters is 1 token on average
-    prompt_tokens: (5 * 0.0005) / 1000,
-    completion_tokens: (5 * 0.0005) / 1000,
-    max_tokens: 8196,
-  },
-  "chat-bison-001": {
-    // we assume 5 characters is 1 token on average
-    prompt_tokens: (5 * 0.0005) / 1000,
-    completion_tokens: (5 * 0.0005) / 1000,
-    max_tokens: 8196,
-  },
-  "embedding-gecko-001": {
-    prompt_tokens: (5 * 0.0001) / 1000,
-    completion_tokens: 0,
-    max_tokens: 8196, // ???
-  },
-  "gemini-pro": {
-    // https://ai.google.dev/models/gemini
-    prompt_tokens: (5 * 0.0001) / 1000,
-    completion_tokens: 0,
-    max_tokens: 30720,
-  },
-} as const;
-
-export function isValidModel(model?: string): boolean {
-  if (model == null) return false;
-  if (model.startsWith("ollama-")) return true;
-  return LLM_COST[model ?? ""] != null;
-}
-
-export function getMaxTokens(model?: Model): number {
-  return LLM_COST[model ?? ""]?.max_tokens ?? 4096;
-}
-
-export interface LLMCost {
-  prompt_tokens: number;
-  completion_tokens: number;
-}
-
-export function getLLMCost(
-  model: Model,
-  markup_percentage: number, // a number like "30" would mean that we increase the wholesale price by multiplying by 1.3
-): LLMCost {
-  const x = LLM_COST[model];
-  if (x == null) {
-    throw Error(`unknown model "${model}"`);
-  }
-  const { prompt_tokens, completion_tokens } = x;
-  if (markup_percentage < 0) {
-    throw Error("markup percentage can't be negative");
-  }
-  const f = 1 + markup_percentage / 100;
-  return {
-    prompt_tokens: prompt_tokens * f,
-    completion_tokens: completion_tokens * f,
-  };
-}
-
-// The maximum cost for one single call using the given model.
-// We can't know the cost until after it happens, so this bound is useful for
-// ensuring user can afford to make a call.
-export function getMaxCost(model: Model, markup_percentage: number): number {
-  const { prompt_tokens, completion_tokens } = getLLMCost(
-    model,
-    markup_percentage,
-  );
-  const { max_tokens } = LLM_COST[model];
-  return Math.max(prompt_tokens, completion_tokens) * max_tokens;
-}
-
 export interface ChatGPTLogEntry {
   id: number;
   account_id?: string;
diff --git a/src/packages/util/db-schema/purchase-quotas.ts b/src/packages/util/db-schema/purchase-quotas.ts
index a96b749e106..79322048248 100644
--- a/src/packages/util/db-schema/purchase-quotas.ts
+++ b/src/packages/util/db-schema/purchase-quotas.ts
@@ -2,7 +2,7 @@ import { Table } from "./types";
 import { CREATED_BY, ID } from "./crm";
 import { SCHEMA as schema } from "./index";
 import type { Service } from "./purchases";
-import { isFreeModel } from "./openai";
+import { isFreeModel } from "./llm";
 
 export type { Service };
 
diff --git a/src/packages/util/types/llm.ts b/src/packages/util/types/llm.ts
index 36c8d35668e..70cce04b8c0 100644
--- a/src/packages/util/types/llm.ts
+++ b/src/packages/util/types/llm.ts
@@ -1,4 +1,4 @@
-import { LanguageModel } from "@cocalc/util/db-schema/openai";
+import { LanguageModel } from "@cocalc/util/db-schema/llm";
 
 export type History = {
   role: "assistant" | "user" | "system";
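
What follows are three short illustrative sketches, not part of the patch itself; they spell out behavior that the hunks above rely on, using only the definitions in the new llm.ts. First, the invariant that llm.test.ts pins down and that every `model2service(model) as LanguageService` cast in abuse.ts, index.ts, and client/openai.ts depends on: all "ollama-*" models are free, so the cast is only reached for OpenAI/Google models, whose service names are members of the LanguageService union. The helper name serviceForPaidModel is hypothetical, not part of the codebase.

import {
  isFreeModel,
  model2service,
  type LanguageService,
} from "@cocalc/util/db-schema/llm";

// hypothetical helper mirroring the pattern at the call sites in this diff
function serviceForPaidModel(model: string): LanguageService | undefined {
  if (isFreeModel(model)) {
    // free models (gpt-3.5-turbo, the Google models, all "ollama-*")
    // never create a purchase, so no service name is needed
    return undefined;
  }
  // only paid models (gpt-4, gpt-4-32k, gpt-3.5-turbo-16k,
  // text-embedding-ada-002) reach this line, and each of them maps to a
  // member of the LanguageService union, so the cast is sound
  return model2service(model) as LanguageService;
}

console.log(serviceForPaidModel("gpt-4")); // "openai-gpt-4"
console.log(serviceForPaidModel("ollama-llama2")); // undefined (free)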
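
Second, a worked example of the cost helpers that moved into llm.ts, assuming the LLM_COST table above and a hypothetical 30% markup; the numbers follow directly from the definitions, they are not measured values.

import { getLLMCost, getMaxCost } from "@cocalc/util/db-schema/llm";

// a markup of 30 multiplies the wholesale per-token price by 1.3
const { prompt_tokens, completion_tokens } = getLLMCost("gpt-4", 30);
// prompt_tokens     = (0.03 / 1000) * 1.3 = $0.000039 per token
// completion_tokens = (0.06 / 1000) * 1.3 = $0.000078 per token

// upper bound for one call: the pricier token rate times max_tokens
const maxCost = getMaxCost("gpt-4", 30);
// = 0.000078 * 8192 ≈ $0.64; is-purchase-allowed.ts compares this bound
// against the user's balance before the call is made
console.log(prompt_tokens, completion_tokens, maxCost);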
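
Third, a sketch of the fallback order in getValidLanguageModelName, which the frontend (other-settings.tsx, useLanguageModelSetting.tsx, sagews/chatgpt.ts) uses to sanitize a stored model name; the ollama key "llama2" is a made-up example of a configured Ollama model.

import { getValidLanguageModelName } from "@cocalc/util/db-schema/llm";

// a known model passes through unchanged
getValidLanguageModelName("gpt-4"); // "gpt-4"

// an unknown model falls back to the default; with OpenAI enabled
// (the default filter) that is DEFAULT_MODEL
getValidLanguageModelName("no-such-model"); // "gpt-3.5-turbo"

// OpenAI disabled, Ollama enabled and configured: first ollama model
getValidLanguageModelName(
  undefined,
  { google: true, openai: false, ollama: true },
  ["llama2"], // hypothetical key of a configured ollama model
); // "ollama-llama2"

// OpenAI and Ollama both disabled: the PaLM chat model
getValidLanguageModelName(undefined, {
  google: true,
  openai: false,
  ollama: false,
}); // "chat-bison-001"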