diff --git a/src/packages/frontend/account/avatar/avatar.tsx b/src/packages/frontend/account/avatar/avatar.tsx
index 089634f57f4..dde3aa35e68 100644
--- a/src/packages/frontend/account/avatar/avatar.tsx
+++ b/src/packages/frontend/account/avatar/avatar.tsx
@@ -18,7 +18,7 @@ import { LanguageModelVendorAvatar } from "@cocalc/frontend/components/language-
import { ProjectTitle } from "@cocalc/frontend/projects/project-title";
import { DEFAULT_COLOR } from "@cocalc/frontend/users/store";
import { webapp_client } from "@cocalc/frontend/webapp-client";
-import { service2model } from "@cocalc/util/db-schema/openai";
+import { service2model } from "@cocalc/util/db-schema/llm";
import { ensure_bound, startswith, trunc_middle } from "@cocalc/util/misc";
import { avatar_fontcolor } from "./font-color";
diff --git a/src/packages/frontend/account/chatbot.ts b/src/packages/frontend/account/chatbot.ts
index d3f10280dd8..4c237ec7fdb 100644
--- a/src/packages/frontend/account/chatbot.ts
+++ b/src/packages/frontend/account/chatbot.ts
@@ -13,7 +13,7 @@ import {
MODELS,
Vendor,
model2vendor,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
// we either check if the prefix is one of the known ones (used in some circumstances)
// or if the account id is exactly one of the language models (more precise)
diff --git a/src/packages/frontend/account/other-settings.tsx b/src/packages/frontend/account/other-settings.tsx
index e01d3ddbc92..27a77341792 100644
--- a/src/packages/frontend/account/other-settings.tsx
+++ b/src/packages/frontend/account/other-settings.tsx
@@ -29,7 +29,7 @@ import {
getValidLanguageModelName,
isFreeModel,
model2vendor,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
import {
VBAR_EXPLANATION,
VBAR_KEY,
diff --git a/src/packages/frontend/account/useLanguageModelSetting.tsx b/src/packages/frontend/account/useLanguageModelSetting.tsx
index d6719d3627d..e3c431c9eb9 100644
--- a/src/packages/frontend/account/useLanguageModelSetting.tsx
+++ b/src/packages/frontend/account/useLanguageModelSetting.tsx
@@ -5,7 +5,7 @@ import {
fromOllamaModel,
getValidLanguageModelName,
isOllamaLLM,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
export const SETTINGS_LANGUAGE_MODEL_KEY = "language_model";
diff --git a/src/packages/frontend/chat/actions.ts b/src/packages/frontend/chat/actions.ts
index fb1a8f786aa..bdbe2413e73 100644
--- a/src/packages/frontend/chat/actions.ts
+++ b/src/packages/frontend/chat/actions.ts
@@ -22,7 +22,7 @@ import {
model2vendor,
type LanguageModel,
LANGUAGE_MODEL_PREFIXES,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
import { cmp, isValidUUID, parse_hashtags, uuid } from "@cocalc/util/misc";
import { getSortedDates } from "./chat-log";
import { message_to_markdown } from "./message";
diff --git a/src/packages/frontend/client/openai.ts b/src/packages/frontend/client/openai.ts
index aff1bc4dd41..70b70bd9350 100644
--- a/src/packages/frontend/client/openai.ts
+++ b/src/packages/frontend/client/openai.ts
@@ -8,20 +8,17 @@ import { EventEmitter } from "events";
import { redux } from "@cocalc/frontend/app-framework";
import type { History } from "@cocalc/frontend/misc/openai"; // do not import until needed -- it is HUGE!
-import type {
- EmbeddingData,
- LanguageModel,
-} from "@cocalc/util/db-schema/openai";
+import type { EmbeddingData } from "@cocalc/util/db-schema/openai";
import {
MAX_EMBEDDINGS_TOKENS,
MAX_REMOVE_LIMIT,
MAX_SAVE_LIMIT,
MAX_SEARCH_LIMIT,
- isFreeModel,
- model2service,
} from "@cocalc/util/db-schema/openai";
import * as message from "@cocalc/util/message";
import type { WebappClient } from "./client";
+import { LanguageModel, LanguageService, isFreeModel, model2service } from "@cocalc/util/db-schema/llm";
const DEFAULT_SYSTEM_PROMPT =
"ASSUME THAT I HAVE FULL ACCESS TO COCALC AND I AM USING COCALC RIGHT NOW. ENCLOSE ALL MATH IN $. INCLUDE THE LANGUAGE DIRECTLY AFTER THE TRIPLE BACKTICKS IN ALL MARKDOWN CODE BLOCKS. BE BRIEF.";
@@ -98,7 +95,8 @@ export class LLMClient {
}
if (!isFreeModel(model)) {
- const service = model2service(model);
+ // Non-free models are never Ollama models (Ollama is treated as "free"),
+ // so model2service returns a proper LanguageService here and the cast is safe.
+ const service = model2service(model) as LanguageService;
// when client gets non-free openai model request, check if allowed. If not, show quota modal.
const { allowed, reason } =
await this.client.purchases_client.isPurchaseAllowed(service);
diff --git a/src/packages/frontend/codemirror/extensions/ai-formula.tsx b/src/packages/frontend/codemirror/extensions/ai-formula.tsx
index 71fc33ce5e1..9f69441d060 100644
--- a/src/packages/frontend/codemirror/extensions/ai-formula.tsx
+++ b/src/packages/frontend/codemirror/extensions/ai-formula.tsx
@@ -21,7 +21,7 @@ import ModelSwitch, {
import { show_react_modal } from "@cocalc/frontend/misc";
import track from "@cocalc/frontend/user-tracking";
import { webapp_client } from "@cocalc/frontend/webapp-client";
-import { isFreeModel, isLanguageModel } from "@cocalc/util/db-schema/openai";
+import { isFreeModel, isLanguageModel } from "@cocalc/util/db-schema/llm";
import { unreachable } from "@cocalc/util/misc";
type Mode = "tex" | "md";
diff --git a/src/packages/frontend/components/language-model-icon.tsx b/src/packages/frontend/components/language-model-icon.tsx
index bef2211d432..80954715c92 100644
--- a/src/packages/frontend/components/language-model-icon.tsx
+++ b/src/packages/frontend/components/language-model-icon.tsx
@@ -1,6 +1,5 @@
-import { isLanguageModel, model2vendor } from "@cocalc/util/db-schema/openai";
-
import { CSS } from "@cocalc/frontend/app-framework";
+import { isLanguageModel, model2vendor } from "@cocalc/util/db-schema/llm";
import { unreachable } from "@cocalc/util/misc";
import AIAvatar from "./ai-avatar";
import GoogleGeminiLogo from "./google-gemini-avatar";
diff --git a/src/packages/frontend/editors/markdown-input/mentionable-users.tsx b/src/packages/frontend/editors/markdown-input/mentionable-users.tsx
index 30f52936197..badbffa1489 100644
--- a/src/packages/frontend/editors/markdown-input/mentionable-users.tsx
+++ b/src/packages/frontend/editors/markdown-input/mentionable-users.tsx
@@ -12,7 +12,7 @@ import {
LLM_USERNAMES,
USER_SELECTABLE_LANGUAGE_MODELS,
model2service,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
import { cmp, timestamp_cmp, trunc_middle } from "@cocalc/util/misc";
import { Item } from "./complete";
diff --git a/src/packages/frontend/frame-editors/llm/create-chat.ts b/src/packages/frontend/frame-editors/llm/create-chat.ts
index 0c3b66d5bde..8bc58472440 100644
--- a/src/packages/frontend/frame-editors/llm/create-chat.ts
+++ b/src/packages/frontend/frame-editors/llm/create-chat.ts
@@ -9,7 +9,7 @@ export interface Options {
command: string;
allowEmpty?: boolean;
tag?: string;
- model: LanguageModel;
+ model: LanguageModel | string;
}
export default async function createChat({
diff --git a/src/packages/frontend/frame-editors/llm/model-switch.tsx b/src/packages/frontend/frame-editors/llm/model-switch.tsx
index a99d6f66217..5743ad309c6 100644
--- a/src/packages/frontend/frame-editors/llm/model-switch.tsx
+++ b/src/packages/frontend/frame-editors/llm/model-switch.tsx
@@ -6,10 +6,12 @@ import {
LLM_USERNAMES,
LanguageModel,
USER_SELECTABLE_LANGUAGE_MODELS,
+ fromOllamaModel,
isFreeModel,
+ isOllamaLLM,
model2service,
toOllamaModel,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
export { DEFAULT_MODEL };
export type { LanguageModel };
@@ -139,11 +141,18 @@ export default function ModelSwitch({
);
}
-export function modelToName(model: LanguageModel): string {
+export function modelToName(model: LanguageModel | string): string {
+ if (isOllamaLLM(model)) {
+ const ollama = redux.getStore("customize").get("ollama")?.toJS() ?? {};
+ const om = ollama[fromOllamaModel(model)];
+ if (om) {
+ return om.display ?? `Ollama ${model}`;
+ }
+ }
return LLM_USERNAMES[model] ?? model;
}
-export function modelToMention(model: LanguageModel): string {
+export function modelToMention(model: LanguageModel | string): string {
return `@${modelToName(model)}`;
diff --git a/src/packages/frontend/frame-editors/llm/title-bar-button.tsx b/src/packages/frontend/frame-editors/llm/title-bar-button.tsx
index 0c46b7b2b3b..638a81d3588 100644
--- a/src/packages/frontend/frame-editors/llm/title-bar-button.tsx
+++ b/src/packages/frontend/frame-editors/llm/title-bar-button.tsx
@@ -434,7 +434,7 @@ async function updateInput(
actions: Actions,
id,
scope,
- model: LanguageModel,
+ model: LanguageModel | string,
): Promise<{ input: string; inputOrig: string }> {
if (scope == "none") {
return { input: "", inputOrig: "" };
diff --git a/src/packages/frontend/jupyter/chatgpt/explain.tsx b/src/packages/frontend/jupyter/chatgpt/explain.tsx
index 40a388bc3db..f53f8a6563a 100644
--- a/src/packages/frontend/jupyter/chatgpt/explain.tsx
+++ b/src/packages/frontend/jupyter/chatgpt/explain.tsx
@@ -140,7 +140,7 @@ async function getExplanation({
actions: JupyterActions;
project_id: string;
path: string;
- model: LanguageModel;
+ model: LanguageModel | string;
}) {
const message = createMessage({ id, actions, model, open: false });
if (!message) {
diff --git a/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx b/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx
index b20fa35bb76..15ac5ef97b6 100644
--- a/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx
+++ b/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx
@@ -20,7 +20,7 @@ import {
LanguageModel,
getVendorStatusCheckMD,
model2vendor,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
import { COLORS } from "@cocalc/util/theme";
import { JupyterActions } from "../browser-actions";
import { insertCell } from "./util";
@@ -168,7 +168,7 @@ interface QueryLanguageModelProps {
actions: JupyterActions;
frameActions: React.MutableRefObject<NotebookFrameActions | undefined>;
id: string;
- model: LanguageModel;
+ model: LanguageModel | string;
path: string;
position: "above" | "below";
project_id: string;
@@ -316,7 +316,7 @@ interface GetInputProps {
actions: JupyterActions;
frameActions: React.MutableRefObject<NotebookFrameActions | undefined>;
id: string;
- model: LanguageModel;
+ model: LanguageModel | string;
position: "above" | "below";
prompt: string;
}
diff --git a/src/packages/frontend/misc/openai.ts b/src/packages/frontend/misc/openai.ts
index b51688602e3..07dee278482 100644
--- a/src/packages/frontend/misc/openai.ts
+++ b/src/packages/frontend/misc/openai.ts
@@ -1,8 +1,8 @@
// NOTE! This gpt-3-tokenizer is LARGE, e.g., 1.6MB, so be
// sure to async load it by clients of this code.
import GPT3Tokenizer from "gpt3-tokenizer";
-import type { Model } from "@cocalc/util/db-schema/openai";
-import { getMaxTokens } from "@cocalc/util/db-schema/openai";
+import type { Model } from "@cocalc/util/db-schema/llm";
+import { getMaxTokens } from "@cocalc/util/db-schema/llm";
export { getMaxTokens };
@@ -25,7 +25,7 @@ const tokenizer = new GPT3Tokenizer({ type: "gpt3" });
export function numTokensUpperBound(
content: string,
- maxTokens: number
+ maxTokens: number,
): number {
return (
tokenizer.encode(content.slice(0, maxTokens * APPROX_CHARACTERS_PER_TOKEN))
@@ -64,7 +64,7 @@ export function truncateMessage(content: string, maxTokens: number): string {
export function truncateHistory(
history: History,
maxTokens: number,
- model: Model
+ model: Model,
): History {
if (maxTokens <= 0) {
return [];
@@ -101,7 +101,7 @@ export function truncateHistory(
const before = tokens[largestIndex].length;
const toRemove = Math.max(
1,
- Math.min(maxTokens - total, Math.ceil(tokens[largestIndex].length / 5))
+ Math.min(maxTokens - total, Math.ceil(tokens[largestIndex].length / 5)),
);
tokens[largestIndex] = tokens[largestIndex].slice(0, -toRemove);
const after = tokens[largestIndex].length;
diff --git a/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx b/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx
index 04016fcb5e7..af1a2bd4134 100644
--- a/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx
+++ b/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx
@@ -50,7 +50,7 @@ import { once } from "@cocalc/util/async-utils";
import {
getVendorStatusCheckMD,
model2vendor,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
import { field_cmp, to_iso_path } from "@cocalc/util/misc";
import { COLORS } from "@cocalc/util/theme";
import { ensure_project_running } from "../../project-start-warning";
diff --git a/src/packages/frontend/sagews/chatgpt.ts b/src/packages/frontend/sagews/chatgpt.ts
index 84313e3bc31..33b3a72e998 100644
--- a/src/packages/frontend/sagews/chatgpt.ts
+++ b/src/packages/frontend/sagews/chatgpt.ts
@@ -1,6 +1,6 @@
import { redux } from "@cocalc/frontend/app-framework";
import { getHelp } from "@cocalc/frontend/frame-editors/llm/help-me-fix";
-import { getValidLanguageModelName } from "@cocalc/util/db-schema/openai";
+import { getValidLanguageModelName } from "@cocalc/util/db-schema/llm";
import { MARKERS } from "@cocalc/util/sagews";
import { SETTINGS_LANGUAGE_MODEL_KEY } from "../account/useLanguageModelSetting";
diff --git a/src/packages/next/components/openai/vendor-status-check.tsx b/src/packages/next/components/openai/vendor-status-check.tsx
index 67a3ab01396..7b0192802c3 100644
--- a/src/packages/next/components/openai/vendor-status-check.tsx
+++ b/src/packages/next/components/openai/vendor-status-check.tsx
@@ -1,4 +1,4 @@
-import { Vendor } from "@cocalc/util/db-schema/openai";
+import { Vendor } from "@cocalc/util/db-schema/llm";
import { unreachable } from "@cocalc/util/misc";
import A from "components/misc/A";
diff --git a/src/packages/server/llm/abuse.ts b/src/packages/server/llm/abuse.ts
index 6b32d9583af..ae89cac4dae 100644
--- a/src/packages/server/llm/abuse.ts
+++ b/src/packages/server/llm/abuse.ts
@@ -27,9 +27,10 @@ import { assertPurchaseAllowed } from "@cocalc/server/purchases/is-purchase-allo
import {
isFreeModel,
LanguageModel,
+ LanguageService,
model2service,
MODELS,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
import { isValidUUID } from "@cocalc/util/misc";
const QUOTAS = {
@@ -73,7 +74,7 @@ export async function checkForAbuse({
// This is a for-pay product, so let's make sure user can purchase it.
await assertPurchaseAllowed({
account_id,
- service: model2service(model),
+ service: model2service(model) as LanguageService,
});
// We always allow usage of for pay models, since the user is paying for
// them. Only free models need to be throttled.
@@ -113,7 +114,7 @@ export async function checkForAbuse({
// This is a for-pay product, so let's make sure user can purchase it.
await assertPurchaseAllowed({
account_id,
- service: model2service(model),
+ service: model2service(model) as LanguageService,
});
}
}
diff --git a/src/packages/server/llm/call-llm.ts b/src/packages/server/llm/call-llm.ts
index a807d26237a..f59ebdfe26a 100644
--- a/src/packages/server/llm/call-llm.ts
+++ b/src/packages/server/llm/call-llm.ts
@@ -2,7 +2,7 @@ import { delay } from "awaiting";
import type OpenAI from "openai";
import getLogger from "@cocalc/backend/logger";
-import { ModelOpenAI, OpenAIMessages } from "@cocalc/util/db-schema/openai";
+import { ModelOpenAI, OpenAIMessages } from "@cocalc/util/db-schema/llm";
import { ChatOutput } from "@cocalc/util/types/llm";
import { Stream } from "openai/streaming";
import { totalNumTokens } from "./chatgpt-numtokens";
diff --git a/src/packages/server/llm/client.ts b/src/packages/server/llm/client.ts
index 79a0bff122b..4f0fa454ea6 100644
--- a/src/packages/server/llm/client.ts
+++ b/src/packages/server/llm/client.ts
@@ -4,14 +4,14 @@ Get the client for the given LanguageModel.
You do not have to worry too much about throwing an exception, because they're caught in ./index::evaluate
*/
-import OpenAI from "openai";
-import jsonStable from "json-stable-stringify";
import { Ollama } from "@langchain/community/llms/ollama";
+import jsonStable from "json-stable-stringify";
import * as _ from "lodash";
+import OpenAI from "openai";
import getLogger from "@cocalc/backend/logger";
import { getServerSettings } from "@cocalc/database/settings/server-settings";
-import { LanguageModel, model2vendor } from "@cocalc/util/db-schema/openai";
+import { LanguageModel, model2vendor } from "@cocalc/util/db-schema/llm";
import { unreachable } from "@cocalc/util/misc";
import { VertexAIClient } from "./vertex-ai-client";
diff --git a/src/packages/server/llm/index.ts b/src/packages/server/llm/index.ts
index dcd4bbbc834..53bdb8aecf1 100644
--- a/src/packages/server/llm/index.ts
+++ b/src/packages/server/llm/index.ts
@@ -21,13 +21,14 @@ import {
DEFAULT_MODEL,
LLM_USERNAMES,
LanguageModel,
+ LanguageService,
OpenAIMessages,
getLLMCost,
isFreeModel,
isValidModel,
model2service,
model2vendor,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
import { ChatOptions, ChatOutput, History } from "@cocalc/util/types/llm";
import { checkForAbuse } from "./abuse";
import { callChatGPTAPI } from "./call-llm";
@@ -136,9 +137,9 @@ async function evaluateImpl({
account_id,
project_id,
cost,
- service: model2service(model),
+ service: model2service(model) as LanguageService,
description: {
- type: model2service(model),
+ type: model2service(model) as LanguageService,
prompt_tokens,
completion_tokens,
},
diff --git a/src/packages/server/llm/vertex-ai-client.ts b/src/packages/server/llm/vertex-ai-client.ts
index c80730af509..c7a8cf360c2 100644
--- a/src/packages/server/llm/vertex-ai-client.ts
+++ b/src/packages/server/llm/vertex-ai-client.ts
@@ -5,7 +5,7 @@
*/
import getLogger from "@cocalc/backend/logger";
-import { LanguageModel } from "@cocalc/util/db-schema/openai";
+import { LanguageModel } from "@cocalc/util/db-schema/llm";
import { ChatOutput, History } from "@cocalc/util/types/llm";
import {
DiscussServiceClient,
diff --git a/src/packages/server/purchases/get-service-cost.ts b/src/packages/server/purchases/get-service-cost.ts
index f87432e1348..5efb7daad3a 100644
--- a/src/packages/server/purchases/get-service-cost.ts
+++ b/src/packages/server/purchases/get-service-cost.ts
@@ -9,7 +9,7 @@ import {
getLLMCost,
isLanguageModelService,
service2model,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
import type { Service } from "@cocalc/util/db-schema/purchases";
import { unreachable } from "@cocalc/util/misc";
diff --git a/src/packages/server/purchases/is-purchase-allowed.ts b/src/packages/server/purchases/is-purchase-allowed.ts
index a469637ea07..26f576f74e6 100644
--- a/src/packages/server/purchases/is-purchase-allowed.ts
+++ b/src/packages/server/purchases/is-purchase-allowed.ts
@@ -5,7 +5,7 @@ import {
getMaxCost,
isLanguageModelService,
service2model,
-} from "@cocalc/util/db-schema/openai";
+} from "@cocalc/util/db-schema/llm";
import { QUOTA_SPEC, Service } from "@cocalc/util/db-schema/purchase-quotas";
import { MAX_COST } from "@cocalc/util/db-schema/purchases";
import { currency, round2up, round2down } from "@cocalc/util/misc";
diff --git a/src/packages/util/db-schema/llm.test.ts b/src/packages/util/db-schema/llm.test.ts
new file mode 100644
index 00000000000..f77084ceee0
--- /dev/null
+++ b/src/packages/util/db-schema/llm.test.ts
@@ -0,0 +1,13 @@
+// this tests the new llm.ts file, which contains the parts moved out of the wrongly named openai.ts file
+
+import { isFreeModel } from "./llm";
+
+describe("openai/llm", () => {
+ test("isFreeModel", () => {
+ expect(isFreeModel("gpt-3")).toBe(true);
+ expect(isFreeModel("gpt-4")).toBe(false);
+ // WARNING: if the following breaks and Ollama becomes non-free, then several assumptions elsewhere break as well –
+ // search for "model2service(...) as LanguageService" in the codebase!
+ expect(isFreeModel("ollama-1")).toBe(true);
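+ // a couple more spot checks, derived from LANGUAGE_MODELS and the free list above
+ expect(isFreeModel("gpt-4-32k")).toBe(false);
+ expect(isFreeModel("gemini-pro")).toBe(true);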
+ });
+});
diff --git a/src/packages/util/db-schema/llm.ts b/src/packages/util/db-schema/llm.ts
new file mode 100644
index 00000000000..73a5e4cf554
--- /dev/null
+++ b/src/packages/util/db-schema/llm.ts
@@ -0,0 +1,352 @@
+// this contains bits and pieces from the wrongly named openai.ts file
+
+import type { LLMService, Service } from "@cocalc/util/db-schema/purchases";
+import { unreachable } from "../misc";
+
+export const LANGUAGE_MODELS = [
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-4",
+ "gpt-4-32k",
+ // google's are taken from here – we use the generative AI client lib
+ // https://developers.generativeai.google/models/language
+ "text-bison-001",
+ "chat-bison-001",
+ "embedding-gecko-001",
+ "text-embedding-ada-002",
+ "gemini-pro",
+] as const;
+
+// This hardcodes which models can be selected by users.
+// Make sure to update this when adding new models.
+// This is used in e.g. mentionable-users.tsx, model-switch.tsx and other-settings.tsx
+export const USER_SELECTABLE_LANGUAGE_MODELS: Readonly<LanguageModel[]> = [
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-4",
+ // "chat-bison-001", // PaLM2 is not good, replies with no response too often
+ "gemini-pro",
+] as const;
+
+export type LanguageModel = (typeof LANGUAGE_MODELS)[number];
+
+export function isLanguageModel(model?: string): model is LanguageModel {
+ return LANGUAGE_MODELS.includes(model as LanguageModel);
+}
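+// e.g. isLanguageModel("gpt-4") === true, while isLanguageModel("ollama-llama2") === false,
+// since Ollama models are not part of the static LANGUAGE_MODELS list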
+
+export function getValidLanguageModelName(
+ model: string | undefined,
+ filter: { google: boolean; openai: boolean; ollama: boolean } = {
+ google: true,
+ openai: true,
+ ollama: false,
+ },
+ ollama: string[] = [], // keys of ollama models
+): LanguageModel | string {
+ const dftl =
+ filter.openai === true
+ ? DEFAULT_MODEL
+ : filter.ollama && ollama?.length > 0
+ ? toOllamaModel(ollama[0])
+ : "chat-bison-001";
+ console.log("getValidLanguageModelName", model, filter, ollama, dftl);
+ if (model == null) {
+ return dftl;
+ }
+ if (LANGUAGE_MODELS.includes(model as LanguageModel)) {
+ return model;
+ }
+ if (isOllamaLLM(model) && ollama.includes(fromOllamaModel(model))) {
+ return model;
+ }
+ return dftl;
+}
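+// Illustrative fallbacks (following the defaults above): an unknown model name
+// falls back to DEFAULT_MODEL; with { openai: false, ollama: true } and ollama
+// keys ["llama2"], it would fall back to toOllamaModel("llama2") === "ollama-llama2".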
+
+export interface OpenAIMessage {
+ role: "system" | "user" | "assistant";
+ content: string;
+}
+export type OpenAIMessages = OpenAIMessage[];
+
+export type LanguageService =
+ | "openai-gpt-3.5-turbo"
+ | "openai-gpt-3.5-turbo-16k"
+ | "openai-gpt-4"
+ | "openai-gpt-4-32k"
+ | "openai-text-embedding-ada-002"
+ | "google-text-bison-001"
+ | "google-chat-bison-001"
+ | "google-embedding-gecko-001"
+ | "google-gemini-pro";
+
+const LANGUAGE_MODEL_VENDORS = ["openai", "google", "ollama"] as const;
+export type Vendor = (typeof LANGUAGE_MODEL_VENDORS)[number];
+
+// used e.g. for checking "account-id={string}" and other things like that
+export const LANGUAGE_MODEL_PREFIXES = [
+ "chatgpt",
+ ...LANGUAGE_MODEL_VENDORS.map((v) => `${v}-`),
+] as const;
+
+export function model2service(
+ model: LanguageModel | string,
+): LanguageService | string {
+ if (model === "text-embedding-ada-002") {
+ return `openai-${model}`;
+ }
+ if (isLanguageModel(model)) {
+ if (
+ model === "text-bison-001" ||
+ model === "chat-bison-001" ||
+ model === "embedding-gecko-001" ||
+ model === "gemini-pro"
+ ) {
+ return `google-${model}`;
+ } else {
+ return `openai-${model}`;
+ }
+ }
+ if (isOllamaLLM(model)) {
+ return toOllamaModel(model);
+ }
+ throw new Error(`unknown model: ${model}`);
+}
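+// e.g. model2service("gpt-4") === "openai-gpt-4" and
+// model2service("gemini-pro") === "google-gemini-pro"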
+
+// inverse of model2service, but robust for chat avatars, which might not have a prefix
+// TODO: fix the mess
+export function service2model(
+ service: LanguageService | "chatgpt",
+): LanguageModel {
+ if (service === "chatgpt") {
+ return "gpt-3.5-turbo";
+ }
+ // split off the first part of service, e.g., "openai-" or "google-"
+ const s = service.split("-")[0];
+ const hasPrefix = s === "openai" || s === "google";
+ const m = hasPrefix ? service.split("-").slice(1).join("-") : service;
+ if (!LANGUAGE_MODELS.includes(m as LanguageModel)) {
+ // We don't throw an error, since the frontend would crash
+ // throw new Error(`unknown service: ${service}`);
+ console.warn(`service2model: unknown service: ${service}`);
+ return "gpt-3.5-turbo";
+ }
+ return m as LanguageModel;
+}
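+// e.g. service2model("openai-gpt-4") === "gpt-4"; a bare "chatgpt" (as used by
+// old chat avatars, without a vendor prefix) maps to "gpt-3.5-turbo"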
+
+// Note: this must be an OpenAI model – otherwise change the getValidLanguageModelName function
+export const DEFAULT_MODEL: LanguageModel = "gpt-3.5-turbo";
+
+export function model2vendor(model: LanguageModel | string): Vendor {
+ // note: "text-embedding-ada-002" is an OpenAI model too – keep this consistent with model2service
+ if (model.startsWith("gpt-") || model === "text-embedding-ada-002") {
+ return "openai";
+ } else if (isOllamaLLM(model)) {
+ return "ollama";
+ } else {
+ return "google";
+ }
+}
+
+export function toOllamaModel(model: string) {
+ // idempotent – don't double-prefix a name that already starts with "ollama-"
+ return isOllamaLLM(model) ? model : `ollama-${model}`;
+}
+
+export function fromOllamaModel(model: string) {
+ return model.replace(/^ollama-/, "");
+}
+
+export function isOllamaLLM(model: string) {
+ return model.startsWith("ollama-");
+}
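+// toOllamaModel and fromOllamaModel are inverses of each other, e.g.
+// fromOllamaModel(toOllamaModel("llama2")) === "llama2"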
+
+const MODELS_OPENAI = [
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-4",
+ "gpt-4-32k",
+] as const;
+
+export const MODELS = [
+ ...MODELS_OPENAI,
+ "text-embedding-ada-002",
+ "text-bison-001",
+ "chat-bison-001",
+ "embedding-gecko-001",
+ "gemini-pro",
+] as const;
+
+export type Model = (typeof MODELS)[number];
+
+export type ModelOpenAI = (typeof MODELS_OPENAI)[number];
+
+// Map from pseudo account_id to what should be displayed to user.
+// This is used in various places in the frontend.
+// Google PaLM: https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
+export const LLM_USERNAMES = {
+ chatgpt: "GPT-3.5",
+ chatgpt3: "GPT-3.5",
+ chatgpt4: "GPT-4",
+ "gpt-4": "GPT-4",
+ "gpt-4-32k": "GPT-4-32k",
+ "gpt-3.5-turbo": "GPT-3.5",
+ "gpt-3.5-turbo-16k": "GPT-3.5-16k",
+ "text-bison-001": "PaLM 2",
+ "chat-bison-001": "PaLM 2",
+ "embedding-gecko-001": "PaLM 2",
+ "gemini-pro": "Gemini Pro",
+} as const;
+
+export function isFreeModel(model: string) {
+ if (isOllamaLLM(model)) return true;
+ if (LANGUAGE_MODELS.includes(model as LanguageModel)) {
+ // of these models, the following are free
+ return (
+ (model as Model) == "gpt-3.5-turbo" ||
+ (model as Model) == "text-bison-001" ||
+ (model as Model) == "chat-bison-001" ||
+ (model as Model) == "embedding-gecko-001" ||
+ (model as Model) == "gemini-pro"
+ );
+ }
+ // all others are free
+ return true;
+}
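+// e.g. isFreeModel("gpt-4") === false, while "gpt-3.5-turbo", "gemini-pro",
+// every "ollama-*" model, and any unknown model name all count as free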
+
+// this is used in purchases/get-service-cost
+// we only need to check for the vendor prefixes, no special cases!
+export function isLanguageModelService(
+ service: Service,
+): service is LLMService {
+ for (const v of LANGUAGE_MODEL_VENDORS) {
+ if (service.startsWith(`${v}-`)) {
+ return true;
+ }
+ }
+ return false;
+}
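+// e.g. isLanguageModelService("openai-gpt-4") === true, while services without
+// a vendor prefix (such as "credit") return false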
+
+export function getVendorStatusCheckMD(vendor: Vendor): string {
+ switch (vendor) {
+ case "openai":
+ return `OpenAI [status](https://status.openai.com) and [downdetector](https://downdetector.com/status/openai).`;
+ case "google":
+ return `Google [status](https://status.cloud.google.com) and [downdetector](https://downdetector.com/status/google-cloud).`;
+ case "ollama":
+ return `No status information is available for Ollama – check with the backend serving the particular model.`;
+ default:
+ unreachable(vendor);
+ }
+ return "";
+}
+
+export function llmSupportsStreaming(model: LanguageModel): boolean {
+ return model2vendor(model) === "openai" || model === "gemini-pro";
+}
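+// i.e. all OpenAI models and "gemini-pro" stream; the PaLM models
+// ("text-bison-001", "chat-bison-001") do not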
+
+interface Cost {
+ prompt_tokens: number;
+ completion_tokens: number;
+ max_tokens: number;
+}
+
+// This is the official published cost that openai charges.
+// It changes over time, so this will sometimes need to be updated.
+// Our cost is a configurable multiple of this.
+// https://openai.com/pricing#language-models
+// There appears to be no api that provides the prices, unfortunately.
+const LLM_COST: { [name in LanguageModel]: Cost } = {
+ "gpt-4": {
+ prompt_tokens: 0.03 / 1000,
+ completion_tokens: 0.06 / 1000,
+ max_tokens: 8192,
+ },
+ "gpt-4-32k": {
+ prompt_tokens: 0.06 / 1000,
+ completion_tokens: 0.12 / 1000,
+ max_tokens: 32768,
+ },
+ "gpt-3.5-turbo": {
+ prompt_tokens: 0.0015 / 1000,
+ completion_tokens: 0.002 / 1000,
+ max_tokens: 4096,
+ },
+ "gpt-3.5-turbo-16k": {
+ prompt_tokens: 0.003 / 1000,
+ completion_tokens: 0.004 / 1000,
+ max_tokens: 16384,
+ },
+ "text-embedding-ada-002": {
+ prompt_tokens: 0.0001 / 1000,
+ completion_tokens: 0.0001 / 1000, // NOTE: this isn't a thing with embeddings
+ max_tokens: 8191,
+ },
+ // https://developers.generativeai.google/models/language
+ "text-bison-001": {
+ // we assume 5 characters is 1 token on average
+ prompt_tokens: (5 * 0.0005) / 1000,
+ completion_tokens: (5 * 0.0005) / 1000,
+ max_tokens: 8196,
+ },
+ "chat-bison-001": {
+ // we assume 5 characters is 1 token on average
+ prompt_tokens: (5 * 0.0005) / 1000,
+ completion_tokens: (5 * 0.0005) / 1000,
+ max_tokens: 8196,
+ },
+ "embedding-gecko-001": {
+ prompt_tokens: (5 * 0.0001) / 1000,
+ completion_tokens: 0,
+ max_tokens: 8196, // ???
+ },
+ "gemini-pro": {
+ // https://ai.google.dev/models/gemini
+ prompt_tokens: (5 * 0.0001) / 1000,
+ completion_tokens: 0,
+ max_tokens: 30720,
+ },
+} as const;
+
+export function isValidModel(model?: string): boolean {
+ if (model == null) return false;
+ if (model.startsWith("ollama-")) return true;
+ return LLM_COST[model ?? ""] != null;
+}
+
+export function getMaxTokens(model?: Model | string): number {
+ return LLM_COST[model ?? ""]?.max_tokens ?? 4096;
+}
+
+export interface LLMCost {
+ prompt_tokens: number;
+ completion_tokens: number;
+}
+
+export function getLLMCost(
+ model: Model,
+ markup_percentage: number, // a number like "30" would mean that we increase the wholesale price by multiplying by 1.3
+): LLMCost {
+ const x = LLM_COST[model];
+ if (x == null) {
+ throw Error(`unknown model "${model}"`);
+ }
+ const { prompt_tokens, completion_tokens } = x;
+ if (markup_percentage < 0) {
+ throw Error("markup percentage can't be negative");
+ }
+ const f = 1 + markup_percentage / 100;
+ return {
+ prompt_tokens: prompt_tokens * f,
+ completion_tokens: completion_tokens * f,
+ };
+}
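+// Worked example: getLLMCost("gpt-3.5-turbo", 30) applies the factor 1.3, so
+// prompt_tokens = 1.3 * 0.0015 / 1000 = 0.00000195 dollars per token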
+
+// The maximum cost for one single call using the given model.
+// We can't know the cost until after it happens, so this bound is useful for
+// ensuring user can afford to make a call.
+export function getMaxCost(model: Model, markup_percentage: number): number {
+ const { prompt_tokens, completion_tokens } = getLLMCost(
+ model,
+ markup_percentage,
+ );
+ const { max_tokens } = LLM_COST[model];
+ return Math.max(prompt_tokens, completion_tokens) * max_tokens;
+}
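+// Worked example: getMaxCost("gpt-4", 0) === Math.max(0.03, 0.06) / 1000 * 8192,
+// i.e. about $0.49 if all 8192 tokens were charged at the higher completion rate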
diff --git a/src/packages/util/db-schema/openai.ts b/src/packages/util/db-schema/openai.ts
index bd2b64ab328..8a1b8e5b3d7 100644
--- a/src/packages/util/db-schema/openai.ts
+++ b/src/packages/util/db-schema/openai.ts
@@ -2,347 +2,12 @@
// Mentally, just ignore "openai" and instead focus on "gpt-*" or "codey" or whatever they are called.
// TODO: refactor this, the names of the tables, etc. to be more generic.
-import type { LLMService, Service } from "@cocalc/util/db-schema/purchases";
-
import { History } from "@cocalc/util/types/llm";
-import { unreachable } from "../misc";
import { CREATED_BY, ID } from "./crm";
import { SCHEMA as schema } from "./index";
+import { LanguageModel } from "./llm";
import { Table } from "./types";
-export const LANGUAGE_MODELS = [
- "gpt-3.5-turbo",
- "gpt-3.5-turbo-16k",
- "gpt-4",
- "gpt-4-32k",
- // google's are taken from here – we use the generative AI client lib
- // https://developers.generativeai.google/models/language
- "text-bison-001",
- "chat-bison-001",
- "embedding-gecko-001",
- "text-embedding-ada-002",
- "gemini-pro",
-] as const;
-
-// This hardcodes which models can be selected by users.
-// Make sure to update this when adding new models.
-// This is used in e.g. mentionable-users.tsx, model-switch.tsx and other-settings.tsx
-export const USER_SELECTABLE_LANGUAGE_MODELS: Readonly<LanguageModel[]> = [
- "gpt-3.5-turbo",
- "gpt-3.5-turbo-16k",
- "gpt-4",
- // "chat-bison-001", // PaLM2 is not good, replies with no response too often
- "gemini-pro",
-] as const;
-
-export type LanguageModel = (typeof LANGUAGE_MODELS)[number];
-
-export function isLanguageModel(model?: string): model is LanguageModel {
- return LANGUAGE_MODELS.includes(model as LanguageModel);
-}
-
-export function getValidLanguageModelName(
- model: string | undefined,
- filter: { google: boolean; openai: boolean; ollama: boolean } = {
- google: true,
- openai: true,
- ollama: false,
- },
- ollama: string[] = [], // keys of ollama models
-): LanguageModel | string {
- const dftl =
- filter.openai === true
- ? DEFAULT_MODEL
- : filter.ollama && ollama?.length > 0
- ? toOllamaModel(ollama[0])
- : "chat-bison-001";
- console.log("getValidLanguageModelName", model, filter, ollama, dftl);
- if (model == null) {
- return dftl;
- }
- if (LANGUAGE_MODELS.includes(model as LanguageModel)) {
- return model;
- }
- if (isOllamaLLM(model) && ollama.includes(fromOllamaModel(model))) {
- return model;
- }
- return dftl;
-}
-
-export interface OpenAIMessage {
- role: "system" | "user" | "assistant";
- content: string;
-}
-export type OpenAIMessages = OpenAIMessage[];
-
-export type LanguageService =
- | "openai-gpt-3.5-turbo"
- | "openai-gpt-3.5-turbo-16k"
- | "openai-gpt-4"
- | "openai-gpt-4-32k"
- | "openai-text-embedding-ada-002"
- | "google-text-bison-001"
- | "google-chat-bison-001"
- | "google-embedding-gecko-001"
- | "google-gemini-pro";
-
-const LANGUAGE_MODEL_VENDORS = ["openai", "google", "ollama"] as const;
-export type Vendor = (typeof LANGUAGE_MODEL_VENDORS)[number];
-
-// used e.g. for checking "account-id={string}" and other things like that
-export const LANGUAGE_MODEL_PREFIXES = [
- "chatgpt",
- ...LANGUAGE_MODEL_VENDORS.map((v) => `${v}-`),
-] as const;
-
-export function model2service(model: LanguageModel): LanguageService {
- if (model === "text-embedding-ada-002") return `openai-${model}`;
- if (
- model === "text-bison-001" ||
- model === "chat-bison-001" ||
- model === "embedding-gecko-001" ||
- model === "gemini-pro"
- ) {
- return `google-${model}`;
- } else {
- return `openai-${model}`;
- }
-}
-
-// inverse of model2service, but robust for chat avatars, which might not have a prefix
-// TODO: fix the mess
-export function service2model(
- service: LanguageService | "chatgpt",
-): LanguageModel {
- if (service === "chatgpt") {
- return "gpt-3.5-turbo";
- }
- // split off the first part of service, e.g., "openai-" or "google-"
- const s = service.split("-")[0];
- const hasPrefix = s === "openai" || s === "google";
- const m = hasPrefix ? service.split("-").slice(1).join("-") : service;
- if (!LANGUAGE_MODELS.includes(m as LanguageModel)) {
- // We don't throw an error, since the frontend would crash
- // throw new Error(`unknown service: ${service}`);
- console.warn(`service2model: unknown service: ${service}`);
- return "gpt-3.5-turbo";
- }
- return m as LanguageModel;
-}
-
-// Note: this must be an OpenAI model – otherwise change the getValidLanguageModelName function
-export const DEFAULT_MODEL: LanguageModel = "gpt-3.5-turbo";
-
-export function model2vendor(model: LanguageModel): Vendor {
- if (model.startsWith("gpt-")) {
- return "openai";
- } else if (model.startsWith("ollama-")) {
- return "ollama";
- } else {
- return "google";
- }
-}
-
-export function toOllamaModel(model: string) {
- return `ollama-${model}`;
-}
-
-export function fromOllamaModel(model: string) {
- return model.replace(/^ollama-/, "");
-}
-
-export function isOllamaLLM(model: string) {
- return model.startsWith("ollama-");
-}
-
-const MODELS_OPENAI = [
- "gpt-3.5-turbo",
- "gpt-3.5-turbo-16k",
- "gpt-4",
- "gpt-4-32k",
-] as const;
-
-export const MODELS = [
- ...MODELS_OPENAI,
- "text-embedding-ada-002",
- "text-bison-001",
- "chat-bison-001",
- "embedding-gecko-001",
- "gemini-pro",
-] as const;
-
-export type Model = (typeof MODELS)[number];
-
-export type ModelOpenAI = (typeof MODELS_OPENAI)[number];
-
-// Map from psuedo account_id to what should be displayed to user.
-// This is used in various places in the frontend.
-// Google PaLM: https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
-export const LLM_USERNAMES = {
- chatgpt: "GPT-3.5",
- chatgpt3: "GPT-3.5",
- chatgpt4: "GPT-4",
- "gpt-4": "GPT-4",
- "gpt-4-32k": "GPT-4-32k",
- "gpt-3.5-turbo": "GPT-3.5",
- "gpt-3.5-turbo-16k": "GPT-3.5-16k",
- "text-bison-001": "PaLM 2",
- "chat-bison-001": "PaLM 2",
- "embedding-gecko-001": "PaLM 2",
- "gemini-pro": "Gemini Pro",
-} as const;
-
-export function isFreeModel(model: string) {
- if (!LANGUAGE_MODELS.includes(model as LanguageModel)) return false;
- return (
- (model as Model) == "gpt-3.5-turbo" ||
- (model as Model) == "text-bison-001" ||
- (model as Model) == "chat-bison-001" ||
- (model as Model) == "embedding-gecko-001" ||
- (model as Model) == "gemini-pro"
- );
-}
-
-// this is used in purchases/get-service-cost
-// we only need to check for the vendor prefixes, no special cases!
-export function isLanguageModelService(
- service: Service,
-): service is LLMService {
- for (const v of LANGUAGE_MODEL_VENDORS) {
- if (service.startsWith(`${v}-`)) {
- return true;
- }
- }
- return false;
-}
-
-export function getVendorStatusCheckMD(vendor: Vendor): string {
- switch (vendor) {
- case "openai":
- return `OpenAI [status](https://status.openai.com) and [downdetector](https://downdetector.com/status/openai).`;
- case "google":
- return `Google [status](https://status.cloud.google.com) and [downdetector](https://downdetector.com/status/google-cloud).`;
- case "ollama":
- return `No status information for Ollama available – you have to check with the particular backend for the model.`;
- default:
- unreachable(vendor);
- }
- return "";
-}
-
-export function llmSupportsStreaming(model: LanguageModel): boolean {
- return model2vendor(model) === "openai" || model === "gemini-pro";
-}
-
-interface Cost {
- prompt_tokens: number;
- completion_tokens: number;
- max_tokens: number;
-}
-
-// This is the official published cost that openai charges.
-// It changes over time, so this will sometimes need to be updated.
-// Our cost is a configurable multiple of this.
-// https://openai.com/pricing#language-models
-// There appears to be no api that provides the prices, unfortunately.
-const LLM_COST: { [name in LanguageModel]: Cost } = {
- "gpt-4": {
- prompt_tokens: 0.03 / 1000,
- completion_tokens: 0.06 / 1000,
- max_tokens: 8192,
- },
- "gpt-4-32k": {
- prompt_tokens: 0.06 / 1000,
- completion_tokens: 0.12 / 1000,
- max_tokens: 32768,
- },
- "gpt-3.5-turbo": {
- prompt_tokens: 0.0015 / 1000,
- completion_tokens: 0.002 / 1000,
- max_tokens: 4096,
- },
- "gpt-3.5-turbo-16k": {
- prompt_tokens: 0.003 / 1000,
- completion_tokens: 0.004 / 1000,
- max_tokens: 16384,
- },
- "text-embedding-ada-002": {
- prompt_tokens: 0.0001 / 1000,
- completion_tokens: 0.0001 / 1000, // NOTE: this isn't a thing with embeddings
- max_tokens: 8191,
- },
- // https://developers.generativeai.google/models/language
- "text-bison-001": {
- // we assume 5 characters is 1 token on average
- prompt_tokens: (5 * 0.0005) / 1000,
- completion_tokens: (5 * 0.0005) / 1000,
- max_tokens: 8196,
- },
- "chat-bison-001": {
- // we assume 5 characters is 1 token on average
- prompt_tokens: (5 * 0.0005) / 1000,
- completion_tokens: (5 * 0.0005) / 1000,
- max_tokens: 8196,
- },
- "embedding-gecko-001": {
- prompt_tokens: (5 * 0.0001) / 1000,
- completion_tokens: 0,
- max_tokens: 8196, // ???
- },
- "gemini-pro": {
- // https://ai.google.dev/models/gemini
- prompt_tokens: (5 * 0.0001) / 1000,
- completion_tokens: 0,
- max_tokens: 30720,
- },
-} as const;
-
-export function isValidModel(model?: string): boolean {
- if (model == null) return false;
- if (model.startsWith("ollama-")) return true;
- return LLM_COST[model ?? ""] != null;
-}
-
-export function getMaxTokens(model?: Model): number {
- return LLM_COST[model ?? ""]?.max_tokens ?? 4096;
-}
-
-export interface LLMCost {
- prompt_tokens: number;
- completion_tokens: number;
-}
-
-export function getLLMCost(
- model: Model,
- markup_percentage: number, // a number like "30" would mean that we increase the wholesale price by multiplying by 1.3
-): LLMCost {
- const x = LLM_COST[model];
- if (x == null) {
- throw Error(`unknown model "${model}"`);
- }
- const { prompt_tokens, completion_tokens } = x;
- if (markup_percentage < 0) {
- throw Error("markup percentage can't be negative");
- }
- const f = 1 + markup_percentage / 100;
- return {
- prompt_tokens: prompt_tokens * f,
- completion_tokens: completion_tokens * f,
- };
-}
-
-// The maximum cost for one single call using the given model.
-// We can't know the cost until after it happens, so this bound is useful for
-// ensuring user can afford to make a call.
-export function getMaxCost(model: Model, markup_percentage: number): number {
- const { prompt_tokens, completion_tokens } = getLLMCost(
- model,
- markup_percentage,
- );
- const { max_tokens } = LLM_COST[model];
- return Math.max(prompt_tokens, completion_tokens) * max_tokens;
-}
-
export interface ChatGPTLogEntry {
id: number;
account_id?: string;
diff --git a/src/packages/util/db-schema/purchase-quotas.ts b/src/packages/util/db-schema/purchase-quotas.ts
index a96b749e106..79322048248 100644
--- a/src/packages/util/db-schema/purchase-quotas.ts
+++ b/src/packages/util/db-schema/purchase-quotas.ts
@@ -2,7 +2,7 @@ import { Table } from "./types";
import { CREATED_BY, ID } from "./crm";
import { SCHEMA as schema } from "./index";
import type { Service } from "./purchases";
-import { isFreeModel } from "./openai";
+import { isFreeModel } from "./llm";
export type { Service };
diff --git a/src/packages/util/types/llm.ts b/src/packages/util/types/llm.ts
index 36c8d35668e..70cce04b8c0 100644
--- a/src/packages/util/types/llm.ts
+++ b/src/packages/util/types/llm.ts
@@ -1,4 +1,4 @@
-import { LanguageModel } from "@cocalc/util/db-schema/openai";
+import { LanguageModel } from "@cocalc/util/db-schema/llm";
export type History = {
role: "assistant" | "user" | "system";