ChatGPTNextWeb · Dogtiti · Dec 22, 2024 · Dec 10, 2024 · Dec 20, 2024 · Dec 21, 2024
diff --git a/app/config/build.ts b/app/config/build.ts
@@ -40,6 +40,7 @@ export const getBuildConfig = () => {
     buildMode,
     isApp,
     template: process.env.DEFAULT_INPUT_TEMPLATE ?? DEFAULT_INPUT_TEMPLATE,
+    visionModels: process.env.NEXT_PUBLIC_VISION_MODELS || "",
   };
 };
 

diff --git a/app/constant.ts b/app/constant.ts
@@ -291,6 +291,22 @@ export const DEFAULT_TTS_VOICES = [
   "shimmer",
 ];
 
+export const VISION_MODEL_REGEXES = [
+  /vision/, // unanchored: matches "vision" anywhere in the model name
+  /gpt-4o/,
+  /claude-3/, // also matches claude-3-5-* names; EXCLUDE_VISION_MODEL_REGEXES carves out the exception
+  /gemini-1\.5/,
+  /gemini-exp/,
+  /gemini-2\.0/,
+  /learnlm/,
+  /qwen-vl/,
+  /qwen2-vl/,
+  /gpt-4-turbo(?!.*preview)/, // Matches "gpt-4-turbo" but not "gpt-4-turbo-preview"
+  /^dall-e-3$/, // Matches exactly "dall-e-3"
+]; // isVisionModel tests model names against these patterns
+
+export const EXCLUDE_VISION_MODEL_REGEXES = [/claude-3-5-haiku-20241022/]; // a match here vetoes a VISION_MODEL_REGEXES match in isVisionModel
+
 const openaiModels = [
   "gpt-3.5-turbo",
   "gpt-3.5-turbo-1106",

diff --git a/app/utils.ts b/app/utils.ts
@@ -5,6 +5,8 @@ import { RequestMessage } from "./client/api";
 import { ServiceProvider } from "./constant";
 // import { fetch as tauriFetch, ResponseType } from "@tauri-apps/api/http";
 import { fetch as tauriStreamFetch } from "./utils/stream";
+import { VISION_MODEL_REGEXES, EXCLUDE_VISION_MODEL_REGEXES } from "./constant";
+import { getClientConfig } from "./config/client";
 
 export function trimTopic(topic: string) {
   // Fix an issue where double quotes still show in the Indonesian language
@@ -252,28 +254,16 @@ export function getMessageImages(message: RequestMessage): string[] {
 }
 
 export function isVisionModel(model: string) {
-  // Note: This is a better way using the TypeScript feature instead of `&&` or `||` (ts v5.5.0-dev.20240314 I've been using)
-
-  const excludeKeywords = ["claude-3-5-haiku-20241022"];
-  const visionKeywords = [
-    "vision",
-    "gpt-4o",
-    "claude-3",
-    "gemini-1.5",
-    "gemini-exp",
-    "gemini-2.0",
-    "learnlm",
-    "qwen-vl",
-    "qwen2-vl",
-  ];
-  const isGpt4Turbo =
-    model.includes("gpt-4-turbo") && !model.includes("preview");
-
+  // Exact names listed in the client config's visionModels count as vision models, bypassing the exclude list below.
+  const envVisionModels = (getClientConfig()?.visionModels ?? "")
+    .split(",")
+    .map((m) => m.trim())
+    .filter(Boolean); // skip blanks: "" or "a,,b" must never whitelist an empty model name
+  if (envVisionModels.includes(model)) return true;
+  // Otherwise use the built-in patterns: no exclude regex matches AND some vision regex matches.
   return (
-    !excludeKeywords.some((keyword) => model.includes(keyword)) &&
-    (visionKeywords.some((keyword) => model.includes(keyword)) ||
-      isGpt4Turbo ||
-      isDalle3(model))
+    !EXCLUDE_VISION_MODEL_REGEXES.some((regex) => regex.test(model)) &&
+    VISION_MODEL_REGEXES.some((regex) => regex.test(model))
   );
 }
 

diff --git a/test/vision-model-checker.test.ts b/test/vision-model-checker.test.ts
@@ -0,0 +1,67 @@
+import { isVisionModel } from "../app/utils";
+
+describe("isVisionModel", () => {
+  const originalEnv = process.env; // kept so afterEach can put the real env object back
+
+  beforeEach(() => {
+    jest.resetModules();
+    process.env = { ...originalEnv }; // per-test copy so env edits never leak between tests
+  });
+
+  afterEach(() => {
+    process.env = originalEnv;
+  });
+
+  test("should identify vision models using regex patterns", () => {
+    const visionModels = [
+      "gpt-4-vision",
+      "claude-3-opus",
+      "gemini-1.5-pro",
+      "gemini-2.0",
+      "gemini-exp-vision",
+      "learnlm-vision",
+      "qwen-vl-max",
+      "qwen2-vl-max",
+      "gpt-4-turbo",
+      "dall-e-3",
+    ];
+
+    visionModels.forEach((model) => {
+      expect(isVisionModel(model)).toBe(true);
+    });
+  });
+
+  test("should exclude specific models", () => {
+    expect(isVisionModel("claude-3-5-haiku-20241022")).toBe(false);
+  });
+
+  test("should not identify non-vision models", () => {
+    const nonVisionModels = [
+      "gpt-3.5-turbo",
+      "gpt-4-turbo-preview",
+      "claude-2",
+      "regular-model",
+    ];
+
+    nonVisionModels.forEach((model) => {
+      expect(isVisionModel(model)).toBe(false);
+    });
+  });
+
+  test("should identify models from NEXT_PUBLIC_VISION_MODELS env var", () => {
+    // Names match no built-in pattern (e.g. no "vision"), so only the env list can accept them.
+    process.env.NEXT_PUBLIC_VISION_MODELS = "custom-model-a,custom-model-b";
+    expect(isVisionModel("custom-model-a")).toBe(true);
+    expect(isVisionModel("custom-model-b")).toBe(true);
+    expect(isVisionModel("unrelated-model")).toBe(false);
+  });
+
+  test("should handle empty or missing NEXT_PUBLIC_VISION_MODELS", () => {
+    process.env.NEXT_PUBLIC_VISION_MODELS = "";
+    expect(isVisionModel("unrelated-model")).toBe(false);
+
+    delete process.env.NEXT_PUBLIC_VISION_MODELS;
+    expect(isVisionModel("unrelated-model")).toBe(false);
+    expect(isVisionModel("gpt-4-vision")).toBe(true); // built-in patterns still apply without the env var
+  });
+});