
Merge pull request #154 from NexaAI/perry/vlm-api
Feature implementation for issue #136: supports VLM requests for the /chat/completions API
zhiyuan8 authored Oct 9, 2024
2 parents 67f1370 + 00902be commit 43450f5
Showing 4 changed files with 347 additions and 98 deletions.
44 changes: 36 additions & 8 deletions SERVER.md
@@ -22,14 +22,15 @@ nexa server gemma
nexa server llama2-function-calling
nexa server sd1-5
nexa server faster-whisper-large
nexa server ../models/llava-v1.6-vicuna-7b/ -lp -mt MULTIMODAL
```

By default, `nexa server` will run gguf models. To run onnx models, simply add `onnx` after `nexa server`.

## API Endpoints


### 1. Text Generation: <code>/v1/completions</code>

Generates text based on a single prompt.

#### Request body:
@@ -54,13 +54,46 @@ Generates text based on a single prompt.
}
```


### 2. Chat Completions: <code>/v1/chat/completions</code>

Update: now supports multimodal inputs (text plus images) when using multimodal (VLM) models.

Handles chat completions with support for conversation history.

#### Request body:

Multimodal models (VLM):

```json
{
  "model": "anything",
  "messages": [
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": "What’s in this image?"
        },
        {
          "type": "image_url",
          "image_url": {
            "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
          }
        }
      ]
    }
  ],
  "max_tokens": 300,
  "temperature": 0.7,
  "top_p": 0.95,
  "top_k": 40,
  "stream": false
}
```
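
For illustration only, a minimal Python client sketch for this multimodal request; the base URL `http://localhost:8000` is an assumption, so point it at wherever your `nexa server` instance is actually listening:

```python
# Minimal sketch: POST a multimodal chat completion to a locally running nexa server.
# Assumption: the server listens at http://localhost:8000; adjust BASE_URL as needed.
import requests

BASE_URL = "http://localhost:8000"

payload = {
    "model": "anything",
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
                    },
                },
            ],
        }
    ],
    "max_tokens": 300,
    "temperature": 0.7,
    "top_p": 0.95,
    "top_k": 40,
    "stream": False,
}

response = requests.post(f"{BASE_URL}/v1/chat/completions", json=payload, timeout=120)
response.raise_for_status()
print(response.json())
```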

Traditional NLP models:

```json
{
  "messages": [
@@ -94,7 +128,6 @@ Handles chat completions with support for conversation history.
}
```
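
Text-only requests use the same endpoint. The sketch below is an illustrative guess at a streaming client; it assumes the server is at `http://localhost:8000` and emits OpenAI-style `data: ...` server-sent-event lines when `"stream"` is `true`, so verify the chunk format against the actual server before relying on it:

```python
# Illustrative sketch of a text-only chat request with streaming enabled.
# Assumptions (not taken from this repo): server at http://localhost:8000 and
# OpenAI-style "data: ..." server-sent-event lines when "stream" is true.
import requests

payload = {
    "messages": [{"role": "user", "content": "Tell me a short story"}],
    "max_tokens": 128,
    "temperature": 0.7,
    "stream": True,
}

with requests.post(
    "http://localhost:8000/v1/chat/completions",
    json=payload,
    stream=True,
    timeout=120,
) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines():
        if line:
            # Each line is one streamed chunk; inspect it to confirm the exact schema.
            print(line.decode("utf-8"))
```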


### 3. Function Calling: <code>/v1/function-calling</code>

Calls the most appropriate function based on the user's prompt.
@@ -198,7 +231,6 @@ Call the most appropriate function based on user's prompt.
}
```


### 4. Text-to-Image: <code>/v1/txt2img</code>

Generates images based on a single prompt.
@@ -232,7 +264,6 @@ Generates images based on a single prompt.
}
```


### 5. Image-to-Image: <code>/v1/img2img</code>

Modifies existing images based on a single prompt.
@@ -266,7 +297,6 @@ Modifies existing images based on a single prompt.
}
```


### 6. Audio Transcriptions: <code>/v1/audio/transcriptions</code>

Transcribes audio files to text.
@@ -293,7 +323,6 @@ Transcribes audio files to text.
}
```
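
For illustration, uploading a local audio file as multipart form data might look like the sketch below; the server address and the `file` field name are assumptions, so check the actual endpoint signature before relying on them:

```python
# Illustrative sketch: upload an audio file for transcription.
# Assumptions: server at http://localhost:8000 and a multipart field named "file".
import requests

with open("recording.wav", "rb") as audio:
    resp = requests.post(
        "http://localhost:8000/v1/audio/transcriptions",
        files={"file": ("recording.wav", audio, "audio/wav")},
        timeout=300,
    )
resp.raise_for_status()
print(resp.json()["text"])
```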


### 7. Audio Translations: <code>/v1/audio/translations</code>

Translates audio files to text in English.
@@ -318,4 +347,3 @@ Translates audio files to text in English.
"text": " Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday"
}
```

68 changes: 54 additions & 14 deletions nexa/cli/entry.py
@@ -15,13 +15,61 @@ def run_ggml_inference(args):
    if model_type:
        run_type = ModelType[model_type].value

    def choose_files(local_path):
        """ Helper function for Multimodal inference only: select the model and projector ggufs from the local_path. """
        print(f"Files in {local_path}:")
        files = os.listdir(local_path)
        for i, file in enumerate(files):
            print(f"{i+1}. {file}")

        while True:
            try:
                model_choice = int(input(">>> Enter the index of the model gguf: ")) - 1
                if 0 <= model_choice < len(files):
                    break
                else:
                    print("Invalid selection. Please enter a valid number.")
            except ValueError:
                print("Invalid input. Please enter a number.")

        while True:
            try:
                projector_choice = int(input(">>> Enter the index of the projector gguf: ")) - 1
                if 0 <= projector_choice < len(files):
                    break
                else:
                    print("Invalid selection. Please enter a valid number.")
            except ValueError:
                print("Invalid input. Please enter a number.")

        return os.path.join(local_path, files[model_choice]), os.path.join(local_path, files[projector_choice])

    if args.command == "server":
        from nexa.gguf.server.nexa_service import run_nexa_ai_service as NexaServer

        projector_local_path = None
        if run_type == "Multimodal" and is_local_path:
            local_path = os.path.abspath(model_path)
            if not os.path.isdir(local_path):
                print("Error: For Multimodal models with --local_path, the provided path must be a directory.")
                return

            model_path, projector_local_path = choose_files(local_path)

            if not model_path or not projector_local_path:
                return
        elif run_type == "Audio" and is_local_path:
            local_path = os.path.abspath(model_path)
            if not os.path.isdir(local_path):
                print("Error: For Audio models with --local_path, the provided path must be a directory containing all related files.")
                return

        NexaServer(
            model_path_arg=model_path,
            is_local_path_arg=is_local_path,
            model_type_arg=run_type,
            huggingface=hf,
            projector_local_path_arg=projector_local_path,
            **kwargs
        )
        return
@@ -38,26 +86,18 @@ def run_ggml_inference(args):
model_path = local_path
if run_type == "Multimodal":
if not os.path.isdir(local_path):
print("Error: For Multimodal models with --local_path, the provided path must be a directory.")
print("Error: For Multimodal models with --local_path, the provided path must be a directory containing both model and projector ggufs.")
return
print(f"Files in {local_path}:")
files = os.listdir(local_path)
for i, file in enumerate(files):
print(f"{i+1}. {file}")

model_choice = int(input("Enter the index of the model gguf: ")) - 1
projector_choice = int(input("Enter the index of the projector gguf: ")) - 1
model_path, projector_local_path = choose_files(local_path)

if 0 <= model_choice < len(files) and 0 <= projector_choice < len(files):
local_path = os.path.join(local_path, files[model_choice])
model_path = local_path
projector_local_path = os.path.join(os.path.dirname(local_path), files[projector_choice])
else:
print("Invalid selection. Aborting.")
if not model_path or not projector_local_path:
return

local_path = model_path
elif run_type == "Audio":
if not os.path.isdir(local_path):
print("Error: For Audio models with --local_path, the provided path must be a directory.")
print("Error: For Audio models with --local_path, the provided path must be a directory containing all related files.")
return
else: # hf case
# TODO: remove this after adding support for Multimodal model in CLI
6 changes: 3 additions & 3 deletions nexa/general.py
@@ -131,11 +131,11 @@ def pull_model(model_path, hf = False, **kwargs):
print(f"Successfully pulled model {model_path} to {result['local_path']}, run_type: {result['run_type']}")
return result["local_path"], result["run_type"]
else:
print(f"Failed to pull model {model_path}")
return None, "NLP"
print(f"Failed to pull model {model_path}. If you are using local path, be sure to add --local_path and --model_type flags.")
return None, None
except Exception as e:
logging.error(f"An error occurred while pulling the model: {e}")
return None, "NLP"
return None, None


def pull_model_from_hub(model_path, **kwargs):