
Merge pull request #154 from NexaAI/perry/vlm-api
Feature implementation for issue #136: supports VLM requests for the /chat/completions API
zhiyuan8 authored Oct 9, 2024
2 parents 67f1370 + 00902be commit 43450f5
Showing 4 changed files with 347 additions and 98 deletions.
44 changes: 36 additions & 8 deletions SERVER.md
@@ -22,14 +22,15 @@ nexa server gemma
nexa server llama2-function-calling
nexa server sd1-5
nexa server faster-whisper-large
nexa server ../models/llava-v1.6-vicuna-7b/ -lp -mt MULTIMODAL
```

By default, `nexa server` will run gguf models. To run onnx models, simply add `onnx` after `nexa server`.

## API Endpoints


### 1. Text Generation: <code>/v1/completions</code>

Generates text based on a single prompt.

#### Request body:
@@ -54,13 +54,46 @@ Generates text based on a single prompt.
}
```


### 2. Chat Completions: <code>/v1/chat/completions</code>

Update: now supports multimodal inputs (text plus images) when using multimodal (VLM) models.

Handles chat completions with support for conversation history.

#### Request body:

Multimodal models (VLM):

```json
{
  "model": "anything",
  "messages": [
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": "What’s in this image?"
        },
        {
          "type": "image_url",
          "image_url": {
            "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
          }
        }
      ]
    }
  ],
  "max_tokens": 300,
  "temperature": 0.7,
  "top_p": 0.95,
  "top_k": 40,
  "stream": false
}
```
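
For illustration only, a minimal Python client sketch for this multimodal request; the base URL `http://localhost:8000` is an assumption, so point it at wherever your `nexa server` instance is actually listening:

```python
# Minimal sketch: POST a multimodal chat completion to a locally running nexa server.
# Assumption: the server listens at http://localhost:8000; adjust BASE_URL as needed.
import requests

BASE_URL = "http://localhost:8000"

payload = {
    "model": "anything",
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
                    },
                },
            ],
        }
    ],
    "max_tokens": 300,
    "temperature": 0.7,
    "top_p": 0.95,
    "top_k": 40,
    "stream": False,
}

response = requests.post(f"{BASE_URL}/v1/chat/completions", json=payload, timeout=120)
response.raise_for_status()
print(response.json())
```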

Traditional NLP models:

```json
{
  "messages": [
@@ -94,7 +128,6 @@ Handles chat completions with support for conversation history.
}
```
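
Text-only requests use the same endpoint. The sketch below is an illustrative guess at a streaming client; it assumes the server is at `http://localhost:8000` and emits OpenAI-style `data: ...` server-sent-event lines when `"stream"` is `true`, so verify the chunk format against the actual server before relying on it:

```python
# Illustrative sketch of a text-only chat request with streaming enabled.
# Assumptions (not taken from this repo): server at http://localhost:8000 and
# OpenAI-style "data: ..." server-sent-event lines when "stream" is true.
import requests

payload = {
    "messages": [{"role": "user", "content": "Tell me a short story"}],
    "max_tokens": 128,
    "temperature": 0.7,
    "stream": True,
}

with requests.post(
    "http://localhost:8000/v1/chat/completions",
    json=payload,
    stream=True,
    timeout=120,
) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines():
        if line:
            # Each line is one streamed chunk; inspect it to confirm the exact schema.
            print(line.decode("utf-8"))
```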


### 3. Function Calling: <code>/v1/function-calling</code>

Calls the most appropriate function based on the user's prompt.
@@ -198,7 +231,6 @@ Call the most appropriate function based on user's prompt.
}
```


### 4. Text-to-Image: <code>/v1/txt2img</code>

Generates images based on a single prompt.
@@ -232,7 +264,6 @@ Generates images based on a single prompt.
}
```


### 5. Image-to-Image: <code>/v1/img2img</code>

Modifies existing images based on a single prompt.
@@ -266,7 +297,6 @@ Modifies existing images based on a single prompt.
}
```


### 6. Audio Transcriptions: <code>/v1/audio/transcriptions</code>

Transcribes audio files to text.
@@ -293,7 +323,6 @@ Transcribes audio files to text.
}
```
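
For illustration, uploading a local audio file as multipart form data might look like the sketch below; the server address and the `file` field name are assumptions, so check the actual endpoint signature before relying on them:

```python
# Illustrative sketch: upload an audio file for transcription.
# Assumptions: server at http://localhost:8000 and a multipart field named "file".
import requests

with open("recording.wav", "rb") as audio:
    resp = requests.post(
        "http://localhost:8000/v1/audio/transcriptions",
        files={"file": ("recording.wav", audio, "audio/wav")},
        timeout=300,
    )
resp.raise_for_status()
print(resp.json()["text"])
```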


### 7. Audio Translations: <code>/v1/audio/translations</code>

Translates audio files to text in English.
@@ -318,4 +347,3 @@ Translates audio files to text in English.
"text": " Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday"
}
```

68 changes: 54 additions & 14 deletions nexa/cli/entry.py
@@ -15,13 +15,61 @@ def run_ggml_inference(args):
    if model_type:
        run_type = ModelType[model_type].value

    def choose_files(local_path):
        """ Helper function for Multimodal inference only: select the model and projector ggufs from the local_path. """
        print(f"Files in {local_path}:")
        files = os.listdir(local_path)
        for i, file in enumerate(files):
            print(f"{i+1}. {file}")

        while True:
            try:
                model_choice = int(input(">>> Enter the index of the model gguf: ")) - 1
                if 0 <= model_choice < len(files):
                    break
                else:
                    print("Invalid selection. Please enter a valid number.")
            except ValueError:
                print("Invalid input. Please enter a number.")

        while True:
            try:
                projector_choice = int(input(">>> Enter the index of the projector gguf: ")) - 1
                if 0 <= projector_choice < len(files):
                    break
                else:
                    print("Invalid selection. Please enter a valid number.")
            except ValueError:
                print("Invalid input. Please enter a number.")

        return os.path.join(local_path, files[model_choice]), os.path.join(local_path, files[projector_choice])

    if args.command == "server":
        from nexa.gguf.server.nexa_service import run_nexa_ai_service as NexaServer

        projector_local_path = None
        if run_type == "Multimodal" and is_local_path:
            local_path = os.path.abspath(model_path)
            if not os.path.isdir(local_path):
                print("Error: For Multimodal models with --local_path, the provided path must be a directory.")
                return

            model_path, projector_local_path = choose_files(local_path)

            if not model_path or not projector_local_path:
                return
        elif run_type == "Audio" and is_local_path:
            local_path = os.path.abspath(model_path)
            if not os.path.isdir(local_path):
                print("Error: For Audio models with --local_path, the provided path must be a directory containing all related files.")
                return

        NexaServer(
            model_path_arg=model_path,
            is_local_path_arg=is_local_path,
            model_type_arg=run_type,
            huggingface=hf,
            projector_local_path_arg=projector_local_path,
            **kwargs
        )
        return
@@ -38,26 +86,18 @@ def run_ggml_inference(args):
model_path = local_path
if run_type == "Multimodal":
if not os.path.isdir(local_path):
print("Error: For Multimodal models with --local_path, the provided path must be a directory.")
print("Error: For Multimodal models with --local_path, the provided path must be a directory containing both model and projector ggufs.")
return
print(f"Files in {local_path}:")
files = os.listdir(local_path)
for i, file in enumerate(files):
print(f"{i+1}. {file}")

model_choice = int(input("Enter the index of the model gguf: ")) - 1
projector_choice = int(input("Enter the index of the projector gguf: ")) - 1
model_path, projector_local_path = choose_files(local_path)

if 0 <= model_choice < len(files) and 0 <= projector_choice < len(files):
local_path = os.path.join(local_path, files[model_choice])
model_path = local_path
projector_local_path = os.path.join(os.path.dirname(local_path), files[projector_choice])
else:
print("Invalid selection. Aborting.")
if not model_path or not projector_local_path:
return

local_path = model_path
elif run_type == "Audio":
if not os.path.isdir(local_path):
print("Error: For Audio models with --local_path, the provided path must be a directory.")
print("Error: For Audio models with --local_path, the provided path must be a directory containing all related files.")
return
else: # hf case
# TODO: remove this after adding support for Multimodal model in CLI
6 changes: 3 additions & 3 deletions nexa/general.py
@@ -131,11 +131,11 @@ def pull_model(model_path, hf = False, **kwargs):
print(f"Successfully pulled model {model_path} to {result['local_path']}, run_type: {result['run_type']}")
return result["local_path"], result["run_type"]
else:
print(f"Failed to pull model {model_path}")
return None, "NLP"
print(f"Failed to pull model {model_path}. If you are using local path, be sure to add --local_path and --model_type flags.")
return None, None
except Exception as e:
logging.error(f"An error occurred while pulling the model: {e}")
return None, "NLP"
return None, None


def pull_model_from_hub(model_path, **kwargs):