Skip to content

Commit

Permalink
[Model] IBM Granite 3.1 (vllm-project#11307)
Browse files Browse the repository at this point in the history
Signed-off-by: Travis Johnson <[email protected]>
Signed-off-by: Bowen Wang <[email protected]>
  • Loading branch information
tjohnson31415 authored and abmfy committed Jan 24, 2025
1 parent 6813302 commit 79f96fd
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 6 deletions.
4 changes: 2 additions & 2 deletions docs/source/models/supported_models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,8 @@ Text Generation (``--task generate``)
-
- ✅︎
* - :code:`GraniteForCausalLM`
- Granite 3.0, PowerLM
- :code:`ibm-granite/granite-3.0-2b-base`, :code:`ibm-granite/granite-3.0-8b-instruct`, :code:`ibm/PowerLM-3b`, etc.
- Granite 3.0, Granite 3.1, PowerLM
- :code:`ibm-granite/granite-3.0-2b-base`, :code:`ibm-granite/granite-3.1-8b-instruct`, :code:`ibm/PowerLM-3b`, etc.
- ✅︎
- ✅︎
* - :code:`GraniteMoeForCausalLM`
Expand Down
7 changes: 6 additions & 1 deletion docs/source/usage/tool_calling.md
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,12 @@ Recommended flags: `--tool-call-parser granite --chat-template examples/tool_cha

`examples/tool_chat_template_granite.jinja`: this is a modified version of the original chat template on Hugging Face. Parallel function calls are supported.

* `ibm-granite/granite-3.1-8b-instruct`

Recommended flags: `--tool-call-parser granite`

The chat template from Hugging Face can be used directly. Parallel function calls are supported.

* `ibm-granite/granite-20b-functioncalling`

Recommended flags: `--tool-call-parser granite-20b-fc --chat-template examples/tool_chat_template_granite_20b_fc.jinja`
Expand Down Expand Up @@ -284,4 +290,3 @@ Then you can use this plugin in the command line like this.
--tool-call-parser example \
--chat-template <your chat template> \
```

10 changes: 9 additions & 1 deletion tests/tool_use/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,22 @@ def ensure_system_prompt(messages: List[Dict[str, Any]],
"supports_rocm":
False,
},
"granite8b": {
"granite-3.0-8b": {
"model":
"ibm-granite/granite-3.0-8b-instruct",
"arguments": [
"--tool-call-parser", "granite", "--chat-template",
str(VLLM_PATH / "examples/tool_chat_template_granite.jinja")
],
},
"granite-3.1-8b": {
"model": "ibm-granite/granite-3.1-8b-instruct",
"arguments": [
"--tool-call-parser",
"granite",
],
"supports_parallel": True,
},
"internlm": {
"model":
"internlm/internlm2_5-7b-chat",
Expand Down
12 changes: 10 additions & 2 deletions vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,18 @@ class GraniteToolParser(ToolParser):

def __init__(self, tokenizer: AnyTokenizer):
super().__init__(tokenizer)
# Granite 3.0 prefixes tool calls with the token `<|tool_call|>`.
# The prefix is optional and is stripped before parsing.
self.bot_token = "<|tool_call|>"
# Granite 3.1 prefixes tool calls with the plain string `<tool_call>`.
# It is stripped the same way as the 3.0 token.
self.bot_string = "<tool_call>"

def extract_tool_calls(
self, model_output: str,
request: ChatCompletionRequest) -> ExtractedToolCallInformation:
# remove whitespace and the BOT token if it exists
stripped = model_output.strip().removeprefix(self.bot_token).lstrip()
stripped = model_output.strip()\
.removeprefix(self.bot_token)\
.removeprefix(self.bot_string)\
.lstrip()
if not stripped or stripped[0] != '[':
return ExtractedToolCallInformation(tools_called=False,
tool_calls=[],
Expand Down Expand Up @@ -91,6 +96,9 @@ def extract_tool_calls_streaming(
if current_text[start_idx:].startswith(self.bot_token):
start_idx = consume_space(start_idx + len(self.bot_token),
current_text)
if current_text[start_idx:].startswith(self.bot_string):
start_idx = consume_space(start_idx + len(self.bot_string),
current_text)
if not current_text or start_idx >= len(current_text)\
or current_text[start_idx] != '[':
return DeltaMessage(content=delta_text)
Expand Down

0 comments on commit 79f96fd

Please sign in to comment.