-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
💎 Gemma on TGI Jetstream Pytorch (#99)
* feat(Jetstream Pt): add gemma support
* test(TGI): add gemma 7b slow test that uses Pytorch Jetstream
* doc: update Jetstream Pytorch install command
* refactor(Jetstream Pt): simplify model_class.from_config call
* refactor(engine loader): DRY code
* fix(test): clarify warmup test comment
- Loading branch information
1 parent
a0464df
commit 1194f61
Showing
6 changed files
with
85 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
47 changes: 47 additions & 0 deletions
47
...generation-inference/server/text_generation_server/jetstream_pt_support/gemma_model_hf.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
|
||
from jetstream_pt.third_party.gemma import config as gemma_config | ||
from jetstream_pt.third_party.gemma.model import GemmaModel | ||
|
||
#.model_exportable import Transformer, model_args | ||
from transformers import GemmaConfig, GenerationConfig, GenerationMixin | ||
|
||
|
||
class GemmaModelHf(GemmaModel, GenerationMixin):
    """Jetstream Pytorch Gemma model built from a Hugging Face ``GemmaConfig``.

    The constructor takes an HF ``GemmaConfig`` plus a device and translates
    them into the Jetstream Pytorch ``GemmaConfig`` expected by the parent
    ``GemmaModel``. The class also inherits from ``GenerationMixin`` so that
    its methods are available on the model.
    """

    def __init__(
        self,
        config: GemmaConfig,
        device,
        env,
    ):
        """Translate the HF config and initialize the parent model.

        Args:
            config: Hugging Face Gemma configuration to read the model
                dimensions from.
            device: Value stored on the Jetstream config's ``device``
                attribute before the parent initializer is called.
            env: Passed through unchanged to ``GemmaModel.__init__``.
        """
        # NOTE(review): both attributes are assigned before the parent
        # initializer runs; confirm GemmaModel.__init__ does not overwrite
        # ``self.config`` with the Jetstream config.
        self.config = config
        self.generation_config = GenerationConfig.from_model_config(config)

        # Fields that carry the same name in the HF config and in the
        # Jetstream Pytorch config; they are copied over verbatim.
        copied_fields = (
            "vocab_size",
            "max_position_embeddings",
            "num_hidden_layers",
            "num_attention_heads",
            "num_key_value_heads",
            "hidden_size",
            "intermediate_size",
            "head_dim",
            "rms_norm_eps",
        )
        copied_values = {name: getattr(config, name) for name in copied_fields}

        jetstream_config = gemma_config.GemmaConfig(
            **copied_values,
            dtype="bfloat16",
            quant=False,  # No quantization support for now
            tokenizer=None,
        )
        jetstream_config.device = device

        super().__init__(jetstream_config, env)

    @classmethod
    def from_config(cls, config, env):
        """Alternate constructor that always uses the ``"meta"`` device.

        Args:
            config: Hugging Face Gemma configuration.
            env: Passed through unchanged to the constructor.

        Returns:
            A new instance of ``cls`` created with ``device="meta"``.
        """
        # presumably "meta" avoids allocating real weight storage at build
        # time; verify against the engine loader that calls this.
        return cls(config, "meta", env)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters