Modify example from fp32 to fp16 (intel#10528)
* Modify example from fp32 to fp16

* Remove Falcon from fp16 example for now

* Remove MPT from fp16 example
Zhangky11 authored Apr 9, 2024
1 parent 44922bb · commit 1e27e08
Showing 7 changed files with 7 additions and 7 deletions.
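
Every diff below applies the same one-line change: the model is cast to half precision (fp16) with .half() before being moved to the 'xpu' device, so the parts of the model that remain in fp32 after 4-bit loading take half the memory. A minimal sketch of the resulting load pattern follows, assuming the ipex-llm AutoModelForCausalLM wrapper these examples appear to use; the import path and model_path value are illustrative, not taken from this commit:

# Sketch only: the ipex-llm import and the example model_path are
# assumptions, not part of this diff.
from ipex_llm.transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

model_path = "meta-llama/Llama-2-7b-chat-hf"  # hypothetical example

# Load with 4-bit weight quantization; non-quantized modules
# (embeddings, layer norms, etc.) remain in fp32 at this point.
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_4bit=True,
                                             trust_remote_code=True,
                                             use_cache=True)

# The change this commit makes: cast the remaining fp32 tensors
# to fp16 before moving the model to the Intel GPU.
model = model.half().to('xpu')

tokenizer = AutoTokenizer.from_pretrained(model_path,
                                          trust_remote_code=True)

Per the commit message, the Falcon and MPT examples are left out of the fp16 change for now.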
@@ -51,7 +51,7 @@
 load_in_4bit=True,
 trust_remote_code=True,
 use_cache=True)
-model = model.to('xpu')
+model = model.half().to('xpu')

 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_path,

@@ -48,7 +48,7 @@
 optimize_model=True,
 trust_remote_code=True,
 use_cache=True)
-model = model.to('xpu')
+model = model.half().to('xpu')

 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_path,

@@ -48,7 +48,7 @@
 optimize_model=True,
 trust_remote_code=True,
 use_cache=True)
-model = model.to('xpu')
+model = model.half().to('xpu')

 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_path,

@@ -61,7 +61,7 @@ def get_prompt(message: str, chat_history: list[tuple[str, str]],
 optimize_model=True,
 trust_remote_code=True,
 use_cache=True)
-model = model.to('xpu')
+model = model.half().to('xpu')

 # Load tokenizer
 tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)

@@ -47,7 +47,7 @@
 optimize_model=True,
 trust_remote_code=True,
 use_cache=True)
-model = model.to('xpu')
+model = model.half().to('xpu')

 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

@@ -54,7 +54,7 @@
 optimize_model=True,
 trust_remote_code=True,
 use_cache=True)
-model = model.to('xpu')
+model = model.half().to('xpu')

 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_path,

@@ -44,7 +44,7 @@
 model = AutoModelForCausalLM.from_pretrained(model_path,
 load_in_4bit=True,
 trust_remote_code=True)
-model = model.to("xpu")
+model = model.half().to("xpu")

 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_path,
