-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Forward pass working on Metal. Sampling slow though.
- Loading branch information
Showing
6 changed files
with
102 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
module MetalExt | ||
|
||
# Note: Metal gives a modest speedup for forward_inference and forward_loss calls, but is VERY slow for sampling. | ||
# It seems that every individual Metal call carries a constant overhead, which dominates when sampling. | ||
|
||
using Metal, Jjama3.NNlib | ||
|
||
# Method of `NNlib.batched_mul` for two `MtlArray`s, implemented with `Metal.MPS.matmul!`. | ||
# | ||
# Both inputs are reshaped to 3 dimensions (all dimensions after the second are collapsed | ||
# into one), multiplied into a freshly allocated zero-filled buffer, and the result is | ||
# reshaped to `(size(a, 1), size(b, 2), size(a)[3:end]...)`. | ||
# | ||
# NOTE(review): the output's trailing dimensions are taken from `a` only; this presumably | ||
# assumes `b` has the same trailing (batch) dimensions as `a` -- confirm against callers. | ||
function NNlib.batched_mul(a::MtlArray, b::MtlArray) | ||
a_shape = size(a) | ||
b_shape = size(b) | ||
# Collapse every dimension past the second into a single trailing dimension. | ||
a_reshaped = reshape(a, a_shape[1], a_shape[2], :) | ||
b_reshaped = reshape(b, b_shape[1], b_shape[2], :) | ||
# Output buffer: rows of `a` x columns of `b` x collapsed trailing dimension of `a`. | ||
# NOTE(review): no element type is passed to `Metal.zeros`, so the buffer uses its | ||
# default element type rather than `eltype(a)` -- confirm this is intended. | ||
# NOTE(review): the zero fill may be required if `matmul!` accumulates into its output | ||
# by default -- verify before swapping this for an uninitialised buffer. | ||
res = Metal.zeros(a_shape[1], b_shape[2], size(a_reshaped)[3]) | ||
Metal.MPS.matmul!(res, a_reshaped,b_reshaped) | ||
# Restore the original trailing dimensions of `a` on the result. | ||
return reshape(res, a_shape[1], b_shape[2], a_shape[3:end]...) | ||
end | ||
|
||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,11 @@ | ||
# Package module: brings in its dependencies, includes the model, utils and sampling source | ||
# files, and exports the public names listed below. | ||
module Jjama3 | ||
|
||
# NOTE(review): this is a diff hunk shown without +/- markers. The two `using` lines appear | ||
# to be the before/after versions; the second one adds StatsBase and NNlib. | ||
using Flux, BytePairEncoding, SafeTensors, Distributions, LinearAlgebra | ||
using Flux, BytePairEncoding, SafeTensors, Distributions, LinearAlgebra, StatsBase, NNlib | ||
|
||
include("model.jl") | ||
include("utils.jl") | ||
include("sampling.jl") | ||
|
||
# NOTE(review): likewise, the two `export` lines appear to be before/after versions; the | ||
# second one adds top_pk_sampler and argmax_sampler. | ||
export load_llama321B_from_safetensors, load_llama3_from_safetensors, llama3_tokenizer, assistant_prompt, format_llama32_instruction_prompt, generate, forward_loss, forward_inference | ||
export load_llama321B_from_safetensors, load_llama3_from_safetensors, llama3_tokenizer, assistant_prompt, format_llama32_instruction_prompt, generate, forward_loss, forward_inference, top_pk_sampler, argmax_sampler | ||
|
||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters