Skip to content

Commit

Permalink
add warm-up also for prompt_len=32; warm up both GEMM and GEMV paths
Browse files Browse the repository at this point in the history
  • Loading branch information
luoyu-intel committed Jul 18, 2024
1 parent 6e3ab87 commit 90e93db
Showing 1 changed file with 19 additions and 0 deletions.
19 changes: 19 additions & 0 deletions common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2126,6 +2126,25 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
// Reset state left over from the preceding warm-up decode so the second
// warm-up pass below starts from a clean KV cache and zeroed timings.
llama_kv_cache_clear(lctx);
llama_synchronize(lctx);
llama_reset_timings(lctx);
// Second warm-up pass: build a 33-token prompt (32 x BOS followed by one
// EOS) so the prompt-length-32 batched path (GEMM) is exercised in
// addition to the single-token path (GEMV) — see the commit message;
// NOTE(review): the BOS/EOS choice is presumably arbitrary filler tokens,
// confirm against the surrounding warm-up code.
tmp.clear();
for (int i = 0; i < 32; i++)
{
tmp.push_back(bos);
}
tmp.push_back(eos);
// Encoder-decoder models: run the encoder over the warm-up prompt first,
// then seed the decoder with its start token, falling back to BOS when
// the model does not define one (sentinel -1).
if (llama_model_has_encoder(model)) {
llama_encode(lctx, llama_batch_get_one(tmp.data(), tmp.size(), 0, 0));
llama_token decoder_start_token_id = llama_model_decoder_start_token(model);
if (decoder_start_token_id == -1) {
decoder_start_token_id = bos;
}
tmp.clear();
tmp.push_back(decoder_start_token_id);
}
// Warm-up decode; the token count is clamped to the configured batch size
// so the batch never exceeds params.n_batch.
llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, 0));
// Discard all warm-up side effects: clear the KV cache, wait for any
// pending backend work, and reset timing counters so user-visible
// timings do not include warm-up cost.
llama_kv_cache_clear(lctx);
llama_synchronize(lctx);
llama_reset_timings(lctx);
}

return std::make_tuple(model, lctx);
Expand Down

0 comments on commit 90e93db

Please sign in to comment.