Skip to content

Commit

Permalink
save speculation time in sd
Browse files: browse the repository at this point in the history
  • Loading branch information
goliaro committed Jan 24, 2025
1 parent 165cd31 commit ed1ae73
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 1 deletion.
1 change: 1 addition & 0 deletions include/flexflow/request_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ struct NewProfileInfo {
int num_generated_tokens;
long long speculation_start_timestamp;
long long speculation_end_timestamp;
long long suffix_tree_update_time;
};
struct RequestProfileInfo {
int llm_prefilling_steps = 0;
Expand Down
5 changes: 4 additions & 1 deletion inference/suffix_decoding/suffix_decoding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -719,7 +719,7 @@ void FlexFlow::top_level_task(Task const *task,
}
*/

std::string header = "llm,partition,max_tree_depth,online_tree_update,matching_strategy,max_requests_per_batch,max_tokens_per_batch,request_guid,request_step_idx,timestamp,num_speculated_tokens,num_accepted_tokens,prefix_length,speculation_score,num_generated_tokens";
std::string header = "llm,partition,max_tree_depth,online_tree_update,matching_strategy,max_requests_per_batch,max_tokens_per_batch,request_guid,request_step_idx,timestamp,speculation_start_timestamp,speculation_end_timestamp,tree_update_time,num_speculated_tokens,num_accepted_tokens,prefix_length,speculation_score,num_generated_tokens";
// csv filepath
// create csv filepath and add header if it doesn't exist

Expand Down Expand Up @@ -755,6 +755,9 @@ void FlexFlow::top_level_task(Task const *task,
file << info.request_guid << ","
<< info.request_step_idx << ","
<< info.timestamp << ","
<< info.speculation_start_timestamp << ","
<< info.speculation_end_timestamp << ","
<< info.suffix_tree_update_time << ","
<< info.num_speculated_tokens << ","
<< info.num_accepted_tokens << ","
<< info.prefix_length << ","
Expand Down
25 changes: 25 additions & 0 deletions src/runtime/request_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2177,6 +2177,7 @@ bool RequestManager::update_llm_suffix_decoding_results(
bool request_completed = false;

// Iterate over the requests
long long tree_update_time = 0;
for (int request_index = 0; request_index < get_max_requests_per_batch();
++request_index) {
if (!request_available[request_index]) {
Expand Down Expand Up @@ -2213,7 +2214,10 @@ bool RequestManager::update_llm_suffix_decoding_results(
if (eos_token_found or request.decode_length() >= get_max_output_length() or
request.tokens.size() >= get_max_sequence_length()) {
if (get_suffix_tree_online_tree_update()) {
long long int start = Realm::Clock::current_time_in_microseconds();
insert_completed_request_into_suffix_tree(request_index);
long long int end = Realm::Clock::current_time_in_microseconds();
tree_update_time += (end - start);
}
// Request is completed
request_update_attainment(request_index, attained);
Expand All @@ -2230,6 +2234,24 @@ bool RequestManager::update_llm_suffix_decoding_results(
}
}

int idx=0;
for (int request_index = 0; request_index < get_max_requests_per_batch(); ++request_index) {
if (!request_available[request_index]) {
// Request in this slot is unavailable
continue;
}
int guid = guid_of_requests[request_index];

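// Check that the profiling entry at index
// new_profiling_info.size() - nb_requests_decoded + idx is in bounds and
// belongs to this request (its request_guid must equal guid).
// NOTE(review): the first assert below chains `<` and `>=`, so it evaluates
// as `(a < b) >= 0`, which is always true; only the second assert actually
// checks the upper bound.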
assert(new_profiling_info.size() - nb_requests_decoded + idx < new_profiling_info.size() >= 0);
assert(new_profiling_info.size() - nb_requests_decoded + idx < new_profiling_info.size());
assert(new_profiling_info[new_profiling_info.size()-nb_requests_decoded + idx].request_guid == guid);

new_profiling_info[new_profiling_info.size()-nb_requests_decoded + idx].suffix_tree_update_time = tree_update_time;

idx++;
}

// Some requests may be completed after appending the verified tokens.
// If there is a request completed, return true.
return request_completed;
Expand Down Expand Up @@ -2937,6 +2959,9 @@ void RequestManager::get_verify_results_suffix_decoding(
new_profile_info.timestamp = current_time;
new_profile_info.request_guid = guid;
new_profile_info.request_step_idx = profiling_requests[guid].llm_decoding_steps-1;
new_profile_info.speculation_start_timestamp = profiling_requests[guid].speculation_start_timestamp;
new_profile_info.speculation_end_timestamp = profiling_requests[guid].speculation_end_timestamp;
new_profile_info.suffix_tree_update_time = 0;
new_profile_info.num_speculated_tokens = (int)request.suffix_decoding_best_token_ids.size();
new_profile_info.num_accepted_tokens = accepted_tokens;
new_profile_info.prefix_length = request.suffix_decoding_best_prefix_length;
Expand Down

0 comments on commit ed1ae73

Please sign in to comment.