Skip to content

Commit

Permalink
[metrics](shuffle) Add necessary metrics (apache#40476)
Browse files Browse the repository at this point in the history
  • Loading branch information
Gabriel39 authored Sep 9, 2024
1 parent cd35604 commit 9d5da50
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 8 deletions.
8 changes: 5 additions & 3 deletions be/src/vec/runtime/vdata_stream_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ Status VDataStreamMgr::transmit_block(const PTransmitDataParams* request,
t_finst_id.hi = finst_id.hi();
t_finst_id.lo = finst_id.lo();
std::shared_ptr<VDataStreamRecvr> recvr = nullptr;
ThreadCpuStopWatch cpu_time_stop_watch;
cpu_time_stop_watch.start();
static_cast<void>(find_recvr(t_finst_id, request->node_id(), &recvr));
if (recvr == nullptr) {
// The receiver may remove itself from the receiver map via deregister_recvr()
Expand Down Expand Up @@ -137,9 +139,9 @@ Status VDataStreamMgr::transmit_block(const PTransmitDataParams* request,

bool eos = request->eos();
if (request->has_block()) {
RETURN_IF_ERROR(recvr->add_block(request->block(), request->sender_id(),
request->be_number(), request->packet_seq(),
eos ? nullptr : done, wait_for_worker));
RETURN_IF_ERROR(recvr->add_block(
request->block(), request->sender_id(), request->be_number(), request->packet_seq(),
eos ? nullptr : done, wait_for_worker, cpu_time_stop_watch.elapsed_time()));
}

if (eos) {
Expand Down
13 changes: 10 additions & 3 deletions be/src/vec/runtime/vdata_stream_recvr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,8 @@ void VDataStreamRecvr::SenderQueue::try_set_dep_ready_without_lock() {
Status VDataStreamRecvr::SenderQueue::add_block(const PBlock& pblock, int be_number,
int64_t packet_seq,
::google::protobuf::Closure** done,
const int64_t wait_for_worker) {
const int64_t wait_for_worker,
const uint64_t time_to_find_recvr) {
{
std::lock_guard<std::mutex> l(_lock);
if (_is_cancelled) {
Expand Down Expand Up @@ -189,6 +190,10 @@ Status VDataStreamRecvr::SenderQueue::add_block(const PBlock& pblock, int be_num
_recvr->_max_wait_worker_time->set(wait_for_worker);
}

if (_recvr->_max_find_recvr_time->value() < time_to_find_recvr) {
_recvr->_max_find_recvr_time->set((int64_t)time_to_find_recvr);
}

_block_queue.emplace_back(std::move(block), block_byte_size);
COUNTER_UPDATE(_recvr->_remote_bytes_received_counter, block_byte_size);
_record_debug_info();
Expand Down Expand Up @@ -363,6 +368,7 @@ VDataStreamRecvr::VDataStreamRecvr(VDataStreamMgr* stream_mgr, RuntimeState* sta
_blocks_produced_counter = ADD_COUNTER(_profile, "BlocksProduced", TUnit::UNIT);
_max_wait_worker_time = ADD_COUNTER(_profile, "MaxWaitForWorkerTime", TUnit::UNIT);
_max_wait_to_process_time = ADD_COUNTER(_profile, "MaxWaitToProcessTime", TUnit::UNIT);
_max_find_recvr_time = ADD_COUNTER(_profile, "MaxFindRecvrTime(NS)", TUnit::UNIT);
}

VDataStreamRecvr::~VDataStreamRecvr() {
Expand Down Expand Up @@ -391,11 +397,12 @@ Status VDataStreamRecvr::create_merger(const VExprContextSPtrs& ordering_expr,

Status VDataStreamRecvr::add_block(const PBlock& pblock, int sender_id, int be_number,
int64_t packet_seq, ::google::protobuf::Closure** done,
const int64_t wait_for_worker) {
const int64_t wait_for_worker,
const uint64_t time_to_find_recvr) {
SCOPED_ATTACH_TASK(_query_thread_context);
int use_sender_id = _is_merging ? sender_id : 0;
return _sender_queues[use_sender_id]->add_block(pblock, be_number, packet_seq, done,
wait_for_worker);
wait_for_worker, time_to_find_recvr);
}

void VDataStreamRecvr::add_block(Block* block, int sender_id, bool use_move) {
Expand Down
7 changes: 5 additions & 2 deletions be/src/vec/runtime/vdata_stream_recvr.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ class VDataStreamRecvr : public HasTaskExecutionCtx {
std::vector<SenderQueue*> sender_queues() const { return _sender_queues; }

Status add_block(const PBlock& pblock, int sender_id, int be_number, int64_t packet_seq,
::google::protobuf::Closure** done, const int64_t wait_for_worker);
::google::protobuf::Closure** done, const int64_t wait_for_worker,
const uint64_t time_to_find_recvr);

void add_block(Block* block, int sender_id, bool use_move);

Expand Down Expand Up @@ -160,6 +161,7 @@ class VDataStreamRecvr : public HasTaskExecutionCtx {
RuntimeProfile::Counter* _blocks_produced_counter = nullptr;
RuntimeProfile::Counter* _max_wait_worker_time = nullptr;
RuntimeProfile::Counter* _max_wait_to_process_time = nullptr;
RuntimeProfile::Counter* _max_find_recvr_time = nullptr;

std::vector<std::shared_ptr<pipeline::Dependency>> _sender_to_local_channel_dependency;
};
Expand All @@ -178,7 +180,8 @@ class VDataStreamRecvr::SenderQueue {
Status get_batch(Block* next_block, bool* eos);

Status add_block(const PBlock& pblock, int be_number, int64_t packet_seq,
::google::protobuf::Closure** done, const int64_t wait_for_worker);
::google::protobuf::Closure** done, const int64_t wait_for_worker,
const uint64_t time_to_find_recvr);

void add_block(Block* block, bool use_move);

Expand Down

0 comments on commit 9d5da50

Please sign in to comment.