Skip to content

Commit

Permalink
Apply parallel copy of IO tensors when possible
Browse files: browse the repository at this point in the history
  • Loading branch information
maxnick committed Dec 8, 2023
1 parent 04db11e commit 593e3ea
Showing 1 changed file with 14 additions and 7 deletions.
21 changes: 14 additions & 7 deletions src/plugins/intel_cpu/src/graph.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -895,14 +895,22 @@ void Graph::PushInputData(const std::string& name, const ov::SoPtr<ITensor>& inp
     if (input_itr != inputNodesMap.end()) {
         auto node = input_itr->second;
         auto childEdge = node->getChildEdgeAt(0);
+        auto edgeMemory = childEdge->getMemoryPtr();

         const void* ext_data_ptr = input->data();
-        void* inter_data_ptr = childEdge->getMemory().getData();
+        void* inter_data_ptr = edgeMemory->getData();

         if (ext_data_ptr != inter_data_ptr) {
             auto ext_tensor_desc = MemoryDescUtils::generateCpuBlockedMemoryDesc(input);
-            Memory ext_mem(getEngine(), ext_tensor_desc, ext_data_ptr, false);
-            childEdge->getMemory().load(ext_mem, false);
+            auto actualDesc = edgeMemory->getDescPtr();
+
+            if (!actualDesc->isCompatible(*ext_tensor_desc)) {
+                Memory ext_mem(getEngine(), ext_tensor_desc, ext_data_ptr, false);
+                edgeMemory->load(ext_mem, false);
+            } else {
+                size_t size_to_copy = ext_tensor_desc->getCurrentMemSize();
+                cpu_parallel_memcpy(inter_data_ptr, ext_data_ptr, size_to_copy);
+            }
         }
     } else {
         OPENVINO_THROW("Input blob for infer '", name, "' doesn't correspond to input in network");
Expand Down Expand Up @@ -975,13 +983,12 @@ void Graph::PullOutputData(std::unordered_map<std::string, ov::SoPtr<ITensor>>&
         // That is the same memory. No need to copy
         if (ext_blob_ptr == intr_blob_ptr) continue;

-        if (actualDesc->isCompatible(*expected_desc_ptr) && !isScalarOutput) {
+        if (!actualDesc->isCompatible(*expected_desc_ptr) && !isScalarOutput) {
             Memory outBloMem(getEngine(), expected_desc_ptr, ext_blob_ptr, false);
             outBloMem.load(intr_blob, false);
         } else {
-            size_t size_to_copy = intr_blob.getDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
-            DEBUG_LOG("pull_output: convert ", srcPrec, " to ", dstPrec);
-            cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy);
+            size_t size_to_copy = intr_blob.getSize();
+            cpu_parallel_memcpy(ext_blob_ptr, intr_blob_ptr, size_to_copy);
         }
     }
 }
Expand Down

0 comments on commit 593e3ea

Please sign in to comment.