From 593e3ea044be45e6d07f92b14ee3064db2cebc20 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Fri, 8 Dec 2023 12:12:19 +0100 Subject: [PATCH] Apply parallel copy of IO tensors when possible --- src/plugins/intel_cpu/src/graph.cpp | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 932c40e8c5cb0f..d690c276c9a907 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -895,14 +895,22 @@ void Graph::PushInputData(const std::string& name, const ov::SoPtr<ITensor>& inp if (input_itr != inputNodesMap.end()) { auto node = input_itr->second; auto childEdge = node->getChildEdgeAt(0); + auto edgeMemory = childEdge->getMemoryPtr(); const void* ext_data_ptr = input->data(); - void* inter_data_ptr = childEdge->getMemory().getData(); + void* inter_data_ptr = edgeMemory->getData(); if (ext_data_ptr != inter_data_ptr) { auto ext_tensor_desc = MemoryDescUtils::generateCpuBlockedMemoryDesc(input); - Memory ext_mem(getEngine(), ext_tensor_desc, ext_data_ptr, false); - childEdge->getMemory().load(ext_mem, false); + auto actualDesc = edgeMemory->getDescPtr(); + + if (!actualDesc->isCompatible(*ext_tensor_desc)) { + Memory ext_mem(getEngine(), ext_tensor_desc, ext_data_ptr, false); + edgeMemory->load(ext_mem, false); + } else { + size_t size_to_copy = ext_tensor_desc->getCurrentMemSize(); + cpu_parallel_memcpy(inter_data_ptr, ext_data_ptr, size_to_copy); + } } } else { OPENVINO_THROW("Input blob for infer '", name, "' doesn't correspond to input in network"); @@ -975,13 +983,12 @@ void Graph::PullOutputData(std::unordered_map<std::string, ov::SoPtr<ITensor>>& // That is the same memory.
No need to copy if (ext_blob_ptr == intr_blob_ptr) continue; - if (actualDesc->isCompatible(*expected_desc_ptr) && !isScalarOutput) { + if (!actualDesc->isCompatible(*expected_desc_ptr) && !isScalarOutput) { Memory outBloMem(getEngine(), expected_desc_ptr, ext_blob_ptr, false); outBloMem.load(intr_blob, false); } else { - size_t size_to_copy = intr_blob.getDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount(); - DEBUG_LOG("pull_output: convert ", srcPrec, " to ", dstPrec); - cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy); + size_t size_to_copy = intr_blob.getSize(); + cpu_parallel_memcpy(ext_blob_ptr, intr_blob_ptr, size_to_copy); } } }