From fb60406304439678d1663b78d8d967646ec84030 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Mon, 6 Nov 2023 15:50:28 +0100 Subject: [PATCH] Add support for an orphan ReadValue --- src/plugins/intel_cpu/src/infer_request.cpp | 10 +--- src/plugins/intel_cpu/src/memory_state.cpp | 56 +++++++++++++++++---- src/plugins/intel_cpu/src/memory_state.h | 11 ++-- src/plugins/intel_cpu/src/nodes/memory.cpp | 30 ++++++++--- src/plugins/intel_cpu/src/nodes/memory.hpp | 4 +- 5 files changed, 80 insertions(+), 31 deletions(-) diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index bf918ccea3ec1f..f00007a3b5b5d7 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -70,16 +70,8 @@ void SyncInferRequest::create_infer_request() { if (!memoryNode) { OPENVINO_THROW("Cannot cast ", node->getName(), " to MemoryInput"); } - auto state_name = memoryNode->getId(); - // Remove suffix with pair ID. Internal information. 
- auto suffix_idx = state_name.find("/id="); - if (suffix_idx != std::string::npos) { - state_name = state_name.substr(0, suffix_idx); - } - - m_memory_states.emplace_back( - std::make_shared(state_name, memoryNode->memoryBuilder(), memoryNode->getMemoryPtr())); + m_memory_states.emplace_back(memoryNode->makeState()); } } } diff --git a/src/plugins/intel_cpu/src/memory_state.cpp b/src/plugins/intel_cpu/src/memory_state.cpp index f257786190c8a3..81656581a3ec40 100644 --- a/src/plugins/intel_cpu/src/memory_state.cpp +++ b/src/plugins/intel_cpu/src/memory_state.cpp @@ -3,6 +3,8 @@ // #include +#include + #include "memory_state.h" #include "dnnl_extension_utils.h" @@ -16,12 +18,13 @@ namespace intel_cpu { VariableStateDoubleBuffer::VariableStateDoubleBuffer(std::string name, const MemBuilder& mem_build, + MemoryDescPtr external_desc, MemoryCPtr init_val) : - IVariableState{name} { + IVariableState{name}, m_external_desc{external_desc} { ResetPrimeMem(mem_build()); ResetSecondMem(mem_build()); - m_desc = PrimeMem()->getDescPtr(); - auto&& shape = m_desc->getShape(); + m_internal_desc = PrimeMem()->getDescPtr(); + auto&& shape = m_internal_desc->getShape(); //TODO what if by some reason we already have internal static state while the node is dynamic, is it even possible? 
if (shape.isStatic()) { @@ -32,7 +35,7 @@ VariableStateDoubleBuffer::VariableStateDoubleBuffer(std::string name, } } else { //in the case of the original desc has dynamic shape we create an empty tensor - auto new_desc = ToStatic(m_desc); + auto new_desc = ToStatic(m_internal_desc); PrimeMem()->redefineDesc(new_desc); } } @@ -41,24 +44,55 @@ void VariableStateDoubleBuffer::SetState(const Blob::Ptr& newState) { state = newState; // simply to extend the lifetime auto&& tensor_desc = state->getTensorDesc(); if (PrimeMem()->getStaticDims() != tensor_desc.getDims()) { - auto new_desc = m_desc->cloneWithNewDims(tensor_desc.getDims()); + auto new_desc = m_internal_desc->cloneWithNewDims(tensor_desc.getDims()); PrimeMem()->redefineDesc(new_desc); } auto blob_desc = MemoryDescUtils::convertToCpuBlockedMemoryDesc(tensor_desc); auto src = state->buffer().as(); - static const dnnl::engine eng(dnnl::engine::kind::cpu, 0); - Memory mem(eng, blob_desc, src); + Memory mem(getEngine(), blob_desc, src); PrimeMem()->load(mem); } +const dnnl::engine& VariableStateDoubleBuffer::getEngine() const { + static const dnnl::engine eng(dnnl::engine::kind::cpu, 0); + return eng; +} + Blob::CPtr VariableStateDoubleBuffer::GetState() const { - auto tensor = std::make_shared(PrimeMem()); + const auto& current_dims = PrimeMem()->getStaticDims(); + auto current_ext_desc = m_external_desc->cloneWithNewDims(current_dims); + auto current_internal_desc = PrimeMem()->getDescPtr(); + + if (current_ext_desc->isCompatible(*current_internal_desc)) { + auto tensor = std::make_shared(PrimeMem()); + return tensor_to_blob({tensor, nullptr}); // TODO: shouldn't we provide the so ptr? 
+ } + + //test precision + { + auto internal_prc = current_internal_desc->getPrecision(); + auto tmp_desc = current_ext_desc->cloneWithNewPrecision(internal_prc); + if (tmp_desc->isCompatible(*current_internal_desc)) { + auto mem = std::make_shared(getEngine(), current_ext_desc); + size_t elements_to_convert = PrimeMem()->getDescWithType()->getPaddedElementsCount(); + auto external_prc = current_ext_desc->getPrecision(); + + cpu_convert(PrimeMem()->getData(), mem->getData(), internal_prc, external_prc, elements_to_convert); + auto tensor = std::make_shared(mem); + return tensor_to_blob({tensor, nullptr}); // TODO: shouldn't we provide the so ptr? + } + } + + //reorder + auto mem = std::make_shared(getEngine(), current_ext_desc); + mem->load(*(PrimeMem())); + auto tensor = std::make_shared(mem); return tensor_to_blob({tensor, nullptr}); // TODO: shouldn't we provide the so ptr? } void VariableStateDoubleBuffer::Reset() { - auto new_desc = ToStatic(m_desc); + auto new_desc = ToStatic(m_internal_desc); for (auto&& mem : m_internal_mem) { if (mem) { mem->redefineDesc(new_desc); @@ -91,8 +125,8 @@ MemoryPtr VariableStateDoubleBuffer::OutputMem() { return SecondMem(); } -MemoryDescPtr VariableStateDoubleBuffer::OriginalDesc() const { - return m_desc; +MemoryDescPtr VariableStateDoubleBuffer::InternalDesc() const { + return m_internal_desc; } } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/memory_state.h b/src/plugins/intel_cpu/src/memory_state.h index 7d8b821ce179b6..6d5ba7d8cfbd1a 100644 --- a/src/plugins/intel_cpu/src/memory_state.h +++ b/src/plugins/intel_cpu/src/memory_state.h @@ -25,7 +25,7 @@ class IVariableState : public ov::IVariableState { virtual MemoryPtr InputMem() = 0; virtual MemoryPtr OutputMem() = 0; - virtual MemoryDescPtr OriginalDesc() const = 0; + virtual MemoryDescPtr InternalDesc() const = 0; }; class VariableStateDoubleBuffer : public IVariableState { @@ -35,6 +35,7 @@ class VariableStateDoubleBuffer : public IVariableState { 
public: VariableStateDoubleBuffer(std::string name, const MemBuilder& mem_build, + MemoryDescPtr external_desc, MemoryCPtr init_val); //InferenceEngine::IVariableStateInternal void Reset() override; @@ -46,7 +47,7 @@ class VariableStateDoubleBuffer : public IVariableState { MemoryPtr InputMem() override; MemoryPtr OutputMem() override; - MemoryDescPtr OriginalDesc() const override; + MemoryDescPtr InternalDesc() const override; private: static MemoryDescPtr ToStatic(const MemoryDescPtr& desc); @@ -67,8 +68,12 @@ class VariableStateDoubleBuffer : public IVariableState { return m_internal_mem[buffer_num ^ 0x1]; } + + const dnnl::engine& getEngine() const; + private: - MemoryDescPtr m_desc; //mem desc required by the graph internal tensor + MemoryDescPtr m_external_desc; + MemoryDescPtr m_internal_desc; //mem desc required by the graph internal tensor std::array m_internal_mem{}; size_t buffer_num = 0; }; diff --git a/src/plugins/intel_cpu/src/nodes/memory.cpp b/src/plugins/intel_cpu/src/nodes/memory.cpp index 0ae2639c48333b..ad14c0a0016061 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.cpp +++ b/src/plugins/intel_cpu/src/nodes/memory.cpp @@ -234,10 +234,12 @@ MemoryInput::MemoryInput(const std::shared_ptr& op, const GraphCon void MemoryInput::createPrimitive() { Input::createPrimitive(); - auto parentEdge = getParentEdgeAt(0); + if (!inputShapes.empty()) { + auto parentEdge = getParentEdgeAt(0); - if (parentEdge->getParent()->isConstant()) { - Input::resetMemoryPtr(parentEdge->getMemoryPtr()); + if (parentEdge->getParent()->isConstant()) { + Input::resetMemoryPtr(parentEdge->getMemoryPtr()); + } } } @@ -380,13 +382,29 @@ void MemoryInput::assignState(MemStatePtr newState) { outMem->load(*assignedMem); } - getOutputNode().assignExtMemory(newState->OutputMem(), newState->OriginalDesc()); + getOutputNode().assignExtMemory(newState->OutputMem(), newState->InternalDesc()); } -std::function MemoryInput::memoryBuilder() const { +MemStatePtr MemoryInput::makeState() 
const { + // assume ov::Tensor is always dense + auto original_desc = + std::make_shared(getOriginalOutputPrecisionAtPort(0), outputShapes.at(0)); + auto mem_desc = getBaseMemDescAtOutputPort(0); const auto& eng = getEngine(); - return [mem_desc, eng](){ return std::make_shared(eng, mem_desc); }; + + auto state_name = getId(); + + // Remove suffix with pair ID. Internal information. + auto suffix_idx = state_name.find("/id="); + if (suffix_idx != std::string::npos) { + state_name = state_name.substr(0, suffix_idx); + } + + return std::make_shared(state_name, + [mem_desc, eng](){ return std::make_shared(eng, mem_desc); }, + original_desc, + getMemoryPtr()); } void MemoryInput::registerOutputNode(MemoryOutput* node) { diff --git a/src/plugins/intel_cpu/src/nodes/memory.hpp b/src/plugins/intel_cpu/src/nodes/memory.hpp index 971fe5aee22c8b..51247a6261de72 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.hpp +++ b/src/plugins/intel_cpu/src/nodes/memory.hpp @@ -28,7 +28,7 @@ class MemoryNode { explicit MemoryNode(std::string id) : _id(id) {} explicit MemoryNode(const std::shared_ptr& op); virtual ~MemoryNode() = default; - std::string getId() { + std::string getId() const { return _id; } virtual void registerInputNode(MemoryInput*) = 0; @@ -133,7 +133,7 @@ class MemoryInput : public Input, public MemoryNode { void deregisterSibling(MemoryNode* node) override; void assignState(MemStatePtr newState); - std::function memoryBuilder() const; + MemStatePtr makeState() const; private: MemoryOutput& getOutputNode();