diff --git a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
index 5e8536f8acc31c..04238e7eb5fd58 100644
--- a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
+++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
@@ -68,7 +68,7 @@ dnnl::memory::data_type DnnlExtensionUtils::ElementTypeToDataType(const ov::elem
     case ov::element::undefined:
         return memory::data_type::undef;
     default: {
-        OPENVINO_THROW("The plugin does not support ", elementType.to_string(), " for use with oneDNN");
+        OPENVINO_THROW("CPU plugin does not support ", elementType.to_string(), " for use with oneDNN.");
     }
     }
 }
diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp
index ed6921ed88f19d..932c40e8c5cb0f 100644
--- a/src/plugins/intel_cpu/src/graph.cpp
+++ b/src/plugins/intel_cpu/src/graph.cpp
@@ -47,7 +47,6 @@
 #include "utils/ngraph_utils.hpp"
 #include "utils/node_dumper.h"
 #include "utils/verbose.h"
-#include "memory_desc/cpu_memory_desc_utils.h"
 
 #include "openvino/runtime/memory_solver.hpp"
 
@@ -894,63 +893,17 @@ void Graph::PushInputData(const std::string& name, const ov::SoPtr<ov::ITensor>& inp
     if (!IsReady())
         OPENVINO_THROW("Wrong state. Topology not ready.");
     auto input_itr = inputNodesMap.find(name);
     if (input_itr != inputNodesMap.end()) {
-        auto create_mem_desc = [&](const ov::SoPtr<ov::ITensor>& tensor) -> CpuBlockedMemoryDesc {
-            auto element_type = tensor->get_element_type();
-            auto shape = tensor->get_shape();
-            if (shape.empty())
-                shape = {tensor->get_size()};
-            std::vector<size_t> blk_order(shape.size());
-            std::iota(blk_order.begin(), blk_order.end(), 0);
-            std::vector<size_t> dim_offset(shape.size(), 0);
-            std::vector<size_t> blk_strides;
-            auto byte_strides = element_type.bitwidth() >= 8 ? tensor->get_strides() : Strides{};
-            if (byte_strides.empty()) {
-                blk_strides = ov::row_major_strides(shape);
-            } else {
-                // ROI tensor need figure out correct blk_strides
-                blk_strides.resize(byte_strides.size());
-                std::transform(byte_strides.begin(),
-                               byte_strides.end(),
-                               blk_strides.begin(),
-                               [&element_type](size_t byte_stride) {
-                                   OPENVINO_ASSERT(byte_stride % element_type.size() == 0,
-                                                   "Limitation: Stride in bytes ",
-                                                   byte_stride,
-                                                   " should be divisible by size of element ",
-                                                   element_type.size());
-                                   return byte_stride / element_type.size();
-                               });
-            }
-            InferenceEngine::TensorDesc tensorDesc(
-                InferenceEngine::details::convertPrecision(tensor->get_element_type()),
-                shape,
-                InferenceEngine::BlockingDesc{shape, blk_order, 0, dim_offset, blk_strides});
-            return MemoryDescUtils::convertToCpuBlockedMemoryDesc(tensorDesc);
-        };
-
         auto node = input_itr->second;
         auto childEdge = node->getChildEdgeAt(0);
-        const auto& outDims = node->getOutputShapeAtPort(0);
 
         const void* ext_data_ptr = input->data();
         void* inter_data_ptr = childEdge->getMemory().getData();
 
         if (ext_data_ptr != inter_data_ptr) {
-            auto ext_tensor_desc = create_mem_desc(input);
+            auto ext_tensor_desc = MemoryDescUtils::generateCpuBlockedMemoryDesc(input);
             Memory ext_mem(getEngine(), ext_tensor_desc, ext_data_ptr, false);
             childEdge->getMemory().load(ext_mem, false);
         }
-
-        // todo: make sure 'name' exists in this map...
-        if (_normalizePreprocMap.find(name) != _normalizePreprocMap.end()) {
-            if (input->get_element_type() == ov::element::f32) {
-                _normalizePreprocMap[name].NormalizeImage(outDims,
-                                                          reinterpret_cast<float*>(inter_data_ptr),
-                                                          TensorDesc::getLayoutByDims(input->get_shape()));
-            } else {
-                OPENVINO_THROW("Mean image of type ", input->get_element_type().get_type_name(), " is unsupported");
-            }
-        }
     } else {
         OPENVINO_THROW("Input blob for infer '", name, "' doesn't correspond to input in network");
     }
@@ -973,44 +926,32 @@ void Graph::PullOutputData(std::unordered_map<std::string, ov::SoPtr<ov::ITensor>>&
             OPENVINO_THROW("The CPU plugin graph doesn't contain output node with name: ", name.c_str());
         }
 
-        InferenceEngine::TensorDesc expectedDesc(
-            InferenceEngine::details::convertPrecision(ext_blob->get_element_type()),
-            ext_blob->get_shape(),
-            InferenceEngine::TensorDesc::getLayoutByRank(ext_blob->get_shape().size()));
-        DEBUG_LOG(name, ", tensor data addr ", static_cast<void*>(output[name]->data()));
+        auto expected_desc_ptr = MemoryDescUtils::generateCpuBlockedMemoryDesc(ext_blob);
+        const auto actualDesc = intr_blob.getDescWithType<BlockedMemoryDesc>();
 
-        const auto actualDesc = MemoryDescUtils::convertToTensorDesc(intr_blob.getDesc());
+        DEBUG_LOG(name, ", tensor data addr ", static_cast<void*>(output[name]->data()));
 
         // TODO [NM]: need to create universal reorder which will be detect cases when we really need to use it
         // WA: for cases when output shape after transformation will be 1x1x1x1 but model output is scalar
         bool isScalarOutput = false;
-        if (actualDesc.getLayout() == SCALAR) {
-            isScalarOutput = expectedDesc.getLayout() == SCALAR ||
-                             (!expectedDesc.getDims().empty() &&
-                              std::accumulate(expectedDesc.getDims().begin(), expectedDesc.getDims().end(), (size_t)1, std::multiplies<size_t>()) == 1);
-        } else if (expectedDesc.getLayout() == SCALAR) {
-            isScalarOutput = actualDesc.getLayout() == SCALAR ||
-                             (!actualDesc.getDims().empty() &&
-                              std::accumulate(actualDesc.getDims().begin(), actualDesc.getDims().end(), (size_t)1, std::multiplies<size_t>()) == 1);
+        if (ext_blob->get_shape().empty() && ext_blob->get_size() == 1) {
+            const auto& actualDims = expected_desc_ptr->getShape().getStaticDims();
+            isScalarOutput =
+                !actualDims.empty() &&
+                std::accumulate(actualDims.begin(), actualDims.end(), (size_t)1, std::multiplies<size_t>()) == 1;
         }
 
         auto outDims = intr_blob.getStaticDims();
         if (ext_blob->get_shape() != outDims && !isScalarOutput) {
             // WA: because input/output info initially contains non empty dims, order etc.
             // and setDims (called inside setShape) can't correct modify blocked desc for desc with blocked layout
-            if (expectedDesc.getLayout() == InferenceEngine::Layout::BLOCKED) {
-                expectedDesc = TensorDesc(expectedDesc.getPrecision(), expectedDesc.getLayout());
-            }
             DEBUG_LOG(name, ", tensor data addr ", static_cast<void*>(output[name]->data()),
             " dims ", PartialShape(output[name]->get_shape()), " -> ", PartialShape(outDims),
             ", intr ptr ", intr_blob.getData(), " , parentedge's memory object ", parentEdge->getMemoryPtr().get());
             ext_blob->set_shape(outDims);
             DEBUG_LOG(name, ", tensor data addr ", static_cast<void*>(output[name]->data()),
             " dims ", PartialShape(output[name]->get_shape()), ", intr ptr ", intr_blob.getData());
-            expectedDesc =
-                InferenceEngine::TensorDesc(InferenceEngine::details::convertPrecision(ext_blob->get_element_type()),
-                                            ext_blob->get_shape(),
-                                            InferenceEngine::TensorDesc::getLayoutByRank(ext_blob->get_shape().size()));
+            expected_desc_ptr = MemoryDescUtils::generateCpuBlockedMemoryDesc(ext_blob);
         }
 
         // check for empty output blob
@@ -1018,8 +959,8 @@ void Graph::PullOutputData(std::unordered_map<std::string, ov::SoPtr<ov::ITensor>>&
             continue;
         }
 
-        auto srcPrec = actualDesc.getPrecision();
-        auto dstPrec = expectedDesc.getPrecision();
+        auto srcPrec = actualDesc->getPrecision();
+        auto dstPrec = expected_desc_ptr->getPrecision();
         if (!getConfig().isLegacyApi && srcPrec == dstPrec && ext_blob->get_byte_size() != intr_blob.getSize())
             OPENVINO_THROW("Output blob byte size is not equal network output byte size (",
                            ext_blob->get_byte_size(),
@@ -1034,24 +975,13 @@ void Graph::PullOutputData(std::unordered_map<std::string, ov::SoPtr<ov::ITensor>>&
         // That is the same memory. No need to copy
         if (ext_blob_ptr == intr_blob_ptr) continue;
 
-        if (actualDesc.getBlockingDesc() != expectedDesc.getBlockingDesc() && !isScalarOutput) {
-            // User can initialize output via SetOutput API using tensorDesc with ANY layout.
-            // For these cases we create planar memory descriptor.
-            auto outBlobDesc =
-                expectedDesc.getLayout() == InferenceEngine::Layout::ANY
-                    ? DnnlBlockedMemoryDesc(InferenceEngine::details::convertPrecision(expectedDesc.getPrecision()),
-                                            Shape(expectedDesc.getDims()))
-                    : MemoryDescUtils::convertToDnnlBlockedMemoryDesc(expectedDesc);
-            Memory outBloMem(getEngine(), outBlobDesc, ext_blob_ptr, false);
+        if (actualDesc->isCompatible(*expected_desc_ptr) && !isScalarOutput) {
+            Memory outBloMem(getEngine(), expected_desc_ptr, ext_blob_ptr, false);
             outBloMem.load(intr_blob, false);
         } else {
             size_t size_to_copy = intr_blob.getDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
             DEBUG_LOG("pull_output: convert ", srcPrec, " to ", dstPrec);
-            cpu_convert(intr_blob_ptr,
-                        ext_blob_ptr,
-                        InferenceEngine::details::convertPrecision(srcPrec),
-                        InferenceEngine::details::convertPrecision(dstPrec),
-                        size_to_copy);
+            cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy);
         }
     }
 }
diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h
index 890b9de8bcf604..955cbe014fd7a4 100644
--- a/src/plugins/intel_cpu/src/graph.h
+++ b/src/plugins/intel_cpu/src/graph.h
@@ -11,7 +11,6 @@
 #include "edge.h"
 #include "graph_context.h"
 #include "node.h"
-#include "normalize_preprocess.h"
 #include "openvino/runtime/make_tensor.hpp"
 #include "openvino/runtime/profiling_info.hpp"
 
@@ -60,10 +59,6 @@ class Graph {
               const GraphContext::CPtr ctx,
               std::string name);
 
-    bool hasMeanImageFor(const std::string& name) {
-        return _normalizePreprocMap.find(name) != _normalizePreprocMap.end();
-    }
-
     void PushInputData(const std::string& name, const ov::SoPtr<ov::ITensor>& input);
     void PullOutputData(std::unordered_map<std::string, ov::SoPtr<ov::ITensor>>& output);
 
@@ -212,7 +207,6 @@ class Graph {
         outputNodesMap.clear();
         graphNodes.clear();
         graphEdges.clear();
-        _normalizePreprocMap.clear();
         syncNodesInds.clear();
     }
     Status status { Status::NotReady };
@@ -228,7 +222,6 @@ class Graph {
     std::vector<NodePtr> graphNodes;
     std::vector<EdgePtr> graphEdges;
 
-    std::map<std::string, NormalizePreprocess> _normalizePreprocMap;
     std::string _name;
 
     bool graphHasDynamicInput = false;
diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp
index 2ca2913b7ccdc6..b3620b37623cbc 100644
--- a/src/plugins/intel_cpu/src/infer_request.cpp
+++ b/src/plugins/intel_cpu/src/infer_request.cpp
@@ -332,37 +332,6 @@ void SyncInferRequest::throw_if_canceled() const {
     }
 }
 
-static InferenceEngine::TensorDesc create_tensor_desc(const ov::SoPtr<ov::ITensor>& tensor) {
-    auto element_type = tensor->get_element_type();
-    auto shape = tensor->get_shape();
-    std::vector<size_t> blk_order(shape.size());
-    std::iota(blk_order.begin(), blk_order.end(), 0);
-    std::vector<size_t> dim_offset(shape.size(), 0);
-    std::vector<size_t> blk_strides;
-    auto byte_strides = element_type.bitwidth() >= 8 ? tensor->get_strides() : Strides{};
-    if (byte_strides.empty()) {
-        blk_strides = ov::row_major_strides(shape);
-    } else {
-        blk_strides.resize(byte_strides.size());
-        std::transform(byte_strides.begin(),
-                       byte_strides.end(),
-                       blk_strides.begin(),
-                       [&element_type](size_t byte_stride) {
-                           OPENVINO_ASSERT(byte_stride % element_type.size() == 0,
-                                           "Limitation: Stride in bytes ",
-                                           byte_stride,
-                                           " should be divisible by size of element ",
-                                           element_type.size());
-                           return byte_stride / element_type.size();
-                       });
-    }
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    return InferenceEngine::TensorDesc{InferenceEngine::details::convertPrecision(element_type),
-                                       shape,
-                                       InferenceEngine::BlockingDesc{shape, blk_order, 0, dim_offset, blk_strides}};
-    OPENVINO_SUPPRESS_DEPRECATED_END
-}
-
 ov::SoPtr<ov::ITensor> SyncInferRequest::get_tensor(const ov::Output<const ov::Node>& in_port) const {
     auto port = get_internal_port(in_port);
     return ov::ISyncInferRequest::get_tensor(port);
 }
@@ -398,7 +367,7 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& in_port, con
         tensor = ov::make_tensor(in_tensor->get_element_type(), in_port.get_shape(), in_tensor->data());
     }
     auto name = get_port_name(in_port, m_is_legacy_api);
-    auto tensor_desc = create_tensor_desc(tensor);
+    auto mem_desc_ptr = MemoryDescUtils::generateCpuBlockedMemoryDesc(tensor);
     bool is_input = ov::op::util::is_parameter(port.get_node());
     if (is_input) {
         const auto netInPrc = port.get_element_type();
@@ -436,14 +405,11 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& in_port, con
             // we must define desc for dynamic case
             // otherwise we got incorrect check on shape compatibility inside isCompatible
             // because lower and upper bound will be compared
-            OPENVINO_SUPPRESS_DEPRECATED_START
-            actualDesc = actualDesc->cloneWithNewDims(tensor_desc.getLayout() == InferenceEngine::Layout::SCALAR
-                                                          ? InferenceEngine::SizeVector{1}
-                                                          : tensor_desc.getDims());
-            OPENVINO_SUPPRESS_DEPRECATED_END
+            actualDesc = actualDesc->cloneWithNewDims(
+                ov::is_scalar(tensor->get_shape()) ? VectorDims{1} : VectorDims{tensor->get_shape()});
         }
-        if (actualDesc->isCompatible(MemoryDescUtils::convertToCpuBlockedMemoryDesc(tensor_desc)) &&
-            m_graph->_normalizePreprocMap.find(name) == m_graph->_normalizePreprocMap.end()) {
+
+        if (actualDesc->isCompatible(*mem_desc_ptr)) {
             m_external_ptr[name] = tensor;
         } else if (m_external_ptr.find(name) != m_external_ptr.end()) {
             m_external_ptr.erase(name);
@@ -481,7 +447,7 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& in_port, con
         }
 
         const auto& desc = m_graph->getOutputNodeByName(name)->getParentEdgesAtPort(0)[0]->getMemory().getDesc();
-        if (!isDynamic && tensor_desc == MemoryDescUtils::convertToTensorDesc(desc)) {
+        if (!isDynamic && mem_desc_ptr->isCompatible(desc)) {
             m_external_ptr[name] = tensor;
         } else if (m_external_ptr.find(name) != m_external_ptr.end()) {
             m_external_ptr.erase(name);
@@ -538,12 +504,12 @@ void SyncInferRequest::init_tensor(const std::string& name) {
                 tensor = ov::make_tensor(port.get_element_type(), tensor_shape);
                 ov::ISyncInferRequest::set_tensor(port, tensor);
 
-                auto desc = create_tensor_desc(tensor);
-                if (!isDynamic &&
-                    desc == MemoryDescUtils::convertToTensorDesc(
-                                m_graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory().getDesc()) &&
-                    m_graph->_normalizePreprocMap.find(name) == m_graph->_normalizePreprocMap.end()) {
-                    m_external_ptr[name] = tensor;
+                if (!isDynamic) {
+                    auto mem_desc_ptr = MemoryDescUtils::generateCpuBlockedMemoryDesc(tensor);
+                    if (mem_desc_ptr->isCompatible(
+                            m_graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory().getDesc())) {
+                        m_external_ptr[name] = tensor;
+                    }
                 }
             }
         }
@@ -626,11 +592,11 @@ void SyncInferRequest::init_tensor(const std::string& name) {
                 }
             }
             m_outputs[name] = tensor;
-            auto desc = create_tensor_desc(tensor);
-            if (!port_shape.is_dynamic() && !m_external_ptr.count(name) &&
-                desc == MemoryDescUtils::convertToTensorDesc(
-                            output->second->getParentEdgesAtPort(0)[0]->getMemory().getDesc())) {
-                m_external_ptr[name] = tensor;
+            if (!port_shape.is_dynamic() && !m_external_ptr.count(name)) {
+                auto desc = MemoryDescUtils::generateCpuBlockedMemoryDesc(tensor);
+                if (desc->isCompatible(output->second->getParentEdgesAtPort(0)[0]->getMemory().getDesc())) {
+                    m_external_ptr[name] = tensor;
+                }
             }
             // update tensors in case of multiple output ports with the same name
             for (const auto& out : get_outputs()) {
diff --git a/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.cpp b/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.cpp
index b7a3ac26a18b1f..2f39090ac40837 100644
--- a/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.cpp
+++ b/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.cpp
@@ -5,15 +5,18 @@
 #include
 #include "cpu_memory_desc.h"
 #include "memory_desc/cpu_memory_desc_utils.h"
-#include
 #include "memory_desc/dnnl_blocked_memory_desc.h"
-#include "utils/general_utils.h"
+#include "openvino/runtime/itensor.hpp"
+#include "openvino/runtime/so_ptr.hpp"
 #include "utils/cpu_utils.hpp"
-#include
-#include
-#include
+#include "utils/general_utils.h"
+
 #include
+#include
 #include
+#include
+#include
+#include
 
 using namespace dnnl;
 using namespace InferenceEngine;
@@ -46,27 +49,12 @@ DnnlBlockedMemoryDesc MemoryDescUtils::convertToDnnlBlockedMemoryDesc(const Memo
     }
 }
 
-CpuBlockedMemoryDesc MemoryDescUtils::convertToCpuBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc) {
-    if (desc.getLayout() == InferenceEngine::Layout::ANY)
-        OPENVINO_THROW("Cannot convert InferenceEngine::TensorDesc with ANY layout to CpuBlockedMemoryDesc");
-
-    const auto& blkDesc = desc.getBlockingDesc();
-    const auto& dims = desc.getDims();
-
-    auto strides = blkDesc.getStrides();
-    // for empty tensor case InferenceEngine::TensorDesc fill strides with non zero values before first 0 dims
-    // i.e. dims[1, 0, 2, 3] -> strides [0, 6, 3, 1]
-    if (std::any_of(dims.begin(), dims.end(), [](size_t dim){ return dim == 0; })) {
-        std::fill(strides.begin(), strides.end(), 0);
+BlockedMemoryDescPtr MemoryDescUtils::convertToBlockedMemoryDesc(const MemoryDescPtr &desc) {
+    if (desc->getType() & MemoryDescType::Blocked) {
+        return std::dynamic_pointer_cast<BlockedMemoryDesc>(desc);
+    } else {
+        OPENVINO_THROW("Can not convert unsupported memory descriptor");
     }
-
-    return CpuBlockedMemoryDesc(InferenceEngine::details::convertPrecision(desc.getPrecision()),
-                                Shape(dims),
-                                blkDesc.getBlockDims(),
-                                blkDesc.getOrder(),
-                                blkDesc.getOffsetPadding(),
-                                blkDesc.getOffsetPaddingToData(),
-                                strides);
 }
 
 CpuBlockedMemoryDescPtr MemoryDescUtils::generateCpuBlockedMemoryDesc(const ov::SoPtr<ov::ITensor>& tensor) {
@@ -82,6 +70,8 @@ CpuBlockedMemoryDescPtr MemoryDescUtils::generateCpuBlockedMemoryDesc(const ov::
 
     if (byte_strides.empty()) {
         blk_strides = ov::row_major_strides(shape);
+    } else if (tensor->get_size() == 0) {
+        blk_strides.resize(shape.size());
     } else {
         // ROI tensor need figure out correct blk_strides
         blk_strides.resize(byte_strides.size());
@@ -108,6 +98,7 @@ CpuBlockedMemoryDescPtr MemoryDescUtils::generateCpuBlockedMemoryDesc(const ov::
                                                   blk_strides);
 }
 
+OPENVINO_SUPPRESS_DEPRECATED_START
 DnnlBlockedMemoryDesc MemoryDescUtils::convertToDnnlBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc) {
     if (desc.getLayout() == InferenceEngine::Layout::ANY)
         OPENVINO_THROW("Cannot convert InferenceEngine::TensorDesc with ANY layout to DnnlBlockedMemoryDesc");
@@ -131,15 +122,7 @@ DnnlBlockedMemoryDesc MemoryDescUtils::convertToDnnlBlockedMemoryDesc(const Infe
                                  strides);
 }
 
-BlockedMemoryDescPtr MemoryDescUtils::convertToBlockedMemoryDesc(const MemoryDescPtr &desc) {
-    if (desc->getType() & MemoryDescType::Blocked) {
-        return std::dynamic_pointer_cast<BlockedMemoryDesc>(desc);
-    } else {
-        OPENVINO_THROW("Can not convert unsupported memory descriptor");
-    }
-}
-
-InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const IMemory &mem) {
+InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const IMemory& mem) {
     // TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor
     auto& memDesc = mem.getDesc();
     InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc);
@@ -148,7 +131,7 @@ InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const IMemory &mem)
     return make_blob_with_precision(desc, mem.getData());
 }
 
-InferenceEngine::TensorDesc MemoryDescUtils::interpretAsBlobDesc(const IMemory &mem) {
+InferenceEngine::TensorDesc MemoryDescUtils::interpretAsBlobDesc(const IMemory& mem) {
     auto& memDesc = mem.getDesc();
     InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc);
 
@@ -174,6 +157,7 @@ InferenceEngine::TensorDesc MemoryDescUtils::convertToTensorDesc(const MemoryDes
         OPENVINO_THROW("Cannot convert MemoryDesc to InferenceEngine::TensorDesc");
     }
 }
+OPENVINO_SUPPRESS_DEPRECATED_END
 
 std::string MemoryDescUtils::dim2str(Dim dim) {
     return dim == Shape::UNDEFINED_DIM ? "?" : std::to_string(dim);
diff --git a/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.h b/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.h
index 43c1d2a2d410ef..d930612ac0a173 100644
--- a/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.h
+++ b/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.h
@@ -7,7 +7,8 @@
 #include
 #include "cpu_types.h"
 #include "cpu_shape.h"
-
+#include "openvino/runtime/itensor.hpp"
+#include "openvino/runtime/so_ptr.hpp"
 #include
 #include
 #include
 
@@ -42,11 +43,11 @@ class MemoryDescUtils {
     static DnnlBlockedMemoryDesc convertToDnnlBlockedMemoryDesc(const MemoryDesc& desc);
 
     /**
-     * @brief Converts InferenceEngine::TensorDesc to CpuBlockedMemoryDesc
-     * @param desc InferenceEngine::TensorDesc to be converted
-     * @return converted CpuBlockedMemoryDesc
+     * @brief Converts MemoryDesc to BlockedMemoryDesc
+     * @param desc MemoryDesc to be converted
+     * @return converted BlockedMemoryDesc
      */
-    static CpuBlockedMemoryDesc convertToCpuBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc);
+    static std::shared_ptr<BlockedMemoryDesc> convertToBlockedMemoryDesc(const std::shared_ptr<MemoryDesc> &desc);
 
     /**
      * @brief Builds CpuBlockedMemoryDesc for given ov::ITensor
      * ...
      */
     static std::shared_ptr<CpuBlockedMemoryDesc> generateCpuBlockedMemoryDesc(const ov::SoPtr<ov::ITensor>& tensor);
 
+    OPENVINO_SUPPRESS_DEPRECATED_START
     /**
      * @brief Converts InferenceEngine::TensorDesc to DnnlBlockedMemoryDesc
      * @param desc InferenceEngine::TensorDesc to be converted
      * ...
      */
     static DnnlBlockedMemoryDesc convertToDnnlBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc);
 
-    /**
-     * @brief Converts MemoryDesc to BlockedMemoryDesc
-     * @param desc MemoryDesc to be converted
-     * @return converted BlockedMemoryDesc
-     */
-    static std::shared_ptr<BlockedMemoryDesc> convertToBlockedMemoryDesc(const std::shared_ptr<MemoryDesc> &desc);
-
     /**
      * @brief Creates InferenceEngine::Blob from Memory with the memory reuse
      * @param desc Memory from which will be created InferenceEngine::Blob
      * ...
      */
@@ -89,6 +84,7 @@ class MemoryDescUtils {
      * @return converted InferenceEngine::TensorDesc
      */
     static InferenceEngine::TensorDesc convertToTensorDesc(const MemoryDesc& desc);
+    OPENVINO_SUPPRESS_DEPRECATED_END
 
     static constexpr Dim DEFAULT_DUMMY_VAL = 64;
 
diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp
index f6dc2284200961..2c3749f2b08dda 100644
--- a/src/plugins/intel_cpu/src/node.cpp
+++ b/src/plugins/intel_cpu/src/node.cpp
@@ -23,7 +23,6 @@
 #include "nodes/eltwise.h"
 #include "nodes/matmul.h"
 #include "nodes/fullyconnected.h"
-#include "nodes/generic.h"
 #include "nodes/if.h"
 #include "nodes/input.h"
 #include "nodes/lrn.h"
@@ -855,13 +854,11 @@ void Node::prepareMemory(const DnnlMemoryDescPtr& intDesc, size_t indx) {
                        internalBlobs.size());
     }
 
-    const auto &internalBlob = internalBlobs[indx];
+    const auto& internalBlob = internalBlobs[indx];
 
-    auto create = [&] () {
-        // TODO [DS]: internal blobs should be removed or rewritten using Memory object
-        auto newDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(internalBlob->getTensorDesc());
-
-        Memory memory{engine, newDesc, internalBlob->buffer()};
+    auto create = [&]() {
+        auto newDesc = internalBlob->getDescPtr();
+        Memory memory{engine, newDesc, internalBlob->getData()};
 
         MemoryPtr _ptr = std::make_shared<Memory>(engine, intDesc);
         node::Reorder::reorderData(memory, *_ptr, context->getParamsCache());
 
         return _ptr;
     };
 
@@ -872,12 +869,13 @@
     auto weightCache = context->getWeightsCache();
     if (weightCache != nullptr && memory::format_kind::blocked == intDesc->getDnnlDesc().get_format_kind()) {
         const auto& format = intDesc->serializeFormat();
-        const uint64_t data_hash = weightCache->GetHashFunc().hash(
-            internalBlob->buffer(), internalBlob->byteSize());
+        const uint64_t data_hash =
+            weightCache->GetHashFunc().hash(static_cast<const unsigned char*>(internalBlob->getData()),
+                                            internalBlob->getSize());
 
         const std::string string_hash = name + "_" + std::to_string(indx)
                                         + "_" + format
-                                        + "_" + std::to_string(internalBlob->byteSize())
+                                        + "_" + std::to_string(internalBlob->getSize())
                                         + "_" + std::to_string(data_hash);
 
         ptr = *weightCache->findOrCreate(string_hash, create);
@@ -1254,24 +1252,22 @@ bool Node::isFusedWith(Type fusedNodeType) const {
     return false;
 }
 
-InferenceEngine::Layout Node::getWeightsLayoutByDims(SizeVector dims, bool isGrouped) {
+dnnl::memory::format_tag Node::getWeightsFormatTagByDims(const SizeVector& dims) const {
     switch (dims.size()) {
-    case 0:
-        return InferenceEngine::Layout::SCALAR;
-    case 1:
-        return InferenceEngine::Layout::C;
-    case 2:
-        return InferenceEngine::Layout::NC;
-    case 3:
-        return InferenceEngine::Layout::CHW;
-    case 4:
-        return InferenceEngine::Layout::OIHW;
-    case 5:
-        return isGrouped ? InferenceEngine::Layout::GOIHW : InferenceEngine::Layout::OIDHW;
-    case 6:
-        return isGrouped ? InferenceEngine::Layout::GOIDHW : InferenceEngine::Layout::BLOCKED;
-    default:
-        return InferenceEngine::Layout::BLOCKED;
+    case 1:
+        return dnnl::memory::format_tag::a;
+    case 2:
+        return dnnl::memory::format_tag::ab;
+    case 3:
+        return dnnl::memory::format_tag::abc;
+    case 4:
+        return dnnl::memory::format_tag::abcd;
+    case 5:
+        return dnnl::memory::format_tag::abcde;
+    case 6:
+        return dnnl::memory::format_tag::abcdef;
+    default:
+        OPENVINO_THROW("getWeightsFormatTagByDims doesn't support dims.size() = ", dims.size());
     }
 }
diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h
index 0e56f0632aa9fb..0d998c39d889a1 100644
--- a/src/plugins/intel_cpu/src/node.h
+++ b/src/plugins/intel_cpu/src/node.h
@@ -526,7 +526,7 @@ class Node {
         return outputShapes[port];
     }
 
-    const std::vector<InferenceEngine::Blob::Ptr>& getInternalBlobs() const {
+    const std::vector<MemoryPtr>& getInternalBlobs() const {
         return internalBlobs;
     }
 
@@ -606,7 +606,7 @@ class Node {
     };
     mutable InPlaceType inplace = InPlaceType::Unknown;
     ConstantType constant = ConstantType::Unknown;
-    std::vector<InferenceEngine::Blob::Ptr> internalBlobs;
+    std::vector<MemoryPtr> internalBlobs;
     std::vector<MemoryPtr> internalBlobMemory;
     std::vector<NodeDesc> supportedPrimitiveDescriptors;
    std::unordered_map<int, dnnl::memory> primArgs;
@@ -633,7 +633,7 @@ class Node {
 
     virtual std::vector<memory::format_tag> getAvailableFormatsForDims(const Shape& dims) const;
 
-    InferenceEngine::Layout getWeightsLayoutByDims(InferenceEngine::SizeVector dims, bool isGrouped);
+    dnnl::memory::format_tag getWeightsFormatTagByDims(const InferenceEngine::SizeVector& dims) const;
 
     /**
      * @brief Auxiliary function to get node input precisions
diff --git a/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp b/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp
index 9c836c7a16c6aa..f235688f533c57 100644
--- a/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp
+++ b/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp
@@ -105,10 +105,6 @@ void AdaptivePooling::initSupportedPrimitiveDescriptors() {
     // we supports only fp32 currently
     precision = ov::element::f32;
 
-    InferenceEngine::LayerConfig config;
-    config.inConfs.resize(2);
-    config.outConfs.resize((algorithm == Algorithm::AdaptivePoolingAvg ? 1 : 2));
-
     std::vector<LayoutType> dataFormats{ LayoutType::ncsp };
     const auto &inDims = getInputShapeAtPort(0).getDims();
     if (inDims[1] != Shape::UNDEFINED_DIM && inDims[1] != 1) {
diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp
index d605f5271da6e6..705c825b31b282 100644
--- a/src/plugins/intel_cpu/src/nodes/conv.cpp
+++ b/src/plugins/intel_cpu/src/nodes/conv.cpp
@@ -1193,35 +1193,6 @@ bool Convolution::isNspcAvailable() const {
     return true;
 }
 
-InferenceEngine::Blob::Ptr Convolution::createInternalBlob(InferenceEngine::SizeVector dims, size_t edgeNum, bool isGrouped) {
-    const auto constNode = std::dynamic_pointer_cast<Input>(getParentEdgeAt(edgeNum)->getParent());
-    if (!constNode) {
-        OPENVINO_THROW("Cannot cast ", edgeNum, " input to Input node for ", getName(), ".");
-    }
-    auto blb = constNode->getMemoryPtr();
-    if (blb == nullptr)
-        OPENVINO_THROW("Cannot get const blob for node ", getName(), ".");
-
-    auto const elementsCount = blb->getDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
-
-    InferenceEngine::TensorDesc desc(InferenceEngine::details::convertPrecision(ov::element::f32), dims, getWeightsLayoutByDims(dims, isGrouped));
-
-    Blob::Ptr internalBlob = InferenceEngine::make_shared_blob<float>(desc);
-    internalBlob->allocate();
-
-    if (internalBlob->size() != elementsCount) {
-        OPENVINO_THROW("Created internal blob and const blob has different size for node: ", getName(), ".");
-    }
-
-    cpu_convert(blb->getData(),
-                internalBlob->buffer(),
-                DnnlExtensionUtils::DataTypeToElementType(blb->getDataType()),
-                InferenceEngine::details::convertPrecision(internalBlob->getTensorDesc().getPrecision()),
-                elementsCount);
-
-    return internalBlob;
-}
-
 void Convolution::prepareParams() {
     auto srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
     auto wghMemPtr = getParentEdgesAtPort(1)[0]->getMemoryPtr();
diff --git a/src/plugins/intel_cpu/src/nodes/conv.h b/src/plugins/intel_cpu/src/nodes/conv.h
index 2279e2fc80ef67..5d7e89aa7fc009 100644
--- a/src/plugins/intel_cpu/src/nodes/conv.h
+++ b/src/plugins/intel_cpu/src/nodes/conv.h
@@ -116,7 +116,6 @@ class Convolution : public Node {
     void SetPostOpsAndZeroPoints(std::vector<dnnl::primitive_attr> &attrs);
     void filterSupportedDescriptors();
     bool isNspcAvailable() const;
-    InferenceEngine::Blob::Ptr createInternalBlob(InferenceEngine::SizeVector dims, size_t edgeNum, bool isGrouped = false);
 
     void updatePadding();
     MemoryDescPtr getSumMemDesc(const dnnl::primitive_desc &primitive_desc_it);
diff --git a/src/plugins/intel_cpu/src/nodes/deconv.cpp b/src/plugins/intel_cpu/src/nodes/deconv.cpp
index a3d141ff28be98..cc1731ca596ebf 100644
--- a/src/plugins/intel_cpu/src/nodes/deconv.cpp
+++ b/src/plugins/intel_cpu/src/nodes/deconv.cpp
@@ -233,7 +233,7 @@ Deconvolution::Deconvolution(const std::shared_ptr<ov::Node>& op,
         attr = std::make_shared<dnnl::primitive_attr>();
 }
 
-InferenceEngine::Blob::Ptr Deconvolution::createWeiBlobAsIO(InferenceEngine::SizeVector dims) {
+MemoryPtr Deconvolution::createWeiBlobAsIO(const VectorDims& dims) {
     auto constNode = std::dynamic_pointer_cast<Input>(getParentEdgeAt(1)->getParent());
     if (!constNode)
         OPENVINO_THROW("Cannot cast const input node for node ", getName(), ".");
@@ -244,7 +244,7 @@ InferenceEngine::Blob::Ptr Deconvolution::createWeiBlobAsIO(InferenceEngine::Siz
     auto const blbSize = blb->getSize();
 
     // WA: In int8 case, we are processing weights using internal blob.
-    InferenceEngine::SizeVector dimsForBlockedDesc{dims};
+    VectorDims dimsForBlockedDesc{dims};
     std::swap(dimsForBlockedDesc[withGroups + 0], dimsForBlockedDesc[withGroups + 1]);
 
     VectorDims orderForBlockedDesc;
@@ -256,18 +256,15 @@ InferenceEngine::Blob::Ptr Deconvolution::createWeiBlobAsIO(InferenceEngine::Siz
     for (size_t i = 2 + withGroups; i < dimsForBlockedDesc.size(); i++)
         orderForBlockedDesc.push_back(i);
 
-    BlockingDesc blkDesc(dimsForBlockedDesc, orderForBlockedDesc);
-    InferenceEngine::TensorDesc tensorDesc(
-        InferenceEngine::details::convertPrecision(DnnlExtensionUtils::DataTypeToElementType(blb->getDataType())),
-        dims,
-        blkDesc);
-
-    Blob::Ptr internalBlob = InferenceEngine::make_shared_blob(tensorDesc);
-    internalBlob->allocate();
-    char *data = internalBlob->buffer();
-    if (data == nullptr)
-        OPENVINO_THROW("NotAllocated: Internal blob was not allocated for node ", getName(), ".");
-    size_t intBuffSize = internalBlob->byteSize();
+    auto desc = CpuBlockedMemoryDesc(DnnlExtensionUtils::DataTypeToElementType(blb->getDataType()),
+                                     Shape(dims),
+                                     dimsForBlockedDesc,
+                                     orderForBlockedDesc);
+    MemoryPtr mem_ptr = std::make_shared<Memory>(getEngine(), desc);
+    if (!mem_ptr->isAllocated())
+        OPENVINO_THROW("NotAllocated: Internal tensor was not allocated for node ", getName(), ".");
+    char* data = static_cast<char*>(mem_ptr->getData());
+    size_t intBuffSize = mem_ptr->getSize();
 
     size_t offset = blbSize;
     if (intBuffSize < offset) {
@@ -275,7 +272,7 @@ InferenceEngine::Blob::Ptr Deconvolution::createWeiBlobAsIO(InferenceEngine::Siz
     }
     cpu_memcpy_s(data, intBuffSize, blb->getData(), blbSize);
 
-    return internalBlob;
+    return mem_ptr;
 }
 
 bool Deconvolution::canBeExecutedInInt8() const {
@@ -846,8 +843,7 @@ void Deconvolution::createPrimitive() {
         if (found) {
             prepareMemory({DnnlExtensionUtils::makeDescriptor(prim_desc.weights_desc(0))});
         } else {
-            prepareMemory({std::make_shared<DnnlBlockedMemoryDesc>(
-                MemoryDescUtils::convertToDnnlBlockedMemoryDesc(internalBlobs.front()->getTensorDesc()))});
+            prepareMemory({internalBlobs.front()->getDescWithType()});
         }
     }
 
diff --git a/src/plugins/intel_cpu/src/nodes/deconv.h b/src/plugins/intel_cpu/src/nodes/deconv.h
index 5477feadc4ffc6..8043705d7e4152 100644
--- a/src/plugins/intel_cpu/src/nodes/deconv.h
+++ b/src/plugins/intel_cpu/src/nodes/deconv.h
@@ -118,7 +118,7 @@ class Deconvolution : public Node {
 
     std::string errorPrefix;
 
-    InferenceEngine::Blob::Ptr createWeiBlobAsIO(InferenceEngine::SizeVector dims);
+    MemoryPtr createWeiBlobAsIO(const VectorDims& dims);
 };
 
 }   // namespace node
diff --git a/src/plugins/intel_cpu/src/nodes/generic.h b/src/plugins/intel_cpu/src/nodes/generic.h
index dd78515b2922b3..43408d826cd8b1 100644
--- a/src/plugins/intel_cpu/src/nodes/generic.h
+++ b/src/plugins/intel_cpu/src/nodes/generic.h
@@ -18,7 +18,7 @@ namespace node {
 
 class Generic : public Node {
 public:
-    Generic(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
+    Generic(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context);
     ~Generic() = default;
 
     void getSupportedDescriptors() override;
diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp
index 44fba765ceffc7..d7e4d204e64354 100644
--- a/src/plugins/intel_cpu/src/nodes/rnn.cpp
+++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp
@@ -746,24 +746,23 @@ void RNN::fillSequenceDesc() {
 
 template
 void RNN::fillWeights(const int *gate_map, const size_t wIdx, const size_t rIdx) {
-    const auto& weightPrec = DnnlExtensionUtils::DataTypeToElementType(inDataTypes[wIdx]);
-    const auto& targetWeightPrec = DnnlExtensionUtils::DataTypeToElementType(weightsByinputDataType.at(inDataTypes[xIdx]));
+    const auto& weightPrec = DnnlExtensionUtils::DataTypeToElementType(inDataTypes[wIdx]);
+    const auto& targetWeightDataType = weightsByinputDataType.at(inDataTypes[xIdx]);
+    const auto& targetWeightPrec = DnnlExtensionUtils::DataTypeToElementType(targetWeightDataType);
 
     // create weight blobs (data and state part)
-    const VectorDims dims_w = { L, D, DC, G, SC };
-    TensorDesc w_data_desc(InferenceEngine::details::convertPrecision(targetWeightPrec), dims_w, getWeightsLayoutByDims(dims_w, false));
+    const VectorDims dims_w = {L, D, DC, G, SC};
+    auto w_data_desc = DnnlBlockedMemoryDesc(Shape(dims_w), targetWeightDataType, getWeightsFormatTagByDims(dims_w));
+    MemoryPtr w_data_mem = std::make_shared<Memory>(getEngine(), w_data_desc);
+    auto w_ptr = static_cast(w_data_mem->getData());
 
-    Blob::Ptr w_data_mem = make_shared_blob(w_data_desc);
-    w_data_mem->allocate();
-    auto w_ptr = static_cast(w_data_mem->buffer());
     if (w_ptr == nullptr)
         OPENVINO_THROW("NotAllocated: Internal blob was not allocated for node ", getName(), ".");
 
-    const VectorDims dims_s = { L, D, SC, G, SC };
-    TensorDesc w_state_desc(InferenceEngine::details::convertPrecision(targetWeightPrec), dims_s, getWeightsLayoutByDims(dims_s, false));
-    Blob::Ptr w_state_mem = make_shared_blob(w_state_desc);
-    w_state_mem->allocate();
-    auto r_ptr = static_cast(w_state_mem->buffer());
+    const VectorDims dims_s = {L, D, SC, G, SC};
+    auto w_state_desc = DnnlBlockedMemoryDesc(Shape(dims_s), targetWeightDataType, getWeightsFormatTagByDims(dims_s));
+    MemoryPtr w_state_mem = std::make_shared<Memory>(getEngine(), w_state_desc);
+    auto r_ptr = static_cast(w_state_mem->getData());
     if (r_ptr == nullptr)
         OPENVINO_THROW("NotAllocated: Internal blob was not allocated for node ", getName(), ".");
 
@@ -803,7 +802,6 @@ void RNN::fillWeights(const int *gate_map, const size_t wIdx, const size_t rIdx)
             }
         }
     }
-
     internalBlobs.push_back(w_data_mem);
     internalBlobs.push_back(w_state_mem);
 }
@@ -817,10 +815,11 @@ void RNN::fillBiases(const int *gate_map) {
     }
 
     VectorDims dims_b = { L, D, Gb, SC };
-    TensorDesc w_bias_data_desc(InferenceEngine::details::convertPrecision(Prec), dims_b, getWeightsLayoutByDims(dims_b, false));
-    Blob::Ptr w_bias_data_mem = make_shared_blob(w_bias_data_desc);
-    w_bias_data_mem->allocate();
-    auto b_ptr = static_cast(w_bias_data_mem->buffer());
+
+    auto _data_type = DnnlExtensionUtils::ElementTypeToDataType(Prec);
+    auto w_bias_data_desc = DnnlBlockedMemoryDesc(Shape(dims_b), _data_type, getWeightsFormatTagByDims(dims_b));
+    MemoryPtr w_bias_data_mem = std::make_shared<Memory>(getEngine(), w_bias_data_desc);
+    auto b_ptr = static_cast(w_bias_data_mem->getData());
 
     if (b_ptr == nullptr)
         OPENVINO_THROW("NotAllocated: Internal blob was not allocated for node ", getName(), ".");
diff --git a/src/plugins/intel_cpu/src/normalize_preprocess.cpp b/src/plugins/intel_cpu/src/normalize_preprocess.cpp
deleted file mode 100644
index 4d1bf74484f693..00000000000000
--- a/src/plugins/intel_cpu/src/normalize_preprocess.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-// Copyright (C) 2018-2023 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "normalize_preprocess.h"
-#include "ie_parallel.hpp"
-#include "nodes/common/cpu_memcpy.h"
-#include "utils/general_utils.h"
-#include "ie_ngraph_utils.hpp"
-
-using namespace InferenceEngine;
-
-namespace ov {
-namespace intel_cpu {
-
-NormalizePreprocess::NormalizePreprocess() : meanBuffer(nullptr) {
-}
-
-void NormalizePreprocess::Load(const Shape& inputShape, InputInfo::Ptr inputInfo) {
-    PreProcessInfo &pp = inputInfo->getPreProcess();
-    size_t inChannels = pp.getNumberOfChannels();
-    if (inChannels == 0) {
-        meanBuffer = nullptr;
-        return;
-    }
-
-    if (!dimsEqualStrong(inChannels, inputShape.getDims()[1])) {
-        OPENVINO_THROW("channels mismatch between mean and input");
-    }
-
-    switch (pp.getMeanVariant()) {
-        case MEAN_VALUE: {
-            // mean and standard deviation image common value per channel (1x1xC)
-            meanValues.resize(inChannels);
-            stdScales.resize(inChannels);
-
-            for (unsigned channel = 0; channel < inChannels; channel++) {
-                if (pp[channel]->stdScale == 0) {
-                    OPENVINO_THROW("Preprocessing error: stdScale cannot be equal zero");
-                }
-                meanValues[channel] = pp[channel]->meanValue;
-                stdScales[channel] = pp[channel]->stdScale;
-            }
-        }
-        break;
-        case MEAN_IMAGE: {
-            // since oneDNN expects all channels in the same buffer - we copy it here as it comes from different channels...
-            auto meanWidth = pp[0]->meanData->getTensorDesc().getDims()[pp[0]->meanData->getTensorDesc().getDims().size() - 1];
-            auto meanHeight = pp[0]->meanData->getTensorDesc().getDims()[pp[0]->meanData->getTensorDesc().getDims().size() - 2];
-
-            TensorDesc desc(InferenceEngine::details::convertPrecision(ov::element::f32), {inChannels, meanHeight, meanWidth}, InferenceEngine::Layout::CHW);
-
-            meanBuffer = make_shared_blob<float>(desc);
-
-            meanBuffer->allocate();
-
-            for (unsigned channel = 0; channel < inChannels; channel++) {
-                Blob::Ptr meanBlob = pp[channel]->meanData;
-                if (!meanBlob || InferenceEngine::details::convertPrecision(meanBlob->getTensorDesc().getPrecision()) != ov::element::f32)
-                    OPENVINO_THROW("mean image not provided or not in Float 32");
-                if (meanBlob->size() != meanHeight*meanWidth) {
-                    OPENVINO_THROW("mean image size does not match expected network input, expecting ",
-                                   meanWidth,
-                                   " x ",
-                                   meanHeight);
-                }
-                // todo: cast to TBlob and make sure it is floats
-                cpu_memcpy_s(meanBuffer->data() + channel*meanBlob->size(), meanBuffer->byteSize() - channel*meanBlob->byteSize(),
-                             meanBlob->buffer(), meanBlob->byteSize());
-            }
-        }
-        break;
-
-        case NONE: {
-            // there is no mean image. So disable mean image step
-            meanBuffer = nullptr;
-        }
-        break;
-
-        default: {
-            OPENVINO_THROW("Unsupported mean variant: ", pp.getMeanVariant());
-        }
-    }
-}
-
-void NormalizePreprocess::NormalizeImage(const Shape &inputShape, float *input, InferenceEngine::Layout layout) {
-    OPENVINO_ASSERT(input != nullptr);
-
-    const auto inputDims = inputShape.getStaticDims();
-    if (inputDims.size() != 4) {
-        OPENVINO_THROW("Expecting input as 4 dimension blob with format NxCxHxW.");
-    }
-
-    if (layout != NCHW && layout != NHWC) {
-        OPENVINO_THROW("Expecting input layout NCHW or NHWC.");
-    }
-
-    int MB = inputDims[0];
-    int srcSize = inputShape.getElementsCount() / MB;
-
-    if (meanBuffer && meanBuffer->size()) {
-        const float * meanBufferValues = meanBuffer->readOnly();
-
-        parallel_for2d(MB, srcSize, [&](int mb, int i) {
-            input[srcSize * mb + i] -= meanBufferValues[i];
-        });
-    } else if (!meanValues.empty() && !stdScales.empty()) {
-        int C = inputDims[1];
-        srcSize /= inputDims[1];
-
-        if (layout == NCHW) {
-            parallel_for3d(MB, C, srcSize, [&](int mb, int c, int i) {
-                input[mb * C * srcSize + c * srcSize + i] -= meanValues[c];
-                input[mb * C * srcSize + c * srcSize + i] /= stdScales[c];
-            });
-        } else if (layout == NHWC) {
-            parallel_for2d(MB, srcSize, [&](int mb, int i) {
-                for (int c = 0; c < C; c++) {
-                    input[mb * srcSize * C + i * C + c] -= meanValues[c];
-                    input[mb * srcSize * C + i * C + c] /= stdScales[c];
-                }
-            });
-        }
-    } else {
-        OPENVINO_THROW("Preprocessing error: meanValues and stdScales arrays are inconsistent.");
-    }
-}
-
-}   // namespace intel_cpu
-}   // namespace ov
diff --git a/src/plugins/intel_cpu/src/normalize_preprocess.h b/src/plugins/intel_cpu/src/normalize_preprocess.h
deleted file mode 100644
index a6d03cfcad5297..00000000000000
--- a/src/plugins/intel_cpu/src/normalize_preprocess.h
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright (C) 2018-2023 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include "ie_input_info.hpp"
-
-#include "cpu_shape.h"
-#include "ie_parallel.hpp"
-#include
-#include
-
-namespace ov {
-namespace intel_cpu {
-
-class NormalizePreprocess {
-public:
-    NormalizePreprocess();
-
-public:
-    void Load(const Shape& inputShape, InferenceEngine::InputInfo::Ptr inputInfo);
-    void NormalizeImage(const Shape &inputShape, float *input, InferenceEngine::Layout layout);
-
-    template<typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
-    void NormalizeImage(const Shape &inputShape, T *input, InferenceEngine::Layout layout) {
-        OPENVINO_ASSERT(input != nullptr);
-
-        const auto inputDims = inputShape.getStaticDims();
-        if (inputDims.size() != 4) {
-            OPENVINO_THROW("Expecting input as 4 dimension blob with format NxCxHxW.");
-        }
-
-        if (layout != InferenceEngine::NCHW && layout != InferenceEngine::NHWC) {
-            OPENVINO_THROW("Expecting input layout NCHW or NHWC.");
-        }
-
-        int MB = inputDims[0];
-        int srcSize = inputShape.getElementsCount() / MB;
-
-        if (meanBuffer && meanBuffer->size()) {
-            const float * meanBufferValues = meanBuffer->readOnly();
-
-            InferenceEngine::parallel_for2d(MB, srcSize, [&](int mb, int i) {
-                int buf = input[srcSize * mb + i];
-                buf -= meanBufferValues[i];
-                if (buf < (std::numeric_limits<T>::min)()) buf = (std::numeric_limits<T>::min)();
-                if (buf > (std::numeric_limits<T>::max)()) buf = (std::numeric_limits<T>::max)();
-                input[srcSize * mb + i] = buf;
-            });
-        } else if (!meanValues.empty() && !stdScales.empty()) {
-            int C = inputDims[1];
-            srcSize /= inputDims[1];
-
-            for (int c = 0; c < C; c++) {
-                if (stdScales[c] != 1)
-                    OPENVINO_THROW("Preprocessing error: fractional normalization is not supported for integer data. ");
-            }
-
-            if (layout == InferenceEngine::NCHW) {
-                InferenceEngine::parallel_for3d(MB, C, srcSize, [&](int mb, int c, int i) {
-                    int buf = input[srcSize * mb * C + c * srcSize + i];
-                    buf -= meanValues[c];
-                    if (buf < (std::numeric_limits<T>::min)()) buf = (std::numeric_limits<T>::min)();
-                    if (buf > (std::numeric_limits<T>::max)()) buf = (std::numeric_limits<T>::max)();
-                    input[srcSize * mb * C + c * srcSize + i] = buf;
-                });
-            } else if (layout == InferenceEngine::NHWC) {
-                InferenceEngine::parallel_for2d(MB, srcSize, [&](int mb, int i) {
-                    for (int c = 0; c < C; c++) {
-                        int buf = input[mb * srcSize * C + i * C + c];
-                        buf -= meanValues[c];
-                        if (buf < (std::numeric_limits<T>::min)()) buf = (std::numeric_limits<T>::min)();
-                        if (buf > (std::numeric_limits<T>::max)()) buf = (std::numeric_limits<T>::max)();
-                        input[mb * srcSize * C + i * C + c] = buf;
-                    }
-                });
-            }
-        } else {
-            OPENVINO_THROW("Preprocessing error: meanValues and stdScales arrays are inconsistent.");
-        }
-    }
-
-private:
-    std::vector<float> meanValues;
-
-    std::vector<float> stdScales;
-
-    InferenceEngine::TBlob<float>::Ptr meanBuffer;
-};
-
-}   // namespace intel_cpu
-}   // namespace ov
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index 03bd79e28c85dd..7ca44a5d283d7f 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -136,7 +136,7 @@ std::mutex Engine::SchedulerGuard::mutex;
 std::weak_ptr<Engine::SchedulerGuard> Engine::SchedulerGuard::ptr;
 
 Engine::SchedulerGuard::SchedulerGuard() {
-#if IE_THREAD == IE_THREAD_SEQ
+#if OV_THREAD == OV_THREAD_SEQ
     // To save state for ACL cores in single-thread mode
     arm_compute::Scheduler::set(arm_compute::Scheduler::Type::ST);
 #else
diff --git a/src/plugins/intel_cpu/src/utils/cpu_utils.hpp b/src/plugins/intel_cpu/src/utils/cpu_utils.hpp
index 1c607d6c805c90..c2f7e867956382 100644
--- a/src/plugins/intel_cpu/src/utils/cpu_utils.hpp
+++ b/src/plugins/intel_cpu/src/utils/cpu_utils.hpp
@@ -86,11 +86,6 @@ inline bool isPerTensorOrPerChannelBroadcastable(const VectorDims &firstInputDim
     return true;
 }
 
-inline bool isEmptyTensorDesc(const InferenceEngine::TensorDesc &td) {
-    const auto dims = td.getDims();
-    return std::any_of(dims.begin(), dims.end(), [](size_t dim) { return dim == 0; } );
-}
-
 /**
 * @brief Return precision to which given precision must be converted to be supported in plug-in
 * @param precision
diff --git a/src/plugins/intel_cpu/src/utils/node_dumper.cpp b/src/plugins/intel_cpu/src/utils/node_dumper.cpp
index ed4793ab2e88da..24d7a6d403081e 100644
--- a/src/plugins/intel_cpu/src/utils/node_dumper.cpp
+++ b/src/plugins/intel_cpu/src/utils/node_dumper.cpp
@@ -107,12 +107,10 @@ static void dumpInternalBlobs(const NodePtr& node, const DebugCapsConfig& config
         std::string file_name = NameFromType(node->getType()) + "_" + nodeName + "_blb" + std::to_string(i) + ".ieb";
         auto dump_file = config.blobDumpDir + "/#" + std::to_string(node->getExecIndex()) + "_" + file_name;
 
-        TensorDesc desc = blb->getTensorDesc();
-        if (InferenceEngine::details::convertPrecision(desc.getPrecision()) == ov::element::u1)
+        if (blb->getDesc().getPrecision() == ov::element::u1)
             continue;
 
-        MemoryPtr memory = std::make_shared<Memory>(node->getEngine(), MemoryDescUtils::convertToDnnlBlockedMemoryDesc(desc), blb->buffer());
-        BlobDumper dumper(memory);
+        BlobDumper dumper(blb);
         dump(dumper, dump_file, config);
     }
 }
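
Illustrative note (not part of the patch): the sketch below only demonstrates the descriptor flow this change standardizes on — building a CpuBlockedMemoryDesc directly from an ov::ITensor via MemoryDescUtils::generateCpuBlockedMemoryDesc and using MemoryDesc::isCompatible to choose between zero-copy reuse and a conversion path. It assumes the intel_cpu plugin headers are available; the helper name push_external_tensor and its arguments are placeholders, not code introduced by this PR.

// Hypothetical helper, shown only to illustrate the API surface used by this patch.
#include "memory_desc/cpu_memory_desc_utils.h"  // MemoryDescUtils::generateCpuBlockedMemoryDesc (assumed header)
#include "cpu_memory.h"                         // ov::intel_cpu::Memory (assumed header)

namespace ov {
namespace intel_cpu {

// Copies an external ov::ITensor into an already allocated internal Memory object.
inline void push_external_tensor(const dnnl::engine& eng,
                                 const ov::SoPtr<ov::ITensor>& tensor,
                                 Memory& internal) {
    // CpuBlockedMemoryDesc built straight from the tensor; this replaces the removed
    // InferenceEngine::TensorDesc-based create_tensor_desc/create_mem_desc helpers.
    auto extDesc = MemoryDescUtils::generateCpuBlockedMemoryDesc(tensor);

    if (internal.getDesc().isCompatible(*extDesc)) {
        // Compatible layouts: wrap the external buffer and load it as-is (zero reorder).
        Memory extMem(eng, extDesc, tensor->data(), false);
        internal.load(extMem, false);
    }
    // Incompatible layouts fall back to element-wise conversion, as PullOutputData
    // does with cpu_convert() in the patch above.
}

}  // namespace intel_cpu
}  // namespace ov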