From 907507745b87f3e9161255eaddddb371a5c2e16b Mon Sep 17 00:00:00 2001 From: moneta Date: Wed, 11 Dec 2024 14:09:15 +0100 Subject: [PATCH] [tmva][pymva] Add new Pad operator Add new pad operator and a corresponding test. Fix also the parsing of Constant in case explicitly the data are not stored as raw_data in the onnx::TensorProto class --- tmva/sofie/CMakeLists.txt | 2 + tmva/sofie/inc/TMVA/ROperator_Pad.hxx | 200 +++++++++++++++++++ tmva/sofie/inc/TMVA/ROperator_Split.hxx | 4 +- tmva/sofie/test/TestCustomModelsFromONNX.cxx | 19 ++ tmva/sofie/test/input_models/Pad.onnx | Bin 0 -> 211 bytes tmva/sofie_parsers/CMakeLists.txt | 1 + tmva/sofie_parsers/src/ParseConstant.cxx | 46 ++++- tmva/sofie_parsers/src/ParsePad.cxx | 69 +++++++ tmva/sofie_parsers/src/RModelParser_ONNX.cxx | 4 +- 9 files changed, 336 insertions(+), 9 deletions(-) create mode 100644 tmva/sofie/inc/TMVA/ROperator_Pad.hxx create mode 100644 tmva/sofie/test/input_models/Pad.onnx create mode 100644 tmva/sofie_parsers/src/ParsePad.cxx diff --git a/tmva/sofie/CMakeLists.txt b/tmva/sofie/CMakeLists.txt index 198c24d2e9b16..35d41f7159d42 100644 --- a/tmva/sofie/CMakeLists.txt +++ b/tmva/sofie/CMakeLists.txt @@ -52,6 +52,8 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie TMVA/ROperator_TopK.hxx TMVA/ROperator_Tile.hxx TMVA/ROperator_Split.hxx + TMVA/ROperator_SubGraph.hxx + TMVA/ROperator_Pad.hxx TMVA/SOFIE_common.hxx TMVA/SOFIEHelpers.hxx diff --git a/tmva/sofie/inc/TMVA/ROperator_Pad.hxx b/tmva/sofie/inc/TMVA/ROperator_Pad.hxx new file mode 100644 index 0000000000000..7815851808796 --- /dev/null +++ b/tmva/sofie/inc/TMVA/ROperator_Pad.hxx @@ -0,0 +1,200 @@ +#ifndef TMVA_SOFIE_ROPERATOR_Pad +#define TMVA_SOFIE_ROPERATOR_Pad + +#include "TMVA/SOFIE_common.hxx" +#include "TMVA/ROperator.hxx" +#include "TMVA/RModel.hxx" + +#include + +namespace TMVA{ +namespace Experimental{ +namespace SOFIE{ + +template +class ROperator_Pad final : public ROperator +{ +public: + enum EMode { kConstant, kReflect, kEdge, kWrap 
}; +private: + + std::string fNX; + std::string fNP; + std::string fNCV; + std::string fNAX; + std::string fNY; + T fConstantValue; + EMode fMode; + std::vector fInputShape; + std::vector fOutputShape; + std::vector> fPads; + +public: + + ROperator_Pad(){} + ROperator_Pad(const std::string & nameX, const std::string & nameP, const std::string & nameCV, + const std::string & nameAX, const std::string & nameY, const std::string & mode) : + fNX(UTILITY::Clean_name(nameX)), fNP(UTILITY::Clean_name(nameP)), + fNCV(UTILITY::Clean_name(nameCV)), fNAX(UTILITY::Clean_name(nameAX)), + fNY(UTILITY::Clean_name(nameY)) + { + fMode = kConstant; + if (mode == "constant") + fMode = kConstant; + else if (mode == "reflect") + fMode = kReflect; + else if (mode == "edge") + fMode = kEdge; + else if (mode == "wrap") + fMode = kWrap; + } + + std::vector TypeInference(std::vector input){ + return input; + } + + std::vector> ShapeInference(std::vector> input){ + auto ret = input; //suggest copy to compiler + return ret; + } + + void Initialize(RModel& model){ + if (model.CheckIfTensorAlreadyExist(fNX) == false){ //input must be a graph input, or already initialized intermediate tensor + throw std::runtime_error("TMVA SOFIE Pad Op Input Tensor is not found in model"); + } + + fInputShape = model.GetTensorShape(fNX); + + if (fMode != EMode::kConstant) { + throw std::runtime_error("TMVA SOFIE Pad Op supports now only Constant mode"); + } + + // get pads data + int64_t * padsData = nullptr; + if (model.IsInitializedTensor(fNP)) { + padsData = static_cast(model.GetInitializedTensorData(fNP).get()); + } else { + throw std::runtime_error("TMVA SOFIE Pad Op supports now only initialized Pads data"); + } + // get constant value + fConstantValue = 0; + if (!fNCV.empty()) { + if (model.IsInitializedTensor(fNCV)) { + T * cData = static_cast(model.GetInitializedTensorData(fNCV).get()); + fConstantValue = cData[0]; + } else { + throw std::runtime_error("TMVA SOFIE Pad Op supports now only initialized 
Constant Value data"); + } + } + std::vector axes; + if (!fNAX.empty()) { + if (model.IsInitializedTensor(fNAX)) { + auto shape = model.GetTensorShape(fNAX); + // it should be a 1D tensor + size_t nax = shape[0]; + // switch types + if (model.GetTensorType(fNAX) == ETensorType::INT64) { + auto data = static_cast(model.GetInitializedTensorData(fNAX).get()); + axes = std::vector(data, data + nax); + } else if (model.GetTensorType(fNAX) == ETensorType::INT32) { + auto data = static_cast(model.GetInitializedTensorData(fNAX).get()); + axes.resize(nax); + for (size_t i = 0; i < nax; i++) + axes[i] = data[i]; + } else { + throw std::runtime_error("TMVA SOFIE Pad Op invalid input Axes type"); + } + } else { + throw std::runtime_error("TMVA SOFIE Pad Op supports now only initialized Axes data"); + } + } + + + fOutputShape = fInputShape; + size_t axesSize = axes.size(); + if (axesSize == 0) { + for (size_t i = 0; i < fInputShape.size(); i++) { + axes.push_back(i); + } + axesSize = fInputShape.size(); + } + fPads.resize(fInputShape.size()); + for (size_t i = 0; i < fInputShape.size(); i++) { + if (axes[i] < 0) axes[i] += fInputShape.size(); + if (axes[i] == int64_t(i)) { + fPads[i].first = padsData[i]; + fPads[i].second = padsData[axesSize + i]; + int64_t outDim = static_cast(fOutputShape[i]) + fPads[i].first + fPads[i].second; + if (outDim < 0) + throw std::runtime_error("TMVA SOFIE Pad Op : invalid Pads values"); + fOutputShape[i] = outDim; + } + } + + model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fOutputShape); + + if (model.Verbose()) { + std::cout << "initializing Pad operator with pads .. 
: "; + for (auto & p : fPads) + std::cout << "{ " << p.first << " , " << p.second << "} "; + std::cout << std::endl; + std::cout << "Pad: " << fNX << " " << ConvertShapeToString(fInputShape) << " -> " << fNY << " with shape " << ConvertShapeToString(fOutputShape) + << std::endl; + } + + } + + + std::string Generate(std::string OpName){ + OpName = "op_" + OpName; + if (fOutputShape.empty()){ + throw std::runtime_error("TMVA SOFIE Operator Pad called to Generate without being initialized first"); + } + std::stringstream out; + auto inputStride = UTILITY::ComputeStrideFromShape(fInputShape); + auto outStride = UTILITY::ComputeStrideFromShape(fOutputShape); + out << "\n//------ Pad\n"; + // fill first output tensor with the constant values + int length = ConvertShapeToLength(fOutputShape); + int dims = fOutputShape.size(); + out << "std::fill(tensor_" << fNY << ", tensor_" << fNY << " + " << length << "," + << fConstantValue << ");\n"; + + // copy now data from input tensor in output ones + for (int i = 0; i < dims; i++) { + for (int j = 1; j < i; j++) out << SP; + out << "for (int id" << i << " = 0; id" << i << " < " << fInputShape[i] << "; id" + << i << "++) {\n"; + } + // compute index from strides + //linear_index = i_1 * stride[0] + i_2 * stride[1] + ... 
+ i_N * stride[N-1] + for (int j = 0; j < dims; j++) out << SP; + out << "tensor_" << fNY << "["; + for (int i = 0; i < dims; i++) { + out << "(id" << i; + if (fPads[i].first != 0) out << " + " << fPads[i].first; + out << ")"; + if (i < dims-1) out << " * " << outStride[i] << " + "; + } + out << "] =\n tensor_" << fNX << "["; + for (int i = 0; i < dims; i++) { + out << "id" << i; + if (i < dims-1) out << " * " << inputStride[i] << " + "; + } + out << "];\n"; + for (int i = dims-1; i >= 0; i--) { + for (int j = 1; j < i; j++) out << SP; + out << "}\n"; + } + + return out.str(); + } + +}; + +}//SOFIE +}//Experimental +}//TMVA + + +#endif //TMVA_SOFIE_ROPERATOR_Pad diff --git a/tmva/sofie/inc/TMVA/ROperator_Split.hxx b/tmva/sofie/inc/TMVA/ROperator_Split.hxx index 2218f5f7187e2..d3aa0a3604c5b 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Split.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Split.hxx @@ -1,5 +1,5 @@ -#ifndef TMVA_SOFIE_ROPERATOR_Swish -#define TMVA_SOFIE_ROPERATOR_Swish +#ifndef TMVA_SOFIE_ROPERATOR_Split +#define TMVA_SOFIE_ROPERATOR_Split #include "TMVA/SOFIE_common.hxx" #include "TMVA/ROperator.hxx" diff --git a/tmva/sofie/test/TestCustomModelsFromONNX.cxx b/tmva/sofie/test/TestCustomModelsFromONNX.cxx index 8be28073c5ce0..877a4515bf1b3 100644 --- a/tmva/sofie/test/TestCustomModelsFromONNX.cxx +++ b/tmva/sofie/test/TestCustomModelsFromONNX.cxx @@ -300,6 +300,8 @@ #include "Tile5D_FromONNX.hxx" #include "input_models/references/Tile5D.ref.hxx" +#include "Pad_FromONNX.hxx" + #include "gtest/gtest.h" constexpr float DEFAULT_TOLERANCE = 1e-3f; @@ -2898,4 +2900,21 @@ TEST(ONNX, Tile5D) { for (size_t i = 0; i < output.size(); ++i) { EXPECT_LE(std::abs(output[i] - correct[i]), TOLERANCE); } +} +TEST(ONNX, Pad) { + // add constant pad values of zeros + // input tensor [1,2,2] and pad in (1,0),(0,1),(2,1) -> with shape (2,3,5) + std::vector input = {1,2,3,4}; + std::vector correct = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 3, + 4, 0, 0, 0, 0, 
0, 0, 0}; + TMVA_SOFIE_Pad::Session s("Pad_FromONNX.dat"); + std::vector output(s.infer(input.data())); + + // Checking the output size + EXPECT_EQ(output.size(), correct.size()); + + // Checking every output value, one by one + for (size_t i = 0; i < output.size(); i++) { + EXPECT_EQ(output[i], correct[i]); + } } \ No newline at end of file diff --git a/tmva/sofie/test/input_models/Pad.onnx b/tmva/sofie/test/input_models/Pad.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3e2c0d8af8b0cce9edbc4d6136812ddb4fa23d52 GIT binary patch literal 211 zcmdQ<_?=#NnKuS6q^qSE8lH#R?YF zlHy values(length); + if (t.int32_data_size() == int(length)) { + for (size_t i = 0; i < length; i++) + values[i] = t.int32_data(i); + } else { + auto raw_data_ptr = reinterpret_cast(const_cast(t.raw_data().c_str())); + std::memcpy(values.data(), raw_data_ptr, length * sizeof(int32_t)); + } + op.reset(new ROperator_Constant("int32_t", values, shape, input_name, output_name)); + break; + } case ETensorType::INT64: { std::vector values(length); - // case empty shape with length=1 represents scalars - auto raw_data_ptr = reinterpret_cast(const_cast(t.raw_data().c_str())); - std::memcpy(values.data(), raw_data_ptr, length * sizeof(int64_t)); + if (t.int64_data_size() == int(length)) { + for (size_t i = 0; i < length; i++) + values[i] = t.int64_data(i); + } else { // cannot get size of raw data : assume is ok + auto raw_data_ptr = reinterpret_cast(const_cast(t.raw_data().c_str())); + std::memcpy(values.data(), raw_data_ptr, length * sizeof(int64_t)); + } op.reset(new ROperator_Constant("int64_t", values, shape, input_name, output_name)); break; } case ETensorType::FLOAT: { std::vector values(length); - auto raw_data_ptr = reinterpret_cast(const_cast(t.raw_data().c_str())); - std::memcpy(values.data(), raw_data_ptr, length * sizeof(float)); + if (t.float_data_size() == int(length)) { + for (size_t i = 0; i < length; i++) + values[i] = t.float_data(i); + } else { + auto 
raw_data_ptr = reinterpret_cast(const_cast(t.raw_data().c_str())); + std::memcpy(values.data(), raw_data_ptr, length * sizeof(float)); + } op.reset(new ROperator_Constant("float",values, shape, input_name, output_name)); break; } + case ETensorType::DOUBLE: { + std::vector values(length); + if (t.double_data_size() == int(length)) { + for (size_t i = 0; i < length; i++) + values[i] = t.double_data(i); + } else { + auto raw_data_ptr = reinterpret_cast(const_cast(t.raw_data().c_str())); + std::memcpy(values.data(), raw_data_ptr, length * sizeof(double)); + } + op.reset(new ROperator_Constant("double",values, shape, input_name, output_name)); + break; + } case ETensorType::BOOL: { std::vector values(length); auto raw_data_ptr = reinterpret_cast(const_cast(t.raw_data().c_str())); diff --git a/tmva/sofie_parsers/src/ParsePad.cxx b/tmva/sofie_parsers/src/ParsePad.cxx new file mode 100644 index 0000000000000..9b7f4dde2e8a7 --- /dev/null +++ b/tmva/sofie_parsers/src/ParsePad.cxx @@ -0,0 +1,69 @@ +#include "TMVA/RModelParser_ONNX.hxx" +#include "TMVA/ROperator_Pad.hxx" +#include "onnx_proto3.pb.h" + +namespace TMVA { +namespace Experimental { +namespace SOFIE { + +ParserFuncSignature ParsePad = [](RModelParser_ONNX &parser, const onnx::NodeProto &nodeproto) { + ETensorType input_type; + + std::string input_name = nodeproto.input(0); + if (parser.IsRegisteredTensorType(input_name)) { + input_type = parser.GetTensorType(input_name); + } else { + throw std::runtime_error("TMVA::SOFIE ONNX Parser Pad op has input tensor" + input_name + + " but its type is not yet registered"); + } + + if (nodeproto.input_size() < 2) { + throw std::runtime_error("TMVA::SOFIE ONNX Parser Pad op has invalid input size < 2"); + } + + // pads is second inputs + std::string pads_name = nodeproto.input(1); + if (!parser.IsRegisteredTensorType(pads_name)) { + throw std::runtime_error("TMVA::SOFIE ONNX Parser Pad op has input tensor" + pads_name + + " but its type is not yet registered"); + } + // in 
case of optional inputs + std::string cvalue_name; + if (nodeproto.input_size() > 2) { + cvalue_name = nodeproto.input(2); + } + std::string axes_name; + if (nodeproto.input_size() > 3) { + axes_name = nodeproto.input(3); + } + + // get attributes + std::string mode = "constant"; + if (nodeproto.attribute_size() > 0 ) { + std::string attribute_name = nodeproto.attribute(0).name(); + if (attribute_name == "mode") { + mode = nodeproto.attribute(0).s(); + } + } + std::string output_name = nodeproto.output(0); + + std::unique_ptr op; + switch (input_type) { + case ETensorType::FLOAT: + op.reset(new ROperator_Pad(input_name, pads_name, cvalue_name, axes_name, output_name, mode)); + break; + default: + throw std::runtime_error("TMVA::SOFIE - Unsupported - Operator Pad does not yet support input type " + + std::to_string(static_cast(input_type))); + } + + if (!parser.IsRegisteredTensorType(output_name)) { + parser.RegisterTensorType(output_name, input_type); + } + + return op; +}; + +} // namespace SOFIE +} // namespace Experimental +} // namespace TMVA diff --git a/tmva/sofie_parsers/src/RModelParser_ONNX.cxx b/tmva/sofie_parsers/src/RModelParser_ONNX.cxx index 23549646486fb..11deee51a1de7 100644 --- a/tmva/sofie_parsers/src/RModelParser_ONNX.cxx +++ b/tmva/sofie_parsers/src/RModelParser_ONNX.cxx @@ -79,6 +79,7 @@ extern ParserFuncSignature ParseTopK; extern ParserFuncSignature ParseTile; extern ParserFuncSignature ParseSplit; extern ParserFuncSignature ParseIf; +extern ParserFuncSignature ParsePad; // Decalaration of fused operators extern ParserFuseFuncSignature ParseFuseConvAdd; extern ParserFuseFuncSignature ParseFuseConvTransposeAdd; @@ -213,6 +214,7 @@ RModelParser_ONNX::RModelParser_ONNX() noexcept : fOperatorsMapImpl(std::make_un RegisterOperator("Tile", ParseTile); RegisterOperator("Split", ParseSplit); RegisterOperator("If", ParseIf); + RegisterOperator("Pad", ParsePad); } // Destructor of the parser @@ -367,7 +369,7 @@ void RModelParser_ONNX::CheckGraph(const 
onnx::GraphProto & graph, int & level, const std::string opType = node.op_type(); if (fVerbose) { std::cout << "\tOperator " << i << " : " << opType << " (" << node.name() << "), " << graph.node(i).input_size() - << " inputs : {" + << " inputs : {"; for (int j = 0; j < graph.node(i).input_size(); j++) { std::cout << graph.node(i).input(j); if (j < graph.node(i).input_size() - 1)