[tmva][sofie] Add new Pad operator
Add the new Pad operator and a corresponding test.

Also fix the parsing of Constant for the case where the data are not stored as
raw_data in the onnx::TensorProto class but in the explicit, typed data fields.
lmoneta committed Dec 11, 2024
1 parent 1ab6713 commit 8a7d0d4
Showing 9 changed files with 336 additions and 9 deletions.
2 changes: 2 additions & 0 deletions tmva/sofie/CMakeLists.txt
@@ -52,6 +52,8 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie
TMVA/ROperator_TopK.hxx
TMVA/ROperator_Tile.hxx
TMVA/ROperator_Split.hxx
TMVA/ROperator_SubGraph.hxx
TMVA/ROperator_Pad.hxx
TMVA/SOFIE_common.hxx
TMVA/SOFIEHelpers.hxx

200 changes: 200 additions & 0 deletions tmva/sofie/inc/TMVA/ROperator_Pad.hxx
@@ -0,0 +1,200 @@
#ifndef TMVA_SOFIE_ROPERATOR_Pad
#define TMVA_SOFIE_ROPERATOR_Pad

#include "TMVA/SOFIE_common.hxx"
#include "TMVA/ROperator.hxx"
#include "TMVA/RModel.hxx"

#include <sstream>

namespace TMVA{
namespace Experimental{
namespace SOFIE{

template <typename T>
class ROperator_Pad final : public ROperator
{
public:
enum EMode { kConstant, kReflect, kEdge, kWrap };
private:

std::string fNX;
std::string fNP;
std::string fNCV;
std::string fNAX;
std::string fNY;
T fConstantValue;
EMode fMode;
std::vector<size_t> fInputShape;
std::vector<size_t> fOutputShape;
std::vector<std::pair<int64_t, int64_t>> fPads;

public:

ROperator_Pad(){}
ROperator_Pad(const std::string & nameX, const std::string & nameP, const std::string & nameCV,
const std::string & nameAX, const std::string & nameY, const std::string & mode) :
fNX(UTILITY::Clean_name(nameX)), fNP(UTILITY::Clean_name(nameP)),
fNCV(UTILITY::Clean_name(nameCV)), fNAX(UTILITY::Clean_name(nameAX)),
fNY(UTILITY::Clean_name(nameY))
{
fMode = kConstant;
if (mode == "constant")
fMode = kConstant;
else if (mode == "reflect")
fMode = kReflect;
else if (mode == "edge")
fMode = kEdge;
else if (mode == "wrap")
fMode = kWrap;
}

std::vector<ETensorType> TypeInference(std::vector<ETensorType> input){
return input;
}

std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input){
auto ret = input; //suggest copy to compiler
return ret;
}

void Initialize(RModel& model){
if (model.CheckIfTensorAlreadyExist(fNX) == false){ //input must be a graph input, or already initialized intermediate tensor
throw std::runtime_error("TMVA SOFIE Pad Op Input Tensor is not found in model");
}

fInputShape = model.GetTensorShape(fNX);

if (fMode != EMode::kConstant) {
throw std::runtime_error("TMVA SOFIE Pad Op supports now only Constant mode");
}

// get pads data
int64_t * padsData = nullptr;
if (model.IsInitializedTensor(fNP)) {
padsData = static_cast<int64_t*>(model.GetInitializedTensorData(fNP).get());
} else {
throw std::runtime_error("TMVA SOFIE Pad Op supports now only initialized Pads data");
}
// get constant value
fConstantValue = 0;
if (!fNCV.empty()) {
if (model.IsInitializedTensor(fNCV)) {
T * cData = static_cast<T*>(model.GetInitializedTensorData(fNCV).get());
fConstantValue = cData[0];
} else {
throw std::runtime_error("TMVA SOFIE Pad Op supports now only initialized Constant Value data");
}
}
std::vector<int64_t> axes;
if (!fNAX.empty()) {
if (model.IsInitializedTensor(fNAX)) {
auto shape = model.GetTensorShape(fNAX);
// it should be a 1D tensor
size_t nax = shape[0];
// switch types
if (model.GetTensorType(fNAX) == ETensorType::INT64) {
auto data = static_cast<int64_t*>(model.GetInitializedTensorData(fNAX).get());
axes = std::vector<int64_t>(data, data + nax);
} else if (model.GetTensorType(fNAX) == ETensorType::INT32) {
auto data = static_cast<int32_t*>(model.GetInitializedTensorData(fNAX).get());
axes.resize(nax);
for (size_t i = 0; i < nax; i++)
axes[i] = data[i];
} else {
throw std::runtime_error("TMVA SOFIE Pad Op invalid input Axes type");
}
} else {
throw std::runtime_error("TMVA SOFIE Pad Op supports now only initialized Axes data");
}
}


fOutputShape = fInputShape;
size_t axesSize = axes.size();
if (axesSize == 0) {
for (size_t i = 0; i < fInputShape.size(); i++) {
axes.push_back(i);
}
axesSize = fInputShape.size();
}
fPads.resize(fInputShape.size());
for (size_t i = 0; i < fInputShape.size(); i++) {
if (axes[i] < 0) axes[i] += fInputShape.size();
if (axes[i] == int64_t(i)) {
fPads[i].first = padsData[i];
fPads[i].second = padsData[axesSize + i];
int64_t outDim = static_cast<int64_t>(fOutputShape[i]) + fPads[i].first + fPads[i].second;
if (outDim < 0)
throw std::runtime_error("TMVA SOFIE Pad Op : invalid Pads values");
fOutputShape[i] = outDim;
}
}

model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fOutputShape);

if (model.Verbose()) {
std::cout << "initializing Pad operator with pads .. : ";
for (auto & p : fPads)
std::cout << "{ " << p.first << " , " << p.second << "} ";
std::cout << std::endl;
std::cout << "Pad: " << fNX << " " << ConvertShapeToString(fInputShape) << " -> " << fNY << " with shape " << ConvertShapeToString(fOutputShape)
<< std::endl;
}

}


std::string Generate(std::string OpName){
OpName = "op_" + OpName;
if (fOutputShape.empty()){
throw std::runtime_error("TMVA SOFIE Operator Pad called to Generate without being initialized first");
}
std::stringstream out;
auto inputStride = UTILITY::ComputeStrideFromShape(fInputShape);
auto outStride = UTILITY::ComputeStrideFromShape(fOutputShape);
out << "\n//------ Pad\n";
// first fill the output tensor with the constant value
int length = ConvertShapeToLength(fOutputShape);
int dims = fOutputShape.size();
out << "std::fill(tensor_" << fNY << ", tensor_" << fNY << " + " << length << ","
<< fConstantValue << ");\n";

// now copy the data from the input tensor into the output one
for (int i = 0; i < dims; i++) {
for (int j = 1; j < i; j++) out << SP;
out << "for (int id" << i << " = 0; id" << i << " < " << fInputShape[i] << "; id"
<< i << "++) {\n";
}
// compute index from strides
//linear_index = i_1 * stride[0] + i_2 * stride[1] + ... + i_N * stride[N-1]
for (int j = 0; j < dims; j++) out << SP;
out << "tensor_" << fNY << "[";
for (int i = 0; i < dims; i++) {
out << "(id" << i;
if (fPads[i].first != 0) out << " + " << fPads[i].first;
out << ")";
if (i < dims-1) out << " * " << outStride[i] << " + ";
}
out << "] =\n tensor_" << fNX << "[";
for (int i = 0; i < dims; i++) {
out << "id" << i;
if (i < dims-1) out << " * " << inputStride[i] << " + ";
}
out << "];\n";
for (int i = dims-1; i >= 0; i--) {
for (int j = 1; j < i; j++) out << SP;
out << "}\n";
}

return out.str();
}

};

}//SOFIE
}//Experimental
}//TMVA


#endif //TMVA_SOFIE_ROPERATOR_Pad
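For reference, a minimal sketch (not part of the commit) of roughly what Generate() emits, for a hypothetical float input tensor_X of shape (2,3) with constant value 0 and pads begin=(1,0), end=(1,2), so that the output tensor_Y has shape (4,5). The tensor names, shapes, and pad values are made up for illustration only.

//------ Pad (illustrative expansion, hypothetical names and shapes)
// fill the output with the constant value, then copy the input block into its
// padded position using the precomputed strides: output {5,1}, input {3,1}
std::fill(tensor_Y, tensor_Y + 20, 0);
for (int id0 = 0; id0 < 2; id0++) {
   for (int id1 = 0; id1 < 3; id1++) {
      tensor_Y[(id0 + 1) * 5 + (id1)] =
         tensor_X[id0 * 3 + id1];
   }
}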
4 changes: 2 additions & 2 deletions tmva/sofie/inc/TMVA/ROperator_Split.hxx
@@ -1,5 +1,5 @@
#ifndef TMVA_SOFIE_ROPERATOR_Swish
#define TMVA_SOFIE_ROPERATOR_Swish
#ifndef TMVA_SOFIE_ROPERATOR_Split
#define TMVA_SOFIE_ROPERATOR_Split

#include "TMVA/SOFIE_common.hxx"
#include "TMVA/ROperator.hxx"
19 changes: 19 additions & 0 deletions tmva/sofie/test/TestCustomModelsFromONNX.cxx
@@ -300,6 +300,8 @@
#include "Tile5D_FromONNX.hxx"
#include "input_models/references/Tile5D.ref.hxx"

#include "Pad_FromONNX.hxx"

#include "gtest/gtest.h"

constexpr float DEFAULT_TOLERANCE = 1e-3f;
@@ -2898,4 +2900,21 @@ TEST(ONNX, Tile5D) {
for (size_t i = 0; i < output.size(); ++i) {
EXPECT_LE(std::abs(output[i] - correct[i]), TOLERANCE);
}
}
TEST(ONNX, Pad) {
// constant padding with zero values
// input tensor of shape [1,2,2] and pads (1,0),(0,1),(2,1) -> output shape (2,3,5)
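// output shape per axis: 1+1+0 = 2, 2+0+1 = 3, 2+2+1 = 5 -> 2*3*5 = 30 elements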
std::vector<float> input = {1,2,3,4};
std::vector<float> correct = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 0, 0};
TMVA_SOFIE_Pad::Session s("Pad_FromONNX.dat");
std::vector<float> output(s.infer(input.data()));

// Checking the output size
EXPECT_EQ(output.size(), correct.size());

// Checking every output value, one by one
for (size_t i = 0; i < output.size(); i++) {
EXPECT_EQ(output[i], correct[i]);
}
}
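As a usage note, the Pad_FromONNX.hxx header and the Pad_FromONNX.dat weight file used by this test are presumably produced from the Pad.onnx model with the usual SOFIE workflow; a minimal sketch, assuming the standard RModelParser_ONNX / RModel API (Parse, Generate, OutputGenerated):

#include "TMVA/RModelParser_ONNX.hxx"

int main() {
   using namespace TMVA::Experimental::SOFIE;
   RModelParser_ONNX parser;
   RModel model = parser.Parse("Pad.onnx");   // parse the ONNX graph into an RModel
   model.Generate();                          // generate the inference code
   model.OutputGenerated("Pad_FromONNX.hxx"); // write the header (and the .dat weight file)
   return 0;
}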
Binary file added tmva/sofie/test/input_models/Pad.onnx
1 change: 1 addition & 0 deletions tmva/sofie_parsers/CMakeLists.txt
@@ -65,6 +65,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofieParser
src/ParseTile.cxx
src/ParseSplit.cxx
src/ParseIf.cxx
src/ParsePad.cxx
${PROTO_SRCS}
LIBRARIES PUBLIC
protobuf::libprotobuf
46 changes: 40 additions & 6 deletions tmva/sofie_parsers/src/ParseConstant.cxx
@@ -58,22 +58,56 @@ ParserFuncSignature ParseConstant = [](RModelParser_ONNX &parser, const onnx::NodeProto &nodeproto) {
throw std::runtime_error("TMVA::SOFIE ONNX Parser ConstantOfShape has invalid tensor size " + std::to_string(length));
}
switch(output_type) {
// need to use raw_data() to get the tensor values
// the tensor values are stored either in the explicit typed data fields or in raw_data,
// depending on how the operator was created. The size of the raw_data cannot be checked here
case ETensorType::INT32: {
std::vector<int32_t> values(length);
if (t.int32_data_size() == int(length)) {
for (size_t i = 0; i < length; i++)
values[i] = t.int32_data(i);
} else {
auto raw_data_ptr = reinterpret_cast<int32_t *>(const_cast<char *>(t.raw_data().c_str()));
std::memcpy(values.data(), raw_data_ptr, length * sizeof(int32_t));
}
op.reset(new ROperator_Constant<int32_t>("int32_t", values, shape, input_name, output_name));
break;
}
case ETensorType::INT64: {
std::vector<int64_t> values(length);
// an empty shape with length=1 represents a scalar
auto raw_data_ptr = reinterpret_cast<int64_t *>(const_cast<char *>(t.raw_data().c_str()));
std::memcpy(values.data(), raw_data_ptr, length * sizeof(int64_t));
if (t.int64_data_size() == int(length)) {
for (size_t i = 0; i < length; i++)
values[i] = t.int64_data(i);
} else { // cannot check the size of the raw data: assume it is consistent
auto raw_data_ptr = reinterpret_cast<int64_t *>(const_cast<char *>(t.raw_data().c_str()));
std::memcpy(values.data(), raw_data_ptr, length * sizeof(int64_t));
}
op.reset(new ROperator_Constant<int64_t>("int64_t", values, shape, input_name, output_name));
break;
}
case ETensorType::FLOAT: {
std::vector<float> values(length);
auto raw_data_ptr = reinterpret_cast<float *>(const_cast<char *>(t.raw_data().c_str()));
std::memcpy(values.data(), raw_data_ptr, length * sizeof(float));
if (t.float_data_size() == int(length)) {
for (size_t i = 0; i < length; i++)
values[i] = t.float_data(i);
} else {
auto raw_data_ptr = reinterpret_cast<float *>(const_cast<char *>(t.raw_data().c_str()));
std::memcpy(values.data(), raw_data_ptr, length * sizeof(float));
}
op.reset(new ROperator_Constant<float>("float",values, shape, input_name, output_name));
break;
}
case ETensorType::DOUBLE: {
std::vector<double> values(length);
if (t.double_data_size() == int(length)) {
for (size_t i = 0; i < length; i++)
values[i] = t.double_data(i);
} else {
auto raw_data_ptr = reinterpret_cast<double *>(const_cast<char *>(t.raw_data().c_str()));
std::memcpy(values.data(), raw_data_ptr, length * sizeof(double));
}
op.reset(new ROperator_Constant<double>("double",values, shape, input_name, output_name));
break;
}
case ETensorType::BOOL: {
std::vector<bool> values(length);
auto raw_data_ptr = reinterpret_cast<bool *>(const_cast<char *>(t.raw_data().c_str()));
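For context on the ParseConstant change: an onnx::TensorProto can carry its values either in the typed data fields or packed into raw_data. A minimal sketch of the two layouts, illustrative only and assuming the generated protobuf accessors shown in the diff (add_int64_data, set_raw_data):

#include "onnx_proto3.pb.h"
#include <cstdint>
#include <string>

// the same two-element INT64 constant stored in the two possible ways, which is
// why the parser now checks int64_data_size() first and falls back to raw_data
void FillConstantExamples(onnx::TensorProto &typed, onnx::TensorProto &raw)
{
   typed.set_data_type(onnx::TensorProto::INT64);
   typed.add_dims(2);
   typed.add_int64_data(3); // explicit, typed data field
   typed.add_int64_data(5);

   raw.set_data_type(onnx::TensorProto::INT64);
   raw.add_dims(2);
   const int64_t values[2] = {3, 5};
   // raw bytes: the element count cannot be validated without knowing the element size
   raw.set_raw_data(std::string(reinterpret_cast<const char *>(values), sizeof(values)));
}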
69 changes: 69 additions & 0 deletions tmva/sofie_parsers/src/ParsePad.cxx
@@ -0,0 +1,69 @@
#include "TMVA/RModelParser_ONNX.hxx"
#include "TMVA/ROperator_Pad.hxx"
#include "onnx_proto3.pb.h"

namespace TMVA {
namespace Experimental {
namespace SOFIE {

ParserFuncSignature ParsePad = [](RModelParser_ONNX &parser, const onnx::NodeProto &nodeproto) {
ETensorType input_type;

std::string input_name = nodeproto.input(0);
if (parser.IsRegisteredTensorType(input_name)) {
input_type = parser.GetTensorType(input_name);
} else {
throw std::runtime_error("TMVA::SOFIE ONNX Parser Pad op has input tensor" + input_name +
" but its type is not yet registered");
}

if (nodeproto.input_size() < 2) {
throw std::runtime_error("TMVA::SOFIE ONNX Parser Pad op has invalid input size < 2");
}

// pads is the second input
std::string pads_name = nodeproto.input(1);
if (!parser.IsRegisteredTensorType(pads_name)) {
throw std::runtime_error("TMVA::SOFIE ONNX Parser Pad op has input tensor" + pads_name +
" but its type is not yet registered");
}
// optional inputs: constant value and axes
std::string cvalue_name;
if (nodeproto.input_size() > 2) {
cvalue_name = nodeproto.input(2);
}
std::string axes_name;
if (nodeproto.input_size() > 3) {
axes_name = nodeproto.input(3);
}

// get attributes
std::string mode = "constant";
if (nodeproto.attribute_size() > 0 ) {
std::string attribute_name = nodeproto.attribute(0).name();
if (attribute_name == "mode") {
mode = nodeproto.attribute(0).s();
}
}
std::string output_name = nodeproto.output(0);

std::unique_ptr<ROperator> op;
switch (input_type) {
case ETensorType::FLOAT:
op.reset(new ROperator_Pad<float>(input_name, pads_name, cvalue_name, axes_name, output_name, mode));
break;
default:
throw std::runtime_error("TMVA::SOFIE - Unsupported - Operator Pad does not yet support input type " +
std::to_string(static_cast<int>(input_type)));
}

if (!parser.IsRegisteredTensorType(output_name)) {
parser.RegisterTensorType(output_name, input_type);
}

return op;
};

} // namespace SOFIE
} // namespace Experimental
} // namespace TMVA
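Not shown in this excerpt: the new parse function presumably also has to be registered with the parser so that ONNX "Pad" nodes are dispatched to it. A hypothetical sketch, assuming RegisterOperator is the existing registration hook of RModelParser_ONNX and that ParsePad is declared in TMVA/RModelParser_ONNX.hxx:

#include "TMVA/RModelParser_ONNX.hxx"

// hypothetical hook-up; the actual registration is not part of the shown diff
void RegisterPadParser(TMVA::Experimental::SOFIE::RModelParser_ONNX &parser)
{
   // map the ONNX op name "Pad" to the ParsePad function defined above
   parser.RegisterOperator("Pad", TMVA::Experimental::SOFIE::ParsePad);
}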