From 9edf0fafa307dbfcba33ef3d0b583df2ae2092bf Mon Sep 17 00:00:00 2001 From: moneta Date: Mon, 10 Nov 2025 17:30:32 +0100 Subject: [PATCH 01/12] [tmva][sofie] Apply fixes for supporting Dynamic tensors Add missing support for Dynamic tensors for some operators. With this commit a full support for dynamic tensor is available for ParticleNet model. Fix also a bug in Concat operator when the concat axis is not the first one --- tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx | 189 ++++++--- tmva/sofie/inc/TMVA/ROperator_Cast.hxx | 21 +- tmva/sofie/inc/TMVA/ROperator_Comparision.hxx | 362 +++++++++++------- tmva/sofie/inc/TMVA/ROperator_Concat.hxx | 27 +- tmva/sofie/inc/TMVA/ROperator_Constant.hxx | 4 +- tmva/sofie/inc/TMVA/ROperator_Conv.hxx | 15 +- tmva/sofie/inc/TMVA/ROperator_Gather.hxx | 9 +- tmva/sofie/inc/TMVA/ROperator_Range.hxx | 134 +++++-- tmva/sofie/inc/TMVA/ROperator_Reduce.hxx | 2 +- tmva/sofie/inc/TMVA/ROperator_Reshape.hxx | 8 +- tmva/sofie/inc/TMVA/ROperator_Slice.hxx | 14 +- tmva/sofie/inc/TMVA/ROperator_Tile.hxx | 31 +- tmva/sofie/inc/TMVA/ROperator_TopK.hxx | 59 ++- tmva/sofie/inc/TMVA/SOFIE_common.hxx | 6 + tmva/sofie/src/RModel.cxx | 22 +- 15 files changed, 598 insertions(+), 305 deletions(-) diff --git a/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx b/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx index bcc0e52a40ca3..f73bd34e53386 100644 --- a/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx @@ -23,10 +23,11 @@ struct NaryOperatorTraits { static const std::string Name() {return "Max";} static std::string Op(const std::string& res, std::vector& inputs) { std::stringstream out; - out << "\t" << "\t" << res << " = " << inputs[0] << ";\n"; + out << res << " = std::max({ " << inputs[0]; for (size_t i = 1; i < inputs.size(); i++) { - out << "\t" << "\t" << res << " = std::max(" << res << ", " << inputs[i] << ");\n"; + out << ", " << inputs[i]; } + out << "});\n"; return out.str(); } }; @@ -36,10 +37,11 @@ struct NaryOperatorTraits { static const std::string Name() {return "Min";} static std::string Op(const std::string& res, std::vector& inputs) { std::stringstream out; - out << "\t" << "\t" << res << " = " << inputs[0] << ";\n"; + out << res << " = std::min({ " << inputs[0]; for (size_t i = 1; i < inputs.size(); i++) { - out << "\t" << "\t" << res << " = std::min(" << res << ", " << inputs[i] << ");\n"; + out << ", " << inputs[i]; } + out << "});\n"; return out.str(); } }; @@ -52,7 +54,7 @@ struct NaryOperatorTraits { static const std::string Name() {return "Mean";} static std::string Op(const std::string& res, std::vector& inputs) { std::stringstream out; - out << "\t" << "\t" << res << " = (" << inputs[0]; + out << res << " = (" << inputs[0]; for (size_t i = 1; i < inputs.size(); i++) { out << " + " << inputs[i]; } @@ -66,7 +68,7 @@ struct NaryOperatorTraits { static const std::string Name() {return "Sum";} static std::string Op(const std::string& res, std::vector& inputs) { std::stringstream out; - out << "\t" << "\t" << res << " = " << inputs[0]; + out << res << " = " << inputs[0]; for (size_t i = 1; i < inputs.size(); i++) { out << " + " << inputs[i]; } @@ -83,10 +85,11 @@ private: std::vector fNInputs; std::string fNY; - std::vector> fShapeInputs; + std::vector> fShapeInputs; std::vector fNBroadcastedInputs; std::vector fShapeY; + std::vector fDimShapeY; bool fBroadcast = false; @@ -119,64 +122,164 @@ public: } void Initialize(RModel& model) override { + std::vector> inputShapes; for (auto &it : fNInputs) { if 
(!model.CheckIfTensorAlreadyExist(it)) { throw std::runtime_error("TMVA SOFIE BasicNary Op Input Tensor " + it + " is not found in model"); } - fShapeInputs.push_back(model.GetTensorShape(it)); + fShapeInputs.push_back(model.GetDimTensorShape(it)); + if (fNInputs.size()> 2) { + if (model.IsDimInputTensor(it)) + throw std::runtime_error("TMVA SOFIE BasicNary : supports only 2 inputs for dynamic tensors"); + else + inputShapes.push_back(model.GetTensorShape(it)); + } } // Find the common shape of the input tensors - fShapeY = UTILITY::MultidirectionalBroadcastShape(fShapeInputs); - model.AddIntermediateTensor(fNY, model.GetTensorType(fNInputs[0]), fShapeY); - // Broadcasting - size_t N = fNInputs.size(); - fNBroadcastedInputs.reserve(N); - for (size_t i = 0; i < N; i++) { - if (!UTILITY::AreSameShape(model.GetTensorShape(fNInputs[i]), fShapeY)) { - fBroadcast = true; - std::string name = "Broadcasted" + fNInputs[i]; - model.AddIntermediateTensor(name, model.GetTensorType(fNInputs[0]), fShapeY); - fNBroadcastedInputs.emplace_back("tensor_" + name); - } else { - fNBroadcastedInputs.emplace_back("tensor_" + fNInputs[i]); + if (fShapeInputs.size() > 2 ) { + // support dynamic tensors now for input list of size=2 + auto shapeY = UTILITY::MultidirectionalBroadcastShape(inputShapes); + fDimShapeY = ConvertShapeToDim(shapeY); + } else if (fShapeInputs.size() == 2 ) { + auto ret = UTILITY::MultidirectionalBroadcastShape(fShapeInputs[0], fShapeInputs[1]); + // use same code as in BinaryOperator (need to extend for input sizes > 2) + fBroadcast = ret.first; + fDimShapeY = ret.second; + // case of all parametric shapes and MultiDirectionalBroadcastShape return the max of the 2 + // need to do before we declare the output tensor shape and the broadcasted ones + if (ret.first & 4) { + // check if one of the parameter is an input dimension + // define function to find this + auto IsInputDimParam = [&](const std::string &p) { + auto inputNames = model.GetInputTensorNames(); + for (auto &input : inputNames) { + for (auto &i_s : model.GetDimTensorShape(input)) { + if (i_s.isParam && i_s.param == p) + return true; + } + } + return false; + }; + auto & shapeA = fShapeInputs[0]; + auto & shapeB = fShapeInputs[1]; + for (size_t i = 0; i < fDimShapeY.size(); i++) { + auto &s = fDimShapeY[i]; + if (s.isParam && s.param.find("std::max") != std::string::npos) { + if (IsInputDimParam(shapeA[i].param)) { + // case dim is 1 we indicate that the input parameter is equal to 1 + if (shapeA[i].dim != 1) + s = shapeA[i]; + else + s = shapeB[i]; + } else if (IsInputDimParam(shapeB[i].param)) { + if (shapeB[i].dim != 1) + s = shapeB[i]; + else + s = shapeA[i]; + } + } + } } + } else if (fShapeInputs.size() == 1 ) { + fDimShapeY = fShapeInputs[0]; } + if (!fShapeY.empty()) + model.AddIntermediateTensor(fNY, model.GetTensorType(fNInputs[0]), fShapeY); + else + model.AddIntermediateTensor(fNY, model.GetTensorType(fNInputs[0]), fDimShapeY); + + fType = ConvertTypeToString(model.GetTensorType(fNInputs[0])); + + if (model.Verbose()) { + std::cout << NaryOperatorTraits::Name() << " : "; + if (fNInputs.size() == 2) + std::cout << ConvertShapeToString(fShapeInputs[0]) << " , " + << ConvertShapeToString(fShapeInputs[1]); + std::cout << " --> " << ConvertShapeToString(fDimShapeY) << std::endl; + } } std::string Generate(std::string OpName) override { OpName = "op_" + OpName; - if (fShapeY.empty()) { + if (fDimShapeY.empty()) { throw std::runtime_error("TMVA SOFIE BasicNary called to Generate without being initialized first"); } 
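// A minimal standalone sketch of the multidirectional (numpy-style) broadcast rule
// relied on by the Initialize code above, written for plain numeric dims. The real
// UTILITY::MultidirectionalBroadcastShape works on SOFIE Dim objects that can be
// symbolic; the helper name below is illustrative only, not the library API.
#include <algorithm>
#include <stdexcept>
#include <vector>

std::vector<size_t> BroadcastShapeSketch(std::vector<size_t> a, std::vector<size_t> b)
{
   // right-align the two shapes by padding the shorter one with leading 1s
   if (a.size() < b.size()) a.insert(a.begin(), b.size() - a.size(), 1);
   if (b.size() < a.size()) b.insert(b.begin(), a.size() - b.size(), 1);
   std::vector<size_t> out(a.size());
   for (size_t i = 0; i < a.size(); ++i) {
      if (a[i] == b[i] || a[i] == 1 || b[i] == 1)
         out[i] = std::max(a[i], b[i]);   // equal dims, or one side broadcast from 1
      else
         throw std::runtime_error("shapes are not broadcastable");
   }
   return out;   // e.g. {5,1,3} and {4,3} -> {5,4,3}
}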
std::stringstream out; - size_t length = ConvertShapeToLength(fShapeY); + auto length = ConvertDimShapeToLength(fDimShapeY); out << SP << "\n//------ BasicNary operator\n"; - if (fBroadcast) { - for (size_t i = 0; i < fNInputs.size(); i++) { - if (fNBroadcastedInputs[i] != fNInputs[i]) { - out << SP << SP << "// Broadcasting " << fNInputs[i] << " to " << ConvertShapeToString(fShapeY) << "\n"; - out << SP << SP << "{\n"; - out << SP << SP << SP << fType << "* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << fType << ">(tensor_" + fNInputs[i] << ", " << ConvertShapeToString(fShapeInputs[i]); - out << ", " << ConvertShapeToString(fShapeY) << ");\n"; - out << SP << SP << SP << "std::copy(data, data + " << length << ", " << fNBroadcastedInputs[i] << ");\n"; - out << SP << SP << SP << "delete[] data;\n"; - out << SP << SP << "}\n"; - } - } - } - if (fNInputs.size() == 1) { + int nInputs = fNInputs.size(); + + if (nInputs == 1) { out << SP << "std::copy(tensor_" << fNInputs[0] << ", tensor_" << fNInputs[0] << " + "; out << length << ", tensor_" << fNY << ");\n"; } else { - std::vector inputs(fNBroadcastedInputs.size()); - for (size_t i = 0; i < fNBroadcastedInputs.size(); i++) { - inputs[i] = fNBroadcastedInputs[i] + "[id]"; + + // implement operator without broadcasting, but using loos on all indices + std::vector> inputStrides(nInputs); + for (int i = 0; i < nInputs; i++) + inputStrides[i] = UTILITY::ComputeStrideFromShape(fShapeInputs[i]); + + auto stridesY = UTILITY::ComputeStrideFromShape(fDimShapeY); + + // make loop on output indices + std::string compute_idx_Y; + int nloop = 0; + if (fDimShapeY.empty() || + std::all_of(fDimShapeY.begin(), fDimShapeY.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_Y = "0"; + } else { + for (size_t i = 0; i < fDimShapeY.size(); ++i) { + if (fDimShapeY[i].dim != 1 && fDimShapeY[i].GetVal() != "1") { + nloop++; + for (int j = 0; j < nloop; j++) out << SP; + out << "for (size_t idx_" << i << " = 0; idx_" << i << " < " << fDimShapeY[i] + << "; ++idx_" << i << "){\n"; + compute_idx_Y += "idx_" + std::to_string(i); + if (stridesY[i].GetVal() != "1") + compute_idx_Y += " * " + stridesY[i].GetVal(); + compute_idx_Y += " + "; + } + } + // remove last 3 characters " + " + for (int j = 0; j < 3; j++) + compute_idx_Y.pop_back(); + } + // find indices for input tensors + std::vector inputs(nInputs); + for (int ipt = 0; ipt < nInputs; ipt++ ) { + std::string compute_idx_X; + auto & shape = fShapeInputs[ipt]; + auto & stride = inputStrides[ipt]; + if (shape.empty() || + std::all_of(shape.begin(), shape.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_X = "0"; + } else { + for (size_t i = 0; i < shape.size(); ++i) { + if (shape[i].dim == 1 || shape[i].GetVal() == "1") + continue; + compute_idx_X += "idx_" + std::to_string(i + (fDimShapeY.size() - shape.size())); + if (stride[i].GetVal() != "1") + compute_idx_X += " * " + stride[i].GetVal(); + compute_idx_X += " + "; + } + // remove last 3 character " + " + for (int j = 0; j < 3; j++) + compute_idx_X.pop_back(); + } + inputs[ipt] = "tensor_" + fNInputs[ipt] + "[" + compute_idx_X + "]"; + } + + // perform the operation + for (int j = 0; j < nloop + 1; j++) out << SP; + std::string output = "tensor_" + fNY + "[" + compute_idx_Y + "]"; + out << NaryOperatorTraits::Op(output, inputs); + + for (int i = nloop; i > 0; i--) { + for (int j = 0; j < i; j++) out << SP; + out << "}\n"; } - out << SP << "for (size_t id = 0; id < " << length << "; id++) 
{\n"; - out << NaryOperatorTraits::Op("tensor_" + fNY + "[id]", inputs); - out << SP << "}\n"; } return out.str(); } diff --git a/tmva/sofie/inc/TMVA/ROperator_Cast.hxx b/tmva/sofie/inc/TMVA/ROperator_Cast.hxx index f48e27ee4f264..8267bb8a7e4f4 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Cast.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Cast.hxx @@ -46,7 +46,7 @@ public: throw std::runtime_error("TMVA SOFIE Cast Op Input Tensor is not found in model"); } fShape = model.GetDimTensorShape(fNX); - // shoud we add a check if the same type + // should we add a check if the same type auto inputType = model.GetTensorType(fNX); if (model.IsInitializedTensor(fNX)) { fIsOutputConstant = true; @@ -57,29 +57,30 @@ public: } else fIsOutputConstant = false; + } else if (model.IsShapeTensor(fNX) && ConvertStringToType(fAttrType) == ETensorType::INT64) { + auto shapeData = model.GetShapeTensorValues(fNX); + model.AddShapeTensor(fNY, shapeData, fShape.size() == 0); + fIsOutputConstant = true; } if (!fIsOutputConstant) model.AddIntermediateTensor(fNY, ConvertStringToType(fAttrType), fShape); if (model.Verbose()) { - std::cout << "Cast : " << ConvertTypeToString(inputType) << " " << fNX << " -> " << fAttrType << " for " << fNY; + std::cout << "Cast : " << ConvertTypeToString(inputType) << " " << fNX << " -> " << fAttrType << " for " << fNY + << " shape " << ConvertDimShapeToString(fShape); if (fIsOutputConstant) std::cout << " (constant) "; std::cout << std::endl; } } - std::string Generate(std::string OpName) override { - if (fIsOutputConstant) return ""; + std::string Generate(std::string opName) override { + + // output shape can be empty if is a scalar - OpName = "op_" + OpName; - if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Cast called to Generate without being initialized first"); - } std::stringstream out; auto length = ConvertDimShapeToLength(fShape); - // out << SP << ETensorType << " " << OpName << "_attr = " << fattr << ";\n"; - out << "\n//------ CAST\n"; + out << "\n//------ CAST " << opName << " ---> " << fNY << " " << ConvertDimShapeToString(fShape) << "\n"; // no generated code for constant outputs if (fIsOutputConstant) return out.str(); diff --git a/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx b/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx index 0d365ae517de5..40c8923676aaf 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx @@ -65,11 +65,10 @@ private: std::vector fDimShapeX1; std::vector fDimShapeX2; std::vector fShapeY; - std::string fNBroadcastedX1; - std::string fNBroadcastedX2; + std::vector fDimShapeY; ETensorType fTensorType1 = ETensorType::UNDEFINED; ETensorType fTensorType2 = ETensorType::UNDEFINED; - bool fBroadcast = false; + int fBroadcastFlag = 0; public: @@ -115,136 +114,175 @@ public: } fTensorType1 = model.GetTensorType(fNX1); fTensorType2 = model.GetTensorType(fNX2); - bool broadcast = !UTILITY::AreSameShape(fShapeX1, fShapeX2); - if (broadcast) { - // Y is the common shape of A and B - fShapeY = UTILITY::UnidirectionalBroadcastShape(fShapeX1, fShapeX2); - bool broadcastX1 = !UTILITY::AreSameShape(fShapeX1, fShapeY); - bool broadcastX2 = !UTILITY::AreSameShape(fShapeX2, fShapeY); - // Broadcast A to Y - if (broadcastX1) { - if (model.IsInitializedTensor(fNX1)) { - auto data = model.GetInitializedTensorData(fNX1); - std::shared_ptr broadcastedData( - UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeX1, fShapeY), - std::default_delete()); - // Update the data and the shape of A - 
model.UpdateInitializedTensor(fNX1, model.GetTensorType(fNX1), fShapeY, broadcastedData); - fShapeX1 = fShapeY; - } else { - // Add an intermediate tensor for broadcasting A - fNBroadcastedX1 = "Broadcasted" + fNX1; - model.AddIntermediateTensor(fNBroadcastedX1, model.GetTensorType(fNX1), fShapeY); + // case of non dynamic tensors + if (!fShapeX1.empty() && !fShapeX2.empty()) { + bool broadcastX1 = false; + bool broadcastX2 = false; + if (UTILITY::AreSameShape(fShapeX1, fShapeX2)) { + // no broadcast needed + fShapeY = fShapeX1; + } else { + // Y is the common shape of A and B + fShapeY = UTILITY::UnidirectionalBroadcastShape(fShapeX1, fShapeX2); + broadcastX1 = !UTILITY::AreSameShape(fShapeX1, fShapeY); + broadcastX2 = !UTILITY::AreSameShape(fShapeX2, fShapeY); + } + + + // analyze case of constant tensors or shape tensors (which have known shapes but data as Dim values + // normal case with non-dynamic tensor is also here + T *data1 = nullptr; + T *data2 = nullptr; + std::unique_ptr broadcastedData1; + std::unique_ptr broadcastedData2; + // data for shape tensors + std::vector shapeData1; + std::vector shapeData2; + size_t length = ConvertShapeToLength(fShapeY); + bool *outData = new bool[length]; + if (model.IsInitializedTensor(fNX1)) { + data1 = static_cast(model.GetInitializedTensorData(fNX1).get()); + if (broadcastX1) { + broadcastedData1 = std::unique_ptr( + UTILITY::UnidirectionalBroadcast(data1, fShapeX1, fShapeY)); + data1 = broadcastedData1.get(); } + + } else if (model.IsShapeTensor(fNX1)) { + shapeData1 = model.GetShapeTensorValues(fNX1); } - // Broadcast B to Y - if (broadcastX2) { - if (model.IsInitializedTensor(fNX2)) { - auto data = model.GetInitializedTensorData(fNX2); - std::shared_ptr broadcastedData( - UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeX2, fShapeY), - std::default_delete()); - // Update the data and the shape of B - model.UpdateInitializedTensor(fNX2, model.GetTensorType(fNX2), fShapeY, broadcastedData); - fShapeX2 = fShapeY; - } else { - // Add an intermediate tensor for broadcasting B - fNBroadcastedX2 = "Broadcasted" + fNX2; - model.AddIntermediateTensor(fNBroadcastedX2, model.GetTensorType(fNX2), fShapeY); + if (model.IsInitializedTensor(fNX2)) { + data2 = static_cast(model.GetInitializedTensorData(fNX2).get()); + if (broadcastX2) { + broadcastedData2 = std::unique_ptr( + UTILITY::UnidirectionalBroadcast(data2, fShapeX2, fShapeY)); + data2 = broadcastedData2.get(); } + } else if (model.IsShapeTensor(fNX2)) { + shapeData2 = model.GetShapeTensorValues(fNX2); } - } else { - fShapeY = fShapeX1; - } - // case of constant tensors - T * data1 = nullptr; - T * data2 = nullptr; - std::vector shapeData1; - std::vector shapeData2; - size_t length = ConvertShapeToLength(fShapeY); - bool * outData = new bool[length]; - if (model.IsInitializedTensor(fNX1)) { - data1 = static_cast(model.GetInitializedTensorData(fNX1).get()); - } else if (model.IsShapeTensor(fNX1)) { - shapeData1 = model.GetShapeTensorValues(fNX1); - } - if (model.IsInitializedTensor(fNX2)) { - data2 = static_cast(model.GetInitializedTensorData(fNX2).get()); - } else if (model.IsShapeTensor(fNX2)) { - shapeData2 = model.GetShapeTensorValues(fNX2); - } - if (data1 && data2) { - fIsOutputConstant = true; - for (size_t i = 0; i < length; i++) - outData[i] = ComparisionTrait::Result(data1[i], data2[i]); - model.AddConstantTensor(fNY, fShapeY, outData); - if (model.Verbose()) - std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " << ConvertShapeToString(fShapeY) << " : " - << 
ConvertValuesToString(length,outData) << std::endl; - } else if ((data1 || !shapeData1.empty()) && (data2 || !shapeData2.empty())) { - fIsOutputConstant = true; - if (data1 && !data2) { - // data 1 is constant and data2 is shape - for (size_t i = 0; i < length; i++) { - if (shapeData2[i].isParam) { - if (shapeData2[i].dim == size_t(-1) || data1[i] > 0) { - fIsOutputConstant = false; - break; - } else { - // assume a comparison is done with .dim = 0 - shapeData2[i].dim = 0; + if (data1 && data2) { + fIsOutputConstant = true; + for (size_t i = 0; i < length; i++) + outData[i] = ComparisionTrait::Result(data1[i], data2[i]); + model.AddConstantTensor(fNY, fShapeY, outData); + if (model.Verbose()) + std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " + << ConvertShapeToString(fShapeY) << " : " << ConvertValuesToString(length, outData) + << std::endl; + } else if ((data1 || !shapeData1.empty()) && (data2 || !shapeData2.empty())) { + fIsOutputConstant = true; + if (data1 && !data2) { + // data 1 is constant and data2 is shape + for (size_t i = 0; i < length; i++) { + if (shapeData2[i].isParam) { + if (shapeData2[i].dim == size_t(-1) || data1[i] > 0) { + fIsOutputConstant = false; + break; + } else { + // assume a comparison is done with .dim = 0 + shapeData2[i].dim = 0; + } } + outData[i] = ComparisionTrait::Result(data1[i], static_cast(shapeData2[i].dim)); } - outData[i] = ComparisionTrait::Result(data1[i], static_cast(shapeData2[i].dim)); - } - } else if (!data1 && data2) { - // data 1 is shape and dat2 is constant - for (size_t i = 0; i < length; i++) { - if (shapeData1[i].isParam) { - if (shapeData1[i].dim == size_t(-1) || data2[i] > 0) { + } else if (!data1 && data2) { + // data 1 is shape and dat2 is constant + for (size_t i = 0; i < length; i++) { + if (shapeData1[i].isParam) { + if (shapeData1[i].dim == size_t(-1) || data2[i] > 0) { + fIsOutputConstant = false; + break; + } else { + // assume a comparison is done with .dim = 0 + shapeData1[i].dim = 0; + } + } + outData[i] = ComparisionTrait::Result(static_cast(shapeData1[i].dim), data2[i]); + } + } else if (!shapeData1.empty() && !shapeData2.empty()) { + // both data1 and data2 are shape tensors + for (size_t i = 0; i < length; i++) { + if (!shapeData1[i].isParam && !shapeData2[i].isParam) { + outData[i] = ComparisionTrait::Result(shapeData1[i].dim, shapeData2[i].dim); + } else if (shapeData1[i].isParam && shapeData2[i].isParam) { + if (shapeData1[i].param == shapeData2[i].param) + outData[i] = ComparisionTrait::Result(1, 1); // comparison of two equal value + else { + fIsOutputConstant = false; + break; + } + } else { fIsOutputConstant = false; break; - } else { - // assume a comparison is done with .dim = 0 - shapeData1[i].dim = 0; } } - outData[i] = ComparisionTrait::Result(static_cast(shapeData1[i].dim), data2[i]); } - } else if (!shapeData1.empty() && !shapeData2.empty() ) { - // both data1 and data2 are shape tensors - for (size_t i = 0; i < length; i++) { - if (!shapeData1[i].isParam && !shapeData2[i].isParam) { - outData[i] = ComparisionTrait::Result(shapeData1[i].dim, shapeData2[i].dim); - } - else if (shapeData1[i].isParam && shapeData2[i].isParam) { - if (shapeData1[i].param == shapeData2[i].param) - outData[i] = ComparisionTrait::Result(1,1); // comparison of two equal value - else { - fIsOutputConstant = false; - break; + if (fIsOutputConstant) { + model.AddConstantTensor(fNY, fShapeY, outData); + if (model.Verbose()) + std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " + << 
ConvertShapeToString(fShapeY) << " : " << ConvertValuesToString(length, outData) + << " (constant) " << std::endl; + } + } + delete[] outData; + // case of non constant output (no constant or shape tensors) + if (!fIsOutputConstant && !fShapeY.empty()) { + model.AddIntermediateTensor(fNY, ETensorType::BOOL, fShapeY); + fDimShapeY = ConvertShapeToDim(fShapeY); + if (model.Verbose()) + std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " + << ConvertShapeToString(fShapeY) << std::endl; + } + } else { + // case of dynamic tensors + // case A or B have dynamic shapes. We need to broadcast if shape are not same + auto ret = UTILITY::MultidirectionalBroadcastShape(fDimShapeX1, fDimShapeX2); + fBroadcastFlag = ret.first; + fDimShapeY = ret.second; + // case of all parametric shapes and MultiDirectionalBroadcastShape return the max of the 2 + // need to do before we declare the output tensor shape and the broadcasted ones + if (ret.first & 4) { + // check if one of the parameter is an input dimension + // define function to find this + auto IsInputDimParam = [&](const std::string &p) { + auto inputNames = model.GetInputTensorNames(); + for (auto &input : inputNames) { + for (auto &i_s : model.GetDimTensorShape(input)) { + if (i_s.isParam && i_s.param == p) + return true; } } - else { - fIsOutputConstant = false; - break; + return false; + }; + for (size_t i = 0; i < fDimShapeY.size(); i++) { + auto &s = fDimShapeY[i]; + if (s.isParam && s.param.find("std::max") != std::string::npos) { + if (IsInputDimParam(fDimShapeX1[i].param)) { + // case dim is 1 we indicate that the input parameter is equal to 1 + if (fDimShapeX1[i].dim != 1) + s = fDimShapeX1[i]; + else + s = fDimShapeX2[i]; + } else if (IsInputDimParam(fDimShapeX2[i].param)) { + if (fDimShapeX2[i].dim != 1) + s = fDimShapeX2[i]; + else + s = fDimShapeX1[i]; + } } } } - if (fIsOutputConstant) { - model.AddConstantTensor(fNY, fShapeY, outData); - if (model.Verbose()) - std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " << ConvertShapeToString(fShapeY) << " : " - << ConvertValuesToString(length,outData) << " (constant) " << std::endl; + model.AddIntermediateTensor(fNY, ETensorType::BOOL, fDimShapeY); + if (model.Verbose()) { + std::cout << ComparisionTrait::Name() << " : " << fNX1 << " " << ConvertShapeToString(fDimShapeX1) << " , " + << fNX2 << " " << ConvertShapeToString(fDimShapeX2) << " --> " + << fNY << " " << ConvertShapeToString(fDimShapeY) << std::endl; + model.PrintIntermediateTensors(); } } - delete [] outData; - if (!fIsOutputConstant) { - model.AddIntermediateTensor(fNY, ETensorType::BOOL , fShapeY); - if (model.Verbose()) - std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " << ConvertShapeToString(fShapeY) << std::endl; - } // check if this is not output operators to add a specific line for definining the tensor_xxx variable const auto & outputTensorNames = model.GetOutputTensorNames(); @@ -257,39 +295,85 @@ public: if (fIsOutputConstant) return ""; opName = "op_" + opName; - if (fShapeY.empty()) { + if (fDimShapeY.empty()) { throw std::runtime_error("TMVA SOFIE Comparision Op called to Generate without being initialized first"); } std::stringstream out; out << SP << "\n//------ " << ComparisionTrait::Name() << " " << opName << " --> " << ConvertShapeToString(fShapeY) << "\n"; - size_t length = ConvertShapeToLength(fShapeY); - // Broadcast A if it's uninitialized - if (!fNBroadcastedX1.empty()) { - std::string type1 = ConvertTypeToString(fTensorType1); - out << SP << "// Broadcasting 
uninitialized tensor " << fNX1 << "\n"; - out << SP << "{\n"; - out << SP << SP << type1 << "* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << type1 << ">(tensor_" << fNX1 << ", " << ConvertShapeToString(fShapeX1) << ", " << ConvertShapeToString(fShapeY) << ");\n"; - out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNBroadcastedX1 << ");\n"; - out << SP << SP << "delete[] data;\n"; - out << SP << "}\n"; + + // need to add check if tensors are compatible as in binary operator + + // use same code as Binary operator + auto stridesA = UTILITY::ComputeStrideFromShape(fDimShapeX1); + auto stridesB = UTILITY::ComputeStrideFromShape(fDimShapeX2); + auto stridesY = UTILITY::ComputeStrideFromShape(fDimShapeY); + + std::string compute_idx_X1, compute_idx_X2, compute_idx_Y; + if (fDimShapeX1.empty() || + std::all_of(fDimShapeX1.begin(), fDimShapeX1.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_X1 = "0"; + } else { + for (size_t i = 0; i < fDimShapeX1.size(); ++i) { + if (fDimShapeX1[i].dim == 1 || fDimShapeX1[i].GetVal() == "1") + continue; + compute_idx_X1 += "idx_" + std::to_string(i + (fDimShapeY.size() - fDimShapeX1.size())); + if (stridesA[i].GetVal() != "1") + compute_idx_X1 += " * " + stridesA[i].GetVal(); + compute_idx_X1 += " + "; + } + // remove last 3 character " + " + for (int j = 0; j < 3; j++) + compute_idx_X1.pop_back(); + } + if (fDimShapeX2.empty() || + std::all_of(fDimShapeX2.begin(), fDimShapeX2.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_X2 = "0"; + } else { + for (size_t i = 0; i < fDimShapeX2.size(); ++i) { + if (fDimShapeX2[i].dim == 1 || fDimShapeX2[i].GetVal() == "1") + continue; + compute_idx_X2 += "idx_" + std::to_string(i + (fDimShapeY.size() - fDimShapeX2.size())); + if (stridesB[i].GetVal() != "1") + compute_idx_X2 += " * " + stridesB[i].GetVal(); + compute_idx_X2 += " + "; + } + // remove last 3 character " + " + for (int j = 0; j < 3; j++) + compute_idx_X2.pop_back(); } - // Broadcast B if it's uninitialized - if (!fNBroadcastedX2.empty()) { - std::string type2 = ConvertTypeToString(fTensorType2); - out << SP << "// Broadcasting uninitialized tensor " << fNX2 << "\n"; - out << SP << "{\n"; - out << SP << SP << type2 << "* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << type2 << ">(tensor_" << fNX2 << ", " << ConvertShapeToString(fShapeX2) << ", " << ConvertShapeToString(fShapeY) << ");\n"; - out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNBroadcastedX2 << ");\n"; - out << SP << SP << "delete[] data;\n"; - out << SP << "}\n"; + int nloop = 0; + if (fDimShapeY.empty() || + std::all_of(fDimShapeY.begin(), fDimShapeY.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_Y = "0"; + } else { + for (size_t i = 0; i < fDimShapeY.size(); ++i) { + if (fDimShapeY[i].dim != 1 && fDimShapeY[i].GetVal() != "1") { + nloop++; + for (int j = 0; j < nloop; j++) out << SP; + out << "for (size_t idx_" << i << " = 0; idx_" << i << " < " << fDimShapeY[i] + << "; ++idx_" << i << "){\n"; + compute_idx_Y += "idx_" + std::to_string(i); + if (stridesY[i].GetVal() != "1") + compute_idx_Y += " * " + stridesY[i].GetVal(); + compute_idx_Y += " + "; + } + } + // remove last 3 characters " + " + for (int j = 0; j < 3; j++) + compute_idx_Y.pop_back(); + } + for (int j = 0; j < nloop + 1; j++) out << SP; + out << "tensor_" << fNY << "[" << compute_idx_Y << "] = " + << ComparisionTrait::Op( "tensor_" + fNX1 + "[" + 
compute_idx_X1 + "]" , + "tensor_" + fNX2 + "[" + compute_idx_X2 + "]") << " ;\n"; + + + for (int i = nloop; i > 0; i--) { + for (int j = 0; j < i; j++) out << SP; + out << "}\n"; } - const std::string& nameX1 = fNBroadcastedX1.empty()? fNX1 : fNBroadcastedX1; - const std::string& nameX2 = fNBroadcastedX2.empty()? fNX2 : fNBroadcastedX2; - out << SP << "for (size_t id = 0; id < " << length << " ; id++){\n"; - out << SP << SP << "fTensor_" << fNY << "[id] = " << ComparisionTrait::Op( "tensor_" + nameX1 + "[id]" , "tensor_" + nameX2 + "[id]") << " ;\n"; - out << SP << "}\n"; // since output is a boolean need to add the tensor_xxx variable since it is not defined as a pointer to a boolean std::vector if (!fIsModelOutput) out << SP << "const std::vector & tensor_" << fNY << " = fTensor_" << fNY << ";\n"; diff --git a/tmva/sofie/inc/TMVA/ROperator_Concat.hxx b/tmva/sofie/inc/TMVA/ROperator_Concat.hxx index ad855341dfc17..d8155195c9f49 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Concat.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Concat.hxx @@ -123,7 +123,7 @@ concat_dim = inputs[i][iaxis]; else if (inputs[i][iaxis].isParam || concat_dim.isParam) { concat_dim = - Dim{ concat_dim.GetVal() + std::string("+ ") + inputs[i][iaxis].GetVal(), + Dim{ concat_dim.GetVal() + std::string(" + ") + inputs[i][iaxis].GetVal(), static_cast(-1)}; } else { concat_dim = Dim { concat_dim.dim + inputs[i][iaxis].dim }; @@ -156,7 +156,7 @@ } // output shape for concatenated axis - ret[fAxis] = Dim{concat_dim}; + ret[fAxis] = concat_dim; } // case of stacking (not supported yet) @@ -205,7 +205,7 @@ size_t inputLength = ConvertShapeToLength(inputShape); std::copy(inputData, inputData + inputLength, outputData.begin() + offset ); offset += inputLength; - // data do not need to be written as a weight + // data do not need to be written in teh generated code model.SetNotWritableInitializedTensor(input); } model.AddConstantTensor(fOutput, outputShape, outputData.data()); @@ -221,15 +221,18 @@ std::vector inputData; auto inputShape = model.GetTensorShape(input); // shape is not dynamic size_t inputLength = ConvertShapeToLength(inputShape); // shape can be a scalar - if (model.IsShapeTensor(input)) + if (model.IsShapeTensor(input)) { inputData = model.GetShapeTensorValues(input); - else if (model.IsConstantTensor(input)) { + } else if (model.IsInitializedTensor(input)) { inputData.resize(inputLength); auto intData = static_cast(model.GetInitializedTensorData(input).get()); for (size_t i = 0; i < inputData.size(); i++) inputData[i] = Dim{ static_cast(intData[i])}; } - std::cout << "concatenating input data " << inputLength << " " << inputData[0] << std::endl; + else { + // this should not happen + throw std::runtime_error("TMVA SOFIE Concat Operator- invalid input type for shape output type"); + } std::copy(inputData.begin(), inputData.end(), outputData.begin() + offset ); offset += inputLength; } @@ -251,13 +254,15 @@ } std::string Generate(std::string opName) override { - if (fIsOutputConstant) return ""; opName = "op_" + opName; + std::stringstream out; + out<<"\n//--------- Concat " << opName << " --> " << fOutput << " " << ConvertShapeToString(fOutputShape) << "\n"; + + if (fIsOutputConstant) return out.str(); + if(fOutputShape.empty()){ throw std::runtime_error("TMVA SOFIE Concat called to Generate without being initialized first"); } - std::stringstream out; - out<<"\n//--------- Concat " << opName << " --> " << ConvertShapeToString(fOutputShape) << "\n"; // special case when memory is contiguous bool hasShapeOnes = true; 
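// The operators touched in this patch (BasicNary, Comparision, Concat) index their
// inputs through row-major strides from UTILITY::ComputeStrideFromShape, skipping
// size-1 (broadcast) dimensions. A small numeric sketch of that indexing scheme; the
// generated code builds the same expressions as strings from Dim values, and the
// helper names here are illustrative assumptions.
#include <cstddef>
#include <vector>

std::vector<size_t> RowMajorStrides(const std::vector<size_t> &shape)
{
   std::vector<size_t> s(shape.size(), 1);
   for (int i = static_cast<int>(shape.size()) - 2; i >= 0; --i)
      s[i] = s[i + 1] * shape[i + 1];
   return s;   // e.g. {2,3,4} -> {12,4,1}
}

// Flat offset of an output multi-index into an input whose shape is right-aligned
// with the output shape and may contain size-1 (broadcast) dimensions.
size_t BroadcastOffset(const std::vector<size_t> &outIdx, const std::vector<size_t> &inShape)
{
   auto stride = RowMajorStrides(inShape);
   size_t shift = outIdx.size() - inShape.size();
   size_t offset = 0;
   for (size_t i = 0; i < inShape.size(); ++i)
      if (inShape[i] != 1)                  // broadcast dimensions contribute nothing
         offset += outIdx[i + shift] * stride[i];
   return offset;
}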
for(int i = 0; i0) - out << SP << SP << SP << "idxOut += " << fInputShapes[j-1][fAxis].GetVal() << ";\n"; + out << SP << SP << SP << "idxOut += " << inStrides[j-1][fAxis-1].GetVal() << ";\n"; out << SP << SP << SP << "int idxIn" << j <<" = "; for (int k = 0; k < fAxis; k++) { if (k > 0) out << " + "; out << inStrides[j][k].GetVal() << "*i" << k; } out << ";\n"; - out << SP << SP << SP << "for (size_t iC = 0; iC < " << fInputShapes[j][fAxis].GetVal() << "; ++iC) {\n"; + out << SP << SP << SP << "for (size_t iC = 0; iC < " << inStrides[j][fAxis-1].GetVal() << "; ++iC) {\n"; out << SP << SP << SP << SP << "tensor_" << fOutput << "[idxOut+iC] = tensor_" << fInputs[j] << "[idxIn" << j << "+iC];\n"; out << SP << SP << SP << "}\n"; // concatenate the axis values diff --git a/tmva/sofie/inc/TMVA/ROperator_Constant.hxx b/tmva/sofie/inc/TMVA/ROperator_Constant.hxx index 1cf5d13f5cd6f..3b339e3440488 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Constant.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Constant.hxx @@ -136,9 +136,9 @@ public: std::stringstream out; if (fIsOutputConstant) { if (fNX.empty()) - out << "// ---- Constant (no-op) " << opName << " --> " << ConvertShapeToString(fDimOutputShape) << "\n"; + out << "// ---- Constant (no-op) " << opName << " --> " << fNY << " " << ConvertShapeToString(fDimOutputShape) << "\n"; else - out << "// ---- ConstantOfShape (no-op) " << opName << " --> " << ConvertShapeToString(fDimOutputShape) << "\n"; + out << "// ---- ConstantOfShape (no-op) " << opName << " --> " << fNY << " " << ConvertShapeToString(fDimOutputShape) << "\n"; return out.str(); } // Only ConstantOfShape might require generation code diff --git a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx index 95f226ca91d4b..2681eeb2dd84c 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx @@ -297,16 +297,25 @@ public: } } } - // output channel size can be parametric + // output channel size can be parametric and is an expression std::vector outputDims = std::vector(fShapeY.begin()+2, fShapeY.end()); - auto outputChannelSize = ConvertDimShapeToLength(outputDims); // size/channel = D * H * W + //check if shape is not parametric + std::vector outputInts = ConvertShapeToInt(outputDims); + Dim channelDim; + if (outputInts.empty()) { + auto outputChannelSize = ConvertDimShapeToLength(outputDims); // size/channel = D * H * W + channelDim = Dim{ outputChannelSize, static_cast(-1)}; + } else { + size_t outputChannelSize = ConvertShapeToLength(outputInts); + channelDim = Dim{ outputChannelSize }; + } size_t kernelSize = fAttrKernelShape[0]; for (size_t i = 1; i < fDim; i++) { kernelSize *= fAttrKernelShape[i]; } std::vector shape1 = {fShapeW[0], fShapeW[1], kernelSize}; - std::vector shape2 = {Dim{fShapeW[1]}, Dim{kernelSize}, Dim{outputChannelSize}}; + std::vector shape2 = {Dim{fShapeW[1]}, Dim{kernelSize}, channelDim }; model.AddIntermediateTensor(fNX +"_f", ConvertStringToType(fType), shape1 ); model.AddIntermediateTensor(fNX +"_xcol", ConvertStringToType(fType), shape2 ); convK = fNX +"_f"; diff --git a/tmva/sofie/inc/TMVA/ROperator_Gather.hxx b/tmva/sofie/inc/TMVA/ROperator_Gather.hxx index 81411b8ebf71a..1d51c59380dae 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gather.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gather.hxx @@ -153,13 +153,14 @@ public: } std::string Generate(std::string opName) override { + opName = "op_" + opName; + std::stringstream out; + out << "//--------- Gather " << opName << " --> " << fNY << " " << 
ConvertShapeToString(fShapeY) << "\n"; if (fIsOutputConstant) { // no code to generate here for constant output. Tensor output is defined in Session constructor - return "//---------------------------------------\n"; + out << "//--------------------(constant)----------\n"; + return out.str(); } - opName = "op_" + opName; - std::stringstream out; - out << "//--------- Gather " << opName << " --> " << ConvertShapeToString(fShapeY) << "\n"; // The shape of the output is q + r - 1 size_t r = fShapeX.size(); // Indices of shape q diff --git a/tmva/sofie/inc/TMVA/ROperator_Range.hxx b/tmva/sofie/inc/TMVA/ROperator_Range.hxx index 9cac15a14fc52..7c138c3b3def5 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Range.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Range.hxx @@ -39,15 +39,6 @@ public: "TMVA::SOFIE - Unsupported type by Range operator"); } - std::vector TypeInference(std::vector input) override { - return input; - } - - std::vector> ShapeInference(std::vector> input) override { - auto ret = input; //suggest copy to compiler - return ret; - } - void Initialize(RModel& model) override { //input must be a graph input, or already initialized intermediate tensor if (!model.CheckIfTensorAlreadyExist(fNStart)) { @@ -63,32 +54,94 @@ public: std::runtime_error("TMVA SOFIE Range Op Input Tensor " + fNDelta + "is not found in model"); } ETensorType type = ConvertStringToType(fType); - if (model.IsInitializedTensor(fNStart) && model.IsInitializedTensor(fNDelta) && model.IsInitializedTensor(fNLimit)) { - T * start = static_cast(model.GetInitializedTensorData(fNStart).get()); - T * limit = static_cast(model.GetInitializedTensorData(fNLimit).get()); - T * delta = static_cast(model.GetInitializedTensorData(fNDelta).get()); - if (!start || !delta || !limit) - std::runtime_error("TMVA SOFIE Range Op Input Tensor has invalid input data"); - T a = *start; - T b = *limit; - T d = *delta; - int number_of_elements = std::max( static_cast(std::ceil( (b - a) / d )) , 0. 
); + + + + auto analyzeInput = [&](const std::string & tName, T & value, Dim & dim) { + int ftype = 0; // type of input (0 intermediate, 1 constant , 2 shape) + if (model.IsInitializedTensor(tName)) { + T * data = static_cast(model.GetInitializedTensorData(tName).get()); + if (!data) + std::runtime_error("TMVA SOFIE Range Op Input Tensor has invalid input data"); + value = *data; + ftype = 1; + } else if (model.IsShapeTensor(tName)) { + auto data = model.GetShapeTensorValues(tName); + dim = data[0]; + if (!dim.isParam) { + value = static_cast(dim.dim); + ftype = 1; + } else + ftype = 2; + } + return ftype; + }; + + T start_value; + T limit_value; + T delta_value; + Dim start_dim; + Dim limit_dim; + Dim delta_dim; + int res1 = analyzeInput(fNStart, start_value, start_dim); + int res2 = analyzeInput(fNLimit, limit_value, limit_dim); + int res3 = analyzeInput(fNDelta, delta_value, delta_dim); + if (res1 == 0 || res2 == 0 || res3 == 0) { + // cannot know at compile time- need to do fully at run time + // + fShape = {Dim{"range_size_" + fNStart + "_" + fNLimit}}; + model.AddDynamicTensor(fNOutput, type, fShape); + } else if (res1 == 1 && res2 == 1 && res3 == 1) { + size_t number_of_elements = std::max(static_cast(std::ceil((limit_value - start_value) / delta_value )) , 0 ); + fIsOutputConstant = true; + + // compute output std::vector output(number_of_elements); - for (int i=0; i shape = {static_cast(number_of_elements)}; + std::vector shape = {number_of_elements}; model.AddConstantTensor(fNOutput,shape, output.data()); - fIsOutputConstant = true; - // set the input tensor not writable + fShape = ConvertShapeToDim(shape); + + // set the input tensor not writable model.SetNotWritableInitializedTensor(fNStart); model.SetNotWritableInitializedTensor(fNDelta); model.SetNotWritableInitializedTensor(fNLimit); + + } else { // case of a shape tensor + std::string start = (res1 == 1) ? std::to_string(start_value) : start_dim.GetVal(); + std::string limit = (res2 == 1) ? std::to_string(limit_value) : limit_dim.GetVal(); + std::string delta = (res3 == 1) ? 
std::to_string(delta_value) : delta_dim.GetVal(); + std::stringstream s; + if (type == ETensorType::FLOAT ) { + if (delta_value == 1) + s << "std::max(std::ceil("<< limit << " - " << start << "),0.0f)"; + else + s << "std::max(std::ceil(("<< limit << " - " << start << ")/" << delta << "),0.0f)"; + } else if (type == ETensorType::INT64 ) { + if (delta == "1") { + if (start == "0") + s << limit; + else + s << "std::max((" << limit << " - " << start << "),0L)"; + } else { + if (start == "0") + s << "((" << limit << ")/" << delta << ")"; + else + s << "std::max((" << limit << " - " << start << ")/"<< delta << "),0L)"; + } + } else { + throw + std::runtime_error("TMVA SOFIE Range Op Input Tensor " + ConvertTypeToString(type) + "is not supported"); + } + + + fShape = { Dim {s.str(), static_cast(-1)} }; + model.AddDynamicTensor(fNOutput,type, fShape); } - else { - fShape = {Dim{"range_size"}}; - model.AddDynamicTensor(fNOutput, type, fShape); - } + + if (model.Verbose()) { std::cout << "Range -> output is " << fNOutput << " : " << ConvertShapeToString(fShape); if (fIsOutputConstant) std::cout << " : " << ConvertValuesToString(model.GetTensorData(fNOutput)); @@ -96,26 +149,31 @@ public: } } - std::string Generate(std::string OpName) override { + std::string Generate(std::string opName) override { std::stringstream out; - out << "\n//------ Range\n"; + out << "\n//------ Range " << opName << "---> " << ConvertDimShapeToString(fShape) << "\n"; if (fIsOutputConstant) return out.str(); - OpName = "op_" + OpName; + opName = "op_" + opName; if (fShape.empty()) { throw std::runtime_error("TMVA SOFIE Range operator called to Generate without being initialized first"); } std::string sizeName = fShape[0].param; - out << SP << "size_t " << sizeName << " = static_cast(std::max(std::ceil((static_cast(*tensor_" << fNLimit << ") - static_cast(*tensor_" << fNStart << ")) / static_cast(*tensor_" << fNDelta << ")), 0.0f));\n"; - out << SP << "if (" << sizeName << " > " << "fTensor_" << fNOutput << ".size() ){\n"; - out << SP << SP << "fTensor_" << fNOutput << ".resize(" << sizeName << ");\n"; + if (sizeName.find("range_size") != std::string::npos) + sizeName = "static_cast(std::max(std::ceil((static_cast(*tensor_" + fNLimit + + ") - static_cast(*tensor_" + fNStart + ")) / static_cast(*tensor_" + fNDelta + ")), 0.0f))"; + out << SP << "{\n"; + out << SP << SP << "size_t range" << " = " << sizeName << ";\n"; + out << SP << SP << "if ( range > " << "fTensor_" << fNOutput << ".size() ){\n"; + out << SP << SP << SP << "fTensor_" << fNOutput << ".resize(range);\n"; // need to re-initialized pointer to tensor data - out << SP << SP << "tensor_" << fNOutput << " = fTensor_" << fNOutput << ".data();\n"; - out << SP << "}\n"; - out << SP << "for (size_t i = 0; i < " << sizeName << "; i++) {\n"; - out << SP << SP << "fTensor_" << fNOutput << "[i] = *tensor_" << fNStart << " + i * (*tensor_" << fNDelta << ");\n"; + out << SP << SP << SP << "tensor_" << fNOutput << " = fTensor_" << fNOutput << ".data();\n"; + out << SP << SP << "}\n"; + out << SP << SP << "for (size_t i = 0; i < range; i++) {\n"; + out << SP << SP << SP << "fTensor_" << fNOutput << "[i] = *tensor_" << fNStart << " + i * (*tensor_" << fNDelta << ");\n"; + out << SP << SP << "}\n"; out << SP << "}\n"; return out.str(); } diff --git a/tmva/sofie/inc/TMVA/ROperator_Reduce.hxx b/tmva/sofie/inc/TMVA/ROperator_Reduce.hxx index 1204770d3d321..1da588e965a01 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Reduce.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Reduce.hxx @@ -166,7 
+166,7 @@ public: std::string reducedLength; if (fInputDimShape) { reducedLength = "reducedLength_" + opName; - out << SP << "size_t " << reducedLength << " = " << inputLength << " / " << outputLength << ";\n"; + out << SP << "size_t " << reducedLength << " = (" << inputLength << ") / (" << outputLength << ");\n"; } else { int rLength = std::stoi(inputLength) / std::stoi(outputLength); reducedLength = std::to_string(rLength); diff --git a/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx b/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx index 2634b68dbc875..a3ed28c4860bc 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx @@ -108,6 +108,9 @@ public: if (IsInteger(tmp_length) && IsInteger(input_length)) output_shape[i] = Dim{static_cast(std::stoi(input_length) / std::stoi(tmp_length))}; + else if (IsInteger(tmp_length) && std::stoi(tmp_length) == 1) { + output_shape[i] = Dim{input_length, static_cast(-1)}; + } else { //we can try simplifying expression if tmp_length is integer and part of input_length // contains tmp_length @@ -243,7 +246,7 @@ public: // check if optional tensor exists defining shape or axes if (!fNInput2.empty()) { if (model.CheckIfTensorAlreadyExist(fNInput2)) { - if (model.IsConstantTensor(fNInput2) || model.IsInitializedTensor(fNInput2)) { + if (model.IsInitializedTensor(fNInput2)) { // assume input shape is an initialized tensor auto dptr = model.GetInitializedTensorData(fNInput2); auto values = static_cast(dptr.get()); @@ -260,6 +263,9 @@ public: fShapeOutput = ShapeInference({fShapeInput})[0]; // set flag to not write tensor in weight file. Its data will be hard-coded in way model is constructed model.SetNotWritableInitializedTensor(fNInput2); + } else if (model.IsShapeTensor(fNInput2)) { + auto shapeData = model.GetShapeTensorValues(fNInput2); + fShapeOutput = shapeData; } else { // we cannot get shape at initialization time but at run-time fDynamicShape = true; diff --git a/tmva/sofie/inc/TMVA/ROperator_Slice.hxx b/tmva/sofie/inc/TMVA/ROperator_Slice.hxx index b23e3b0a86d21..3add774b0d8d4 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Slice.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Slice.hxx @@ -235,6 +235,8 @@ public: if (iend < 0) { std::string send = std::string("(") + fShapeInput[fAxes[i]].param + "-" + std::to_string(-iend) +")"; fEnd[fAxes[i]] = Dim{send,size_t(-1)}; + } else if (iend == std::numeric_limits::max()){ + fEnd[fAxes[i]] = fShapeInput[fAxes[i]]; } else { fEnd[fAxes[i]] = Dim{size_t(iend)}; } @@ -332,23 +334,23 @@ public: else { model.AddIntermediateTensor(fNOutput, model.GetTensorType(fNData), fShapeOutput); if (model.Verbose()) { - std::cout << "Slice ---> " << fNOutput << " " << ConvertShapeToString(fShapeOutput) << std::endl; + std::cout << "Slice " << fNData << " " << ConvertShapeToString(fShapeInput) + << "---> " << fNOutput << " " << ConvertShapeToString(fShapeOutput) << std::endl; } } } - std::string Generate(std::string OpName) override { - if (fIsOutputConstant) return ""; //no op for constant tensors + std::string Generate(std::string opName) override { - OpName = "op_" + OpName; if (fShapeInput.empty() || fShapeOutput.empty()){ throw std::runtime_error("TMVA SOFIE Slice Op called to Generate without being initialized first"); } std::stringstream out; - //std::string opName = "Slice"; - out << SP << "///------- Slice operator\n" << std::endl; + out << "///------- Slice operator " << opName << "---> " << fNOutput << " " + << ConvertDimShapeToString(fShapeOutput) << "\n" << std::endl; + if (fIsOutputConstant) 
return out.str(); //no op for constant tensors // loop on the dimensions depending no the orders size_t ndim = fShapeInput.size(); auto strides = UTILITY::ComputeStrideFromShape(fShapeInput); diff --git a/tmva/sofie/inc/TMVA/ROperator_Tile.hxx b/tmva/sofie/inc/TMVA/ROperator_Tile.hxx index 1086f72eae71c..9b291b40e0854 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Tile.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Tile.hxx @@ -20,8 +20,8 @@ private: std::string fNRepeats; std::string fNInput; std::string fNY; - std::vectorfShapeInput; - std::vector fShapeY; + std::vectorfShapeInput; + std::vector fShapeY; public: ROperator_Tile(){} @@ -35,13 +35,18 @@ public: return input; } - std::vector> ShapeInference(std::vector> input) override { - std::vector ret = input[0]; - - for(size_t i=0; i < input[1].size(); i++) { - ret[i]=ret[i]*input[1][i]; + std::vector DoShapeInference(const std::vector & input, const std::vector repeat) { + std::vector ret = input; + for(size_t i=0; i < repeat.size(); i++) { + if (repeat[i] != 1) { + if (ret[i].isParam) { + ret[i] = Dim{ std::string(ret[i].GetVal() + "*" + std::to_string(repeat[i])), static_cast(-1) }; + } else { + ret[i]=Dim { ret[i].dim *repeat[i] }; + } + } } - return {ret}; + return ret; } void Initialize(RModel& model) override { @@ -52,7 +57,7 @@ public: if (model.CheckIfTensorAlreadyExist(fNRepeats) == false){ throw std::runtime_error("TMVA SOFIE Tile Op Input Tensor is not found in model"); } - fShapeInput=model.GetTensorShape(fNInput); + fShapeInput=model.GetDimTensorShape(fNInput); // if repeats vector is not initialized we cannot deduce shape of output // not support for time being this case @@ -79,12 +84,12 @@ public: std::copy(repeats_data, repeats_data + num_elements, repeats_vector.begin()); - fShapeY = ShapeInference({fShapeInput,repeats_vector})[0]; + fShapeY = DoShapeInference(fShapeInput,repeats_vector); model.AddIntermediateTensor(fNY, model.GetTensorType(fNInput), fShapeY); if (model.Verbose()) - std::cout << "Tile: " << fNInput << " " << ConvertShapeToString(fShapeInput) << " -> " << fNY << " with shape " << ConvertShapeToString(fShapeY) + std::cout << "Tile: " << fNInput << " " << ConvertDimShapeToString(fShapeInput) << " -> " << fNY << " with shape " << ConvertDimShapeToString(fShapeY) << " given repeats " << ConvertShapeToString(repeats_vector) << std::endl; } @@ -103,9 +108,9 @@ public: std::string output = "tensor_" + fNY; out << "///-------- Tile operator\n"; out << "{\n"; // add scope to re-use same names - out << "const int input_shape[" << fShapeInput.size() << "] = " << ConvertShapeToString(fShapeInput) << ";\n"; + out << "const size_t input_shape[" << fShapeInput.size() << "] = " << ConvertDimShapeToString(fShapeInput) << ";\n"; - out << "int inputLength = " << ConvertShapeToLength(fShapeInput) << ";\n"; + out << "int inputLength = " << ConvertDimShapeToLength(fShapeInput) << ";\n"; out << "int s = 1;\n"; // loop from inverse dim order out << "for (int i = " << fShapeInput.size()-1 << "; i >=0; i--) {\n"; diff --git a/tmva/sofie/inc/TMVA/ROperator_TopK.hxx b/tmva/sofie/inc/TMVA/ROperator_TopK.hxx index 0869437bb6b0c..edee91de8eb57 100644 --- a/tmva/sofie/inc/TMVA/ROperator_TopK.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_TopK.hxx @@ -19,13 +19,13 @@ private: int fAttrLargest; int fAttrSorted; - size_t fK; + Dim fK; std::string fNK; std::string fNX; std::string fNVal; std::string fNInd; - std::vector fShapeX; - std::vector fShapeY; + std::vector fShapeX; + std::vector fShapeY; std::string fType; public: @@ -43,23 +43,10 @@ public: } 
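// The Tile DoShapeInference above multiplies each (possibly symbolic) dimension by
// its repeat count. A purely numeric sketch of the same rule; the helper name is an
// illustrative assumption, not part of the operator interface.
#include <cstddef>
#include <vector>

std::vector<size_t> TileShapeSketch(const std::vector<size_t> &input,
                                    const std::vector<size_t> &repeats)
{
   // ONNX Tile: the repeats vector has the same rank as the input tensor
   std::vector<size_t> out(input.size());
   for (size_t i = 0; i < input.size(); ++i)
      out[i] = input[i] * repeats[i];   // e.g. {2,3} with repeats {1,4} -> {2,12}
   return out;
}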
std::vector TypeInference(std::vector input) override { - ETensorType ret = input[0]; - return {ret, ret}; - } - - std::vector> ShapeInference(std::vector> input) override { - if (input.size() != 2) { - throw std::runtime_error("TMVA SOFIE TopK Op Shape Inference needs exactly 2 input tensors"); - } - - auto shape = input[0]; // Shape format: [ m x n x o x p ... ] - - // set the dimension at the specified axis to k (fAttrAxis is checked before that is in the correct range - shape[fAttrAxis] = fK; // Modified shape: [ m x n x k x p ... ] - return {shape, shape}; + ETensorType ret = input[0]; + return {ret, ret}; } - void Initialize(RModel& model) override { if (model.CheckIfTensorAlreadyExist(fNX) == false) { // input must be a graph input, or already initialized intermediate tensor @@ -70,10 +57,10 @@ public: throw std::runtime_error("TMVA SOFIE TopK Op Input Tensor i.e. K is not found in model"); } - fShapeX = model.GetTensorShape(fNX); + fShapeX = model.GetDimTensorShape(fNX); auto fShapeK = model.GetTensorShape(fNK); auto kptr = static_cast(model.GetInitializedTensorData(fNK).get()); - fK = *kptr; + size_t kval = *kptr; model.SetNotWritableInitializedTensor(fNK); fAttrAxis = fAttrAxis < 0 ? fShapeX.size() + fAttrAxis : fAttrAxis; if(static_cast(fAttrAxis) >= fShapeX.size()){ @@ -81,14 +68,25 @@ public: std::runtime_error("TMVA::SOFIE ONNX TopK op axis = "+ std::to_string(fAttrAxis) +" value exeeds size of tensor " +fNX+" of size "+fShapeX.size()+" ."); } // fK cannot be larger that axis dimension - fK = std::min(fK, fShapeX[fAttrAxis]); + if (fShapeX[fAttrAxis].isParam) + fK = Dim{std::string("std::min(size_t(" + std::to_string(kval) + "), " + fShapeX[fAttrAxis].GetVal() + ")" ), static_cast(-1) }; + else + fK = Dim { std::min(kval, fShapeX[fAttrAxis].dim) }; + + // output shape is equal to input shape apart for value in fAttrAxis + fShapeY = fShapeX; + fShapeY[fAttrAxis] = Dim{fK}; - fShapeY = ShapeInference({fShapeX, fShapeK})[0]; model.AddIntermediateTensor(fNVal, model.GetTensorType(fNX), fShapeY); // output indices should be an int64 tensor model.AddIntermediateTensor(fNInd, ETensorType::INT64, fShapeY); fType = ConvertTypeToString(model.GetTensorType(fNX)); + + if (model.Verbose()) { + std::cout << "TopK " << fNX << " " << ConvertShapeToString(fShapeX) + << "---> " << fNVal << " " << ConvertShapeToString(fShapeY) << std::endl; + } } std::string Generate(std::string OpName) override { @@ -101,19 +99,20 @@ public: size_t axis = fAttrAxis < 0 ? size + fAttrAxis : fAttrAxis; out << "\n" << SP << "//------ TopK\n"; - size_t length=ConvertShapeToLength(fShapeX); + auto length=ConvertDimShapeToLength(fShapeX); auto strideX = UTILITY::ComputeStrideFromShape(fShapeX); auto strideY = UTILITY::ComputeStrideFromShape(fShapeY); // we perform loop on dimension before sorted axis and after sorted axis - size_t n_before = (axis>0) ? length/strideX[axis-1] : 1; - size_t n_after = strideX[axis]; - size_t n_elements = fShapeX[axis]; // number of elements to be sorted + std::vector shape_before(fShapeX.begin(), fShapeX.begin() + axis); // input shape before axis + std::string n_before = (axis>0) ? 
ConvertDimShapeToLength(shape_before) : "1"; + std::string n_after = strideX[axis].GetVal(); + std::string n_elements = fShapeX[axis].GetVal(); // number of elements to be sorted // } out << SP << "{\n"; // to define a separate scope for the operator code out << SP << "std::vector> elements(" << n_elements << ");\n"; // loop on elements before - if (n_before > 1) { + if (n_before != "1") { out << SP << "for (size_t i = 0; i < " << n_before << "; i++) {\n"; out << SP << SP << "size_t xoffset = i*" << strideX[axis-1] << ";\n"; out << SP << SP << "size_t yoffset = i*" << strideY[axis-1] << ";\n"; @@ -122,7 +121,7 @@ public: out << SP << "size_t xoffset = 0;\n"; out << SP << "size_t yoffset = 0;\n"; } - if (n_after > 1) + if (n_after != "1") out << SP << "for (size_t j = 0; j < " << n_after << "; j++) {\n"; else out << SP << "const size_t j = 0;\n"; @@ -149,8 +148,8 @@ public: out << SP << SP << SP << "tensor_" << fNVal << "[yoffset + " << strideY[axis] << "*l + j] = elements[l].first;\n"; out << SP << SP << SP << "tensor_" << fNInd << "[yoffset + " << strideY[axis] << "*l + j] = elements[l].second;\n"; out << SP << SP << "}\n"; - if (n_after > 1) out << SP << SP << "}\n"; - if (n_before> 1) out << SP << "}\n"; + if (n_after != "1") out << SP << SP << "}\n"; + if (n_before != "1") out << SP << "}\n"; out << SP << "}\n"; // end operator scope return out.str(); } diff --git a/tmva/sofie/inc/TMVA/SOFIE_common.hxx b/tmva/sofie/inc/TMVA/SOFIE_common.hxx index 2dae4f7d03ce7..dfa46a44c03b0 100644 --- a/tmva/sofie/inc/TMVA/SOFIE_common.hxx +++ b/tmva/sofie/inc/TMVA/SOFIE_common.hxx @@ -252,8 +252,14 @@ public: bool IsConstantTensor() const { return fConstant;} // query if tensor needs to be written in a weight file. Constant tensors are not written in a file bool IsWeightTensor() const { return !fConstant && !fIsNotWritable;} + // check if a Tensor is Writable (need to be written in teh file or in the generated code (e.g. as a costant tensor) + // if an initialized tensors is used in a constant operator at compile time does not need to be written and can be omitted in + // the generated code + bool IsNotWritable() const { return fIsNotWritable; } // set not writable initialized tensors - i.e. 
tensor that must not be written in a file void SetNotWritable() { fIsNotWritable = true;} + // set as constant (needed for non-flot initialized tensors) + void SetConstant() { fConstant = true;} template T const *data() const diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index 2fa6df3f04f8f..32a1d3f235e11 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -222,6 +222,7 @@ bool RModel::IsInitializedTensor(const std::string& tensorName) const { return fInitializedTensors.find(name) != fInitializedTensors.end(); } bool RModel::IsConstantTensor(const std::string& tensorName) const { + // a constant tensor is an initialized tensor but has the constant flag set std::string name = UTILITY::Clean_name(tensorName); auto itr = fInitializedTensors.find(name); if (itr == fInitializedTensors.end()) return false; @@ -522,6 +523,7 @@ void RModel::Initialize(const std::map & inputParams, bool fIntermediateTensorInfos.clear(); fDynamicTensorInfos.clear(); + // loop on inputs and see if shape can be full specified // if the batch size is provided it can be used to specify the full shape // Add the full specified tensors in fReadyInputTensors collection @@ -581,7 +583,7 @@ void RModel::Initialize(const std::map & inputParams, bool if (fUseWeightFile) { bool modelHasWeights = false; for (auto &i : fInitializedTensors) { - if (i.second.type() == ETensorType::FLOAT) { + if (i.second.IsWeightTensor()) { modelHasWeights = true; break; } @@ -612,6 +614,13 @@ void RModel::Initialize(const std::map & inputParams, bool i++; } + // loop on initialized tensors and make the integers as constant to be + // not written in a weight file + for (auto &i : fInitializedTensors) { + if (i.second.IsWeightTensor() && i.second.type() != ETensorType::FLOAT) + i.second.SetConstant(); + } + fIsInitialized = true; } @@ -684,9 +693,11 @@ std::string GenerateConstantTensorCode(const std::pair(i); @@ -772,6 +783,9 @@ void RModel::GenerateIntermediateTensorInfo() { } else if (i.second.type == ETensorType::INT64) { fGC += "std::vector fTensor_" + i.first + ";\n"; fGC += "int64_t * tensor_" + i.first + " = nullptr;\n"; + } else if (i.second.type == ETensorType::BOOL) { + fGC += "std::vector fTensor_" + i.first + ";\n"; + fGC += "uint8_t * tensor_" + i.first + " = nullptr;\n"; } } } @@ -1143,7 +1157,7 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { std::string length = std::to_string(ConvertShapeToLength(i.second.shape())); fGC += " ReadTensorFromStream(f, " + tensor_name + ", \"" + tensor_name + "\", " + length + ");\n"; } else { - std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a file"); + throw std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a file"); } } fGC += " f.close();\n"; @@ -1288,7 +1302,7 @@ long RModel::WriteInitializedTensorsToFile(std::string filename) { } } else { - std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be written to a file"); + throw std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be written to a file"); } if (f.fail()) std::runtime_error("tmva-sofie failed to write tensor data to file for " + tensor_name); From aa4d008b35d8dc2eae37a4c1a12b4b3e19ece00b Mon Sep 17 00:00:00 2001 From: moneta Date: Mon, 10 Nov 2025 23:16:35 +0100 Subject: [PATCH 02/12] 
[tmva][sofie] Remove special case handling bool outputs Since we use now for boolean tensors a std::vector it is not needed to have a special treatment when the output ttype of the operator is a boolean (e.g. in Comparison) --- tmva/sofie/inc/TMVA/ROperator_Comparision.hxx | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx b/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx index 40c8923676aaf..734434357a149 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx @@ -56,7 +56,6 @@ template class ROperator_Comparision final : public ROperator{ private: - bool fIsModelOutput = false; std::string fNX1; std::string fNX2; std::string fNY; @@ -283,12 +282,6 @@ public: model.PrintIntermediateTensors(); } } - - // check if this is not output operators to add a specific line for definining the tensor_xxx variable - const auto & outputTensorNames = model.GetOutputTensorNames(); - fIsModelOutput = false; - if (std::find(outputTensorNames.begin(), outputTensorNames.end(), fNY) != outputTensorNames.end()) - fIsModelOutput = true; } std::string Generate(std::string opName) override { @@ -374,9 +367,6 @@ public: out << "}\n"; } - // since output is a boolean need to add the tensor_xxx variable since it is not defined as a pointer to a boolean std::vector - if (!fIsModelOutput) - out << SP << "const std::vector & tensor_" << fNY << " = fTensor_" << fNY << ";\n"; return out.str(); } From 6b5c35ba01fab8ad89e2e175a7b2e66144383057 Mon Sep 17 00:00:00 2001 From: moneta Date: Wed, 12 Nov 2025 09:30:56 +0100 Subject: [PATCH 03/12] [tmva][sofie] Add support for greedy memory allocation for dynammic tensors Add a new function in SOFIE_common OrganizeMemory which computes the total memory and the offset for each tensor given tensor begin /end life and size. Fix also some small issue with dynamic tensor. One is for the bias of Gemm and Conv. The broadcasting of bias is done for dynamic tensor in the Session constructor only if needed. For the broadcasted tensor there is no need to create a new tensor, but the existing one is resized to the broadcasted needed size using vector::resize --- .../inc/TMVA/ROperator_BatchNormalization.hxx | 6 +- tmva/sofie/inc/TMVA/ROperator_Constant.hxx | 5 +- tmva/sofie/inc/TMVA/ROperator_Conv.hxx | 35 +++-- tmva/sofie/inc/TMVA/ROperator_Gemm.hxx | 52 ++++--- tmva/sofie/inc/TMVA/ROperator_Range.hxx | 17 ++- tmva/sofie/inc/TMVA/SOFIE_common.hxx | 16 +++ tmva/sofie/src/RModel.cxx | 115 ++++++++++++---- tmva/sofie/src/SOFIE_common.cxx | 128 +++++++++++++++++- 8 files changed, 305 insertions(+), 69 deletions(-) diff --git a/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx b/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx index f2d31796bbbcd..c37e7fc4b68de 100644 --- a/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx @@ -141,8 +141,8 @@ public: } } - std::string Generate(std::string OpName) override { - OpName = "op_" + OpName; + std::string Generate(std::string opName) override { + opName = "op_" + opName; if (fShapeX.empty()){ throw std::runtime_error("TMVA SOFIE Batch Normalization called to Generate without being initialized first"); } @@ -158,7 +158,7 @@ public: spatial_dim = ConvertDimShapeToLength( spatialShape); } - out << "\n\n//---- BatchNorm" << (fActivation == EActivationType::RELU ? " + ReLU" : "") << "\n"; + out << "\n\n//---- BatchNorm" << (fActivation == EActivationType::RELU ? 
" + ReLU " : " ") << opName << "\n"; out << SP << "{\n"; out << SP << " size_t i = 0;\n"; out << SP << " for (size_t n = 0; n < " << batchSize << "; ++n) {\n"; diff --git a/tmva/sofie/inc/TMVA/ROperator_Constant.hxx b/tmva/sofie/inc/TMVA/ROperator_Constant.hxx index 3b339e3440488..93f3c43feceb9 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Constant.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Constant.hxx @@ -128,6 +128,7 @@ public: } } else { model.AddIntermediateTensor(fNY, ConvertStringToType(TensorType::Name()), fDimOutputShape); + fOutputTensorNames.emplace_back(fNY); } } @@ -153,9 +154,7 @@ public: } auto length = ConvertDimShapeToLength(fDimOutputShape); // vector is already allocated- fill with values - out << SP << "if (" << length << " > fTensor_" << fNY << ".size())\n"; - out << SP << SP << "fTensor_" << fNY << ".resize(" << length << ");\n"; - out << SP << "std::fill(fTensor_" << fNY << ".begin(), fTensor_" << fNY << ".end(), " << fValues[0] << ");\n"; + out << SP << "std::fill(tensor_" << fNY << ", tensor_" << fNY << " + " << length << ", " << fValues[0] << ");\n"; return out.str(); } }; diff --git a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx index 2681eeb2dd84c..823e7fa04717e 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx @@ -20,6 +20,8 @@ template class ROperator_Conv final : public ROperator { private: + bool fBroadcastBias = false; + std::string fAttrAutopad; std::vector fAttrDilations; size_t fAttrGroup; @@ -30,7 +32,6 @@ private: std::string fNX; std::string fNW; std::string fNB; - std::string fNB2; // bias tensor name after broadcasting std::string fNY; std::string convK; @@ -262,6 +263,9 @@ public: std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNB + " is not found in model"); } fShapeB = model.GetTensorShape(fNB); + if (fShapeB.size() != 1) + throw + std::runtime_error("TMVA SOFIE Conv op : invalid shape for Bias tensor (is not 1D)"); std::vector targetShape(fShapeY.begin() + 1, fShapeY.end()); auto shapeDimB = model.GetDimTensorShape(fNB); bool broadcast_needed = !UTILITY::AreSameShape(shapeDimB, targetShape); @@ -278,7 +282,9 @@ public: if (fType != "float") throw std::runtime_error("TMVA SOFIE Conv op: Broadcasting for non-float type tensors is not supported"); // here is the actual broadcasting + fBroadcastBias = true; if (!fUseSession) { + // do here broadcasting std::vector shape(fDim + 1, 1); shape[0] = fShapeB[0]; auto intTargetShape = ConvertShapeToInt(targetShape); @@ -287,13 +293,6 @@ public: std::default_delete()); model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), intTargetShape, new_data_ptr); fShapeB = model.GetTensorShape(fNB); - fNB2 = fNB; // use same name - } - else { - // In case of session add broadcasting code in Session constructor and in GenerateInitCode - // we need to add a new intermediate tensor for broadcasted bias tensor - fNB2 = fNB + "bcast"; - model.AddIntermediateTensor(fNB2, model.GetTensorType(fNB), targetShape); } } } @@ -334,15 +333,25 @@ public: std::string GenerateInitCode() override { std::stringstream out; // Generate initialization code for broadcasting of bias tensor - if (!fNB2.empty()) { + if (fBroadcastBias) { // include a separate scope to avoid defining unique operator temp variables std::vector shape(fDim + 1, 1); + // bias (is a 1D tensor) shape[0] = fShapeB[0]; std::vector targetShape(fShapeY.begin() + 1, fShapeY.end()); - out << SP << "{\n"; + out << "//--- broadcast bias tensor " << fNB << "for Conv op if needed \n"; + 
// in case of dynamic tensors check needs to be done at run time + bool isOutDynamic = ConvertShapeToInt(targetShape).empty(); + auto length = ConvertDimShapeToLength(targetShape); + if (isOutDynamic) + out << SP << "if (" << length << " > " << ConvertShapeToLength(shape) << ") {\n"; + else + out << SP << "{\n"; out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" << fNB << ", " << ConvertShapeToString(shape) << ", " << ConvertShapeToString(fShapeY) << ");\n"; - out << SP << SP << "std::copy(data, data + " << ConvertDimShapeToLength(targetShape) << ", tensor_" << fNB2 << ");\n"; + out << SP << SP << "fTensor_" << fNB << ".resize(" << length << ");\n"; + out << SP << SP << "tensor_" << fNB << " = fTensor_" << fNB << ".data();\n"; + out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNB << ");\n"; out << SP << SP << "delete[] data;\n"; out << SP << "}\n"; } @@ -562,13 +571,13 @@ public: out << SP << SP << "}\n"; // end of group loop } - if (fNB2 != "") { + if (fNB != "") { out << SP << "int " << OpName << "_size = " << outputBatchStride << ";\n"; out << SP << "float " << OpName << "_gamma = 1.0;\n"; out << SP << "int " << OpName << "_incx = 1;\n"; out << SP << "int " << OpName << "_incy = 1;\n"; - out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNB2 << ", &" + out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNB << ", &" << OpName << "_incx, tensor_" << fNY << " + out_offset, &" << OpName << "_incy);\n"; } diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx index d954720396151..1c8b51d991af2 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx @@ -24,6 +24,7 @@ namespace SOFIE{ private: bool fIsDynamic = false; + bool fBroadcastBias = false; float fAttrAlpha = 1.0; float fAttrBeta = 1.0; @@ -33,7 +34,6 @@ namespace SOFIE{ std::string fNA; std::string fNB; std::string fNC = ""; - std::string fNC2; // bias tensor name after broadcasting std::string fNY; std::string fType; EActivationType fActivation; @@ -222,7 +222,6 @@ namespace SOFIE{ throw std::runtime_error("TMVA SOFIE Gemm Op Input Tensor" + fNC + " is dynamic and is not supported"); } fShapeC = model.GetTensorShape(fNC); - fNC2 = fNC; size_t lengthC = ConvertShapeToLength(fShapeC); size_t lengthY = ConvertShapeToLength(shapeY); // for dynamic outputs broadcasting is always done @@ -230,6 +229,7 @@ namespace SOFIE{ if (broadcast_needed) { + fBroadcastBias = true; if (!model.UseSession()) { // without session dynamic tensors not supported in Gemm if (fIsDynamic) { @@ -246,14 +246,18 @@ namespace SOFIE{ fShapeC = shapeY; } } else { - // In case of session add broadcasting code in Session constructor and in GenerateInitCode - // we need to add a new intermediate tensor for broadcasted bias tensor - fNC2 = fNC + "bcast"; - if (!fIsDynamic) { - model.AddIntermediateTensor(fNC2, model.GetTensorType(fNC), shapeY); - } - else - model.AddDynamicTensor(fNC2,model.GetTensorType(fNC), fShapeY); + // /d to add a new intermediate tensor for broadcasted bias tensor + // fNC2 = fNC + "bcast"; + // if (!fIsDynamic) { + // model.AddIntermed/ In case of session add broadcasting code in Session constructor and in GenerateInitCode + // // we neeiateTensor(fNC2, model.GetTensorType(fNC), shapeY); + // } + // else + // model.AddDynamicTensor(fNC2,model.GetTensorType(fNC), fShapeY); + // // do not add to lists of input/output 
tensors since broadcasted tensors are special + // // and we manage their memory separatly + // //fInputTensorNames.emplace_back(fNC2); + // //fOutputTensorNames.emplace_back(fNC2); } } } @@ -291,18 +295,26 @@ namespace SOFIE{ std::string GenerateInitCode() override { std::stringstream out; // generate initialization code for broadcasting of bias tensor - if (fShapeC.size() != fShapeY.size() && fNC != fNC2) { + if (fShapeC.size() != fShapeY.size() && fBroadcastBias) { // we broadcast here always C in Y output, so target shape is the one of Y // no need to call UTILITY::UnidirectionalBroadcastShape. // here in case of parametric shape we need to assume that the parameters will be defined in the initialization code. - auto targetShape = fShapeY; - // include a separate scope to avoid defining unique operator temp variables - out << "//--- broadcast bias tensor " << fNC << "for Gemm op\n"; - out << SP << "{\n"; - out << " float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" - << fNC << "," << ConvertShapeToString(fShapeC) << ", " << ConvertShapeToString(fShapeY) << ");\n"; auto length = ConvertDimShapeToLength(fShapeY); // output size - out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNC2 << ");\n"; + // include a separate scope to avoid defining unique operator temp variables + out << "//--- broadcast bias tensor " << fNC << "for Gemm op if needed \n"; + // in case of dynamic tensors check needs to be done at run time + bool isOutDynamic = ConvertShapeToInt(fShapeY).empty(); + if (isOutDynamic) + out << SP << "if (" << length << " > " << ConvertShapeToLength(fShapeC) << ") {\n"; + else + out << SP << "{\n"; + // here we broadcast + out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" + << fNC << "," << ConvertShapeToString(fShapeC) << ", " << ConvertShapeToString(fShapeY) << ");\n"; + + out << SP << SP << "fTensor_" << fNC << ".resize(" << length << ");\n"; + out << SP << SP << "tensor_" << fNC << " = fTensor_" << fNC << ".data();\n"; + out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNC << ");\n"; out << SP << SP << "delete [] data;\n"; out << SP << "}\n"; } @@ -338,7 +350,7 @@ namespace SOFIE{ // case bias is present if (!fNC.empty()){ - if (fNC2 == fNC) { + if (!fBroadcastBias) { // add a check in case broadcasting was not needed or done outside of session // C should have smaller dimension of Y if (!fIsDynamic) { @@ -381,7 +393,7 @@ namespace SOFIE{ out << std::setprecision(std::numeric_limits::max_digits10) << fAttrBeta << ","; // in the case of bias if (!fNC.empty()) - out << "tensor_" << fNC2; + out << "tensor_" << fNC; else out << "nullptr"; out << ");\n"; diff --git a/tmva/sofie/inc/TMVA/ROperator_Range.hxx b/tmva/sofie/inc/TMVA/ROperator_Range.hxx index 7c138c3b3def5..16d2cb689d518 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Range.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Range.hxx @@ -37,6 +37,10 @@ public: } static_assert( (std::is_same_v || std::is_same_v), "TMVA::SOFIE - Unsupported type by Range operator"); + { + fInputTensorNames = { fNStart, fNLimit, fNDelta }; + fOutputTensorNames = { fNOutput }; + } } void Initialize(RModel& model) override { @@ -166,13 +170,14 @@ public: ") - static_cast(*tensor_" + fNStart + ")) / static_cast(*tensor_" + fNDelta + ")), 0.0f))"; out << SP << "{\n"; out << SP << SP << "size_t range" << " = " << sizeName << ";\n"; - out << SP << SP << "if ( range > " << "fTensor_" << fNOutput << ".size() ){\n"; - out << SP << SP 
<< SP << "fTensor_" << fNOutput << ".resize(range);\n"; - // need to re-initialized pointer to tensor data - out << SP << SP << SP << "tensor_" << fNOutput << " = fTensor_" << fNOutput << ".data();\n"; - out << SP << SP << "}\n"; + if (sizeName != fShape[0].param) { + out << SP << SP << "if ( range > " << "fTensor_" << fNOutput << ".size() ){\n"; + // we should probably resize the tensor here + out << SP << SP << SP << "throw std::runtime_error(\"wrong size allocated for output of range\");\n"; + out << SP << SP << "}\n"; + } out << SP << SP << "for (size_t i = 0; i < range; i++) {\n"; - out << SP << SP << SP << "fTensor_" << fNOutput << "[i] = *tensor_" << fNStart << " + i * (*tensor_" << fNDelta << ");\n"; + out << SP << SP << SP << "tensor_" << fNOutput << "[i] = *tensor_" << fNStart << " + i * (*tensor_" << fNDelta << ");\n"; out << SP << SP << "}\n"; out << SP << "}\n"; return out.str(); diff --git a/tmva/sofie/inc/TMVA/SOFIE_common.hxx b/tmva/sofie/inc/TMVA/SOFIE_common.hxx index dfa46a44c03b0..7abb7df68d997 100644 --- a/tmva/sofie/inc/TMVA/SOFIE_common.hxx +++ b/tmva/sofie/inc/TMVA/SOFIE_common.hxx @@ -811,6 +811,22 @@ void ReadTensorFromStream(std::istream &is, T &target, std::string const &expect } } + +// code for the memory greeding allocations +struct TensorLifeInfo { + int begin; // start time (op index) lifetime + int end; // end time lifetime + size_t size; // size of tensors in bytes +}; + +struct MemoryResult { + std::size_t total_bytes = 0; // total memory needed + std::vector offsets; // resulted offsets for each tensor +}; + +/// Greedy best-fit planner with coalescing free list. +MemoryResult OrganizeMemory(const std::vector & tensorsInfo ); + } // namespace SOFIE } // namespace Experimental } // namespace TMVA diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index 32a1d3f235e11..d7ab2b4ad39af 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -167,16 +167,15 @@ void RModel::AddOperator(std::unique_ptr op, int order_execution) { } // storing the last usage of tensors which are input to - // operators (but are not inputs to the model, i.e. they are intermediate - // tensors). This information is needed to keep a check on when a - // particular intermediate tensor can be flushed to free up memory for reuse. 
+ // operators (but are not inputs to the model or they are not initialized) + // We call this function during parsing so we don't have yet initialized the operators for(size_t index = 0; index & inputParams, bool fOperators[op_idx]->Initialize(*this); for(auto &it:fOperators[op_idx]->GetOpOutputTensors()){ std::string name = std::string{it}; + // check if tensor is not an initialized or output tensor and it is not already in the list if (fIntermediateTensorFrequencyLookup.find(it) == fIntermediateTensorFrequencyLookup.end() && std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), name) == fOutputTensorNames.end() && - fInitializedTensors.find(name) == fInitializedTensors.end() && - fDynamicTensorInfos.find(name) == fDynamicTensorInfos.end()){ + fInitializedTensors.find(name) == fInitializedTensors.end()) + { fIntermediateTensorFrequencyLookup[it] = op_idx; } } @@ -616,9 +616,9 @@ void RModel::Initialize(const std::map & inputParams, bool // loop on initialized tensors and make the integers as constant to be // not written in a weight file - for (auto &i : fInitializedTensors) { - if (i.second.IsWeightTensor() && i.second.type() != ETensorType::FLOAT) - i.second.SetConstant(); + for (auto &it : fInitializedTensors) { + if (it.second.IsWeightTensor() && it.second.type() != ETensorType::FLOAT) + it.second.SetConstant(); } fIsInitialized = true; @@ -775,19 +775,21 @@ void RModel::GenerateIntermediateTensorInfo() { fGC += "//--- declare the dynamic tensors\n"; for (auto &i : fDynamicTensorInfos) { if (i.second.type == ETensorType::FLOAT) { - fGC += "std::vector fTensor_" + i.first + ";\n"; + //fGC += "std::vector fTensor_" + i.first + ";\n"; fGC += "float * tensor_" + i.first + " = nullptr;\n"; } else if (i.second.type == ETensorType::DOUBLE) { - fGC += "std::vector fTensor_" + i.first + ";\n"; + //fGC += "std::vector fTensor_" + i.first + ";\n"; fGC += "double * tensor_" + i.first + " = nullptr;\n"; } else if (i.second.type == ETensorType::INT64) { - fGC += "std::vector fTensor_" + i.first + ";\n"; + //fGC += "std::vector fTensor_" + i.first + ";\n"; fGC += "int64_t * tensor_" + i.first + " = nullptr;\n"; } else if (i.second.type == ETensorType::BOOL) { - fGC += "std::vector fTensor_" + i.first + ";\n"; + //fGC += "std::vector fTensor_" + i.first + ";\n"; fGC += "uint8_t * tensor_" + i.first + " = nullptr;\n"; } } + fGC += "//--- dynamic tensors pool\n"; + fGC += "std::vector fDynamicMemoryPool;\n"; } } @@ -805,14 +807,81 @@ void RModel::GenerateOperatorDeclarations() { void RModel::GenerateDynamicTensorInfo() { + // generate code for allocating dynamic tensors using the greedy memory allocations + if (fDynamicTensorInfos.empty()) + return; + std::stringstream out; + out << "// dynamic tensor memory management\n"; + out << SP << "std::vector dynamicTensorInfos;\n"; + out << SP << "dynamicTensorInfos.reserve(" << fDynamicTensorInfos.size() << ");\n"; + + // loop on all the operators to find begin/end life of the tensors + int op_index = 0; + std::vector> tensors; + tensors.reserve(fDynamicTensorInfos.size()); + for (auto & op : fOperators) { + // loop on output tensors - + for (auto &it : op->GetOpOutputTensors()) { + if (fVerbose) { + auto op_ptr = op.get(); + std::cout << "Looping on operator " << op_index << " " << typeid(*op_ptr).name() << std::endl; + } + // check if is a dynamic tensor + std::string name = std::string(it); + if ( fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end() ) { + auto tensor_size = ConvertDimShapeToLength(GetDimTensorShape(name)); + auto type = 
GetTensorType(name); + size_t type_size = GetTypeSize(type); + int begin = op_index; + int end = fOperators.size(); + // look for end + auto it_lookup = fIntermediateTensorFrequencyLookup.find(name); + if (it_lookup != fIntermediateTensorFrequencyLookup.end()) + end = it_lookup->second + 1; // end is last time used + 1 + // // some tensors (like xcol in convolutions) are just used within the operators + // if (end == 0 && begin > 0) end = begin+1; + + if (begin> end) { + std::cout << "op " << op_index << "tensor_" << name << " begin " << begin << " " << " end " << end << std::endl; + throw std::runtime_error("TMVA-SOFIE: RModel::GenerateDynamicTensorInfo: tensor_" + name + " has end before begin"); + } + + // write in code + out << SP << "dynamicTensorInfos.push_back( {" << begin << ", " << end << ", " << type_size << "* (" << tensor_size << ") });" + << " // tensor_" << name << std::endl; + tensors.push_back({name,type}); + } + } + op_index++; // increment operator index + } + out << "\n" << SP << "auto memory_result = OrganizeMemory(dynamicTensorInfos);\n\n"; + out << "// allocating now the memory\n"; + out << SP << "fDynamicMemoryPool = std::vector(memory_result.total_bytes);\n"; + out << SP << "int idx = 0;\n"; + for (auto & it : tensors) { + out << SP << "tensor_" << it.first << " = reinterpret_cast<" << ConvertTypeToString(it.second) << " *>(fDynamicMemoryPool.data() + memory_result.offsets[idx++]);\n"; + } + // check that all dynamic tensors are covered + bool missingTensor = false; for (auto &i : fDynamicTensorInfos) { - auto length = ConvertDynamicShapeToLength(i.second.shape); - out << SP << "if (" << length << " > 0) {\n"; - out << SP << SP << "fTensor_" << i.first << ".resize(" << length << ");\n"; - out << SP << SP << "tensor_" << i.first << " = fTensor_" << i.first << ".data();\n"; - out << SP << "}\n"; + if (std::find(tensors.begin(), tensors.end(), std::pair{i.first, i.second.type}) == tensors.end()) { + std::cout << "Dynamic tensors " << i.first << " is not in list of operator input/output " << std::endl; + missingTensor = true; + } } + if (missingTensor) + throw std::runtime_error("TMVA-SOFIE: RModel::GenerateDynamicTensorInfo - some tensors are not in input/output list"); + + + + // for (auto &i : fDynamicTensorInfos) { + // auto length = ConvertDynamicShapeToLength(i.second.shape); + // out << SP << "if (" << length << " > 0) {\n"; + // out << SP << SP << "fTensor_" << i.first << ".resize(" << length << ");\n"; + // out << SP << SP << "tensor_" << i.first << " = fTensor_" << i.first << ".data();\n"; + // out << SP << "}\n"; + // } fGC += out.str(); } diff --git a/tmva/sofie/src/SOFIE_common.cxx b/tmva/sofie/src/SOFIE_common.cxx index c107b489be19e..1ff510842643a 100644 --- a/tmva/sofie/src/SOFIE_common.cxx +++ b/tmva/sofie/src/SOFIE_common.cxx @@ -4,6 +4,8 @@ #include #include #include +#include +#include namespace TMVA { namespace Experimental { @@ -89,7 +91,7 @@ std::string ConvertTypeToString(ETensorType type){ return "double"; } case ETensorType::BOOL : { - return "bool"; + return "uint8_t"; } default:{ return "other_" + std::to_string( (int) type); @@ -547,6 +549,130 @@ std::vector UTILITY::ComputeStrideFromShape(const std::vector & shape) return strides; } +struct FreeBlock { + std::size_t offset; + std::size_t size; + bool operator<(const FreeBlock& other) const { + // order by offset for deterministic coalescing + return offset < other.offset; + } +}; + +struct MemoryEvent { + int t; // time (i.e. 
operator index) + int type; // 0 = END first, 1 = START + int idx; // tensor index + bool operator<(const MemoryEvent& o) const { + if (t != o.t) return t < o.t; + return type < o.type; // END before START at the same time + } +}; + +/// Greedy best-fit planner with coalescing free list. +MemoryResult OrganizeMemory(const std::vector & tensorsInfo ) +{ + // Basic validation + for (const auto &t : tensorsInfo) { + if (!(t.end > t.begin)) { + throw std::runtime_error("Each tensor must have end > begin."); + } + } + + // Build events: free before allocate at equal times. + std::vector events; + events.reserve(tensorsInfo.size() * 2); + for (int i = 0; i < (int)tensorsInfo.size(); ++i) { + events.push_back({tensorsInfo[i].end, 0, i}); // END + events.push_back({tensorsInfo[i].begin, 1, i}); // START + } + std::sort(events.begin(), events.end()); + + std::vector tensorsOffset(tensorsInfo.size()); + + // Free list ordered by offset (for O(log n) coalescing) + // and faster insert/erase with respect to a vector + std::set free_list; + + // Bookkeeping: size/offset map for frees. + std::unordered_map live_size; + std::unordered_map live_offset; + + std::size_t total_bytes = 0; + + auto allocate_best_fit = [&](std::size_t need) -> std::size_t { + // Find the *smallest* block whose size >= need (best-fit). + // Since free_list is ordered by offset, we scan to find best by size. + // (For very large sets you could maintain a multimap by size as well.) + auto best = free_list.end(); + for (auto it = free_list.begin(); it != free_list.end(); ++it) { + if (it->size >= need) { + if (best == free_list.end() || it->size < best->size) + best = it; + } + } + if (best != free_list.end()) { + std::size_t off = best->offset; + if (best->size == need) { + free_list.erase(best); + } else { + FreeBlock updated{best->offset + need, best->size - need}; + free_list.erase(best); + free_list.insert(updated); + } + return off; + } + // No free block large enough; grow the heap. + std::size_t off = total_bytes; + total_bytes += need; + return off; + }; + + auto try_coalesce = [&](std::set::iterator it) { + // Coalesce with previous + if (it != free_list.begin()) { + auto prev = std::prev(it); + if (prev->offset + prev->size == it->offset) { + FreeBlock merged{prev->offset, prev->size + it->size}; + free_list.erase(prev); + it = free_list.erase(it); + it = free_list.insert(merged).first; + } + } + // Coalesce with next + auto next = std::next(it); + if (next != free_list.end() && it->offset + it->size == next->offset) { + FreeBlock merged{it->offset, it->size + next->size}; + free_list.erase(next); + it = free_list.erase(it); + free_list.insert(merged); + } + }; + + // Sweep through time. 
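// Note (illustrative only, sizes are hypothetical and not taken from a real model):
// the events are processed in time order with END sorted before START at equal
// times, so a tensor whose lifetime ends at operator t can hand its block to one
// whose lifetime begins at the same t. For example, with
//   A{begin=0, end=2, size=4000}, B{begin=1, end=3, size=16000}, C{begin=2, end=4, size=4000}
// the sweep does:
//   t=0 START A -> no free block, grow heap, A at offset 0        (total 4000)
//   t=1 START B -> grow heap, B at offset 4000                    (total 20000)
//   t=2 END A   -> free [0,4000); START C -> best fit reuses it, C at offset 0
//   t=3 END B, t=4 END C -> blocks returned and coalesced, total stays 20000
// so C reuses A's storage and the pool never grows beyond A + B.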
+ for (const auto &e : events) { + if (e.type == 0) { // END: free + auto it_sz = live_size.find(e.idx); + auto it_off = live_offset.find(e.idx); + if (it_sz != live_size.end() && it_off != live_offset.end()) { + FreeBlock fb{it_off->second, it_sz->second}; + // Insert and coalesce with neighbors + auto it = free_list.insert(fb).first; + try_coalesce(it); + live_size.erase(it_sz); + live_offset.erase(it_off); + } + } else { // START: allocate + auto &t = tensorsInfo[e.idx]; + std::size_t off = allocate_best_fit(t.size); + tensorsOffset[e.idx] = off; + live_size[e.idx] = t.size; + live_offset[e.idx] = off; + } + } + + return MemoryResult{total_bytes, std::move(tensorsOffset)}; +} + } // namespace SOFIE } // namespace Experimental } // namespace TMVA From 556f0d701718a577ebe6757dfc55e45fbd7940d4 Mon Sep 17 00:00:00 2001 From: moneta Date: Fri, 14 Nov 2025 10:41:42 +0100 Subject: [PATCH 04/12] [tmva][sofie] Fix an issue in genereting code for dynamic tensor when broadcasting The assert that was generated when broadcasting dynamic tensors was not correct --- tmva/sofie/inc/TMVA/ROperator_Gemm.hxx | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx index 1c8b51d991af2..1a0fa7b16868b 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx @@ -207,13 +207,7 @@ namespace SOFIE{ } fShapeY = DynamicShapeInference({fShapeA, fShapeB}); - std::vector shapeY; - if (!fIsDynamic) { - shapeY = ConvertShapeToInt(fShapeY); - if (shapeY.empty()) { - throw std::runtime_error("TMVA SOFIE Gemm Op " + fNY + " has invalid shape" + ConvertShapeToString(fShapeY)); - } - } + std::vector shapeY = ConvertShapeToInt(fShapeY); // bias is normally not dynamic (not support it for time being) if (fNC != ""){ @@ -225,7 +219,11 @@ namespace SOFIE{ size_t lengthC = ConvertShapeToLength(fShapeC); size_t lengthY = ConvertShapeToLength(shapeY); // for dynamic outputs broadcasting is always done - bool broadcast_needed = lengthC != lengthY; + bool broadcast_needed = false; + if (fIsDynamic && shapeY.empty()) + broadcast_needed = true; + else + broadcast_needed = lengthC != lengthY; if (broadcast_needed) { @@ -359,7 +357,7 @@ namespace SOFIE{ + ConvertShapeToString(fShapeC) + " output length " + lengthGemm); } else { // add a dynamic check (C should not be a dynamic tensor) - out << SP << "assert(" << lengthGemm << " != " << ConvertShapeToLength(fShapeC) << ");\n"; + out << SP << "assert(" << lengthGemm << " == " << ConvertShapeToLength(fShapeC) << ");\n"; } } } else { From 3e6691e478f45ea545606ece67768e02570907bf Mon Sep 17 00:00:00 2001 From: moneta Date: Wed, 17 Dec 2025 22:32:27 +0100 Subject: [PATCH 05/12] [tmva][sofie] Fix stacked MatMul and speedup LayerNorm Apply also other fixes for the SOFIE tests and add a new test for StackMul --- tmva/sofie/inc/TMVA/ROperator_Gather.hxx | 2 - tmva/sofie/inc/TMVA/ROperator_Gemm.hxx | 46 ++- .../inc/TMVA/ROperator_LayerNormalization.hxx | 265 +++++++++--------- tmva/sofie/inc/TMVA/ROperator_Range.hxx | 29 +- tmva/sofie/src/RModel.cxx | 33 ++- tmva/sofie/test/TestCustomModelsFromONNX.cxx | 29 +- .../test/input_models/MatMul_Stacked.onnx | 19 ++ 7 files changed, 251 insertions(+), 172 deletions(-) create mode 100644 tmva/sofie/test/input_models/MatMul_Stacked.onnx diff --git a/tmva/sofie/inc/TMVA/ROperator_Gather.hxx b/tmva/sofie/inc/TMVA/ROperator_Gather.hxx index 1d51c59380dae..0d50c0747c028 100644 --- 
a/tmva/sofie/inc/TMVA/ROperator_Gather.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gather.hxx @@ -72,8 +72,6 @@ public: // empty shape Indices is a scalar value for the indices size_t indicesLength = ConvertShapeToLength(model.GetTensorShape(fNIndices)); int64_t* indicesData = static_cast(model.GetInitializedTensorData(fNIndices).get()); - //flag index tensor as not writable (not sure this is needed since index tensor might be used in generated code) - model.SetNotWritableInitializedTensor(fNIndices); // update indices data in case of negative dim values for (size_t i = 0; i < indicesLength; i++) { // move this at generation time? diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx index 1a0fa7b16868b..47bc5392fede4 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx @@ -337,6 +337,8 @@ namespace SOFIE{ auto m = (fAttrTransA ? fShapeA[dimA-1].GetVal() : fShapeA[dimA-2].GetVal()); auto n = (fAttrTransB ? fShapeB[dimB-2].GetVal() : fShapeB[dimB-1].GetVal()); auto k = (fAttrTransA ? fShapeA[dimA-2].GetVal() : fShapeA[dimA-1].GetVal()); + // size of A: if (trasposeA) is m*k else k*m + // size of B n*k std::vector sY = {fShapeY[dimY-2], fShapeY[dimY-1]}; // extra dimensions in case of stacked MatMul std::vector sA; @@ -371,9 +373,32 @@ namespace SOFIE{ // include MatMul case where we stack the Gemm operations // exclude case where we have only 1's in the additional dims bool doStackMul = dimY > 2 && ( fIsDynamic || std::stoi(lengthExtra) > 1); + // compute input offset for stack multiplications + std::string lengthExtra_A; + std::string lengthExtra_B; + std::string increment_A; + std::string increment_B; + + if (doStackMul) { + std::vector sA(fShapeA.begin(), fShapeA.begin()+dimA-2); + std::vector sB(fShapeB.begin(), fShapeB.begin()+dimB-2); + std::vector mA = {fShapeA[dimA-2], fShapeA[dimA-1]}; + std::vector mB = {fShapeA[dimB-2], fShapeB[dimB-1]}; + lengthExtra_A = ConvertDimShapeToLength(sA); + lengthExtra_B = ConvertDimShapeToLength(sB); + // size of A performing matmul is m*k and n*k for B + increment_A = ConvertDimShapeToLength(mA); + increment_B = ConvertDimShapeToLength(mB); + } + bool extraA = (doStackMul && lengthExtra_A != "1"); + bool extraB = (doStackMul && lengthExtra_B != "1"); if (doStackMul) { - out << SP << "size_t " << opName << "_yoffset = 0;\n"; // needed if we stack the gemm operations - out << SP << "for (int i = 0; i < " << lengthExtra << "; i++){\n"; + out << SP << "size_t " << opName << "_y_offset = 0;\n"; // needed if we stack the gemm operations + if (extraA) + out << SP << "size_t " << opName << "_A_offset = 0;\n"; + if (extraB) + out << SP << "size_t " << opName << "_B_offset = 0;\n"; + out << SP << "for (size_t i = 0; i < " << lengthExtra << "; i++){\n"; out << SP; } @@ -381,14 +406,16 @@ namespace SOFIE{ out << SP << "TMVA::Experimental::SOFIE::Gemm_Call(" << "tensor_" << fNY; - if (doStackMul) out << " + " << opName << "_yoffset"; + if (doStackMul) out << " + " << opName << "_y_offset"; out << ", " << (fAttrTransB ? "true, " : "false, ") << (fAttrTransA ? 
"true, " : "false, ") << n << ", " << m << ", " << k << ", "; - out << std::setprecision(std::numeric_limits::max_digits10) << fAttrAlpha << ","; - out << "tensor_" << fNB << ", " << "tensor_" << fNA << ", "; - out << std::setprecision(std::numeric_limits::max_digits10) << fAttrBeta << ","; + out << std::setprecision(std::numeric_limits::max_digits10) << fAttrAlpha << ", tensor_" << fNB; + if (extraB) out << " + " << opName << "_B_offset"; + out << ", tensor_" << fNA; + if (extraA) out << " + " << opName << "_A_offset"; + out << ", " << std::setprecision(std::numeric_limits::max_digits10) << fAttrBeta << ","; // in the case of bias if (!fNC.empty()) out << "tensor_" << fNC; @@ -404,7 +431,12 @@ namespace SOFIE{ } if (doStackMul) { - out << SP << SP << opName << "_yoffset += " << lengthGemm << ";\n"; + out << SP << SP << opName << "_y_offset += " << lengthGemm << ";\n"; + if (lengthExtra_A != "1") + out << SP << SP << opName << "_A_offset += " << increment_A << ";\n"; + if (lengthExtra_B != "1") + out << SP << SP << opName << "_B_offset += " << increment_B << ";\n"; + out << "}\n"; // end of loop on the stacked multiplications } diff --git a/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx b/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx index 239c5332172b0..f98ce201d400d 100644 --- a/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx @@ -14,6 +14,7 @@ namespace SOFIE { template class ROperator_LayerNormalization : public ROperator { private: + bool fCastToFloat = false; // flag to indicate if operation 1 are in floats (to be impl) int fAttrAxis; float fAttrEpsilon; size_t fAttrStashType; @@ -31,7 +32,7 @@ private: std::vector fShapeX; std::vector fShapeScale; - std::vector fShapeB; // shape of input Bias (B) is assumed to be fully defined + std::vector fShapeB; std::vector fShapeY; std::vector fShapeMean; std::vector fShapeInvStdDev; @@ -40,8 +41,8 @@ private: size_t fSize; // Size of the input // size_t fAxisDim; - std::vector fNormalizedShape; - std::vector fAxesShape; + std::vector fNormalizedShape; // shape from X[ axis,...,N-1] + std::vector fAxesShape; // shape from X[0,..,axis-1] // lengths in string format std::string fLength; // Length of the input std::string fNormalizedLength; @@ -79,7 +80,7 @@ public: void Initialize(RModel& model) override { if (!model.CheckIfTensorAlreadyExist(fNX)) { - throw std::runtime_error("TMVA::SOFIE - Tensor " + fNX + " not found."); + throw std::runtime_error("TMVA::SOFIE - LayerNormalization - Tensor " + fNX + " not found."); } bool isDynamic = model.IsDynamicTensor(fNX); fShapeX = model.GetDimTensorShape(fNX); @@ -104,8 +105,7 @@ public: // Type of mean and std ETensorType type = (fAttrStashType == 1) ? 
ETensorType::FLOAT : model.GetTensorType(fNX); // Mean - if (fNMean.empty()) { - fNMean = "Mean" + fNX; + if (!fNMean.empty()) { // cannot use initializer list with one element since it is ambiguous if (isDynamic) // add size_t(-1) to indicate that shape is an expression @@ -114,29 +114,60 @@ public: model.AddIntermediateTensor(fNMean, type, std::vector(1,std::stoi(fAxesLength))); } // Inverse Standard Deviation - if (fNInvStdDev.empty()) { - fNInvStdDev = "InvStdDev" + fNX; + if (!fNInvStdDev.empty()) { if (isDynamic) model.AddIntermediateTensor(fNInvStdDev, type, std::vector(1,Dim{fAxesLength,std::size_t(-1)})); else model.AddIntermediateTensor(fNInvStdDev, type, std::vector(1,std::stoi(fAxesLength))); } + // if mean and stdev are not empty they are not defined in the output list // Cast X to float if (fAttrStashType == 1 && model.GetTensorType(fNX) != ETensorType::FLOAT) { - fNCastedX = "Casted" + fNX; - model.AddIntermediateTensor(fNCastedX, ETensorType::FLOAT, fShapeX); - fNNormalizedX = "Normalized" + fNX; - model.AddIntermediateTensor(fNNormalizedX, ETensorType::FLOAT, fShapeX); + fCastToFloat = true; + fType = "float"; + // fNCastedX = "Casted" + fNX; + // model.AddIntermediateTensor(fNCastedX, ETensorType::FLOAT, fShapeX); + // fNNormalizedX = "Normalized" + fNX; + // model.AddIntermediateTensor(fNNormalizedX, ETensorType::FLOAT, fShapeX); + } + // scale shape + fShapeScale = model.GetDimTensorShape(fNScale); + // appends 1 to scale shapes if missing + size_t dimScale = fShapeScale.size(); + if (dimScale < fSize) { + for (size_t i = 0; i < fSize-dimScale; i++) + fShapeScale.insert(fShapeScale.begin(), Dim{1}); + } + // check also shape if consistent now + for (size_t i = 0; i < fSize; i++) { + if (fShapeScale[i].dim != 1 && fShapeScale[i] != fShapeX[i]) + throw std::runtime_error("TMVA::SOFIE - LayerNormalization - Scale Tensor has invalid shape " + ConvertDimShapeToString(fShapeScale)); } - // Broadcast the bias if (!fNB.empty()) { - fShapeB = model.GetTensorShape(fNB); - size_t lengthB = ConvertShapeToLength(fShapeB); - if (isDynamic || lengthB < static_cast(std::stoi(fLength))) { - fNBroadcastedB = "Broadcasted" + fNB; - model.AddIntermediateTensor(fNBroadcastedB, ConvertStringToType(fType), fShapeX); + fShapeB = model.GetDimTensorShape(fNB); + // appends 1 to bias shapes if missing + size_t dimB = fShapeB.size(); + if (dimB < fShapeX.size()) { + for (size_t i = 0; i < fSize-dimB; i++) + fShapeB.insert(fShapeB.begin(), Dim{1}); + } + for (size_t i = 0; i < fSize; i++) { + if (fShapeB[i].dim != 1 && fShapeB[i] != fShapeX[i]) + throw std::runtime_error("TMVA::SOFIE - LayerNormalization - Bias Tensor has invalid shape " + ConvertDimShapeToString(fShapeScale)); } } + + std::cout << "bias + scale " << ConvertDimShapeToString(fShapeB) << " " << ConvertDimShapeToString(fShapeScale) << std::endl; + + // // Broadcast the bias + // if (!fNB.empty()) { + // fShapeB = model.GetTensorShape(fNB); + // size_t lengthB = ConvertShapeToLength(fShapeB); + // if (isDynamic || lengthB < static_cast(std::stoi(fLength))) { + // fNBroadcastedB = "Broadcasted" + fNB; + // model.AddIntermediateTensor(fNBroadcastedB, ConvertStringToType(fType), fShapeX); + // } + // } model.AddNeededStdLib("cmath"); } @@ -162,10 +193,6 @@ public: throw std::runtime_error("TMVA::SOFIE LayerNormalization operator " + opName + " called to generate without being initialized first."); } - if (fShapeX.size() > 5) { - throw std::runtime_error("TMVA::SOFIE LayerNormalization operator not " - "implemented for input tensor of size 
> 5."); - } std::stringstream out; @@ -179,10 +206,32 @@ public: } auto strides = UTILITY::ComputeStrideFromShape(fShapeX); - std::string InputIndex = "axis_0 * " + strides[0].GetVal(); + std::string inputIndex = "axis_0 * " + strides[0].GetVal(); for (size_t i = 1; i < fSize; i++) { - InputIndex += " + axis_" + std::to_string(i) + " * " + strides[i].GetVal(); + inputIndex += " + axis_" + std::to_string(i); + if (i < fSize-1) inputIndex += " * " + strides[i].GetVal(); } + auto scaleStrides = UTILITY::ComputeStrideFromShape(fShapeScale); + std::string scaleIndex; + for (size_t i = 0; i < fSize; i++) { + if (fShapeScale[i].dim != 1) { + if (!scaleIndex.empty()) scaleIndex += " + "; + scaleIndex += "axis_" + std::to_string(i); + if ( scaleStrides[i].dim != 1) scaleIndex += " * " + scaleStrides[i].GetVal(); + } + } + if (scaleIndex.empty()) scaleIndex = "0"; + + auto biasStrides = UTILITY::ComputeStrideFromShape(fShapeB); + std::string biasIndex; + for (size_t i = 0; i < fSize; i++) { + if (fShapeB[i].dim != 1) { + if (!biasIndex.empty()) biasIndex += " + "; + biasIndex += "axis_" + std::to_string(i); + if ( biasStrides[i].dim != 1) biasIndex += " * " + biasStrides[i].GetVal(); + } + } + if (biasIndex.empty()) biasIndex = "0"; auto axesStrides = UTILITY::ComputeStrideFromShape(fAxesShape); std::string axesIndex = "axis_" + std::to_string(0) + " * " + axesStrides[0].GetVal(); @@ -190,51 +239,42 @@ public: axesIndex += " + axis_" + std::to_string(i) + " * " + axesStrides[i].GetVal(); } - auto normalizedStrides = UTILITY::ComputeStrideFromShape(fNormalizedShape); - std::string normalizedIndex = "axis_" + std::to_string(fAxis) + " * " + normalizedStrides[0].GetVal(); - for (size_t i = fAxis + 1; i < fSize; i++) { - normalizedIndex += " + axis_" + std::to_string(i) + " * " + normalizedStrides[i - fAxis].GetVal(); - } - if (!fNCastedX.empty()) { - // Cast X to float - out << SP << "for (size_t i = 0; i < " << fLength << "; i++) {\n"; - out << SP << SP << "tensor_" << fNCastedX << "[i] = " << "static_cast(tensor_" << fNX; - out << "[i]);\n"; - out << SP << "}\n"; - } + // compute mean and std-dev. 
Save in tensors if requested out << SP << "// Compute the mean\n"; - // Loop over the normalized dimensions + // Loop over all the dims in [0, fAxis) for (size_t i = 0; i < fAxis; i++) { std::string iIdx = "axis_" + std::to_string(i); out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i] << "; " << iIdx << "++) {\n"; } - out << SP << SP << fType << " sum = 0.;\n"; - // loop over all the dims in [0, fAxis) + out << SP << SP << fType << " mean = 0.;\n"; + // loop over the normalized dimensions (fAxis,....,N-1) for (size_t j = fAxis; j < fSize; j++) { std::string jIdx = "axis_" + std::to_string(j); out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j] << "; " << jIdx << "++) {\n"; } - out << SP << SP << SP << "sum += tensor_" << fNX << "[" << InputIndex << "];\n"; + out << SP << SP << SP << "mean += tensor_" << fNX << "[" << inputIndex << "];\n"; for (size_t j = fAxis; j < fSize; j++) { out << SP << SP << "}\n"; } - out << SP << SP << "tensor_" << fNMean << "[" << axesIndex << "] = sum / " << fType << "("; - out << fNormalizedLength << ");\n"; - for (size_t i = fAxis; i < fSize; i++) { - out << SP << "}\n"; - } + out << SP << SP << "mean /= " << fType << "(" << fNormalizedLength << ");\n"; + + // for (size_t i = fAxis; i < fSize; i++) { + // out << SP << "}\n"; + // } + // tensor_" << fNMean << "[" << axesIndex << "] out << SP << "// Compute the inverse Standard Deviation\n"; // Loop over the normalized dimensions - for (size_t i = 0; i < fAxis; i++) { - std::string iIdx = "axis_" + std::to_string(i); - out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i] - << "; " << iIdx << "++){\n"; - } + // for (size_t i = 0; i < fAxis; i++) { + // std::string iIdx = "axis_" + std::to_string(i); + // out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i] + // << "; " << iIdx << "++){\n"; + // } + // Set sum = 0 out << SP << SP << fType << " sum = 0.;\n"; // loop over all the dims in [0, fAxis) @@ -243,92 +283,63 @@ public: out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j] << "; " << jIdx << "++){\n"; } - out << SP << SP << SP << "float tmp = tensor_" << fNX << "[" << InputIndex << "] - tensor_" - << fNMean << "[" << axesIndex << "];\n"; + out << SP << SP << SP << "float tmp = tensor_" << fNX << "[" << inputIndex << "] - mean;\n"; out << SP << SP << SP << "sum += tmp*tmp;\n"; for (size_t j = fAxis; j < fSize; j++) { out << SP << SP << "}\n"; } - out << SP << SP << "tensor_" << fNInvStdDev << "[" << axesIndex << "] = 1 / std::sqrt("; + out << SP << SP << fType << " invStdDev = 1 / std::sqrt("; out << "sum / " << fType << "(" << fNormalizedLength << ") + " << fAttrEpsilon << ");\n"; - for (size_t i = 0; i < fAxis; i++) { - out << SP << "}\n"; - } - if (!fNCastedX.empty()) { - out << "// NormalizedX = InvStdDev * (CastedX - Mean)\n"; - for (size_t i = 0; i < fAxis; i++) { - std::string iIdx = "axis_" + std::to_string(i); - out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i] - << "; " << iIdx << "++){\n"; - } - for (size_t j = fAxis; j < fSize; j++) { - std::string jIdx = "axis_" + std::to_string(j); - out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j] - << "; " << jIdx << "++){\n"; - } - out << SP << SP << SP << "tensor_" << fNNormalizedX << "[" << InputIndex << "] = tensor_"; - out << fNInvStdDev << "[" << axesIndex << "] * (tensor_" << fNCastedX << "[" << InputIndex; - out << "] - 
tensor_" << fNMean << "[" << axesIndex << "])\n"; - for (size_t j = fAxis; j < fSize; j++) { - out << SP << SP << "}\n"; - } - for (size_t i = fAxis; i < fSize; i++) { - out << SP << "}\n"; - } - out << "// Y = Scale o NormalizedX"; - for (size_t i = 0; i < fAxis; i++) { - std::string iIdx = "axis_" + std::to_string(i); - out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i] - << "; " << iIdx << "++){\n"; - } - for (size_t j = fAxis; j < fSize; j++) { - std::string jIdx = "axis_" + std::to_string(j); - out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j] - << "; " << jIdx << "++){\n"; - } - out << SP << SP << SP << "tensor_" << fNY << "[" << InputIndex << "] = tensor_" << fNScale; - out << "[" << axesIndex << "] * static_cast<" << fType << ">(tensor_" << fNCastedX << "[" << InputIndex; - out << "]);\n"; - for (size_t j = fAxis; j < fSize; j++) { - out << SP << SP << "}\n"; - } - for (size_t i = fAxis; i < fSize; i++) { - out << SP << "}\n"; - } - } else { - out << SP << "// Y = Scale o InvStdDev (X - Mean)\n"; - for (size_t i = 0; i < fAxis; i++) { - std::string iIdx = "axis_" + std::to_string(i); - out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i] - << "; " << iIdx << "++){\n"; - } - for (size_t j = fAxis; j < fSize; j++) { - std::string jIdx = "axis_" + std::to_string(j); - out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j] - << "; " << jIdx << "++){\n"; - } - out << SP << SP << SP << "tensor_" << fNY << "[" << InputIndex << "] = tensor_" << fNScale; - out << "[" << normalizedIndex << "] * tensor_" << fNInvStdDev << "[" << axesIndex; - out << "] * (tensor_" << fNX << "[" << InputIndex << "] - tensor_" << fNMean << "["; - out << axesIndex << "]);\n"; - for (size_t j = fAxis; j < fSize; j++) { - out << SP << SP << "}\n"; - } - for (size_t i = fAxis; i < fSize; i++) { - out << SP << "}\n"; - } + // for (size_t i = 0; i < fAxis; i++) { + // out << SP << "}\n"; + // } + + // set output mean and invStdDev if requested + if (!fNMean.empty()) + out << SP << SP << "tensor_" << fNMean << "[" << axesIndex << "] = mean;\n"; + if (!fNInvStdDev.empty()) + out << SP << SP << "tensor_" << fNInvStdDev << "[" << axesIndex << "] = invStdDev;\n"; + + // scale and add bias + + out << SP << "// Y = Scale o InvStdDev (X - Mean)\n"; + // for (size_t i = 0; i < fAxis; i++) { + // std::string iIdx = "axis_" + std::to_string(i); + // out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i] + // << "; " << iIdx << "++){\n"; + // } + + for (size_t j = fAxis; j < fSize; j++) { + std::string jIdx = "axis_" + std::to_string(j); + out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j] << "; " << jIdx + << "++){\n"; } + out << SP << SP << SP << "tensor_" << fNY << "[" << inputIndex << "] = tensor_" << fNScale; + out << "[" << scaleIndex << "] * invStdDev * (tensor_" << fNX << "[" << inputIndex << "] - mean)"; - if (!fNB.empty()) { - std::string bias = "tensor_" + (fNBroadcastedB.empty() ? 
fNB : fNBroadcastedB); - out << SP << "// Add the bias to Y\n"; - out << SP << "int " << opName << "_n = " << fLength << ";\n"; - out << SP << "float " << opName << "_alpha = 1.;\n"; - out << SP << "int " << opName << "_inc = 1;\n"; - out << SP << "BLAS::saxpy_(&" << opName << "_n, &" << opName << "_alpha, " << bias << ", &"; - out << opName << "_inc, " << "tensor_" << fNY << ", &" << opName << "_inc);\n"; + // add bias if needed + if (!fNB.empty()) + // assume bias has index as scale + out << " + tensor_" << fNB << "[" << biasIndex << "]"; + out << ";\n"; + + for (size_t j = fAxis; j < fSize; j++) { + out << SP << SP << "}\n"; } + for (size_t i = fAxis; i < fSize; i++) { + out << SP << "}\n"; + } + + // if (!fNB.empty()) { + // std::string bias = "tensor_" + (fNBroadcastedB.empty() ? fNB : fNBroadcastedB); + // out << SP << "// Add the bias to Y\n"; + // out << SP << "int " << opName << "_n = " << fLength << ";\n"; + // out << SP << "float " << opName << "_alpha = 1.;\n"; + // out << SP << "int " << opName << "_inc = 1;\n"; + // out << SP << "BLAS::saxpy_(&" << opName << "_n, &" << opName << "_alpha, " << bias << ", &"; + // out << opName << "_inc, " << "tensor_" << fNY << ", &" << opName << "_inc);\n"; + // } return out.str(); } diff --git a/tmva/sofie/inc/TMVA/ROperator_Range.hxx b/tmva/sofie/inc/TMVA/ROperator_Range.hxx index 16d2cb689d518..b91e45dd6d84b 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Range.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Range.hxx @@ -108,11 +108,6 @@ public: model.AddConstantTensor(fNOutput,shape, output.data()); fShape = ConvertShapeToDim(shape); - // set the input tensor not writable - model.SetNotWritableInitializedTensor(fNStart); - model.SetNotWritableInitializedTensor(fNDelta); - model.SetNotWritableInitializedTensor(fNLimit); - } else { // case of a shape tensor std::string start = (res1 == 1) ? std::to_string(start_value) : start_dim.GetVal(); std::string limit = (res2 == 1) ? 
std::to_string(limit_value) : limit_dim.GetVal(); @@ -164,22 +159,20 @@ public: throw std::runtime_error("TMVA SOFIE Range operator called to Generate without being initialized first"); } - std::string sizeName = fShape[0].param; - if (sizeName.find("range_size") != std::string::npos) - sizeName = "static_cast(std::max(std::ceil((static_cast(*tensor_" + fNLimit + + std::string outputSizeVar; + std::string outputSize = fShape[0].param; + if (outputSize.find("range_size") != std::string::npos) { + outputSizeVar = outputSize; + outputSize = "static_cast(std::max(std::ceil((static_cast(*tensor_" + fNLimit + ") - static_cast(*tensor_" + fNStart + ")) / static_cast(*tensor_" + fNDelta + ")), 0.0f))"; - out << SP << "{\n"; - out << SP << SP << "size_t range" << " = " << sizeName << ";\n"; - if (sizeName != fShape[0].param) { - out << SP << SP << "if ( range > " << "fTensor_" << fNOutput << ".size() ){\n"; - // we should probably resize the tensor here - out << SP << SP << SP << "throw std::runtime_error(\"wrong size allocated for output of range\");\n"; - out << SP << SP << "}\n"; + } else { + outputSizeVar = "range_" + opName; } - out << SP << SP << "for (size_t i = 0; i < range; i++) {\n"; - out << SP << SP << SP << "tensor_" << fNOutput << "[i] = *tensor_" << fNStart << " + i * (*tensor_" << fNDelta << ");\n"; - out << SP << SP << "}\n"; + out << SP << "size_t " << outputSizeVar << " = " << outputSize << ";\n"; + out << SP << "for (size_t i = 0; i < " << outputSizeVar << "; i++) {\n"; + out << SP << SP << "tensor_" << fNOutput << "[i] = *tensor_" << fNStart << " + i * (*tensor_" << fNDelta << ");\n"; out << SP << "}\n"; + return out.str(); } }; diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index d7ab2b4ad39af..2f80138265ee7 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -577,19 +577,6 @@ void RModel::Initialize(const std::map & inputParams, bool PrintDynamicTensors(); } - // check if there are initialized tensors to write in a weight file - // support for the time being only weight of FLOAT type - if (fUseWeightFile) { - bool modelHasWeights = false; - for (auto &i : fInitializedTensors) { - if (i.second.IsWeightTensor()) { - modelHasWeights = true; - break; - } - } - if (!modelHasWeights) - fUseWeightFile = false; - } // Go through model and initialize each operator int i = 0; @@ -621,6 +608,20 @@ void RModel::Initialize(const std::map & inputParams, bool it.second.SetConstant(); } + // check if there are initialized tensors to write in a weight file + // support for the time being only weight of FLOAT type + if (fUseWeightFile) { + bool modelHasWeights = false; + for (auto &i : fInitializedTensors) { + if (i.second.IsWeightTensor()) { + modelHasWeights = true; + break; + } + } + if (!modelHasWeights) + fUseWeightFile = false; + } + fIsInitialized = true; } @@ -698,7 +699,7 @@ void RModel::GenerateInitializedTensorInfo() // here are constant tensor or initialized ones which are not weights (e.g. 
int64_t tensors ) for (auto &i : fInitializedTensors) { if (i.second.IsNotWritable()) continue; - if (!fUseWeightFile || i.second.IsConstantTensor()) { + if (!fUseWeightFile || i.second.IsConstantTensor() || !i.second.IsWeightTensor() ) { if (i.second.type() == ETensorType::FLOAT) { fGC += GenerateConstantTensorCode(i); fConstantTensorSize += ConvertShapeToLength(i.second.shape()) * 4; @@ -1203,7 +1204,9 @@ void RModel::Generate(std::underlying_type_t options, int batchSize, lo void RModel::ReadInitializedTensorsFromFile(long pos) { // generate the code to read initialized tensors from a text data file if (fWeightFile == WeightFileType::Text) { - if (fInitializedTensors.empty()) return; + // check if there are tensors to write + + if (!fUseWeightFile) return; fGC += " std::ifstream f;\n"; fGC += " f.open(filename);\n"; diff --git a/tmva/sofie/test/TestCustomModelsFromONNX.cxx b/tmva/sofie/test/TestCustomModelsFromONNX.cxx index 5b77caf2aed1d..401afb8257e25 100644 --- a/tmva/sofie/test/TestCustomModelsFromONNX.cxx +++ b/tmva/sofie/test/TestCustomModelsFromONNX.cxx @@ -323,6 +323,8 @@ #include "ScatterElements_FromONNX.hxx" +#include "MatMul_Stacked_FromONNX.hxx" + #include "gtest/gtest.h" constexpr float DEFAULT_TOLERANCE = 1e-3f; @@ -2856,7 +2858,7 @@ TEST(ONNX, RangeFloat) { float start = 1.; float limit = 10.; float delta = 2.; - TMVA_SOFIE_RangeFloat::Session s("RangeFloat_FromONNX.dat"); + TMVA_SOFIE_RangeFloat::Session s("RangeFloat_FromONNX.dat",5); std::vector output(s.infer(&start, &limit, &delta)); // Checking the output size @@ -2875,7 +2877,7 @@ TEST(ONNX, RangeInt) { int64_t start = 1; int64_t limit = 10; int64_t delta = 2; - TMVA_SOFIE_RangeInt::Session s("RangeInt_FromONNX.dat"); + TMVA_SOFIE_RangeInt::Session s("RangeInt_FromONNX.dat",5); std::vector output(s.infer(&start, &limit, &delta)); // Checking the output size @@ -2947,7 +2949,7 @@ TEST(ONNX, Where) { // test also the broadcast of boolean tensors std::vector input1 = {1,2}; std::vector input2 = {3,4,5,6}; - bool cond[] = {true, false, true}; // need to pass arrays for booleans + uint8_t cond[] = {true, false, true}; // need to pass arrays for booleans std::vector correct = {1,2,5,6,1,2}; TMVA_SOFIE_Where::Session s("Where_FromONNX.dat"); std::vector output(s.infer(input1.data(), input2.data(), cond)); @@ -3214,3 +3216,24 @@ TEST(ONNX, ScatterElements) EXPECT_LE(std::abs(output[i] - correct_output[i]), DEFAULT_TOLERANCE); } } + +TEST(ONNX, MatMul_Stacked) +{ + // test scatter elements (similar test as in ONNX doc) + std::vector input1 = {1,2,3,4,5,6,7,8}; // input tensor shape is (2,2,2) + std::vector input2 = {2,3}; // shape is (2,1) + + std::vector correct_output = {8,18, 28,38}; + + // model is dynamic , use N = 2 + TMVA_SOFIE_MatMul_Stacked::Session s("MatMul_Stacked_FromONNX.dat", 2); + + auto output = s.infer(2, input1.data(), input2.data()); + + // Checking output size + EXPECT_EQ(output.size(), correct_output.size()); + // Checking output + for (size_t i = 0; i < output.size(); ++i) { + EXPECT_LE(std::abs(output[i] - correct_output[i]), DEFAULT_TOLERANCE); + } +} diff --git a/tmva/sofie/test/input_models/MatMul_Stacked.onnx b/tmva/sofie/test/input_models/MatMul_Stacked.onnx new file mode 100644 index 0000000000000..19c39ee2adddd --- /dev/null +++ b/tmva/sofie/test/input_models/MatMul_Stacked.onnx @@ -0,0 +1,19 @@ + + onnx-example:„ + +input1 +input2output"MatMulAddGraphZ +input1 + +N + +Z +input2 +  + +b +output + +N + +B \ No newline at end of file From 2fba9e5ea8f00baceda6a6e10d153911e049a71d Mon Sep 17 
00:00:00 2001 From: moneta Date: Sat, 20 Dec 2025 17:41:21 +0100 Subject: [PATCH 06/12] [tmva][sofie] Fix issue with order execution of tensors in GNN models The order execution ws not set for tensor inputs to operators added using the GNN Sofie classes. This is now fixed and the correct memory mangement can be performed. --- tmva/sofie/inc/TMVA/RFunction.hxx | 6 +++--- tmva/sofie/inc/TMVA/RModel.hxx | 10 +++++----- tmva/sofie/inc/TMVA/SOFIE_common.hxx | 4 ++-- tmva/sofie/src/RFunction.cxx | 20 +++++++++----------- tmva/sofie/src/RFunction_MLP.cxx | 16 ++++++++-------- tmva/sofie/src/RModel.cxx | 26 +++++++++++++++++--------- 6 files changed, 44 insertions(+), 38 deletions(-) diff --git a/tmva/sofie/inc/TMVA/RFunction.hxx b/tmva/sofie/inc/TMVA/RFunction.hxx index 1cca39aa7ff3e..9247bd4180d26 100644 --- a/tmva/sofie/inc/TMVA/RFunction.hxx +++ b/tmva/sofie/inc/TMVA/RFunction.hxx @@ -32,7 +32,7 @@ public: class RFunction_Update: public RFunction { protected: - std::shared_ptr function_block; + std::shared_ptr fFunction_block; FunctionTarget fTarget; GraphType fGraphType; std::vector fInputTensors; @@ -50,9 +50,9 @@ public: void AddInputTensors(const std::vector>& inputShapes); void AddInputTensors(const std::vector>& inputShapes); std::shared_ptr GetFunctionBlock() { - return function_block; + return fFunction_block; } - std::string GenerateModel(const std::string& filename, long read_pos = 0, long block_size = -1); + std::string GenerateModel(const std::string& filename, long read_pos = 0, long block_size = -1, bool verbose = false); std::string Generate(const std::vector& inputPtrs); FunctionTarget GetFunctionTarget() { return fTarget; diff --git a/tmva/sofie/inc/TMVA/RModel.hxx b/tmva/sofie/inc/TMVA/RModel.hxx index 996c51020270f..2a68bcb3593d3 100644 --- a/tmva/sofie/inc/TMVA/RModel.hxx +++ b/tmva/sofie/inc/TMVA/RModel.hxx @@ -205,8 +205,8 @@ public: void ReadInitializedTensorsFromFile(long); long WriteInitializedTensorsToFile(std::string filename = ""); - void PrintIntermediateTensors(); - void PrintOutputTensors(); + void PrintIntermediateTensors() const; + void PrintOutputTensors() const; void OutputGenerated(std::string filename = "", bool append = false); std::vector GetOutputTensorNames() { return fOutputTensorNames; } void SetFilename(std::string filename) { fName = filename; } @@ -224,9 +224,9 @@ public: } */ - void PrintRequiredInputTensors(); - void PrintInitializedTensors(); - void PrintDynamicTensors(); + void PrintRequiredInputTensors() const; + void PrintInitializedTensors() const; + void PrintDynamicTensors() const; void HeadInitializedTensors(std::string name, int n_print = 50); bool UseSession() const { return fUseSession; } diff --git a/tmva/sofie/inc/TMVA/SOFIE_common.hxx b/tmva/sofie/inc/TMVA/SOFIE_common.hxx index 7abb7df68d997..68a74d08fd93a 100644 --- a/tmva/sofie/inc/TMVA/SOFIE_common.hxx +++ b/tmva/sofie/inc/TMVA/SOFIE_common.hxx @@ -252,13 +252,13 @@ public: bool IsConstantTensor() const { return fConstant;} // query if tensor needs to be written in a weight file. Constant tensors are not written in a file bool IsWeightTensor() const { return !fConstant && !fIsNotWritable;} - // check if a Tensor is Writable (need to be written in teh file or in the generated code (e.g. as a costant tensor) + // check if a Tensor is Writable (need to be written in the file or in the generated code (e.g. 
as a constant tensor) // if an initialized tensors is used in a constant operator at compile time does not need to be written and can be omitted in // the generated code bool IsNotWritable() const { return fIsNotWritable; } // set not writable initialized tensors - i.e. tensor that must not be written in a file void SetNotWritable() { fIsNotWritable = true;} - // set as constant (needed for non-flot initialized tensors) + // set as constant (needed for non-float initialized tensors) void SetConstant() { fConstant = true;} template diff --git a/tmva/sofie/src/RFunction.cxx b/tmva/sofie/src/RFunction.cxx index a6df8dcb43e61..505d84187ca9a 100644 --- a/tmva/sofie/src/RFunction.cxx +++ b/tmva/sofie/src/RFunction.cxx @@ -26,7 +26,7 @@ RFunction_Update::RFunction_Update(FunctionTarget target, GraphType gType): fTar throw std::runtime_error("Invalid target for Update function"); } fType = FunctionType::UPDATE; - function_block = std::make_unique(fFuncName); + fFunction_block = std::make_unique(fFuncName); if(fGraphType == GraphType::GNN) { if(fTarget == FunctionTarget::EDGES) { @@ -49,25 +49,23 @@ RFunction_Update::RFunction_Update(FunctionTarget target, GraphType gType): fTar // add input tensors, order of provided shapes must be the same as in fInputTensors void RFunction_Update::AddInputTensors(const std::vector>& inputShapes) { for(long unsigned int i=0; iAddInputTensorInfo(fInputTensors[i],ETensorType::FLOAT, inputShapes[i]); - function_block->AddInputTensorName(fInputTensors[i]); + fFunction_block->AddInputTensorInfo(fInputTensors[i],ETensorType::FLOAT, inputShapes[i]); + fFunction_block->AddInputTensorName(fInputTensors[i]); } } void RFunction_Update::AddInputTensors(const std::vector>& inputShapes) { for(long unsigned int i=0; iAddInputTensorInfo(fInputTensors[i],ETensorType::FLOAT, inputShapes[i]); - function_block->AddInputTensorName(fInputTensors[i]); + fFunction_block->AddInputTensorInfo(fInputTensors[i],ETensorType::FLOAT, inputShapes[i]); + fFunction_block->AddInputTensorName(fInputTensors[i]); } } -std::string RFunction_Update::GenerateModel(const std::string& filename, long read_pos, long block_size) { - function_block->SetFilename(filename); +std::string RFunction_Update::GenerateModel(const std::string& filename, long read_pos, long block_size, bool verbose) { + fFunction_block->SetFilename(filename); // use batch size as block size in RModel::generate - function_block->PrintRequiredInputTensors(); - function_block->PrintDynamicTensors(); - function_block->Generate(Options::kGNNComponent,block_size,read_pos); + fFunction_block->Generate(Options::kGNNComponent,block_size,read_pos, verbose); std::string modelGenerationString; - modelGenerationString = "\n//--------- GNN_Update_Function---"+fFuncName+"\n"+function_block->ReturnGenerated(); + modelGenerationString = "\n//--------- GNN_Update_Function---"+fFuncName+"\n"+fFunction_block->ReturnGenerated(); return modelGenerationString; } diff --git a/tmva/sofie/src/RFunction_MLP.cxx b/tmva/sofie/src/RFunction_MLP.cxx index 32148cae36794..c41135de49902 100644 --- a/tmva/sofie/src/RFunction_MLP.cxx +++ b/tmva/sofie/src/RFunction_MLP.cxx @@ -20,9 +20,9 @@ RFunction_MLP::RFunction_MLP(FunctionTarget target, Int_t numLayers, Activation throw std::runtime_error("TMVA SOFIE GNN doesn't currently supports the provided activation function for " + fFuncName + " update."); } - function_block->AddOutputTensorNameList({fFuncName + "Relu" + std::to_string(fNumLayers)}); + fFunction_block->AddOutputTensorNameList({fFuncName + "Relu" + 
std::to_string(fNumLayers)}); } else { - function_block->AddOutputTensorNameList({fFuncName + "Gemm" + std::to_string(fNumLayers)}); + fFunction_block->AddOutputTensorNameList({fFuncName + "Gemm" + std::to_string(fNumLayers)}); } } @@ -32,7 +32,7 @@ void RFunction_MLP::Initialize() { if(fGraphType == GraphType::GNN) { std::unique_ptr op_concat; op_concat.reset(new ROperator_Concat(fInputTensors,1,0,fFuncName+"InputConcat")); - function_block->AddOperator(std::move(op_concat)); + fFunction_block->AddOperator(std::move(op_concat)); fGemmInput = fFuncName+"InputConcat"; } else if(fGraphType == GraphType::GraphIndependent) { @@ -43,24 +43,24 @@ void RFunction_MLP::Initialize() { for(int i=0; i(1.0,beta,0,0,fGemmInput,UTILITY::Clean_name(fKernelTensors[i]),UTILITY::Clean_name(fBiasTensors[i]),fFuncName+"Gemm"+std::to_string(i))); - function_block->AddOperator(std::move(op_gemm)); + fFunction_block->AddOperator(std::move(op_gemm)); fGemmInput = fFuncName+"Gemm"+i; if (fActivationFunction == Activation::RELU) { std::unique_ptr op_relu; op_relu.reset(new ROperator_Relu(fFuncName+"Gemm"+std::to_string(i), fFuncName+"Relu"+std::to_string(i))); - function_block->AddOperator(std::move(op_relu)); + fFunction_block->AddOperator(std::move(op_relu)); fGemmInput = fFuncName+"Relu"+i; } } double beta = (fBiasTensors.back().empty()) ? 0. : 1.; op_gemm.reset(new ROperator_Gemm(1.0,beta,0,0,fGemmInput,UTILITY::Clean_name(fKernelTensors.back()),UTILITY::Clean_name(fBiasTensors.back()),fFuncName+"Gemm"+std::to_string(fNumLayers))); - function_block->AddOperator(std::move(op_gemm)); + fFunction_block->AddOperator(std::move(op_gemm)); if(fActivateFinal) { if (fActivationFunction == Activation::RELU) { std::unique_ptr op_relu; op_relu.reset(new ROperator_Relu(fFuncName+"Gemm"+std::to_string(fNumLayers), fFuncName+"Relu"+std::to_string(fNumLayers))); - function_block->AddOperator(std::move(op_relu)); + fFunction_block->AddOperator(std::move(op_relu)); } } @@ -68,7 +68,7 @@ void RFunction_MLP::Initialize() { if(fAddlOp.size()) { for(auto &i:fAddlOp) { std::unique_ptr tmp(i); - function_block->AddOperator(std::move(tmp)); + fFunction_block->AddOperator(std::move(tmp)); } } } diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index 2f80138265ee7..d6c2e31a20893 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -164,6 +164,7 @@ void RModel::AddOperator(std::unique_ptr op, int order_execution) { fOperators.insert(fOperators.begin() + order_execution, std::move(op)); } else { fOperators.push_back(std::move(op)); + order_execution = fOperators.size()-1; } // storing the last usage of tensors which are input to @@ -812,6 +813,11 @@ void RModel::GenerateDynamicTensorInfo() if (fDynamicTensorInfos.empty()) return; + if (fVerbose) { + std::cout << "generating code for dynamic tensor management" << std::endl; + PrintDynamicTensors(); + } + std::stringstream out; out << "// dynamic tensor memory management\n"; out << SP << "std::vector dynamicTensorInfos;\n"; @@ -1387,7 +1393,7 @@ long RModel::WriteInitializedTensorsToFile(std::string filename) { } } -void RModel::PrintRequiredInputTensors() { +void RModel::PrintRequiredInputTensors() const { std::cout << "Model requires following inputs:\n"; for (auto& inputInfo: fInputTensorInfos) { std::cout << "Parametrised Tensor name: " << inputInfo.first << "\t"; @@ -1417,7 +1423,7 @@ void RModel::PrintRequiredInputTensors() { std::cout << "\n"; } -void RModel::PrintInitializedTensors() { +void RModel::PrintInitializedTensors() const { std::cout << 
"Model initialized the following tensors:\n"; for (auto& it: fInitializedTensors) { std::cout << "Tensor name: \"" << it.first << "\"\t"; @@ -1435,7 +1441,7 @@ void RModel::PrintInitializedTensors() { std::cout << "\n"; } -void RModel::PrintIntermediateTensors() { +void RModel::PrintIntermediateTensors() const { std::cout << "Model specify the following intermediate tensors:\n"; for (auto& it: fIntermediateTensorInfos) { std::cout << "Tensor name: \"" << it.first << "\"\t"; @@ -1450,7 +1456,7 @@ void RModel::PrintIntermediateTensors() { std::cout << "\n"; } -void RModel::PrintDynamicTensors() { +void RModel::PrintDynamicTensors() const { std::cout << "Model specify the following dynamic tensors:\n"; for (auto& it: fDynamicTensorInfos) { std::cout << "Tensor name: \"" << it.first << "\"\t"; @@ -1465,14 +1471,16 @@ void RModel::PrintDynamicTensors() { std::cout << "\n"; } -void RModel::PrintOutputTensors() { +void RModel::PrintOutputTensors() const { std::cout << "Model specify the following output tensors:\n"; for (auto& it: fOutputTensorNames) { std::cout << "Tensor name: \"" << it << "\"\t"; - if (!IsDynamicTensor(it)) - std::cout << "shape: " << ConvertShapeToString(GetTensorShape(it)) << std::endl; - else - std::cout << "shape: " << ConvertShapeToString(GetDynamicTensorShape(it)) << std::endl; + try { + auto shape = GetDimTensorShape(it); + std::cout << "with shape: " << ConvertShapeToString(shape) << std::endl; + } catch (...) { + std::cout << "with shape not yet defined" << std::endl; + } } std::cout << "\n"; } From 55fb3e7eb17f095a2960e384fe68afe89e528083 Mon Sep 17 00:00:00 2001 From: moneta Date: Mon, 5 Jan 2026 18:36:47 +0100 Subject: [PATCH 07/12] [tmva][sofie] Apply fixes for the TestCustiomModelsFrom ROOT SOme fixes are needed for the test, since the session is not used for this tests. Need also to force using Session in case of Dynamic tensors Fix also a warning in Gemm operator and RModel --- tmva/sofie/inc/TMVA/ROperator_Conv.hxx | 2 +- tmva/sofie/inc/TMVA/ROperator_Gemm.hxx | 13 +++++++------ tmva/sofie/src/RModel.cxx | 16 ++++++++++++---- tmva/sofie/test/TestCustomModelsFromROOT.cxx | 6 ++++-- 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx index 823e7fa04717e..87d1ad0a0bf67 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx @@ -350,8 +350,8 @@ public: out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" << fNB << ", " << ConvertShapeToString(shape) << ", " << ConvertShapeToString(fShapeY) << ");\n"; out << SP << SP << "fTensor_" << fNB << ".resize(" << length << ");\n"; + out << SP << SP << "std::copy(data, data + " << length << ", fTensor_" << fNB << ".begin());\n"; out << SP << SP << "tensor_" << fNB << " = fTensor_" << fNB << ".data();\n"; - out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNB << ");\n"; out << SP << SP << "delete[] data;\n"; out << SP << "}\n"; } diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx index 47bc5392fede4..9f911756196a8 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx @@ -107,6 +107,7 @@ namespace SOFIE{ if (input[0].size() > 2 && input[1].size() == input[0].size()) { // in case of dim > 2 first dimensions are equal to the input ones not // equal to 1 (e.g. 
(1,2,3) * (2,3,4) -> (2,2,4)) + // here could probably use the Broadcasting function UTILITY::MultidirectionalBroadcastShape for (size_t i = 0; i < input[0].size()-2; i++) { Dim valueA = input[0][i]; Dim valueB = input[1][i]; @@ -311,8 +312,8 @@ namespace SOFIE{ << fNC << "," << ConvertShapeToString(fShapeC) << ", " << ConvertShapeToString(fShapeY) << ");\n"; out << SP << SP << "fTensor_" << fNC << ".resize(" << length << ");\n"; + out << SP << SP << "std::copy(data, data + " << length << ", fTensor_" << fNC << ".begin());\n"; out << SP << SP << "tensor_" << fNC << " = fTensor_" << fNC << ".data();\n"; - out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNC << ");\n"; out << SP << SP << "delete [] data;\n"; out << SP << "}\n"; } @@ -341,12 +342,12 @@ namespace SOFIE{ // size of B n*k std::vector sY = {fShapeY[dimY-2], fShapeY[dimY-1]}; // extra dimensions in case of stacked MatMul - std::vector sA; + std::vector sExtraY; for (int64_t i = 0; i < dimY-2; i++) { - sA.push_back(fShapeY[i]); + sExtraY.push_back(fShapeY[i]); } auto lengthGemm = ConvertDimShapeToLength(sY); // size of the Gemm operation - auto lengthExtra = ConvertDimShapeToLength(sA); // extra length in case input tensors are of dim>2 (MatMul) + auto lengthExtra_Y = ConvertDimShapeToLength(sExtraY); // extra length in case input tensors are of dim>2 (MatMul) // case bias is present if (!fNC.empty()){ @@ -372,7 +373,7 @@ namespace SOFIE{ // include MatMul case where we stack the Gemm operations // exclude case where we have only 1's in the additional dims - bool doStackMul = dimY > 2 && ( fIsDynamic || std::stoi(lengthExtra) > 1); + bool doStackMul = dimY > 2 && ( fIsDynamic || std::stoi(lengthExtra_Y) > 1); // compute input offset for stack multiplications std::string lengthExtra_A; std::string lengthExtra_B; @@ -398,7 +399,7 @@ namespace SOFIE{ out << SP << "size_t " << opName << "_A_offset = 0;\n"; if (extraB) out << SP << "size_t " << opName << "_B_offset = 0;\n"; - out << SP << "for (size_t i = 0; i < " << lengthExtra << "; i++){\n"; + out << SP << "for (size_t i = 0; i < " << lengthExtra_Y << "; i++){\n"; out << SP; } diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index d6c2e31a20893..dbd9c3666e01f 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -613,8 +613,8 @@ void RModel::Initialize(const std::map & inputParams, bool // support for the time being only weight of FLOAT type if (fUseWeightFile) { bool modelHasWeights = false; - for (auto &i : fInitializedTensors) { - if (i.second.IsWeightTensor()) { + for (auto &it : fInitializedTensors) { + if (it.second.IsWeightTensor()) { modelHasWeights = true; break; } @@ -664,7 +664,8 @@ std::string GenerateConstantTensorCode(const std::pair 100) ? false : true; + // also for weights which can be broadcasted do not use stack but allocate as a std::vector + bool allocateOnStack = (length > 100 || t.second.IsWeightTensor()) ? 
false : true; const T *data = t.second.data(); @@ -687,7 +688,7 @@ std::string GenerateConstantTensorCode(const std::pair options, int batchSize, lo // initialize the model including all operators and sub-graphs Initialize(batchSize, verbose); + // if having dynamic tensor we need to have a Session + if (!fDynamicTensorInfos.empty()) { + fUseSession = true; + if (verbose) + std::cout << "Warning: Force having a Session since model has dynamic tensors " << std::endl; + } + std::string hgname; if (!fIsGNNComponent && !fIsSubGraph) { fGC.clear(); diff --git a/tmva/sofie/test/TestCustomModelsFromROOT.cxx b/tmva/sofie/test/TestCustomModelsFromROOT.cxx index d077aede3e2e6..7e3c8c9c2fc09 100644 --- a/tmva/sofie/test/TestCustomModelsFromROOT.cxx +++ b/tmva/sofie/test/TestCustomModelsFromROOT.cxx @@ -891,7 +891,8 @@ TEST(ROOT, RangeFloat) { float start = 1.; float limit = 10.; float delta = 2.; - std::vector output = TMVA_SOFIE_RangeFloat::infer(&start, &limit, &delta); + TMVA_SOFIE_RangeFloat::Session s("",5); + std::vector output(s.infer(&start, &limit, &delta)); // Checking the output size EXPECT_EQ(output.size(), sizeof(RangeFloat_ExpectedOutput::outputs) / sizeof(float)); @@ -909,7 +910,8 @@ TEST(ROOT, RangeInt) { int64_t start = 1; int64_t limit = 10; int64_t delta = 2; - std::vector output = TMVA_SOFIE_RangeInt::infer(&start, &limit, &delta); + TMVA_SOFIE_RangeInt::Session s("",5); + std::vector output(s.infer(&start, &limit, &delta)); // Checking the output size EXPECT_EQ(output.size(), sizeof(RangeInt_ExpectedOutput::outputs) / sizeof(int64_t)); From 4b87a64e41794d425de06ec55a9d02ea792076fb Mon Sep 17 00:00:00 2001 From: moneta Date: Wed, 7 Jan 2026 13:28:30 +0100 Subject: [PATCH 08/12] [tmva][sofie] Sort in alphabetical order the shape parameter in Session ctor Do an alphabetical order of Session shape parameters for dynamic tensors, otherwise they may get a random order. 
Observed different order on different platforms. Add some small improvements in the generated code (add number and shape information) when generating Gemm code --- tmva/sofie/inc/TMVA/ROperator_BasicBinary.hxx | 2 +- tmva/sofie/inc/TMVA/ROperator_Gemm.hxx | 3 ++- tmva/sofie/src/RModel.cxx | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tmva/sofie/inc/TMVA/ROperator_BasicBinary.hxx b/tmva/sofie/inc/TMVA/ROperator_BasicBinary.hxx index 1c4f20363ebe2..491b669554118 100644 --- a/tmva/sofie/inc/TMVA/ROperator_BasicBinary.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_BasicBinary.hxx @@ -192,7 +192,7 @@ public: dataY[i] = BinaryOperatorTrait::Func(dataA[i], dataB[i]); } model.AddConstantTensor(fNY, fShapeY, dataY.data()); - // flag tensors to not be written in the weight file + // flag tensors to not be written in the generated code or weight file model.SetNotWritableInitializedTensor(nameA); model.SetNotWritableInitializedTensor(nameB); fIsOutputConstant = true; diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx index 9f911756196a8..2c2df2aa37830 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx @@ -327,7 +327,8 @@ namespace SOFIE{ throw std::runtime_error("TMVA SOFIE Gemm Op called to Generate without being initialized first"); } std::stringstream out; - out << "\n//--------- Gemm\n"; + out << "\n//--------- Gemm " << opName << " " << ConvertShapeToString(fShapeA) << " * " << ConvertShapeToString(fShapeB) + << " -> " << ConvertShapeToString(fShapeY) << "\n"; // need to consider case A and B have dim > 2 (for MatMul) int64_t dimA = fShapeA.size(); int64_t dimB = fShapeB.size(); diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index dbd9c3666e01f..089e656fedbd1 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -1087,6 +1087,8 @@ void RModel::GenerateSessionCode() // add initialization of shape parameters // assume all parameters are of type size_t if (!fDimShapeNames.empty()) { + // sort first the shape parameters in alphabetical order to avoid a random order + std::sort(fDimShapeNames.begin(), fDimShapeNames.end() ); for (auto &p : fDimShapeNames) { fGC += ",\n"; fGC += " size_t " + p + " = " + fShapeParams[p]; From b95b9a7b989fa0158d71b9b662178cd0ac7110bd Mon Sep 17 00:00:00 2001 From: moneta Date: Thu, 8 Jan 2026 17:03:06 +0100 Subject: [PATCH 09/12] [tmva][sofie] Do not perform broadcast of bias tensor in Gemm in Session ctor Avoid creating a broadcasted bias tensor which uses lots of memory. Do broadcasting of the bias on the fly before computing Gemm by using the output tensor.
This saves a large amount of memory on models using large Gemm calss like the atlas GNN model used for tracking --- tmva/sofie/inc/TMVA/ROperator_Gemm.hxx | 82 +++++++++++++++----------- tmva/sofie/src/SOFIE_common.cxx | 11 ++-- 2 files changed, 55 insertions(+), 38 deletions(-) diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx index 2c2df2aa37830..a18914b8892a8 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx @@ -219,7 +219,7 @@ namespace SOFIE{ fShapeC = model.GetTensorShape(fNC); size_t lengthC = ConvertShapeToLength(fShapeC); size_t lengthY = ConvertShapeToLength(shapeY); - // for dynamic outputs broadcasting is always done + // for dynamic outputs broadcasting is always needed bool broadcast_needed = false; if (fIsDynamic && shapeY.empty()) broadcast_needed = true; @@ -229,34 +229,21 @@ namespace SOFIE{ if (broadcast_needed) { fBroadcastBias = true; - if (!model.UseSession()) { - // without session dynamic tensors not supported in Gemm - if (fIsDynamic) { - throw std::runtime_error("TMVA SOFIE Gemm Op: dynamic tensors not supported without a session"); - } - auto original_data = model.GetInitializedTensorData(fNC); - auto targetShape = UTILITY::UnidirectionalBroadcastShape(fShapeC, shapeY); - if (fType == "float") { - std::shared_ptr new_data_ptr(UTILITY::UnidirectionalBroadcast( - static_cast(original_data.get()), fShapeC, targetShape), - std::default_delete()); - - model.UpdateInitializedTensor(fNC, model.GetTensorType(fNC), shapeY, new_data_ptr); - fShapeC = shapeY; - } - } else { - // /d to add a new intermediate tensor for broadcasted bias tensor - // fNC2 = fNC + "bcast"; - // if (!fIsDynamic) { - // model.AddIntermed/ In case of session add broadcasting code in Session constructor and in GenerateInitCode - // // we neeiateTensor(fNC2, model.GetTensorType(fNC), shapeY); - // } - // else - // model.AddDynamicTensor(fNC2,model.GetTensorType(fNC), fShapeY); - // // do not add to lists of input/output tensors since broadcasted tensors are special - // // and we manage their memory separatly - // //fInputTensorNames.emplace_back(fNC2); - // //fOutputTensorNames.emplace_back(fNC2); + // check if broadcasting is compatible and note that prepend 1 to shapeC + auto shapeDimC = ConvertShapeToDim(fShapeC); + auto r = UTILITY::MultidirectionalBroadcastShape(fShapeY, shapeDimC); + // return flag must be equal to 1 since this is a unidirectional broadcast of C->Y + if (r.first > 1) { + throw std::runtime_error("TMVA SOFIE Gemm Op - bias tensor of shape " + ConvertShapeToString(fShapeC) + " cannot be uni-directional broadcasted to " + ConvertDimShapeToString(fShapeY)); + } + fShapeC = ConvertShapeToInt(shapeDimC); + if (fShapeC.empty()) { + throw std::runtime_error("TMVA SOFIE Gemm Op - Error in bias tensor " + ConvertDimShapeToString(shapeDimC) ); + } + } else { + // for the case lengthY == lengthC but shape is different (e.g. Y is (2,3) and is (6)) + if (shapeY != fShapeC) { + throw std::runtime_error("TMVA SOFIE Gemm Op: invalid shape for bias tensor " + ConvertShapeToString(fShapeC)); } } } @@ -294,6 +281,7 @@ namespace SOFIE{ std::string GenerateInitCode() override { std::stringstream out; // generate initialization code for broadcasting of bias tensor +#if 0 if (fShapeC.size() != fShapeY.size() && fBroadcastBias) { // we broadcast here always C in Y output, so target shape is the one of Y // no need to call UTILITY::UnidirectionalBroadcastShape. 
@@ -317,6 +305,7 @@ namespace SOFIE{ out << SP << SP << "delete [] data;\n"; out << SP << "}\n"; } +#endif return out.str(); } @@ -403,6 +392,33 @@ namespace SOFIE{ out << SP << "for (size_t i = 0; i < " << lengthExtra_Y << "; i++){\n"; out << SP; } + // do the bias broadcasting + if (fBroadcastBias) { + out << SP << "for (size_t j = 0; j < " << sY[0] << "; j++) { \n"; + out << SP << SP << "size_t y_index = "; + if (doStackMul) // add offset in caseof stack multiplications (not sure if bias is present in these cases) + out << opName << "_y_offset + "; + if (sY[1].GetVal() != "1") + out << sY[1] << " * j;\n"; + else + out << "j;\n"; + + out << SP << SP << "for (size_t k = 0; k < " << sY[1] << "; k++) { \n"; + std::string bias_index; + if (fShapeC[0] == 1 && fShapeC[1] == sY[1].dim) + bias_index = "k"; + else if (fShapeC[1] == 1 && fShapeC[0] == sY[0].dim) + bias_index = "j"; + else if (fShapeC[0] == 1 && fShapeC[1] == 1) // scalar case + bias_index = "0"; + else { + throw std::runtime_error("TMVA SOFIE Gemm Op - invalid shape for bias tensor " + ConvertShapeToString(fShapeC)); + } + + out << SP << SP << SP << "tensor_" << fNY << "[y_index + k] = " << "tensor_" << fNC << "[" << bias_index << "];\n"; + out << SP << SP << "}\n"; + out << SP << "}\n"; + } if (fType == "float"){ @@ -418,12 +434,12 @@ namespace SOFIE{ out << ", tensor_" << fNA; if (extraA) out << " + " << opName << "_A_offset"; out << ", " << std::setprecision(std::numeric_limits::max_digits10) << fAttrBeta << ","; - // in the case of bias - if (!fNC.empty()) + // in the case of bias and no broadcasting needed + if (!fNC.empty() && !fBroadcastBias) out << "tensor_" << fNC; - else + else out << "nullptr"; - out << ");\n"; + out << ");\n"; if(fActivation == EActivationType::RELU){ out << SP << "for (int id = 0; id < " << ConvertDimShapeToLength(fShapeY) << " ; id++){\n"; diff --git a/tmva/sofie/src/SOFIE_common.cxx b/tmva/sofie/src/SOFIE_common.cxx index 1ff510842643a..f659d0e1a2fe6 100644 --- a/tmva/sofie/src/SOFIE_common.cxx +++ b/tmva/sofie/src/SOFIE_common.cxx @@ -414,14 +414,15 @@ std::pair> UTILITY::MultidirectionalBroadcastShape(std + " to a common shape."); } } -// unidirectional broadcast- only B changes +// unidirectional broadcast- of shape A to target B std::vector UTILITY::UnidirectionalBroadcastShape(std::vector & shapeA, std::vector & shapeB) { - auto ret = UTILITY::MultidirectionalBroadcastShape(shapeA, shapeB); + auto ret = UTILITY::MultidirectionalBroadcastShape(shapeB, shapeA); if (ret.first > 1) { - std::runtime_error("TMVA::SOFIE - Error unidirectional broadcasting tensors of shape " - + ConvertShapeToString(shapeA) + " and " + ConvertShapeToString(shapeB) - + " to a common shape."); + throw + std::runtime_error("TMVA::SOFIE - Error unidirectional broadcasting tensors of shape " + + ConvertShapeToString(shapeA) + " to " + ConvertShapeToString(shapeB) + + " in a common shape."); } return ret.second; } From 6282757cc6b3d5afb60e311a5abe7e87cffbf98a Mon Sep 17 00:00:00 2001 From: moneta Date: Thu, 8 Jan 2026 19:09:39 +0100 Subject: [PATCH 10/12] [tmva][sofie] Add alias tensors Add alias tensors to cope with identity operators. In this case just a pointer assignment is performed by the operator. 
Exclude these tensors in the allocation and take care of them in the dynamic memory pool. Optimise Slice operator when slice is an identity and also ScatterElements --- tmva/sofie/inc/TMVA/RModel.hxx | 5 ++ tmva/sofie/inc/TMVA/ROperator_Constant.hxx | 8 ++- .../inc/TMVA/ROperator_ScatterElements.hxx | 23 +++++- tmva/sofie/inc/TMVA/ROperator_Slice.hxx | 36 +++++++++- tmva/sofie/src/RModel.cxx | 71 +++++++++++++------ tmva/sofie/src/SOFIE_common.cxx | 2 +- 6 files changed, 118 insertions(+), 27 deletions(-) diff --git a/tmva/sofie/inc/TMVA/RModel.hxx b/tmva/sofie/inc/TMVA/RModel.hxx index 2a68bcb3593d3..13d95935d9600 100644 --- a/tmva/sofie/inc/TMVA/RModel.hxx +++ b/tmva/sofie/inc/TMVA/RModel.hxx @@ -30,6 +30,7 @@ private: std::unordered_map fDynamicTensorInfos; std::unordered_map, bool>> fShapeTensors; // constant tensors describing a shape std::unordered_map fShapeParams; // parameters defining the dynamic shape (e.g. batch size), store also its default value + std::unordered_map fAliasTensors; // list of alias tensors std::vector fDimShapeNames; // parameter names used to define the shapes std::vector fOutputTensorNames; std::vector fInputTensorNames; // input tensor names using ONNX order @@ -82,6 +83,8 @@ public: void AddConstantTensor(std::string tensor_name, ETensorType type, std::vector shape, std::shared_ptr data); + void AddAliasTensor(const std::string & tensor_name, const std::string & orig_tensor_name); + template void AddConstantTensor(const std::string & name, const std::vector & shape, const T * data) { @@ -130,6 +133,8 @@ public: bool IsReadyInputTensor(const std::string &name) const; /// check if a tensor is a shape tensor bool IsShapeTensor(const std::string & name) const; + /// check if a tensor is a alias tensor + bool IsAliasTensor(const std::string & name) const; // Add intermediate tensor void AddIntermediateTensor(std::string tensor_name, ETensorType type, std::vector dim_shape); diff --git a/tmva/sofie/inc/TMVA/ROperator_Constant.hxx b/tmva/sofie/inc/TMVA/ROperator_Constant.hxx index 93f3c43feceb9..7c824f1abe6e3 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Constant.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Constant.hxx @@ -123,8 +123,12 @@ public: if (model.Verbose()) { std::cout << "adding constant tensor " << fNY << " with shape " << ConvertShapeToString(fShape) << " and values ["; - for (auto v : fValues) std::cout << " " << v; - std::cout << "]" << std::endl; + if (!fIsConstantOfShape) { + for (auto v : fValues) std::cout << " " << v; + std::cout << "]" << std::endl; + } else { // for constant of shape is enough to print one value + std::cout << "... " << fValues[0] << " ....]" << std::endl; + } } } else { model.AddIntermediateTensor(fNY, ConvertStringToType(TensorType::Name()), fDimOutputShape); diff --git a/tmva/sofie/inc/TMVA/ROperator_ScatterElements.hxx b/tmva/sofie/inc/TMVA/ROperator_ScatterElements.hxx index 626debd13038e..2525ea32629df 100644 --- a/tmva/sofie/inc/TMVA/ROperator_ScatterElements.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_ScatterElements.hxx @@ -136,6 +136,17 @@ public: return strst.str(); }; + auto tensorIndexOpt = [](const std::vector & sdx, const std::vector & idx) { + std::stringstream strst; + int dims = idx.size(); + for (int i = 0; i < dims-1; i++) { + strst << sdx[i]; + strst << " + "; + } + strst << idx[dims-1]; + return strst.str(); + }; + // copy first input in output (maybe can be avoided??)
out << SP << "std::copy(tensor_" << fNX << ", tensor_" << fNX << " + " << length << ", tensor_" << fNY << ");\n"; @@ -143,14 +154,24 @@ public: // loop on tensor rank int dims = fShapeY.size(); std::vector idx(dims); + std::vector sdx(dims); // stride for indices for (int i = 0; i < dims; i++) { idx[i] = std::string("i") + std::to_string(i); + sdx[i] = std::string("s") + std::to_string(i); for (int j = 0; j <= i; j++) out << SP; out << "for (int " << idx[i] << " = 0; " << idx[i] << " < " << fShapeI[i] << "; " << idx[i] << "++) {\n"; + if (i < dims-1) { + for (int j = 0; j <= i+1 ; j++) out << SP; + if (strideI[i].GetVal() != "1") + out << "int "<< sdx[i] << " = " << strideI[i] << " * " << idx[i] << ";\n"; + else + out << "int "<< sdx[i] << " = " << idx[i] << ";\n"; + } } // correct index for specific axis for (int j = 0; j <= dims; j++) out << SP; - out << "int updateIndex = " << tensorIndex(strideI,idx) << ";\n"; + // can use optimised formula for indices since the loop above is on fShapeI + out << "int updateIndex = " << tensorIndexOpt(sdx,idx) << ";\n"; for (int j = 0; j <= dims; j++) out << SP; out << "int iAxis = tensor_" << fNI << "[updateIndex];\n"; for (int j = 0; j <= dims; j++) out << SP; diff --git a/tmva/sofie/inc/TMVA/ROperator_Slice.hxx b/tmva/sofie/inc/TMVA/ROperator_Slice.hxx index 3add774b0d8d4..4e3c1319bd772 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Slice.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Slice.hxx @@ -25,6 +25,7 @@ private: bool fIsStartUndef = false; bool fIsEndUndef = false; bool fIsStepUndef = false; + bool fIdentitySlice = false; std::string fNData; // input data tensor name std::string fNOutput; // output data name std::vector fNames; // tensor names for meta(axis) information @@ -332,10 +333,25 @@ public: } } else { + // check if Slice is just an Identity operator in case start = 0, end = input_shape and step=1 + size_t ndim = fShapeInput.size(); + fIdentitySlice = fShapeOutput.size() == ndim; + for (size_t idim = 0; idim < ndim; idim++) { + if (!fIdentitySlice) break; + fIdentitySlice &= (fStart[idim].GetVal() == "0"); + fIdentitySlice &= (fSteps[idim].GetVal() == "1"); + fIdentitySlice &= (fEnd[idim].GetVal() == fShapeOutput[idim].GetVal()); + } + model.AddIntermediateTensor(fNOutput, model.GetTensorType(fNData), fShapeOutput); + if (fIdentitySlice) model.AddAliasTensor(fNOutput, fNData); + if (model.Verbose()) { std::cout << "Slice " << fNData << " " << ConvertShapeToString(fShapeInput) - << "---> " << fNOutput << " " << ConvertShapeToString(fShapeOutput) << std::endl; + << "---> " << fNOutput << " " << ConvertShapeToString(fShapeOutput); + if (fIdentitySlice) std::cout << " (using alias tensor since slice is an identity) "; + std::cout << std::endl; + } } } @@ -351,8 +367,24 @@ public: out << "///------- Slice operator " << opName << "---> " << fNOutput << " " << ConvertDimShapeToString(fShapeOutput) << "\n" << std::endl; if (fIsOutputConstant) return out.str(); //no op for constant tensors - // loop on the dimensions depending no the orders + size_t ndim = fShapeInput.size(); + // check if Slice is just an Identity operator in case start = 0, end = input_shape and step=1 + bool identitySlice = fShapeInput.size() == fShapeOutput.size(); + for (size_t idim = 0; idim < ndim; idim++) { + if (!identitySlice) break; + identitySlice &= (fStart[idim].GetVal() == "0"); + identitySlice &= (fSteps[idim].GetVal() == "1"); + identitySlice &= (fEnd[idim].GetVal() == fShapeOutput[idim].GetVal()); + } + + if (identitySlice) { + out << "/// Slice is just an identity 
(copy pointers) \n"; + out << SP << "tensor_" << fNOutput << " = tensor_" << fNData << ";\n"; + return out.str(); + } + + // loop on the dimensions depending no the orders auto strides = UTILITY::ComputeStrideFromShape(fShapeInput); diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index 089e656fedbd1..5e22f48b8f2be 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -167,8 +167,8 @@ void RModel::AddOperator(std::unique_ptr op, int order_execution) { order_execution = fOperators.size()-1; } - // storing the last usage of tensors which are input to - // operators (but are not inputs to the model or they are not initialized) + // storing the last usage of tensors which are input to the operator + // (excluding tensors which are inputs to the model or the initialized (weights) tensors) // We call this function during parsing so we don't have yet initialized the operators for(size_t index = 0; index & s fShapeTensors[tensor_name] = std::make_pair(shape_values, scalar); } +void RModel::AddAliasTensor(const std::string & name, const std::string & origin){ + // add an alias tensor to origin + auto tensor_name = UTILITY::Clean_name(name); + auto origin_name = UTILITY::Clean_name(origin); + if (fAliasTensors.count(tensor_name) != 0) { + throw std::runtime_error("TMVA-SOFIE: alias tensor with name " + tensor_name + " already exists \n"); + } + fAliasTensors[tensor_name] = origin_name; +} + bool RModel::IsShapeTensor(const std::string & tensor_name) const { return fShapeTensors.count(tensor_name) != 0; } +bool RModel::IsAliasTensor(const std::string & tensor_name) const { + return fAliasTensors.count(tensor_name) != 0; +} + const std::vector & RModel::GetShapeTensorValues(const std::string & tensor_name) const { //if (!IsShapeTensor(tensor_name) ) return std::vector{}; return fShapeTensors.at(tensor_name).first; @@ -356,6 +370,11 @@ std::string RModel::AllocateIntermediateMemory(std::span fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end()) continue; + // case of alias tensor + if (IsAliasTensor(name)) { + continue; + } + auto tensor_size = GetTypeSize(GetTensorType(name)) * ConvertShapeToLength(GetTensorShape(name)); // important fill the pair in the ordered output tensors with the string view and not the string TensorMemoryInfo tmi = {it, tensor_size}; @@ -435,9 +454,14 @@ void RModel::CheckAndFlushIntermediateMemory(std::span o chunk != fIntermediateMemoryInfo.available_stack.end(); chunk++) { if (fVerbose) std::cout << "-- free chunk " << chunk->first << " size = " << chunk->second << std::endl; } - for (auto &it : op_input_tensors) { + for (auto &iv : op_input_tensors) { // last occurrence of the tensor is reached => flush it from memory - if (fVerbose) std::cout << ".. input tensors : " << it; + if (fVerbose) std::cout << ".. 
input tensors : " << iv; + + // for alias tensors replace name with its alias + std::string it{iv}; // convert view to string + if (IsAliasTensor(it)) + it = fAliasTensors[it]; if (fIntermediateTensorFrequencyLookup[it] == op_idx) { if (fVerbose) std::cout << " flash condition is met - looping on chunks to find matching one \n"; for (auto chunk = fIntermediateMemoryInfo.total_stack.begin(); @@ -623,6 +647,17 @@ void RModel::Initialize(const std::map & inputParams, bool fUseWeightFile = false; } + // update fIntermediateTensorFrequencyLookup for alias tensors + for (auto & it : fAliasTensors) { + if (fIntermediateTensorFrequencyLookup.find(it.first) == fIntermediateTensorFrequencyLookup.end()) continue; + if (fIntermediateTensorFrequencyLookup.find(it.second) == fIntermediateTensorFrequencyLookup.end() ) + fIntermediateTensorFrequencyLookup[it.second] = fIntermediateTensorFrequencyLookup[it.first]; + else { + // take the largest one + fIntermediateTensorFrequencyLookup[it.second] = std::max(fIntermediateTensorFrequencyLookup[it.second],fIntermediateTensorFrequencyLookup[it.first] ); + } + } + fIsInitialized = true; } @@ -737,7 +772,8 @@ void RModel::GenerateIntermediateTensorInfo() { if (!fIntermediateTensorInfos.empty()) { std::string tensor_declaration_block = ""; for (auto &i : fIntermediateTensorInfos) { - if (i.second.type == ETensorType::BOOL) { + bool is_alias = (IsAliasTensor(i.first)); + if (i.second.type == ETensorType::BOOL && !is_alias) { tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(ConvertShapeToLength(i.second.shape)) + ");\n"; tensor_declaration_block += "std::uint8_t * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; continue; @@ -748,7 +784,7 @@ void RModel::GenerateIntermediateTensorInfo() { bool not_in_output_names = (std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), i.first) == fOutputTensorNames.end()); - if ((not_in_freq_map && not_in_output_names) || (!not_in_freq_map && !is_extended && not_in_output_names)) { + if (((not_in_freq_map && not_in_output_names) || (!not_in_freq_map && !is_extended && not_in_output_names) ) && !is_alias) { size_t length = ConvertShapeToLength(i.second.shape); if (i.second.type == ETensorType::FLOAT) { @@ -767,6 +803,10 @@ void RModel::GenerateIntermediateTensorInfo() { fOtherTensorSize += 8 * length; } } + if (is_alias) { + tensor_declaration_block += ConvertTypeToString(i.second.type) + " * tensor_" + i.first + " = nullptr;\n"; + } + } if (tensor_declaration_block.length()) { @@ -777,19 +817,7 @@ void RModel::GenerateIntermediateTensorInfo() { if (!fDynamicTensorInfos.empty()) { fGC += "//--- declare the dynamic tensors\n"; for (auto &i : fDynamicTensorInfos) { - if (i.second.type == ETensorType::FLOAT) { - //fGC += "std::vector fTensor_" + i.first + ";\n"; - fGC += "float * tensor_" + i.first + " = nullptr;\n"; - } else if (i.second.type == ETensorType::DOUBLE) { - //fGC += "std::vector fTensor_" + i.first + ";\n"; - fGC += "double * tensor_" + i.first + " = nullptr;\n"; - } else if (i.second.type == ETensorType::INT64) { - //fGC += "std::vector fTensor_" + i.first + ";\n"; - fGC += "int64_t * tensor_" + i.first + " = nullptr;\n"; - } else if (i.second.type == ETensorType::BOOL) { - //fGC += "std::vector fTensor_" + i.first + ";\n"; - fGC += "uint8_t * tensor_" + i.first + " = nullptr;\n"; - } + fGC += ConvertTypeToString(i.second.type) + " * tensor_" + i.first + " = nullptr;\n"; } fGC += "//--- dynamic tensors pool\n"; fGC += "std::vector 
fDynamicMemoryPool;\n"; @@ -835,9 +863,9 @@ void RModel::GenerateDynamicTensorInfo() auto op_ptr = op.get(); std::cout << "Looping on operator " << op_index << " " << typeid(*op_ptr).name() << std::endl; } - // check if is a dynamic tensor + // check if is a dynamic tensor and not an alias tensor std::string name = std::string(it); - if ( fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end() ) { + if ( fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end() && !IsAliasTensor(name)) { auto tensor_size = ConvertDimShapeToLength(GetDimTensorShape(name)); auto type = GetTensorType(name); size_t type_size = GetTypeSize(type); @@ -873,6 +901,7 @@ void RModel::GenerateDynamicTensorInfo() // check that all dynamic tensors are covered bool missingTensor = false; for (auto &i : fDynamicTensorInfos) { + if (IsAliasTensor(i.first)) continue; if (std::find(tensors.begin(), tensors.end(), std::pair{i.first, i.second.type}) == tensors.end()) { std::cout << "Dynamic tensors " << i.first << " is not in list of operator input/output " << std::endl; missingTensor = true; diff --git a/tmva/sofie/src/SOFIE_common.cxx b/tmva/sofie/src/SOFIE_common.cxx index f659d0e1a2fe6..54fed04ba42b1 100644 --- a/tmva/sofie/src/SOFIE_common.cxx +++ b/tmva/sofie/src/SOFIE_common.cxx @@ -132,7 +132,7 @@ std::string ConvertDimShapeToString(const std::vector & shape) { std::stringstream out; out << "{ "; for (size_t i = 0; i < shape.size(); i++) { - out << shape[i].GetVal(); + out << shape[i]; if (i < shape.size()-1) out << " , "; } out << " }"; From 08bfc2897c5bfe6b852111dbe4c1743d96483da6 Mon Sep 17 00:00:00 2001 From: Olia Date: Mon, 11 Aug 2025 12:46:25 +0200 Subject: [PATCH 11/12] Time Profiler for Sofie --- tmva/sofie/CMakeLists.txt | 2 + tmva/sofie/inc/TMVA/RModel.hxx | 9 +- tmva/sofie/inc/TMVA/RModelProfiler.hxx | 42 +++++ tmva/sofie/inc/TMVA/RModel_Base.hxx | 1 + tmva/sofie/inc/TMVA/ROperator.hxx | 3 + tmva/sofie/src/RModel.cxx | 49 +++--- tmva/sofie/src/RModelProfiler.cxx | 161 +++++++++++++++++++ tmva/sofie_parsers/src/RModelParser_ONNX.cxx | 9 +- tutorials/machine_learning/TMVA_SOFIE_ONNX.C | 2 +- 9 files changed, 255 insertions(+), 23 deletions(-) create mode 100644 tmva/sofie/inc/TMVA/RModelProfiler.hxx create mode 100644 tmva/sofie/src/RModelProfiler.cxx diff --git a/tmva/sofie/CMakeLists.txt b/tmva/sofie/CMakeLists.txt index c807d1b7b8c27..f56d2350ecadd 100644 --- a/tmva/sofie/CMakeLists.txt +++ b/tmva/sofie/CMakeLists.txt @@ -22,6 +22,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie TMVA/OperatorList.hxx TMVA/RModel_Base.hxx TMVA/RModel.hxx + TMVA/RModelProfiler.hxx TMVA/ROperator.hxx TMVA/ROperator_BasicUnary.hxx TMVA/ROperator_BasicBinary.hxx @@ -77,6 +78,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie SOURCES src/RModel_Base.cxx src/RModel.cxx + src/RModelProfiler.cxx src/RModel_GNN.cxx src/RModel_GraphIndependent.cxx src/RFunction.cxx diff --git a/tmva/sofie/inc/TMVA/RModel.hxx b/tmva/sofie/inc/TMVA/RModel.hxx index 13d95935d9600..a82c58c75b2e2 100644 --- a/tmva/sofie/inc/TMVA/RModel.hxx +++ b/tmva/sofie/inc/TMVA/RModel.hxx @@ -11,16 +11,23 @@ namespace SOFIE { class RModel final : public RModel_Base { + friend class RModelProfiler; + private: bool fIsInitialized = false; bool fIsSubGraph = false; + bool fProfile = false; + int fVerbose = 0; int fBatchSize = -1; long fReadPos = 0; // reading file position + size_t fConstantTensorSize = 0; // size (in Bytes) of the allocated constant tensors size_t fWeightsTensorSize = 0; // size (in Bytes) of the allocated weight tensors size_t fOtherTensorSize = 
0; // size (in Bytes) of intermediate tensors which are not managed by the memory pool + std::string fProfilerGC = ""; + OptimizationLevel fOptimizationLevel = OptimizationLevel::kExtended; std::unordered_map fInputTensorInfos; // input tensors where shape may not fully defined or other graph inputs? @@ -157,7 +164,7 @@ public: void Initialize(int batchSize = -1, bool verbose = false); void Initialize(const std::map & inputParams, bool verbose = false); - void Generate(std::underlying_type_t options, int batchSize = -1, long pos = 0, bool verbose = false); + void Generate(std::underlying_type_t options, int batchSize = -1, long pos = 0, bool verbose = false); void Generate(Options options = Options::kDefault, int batchSize = -1, int pos = 0, bool verbose = false) { Generate(static_cast>(options), batchSize, pos, verbose); diff --git a/tmva/sofie/inc/TMVA/RModelProfiler.hxx b/tmva/sofie/inc/TMVA/RModelProfiler.hxx new file mode 100644 index 0000000000000..fd9c8c7d0267d --- /dev/null +++ b/tmva/sofie/inc/TMVA/RModelProfiler.hxx @@ -0,0 +1,42 @@ +#ifndef TMVA_SOFIE_RMODELPROFILER +#define TMVA_SOFIE_RMODELPROFILER + +#include "TMVA/RModel.hxx" + +namespace TMVA { +namespace Experimental { +namespace SOFIE { + +/// \class RModelProfiler +/// \brief A helper class to generate profiled inference code for an RModel. +/// +/// This class instruments the generated C++ code to measure the execution +/// time of each operator. It is invoked when the RModel::Generate is called +/// with the Options::kProfile flag. +class RModelProfiler { +private: + RModel &fModel; + + void GenerateUtilityFunctions(); + +public: + // The profiler must be constructed with a model to work on. + RModelProfiler() = delete; + RModelProfiler(RModel &model); + ~RModelProfiler() = default; + + // There is no point in copying or moving an RModelProfiler + RModelProfiler(const RModelProfiler &other) = delete; + RModelProfiler(RModelProfiler &&other) = delete; + RModelProfiler &operator=(const RModelProfiler &other) = delete; + RModelProfiler &operator=(RModelProfiler &&other) = delete; + + // Main function to generate the profiled code. + void Generate(); +}; + +} // namespace SOFIE +} // namespace Experimental +} // namespace TMVA + +#endif // TMVA_SOFIE_RMODELPROFILER diff --git a/tmva/sofie/inc/TMVA/RModel_Base.hxx b/tmva/sofie/inc/TMVA/RModel_Base.hxx index 2cbcc6cc8ea41..2ab5dacaac57f 100644 --- a/tmva/sofie/inc/TMVA/RModel_Base.hxx +++ b/tmva/sofie/inc/TMVA/RModel_Base.hxx @@ -26,6 +26,7 @@ enum class Options { kRootBinaryWeightFile = 0x4, kGNN = 0x8, kGNNComponent = 0x10, + kProfile = 0x20, }; // Optimization levels inspired by ONNXRuntime. 
diff --git a/tmva/sofie/inc/TMVA/ROperator.hxx b/tmva/sofie/inc/TMVA/ROperator.hxx index f0afd9c4374c1..200cd3f2976fe 100644 --- a/tmva/sofie/inc/TMVA/ROperator.hxx +++ b/tmva/sofie/inc/TMVA/ROperator.hxx @@ -37,6 +37,9 @@ public: //virtual void Forward_blas() = 0; virtual ~ROperator(){} + std::string name = "UnnamedOperator"; + const std::string &GetOperatorName() { return name; }; + protected: const std::string SP = " "; ///< space used to correctly indent the generated C++ code diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index 5e22f48b8f2be..32da75fdc045b 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -9,6 +9,7 @@ #endif #include "TMVA/RModel.hxx" +#include "TMVA/RModelProfiler.hxx" #include "TMVA/SOFIE_common.hxx" namespace TMVA { @@ -1061,7 +1062,7 @@ void RModel::GenerateSessionCode() CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx); } - // to check remaining unused fragments after memory allocation (lesser the better) + // to check remaining unused fragments after memory allocation (lesser the better) // for (const auto &it: fIntermediateMemoryInfo.available_stack){ // std::cout<<"chunk_idx: "<GenerateSessionMembersCode(opName); + fGC += fOperators[id]->GenerateSessionMembersCode(opName); } fGC += "\n"; // here add initialization and reading of weight tensors @@ -1143,23 +1144,28 @@ void RModel::GenerateSessionCode() fGC += "}\n\n"; } - fGC += doInferSignature + "{\n"; - fGC += "\n"; + if (fProfile) { + RModelProfiler profiler(*this); + profiler.Generate(); + fGC += fProfilerGC; + } else { + fGC += doInferSignature + "{\n"; + fGC += "\n"; - // generate the inference code - if (fVerbose) - std::cout << "Generating main inference code for " << fName << std::endl; + // generate the inference code + if (fVerbose) + std::cout << "Generating main inference code for " << fName << std::endl; - if (fOutputTensorNames.size() == 0) - throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported"); + if (fOutputTensorNames.size() == 0) + throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported"); - for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { - if (fVerbose) + for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { + if (fVerbose) std::cout << "Generating code for operator .... 
" << op_idx << std::endl; - fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx))); - } + fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx))); + } - fGC += SP + "using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n"; + fGC += SP + "using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n"; for (std::string const &name : fOutputTensorNames) { // need to check is size is the same (don't want to return a vector with @@ -1170,7 +1176,8 @@ void RModel::GenerateSessionCode() fGC += SP + "FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n"; } - fGC += "}\n\n"; + fGC += "}\n\n"; + } // generate the inference overload that returns an output struct GenerateOutput(); @@ -1183,9 +1190,11 @@ void RModel::GenerateSessionCode() void RModel::Generate(std::underlying_type_t options, int batchSize, long pos, bool verbose) { + bool profile = (options & static_cast>(Options::kProfile)); fVerbose = verbose; fBatchSize = batchSize; fReadPos = pos; + fProfile = profile; // session flag is used in operator initialize if (static_cast>(Options::kNoSession) & options) { @@ -1205,9 +1214,9 @@ void RModel::Generate(std::underlying_type_t options, int batchSize, lo "TMVA-SOFIE: RModel::Generate: cannot use a separate weight file without generating a Session class"); } - if (static_cast>(Options::kGNN) & options) + if (static_cast>(Options::kGNN) & options) fIsGNN = true; - if (static_cast>(Options::kGNNComponent) & options) + if (static_cast>(Options::kGNNComponent) & options) fIsGNNComponent = true; // initialize the model including all operators and sub-graphs @@ -1228,13 +1237,13 @@ void RModel::Generate(std::underlying_type_t options, int batchSize, lo // generate first code for the subgraphs for (auto &graph : fSubGraphs) { - if (fVerbose) + if (fVerbose) std::cout << "generate session code for subgraph " << graph->fName << std::endl; graph->GenerateSessionCode(); fGC += graph->fGC; } - if (fVerbose) + if (fVerbose) std::cout << "generate Main session code - model " << fName << std::endl; // generate main session code diff --git a/tmva/sofie/src/RModelProfiler.cxx b/tmva/sofie/src/RModelProfiler.cxx new file mode 100644 index 0000000000000..76386e6de817d --- /dev/null +++ b/tmva/sofie/src/RModelProfiler.cxx @@ -0,0 +1,161 @@ +#include "TMVA/RModelProfiler.hxx" +#include "TMVA/SOFIE_common.hxx" + +namespace TMVA { +namespace Experimental { +namespace SOFIE { + +// The constructor now just registers the necessary C++ libraries. +RModelProfiler::RModelProfiler(RModel &model) : fModel(model) +{ + fModel.AddNeededStdLib("chrono"); // for timing operators + fModel.AddNeededStdLib("vector"); // for storing profiling results + fModel.AddNeededStdLib("string"); // for operator names + fModel.AddNeededStdLib("map"); // for the results map + fModel.AddNeededStdLib("iostream"); // for printing results + fModel.AddNeededStdLib("iomanip"); // for printing results +} + +// This function generates the helper functions inside the Session struct. 
+void RModelProfiler::GenerateUtilityFunctions() +{ + auto &gc = fModel.fProfilerGC; + + // Generate PrintProfilingResults function + gc += " void PrintProfilingResults() const {\n"; + gc += " if (fProfilingResults.empty()) {\n"; + gc += " std::cout << \"No profiling results to display.\" << std::endl;\n"; + gc += " return;\n"; + gc += " }\n"; + gc += "\n"; + gc += " std::cout << \"\\n\" << std::string(50, '=') << std::endl;\n"; + gc += " std::cout << \" AVERAGE PROFILING RESULTS\" << std::endl;\n"; + gc += " std::cout << std::string(50, '=') << std::endl;\n"; + gc += " for (const auto& op : fProfilingResults) {\n"; + gc += " double sum = 0.0;\n"; + gc += " for (double time : op.second) {\n"; + gc += " sum += time;\n"; + gc += " }\n"; + gc += " double average = sum / op.second.size();\n"; + gc += " std::cout << \" \" << std::left << std::setw(20) << op.first\n"; + gc += " << \": \" << std::fixed << std::setprecision(6) << average << \" us\"\n"; + gc += " << \" (over \" << op.second.size() << \" runs)\" << std::endl;\n"; + gc += " }\n"; + gc += " std::cout << std::string(50, '=') << \"\\n\" << std::endl;\n"; + gc += " }\n"; + gc += "\n"; + + // Generate ResetProfilingResults function + gc += " void ResetProfilingResults() {\n"; + gc += " fProfilingResults.clear();\n"; + gc += " }\n"; + gc += "\n"; + + // Generate GetOpAvgTime function + gc += " std::map GetOpAvgTime() const {\n"; + gc += " if (fProfilingResults.empty()) {\n"; + gc += " return {};\n"; + gc += " }\n"; + gc += "\n"; + gc += " std::map avg;\n"; + gc += " for (const auto& op : fProfilingResults) {\n"; + gc += " double mean = 0.0;\n"; + gc += " for (double time : op.second) {\n"; + gc += " mean += time;\n"; + gc += " }\n"; + gc += " mean /= op.second.size();\n"; + gc += " avg[op.first] = mean;\n"; + gc += " }\n"; + gc += "\n"; + gc += " return avg;\n"; + gc += " }\n"; + gc += "\n"; + + // Generate GetOpVariance function + gc += " std::map GetOpVariance() const {\n"; + gc += " if (fProfilingResults.empty()) {\n"; + gc += " return {};\n"; + gc += " }\n"; + gc += "\n"; + gc += " std::map variance;\n"; + gc += " for (const auto& op : fProfilingResults) {\n"; + gc += " // Var[X] = E[X^2] - E[X]^2\n"; + gc += " double mean = 0.0, mean2 = 0.0;\n"; + gc += " for (double time : op.second) {\n"; + gc += " mean += time;\n"; + gc += " mean2 += time * time;\n"; + gc += " }\n"; + gc += " mean /= op.second.size();\n"; + gc += " mean2 /= op.second.size();\n"; + gc += " variance[op.first] = mean2 - mean * mean;\n"; + gc += " }\n"; + gc += "\n"; + gc += " return variance;\n"; + gc += " }\n"; +} + +// Main generation function for the profiler. +void RModelProfiler::Generate() +{ + // Clear the profiler's code string to start fresh. + fModel.fProfilerGC.clear(); + auto &gc = fModel.fProfilerGC; + + // 1. Add the data member to the Session struct to store results. + gc += "public:\n"; + gc += " // Maps an operator name to a vector of its execution times (in microseconds).\n"; + gc += " std::map> fProfilingResults;\n\n"; + + // 2. Generate and add the utility functions like PrintProfilingResults. + GenerateUtilityFunctions(); + + // 3. Generate the signature for the profiled doInfer method. 
+ std::string doInferSignature = fModel.GenerateInferSignature(); + if (!doInferSignature.empty()) doInferSignature += ", "; + for (auto const &name : fModel.GetOutputTensorNames()) { + doInferSignature += " std::vector<" + ConvertTypeToString(fModel.GetTensorType(name)) + "> &output_tensor_" + name + ","; + } + if (!fModel.GetOutputTensorNames().empty()) { + doInferSignature.back() = ' '; + } + gc += "void doInfer(" + doInferSignature + ") {\n"; + + // 4. Generate the body of the doInfer method with timing instrumentation. + gc += " // Timer variable for profiling\n"; + gc += " std::chrono::steady_clock::time_point tp_start, tp_overall_start;\n\n"; + gc += " tp_overall_start = std::chrono::steady_clock::now();\n\n"; + + for (size_t op_idx = 0; op_idx < fModel.fOperators.size(); ++op_idx) { + const auto& op = fModel.fOperators[op_idx]; + gc += " // -- Profiling for operator " + op->name + " --\n"; + gc += " tp_start = std::chrono::steady_clock::now();\n\n"; + + // Add the actual operator inference code + gc += op->Generate(std::to_string(op_idx)); + + // Add the code to stop the timer and store the result + gc += "\n fProfilingResults[\"" + op->name + "\"].push_back(\n"; + gc += " std::chrono::duration_cast>(\n"; + gc += " std::chrono::steady_clock::now() - tp_start).count());\n\n"; + } + + // 5. Generate the code to fill the output tensors. + gc += " using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n"; + for (std::string const &name : fModel.GetOutputTensorNames()) { + bool isIntermediate = fModel.fIntermediateTensorInfos.count(name) > 0; + std::string n = isIntermediate ? std::to_string(ConvertShapeToLength(fModel.GetTensorShape(name))) + : ConvertDynamicShapeToLength(fModel.GetDynamicTensorShape(name)); + gc += " FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n"; + } + + gc += "\n // -- Record overall inference time --\n"; + gc += " fProfilingResults[\"Overall_Time\"].push_back(\n"; + gc += " std::chrono::duration_cast>(\n"; + gc += " std::chrono::steady_clock::now() - tp_overall_start).count());\n"; + + gc += "}\n\n"; // End of doInfer function +} + +} // namespace SOFIE +} // namespace Experimental +} // namespace TMVA diff --git a/tmva/sofie_parsers/src/RModelParser_ONNX.cxx b/tmva/sofie_parsers/src/RModelParser_ONNX.cxx index 7b4ade2b6bc09..4903c8d1c6511 100644 --- a/tmva/sofie_parsers/src/RModelParser_ONNX.cxx +++ b/tmva/sofie_parsers/src/RModelParser_ONNX.cxx @@ -731,7 +731,8 @@ void RModelParser_ONNX::ParseONNXGraph(RModel & rmodel, const onnx::GraphProto & std::cout << "\t" << i << " " << nodesOrder[i] << " parsing operator " << op_type << std::endl; } - std::unique_ptr op = ParseOperator(i, graph, nodesOrder, nodesChildren[i]); + std::unique_ptr op = ParseOperator(i, graph, nodesOrder, nodesChildren[nodesOrder[i]]); + if (!op) { if (verbose) { std::cout << "\t\tskipping operator since it is fused with previous one" << std::endl; @@ -739,6 +740,12 @@ void RModelParser_ONNX::ParseONNXGraph(RModel & rmodel, const onnx::GraphProto & // for skipping the fused nodes like Add after MatMul continue; } + const auto &nodeproto = graph.node(nodesOrder[i]); + op->name = nodeproto.name(); + if (op->name.empty()) { + op->name = op_type + "_" + std::to_string(i); + } + rmodel.AddOperator(std::move(op), node_order_exec++); } diff --git a/tutorials/machine_learning/TMVA_SOFIE_ONNX.C b/tutorials/machine_learning/TMVA_SOFIE_ONNX.C index 8c192789e1210..878167db8c791 100644 --- a/tutorials/machine_learning/TMVA_SOFIE_ONNX.C +++ 
b/tutorials/machine_learning/TMVA_SOFIE_ONNX.C @@ -19,7 +19,7 @@ void TMVA_SOFIE_ONNX(std::string inputFile = ""){ SOFIE::RModel model = parser.Parse(inputFile, true); //Generating inference code - model.Generate(); + model.Generate(SOFIE::Options::kProfile); // write the code in a file (by default Linear_16.hxx and Linear_16.dat model.OutputGenerated(); From 32d38a2855b91550f5ec5d3ebdf0121fe0e4f57a Mon Sep 17 00:00:00 2001 From: moneta Date: Wed, 7 Jan 2026 16:00:54 +0100 Subject: [PATCH 12/12] [tmva][sofie] Imporve RModel profiler Compute also the error on the average when printing results and sort them in decreasing order in time --- tmva/sofie/src/RModelProfiler.cxx | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/tmva/sofie/src/RModelProfiler.cxx b/tmva/sofie/src/RModelProfiler.cxx index 76386e6de817d..c56d4127e99b7 100644 --- a/tmva/sofie/src/RModelProfiler.cxx +++ b/tmva/sofie/src/RModelProfiler.cxx @@ -22,24 +22,39 @@ void RModelProfiler::GenerateUtilityFunctions() auto &gc = fModel.fProfilerGC; // Generate PrintProfilingResults function - gc += " void PrintProfilingResults() const {\n"; + gc += " // generate code for printing operator results. By default order according to time (from higher to lower)\n"; + gc += " void PrintProfilingResults(bool order = true) const {\n"; gc += " if (fProfilingResults.empty()) {\n"; gc += " std::cout << \"No profiling results to display.\" << std::endl;\n"; gc += " return;\n"; gc += " }\n"; gc += "\n"; + gc += " // compute summary statistics of profiling results and sort them in decreasing time\n"; + gc += " std::vector> averageResults;\n"; gc += " std::cout << \"\\n\" << std::string(50, '=') << std::endl;\n"; gc += " std::cout << \" AVERAGE PROFILING RESULTS\" << std::endl;\n"; gc += " std::cout << std::string(50, '=') << std::endl;\n"; gc += " for (const auto& op : fProfilingResults) {\n"; gc += " double sum = 0.0;\n"; + gc += " double sum2 = 0.0;\n"; gc += " for (double time : op.second) {\n"; gc += " sum += time;\n"; + gc += " sum2 += time*time;\n"; gc += " }\n"; gc += " double average = sum / op.second.size();\n"; - gc += " std::cout << \" \" << std::left << std::setw(20) << op.first\n"; - gc += " << \": \" << std::fixed << std::setprecision(6) << average << \" us\"\n"; - gc += " << \" (over \" << op.second.size() << \" runs)\" << std::endl;\n"; + gc += " double stddev = std::sqrt(( sum2 - sum *average)/ (op.second.size()-1));\n"; + gc += " averageResults.push_back({op.first, average, stddev, op.second.size()});\n"; + gc += " }\n"; + gc += "\n"; + gc += " // sort average results in decreasing time\n"; + gc += " std::sort(averageResults.begin(), averageResults.end(),\n"; + gc += " []( std::tuple a, std::tuple b) {return std::get<1>(a) > std::get<1>(b); });\n"; + gc += "\n"; + gc += " for (const auto & r : averageResults) {\n"; + gc += " std::cout << \" \" << std::left << std::setw(20) << std::get<0>(r)\n"; + gc += " << \": \" << std::fixed << std::setprecision(6) << std::get<1>(r) << \" +/- \" \n"; + gc += " << std::get<2>(r)/std::sqrt(std::get<3>(r)) << \" us\"\n"; + gc += " << \" (over \" << std::get<3>(r) << \" runs)\" << std::endl;\n"; gc += " }\n"; gc += " std::cout << std::string(50, '=') << \"\\n\" << std::endl;\n"; gc += " }\n"; @@ -71,7 +86,7 @@ void RModelProfiler::GenerateUtilityFunctions() gc += " }\n"; gc += "\n"; - // Generate GetOpVariance function + // Generate GetOpVariance function gc += " std::map GetOpVariance() const {\n"; gc += " if (fProfilingResults.empty()) {\n"; gc += " 
return {};\n"; @@ -129,10 +144,10 @@ void RModelProfiler::Generate() const auto& op = fModel.fOperators[op_idx]; gc += " // -- Profiling for operator " + op->name + " --\n"; gc += " tp_start = std::chrono::steady_clock::now();\n\n"; - + // Add the actual operator inference code gc += op->Generate(std::to_string(op_idx)); - + // Add the code to stop the timer and store the result gc += "\n fProfilingResults[\"" + op->name + "\"].push_back(\n"; gc += " std::chrono::duration_cast>(\n";
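For reference, a minimal usage sketch of the profiled Session produced when Options::kProfile is passed to RModel::Generate (as in the TMVA_SOFIE_ONNX.C tutorial change above). This is not part of the patch: the namespace TMVA_SOFIE_Linear_16, the single float* input, its size and the weight-file name are placeholders based on the tutorial's default Linear_16 model and depend on the model actually generated; PrintProfilingResults(), GetOpAvgTime() and GetOpVariance() are the helpers emitted by RModelProfiler::GenerateUtilityFunctions().

#include "Linear_16.hxx"   // header written by model.OutputGenerated(); name depends on the model
#include <iostream>
#include <vector>

int main() {
   // Session class name and weight file follow the generated model (placeholders here)
   TMVA_SOFIE_Linear_16::Session session("Linear_16.dat");
   std::vector<float> input(100, 1.f);   // size must match the model input shape
   for (int i = 0; i < 100; ++i) {
      // each infer call appends per-operator timings to fProfilingResults
      auto output = session.infer(input.data());
      (void) output;
   }
   // per-operator averages, sorted by decreasing time, each with the error on the mean
   session.PrintProfilingResults();
   // or access the mean times (in microseconds) programmatically
   for (const auto &op : session.GetOpAvgTime())
      std::cout << op.first << " : " << op.second << " us\n";
   return 0;
}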