Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions tmva/sofie/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie
TMVA/OperatorList.hxx
TMVA/RModel_Base.hxx
TMVA/RModel.hxx
TMVA/RModelProfiler.hxx
TMVA/ROperator.hxx
TMVA/ROperator_BasicUnary.hxx
TMVA/ROperator_BasicBinary.hxx
Expand Down Expand Up @@ -77,6 +78,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie
SOURCES
src/RModel_Base.cxx
src/RModel.cxx
src/RModelProfiler.cxx
src/RModel_GNN.cxx
src/RModel_GraphIndependent.cxx
src/RFunction.cxx
Expand Down
6 changes: 3 additions & 3 deletions tmva/sofie/inc/TMVA/RFunction.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public:

class RFunction_Update: public RFunction {
protected:
std::shared_ptr<RModel> function_block;
std::shared_ptr<RModel> fFunction_block;
FunctionTarget fTarget;
GraphType fGraphType;
std::vector<std::string> fInputTensors;
Expand All @@ -50,9 +50,9 @@ public:
void AddInputTensors(const std::vector<std::vector<std::size_t>>& inputShapes);
void AddInputTensors(const std::vector<std::vector<Dim>>& inputShapes);
std::shared_ptr<RModel> GetFunctionBlock() {
return function_block;
return fFunction_block;
}
std::string GenerateModel(const std::string& filename, long read_pos = 0, long block_size = -1);
std::string GenerateModel(const std::string& filename, long read_pos = 0, long block_size = -1, bool verbose = false);
std::string Generate(const std::vector<std::string>& inputPtrs);
FunctionTarget GetFunctionTarget() {
return fTarget;
Expand Down
24 changes: 18 additions & 6 deletions tmva/sofie/inc/TMVA/RModel.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,23 @@ namespace SOFIE {

class RModel final : public RModel_Base {

friend class RModelProfiler;

private:
bool fIsInitialized = false;
bool fIsSubGraph = false;
bool fProfile = false;

int fVerbose = 0;
int fBatchSize = -1;
long fReadPos = 0; // reading file position

size_t fConstantTensorSize = 0; // size (in Bytes) of the allocated constant tensors
size_t fWeightsTensorSize = 0; // size (in Bytes) of the allocated weight tensors
size_t fOtherTensorSize = 0; // size (in Bytes) of intermediate tensors which are not managed by the memory pool

std::string fProfilerGC = "";

OptimizationLevel fOptimizationLevel = OptimizationLevel::kExtended;

std::unordered_map<std::string, InputTensorInfo> fInputTensorInfos; // input tensors where shape may not fully defined or other graph inputs?
Expand All @@ -30,6 +37,7 @@ private:
std::unordered_map<std::string, DynamicTensorInfo> fDynamicTensorInfos;
std::unordered_map<std::string, std::pair<std::vector<Dim>, bool>> fShapeTensors; // constant tensors describing a shape
std::unordered_map<std::string, std::string> fShapeParams; // parameters defining the dynamic shape (e.g. batch size), store also its default value
std::unordered_map<std::string, std::string> fAliasTensors; // list of alias tensors
std::vector<std::string> fDimShapeNames; // parameter names used to define the shapes
std::vector<std::string> fOutputTensorNames;
std::vector<std::string> fInputTensorNames; // input tensor names using ONNX order
Expand Down Expand Up @@ -82,6 +90,8 @@ public:
void AddConstantTensor(std::string tensor_name, ETensorType type, std::vector<std::size_t> shape,
std::shared_ptr<void> data);

void AddAliasTensor(const std::string & tensor_name, const std::string & orig_tensor_name);


template<class T>
void AddConstantTensor(const std::string & name, const std::vector<size_t> & shape, const T * data) {
Expand Down Expand Up @@ -130,6 +140,8 @@ public:
bool IsReadyInputTensor(const std::string &name) const;
/// check if a tensor is a shape tensor
bool IsShapeTensor(const std::string & name) const;
/// check if a tensor is an alias tensor
bool IsAliasTensor(const std::string & name) const;

// Add intermediate tensor
void AddIntermediateTensor(std::string tensor_name, ETensorType type, std::vector<Dim> dim_shape);
Expand All @@ -152,7 +164,7 @@ public:
void Initialize(int batchSize = -1, bool verbose = false);
void Initialize(const std::map<std::string,size_t> & inputParams, bool verbose = false);

void Generate(std::underlying_type_t<Options> options, int batchSize = -1, long pos = 0, bool verbose = false);
void Generate(std::underlying_type_t<Options> options, int batchSize = -1, long pos = 0, bool verbose = false);
void Generate(Options options = Options::kDefault, int batchSize = -1, int pos = 0, bool verbose = false)
{
Generate(static_cast<std::underlying_type_t<Options>>(options), batchSize, pos, verbose);
Expand Down Expand Up @@ -205,8 +217,8 @@ public:
void ReadInitializedTensorsFromFile(long);
long WriteInitializedTensorsToFile(std::string filename = "");

void PrintIntermediateTensors();
void PrintOutputTensors();
void PrintIntermediateTensors() const;
void PrintOutputTensors() const;
void OutputGenerated(std::string filename = "", bool append = false);
std::vector<std::string> GetOutputTensorNames() { return fOutputTensorNames; }
void SetFilename(std::string filename) { fName = filename; }
Expand All @@ -224,9 +236,9 @@ public:
}
*/

void PrintRequiredInputTensors();
void PrintInitializedTensors();
void PrintDynamicTensors();
void PrintRequiredInputTensors() const;
void PrintInitializedTensors() const;
void PrintDynamicTensors() const;
void HeadInitializedTensors(std::string name, int n_print = 50);

bool UseSession() const { return fUseSession; }
Expand Down
42 changes: 42 additions & 0 deletions tmva/sofie/inc/TMVA/RModelProfiler.hxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#ifndef TMVA_SOFIE_RMODELPROFILER
#define TMVA_SOFIE_RMODELPROFILER

#include "TMVA/RModel.hxx"

namespace TMVA {
namespace Experimental {
namespace SOFIE {

/// \class RModelProfiler
/// \brief A helper class to generate profiled inference code for an RModel.
///
/// This class instruments the generated C++ code to measure the execution
/// time of each operator. It is invoked when the RModel::Generate is called
/// with the Options::kProfile flag.
class RModelProfiler {
private:
   /// The model whose generated inference code is instrumented.
   /// Non-owning reference: the RModel must outlive this profiler.
   RModel &fModel;

   /// Emit the timing/utility helper functions into the generated code.
   void GenerateUtilityFunctions();

public:
   // The profiler must be constructed with a model to work on.
   RModelProfiler() = delete;
   /// Construct a profiler operating on the given model.
   /// \param[in] model the RModel to instrument; must outlive this profiler.
   /// Marked explicit to forbid accidental implicit RModel -> RModelProfiler conversion.
   explicit RModelProfiler(RModel &model);
   ~RModelProfiler() = default;

   // There is no point in copying or moving an RModelProfiler:
   // it holds a reference to a model it does not own.
   RModelProfiler(const RModelProfiler &other) = delete;
   RModelProfiler(RModelProfiler &&other) = delete;
   RModelProfiler &operator=(const RModelProfiler &other) = delete;
   RModelProfiler &operator=(RModelProfiler &&other) = delete;

   // Main function to generate the profiled code.
   void Generate();
};

} // namespace SOFIE
} // namespace Experimental
} // namespace TMVA

#endif // TMVA_SOFIE_RMODELPROFILER
1 change: 1 addition & 0 deletions tmva/sofie/inc/TMVA/RModel_Base.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ enum class Options {
kRootBinaryWeightFile = 0x4,
kGNN = 0x8,
kGNNComponent = 0x10,
kProfile = 0x20,
};

// Optimization levels inspired by ONNXRuntime.
Expand Down
3 changes: 3 additions & 0 deletions tmva/sofie/inc/TMVA/ROperator.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ public:
//virtual void Forward_blas() = 0;
virtual ~ROperator(){}

std::string name = "UnnamedOperator";
const std::string &GetOperatorName() { return name; };

protected:

const std::string SP = " "; ///< space used to correctly indent the generated C++ code
Expand Down
2 changes: 1 addition & 1 deletion tmva/sofie/inc/TMVA/ROperator_BasicBinary.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ public:
dataY[i] = BinaryOperatorTrait<T, Op>::Func(dataA[i], dataB[i]);
}
model.AddConstantTensor<T>(fNY, fShapeY, dataY.data());
// flag tensors to not be written in the weight file
// flag tensors to not be written in the generated code or weight file
model.SetNotWritableInitializedTensor(nameA);
model.SetNotWritableInitializedTensor(nameB);
fIsOutputConstant = true;
Expand Down
Loading
Loading