From 25189fba48d4539ed3b729c43d991b1ba140ce6f Mon Sep 17 00:00:00 2001 From: Jonas Rembser Date: Sun, 5 Apr 2026 23:49:46 +0200 Subject: [PATCH 1/3] Format TMVA SOFIE tutorial code User-facing code like tutorials should be nicely formatted, in particular avoiding non-standard indentations with three spaces that can annoy users when copy-pasting code. --- .../machine_learning/TMVA_SOFIE_Keras.py | 34 ++- .../TMVA_SOFIE_Keras_HiggsModel.py | 126 ++++----- .../machine_learning/TMVA_SOFIE_Models.py | 253 +++++++++--------- tutorials/machine_learning/TMVA_SOFIE_ONNX.py | 188 +++++++------ 4 files changed, 300 insertions(+), 301 deletions(-) diff --git a/tutorials/machine_learning/TMVA_SOFIE_Keras.py b/tutorials/machine_learning/TMVA_SOFIE_Keras.py index 083c768b3bede..09ef6ffb6cac7 100644 --- a/tutorials/machine_learning/TMVA_SOFIE_Keras.py +++ b/tutorials/machine_learning/TMVA_SOFIE_Keras.py @@ -9,7 +9,6 @@ ### \author Sanjiban Sengupta and Lorenzo Moneta - import ROOT # Enable ROOT in batch mode (same effect as -nodraw) @@ -23,22 +22,22 @@ from tensorflow.keras.layers import Activation, Dense, Input, Softmax from tensorflow.keras.models import Model -input=Input(shape=(4,),batch_size=2) -x=Dense(32)(input) -x=Activation('relu')(x) -x=Dense(16,activation='relu')(x) -x=Dense(8,activation='relu')(x) -x=Dense(2)(x) -output=Softmax()(x) -model=Model(inputs=input,outputs=output) +input = Input(shape=(4,), batch_size=2) +x = Dense(32)(input) +x = Activation("relu")(x) +x = Dense(16, activation="relu")(x) +x = Dense(8, activation="relu")(x) +x = Dense(2)(x) +output = Softmax()(x) +model = Model(inputs=input, outputs=output) -randomGenerator=np.random.RandomState(0) -x_train=randomGenerator.rand(4,4) -y_train=randomGenerator.rand(4,2) +randomGenerator = np.random.RandomState(0) +x_train = randomGenerator.rand(4, 4) +y_train = randomGenerator.rand(4, 2) -model.compile(loss='mse', optimizer='adam') +model.compile(loss="mse", optimizer="adam") model.fit(x_train, y_train, 
epochs=3, batch_size=2) -model.save('KerasModel.keras') +model.save("KerasModel.keras") model.summary() # ----------------------------------------------------------------------------- @@ -54,7 +53,7 @@ # Generate inference code model.Generate() model.OutputGenerated() -#print generated code +# print generated code print("\n**************************************************") print(" Generated code") print("**************************************************\n") @@ -69,14 +68,13 @@ # Step 3: Run inference # ----------------------------------------------------------------------------- -#instantiate SOFIE session class +# instantiate SOFIE session class session = ROOT.TMVA_SOFIE_KerasModel.Session() # Input tensor (same shape as training input) -x = np.array([[0.1, 0.2, 0.3, 0.4],[0.5, 0.6, 0.7, 0.8]], dtype=np.float32) +x = np.array([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]], dtype=np.float32) # Run inference y = session.infer(x) print("Inference output:", y) - diff --git a/tutorials/machine_learning/TMVA_SOFIE_Keras_HiggsModel.py b/tutorials/machine_learning/TMVA_SOFIE_Keras_HiggsModel.py index 10fd3aa2792aa..38b7ffdefae9e 100644 --- a/tutorials/machine_learning/TMVA_SOFIE_Keras_HiggsModel.py +++ b/tutorials/machine_learning/TMVA_SOFIE_Keras_HiggsModel.py @@ -16,82 +16,85 @@ from sklearn.model_selection import train_test_split -def CreateModel(nlayers = 4, nunits = 64): - input = layers.Input(shape=(7,)) - x = input - for i in range(1,nlayers) : - y = layers.Dense(nunits, activation='relu')(x) - x = y +def CreateModel(nlayers=4, nunits=64): + input = layers.Input(shape=(7,)) + x = input + for i in range(1, nlayers): + y = layers.Dense(nunits, activation="relu")(x) + x = y - output = layers.Dense(1, activation='sigmoid')(x) - model = models.Model(input, output) - model.compile(loss = 'binary_crossentropy', optimizer = 'adam', weighted_metrics = ['accuracy']) - model.summary() - return model + output = layers.Dense(1, activation="sigmoid")(x) + model = 
models.Model(input, output) + model.compile(loss="binary_crossentropy", optimizer="adam", weighted_metrics=["accuracy"]) + model.summary() + return model -def PrepareData() : - #get the input data - inputFile = str(ROOT.gROOT.GetTutorialDir()) + "/machine_learning/data/Higgs_data.root" - df1 = ROOT.RDataFrame("sig_tree", inputFile) - sigData = df1.AsNumpy(columns=['m_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb']) - #print(sigData) +def PrepareData(): + # get the input data + inputFile = str(ROOT.gROOT.GetTutorialDir()) + "/machine_learning/data/Higgs_data.root" - # stack all the 7 numpy array in a single array (nevents x nvars) - xsig = np.column_stack(list(sigData.values())) - data_sig_size = xsig.shape[0] - print("size of data", data_sig_size) + df1 = ROOT.RDataFrame("sig_tree", inputFile) + sigData = df1.AsNumpy(columns=["m_jj", "m_jjj", "m_lv", "m_jlv", "m_bb", "m_wbb", "m_wwbb"]) + # print(sigData) - # make SOFIE inference on background data - df2 = ROOT.RDataFrame("bkg_tree", inputFile) - bkgData = df2.AsNumpy(columns=['m_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb']) - xbkg = np.column_stack(list(bkgData.values())) - data_bkg_size = xbkg.shape[0] + # stack all the 7 numpy array in a single array (nevents x nvars) + xsig = np.column_stack(list(sigData.values())) + data_sig_size = xsig.shape[0] + print("size of data", data_sig_size) - ysig = np.ones(data_sig_size) - ybkg = np.zeros(data_bkg_size) - inputs_data = np.concatenate((xsig,xbkg),axis=0) - inputs_targets = np.concatenate((ysig,ybkg),axis=0) + # make SOFIE inference on background data + df2 = ROOT.RDataFrame("bkg_tree", inputFile) + bkgData = df2.AsNumpy(columns=["m_jj", "m_jjj", "m_lv", "m_jlv", "m_bb", "m_wbb", "m_wwbb"]) + xbkg = np.column_stack(list(bkgData.values())) + data_bkg_size = xbkg.shape[0] - #split data in training and test data + ysig = np.ones(data_sig_size) + ybkg = np.zeros(data_bkg_size) + inputs_data = np.concatenate((xsig, xbkg), axis=0) + inputs_targets = 
np.concatenate((ysig, ybkg), axis=0) - x_train, x_test, y_train, y_test = train_test_split( - inputs_data, inputs_targets, test_size=0.50, random_state=1234) + # split data in training and test data - return x_train, y_train, x_test, y_test + x_train, x_test, y_train, y_test = train_test_split(inputs_data, inputs_targets, test_size=0.50, random_state=1234) -def TrainModel(model, x, y, name) : - model.fit(x,y,epochs=5,batch_size=50) - modelFile = name + '.keras' - model.save(modelFile) - return model, modelFile + return x_train, y_train, x_test, y_test -def GenerateCode(modelFile = "model.keras") : +def TrainModel(model, x, y, name): + model.fit(x, y, epochs=5, batch_size=50) + modelFile = name + ".keras" + model.save(modelFile) + return model, modelFile - #check if the input file exists - if not exists(modelFile): - raise FileNotFoundError("INput model file not existing. You need to run TMVA_Higgs_Classification.C to generate the Keras trained model") +def GenerateCode(modelFile="model.keras"): - #parse the input Keras model into RModel object (force batch size to be 1) - model = ROOT.TMVA.Experimental.SOFIE.PyKeras.Parse(modelFile) + # check if the input file exists + if not exists(modelFile): + raise FileNotFoundError( + "INput model file not existing. 
You need to run TMVA_Higgs_Classification.C to generate the Keras trained model" + ) - #Generating inference code - model.Generate() - model.OutputGenerated() + # parse the input Keras model into RModel object (force batch size to be 1) + model = ROOT.TMVA.Experimental.SOFIE.PyKeras.Parse(modelFile) + + # Generating inference code + model.Generate() + model.OutputGenerated() + + modelName = modelFile.replace(".keras", "") + return modelName - modelName = modelFile.replace(".keras","") - return modelName ################################################################### ## Step 1 : Create and Train model ################################################################### x_train, y_train, x_test, y_test = PrepareData() -#create dense model with 3 layers of 64 units -model = CreateModel(3,64) -model, modelFile = TrainModel(model,x_train, y_train, 'HiggsModel') +# create dense model with 3 layers of 64 units +model = CreateModel(3, 64) +model, modelFile = TrainModel(model, x_train, y_train, "HiggsModel") ################################################################### ## Step 2 : Parse model and generate inference code with SOFIE @@ -110,20 +113,17 @@ def GenerateCode(modelFile = "model.keras") : ## Step 4: Evaluate the model ################################################################### -#get first the SOFIE session namespace -sofie = getattr(ROOT, 'TMVA_SOFIE_' + modelName) +# get first the SOFIE session namespace +sofie = getattr(ROOT, "TMVA_SOFIE_" + modelName) session = sofie.Session() -x = np.random.normal(0,1,7).astype(np.float32) +x = np.random.normal(0, 1, 7).astype(np.float32) y = session.infer(x) -ykeras = model(x.reshape(1,7)).numpy() +ykeras = model(x.reshape(1, 7)).numpy() -print("input to model is ",x, "\n\t -> output using SOFIE = ", y[0], " using Keras = ", ykeras[0]) +print("input to model is ", x, "\n\t -> output using SOFIE = ", y[0], " using Keras = ", ykeras[0]) -if (abs(y[0]-ykeras[0]) > 0.01) : - raise RuntimeError('ERROR: Result is 
different between SOFIE and Keras') +if abs(y[0] - ykeras[0]) > 0.01: + raise RuntimeError("ERROR: Result is different between SOFIE and Keras") print("OK") - - - diff --git a/tutorials/machine_learning/TMVA_SOFIE_Models.py b/tutorials/machine_learning/TMVA_SOFIE_Models.py index 1cef6275dd706..45bd96baf56ba 100644 --- a/tutorials/machine_learning/TMVA_SOFIE_Models.py +++ b/tutorials/machine_learning/TMVA_SOFIE_Models.py @@ -25,53 +25,55 @@ ## generate and train Keras models with different architectures -def CreateModel(nlayers = 4, nunits = 64): - model = Sequential() - model.add(Dense(nunits, activation='relu',input_dim=7)) - for i in range(1,nlayers) : - model.add(Dense(nunits, activation='relu')) - - model.add(Dense(1, activation='sigmoid')) - model.compile(loss = 'binary_crossentropy', optimizer = Adam(learning_rate = 0.001), weighted_metrics = ['accuracy']) - model.summary() - return model - -def PrepareData() : - #get the input data - inputFile = str(ROOT.gROOT.GetTutorialDir()) + "/machine_learning/data/Higgs_data.root" - - df1 = ROOT.RDataFrame("sig_tree", inputFile) - sigData = df1.AsNumpy(columns=['m_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb']) - #print(sigData) - - # stack all the 7 numpy array in a single array (nevents x nvars) - xsig = np.column_stack(list(sigData.values())) - data_sig_size = xsig.shape[0] - print("size of data", data_sig_size) - - # make SOFIE inference on background data - df2 = ROOT.RDataFrame("bkg_tree", inputFile) - bkgData = df2.AsNumpy(columns=['m_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb']) - xbkg = np.column_stack(list(bkgData.values())) - data_bkg_size = xbkg.shape[0] - - ysig = np.ones(data_sig_size) - ybkg = np.zeros(data_bkg_size) - inputs_data = np.concatenate((xsig,xbkg),axis=0) - inputs_targets = np.concatenate((ysig,ybkg),axis=0) - - #split data in training and test data - - x_train, x_test, y_train, y_test = train_test_split( - inputs_data, inputs_targets, test_size=0.50, 
random_state=1234) - - return x_train, y_train, x_test, y_test - -def TrainModel(model, x, y, name) : - model.fit(x,y,epochs=5,batch_size=50) - modelFile = name + '.keras' - model.save(modelFile) - return modelFile +def CreateModel(nlayers=4, nunits=64): + model = Sequential() + model.add(Dense(nunits, activation="relu", input_dim=7)) + for i in range(1, nlayers): + model.add(Dense(nunits, activation="relu")) + + model.add(Dense(1, activation="sigmoid")) + model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=0.001), weighted_metrics=["accuracy"]) + model.summary() + return model + + +def PrepareData(): + # get the input data + inputFile = str(ROOT.gROOT.GetTutorialDir()) + "/machine_learning/data/Higgs_data.root" + + df1 = ROOT.RDataFrame("sig_tree", inputFile) + sigData = df1.AsNumpy(columns=["m_jj", "m_jjj", "m_lv", "m_jlv", "m_bb", "m_wbb", "m_wwbb"]) + # print(sigData) + + # stack all the 7 numpy array in a single array (nevents x nvars) + xsig = np.column_stack(list(sigData.values())) + data_sig_size = xsig.shape[0] + print("size of data", data_sig_size) + + # make SOFIE inference on background data + df2 = ROOT.RDataFrame("bkg_tree", inputFile) + bkgData = df2.AsNumpy(columns=["m_jj", "m_jjj", "m_lv", "m_jlv", "m_bb", "m_wbb", "m_wwbb"]) + xbkg = np.column_stack(list(bkgData.values())) + data_bkg_size = xbkg.shape[0] + + ysig = np.ones(data_sig_size) + ybkg = np.zeros(data_bkg_size) + inputs_data = np.concatenate((xsig, xbkg), axis=0) + inputs_targets = np.concatenate((ysig, ybkg), axis=0) + + # split data in training and test data + + x_train, x_test, y_train, y_test = train_test_split(inputs_data, inputs_targets, test_size=0.50, random_state=1234) + + return x_train, y_train, x_test, y_test + + +def TrainModel(model, x, y, name): + model.fit(x, y, epochs=5, batch_size=50) + modelFile = name + ".keras" + model.save(modelFile) + return modelFile + ### run the models @@ -79,122 +81,129 @@ def TrainModel(model, x, y, name) : ## create models 
and train them -model1 = TrainModel(CreateModel(4,64),x_train, y_train, 'Higgs_Model_4L_50') -model2 = TrainModel(CreateModel(4,64),x_train, y_train, 'Higgs_Model_4L_200') -model3 = TrainModel(CreateModel(4,64),x_train, y_train, 'Higgs_Model_2L_500') +model1 = TrainModel(CreateModel(4, 64), x_train, y_train, "Higgs_Model_4L_50") +model2 = TrainModel(CreateModel(4, 64), x_train, y_train, "Higgs_Model_4L_200") +model3 = TrainModel(CreateModel(4, 64), x_train, y_train, "Higgs_Model_2L_500") -#evaluate with SOFIE the 3 trained models +# evaluate with SOFIE the 3 trained models def GenerateModelCode(modelFile, generatedHeaderFile): - model = ROOT.TMVA.Experimental.SOFIE.PyKeras.Parse(modelFile) + model = ROOT.TMVA.Experimental.SOFIE.PyKeras.Parse(modelFile) - print("Generating inference code for the Keras model from ",modelFile,"in the header ", generatedHeaderFile) - #Generating inference code using a ROOT binary file - model.Generate(ROOT.TMVA.Experimental.SOFIE.Options.kRootBinaryWeightFile) - # add option to append to the same file the generated headers (pass True for append flag) - model.OutputGenerated(generatedHeaderFile, True) - #model.PrintGenerated() - return generatedHeaderFile + print("Generating inference code for the Keras model from ", modelFile, "in the header ", generatedHeaderFile) + # Generating inference code using a ROOT binary file + model.Generate(ROOT.TMVA.Experimental.SOFIE.Options.kRootBinaryWeightFile) + # add option to append to the same file the generated headers (pass True for append flag) + model.OutputGenerated(generatedHeaderFile, True) + # model.PrintGenerated() + return generatedHeaderFile generatedHeaderFile = "Higgs_Model.hxx" -#need to remove existing header file since we are appending on same one -if (os.path.exists(generatedHeaderFile)): - print("removing existing file", generatedHeaderFile) - os.remove(generatedHeaderFile) +# need to remove existing header file since we are appending on same one +if 
os.path.exists(generatedHeaderFile): + print("removing existing file", generatedHeaderFile) + os.remove(generatedHeaderFile) weightFile = "Higgs_Model.root" -if (os.path.exists(weightFile)): - print("removing existing file", weightFile) - os.remove(weightFile) +if os.path.exists(weightFile): + print("removing existing file", weightFile) + os.remove(weightFile) GenerateModelCode(model1, generatedHeaderFile) GenerateModelCode(model2, generatedHeaderFile) GenerateModelCode(model3, generatedHeaderFile) -#compile the generated code +# compile the generated code ROOT.gInterpreter.Declare('#include "' + generatedHeaderFile + '"') -#run the inference on the test data +# run the inference on the test data session1 = ROOT.TMVA_SOFIE_Higgs_Model_4L_50.Session("Higgs_Model.root") session2 = ROOT.TMVA_SOFIE_Higgs_Model_4L_200.Session("Higgs_Model.root") session3 = ROOT.TMVA_SOFIE_Higgs_Model_2L_500.Session("Higgs_Model.root") -hs1 = ROOT.TH1D("hs1","Signal result 4L 50",100,0,1) -hs2 = ROOT.TH1D("hs2","Signal result 4L 200",100,0,1) -hs3 = ROOT.TH1D("hs3","Signal result 2L 500",100,0,1) - -hb1 = ROOT.TH1D("hb1","Background result 4L 50",100,0,1) -hb2 = ROOT.TH1D("hb2","Background result 4L 200",100,0,1) -hb3 = ROOT.TH1D("hb3","Background result 2L 500",100,0,1) - -def EvalModel(session, x) : - result = session.infer(x) - return result[0] - -for i in range(0,x_test.shape[0]): - result1 = EvalModel(session1, x_test[i,:]) - result2 = EvalModel(session2, x_test[i,:]) - result3 = EvalModel(session3, x_test[i,:]) - if (y_test[i] == 1) : - hs1.Fill(result1) - hs2.Fill(result2) - hs3.Fill(result3) - else: - hb1.Fill(result1) - hb2.Fill(result2) - hb3.Fill(result3) - -def PlotHistos(hs,hb): - hs.SetLineColor("kRed") - hb.SetLineColor("kBlue") - hs.Draw() - hb.Draw("same") +hs1 = ROOT.TH1D("hs1", "Signal result 4L 50", 100, 0, 1) +hs2 = ROOT.TH1D("hs2", "Signal result 4L 200", 100, 0, 1) +hs3 = ROOT.TH1D("hs3", "Signal result 2L 500", 100, 0, 1) + +hb1 = ROOT.TH1D("hb1", "Background 
result 4L 50", 100, 0, 1) +hb2 = ROOT.TH1D("hb2", "Background result 4L 200", 100, 0, 1) +hb3 = ROOT.TH1D("hb3", "Background result 2L 500", 100, 0, 1) + + +def EvalModel(session, x): + result = session.infer(x) + return result[0] + + +for i in range(0, x_test.shape[0]): + result1 = EvalModel(session1, x_test[i, :]) + result2 = EvalModel(session2, x_test[i, :]) + result3 = EvalModel(session3, x_test[i, :]) + if y_test[i] == 1: + hs1.Fill(result1) + hs2.Fill(result2) + hs3.Fill(result3) + else: + hb1.Fill(result1) + hb2.Fill(result2) + hb3.Fill(result3) + + +def PlotHistos(hs, hb): + hs.SetLineColor("kRed") + hb.SetLineColor("kBlue") + hs.Draw() + hb.Draw("same") + c1 = ROOT.TCanvas() -c1.Divide(1,3) +c1.Divide(1, 3) c1.cd(1) -PlotHistos(hs1,hb1) +PlotHistos(hs1, hb1) c1.cd(2) -PlotHistos(hs2,hb2) +PlotHistos(hs2, hb2) c1.cd(3) -PlotHistos(hs3,hb3) +PlotHistos(hs3, hb3) c1.Draw() ## draw also ROC curves -def GetContent(h) : - n = h.GetNbinsX() - x = ROOT.std.vector['float'](n) - w = ROOT.std.vector['float'](n) - for i in range(0,n): - x[i] = h.GetBinCenter(i+1) - w[i] = h.GetBinContent(i+1) - return x,w - -def MakeROCCurve(hs, hb) : - xs,ws = GetContent(hs) - xb,wb = GetContent(hb) - roc = ROOT.TMVA.ROCCurve(xs,xb,ws,wb) - print("ROC integral for ",hs.GetName(), roc.GetROCIntegral()) - curve = roc.GetROCCurve() - curve.SetName(hs.GetName()) - return roc,curve + +def GetContent(h): + n = h.GetNbinsX() + x = ROOT.std.vector["float"](n) + w = ROOT.std.vector["float"](n) + for i in range(0, n): + x[i] = h.GetBinCenter(i + 1) + w[i] = h.GetBinContent(i + 1) + return x, w + + +def MakeROCCurve(hs, hb): + xs, ws = GetContent(hs) + xb, wb = GetContent(hb) + roc = ROOT.TMVA.ROCCurve(xs, xb, ws, wb) + print("ROC integral for ", hs.GetName(), roc.GetROCIntegral()) + curve = roc.GetROCCurve() + curve.SetName(hs.GetName()) + return roc, curve + c2 = ROOT.TCanvas() -r1,curve1 = MakeROCCurve(hs1,hb1) +r1, curve1 = MakeROCCurve(hs1, hb1) curve1.SetLineColor("kRed") 
curve1.Draw("AC") -r2,curve2 = MakeROCCurve(hs2,hb2) +r2, curve2 = MakeROCCurve(hs2, hb2) curve2.SetLineColor("kBlue") curve2.Draw("C") -r3,curve3 = MakeROCCurve(hs3,hb3) +r3, curve3 = MakeROCCurve(hs3, hb3) curve3.SetLineColor("kGreen") curve3.Draw("C") diff --git a/tutorials/machine_learning/TMVA_SOFIE_ONNX.py b/tutorials/machine_learning/TMVA_SOFIE_ONNX.py index c323ccd59d2e1..db67e42517f65 100644 --- a/tutorials/machine_learning/TMVA_SOFIE_ONNX.py +++ b/tutorials/machine_learning/TMVA_SOFIE_ONNX.py @@ -23,95 +23,86 @@ def CreateAndTrainModel(modelName): - model = nn.Sequential( - nn.Linear(32,16), - nn.ReLU(), - nn.Linear(16,8), - nn.ReLU(), - nn.Linear(8,2), - nn.Softmax(dim=1) - ) - - criterion = nn.MSELoss() - optimizer = torch.optim.SGD(model.parameters(),lr=0.01) - - - #train model with the random data - for i in range(500): - x=torch.randn(2,32) - y=torch.randn(2,2) - y_pred = model(x) - loss = criterion(y_pred,y) - optimizer.zero_grad() - loss.backward() - optimizer.step() - - #******************************************************* - ## EXPORT to ONNX - # - # need to evaluate the model before exporting to ONNX - # and to provide a dummy input tensor to set the input model shape - model.eval() - - modelFile = modelName + ".onnx" - dummy_x = torch.randn(1,32) - model(dummy_x) - - #check for torch.onnx.export parameters - def filtered_kwargs(func, **candidate_kwargs): - sig = inspect.signature(func) - return { - k: v for k, v in candidate_kwargs.items() - if k in sig.parameters - } - kwargs = filtered_kwargs( - torch.onnx.export, - input_names=["input"], - output_names=["output"], - external_data=False, # may not exist - dynamo=True # may not exist - ) - print("calling torch.onnx.export with parameters",kwargs) - - try: - torch.onnx.export(model, dummy_x, modelFile, **kwargs) - print("model exported to ONNX as",modelFile) - return modelFile - except TypeError: - print("Cannot export model from pytorch to ONNX - with version ",torch.__version__) - 
print("Skip tutorial execution") - exit() + model = nn.Sequential(nn.Linear(32, 16), nn.ReLU(), nn.Linear(16, 8), nn.ReLU(), nn.Linear(8, 2), nn.Softmax(dim=1)) + + criterion = nn.MSELoss() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + + # train model with the random data + for i in range(500): + x = torch.randn(2, 32) + y = torch.randn(2, 2) + y_pred = model(x) + loss = criterion(y_pred, y) + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # ******************************************************* + ## EXPORT to ONNX + # + # need to evaluate the model before exporting to ONNX + # and to provide a dummy input tensor to set the input model shape + model.eval() + + modelFile = modelName + ".onnx" + dummy_x = torch.randn(1, 32) + model(dummy_x) + + # check for torch.onnx.export parameters + def filtered_kwargs(func, **candidate_kwargs): + sig = inspect.signature(func) + return {k: v for k, v in candidate_kwargs.items() if k in sig.parameters} + + kwargs = filtered_kwargs( + torch.onnx.export, + input_names=["input"], + output_names=["output"], + external_data=False, # may not exist + dynamo=True, # may not exist + ) + print("calling torch.onnx.export with parameters", kwargs) + + try: + torch.onnx.export(model, dummy_x, modelFile, **kwargs) + print("model exported to ONNX as", modelFile) + return modelFile + except TypeError: + print("Cannot export model from pytorch to ONNX - with version ", torch.__version__) + print("Skip tutorial execution") + exit() def ParseModel(modelFile, verbose=False): - parser = ROOT.TMVA.Experimental.SOFIE.RModelParser_ONNX() - model = parser.Parse(modelFile,verbose) - # - #print model weights - if (verbose): - model.PrintInitializedTensors() - data = model.GetTensorData['float']('0weight') - print("0weight",data) - data = model.GetTensorData['float']('2weight') - print("2weight",data) - - # Generating inference code - model.Generate() - #generate header file (and .dat file) with modelName+.hxx - 
model.OutputGenerated() - if (verbose) : - model.PrintGenerated() - - modelCode = modelFile.replace(".onnx",".hxx") - print("Generated model header file ",modelCode) - return modelCode + parser = ROOT.TMVA.Experimental.SOFIE.RModelParser_ONNX() + model = parser.Parse(modelFile, verbose) + # + # print model weights + if verbose: + model.PrintInitializedTensors() + data = model.GetTensorData["float"]("0weight") + print("0weight", data) + data = model.GetTensorData["float"]("2weight") + print("2weight", data) + + # Generating inference code + model.Generate() + # generate header file (and .dat file) with modelName+.hxx + model.OutputGenerated() + if verbose: + model.PrintGenerated() + + modelCode = modelFile.replace(".onnx", ".hxx") + print("Generated model header file ", modelCode) + return modelCode + ################################################################### ## Step 1 : Create and Train model ################################################################### -#use an arbitrary modelName +# use an arbitrary modelName modelName = "LinearModel" modelFile = CreateAndTrainModel(modelName) @@ -132,36 +123,37 @@ def ParseModel(modelFile, verbose=False): ## Step 4: Evaluate the model ################################################################### -#get first the SOFIE session namespace -sofie = getattr(ROOT, 'TMVA_SOFIE_' + modelName) +# get first the SOFIE session namespace +sofie = getattr(ROOT, "TMVA_SOFIE_" + modelName) session = sofie.Session() -x = np.random.normal(0,1,(1,32)).astype(np.float32) +x = np.random.normal(0, 1, (1, 32)).astype(np.float32) print("\n************************************************************") print("Running inference with SOFIE ") -print("\ninput to model is ",x) +print("\ninput to model is ", x) y = session.infer(x) # output shape is (1,2) y_sofie = np.asarray(y.data()) print("-> output using SOFIE = ", y_sofie) -#check inference with onnx +# check inference with onnx try: - import onnxruntime as ort + import onnxruntime as 
ort + # Load model - print("Running inference with ONNXRuntime ") - ort_session = ort.InferenceSession(modelFile) + print("Running inference with ONNXRuntime ") + ort_session = ort.InferenceSession(modelFile) - # Run inference - outputs = ort_session.run(None, {"input": x}) - y_ort = outputs[0] - print("-> output using ORT =", y_ort) + # Run inference + outputs = ort_session.run(None, {"input": x}) + y_ort = outputs[0] + print("-> output using ORT =", y_ort) - testFailed = abs(y_sofie-y_ort) > 0.01 - if (np.any(testFailed)): - raiseError('Result is different between SOFIE and ONNXRT') - else : - print("OK") + testFailed = abs(y_sofie - y_ort) > 0.01 + if np.any(testFailed): + raiseError("Result is different between SOFIE and ONNXRT") + else: + print("OK") except ImportError: - print("Missing ONNXRuntime: skipping comparison test") + print("Missing ONNXRuntime: skipping comparison test") From ac956e4bad79247380b75e5c61d22eb6486c0495 Mon Sep 17 00:00:00 2001 From: Jonas Rembser Date: Sun, 5 Apr 2026 23:45:30 +0200 Subject: [PATCH 2/3] Silence TMVA SOFIE tutorial warnings from third party libraries Keras and PyTorch still cause some warnings in a few places where they use deprecated features from their dependencies. We need a mechanism to silence these warnings so that our tests will run with warnings as errors, while also making sure we catch when the warnings don't happen anymore, so we can remove the boilerplate code for warnings silencing. 
--- .../machine_learning/TMVA_SOFIE_Keras.py | 39 +++++++++++++++++- .../TMVA_SOFIE_Keras_HiggsModel.py | 37 ++++++++++++++++- .../machine_learning/TMVA_SOFIE_Models.py | 41 +++++++++++++++++-- tutorials/machine_learning/TMVA_SOFIE_ONNX.py | 30 +++++++++++++- 4 files changed, 141 insertions(+), 6 deletions(-) diff --git a/tutorials/machine_learning/TMVA_SOFIE_Keras.py b/tutorials/machine_learning/TMVA_SOFIE_Keras.py index 09ef6ffb6cac7..6f107c6850f76 100644 --- a/tutorials/machine_learning/TMVA_SOFIE_Keras.py +++ b/tutorials/machine_learning/TMVA_SOFIE_Keras.py @@ -9,11 +9,37 @@ ### \author Sanjiban Sengupta and Lorenzo Moneta +import contextlib +import warnings + import ROOT # Enable ROOT in batch mode (same effect as -nodraw) ROOT.gROOT.SetBatch(True) + +@contextlib.contextmanager +def expect_warning(category, message): + """Silence a known third-party warning and raise if it stops firing. + + Notifies us to drop the workaround once the upstream library is fixed. + """ + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + yield + seen = False + for w in caught: + if issubclass(w.category, category) and message in str(w.message): + seen = True + else: + warnings.warn_explicit(w.message, w.category, w.filename, w.lineno) + if not seen: + raise RuntimeError( + f"Expected {category.__name__} containing {message!r} was not " + "emitted. This tutorial's workaround can probably be removed." 
+ ) + + # ----------------------------------------------------------------------------- # Step 1: Create and train a simple Keras model (via embedded Python) # ----------------------------------------------------------------------------- @@ -37,7 +63,18 @@ model.compile(loss="mse", optimizer="adam") model.fit(x_train, y_train, epochs=3, batch_size=2) -model.save("KerasModel.keras") + +# Keras' internal ``np.array(x)`` (TensorFlow backend) does not yet implement +# the NumPy 2.0 ``__array__(copy=...)`` signature, so saving the model emits a +# DeprecationWarning that we cannot fix from user code. +if tuple(int(p) for p in np.__version__.split(".")[:2]) >= (2, 0): + ctx = expect_warning(DeprecationWarning, "__array__ implementation doesn't accept a copy keyword") +else: + ctx = contextlib.nullcontext() + +with ctx: + model.save("KerasModel.keras") + model.summary() # ----------------------------------------------------------------------------- diff --git a/tutorials/machine_learning/TMVA_SOFIE_Keras_HiggsModel.py b/tutorials/machine_learning/TMVA_SOFIE_Keras_HiggsModel.py index 38b7ffdefae9e..bfda3972b976c 100644 --- a/tutorials/machine_learning/TMVA_SOFIE_Keras_HiggsModel.py +++ b/tutorials/machine_learning/TMVA_SOFIE_Keras_HiggsModel.py @@ -8,6 +8,8 @@ ### \author Lorenzo Moneta +import contextlib +import warnings from os.path import exists import numpy as np @@ -16,6 +18,28 @@ from sklearn.model_selection import train_test_split +@contextlib.contextmanager +def expect_warning(category, message): + """Silence a known third-party warning and raise if it stops firing. + + Notifies us to drop the workaround once the upstream library is fixed. 
+ """ + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + yield + seen = False + for w in caught: + if issubclass(w.category, category) and message in str(w.message): + seen = True + else: + warnings.warn_explicit(w.message, w.category, w.filename, w.lineno) + if not seen: + raise RuntimeError( + f"Expected {category.__name__} containing {message!r} was not " + "emitted. This tutorial's workaround can probably be removed." + ) + + def CreateModel(nlayers=4, nunits=64): input = layers.Input(shape=(7,)) x = input @@ -64,7 +88,18 @@ def PrepareData(): def TrainModel(model, x, y, name): model.fit(x, y, epochs=5, batch_size=50) modelFile = name + ".keras" - model.save(modelFile) + + # Keras' internal ``np.array(x)`` (TensorFlow backend) does not yet implement + # the NumPy 2.0 ``__array__(copy=...)`` signature, so saving the model emits a + # DeprecationWarning that we cannot fix from user code. + if tuple(int(p) for p in np.__version__.split(".")[:2]) >= (2, 0): + ctx = expect_warning(DeprecationWarning, "__array__ implementation doesn't accept a copy keyword") + else: + ctx = contextlib.nullcontext() + + with ctx: + model.save(modelFile) + return model, modelFile diff --git a/tutorials/machine_learning/TMVA_SOFIE_Models.py b/tutorials/machine_learning/TMVA_SOFIE_Models.py index 45bd96baf56ba..14114a5831db7 100644 --- a/tutorials/machine_learning/TMVA_SOFIE_Models.py +++ b/tutorials/machine_learning/TMVA_SOFIE_Models.py @@ -13,21 +13,47 @@ ### \macro_output ### \author Lorenzo Moneta +import contextlib import os +import warnings import numpy as np import ROOT from sklearn.model_selection import train_test_split -from tensorflow.keras.layers import Dense +from tensorflow.keras.layers import Dense, Input from tensorflow.keras.models import Sequential from tensorflow.keras.optimizers import Adam + +@contextlib.contextmanager +def expect_warning(category, message): + """Silence a known third-party warning. Raise if it stops firing. 
+ + Notifies us to drop the workaround once the upstream library is fixed. + """ + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + yield + seen = False + for w in caught: + if issubclass(w.category, category) and message in str(w.message): + seen = True + else: + warnings.warn_explicit(w.message, w.category, w.filename, w.lineno) + if not seen: + raise RuntimeError( + f"Expected {category.__name__} containing {message!r} was not " + "emitted. This tutorial's workaround can probably be removed." + ) + + ## generate and train Keras models with different architectures def CreateModel(nlayers=4, nunits=64): model = Sequential() - model.add(Dense(nunits, activation="relu", input_dim=7)) + model.add(Input(shape=(7,))) + model.add(Dense(nunits, activation="relu")) for i in range(1, nlayers): model.add(Dense(nunits, activation="relu")) @@ -71,7 +97,16 @@ def PrepareData(): def TrainModel(model, x, y, name): model.fit(x, y, epochs=5, batch_size=50) modelFile = name + ".keras" - model.save(modelFile) + # Keras' internal ``np.array(x)`` (TensorFlow backend) does not yet + # implement the NumPy 2.0 ``__array__(copy=...)`` signature, so saving + # emits a DeprecationWarning that we cannot fix from user code. 
+ if tuple(int(p) for p in np.__version__.split(".")[:2]) >= (2, 0): + ctx = expect_warning(DeprecationWarning, "__array__ implementation doesn't accept a copy keyword") + else: + ctx = contextlib.nullcontext() + + with ctx: + model.save(modelFile) return modelFile diff --git a/tutorials/machine_learning/TMVA_SOFIE_ONNX.py b/tutorials/machine_learning/TMVA_SOFIE_ONNX.py index db67e42517f65..9321829496b8c 100644 --- a/tutorials/machine_learning/TMVA_SOFIE_ONNX.py +++ b/tutorials/machine_learning/TMVA_SOFIE_ONNX.py @@ -13,7 +13,9 @@ ## \author Lorenzo Moneta +import contextlib import inspect +import warnings import numpy as np import ROOT @@ -21,6 +23,28 @@ import torch.nn as nn +@contextlib.contextmanager +def expect_warning(category, message): + """Silence a known third-party warning and raise if it stops firing. + + Notifies us to drop the workaround once the upstream library is fixed. + """ + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + yield + seen = False + for w in caught: + if issubclass(w.category, category) and message in str(w.message): + seen = True + else: + warnings.warn_explicit(w.message, w.category, w.filename, w.lineno) + if not seen: + raise RuntimeError( + f"Expected {category.__name__} containing {message!r} was not " + "emitted. This tutorial's workaround can probably be removed." + ) + + def CreateAndTrainModel(modelName): model = nn.Sequential(nn.Linear(32, 16), nn.ReLU(), nn.Linear(16, 8), nn.ReLU(), nn.Linear(8, 2), nn.Softmax(dim=1)) @@ -64,7 +88,11 @@ def filtered_kwargs(func, **candidate_kwargs): print("calling torch.onnx.export with parameters", kwargs) try: - torch.onnx.export(model, dummy_x, modelFile, **kwargs) + # torch.onnx.export (dynamo path) pickles its export program through + # copyreg, which still references the deprecated LeafSpec. The warning + # is emitted from inside PyTorch and cannot be avoided from user code. 
+ with expect_warning(FutureWarning, "isinstance(treespec, LeafSpec)"): + torch.onnx.export(model, dummy_x, modelFile, **kwargs) print("model exported to ONNX as", modelFile) return modelFile except TypeError: From bccc598bdd8ef3c7ee929c22d184097aa2724c82 Mon Sep 17 00:00:00 2001 From: Jonas Rembser Date: Tue, 5 May 2026 11:37:34 +0200 Subject: [PATCH 3/3] Fix ruff warnings in SOFIE tutorials In particular, replace `raiseError` which is only available in pytest fixtures. --- tutorials/machine_learning/TMVA_SOFIE_Keras.py | 9 +++------ tutorials/machine_learning/TMVA_SOFIE_ONNX.py | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/tutorials/machine_learning/TMVA_SOFIE_Keras.py b/tutorials/machine_learning/TMVA_SOFIE_Keras.py index 6f107c6850f76..a5af486430849 100644 --- a/tutorials/machine_learning/TMVA_SOFIE_Keras.py +++ b/tutorials/machine_learning/TMVA_SOFIE_Keras.py @@ -12,7 +12,10 @@ import contextlib import warnings +import numpy as np import ROOT +from tensorflow.keras.layers import Activation, Dense, Input, Softmax +from tensorflow.keras.models import Model # Enable ROOT in batch mode (same effect as -nodraw) ROOT.gROOT.SetBatch(True) @@ -44,10 +47,6 @@ def expect_warning(category, message): # Step 1: Create and train a simple Keras model (via embedded Python) # ----------------------------------------------------------------------------- -import numpy as np -from tensorflow.keras.layers import Activation, Dense, Input, Softmax -from tensorflow.keras.models import Model - input = Input(shape=(4,), batch_size=2) x = Dense(32)(input) x = Activation("relu")(x) @@ -81,8 +80,6 @@ def expect_warning(category, message): # Step 2: Use TMVA::SOFIE to parse the ONNX model # ----------------------------------------------------------------------------- -import ROOT - # Parse the ONNX model model = ROOT.TMVA.Experimental.SOFIE.PyKeras.Parse("KerasModel.keras") diff --git a/tutorials/machine_learning/TMVA_SOFIE_ONNX.py 
b/tutorials/machine_learning/TMVA_SOFIE_ONNX.py index 9321829496b8c..0116a2684f5f0 100644 --- a/tutorials/machine_learning/TMVA_SOFIE_ONNX.py +++ b/tutorials/machine_learning/TMVA_SOFIE_ONNX.py @@ -179,7 +179,7 @@ def ParseModel(modelFile, verbose=False): testFailed = abs(y_sofie - y_ort) > 0.01 if np.any(testFailed): - raiseError("Result is different between SOFIE and ONNXRT") + raise RuntimeError("Result is different between SOFIE and ONNXRT") else: print("OK")