diff --git a/tutorials/machine_learning/TMVA_SOFIE_Keras.py b/tutorials/machine_learning/TMVA_SOFIE_Keras.py index 083c768b3bede..a5af486430849 100644 --- a/tutorials/machine_learning/TMVA_SOFIE_Keras.py +++ b/tutorials/machine_learning/TMVA_SOFIE_Keras.py @@ -9,44 +9,77 @@ ### \author Sanjiban Sengupta and Lorenzo Moneta +import contextlib +import warnings +import numpy as np import ROOT +from tensorflow.keras.layers import Activation, Dense, Input, Softmax +from tensorflow.keras.models import Model # Enable ROOT in batch mode (same effect as -nodraw) ROOT.gROOT.SetBatch(True) + +@contextlib.contextmanager +def expect_warning(category, message): + """Silence a known third-party warning and raise if it stops firing. + + Notifies us to drop the workaround once the upstream library is fixed. + """ + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + yield + seen = False + for w in caught: + if issubclass(w.category, category) and message in str(w.message): + seen = True + else: + warnings.warn_explicit(w.message, w.category, w.filename, w.lineno) + if not seen: + raise RuntimeError( + f"Expected {category.__name__} containing {message!r} was not " + "emitted. This tutorial's workaround can probably be removed." + ) + + # ----------------------------------------------------------------------------- # Step 1: Create and train a simple Keras model (via embedded Python) # ----------------------------------------------------------------------------- -import numpy as np -from tensorflow.keras.layers import Activation, Dense, Input, Softmax -from tensorflow.keras.models import Model - -input=Input(shape=(4,),batch_size=2) -x=Dense(32)(input) -x=Activation('relu')(x) -x=Dense(16,activation='relu')(x) -x=Dense(8,activation='relu')(x) -x=Dense(2)(x) -output=Softmax()(x) -model=Model(inputs=input,outputs=output) +input = Input(shape=(4,), batch_size=2) +x = Dense(32)(input) +x = Activation("relu")(x) +x = Dense(16, activation="relu")(x) +x = Dense(8, activation="relu")(x) +x = Dense(2)(x) +output = Softmax()(x) +model = Model(inputs=input, outputs=output) -randomGenerator=np.random.RandomState(0) -x_train=randomGenerator.rand(4,4) -y_train=randomGenerator.rand(4,2) +randomGenerator = np.random.RandomState(0) +x_train = randomGenerator.rand(4, 4) +y_train = randomGenerator.rand(4, 2) -model.compile(loss='mse', optimizer='adam') +model.compile(loss="mse", optimizer="adam") model.fit(x_train, y_train, epochs=3, batch_size=2) -model.save('KerasModel.keras') + +# Keras' internal ``np.array(x)`` (TensorFlow backend) does not yet implement +# the NumPy 2.0 ``__array__(copy=...)`` signature, so saving the model emits a +# DeprecationWarning that we cannot fix from user code. +if tuple(int(p) for p in np.__version__.split(".")[:2]) >= (2, 0): + ctx = expect_warning(DeprecationWarning, "__array__ implementation doesn't accept a copy keyword") +else: + ctx = contextlib.nullcontext() + +with ctx: + model.save("KerasModel.keras") + model.summary() # ----------------------------------------------------------------------------- # Step 2: Use TMVA::SOFIE to parse the ONNX model # ----------------------------------------------------------------------------- -import ROOT - # Parse the ONNX model model = ROOT.TMVA.Experimental.SOFIE.PyKeras.Parse("KerasModel.keras") @@ -54,7 +87,7 @@ # Generate inference code model.Generate() model.OutputGenerated() -#print generated code +# print generated code print("\n**************************************************") print(" Generated code") print("**************************************************\n") @@ -69,14 +102,13 @@ # Step 3: Run inference # ----------------------------------------------------------------------------- -#instantiate SOFIE session class +# instantiate SOFIE session class session = ROOT.TMVA_SOFIE_KerasModel.Session() # Input tensor (same shape as training input) -x = np.array([[0.1, 0.2, 0.3, 0.4],[0.5, 0.6, 0.7, 0.8]], dtype=np.float32) +x = np.array([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]], dtype=np.float32) # Run inference y = session.infer(x) print("Inference output:", y) - diff --git a/tutorials/machine_learning/TMVA_SOFIE_Keras_HiggsModel.py b/tutorials/machine_learning/TMVA_SOFIE_Keras_HiggsModel.py index 10fd3aa2792aa..bfda3972b976c 100644 --- a/tutorials/machine_learning/TMVA_SOFIE_Keras_HiggsModel.py +++ b/tutorials/machine_learning/TMVA_SOFIE_Keras_HiggsModel.py @@ -8,6 +8,8 @@ ### \author Lorenzo Moneta +import contextlib +import warnings from os.path import exists import numpy as np @@ -16,82 +18,118 @@ from sklearn.model_selection import train_test_split -def CreateModel(nlayers = 4, nunits = 64): - input = layers.Input(shape=(7,)) - x = input - for i in range(1,nlayers) : - y = layers.Dense(nunits, activation='relu')(x) - x = y +@contextlib.contextmanager +def expect_warning(category, message): + """Silence a known third-party warning and raise if it stops firing. - output = layers.Dense(1, activation='sigmoid')(x) - model = models.Model(input, output) - model.compile(loss = 'binary_crossentropy', optimizer = 'adam', weighted_metrics = ['accuracy']) - model.summary() - return model + Notifies us to drop the workaround once the upstream library is fixed. + """ + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + yield + seen = False + for w in caught: + if issubclass(w.category, category) and message in str(w.message): + seen = True + else: + warnings.warn_explicit(w.message, w.category, w.filename, w.lineno) + if not seen: + raise RuntimeError( + f"Expected {category.__name__} containing {message!r} was not " + "emitted. This tutorial's workaround can probably be removed." + ) -def PrepareData() : - #get the input data - inputFile = str(ROOT.gROOT.GetTutorialDir()) + "/machine_learning/data/Higgs_data.root" - df1 = ROOT.RDataFrame("sig_tree", inputFile) - sigData = df1.AsNumpy(columns=['m_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb']) - #print(sigData) +def CreateModel(nlayers=4, nunits=64): + input = layers.Input(shape=(7,)) + x = input + for i in range(1, nlayers): + y = layers.Dense(nunits, activation="relu")(x) + x = y - # stack all the 7 numpy array in a single array (nevents x nvars) - xsig = np.column_stack(list(sigData.values())) - data_sig_size = xsig.shape[0] - print("size of data", data_sig_size) + output = layers.Dense(1, activation="sigmoid")(x) + model = models.Model(input, output) + model.compile(loss="binary_crossentropy", optimizer="adam", weighted_metrics=["accuracy"]) + model.summary() + return model - # make SOFIE inference on background data - df2 = ROOT.RDataFrame("bkg_tree", inputFile) - bkgData = df2.AsNumpy(columns=['m_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb']) - xbkg = np.column_stack(list(bkgData.values())) - data_bkg_size = xbkg.shape[0] - ysig = np.ones(data_sig_size) - ybkg = np.zeros(data_bkg_size) - inputs_data = np.concatenate((xsig,xbkg),axis=0) - inputs_targets = np.concatenate((ysig,ybkg),axis=0) +def PrepareData(): + # get the input data + inputFile = str(ROOT.gROOT.GetTutorialDir()) + "/machine_learning/data/Higgs_data.root" - #split data in training and test data + df1 = ROOT.RDataFrame("sig_tree", inputFile) + sigData = df1.AsNumpy(columns=["m_jj", "m_jjj", "m_lv", "m_jlv", "m_bb", "m_wbb", "m_wwbb"]) + # print(sigData) - x_train, x_test, y_train, y_test = train_test_split( - inputs_data, inputs_targets, test_size=0.50, random_state=1234) + # stack all the 7 numpy array in a single array (nevents x nvars) + xsig = np.column_stack(list(sigData.values())) + data_sig_size = xsig.shape[0] + print("size of data", data_sig_size) - return x_train, y_train, x_test, y_test + # make SOFIE inference on background data + df2 = ROOT.RDataFrame("bkg_tree", inputFile) + bkgData = df2.AsNumpy(columns=["m_jj", "m_jjj", "m_lv", "m_jlv", "m_bb", "m_wbb", "m_wwbb"]) + xbkg = np.column_stack(list(bkgData.values())) + data_bkg_size = xbkg.shape[0] -def TrainModel(model, x, y, name) : - model.fit(x,y,epochs=5,batch_size=50) - modelFile = name + '.keras' - model.save(modelFile) - return model, modelFile + ysig = np.ones(data_sig_size) + ybkg = np.zeros(data_bkg_size) + inputs_data = np.concatenate((xsig, xbkg), axis=0) + inputs_targets = np.concatenate((ysig, ybkg), axis=0) + # split data in training and test data -def GenerateCode(modelFile = "model.keras") : + x_train, x_test, y_train, y_test = train_test_split(inputs_data, inputs_targets, test_size=0.50, random_state=1234) - #check if the input file exists - if not exists(modelFile): - raise FileNotFoundError("INput model file not existing. You need to run TMVA_Higgs_Classification.C to generate the Keras trained model") + return x_train, y_train, x_test, y_test - #parse the input Keras model into RModel object (force batch size to be 1) - model = ROOT.TMVA.Experimental.SOFIE.PyKeras.Parse(modelFile) +def TrainModel(model, x, y, name): + model.fit(x, y, epochs=5, batch_size=50) + modelFile = name + ".keras" - #Generating inference code - model.Generate() - model.OutputGenerated() + # Keras' internal ``np.array(x)`` (TensorFlow backend) does not yet implement + # the NumPy 2.0 ``__array__(copy=...)`` signature, so saving the model emits a + # DeprecationWarning that we cannot fix from user code. + if tuple(int(p) for p in np.__version__.split(".")[:2]) >= (2, 0): + ctx = expect_warning(DeprecationWarning, "__array__ implementation doesn't accept a copy keyword") + else: + ctx = contextlib.nullcontext() + + with ctx: + model.save(modelFile) + + return model, modelFile + + +def GenerateCode(modelFile="model.keras"): + + # check if the input file exists + if not exists(modelFile): + raise FileNotFoundError( + "INput model file not existing. You need to run TMVA_Higgs_Classification.C to generate the Keras trained model" + ) + + # parse the input Keras model into RModel object (force batch size to be 1) + model = ROOT.TMVA.Experimental.SOFIE.PyKeras.Parse(modelFile) + + # Generating inference code + model.Generate() + model.OutputGenerated() + + modelName = modelFile.replace(".keras", "") + return modelName - modelName = modelFile.replace(".keras","") - return modelName ################################################################### ## Step 1 : Create and Train model ################################################################### x_train, y_train, x_test, y_test = PrepareData() -#create dense model with 3 layers of 64 units -model = CreateModel(3,64) -model, modelFile = TrainModel(model,x_train, y_train, 'HiggsModel') +# create dense model with 3 layers of 64 units +model = CreateModel(3, 64) +model, modelFile = TrainModel(model, x_train, y_train, "HiggsModel") ################################################################### ## Step 2 : Parse model and generate inference code with SOFIE @@ -110,20 +148,17 @@ def GenerateCode(modelFile = "model.keras") : ## Step 4: Evaluate the model ################################################################### -#get first the SOFIE session namespace -sofie = getattr(ROOT, 'TMVA_SOFIE_' + modelName) +# get first the SOFIE session namespace +sofie = getattr(ROOT, "TMVA_SOFIE_" + modelName) session = sofie.Session() -x = np.random.normal(0,1,7).astype(np.float32) +x = np.random.normal(0, 1, 7).astype(np.float32) y = session.infer(x) -ykeras = model(x.reshape(1,7)).numpy() +ykeras = model(x.reshape(1, 7)).numpy() -print("input to model is ",x, "\n\t -> output using SOFIE = ", y[0], " using Keras = ", ykeras[0]) +print("input to model is ", x, "\n\t -> output using SOFIE = ", y[0], " using Keras = ", ykeras[0]) -if (abs(y[0]-ykeras[0]) > 0.01) : - raise RuntimeError('ERROR: Result is different between SOFIE and Keras') +if abs(y[0] - ykeras[0]) > 0.01: + raise RuntimeError("ERROR: Result is different between SOFIE and Keras") print("OK") - - - diff --git a/tutorials/machine_learning/TMVA_SOFIE_Models.py b/tutorials/machine_learning/TMVA_SOFIE_Models.py index 1cef6275dd706..14114a5831db7 100644 --- a/tutorials/machine_learning/TMVA_SOFIE_Models.py +++ b/tutorials/machine_learning/TMVA_SOFIE_Models.py @@ -13,65 +13,102 @@ ### \macro_output ### \author Lorenzo Moneta +import contextlib import os +import warnings import numpy as np import ROOT from sklearn.model_selection import train_test_split -from tensorflow.keras.layers import Dense +from tensorflow.keras.layers import Dense, Input from tensorflow.keras.models import Sequential from tensorflow.keras.optimizers import Adam + +@contextlib.contextmanager +def expect_warning(category, message): + """Silence a known third-party warning. Raise if it stops firing. + + Notifies us to drop the workaround once the upstream library is fixed. + """ + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + yield + seen = False + for w in caught: + if issubclass(w.category, category) and message in str(w.message): + seen = True + else: + warnings.warn_explicit(w.message, w.category, w.filename, w.lineno) + if not seen: + raise RuntimeError( + f"Expected {category.__name__} containing {message!r} was not " + "emitted. This tutorial's workaround can probably be removed." + ) + + ## generate and train Keras models with different architectures -def CreateModel(nlayers = 4, nunits = 64): - model = Sequential() - model.add(Dense(nunits, activation='relu',input_dim=7)) - for i in range(1,nlayers) : - model.add(Dense(nunits, activation='relu')) +def CreateModel(nlayers=4, nunits=64): + model = Sequential() + model.add(Input(shape=(7,))) + model.add(Dense(nunits, activation="relu")) + for i in range(1, nlayers): + model.add(Dense(nunits, activation="relu")) + + model.add(Dense(1, activation="sigmoid")) + model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=0.001), weighted_metrics=["accuracy"]) + model.summary() + return model + - model.add(Dense(1, activation='sigmoid')) - model.compile(loss = 'binary_crossentropy', optimizer = Adam(learning_rate = 0.001), weighted_metrics = ['accuracy']) - model.summary() - return model +def PrepareData(): + # get the input data + inputFile = str(ROOT.gROOT.GetTutorialDir()) + "/machine_learning/data/Higgs_data.root" -def PrepareData() : - #get the input data - inputFile = str(ROOT.gROOT.GetTutorialDir()) + "/machine_learning/data/Higgs_data.root" + df1 = ROOT.RDataFrame("sig_tree", inputFile) + sigData = df1.AsNumpy(columns=["m_jj", "m_jjj", "m_lv", "m_jlv", "m_bb", "m_wbb", "m_wwbb"]) + # print(sigData) - df1 = ROOT.RDataFrame("sig_tree", inputFile) - sigData = df1.AsNumpy(columns=['m_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb']) - #print(sigData) + # stack all the 7 numpy array in a single array (nevents x nvars) + xsig = np.column_stack(list(sigData.values())) + data_sig_size = xsig.shape[0] + print("size of data", data_sig_size) - # stack all the 7 numpy array in a single array (nevents x nvars) - xsig = np.column_stack(list(sigData.values())) - data_sig_size = xsig.shape[0] - print("size of data", data_sig_size) + # make SOFIE inference on background data + df2 = ROOT.RDataFrame("bkg_tree", inputFile) + bkgData = df2.AsNumpy(columns=["m_jj", "m_jjj", "m_lv", "m_jlv", "m_bb", "m_wbb", "m_wwbb"]) + xbkg = np.column_stack(list(bkgData.values())) + data_bkg_size = xbkg.shape[0] - # make SOFIE inference on background data - df2 = ROOT.RDataFrame("bkg_tree", inputFile) - bkgData = df2.AsNumpy(columns=['m_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb']) - xbkg = np.column_stack(list(bkgData.values())) - data_bkg_size = xbkg.shape[0] + ysig = np.ones(data_sig_size) + ybkg = np.zeros(data_bkg_size) + inputs_data = np.concatenate((xsig, xbkg), axis=0) + inputs_targets = np.concatenate((ysig, ybkg), axis=0) - ysig = np.ones(data_sig_size) - ybkg = np.zeros(data_bkg_size) - inputs_data = np.concatenate((xsig,xbkg),axis=0) - inputs_targets = np.concatenate((ysig,ybkg),axis=0) + # split data in training and test data - #split data in training and test data + x_train, x_test, y_train, y_test = train_test_split(inputs_data, inputs_targets, test_size=0.50, random_state=1234) - x_train, x_test, y_train, y_test = train_test_split( - inputs_data, inputs_targets, test_size=0.50, random_state=1234) + return x_train, y_train, x_test, y_test - return x_train, y_train, x_test, y_test -def TrainModel(model, x, y, name) : - model.fit(x,y,epochs=5,batch_size=50) - modelFile = name + '.keras' - model.save(modelFile) - return modelFile +def TrainModel(model, x, y, name): + model.fit(x, y, epochs=5, batch_size=50) + modelFile = name + ".keras" + # Keras' internal ``np.array(x)`` (TensorFlow backend) does not yet + # implement the NumPy 2.0 ``__array__(copy=...)`` signature, so saving + # emits a DeprecationWarning that we cannot fix from user code. + if tuple(int(p) for p in np.__version__.split(".")[:2]) >= (2, 0): + ctx = expect_warning(DeprecationWarning, "__array__ implementation doesn't accept a copy keyword") + else: + ctx = contextlib.nullcontext() + + with ctx: + model.save(modelFile) + return modelFile + ### run the models @@ -79,122 +116,129 @@ def TrainModel(model, x, y, name) : ## create models and train them -model1 = TrainModel(CreateModel(4,64),x_train, y_train, 'Higgs_Model_4L_50') -model2 = TrainModel(CreateModel(4,64),x_train, y_train, 'Higgs_Model_4L_200') -model3 = TrainModel(CreateModel(4,64),x_train, y_train, 'Higgs_Model_2L_500') +model1 = TrainModel(CreateModel(4, 64), x_train, y_train, "Higgs_Model_4L_50") +model2 = TrainModel(CreateModel(4, 64), x_train, y_train, "Higgs_Model_4L_200") +model3 = TrainModel(CreateModel(4, 64), x_train, y_train, "Higgs_Model_2L_500") -#evaluate with SOFIE the 3 trained models +# evaluate with SOFIE the 3 trained models def GenerateModelCode(modelFile, generatedHeaderFile): - model = ROOT.TMVA.Experimental.SOFIE.PyKeras.Parse(modelFile) + model = ROOT.TMVA.Experimental.SOFIE.PyKeras.Parse(modelFile) - print("Generating inference code for the Keras model from ",modelFile,"in the header ", generatedHeaderFile) - #Generating inference code using a ROOT binary file - model.Generate(ROOT.TMVA.Experimental.SOFIE.Options.kRootBinaryWeightFile) - # add option to append to the same file the generated headers (pass True for append flag) - model.OutputGenerated(generatedHeaderFile, True) - #model.PrintGenerated() - return generatedHeaderFile + print("Generating inference code for the Keras model from ", modelFile, "in the header ", generatedHeaderFile) + # Generating inference code using a ROOT binary file + model.Generate(ROOT.TMVA.Experimental.SOFIE.Options.kRootBinaryWeightFile) + # add option to append to the same file the generated headers (pass True for append flag) + model.OutputGenerated(generatedHeaderFile, True) + # model.PrintGenerated() + return generatedHeaderFile generatedHeaderFile = "Higgs_Model.hxx" -#need to remove existing header file since we are appending on same one -if (os.path.exists(generatedHeaderFile)): - print("removing existing file", generatedHeaderFile) - os.remove(generatedHeaderFile) +# need to remove existing header file since we are appending on same one +if os.path.exists(generatedHeaderFile): + print("removing existing file", generatedHeaderFile) + os.remove(generatedHeaderFile) weightFile = "Higgs_Model.root" -if (os.path.exists(weightFile)): - print("removing existing file", weightFile) - os.remove(weightFile) +if os.path.exists(weightFile): + print("removing existing file", weightFile) + os.remove(weightFile) GenerateModelCode(model1, generatedHeaderFile) GenerateModelCode(model2, generatedHeaderFile) GenerateModelCode(model3, generatedHeaderFile) -#compile the generated code +# compile the generated code ROOT.gInterpreter.Declare('#include "' + generatedHeaderFile + '"') -#run the inference on the test data +# run the inference on the test data session1 = ROOT.TMVA_SOFIE_Higgs_Model_4L_50.Session("Higgs_Model.root") session2 = ROOT.TMVA_SOFIE_Higgs_Model_4L_200.Session("Higgs_Model.root") session3 = ROOT.TMVA_SOFIE_Higgs_Model_2L_500.Session("Higgs_Model.root") -hs1 = ROOT.TH1D("hs1","Signal result 4L 50",100,0,1) -hs2 = ROOT.TH1D("hs2","Signal result 4L 200",100,0,1) -hs3 = ROOT.TH1D("hs3","Signal result 2L 500",100,0,1) - -hb1 = ROOT.TH1D("hb1","Background result 4L 50",100,0,1) -hb2 = ROOT.TH1D("hb2","Background result 4L 200",100,0,1) -hb3 = ROOT.TH1D("hb3","Background result 2L 500",100,0,1) - -def EvalModel(session, x) : - result = session.infer(x) - return result[0] - -for i in range(0,x_test.shape[0]): - result1 = EvalModel(session1, x_test[i,:]) - result2 = EvalModel(session2, x_test[i,:]) - result3 = EvalModel(session3, x_test[i,:]) - if (y_test[i] == 1) : - hs1.Fill(result1) - hs2.Fill(result2) - hs3.Fill(result3) - else: - hb1.Fill(result1) - hb2.Fill(result2) - hb3.Fill(result3) - -def PlotHistos(hs,hb): - hs.SetLineColor("kRed") - hb.SetLineColor("kBlue") - hs.Draw() - hb.Draw("same") +hs1 = ROOT.TH1D("hs1", "Signal result 4L 50", 100, 0, 1) +hs2 = ROOT.TH1D("hs2", "Signal result 4L 200", 100, 0, 1) +hs3 = ROOT.TH1D("hs3", "Signal result 2L 500", 100, 0, 1) + +hb1 = ROOT.TH1D("hb1", "Background result 4L 50", 100, 0, 1) +hb2 = ROOT.TH1D("hb2", "Background result 4L 200", 100, 0, 1) +hb3 = ROOT.TH1D("hb3", "Background result 2L 500", 100, 0, 1) + + +def EvalModel(session, x): + result = session.infer(x) + return result[0] + + +for i in range(0, x_test.shape[0]): + result1 = EvalModel(session1, x_test[i, :]) + result2 = EvalModel(session2, x_test[i, :]) + result3 = EvalModel(session3, x_test[i, :]) + if y_test[i] == 1: + hs1.Fill(result1) + hs2.Fill(result2) + hs3.Fill(result3) + else: + hb1.Fill(result1) + hb2.Fill(result2) + hb3.Fill(result3) + + +def PlotHistos(hs, hb): + hs.SetLineColor("kRed") + hb.SetLineColor("kBlue") + hs.Draw() + hb.Draw("same") + c1 = ROOT.TCanvas() -c1.Divide(1,3) +c1.Divide(1, 3) c1.cd(1) -PlotHistos(hs1,hb1) +PlotHistos(hs1, hb1) c1.cd(2) -PlotHistos(hs2,hb2) +PlotHistos(hs2, hb2) c1.cd(3) -PlotHistos(hs3,hb3) +PlotHistos(hs3, hb3) c1.Draw() ## draw also ROC curves -def GetContent(h) : - n = h.GetNbinsX() - x = ROOT.std.vector['float'](n) - w = ROOT.std.vector['float'](n) - for i in range(0,n): - x[i] = h.GetBinCenter(i+1) - w[i] = h.GetBinContent(i+1) - return x,w - -def MakeROCCurve(hs, hb) : - xs,ws = GetContent(hs) - xb,wb = GetContent(hb) - roc = ROOT.TMVA.ROCCurve(xs,xb,ws,wb) - print("ROC integral for ",hs.GetName(), roc.GetROCIntegral()) - curve = roc.GetROCCurve() - curve.SetName(hs.GetName()) - return roc,curve + +def GetContent(h): + n = h.GetNbinsX() + x = ROOT.std.vector["float"](n) + w = ROOT.std.vector["float"](n) + for i in range(0, n): + x[i] = h.GetBinCenter(i + 1) + w[i] = h.GetBinContent(i + 1) + return x, w + + +def MakeROCCurve(hs, hb): + xs, ws = GetContent(hs) + xb, wb = GetContent(hb) + roc = ROOT.TMVA.ROCCurve(xs, xb, ws, wb) + print("ROC integral for ", hs.GetName(), roc.GetROCIntegral()) + curve = roc.GetROCCurve() + curve.SetName(hs.GetName()) + return roc, curve + c2 = ROOT.TCanvas() -r1,curve1 = MakeROCCurve(hs1,hb1) +r1, curve1 = MakeROCCurve(hs1, hb1) curve1.SetLineColor("kRed") curve1.Draw("AC") -r2,curve2 = MakeROCCurve(hs2,hb2) +r2, curve2 = MakeROCCurve(hs2, hb2) curve2.SetLineColor("kBlue") curve2.Draw("C") -r3,curve3 = MakeROCCurve(hs3,hb3) +r3, curve3 = MakeROCCurve(hs3, hb3) curve3.SetLineColor("kGreen") curve3.Draw("C") diff --git a/tutorials/machine_learning/TMVA_SOFIE_ONNX.py b/tutorials/machine_learning/TMVA_SOFIE_ONNX.py index c323ccd59d2e1..0116a2684f5f0 100644 --- a/tutorials/machine_learning/TMVA_SOFIE_ONNX.py +++ b/tutorials/machine_learning/TMVA_SOFIE_ONNX.py @@ -13,7 +13,9 @@ ## \author Lorenzo Moneta +import contextlib import inspect +import warnings import numpy as np import ROOT @@ -21,97 +23,114 @@ import torch.nn as nn +@contextlib.contextmanager +def expect_warning(category, message): + """Silence a known third-party warning and raise if it stops firing. + + Notifies us to drop the workaround once the upstream library is fixed. + """ + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + yield + seen = False + for w in caught: + if issubclass(w.category, category) and message in str(w.message): + seen = True + else: + warnings.warn_explicit(w.message, w.category, w.filename, w.lineno) + if not seen: + raise RuntimeError( + f"Expected {category.__name__} containing {message!r} was not " + "emitted. This tutorial's workaround can probably be removed." + ) + + def CreateAndTrainModel(modelName): - model = nn.Sequential( - nn.Linear(32,16), - nn.ReLU(), - nn.Linear(16,8), - nn.ReLU(), - nn.Linear(8,2), - nn.Softmax(dim=1) - ) - - criterion = nn.MSELoss() - optimizer = torch.optim.SGD(model.parameters(),lr=0.01) - - - #train model with the random data - for i in range(500): - x=torch.randn(2,32) - y=torch.randn(2,2) - y_pred = model(x) - loss = criterion(y_pred,y) - optimizer.zero_grad() - loss.backward() - optimizer.step() - - #******************************************************* - ## EXPORT to ONNX - # - # need to evaluate the model before exporting to ONNX - # and to provide a dummy input tensor to set the input model shape - model.eval() - - modelFile = modelName + ".onnx" - dummy_x = torch.randn(1,32) - model(dummy_x) - - #check for torch.onnx.export parameters - def filtered_kwargs(func, **candidate_kwargs): - sig = inspect.signature(func) - return { - k: v for k, v in candidate_kwargs.items() - if k in sig.parameters - } - kwargs = filtered_kwargs( - torch.onnx.export, - input_names=["input"], - output_names=["output"], - external_data=False, # may not exist - dynamo=True # may not exist - ) - print("calling torch.onnx.export with parameters",kwargs) - - try: - torch.onnx.export(model, dummy_x, modelFile, **kwargs) - print("model exported to ONNX as",modelFile) - return modelFile - except TypeError: - print("Cannot export model from pytorch to ONNX - with version ",torch.__version__) - print("Skip tutorial execution") - exit() + model = nn.Sequential(nn.Linear(32, 16), nn.ReLU(), nn.Linear(16, 8), nn.ReLU(), nn.Linear(8, 2), nn.Softmax(dim=1)) + + criterion = nn.MSELoss() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + + # train model with the random data + for i in range(500): + x = torch.randn(2, 32) + y = torch.randn(2, 2) + y_pred = model(x) + loss = criterion(y_pred, y) + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # ******************************************************* + ## EXPORT to ONNX + # + # need to evaluate the model before exporting to ONNX + # and to provide a dummy input tensor to set the input model shape + model.eval() + + modelFile = modelName + ".onnx" + dummy_x = torch.randn(1, 32) + model(dummy_x) + + # check for torch.onnx.export parameters + def filtered_kwargs(func, **candidate_kwargs): + sig = inspect.signature(func) + return {k: v for k, v in candidate_kwargs.items() if k in sig.parameters} + + kwargs = filtered_kwargs( + torch.onnx.export, + input_names=["input"], + output_names=["output"], + external_data=False, # may not exist + dynamo=True, # may not exist + ) + print("calling torch.onnx.export with parameters", kwargs) + + try: + # torch.onnx.export (dynamo path) pickles its export program through + # copyreg, which still references the deprecated LeafSpec. The warning + # is emitted from inside PyTorch and cannot be avoided from user code. + with expect_warning(FutureWarning, "isinstance(treespec, LeafSpec)"): + torch.onnx.export(model, dummy_x, modelFile, **kwargs) + print("model exported to ONNX as", modelFile) + return modelFile + except TypeError: + print("Cannot export model from pytorch to ONNX - with version ", torch.__version__) + print("Skip tutorial execution") + exit() def ParseModel(modelFile, verbose=False): - parser = ROOT.TMVA.Experimental.SOFIE.RModelParser_ONNX() - model = parser.Parse(modelFile,verbose) - # - #print model weights - if (verbose): - model.PrintInitializedTensors() - data = model.GetTensorData['float']('0weight') - print("0weight",data) - data = model.GetTensorData['float']('2weight') - print("2weight",data) - - # Generating inference code - model.Generate() - #generate header file (and .dat file) with modelName+.hxx - model.OutputGenerated() - if (verbose) : - model.PrintGenerated() - - modelCode = modelFile.replace(".onnx",".hxx") - print("Generated model header file ",modelCode) - return modelCode + parser = ROOT.TMVA.Experimental.SOFIE.RModelParser_ONNX() + model = parser.Parse(modelFile, verbose) + # + # print model weights + if verbose: + model.PrintInitializedTensors() + data = model.GetTensorData["float"]("0weight") + print("0weight", data) + data = model.GetTensorData["float"]("2weight") + print("2weight", data) + + # Generating inference code + model.Generate() + # generate header file (and .dat file) with modelName+.hxx + model.OutputGenerated() + if verbose: + model.PrintGenerated() + + modelCode = modelFile.replace(".onnx", ".hxx") + print("Generated model header file ", modelCode) + return modelCode + ################################################################### ## Step 1 : Create and Train model ################################################################### -#use an arbitrary modelName +# use an arbitrary modelName modelName = "LinearModel" modelFile = CreateAndTrainModel(modelName) @@ -132,36 +151,37 @@ def ParseModel(modelFile, verbose=False): ## Step 4: Evaluate the model ################################################################### -#get first the SOFIE session namespace -sofie = getattr(ROOT, 'TMVA_SOFIE_' + modelName) +# get first the SOFIE session namespace +sofie = getattr(ROOT, "TMVA_SOFIE_" + modelName) session = sofie.Session() -x = np.random.normal(0,1,(1,32)).astype(np.float32) +x = np.random.normal(0, 1, (1, 32)).astype(np.float32) print("\n************************************************************") print("Running inference with SOFIE ") -print("\ninput to model is ",x) +print("\ninput to model is ", x) y = session.infer(x) # output shape is (1,2) y_sofie = np.asarray(y.data()) print("-> output using SOFIE = ", y_sofie) -#check inference with onnx +# check inference with onnx try: - import onnxruntime as ort + import onnxruntime as ort + # Load model - print("Running inference with ONNXRuntime ") - ort_session = ort.InferenceSession(modelFile) + print("Running inference with ONNXRuntime ") + ort_session = ort.InferenceSession(modelFile) - # Run inference - outputs = ort_session.run(None, {"input": x}) - y_ort = outputs[0] - print("-> output using ORT =", y_ort) + # Run inference + outputs = ort_session.run(None, {"input": x}) + y_ort = outputs[0] + print("-> output using ORT =", y_ort) - testFailed = abs(y_sofie-y_ort) > 0.01 - if (np.any(testFailed)): - raiseError('Result is different between SOFIE and ONNXRT') - else : - print("OK") + testFailed = abs(y_sofie - y_ort) > 0.01 + if np.any(testFailed): + raise RuntimeError("Result is different between SOFIE and ONNXRT") + else: + print("OK") except ImportError: - print("Missing ONNXRuntime: skipping comparison test") + print("Missing ONNXRuntime: skipping comparison test")