[tmva][sofie] Run PyTorch ONNX export in a separate process in tutorial

guitargeek · guitargeek · commit ce08cbbe2dc5 · 2026-06-16T15:59:15.000+02:00
The tutorial exported a PyTorch model to ONNX and then parsed it with
SOFIE in the same process. `torch.onnx` and ROOT's SOFIE ONNX parser are
both linked against protobuf, but generally against different versions,
so loading them into one process causes a symbol clash and aborts.

Move the model creation, training and ONNX export into a small standalone
script that runs in its own Python process via subprocess, separate from
the process that imports ROOT, so the two protobuf copies are never loaded
together.

The parent detects success by the presence of the generated .onnx file
and raises a RuntimeError if it is missing.
diff --git a/tutorials/machine_learning/TMVA_SOFIE_ONNX.py b/tutorials/machine_learning/TMVA_SOFIE_ONNX.py
@@ -7,28 +7,42 @@
 ##  - compiling the model using ROOT Cling
 ##  - run the code and optionally compare with ONNXRuntime
 ##
+## The PyTorch export and ROOT's SOFIE parser are both linked against protobuf,
+## but usually against different versions, so loading them in the same process
+## leads to a symbol clash. We therefore run the PyTorch -> ONNX export in a
+## separate Python process, so that it never shares a process with ROOT.
 ##
 ## \macro_code
 ## \macro_output
 ## \author Lorenzo Moneta
 
+import os
+import sys
+import subprocess
 
-import contextlib
+import numpy as np
+import ROOT
+
+
+# The PyTorch export, as a small standalone script run in its own process.
+# It takes the model name as its only argument and writes <modelName>.onnx.
+EXPORT_SCRIPT = r"""
+import sys
 import inspect
 import warnings
+import contextlib
 
-import numpy as np
-import ROOT
 import torch
 import torch.nn as nn
 
+modelName = sys.argv[1]
+
 
 @contextlib.contextmanager
 def expect_warning(category, message):
-    """Silence a known third-party warning and raise if it stops firing.
+    # Silence a known third-party warning and raise if it stops firing.
 
-    Notifies us to drop the workaround once the upstream library is fixed.
-    """
+    # Notifies us to drop the workaround once the upstream library is fixed.
     with warnings.catch_warnings(record=True) as caught:
         warnings.simplefilter("always")
         yield
@@ -97,8 +111,11 @@ def filtered_kwargs(func, **candidate_kwargs):
         return modelFile
     except TypeError:
         print("Cannot export model from pytorch to ONNX - with version ", torch.__version__)
-        print("Skip tutorial execution")
-        exit()
+        # No .onnx file is written; the parent process detects this and raises a RuntimeError.
+        sys.exit()
+
+CreateAndTrainModel(modelName)
+"""
 
 
 def ParseModel(modelFile, verbose=False):
@@ -127,12 +144,17 @@ def ParseModel(modelFile, verbose=False):
 
 
 ###################################################################
-## Step 1 : Create and Train model
+## Step 1 : Create and train the model, export it to ONNX
+##          (done in a separate process to avoid the protobuf clash)
 ###################################################################
 
 # use an arbitrary modelName
 modelName = "LinearModel"
-modelFile = CreateAndTrainModel(modelName)
+modelFile = modelName + ".onnx"
+# Check the exit status too: a stale .onnx from an earlier run must not hide a failed export.
+exportResult = subprocess.run([sys.executable, "-c", EXPORT_SCRIPT, modelName])
+if exportResult.returncode != 0 or not os.path.exists(modelFile):
+    raise RuntimeError("ONNX model could not be exported")
 
 
 ###################################################################