The following script, run with the latest version of Combine compiled against ROOT master in the LCG nightlies, seems to suggest that RooBernsteinFast is not anymore faster than ROOT's RooBernstein.
import statistics
import timeit
import ROOT
ROOT.gROOT.SetBatch(True)
ROOT.RooMsgService.instance().setGlobalKillBelow(ROOT.RooFit.ERROR)
# Configuration
DEGREES = [3, 4, 5]
N_DATA = 100_000
REPEAT = 7 # timeit repetitions
# Observable
x = ROOT.RooRealVar("x", "x", 0, 1)
# Shared coefficients c0..c8 (enough for degree 8).
# RooBernstein degree N uses c0..cN (c0 frozen to 1 to match Fast).
# RooBernsteinFast<N> uses c1..cN (c0 fixed to 1 internally).
_all_coeffs = [
ROOT.RooRealVar(f"c{i}", f"c{i}", 1.0, 0.1, 10.0)
for i in range(max(DEGREES) + 1)
]
# Freeze c0 = 1 once for all degrees, mirroring RooBernsteinFast's internal constraint
_all_coeffs[0].setVal(1.0)
_all_coeffs[0].setConstant(True)
# Uniform toy data
data = ROOT.RooDataSet("data", "data", ROOT.RooArgSet(x))
for _ in range(N_DATA):
x.setVal(ROOT.gRandom.Rndm())
data.add(ROOT.RooArgSet(x))
# Helpers
def make_arglist(coeffs):
al = ROOT.RooArgList()
for c in coeffs:
al.add(c)
return al
def report(label, results_s):
mean_ms = statistics.mean(results_s) * 1000
std_ms = statistics.stdev(results_s) * 1000
best_ms = min(results_s) * 1000
print(f" {label}: best={best_ms:.3f} ms mean={mean_ms:.3f} ms std={std_ms:.3f} ms")
# Main loop
print()
print(f"{'RooBernstein vs RooBernsteinFast':^72}")
print(f" dataset: {N_DATA} events | timeit repeats: {REPEAT}")
for deg in DEGREES:
print(f"\n{'='*72}")
print(f" Degree {deg}")
print(f" RooBernstein : {deg} free coefficients (c0 frozen to 1)")
print(f" RooBernsteinFast<{deg}>: {deg} free coefficients (c0 fixed to 1 internally)")
print(f"{'='*72}")
coeffs_std = _all_coeffs[:deg + 1] # c0 .. cN
coeffs_fast = _all_coeffs[1:deg + 1] # c1 .. cN
cl_std = make_arglist(coeffs_std)
cl_fast = make_arglist(coeffs_fast)
# Construction
print(f"\n Construction ({REPEAT} repetitions):")
results = timeit.repeat(
stmt=lambda: ROOT.RooBernstein("model", "model", x, cl_std),
repeat=REPEAT, number=1,
)
report(f"RooBernstein deg={deg} construction", results)
results = timeit.repeat(
stmt=lambda: ROOT.RooBernsteinFast[deg]("model", "model", x, cl_fast),
repeat=REPEAT, number=1,
)
report(f"RooBernsteinFast<{deg}> deg={deg} construction", results)
# Persistent instances for NLL / fit benchmarks
model_std = ROOT.RooBernstein(
f"bstd{deg}", f"RooBernstein_deg{deg}", x, cl_std,
)
model_fast = ROOT.RooBernsteinFast[deg](
f"bfst{deg}", f"RooBernsteinFast_deg{deg}", x, cl_fast,
)
# NLL evaluation
nll_std = model_std.createNLL(data)
nll_fast = model_fast.createNLL(data)
print(f"\n NLL evaluation ({REPEAT} repetitions):")
results = timeit.repeat(
stmt=lambda: nll_std.getVal(),
repeat=REPEAT, number=1,
)
report(f"RooBernstein deg={deg} NLL eval ", results)
results = timeit.repeat(
stmt=lambda: nll_fast.getVal(),
repeat=REPEAT, number=1,
)
report(f"RooBernsteinFast<{deg}> deg={deg} NLL eval ", results)
# fit
print(f"\n fitTo ({REPEAT} repetitions):")
results = timeit.repeat(
stmt=lambda: model_std.fitTo(
data,
ROOT.RooFit.PrintLevel(-1),
ROOT.RooFit.Verbose(False),
),
repeat=REPEAT, number=1,
)
report(f"RooBernstein deg={deg} fitTo ", results)
results = timeit.repeat(
stmt=lambda: model_fast.fitTo(
data,
ROOT.RooFit.PrintLevel(-1),
ROOT.RooFit.Verbose(False),
),
repeat=REPEAT, number=1,
)
report(f"RooBernsteinFast<{deg}> deg={deg} fitTo ", results)
# Fit comparison: run one fresh fit each and compare results
print(f"\n Fit result comparison:")
r_std = model_std.fitTo(
data, ROOT.RooFit.Save(True), ROOT.RooFit.PrintLevel(-1), ROOT.RooFit.Verbose(False),
)
r_fast = model_fast.fitTo(
data, ROOT.RooFit.Save(True), ROOT.RooFit.PrintLevel(-1), ROOT.RooFit.Verbose(False),
)
print(f" {'Quantity':<30} {'RooBernstein':>16} {'RooBernsteinFast':>18}")
print(f" {'-'*64}")
print(f" {'NLL at minimum':<30} {r_std.minNll():>16.6f} {r_fast.minNll():>18.6f}")
print(f" {'EDM':<30} {r_std.edm():>16.2e} {r_fast.edm():>18.2e}")
# c0 is frozen to 1 in both models; compare the free coefficients c1..cN.
print(f" {'Coefficients (c0=1 frozen in both):'}")
print(f" {'coeff':<8} {'RooBernstein':>14} {'RooBernsteinFast':>18} {'diff':>12}")
for i, c_fast_coeff in enumerate(coeffs_fast):
c_std_match = coeffs_std[i + 1] # c_{i+1} in std list
diff = abs(c_std_match.getVal() - c_fast_coeff.getVal())
print(f" c{i+1:<7} {c_std_match.getVal():>14.6f} {c_fast_coeff.getVal():>18.6f} {diff:>12.2e}")
print(f"\n{'='*72}\n")
========================================================================
Degree 4
RooBernstein : 4 free coefficients (c0 frozen to 1)
RooBernsteinFast<4>: 4 free coefficients (c0 fixed to 1 internally)
========================================================================
Construction (7 repetitions):
RooBernstein deg=4 construction: best=0.005 ms mean=0.012 ms std=0.015 ms
RooBernsteinFast<4> deg=4 construction: best=0.006 ms mean=8.393 ms std=22.154 ms
NLL evaluation (7 repetitions):
RooBernstein deg=4 NLL eval : best=0.001 ms mean=0.207 ms std=0.543 ms
RooBernsteinFast<4> deg=4 NLL eval : best=0.001 ms mean=2.590 ms std=6.848 ms
fitTo (7 repetitions):
RooBernstein deg=4 fitTo : best=101.244 ms mean=110.333 ms std=22.375 ms
RooBernsteinFast<4> deg=4 fitTo : best=1278.058 ms mean=1327.556 ms std=37.203 ms
Fit result comparison:
Quantity RooBernstein RooBernsteinFast
----------------------------------------------------------------
NLL at minimum -1.938296 -1.938296
EDM 3.36e-07 2.98e-07
Coefficients (c0=1 frozen in both):
coeff RooBernstein RooBernsteinFast diff
c1 0.973396 0.973396 0.00e+00
c2 1.078788 1.078788 0.00e+00
c3 0.935047 0.935047 0.00e+00
c4 1.011103 1.011103 0.00e+00
The script measures the time taken to build the likelihood, the evaluation time and the fitting time. If this is confirmed, we should proceed with making RooBernsteinFast obsolete via schema evolution (as it is done for RooDoubleCBFast #1235), in order to benefit from ROOT's code generation for RooBernstein.
For context, this class is still vastly used in all the H->gamma gamma analyses (see for example here). This means that taking care of this class is necessary in order to provide full codegen backend support.
The following script, run with the latest version of Combine compiled against ROOT master in the LCG nightlies, seems to suggest that
RooBernsteinFastis not anymore faster than ROOT'sRooBernstein.with the following example output for degree 4:
The script measures the time taken to build the likelihood, the evaluation time and the fitting time. If this is confirmed, we should proceed with making
RooBernsteinFastobsolete via schema evolution (as it is done forRooDoubleCBFast#1235), in order to benefit from ROOT's code generation forRooBernstein.For context, this class is still vastly used in all the H->gamma gamma analyses (see for example here). This means that taking care of this class is necessary in order to provide full codegen backend support.