RooBernsteinFast obsolete (?)

The following script, run with the latest version of Combine compiled against ROOT master in the LCG nightlies, seems to suggest that `RooBernsteinFast` is not anymore faster than ROOT's `RooBernstein`.
```
import statistics
import timeit

import ROOT

ROOT.gROOT.SetBatch(True)
ROOT.RooMsgService.instance().setGlobalKillBelow(ROOT.RooFit.ERROR)

# Configuration
DEGREES = [3, 4, 5]
N_DATA  = 100_000
REPEAT  = 7   # timeit repetitions

# Observable
x = ROOT.RooRealVar("x", "x", 0, 1)

# Shared coefficients c0..c8 (enough for degree 8).
# RooBernstein degree N     uses c0..cN  (c0 frozen to 1 to match Fast).
# RooBernsteinFast<N>       uses c1..cN  (c0 fixed to 1 internally).
_all_coeffs = [
    ROOT.RooRealVar(f"c{i}", f"c{i}", 1.0, 0.1, 10.0)
    for i in range(max(DEGREES) + 1)
]
# Freeze c0 = 1 once for all degrees, mirroring RooBernsteinFast's internal constraint
_all_coeffs[0].setVal(1.0)
_all_coeffs[0].setConstant(True)

# Uniform toy data
data = ROOT.RooDataSet("data", "data", ROOT.RooArgSet(x))
for _ in range(N_DATA):
    x.setVal(ROOT.gRandom.Rndm())
    data.add(ROOT.RooArgSet(x))


# Helpers

def make_arglist(coeffs):
    al = ROOT.RooArgList()
    for c in coeffs:
        al.add(c)
    return al


def report(label, results_s):
    mean_ms = statistics.mean(results_s) * 1000
    std_ms  = statistics.stdev(results_s) * 1000
    best_ms = min(results_s) * 1000
    print(f"  {label}: best={best_ms:.3f} ms  mean={mean_ms:.3f} ms  std={std_ms:.3f} ms")


# Main loop
print()
print(f"{'RooBernstein vs RooBernsteinFast':^72}")
print(f"  dataset: {N_DATA} events | timeit repeats: {REPEAT}")

for deg in DEGREES:
    print(f"\n{'='*72}")
    print(f"  Degree {deg}")
    print(f"    RooBernstein        : {deg} free coefficients  (c0 frozen to 1)")
    print(f"    RooBernsteinFast<{deg}>: {deg} free coefficients  (c0 fixed to 1 internally)")
    print(f"{'='*72}")

    coeffs_std  = _all_coeffs[:deg + 1]   # c0 .. cN
    coeffs_fast = _all_coeffs[1:deg + 1]  # c1 .. cN
    cl_std      = make_arglist(coeffs_std)
    cl_fast     = make_arglist(coeffs_fast)

    # Construction
    print(f"\n  Construction ({REPEAT} repetitions):")

    results = timeit.repeat(
        stmt=lambda: ROOT.RooBernstein("model", "model", x, cl_std),
        repeat=REPEAT, number=1,
    )
    report(f"RooBernstein        deg={deg} construction", results)

    results = timeit.repeat(
        stmt=lambda: ROOT.RooBernsteinFast[deg]("model", "model", x, cl_fast),
        repeat=REPEAT, number=1,
    )
    report(f"RooBernsteinFast<{deg}> deg={deg} construction", results)

    # Persistent instances for NLL / fit benchmarks
    model_std  = ROOT.RooBernstein(
        f"bstd{deg}", f"RooBernstein_deg{deg}", x, cl_std,
    )
    model_fast = ROOT.RooBernsteinFast[deg](
        f"bfst{deg}", f"RooBernsteinFast_deg{deg}", x, cl_fast,
    )

    # NLL evaluation
    nll_std  = model_std.createNLL(data)
    nll_fast = model_fast.createNLL(data)

    print(f"\n  NLL evaluation ({REPEAT} repetitions):")

    results = timeit.repeat(
        stmt=lambda: nll_std.getVal(),
        repeat=REPEAT, number=1,
    )
    report(f"RooBernstein        deg={deg} NLL eval    ", results)

    results = timeit.repeat(
        stmt=lambda: nll_fast.getVal(),
        repeat=REPEAT, number=1,
    )
    report(f"RooBernsteinFast<{deg}> deg={deg} NLL eval    ", results)

    # fit
    print(f"\n  fitTo ({REPEAT} repetitions):")

    results = timeit.repeat(
        stmt=lambda: model_std.fitTo(
            data,
            ROOT.RooFit.PrintLevel(-1),
            ROOT.RooFit.Verbose(False),
            ),
        repeat=REPEAT, number=1,
    )
    report(f"RooBernstein        deg={deg} fitTo       ", results)

    results = timeit.repeat(
        stmt=lambda: model_fast.fitTo(
            data,
            ROOT.RooFit.PrintLevel(-1),
            ROOT.RooFit.Verbose(False),
            ),
        repeat=REPEAT, number=1,
    )
    report(f"RooBernsteinFast<{deg}> deg={deg} fitTo       ", results)

    # Fit comparison: run one fresh fit each and compare results
    print(f"\n  Fit result comparison:")

    r_std  = model_std.fitTo(
        data, ROOT.RooFit.Save(True), ROOT.RooFit.PrintLevel(-1), ROOT.RooFit.Verbose(False),
    )
    r_fast = model_fast.fitTo(
        data, ROOT.RooFit.Save(True), ROOT.RooFit.PrintLevel(-1), ROOT.RooFit.Verbose(False),
    )

    print(f"    {'Quantity':<30} {'RooBernstein':>16} {'RooBernsteinFast':>18}")
    print(f"    {'-'*64}")
    print(f"    {'NLL at minimum':<30} {r_std.minNll():>16.6f} {r_fast.minNll():>18.6f}")
    print(f"    {'EDM':<30} {r_std.edm():>16.2e} {r_fast.edm():>18.2e}")

    # c0 is frozen to 1 in both models; compare the free coefficients c1..cN.
    print(f"    {'Coefficients (c0=1 frozen in both):'}")
    print(f"      {'coeff':<8} {'RooBernstein':>14} {'RooBernsteinFast':>18} {'diff':>12}")
    for i, c_fast_coeff in enumerate(coeffs_fast):
        c_std_match = coeffs_std[i + 1]  # c_{i+1} in std list
        diff = abs(c_std_match.getVal() - c_fast_coeff.getVal())
        print(f"      c{i+1:<7} {c_std_match.getVal():>14.6f} {c_fast_coeff.getVal():>18.6f} {diff:>12.2e}")

print(f"\n{'='*72}\n")
```

with the following example output for degree 4:

```
========================================================================
  Degree 4
    RooBernstein        : 4 free coefficients  (c0 frozen to 1)
    RooBernsteinFast<4>: 4 free coefficients  (c0 fixed to 1 internally)
========================================================================

  Construction (7 repetitions):
  RooBernstein        deg=4 construction: best=0.005 ms  mean=0.012 ms  std=0.015 ms
  RooBernsteinFast<4> deg=4 construction: best=0.006 ms  mean=8.393 ms  std=22.154 ms

  NLL evaluation (7 repetitions):
  RooBernstein        deg=4 NLL eval    : best=0.001 ms  mean=0.207 ms  std=0.543 ms
  RooBernsteinFast<4> deg=4 NLL eval    : best=0.001 ms  mean=2.590 ms  std=6.848 ms

  fitTo (7 repetitions):
  RooBernstein        deg=4 fitTo       : best=101.244 ms  mean=110.333 ms  std=22.375 ms
  RooBernsteinFast<4> deg=4 fitTo       : best=1278.058 ms  mean=1327.556 ms  std=37.203 ms

  Fit result comparison:
    Quantity                           RooBernstein   RooBernsteinFast
    ----------------------------------------------------------------
    NLL at minimum                        -1.938296          -1.938296
    EDM                                    3.36e-07           2.98e-07
    Coefficients (c0=1 frozen in both):
      coeff      RooBernstein   RooBernsteinFast         diff
      c1             0.973396           0.973396     0.00e+00
      c2             1.078788           1.078788     0.00e+00
      c3             0.935047           0.935047     0.00e+00
      c4             1.011103           1.011103     0.00e+00
```

The script measures the time taken to build the likelihood, the evaluation time and the fitting time. If this is confirmed, we should proceed with making `RooBernsteinFast` obsolete via schema evolution (as it is done for `RooDoubleCBFast` https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit/pull/1235), in order to benefit from ROOT's code generation for `RooBernstein`.

For context, this class is still vastly used in all the H->gamma gamma analyses (see for example [here](https://github.com/cms-analysis/flashggFinalFit/blob/f715b266dea7776d29353afab40fb7f815c3e7a0/Background/src/PdfModelBuilder.cc#L108-L131)). This means that taking care of this class is necessary in order to provide full codegen backend support.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

RooBernsteinFast obsolete (?) #1249

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

RooBernsteinFast obsolete (?) #1249

Description

Metadata

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

Issue actions