GENIE-MC · kjplows · May 25, 2026
diff --git a/src/contrib/ccqe_spline_smoothing/construct_representative_spline.py b/src/contrib/ccqe_spline_smoothing/construct_representative_spline.py
@@ -0,0 +1,163 @@
+import argparse
+import numpy as np
+import pandas as pd
+import xml.etree.ElementTree as ET
+
+from sklearn.gaussian_process import GaussianProcessRegressor
+from sklearn.gaussian_process.kernels import RBF, ConstantKernel
+
+from pathlib import Path
+
+# For conversion from number --> 1e-38 cm2
+# * CONV_CONST: number --> 1e-38cm2, / CONV_CONST: 1e-38cm2 --> number
+hbarc = 1.973269804e-16
+meter = 1.0 / hbarc
+m2    = meter*meter
+cm2   = 1.0e-4 * m2
+CONV_CONST = 1.0e+38 / cm2
+
+# A dictionary of models for CCQE.
+# Perhaps there is a simpler way of extracting these from GENIE...
+models = {
+    "AR23_20i_00_000": "genie::NievesQELCCPXSec/ZExp", # same for G18_10(i, j) and G24_20* tunes
+    "AR23_20i_01_000": "genie::NievesQELCCPXSec/ZExp_minerva",
+    "AR23_20i_01_001": "genie::NievesQELCCPXSec/ZExp_minervaNature",
+    "AR23_20i_02_000": "genie::NievesQELCCPXSec/ZExp_lqcd",
+    "AR25_20i_00_000": "genie::NievesQELCCPXSec/ZExpNoRPA",
+    "AR25_20i_01_000": "genie::NievesQELCCPXSec/ZExpNoRPA_minerva",
+    "AR25_20i_01_001": "genie::NievesQELCCPXSec/ZExpNoRPA_minervaNature",
+    "AR25_20i_02_000": "genie::NievesQELCCPXSec/ZExpNoRPA_lqcd",
+    "G18_01a_00_000" : "genie::LwlynSmithQELCCPXSec/Dipole", # same for all G18_(01, 02)* and G18_10(a, b, c, d) tunes
+    "G21_11a_00_000" : "genie::HybridXSecAlgorithm/SuSAv2-QEL" # same for all G21_11* tunes
+}
+
+# Custom formatter for XML tags to be "genie-like"
+def format_text(elem):
+    if elem.tag in ["E", "xsec"]:
+        raw = (elem.text or "").strip()
+        val  = float(raw)
+        #width = 9 if elem.tag == "E" else 22
+        #elem.text = f" {raw:<{width}} "
+        if elem.tag == "E":
+
+            elem.text = f" {val:10.5f} "
+        elif elem.tag == "xsec":
+            s    = f"{val:.10e}"
+            m, e = s.split("e")
+            ei   = int(e)
+            s    = f"{m}e{ei:+03d}"
+            elem.text = f" {s:<16} "
+    for child in elem:
+        format_text(child)
+
+def genie_indent(elem, level=0):
+    indent = "  "
+    i = "\n" + level * indent
+    child = "\n" + (level+1) * indent
+    children = list(elem)
+    if not children:
+        return
+
+    if elem.tag in ["knot"]:
+        elem.text = " "
+        for kid in children:
+            genie_indent(kid, level+1)
+            kid.tail = " "
+    else:
+        elem.text = child
+        for kid in children:
+            genie_indent(kid, level+1)
+        for kid in children[:-1]:
+            kid.tail = child
+        children[-1].tail = i
+
+def main(args):
+
+    df = pd.read_parquet(Path(args.input).resolve())
+    idx = df.index.to_numpy()
+
+    mean, std, err = np.zeros_like( idx ), np.zeros_like( idx ), np.zeros_like( idx )
+    for i, pt in enumerate(idx):
+        mean[i] = np.mean(df.loc[pt].to_numpy())
+        std [i] = np.std (df.loc[pt].to_numpy())
+    err = std / np.sqrt( float(df.iloc[0].to_numpy().shape[0]) )
+
+    zerodims = mean[mean == 0.0].shape[0]
+    idx  = idx [zerodims:]
+    mean = mean[zerodims:]
+    std  = std [zerodims:]
+    err  = err [zerodims:]
+
+    # The points gmkspl gives you are equidistant in log space, we'll take advantage of that
+    # Support is only in idx[:]
+    lidx  = np.log(idx)
+    ckernel = ConstantKernel(1.0) * RBF(length_scale = 0.5)
+    cgp = GaussianProcessRegressor(kernel=ckernel, alpha=err**2, normalize_y=True)
+    cgp.fit(lidx.reshape(-1, 1), mean)
+
+    resp, cov = cgp.predict(lidx.reshape(-1, 1), return_cov=True)
+    eresp = np.sqrt(np.diag(cov))
+
+    out_mean, out_err, out_resp, out_eresp = (np.zeros_like(df.index.to_numpy()) for _ in range(4))
+    out_mean [zerodims:] = np.clip(mean,  0.0, None)
+    out_err  [zerodims:] = np.clip(err,   0.0, None)
+    out_resp [zerodims:] = np.clip(resp,  0.0, None)
+    out_eresp[zerodims:] = np.clip(eresp, 0.0, None)
+
+    # Write the output to a parquet and an xml
+    df_out = pd.DataFrame(index = df.index)
+    df_out["Smoothed xsec [1e-38 cm2/A]"] = out_resp
+    df_out["Error on smoothed xsec"] = out_eresp
+    df_out[f"Mean xsec from {len(df.columns)} nucleons [1e-38 cm2/A]"] = out_mean
+    df_out["Error on mean xsec"] = out_err
+    df_out.to_parquet(Path(f"{args.output}.parquet").resolve())
+
+    # careful which tune was given...
+    model_choice = None
+    if "G18_01" in args.tune or "G18_02" in args.tune or \
+       ("G18_10" in args.tune and args.tune[6] in ['a', 'b', 'c', 'd']):
+        model_choice = models["G18_01a_00_000"]
+    elif "G18_10" in args.tune or "G24_20" in args.tune or args.tune == "AR23_20i_00_000":
+        model_choice = models["AR23_20i_00_000"]
+    elif "G21_11" in args.tune:
+        model_choice = models["G21_11a_00_000"]
+    else:
+        model_choice = models[args.tune]
+
+    with open(Path(f"{args.output}.xml").resolve(), 'wb') as fxout:
+        root = ET.Element("genie_xsec_spline_list")
+        tree = ET.ElementTree(root)
+        root.set("version", "3.00")
+        root.set("uselog", "1")
+        tune = ET.SubElement(root, "genie_tune")
+        tune.set("name", f"{args.tune}")
+        spl  = ET.SubElement(tune, "spline")
+        nucpdg = 2112 if int(args.nupdg) > 0 else 2212
+        spl.set("name", f"{model_choice}/nu:{args.nupdg};tgt:{args.tgtpdg};N:{nucpdg};proc:Weak[CC],QES;")
+        lknots = len(df.index)
+        spl.set("nknots", str(lknots))
+        for Enu, xs in zip(df.index, out_resp):
+            knot = ET.SubElement(spl, "knot")
+            Eknot = ET.SubElement(knot, "E")
+            Eknot.text = f"{Enu}"
+            Xknot = ET.SubElement(knot, "xsec")
+            Xknot.text = f"{xs / CONV_CONST}"
+        #ET.indent(tree)
+        format_text(root)
+        genie_indent(root)
+        tree.write(fxout, encoding="utf-8", xml_declaration=True)
+
+    print(f"Saved outputs: {args.output}.parquet, {args.output}.xml")
+
+if __name__ == "__main__":    
+    parser = argparse.ArgumentParser(
+        description = "From a single parquet file produced from the concatenation of many, many nucleon throws for CCQE splines, I will construct a single representative for that parquet and save it as a new parquet. I will also save a single XML file with that spline. Pass the input parquet and the tune used to run it, as well as the neutrino and target used."
+    )
+    parser.add_argument('-i', "--input", type=str, required=True, help="Input parquet.")
+    parser.add_argument('-o', "--output", type=str, required=True, help="Output basename (no extension).")
+    parser.add_argument('-t', "--tune", type=str, required=True, help="GENIE tune used.")
+    parser.add_argument('-p', "--nupdg", type=int, required=True, help="Neutrino PDG code.")
+    parser.add_argument('-n', "--tgtpdg", type=int, required=True, help="Target PDG code.")
+    args = parser.parse_args()
+
+    main(args)
diff --git a/src/contrib/ccqe_spline_smoothing/example.cmd b/src/contrib/ccqe_spline_smoothing/example.cmd
@@ -0,0 +1 @@
+./run_splines_parquet_FNAL-SL7.sh -T /pnfs/sbnd/scratch/users/kplows/gen_genie_splines_v3/pull_request/LARGE-SCALE-TEST-04-AR25Nature -v v3_06_02_sbn3 -q e26:prof --tune AR25_20i_01_001 --genlist CCQE --config pdgs_for_splines.cfg -n 250 -e 100 -r 1000 --setup sbnd --rewrite
diff --git a/src/contrib/ccqe_spline_smoothing/reduce_gxspl_CCQES_for_sbn.awk b/src/contrib/ccqe_spline_smoothing/reduce_gxspl_CCQES_for_sbn.awk
@@ -0,0 +1,81 @@
+#! /usr/bin/gawk -f
+# Reduce the combinations of neutrino flavors and target isotopes in a
+# GENIE gxspl XML spline file.
+#    gawk -f reduce_gxspl_for_sbn.awk gxspl-big.xml > gxspl-small.xml
+#
+# This only limits what is allowed; if a combination isn't in the
+# input file it won't magically appear in the output
+#
+#
+BEGIN {
+  doout=1; keep=1;
+  nlines=0; maxlines=0; # set non-zero maxlines only for testing puroses
+  # whether to keep each species of neutrino
+  nukeep[12] = 1;   #
+  nukeep[-12] = 1;   #
+  nukeep[14] = 1;   #
+  nukeep[-14] = 1;   #
+  nukeep[16] = 0;   #
+  nukeep[-16] = 0;   #
+  # whether to keep particular isotopes
+  tgtkeep[1000180400] = 1;   # Ar40
+  tgtkeep[1000080160] = 1;   # O16
+  tgtkeep[1000060120] = 1;   # C12
+  tgtkeep[1000260560] = 1;   # Fe56
+  # whether to keep particular processes
+  prockeep["CC"] = 1;
+  prockeep["QES"] = 1;
+}
+# decide whether to keep or reject a sub-process x-section based on the
+# name string.  Note picking out "nu:XY" and "tgt:XYZ" is dependent on
+# the exact naming formulation ... hopefully this won't change.
+# example string:
+#    <spline name="genie::AhrensNCELPXSec/Default/nu:-14;tgt:1000020040;N:2212;proc:Weak[NC],QES;" nknots="500">
+#
+/<spline/ {
+  keep=0; doout=0;
+  # check if we want this set, if yes set both keep & doout = 1
+  split($0,array,";");
+  split(array[2],tgtarray,":");
+  tgtval=tgtarray[2];
+  split(array[1],nuarray,"/");
+  nuvaltmp=nuarray[3];
+  split(nuvaltmp,nuarray,":");
+  nuval=nuarray[2];
+  split($0,parray,"proc:");
+  split(parray[2],procarraytmp,",");
+  split(procarraytmp[2],procarray,";");
+  procval=procarray[1];
+  split(parray[2],pvtmp,"[");
+  split(pvtmp[2],ppp,"]")
+  proccurr=ppp[1];
+
+  if ( tgtval in tgtkeep && 0 != tgtkeep[tgtval] ) {
+    if ( nuval in nukeep && 0 != nukeep[nuval] ) {
+	if( procval in prockeep && 0 != prockeep[procval] && 0 != prockeep[proccurr] && procarray[2] !~ /charm/ && procarray[2] !~ /strange/ ) {
+	    #print "KEEP this proc ", procval;
+	    keep=1; doout=1;
+	} else {
+	    #print "reject this proc ", procval;
+	    keep=0; doout=0;
+	}
+    } else {
+      keep=0; doout=0;
+    }
+  } else {
+    keep=0; doout=0;
+  }
+}
+# close out a particular spline
+/<\/spline/ {
+  if ( doout == 1 ) { keep = 1 } else { keep = 0 }
+  doout=1;
+}
+# regular lines depend on the current state
+// {
+  if ( keep  == 1 ) print $0;
+  if ( doout == 1 ) keep = 1;
+  nlines++;
+  if ( maxlines > 0 && nlines > maxlines ) exit
+}
+# end-of-script reduce_gxspl.awk
diff --git a/src/contrib/ccqe_spline_smoothing/replace_splines.py b/src/contrib/ccqe_spline_smoothing/replace_splines.py
@@ -0,0 +1,100 @@
+import xml.etree.ElementTree as ET
+from pathlib import Path
+import argparse
+
+parser = argparse.ArgumentParser(description="Replaces keys in one GENIE spline file with those from another GENIE spline file. Only checks spline names.")
+parser.add_argument('-b', '--base', type=str, required=True, help="Base file to make changes to.")
+parser.add_argument('-r', '--revision', type=str, required=True, help="Revision file to take changes from.")
+parser.add_argument('-o', '--outpath', type=str, default="./output.xml", help="Path to output file.")
+args = parser.parse_args()
+
+base_tree = ET.parse(Path(args.base))
+rev_tree  = ET.parse(Path(args.revision))
+
+root_base = base_tree.getroot()
+root_rev  = rev_tree.getroot()
+
+# assert base and revision have same tune
+base_tune = root_base.findall("./genie_tune")[0].get('name')
+rev_tune = root_rev.findall("./genie_tune")[0].get('name')
+
+assert( base_tune == rev_tune )
+
+splines_base = {}
+splines_rev  = {}
+
+# A dictionary of models for CCQE.
+# Perhaps there is a simpler way of extracting these from GENIE...
+models = {
+    "AR23_20i_00_000": "genie::NievesQELCCPXSec/ZExp", # same for G18_10(i, j) and G24_20* tunes
+    "AR23_20i_01_000": "genie::NievesQELCCPXSec/ZExp_minerva",
+    "AR23_20i_01_001": "genie::NievesQELCCPXSec/ZExp_minervaNature",
+    "AR23_20i_02_000": "genie::NievesQELCCPXSec/ZExp_lqcd",
+    "AR25_20i_00_000": "genie::NievesQELCCPXSec/ZExpNoRPA",
+    "AR25_20i_01_000": "genie::NievesQELCCPXSec/ZExpNoRPA_minerva",
+    "AR25_20i_01_001": "genie::NievesQELCCPXSec/ZExpNoRPA_minervaNature",
+    "AR25_20i_02_000": "genie::NievesQELCCPXSec/ZExpNoRPA_lqcd",
+    "G18_01a_00_000" : "genie::LwlynSmithQELCCPXSec/Dipole", # same for all G18_(01, 02)* and G18_10(a, b, c, d) tunes
+    "G21_11a_00_000" : "genie::HybridXSecAlgorithm/SuSAv2-QEL" # same for all G21_11* tunes
+}
+
+'''
+# pick out the appropriate configuration
+desired_conf = None
+if "G18_01" in base_tune or "G18_02" in base_tune or \
+   ("G18_10" in base_tune and base_tune[6] in ['a', 'b', 'c', 'd']):
+    desired_conf = models["G18_01a_00_000"]
+elif "G18_10" in base_tune or "G24_20" in base_tune or base_tune == "AR23_20i_00_000":
+    desired_conf = models["AR23_20i_00_000"]
+elif "G21_11" in base_tune:
+    desired_conf = models["G21_11a_00_000"]
+else:
+    desired_conf = models[base_tune]
+'''
+
+# Base splines
+for spline in root_base.findall(".//spline"):
+    name = spline.get('name')
+    '''
+    if('Weak[CC],QES' in name and 'charm' not in name and 'strange' not in name):
+        # Split configuration out
+        namebits = name.split('/')
+        name = f"{namebits[0]}/{desired_conf}"
+        for nb in namebits[2:]:
+            name = f"{name}/{nb}"
+        if(desired_conf == "CRPA-QEL"):
+            name = "genie::HybridXSecAlgorithm/CRPA-QEL"
+            for nb in namebits[2:]:
+                name = f"{name}/{nb}"
+    '''
+    splines_base[name] = spline
+
+# Revision splines
+for spline in root_rev.findall(".//spline"):
+    name = spline.get('name')
+    '''
+    if('Weak[CC],QES' in name and 'charm' not in name and 'strange' not in name):
+        name = desired_conf
+    '''
+    splines_rev[name] = spline
+
+print("Found", len(splines_base), "splines in", args.base)
+print("Found", len(splines_rev), "splines in", args.revision)
+
+for name, rsp in splines_rev.items():
+    try:
+        bsp = splines_base[name]
+        # also replace the spline name
+        old_name = bsp.get("name")
+        bsp.attrib["name"] = name
+        old_knots = list(bsp.findall("knot"))
+        for knot in old_knots:
+            bsp.remove(knot)
+        # Copy knots
+        for knot in rsp:
+            bsp.append(knot)
+    except:
+        continue
+
+base_tree.write(Path(args.outpath), encoding="ISO-8859-1", xml_declaration=True)
+print("Saved output XML to", args.outpath)
diff --git a/src/contrib/ccqe_spline_smoothing/requirements.txt b/src/contrib/ccqe_spline_smoothing/requirements.txt
@@ -0,0 +1,26 @@
+awkward==2.9.0
+awkward_cpp==52
+colorama==0.4.6
+contourpy==1.3.3
+cramjam==2.11.0
+cycler==0.12.1
+fonttools==4.62.1
+fsspec==2026.3.0
+h5py==3.16.0
+joblib==1.5.3
+kiwisolver==1.5.0
+matplotlib==3.10.9
+numpy==2.4.4
+packaging==26.2
+pandas==3.0.2
+pillow==12.2.0
+pyarrow==24.0.0
+pyparsing==3.3.2
+python-dateutil==2.9.0.post0
+scikit-learn==1.8.0
+scipy==1.17.1
+six==1.17.0
+threadpoolctl==3.6.0
+tqdm==4.67.3
+uproot==5.7.4
+xxhash==3.7.0
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		./run_splines_parquet_FNAL-SL7.sh -T /pnfs/sbnd/scratch/users/kplows/gen_genie_splines_v3/pull_request/LARGE-SCALE-TEST-04-AR25Nature -v v3_06_02_sbn3 -q e26:prof --tune AR25_20i_01_001 --genlist CCQE --config pdgs_for_splines.cfg -n 250 -e 100 -r 1000 --setup sbnd --rewrite