Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 163 additions & 0 deletions src/contrib/ccqe_spline_smoothing/construct_representative_spline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
import argparse
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel

from pathlib import Path

# For conversion from number --> 1e-38 cm2
# * CONV_CONST: number --> 1e-38cm2, / CONV_CONST: 1e-38cm2 --> number
hbarc = 1.973269804e-16
meter = 1.0 / hbarc
m2 = meter*meter
cm2 = 1.0e-4 * m2
CONV_CONST = 1.0e+38 / cm2

# A dictionary of models for CCQE.
# Perhaps there is a simpler way of extracting these from GENIE...
models = {
"AR23_20i_00_000": "genie::NievesQELCCPXSec/ZExp", # same for G18_10(i, j) and G24_20* tunes
"AR23_20i_01_000": "genie::NievesQELCCPXSec/ZExp_minerva",
"AR23_20i_01_001": "genie::NievesQELCCPXSec/ZExp_minervaNature",
"AR23_20i_02_000": "genie::NievesQELCCPXSec/ZExp_lqcd",
"AR25_20i_00_000": "genie::NievesQELCCPXSec/ZExpNoRPA",
"AR25_20i_01_000": "genie::NievesQELCCPXSec/ZExpNoRPA_minerva",
"AR25_20i_01_001": "genie::NievesQELCCPXSec/ZExpNoRPA_minervaNature",
"AR25_20i_02_000": "genie::NievesQELCCPXSec/ZExpNoRPA_lqcd",
"G18_01a_00_000" : "genie::LwlynSmithQELCCPXSec/Dipole", # same for all G18_(01, 02)* and G18_10(a, b, c, d) tunes
"G21_11a_00_000" : "genie::HybridXSecAlgorithm/SuSAv2-QEL" # same for all G21_11* tunes
}

# Custom formatter for XML tags to be "genie-like"
def format_text(elem):
if elem.tag in ["E", "xsec"]:
raw = (elem.text or "").strip()
val = float(raw)
#width = 9 if elem.tag == "E" else 22
#elem.text = f" {raw:<{width}} "
if elem.tag == "E":

elem.text = f" {val:10.5f} "
elif elem.tag == "xsec":
s = f"{val:.10e}"
m, e = s.split("e")
ei = int(e)
s = f"{m}e{ei:+03d}"
elem.text = f" {s:<16} "
for child in elem:
format_text(child)

def genie_indent(elem, level=0):
indent = " "
i = "\n" + level * indent
child = "\n" + (level+1) * indent
children = list(elem)
if not children:
return

if elem.tag in ["knot"]:
elem.text = " "
for kid in children:
genie_indent(kid, level+1)
kid.tail = " "
else:
elem.text = child
for kid in children:
genie_indent(kid, level+1)
for kid in children[:-1]:
kid.tail = child
children[-1].tail = i

def main(args):

df = pd.read_parquet(Path(args.input).resolve())
idx = df.index.to_numpy()

mean, std, err = np.zeros_like( idx ), np.zeros_like( idx ), np.zeros_like( idx )
for i, pt in enumerate(idx):
mean[i] = np.mean(df.loc[pt].to_numpy())
std [i] = np.std (df.loc[pt].to_numpy())
err = std / np.sqrt( float(df.iloc[0].to_numpy().shape[0]) )

zerodims = mean[mean == 0.0].shape[0]
idx = idx [zerodims:]
mean = mean[zerodims:]
std = std [zerodims:]
err = err [zerodims:]

# The points gmkspl gives you are equidistant in log space, we'll take advantage of that
# Support is only in idx[:]
lidx = np.log(idx)
ckernel = ConstantKernel(1.0) * RBF(length_scale = 0.5)
cgp = GaussianProcessRegressor(kernel=ckernel, alpha=err**2, normalize_y=True)
cgp.fit(lidx.reshape(-1, 1), mean)

resp, cov = cgp.predict(lidx.reshape(-1, 1), return_cov=True)
eresp = np.sqrt(np.diag(cov))

out_mean, out_err, out_resp, out_eresp = (np.zeros_like(df.index.to_numpy()) for _ in range(4))
out_mean [zerodims:] = np.clip(mean, 0.0, None)
out_err [zerodims:] = np.clip(err, 0.0, None)
out_resp [zerodims:] = np.clip(resp, 0.0, None)
out_eresp[zerodims:] = np.clip(eresp, 0.0, None)

# Write the output to a parquet and an xml
df_out = pd.DataFrame(index = df.index)
df_out["Smoothed xsec [1e-38 cm2/A]"] = out_resp
df_out["Error on smoothed xsec"] = out_eresp
df_out[f"Mean xsec from {len(df.columns)} nucleons [1e-38 cm2/A]"] = out_mean
df_out["Error on mean xsec"] = out_err
df_out.to_parquet(Path(f"{args.output}.parquet").resolve())

# careful which tune was given...
model_choice = None
if "G18_01" in args.tune or "G18_02" in args.tune or \
("G18_10" in args.tune and args.tune[6] in ['a', 'b', 'c', 'd']):
model_choice = models["G18_01a_00_000"]
elif "G18_10" in args.tune or "G24_20" in args.tune or args.tune == "AR23_20i_00_000":
model_choice = models["AR23_20i_00_000"]
elif "G21_11" in args.tune:
model_choice = models["G21_11a_00_000"]
else:
model_choice = models[args.tune]

with open(Path(f"{args.output}.xml").resolve(), 'wb') as fxout:
root = ET.Element("genie_xsec_spline_list")
tree = ET.ElementTree(root)
root.set("version", "3.00")
root.set("uselog", "1")
tune = ET.SubElement(root, "genie_tune")
tune.set("name", f"{args.tune}")
spl = ET.SubElement(tune, "spline")
nucpdg = 2112 if int(args.nupdg) > 0 else 2212
spl.set("name", f"{model_choice}/nu:{args.nupdg};tgt:{args.tgtpdg};N:{nucpdg};proc:Weak[CC],QES;")
lknots = len(df.index)
spl.set("nknots", str(lknots))
for Enu, xs in zip(df.index, out_resp):
knot = ET.SubElement(spl, "knot")
Eknot = ET.SubElement(knot, "E")
Eknot.text = f"{Enu}"
Xknot = ET.SubElement(knot, "xsec")
Xknot.text = f"{xs / CONV_CONST}"
#ET.indent(tree)
format_text(root)
genie_indent(root)
tree.write(fxout, encoding="utf-8", xml_declaration=True)

print(f"Saved outputs: {args.output}.parquet, {args.output}.xml")

if __name__ == "__main__":
parser = argparse.ArgumentParser(
description = "From a single parquet file produced from the concatenation of many, many nucleon throws for CCQE splines, I will construct a single representative for that parquet and save it as a new parquet. I will also save a single XML file with that spline. Pass the input parquet and the tune used to run it, as well as the neutrino and target used."
)
parser.add_argument('-i', "--input", type=str, required=True, help="Input parquet.")
parser.add_argument('-o', "--output", type=str, required=True, help="Output basename (no extension).")
parser.add_argument('-t', "--tune", type=str, required=True, help="GENIE tune used.")
parser.add_argument('-p', "--nupdg", type=int, required=True, help="Neutrino PDG code.")
parser.add_argument('-n', "--tgtpdg", type=int, required=True, help="Target PDG code.")
args = parser.parse_args()

main(args)
1 change: 1 addition & 0 deletions src/contrib/ccqe_spline_smoothing/example.cmd
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
./run_splines_parquet_FNAL-SL7.sh -T /pnfs/sbnd/scratch/users/kplows/gen_genie_splines_v3/pull_request/LARGE-SCALE-TEST-04-AR25Nature -v v3_06_02_sbn3 -q e26:prof --tune AR25_20i_01_001 --genlist CCQE --config pdgs_for_splines.cfg -n 250 -e 100 -r 1000 --setup sbnd --rewrite
81 changes: 81 additions & 0 deletions src/contrib/ccqe_spline_smoothing/reduce_gxspl_CCQES_for_sbn.awk
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#! /usr/bin/gawk -f
# Reduce the combinations of neutrino flavors and target isotopes in a
# GENIE gxspl XML spline file.
# gawk -f reduce_gxspl_for_sbn.awk gxspl-big.xml > gxspl-small.xml
#
# This only limits what is allowed; if a combination isn't in the
# input file it won't magically appear in the output
#
#
BEGIN {
doout=1; keep=1;
nlines=0; maxlines=0; # set non-zero maxlines only for testing puroses
# whether to keep each species of neutrino
nukeep[12] = 1; #
nukeep[-12] = 1; #
nukeep[14] = 1; #
nukeep[-14] = 1; #
nukeep[16] = 0; #
nukeep[-16] = 0; #
# whether to keep particular isotopes
tgtkeep[1000180400] = 1; # Ar40
tgtkeep[1000080160] = 1; # O16
tgtkeep[1000060120] = 1; # C12
tgtkeep[1000260560] = 1; # Fe56
# whether to keep particular processes
prockeep["CC"] = 1;
prockeep["QES"] = 1;
}
# decide whether to keep or reject a sub-process x-section based on the
# name string. Note picking out "nu:XY" and "tgt:XYZ" is dependent on
# the exact naming formulation ... hopefully this won't change.
# example string:
# <spline name="genie::AhrensNCELPXSec/Default/nu:-14;tgt:1000020040;N:2212;proc:Weak[NC],QES;" nknots="500">
#
/<spline/ {
keep=0; doout=0;
# check if we want this set, if yes set both keep & doout = 1
split($0,array,";");
split(array[2],tgtarray,":");
tgtval=tgtarray[2];
split(array[1],nuarray,"/");
nuvaltmp=nuarray[3];
split(nuvaltmp,nuarray,":");
nuval=nuarray[2];
split($0,parray,"proc:");
split(parray[2],procarraytmp,",");
split(procarraytmp[2],procarray,";");
procval=procarray[1];
split(parray[2],pvtmp,"[");
split(pvtmp[2],ppp,"]")
proccurr=ppp[1];

if ( tgtval in tgtkeep && 0 != tgtkeep[tgtval] ) {
if ( nuval in nukeep && 0 != nukeep[nuval] ) {
if( procval in prockeep && 0 != prockeep[procval] && 0 != prockeep[proccurr] && procarray[2] !~ /charm/ && procarray[2] !~ /strange/ ) {
#print "KEEP this proc ", procval;
keep=1; doout=1;
} else {
#print "reject this proc ", procval;
keep=0; doout=0;
}
} else {
keep=0; doout=0;
}
} else {
keep=0; doout=0;
}
}
# close out a particular spline
/<\/spline/ {
if ( doout == 1 ) { keep = 1 } else { keep = 0 }
doout=1;
}
# regular lines depend on the current state
// {
if ( keep == 1 ) print $0;
if ( doout == 1 ) keep = 1;
nlines++;
if ( maxlines > 0 && nlines > maxlines ) exit
}
# end-of-script reduce_gxspl.awk
100 changes: 100 additions & 0 deletions src/contrib/ccqe_spline_smoothing/replace_splines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import xml.etree.ElementTree as ET
from pathlib import Path
import argparse

parser = argparse.ArgumentParser(description="Replaces keys in one GENIE spline file with those from another GENIE spline file. Only checks spline names.")
parser.add_argument('-b', '--base', type=str, required=True, help="Base file to make changes to.")
parser.add_argument('-r', '--revision', type=str, required=True, help="Revision file to take changes from.")
parser.add_argument('-o', '--outpath', type=str, default="./output.xml", help="Path to output file.")
args = parser.parse_args()

base_tree = ET.parse(Path(args.base))
rev_tree = ET.parse(Path(args.revision))

root_base = base_tree.getroot()
root_rev = rev_tree.getroot()

# assert base and revision have same tune
base_tune = root_base.findall("./genie_tune")[0].get('name')
rev_tune = root_rev.findall("./genie_tune")[0].get('name')

assert( base_tune == rev_tune )

splines_base = {}
splines_rev = {}

# A dictionary of models for CCQE.
# Perhaps there is a simpler way of extracting these from GENIE...
models = {
"AR23_20i_00_000": "genie::NievesQELCCPXSec/ZExp", # same for G18_10(i, j) and G24_20* tunes
"AR23_20i_01_000": "genie::NievesQELCCPXSec/ZExp_minerva",
"AR23_20i_01_001": "genie::NievesQELCCPXSec/ZExp_minervaNature",
"AR23_20i_02_000": "genie::NievesQELCCPXSec/ZExp_lqcd",
"AR25_20i_00_000": "genie::NievesQELCCPXSec/ZExpNoRPA",
"AR25_20i_01_000": "genie::NievesQELCCPXSec/ZExpNoRPA_minerva",
"AR25_20i_01_001": "genie::NievesQELCCPXSec/ZExpNoRPA_minervaNature",
"AR25_20i_02_000": "genie::NievesQELCCPXSec/ZExpNoRPA_lqcd",
"G18_01a_00_000" : "genie::LwlynSmithQELCCPXSec/Dipole", # same for all G18_(01, 02)* and G18_10(a, b, c, d) tunes
"G21_11a_00_000" : "genie::HybridXSecAlgorithm/SuSAv2-QEL" # same for all G21_11* tunes
}

'''
# pick out the appropriate configuration
desired_conf = None
if "G18_01" in base_tune or "G18_02" in base_tune or \
("G18_10" in base_tune and base_tune[6] in ['a', 'b', 'c', 'd']):
desired_conf = models["G18_01a_00_000"]
elif "G18_10" in base_tune or "G24_20" in base_tune or base_tune == "AR23_20i_00_000":
desired_conf = models["AR23_20i_00_000"]
elif "G21_11" in base_tune:
desired_conf = models["G21_11a_00_000"]
else:
desired_conf = models[base_tune]
'''

# Base splines
for spline in root_base.findall(".//spline"):
name = spline.get('name')
'''
if('Weak[CC],QES' in name and 'charm' not in name and 'strange' not in name):
# Split configuration out
namebits = name.split('/')
name = f"{namebits[0]}/{desired_conf}"
for nb in namebits[2:]:
name = f"{name}/{nb}"
if(desired_conf == "CRPA-QEL"):
name = "genie::HybridXSecAlgorithm/CRPA-QEL"
for nb in namebits[2:]:
name = f"{name}/{nb}"
'''
splines_base[name] = spline

# Revision splines
for spline in root_rev.findall(".//spline"):
name = spline.get('name')
'''
if('Weak[CC],QES' in name and 'charm' not in name and 'strange' not in name):
name = desired_conf
'''
splines_rev[name] = spline

print("Found", len(splines_base), "splines in", args.base)
print("Found", len(splines_rev), "splines in", args.revision)

for name, rsp in splines_rev.items():
try:
bsp = splines_base[name]
# also replace the spline name
old_name = bsp.get("name")
bsp.attrib["name"] = name
old_knots = list(bsp.findall("knot"))
for knot in old_knots:
bsp.remove(knot)
# Copy knots
for knot in rsp:
bsp.append(knot)
except:
continue

base_tree.write(Path(args.outpath), encoding="ISO-8859-1", xml_declaration=True)
print("Saved output XML to", args.outpath)
26 changes: 26 additions & 0 deletions src/contrib/ccqe_spline_smoothing/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
awkward==2.9.0
awkward_cpp==52
colorama==0.4.6
contourpy==1.3.3
cramjam==2.11.0
cycler==0.12.1
fonttools==4.62.1
fsspec==2026.3.0
h5py==3.16.0
joblib==1.5.3
kiwisolver==1.5.0
matplotlib==3.10.9
numpy==2.4.4
packaging==26.2
pandas==3.0.2
pillow==12.2.0
pyarrow==24.0.0
pyparsing==3.3.2
python-dateutil==2.9.0.post0
scikit-learn==1.8.0
scipy==1.17.1
six==1.17.0
threadpoolctl==3.6.0
tqdm==4.67.3
uproot==5.7.4
xxhash==3.7.0
Loading