diff --git a/src/contrib/ccqe_spline_smoothing/construct_representative_spline.py b/src/contrib/ccqe_spline_smoothing/construct_representative_spline.py new file mode 100644 index 000000000..7a59be114 --- /dev/null +++ b/src/contrib/ccqe_spline_smoothing/construct_representative_spline.py @@ -0,0 +1,163 @@ +import argparse +import numpy as np +import pandas as pd +import xml.etree.ElementTree as ET + +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.gaussian_process.kernels import RBF, ConstantKernel + +from pathlib import Path + +# For conversion from number --> 1e-38 cm2 +# * CONV_CONST: number --> 1e-38cm2, / CONV_CONST: 1e-38cm2 --> number +hbarc = 1.973269804e-16 +meter = 1.0 / hbarc +m2 = meter*meter +cm2 = 1.0e-4 * m2 +CONV_CONST = 1.0e+38 / cm2 + +# A dictionary of models for CCQE. +# Perhaps there is a simpler way of extracting these from GENIE... +models = { + "AR23_20i_00_000": "genie::NievesQELCCPXSec/ZExp", # same for G18_10(i, j) and G24_20* tunes + "AR23_20i_01_000": "genie::NievesQELCCPXSec/ZExp_minerva", + "AR23_20i_01_001": "genie::NievesQELCCPXSec/ZExp_minervaNature", + "AR23_20i_02_000": "genie::NievesQELCCPXSec/ZExp_lqcd", + "AR25_20i_00_000": "genie::NievesQELCCPXSec/ZExpNoRPA", + "AR25_20i_01_000": "genie::NievesQELCCPXSec/ZExpNoRPA_minerva", + "AR25_20i_01_001": "genie::NievesQELCCPXSec/ZExpNoRPA_minervaNature", + "AR25_20i_02_000": "genie::NievesQELCCPXSec/ZExpNoRPA_lqcd", + "G18_01a_00_000" : "genie::LwlynSmithQELCCPXSec/Dipole", # same for all G18_(01, 02)* and G18_10(a, b, c, d) tunes + "G21_11a_00_000" : "genie::HybridXSecAlgorithm/SuSAv2-QEL" # same for all G21_11* tunes +} + +# Custom formatter for XML tags to be "genie-like" +def format_text(elem): + if elem.tag in ["E", "xsec"]: + raw = (elem.text or "").strip() + val = float(raw) + #width = 9 if elem.tag == "E" else 22 + #elem.text = f" {raw:<{width}} " + if elem.tag == "E": + + elem.text = f" {val:10.5f} " + elif elem.tag == "xsec": + s = f"{val:.10e}" + m, e = s.split("e") + ei = int(e) + s = f"{m}e{ei:+03d}" + elem.text = f" {s:<16} " + for child in elem: + format_text(child) + +def genie_indent(elem, level=0): + indent = " " + i = "\n" + level * indent + child = "\n" + (level+1) * indent + children = list(elem) + if not children: + return + + if elem.tag in ["knot"]: + elem.text = " " + for kid in children: + genie_indent(kid, level+1) + kid.tail = " " + else: + elem.text = child + for kid in children: + genie_indent(kid, level+1) + for kid in children[:-1]: + kid.tail = child + children[-1].tail = i + +def main(args): + + df = pd.read_parquet(Path(args.input).resolve()) + idx = df.index.to_numpy() + + mean, std, err = np.zeros_like( idx ), np.zeros_like( idx ), np.zeros_like( idx ) + for i, pt in enumerate(idx): + mean[i] = np.mean(df.loc[pt].to_numpy()) + std [i] = np.std (df.loc[pt].to_numpy()) + err = std / np.sqrt( float(df.iloc[0].to_numpy().shape[0]) ) + + zerodims = mean[mean == 0.0].shape[0] + idx = idx [zerodims:] + mean = mean[zerodims:] + std = std [zerodims:] + err = err [zerodims:] + + # The points gmkspl gives you are equidistant in log space, we'll take advantage of that + # Support is only in idx[:] + lidx = np.log(idx) + ckernel = ConstantKernel(1.0) * RBF(length_scale = 0.5) + cgp = GaussianProcessRegressor(kernel=ckernel, alpha=err**2, normalize_y=True) + cgp.fit(lidx.reshape(-1, 1), mean) + + resp, cov = cgp.predict(lidx.reshape(-1, 1), return_cov=True) + eresp = np.sqrt(np.diag(cov)) + + out_mean, out_err, out_resp, out_eresp = (np.zeros_like(df.index.to_numpy()) for _ in range(4)) + out_mean [zerodims:] = np.clip(mean, 0.0, None) + out_err [zerodims:] = np.clip(err, 0.0, None) + out_resp [zerodims:] = np.clip(resp, 0.0, None) + out_eresp[zerodims:] = np.clip(eresp, 0.0, None) + + # Write the output to a parquet and an xml + df_out = pd.DataFrame(index = df.index) + df_out["Smoothed xsec [1e-38 cm2/A]"] = out_resp + df_out["Error on smoothed xsec"] = out_eresp + df_out[f"Mean xsec from {len(df.columns)} nucleons [1e-38 cm2/A]"] = out_mean + df_out["Error on mean xsec"] = out_err + df_out.to_parquet(Path(f"{args.output}.parquet").resolve()) + + # careful which tune was given... + model_choice = None + if "G18_01" in args.tune or "G18_02" in args.tune or \ + ("G18_10" in args.tune and args.tune[6] in ['a', 'b', 'c', 'd']): + model_choice = models["G18_01a_00_000"] + elif "G18_10" in args.tune or "G24_20" in args.tune or args.tune == "AR23_20i_00_000": + model_choice = models["AR23_20i_00_000"] + elif "G21_11" in args.tune: + model_choice = models["G21_11a_00_000"] + else: + model_choice = models[args.tune] + + with open(Path(f"{args.output}.xml").resolve(), 'wb') as fxout: + root = ET.Element("genie_xsec_spline_list") + tree = ET.ElementTree(root) + root.set("version", "3.00") + root.set("uselog", "1") + tune = ET.SubElement(root, "genie_tune") + tune.set("name", f"{args.tune}") + spl = ET.SubElement(tune, "spline") + nucpdg = 2112 if int(args.nupdg) > 0 else 2212 + spl.set("name", f"{model_choice}/nu:{args.nupdg};tgt:{args.tgtpdg};N:{nucpdg};proc:Weak[CC],QES;") + lknots = len(df.index) + spl.set("nknots", str(lknots)) + for Enu, xs in zip(df.index, out_resp): + knot = ET.SubElement(spl, "knot") + Eknot = ET.SubElement(knot, "E") + Eknot.text = f"{Enu}" + Xknot = ET.SubElement(knot, "xsec") + Xknot.text = f"{xs / CONV_CONST}" + #ET.indent(tree) + format_text(root) + genie_indent(root) + tree.write(fxout, encoding="utf-8", xml_declaration=True) + + print(f"Saved outputs: {args.output}.parquet, {args.output}.xml") + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description = "From a single parquet file produced from the concatenation of many, many nucleon throws for CCQE splines, I will construct a single representative for that parquet and save it as a new parquet. I will also save a single XML file with that spline. Pass the input parquet and the tune used to run it, as well as the neutrino and target used." + ) + parser.add_argument('-i', "--input", type=str, required=True, help="Input parquet.") + parser.add_argument('-o', "--output", type=str, required=True, help="Output basename (no extension).") + parser.add_argument('-t', "--tune", type=str, required=True, help="GENIE tune used.") + parser.add_argument('-p', "--nupdg", type=int, required=True, help="Neutrino PDG code.") + parser.add_argument('-n', "--tgtpdg", type=int, required=True, help="Target PDG code.") + args = parser.parse_args() + + main(args) diff --git a/src/contrib/ccqe_spline_smoothing/example.cmd b/src/contrib/ccqe_spline_smoothing/example.cmd new file mode 100644 index 000000000..0955af3e2 --- /dev/null +++ b/src/contrib/ccqe_spline_smoothing/example.cmd @@ -0,0 +1 @@ +./run_splines_parquet_FNAL-SL7.sh -T /pnfs/sbnd/scratch/users/kplows/gen_genie_splines_v3/pull_request/LARGE-SCALE-TEST-04-AR25Nature -v v3_06_02_sbn3 -q e26:prof --tune AR25_20i_01_001 --genlist CCQE --config pdgs_for_splines.cfg -n 250 -e 100 -r 1000 --setup sbnd --rewrite diff --git a/src/contrib/ccqe_spline_smoothing/reduce_gxspl_CCQES_for_sbn.awk b/src/contrib/ccqe_spline_smoothing/reduce_gxspl_CCQES_for_sbn.awk new file mode 100644 index 000000000..afcc9feef --- /dev/null +++ b/src/contrib/ccqe_spline_smoothing/reduce_gxspl_CCQES_for_sbn.awk @@ -0,0 +1,81 @@ +#! /usr/bin/gawk -f +# Reduce the combinations of neutrino flavors and target isotopes in a +# GENIE gxspl XML spline file. +# gawk -f reduce_gxspl_for_sbn.awk gxspl-big.xml > gxspl-small.xml +# +# This only limits what is allowed; if a combination isn't in the +# input file it won't magically appear in the output +# +# +BEGIN { + doout=1; keep=1; + nlines=0; maxlines=0; # set non-zero maxlines only for testing puroses + # whether to keep each species of neutrino + nukeep[12] = 1; # + nukeep[-12] = 1; # + nukeep[14] = 1; # + nukeep[-14] = 1; # + nukeep[16] = 0; # + nukeep[-16] = 0; # + # whether to keep particular isotopes + tgtkeep[1000180400] = 1; # Ar40 + tgtkeep[1000080160] = 1; # O16 + tgtkeep[1000060120] = 1; # C12 + tgtkeep[1000260560] = 1; # Fe56 + # whether to keep particular processes + prockeep["CC"] = 1; + prockeep["QES"] = 1; +} +# decide whether to keep or reject a sub-process x-section based on the +# name string. Note picking out "nu:XY" and "tgt:XYZ" is dependent on +# the exact naming formulation ... hopefully this won't change. +# example string: +# +# +/ 0 && nlines > maxlines ) exit +} +# end-of-script reduce_gxspl.awk diff --git a/src/contrib/ccqe_spline_smoothing/replace_splines.py b/src/contrib/ccqe_spline_smoothing/replace_splines.py new file mode 100644 index 000000000..f274068c8 --- /dev/null +++ b/src/contrib/ccqe_spline_smoothing/replace_splines.py @@ -0,0 +1,100 @@ +import xml.etree.ElementTree as ET +from pathlib import Path +import argparse + +parser = argparse.ArgumentParser(description="Replaces keys in one GENIE spline file with those from another GENIE spline file. Only checks spline names.") +parser.add_argument('-b', '--base', type=str, required=True, help="Base file to make changes to.") +parser.add_argument('-r', '--revision', type=str, required=True, help="Revision file to take changes from.") +parser.add_argument('-o', '--outpath', type=str, default="./output.xml", help="Path to output file.") +args = parser.parse_args() + +base_tree = ET.parse(Path(args.base)) +rev_tree = ET.parse(Path(args.revision)) + +root_base = base_tree.getroot() +root_rev = rev_tree.getroot() + +# assert base and revision have same tune +base_tune = root_base.findall("./genie_tune")[0].get('name') +rev_tune = root_rev.findall("./genie_tune")[0].get('name') + +assert( base_tune == rev_tune ) + +splines_base = {} +splines_rev = {} + +# A dictionary of models for CCQE. +# Perhaps there is a simpler way of extracting these from GENIE... +models = { + "AR23_20i_00_000": "genie::NievesQELCCPXSec/ZExp", # same for G18_10(i, j) and G24_20* tunes + "AR23_20i_01_000": "genie::NievesQELCCPXSec/ZExp_minerva", + "AR23_20i_01_001": "genie::NievesQELCCPXSec/ZExp_minervaNature", + "AR23_20i_02_000": "genie::NievesQELCCPXSec/ZExp_lqcd", + "AR25_20i_00_000": "genie::NievesQELCCPXSec/ZExpNoRPA", + "AR25_20i_01_000": "genie::NievesQELCCPXSec/ZExpNoRPA_minerva", + "AR25_20i_01_001": "genie::NievesQELCCPXSec/ZExpNoRPA_minervaNature", + "AR25_20i_02_000": "genie::NievesQELCCPXSec/ZExpNoRPA_lqcd", + "G18_01a_00_000" : "genie::LwlynSmithQELCCPXSec/Dipole", # same for all G18_(01, 02)* and G18_10(a, b, c, d) tunes + "G21_11a_00_000" : "genie::HybridXSecAlgorithm/SuSAv2-QEL" # same for all G21_11* tunes +} + +''' +# pick out the appropriate configuration +desired_conf = None +if "G18_01" in base_tune or "G18_02" in base_tune or \ + ("G18_10" in base_tune and base_tune[6] in ['a', 'b', 'c', 'd']): + desired_conf = models["G18_01a_00_000"] +elif "G18_10" in base_tune or "G24_20" in base_tune or base_tune == "AR23_20i_00_000": + desired_conf = models["AR23_20i_00_000"] +elif "G21_11" in base_tune: + desired_conf = models["G21_11a_00_000"] +else: + desired_conf = models[base_tune] +''' + +# Base splines +for spline in root_base.findall(".//spline"): + name = spline.get('name') + ''' + if('Weak[CC],QES' in name and 'charm' not in name and 'strange' not in name): + # Split configuration out + namebits = name.split('/') + name = f"{namebits[0]}/{desired_conf}" + for nb in namebits[2:]: + name = f"{name}/{nb}" + if(desired_conf == "CRPA-QEL"): + name = "genie::HybridXSecAlgorithm/CRPA-QEL" + for nb in namebits[2:]: + name = f"{name}/{nb}" + ''' + splines_base[name] = spline + +# Revision splines +for spline in root_rev.findall(".//spline"): + name = spline.get('name') + ''' + if('Weak[CC],QES' in name and 'charm' not in name and 'strange' not in name): + name = desired_conf + ''' + splines_rev[name] = spline + +print("Found", len(splines_base), "splines in", args.base) +print("Found", len(splines_rev), "splines in", args.revision) + +for name, rsp in splines_rev.items(): + try: + bsp = splines_base[name] + # also replace the spline name + old_name = bsp.get("name") + bsp.attrib["name"] = name + old_knots = list(bsp.findall("knot")) + for knot in old_knots: + bsp.remove(knot) + # Copy knots + for knot in rsp: + bsp.append(knot) + except: + continue + +base_tree.write(Path(args.outpath), encoding="ISO-8859-1", xml_declaration=True) +print("Saved output XML to", args.outpath) diff --git a/src/contrib/ccqe_spline_smoothing/requirements.txt b/src/contrib/ccqe_spline_smoothing/requirements.txt new file mode 100644 index 000000000..1f51ea251 --- /dev/null +++ b/src/contrib/ccqe_spline_smoothing/requirements.txt @@ -0,0 +1,26 @@ +awkward==2.9.0 +awkward_cpp==52 +colorama==0.4.6 +contourpy==1.3.3 +cramjam==2.11.0 +cycler==0.12.1 +fonttools==4.62.1 +fsspec==2026.3.0 +h5py==3.16.0 +joblib==1.5.3 +kiwisolver==1.5.0 +matplotlib==3.10.9 +numpy==2.4.4 +packaging==26.2 +pandas==3.0.2 +pillow==12.2.0 +pyarrow==24.0.0 +pyparsing==3.3.2 +python-dateutil==2.9.0.post0 +scikit-learn==1.8.0 +scipy==1.17.1 +six==1.17.0 +threadpoolctl==3.6.0 +tqdm==4.67.3 +uproot==5.7.4 +xxhash==3.7.0 diff --git a/src/contrib/ccqe_spline_smoothing/run_splines_parquet_FNAL-SL7.sh b/src/contrib/ccqe_spline_smoothing/run_splines_parquet_FNAL-SL7.sh new file mode 100755 index 000000000..8d84ad910 --- /dev/null +++ b/src/contrib/ccqe_spline_smoothing/run_splines_parquet_FNAL-SL7.sh @@ -0,0 +1,287 @@ +#!/usr/bin/bash + +# Robert Hatcher's colours +if [ -z "$PS1" ]; then + # if $- contains "i" then interactive session + export ESCCHAR="\x1B" # or \033 # Mac OS X bash doesn't support \e as esc? + export OUTBLACK="${ESCCHAR}[0;30m" + export OUTBLUE="${ESCCHAR}[0;34m" + export OUTGREEN="${ESCCHAR}[0;32m" + export OUTCYAN="${ESCCHAR}[0;36m" + export OUTRED="${ESCCHAR}[0;31m" + export OUTPURPLE="${ESCCHAR}[0;35m" + export OUTORANGE="${ESCCHAR}[0;33m" # orange, more brownish? + export OUTLTGRAY="${ESCCHAR}[0;37m" + export OUTDKGRAY="${ESCCHAR}[1;30m" + # labelled "light but appear in some cases to show as "bold" + export OUTLTBLUE="${ESCCHAR}[1;34m" + export OUTLTGREEN="${ESCCHAR}[1;32m" + export OUTLTCYAN="${ESCCHAR}[1;36m" + export OUTLTRED="${ESCCHAR}[1;31m" + export OUTLTPURPLE="${ESCCHAR}[1;35m" + export OUTYELLOW="${ESCCHAR}[1;33m" + export OUTWHITE="${ESCCHAR}[1;37m" + export OUTNOCOL="${ESCCHAR}[0m" # No Color +fi +# use as: echo -e "${OUTRED} this is red ${OUTNOCOL}" + +# Take in arguments and construct a DAG + +export b0=$(basename $0) +export MAXCALLS=100 +export DOREWRITE=0 + +usage() { + echo -e "${OUTCYAN}" + cat >&2 < ${EXECFILE} < *[0-9]+/, "NumNucleonThrows\"> 1")}1' \${GENIE}/config/NewQELXSec.xml > \${JOBXMLPATH}/NewQELXSec.xml +# (temporarily) copy in average_splines.py from GPVMs... This should live in tagged contrib +ifdh cp -D /exp/sbnd/app/users/kplows/gen_workshop/BuildEventGenerators/production/splines-interpolation/test_configs/average_splines.py \${JOBXMLPATH}/ + +# build the command +GENIECMD="gmkspl" +GENIEARGS=" -p \${JOBNUPDG} -t \${JOBTGTPDG} -e \${JOBEMAX} -n \${JOBNKNOTS} --tune \${JOBGENIETUNE}" +if [[ \${JOBGENLIST} != "Default" ]] && [[ ! -z \${JOBGENLIST} ]] ; then + GENIEARGS=\${GENIEARGS}" --event-generator-list \${JOBGENLIST}" +fi + +mkdir -p \${WORKDIR}/xmls +for i in \$(seq \${FIRSTSEED} \${LASTSEED}) ; do + fullcmd=\${GENIECMD}\${GENIEARGS}" --seed \${i} -o \${WORKDIR}/xmls/seed-\${i}.xml" + echo \$fullcmd + eval \$fullcmd 2>&1 | tee -a run-\${SUBPROCESS}.log +done + +# Set up python and run the average_splines script +setup_python +export OUTNAME=throws_\${JOBPRETTYNU}_\${JOBPRETTYTGT}_idx\${JOBINDEX} +python3 \${JOBXMLPATH}/average_splines.py -b \${WORKDIR}/xmls -o \${WORKDIR}/\${OUTNAME} +#python3 \${GENIE}/src/contrib/splines/average_splines.py -b \${WORKDIR}/xmls -o \${WORKDIR}/\${OUTNAME} + +# ifdh cp the resulting files +ifdh cp -D \$(find \${WORKDIR} -iname "\${OUTNAME}.parquet") ${OUTPUTTOP}/work-products/ +ifdh cp -D \$(find \${WORKDIR} -iname "\${OUTNAME}.xml") ${OUTPUTTOP}/work-products/ +#ifdh cp -D \$(find \${WORKDIR} -iname "run-\${SUBPROCESS}.log") ${OUTPUTTOP}/work-products/ + +# cleanup +cd \${BASEDIR} && rm -rf \${WORKDIR}/ +EOF +chmod u+x ${EXECFILE} + +# Now write a DAG file. +JOBSUBMAIN="jobsub_submit -n -G $(id -ng) --resource-provides=usage_model=DEDICATED,OPPORTUNISTIC,OFFSITE --append_condor_requirements='(TARGET.HAS_CVMFS_sbn_opensciencegrid_org==true)' --singularity-image=/cvmfs/singularity.opensciencegrid.org/fermilab/fnal-wn-sl7:latest" +RESOURCES="--expected-lifetime 8h --disk 8GB --memory 2GB" +JOBSUBFULL=${JOBSUBMAIN}" "${RESOURCES} +echo -e "" > genie-splines.dag +for i in $(seq 0 ${LASTPROCESS}) ; do + echo -e "${JOBSUBFULL} file://${EXECFILE} ${MULTIPLIER} ${i}" >> genie-splines.dag +done +echo -e "" >> genie-splines.dag + +mv genie-splines.dag ${OUTPUTTOP}/cfg/ + +echo -e "${OUTCYAN}Exit status 0${OUTNOCOL}"