Skip to content

Commit b59080c

Browse files
authored
Merge pull request #2418 from NNPDF/implement_CMS_2JET_8TEV_3D
datasets excluded from NNPDF4.0: CMS_2JET_8TEV_3D
2 parents d2bd97f + c31ed68 commit b59080c

19 files changed

Lines changed: 62162 additions & 29009 deletions

nnpdf_data/nnpdf_data/commondata/CMS_1JET_8TEV/uncertainties.yaml

Lines changed: 28999 additions & 28999 deletions
Large diffs are not rendered by default.
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
data_central:
2+
- 1304.0
3+
- 638.1
4+
- 329.4
5+
- 176.1
6+
- 95.42
7+
- 53.14
8+
- 30.04
9+
- 17.36
10+
- 10.07
11+
- 5.904
12+
- 3.53
13+
- 2.101
14+
- 1.257
15+
- 0.7631
16+
- 0.4598
17+
- 0.2762
18+
- 0.1661
19+
- 0.1004
20+
- 0.06007
21+
- 0.03592
22+
- 0.02109
23+
- 0.01254
24+
- 0.007197
25+
- 0.004058
26+
- 0.002391
27+
- 0.001419
28+
- 0.0007634
29+
- 0.0003934
30+
- 0.0002155
31+
- 0.0001198
32+
- 4.445e-05
33+
- 518.7
34+
- 239.1
35+
- 118.0
36+
- 59.7
37+
- 30.5
38+
- 15.88
39+
- 8.356
40+
- 4.508
41+
- 2.401
42+
- 1.303
43+
- 0.6989
44+
- 0.3753
45+
- 0.2016
46+
- 0.1091
47+
- 0.05782
48+
- 0.03049
49+
- 0.01591
50+
- 0.008247
51+
- 0.004217
52+
- 0.002188
53+
- 0.001044
54+
- 0.0004743
55+
- 0.000229
56+
- 9.465e-05
57+
- 4.074e-05
58+
- 2.321e-05
59+
- 39.11
60+
- 15.49
61+
- 5.749
62+
- 2.689
63+
- 1.138
64+
- 0.4543
65+
- 0.1875
66+
- 0.08166
67+
- 0.03022
68+
- 0.009612
69+
- 0.004567
70+
- 0.00146
71+
- 0.0004647
72+
- 0.0001141
73+
- 790.8
74+
- 376.0
75+
- 176.4
76+
- 90.12
77+
- 46.08
78+
- 23.09
79+
- 11.96
80+
- 6.188
81+
- 3.148
82+
- 1.634
83+
- 0.8323
84+
- 0.4201
85+
- 0.2114
86+
- 0.1044
87+
- 0.0504
88+
- 0.02377
89+
- 0.01105
90+
- 0.005018
91+
- 0.00204
92+
- 0.00086
93+
- 0.0003546
94+
- 0.0001093
95+
- 3.493e-05
96+
- 188.4
97+
- 79.95
98+
- 36.65
99+
- 16.05
100+
- 7.067
101+
- 3.108
102+
- 1.352
103+
- 0.593
104+
- 0.2592
105+
- 0.1068
106+
- 0.04124
107+
- 0.01593
108+
- 0.005859
109+
- 0.002166
110+
- 0.0006671
111+
- 0.0001653
112+
- 4.066e-05
113+
- 126.0
114+
- 47.86
115+
- 18.92
116+
- 7.102
117+
- 2.474
118+
- 0.8486
119+
- 0.2511
120+
- 0.06703
121+
- 0.01889
122+
- 0.002968
123+
- 0.0006449
Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
import yaml
2+
import numpy as np
3+
import copy
4+
from nnpdf_data.filter_utils.utils import cormat_to_covmat, covmat_to_artunc
5+
6+
def read_metadata() -> tuple:
    '''
    Load `metadata.yaml` from the current working directory and extract the
    entries of the first implemented observable that the filter needs.

    Returns a 2-tuple `(tables, ndata)` holding the `tables` and `ndata`
    fields of `implemented_observables[0]`.
    '''
    with open('metadata.yaml', 'r') as metadata_file:
        metadata = yaml.safe_load(metadata_file)
    first_observable = metadata['implemented_observables'][0]
    return first_observable['tables'], first_observable['ndata']
14+
15+
16+
def read_kinematics_and_centrals(tables: list) -> tuple:
    '''
    Read the kinematic bins and the central values from the raw data tables.

    Only the first six entries of `tables` are used; each one is loaded from
    `rawdata/table_<n>.yaml`.

    For every table:
      * each qualifier of the first dependent variable other than `SQRT(S)`
        becomes one bin, keyed by the lower-cased qualifier name with the
        substring 'oost' removed (so 'YBOOST' -> 'yb');
      * every entry of the first independent variable gives a `pTavg` bin
        built from its `low`/`high` fields;
      * every entry of the first dependent variable contributes its `value`
        to the central values.

    Returns a 2-tuple `(bins, centrals)`: `bins` is a list with one dict per
    data point (qualifier bins followed by `pTavg`), each bin being a
    `{'min', 'mid', 'max'}` dict; `centrals` is the flat list of central
    values in the same order.
    '''
    bins = []
    centrals = []
    for table_num in tables[:6]:  # select just the tables with kinematic data
        with open(f'rawdata/table_{table_num}.yaml', 'r') as file:
            kins = yaml.safe_load(file)
        observable = kins['dependent_variables'][0]

        # Build the qualifier bins of this table. Each qualifier is handled on
        # its own, so the name of one qualifier can never be paired with the
        # value of another (e.g. a string-valued SQRT(S) entry).
        current_yy_bin = {}
        for qualifier in observable['qualifiers']:
            name = qualifier.get('name')
            if name is None or name == 'SQRT(S)':
                continue
            bin_name = name.lower().replace('oost', '')
            value = qualifier.get('value')
            if isinstance(value, str):
                # NOTE(review): assumes the range string starts and ends with
                # a three-character number such as '0.0-1.0' - confirm
                # against the raw tables.
                lower = float(value[:3])
                upper = float(value[-3:])
                middle = (lower + upper) / 2
                current_yy_bin[bin_name] = {'min': lower, 'mid': middle, 'max': upper}
            else:
                # non-string values leave the bin empty
                current_yy_bin[bin_name] = {}

        # get the ptavg bins and combine them with the qualifier bins
        for ptavg_dict in observable_bins(kins):
            lower = ptavg_dict['low']
            upper = ptavg_dict['high']
            middle = (lower + upper) / 2
            # Deep copy so that every data point owns independent dictionaries
            # instead of sharing the per-table objects.
            current_pyys_bin = copy.deepcopy(current_yy_bin)
            current_pyys_bin['pTavg'] = {'min': lower, 'mid': middle, 'max': upper}
            bins.append(current_pyys_bin)

        # get the central values
        centrals.extend(vals['value'] for vals in observable['values'])
    return bins, centrals


def observable_bins(kins: dict) -> list:
    '''Return the entries of the first independent variable of a loaded table.'''
    return kins['independent_variables'][0]['values']
54+
55+
56+
def dump_kinematics_and_centrals(bins: list, centrals: list) -> None:
    '''
    Write the kinematic bins to `kinematics.yaml` (under the key `bins`) and
    the central values to `data.yaml` (under the key `data_central`), both in
    the current working directory.
    '''
    outputs = (
        ('kinematics.yaml', {'bins': bins}),
        ('data.yaml', {'data_central': centrals}),
    )
    for filename, payload in outputs:
        with open(filename, 'w') as out_file:
            yaml.safe_dump(payload, out_file, sort_keys=False)
62+
63+
64+
def read_kinematics_lengths(tables: list) -> list:
    '''
    Return, for each table id in `tables`, the number of entries of the first
    independent variable in `rawdata/table_<id>.yaml`.
    '''
    lengths = []
    for table_id in tables:
        with open(f'rawdata/table_{table_id}.yaml', 'r') as table_file:
            table = yaml.safe_load(table_file)
        points = table['independent_variables'][0]['values']
        lengths.append(len(points))
    return lengths
71+
72+
73+
def read_correlation_matrix(table_no: int) -> list:
    '''
    Load `rawdata/table_<table_no>.yaml` and return the `value` of every
    entry of its first dependent variable as a flat list.
    '''
    with open(f'rawdata/table_{table_no}.yaml', 'r') as table_file:
        table = yaml.safe_load(table_file)
    entries = table['dependent_variables'][0]['values']
    return [entry['value'] for entry in entries]
79+
80+
81+
def read_rel_errors(tables: list) -> list:
    '''
    Collect the `errors` entry of every data point of the first dependent
    variable, for each table in `tables` (read from `rawdata/table_<n>.yaml`).

    Returns one flat list with one item per data point, in table order.
    '''
    all_errors = []
    for table_num in tables:
        with open(f'rawdata/table_{table_num}.yaml', 'r') as table_file:
            table = yaml.safe_load(table_file)
        data_points = table['dependent_variables'][0]['values']
        all_errors.extend(point['errors'] for point in data_points)
    return all_errors
92+
93+
94+
def make_errors_absolute(errors: list, centrals: list):
    '''
    Convert percentage errors into absolute errors.

    `errors` holds one list per data point; each item of that list is a dict
    with a `label` and a `symerror` string whose last character is dropped
    before conversion to float (i.e. a trailing '%' sign). `centrals` holds
    the central value of each data point, in the same order.

    Returns a list with one dict per data point mapping each error label to
    `central * percentage / 100`.

    Raises ValueError if `errors` and `centrals` have different lengths;
    previously this case only printed a message and returned None.
    '''
    if len(errors) != len(centrals):
        raise ValueError(
            f'lengths dont match: {len(errors)} error entries '
            f'vs {len(centrals)} central values'
        )
    return [
        {
            source['label']: c_val * float(source['symerror'][:-1]) / 100
            for source in errors_at_dp
        }
        for errors_at_dp, c_val in zip(errors, centrals)
    ]
104+
105+
106+
def generate_stat_art_unc(abs_errors, kin_lengths):
    '''
    Build the artificial uncertainties from the statistical errors.

    `abs_errors` is a list of per-data-point dicts, each with a `stat` entry;
    `kin_lengths` gives the number of data points of each table, in order.

    For the i-th table (1-based) the statistical errors of its data points
    are combined with the correlation matrix read from table `i + 6` via
    `cormat_to_covmat` and `covmat_to_artunc`. Each resulting row is padded
    with zeros on both sides so that every row has `sum(kin_lengths)`
    entries, with the table's own block at the position of its data points.

    Returns a list of rows, one per data point.

    Raises ValueError if the number of statistical errors or the size of the
    correlation matrix does not match the table length; previously this was
    only printed and the rows of that table were left out of the result.
    '''
    stat_errors = [item['stat'] for item in abs_errors]
    # total row width; derived from the input instead of a fixed 122
    total_ndata = sum(kin_lengths)
    art_unc = []
    start = 0
    for table_idx, current_ndata in enumerate(kin_lengths, start=1):
        stop = start + current_ndata
        current_errors = stat_errors[start:stop]
        # the correlation matrix of data table i is read from table i + 6
        current_corr_mat = read_correlation_matrix(table_idx + 6)
        if len(current_errors) != current_ndata or len(current_corr_mat) != current_ndata**2:
            raise ValueError(
                f'lengths not matching: table {table_idx} has {current_ndata} points, '
                f'{len(current_errors)} stat errors and '
                f'{len(current_corr_mat)} correlation entries'
            )
        current_cov_mat = cormat_to_covmat(err_list=current_errors, cormat_list=current_corr_mat)
        current_art_unc = covmat_to_artunc(ndata=current_ndata, covmat_list=current_cov_mat)
        # NOTE(review): the padding below relies on covmat_to_artunc
        # returning rows as plain lists - confirm.
        for small_row in current_art_unc:
            art_unc.append([0] * start + small_row + [0] * (total_ndata - stop))
        start = stop
    return art_unc
127+
128+
129+
def aggregate_uncertainties(abs_errors, art_unc):
    '''
    Merge the systematic errors of each data point with its artificial
    uncertainties.

    `abs_errors` is a list of per-data-point dicts that each contain a `stat`
    entry; `art_unc` is a list of rows, with `art_unc[i]` belonging to data
    point `i`.

    Returns a list with one dict per data point: the original errors without
    `stat`, followed by `art_unc_1`, `art_unc_2`, ... for that row.

    The input dicts are left untouched; the `stat` entry is removed from a
    copy rather than from the caller's data.

    Raises KeyError if a data point has no `stat` entry and IndexError if
    `art_unc` has fewer rows than `abs_errors`.
    '''
    all_uncertainties = []
    for i, errors_at_point in enumerate(abs_errors):
        # shallow copy so that dropping 'stat' does not mutate the argument
        systematics = dict(errors_at_point)
        systematics.pop('stat')
        art_unc_dict = {
            f'art_unc_{j}': value for j, value in enumerate(art_unc[i], start=1)
        }
        all_uncertainties.append(systematics | art_unc_dict)
    return all_uncertainties
139+
140+
141+
def dump_uncertainties(all_unc):
    '''
    Write `uncertainties.yaml` in the current working directory.

    `all_unc` is a list of per-data-point dicts mapping uncertainty names to
    values. The output has two top-level keys, in this order:

      * `definitions`: description / treatment / type of every named
        systematic below, followed by one definition for each `art_unc_*`
        name found in `all_unc`;
      * `bins`: `all_unc` itself.

    The artificial-uncertainty definitions are derived from the names present
    in the bins (in order of first appearance) instead of a fixed count of
    122, so the definitions always match the data being written.
    '''
    # (name, description, treatment, type) of the named systematics
    systematics = (
        ('uncor', 'stems from residual effects of small inefficiencies in the jet identification', 'ADD', 'UNCORR'),
        ('jererr', 'jet energy resolution', 'MULT', 'CORR'),
        ('lumi', 'luminosity uncertainty', 'MULT', 'CMSLUMI19P7'),
        ('nongaussiantails', 'non-Gaussian tails in detector response to jets', 'MULT', 'CORR'),
        ('AbsoluteScale', 'absolute jet energy scale calibration', 'MULT', 'CORR'),
        ('AbsoluteStat', 'statistical uncertainty of absolute JES', 'MULT', 'CORR'),
        ('AbsoluteMPFBias', 'bias in MPF response method', 'MULT', 'CORR'),
        ('Fragmentation', 'fragmentation uncertainty', 'MULT', 'CORR'),
        ('SinglePionECAL', 'e-calorimeter response to single pions', 'MULT', 'CORR'),
        ('SinglePionHCAL', 'h-calorimeter response to single pions', 'MULT', 'CORR'),
        ('FlavorQCD', 'jet flavour composition uncertainty', 'MULT', 'CORR'),
        ('RelativeJEREC1', 'JER relative uncertainty', 'MULT', 'CORR'),
        ('RelativeJEREC2', 'JER relative uncertainty', 'MULT', 'CORR'),
        ('RelativeJERHF', 'JER relative uncertainty', 'MULT', 'CORR'),
        ('RelativePtBB', 'Relative JES vs pT', 'MULT', 'CORR'),
        ('RelativePtEC1', 'Relative JES vs pT', 'MULT', 'CORR'),
        ('RelativePtEC2', 'Relative JES vs pT', 'MULT', 'CORR'),
        ('RelativePtHF', 'Relative JES vs pT', 'MULT', 'CORR'),
        ('RelativeFSR', 'Final-state radiation modeling', 'MULT', 'CORR'),
        ('RelativeStatEC2', 'Relative JES statistical uncertainty', 'MULT', 'CORR'),
        ('RelativeStatHF', 'Relative JES statistical uncertainty', 'MULT', 'CORR'),
        ('RelativeStatFSR', 'Relative JES statistical uncertainty', 'MULT', 'CORR'),
        ('PileUpDataMC', 'Data–MC pileup mismatch', 'MULT', 'CORR'),
        ('PileUpPtRef', 'Pileup pT reference uncertainty', 'MULT', 'CORR'),
        ('PileUpPtBB', 'Pileup pT uncertainty', 'MULT', 'CORR'),
        ('PileUpPtEC1', 'Pileup pT uncertainty', 'MULT', 'CORR'),
        ('PileUpPtEC2', 'Pileup pT uncertainty', 'MULT', 'CORR'),
        ('PileUpPtHF', 'Pileup pT uncertainty', 'MULT', 'CORR'),
    )
    other_unc = {
        name: {'description': description, 'treatment': treatment, 'type': unc_type}
        for name, description, treatment, unc_type in systematics
    }

    # Unique art_unc_* names in order of first appearance across all bins.
    art_unc_names = dict.fromkeys(
        name for point in all_unc for name in point if name.startswith('art_unc_')
    )
    # A fresh dict per name so that no definition object is shared.
    all_art_unc_desc = {
        name: {
            'description': 'artificial uncertainty originating from correlated statistical uncertainties',
            'treatment': 'ADD',
            'type': 'CORR',
        }
        for name in art_unc_names
    }

    uncertainties_yaml = {
        'definitions': other_unc | all_art_unc_desc,
        'bins': all_unc,
    }
    with open('uncertainties.yaml', 'w') as file:
        yaml.safe_dump(uncertainties_yaml, file, sort_keys=False)
236+
237+
def main_filter():
    '''
    Run the whole filter: read the raw tables listed in the metadata, convert
    the errors to absolute values, build the artificial uncertainties from
    the statistical errors, and write `kinematics.yaml`, `data.yaml` and
    `uncertainties.yaml`.

    The errors are read from the same table list as the kinematics and the
    central values (the first six tables of the metadata) instead of a
    separate hard-coded list, so the two can never get out of step.
    '''
    tables = read_metadata()[0][:6]
    bins, centrals = read_kinematics_and_centrals(tables)
    kin_lengths = read_kinematics_lengths(tables)
    errors = read_rel_errors(tables)
    abs_errors = make_errors_absolute(errors, centrals)
    art_unc = generate_stat_art_unc(abs_errors, kin_lengths)
    all_unc = aggregate_uncertainties(abs_errors, art_unc)

    dump_kinematics_and_centrals(bins, centrals)
    dump_uncertainties(all_unc)
248+
249+
# Run the filter only when this file is executed as a script.
if __name__ == '__main__':
    main_filter()

0 commit comments

Comments
 (0)