Skip to content

Commit 6b6a612

Browse files
committed
added docstrings to functions and moved some functions to helpers
1 parent 16fe05f commit 6b6a612

2 files changed

Lines changed: 79 additions & 68 deletions

File tree

validphys2/src/validphys/closuretest/multiclosure_nsigma.py

Lines changed: 25 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,7 @@
11
"""
22
This module contains the functions to compute the consistency / inconsistency sets.
33
4-
Assuming that we have two datasets A and B, and that we are investigating whether A is consistent or not
5-
we can define the following sets:
6-
7-
8-
1⍺ = {i | nσi > Z⍺} ...
9-
4+
TODO
105
"""
116

127
import dataclasses
@@ -83,43 +78,6 @@ def chi2_nsigma_deviation(central_member_chi2: CentralChi2Data) -> float:
8378
fits_data = collect("data", ("fits", "fitinputcontext"))
8479

8580

86-
def is_weighted(fits_data: list) -> bool:
87-
"""
88-
Returns whether the considered multiclosure tests has been weighted or not.
89-
If the weighted datasets are not the same for all fits,
90-
or there is more than one weighted dataset, an error is raised.
91-
92-
Parameters
93-
----------
94-
fits_data: list
95-
List of data for each fit.
96-
97-
Returns
98-
-------
99-
str or None
100-
Name of the weighted dataset.
101-
"""
102-
# Extract the set of unique weighted dataset names from all fits
103-
weighted_ds_sets = [{ds.name for ds in data.datasets if ds.weight != 1} for data in fits_data]
104-
105-
# Ensure all fits have the same set of weighted datasets
106-
if len(set(frozenset(ds_set) for ds_set in weighted_ds_sets)) > 1:
107-
error_msg = "Weighted datasets are not the same for all fits in the same multiclosure test (dataspec)."
108-
log.error(error_msg)
109-
raise ValueError(error_msg)
110-
111-
# Extract the single weighted dataset set (all should be identical)
112-
weighted_ds = next(iter(weighted_ds_sets))
113-
114-
# Ensure there is exactly one weighted dataset
115-
if len(weighted_ds) > 1:
116-
error_msg = "Only one dataset can be weighted in a multiclosure test."
117-
log.error(error_msg)
118-
raise ValueError(error_msg)
119-
120-
return bool(weighted_ds)
121-
122-
12381
@dataclasses.dataclass
12482
class MulticlosureNsigma:
12583
"""
@@ -179,23 +137,6 @@ def multiclosurefits_nsigma(
179137
dataspecs_multiclosurefits_nsigma = collect("multiclosurefits_nsigma", ("dataspecs",))
180138

181139

182-
def n_fits(dataspecs):
183-
"""
184-
Computes the total number of fits in the multiclosure test.
185-
If the number of fits is not the same across dataspecs it raises an error.
186-
"""
187-
n_fits = set()
188-
for dataspec in dataspecs:
189-
n_fits.add(len(dataspec['fits']))
190-
191-
if len(n_fits) > 1:
192-
error_msg = "The number of fits is not the same across dataspecs."
193-
log.error(error_msg)
194-
raise ValueError(error_msg)
195-
196-
return next(iter(n_fits))
197-
198-
199140
@dataclasses.dataclass
200141
class NsigmaAlpha:
201142
"""
@@ -228,7 +169,7 @@ def def_of_nsigma_alpha(
228169
The name of the weighted dataset.
229170
complement: bool, default=False
230171
Whether to compute the complement set 1 alpha values.
231-
172+
232173
Returns
233174
-------
234175
NsigmaAlpha
@@ -275,13 +216,18 @@ def comp_nsigma_alpha(multiclosurefits_nsigma: pd.DataFrame, weighted_dataset: s
275216

276217
def set_1_alpha(dataspecs_nsigma_alpha: list) -> dict:
277218
"""
278-
Returns the set 1 alpha values.
219+
Returns the set 1 alpha values, which are defined as
220+
221+
1_{\alpha} = {i | n_{\sigma}^{i} > Z_{\alpha}}
222+
223+
where i is the index of the fit and n_{\sigma}^{i} is the n-sigma value computed
224+
for fit i.
279225
280226
Parameters
281227
----------
282228
dataspecs_nsigma_alpha: list
283229
List of NsigmaAlpha dataclasses.
284-
230+
285231
Returns
286232
-------
287233
dict
@@ -293,13 +239,18 @@ def set_1_alpha(dataspecs_nsigma_alpha: list) -> dict:
293239

294240
def set_3_alpha(dataspecs_nsigma_alpha: list) -> dict:
295241
"""
296-
Same as the set 1 alpha values, but for the weighted datasets.
242+
Same as the set 1 alpha values, but for the weighted fits.
243+
244+
3_{\alpha} = {i | n_{weighted, \sigma}^{i} > Z_{\alpha}}
245+
246+
where i is the index of the fit and n_{weighted, \sigma}^{i} is the n-sigma value computed
247+
on the weighted dataset for fit i.
297248
298249
Parameters
299250
----------
300251
dataspecs_nsigma_alpha: list
301252
List of NsigmaAlpha dataclasses.
302-
253+
303254
Returns
304255
-------
305256
dict
@@ -320,7 +271,7 @@ def comp_set_1_alpha(dataspecs_comp_nsigma_alpha: list) -> dict:
320271

321272
def comp_set_3_alpha(dataspecs_comp_nsigma_alpha: list) -> dict:
322273
"""
323-
Same as the complement set 1 alpha values, but for the weighted datasets.
274+
Returns the complement set 3 alpha values.
324275
"""
325276
for dataspec_nsigma in dataspecs_comp_nsigma_alpha:
326277
if dataspec_nsigma.is_weighted:
@@ -342,7 +293,7 @@ def def_set_2(
342293
The name of the weighted dataset.
343294
complement: bool, default=False
344295
Whether to compute the complement set 2 alpha values.
345-
296+
346297
Returns
347298
-------
348299
dict
@@ -384,7 +335,13 @@ def def_set_2(
384335

385336
def set_2_alpha(dataspecs_multiclosurefits_nsigma: list, weighted_dataset: str) -> dict:
386337
"""
387-
Computes the set 2 alpha values.
338+
Computes the set 2 alpha values. The set 2 is defined as:
339+
340+
2_{\alpha} = {i | n_{weighted, \sigma}^{i} - n_{ref, \sigma}^{i} > Z_{\alpha}}
341+
342+
where the n-sigma is computed on all datasets that are not the weighted dataset.
343+
Moreover, if for a fit i any dataset has an n-sigma value greater than Z_{\alpha}, then
344+
the fit i is included in the set.
388345
"""
389346
return def_set_2(dataspecs_multiclosurefits_nsigma, weighted_dataset, complement=False)
390347

validphys2/src/validphys/closuretest/multiclosure_nsigma_helpers.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,57 @@ def compute_nsigma_critical_value(
112112

113113
z_alpha = norm.ppf(1 - alpha)
114114
return c, z_alpha
115+
116+
117+
def is_weighted(fits_data: list) -> bool:
    """
    Return whether the considered multiclosure test has been weighted.

    A dataset counts as weighted when its ``weight`` attribute differs from 1.

    Parameters
    ----------
    fits_data: list
        List of data for each fit. Each element must expose a ``datasets``
        iterable whose items have ``name`` and ``weight`` attributes.

    Returns
    -------
    bool
        True if exactly one dataset is weighted (the same one in every fit),
        False if no dataset is weighted or if ``fits_data`` is empty.

    Raises
    ------
    ValueError
        If the weighted datasets are not the same for all fits, or if more
        than one dataset is weighted.
    """
    # Collapse the per-fit sets of weighted dataset names into the set of
    # distinct configurations; frozenset makes each configuration hashable.
    weighted_configurations = {
        frozenset(ds.name for ds in data.datasets if ds.weight != 1) for data in fits_data
    }

    # No fits at all: nothing can be weighted. Returning here avoids leaking a
    # bare StopIteration from next() on an empty collection.
    if not weighted_configurations:
        return False

    # Ensure all fits have the same set of weighted datasets
    if len(weighted_configurations) > 1:
        error_msg = "Weighted datasets are not the same for all fits in the same multiclosure test (dataspec)."
        log.error(error_msg)
        raise ValueError(error_msg)

    # Exactly one configuration is left at this point
    weighted_ds = next(iter(weighted_configurations))

    # Ensure there is at most one weighted dataset
    if len(weighted_ds) > 1:
        error_msg = "Only one dataset can be weighted in a multiclosure test."
        log.error(error_msg)
        raise ValueError(error_msg)

    return bool(weighted_ds)
152+
153+
154+
def n_fits(dataspecs):
    """
    Return the number of fits shared by every dataspec of the multiclosure test.

    Parameters
    ----------
    dataspecs: iterable
        Each item is indexed with the ``'fits'`` key and the length of that
        entry is taken as the number of fits of the dataspec.

    Returns
    -------
    int
        The common number of fits.

    Raises
    ------
    ValueError
        If the number of fits is not the same across dataspecs.

    Notes
    -----
    An empty ``dataspecs`` propagates the ``StopIteration`` raised by ``next``.
    """
    # Distinct fit counts seen across the dataspecs
    fit_counts = {len(spec['fits']) for spec in dataspecs}

    if len(fit_counts) <= 1:
        return next(iter(fit_counts))

    error_msg = "The number of fits is not the same across dataspecs."
    log.error(error_msg)
    raise ValueError(error_msg)

0 commit comments

Comments
 (0)