1010
1111from numba import njit
1212from statistics import NormalDist
13+ import alphaquant .diffquant .diffutils as aqdiffutils
1314
@njit
def _compute_zscore_fast_bg(cumulative, min_fc, total):
    """Turn a cumulative fold-change histogram into one signed z-score per bin.

    The bin at index ``-min_fc`` is treated as the zero bin. Bins before it
    get negative z-scores, bins after it get positive ones. The zero bin and
    the very last bin are left at 0.0.

    For every other bin a one-sided tail fraction is formed from the
    cumulative counts (with a pseudocount of one in numerator and
    denominator), floored at 1e-9, halved, and mapped to a z-score with a
    closed-form rational approximation of the inverse standard-normal CDF.
    The coefficients are those tabulated as Abramowitz & Stegun formula
    26.2.23; the approximation avoids a ``NormalDist().inv_cdf`` call per bin.

    Args:
        cumulative: array of cumulative counts, one entry per fold-change bin.
        min_fc: bin offset; ``-min_fc`` is used as the index of the zero bin.
        total: overall count used to derive the upper-tail sizes.

    Returns:
        numpy array of the same length as ``cumulative`` holding the z-scores.
    """
    n_bins = len(cumulative)
    last_bin = n_bins - 1
    zero_bin = -min_fc
    out = np.zeros(n_bins)

    # Reciprocal tail sizes on either side of the zero bin (pseudocount of 1).
    inv_upper_tail = 1 / (total - cumulative[zero_bin] + 1)
    inv_lower_tail = 1 / (cumulative[zero_bin - 1] + 1)

    for idx in range(n_bins):
        if idx == zero_bin or idx == last_bin:
            # These two bins keep the 0.0 that np.zeros already put there.
            continue

        if idx < zero_bin:
            sign = -1.0
            tail_fraction = (cumulative[idx] + 1) * inv_lower_tail
        else:
            sign = 1.0
            tail_fraction = (total - cumulative[idx + 1] + 1) * inv_upper_tail

        p_val = 0.5 * max(1e-9, tail_fraction)

        # Only the magnitude of the quantile is used, so evaluate the
        # approximation on whichever tail (p or 1 - p) is at most 0.5.
        tail = p_val if p_val <= 0.5 else 1.0 - p_val
        t = np.sqrt(-2.0 * np.log(tail))
        numer = 2.515517 + 0.802853 * t + 0.010328 * t * t
        denom = 1.0 + 1.432788 * t + 0.189269 * t * t + 0.001308 * t * t * t

        out[idx] = sign * abs(t - numer / denom)

    return out
5615
@njit
def _compute_sd_fast_bg(cumulative, min_fc, mean, fc_conversion_factor):
    """Standard deviation of a binned fold-change distribution around ``mean``.

    The per-bin frequency is recovered as the difference between consecutive
    cumulative counts. Bin ``i`` is assigned the fold change
    ``(i + min_fc) * fc_conversion_factor``. The squared deviations from
    ``mean`` are weighted by frequency, summed, and divided by the final
    cumulative count before the square root is taken.

    Args:
        cumulative: sequence of cumulative counts, one entry per bin.
        min_fc: offset added to the bin index before conversion.
        mean: value the deviations are measured from.
        fc_conversion_factor: multiplier converting a bin number to a fold change.

    Returns:
        float: square root of the frequency-weighted mean squared deviation.
    """
    weighted_sq_dev = 0.0
    count_below = 0

    for idx, count_up_to in enumerate(cumulative):
        bin_fc = (idx + min_fc) * fc_conversion_factor
        weighted_sq_dev += (count_up_to - count_below) * (bin_fc - mean) ** 2
        count_below = count_up_to

    return math.sqrt(weighted_sq_dev / cumulative[-1])
7218
7319class ConditionBackgrounds ():
7420
@@ -284,11 +230,11 @@ def transform_cumulative_into_z_values(self, p2z: dict):
284230 self .max_z = abs (NormalDist ().inv_cdf (max (1e-9 , min_pval )))
285231
286232 # Use the Numba-optimized function for dramatic speedup (100x+ faster)
287- return _compute_zscore_fast_bg (self .cumulative , self .min_fc , total )
233+ return aqdiffutils . zscores_from_cumulative (self .cumulative , self .min_fc , total )
288234
289235
290236 def calc_zscore_from_fc (self , fc ):
291- return _calc_zscore_from_fc (fc , self .fc_conversion_factor , self .fc_resolution_factor , self .min_fc , self .cumulative , self .max_z , self .zscores )
237+ return aqdiffutils . z_from_fc_lookup (fc , self .fc_conversion_factor , self .fc_resolution_factor , self .min_fc , self .cumulative , self .max_z , self .zscores )
292238
293239
294240
@@ -300,7 +246,7 @@ def calc_SD(self, mean:float, cumulative:list):
300246 cumulative (list[int]): cumulative distribution array
301247 """
302248 # Use the Numba-optimized function for dramatic speedup (100x+ faster)
303- self .SD = _compute_sd_fast_bg (np .asarray (cumulative ), self .min_fc , mean , self .fc_conversion_factor )
249+ self .SD = aqdiffutils . sd_from_cumulative (np .asarray (cumulative ), self .min_fc , mean , self .fc_conversion_factor )
304250 self .var = self .SD ** 2
305251
306252 def get_cache_key (self ):
@@ -319,27 +265,7 @@ def get_cache_key(self):
319265 return (self .start_idx , self .end_idx , self .min_fc , self .max_fc ,
320266 len (self .cumulative ), round (self .SD , 6 ))
321267
@njit
def _calc_zscore_from_fc(fc, fc_conversion_factor, fc_resolution_factor, min_fc, cumulative, max_z, zscores):
    """Look up the precomputed z-score for an observed fold change.

    The fold change is scaled by ``fc_resolution_factor`` and truncated to an
    integer bin number; subtracting ``min_fc`` gives the index into
    ``zscores``. Fold changes whose magnitude is below
    ``fc_conversion_factor`` map to 0, and bins outside the table are clamped
    to ``-max_z`` / ``max_z``.

    Args:
        fc (float): observed fold change.
        fc_conversion_factor (float): magnitude threshold below which 0 is returned.
        fc_resolution_factor (float): multiplier turning a fold change into a bin number.
        min_fc (int): bin number that corresponds to index 0 of the table.
        cumulative: cumulative distribution array; only its length is used here.
        max_z (float): magnitude returned for fold changes outside the table.
        zscores: per-bin z-scores, indexed by ``bin number - min_fc``.

    Returns:
        float: z-value of the observed fold change, based on the background distribution
    """
    if abs(fc) < fc_conversion_factor:
        return 0

    bin_offset = int(fc * fc_resolution_factor) - min_fc

    # Clamp anything that falls off either end of the lookup table.
    if bin_offset >= len(cumulative):
        return max_z
    if bin_offset < 0:
        return -max_z

    return zscores[bin_offset]
343269
344270
345271# Cell
0 commit comments