@@ -4046,6 +4046,8 @@ def _precompute_agg_sufficient_stats(
40464046):
40474047 """Compute per-bin sufficient statistics for aggregation.
40484048
4049+ Uses np.bincount for O(N) vectorized accumulation (C-implemented).
4050+
40494051 Returns
40504052 -------
40514053 sum_x : ndarray[n_bins, n_cols] — sum of values (or weighted sum)
@@ -4060,26 +4062,34 @@ def _precompute_agg_sufficient_stats(
40604062 sum_x2 = np .zeros ((n_bins , n_cols ), dtype = np .float64 )
40614063 counts = np .zeros ((n_bins , n_cols ), dtype = np .float64 )
40624064
4065+ # Common mask: valid bin assignment
4066+ valid_bin = bin_ids >= 0
4067+
4068+ # Weight validity mask (if weights provided)
4069+ if weight_array is not None :
4070+ w_valid = np .isfinite (weight_array ) & (weight_array > 0 )
4071+ else :
4072+ w_valid = None
4073+
40634074 for ci , col in enumerate (col_names ):
40644075 vals = agg_arrays [col ]
4065- for i in range (len (vals )):
4066- bi = bin_ids [i ]
4067- if bi < 0 :
4068- continue
4069- v = vals [i ]
4070- if not np .isfinite (v ):
4071- continue
4072- if weight_array is not None :
4073- w = weight_array [i ]
4074- if not np .isfinite (w ) or w <= 0 :
4075- continue
4076- sum_x [bi , ci ] += w * v
4077- sum_x2 [bi , ci ] += w * v * v
4078- counts [bi , ci ] += w
4079- else :
4080- sum_x [bi , ci ] += v
4081- sum_x2 [bi , ci ] += v * v
4082- counts [bi , ci ] += 1.0
4076+ finite = np .isfinite (vals )
4077+
4078+ if weight_array is not None :
4079+ mask = valid_bin & finite & w_valid
4080+ ids = bin_ids [mask ]
4081+ v = vals [mask ]
4082+ w = weight_array [mask ]
4083+ sum_x [:, ci ] = np .bincount (ids , weights = w * v , minlength = n_bins )[:n_bins ]
4084+ sum_x2 [:, ci ] = np .bincount (ids , weights = w * v * v , minlength = n_bins )[:n_bins ]
4085+ counts [:, ci ] = np .bincount (ids , weights = w , minlength = n_bins )[:n_bins ]
4086+ else :
4087+ mask = valid_bin & finite
4088+ ids = bin_ids [mask ]
4089+ v = vals [mask ]
4090+ sum_x [:, ci ] = np .bincount (ids , weights = v , minlength = n_bins )[:n_bins ]
4091+ sum_x2 [:, ci ] = np .bincount (ids , weights = v * v , minlength = n_bins )[:n_bins ]
4092+ counts [:, ci ] = np .bincount (ids , minlength = n_bins )[:n_bins ].astype (np .float64 )
40834093
40844094 return sum_x , sum_x2 , counts , col_names
40854095
0 commit comments