11import functools
2- from typing import Any , Callable , Optional , Tuple , TypeVar
2+ from typing import Any , Callable , Optional , Tuple , TypeVar , Union
33
44import numpy as np
55import pandas as pd
@@ -26,6 +26,41 @@ def inner(
2626 return inner
2727
2828
def safe_histogram(
    values: np.ndarray,
    bins: Union[int, str, np.ndarray] = "auto",
    weights: Optional[np.ndarray] = None,
    density: bool = False,
) -> Tuple[np.ndarray, np.ndarray]:
    """Compute a histogram, degrading gracefully when the bins cannot be built.

    Wrapper around ``np.histogram`` that avoids::

        ValueError: Too many bins for data range. Cannot create N finite-sized bins.

    Strategy, in order:

    1. ``np.histogram`` with the requested ``bins``.
    2. If that fails with the "Too many bins" error, retry with ``bins="auto"``.
    3. If that fails too, use a single bin spanning the finite data range
       (or a window around the value when all finite values are identical).

    Args:
        values: Data to bin. Any array-like accepted by ``np.histogram``.
        bins: Bin specification forwarded to ``np.histogram``.
        weights: Optional per-value weights, forwarded unchanged.
        density: Forwarded to ``np.histogram``.

    Returns:
        A ``(counts, bin_edges)`` tuple. Both arrays are empty when the
        fallback is reached and ``values`` holds no finite entries.

    Raises:
        ValueError: Any ``ValueError`` from the first attempt whose message is
            not the "Too many bins for data range" error is re-raised as is.
    """
    try:
        return np.histogram(values, bins=bins, weights=weights, density=density)
    except ValueError as exc:
        # Only the "too many bins" failure is recoverable here.
        if "Too many bins for data range" not in str(exc):
            raise

    try:
        return np.histogram(values, bins="auto", weights=weights, density=density)
    except ValueError:
        # Deliberately ignored: fall through to the single-bin fallback below.
        pass

    # Normalise so boolean-mask indexing also works for lists/tuples, which
    # the np.histogram calls above already accept.
    data = np.asarray(values)
    finite = data[np.isfinite(data)]
    if finite.size == 0:
        return np.array([]), np.array([])

    vmin = float(np.min(finite))
    vmax = float(np.max(finite))
    if vmin == vmax:
        # Degenerate range: open a window around the single value. The width
        # is relative to the magnitude so it stays representable for large
        # values; 0 gets a fixed half-width of 0.5.
        eps = 0.5 if vmin == 0 else abs(vmin) * 0.5
        bin_edges = np.array([vmin - eps, vmin + eps])
    else:
        bin_edges = np.array([vmin, vmax])

    return np.histogram(data, bins=bin_edges, weights=weights, density=density)
62+
63+
2964def histogram_compute (
3065 config : Settings ,
3166 finite_values : np .ndarray ,
@@ -36,27 +71,75 @@ def histogram_compute(
3671 stats = {}
3772 if len (finite_values ) == 0 :
3873 return {name : []}
74+
3975 hist_config = config .plot .histogram
40- bins_arg = "auto" if hist_config .bins == 0 else min (hist_config .bins , n_unique )
41- bins = np .histogram_bin_edges (finite_values , bins = bins_arg )
42- if len (bins ) > hist_config .max_bins :
43- bins = np .histogram_bin_edges (finite_values , bins = hist_config .max_bins )
44- weights = weights if weights and len (weights ) == hist_config .max_bins else None
45-
46- stats [name ] = np .histogram (
47- finite_values , bins = bins , weights = weights , density = config .plot .histogram .density
76+
77+ # Compute data range
78+ finite = finite_values [np .isfinite (finite_values )]
79+ vmin = float (np .min (finite ))
80+ vmax = float (np .max (finite ))
81+ data_range = vmax - vmin
82+
83+ # Choose the bins based on the observed data values
84+ if data_range == 0 :
85+ eps = 0.5 if vmin == 0 else abs (vmin ) * 0.1
86+ bins = np .array ([vmin - eps , vmin + eps ])
87+ else :
88+ requested_bins = hist_config .bins if hist_config .bins > 0 else "auto"
89+
90+ if isinstance (requested_bins , int ):
91+ safe_bins = min (requested_bins , n_unique , hist_config .max_bins )
92+
93+ safe_bins = max (1 , safe_bins )
94+
95+ bins = np .linspace (vmin , vmax , safe_bins + 1 )
96+ else :
97+ bins = np .histogram_bin_edges (finite_values , bins = "auto" )
98+ if len (bins ) - 1 > hist_config .max_bins :
99+ bins = np .linspace (vmin , vmax , hist_config .max_bins + 1 )
100+
101+ hist = np .histogram (
102+ finite_values ,
103+ bins = bins ,
104+ weights = weights ,
105+ density = hist_config .density ,
48106 )
107+
108+ stats [name ] = hist
49109 return stats
50110
51111
def chi_square(
    values: Optional[np.ndarray] = None,
    histogram: Optional[np.ndarray] = None,
) -> dict:
    """Run ``chisquare`` on a histogram, building the histogram if needed.

    Args:
        values: Raw data. Only used when ``histogram`` is not supplied, in
            which case it is binned with NumPy's ``"auto"`` strategy.
        histogram: Pre-computed bin counts. Takes precedence over ``values``.

    Returns:
        The fields of the ``chisquare`` result as a dict. When there is
        nothing to test (no input at all, no finite values in the fallback
        path, or an empty / all-zero histogram) the result is
        ``{"statistic": 0, "pvalue": 0}``.
    """
    if histogram is None:
        if values is None:
            return {"statistic": 0, "pvalue": 0}

        # Normalise so the boolean-mask fallback below also works for plain
        # sequences, which the NumPy histogram functions already accept.
        values = np.asarray(values)

        # "auto" binning can raise ValueError (reported under NumPy 2).
        try:
            bins = np.histogram_bin_edges(values, bins="auto")
        except ValueError:
            # Fallback: a single bin covering the finite min..max range.
            finite = values[np.isfinite(values)]
            if finite.size == 0:
                return {"statistic": 0, "pvalue": 0}

            vmin = float(finite.min())
            vmax = float(finite.max())
            if vmin == vmax:
                bins = np.array([vmin - 0.5, vmin + 0.5])
            else:
                bins = np.array([vmin, vmax])

        histogram, _ = np.histogram(values, bins=bins)
    else:
        # Accept lists/tuples as well as arrays: `.size` and `.sum()` below
        # exist only on ndarrays.
        histogram = np.asarray(histogram)

    # Nothing to test on an empty or all-zero histogram.
    if histogram.size == 0 or histogram.sum() == 0:
        return {"statistic": 0, "pvalue": 0}

    return dict(chisquare(histogram)._asdict())
61144
62145
0 commit comments