1414
1515# ---------------------------------------------------------------------------
1616# Fake pandas-like NA / NaT types for testing without importing pandas.
17- # __module__ is set to "pandas" so that _val_to_delete's module check works.
17+ # __module__ mirrors real pandas internals closely enough to exercise fallback
18+ # detection without importing pandas.
1819# ---------------------------------------------------------------------------
1920
21+
2022class NAType :
2123 """Mimics pandas.NA (pandas.core.arrays.masked.NAType)."""
22- __module__ = "pandas"
24+
25+ __module__ = "pandas._libs.missing"
2326
2427 def __bool__ (self ):
2528 raise TypeError ("boolean value of NA is ambiguous" )
@@ -36,7 +39,8 @@ def __repr__(self):
3639
3740class NaTType :
3841 """Mimics pandas.NaT (pandas._libs.tslibs.nattype.NaTType)."""
39- __module__ = "pandas"
42+
43+ __module__ = "pandas._libs.tslibs.nattype"
4044
4145 def __bool__ (self ):
4246 raise TypeError ("boolean value of NaT is ambiguous" )
@@ -58,10 +62,21 @@ def __repr__(self):
5862_VALS_WITH_NAN = _DEFAULT_VALS + [float ("nan" ), _NA , _NaT ]
5963
6064
65+ @pytest .fixture (scope = "module" )
66+ def pandas ():
67+ return pytest .importorskip ("pandas" )
68+
69+
70+ @pytest .fixture (scope = "module" )
71+ def numpy ():
72+ return pytest .importorskip ("numpy" )
73+
74+
6175# ---------------------------------------------------------------------------
6276# _val_to_delete helper tests
6377# ---------------------------------------------------------------------------
6478
79+
6580class TestValToDelete :
6681 # -- default vals (no NaN entries) -----------------------------------
6782
@@ -73,6 +88,9 @@ def test_regular_values_not_deleted(self):
7388 for val in [0 , 1 , - 1 , "hello" , None , [], {}, 0.0 , 1.5 , True , False ]:
7489 assert _val_to_delete (val , _DEFAULT_VALS ) is False , f"should keep { val !r} "
7590
91+ def test_string_vals_matched (self ):
92+ assert _val_to_delete ("NA" , "NA" ) is True
93+
7694 def test_float_nan_not_deleted_by_default (self ):
7795 """float NaN is kept when vals does not contain a NaN float."""
7896 assert _val_to_delete (float ("nan" ), _DEFAULT_VALS ) is False
@@ -96,11 +114,19 @@ def test_na_deleted_when_in_vals(self):
96114 def test_nat_deleted_when_in_vals (self ):
97115 assert _val_to_delete (_NaT , _VALS_WITH_NAN ) is True
98116
117+ def test_na_and_nat_are_not_interchangeable (self ):
118+ assert _val_to_delete (_NA , _DEFAULT_VALS + [_NaT ]) is False
119+ assert _val_to_delete (_NaT , _DEFAULT_VALS + [_NA ]) is False
120+
121+ def test_regular_match_after_na_like_value (self ):
122+ assert _val_to_delete ("" , [_NA , "" ]) is True
123+
99124
100125# ---------------------------------------------------------------------------
101126# dict_sweep — default vals (NaN kept)
102127# ---------------------------------------------------------------------------
103128
129+
104130class TestDictSweepDefaultKeepsNan :
105131 def test_float_nan_kept (self ):
106132 d = {"a" : 1 , "b" : float ("nan" )}
@@ -135,6 +161,7 @@ def test_default_vals_still_removed(self):
135161# dict_sweep — opt-in NaN removal (NaN in vals)
136162# ---------------------------------------------------------------------------
137163
164+
138165class TestDictSweepOptInNanRemoval :
139166 def test_float_nan_removed (self ):
140167 d = {"a" : 1 , "b" : float ("nan" )}
@@ -161,6 +188,7 @@ def test_multiple_nan_types_removed(self):
161188# dict_sweep — NaN inside lists (opt-in)
162189# ---------------------------------------------------------------------------
163190
191+
164192class TestDictSweepNanInList :
165193 def test_nan_removed_from_list (self ):
166194 d = {"a" : [1 , float ("nan" ), 2 ]}
@@ -192,11 +220,22 @@ def test_all_nan_list_removed_with_remove_invalid_list(self):
192220 result = dict_sweep (d , vals = _VALS_WITH_NAN , remove_invalid_list = True )
193221 assert "a" not in result
194222
223+ def test_all_invalid_list_preserves_false_mode_behavior (self ):
224+ d = {"gene" : [None , None ], "site" : ["Intron" , None ], "snp_build" : 136 }
225+ result = dict_sweep (d , vals = [None ], remove_invalid_list = False )
226+ assert result == {"gene" : [None ], "site" : ["Intron" ], "snp_build" : 136 }
227+
228+ def test_all_invalid_list_removed_with_remove_invalid_list (self ):
229+ d = {"gene" : [None , None ], "site" : ["Intron" , None ], "snp_build" : 136 }
230+ result = dict_sweep (d , vals = [None ], remove_invalid_list = True )
231+ assert result == {"site" : ["Intron" ], "snp_build" : 136 }
232+
195233
196234# ---------------------------------------------------------------------------
197235# dict_sweep — NaN in nested dicts (opt-in)
198236# ---------------------------------------------------------------------------
199237
238+
200239class TestDictSweepNanNested :
201240 def test_nan_in_nested_dict (self ):
202241 d = {"a" : {"b" : float ("nan" ), "c" : 1 }}
@@ -223,6 +262,7 @@ def test_nan_in_list_of_dicts(self):
223262# dict_sweep — default vals behaviour unchanged
224263# ---------------------------------------------------------------------------
225264
265+
226266class TestDictSweepDefaultBehavior :
227267 def test_normal_values_kept (self ):
228268 d = {"a" : 1 , "b" : "hello" , "c" : [1 , 2 ], "d" : {"nested" : True }}
@@ -234,38 +274,51 @@ def test_mixed_nan_and_default_vals_with_optin(self):
234274 result = dict_sweep (d , vals = _VALS_WITH_NAN )
235275 assert result == {"d" : "keep" }
236276
277+ def test_string_vals_supported (self ):
278+ d = {"a" : "NA" , "b" : "keep" , "c" : ["NA" , "keep" ]}
279+ result = dict_sweep (d , vals = "NA" )
280+ assert result == {"b" : "keep" , "c" : ["keep" ]}
281+
237282
238283# ---------------------------------------------------------------------------
239- # Tests using real pandas and numpy types
284+ # Tests using real pandas and numpy types, when the optional deps are installed.
240285# ---------------------------------------------------------------------------
241286
242- pandas = pytest .importorskip ("pandas" )
243- numpy = pytest .importorskip ("numpy" )
244-
245287
246288class TestValToDeleteRealTypes :
247- def test_numpy_nan_not_deleted_by_default (self ):
289+ def test_numpy_nan_not_deleted_by_default (self , numpy ):
248290 assert _val_to_delete (numpy .nan , _DEFAULT_VALS ) is False
249291
250- def test_pandas_na_not_deleted_by_default (self ):
292+ def test_pandas_na_not_deleted_by_default (self , pandas ):
251293 assert _val_to_delete (pandas .NA , _DEFAULT_VALS ) is False
252294
253- def test_pandas_nat_not_deleted_by_default (self ):
295+ def test_pandas_nat_not_deleted_by_default (self , pandas ):
254296 assert _val_to_delete (pandas .NaT , _DEFAULT_VALS ) is False
255297
256- def test_numpy_nan_deleted_when_in_vals (self ):
298+ def test_numpy_nan_deleted_when_in_vals (self , numpy ):
257299 vals_with_nan = _DEFAULT_VALS + [float ("nan" )]
258300 assert _val_to_delete (numpy .nan , vals_with_nan ) is True
259301
260- def test_pandas_na_deleted_when_in_vals (self ):
302+ def test_pandas_na_deleted_when_in_vals (self , pandas ):
261303 vals_with_na = _DEFAULT_VALS + [pandas .NA ]
262304 assert _val_to_delete (pandas .NA , vals_with_na ) is True
263305
264- def test_pandas_nat_deleted_when_in_vals (self ):
306+ def test_pandas_nat_deleted_when_in_vals (self , pandas ):
265307 vals_with_nat = _DEFAULT_VALS + [pandas .NaT ]
266308 assert _val_to_delete (pandas .NaT , vals_with_nat ) is True
267309
268- def test_numpy_float64_nan_deleted_when_in_vals (self ):
310+ def test_pandas_na_and_nat_are_not_interchangeable (self , pandas ):
311+ assert _val_to_delete (pandas .NA , _DEFAULT_VALS + [pandas .NaT ]) is False
312+ assert _val_to_delete (pandas .NaT , _DEFAULT_VALS + [pandas .NA ]) is False
313+
314+ def test_regular_match_after_pandas_na (self , pandas ):
315+ assert _val_to_delete ("" , [pandas .NA , "" ]) is True
316+
317+ def test_numpy_float32_nan_deleted_when_in_vals (self , numpy ):
318+ vals_with_nan = _DEFAULT_VALS + [float ("nan" )]
319+ assert _val_to_delete (numpy .float32 ("nan" ), vals_with_nan ) is True
320+
321+ def test_numpy_float64_nan_deleted_when_in_vals (self , numpy ):
269322 vals_with_nan = _DEFAULT_VALS + [float ("nan" )]
270323 assert _val_to_delete (numpy .float64 ("nan" ), vals_with_nan ) is True
271324
@@ -276,52 +329,52 @@ class TestDictSweepRealPandas:
276329 def _vals_with (self , * extras ):
277330 return _DEFAULT_VALS + list (extras )
278331
279- def test_pandas_na_top_level (self ):
332+ def test_pandas_na_top_level (self , pandas ):
280333 d = {"a" : 1 , "b" : pandas .NA }
281334 result = dict_sweep (d , vals = self ._vals_with (pandas .NA ))
282335 assert result == {"a" : 1 }
283336
284- def test_pandas_nat_top_level (self ):
337+ def test_pandas_nat_top_level (self , pandas ):
285338 d = {"a" : 1 , "b" : pandas .NaT }
286339 result = dict_sweep (d , vals = self ._vals_with (pandas .NaT ))
287340 assert result == {"a" : 1 }
288341
289- def test_numpy_nan_top_level (self ):
342+ def test_numpy_nan_top_level (self , numpy ):
290343 d = {"a" : 1 , "b" : numpy .nan }
291344 result = dict_sweep (d , vals = self ._vals_with (float ("nan" )))
292345 assert result == {"a" : 1 }
293346
294- def test_pandas_na_in_list (self ):
347+ def test_pandas_na_in_list (self , pandas ):
295348 d = {"a" : [1 , pandas .NA , 2 ]}
296349 result = dict_sweep (d , vals = self ._vals_with (pandas .NA ))
297350 assert result == {"a" : [1 , 2 ]}
298351
299- def test_pandas_nat_in_list (self ):
352+ def test_pandas_nat_in_list (self , pandas ):
300353 d = {"a" : [1 , pandas .NaT , 2 ]}
301354 result = dict_sweep (d , vals = self ._vals_with (pandas .NaT ))
302355 assert result == {"a" : [1 , 2 ]}
303356
304- def test_numpy_nan_in_list (self ):
357+ def test_numpy_nan_in_list (self , numpy ):
305358 d = {"a" : [1 , numpy .nan , 2 ]}
306359 result = dict_sweep (d , vals = self ._vals_with (float ("nan" )))
307360 assert result == {"a" : [1 , 2 ]}
308361
309- def test_pandas_na_in_nested_dict (self ):
362+ def test_pandas_na_in_nested_dict (self , pandas ):
310363 d = {"a" : {"b" : pandas .NA , "c" : 1 }}
311364 result = dict_sweep (d , vals = self ._vals_with (pandas .NA ))
312365 assert result == {"a" : {"c" : 1 }}
313366
314- def test_pandas_na_in_list_with_remove_invalid_list (self ):
367+ def test_pandas_na_in_list_with_remove_invalid_list (self , pandas ):
315368 d = {"a" : [pandas .NA , pandas .NaT , "valid" ]}
316369 result = dict_sweep (d , vals = self ._vals_with (pandas .NA , pandas .NaT ), remove_invalid_list = True )
317370 assert result == {"a" : ["valid" ]}
318371
319- def test_all_real_nan_types_removed (self ):
372+ def test_all_real_nan_types_removed (self , pandas , numpy ):
320373 d = {"a" : numpy .nan , "b" : pandas .NA , "c" : pandas .NaT , "d" : "keep" }
321374 result = dict_sweep (d , vals = self ._vals_with (float ("nan" ), pandas .NA , pandas .NaT ))
322375 assert result == {"d" : "keep" }
323376
324- def test_real_nan_kept_by_default (self ):
377+ def test_real_nan_kept_by_default (self , pandas , numpy ):
325378 """Without opting in, real NaN types are preserved."""
326379 d = {"a" : numpy .nan , "b" : pandas .NA , "c" : pandas .NaT , "d" : "keep" }
327380 result = dict_sweep (d )
0 commit comments