99# from __future__ import unicode_literals
1010import itertools
1111import json
12+ import math
1213import os
1314import os .path
1415from collections import Counter , OrderedDict
2122csv .field_size_limit (10000000 ) # default is 131072, too small for some big files
2223
2324
25+ def _missing_value_kind (val ):
26+ """Return a stable kind for NaN-like values without importing optional deps."""
27+ val_cls = val .__class__
28+ cls_module = getattr (val_cls , "__module__" , "" )
29+ cls_name = getattr (val_cls , "__name__" , "" )
30+
31+ if (cls_module == "pandas" or cls_module .startswith ("pandas." )) and cls_name in ("NAType" , "NaTType" ):
32+ return cls_name
33+
34+ try :
35+ if math .isnan (val ):
36+ return "NaN"
37+ except (TypeError , ValueError ):
38+ pass
39+
40+ return None
41+
42+
def _val_to_delete(val, vals):
    """Tell whether ``val`` matches one of the values marked for deletion.

    A bare string passed as ``vals`` is treated as a one-element list.
    NaN-like values (float NaN, pandas.NA, pandas.NaT) match only a candidate
    of the same missing-value kind, so they are removed only when such a
    value is explicitly present in ``vals``.

    Returns:
        True if ``val`` should be deleted, False otherwise.
    """
    candidates = [vals] if is_str(vals) else vals
    kind_of_val = _missing_value_kind(val)

    for other in candidates:
        kind_of_other = _missing_value_kind(other)

        if not (kind_of_val or kind_of_other):
            # Neither side is NaN-like: ordinary equality decides. Comparisons
            # that raise are treated as "no match".
            try:
                if val == other:
                    return True
            except (TypeError, ValueError):
                pass
        elif kind_of_val == kind_of_other:
            # Both sides are the same kind of missing value.
            return True

    return False
68+
69+
2470def dict_sweep (d , vals = None , remove_invalid_list = False ):
2571 """
26- Remove keys whose values are ".", "-", "", "NA", "none", " "; and remove empty dictionaries
72+ Remove keys whose values are ".", "-", "", "NA", "none", " "; and remove empty dictionaries.
73+
74+ NaN-like values (float NaN, pandas.NA, pandas.NaT) are only removed when
75+ explicitly included in the ``vals`` list.
2776
2877 Args:
2978 d (dict): a dictionary
@@ -45,11 +94,11 @@ def dict_sweep(d, vals=None, remove_invalid_list=False):
4594 # set default supported vals for empty values
4695 vals = vals or ["." , "-" , "" , "NA" , "none" , " " , "Not Available" , "unknown" ]
4796 for key , val in list (d .items ()):
48- if val in vals :
97+ if _val_to_delete ( val , vals ) :
4998 del d [key ]
5099 elif isinstance (val , list ):
51100 if remove_invalid_list :
52- val = [v for v in val if v not in vals ]
101+ val = [v for v in val if not _val_to_delete ( v , vals ) ]
53102 for item in val :
54103 if isinstance (item , dict ):
55104 dict_sweep (item , vals , remove_invalid_list = remove_invalid_list )
@@ -59,12 +108,14 @@ def dict_sweep(d, vals=None, remove_invalid_list=False):
59108 else :
60109 d [key ] = val
61110 else :
62- for item in val :
63- if item in vals :
64- val .remove (item )
111+ i = 0
112+ while i < len (val ):
113+ item = val [i ]
114+ if _val_to_delete (item , vals ):
115+ del val [i ]
65116 elif isinstance (item , dict ):
66117 dict_sweep (item , vals , remove_invalid_list = remove_invalid_list )
67- # if len(val) == 0:
118+ i += 1
68119 if not val :
69120 del d [key ]
70121 elif isinstance (val , dict ):
0 commit comments