Skip to content

Commit 346362f

Browse files
Authored by DerWeh
MAINT: make compatible with NumPy 2 (#525)
Signed-off-by: DerWeh <andreas.weh@web.de>
1 parent d06e9f0 commit 346362f

4 files changed

Lines changed: 28 additions & 28 deletions

File tree

python/interpret-core/interpret/utils/_clean_simple.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -260,9 +260,9 @@ def typify_classification(vec):
260260
):
261261
dtype = np.bool_
262262
else:
263-
dtype = np.unicode_
263+
dtype = np.str_
264264
else:
265-
dtype = np.unicode_
265+
dtype = np.str_
266266

267267
return vec.astype(dtype, copy=False)
268268

python/interpret-core/interpret/utils/_clean_x.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -294,8 +294,8 @@
294294
dict,
295295
Ellipsis,
296296
np.csingle,
297-
np.complex_,
298-
np.clongfloat,
297+
np.complex128,
298+
np.clongdouble,
299299
np.void,
300300
]
301301
)
@@ -314,7 +314,7 @@ def _densify_object_ndarray(X_col):
314314
types = set(map(type, X_col))
315315
if len(types) == 1:
316316
if str in types:
317-
return X_col.astype(np.unicode_)
317+
return X_col.astype(np.str_)
318318
elif bool in types:
319319
return X_col.astype(np.bool_)
320320

@@ -353,7 +353,7 @@ def _densify_object_ndarray(X_col):
353353
# it will silently convert negative integers to unsigned!
354354

355355
# TODO : should this be np.float64 with a check for big integers
356-
return X_col.astype(np.unicode_)
356+
return X_col.astype(np.str_)
357357

358358
if all(
359359
one_type is float or issubclass(one_type, np.floating) for one_type in types
@@ -430,7 +430,7 @@ def _densify_object_ndarray(X_col):
430430
# writing our own cython code that can be more efficient at walking through items in an array. If we write
431431
# our own cython there is the added advantage that we can check types in the same loop and therefore eliminate
432432
# the costly "set(map(type, X_col))" calls above
433-
return X_col.astype(np.unicode_)
433+
return X_col.astype(np.str_)
434434

435435

436436
def _process_column_initial(X_col, nonmissings, processing, min_unique_continuous):
@@ -448,9 +448,9 @@ def _process_column_initial(X_col, nonmissings, processing, min_unique_continuou
448448

449449
if issubclass(uniques.dtype.type, np.floating):
450450
floats = uniques.astype(np.float64, copy=False)
451-
uniques = floats.astype(np.unicode_)
451+
uniques = floats.astype(np.str_)
452452
else:
453-
uniques = uniques.astype(np.unicode_, copy=False)
453+
uniques = uniques.astype(np.str_, copy=False)
454454
try:
455455
# we rely here on there being a round trip format within this language from float64 to text to float64
456456

@@ -544,7 +544,7 @@ def _encode_categorical_existing(X_col, nonmissings, categories):
544544

545545
if issubclass(X_col.dtype.type, np.floating):
546546
uniques = uniques.astype(np.float64, copy=False)
547-
uniques = uniques.astype(np.unicode_, copy=False)
547+
uniques = uniques.astype(np.str_, copy=False)
548548

549549
mapping = np.fromiter(
550550
(categories.get(val, -1) for val in uniques), np.int64, count=len(uniques)
@@ -725,7 +725,7 @@ def _process_continuous(X_col, nonmissings):
725725
floats[idx] = one_item_array.astype(dtype=np.float64)[0]
726726
except TypeError:
727727
# use .astype instead of str(one_item_array) here to ensure identical string categories
728-
one_str_array = one_item_array.astype(dtype=np.unicode_)
728+
one_str_array = one_item_array.astype(dtype=np.str_)
729729
try:
730730
# use .astype(..) instead of float(..) to ensure identical conversion results
731731
floats[idx] = one_str_array.astype(dtype=np.float64)[0]
@@ -948,7 +948,7 @@ def _process_pandas_column(X_col, categories, feature_type, min_unique_continuou
948948
# unlike other missing value types, we get back -1's for missing here, so no need to drop them
949949
X_col = X_col.values
950950
is_ordered = X_col.ordered
951-
pd_categories = X_col.categories.values.astype(dtype=np.unicode_, copy=False)
951+
pd_categories = X_col.categories.values.astype(dtype=np.str_, copy=False)
952952
X_col = X_col.codes
953953

954954
if feature_type == "ignore":

python/interpret-core/interpret/utils/_measure_interactions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def measure_interactions(
159159
try:
160160
y_discard = y.astype(dtype=np.float64, copy=False)
161161
except (TypeError, ValueError):
162-
y_discard = y.astype(dtype=np.unicode_, copy=False)
162+
y_discard = y.astype(dtype=np.str_, copy=False)
163163

164164
target_type = type_of_target(y_discard)
165165
if target_type == "continuous":

python/interpret-core/tests/utils/test_clean_x.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -628,15 +628,15 @@ def test_process_continuous_obj_hard_bad():
628628

629629

630630
def test_process_continuous_str_simple():
631-
vals, bad = _process_continuous(np.array(["1", "2.5"], dtype=np.unicode_), None)
631+
vals, bad = _process_continuous(np.array(["1", "2.5"], dtype=np.str_), None)
632632
assert bad is None
633633
assert vals.dtype == np.float64
634634
assert np.array_equal(vals, np.array([1, 2.5], dtype=np.float64))
635635

636636

637637
def test_process_continuous_str_simple_missing():
638638
vals, bad = _process_continuous(
639-
np.array(["1", "2.5"], dtype=np.unicode_),
639+
np.array(["1", "2.5"], dtype=np.str_),
640640
np.array([True, True, False], dtype=np.bool_),
641641
)
642642
assert bad is None
@@ -649,7 +649,7 @@ def test_process_continuous_str_simple_missing():
649649

650650
def test_process_continuous_str_hard_bad():
651651
vals, bad = _process_continuous(
652-
np.array(["1", "2.5", "bad"], dtype=np.unicode_),
652+
np.array(["1", "2.5", "bad"], dtype=np.str_),
653653
np.array([True, True, True, False], dtype=np.bool_),
654654
)
655655
assert len(bad) == 4
@@ -708,7 +708,7 @@ def test_process_column_initial_obj_obj():
708708

709709
def test_process_column_initial_alphabetical_nomissing():
710710
encoded, c = _process_column_initial(
711-
np.array(["xyz", "abc", "xyz"], dtype=np.unicode_),
711+
np.array(["xyz", "abc", "xyz"], dtype=np.str_),
712712
None,
713713
"nominal_alphabetical",
714714
None,
@@ -723,7 +723,7 @@ def test_process_column_initial_alphabetical_nomissing():
723723

724724
def test_process_column_initial_alphabetical_missing():
725725
encoded, c = _process_column_initial(
726-
np.array(["xyz", "abc", "xyz"], dtype=np.unicode_),
726+
np.array(["xyz", "abc", "xyz"], dtype=np.str_),
727727
np.array([True, True, False, True], dtype=np.bool_),
728728
"nominal_alphabetical",
729729
None,
@@ -738,7 +738,7 @@ def test_process_column_initial_alphabetical_missing():
738738

739739
def test_process_column_initial_prevalence_nomissing():
740740
encoded, c = _process_column_initial(
741-
np.array(["xyz", "abc", "xyz"], dtype=np.unicode_),
741+
np.array(["xyz", "abc", "xyz"], dtype=np.str_),
742742
None,
743743
"nominal_prevalence",
744744
None,
@@ -753,7 +753,7 @@ def test_process_column_initial_prevalence_nomissing():
753753

754754
def test_process_column_initial_prevalence_missing():
755755
encoded, c = _process_column_initial(
756-
np.array(["xyz", "abc", "xyz"], dtype=np.unicode_),
756+
np.array(["xyz", "abc", "xyz"], dtype=np.str_),
757757
np.array([True, True, False, True], dtype=np.bool_),
758758
"nominal_prevalence",
759759
None,
@@ -768,7 +768,7 @@ def test_process_column_initial_prevalence_missing():
768768

769769
def test_process_column_initial_float64_nomissing():
770770
encoded, c = _process_column_initial(
771-
np.array(["11.1", "2.2", "11.1"], dtype=np.unicode_),
771+
np.array(["11.1", "2.2", "11.1"], dtype=np.str_),
772772
None,
773773
"ANYTHING_ELSE",
774774
None,
@@ -783,7 +783,7 @@ def test_process_column_initial_float64_nomissing():
783783

784784
def test_process_column_initial_float64_missing():
785785
encoded, c = _process_column_initial(
786-
np.array(["11.1", "2.2", "11.1"], dtype=np.unicode_),
786+
np.array(["11.1", "2.2", "11.1"], dtype=np.str_),
787787
np.array([True, True, False, True], dtype=np.bool_),
788788
"ANYTHING_ELSE",
789789
None,
@@ -1016,7 +1016,7 @@ def test_encode_categorical_existing_obj_floats():
10161016
np.float16(2.2),
10171017
np.float32(3.3),
10181018
np.float64(4.4),
1019-
np.longfloat(5.5),
1019+
np.longdouble(5.5),
10201020
],
10211021
dtype=np.object_,
10221022
),
@@ -1110,7 +1110,7 @@ def test_encode_categorical_existing_obj_obj():
11101110
def test_encode_categorical_existing_str():
11111111
c = {"abc": 1, "def": 2, "ghi": 3}
11121112
encoded, bad = _encode_categorical_existing(
1113-
np.array(["abc", "ghi", "def", "something"], dtype=np.unicode_),
1113+
np.array(["abc", "ghi", "def", "something"], dtype=np.str_),
11141114
np.array([True, True, False, True, True], dtype=np.bool_),
11151115
c,
11161116
)
@@ -1144,7 +1144,7 @@ def test_encode_categorical_existing_int8():
11441144
def test_encode_categorical_existing_bool():
11451145
c = {"False": 1, "True": 2}
11461146
encoded, bad = _encode_categorical_existing(
1147-
np.array([False, True, False], dtype=np.unicode_),
1147+
np.array([False, True, False], dtype=np.str_),
11481148
np.array([True, True, False, True], dtype=np.bool_),
11491149
c,
11501150
)
@@ -1157,7 +1157,7 @@ def test_encode_categorical_existing_bool():
11571157
def test_encode_categorical_existing_bool_true():
11581158
c = {"True": 1}
11591159
encoded, bad = _encode_categorical_existing(
1160-
np.array([False, True, False], dtype=np.unicode_),
1160+
np.array([False, True, False], dtype=np.str_),
11611161
np.array([True, True, False, True], dtype=np.bool_),
11621162
c,
11631163
)
@@ -1170,7 +1170,7 @@ def test_encode_categorical_existing_bool_true():
11701170
def test_encode_categorical_existing_bool_false():
11711171
c = {"False": 1}
11721172
encoded, bad = _encode_categorical_existing(
1173-
np.array([False, True, False], dtype=np.unicode_),
1173+
np.array([False, True, False], dtype=np.str_),
11741174
np.array([True, True, False, True], dtype=np.bool_),
11751175
c,
11761176
)
@@ -1794,7 +1794,7 @@ def test_unify_columns_pandas_missings_float64():
17941794

17951795

17961796
def test_unify_columns_pandas_missings_longfloat():
1797-
check_pandas_float(np.longfloat, -1.1, 2.2)
1797+
check_pandas_float(np.longdouble, -1.1, 2.2)
17981798

17991799

18001800
def test_unify_columns_pandas_missings_float32():

0 commit comments

Comments (0)