Skip to content

Commit 72f3fbc

Browse files
Secbone and claude authored
fix: pandas 3.x compat and CI updates (#159)
* fix: pandas compat in impute.py, update CI runners and Python versions

- Fix chained assignment in impute.py _fit_encode/_encode to use .loc indexing instead of X[col].loc[] which breaks pandas Copy-on-Write
- Convert string columns to object dtype before processing to avoid StringDtype incompatibility with newer pandas
- Replace deprecated macos-13 runner with macos-14
- Promote Python 3.13 from experimental to stable in CI matrix
- Add Python 3.14 as experimental across all platforms

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: use iloc for positional indexing in process_test.py

Replace `F(len)(A)[0]` with `F(len)(A).iloc[0]` to fix KeyError on pandas 3.x where string-indexed Series no longer accept integer keys.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: use tolist() instead of .values for StringArray compat

In pandas 3.x, Index.values returns StringArray instead of numpy array. Using .tolist() ensures str() gives the expected list format `['A']` instead of `<StringArray>` in the duplicate keys error message.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: replace deprecated applymap with map for pandas 3.x

DataFrame.applymap was removed in pandas 3.0. Use DataFrame.map instead.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 5eb589e commit 72f3fbc

7 files changed

Lines changed: 26 additions & 15 deletions

File tree

.github/workflows/linux.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ jobs:
1111
test:
1212
strategy:
1313
matrix:
14-
python-version: ['3.9', '3.10', '3.11', '3.12']
14+
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
1515
experimental: [false]
1616
include:
17-
- python-version: '3.13'
17+
- python-version: '3.14'
1818
experimental: true
1919
fail-fast: false
2020
runs-on: ubuntu-latest

.github/workflows/macos.yml

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
strategy:
1313
matrix:
1414
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
15-
macos-version: ['macos-13', 'macos-latest'] # macos-13: Intel, macos-latest: Apple Silicon
15+
macos-version: ['macos-14', 'macos-latest'] # macos-14: Intel(x86_64), macos-latest: Apple Silicon
1616
include:
1717
- experimental: false
1818
# macos-latest (Apple Silicon) is experimental
@@ -21,8 +21,12 @@ jobs:
2121
# Python 3.9 is experimental
2222
- python-version: '3.9'
2323
experimental: true
24-
# Python 3.13 is experimental
25-
- python-version: '3.13'
24+
# Python 3.14 is experimental
25+
- python-version: '3.14'
26+
macos-version: 'macos-14'
27+
experimental: true
28+
- python-version: '3.14'
29+
macos-version: 'macos-latest'
2630
experimental: true
2731
fail-fast: false
2832
runs-on: ${{ matrix.macos-version }}

.github/workflows/windows.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ jobs:
1111
test:
1212
strategy:
1313
matrix:
14-
python-version: ['3.9', '3.10', '3.11', '3.12']
14+
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
1515
experimental: [false]
1616
include:
17-
- python-version: '3.13'
17+
- python-version: '3.14'
1818
experimental: true
1919
fail-fast: false
2020
runs-on: windows-latest

toad/impute.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ def transform(self, X, **kwargs):
5151

5252

5353
def _replace_empty(self, X):
54+
X = X.copy()
55+
# Convert string columns to object dtype to avoid StringDtype issues in newer pandas
56+
for col in X.select_dtypes(include=['string']).columns:
57+
X[col] = X[col].astype(object)
5458
mask = X.isin(self.missing_values_list)
5559
X = X.where(~mask, np.nan)
5660
return X, mask
@@ -63,12 +67,14 @@ def _fit_encode(self, X, mask):
6367
mask (Mask): empty mask for X
6468
"""
6569
category_data = X.select_dtypes(exclude = np.number).columns
66-
70+
6771
for col in category_data:
68-
unique, X[col].loc[~mask[col]] = np.unique(X[col][~mask[col]], return_inverse = True)
72+
valid = ~mask[col]
73+
unique, inverse = np.unique(X.loc[valid, col], return_inverse = True)
74+
X.loc[valid, col] = inverse.astype(float)
6975

7076
self.encoder_dict[col] = unique
71-
77+
7278
return X
7379

7480
def _encode(self, X, mask):
@@ -79,9 +85,10 @@ def _encode(self, X, mask):
7985
mask (Mask): empty mask for X
8086
"""
8187
for col, unique in self.encoder_dict.items():
88+
valid = ~mask[col]
8289
table = dict(zip(unique, np.arange(len(unique))))
83-
X[col].loc[~mask[col]] = np.array([table[v] for v in X[col][~mask[col]]])
84-
90+
X.loc[valid, col] = np.array([table[v] for v in X.loc[valid, col]]).astype(float)
91+
8592
return X
8693

8794
def _decode(self, X):

toad/preprocessing/process_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def test_mask_isna():
4141
assert m.replay(df).sum() == df['A'].isna().sum()
4242

4343
def test_f():
44-
assert F(len)(A)[0] == 500
44+
assert F(len)(A).iloc[0] == 500
4545

4646
def test_processing():
4747
res = (

toad/transform.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def _raise_unfitted(self):
100100

101101
def _check_duplicated_keys(self, X):
102102
if isinstance(X, pd.DataFrame) and X.columns.has_duplicates:
103-
keys = X.columns[X.columns.duplicated()].values
103+
keys = X.columns[X.columns.duplicated()].tolist()
104104
raise Exception("X has duplicate keys `{keys}`".format(keys = str(keys)))
105105

106106
return True

toad/utils/func_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def test_bin_to_number_for_frame():
103103
},
104104
])
105105

106-
res = df.applymap(bin_to_number())
106+
res = df.map(bin_to_number())
107107
assert res.loc[1, 'area_2'] == 225
108108

109109
def test_generate_target():

0 commit comments

Comments
 (0)