|
8 | 8 | pd.set_option("future.no_silent_downcasting", True) |
9 | 9 |
|
10 | 10 |
|
| 11 | +def _convert_dtype_with_nullable_int(series, dtype): |
| 12 | + """Convert a series to the specified dtype, handling nullable integers. |
| 13 | +
|
| 14 | + When converting to integer types and the series contains NaN values, |
| 15 | + this function automatically uses pandas nullable integer types (Int64, Int32, etc.) |
| 16 | + instead of numpy integer types which don't support NaN. |
| 17 | +
|
| 18 | + Args: |
| 19 | + series: The pandas Series to convert |
| 20 | + dtype: The target dtype |
| 21 | +
|
| 22 | + Returns: |
| 23 | + The series with the appropriate dtype applied |
| 24 | + """ |
| 25 | + # Check if it's already a pandas nullable int type |
| 26 | + is_pandas_nullable_int = isinstance( |
| 27 | + dtype, |
| 28 | + (pd.Int64Dtype, pd.Int32Dtype, pd.Int16Dtype, pd.Int8Dtype), |
| 29 | + ) |
| 30 | + |
| 31 | + if is_pandas_nullable_int: |
| 32 | + # Keep pandas nullable integer types as is |
| 33 | + return series.astype(dtype) |
| 34 | + # If column has NaN and dtype is integer, use nullable Int type |
| 35 | + if np.issubdtype(dtype, np.integer) and series.isna().any(): |
| 36 | + # Convert numpy int types to pandas nullable Int types |
| 37 | + if dtype == np.int64: |
| 38 | + return series.astype("Int64") |
| 39 | + if dtype == np.int32: |
| 40 | + return series.astype("Int32") |
| 41 | + if dtype == np.int16: |
| 42 | + return series.astype("Int16") |
| 43 | + if dtype == np.int8: |
| 44 | + return series.astype("Int8") |
| 45 | + # Fallback for other integer types |
| 46 | + return series.astype("Int64") |
| 47 | + return series.astype(dtype) |
| 48 | + |
| 49 | + |
11 | 50 | class ModifyColumnCommand(QUndoCommand): |
12 | 51 | """Command to add or remove a column in the table. |
13 | 52 |
|
@@ -155,38 +194,9 @@ def redo(self): |
155 | 194 | if np.any(dtypes != df.dtypes): |
156 | 195 | for col, dtype in dtypes.items(): |
157 | 196 | if dtype != df.dtypes[col]: |
158 | | - is_pandas_nullable_int = isinstance( |
159 | | - dtype, |
160 | | - ( |
161 | | - pd.Int64Dtype, |
162 | | - pd.Int32Dtype, |
163 | | - pd.Int16Dtype, |
164 | | - pd.Int8Dtype, |
165 | | - ), |
| 197 | + df[col] = _convert_dtype_with_nullable_int( |
| 198 | + df[col], dtype |
166 | 199 | ) |
167 | | - |
168 | | - if is_pandas_nullable_int: |
169 | | - # Keep pandas nullable integer types as is |
170 | | - df[col] = df[col].astype(dtype) |
171 | | - # If column has NaN and dtype is integer, use nullable Int type |
172 | | - elif ( |
173 | | - np.issubdtype(dtype, np.integer) |
174 | | - and df[col].isna().any() |
175 | | - ): |
176 | | - # Convert numpy int types to pandas nullable Int types |
177 | | - if dtype == np.int64: |
178 | | - df[col] = df[col].astype("Int64") |
179 | | - elif dtype == np.int32: |
180 | | - df[col] = df[col].astype("Int32") |
181 | | - elif dtype == np.int16: |
182 | | - df[col] = df[col].astype("Int16") |
183 | | - elif dtype == np.int8: |
184 | | - df[col] = df[col].astype("Int8") |
185 | | - else: |
186 | | - # Fallback for other integer types |
187 | | - df[col] = df[col].astype("Int64") |
188 | | - else: |
189 | | - df[col] = df[col].astype(dtype) |
190 | 200 | self.model.endInsertRows() |
191 | 201 | else: |
192 | 202 | self.model.beginRemoveRows( |
@@ -292,32 +302,17 @@ def _apply_changes(self, use_new: bool): |
292 | 302 | for col, dtype in original_dtypes.items(): |
293 | 303 | if col not in update_df.columns: |
294 | 304 | continue |
295 | | - # Check if it's a pandas extension dtype (like Int64) |
| 305 | + |
| 306 | + # For numeric types, convert string inputs to numbers first |
296 | 307 | is_pandas_nullable_int = isinstance( |
297 | 308 | dtype, |
298 | 309 | (pd.Int64Dtype, pd.Int32Dtype, pd.Int16Dtype, pd.Int8Dtype), |
299 | 310 | ) |
300 | | - |
301 | | - if is_pandas_nullable_int: |
302 | | - # Keep pandas nullable integer types as is |
| 311 | + if is_pandas_nullable_int or np.issubdtype(dtype, np.number): |
303 | 312 | df[col] = pd.to_numeric(df[col], errors="coerce") |
304 | | - df[col] = df[col].astype(dtype) |
305 | | - elif np.issubdtype(dtype, np.number): |
306 | | - df[col] = pd.to_numeric(df[col], errors="coerce") |
307 | | - # If original dtype was integer and column has NaN, use nullable Int type |
308 | | - if np.issubdtype(dtype, np.integer) and df[col].isna().any(): |
309 | | - if dtype == np.int64: |
310 | | - df[col] = df[col].astype("Int64") |
311 | | - elif dtype == np.int32: |
312 | | - df[col] = df[col].astype("Int32") |
313 | | - elif dtype == np.int16: |
314 | | - df[col] = df[col].astype("Int16") |
315 | | - elif dtype == np.int8: |
316 | | - df[col] = df[col].astype("Int8") |
317 | | - else: |
318 | | - df[col] = df[col].astype("Int64") |
319 | | - else: |
320 | | - df[col] = df[col].astype(dtype) |
| 313 | + |
| 314 | + # Convert to appropriate dtype, handling nullable integers |
| 315 | + df[col] = _convert_dtype_with_nullable_int(df[col], dtype) |
321 | 316 |
|
322 | 317 | rows = [df.index.get_loc(row_key) for (row_key, _) in self.changes] |
323 | 318 | cols = [ |
|
0 commit comments