|
27 | 27 | from ..primitives.stack import Stack |
28 | 28 | from ..primitives.sum import Sum |
29 | 29 |
|
30 | | -from .filling_utils import check_column, check_dtype |
| 30 | +from .filling_utils import check_column, normalize_dtype |
31 | 31 |
|
32 | 32 |
|
33 | 33 | class HistogramFillerBase(object): |
@@ -111,7 +111,7 @@ def __init__( |
111 | 111 | self.bin_specs = bin_specs or {} |
112 | 112 | self.time_axis = time_axis |
113 | 113 | var_dtype = var_dtype or {} |
114 | | - self.var_dtype = {k: check_dtype(v) for k, v in var_dtype.items()} |
| 114 | + self.var_dtype = {k: normalize_dtype(v) for k, v in var_dtype.items()} |
115 | 115 | self.read_key = read_key |
116 | 116 | self.store_key = store_key |
117 | 117 |
|
@@ -404,32 +404,31 @@ def categorize_features(self, df): |
404 | 404 |
|
405 | 405 | for col_list in features: |
406 | 406 | for col in col_list: |
| 407 | + # data type with metadata |
| 408 | + dt_col = self.get_data_type(df, col) |
407 | 409 |
|
408 | | - dt = self.var_dtype.get(col, check_dtype(self.get_data_type(df, col))) |
| 410 | + # normalized data type |
| 411 | + dt = self.var_dtype.get(col, normalize_dtype(dt_col)) |
409 | 412 |
|
410 | 413 | if col not in self.var_dtype: |
411 | 414 | self.var_dtype[col] = dt |
412 | 415 |
|
| 416 | + # metadata indicates decimal |
| 417 | + if hasattr(dt_col, 'metadata') and dt_col.metadata is not None and dt_col.metadata["decimal"]: |
| 418 | + cols_by_type["decimal"].add(col) |
| 419 | + |
413 | 420 | if np.issubdtype(dt, np.integer): |
414 | | - colset = cols_by_type["int"] |
415 | | - if col not in colset: |
416 | | - colset.add(col) |
| 421 | + cols_by_type["int"].add(col) |
| 422 | + |
417 | 423 | if np.issubdtype(dt, np.number): |
418 | 424 | colset = cols_by_type["num"] |
419 | | - if col not in colset: |
420 | | - colset.add(col) |
421 | 425 | elif np.issubdtype(dt, np.datetime64): |
422 | 426 | colset = cols_by_type["dt"] |
423 | | - if col not in colset: |
424 | | - colset.add(col) |
425 | 427 | elif np.issubdtype(dt, np.bool_): |
426 | 428 | colset = cols_by_type["bool"] |
427 | | - if col not in colset: |
428 | | - colset.add(col) |
429 | 429 | else: |
430 | 430 | colset = cols_by_type["str"] |
431 | | - if col not in colset: |
432 | | - colset.add(col) |
| 431 | + colset.add(col) |
433 | 432 |
|
434 | 433 | self.logger.debug( |
435 | 434 | 'Data type of column "{col}" is "{type}".'.format( |
|
0 commit comments