Skip to content

Commit 2b5a107

Browse files
author
Sam Borms
authored
Merge pull request #60 from PythonPredictions/fix/progress-indication-#41
Progress indication for forward selection & preprocessing (fixes #41).
2 parents 6ebe0a0 + b46ba47 commit 2b5a107

5 files changed

Lines changed: 22 additions & 15 deletions

File tree

cobra/model_building/forward_selection.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import logging
2-
log = logging.getLogger(__name__)
32

43
import pandas as pd
4+
from tqdm.auto import tqdm
55

66
from cobra.model_building import LogisticRegressionModel as MLModel
77

8+
log = logging.getLogger(__name__)
9+
810

911
class ForwardFeatureSelection:
1012

@@ -159,7 +161,7 @@ def fit(self, train_data: pd.DataFrame, target_column_name: str,
159161
def _forward_selection(self, train_data: pd.DataFrame,
160162
target_column_name: str, predictors: list,
161163
forced_predictors: list=[]) -> list:
162-
"""Perform the forward feature selection algoritm to compute a list
164+
"""Perform the forward feature selection algorithm to compute a list
163165
of models (with increasing performance?). The length of the list,
164166
i.e. the number of models is bounded by the max_predictors class
165167
attribute.
@@ -186,7 +188,8 @@ def _forward_selection(self, train_data: pd.DataFrame,
186188

187189
max_steps = 1 + min(self.max_predictors,
188190
len(predictors) + len(forced_predictors))
189-
for step in range(1, max_steps):
191+
for step in tqdm(range(1, max_steps), desc="Sequentially adding best "
192+
"predictor..."):
190193
if step <= len(forced_predictors):
191194
# first, we go through forced predictors
192195
candidate_predictors = [var for var in forced_predictors

cobra/preprocessing/categorical_data_processor.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,16 @@
1717
# standard lib imports
1818
import re
1919
from typing import Optional
20+
import logging
2021

2122
# third party imports
2223
import numpy as np
2324
import pandas as pd
2425
from scipy import stats
25-
26+
from tqdm.auto import tqdm
2627
from sklearn.base import BaseEstimator
2728
from sklearn.exceptions import NotFittedError
2829

29-
import logging
3030
log = logging.getLogger(__name__)
3131

3232

@@ -149,7 +149,8 @@ def fit(self, data: pd.DataFrame, column_names: list,
149149
log.info("regroup was set to False, so no fitting is required")
150150
return None
151151

152-
for column_name in column_names:
152+
for column_name in tqdm(column_names, desc="Fitting category "
153+
"regrouping..."):
153154

154155
if column_name not in data.columns:
155156
log.warning("DataFrame has no column '{}', so it will be "

cobra/preprocessing/kbins_discretizer.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,18 @@
1616
from copy import deepcopy
1717
from typing import List
1818
import numbers
19-
2019
import logging
21-
log = logging.getLogger(__name__)
2220

2321
# third party imports
2422
import numpy as np
2523
import pandas as pd
26-
24+
from tqdm.auto import tqdm
2725
from sklearn.base import BaseEstimator
2826
from sklearn.exceptions import NotFittedError
2927
#from sklearn.cluster import KMeans
3028

29+
log = logging.getLogger(__name__)
30+
3131

3232
class KBinsDiscretizer(BaseEstimator):
3333

@@ -186,7 +186,8 @@ def fit(self, data: pd.DataFrame, column_names: list):
186186
.format(KBinsDiscretizer.__name__,
187187
self.valid_strategies, self.strategy))
188188

189-
for column_name in column_names:
189+
for column_name in tqdm(column_names, desc="Computing "
190+
"discretization bins..."):
190191

191192
if column_name not in data.columns:
192193
log.warning("DataFrame has no column '{}', so it will be "
@@ -266,7 +267,7 @@ def transform(self, data: pd.DataFrame,
266267

267268
raise NotFittedError(msg.format(self.__class__.__name__))
268269

269-
for column_name in column_names:
270+
for column_name in tqdm(column_names, desc="Discretizing columns..."):
270271
if column_name not in self._bins_by_column:
271272
log.warning("Column '{}' is not in fitted output "
272273
"and will be skipped".format(column_name))

cobra/preprocessing/target_encoder.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,15 @@
88
- Matthias Roels (implementation)
99
"""
1010
import logging
11-
log = logging.getLogger(__name__)
1211

1312
#import numpy as np
1413
import pandas as pd
15-
14+
from tqdm.auto import tqdm
1615
from sklearn.base import BaseEstimator
1716
from sklearn.exceptions import NotFittedError
1817

18+
log = logging.getLogger(__name__)
19+
1920

2021
class TargetEncoder(BaseEstimator):
2122

@@ -144,7 +145,7 @@ def fit(self, data: pd.DataFrame, column_names: list,
144145
y = data[target_column]
145146
self._global_mean = y.sum() / y.count()
146147

147-
for column in column_names:
148+
for column in tqdm(column_names, desc="Fitting target encoding..."):
148149
if column not in data.columns:
149150
log.warning("DataFrame has no column '{}', so it will be "
150151
"skipped in fitting" .format(column))
@@ -209,7 +210,7 @@ def transform(self, data: pd.DataFrame,
209210

210211
raise NotFittedError(msg.format(self.__class__.__name__))
211212

212-
for column in column_names:
213+
for column in tqdm(column_names, desc="Applying target encoding..."):
213214

214215
if column not in data.columns:
215216
log.warning("Unknown column '{}' will be skipped"

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ scipy>=1.5.4
44
scikit-learn>=0.23.1
55
matplotlib>=3.3.3
66
seaborn>=0.11.0
7+
tqdm>=4.59.0

0 commit comments

Comments
 (0)