Skip to content

Commit ca2c036

Browse files
committed
Fix broken doctests in xgboost_classifier.py
1 parent 8106aea commit ca2c036

File tree

1 file changed

+25
-27
lines changed

1 file changed

+25
-27
lines changed

machine_learning/xgboost_classifier.py

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -8,32 +8,33 @@
88

99

1010
def data_handling(data: dict) -> tuple:
11-
# Split dataset into features and target
12-
# data is features
1311
"""
14-
>>> data_handling(({'data':'[5.1, 3.5, 1.4, 0.2]','target':([0])}))
15-
('[5.1, 3.5, 1.4, 0.2]', [0])
16-
>>> data_handling(
17-
... {'data': '[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', 'target': ([0, 0])}
18-
... )
19-
('[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', [0, 0])
12+
Split dataset into features and target.
13+
14+
>>> from sklearn.datasets import load_iris
15+
>>> iris = load_iris()
16+
>>> features, targets = data_handling(iris)
17+
>>> features.shape
18+
(150, 4)
19+
>>> targets.shape
20+
(150,)
2021
"""
2122
return (data["data"], data["target"])
2223

2324

2425
def xgboost(features: np.ndarray, target: np.ndarray) -> XGBClassifier:
2526
"""
26-
# THIS TEST IS BROKEN!! >>> xgboost(np.array([[5.1, 3.6, 1.4, 0.2]]), np.array([0]))
27-
XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
28-
colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
29-
early_stopping_rounds=None, enable_categorical=False,
30-
eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
31-
importance_type=None, interaction_constraints='',
32-
learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
33-
max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
34-
missing=nan, monotone_constraints='()', n_estimators=100,
35-
n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0,
36-
reg_alpha=0, reg_lambda=1, ...)
27+
Train an XGBoost classifier.
28+
29+
>>> from sklearn.datasets import load_iris
30+
>>> iris = load_iris()
31+
>>> X_train, y_train = iris.data[:100], iris.target[:100]
32+
>>> classifier = xgboost(X_train, y_train)
33+
>>> predictions = classifier.predict(iris.data[:5])
34+
>>> len(predictions)
35+
5
36+
>>> all(pred in [0, 1, 2] for pred in predictions)
37+
True
3738
"""
3839
classifier = XGBClassifier()
3940
classifier.fit(features, target)
@@ -46,20 +47,18 @@ def main() -> None:
4647
https://xgboost.readthedocs.io/en/stable/
4748
The Iris dataset is used to demonstrate the algorithm.
4849
"""
49-
5050
# Load Iris dataset
5151
iris = load_iris()
5252
features, targets = data_handling(iris)
5353
x_train, x_test, y_train, y_test = train_test_split(
54-
features, targets, test_size=0.25
54+
features, targets, test_size=0.25, random_state=42
5555
)
56-
5756
names = iris["target_names"]
58-
57+
5958
# Create an XGBoost Classifier from the training data
6059
xgboost_classifier = xgboost(x_train, y_train)
61-
62-
# Display the confusion matrix of the classifier with both training and test sets
60+
61+
# Display the confusion matrix of the classifier on the test set
6362
ConfusionMatrixDisplay.from_estimator(
6463
xgboost_classifier,
6564
x_test,
@@ -74,6 +73,5 @@ def main() -> None:
7473

7574
if __name__ == "__main__":
7675
import doctest
77-
7876
doctest.testmod(verbose=True)
79-
main()
77+
main()

0 commit comments

Comments
 (0)