88
99
def data_handling(data: dict) -> tuple:
    """
    Split a dataset mapping into its features and target.

    :param data: mapping with at least the keys ``"data"`` and ``"target"``
        (e.g. the Bunch returned by ``sklearn.datasets.load_iris``).
    :return: ``(features, target)`` tuple taken verbatim from the mapping.

    >>> data_handling({'data': '[5.1, 3.5, 1.4, 0.2]', 'target': [0]})
    ('[5.1, 3.5, 1.4, 0.2]', [0])
    >>> data_handling({'data': [[4.9, 3.0], [4.7, 3.2]], 'target': [0, 0]})
    ([[4.9, 3.0], [4.7, 3.2]], [0, 0])
    """
    features = data["data"]
    target = data["target"]
    return features, target
2223
2324
def xgboost(features: np.ndarray, target: np.ndarray) -> XGBClassifier:
    """
    Train an XGBoost classifier on the given data and return the fitted model.

    :param features: 2-D array of training samples, shape (n_samples, n_features).
    :param target: 1-D array of class labels, one per training sample.
    :return: the fitted ``XGBClassifier`` instance.

    >>> from sklearn.datasets import load_iris
    >>> iris = load_iris()
    >>> x_train, y_train = iris.data[:100], iris.target[:100]
    >>> classifier = xgboost(x_train, y_train)
    >>> predictions = classifier.predict(iris.data[:5])
    >>> len(predictions)
    5
    >>> all(pred in [0, 1, 2] for pred in predictions)
    True
    """
    # Default hyperparameters are sufficient for this iris demonstration;
    # callers needing tuning should wrap or extend this helper.
    classifier = XGBClassifier()
    classifier.fit(features, target)
    return classifier
@@ -46,20 +47,18 @@ def main() -> None:
4647 https://xgboost.readthedocs.io/en/stable/
4748 Iris type dataset is used to demonstrate algorithm.
4849 """
49-
5050 # Load Iris dataset
5151 iris = load_iris ()
5252 features , targets = data_handling (iris )
5353 x_train , x_test , y_train , y_test = train_test_split (
54- features , targets , test_size = 0.25
54+ features , targets , test_size = 0.25 , random_state = 42
5555 )
56-
5756 names = iris ["target_names" ]
58-
57+
5958 # Create an XGBoost Classifier from the training data
6059 xgboost_classifier = xgboost (x_train , y_train )
61-
62- # Display the confusion matrix of the classifier with both training and test sets
60+
61+ # Display the confusion matrix of the classifier with test set
6362 ConfusionMatrixDisplay .from_estimator (
6463 xgboost_classifier ,
6564 x_test ,
@@ -74,6 +73,5 @@ def main() -> None:
7473
if __name__ == "__main__":
    # Verify the embedded doctests first, then run the demonstration.
    import doctest

    doctest.testmod(verbose=True)
    main()
0 commit comments