-
Notifications
You must be signed in to change notification settings - Fork 31
Expand file tree
/
Copy pathNeptune_CatBoost.py
More file actions
107 lines (82 loc) · 3.24 KB
/
Neptune_CatBoost.py
File metadata and controls
107 lines (82 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# Neptune + CatBoost
## Import dependencies
import neptune
from catboost import CatBoostClassifier
from catboost.datasets import titanic
from neptune.types import File
from neptune.utils import stringify_unsupported
from sklearn.model_selection import train_test_split
## (Neptune) Start a run
# The anonymous token and the "common/catboost-support" project below are
# placeholders: replace both with your own before logging real experiments.
run = neptune.init_run(
    project="common/catboost-support",  # Replace with your own
    api_token=neptune.ANONYMOUS_API_TOKEN,  # Replace with your own
    tags=["catboost", "classifier", "script"],  # (optional) use your own
)
## Load data
# titanic() hands back the train and test frames as a pair.
titanic_train, titanic_test = titanic()

### (Neptune) Upload raw data
# Render each untouched frame to HTML and log it under its own field,
# before any preprocessing modifies the data.
for raw_field, raw_frame in (
    ("data/raw/train", titanic_train),
    ("data/raw/test", titanic_test),
):
    run[raw_field].upload(File.as_html(raw_frame))
### Preprocess data
# Fill missing values by assigning the filled column back to the frame.
# Calling `df[col].fillna(..., inplace=True)` operates on an intermediate
# Series: pandas 2.x emits a FutureWarning for that pattern, and with
# Copy-on-Write (the default from pandas 3.0) the DataFrame is left unfilled.
titanic_train["Age"] = titanic_train["Age"].fillna(titanic_train["Age"].median())
titanic_train["Cabin"] = titanic_train["Cabin"].fillna("")
titanic_train["Embarked"] = titanic_train["Embarked"].fillna(
    titanic_train["Embarked"].mode()[0]
)

titanic_test["Age"] = titanic_test["Age"].fillna(titanic_test["Age"].median())
titanic_test["Fare"] = titanic_test["Fare"].fillna(titanic_test["Fare"].median())
titanic_test["Cabin"] = titanic_test["Cabin"].fillna("")

# Column roles used when splitting the data and fitting the model.
label = ["Survived"]
cat_features = ["Sex", "Embarked"]
text_features = ["Name", "Ticket", "Cabin"]

# Hold out 25% of the labelled rows for evaluation. PassengerId is dropped
# together with the label so it is not used as a feature.
# NOTE: no random_state is passed, so the shuffled split differs between runs.
X_train, X_eval, y_train, y_eval = train_test_split(
    titanic_train.drop(columns=label + ["PassengerId"]),
    titanic_train[label],
    test_size=0.25,
    shuffle=True,
)
### (Neptune) Upload processed data
# These are the full train/test frames after missing-value filling
# (not the train/eval split), rendered to HTML for logging.
processed_train_html = File.as_html(titanic_train)
run["data/processed/train"].upload(processed_train_html)
processed_test_html = File.as_html(titanic_test)
run["data/processed/test"].upload(processed_test_html)
## Train a CatBoost model
# The training plot is written to this file so it can be uploaded afterwards.
plot_file = "training_plot.html"

# Classifier with default settings; the categorical and text columns are
# declared at fit time, and the held-out split serves as the evaluation set.
model = CatBoostClassifier()
model.fit(
    X_train,
    y_train,
    cat_features=cat_features,
    text_features=text_features,
    eval_set=(X_eval, y_eval),
    use_best_model=True,
    plot_file=plot_file,
)
### (Neptune) Upload training results
#### Upload training plot
run["training/plot"].upload(plot_file)

#### Upload training metrics
# Both values go through stringify_unsupported before being assigned to the run.
best_score = model.get_best_score()
run["training/best_score"] = stringify_unsupported(best_score)
best_iteration = model.get_best_iteration()
run["training/best_iteration"] = stringify_unsupported(best_iteration)
## Make predictions
# PassengerId was dropped from the training features, so drop it here as well.
test_features = titanic_test.drop(columns=["PassengerId"])
titanic_test["prediction"] = model.predict(test_features, prediction_type="Class")

### (Neptune) Upload predictions
# Write the test frame (now including the prediction column) to CSV, then log it.
results_path = "results.csv"
titanic_test.to_csv(results_path, index=False)
run["data/results"].upload(results_path)
## (Neptune) Upload model metadata to Neptune
### Upload model binary
model_path = "model.cbm"
model.save_model(model_path)
run["model/binary"].upload(model_path)

### Upload model attributes
run["model/attributes/tree_count"] = model.tree_count_

# Pair every feature name with its importance value so each one is logged by name.
importance_by_feature = dict(
    zip(model.feature_names_, model.get_feature_importance())
)
run["model/attributes/feature_importances"] = importance_by_feature

probability_threshold = model.get_probability_threshold()
run["model/attributes/probability_threshold"] = probability_threshold

### Upload model parameters
all_params = model.get_all_params()
run["model/parameters"] = stringify_unsupported(all_params)

## Stop logging
run.stop()
## Analyze run in the Neptune app
# Follow the run link in the console output and explore the logged metadata.
# You can also explore this example run
# https://app.neptune.ai/o/common/org/catboost-support/runs/details?viewId=standard-view&detailsTab=dashboard&dashboardId=Overview-99f571df-0fec-4447-9ffe-5a4c668577cd&shortId=CAT-2