MLflow implementation

GoJo-Rika · GoJo-Rika · commit 055aa15f99ab · 2025-06-20T03:14:25.000+01:00
diff --git a/network_security/components/model_trainer.py b/network_security/components/model_trainer.py
@@ -1,6 +1,7 @@
 import sys
 from pathlib import Path
 
+import mlflow
 from sklearn.ensemble import (
     AdaBoostClassifier,
     GradientBoostingClassifier,
@@ -40,6 +41,17 @@ def __init__(
         except Exception as e:
             raise NetworkSecurityException(e, sys)
 
+    def track_mlflow(self, best_model, classificationmetric):  # log one set of scores plus the model to MLflow
+        with mlflow.start_run():  # every logging call below happens inside this run context
+            f1_score = classificationmetric.f1_score  # the metric object must expose these three attributes
+            precision_score = classificationmetric.precision_score
+            recall_score = classificationmetric.recall_score
+
+            mlflow.log_metric("f1_score", f1_score)
+            mlflow.log_metric("precision", precision_score)  # NOTE(review): key has no "_score" suffix, unlike the other two
+            mlflow.log_metric("recall_score", recall_score)
+            mlflow.sklearn.log_model(best_model, "model")  # NOTE(review): train_model calls this for train and for test scores, so the same model is logged on each call and the metric keys do not say which split they belong to
+
     def train_model(
         self,
         X_train: object,
@@ -57,20 +69,20 @@ def train_model(
         params = {
             "Decision Tree": {
                 "criterion": ["gini", "entropy", "log_loss"],
-                "splitter": ["best", "random"],
-                "max_features": ["sqrt", "log2"],
+                # "splitter": ["best", "random"],
+                # "max_features": ["sqrt", "log2"],
             },
             "Random Forest": {
-                "criterion": ["gini", "entropy", "log_loss"],
-                "max_features": ["sqrt", "log2", None],
+                # "criterion": ["gini", "entropy", "log_loss"],
+                # "max_features": ["sqrt", "log2", None],
                 "n_estimators": [8, 16, 32, 128, 256],
             },
             "Gradient Boosting": {
-                "loss": ["log_loss", "exponential"],
+                # "loss": ["log_loss", "exponential"],
                 "learning_rate": [0.1, 0.01, 0.05, 0.001],
                 "subsample": [0.6, 0.7, 0.75, 0.85, 0.9],
-                "criterion": ["squared_error", "friedman_mse"],
-                "max_features": ["auto", "sqrt", "log2"],
+                # "criterion": ["squared_error", "friedman_mse"],
+                # "max_features": ["auto", "sqrt", "log2"],
                 "n_estimators": [8, 16, 32, 64, 128, 256],
             },
             "Logistic Regression": {},
@@ -102,13 +114,17 @@ def train_model(
             y_true=y_train,
             y_pred=y_train_pred,
         )
+        ## Track the training experiments with MLflow
+        self.track_mlflow(best_model, classification_train_metric)
 
         y_test_pred = best_model.predict(X_test)
         classification_test_metric = get_classification_score(
             y_true=y_test,
             y_pred=y_test_pred,
         )
 
+        self.track_mlflow(best_model, classification_test_metric)
+
         preprocessor = load_object(
             file_path=self.data_transformation_artifact.transformed_object_file_path,
         )
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,6 +7,7 @@ requires-python = ">=3.12"
 dependencies = [
     "certifi>=2025.6.15",
     "dill>=0.4.0",
+    "mlflow>=3.1.0",
     "numpy>=2.3.0",
     "pandas>=2.3.0",
     "pyaml>=25.5.0",
diff --git a/requirements.txt b/requirements.txt
@@ -7,5 +7,6 @@ pymongo[srv]==3.12
 scikit-learn
 dill
 pyaml
+mlflow
 
 # -e .
diff --git a/uv.lock b/uv.lock

-Original file line number
+Diff line change
 scikit-learn
 dill
 pyaml
 +mlflow
 # -e .