"""Training entry script for the credit-card-defaults pipeline.

Trains a GradientBoostingClassifier on the mounted train/test folders, logs
evaluation metrics to MLflow, and saves the model with MLflow serialization
WITHOUT registering it ("Option A"): model registration and autologging are
skipped to avoid MLflow registry/autolog endpoints that the Azure ML
tracking backend does not support.

NOTE(review): in the notebook, this script is emitted with ``%%writefile``.
The cell magic must be the FIRST line of the cell — do not place a blank
line above it, or IPython will not recognize the magic.
"""
import argparse
import os

import mlflow
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, f1_score

# Target column name present in both the train and test CSV files.
LABEL_COLUMN = "default payment next month"


def select_first_file(path):
    """Return the full path of the first file inside *path*.

    The input datasets are mounted as folders that are assumed to contain
    exactly one file each.

    Args:
        path (str): directory to look in.

    Returns:
        str: full path of the selected file.

    Raises:
        FileNotFoundError: if the directory contains no files.
    """
    files = os.listdir(path)
    if not files:
        # A clear error beats the bare IndexError files[0] would raise.
        raise FileNotFoundError(f"no input file found in {path!r}")
    return os.path.join(path, files[0])


def _load_split(folder):
    """Read the single CSV in *folder* and split it into (X, y) arrays."""
    df = pd.read_csv(select_first_file(folder))
    y = df.pop(LABEL_COLUMN).to_numpy()
    return df.values, y


def main():
    """Parse arguments, train, evaluate, log metrics, and save the model."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--train_data", type=str, help="path to train data")
    parser.add_argument("--test_data", type=str, help="path to test data")
    parser.add_argument("--n_estimators", required=False, default=100, type=int)
    parser.add_argument("--learning_rate", required=False, default=0.1, type=float)
    # Kept so existing job/pipeline definitions don't break; Option A does
    # not use it to register the model via MLflow.
    parser.add_argument(
        "--registered_model_name",
        type=str,
        required=False,
        help="model name (ignored in Option A)",
    )
    parser.add_argument("--model", type=str, help="path to model output folder")
    args = parser.parse_args()

    # Option A: disable autologging to avoid AML backend unsupported endpoints.
    mlflow.sklearn.autolog(disable=True)

    os.makedirs("./outputs", exist_ok=True)

    # The context manager guarantees the run is closed even if training
    # raises — the previous start_run()/end_run() pair leaked the run on
    # any exception between the two calls.
    with mlflow.start_run():
        # Record the intended model name as a tag for traceability.
        if args.registered_model_name:
            mlflow.set_tag(
                "registered_model_name_requested", args.registered_model_name
            )

        X_train, y_train = _load_split(args.train_data)
        X_test, y_test = _load_split(args.test_data)

        print(f"Training with data of shape {X_train.shape}")

        clf = GradientBoostingClassifier(
            n_estimators=args.n_estimators, learning_rate=args.learning_rate
        )
        clf.fit(X_train, y_train)

        y_pred = clf.predict(X_test)
        print(classification_report(y_test, y_pred))

        # Log metrics only (safe on the AML tracking backend).
        mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred))
        mlflow.log_metric("f1_score", f1_score(y_test, y_pred))

        # Save the model artifact with MLflow serialization, but do NOT
        # register it (registration is exactly what Option A avoids).
        model_output_path = os.path.join(args.model, "trained_model")
        mlflow.sklearn.save_model(sk_model=clf, path=model_output_path)
        print(f"Model saved to {model_output_path}")


if __name__ == "__main__":
    main()