aws · aviruthen · Mar 30, 2026 · Mar 30, 2026 · sagemaker-bot · Mar 30, 2026
diff --git a/v3-examples/ml-ops-examples/v3-mlflow-train-inference-e2e-example.ipynb b/v3-examples/ml-ops-examples/v3-mlflow-train-inference-e2e-example.ipynb
@@ -61,6 +61,7 @@
    "outputs": [],
    "source": [
     "import uuid\n",
+    "import boto3\n",
     "from sagemaker.core import image_uris\n",
     "from sagemaker.core.helper.session_helper import Session\n",
     "\n",
@@ -71,7 +72,9 @@
     "MLFLOW_TRACKING_ARN = \"XXXXX\"\n",
     "\n",
     "# AWS Configuration\n",
-    "AWS_REGION = Session.boto_region_name\n",
+    "boto_session = boto3.Session()\n",
+    "sagemaker_session = Session(boto_session=boto_session)\n",
+    "AWS_REGION = sagemaker_session.boto_region_name\n",
     "\n",
     "# Get PyTorch training image dynamically\n",
     "PYTORCH_TRAINING_IMAGE = image_uris.retrieve(\n",
@@ -297,6 +300,7 @@
     "\n",
     "# Training on SageMaker managed infrastructure\n",
     "model_trainer = ModelTrainer(\n",
+    "    sagemaker_session=sagemaker_session,\n",
     "    training_image=PYTORCH_TRAINING_IMAGE,\n",
     "    source_code=SourceCode(\n",
     "        source_dir=training_code_dir,\n",
@@ -333,22 +337,29 @@
     "from mlflow import MlflowClient\n",
     "\n",
     "client = MlflowClient()\n",
-    "registered_model = client.get_registered_model(name=MLFLOW_REGISTERED_MODEL_NAME)\n",
     "\n",
-    "latest_version = registered_model.latest_versions[0]\n",
+    "# Fetch the newest model version with search_model_versions (compatible with MLflow 3.x)\n",
+    "# Note: the latest_versions attribute was removed in MLflow 3.x\n",
+    "model_versions = client.search_model_versions(\n",
+    "    filter_string=f\"name='{MLFLOW_REGISTERED_MODEL_NAME}'\",\n",
+    "    order_by=['version_number DESC'],\n",
+    "    max_results=1\n",
+    ")\n",
+    "\n",
+    "if not model_versions:\n",
+    "    raise ValueError(f\"No versions found for model '{MLFLOW_REGISTERED_MODEL_NAME}'\")\n",
+    "\n",
+    "latest_version = model_versions[0]\n",
     "model_version = latest_version.version\n",
     "model_source = latest_version.source\n",
     "\n",
-    "# Get S3 URL of model files (for info only)\n",
-    "artifact_uri = client.get_model_version_download_uri(MLFLOW_REGISTERED_MODEL_NAME, model_version)\n",
-    "\n",
     "# MLflow model registry path to use with ModelBuilder\n",
     "mlflow_model_path = f\"models:/{MLFLOW_REGISTERED_MODEL_NAME}/{model_version}\"\n",
     "\n",
     "print(f\"Registered Model: {MLFLOW_REGISTERED_MODEL_NAME}\")\n",
     "print(f\"Latest Version: {model_version}\")\n",
-    "print(f\"Source: {model_source}\")\n",
-    "print(f\"Model artifacts location: {artifact_uri}\")"
+    "print(f\"Source (artifact location): {model_source}\")\n",
+    "print(f\"MLflow model path for deployment: {mlflow_model_path}\")"
    ]
   },
   {
@@ -481,23 +492,44 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import boto3\n",
-    "\n",
     "# Test with JSON input\n",
     "test_data = [[0.1, 0.2, 0.3, 0.4]]\n",
     "\n",
-    "runtime_client = boto3.client('sagemaker-runtime')\n",
-    "response = runtime_client.invoke_endpoint(\n",
-    "    EndpointName=core_endpoint.endpoint_name,\n",
-    "    Body=json.dumps(test_data),\n",
-    "    ContentType='application/json'\n",
+    "result = core_endpoint.invoke(\n",
+    "    body=json.dumps(test_data),\n",
+    "    content_type='application/json'\n",
     ")\n",
     "\n",
-    "prediction = json.loads(response['Body'].read().decode('utf-8'))\n",
+    "# Decode and display the result\n",
+    "prediction = json.loads(result.body.read().decode('utf-8'))\n",
     "print(f\"Input: {test_data}\")\n",
     "print(f\"Prediction: {prediction}\")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Test with different tensor inputs\n",
+    "test_inputs = [\n",
+    "    [[0.5, 0.3, 0.2, 0.1]],\n",
+    "    [[0.9, 0.1, 0.8, 0.2]],\n",
+    "    [[0.2, 0.7, 0.4, 0.6]]\n",
+    "]\n",
+    "\n",
+    "for i, test_input in enumerate(test_inputs, 1):\n",
+    "    result = core_endpoint.invoke(\n",
+    "        body=json.dumps(test_input),\n",
+    "        content_type='application/json'\n",
+    "    )\n",
+    "    \n",
+    "    prediction = json.loads(result.body.read().decode('utf-8'))\n",
+    "    print(f\"Test {i} - Input {test_input}: {prediction}\")\n",
+    "    print(\"-\" * 50)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},