|
7 | 7 | "# Feature Group Lake Formation Governance " |
8 | 8 | ] |
9 | 9 | }, |
| 10 | + { |
| 11 | + "cell_type": "markdown", |
| 12 | + "metadata": {}, |
| 13 | + "source": [ |
| 14 | + "This demo builds on SageMaker v3 features, which are not released yet, so we need to tell pip to use the local version.\n",
| 15 | + "\n", |
| 16 | + "1. install `pyenv` following the instructions here https://github.com/pyenv/pyenv?tab=readme-ov-file#a-getting-pyenv\n", |
| 17 | + "2. setup your shell env for pyenv https://github.com/pyenv/pyenv?tab=readme-ov-file#b-set-up-your-shell-environment-for-pyenv\n", |
| 18 | + "3. install and activate python for example (this code was tested on python 3.10.14)\n", |
| 19 | + "```\n", |
| 20 | + "pyenv install 3.10.14\n", |
| 21 | + "pyenv virtualenv 3.10.14 py3.10.14\n", |
| 22 | + "pyenv activate py3.10.14\n", |
| 23 | + "```\n", |
| 24 | + "4. You will then need to run `pip install -e ./sagemaker-mlops` before starting the Jupyter notebook to use the local packages\n",
| 25 | + "5. `pip install jupyter notebook`\n",
| 26 | + "6. `jupyter notebook`"
| 27 | + ] |
| 28 | + }, |
10 | 29 | { |
11 | 30 | "cell_type": "markdown", |
12 | 31 | "metadata": {}, |
|
230 | 249 | ] |
231 | 250 | }, |
232 | 251 | { |
233 | | - "cell_type": "markdown", |
| 252 | + "cell_type": "code", |
| 253 | + "execution_count": null, |
234 | 254 | "metadata": {}, |
| 255 | + "outputs": [], |
235 | 256 | "source": [ |
236 | | - "This Demo builds on sagemaker v3 features which is not out yet so we need to tell pip to use local version \n", |
| 257 | + "import boto3\n", |
| 258 | + "import os\n", |
237 | 259 | "\n", |
238 | | - "1. install `pyenv` https://github.com/pyenv/pyenv?tab=readme-ov-file#a-getting-pyenv\n", |
239 | | - "2. install and activate python for example (this code was tested on python 3.10.14)\n", |
240 | | - "```\n", |
241 | | - "pyenv install 3.10.14\n", |
242 | | - "pyenv virtualenv 3.10.14 py3.10.14\n", |
243 | | - "pyenv activate py3.10.14\n", |
244 | | - "```\n", |
245 | | - "3. You will then need to run `pip install -e .` before starting the jupyter notebook to use the local packages\n", |
246 | | - "4. `pip install jupyter notebook`\n", |
247 | | - "5. `jupyter notebook`" |
| 260 | + "boto3.DEFAULT_SESSION = None\n", |
| 261 | + "def assume_role(role_arn, session_name=\"AssumedRoleSession\"):\n", |
| 262 | + " \"\"\"\n", |
| 263 | + " Assume an AWS IAM role and return temporary credentials.\n", |
| 264 | + " \n", |
| 265 | + " Args:\n", |
| 266 | + " role_arn: The ARN of the role to assume\n", |
| 267 | + " session_name: A name for the assumed role session\n", |
| 268 | + " \n", |
| 269 | + " Returns:\n", |
| 270 | + " A boto3 session with the assumed role credentials\n", |
| 271 | + " \"\"\"\n", |
| 272 | + " sts_client = boto3.client('sts')\n", |
| 273 | + " \n", |
| 274 | + " response = sts_client.assume_role(\n", |
| 275 | + " RoleArn=role_arn,\n", |
| 276 | + " RoleSessionName=session_name\n", |
| 277 | + " )\n", |
| 278 | + " \n", |
| 279 | + " credentials = response['Credentials']\n", |
| 280 | + " \n", |
| 281 | + " # Create a new session with the temporary credentials\n", |
| 282 | + " session = boto3.Session(\n", |
| 283 | + " aws_access_key_id=credentials['AccessKeyId'],\n", |
| 284 | + " aws_secret_access_key=credentials['SecretAccessKey'],\n", |
| 285 | + " aws_session_token=credentials['SessionToken']\n", |
| 286 | + " )\n", |
| 287 | + " \n", |
| 288 | + " return session" |
248 | 289 | ] |
249 | 290 | }, |
250 | 291 | { |
|
287 | 328 | "source": [ |
288 | 329 | "# Use SageMaker session to get default bucket and execution role\n", |
289 | 330 | "\n", |
290 | | - "boto_session = boto3.Session()\n", |
291 | | - "new_sagemaker_session = SageMakerSession(boto_session=boto_session)\n", |
292 | | - "sagemaker_session = SageMakerSession()\n", |
293 | | - "S3_BUCKET = sagemaker_session.default_bucket()\n", |
294 | | - "REGION = sagemaker_session.boto_session.region_name\n", |
295 | | - "\n", |
296 | 331 | "# Execution role (for running this notebook)\n", |
297 | | - "EXECUTION_ROLE_ARN = get_execution_role(sagemaker_session)\n", |
| 332 | + "EXECUTION_ROLE_ARN = 'arn:aws:iam::<account id>:role/<role>'\n", |
| 333 | + "\n", |
298 | 334 | "\n", |
299 | 335 | "# Offline store role (dedicated role for Feature Store S3 access)\n", |
300 | 336 | "# Replace with your dedicated offline store role ARN\n", |
301 | 337 | "# https://docs.aws.amazon.com/sagemaker/latest/dg/feature-store-adding-policies.html\n", |
302 | | - "OFFLINE_STORE_ROLE_ARN = \"arn:aws:iam::<aws account id>:role/<role name>\"\n", |
| 338 | + "OFFLINE_STORE_ROLE_ARN = \"arn:aws:iam::<account id>:role/<role>\"\n", |
| 339 | + "\n", |
| 340 | + "\n", |
| 341 | + "boto_session = assume_role(EXECUTION_ROLE_ARN)\n", |
| 342 | + "sagemaker_session = SageMakerSession(boto_session=boto_session)\n", |
| 343 | + "sts = boto_session.client('sts')\n", |
| 344 | + "\n", |
| 345 | + "S3_BUCKET = sagemaker_session.default_bucket()\n", |
| 346 | + "REGION = sagemaker_session.boto_session.region_name\n", |
| 347 | + "\n", |
303 | 348 | "\n", |
304 | 349 | "print(f\"S3 Bucket: {S3_BUCKET}\")\n", |
305 | | - "print(f\"Execution Role ARN: {EXECUTION_ROLE_ARN}\")\n", |
| 350 | + "print(f\"Execution Role ARN:{sts.get_caller_identity()['Arn']}\")\n", |
306 | 351 | "print(f\"Offline Store Role ARN: {OFFLINE_STORE_ROLE_ARN}\")\n", |
307 | 352 | "print(f\"Region: {REGION}\")" |
308 | 353 | ] |
|
572 | 617 | ] |
573 | 618 | }, |
574 | 619 | { |
575 | | - "cell_type": "code", |
576 | | - "execution_count": null, |
| 620 | + "cell_type": "markdown", |
577 | 621 | "metadata": {}, |
578 | | - "outputs": [], |
579 | 622 | "source": [ |
580 | | - "fg_workflow2 = FeatureGroup.refresh()" |
| 623 | + "Here you can optionally assume a different role that has only data lake permissions and the DescribeFeatureGroup permission"
581 | 624 | ] |
582 | 625 | }, |
583 | 626 | { |
|
593 | 636 | "print(\" 2. Grant permissions to execution role\")\n", |
594 | 637 | "print(\" 3. Revoke IAMAllowedPrincipal permissions\")\n", |
595 | 638 | "print()\n", |
596 | | - "\n", |
| 639 | + "fg_workflow2 = FeatureGroup.get(FG_NAME_WORKFLOW2)\n", |
597 | 640 | "result = fg_workflow2.enable_lake_formation( # new method\n", |
598 | 641 | " use_service_linked_role=True,\n", |
599 | | - " session=boto_session\n", |
600 | 642 | ")\n", |
601 | 643 | "\n", |
602 | 644 | "print(f\"\\nLake Formation setup results:\")\n", |
|
645 | 687 | "print(f\"Status: {fg_workflow2.feature_group_status}\")\n", |
646 | 688 | "print(f\"ARN: {fg_workflow2.feature_group_arn}\")\n", |
647 | 689 | "DataCatalogTable = fg_workflow2.offline_store_config.data_catalog_config.table_name\n", |
648 | | - "print(f\"{DataCatalogTable}\")" |
| 690 | + "print(f\"Table Name: {DataCatalogTable}\")" |
649 | 691 | ] |
650 | 692 | }, |
651 | 693 | { |
|
715 | 757 | "outputs": [], |
716 | 758 | "source": [ |
717 | 759 | "# Uncomment to delete the Feature Groups\n", |
718 | | - "# cleanup_feature_group(fg_workflow1)\n", |
| 760 | + "cleanup_feature_group(fg_workflow1)\n", |
719 | 761 | "cleanup_feature_group(fg_workflow2)" |
720 | 762 | ] |
721 | 763 | }, |
|
0 commit comments