Skip to content

Commit cfe9d9a

Browse files
JiwaniZakir and claude committed
Replace internal S3 URIs with user placeholders in SFT notebook
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent e161199 commit cfe9d9a

1 file changed

Lines changed: 13 additions & 7 deletions

File tree

v3-examples/model-customization-examples/sft_finetuning_example_notebook_pysdk_prod_v3.ipynb

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -85,13 +85,19 @@
8585
"from sagemaker.ai_registry.dataset_utils import CustomizationTechnique\n",
8686
"\n",
8787
"\n",
88-
"\n",
8988
"# Register dataset in SageMaker AI Registry\n",
9089
"# This creates a versioned dataset that can be referenced by ARN\n",
91-
"# Provide a source (it can be local file path or S3 URL)\n",
90+
"# Provide a source: a local file path or your own S3 URI pointing to a JSONL file.\n",
91+
"# The dataset must be in JSONL format with each line containing a JSON object\n",
92+
"# with 'prompt' and 'completion' fields. See the SageMaker documentation for details:\n",
93+
"# https://docs.aws.amazon.com/sagemaker/latest/dg/model-customize-sft.html\n",
94+
"#\n",
95+
"# Replace the placeholder below with your own S3 URI or local file path:\n",
96+
"MY_DATASET_S3_URI = \"s3://<your-bucket>/<path-to-your-dataset>.jsonl\" # TODO: replace with your dataset URI\n",
97+
"\n",
9298
"dataset = DataSet.create(\n",
9399
" name=\"demo-1\",\n",
94-
" source=\"s3://mc-flows-sdk-testing/input_data/sft/sample_data_256_final.jsonl\"\n",
100+
" source=MY_DATASET_S3_URI\n",
95101
")\n",
96102
"\n",
97103
"print(f\"Dataset ARN: {dataset.arn}\")"
@@ -163,7 +169,7 @@
163169
" mlflow_experiment_name=\"test-finetuned-models-exp\", \n",
164170
" mlflow_run_name=\"test-finetuned-models-run\", \n",
165171
" training_dataset=dataset.arn, \n",
166-
" s3_output_path=\"s3://mc-flows-sdk-testing/output/\",\n",
172+
" s3_output_path=\"s3://<your-bucket>/output/\", # TODO: replace with your S3 output path\n",
167173
" accept_eula=True\n",
168174
")\n"
169175
]
@@ -378,7 +384,7 @@
378384
" mlflow_experiment_name=\"test-finetuned-models-exp\", # Optional[str]\n",
379385
" mlflow_run_name=\"test-finetuned-models-run\", # Optional[str]\n",
380386
" training_dataset=dataset.arn, #Optional[]\n",
381-
" s3_output_path=\"s3://mc-flows-sdk-testing/output/\",\n",
387+
" s3_output_path=\"s3://<your-bucket>/output/\", # TODO: replace with your S3 output path\n",
382388
")\n"
383389
]
384390
},
@@ -439,7 +445,7 @@
439445
" mlflow_experiment_name=\"test-nova-finetuned-models-exp\", \n",
440446
" mlflow_run_name=\"test-nova-finetuned-models-run\", \n",
441447
" training_dataset=\"arn:aws:sagemaker:us-east-1:<>:hub-content/sdktest/DataSet/sft-nova-test-dataset/0.0.1\",\n",
442-
" s3_output_path=\"s3://mc-flows-sdk-testing-us-east-1/output/\"\n",
448+
" s3_output_path=\"s3://<your-bucket>/output/\" # TODO: replace with your S3 output path\n",
443449
")\n"
444450
]
445451
},
@@ -487,4 +493,4 @@
487493
},
488494
"nbformat": 4,
489495
"nbformat_minor": 5
490-
}
496+
}

0 commit comments

Comments
 (0)