8585 " from sagemaker.ai_registry.dataset_utils import CustomizationTechnique\n " ,
8686 " \n " ,
8787 " \n " ,
88- " \n " ,
8988 " # Register dataset in SageMaker AI Registry\n " ,
9089 " # This creates a versioned dataset that can be referenced by ARN\n " ,
91- " # Provide a source (it can be local file path or S3 URL)\n " ,
90+ " # Provide a source: a local file path or your own S3 URI pointing to a JSONL file.\n " ,
91+ " # The dataset must be in JSONL format with each line containing a JSON object\n " ,
92+ " # with 'prompt' and 'completion' fields. See the SageMaker documentation for details:\n " ,
93+ " # https://docs.aws.amazon.com/sagemaker/latest/dg/model-customize-sft.html\n " ,
94+ " #\n " ,
95+ " # Replace the placeholder below with your own S3 URI or local file path:\n " ,
96+ " MY_DATASET_S3_URI = \" s3://<your-bucket>/<path-to-your-dataset>.jsonl\" # TODO: replace with your dataset URI\n " ,
97+ " \n " ,
9298 " dataset = DataSet.create(\n " ,
9399 " name=\" demo-1\" ,\n " ,
94- " source=\" s3://mc-flows-sdk-testing/input_data/sft/sample_data_256_final.jsonl \" \n" ,
100+ " source=MY_DATASET_S3_URI \n " ,
95101 " )\n " ,
96102 " \n " ,
97103 " print(f\" Dataset ARN: {dataset.arn}\" )"
163169 " mlflow_experiment_name=\" test-finetuned-models-exp\" , \n " ,
164170 " mlflow_run_name=\" test-finetuned-models-run\" , \n " ,
165171 " training_dataset=dataset.arn, \n " ,
166- " s3_output_path=\" s3://mc-flows-sdk-testing /output/\" ,\n " ,
172+ " s3_output_path=\" s3://<your-bucket> /output/\" , # TODO: replace with your S3 output path \n " ,
167173 " accept_eula=True\n " ,
168174 " )\n "
169175 ]
378384 " mlflow_experiment_name=\" test-finetuned-models-exp\" , # Optional[str]\n " ,
379385 " mlflow_run_name=\" test-finetuned-models-run\" , # Optional[str]\n " ,
380386 " training_dataset=dataset.arn, #Optional[]\n " ,
381- " s3_output_path=\" s3://mc-flows-sdk-testing /output/\" ,\n " ,
387+ " s3_output_path=\" s3://<your-bucket> /output/\" , # TODO: replace with your S3 output path \n " ,
382388 " )\n "
383389 ]
384390 },
439445 " mlflow_experiment_name=\" test-nova-finetuned-models-exp\" , \n " ,
440446 " mlflow_run_name=\" test-nova-finetuned-models-run\" , \n " ,
441447 " training_dataset=\" arn:aws:sagemaker:us-east-1:<>:hub-content/sdktest/DataSet/sft-nova-test-dataset/0.0.1\" ,\n " ,
442- " s3_output_path=\" s3://mc-flows-sdk-testing-us-east-1 /output/\"\n " ,
448+ " s3_output_path=\" s3://<your-bucket> /output/\" # TODO: replace with your S3 output path \n " ,
443449 " )\n "
444450 ]
445451 },
487493 },
488494 "nbformat" : 4 ,
489495 "nbformat_minor" : 5
490- }
496+ }
0 commit comments