Skip to content

Commit f71d183

Browse files
author
Syed Jafri
committed
debug: adding additional logs to investigate tests
1 parent 2bbe72a commit f71d183

2 files changed

Lines changed: 23 additions & 2 deletions

File tree

sagemaker-train/src/sagemaker/train/rlvr_trainer.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -276,10 +276,21 @@ def train(self, training_dataset: Optional[Union[str, DataSet]] = None,
276276
if self.stopping_condition is not None:
277277
create_args["stopping_condition"] = self.stopping_condition
278278

279+
# Log the IAM role being used
280+
logger.info(f"IAM Role ARN: {role}")
281+
282+
# Log the full training job arguments for debugging
283+
logger.info(f"TrainingJob.create() arguments: {create_args}")
284+
279285
try:
280286
training_job = TrainingJob.create(**create_args)
281287
except Exception as e:
282-
logger.error("Error: %s", e)
288+
logger.error(f"Error creating training job: {e}")
289+
logger.error(f"Training job name: {current_training_job_name}")
290+
logger.error(f"Serverless config: {serverless_config}")
291+
logger.error(f"Evaluator ARN: {evaluator_arn}")
292+
logger.error(f"Role ARN: {role}")
293+
logger.error(f"Full create_args: {create_args}")
283294
raise e
284295

285296
if wait:

sagemaker-train/src/sagemaker/train/sft_trainer.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -270,10 +270,20 @@ def train(self, training_dataset: Optional[Union[str, DataSet]] = None, validati
270270
if self.stopping_condition is not None:
271271
create_args["stopping_condition"] = self.stopping_condition
272272

273+
# Log the IAM role being used
274+
logger.info(f"IAM Role ARN: {role}")
275+
276+
# Log the full training job arguments for debugging
277+
logger.info(f"TrainingJob.create() arguments: {create_args}")
278+
273279
try:
274280
training_job = TrainingJob.create(**create_args)
275281
except Exception as e:
276-
logger.error("Error: %s", e)
282+
logger.error(f"Error creating training job: {e}")
283+
logger.error(f"Training job name: {current_training_job_name}")
284+
logger.error(f"Serverless config: {serverless_config}")
285+
logger.error(f"Role ARN: {role}")
286+
logger.error(f"Full create_args: {create_args}")
277287
raise e
278288

279289
if wait:

0 commit comments

Comments
 (0)