@@ -39,8 +39,6 @@ def _command(self, args: argparse.Namespace):
3939 run = self .api .runs .get (run_name = args .run_name )
4040 if run is None :
4141 raise CLIError (f"Run { args .run_name } not found" )
42- if run .status .is_finished ():
43- raise CLIError (f"Run { args .run_name } is finished" )
4442 metrics = _get_run_jobs_metrics (api = self .api , run = run )
4543
4644 if not args .watch :
@@ -78,11 +76,12 @@ def _get_run_jobs_metrics(api: Client, run: Run) -> List[JobMetrics]:
7876def _get_metrics_table (run : Run , metrics : List [JobMetrics ]) -> Table :
7977 table = Table (box = None )
8078 table .add_column ("NAME" , style = "bold" , no_wrap = True )
79+ table .add_column ("STATUS" )
8180 table .add_column ("CPU" )
8281 table .add_column ("MEMORY" )
8382 table .add_column ("GPU" )
8483
85- run_row : Dict [Union [str , int ], Any ] = {"NAME" : run .name }
84+ run_row : Dict [Union [str , int ], Any ] = {"NAME" : run .name , "STATUS" : run . status . value }
8685 if len (run ._run .jobs ) != 1 :
8786 add_row_from_dict (table , run_row )
8887
@@ -101,9 +100,9 @@ def _get_metrics_table(run: Run, metrics: List[JobMetrics]) -> Table:
101100 cpu_usage = f"{ cpu_usage :.0f} %"
102101 memory_usage = _get_metric_value (job_metrics , "memory_working_set_bytes" )
103102 if memory_usage is not None :
104- memory_usage = f"{ round (memory_usage / 1024 / 1024 ) } MB "
103+ memory_usage = f"{ round (memory_usage / 1024 / 1024 / 1024 ) } GB "
105104 if resources is not None :
106- memory_usage += f"/{ resources .memory_mib } MB "
105+ memory_usage += f"/{ round ( resources .memory_mib / 1024 ) } GB "
107106 gpu_metrics = ""
108107 gpus_detected_num = _get_metric_value (job_metrics , "gpus_detected_num" )
109108 if gpus_detected_num is not None :
@@ -113,13 +112,14 @@ def _get_metrics_table(run: Run, metrics: List[JobMetrics]) -> Table:
113112 if gpu_memory_usage is not None :
114113 if i != 0 :
115114 gpu_metrics += "\n "
116- gpu_metrics += f"# { i } { round (gpu_memory_usage / 1024 / 1024 ) } MB "
115+ gpu_metrics += f"gpu= { i } mem= { round (gpu_memory_usage / 1024 / 1024 / 1024 ) } GB "
117116 if resources is not None :
118- gpu_metrics += f"/{ resources .gpus [i ].memory_mib } MB "
119- gpu_metrics += f" { gpu_util_percent } % Util "
117+ gpu_metrics += f"/{ round ( resources .gpus [i ].memory_mib / 1024 ) } GB "
118+ gpu_metrics += f" util= { gpu_util_percent } %"
120119
121120 job_row : Dict [Union [str , int ], Any ] = {
122121 "NAME" : f" replica={ job .job_spec .replica_num } job={ job .job_spec .job_num } " ,
122+ "STATUS" : job .job_submissions [- 1 ].status .value ,
123123 "CPU" : cpu_usage or "-" ,
124124 "MEMORY" : memory_usage or "-" ,
125125 "GPU" : gpu_metrics or "-" ,
0 commit comments