1313
1414import cv2
1515import habitat
16+ import imageio
1617import numpy as np
1718import quaternion
1819import torch
@@ -105,6 +106,8 @@ def __init__(self, cfg: EvalCfg):
105106
106107 # ------------------------------------- model ------------------------------------------
107108 self .model_args = argparse .Namespace (** cfg .agent .model_settings )
109+ self .vis_debug = bool (getattr (self .model_args , "vis_debug" , False ))
110+ self .vis_debug_path = getattr (self .model_args , "vis_debug_path" , os .path .join (self .output_path , "vis_debug" ))
108111
109112 processor = AutoProcessor .from_pretrained (self .model_args .model_path )
110113 processor .tokenizer .padding_side = 'left'
@@ -256,7 +259,7 @@ def resume_from_output_path(self) -> None:
256259 ndtw .append (res ['ndtw' ])
257260 return sucs , spls , oss , nes , ndtw
258261
259- def _run_eval_dual_system (self ) -> tuple :
262+ def _run_eval_dual_system (self ) -> tuple : # noqa: C901
260263 self .model .eval ()
261264
262265 # resume from previous results
@@ -288,9 +291,17 @@ def _run_eval_dual_system(self) -> tuple:
288291
289292 vis_frames = []
290293 step_id = 0
294+ vis_writer = None
291295
292296 if self .save_video :
293297 os .makedirs (os .path .join (self .output_path , f'vis_{ self .epoch } ' , f'{ scene_id } ' ), exist_ok = True )
298+ if self .vis_debug :
299+ debug_dir = os .path .join (self .vis_debug_path , f'epoch_{ self .epoch } ' )
300+ os .makedirs (debug_dir , exist_ok = True )
301+ vis_writer = imageio .get_writer (
302+ os .path .join (debug_dir , f'{ scene_id } _{ episode_id :04d} .mp4' ),
303+ fps = 5 ,
304+ )
294305
295306 rgb_list = []
296307 action_seq = []
@@ -307,6 +318,7 @@ def _run_eval_dual_system(self) -> tuple:
307318
308319 # ---------- 2. Episode step loop -----------
309320 while (not done ) and (step_id <= self .max_steps_per_episode ):
321+ draw_pixel_goal = False
310322 # refactor agent get action
311323 rgb = observations ["rgb" ]
312324 depth = observations ["depth" ]
@@ -422,6 +434,7 @@ def _run_eval_dual_system(self) -> tuple:
422434 coord = [int (c ) for c in re .findall (r'\d+' , llm_outputs )]
423435
424436 pixel_goal = [int (coord [1 ]), int (coord [0 ])]
437+ draw_pixel_goal = True
425438
426439 # look down --> horizontal
427440 self .env .step (action_code .LOOKUP )
@@ -526,6 +539,22 @@ def _run_eval_dual_system(self) -> tuple:
526539
527540 print ("step_id" , step_id , "action" , action )
528541
542+ if vis_writer is not None :
543+ vis = np .asarray (save_raw_image ).copy ()
544+ vis = cv2 .putText (
545+ vis ,
546+ f"step { step_id } action { int (action )} " ,
547+ (20 , 40 ),
548+ cv2 .FONT_HERSHEY_SIMPLEX ,
549+ 1 ,
550+ (0 , 255 , 0 ),
551+ 2 ,
552+ )
553+ if pixel_goal is not None :
554+ if draw_pixel_goal :
555+ cv2 .circle (vis , (pixel_goal [0 ], pixel_goal [1 ]), radius = 8 , color = (255 , 0 , 0 ), thickness = - 1 )
556+ vis_writer .append_data (vis )
557+
529558 if action == action_code .LOOKDOWN :
530559 self .env .step (action )
531560 observations , _ , done , _ = self .env .step (action )
@@ -586,6 +615,8 @@ def _run_eval_dual_system(self) -> tuple:
586615 quality = 9 ,
587616 )
588617 vis_frames .clear ()
618+ if vis_writer is not None :
619+ vis_writer .close ()
589620
590621 self .env .close ()
591622
@@ -643,9 +674,17 @@ def _run_eval_system2(self) -> tuple:
643674
644675 vis_frames = []
645676 step_id = 0
677+ vis_writer = None
646678
647679 if self .save_video :
648680 os .makedirs (os .path .join (self .output_path , f'vis_{ self .epoch } ' , f'{ scene_id } ' ), exist_ok = True )
681+ if self .vis_debug :
682+ debug_dir = os .path .join (self .vis_debug_path , f'epoch_{ self .epoch } ' )
683+ os .makedirs (debug_dir , exist_ok = True )
684+ vis_writer = imageio .get_writer (
685+ os .path .join (debug_dir , f'{ scene_id } _{ episode_id :04d} .mp4' ),
686+ fps = 5 ,
687+ )
649688 initial_height = self .env ._env .sim .get_agent_state ().position [1 ]
650689
651690 rgb_list = []
@@ -662,6 +701,7 @@ def _run_eval_system2(self) -> tuple:
662701
663702 # ---------- 2. Episode step loop -----------
664703 while (not done ) and (step_id <= self .max_steps_per_episode ):
704+ draw_pixel_goal = False
665705 # refactor agent get action
666706 rgb = observations ["rgb" ]
667707 depth = observations ["depth" ]
@@ -755,6 +795,7 @@ def _run_eval_system2(self) -> tuple:
755795 coord = [int (c ) for c in re .findall (r'\d+' , llm_outputs )]
756796
757797 pixel_goal = [int (coord [1 ]), int (coord [0 ])]
798+ draw_pixel_goal = True
758799
759800 # look down --> horizontal
760801 self .env .step (action_code .LOOKUP )
@@ -818,6 +859,21 @@ def _run_eval_system2(self) -> tuple:
818859
819860 print ("step_id" , step_id , "action" , action )
820861
862+ if vis_writer is not None :
863+ vis = np .asarray (save_raw_image ).copy ()
864+ vis = cv2 .putText (
865+ vis ,
866+ f"step { step_id } action { int (action )} " ,
867+ (20 , 40 ),
868+ cv2 .FONT_HERSHEY_SIMPLEX ,
869+ 1 ,
870+ (0 , 255 , 0 ),
871+ 2 ,
872+ )
873+ if draw_pixel_goal :
874+ cv2 .circle (vis , (pixel_goal [0 ], pixel_goal [1 ]), radius = 8 , color = (255 , 0 , 0 ), thickness = - 1 )
875+ vis_writer .append_data (vis )
876+
821877 if action == action_code .LOOKDOWN :
822878 self .env .step (action )
823879 observations , _ , done , _ = self .env .step (action )
@@ -875,6 +931,8 @@ def _run_eval_system2(self) -> tuple:
875931 quality = 9 ,
876932 )
877933 vis_frames .clear ()
934+ if vis_writer is not None :
935+ vis_writer .close ()
878936
879937 self .env .close ()
880938
0 commit comments