@@ -1157,21 +1157,24 @@ def concat_tar_parts(tar_parts, output_tar):
11571157 if split in [self .config .training_split , self .config .validation_split , self .config .test_split , self .config .fewshot_split ]:
11581158 self .dataset [split ] = self .config .process_docs (self .dataset [split ])
11591159
1160- # copy dataset, remove image features
1161- self .dataset_no_image = self .dataset .copy ()
1162- for doc_name in self .dataset_no_image :
1163- remove_cols = []
1164- features = self .dataset_no_image [doc_name ].features
1165- # If it is an Image instance or a Sequence of Image instance. Remove it
1166- for feature in features :
1167- if isinstance (features [feature ], Image ):
1168- remove_cols .append (feature )
1169- elif isinstance (features [feature ], Sequence ) and isinstance (features [feature ].feature , Image ):
1170- remove_cols .append (feature )
1171- elif isinstance (features [feature ], Audio ):
1172- remove_cols .append (feature )
1173- for remove_col in remove_cols :
1174- self .dataset_no_image [doc_name ] = self .dataset_no_image [doc_name ].remove_columns (remove_col )
1160+ # copy dataset, remove image and audio features (unless process_results needs them)
1161+ if getattr (self .config , "process_results_use_image" , False ):
1162+ self .dataset_no_image = self .dataset
1163+ else :
1164+ self .dataset_no_image = self .dataset .copy ()
1165+ for doc_name in self .dataset_no_image :
1166+ remove_cols = []
1167+ features = self .dataset_no_image [doc_name ].features
1168+ # Remove the feature if it is an Image, a Sequence of Image, or an Audio instance
1169+ for feature in features :
1170+ if isinstance (features [feature ], Image ):
1171+ remove_cols .append (feature )
1172+ elif isinstance (features [feature ], Sequence ) and isinstance (features [feature ].feature , Image ):
1173+ remove_cols .append (feature )
1174+ elif isinstance (features [feature ], Audio ):
1175+ remove_cols .append (feature )
1176+ for remove_col in remove_cols :
1177+ self .dataset_no_image [doc_name ] = self .dataset_no_image [doc_name ].remove_columns (remove_col )
11751178
11761179 def has_training_docs (self ) -> bool :
11771180 if self .config .training_split is not None :
@@ -1766,7 +1769,17 @@ def auto_doc_to_messages(doc):
17661769 if isinstance (visual , PIL_Image .Image ):
17671770 content .append ({"type" : "image" , "url" : visual })
17681771 elif isinstance (visual , dict ):
1769- content .append ({"type" : "audio" , "url" : visual })
1772+ # Dict visuals carry explicit type (default: video).
1773+ # Metadata keys (video_start, video_end, etc.) are
1774+ # preserved in the url field so they flow through
1775+ # ChatVideoContent → extract_media → fetch_video.
1776+ media_type = visual .get ("type" , "video" )
1777+ has_metadata = any (k in visual for k in ("video_start" , "video_end" ))
1778+ if has_metadata :
1779+ media_url = visual # pass full dict as url
1780+ else :
1781+ media_url = visual .get ("url" ) or visual .get ("path" ) or visual
1782+ content .append ({"type" : media_type , "url" : media_url })
17701783 elif isinstance (visual , str ):
17711784 ext = os .path .splitext (visual )[1 ].lower ()
17721785 if ext in _IMAGE_EXTS :
0 commit comments