3333logger = logging .getLogger (__name__ )
3434
3535
36- RunStatus = namedtuple ('RunStatus' , ('tind_id' , 'status' , 'path' ))
36+ RunStatus = namedtuple ('RunStatus' , ('tind_id' , 'status' , 'description' , ' path' ))
3737
3838SUPPORTED_IMAGE_TYPES = {"image/jpeg" , "image/png" , "image/gif" , "image/webp" }
3939"""The supported image MIME types we will fetch."""
@@ -213,15 +213,16 @@ def fetch_image_to_record_directory(run_id: str, fetcher: ImageFetcher,
213213 return RunStatus (
214214 tind_id = tind_id ,
215215 path = "" ,
216- status = f"skipped: Unsupported file type { file_md .get ('mime' )} " ,
216+ status = "skipped" ,
217+ description = f"Unsupported file type { file_md .get ('mime' )} " ,
217218 )
218219
219220 path = str (fetcher .fetch_one_image_for_record (tind_id , run_id ))
220221 except Exception as ex : # pylint: disable=broad-exception-caught
221222 logger .warning ("Fetcher encountered exception" , exc_info = ex )
222- return RunStatus (tind_id = tind_id , status = f' failed: { str (ex )} ' , path = '' )
223+ return RunStatus (tind_id = tind_id , status = " failed" , description = str (ex ), path = "" )
223224
224- return RunStatus (tind_id = tind_id , status = "fetched" , path = path )
225+ return RunStatus (tind_id = tind_id , status = "fetched" , description = "" , path = path )
225226
226227 @task
227228 def write_status_to_fetched_csv (
@@ -233,13 +234,16 @@ def write_status_to_fetched_csv(
233234 fetched_path = run_dir (context ["run_id" ]) / "fetched.csv"
234235 with fetched_path .open ("w" , encoding = "utf-8" ) as csv_file :
235236 writer = csv .writer (csv_file )
236- writer .writerow ((* records ["Record ID" ], "Image Path" ))
237237
238238 status_col = records ["Record ID" ].index ("Status" )
239+ records ["Record ID" ].insert (status_col + 1 , "Status Description" )
240+
241+ writer .writerow ((* records ["Record ID" ], "Image Path" ))
239242
240243 for status in statuses :
241- record = [* records [status [0 ]], * status [2 :]]
244+ record = [* records [status [0 ]], * status [3 :]]
242245 record [status_col ] = status [1 ]
246+ record .insert (status_col + 1 , status [2 ])
243247 writer .writerow (record )
244248
245249 processed = read_csv_to_process ()
@@ -308,6 +312,7 @@ def transform_results(
308312 "Image Name" : Path (record ["Image Path" ]).name ,
309313 "Collection name" : record ["Collection name" ],
310314 "Status" : record ["Status" ],
315+ "Status description" : record .get ("Status description" , "" ),
311316 "520__a-1" : record .get ("Description" , "" ),
312317 "5880_a" : f"Image description generated by AI ({ ENV .get ('AWS_MODEL_LABEL' )} )"
313318 " and reviewed on [MM/YYYY]." ,
@@ -336,8 +341,8 @@ def write_output_csv(processed_dicts: list[list[dict[str, str]]]) -> None:
336341 writer .writeheader ()
337342 writer .writerows (all_results )
338343
339- prompt = get_prompt ()
340344 batches = read_and_batch_csv ()
345+ prompt = get_prompt ()
341346 batch_results = invoke_llm_on_batch_with_prompt .partial (prompt = prompt ).expand (
342347 batch = batches
343348 )
0 commit comments