@@ -199,52 +199,69 @@ def _build_and_upload_artifacts(
199199 obs_duckdb_path : str ,
200200 upload_timeout_seconds : int ,
201201 upload_max_workers : int = 4 ,
202- ) -> dict [str , str ]:
203- """Build local artifacts and upload them before annotate."""
204- logger .info ("Saving vars.h5 artifact from normalized counts..." )
205- save_features_matrix (
206- out_file = vars_h5_path ,
207- mat = self .adata .X ,
208- var_df = self .adata .var ,
209- var_names = self .adata .var_names ,
210- )
202+ ) -> tuple [dict [str , str ], list [tuple [str , Exception ]]]:
203+ """Build and upload each artifact as an independent unit.
211204
212- logger .info ("Saving obs.duckdb artifact from observation metadata..." )
213- save_obs_duckdb_file (
214- out_file = obs_duckdb_path ,
215- obs_df = self .adata .obs ,
216- )
205+ Returns (uploaded_ids, errors) so the caller can decide whether
206+ partial success is acceptable.
207+ """
208+ uploaded : dict [str , str ] = {}
209+ errors : list [tuple [str , Exception ]] = []
210+ timeout = (30.0 , float (upload_timeout_seconds ))
217211
218- logger .info ("Uploading obs.duckdb artifact..." )
219- obs_upload = upload_obs_duckdb_file (
220- self .api_url ,
221- self .auth_token ,
222- obs_duckdb_path ,
223- timeout = (30.0 , float (upload_timeout_seconds )),
224- max_workers = upload_max_workers ,
225- )
226- if obs_upload .file_kind != "obs_duckdb" :
227- raise ValueError (
228- f"Unexpected upload file_kind for obs artifact: { obs_upload .file_kind } "
212+ # --- vars.h5 (save then upload) ---
213+ try :
214+ logger .info ("Saving vars.h5 artifact from normalized counts..." )
215+ save_features_matrix (
216+ out_file = vars_h5_path ,
217+ mat = self .adata .X ,
218+ var_df = self .adata .var ,
219+ var_names = self .adata .var_names ,
220+ )
221+ logger .info ("Uploading vars.h5 artifact..." )
222+ vars_upload = upload_vars_h5_file (
223+ self .api_url ,
224+ self .auth_token ,
225+ vars_h5_path ,
226+ timeout = timeout ,
227+ max_workers = upload_max_workers ,
229228 )
229+ if vars_upload .file_kind != "vars_h5" :
230+ raise ValueError (
231+ f"Unexpected upload file_kind for vars artifact: { vars_upload .file_kind } "
232+ )
233+ uploaded ["vars_h5" ] = vars_upload .upload_id
234+ except Exception as exc :
235+ logger .warning (f"vars.h5 artifact failed: { exc } " )
236+ errors .append (("vars_h5" , exc ))
230237
231- logger .info ("Uploading vars.h5 artifact..." )
232- vars_upload = upload_vars_h5_file (
233- self .api_url ,
234- self .auth_token ,
235- vars_h5_path ,
236- timeout = (30.0 , float (upload_timeout_seconds )),
237- max_workers = upload_max_workers ,
238- )
239- if vars_upload .file_kind != "vars_h5" :
240- raise ValueError (
241- f"Unexpected upload file_kind for vars artifact: { vars_upload .file_kind } "
238+ print ()
239+
240+ # --- obs.duckdb (save then upload) ---
241+ try :
242+ logger .info ("Saving obs.duckdb artifact from observation metadata..." )
243+ save_obs_duckdb_file (
244+ out_file = obs_duckdb_path ,
245+ obs_df = self .adata .obs ,
246+ )
247+ logger .info ("Uploading obs.duckdb artifact..." )
248+ obs_upload = upload_obs_duckdb_file (
249+ self .api_url ,
250+ self .auth_token ,
251+ obs_duckdb_path ,
252+ timeout = timeout ,
253+ max_workers = upload_max_workers ,
242254 )
255+ if obs_upload .file_kind != "obs_duckdb" :
256+ raise ValueError (
257+ f"Unexpected upload file_kind for obs artifact: { obs_upload .file_kind } "
258+ )
259+ uploaded ["obs_duckdb" ] = obs_upload .upload_id
260+ except Exception as exc :
261+ logger .warning (f"obs.duckdb artifact failed: { exc } " )
262+ errors .append (("obs_duckdb" , exc ))
243263
244- return {
245- "obs_duckdb" : obs_upload .upload_id ,
246- "vars_h5" : vars_upload .upload_id ,
247- }
264+ return uploaded , errors
248265
249266 @staticmethod
250267 def _cleanup_artifact_files (paths : list [str ]) -> None :
@@ -372,26 +389,29 @@ def run(
372389
373390 artifact_paths = [vars_h5_path , obs_duckdb_path ]
374391 try :
375- try :
376- uploaded_file_refs = self . _build_and_upload_artifacts (
377- vars_h5_path = vars_h5_path ,
378- obs_duckdb_path = obs_duckdb_path ,
379- upload_timeout_seconds = upload_timeout_seconds ,
380- upload_max_workers = upload_max_workers ,
381- )
392+ uploaded_file_refs , artifact_errors = self . _build_and_upload_artifacts (
393+ vars_h5_path = vars_h5_path ,
394+ obs_duckdb_path = obs_duckdb_path ,
395+ upload_timeout_seconds = upload_timeout_seconds ,
396+ upload_max_workers = upload_max_workers ,
397+ )
398+ if uploaded_file_refs :
382399 payload ["uploaded_files" ] = uploaded_file_refs
383- except Exception as exc :
400+
401+ if artifact_errors :
402+ failed_names = ", " .join (name for name , _ in artifact_errors )
384403 if require_artifacts :
385404 logger .error (
386- "Artifact build/upload failed. "
405+ f "Artifact build/upload failed for: { failed_names } . "
387406 "Rerun with `require_artifacts=False` to skip this error.\n "
388407 "Please report the error below in a new issue at "
389408 "https://github.com/NygenAnalytics/CyteType\n "
390- f"({ type (exc ).__name__ } : { exc } )"
409+ f"({ type (artifact_errors [ 0 ][ 1 ] ).__name__ } : { str ( artifact_errors [ 0 ][ 1 ]). strip () } )"
391410 )
392- raise
411+ raise artifact_errors [ 0 ][ 1 ]
393412 logger .warning (
394- "Artifact build/upload failed. Continuing without artifacts. "
413+ f"Artifact build/upload failed for: { failed_names } . "
414+ "Continuing without those artifacts. "
395415 "Set `require_artifacts=True` to see the full traceback."
396416 )
397417
@@ -400,6 +420,7 @@ def run(
400420 save_query_to_file (payload ["input_data" ], query_filename )
401421
402422 # Submit job and store details
423+ print ()
403424 job_id = submit_annotation_job (self .api_url , self .auth_token , payload )
404425 store_job_details (self .adata , job_id , self .api_url , results_prefix )
405426
0 commit comments