@@ -381,6 +381,7 @@ async def extract_single(req: ExtractSingleRequest) -> Dict[str, Any]:
381381 method = method ,
382382 system_prompt = discovery_prompt if discovery_prompt else req .system_prompt ,
383383 task_description = None , # task_description already incorporated into discovery_prompt
384+ fail_on_empty_properties = False ,
384385 model_name = req .model_name or "gpt-4.1" ,
385386 temperature = req .temperature or 0.7 ,
386387 top_p = req .top_p or 0.95 ,
@@ -444,30 +445,29 @@ async def extract_batch(req: ExtractBatchRequest) -> Dict[str, Any]:
444445 "available" : list (df .columns ),
445446 })
446447
447- # Generate prompts and capture metadata
448- prompts_metadata = None
449- if req .task_description and req .use_dynamic_prompts :
450- from stringsight .core .data_objects import PropertyDataset
451- from stringsight .prompt_generation import generate_prompts
448+ # Generate prompts and capture metadata (always generate to get metadata)
449+ from stringsight .core .data_objects import PropertyDataset
450+ from stringsight .prompt_generation import generate_prompts
452451
453- temp_dataset = PropertyDataset .from_dataframe (df , method = method )
454- discovery_prompt , custom_clustering_prompts , prompts_metadata = generate_prompts (
455- task_description = req .task_description ,
456- dataset = temp_dataset ,
457- method = method ,
458- use_dynamic_prompts = req .use_dynamic_prompts ,
459- dynamic_prompt_samples = req .dynamic_prompt_samples or 5 ,
460- model = req .model_name or "gpt-4.1" ,
461- system_prompt_override = req .system_prompt ,
462- output_dir = req .output_dir
463- )
452+ temp_dataset = PropertyDataset .from_dataframe (df , method = method )
453+ discovery_prompt , custom_clustering_prompts , prompts_metadata = generate_prompts (
454+ task_description = req .task_description ,
455+ dataset = temp_dataset ,
456+ method = method ,
457+ use_dynamic_prompts = req .use_dynamic_prompts if req . use_dynamic_prompts is not None else True ,
458+ dynamic_prompt_samples = req .dynamic_prompt_samples or 5 ,
459+ model = req .model_name or "gpt-4.1" ,
460+ system_prompt_override = req .system_prompt ,
461+ output_dir = req .output_dir
462+ )
464463
465464 try :
466465 result = await public_api .extract_properties_only_async (
467466 df ,
468467 method = method ,
469- system_prompt = req .system_prompt ,
470- task_description = req .task_description ,
468+ system_prompt = discovery_prompt if discovery_prompt else req .system_prompt ,
469+ task_description = None , # task_description already incorporated into discovery_prompt
470+ fail_on_empty_properties = False ,
471471 model_name = req .model_name or "gpt-4.1" ,
472472 temperature = req .temperature or 0.7 ,
473473 top_p = req .top_p or 0.95 ,
@@ -578,24 +578,23 @@ def update_progress(completed: int, total: int):
578578 # Create dataset once and reuse
579579 dataset = PropertyDataset .from_dataframe (df , method = method )
580580
581- # Generate prompts and capture metadata
582- prompts_metadata = None
583- if req .task_description and req .use_dynamic_prompts :
584- discovery_prompt , custom_clustering_prompts , prompts_metadata = generate_prompts (
585- task_description = req .task_description ,
586- dataset = dataset ,
587- method = method ,
588- use_dynamic_prompts = req .use_dynamic_prompts ,
589- dynamic_prompt_samples = req .dynamic_prompt_samples or 5 ,
590- model = req .model_name or "gpt-4.1" ,
591- system_prompt_override = req .system_prompt ,
592- output_dir = req .output_dir
593- )
594- # Store prompts metadata in job
595- with _JOBS_LOCK :
596- job .prompts_metadata = prompts_metadata .dict () if prompts_metadata else None
581+ # Generate prompts and capture metadata (always generate to get metadata)
582+ discovery_prompt , custom_clustering_prompts , prompts_metadata = generate_prompts (
583+ task_description = req .task_description ,
584+ dataset = dataset ,
585+ method = method ,
586+ use_dynamic_prompts = req .use_dynamic_prompts if req .use_dynamic_prompts is not None else True ,
587+ dynamic_prompt_samples = req .dynamic_prompt_samples or 5 ,
588+ model = req .model_name or "gpt-4.1" ,
589+ system_prompt_override = req .system_prompt ,
590+ output_dir = req .output_dir
591+ )
592+ # Store prompts metadata in job
593+ with _JOBS_LOCK :
594+ job .prompts_metadata = prompts_metadata .dict () if prompts_metadata else None
597595
598- system_prompt = get_system_prompt (method , req .system_prompt , req .task_description )
596+ # Use the generated discovery_prompt if available, otherwise fall back to get_system_prompt
597+ system_prompt = discovery_prompt if discovery_prompt else get_system_prompt (method , req .system_prompt , req .task_description )
599598
600599 extractor = get_extractor (
601600 model_name = req .model_name or "gpt-4.1" ,
0 commit comments