
Commit 2b3a6b2

dynamically generated prompt works in frontend

1 parent 8a7b045 commit 2b3a6b2

9 files changed: 158 additions & 53 deletions


stringsight/_public/async_api.py

Lines changed: 2 additions & 3 deletions
@@ -20,6 +20,7 @@ async def extract_properties_only_async(
     method: str = "single_model",
     system_prompt: str | None = None,
     task_description: str | None = None,
+    fail_on_empty_properties: bool = True,
     prompt_builder: Callable[[pd.Series, str], str] | None = None,
     model_name: str = "gpt-4.1",
     temperature: float = 0.7,
@@ -52,8 +53,6 @@ async def extract_properties_only_async(
     See extract_properties_only() for full documentation.
     """
     # Just call the sync version's implementation but await the pipeline
-    from ..prompts import get_system_prompt
-    from ..pipeline import Pipeline
     from ..extractors import get_extractor
     from ..postprocess import LLMJsonParser, PropertyValidator
     from ..core.preprocessing import validate_and_prepare_dataframe
@@ -113,7 +112,7 @@ async def extract_properties_only_async(
 
     extractor = get_extractor(**extractor_kwargs) # type: ignore[arg-type]
     parser = LLMJsonParser(fail_fast=False, **common_cfg) # type: ignore[arg-type]
-    validator = PropertyValidator(**common_cfg) # type: ignore[arg-type]
+    validator = PropertyValidator(fail_on_empty=fail_on_empty_properties, **common_cfg) # type: ignore[arg-type]
 
     if output_dir:
         extractor.output_dir = output_dir # type: ignore[attr-defined]

stringsight/_public/sync_api.py

Lines changed: 4 additions & 1 deletion
@@ -22,6 +22,7 @@ def extract_properties_only(
     method: str = "single_model",
     system_prompt: str | None = None,
     task_description: str | None = None,
+    fail_on_empty_properties: bool = True,
     # Data preparation
     score_columns: List[str] | None = None,
     sample_size: int | None = None,
@@ -59,6 +60,8 @@ def extract_properties_only(
         method: "single_model" | "side_by_side"
         system_prompt: Explicit system prompt text or a short prompt name from stringsight.prompts
         task_description: Optional task-aware description (used only if the chosen prompt has {task_description})
+        fail_on_empty_properties: If True, raise a RuntimeError when 0 valid properties remain after validation.
+            If False, return an empty PropertyDataset.properties list.
         score_columns: Optional list of column names containing score metrics to convert to dict format
         sample_size: Optional number of rows to sample from the dataset before processing
         model_a: For side_by_side method with tidy data, specifies first model to select
@@ -150,7 +153,7 @@ def extract_properties_only(
     extractor = get_extractor(**extractor_kwargs) # type: ignore[arg-type]
     # Do not fail the whole run on parsing errors – collect failures and drop those rows
     parser = LLMJsonParser(fail_fast=False, output_dir=output_dir, **common_cfg) # type: ignore[arg-type]
-    validator = PropertyValidator(output_dir=output_dir, **common_cfg) # type: ignore[arg-type]
+    validator = PropertyValidator(output_dir=output_dir, fail_on_empty=fail_on_empty_properties, **common_cfg) # type: ignore[arg-type]
 
     pipeline = PipelineBuilder(name=f"StringSight-extract-{method}") \
         .extract_properties(extractor) \
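
Not part of the commit, but for orientation: a minimal usage sketch of the new flag, assuming the function is re-exported at the package root and that a single-model DataFrame carries prompt/model/model_response columns (both assumptions, not confirmed by this diff). The same flag is threaded through extract_properties_only_async above.

import pandas as pd
from stringsight import extract_properties_only  # import path assumed

df = pd.DataFrame({
    "prompt": ["What is 2+2?"],      # column names are illustrative
    "model": ["gpt-4.1"],
    "model_response": ["4"],
})

# Default (True): zero valid properties raises RuntimeError.
# False: the call returns a dataset whose .properties list is empty.
result = extract_properties_only(
    df,
    method="single_model",
    fail_on_empty_properties=False,
)
print(len(result.properties))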

stringsight/postprocess/validator.py

Lines changed: 18 additions & 5 deletions
@@ -4,9 +4,6 @@
 This stage validates and cleans extracted properties.
 """
 
-from pathlib import Path
-import json
-import pandas as pd
 from typing import Optional, List, Any
 from ..core.stage import PipelineStage
 from ..core.data_objects import PropertyDataset, Property
@@ -26,12 +23,22 @@ def __init__(
         self,
         output_dir: Optional[str] = None,
         storage: Optional[StorageAdapter] = None,
+        fail_on_empty: bool = True,
         **kwargs
     ):
-        """Initialize the property validator."""
+        """Initialize the property validator.
+
+        Args:
+            output_dir: Optional directory to auto-save stage artefacts.
+            storage: Optional StorageAdapter for writing artefacts.
+            fail_on_empty: If True, raise a RuntimeError when 0 valid properties remain after validation.
+                If False, keep an empty `properties` list and allow the pipeline to continue/return.
+            **kwargs: Forwarded to PipelineStage / LoggingMixin configuration.
+        """
         super().__init__(**kwargs)
         self.output_dir = output_dir
         self.storage = storage or get_storage_adapter()
+        self.fail_on_empty = fail_on_empty
 
     def run(self, data: PropertyDataset, progress_callback: Any = None, **kwargs: Any) -> PropertyDataset:
         """
@@ -67,13 +74,19 @@ def run(self, data: PropertyDataset, progress_callback: Any = None, **kwargs: Any) -> PropertyDataset:
 
 
         # Check for 0 valid properties and provide helpful error message
-        if len(valid_properties) == 0:
+        if len(valid_properties) == 0 and self.fail_on_empty:
             raise RuntimeError(
                 "ERROR: 0 valid properties after validation. "
                 "This typically means: (1) LLM returned empty/invalid responses, "
                 "(2) JSON parsing failures, or (3) All properties filtered during validation. "
                 "Check logs above for details."
             )
+        if len(valid_properties) == 0 and not self.fail_on_empty:
+            self.log(
+                "WARNING: 0 valid properties after validation. Returning an empty properties list. "
+                "This typically means: (1) LLM returned empty/invalid responses, (2) JSON parsing failures, "
+                "or (3) All properties filtered during validation."
+            )
 
         # Auto-save validation results if output_dir is provided
         if self.output_dir:
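
A short sketch of the two validator modes introduced here, construction only; the import path follows the "from ..postprocess import LLMJsonParser, PropertyValidator" line in the API diffs above.

from stringsight.postprocess import PropertyValidator

# Strict default: RuntimeError when validation leaves 0 valid properties.
strict = PropertyValidator(fail_on_empty=True)

# Lenient mode, which the routers in this commit opt into: log a WARNING
# and hand back the dataset with an empty properties list.
lenient = PropertyValidator(fail_on_empty=False)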

stringsight/prompt_generation.py

Lines changed: 45 additions & 3 deletions
@@ -57,8 +57,14 @@ def generate_prompts(
     # Can generate prompts with or without task_description (will infer from conversations if not provided)
     logger.info(f"Prompt generation config: use_dynamic_prompts={use_dynamic_prompts}, system_prompt_override={system_prompt_override is not None}")
 
-    if use_dynamic_prompts and not system_prompt_override:
-        logger.info("Generating dynamic prompts...")
+    # Check if system_prompt_override is a known template alias (not a custom literal prompt)
+    KNOWN_PROMPT_ALIASES = {"default", "agent", "universal", "agent_universal"}
+    is_custom_literal_prompt = system_prompt_override is not None and system_prompt_override not in KNOWN_PROMPT_ALIASES
+
+    # Only skip dynamic generation if there's a custom literal prompt
+    # Template aliases like "default" should still allow dynamic generation
+    if use_dynamic_prompts and not is_custom_literal_prompt:
+        logger.info(f"Generating dynamic prompts (system_prompt_override={system_prompt_override})...")
 
         # Use a default task description if none provided
         task_desc_for_generation = task_description_clean or "Analyze the behavioral patterns and characteristics in these AI model conversations."
@@ -128,6 +134,7 @@ def generate_prompts(
                 clustering_prompts=custom_clustering_prompts,
                 expanded_task_description=result.expanded_task_description
             )
+            _save_metadata_to_file(output_dir=output_dir, metadata=metadata)
         except Exception as e:
             logger.error(f"Dynamic prompt generation failed: {e}. Using static prompts.")
             discovery_prompt = get_system_prompt(method, system_prompt_override, task_description_clean)
@@ -159,10 +166,38 @@ def generate_prompts(
             clustering_prompts=None,
             expanded_task_description=task_description_clean
         )
+        _save_metadata_to_file(output_dir=output_dir, metadata=metadata)
 
     return discovery_prompt, custom_clustering_prompts, metadata
 
 
+def _save_metadata_to_file(
+    output_dir: str,
+    metadata: PromptsMetadata
+) -> None:
+    """Save prompts metadata to JSON file in output directory.
+
+    Args:
+        output_dir: Directory to save metadata to (relative paths resolved relative to results dir).
+        metadata: PromptsMetadata object to save.
+    """
+    import json
+    from stringsight.utils.paths import _get_results_dir
+
+    # Resolve output_dir relative to results directory if it's not absolute
+    output_path = Path(output_dir)
+    if not output_path.is_absolute():
+        results_base = _get_results_dir()
+        output_path = results_base / output_dir
+
+    output_path.mkdir(parents=True, exist_ok=True)
+
+    metadata_file = output_path / "prompts_metadata.json"
+    with open(metadata_file, "w") as f:
+        json.dump(metadata.dict(), f, indent=2)
+    logger.info(f"Saved prompts metadata to {metadata_file}")
+
+
 def _save_prompts_to_file(
     output_dir: str,
     discovery_prompt: str,
@@ -172,12 +207,19 @@ def _save_prompts_to_file(
     """Save generated prompts to text files in output directory.
 
     Args:
-        output_dir: Directory to save prompts to.
+        output_dir: Directory to save prompts to (relative paths resolved relative to results dir).
         discovery_prompt: The discovery/extraction prompt.
         clustering_prompts: Optional dict of clustering prompts.
        expanded_task_description: Optional expanded task description.
     """
+    from stringsight.utils.paths import _get_results_dir
+
+    # Resolve output_dir relative to results directory if it's not absolute
     output_path = Path(output_dir)
+    if not output_path.is_absolute():
+        results_base = _get_results_dir()
+        output_path = results_base / output_dir
+
     output_path.mkdir(parents=True, exist_ok=True)
 
     # Save discovery prompt
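
The resolution rule both save helpers now share, isolated as a standalone sketch; the real base directory comes from stringsight.utils.paths._get_results_dir, which is replaced by a parameter here so the snippet runs on its own.

from pathlib import Path

def resolve_output_dir(output_dir: str, results_base: Path) -> Path:
    # Relative paths are resolved against the results directory;
    # absolute paths are used as-is, then the directory is created.
    output_path = Path(output_dir)
    if not output_path.is_absolute():
        output_path = results_base / output_dir
    output_path.mkdir(parents=True, exist_ok=True)
    return output_path

# resolve_output_dir("run_42", Path("results"))      -> results/run_42
# resolve_output_dir("/tmp/run_42", Path("results")) -> /tmp/run_42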

stringsight/prompts/__init__.py

Lines changed: 6 additions & 3 deletions
@@ -219,9 +219,12 @@ def get_system_prompt(method: str, system_prompt: str | None = None, task_descri
             desc = task_description if task_description is not None else cast(str, default_desc)
             return _format_task_aware(template, desc)
         if task_description is not None:
-            raise ValueError(
-                "A task_description was provided, but the given system_prompt string does not "
-                "contain {task_description}. Please include the placeholder or use an alias ('default'|'agent')."
+            # Match the behavior of prompt templates loaded from this module:
+            # if the prompt doesn't support {task_description}, ignore it rather than erroring.
+            import warnings
+            warnings.warn(
+                "task_description was provided but the given system_prompt string does not contain "
+                "{task_description}. The task_description will be ignored."
             )
         return template
     except Exception as e:
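
A hedged illustration of the softened contract, assuming a literal prompt string reaches the branch shown above (the prompt text and task description are made up; "single_model" is one of the two documented method values).

import warnings
from stringsight.prompts import get_system_prompt

literal_prompt = "You are a careful reviewer."  # no {task_description} placeholder

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    prompt = get_system_prompt("single_model", literal_prompt, "grade essays")

# Before this commit the call raised ValueError; now the literal prompt
# comes back unchanged and a UserWarning notes the ignored description.
assert prompt == literal_prompt
assert any("will be ignored" in str(w.message) for w in caught)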

stringsight/routers/extraction.py

Lines changed: 34 additions & 35 deletions
@@ -381,6 +381,7 @@ async def extract_single(req: ExtractSingleRequest) -> Dict[str, Any]:
             method=method,
             system_prompt=discovery_prompt if discovery_prompt else req.system_prompt,
             task_description=None, # task_description already incorporated into discovery_prompt
+            fail_on_empty_properties=False,
             model_name=req.model_name or "gpt-4.1",
             temperature=req.temperature or 0.7,
             top_p=req.top_p or 0.95,
@@ -444,30 +445,29 @@ async def extract_batch(req: ExtractBatchRequest) -> Dict[str, Any]:
             "available": list(df.columns),
         })
 
-    # Generate prompts and capture metadata
-    prompts_metadata = None
-    if req.task_description and req.use_dynamic_prompts:
-        from stringsight.core.data_objects import PropertyDataset
-        from stringsight.prompt_generation import generate_prompts
+    # Generate prompts and capture metadata (always generate to get metadata)
+    from stringsight.core.data_objects import PropertyDataset
+    from stringsight.prompt_generation import generate_prompts
 
-        temp_dataset = PropertyDataset.from_dataframe(df, method=method)
-        discovery_prompt, custom_clustering_prompts, prompts_metadata = generate_prompts(
-            task_description=req.task_description,
-            dataset=temp_dataset,
-            method=method,
-            use_dynamic_prompts=req.use_dynamic_prompts,
-            dynamic_prompt_samples=req.dynamic_prompt_samples or 5,
-            model=req.model_name or "gpt-4.1",
-            system_prompt_override=req.system_prompt,
-            output_dir=req.output_dir
-        )
+    temp_dataset = PropertyDataset.from_dataframe(df, method=method)
+    discovery_prompt, custom_clustering_prompts, prompts_metadata = generate_prompts(
+        task_description=req.task_description,
+        dataset=temp_dataset,
+        method=method,
+        use_dynamic_prompts=req.use_dynamic_prompts if req.use_dynamic_prompts is not None else True,
+        dynamic_prompt_samples=req.dynamic_prompt_samples or 5,
+        model=req.model_name or "gpt-4.1",
+        system_prompt_override=req.system_prompt,
+        output_dir=req.output_dir
+    )
 
     try:
         result = await public_api.extract_properties_only_async(
             df,
             method=method,
-            system_prompt=req.system_prompt,
-            task_description=req.task_description,
+            system_prompt=discovery_prompt if discovery_prompt else req.system_prompt,
+            task_description=None, # task_description already incorporated into discovery_prompt
+            fail_on_empty_properties=False,
             model_name=req.model_name or "gpt-4.1",
             temperature=req.temperature or 0.7,
             top_p=req.top_p or 0.95,
@@ -578,24 +578,23 @@ def update_progress(completed: int, total: int):
        # Create dataset once and reuse
        dataset = PropertyDataset.from_dataframe(df, method=method)
 
-        # Generate prompts and capture metadata
-        prompts_metadata = None
-        if req.task_description and req.use_dynamic_prompts:
-            discovery_prompt, custom_clustering_prompts, prompts_metadata = generate_prompts(
-                task_description=req.task_description,
-                dataset=dataset,
-                method=method,
-                use_dynamic_prompts=req.use_dynamic_prompts,
-                dynamic_prompt_samples=req.dynamic_prompt_samples or 5,
-                model=req.model_name or "gpt-4.1",
-                system_prompt_override=req.system_prompt,
-                output_dir=req.output_dir
-            )
-            # Store prompts metadata in job
-            with _JOBS_LOCK:
-                job.prompts_metadata = prompts_metadata.dict() if prompts_metadata else None
+        # Generate prompts and capture metadata (always generate to get metadata)
+        discovery_prompt, custom_clustering_prompts, prompts_metadata = generate_prompts(
+            task_description=req.task_description,
+            dataset=dataset,
+            method=method,
+            use_dynamic_prompts=req.use_dynamic_prompts if req.use_dynamic_prompts is not None else True,
+            dynamic_prompt_samples=req.dynamic_prompt_samples or 5,
+            model=req.model_name or "gpt-4.1",
+            system_prompt_override=req.system_prompt,
+            output_dir=req.output_dir
+        )
+        # Store prompts metadata in job
+        with _JOBS_LOCK:
+            job.prompts_metadata = prompts_metadata.dict() if prompts_metadata else None
 
-        system_prompt = get_system_prompt(method, req.system_prompt, req.task_description)
+        # Use the generated discovery_prompt if available, otherwise fall back to get_system_prompt
+        system_prompt = discovery_prompt if discovery_prompt else get_system_prompt(method, req.system_prompt, req.task_description)
 
         extractor = get_extractor(
             model_name=req.model_name or "gpt-4.1",
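
One detail worth isolating: both handlers now coalesce the Optional[bool] request field before passing it on, because a Pydantic Optional[bool] arrives as None when the client omits it, and plain truthiness would wrongly treat None like False. A self-contained restatement:

from typing import Optional

def effective_use_dynamic_prompts(flag: Optional[bool]) -> bool:
    # None means "client did not send the field": fall back to the
    # schema default of True instead of treating it as False.
    return flag if flag is not None else True

assert effective_use_dynamic_prompts(None) is True
assert effective_use_dynamic_prompts(False) is False
assert effective_use_dynamic_prompts(True) is True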

stringsight/routers/jobs.py

Lines changed: 22 additions & 1 deletion
@@ -184,10 +184,31 @@ def get_job_results(
         # Read JSONL file using storage adapter
         properties = storage.read_jsonl(result_file_path)
 
-        return {
+        response = {
             "properties": properties,
             "result_path": job.result_path,
             "count": len(properties)
         }
+
+        # Try to load prompts metadata from saved files
+        prompts_metadata_file = str(full_result_path / "prompts_metadata.json")
+        import logging
+        logger = logging.getLogger(__name__)
+        logger.info(f"Looking for prompts metadata at: {prompts_metadata_file}")
+
+        if storage.exists(prompts_metadata_file):
+            try:
+                prompts_metadata_content = storage.read_text(prompts_metadata_file)
+                import json
+                prompts_metadata = json.loads(prompts_metadata_content)
+                response["prompts"] = prompts_metadata
+                logger.info(f"Successfully loaded prompts metadata")
+            except Exception as e:
+                # Log but don't fail the request if prompts metadata can't be loaded
+                logger.warning(f"Failed to load prompts metadata: {e}")
+        else:
+            logger.warning(f"Prompts metadata file not found at: {prompts_metadata_file}")
+
+        return response
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Failed to read results: {str(e)}")
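
Illustrative shape of the enriched payload; values are hypothetical, and the "prompts" key appears only when prompts_metadata.json is found in the job's result directory.

# Hypothetical response from get_job_results after this change.
example_response = {
    "properties": [{"property_description": "uses bullet lists"}],  # rows from the JSONL file
    "result_path": "extract_job_results",                           # hypothetical path
    "count": 1,
    # Parsed prompts_metadata.json; keys inferred from the PromptsMetadata
    # constructor calls in prompt_generation.py above.
    "prompts": {
        "clustering_prompts": None,
        "expanded_task_description": "Analyze the behavioral patterns ...",
    },
}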

stringsight/schemas.py

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@ class ExtractBatchRequest(BaseModel):
     return_debug: Optional[bool] = False
     sample_size: Optional[int] = None
     use_dynamic_prompts: Optional[bool] = True
-    dynamic_prompt_samples: Optional[int] = 5
+    dynamic_prompt_samples: Optional[int] = 10
 
 class ExtractJobStartRequest(ExtractBatchRequest):
     pass
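
A hedged spot-check of the new default, assuming the model's remaining fields are optional like the ones visible in this hunk; any required fields outside the hunk would have to be supplied.

from stringsight.schemas import ExtractBatchRequest

req = ExtractBatchRequest()  # assumes no required fields beyond this hunk
assert req.dynamic_prompt_samples == 10  # was 5 before this commit
assert req.use_dynamic_prompts is True
assert req.sample_size is None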

stringsight/workers/tasks.py

Lines changed: 26 additions & 1 deletion
@@ -93,9 +93,25 @@ def update_progress(completed: int, total_count: int):
         except Exception as e:
             logger.error(f"Failed to update progress: {e}")
 
-    system_prompt = get_system_prompt(method, req.system_prompt, req.task_description)
+    # Generate prompts and capture metadata (always generate to get metadata)
+    from stringsight.prompt_generation import generate_prompts
+
     dataset = PropertyDataset.from_dataframe(df, method=method)
 
+    discovery_prompt, custom_clustering_prompts, prompts_metadata = generate_prompts(
+        task_description=req.task_description,
+        dataset=dataset,
+        method=method,
+        use_dynamic_prompts=req.use_dynamic_prompts if req.use_dynamic_prompts is not None else True,
+        dynamic_prompt_samples=req.dynamic_prompt_samples or 5,
+        model=req.model_name or "gpt-4.1",
+        system_prompt_override=req.system_prompt,
+        output_dir=None  # We'll save to output_dir later after determining it
+    )
+
+    # Use the generated discovery_prompt if available, otherwise fall back to get_system_prompt
+    system_prompt = discovery_prompt if discovery_prompt else get_system_prompt(method, req.system_prompt, req.task_description)
+
     extractor = get_extractor(
         model_name=req.model_name or "gpt-4.1",
         system_prompt=system_prompt,
@@ -125,6 +141,15 @@ def update_progress(completed: int, total_count: int):
     storage.ensure_directory(output_dir)
     logger.info(f"Results will be saved to: {output_dir}")
 
+    # Save prompts metadata to output directory
+    if prompts_metadata:
+        import json
+        from pathlib import Path
+        metadata_file = Path(output_dir) / "prompts_metadata.json"
+        with open(metadata_file, "w") as f:
+            json.dump(prompts_metadata.dict(), f, indent=2)
+        logger.info(f"Saved prompts metadata to {metadata_file}")
+
     # Run parsing and validation
     parser = LLMJsonParser(fail_fast=False, verbose=False, use_wandb=False, output_dir=output_dir)
     parsed_dataset = parser.run(extracted_dataset)
