Strands and Jinja2 package updates (awslabs#60)

ayushi1208 · web-flow · commit a4faa5b2c89a · 2026-01-08T16:19:44.000-05:00
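* Added fallback stub classes (Model, Agent, NoCredentialsError) so that stickler.comparators.llm can be imported when strands-agents is not installed; constructing an LLMComparator in that case now raises an ImportError with install instructions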

* Moved jinja2 from the optional `llm` extra into the core dependencies and changed its version range to >=3.1.6,<3.2.0
diff --git a/pyproject.toml b/pyproject.toml
@@ -19,7 +19,8 @@ dependencies = [
     "scipy>=1.10.0",
     "psutil>=5.8.0",
     "pandas>=1.5.0",
-    "jsonschema>=4.0.0"
+    "jsonschema>=4.0.0",
+    "jinja2>=3.1.6,<3.2.0"
 ]
 
 [project.optional-dependencies]
@@ -30,10 +31,8 @@ dev = [
     "beautifulsoup4>=4.14.2",
     "ruff>=0.14.10",
 ]
-
 llm = [
-    "strands-agents>=1.0.0,<=1.16.0",
-    "jinja2>=3.0.0,<=3.1.6"
+    "strands-agents>=1.0.0,<=1.16.0"
 ]
 
 
diff --git a/src/stickler/comparators/__init__.py b/src/stickler/comparators/__init__.py
@@ -14,6 +14,14 @@
 from stickler.comparators.structured import StructuredModelComparator
 from stickler.comparators.utils import generate_bedrock_embedding
 
+# Import LLMComparator if strands-agents is available
+try:
+    from stickler.comparators.llm import LLMComparator  # noqa: F401
+
+    LLM_AVAILABLE = True
+except ImportError:
+    LLM_AVAILABLE = False
+
 # Import BERTComparator if evaluate is available
 try:
     from stickler.comparators.bert import BERTComparator  # noqa: F401
@@ -39,12 +47,15 @@
     "NumericComparator",
     "NumericExactC",
     "ExactComparator",
-    "LLMComparator",
     "StructuredModelComparator",
     "SemanticComparator",
     "generate_bedrock_embedding",
 ]
 
+# Add LLMComparator to __all__ if available
+if LLM_AVAILABLE:
+    __all__.append("LLMComparator")
+
 # Add BERTComparator to __all__ if available
 if BERT_AVAILABLE:
     __all__.append("BERTComparator")
@@ -53,4 +64,3 @@
 if RAPIDFUZZ_AVAILABLE:
     __all__.append("FuzzyComparator")
     __all__.append("Fuzz")
-
diff --git a/src/stickler/comparators/llm.py b/src/stickler/comparators/llm.py
@@ -9,80 +9,111 @@
 library and supports customizable evaluation guidelines for domain-specific
 comparison logic.
 
+Note:
+    This comparator requires the optional 'llm' dependencies. Install with:
+    pip install stickler-eval[llm]
+
 Example:
     Integration with StructuredModel:
         >>> from stickler.structured_object_evaluator.models.comparable_field import ComparableField
-        >>> 
+        >>>
         >>> class Address(StructuredModel):
         ...     street: str = ComparableField(
         ...         comparator=LLMComparator(eval_guidelines="Consider street abbreviations"),
         ...         threshold=0.8
         ...     )
 """
+
 import html
 from typing import Any, Dict, Union
 
-from botocore.exceptions import NoCredentialsError
 from jinja2 import Template
-from strands import Agent
-from strands.models import Model
 
 from stickler.comparators.base import BaseComparator
 
+try:
+    from botocore.exceptions import NoCredentialsError
+    from strands import Agent
+    from strands.models import Model
+
+    STRANDS_AVAILABLE = True
+except ImportError:
+    STRANDS_AVAILABLE = False
+
+    # Create mock classes for when strands is not available
+    class Model:
+        pass
+
+    class Agent:
+        pass
+
+    class NoCredentialsError(Exception):
+        pass
+
 
 class LLMComparator(BaseComparator):
     """Large Language Model-based semantic comparator.
-    
+
     This comparator uses LLMs to perform intelligent semantic comparisons that go
     beyond simple string matching. It can understand context, handle abbreviations,
     recognize synonyms, and apply domain-specific comparison logic through custom
     evaluation guidelines.
-    
+
     The comparator returns binary similarity scores (0.0 or 1.0) based on whether
     the LLM determines the values are semantically equivalent. It handles edge cases
     like None values and provides detailed comparison information for debugging.
-    
+
     Attributes:
         model (Union[Model, str]): The LLM model identifier or Model instance.
         eval_guidelines (str, optional): Custom guidelines for comparison logic.
         system_prompt (str): The system prompt used to instruct the LLM.
         prompt_template (Template): Jinja2 template for formatting comparison prompts.
         agent (Agent): The strands Agent instance for LLM interactions.
         threshold (float): Inherited from BaseComparator, used for binary decisions.
-    
+
     Note:
         This comparator requires AWS Bedrock access and proper authentication.
         API calls incur costs and latency, so consider caching for repeated comparisons.
     """
+
     def __init__(
         self,
         model: Union[Model, str] = None,
         eval_guidelines: str = None,
     ):
         """Initialize the LLM comparator.
-        
+
         Args:
             model: The LLM model to use for comparisons. Can be a model identifier
                 string (e.g., "us.anthropic.claude-3-haiku-20240307-v1:0") or a
                 strands Model instance. Defaults to Claude 3 Haiku.
             eval_guidelines: Optional custom guidelines to include in the comparison
                 prompt. These guidelines help the LLM understand domain-specific
                 comparison rules (e.g., "Consider abbreviations equivalent").
-        
+
         Raises:
-            Exception: If the model cannot be initialized or AWS credentials are invalid.
-        
+            ImportError: If strands-agents is not installed.
+            ValueError: If the model parameter is not provided.
+
         Example:
             >>> # Basic initialization
             >>> comparator = LLMComparator()
-            
+
             >>> # With custom model and guidelines
             >>> comparator = LLMComparator(
             ...     model="us.amazon.nova-lite-v1:0",
             ...     eval_guidelines="Consider street abbreviations equivalent"
             ... )
         """
         super().__init__()
+
+        # Check if strands is available
+        if not STRANDS_AVAILABLE:
+            raise ImportError(
+                "LLMComparator requires the 'strands-agents' package. "
+                "Install it with: pip install stickler-eval[llm]"
+            )
+
         if model is None:
             raise ValueError("Model must be provided for LLMComparator.")
         self.model = model
@@ -92,25 +123,23 @@ def __init__(
             self.eval_guidelines = html.escape(eval_guidelines)
         else:
             self.eval_guidelines = eval_guidelines
-        
+
         # Initialize Agent
         self.agent = Agent(
-            model=self.model,
-            system_prompt=self.system_prompt,
-            callback_handler=None
+            model=self.model, system_prompt=self.system_prompt, callback_handler=None
         )
 
     def _default_system_prompt(self) -> str:
         """Generate the default system prompt for the LLM.
-        
+
         Returns:
             str: System prompt instructing the LLM to perform binary comparisons.
         """
         return "You are a helpful assistant that compares two values and determines if they are equivalent. Only return one word: 'true' or 'false'."
-    
+
     def _default_prompt_template(self) -> Template:
         """Generate the default Jinja2 template for comparison prompts.
-        
+
         Returns:
             Template: Jinja2 template that formats comparison prompts with values
                 and optional evaluation guidelines.
@@ -133,44 +162,44 @@ def _default_prompt_template(self) -> Template:
 
         template = Template(prompt_template)
         return template
-                    
+
     def _invoke_agent(self, prompt: str) -> str:
         """Invoke the LLM agent with a formatted prompt.
-        
+
         Args:
             prompt: The formatted prompt string to send to the LLM.
-        
+
         Returns:
             str: The text response from the LLM.
-        
+
         Raises:
             Exception: If the agent call fails or response format is unexpected.
         """
         result = self.agent(prompt)
         return result.message["content"][0]["text"]
-    
+
     def compare(self, value1: Any, value2: Any) -> float:
         """Compare two values using LLM-based semantic analysis.
-        
+
         This method converts both values to strings and uses the configured LLM
         to determine if they are semantically equivalent. The comparison considers
         context, abbreviations, synonyms, and any provided evaluation guidelines.
-        
+
         Args:
             value1: First value to compare. Can be any type that converts to string.
             value2: Second value to compare. Can be any type that converts to string.
-        
+
         Returns:
             float: Binary similarity score:
                 - 1.0 if the LLM determines the values are equivalent
                 - 0.0 if the LLM determines the values are not equivalent
                 - 0.0 if an error occurs during comparison
-        
+
         Note:
             - None values: Returns 1.0 if both are None, 0.0 if only one is None
             - Error handling: Returns 0.0 for any exceptions during LLM calls
             - Cost consideration: Each call incurs API costs and latency
-        
+
         Example:
             >>> comparator = LLMComparator()
             >>> comparator.compare("St. John's Street", "Saint John's St")
@@ -190,51 +219,50 @@ def compare(self, value1: Any, value2: Any) -> float:
         formatted_prompt = self.prompt_template.render(
             value1=html.escape(str(value1)),
             value2=html.escape(str(value2)),
-            eval_guidelines=self.eval_guidelines
+            eval_guidelines=self.eval_guidelines,
         )
-        
+
         try:
             # Get LLM response
             response = self._invoke_agent(formatted_prompt)
             # Parse response to boolean
             response_lower = response.strip().lower()
-            if 'true' in response_lower:
+            if "true" in response_lower:
                 return 1.0
             else:
                 return 0.0
-            
+
         except NoCredentialsError:
             print("Error: AWS credentials not found.")
-            raise 
+            raise
 
         except Exception as e:
             print(f"Error during LLM call: {e}")
             raise
 
-    
     def get_comparison_details(self, value1: Any, value2: Any) -> Dict[str, Any]:
         """Get detailed information about a comparison operation.
-        
+
         This method provides comprehensive details about the comparison process,
         including the formatted prompt, LLM response, model information, and
         final comparison result. Useful for debugging, auditing, and understanding
         how the LLM made its decision.
-        
+
         Args:
             value1: First value to compare. Can be any type that converts to string.
             value2: Second value to compare. Can be any type that converts to string.
-        
+
         Returns:
             Dict[str, Any]: Dictionary containing comparison details:
                 - 'prompt' (str): The formatted prompt sent to the LLM
                 - 'llm_response' (str): Raw response from the LLM
                 - 'model_id' (Union[Model, str]): The model used (string ID or Model instance)
                 - 'comparison_result' (float): Final similarity score (0.0 or 1.0)
-                
+
                 On error:
                 - 'error' (str): Error message describing what went wrong
                 - 'comparison_result' (bool): False to indicate failure
-        
+
         Example:
             >>> comparator = LLMComparator(eval_guidelines="Consider abbreviations")
             >>> details = comparator.get_comparison_details("St. John", "Saint John")
@@ -248,19 +276,16 @@ def get_comparison_details(self, value1: Any, value2: Any) -> Dict[str, Any]:
         formatted_prompt = self.prompt_template.render(
             value1=html.escape(str(value1)),
             value2=html.escape(str(value2)),
-            eval_guidelines=self.eval_guidelines
+            eval_guidelines=self.eval_guidelines,
         )
-        
+
         try:
             response = self._invoke_agent(formatted_prompt)
             return {
                 "prompt": formatted_prompt,
                 "llm_response": response,
                 "model_id": self.model,
-                "comparison_result": self.compare(value1, value2)
+                "comparison_result": self.compare(value1, value2),
             }
         except Exception as e:
-            return {
-                "error": str(e),
-                "comparison_result": False
-            }
+            return {"error": str(e), "comparison_result": False}

Original file line number	Diff line number	Diff line change
`@@ -19,7 +19,8 @@ dependencies = [`
`19`	`19`	`"scipy>=1.10.0",`
`20`	`20`	`"psutil>=5.8.0",`
`21`	`21`	`"pandas>=1.5.0",`
`22`		`- "jsonschema>=4.0.0"`
	`22`	`+ "jsonschema>=4.0.0",`
	`23`	`+ "jinja2>=3.1.6,<3.2.0"`
`23`	`24`	`]`
`24`	`25`
`25`	`26`	`[project.optional-dependencies]`
`@@ -30,10 +31,8 @@ dev = [`
`30`	`31`	`"beautifulsoup4>=4.14.2",`
`31`	`32`	`"ruff>=0.14.10",`
`32`	`33`	`]`
`33`		`-`
`34`	`34`	`llm = [`
`35`		`- "strands-agents>=1.0.0,<=1.16.0",`
`36`		`- "jinja2>=3.0.0,<=3.1.6"`
	`35`	`+ "strands-agents>=1.0.0,<=1.16.0"`
`37`	`36`	`]`
`38`	`37`
`39`	`38`