Update: modify 4 file(s)

vawsgit · vawsgit · commit 4c0aa13245a4 · 2026-01-22T11:07:58.000-06:00
diff --git a/.gitignore b/.gitignore
@@ -10,3 +10,5 @@ uv.lock
 
 .kiro/*
 .kiro/**/*
+
+build/
diff --git a/docs/docs/Guides/StructuredModel_Export.md b/docs/docs/Guides/StructuredModel_Export.md
@@ -16,10 +16,12 @@ A typical workflow is to create a model with default comparators, export the con
 from stickler import StructuredModel, ComparableField
 
 # Step 1: Create model with defaults
+# Note: default=... means "required" in Pydantic
+# Use default=None for optional fields
 class Product(StructuredModel):
-    name: str = ComparableField(default=...)
-    price: float = ComparableField(default=...)
-    description: str = ComparableField(default=...)
+    name: str = ComparableField(default=...)        # Required field
+    price: float = ComparableField(default=...)     # Required field
+    description: str = ComparableField(default=None) # Optional field
 
 # Step 2: Export to get default configuration
 config = Product.to_stickler_config()
@@ -81,11 +83,13 @@ ReconstructedProduct = StructuredModel.from_json_schema(schema)
   "properties": {
     "name": {
       "type": "string",
+      "x-aws-stickler-comparator": "LevenshteinComparator",
       "x-aws-stickler-threshold": 0.8,
       "x-aws-stickler-weight": 2.0
     },
     "price": {
       "type": "number",
+      "x-aws-stickler-comparator": "NumericComparator",
       "x-aws-stickler-threshold": 0.95
     }
   },
@@ -120,12 +124,14 @@ ReconstructedProduct = StructuredModel.model_from_json(config)
   "fields": {
     "name": {
       "type": "str",
+      "comparator": "LevenshteinComparator",
       "threshold": 0.8,
       "weight": 2.0,
       "required": true
     },
     "price": {
       "type": "float",
+      "comparator": "NumericComparator",
       "threshold": 0.95,
       "required": true
     }
@@ -178,24 +184,32 @@ config = Customer.to_stickler_config()
 
 ## Lists of StructuredModels
 
-Lists are exported with their element schemas:
+Lists are exported with their element schemas.
+
+**Important:** When comparing `List[StructuredModel]` fields, Stickler uses the element model's `match_threshold` class attribute for Hungarian matching. You cannot specify a custom threshold or comparator on the list field itself.
 
 ```python
 from typing import List
 
+class LineItem(StructuredModel):
+    match_threshold = 0.8  # Used for matching list elements
+    product: str = ComparableField(default=...)
+    quantity: int = ComparableField(default=...)
+
 class Order(StructuredModel):
     order_id: str = ComparableField(threshold=1.0, default=...)
-    products: List[Product] = ComparableField(default=...)
+    # The list field uses LineItem.match_threshold for matching
+    products: List[LineItem] = ComparableField(default=...)
 
 # JSON Schema export
 schema = Order.to_json_schema()
 # schema["properties"]["products"]["type"] == "array"
-# schema["properties"]["products"]["items"] contains Product schema
+# schema["properties"]["products"]["items"] contains LineItem schema
 
 # Stickler config export
 config = Order.to_stickler_config()
 # config["fields"]["products"]["type"] == "list_structured_model"
-# config["fields"]["products"]["fields"] contains Product fields
+# config["fields"]["products"]["fields"] contains LineItem fields
 ```
 
 ## Round-trip Examples
@@ -391,4 +405,4 @@ git commit -m "Add Product model schema v1"
 
 - [StructuredModel Dynamic Creation](StructuredModel_Dynamic_Creation.md) - Import methods
 - [StructuredModel Advanced Functionality](StructuredModel_Advanced_Functionality.md) - Comparison features
-- [JSON Schema Extensions Reference](../README.md#json-schema-extensions-x-aws-stickler--complete-reference) - Full extension documentation
+- [JSON Schema Extensions](../../index.md) - Full extension documentation in the main README
diff --git a/src/stickler/structured_object_evaluator/models/json_schema_field_converter.py b/src/stickler/structured_object_evaluator/models/json_schema_field_converter.py
@@ -6,6 +6,8 @@
 
 from typing import Any, Dict, List, Tuple, Type
 
+from pydantic.fields import FieldInfo
+
 from .comparable_field import ComparableField
 from .comparator_registry import create_comparator
 
@@ -450,7 +452,7 @@ def _handle_array_type(
         return field_type, field
 
     
-    def field_to_property(self, field_type: Type, field_info) -> Dict[str, Any]:
+    def field_to_property(self, field_type: Type, field_info: FieldInfo) -> Dict[str, Any]:
         """Convert Pydantic field to JSON Schema property.
         
         Extracts comparison metadata from the field's json_schema_extra attribute
@@ -462,37 +464,26 @@ def field_to_property(self, field_type: Type, field_info) -> Dict[str, Any]:
             
         Returns:
             JSON Schema property dict with x-aws-stickler-* extensions
-            
-        Example:
-            >>> converter = JsonSchemaFieldConverter(schema={}, field_path="")
-            >>> field_info = model.model_fields["name"]
-            >>> property_schema = converter.field_to_property(str, field_info)
-            >>> print(property_schema["x-aws-stickler-comparator"])
-            'LevenshteinComparator'
         """
-        # Use reverse mapping to avoid duplicating type definitions
         json_type = PYTHON_TYPE_TO_JSON_TYPE.get(field_type, "string")
-        
         property_schema = {"type": json_type}
         
-        # Extract metadata from field
+        # Extract metadata and build extensions using consolidated helper
         metadata = self._extract_field_metadata(field_info)
+        extensions = self._build_comparison_extensions(metadata, format="json_schema")
+        property_schema.update(extensions)
         
-        # Add x-aws-stickler-* extensions only if metadata exists
-        if metadata.get("comparator"):
-            property_schema["x-aws-stickler-comparator"] = metadata["comparator"].__class__.__name__
-        if "threshold" in metadata:
-            property_schema["x-aws-stickler-threshold"] = metadata["threshold"]
-        if "weight" in metadata:
-            property_schema["x-aws-stickler-weight"] = metadata["weight"]
-        if metadata.get("clip_under_threshold") is not None:
-            property_schema["x-aws-stickler-clip-under-threshold"] = metadata["clip_under_threshold"]
-        if metadata.get("aggregate"):
-            property_schema["x-aws-stickler-aggregate"] = metadata["aggregate"]
+        # Add Pydantic field params
+        if field_info.description:
+            property_schema["description"] = field_info.description
+        if field_info.alias:
+            property_schema["alias"] = field_info.alias
+        if field_info.examples:
+            property_schema["examples"] = field_info.examples
         
         return property_schema
     
-    def field_to_stickler_config(self, field_type: Type, field_info) -> Dict[str, Any]:
+    def field_to_stickler_config(self, field_type: Type, field_info: FieldInfo) -> Dict[str, Any]:
         """Convert Pydantic field to Stickler config format.
         
         Extracts comparison metadata and formats it as custom Stickler configuration
@@ -504,54 +495,81 @@ def field_to_stickler_config(self, field_type: Type, field_info) -> Dict[str, An
             
         Returns:
             Stickler field config dict with type, comparator, threshold, etc.
-            
-        Example:
-            >>> converter = JsonSchemaFieldConverter(schema={}, field_path="")
-            >>> field_info = model.model_fields["name"]
-            >>> config = converter.field_to_stickler_config(str, field_info)
-            >>> print(config["type"])
-            'str'
         """
-        # Use reverse mapping for Stickler type strings
         stickler_type = PYTHON_TYPE_TO_STICKLER_TYPE.get(field_type, "str")
-        
         field_config = {"type": stickler_type}
         
-        # Extract metadata
+        # Extract metadata and build extensions using consolidated helper
         metadata = self._extract_field_metadata(field_info)
+        extensions = self._build_comparison_extensions(metadata, format="stickler_config")
+        field_config.update(extensions)
         
-        # Add comparison config only if metadata exists
-        if metadata.get("comparator"):
-            field_config["comparator"] = metadata["comparator"].__class__.__name__
-        if "threshold" in metadata:
-            field_config["threshold"] = metadata["threshold"]
-        if "weight" in metadata:
-            field_config["weight"] = metadata["weight"]
-        if metadata.get("clip_under_threshold") is not None:
-            field_config["clip_under_threshold"] = metadata["clip_under_threshold"]
-        if metadata.get("aggregate"):
-            field_config["aggregate"] = metadata["aggregate"]
-        
-        # Add Pydantic field params - use is_required() for Pydantic compatibility
+        # Add Pydantic field params
         field_config["required"] = field_info.is_required()
         if not field_info.is_required():
             field_config["default"] = field_info.default
         if field_info.description:
             field_config["description"] = field_info.description
+        if field_info.alias:
+            field_config["alias"] = field_info.alias
+        if field_info.examples:
+            field_config["examples"] = field_info.examples
         
         return field_config
     
-    def _extract_field_metadata(self, field_info) -> Dict[str, Any]:
+    def _build_comparison_extensions(
+        self, 
+        metadata: Dict[str, Any], 
+        format: str = "json_schema"
+    ) -> Dict[str, Any]:
+        """Build comparison extensions in specified format.
+        
+        Consolidates duplicate logic from field_to_property() and field_to_stickler_config().
+        
+        Args:
+            metadata: Extracted field metadata from _extract_field_metadata()
+            format: Output format - "json_schema" (x-aws-stickler-* keys); any other value, e.g. "stickler_config", yields plain keys
+        
+        Returns:
+            Dictionary with comparison extensions in the specified format
+        """
+        extensions = {}
+        prefix = "x-aws-stickler-" if format == "json_schema" else ""
+        
+        # Export comparator class name and configuration
+        if metadata.get("comparator"):
+            comparator = metadata["comparator"]
+            extensions[f"{prefix}comparator"] = comparator.__class__.__name__
+            
+            # Export comparator configuration (e.g., tolerance, case_sensitive)
+            if hasattr(comparator, "config") and comparator.config:
+                config_key = f"{prefix}comparator-config" if format == "json_schema" else "comparator_config"
+                extensions[config_key] = comparator.config
+        
+        # Export comparison parameters
+        if "threshold" in metadata:
+            extensions[f"{prefix}threshold"] = metadata["threshold"]
+        if "weight" in metadata:
+            extensions[f"{prefix}weight"] = metadata["weight"]
+        if metadata.get("clip_under_threshold") is not None:
+            clip_key = f"{prefix}clip-under-threshold" if format == "json_schema" else "clip_under_threshold"
+            extensions[clip_key] = metadata["clip_under_threshold"]
+        if metadata.get("aggregate") is not None:
+            extensions[f"{prefix}aggregate"] = metadata["aggregate"]
+        
+        return extensions
+    
+    def _extract_field_metadata(self, field_info: FieldInfo) -> Dict[str, Any]:
         """Extract comparison metadata from field's json_schema_extra.
         
-        The metadata is stored as attributes on the json_schema_extra function
-        by ComparableField() to work around Pydantic's __slots__ restriction.
+        Only includes attributes that are explicitly set (no default values).
         
         Args:
             field_info: Pydantic FieldInfo object
             
         Returns:
-            Dictionary with comparator, threshold, weight, etc.
+            Dictionary with explicitly set comparator, threshold, weight, etc.
+            Empty dict if no metadata found.
         """
         if not hasattr(field_info, "json_schema_extra"):
             return {}
@@ -560,11 +578,22 @@ def _extract_field_metadata(self, field_info) -> Dict[str, Any]:
         if not callable(json_func):
             return {}
         
-        # Extract stored attributes from the function object
-        return {
-            "comparator": getattr(json_func, "_comparator_instance", None),
-            "threshold": getattr(json_func, "_threshold", 0.5),
-            "weight": getattr(json_func, "_weight", 1.0),
-            "clip_under_threshold": getattr(json_func, "_clip_under_threshold", True),
-            "aggregate": getattr(json_func, "_aggregate", False),
-        }
+        # Only include attributes that are explicitly set
+        metadata = {}
+        
+        if hasattr(json_func, "_comparator_instance"):
+            metadata["comparator"] = json_func._comparator_instance
+        
+        if hasattr(json_func, "_threshold"):
+            metadata["threshold"] = json_func._threshold
+        
+        if hasattr(json_func, "_weight"):
+            metadata["weight"] = json_func._weight
+        
+        if hasattr(json_func, "_clip_under_threshold"):
+            metadata["clip_under_threshold"] = json_func._clip_under_threshold
+        
+        if hasattr(json_func, "_aggregate"):
+            metadata["aggregate"] = json_func._aggregate
+        
+        return metadata
diff --git a/src/stickler/structured_object_evaluator/models/structured_model.py b/src/stickler/structured_object_evaluator/models/structured_model.py

-Original file line number
+Diff line change
 .kiro/*
 .kiro/**/*
++
 +build/