Release v0.2.2

kazhou · kazhou · commit 01b98dba6021 · 2026-03-09T23:26:51.000Z
diff --git a/autochecklist/generators/instance_level/contrastive.py b/autochecklist/generators/instance_level/contrastive.py
@@ -154,8 +154,8 @@ def _generate_with_candidates(
                 )
             format_kwargs["reference"] = reference
 
-        # Load format instructions
-        format_text = load_format(self._format_name)
+        # Load format instructions (skip for custom schemas)
+        format_text = load_format(self._format_name) if self._format_name else ""
 
         # Inject format inline if template has {format_instructions} placeholder,
         # otherwise append after the prompt (default).
diff --git a/autochecklist/generators/instance_level/direct.py b/autochecklist/generators/instance_level/direct.py
@@ -56,10 +56,17 @@ def __init__(
         self._method_name = method_name
         self.max_items = preset.get("max_items", max_items)
         self.min_items = preset.get("min_items", min_items)
+
+        is_custom_schema = response_schema is not None
         self._response_schema = response_schema or preset.get(
             "response_schema", ChecklistResponse
         )
-        self._format_name = format_name or preset.get("format_name", "checklist")
+        if format_name is not None:
+            self._format_name = format_name
+        elif is_custom_schema:
+            self._format_name = None
+        else:
+            self._format_name = preset.get("format_name", "checklist")
 
         # Load template
         if custom_prompt is not None:
@@ -115,8 +122,8 @@ def generate(
         if "history" in self._template._placeholders:
             format_kwargs["history"] = history
 
-        # Load format instructions
-        format_text = load_format(self._format_name)
+        # Load format instructions (skip for custom schemas)
+        format_text = load_format(self._format_name) if self._format_name else ""
 
         # Inject format inline if template has {format_instructions} placeholder,
         # otherwise append after the prompt (default).
@@ -149,20 +156,67 @@ def _parse_structured(self, raw: str) -> list[ChecklistItem]:
 
         Primary path: json.loads() succeeds (structured output).
         Fallback path: extract_json() extracts JSON from raw text.
+
+        Auto-detects the list field and item fields from the schema,
+        supporting both built-in and custom response schemas.
         """
         try:
             data = json.loads(raw)
         except json.JSONDecodeError:
             data = extract_json(raw)
         validated = self._response_schema.model_validate(data)
 
+        # Find the list field ('questions' if present, else the first list-valued field)
+        item_list = self._get_item_list(validated)
+
         items = []
-        for q in validated.questions[: self.max_items]:
+        for q in item_list[: self.max_items]:
+            q_data = q.model_dump() if hasattr(q, "model_dump") else {}
+            # Find question text: use 'question' field, or first str field
+            question, question_key = self._get_question_text(q, q_data)
+            weight = getattr(q, "weight", 100.0)
+            category = getattr(q, "category", None)
+            # Extra fields → metadata
+            known = {question_key, "weight", "category"}
+            extra = {k: v for k, v in q_data.items() if k not in known}
             items.append(
                 ChecklistItem(
-                    question=q.question,
-                    weight=getattr(q, "weight", 100.0),
-                    category=getattr(q, "category", None),
+                    question=question,
+                    weight=weight,
+                    category=category,
+                    metadata=extra if extra else {},
                 )
             )
         return items
+
+    @staticmethod
+    def _get_item_list(validated: Any) -> list:
+        """Extract the list of items from a validated response model."""
+        # Try 'questions' first (built-in convention)
+        if hasattr(validated, "questions"):
+            return validated.questions
+        # Auto-detect: first list attribute
+        for field_name in type(validated).model_fields:
+            value = getattr(validated, field_name)
+            if isinstance(value, list):
+                return value
+        raise ValueError(
+            f"Cannot find list field in {type(validated).__name__}. "
+            "Schema must have a list field (e.g., 'questions', 'items')."
+        )
+
+    @staticmethod
+    def _get_question_text(item: Any, item_data: dict) -> tuple[str, str]:
+        """Extract question text and its field key from an item."""
+        if isinstance(item, str):
+            return item, "question"
+        if hasattr(item, "question"):
+            return item.question, "question"
+        # Fall back to first str field
+        for key, value in item_data.items():
+            if isinstance(value, str):
+                return value, key
+        raise ValueError(
+            f"Cannot find question text in {type(item).__name__}. "
+            "Item must have a 'question' field or at least one str field."
+        )
diff --git a/docs/user-guide/custom-prompts.md b/docs/user-guide/custom-prompts.md
@@ -130,4 +130,38 @@ pipe = pipeline("code_review", scorer="strict")
 
 Custom generators registered via `register_custom_generator()` always use the unweighted `ChecklistResponse` schema. To use weighted output (`WeightedChecklistResponse`), instantiate `DirectGenerator` directly with `response_schema=WeightedChecklistResponse`.
 
+### Custom Response Schemas
+
+You can pass any Pydantic model as `response_schema` to define your own output structure. When a custom schema is provided and no `format_name` is passed explicitly, format instructions are skipped and the LLM is guided entirely via structured output enforcement.
+
+```python
+from pydantic import BaseModel
+from autochecklist import DirectGenerator
+
+class ActionItem(BaseModel):
+    item: str
+    sources: list[int]
+
+class ActionItemsResponse(BaseModel):
+    questions: list[ActionItem]
+
+gen = DirectGenerator(
+    custom_prompt="Generate evaluation criteria with source references for:\n\n{input}",
+    response_schema=ActionItemsResponse,
+    model="openai/gpt-5-mini",
+)
+checklist = gen.generate(input="Write a literature review.")
+```
+
+The parser auto-detects the list field and question text:
+
+- **List field**: uses `questions` if present, otherwise the first list field (e.g., `items`, `criteria`)
+- **Question text**: uses `question` if present, otherwise the first `str` field (e.g., `item`, `text`)
+- **Extra fields**: any fields beyond the question text, `weight`, and `category` are preserved in `ChecklistItem.metadata`
+
+```python
+checklist.items[0].question   # e.g. "Is it cited?"
+checklist.items[0].metadata   # e.g. {"sources": [1, 3]}
+```
+
 **Scorers** also use structured JSON output (`BatchScoringResponse`, `ItemScoringResponse`, etc.) with the same provider-level enforcement and fallback. Your custom scorer prompt does not need to dictate the output format.
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "autochecklist"
-version = "0.2.1"
+version = "0.2.2"
 description = "A library of checklist generation and scoring methods for LLM evaluation"
 authors = [{name = "ChicagoHAI"}]
 readme = "README.pypi.md"
diff --git a/tests/test_generators/test_direct.py b/tests/test_generators/test_direct.py
@@ -112,6 +112,55 @@ def test_parse_structured_without_category_unchanged(self):
         assert items[0].category is None
 
 
+class TestCustomResponseSchema:
+    def test_custom_schema_with_items_list_str(self):
+        """Custom schema with 'items: List[str]' parses into ChecklistItems."""
+        from pydantic import BaseModel
+        from typing import List
+        from autochecklist.generators.instance_level.direct import DirectGenerator
+
+        class CustomResponse(BaseModel):
+            items: List[str]
+
+        gen = DirectGenerator(
+            method_name="custom",
+            custom_prompt="Generate criteria for: {input}",
+            response_schema=CustomResponse,
+        )
+        assert gen._format_name is None
+
+        raw = '{"items": ["Does the response address the topic?", "Is the tone appropriate?"]}'
+        items = gen._parse_structured(raw)
+        assert len(items) == 2
+        assert items[0].question == "Does the response address the topic?"
+        assert items[1].question == "Is the tone appropriate?"
+        assert items[0].weight == 100.0
+        assert items[0].category is None
+
+    def test_nested_schema_extra_fields_in_metadata(self):
+        """Nested item model with non-str fields preserved in metadata."""
+        from pydantic import BaseModel
+        from autochecklist.generators.instance_level.direct import DirectGenerator
+
+        class ActionItem(BaseModel):
+            item: str
+            sources: list[int]
+
+        class ActionItemsResponse(BaseModel):
+            questions: list[ActionItem]
+
+        gen = DirectGenerator(
+            method_name="custom",
+            custom_prompt="Generate criteria for: {input}",
+            response_schema=ActionItemsResponse,
+        )
+        raw = '{"questions": [{"item": "Is it cited?", "sources": [1, 3]}]}'
+        items = gen._parse_structured(raw)
+        assert len(items) == 1
+        assert items[0].question == "Is it cited?"
+        assert items[0].metadata == {"sources": [1, 3]}
+
+
 class TestContrastiveGeneratorConfig:
     def test_rlcf_candidate_preset_loads(self):
         from autochecklist.generators.instance_level.contrastive import ContrastiveGenerator

Original file line number	Diff line number	Diff line change
`@@ -154,8 +154,8 @@ def _generate_with_candidates(`
`154`	`154`	`)`
`155`	`155`	`format_kwargs["reference"] = reference`
`156`	`156`
`157`		`- # Load format instructions`
`158`		`- format_text = load_format(self._format_name)`
	`157`	`+ # Load format instructions (skip for custom schemas)`
	`158`	`+ format_text = load_format(self._format_name) if self._format_name else ""`
`159`	`159`
`160`	`160`	`# Inject format inline if template has {format_instructions} placeholder,`
`161`	`161`	`# otherwise append after the prompt (default).`