Merge pull request #864 from Pipelex/release/v0.26.2

thomashebrard · web-flow · commit 09a7f5ca9af7 · 2026-05-06T13:46:28.000+02:00
Release v0.26.2
diff --git a/.badges/tests.json b/.badges/tests.json
@@ -1,7 +1,7 @@
 {
   "schemaVersion": 1,
   "label": "tests",
-  "message": "4900",
+  "message": "4901",
   "color": "blue",
   "cacheSeconds": 300
 }
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## [v0.26.2] - 2026-05-06
+
+### Fixed
+
+- **`choices` fields no longer fail validation with `'EnumName.MEMBER_NAME'` errors.** A concept declared with `choices = [...]` produces a `Literal[...]` field on the dynamic Pydantic class. That schema is round-tripped through `SchemaToModelFactory.make_from_json_schema` (used to rebuild dynamic models on Temporal workers and to feed structured-output schemas to LLM providers). Previously the round-trip silently re-emitted the field as a plain Python `Enum` class — e.g. `Literal["Strong Match", "Good Match", "Partial Match", "Poor Match"]` became `class Recommendation(Enum): Poor_Match = "Poor Match"; ...`. LLMs filling that schema then returned the enum's Python repr (`"Recommendation.Poor_Match"`) instead of the literal string (`"Poor Match"`), which failed Pydantic validation against the original choice set with errors like `Invalid choice errors: 'recommendation': got 'Recommendation.Poor_Match', expected one of 'Strong Match', 'Good Match', 'Partial Match' or 'Poor Match'`. `_generate_source_from_schema` now passes `enum_field_as_literal=LiteralType.All` to `datamodel-code-generator`, so `enum: [strings]` schema nodes round-trip as `Literal[...]` instead of being regenerated as `Enum` classes. `_exec_source_to_types` now also exposes `Literal` in the rebuild namespace so `model_rebuild` resolves the deferred annotations.
+
 ## [v0.26.1] - 2026-05-05
 
 ### Changed
@@ -694,10 +700,9 @@
   1. Pipelex Gateway telemetry for service monitoring (never collects prompts/completions/business data)
   2. Custom telemetry to user-configured backends
   3. Config updated accordingly (`telemetry.toml`):
+     - Renamed `[posthog]` to `[custom_posthog]` to distinguish user's PostHog from Pipelex Gateway telemetry
+     - Added new `[custom_portkey]` section with `force_debug_enabled` and `force_tracing_enabled` settings
 
-
-      - Renamed `[posthog]` to `[custom_posthog]` to distinguish user's PostHog from Pipelex Gateway telemetry
-      - Added new `[custom_portkey]` section with `force_debug_enabled` and `force_tracing_enabled` settings
 - **Main Configuration Overrides Updated** (`.pipelex/pipelex.toml`):
   - `pipelex_override.toml` (final override): renamed from `pipelex_super.toml` and moved from the repo root to the `.pipelex/` directory
   - `telemetry_override.toml` (personal telemetry settings)
@@ -1013,10 +1018,9 @@
   1. Pipelex Gateway telemetry for service monitoring (never collects prompts/completions/business data)
   2. Custom telemetry to user-configured backends
   3. Config updated accordingly (`telemetry.toml`):
+     - Renamed `[posthog]` to `[custom_posthog]` to distinguish user's PostHog from Pipelex Gateway telemetry
+     - Added new `[custom_portkey]` section with `force_debug_enabled` and `force_tracing_enabled` settings
 
-
-      - Renamed `[posthog]` to `[custom_posthog]` to distinguish user's PostHog from Pipelex Gateway telemetry
-      - Added new `[custom_portkey]` section with `force_debug_enabled` and `force_tracing_enabled` settings
 - **Main Configuration Overrides Updated** (`.pipelex/pipelex.toml`):
   - `pipelex_override.toml` (final override): renamed from `pipelex_super.toml` and moved from the repo root to the `.pipelex/` directory
   - `telemetry_override.toml` (personal telemetry settings)
diff --git a/pipelex/cogt/content_generation/schema_to_model_factory.py b/pipelex/cogt/content_generation/schema_to_model_factory.py
@@ -36,9 +36,9 @@
 from collections import OrderedDict
 from enum import Enum
 from pathlib import Path
-from typing import Any, ClassVar, cast
+from typing import Any, ClassVar, Literal, cast
 
-from pydantic import BaseModel
+from pydantic import BaseModel, RootModel
 
 from pipelex.cogt.content_generation.exceptions import UnsafeSchemaError
 
@@ -160,7 +160,7 @@ def _reject_unsafe_schema_extensions(cls, schema: dict[str, Any]) -> None:
     @classmethod
     def _generate_source_from_schema(cls, schema: dict[str, Any]) -> str:
         """Generate Python source code from a JSON schema using datamodel-code-generator."""
-        from datamodel_code_generator import InputFileType, generate  # noqa: PLC0415
+        from datamodel_code_generator import InputFileType, LiteralType, generate  # noqa: PLC0415
         from datamodel_code_generator.enums import DataModelType  # noqa: PLC0415
 
         cls._reject_unsafe_schema_extensions(schema)
@@ -178,11 +178,19 @@ def _generate_source_from_schema(cls, schema: dict[str, Any]) -> str:
             #    be replaced by ruff, but ruff isn't a runtime dep of pipelex or of
             #    datamodel-code-generator's core install. An empty list silences the
             #    warning without forcing a new runtime dependency on consumers.
+            # `enum_field_as_literal=LiteralType.All` keeps `enum: [strings]` schema
+            # nodes as Python `Literal[...]` annotations instead of regenerating a
+            # named `Enum` class. Without it, a `Literal[...]` field round-trips into
+            # a plain `Enum` (e.g. `class Recommendation(Enum): Poor_Match = "Poor Match"`),
+            # and an LLM filling that schema returns the Python repr
+            # `"Recommendation.Poor_Match"` instead of the value `"Poor Match"`,
+            # which then fails Pydantic validation against the original choice set.
             generate(
                 input_=schema_str,
                 input_file_type=InputFileType.JsonSchema,
                 output=output_path,
                 output_model_type=DataModelType.PydanticV2BaseModel,
+                enum_field_as_literal=LiteralType.All,
                 formatters=[],
             )
             return output_path.read_text(encoding="utf-8")
@@ -252,16 +260,18 @@ def _exec_source_to_types(cls, source_code: str) -> dict[str, type[Any]]:
             and not name.startswith("_")
             and (issubclass(obj, BaseModel) or issubclass(obj, Enum))
             and obj is not BaseModel
+            and obj is not RootModel
             and obj is not Enum
         }
 
         # datamodel-code-generator uses `from __future__ import annotations` which turns
         # type annotations into strings. Rebuild every BaseModel so forward refs (including
-        # references to generated Enum classes for choices fields) resolve against the
-        # full type namespace.
+        # references to generated Enum classes for choices fields, and Literal annotations
+        # produced by `enum_field_as_literal=All`) resolve against the full type namespace.
+        rebuild_namespace: dict[str, Any] = {**all_user_types, "Literal": Literal}
         for candidate in all_user_types.values():
             if issubclass(candidate, BaseModel):
-                candidate.model_rebuild(_types_namespace=all_user_types)
+                candidate.model_rebuild(_types_namespace=rebuild_namespace)
 
         return all_user_types
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "pipelex"
-version = "0.26.1"
+version = "0.26.2"
 description = "Execute composable AI methods declared in the MTHDS open standard"
 authors = [{ name = "Evotis S.A.S.", email = "oss@pipelex.com" }]
 maintainers = [{ name = "Pipelex staff", email = "oss@pipelex.com" }]
diff --git a/tests/integration/pipelex/temporal/data_converter/test_data_conv_enum_roundtrip.py b/tests/integration/pipelex/temporal/data_converter/test_data_conv_enum_roundtrip.py
@@ -1,4 +1,3 @@
-from enum import Enum
 from typing import cast
 
 import pytest
@@ -26,10 +25,10 @@ def test_dynamic_class_with_enum_field_round_trips(
         self,
         payload_converter: BaseModelPayloadConverter,
     ):
-        """A class with an Enum field generated via SchemaToModelFactory must survive a
-        full payload round-trip. Exercises the receiver-side exec path that registers
-        Enum subclasses in the per-call scoped ClassRegistry — without it the
-        deserializer cannot resolve the dynamic enum class and the round-trip fails.
+        """A class with an enum-shaped field generated via SchemaToModelFactory must
+        survive a full payload round-trip. Exercises the receiver-side exec path that
+        rebuilds the dynamic class from `__kajson_class_source__` — without it the
+        deserializer cannot resolve the dynamic class and the round-trip fails.
         """
         schema = Pet.model_json_schema()
         dynamic_pet_cls = SchemaToModelFactory.make_from_json_schema(schema, "Pet")
@@ -46,12 +45,10 @@ def test_dynamic_class_with_enum_field_round_trips(
 
         restored_class: type[BaseModel] = type(restored)
         assert restored_class.__name__ == "Pet"
-        assert restored.name == "Rex"  # type: ignore[attr-defined]
-        # datamodel-code-generator emits `class PetSpecies(Enum)` (not StrEnum), so the
-        # dynamic enum is a distinct class from the static PetSpecies above. Assert on
-        # class name + value instead of identity equality.
-        species_value: Enum = restored.species  # type: ignore[attr-defined]
-        assert isinstance(species_value, Enum)
-        assert type(species_value).__name__ == "PetSpecies"
-        assert species_value.value == "dog"
+        # datamodel-code-generator now emits enum-shaped `$defs` as
+        # `RootModel[Literal[...]]` (since `enum_field_as_literal=LiteralType.All` is
+        # set in `_generate_source_from_schema`), so `restored.species` is a
+        # `RootModel` wrapping the value, not a Python `Enum` instance. Assert on the
+        # serialized data — that is what actually crosses the Temporal payload boundary.
+        assert restored.model_dump() == {"name": "Rex", "species": "dog"}
         assert getattr(restored_class, "__kajson_class_source__", None)
diff --git a/tests/unit/pipelex/cogt/content_generation/test_schema_to_model.py b/tests/unit/pipelex/cogt/content_generation/test_schema_to_model.py
@@ -2,7 +2,7 @@
 
 import threading
 import uuid
-from typing import Any
+from typing import Any, Literal, get_args, get_origin
 
 import pytest
 from pydantic import BaseModel, Field
@@ -17,6 +17,17 @@ class SimpleModel(BaseModel):
     age: int = Field(description="The age")
 
 
+class ModelWithLiteralChoices(BaseModel):
+    """A model whose ``recommendation`` field is a ``Literal`` over string choices.
+
+    Mirrors the shape a `.mthds` ``choices = [...]`` declaration produces: the
+    field's JSON schema contains an inline ``enum: [strings]`` array, and the
+    Python annotation is ``Literal[...]`` — NOT a named Python enum class.
+    """
+
+    recommendation: Literal["Strong Match", "Good Match", "Partial Match", "Poor Match"]
+
+
 class Address(BaseModel):
     street: str
     city: str
@@ -32,6 +43,53 @@ def _benign_object_schema() -> dict[str, Any]:
 
 
 class TestSchemaToModel:
+    def test_literal_choices_field_round_trips_as_literal_not_enum(self) -> None:
+        """Round-tripping a ``Literal[str-set]`` field through ``make_from_json_schema``
+        must keep it as a ``Literal[...]`` annotation in the reconstructed class.
+
+        Regression guard: before ``enum_field_as_literal=LiteralType.All`` was passed
+        to the code generator, the round-trip silently re-emitted the field as a
+        generated ``Enum`` class (e.g. ``class Recommendation(Enum)`` with members like
+        ``Poor_Match = "Poor Match"``). When such a reconstructed class is handed to
+        an LLM as the structured-output target, the LLM tends to fill it with the
+        Python enum repr (``"Recommendation.Poor_Match"``) instead of the literal
+        string (``"Poor Match"``), which then fails Pydantic validation against the
+        original choice set.
+
+        We assert two things:
+          1. the generated Python source code does NOT introduce an ``Enum`` class
+             named after the field (``class Recommendation(Enum)``);
+          2. the reconstructed model's ``recommendation`` field annotation is a
+             ``Literal[...]`` whose args are exactly the original string choices.
+        """
+        # Use a unique title so the class-level schema cache never short-circuits
+        # this test with a stale (already-correct or already-buggy) result from
+        # another test run.
+        schema = ModelWithLiteralChoices.model_json_schema()
+        unique_title = f"LiteralChoicesRepro_{uuid.uuid4().hex}"
+        schema["title"] = unique_title
+
+        result_class = SchemaToModelFactory.make_from_json_schema(schema, unique_title)
+        source = getattr(result_class, "__kajson_class_source__", "")
+
+        assert "class Recommendation(Enum)" not in source, (
+            "Bug: Literal[...] choices were re-emitted as a generated Enum class. "
+            "An LLM targeting this Enum returns 'Recommendation.Poor_Match' (Python "
+            "enum repr) instead of the literal 'Poor Match', which fails validation "
+            "against the original choice set.\n\nGenerated source:\n" + source
+        )
+
+        recommendation_field = result_class.model_fields["recommendation"]
+        annotation = recommendation_field.annotation
+        assert get_origin(annotation) is Literal, (
+            f"Expected the round-tripped 'recommendation' field annotation to be Literal[...], got {annotation!r}. Source:\n{source}"
+        )
+        assert set(get_args(annotation)) == {
+            "Strong Match",
+            "Good Match",
+            "Partial Match",
+            "Poor Match",
+        }, f"Literal args drifted during round-trip: {get_args(annotation)!r}"
+
     def test_simple_model_reconstruction(self) -> None:
         """A simple model can be reconstructed from its JSON schema."""
         schema = SimpleModel.model_json_schema()
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,7 @@`
`1`	`1`	`{`
`2`	`2`	`"schemaVersion": 1,`
`3`	`3`	`"label": "tests",`
`4`		`- "message": "4900",`
	`4`	`+ "message": "4901",`
`5`	`5`	`"color": "blue",`
`6`	`6`	`"cacheSeconds": 300`
`7`	`7`	`}`