Add is_transparent method and TransparentModelEvaluationContext for stable cache keys (#194)

ptomecek · web-flow · commit a16f19b800e4 · 2026-04-16T09:20:50.000-04:00
Evaluators that don't modify return values can now override is_transparent() to return True, which causes make_evaluation_context() to create TransparentModelEvaluationContext layers. cache_key() strips these layers so that wrapping a model with different transparent evaluators does not change its cache identity or dependency graph node identity. The is_transparent() method accepts the ModelEvaluationContext, allowing evaluators to be conditionally transparent based on context. Closes #192. Signed-off-by: Pascal Tomecek <pascal.tomecek@cubistsystematic.com>
diff --git a/ccflow/callable.py b/ccflow/callable.py
@@ -41,6 +41,7 @@
     "FlowOptionsDeps",
     "FlowOptionsOverride",
     "ModelEvaluationContext",
+    "TransparentModelEvaluationContext",
     "EvaluatorBase",
     "Evaluator",
     "WrapperModel",
@@ -262,7 +263,7 @@ def get_evaluation_context(model: CallableModelType, context: ContextType, as_di
             if as_dict:
                 return dict(model=evaluator, context=evaluation_context)
             else:
-                return ModelEvaluationContext(model=evaluator, context=evaluation_context)
+                return evaluator.make_evaluation_context(evaluation_context)
 
         # The decorator implementation
         def wrapper(model, context=Signature.empty, *, _options: Optional[FlowOptions] = None, **kwargs):
@@ -510,10 +511,47 @@ def __deps__(self, context: ModelEvaluationContext) -> GraphDepList:
     def __exit__(self):
         pass
 
+    def is_transparent(self, context: ModelEvaluationContext) -> bool:
+        """Whether this evaluator does NOT modify the return value for the given context.
+
+        Transparent evaluators may add side effects (logging, caching, timing,
+        dependency ordering) but always return the same value as ``context()``.
+        This allows cache key computation and dependency graph deduplication to
+        skip these layers.
+
+        Override this method to return ``True`` for evaluators that are always
+        transparent, or implement context-dependent logic for evaluators that
+        are only sometimes transparent.
+        """
+        return False
+
+    def make_evaluation_context(self, context: ModelEvaluationContext, **kwargs) -> ModelEvaluationContext:
+        """Create a ModelEvaluationContext wrapping this evaluator around the given context.
+
+        Returns a ``TransparentModelEvaluationContext`` when ``is_transparent(context)``
+        is ``True``, signaling that this layer can be skipped for cache key computation.
+        """
+        if self.is_transparent(context):
+            return TransparentModelEvaluationContext(model=self, context=context, **kwargs)
+        return ModelEvaluationContext(model=self, context=context, **kwargs)
+
+
+class TransparentModelEvaluationContext(ModelEvaluationContext):
+    """A ModelEvaluationContext layer that is safe to skip for cache key computation.
+
+    Created by ``EvaluatorBase.make_evaluation_context()`` when the evaluator's
+    ``is_transparent()`` returns ``True``. Signals that this evaluator layer does
+    not modify the return value and can be ignored when computing cache keys or
+    deduplicating dependency graph nodes.
+    """
+
 
 class Evaluator(EvaluatorBase):
     """A higher-order model that evaluates a function on a CallableModel and a Context."""
 
+    def is_transparent(self, context: ModelEvaluationContext) -> bool:
+        return True
+
     @override
     def __call__(self, context: ModelEvaluationContext) -> ResultType:
         return context()
diff --git a/ccflow/evaluators/common.py b/ccflow/evaluators/common.py
@@ -12,7 +12,14 @@
 from typing_extensions import override
 
 from ..base import BaseModel, make_lazy_result
-from ..callable import CallableModel, ContextBase, EvaluatorBase, ModelEvaluationContext, ResultType
+from ..callable import (
+    CallableModel,
+    ContextBase,
+    EvaluatorBase,
+    ModelEvaluationContext,
+    ResultType,
+    TransparentModelEvaluationContext,
+)
 
 __all__ = [
     "cache_key",
@@ -53,16 +60,25 @@ def combine_evaluators(first: Optional[EvaluatorBase], second: Optional[Evaluato
 
 
 class MultiEvaluator(EvaluatorBase):
-    """An evaluator that combines multiple evaluators."""
+    """An evaluator that combines multiple evaluators.
+
+    Each child evaluator is wrapped in a ModelEvaluationContext using its own
+    ``make_evaluation_context()`` method, so transparent children produce
+    ``TransparentModelEvaluationContext`` layers that can be skipped during
+    cache key computation.
+    """
 
     evaluators: List[EvaluatorBase] = Field(
         description="The list of evaluators to combine. The first evaluator in the list will be called first during evaluation."
     )
 
+    def is_transparent(self, context: ModelEvaluationContext) -> bool:
+        return all(e.is_transparent(context) for e in self.evaluators)
+
     @override
     def __call__(self, context: ModelEvaluationContext) -> ResultType:
         for evaluator in self.evaluators:
-            context = ModelEvaluationContext(model=evaluator, context=context, options=context.options)
+            context = evaluator.make_evaluation_context(context, options=context.options)
         return context()
 
 
@@ -71,6 +87,9 @@ class FallbackEvaluator(EvaluatorBase):
 
     evaluators: List[EvaluatorBase] = Field(description="The list of evaluators to try (in order).")
 
+    def is_transparent(self, context: ModelEvaluationContext) -> bool:
+        return all(e.is_transparent(context) for e in self.evaluators)
+
     @override
     def __call__(self, context: ModelEvaluationContext) -> ResultType:
         for evaluator in self.evaluators:
@@ -120,6 +139,9 @@ class LoggingEvaluator(EvaluatorBase):
     log_result: bool = Field(False, description="Whether to log the result of the evaluation")
     format_config: FormatConfig = Field(FormatConfig(), description="Configuration for formatting the result of the evaluation if log_result=True")
 
+    def is_transparent(self, context: ModelEvaluationContext) -> bool:
+        return True
+
     @field_validator("log_level", mode="before")
     @classmethod
     def _validate_log_level(cls, v: Union[int, str]) -> int:
@@ -195,12 +217,30 @@ def _format_result(self, result: ResultType) -> str:
 
 
 def cache_key(flow_obj: Union[ModelEvaluationContext, ContextBase, CallableModel]) -> bytes:
-    """Returns a key suitable for use in caching.
+    """Returns a key suitable for use in caching and dependency graph deduplication.
+
+    For ``ModelEvaluationContext`` inputs, strips ``TransparentModelEvaluationContext``
+    layers (evaluators that don't modify the return value) so that the key depends
+    only on the underlying model, context, fn, options, and any non-transparent
+    evaluators in the chain.
 
     Args:
         flow_obj: The object to be tokenized to form the cache key.
     """
-    if isinstance(flow_obj, (ModelEvaluationContext, ContextBase, CallableModel)):
+    if isinstance(flow_obj, ModelEvaluationContext):
+        fn = flow_obj.fn
+        non_transparent = []
+        while isinstance(flow_obj.context, ModelEvaluationContext):
+            fn = flow_obj.fn if flow_obj.fn != "__call__" else fn
+            if not isinstance(flow_obj, TransparentModelEvaluationContext):
+                non_transparent.append(flow_obj.model)
+            flow_obj = flow_obj.context
+        d = flow_obj.model_dump(mode="python")
+        d["fn"] = fn if fn != "__call__" else flow_obj.fn
+        if non_transparent:
+            d["_evaluators"] = [e.model_dump(mode="python") for e in non_transparent]
+        return dask.base.tokenize(d).encode("utf-8")
+    elif isinstance(flow_obj, (ContextBase, CallableModel)):
         return dask.base.tokenize(flow_obj.model_dump(mode="python")).encode("utf-8")
     else:
         raise TypeError(f"object of type {type(flow_obj)} cannot be serialized by this function!")
@@ -213,8 +253,14 @@ class MemoryCacheEvaluator(EvaluatorBase):
     _cache: Dict[bytes, ResultType] = PrivateAttr({})
     _ids: Dict[bytes, ModelEvaluationContext] = PrivateAttr({})
 
+    def is_transparent(self, context: ModelEvaluationContext) -> bool:
+        return True
+
     def key(self, context: ModelEvaluationContext):
-        """Function to convert a ModelEvaluationContext to a key"""
+        """Function to convert a ModelEvaluationContext to a cache key.
+
+        Delegates to ``cache_key()`` which strips transparent evaluator layers.
+        """
         return cache_key(context)
 
     @property
@@ -289,6 +335,9 @@ class GraphEvaluator(EvaluatorBase):
 
     _is_evaluating: bool = PrivateAttr(False)
 
+    def is_transparent(self, context: ModelEvaluationContext) -> bool:
+        return True
+
     @override
     def __call__(self, context: ModelEvaluationContext) -> ResultType:
         import graphlib
diff --git a/ccflow/tests/evaluators/test_common.py b/ccflow/tests/evaluators/test_common.py
@@ -9,9 +9,11 @@
     DateContext,
     DateRangeContext,
     Evaluator,
+    EvaluatorBase,
     FlowOptionsOverride,
     ModelEvaluationContext,
     NullContext,
+    TransparentModelEvaluationContext,
 )
 from ccflow.evaluators import (
     FallbackEvaluator,
@@ -257,6 +259,73 @@ def test_model_evaluation_context(self):
         assert cache_key(mec1) == cache_key(mec2)
         assert cache_key(mec3) != cache_key(mec1)
 
+    def test_transparent_mec_stripped(self):
+        """TransparentModelEvaluationContext layers are stripped from cache keys."""
+        m = MyDateCallable(offset=1)
+        ctx = DateContext(date=date(2022, 1, 1))
+        inner = ModelEvaluationContext(model=m, context=ctx)
+        wrapped = TransparentModelEvaluationContext(model=LoggingEvaluator(), context=inner)
+        assert cache_key(inner) == cache_key(wrapped)
+
+    def test_opaque_mec_preserved(self):
+        """Non-transparent MEC layers produce different cache keys."""
+
+        class OpaqueEval(EvaluatorBase):
+            def __call__(self, context: ModelEvaluationContext):
+                return context()
+
+        m = MyDateCallable(offset=1)
+        ctx = DateContext(date=date(2022, 1, 1))
+        inner = ModelEvaluationContext(model=m, context=ctx)
+        wrapped = ModelEvaluationContext(model=OpaqueEval(), context=inner)
+        assert cache_key(inner) != cache_key(wrapped)
+
+    def test_stacked_transparent_stripped(self):
+        """Multiple stacked TransparentMEC layers are all stripped."""
+        m = MyDateCallable(offset=1)
+        ctx = DateContext(date=date(2022, 1, 1))
+        inner = ModelEvaluationContext(model=m, context=ctx)
+        layer1 = TransparentModelEvaluationContext(model=LoggingEvaluator(), context=inner)
+        layer2 = TransparentModelEvaluationContext(model=MemoryCacheEvaluator(), context=layer1)
+        assert cache_key(inner) == cache_key(layer2)
+
+    def test_sandwich_transparent_between_opaque(self):
+        """Transparent layer sandwiched between opaque layers is stripped, opaques preserved."""
+
+        class OpaqueEval(EvaluatorBase):
+            tag: str = "default"
+
+            def __call__(self, context: ModelEvaluationContext):
+                return context()
+
+        m = MyDateCallable(offset=1)
+        ctx = DateContext(date=date(2022, 1, 1))
+        inner = ModelEvaluationContext(model=m, context=ctx)
+        opaque1 = ModelEvaluationContext(model=OpaqueEval(tag="inner"), context=inner)
+        transparent = TransparentModelEvaluationContext(model=LoggingEvaluator(), context=opaque1)
+        opaque2 = ModelEvaluationContext(model=OpaqueEval(tag="outer"), context=transparent)
+        # Both opaque evaluators should be in the key; the transparent one should not
+        assert cache_key(opaque2) != cache_key(inner)
+        # Same sandwich should give consistent keys
+        opaque2b = ModelEvaluationContext(
+            model=OpaqueEval(tag="outer"),
+            context=TransparentModelEvaluationContext(
+                model=LoggingEvaluator(), context=ModelEvaluationContext(model=OpaqueEval(tag="inner"), context=inner)
+            ),
+        )
+        assert cache_key(opaque2) == cache_key(opaque2b)
+
+    def test_fn_deps_preserved_through_transparent(self):
+        """fn='__deps__' is preserved when walking through transparent layers."""
+        m = MyDateCallable(offset=1)
+        ctx = DateContext(date=date(2022, 1, 1))
+        inner = ModelEvaluationContext(model=m, context=ctx, fn="__deps__")
+        wrapped = TransparentModelEvaluationContext(model=LoggingEvaluator(), context=inner)
+        # Both should produce the same key, and it should differ from __call__
+        assert cache_key(inner) == cache_key(wrapped)
+        call_inner = ModelEvaluationContext(model=m, context=ctx, fn="__call__")
+        assert cache_key(inner) != cache_key(call_inner)
+
 
 class TestMemoryCacheEvaluator(TestCase):
     def test_basic(self):
@@ -355,6 +424,74 @@ def test_decorator_volatile(self):
                 self.assertGreater(out2, out1)
         self.assertEqual(len(captured.records), 2)
 
+    def test_cache_key_stable_across_evaluators(self):
+        """Cache keys should not change when wrapping with non-caching evaluators (e.g. LoggingEvaluator)."""
+        m1 = MyDateCallable(offset=1)
+        cache = MemoryCacheEvaluator()
+        ctx = DateContext(date=date(2022, 1, 1))
+
+        # First call: cache evaluator only
+        with FlowOptionsOverride(options={"evaluator": cache, "cacheable": True}):
+            out1 = m1(ctx)
+        self.assertEqual(len(cache.cache), 1)
+
+        # Second call: LoggingEvaluator + same cache evaluator
+        wrapped = combine_evaluators(LoggingEvaluator(), cache)
+        with FlowOptionsOverride(options={"evaluator": wrapped, "cacheable": True}):
+            out2 = m1(ctx)
+        # Should still be only 1 cache entry (same key, cache hit)
+        self.assertEqual(len(cache.cache), 1)
+        self.assertEqual(out1, out2)
+
+    def test_cache_key_differs_with_nontransparent_evaluator(self):
+        """Cache keys should differ when a non-transparent evaluator is in the chain."""
+
+        class OpaqueEvaluator(EvaluatorBase):
+            """A dummy evaluator that is NOT transparent."""
+
+            def __call__(self, context: ModelEvaluationContext):
+                return context()
+
+        m1 = MyDateCallable(offset=1)
+        cache = MemoryCacheEvaluator()
+        ctx = DateContext(date=date(2022, 1, 1))
+
+        # First call: cache evaluator only
+        with FlowOptionsOverride(options={"evaluator": cache, "cacheable": True}):
+            m1(ctx)
+        self.assertEqual(len(cache.cache), 1)
+
+        # Second call: OpaqueEvaluator + same cache evaluator
+        wrapped = combine_evaluators(OpaqueEvaluator(), cache)
+        with FlowOptionsOverride(options={"evaluator": wrapped, "cacheable": True}):
+            m1(ctx)
+        # OpaqueEvaluator is not transparent, so cache key should differ
+        self.assertEqual(len(cache.cache), 2)
+
+    def test_cache_key_differs_with_fallback_opaque_child(self):
+        """FallbackEvaluator with opaque child should produce different cache key."""
+
+        class OpaqueEvaluator(EvaluatorBase):
+            def __call__(self, context: ModelEvaluationContext):
+                return context()
+
+        m1 = MyDateCallable(offset=1)
+        cache = MemoryCacheEvaluator()
+        ctx = DateContext(date=date(2022, 1, 1))
+
+        # First call: cache evaluator only
+        with FlowOptionsOverride(options={"evaluator": cache, "cacheable": True}):
+            m1(ctx)
+        self.assertEqual(len(cache.cache), 1)
+
+        # Second call: FallbackEvaluator(OpaqueEvaluator) + cache
+        fallback = FallbackEvaluator(evaluators=[OpaqueEvaluator()])
+        wrapped = combine_evaluators(fallback, cache)
+        with FlowOptionsOverride(options={"evaluator": wrapped, "cacheable": True}):
+            m1(ctx)
+        # FallbackEvaluator is not transparent, so cache key should differ
+        self.assertEqual(len(cache.cache), 2)
+
 
 class TestGraphDeps(TestCase):
     def test_graph_deps_diamond(self):
diff --git a/docs/wiki/Workflows.md b/docs/wiki/Workflows.md
@@ -589,13 +589,21 @@ An evaluator is basically another form of callable model, with a few caveats
 The `ModelEvaluationContext` has fields for the model, the context, the function to evaluate (i.e. `__call__`), and the `FlowOptions`.
 It too, has a `__call__` method that will evaluate the function on the model with the provided context (but ignoring any options).
 
+Evaluators that do not modify the return value (e.g. logging, caching, timing) should override the `is_transparent` method to return `True`.
+This allows `cache_key()` to skip these layers when computing cache keys, so that wrapping a model with different transparent evaluators does not change its cache identity.
+Evaluators that transform the result should inherit from `EvaluatorBase` directly (rather than from `Evaluator`, which is transparent) and leave `is_transparent` as the default (`False`).
+
 Below we illustrate how to write a really simple evaluator that just prints the options and delegates to the `ModelEvaluationContext` to get the normal result.
+Since it does not modify the return value, it overrides `is_transparent` to return `True`.
 
 ```python
 from ccflow import EvaluatorBase, ModelEvaluationContext, ResultType
 
 class MyEvaluator(EvaluatorBase):
 
+    def is_transparent(self, context: ModelEvaluationContext) -> bool:
+        return True
+
     def __call__(self, context: ModelEvaluationContext) -> ResultType:
         print("Custom evaluator with options:", context.options)
         return context()