Skip to content

Commit 7bccbeb

Browse files
committed
feat: native tokenization engine for BaseModel

Replace dask-based tokenization with a standalone, composable engine.

- `model_token` property on BaseModel with automatic cache invalidation
- DefaultTokenizer with pluggable SourceTokenizer × FunctionCollector
- AST and bytecode source hashing (bytecode default)
- Singledispatch `normalize_token` with handlers for numpy, pandas, etc.
- `__ccflow_tokenizer__` ClassVar to swap tokenizer per class or globally
- `__ccflow_tokenizer_deps__` ClassVar for standalone function dependencies
- Simplified `cache_key()` to use `model_token` directly
- Removed dask dependency from tokenization path
- Comprehensive tests (185 cases) and wiki documentation

Signed-off-by: Pascal Tomecek <pascal.tomecek@cubistsystematic.com>
1 parent 5adf933 commit 7bccbeb

11 files changed

Lines changed: 3249 additions & 24 deletions

File tree

.github/workflows/build.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
- '3.11'
3636
dependencies:
3737
- ''
38-
- '"pandas<2" "numpy<2" "xarray<2025.09.0" "dask<2024.7.0"'
38+
- '"pandas<2" "numpy<2" "xarray<2025.09.0"'
3939
- '"pandas<3"'
4040
- '"pandas<4"'
4141

ccflow/base.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
from .exttypes.pyobjectpath import PyObjectPath
3333
from .local_persistence import register_ccflow_import_path, sync_to_module
34+
from .utils.tokenize import DefaultTokenizer, Tokenizer, normalize_token
3435

3536
log = logging.getLogger(__name__)
3637

@@ -195,6 +196,41 @@ def type_(self) -> PyObjectPath:
195196
# We want to track under what names a model has been registered
196197
_registrations: List[Tuple["ModelRegistry", str]] = PrivateAttr(default_factory=list)
197198

199+
# Tokenization support
200+
__ccflow_tokenizer__: ClassVar[Tokenizer] = DefaultTokenizer.with_bytecode()
201+
_model_token: Optional[str] = PrivateAttr(default=None)
202+
203+
@property
204+
def model_token(self) -> str:
205+
"""Return a deterministic content hash of this model.
206+
207+
The token is cached by default (controlled by ``cache_token`` in model_config).
208+
For frozen models, the token is computed once and never recomputed.
209+
For mutable models, the cache is cleared on field assignment (via ``validate_assignment``).
210+
Set ``cache_token=False`` in model_config to always compute fresh.
211+
"""
212+
cache = self.model_config.get("cache_token", True)
213+
if cache and self._model_token is not None:
214+
return self._model_token
215+
token = self.__ccflow_tokenizer__.tokenize(self)
216+
if cache:
217+
self.__pydantic_private__["_model_token"] = token
218+
return token
219+
220+
@model_validator(mode="after")
221+
def _clear_token_cache(self):
222+
"""Clear the cached token on construction and field assignment."""
223+
if self.model_config.get("cache_token", True):
224+
self.__pydantic_private__["_model_token"] = None
225+
return self
226+
227+
def model_copy(self, *, update=None, deep=False):
228+
"""Override model_copy to clear the stale token cache on the copy."""
229+
copy = super().model_copy(update=update, deep=deep)
230+
if update and copy.__pydantic_private__ is not None:
231+
copy.__pydantic_private__["_model_token"] = None
232+
return copy
233+
198234
model_config = ConfigDict(
199235
# Note that validate_assignment only partially works: https://github.com/pydantic/pydantic/issues/7105
200236
validate_assignment=True,
@@ -316,6 +352,18 @@ def __getstate__(self):
316352
def __setstate__(self, state):
317353
state["__pydantic_fields_set__"] = set(state["__pydantic_fields_set__"])
318354
super().__setstate__(state)
355+
# Clear stale token cache from pickle
356+
if self.__pydantic_private__ is not None and "_model_token" in self.__pydantic_private__:
357+
self.__pydantic_private__["_model_token"] = None
358+
359+
360+
# Register ccflow BaseModel-specific normalize_token handler
361+
# Delegates to the model's tokenizer so normalization is consistent
362+
# regardless of whether the model is accessed via model_token or
363+
# encountered as a value inside a container.
364+
@normalize_token.register(BaseModel)
365+
def _normalize_ccflow_basemodel(obj):
366+
return obj.__ccflow_tokenizer__.normalize(obj)
319367

320368

321369
class _ModelRegistryData(PydanticBaseModel):

ccflow/callable.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,40 @@ class ModelEvaluationContext(
451451
# Otherwise, the validation will re-run fully despite the models already being validated on construction
452452
# TODO: Make the instance check compatible with the generic types instead of the base type
453453

454+
@property
455+
def model_token(self) -> str:
456+
"""Compute a cache-key token for this MEC chain.
457+
458+
Walks the MEC chain, strips ``TransparentModelEvaluationContext``
459+
layers, and tokenizes the innermost context plus any opaque evaluators.
460+
"""
461+
cache = self.model_config.get("cache_token", True)
462+
if cache and self._model_token is not None:
463+
return self._model_token
464+
465+
fn = self.fn
466+
non_transparent = []
467+
current = self
468+
while isinstance(current.context, ModelEvaluationContext):
469+
fn = current.fn if current.fn != "__call__" else fn
470+
if not isinstance(current, TransparentModelEvaluationContext):
471+
non_transparent.append(current.model)
472+
current = current.context
473+
474+
# Build a canonical representation from the innermost MEC
475+
from .utils.tokenize import normalize_token
476+
477+
inner_norm = normalize_token(current)
478+
effective_fn = fn if fn != "__call__" else current.fn
479+
parts = (inner_norm, effective_fn)
480+
if non_transparent:
481+
parts = parts + (tuple(normalize_token(e) for e in non_transparent),)
482+
token = self.__ccflow_tokenizer__.hash_canonical(parts)
483+
484+
if cache:
485+
self.__pydantic_private__["_model_token"] = token
486+
return token
487+
454488
@model_validator(mode="wrap")
455489
def _context_validator(cls, values, handler, info):
456490
"""Override _context_validator from parent"""

ccflow/evaluators/common.py

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from types import MappingProxyType
88
from typing import Any, Callable, Dict, List, Optional, Set, Union
99

10-
import dask.base
1110
from pydantic import Field, PrivateAttr, field_validator
1211
from typing_extensions import override
1312

@@ -18,7 +17,6 @@
1817
EvaluatorBase,
1918
ModelEvaluationContext,
2019
ResultType,
21-
TransparentModelEvaluationContext,
2220
)
2321

2422
__all__ = [
@@ -227,21 +225,8 @@ def cache_key(flow_obj: Union[ModelEvaluationContext, ContextBase, CallableModel
227225
Args:
228226
flow_obj: The object to be tokenized to form the cache key.
229227
"""
230-
if isinstance(flow_obj, ModelEvaluationContext):
231-
fn = flow_obj.fn
232-
non_transparent = []
233-
while isinstance(flow_obj.context, ModelEvaluationContext):
234-
fn = flow_obj.fn if flow_obj.fn != "__call__" else fn
235-
if not isinstance(flow_obj, TransparentModelEvaluationContext):
236-
non_transparent.append(flow_obj.model)
237-
flow_obj = flow_obj.context
238-
d = flow_obj.model_dump(mode="python")
239-
d["fn"] = fn if fn != "__call__" else flow_obj.fn
240-
if non_transparent:
241-
d["_evaluators"] = [e.model_dump(mode="python") for e in non_transparent]
242-
return dask.base.tokenize(d).encode("utf-8")
243-
elif isinstance(flow_obj, (ContextBase, CallableModel)):
244-
return dask.base.tokenize(flow_obj.model_dump(mode="python")).encode("utf-8")
228+
if isinstance(flow_obj, (ModelEvaluationContext, ContextBase, CallableModel)):
229+
return flow_obj.model_token.encode("utf-8")
245230
else:
246231
raise TypeError(f"object of type {type(flow_obj)} cannot be serialized by this function!")
247232

ccflow/tests/test_base_serialize.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -259,12 +259,13 @@ def test_pickle_consistency(self):
259259
# (as it would normally in pydantic because of https://github.com/pydantic/pydantic/issues/11603)
260260
# This is generated on Linux/Python 3.11 - might need to have version specific values if it changes.
261261
target = (
262-
b"\x80\x04\x95\xdf\x00\x00\x00\x00\x00\x00\x00\x8c ccflow.tests.test_base_seri"
262+
b"\x80\x04\x95\xf0\x00\x00\x00\x00\x00\x00\x00\x8c ccflow.tests.test_base_seri"
263263
b"alize\x94\x8c\x13MultiAttributeModel\x94\x93\x94)\x81\x94}\x94(\x8c\x08__"
264264
b"dict__\x94}\x94(\x8c\x01z\x94K\x01\x8c\x01y\x94\x8c\x04test\x94\x8c"
265265
b"\x01x\x94G@\t\x1e\xb8Q\xeb\x85\x1f\x8c\x01w\x94\x88u\x8c\x12__pydantic_extra"
266266
b"__\x94N\x8c\x17__pydantic_fields_set__\x94]\x94(h\x0bh\nh\x08h\x07e\x8c\x14"
267-
b"__pydantic_private__\x94}\x94\x8c\x0e_registrations\x94]\x94sub."
267+
b"__pydantic_private__\x94}\x94(\x8c\x0e_registrations\x94]\x94\x8c\x0c_model_"
268+
b"token\x94Nuub."
268269
)
269270
self.assertEqual(serialized, target)
270271
deserialized = pickle.loads(serialized)

0 commit comments

Comments (0)