Skip to content

Commit fa37bfb

Browse files
ptomecek and Copilot committed
feat: support class tokenizer deps
Allow __ccflow_tokenizer_deps__ to include class objects in addition to function-like dependencies. Class deps now contribute their own compute_behavior_token() recursively to the parent token. Also add explicit cycle detection for recursive class dependency graphs so they fail fast with a clear TypeError instead of recursing forever. Add regression tests for class deps, cache_key integration, and recursive class dep errors. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 6908f21 commit fa37bfb

2 files changed

Lines changed: 115 additions & 23 deletions

File tree

ccflow/tests/utils/test_behavior_hash.py

Lines changed: 67 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
"""Tests for tokenize helpers used by cache_key()."""
22

3+
import pytest
4+
35
from ccflow.callable import CallableModel, ContextBase, EvaluatorBase, ModelEvaluationContext
46
from ccflow.context import NullContext
57
from ccflow.evaluators.common import cache_key
@@ -294,6 +296,29 @@ def f(self):
294296

295297
assert compute_behavior_token(A) != compute_behavior_token(B)
296298

299+
def test_class_dep_included(self):
300+
class HelperA:
301+
def f(self):
302+
return 1
303+
304+
class HelperB:
305+
def f(self):
306+
return 2
307+
308+
class A:
309+
__ccflow_tokenizer_deps__ = [HelperA]
310+
311+
def f(self):
312+
return 1
313+
314+
class B:
315+
__ccflow_tokenizer_deps__ = [HelperB]
316+
317+
def f(self):
318+
return 1
319+
320+
assert compute_behavior_token(A) != compute_behavior_token(B)
321+
297322
def test_subclass_deps_extend_inherited_deps(self):
298323
def base_a():
299324
return 1
@@ -324,6 +349,21 @@ class SubB(BaseB):
324349

325350
assert compute_behavior_token(SubA) != compute_behavior_token(SubB)
326351

352+
def test_recursive_class_deps_raise(self):
353+
class A:
354+
def f(self):
355+
return 1
356+
357+
class B:
358+
def g(self):
359+
return 2
360+
361+
A.__ccflow_tokenizer_deps__ = [B]
362+
B.__ccflow_tokenizer_deps__ = [A]
363+
364+
with pytest.raises(TypeError, match="Recursive __ccflow_tokenizer_deps__ class dependency"):
365+
compute_behavior_token(A)
366+
327367

328368
# ---------------------------------------------------------------------------
329369
# Integration with cache_key()
@@ -412,6 +452,33 @@ def helper(self, x=2):
412452

413453
assert cache_key(A()) != cache_key(B())
414454

455+
def test_class_dep_changes_key(self):
456+
from ccflow import Flow
457+
458+
class HelperA:
459+
def f(self):
460+
return 1
461+
462+
class HelperB:
463+
def f(self):
464+
return 2
465+
466+
class A(CallableModel):
467+
__ccflow_tokenizer_deps__ = [HelperA]
468+
469+
@Flow.call
470+
def __call__(self, context: NullContext) -> GenericResult:
471+
return GenericResult(value=1)
472+
473+
class B(CallableModel):
474+
__ccflow_tokenizer_deps__ = [HelperB]
475+
476+
@Flow.call
477+
def __call__(self, context: NullContext) -> GenericResult:
478+
return GenericResult(value=1)
479+
480+
assert cache_key(A()) != cache_key(B())
481+
415482
def test_opaque_evaluator_behavior_changes_key(self):
416483
from ccflow import Flow
417484

ccflow/utils/tokenize.py

Lines changed: 48 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -129,27 +129,37 @@ def _hash_function_bytecode(func: Callable) -> Optional[str]:
129129
return h.hexdigest()
130130

131131

132-
def _dependency_sort_key(func: Callable) -> Tuple[str, str, str]:
133-
"""Return a deterministic identity for dependency sorting/deduping."""
132+
def _dependency_info(dep: object, *, _visited: Tuple[type, ...]) -> Optional[Tuple[Tuple[str, str, str, str], str, str]]:
133+
"""Return deterministic identity, name, and token for a dependency entry."""
134+
135+
if isinstance(dep, type):
136+
module = getattr(dep, "__module__", "")
137+
qualname = getattr(dep, "__qualname__", getattr(dep, "__name__", repr(dep)))
138+
behavior = compute_behavior_token(dep, _visited=_visited)
139+
if behavior is None:
140+
return None
141+
return ("class", module, qualname, behavior), f"__dep_class__:{qualname}", behavior
134142

135-
unwrapped = _unwrap_function(func) or func
143+
unwrapped = _unwrap_function(dep)
144+
if unwrapped is None:
145+
return None
136146
module = getattr(unwrapped, "__module__", "")
137147
qualname = getattr(unwrapped, "__qualname__", getattr(unwrapped, "__name__", repr(unwrapped)))
138-
behavior = _hash_function_bytecode(unwrapped) or ""
139-
return module, qualname, behavior
148+
behavior = _hash_function_bytecode(unwrapped)
149+
if behavior is None:
150+
return None
151+
return ("callable", module, qualname, behavior), f"__dep__:{qualname}", behavior
140152

141153

142154
def _collect_methods(cls: type) -> List[Tuple[str, Callable]]:
143-
"""Collect callable methods from *cls* (walking MRO) plus ``__ccflow_tokenizer_deps__``.
155+
"""Collect callable methods from *cls* (walking MRO).
144156
145157
Methods are collected with MRO override semantics: for each method name,
146158
the first definition found in the MRO wins. This means a subclass's
147159
``__call__`` overrides the base class's even if the subclass doesn't
148160
redefine every method.
149161
150-
Own methods are sorted alphabetically. Dependencies are merged across the
151-
MRO, deduplicated, and sorted deterministically so that declaration order
152-
does not affect the hash.
162+
Own methods are sorted alphabetically.
153163
154164
Internal framework attributes (``__ccflow_*``) and pydantic/python
155165
boilerplate methods are skipped.
@@ -173,43 +183,53 @@ def _collect_methods(cls: type) -> List[Tuple[str, Callable]]:
173183

174184
methods.sort(key=lambda pair: pair[0])
175185

176-
# Collect __ccflow_tokenizer_deps__ from the full MRO. Subclasses may add
177-
# deps without losing inherited ones.
186+
return methods
187+
188+
189+
def _collect_dependency_hashes(cls: type, *, _visited: Tuple[type, ...]) -> List[Tuple[str, str]]:
190+
"""Collect hashed ``__ccflow_tokenizer_deps__`` entries from the full MRO.
191+
192+
Dependencies are merged across the MRO, deduplicated, and sorted
193+
deterministically so that declaration order does not affect the hash.
194+
195+
Dependency entries may be either:
196+
- function-like objects hashable via ``_hash_function_bytecode()``
197+
- classes, in which case ``compute_behavior_token(dep_class)`` is included
198+
"""
178199
deps = []
179200
seen_dep_keys = set()
180201
for klass in cls.__mro__:
181202
extra_deps = klass.__dict__.get("__ccflow_tokenizer_deps__")
182203
if extra_deps is None:
183204
continue
184-
for func in extra_deps:
185-
unwrapped = _unwrap_function(func) or func
186-
if not callable(unwrapped):
205+
for dep in extra_deps:
206+
dep_info = _dependency_info(dep, _visited=_visited)
207+
if dep_info is None:
187208
continue
188-
dep_key = _dependency_sort_key(func)
209+
dep_key, dep_name, dep_token = dep_info
189210
if dep_key in seen_dep_keys:
190211
continue
191212
seen_dep_keys.add(dep_key)
192-
deps.append((dep_key, func))
213+
deps.append((dep_key, dep_name, dep_token))
193214

194-
deps.sort(key=lambda pair: pair[0])
195-
methods.extend((f"__dep__:{dep_key[1]}", func) for dep_key, func in deps)
196-
197-
return methods
215+
deps.sort(key=lambda item: item[0])
216+
return [(dep_name, dep_token) for _, dep_name, dep_token in deps]
198217

199218

200-
def compute_behavior_token(cls: type) -> Optional[str]:
219+
def compute_behavior_token(cls: type, *, _visited: Tuple[type, ...] = ()) -> Optional[str]:
201220
"""Compute a SHA-256 behavior token for *cls* based on its method bytecode.
202221
203222
The token captures behavior-relevant state for every method in *cls*'s MRO
204223
(with standard override semantics): bytecode, constants (minus docstrings),
205224
defaults, keyword-only defaults, and closure cell contents. It also
206-
includes any standalone functions listed in ``cls.__ccflow_tokenizer_deps__``.
225+
includes any functions or classes listed in ``cls.__ccflow_tokenizer_deps__``.
207226
208227
Decorator chains (e.g. ``@Flow.call``) are automatically unwrapped so
209228
that the hash reflects the user's implementation, not the wrapper.
210229
211230
``__ccflow_tokenizer_deps__`` values are merged across the full MRO, so
212-
subclasses can add dependencies without dropping inherited ones.
231+
subclasses can add dependencies without dropping inherited ones. Class
232+
entries contribute their own ``compute_behavior_token()`` recursively.
213233
214234
Results are cached on the class in ``cls.__behavior_token_cache__``.
215235
The cache is stored directly on the class (not inherited), so subclass
@@ -223,13 +243,18 @@ def compute_behavior_token(cls: type) -> Optional[str]:
223243
has been computed will **not** invalidate the cached token. Redefining
224244
the class (e.g. in Jupyter) creates a new class object and works fine.
225245
"""
246+
if cls in _visited:
247+
raise TypeError(f"Recursive __ccflow_tokenizer_deps__ class dependency detected for {cls.__module__}.{cls.__qualname__}")
248+
226249
# Check cache on cls itself (not inherited)
227250
cache = cls.__dict__.get("__behavior_token_cache__")
228251
if cache is not None:
229252
return cache
230253

254+
visited = _visited + (cls,)
231255
methods = _collect_methods(cls)
232256
method_hashes = [(name, h) for name, func in methods if (h := _hash_function_bytecode(func)) is not None]
257+
method_hashes.extend(_collect_dependency_hashes(cls, _visited=visited))
233258

234259
if not method_hashes:
235260
return None

0 commit comments

Comments
 (0)