Skip to content

Commit 6908f21

Browse files
ptomecekCopilot
andcommitted
fix: cover defaults closures and inherited deps
Update behavior hashing so function defaults, keyword-only defaults, and closure cell contents contribute to compute_behavior_token(). This closes a cache-key correctness gap where semantic changes could leave behavior tokens unchanged. Also merge __ccflow_tokenizer_deps__ across the full MRO instead of first-definition-wins, with deterministic deduping so subclasses can add deps without dropping inherited ones. Add regression tests for defaults, kwdefaults, closures, inherited deps, and a cache_key integration check for helper default changes. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent e005023 commit 6908f21

2 files changed

Lines changed: 150 additions & 20 deletions

File tree

ccflow/tests/utils/test_behavior_hash.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,38 @@ def f(self):
120120

121121
assert compute_behavior_token(A) != compute_behavior_token(B)
122122

123+
def test_defaults_matter(self):
124+
class A:
125+
def f(self, x=1):
126+
return x
127+
128+
class B:
129+
def f(self, x=2):
130+
return x
131+
132+
assert compute_behavior_token(A) != compute_behavior_token(B)
133+
134+
def test_kwdefaults_matter(self):
135+
class A:
136+
def f(self, *, x=1):
137+
return x
138+
139+
class B:
140+
def f(self, *, x=2):
141+
return x
142+
143+
assert compute_behavior_token(A) != compute_behavior_token(B)
144+
145+
def test_closure_values_matter(self):
146+
def make_model(value):
147+
class M:
148+
def f(self):
149+
return value
150+
151+
return M
152+
153+
assert compute_behavior_token(make_model(1)) != compute_behavior_token(make_model(2))
154+
123155

124156
# ---------------------------------------------------------------------------
125157
# Method collection
@@ -262,6 +294,36 @@ def f(self):
262294

263295
assert compute_behavior_token(A) != compute_behavior_token(B)
264296

297+
def test_subclass_deps_extend_inherited_deps(self):
298+
def base_a():
299+
return 1
300+
301+
def base_b():
302+
return 2
303+
304+
def sub_dep():
305+
return 3
306+
307+
class BaseA:
308+
__ccflow_tokenizer_deps__ = [base_a]
309+
310+
def f(self):
311+
return 1
312+
313+
class BaseB:
314+
__ccflow_tokenizer_deps__ = [base_b]
315+
316+
def f(self):
317+
return 1
318+
319+
class SubA(BaseA):
320+
__ccflow_tokenizer_deps__ = [sub_dep]
321+
322+
class SubB(BaseB):
323+
__ccflow_tokenizer_deps__ = [sub_dep]
324+
325+
assert compute_behavior_token(SubA) != compute_behavior_token(SubB)
326+
265327

266328
# ---------------------------------------------------------------------------
267329
# Integration with cache_key()
@@ -329,6 +391,27 @@ class MyContext(ContextBase):
329391
key = cache_key(MyContext(value=1))
330392
assert isinstance(key, bytes)
331393

394+
def test_helper_default_arg_changes_key(self):
395+
from ccflow import Flow
396+
397+
class A(CallableModel):
398+
@Flow.call
399+
def __call__(self, context: NullContext) -> GenericResult:
400+
return GenericResult(value=self.helper())
401+
402+
def helper(self, x=1):
403+
return x
404+
405+
class B(CallableModel):
406+
@Flow.call
407+
def __call__(self, context: NullContext) -> GenericResult:
408+
return GenericResult(value=self.helper())
409+
410+
def helper(self, x=2):
411+
return x
412+
413+
assert cache_key(A()) != cache_key(B())
414+
332415
def test_opaque_evaluator_behavior_changes_key(self):
333416
from ccflow import Flow
334417

ccflow/utils/tokenize.py

Lines changed: 67 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,40 @@ def _unwrap_function(func: object) -> Optional[Callable]:
7979
return func
8080

8181

82+
def _function_state(func: Callable) -> Tuple[Any, Any, Tuple[Tuple[str, bool, Any], ...] | None]:
83+
"""Return defaults/kwdefaults/closure state that affects runtime behavior."""
84+
85+
kwdefaults = getattr(func, "__kwdefaults__", None)
86+
if kwdefaults is not None:
87+
kwdefaults = tuple(sorted(kwdefaults.items()))
88+
89+
closure = getattr(func, "__closure__", None)
90+
closure_state = None
91+
if closure:
92+
closure_state = []
93+
for name, cell in zip(func.__code__.co_freevars, closure):
94+
try:
95+
closure_state.append((name, True, cell.cell_contents))
96+
except ValueError:
97+
closure_state.append((name, False, None))
98+
closure_state = tuple(closure_state)
99+
100+
return getattr(func, "__defaults__", None), kwdefaults, closure_state
101+
102+
82103
def _hash_function_bytecode(func: Callable) -> Optional[str]:
83-
"""Return a SHA-256 hex digest of a function's bytecode and constants.
104+
"""Return a SHA-256 hex digest of a function's behavior-relevant state.
84105
85106
The function is first unwrapped through any decorator chains, so that
86107
e.g. ``@Flow.call`` wrappers do not mask the real implementation.
87108
109+
In addition to ``co_code`` and ``co_consts``, this includes:
110+
- positional defaults (``__defaults__``)
111+
- keyword-only defaults (``__kwdefaults__``)
112+
- closure cell contents
113+
so that behavior changes that do not affect bytecode alone still change
114+
the token.
115+
88116
Returns ``None`` for objects without ``__code__`` (C builtins, etc.).
89117
"""
90118
unwrapped = _unwrap_function(func)
@@ -97,9 +125,20 @@ def _hash_function_bytecode(func: Callable) -> Optional[str]:
97125
if consts and isinstance(consts[0], str):
98126
consts = consts[1:]
99127
h.update(repr(consts).encode("utf-8"))
128+
h.update(compute_data_token(_function_state(unwrapped)).encode("utf-8"))
100129
return h.hexdigest()
101130

102131

132+
def _dependency_sort_key(func: Callable) -> Tuple[str, str, str]:
133+
"""Return a deterministic identity for dependency sorting/deduping."""
134+
135+
unwrapped = _unwrap_function(func) or func
136+
module = getattr(unwrapped, "__module__", "")
137+
qualname = getattr(unwrapped, "__qualname__", getattr(unwrapped, "__name__", repr(unwrapped)))
138+
behavior = _hash_function_bytecode(unwrapped) or ""
139+
return module, qualname, behavior
140+
141+
103142
def _collect_methods(cls: type) -> List[Tuple[str, Callable]]:
104143
"""Collect callable methods from *cls* (walking MRO) plus ``__ccflow_tokenizer_deps__``.
105144
@@ -108,8 +147,9 @@ def _collect_methods(cls: type) -> List[Tuple[str, Callable]]:
108147
``__call__`` overrides the base class's even if the subclass doesn't
109148
redefine every method.
110149
111-
Own methods are sorted alphabetically. Dependencies are sorted by
112-
qualified name so that declaration order does not affect the hash.
150+
Own methods are sorted alphabetically. Dependencies are merged across the
151+
MRO, deduplicated, and sorted deterministically so that declaration order
152+
does not affect the hash.
113153
114154
Internal framework attributes (``__ccflow_*``) and pydantic/python
115155
boilerplate methods are skipped.
@@ -133,37 +173,44 @@ def _collect_methods(cls: type) -> List[Tuple[str, Callable]]:
133173

134174
methods.sort(key=lambda pair: pair[0])
135175

136-
# Collect __ccflow_tokenizer_deps__ (also walk MRO, first definition wins)
137-
extra_deps = None
176+
# Collect __ccflow_tokenizer_deps__ from the full MRO. Subclasses may add
177+
# deps without losing inherited ones.
178+
deps = []
179+
seen_dep_keys = set()
138180
for klass in cls.__mro__:
139-
if "__ccflow_tokenizer_deps__" in klass.__dict__:
140-
extra_deps = klass.__dict__["__ccflow_tokenizer_deps__"]
141-
break
142-
143-
if extra_deps is not None:
144-
deps = []
181+
extra_deps = klass.__dict__.get("__ccflow_tokenizer_deps__")
182+
if extra_deps is None:
183+
continue
145184
for func in extra_deps:
146185
unwrapped = _unwrap_function(func) or func
147-
if callable(unwrapped):
148-
func_id = getattr(unwrapped, "__qualname__", getattr(unwrapped, "__name__", repr(unwrapped)))
149-
deps.append((f"__dep__:{func_id}", func))
150-
deps.sort(key=lambda pair: pair[0])
151-
methods.extend(deps)
186+
if not callable(unwrapped):
187+
continue
188+
dep_key = _dependency_sort_key(func)
189+
if dep_key in seen_dep_keys:
190+
continue
191+
seen_dep_keys.add(dep_key)
192+
deps.append((dep_key, func))
193+
194+
deps.sort(key=lambda pair: pair[0])
195+
methods.extend((f"__dep__:{dep_key[1]}", func) for dep_key, func in deps)
152196

153197
return methods
154198

155199

156200
def compute_behavior_token(cls: type) -> Optional[str]:
157201
"""Compute a SHA-256 behavior token for *cls* based on its method bytecode.
158202
159-
The token captures bytecode (``co_code``) and constants (``co_consts``,
160-
minus docstrings) of every method in *cls*'s MRO (with standard override
161-
semantics), plus any standalone functions listed in
162-
``cls.__ccflow_tokenizer_deps__``.
203+
The token captures behavior-relevant state for every method in *cls*'s MRO
204+
(with standard override semantics): bytecode, constants (minus docstrings),
205+
defaults, keyword-only defaults, and closure cell contents. It also
206+
includes any standalone functions listed in ``cls.__ccflow_tokenizer_deps__``.
163207
164208
Decorator chains (e.g. ``@Flow.call``) are automatically unwrapped so
165209
that the hash reflects the user's implementation, not the wrapper.
166210
211+
``__ccflow_tokenizer_deps__`` values are merged across the full MRO, so
212+
subclasses can add dependencies without dropping inherited ones.
213+
167214
Results are cached on the class in ``cls.__behavior_token_cache__``.
168215
The cache is stored directly on the class (not inherited), so subclass
169216
tokens are independent of parent tokens.

0 commit comments

Comments
 (0)