Skip to content

Commit d1e6924

Browse files
ptomecek and Copilot committed
feat: extend tokenizer cache APIs
Add compute_cache_token() alongside compute_data_token() and compute_behavior_token(), refactor cache_key() to delegate to it, and rename the cached class attribute from __behavior_token_cache__ to __ccflow_tokenizer_cache__ so that it matches __ccflow_tokenizer_deps__. This commit also keeps class support in __ccflow_tokenizer_deps__, including recursive class-dependency detection, and adds regression coverage for combined cache tokens and cache-key integration.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Signed-off-by: Pascal Tomecek <pascal.tomecek@cubistsystematic.com>
1 parent f3b21d1 commit d1e6924

4 files changed

Lines changed: 191 additions & 63 deletions

File tree

ccflow/evaluators/common.py

Lines changed: 12 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -58,22 +58,6 @@ def combine_evaluators(first: Optional[EvaluatorBase], second: Optional[Evaluato
5858
return MultiEvaluator(evaluators=[first, second])
5959

6060

61-
def _model_tokens(model: BaseModel, *, include_data: bool = True) -> List[str]:
62-
from ..utils.tokenize import compute_behavior_token, compute_data_token
63-
64-
tokens = [compute_data_token(model.model_dump(mode="python"))] if include_data else []
65-
behavior = compute_behavior_token(type(model))
66-
if behavior is not None:
67-
tokens.append(behavior)
68-
return tokens
69-
70-
71-
def _combine_tokens(tokens: List[str]) -> bytes:
72-
from ..utils.tokenize import compute_data_token
73-
74-
return compute_data_token(tuple(tokens)).encode("utf-8")
75-
76-
7761
def _flatten_cache_key_context(flow_obj: ModelEvaluationContext) -> tuple[ModelEvaluationContext, str, List[EvaluatorBase]]:
7862
fn = flow_obj.fn
7963
non_transparent: List[EvaluatorBase] = []
@@ -256,17 +240,22 @@ def cache_key(flow_obj: Union[ModelEvaluationContext, ContextBase, CallableModel
256240
Args:
257241
flow_obj: The object to be tokenized to form the cache key.
258242
"""
259-
from ..utils.tokenize import compute_data_token
243+
from ..utils.tokenize import compute_cache_token
260244

261245
if isinstance(flow_obj, ModelEvaluationContext):
262246
flow_obj, fn, non_transparent = _flatten_cache_key_context(flow_obj)
263-
tokens = [compute_data_token({**flow_obj.model_dump(mode="python"), "fn": fn})]
264-
tokens.extend(_model_tokens(flow_obj.model, include_data=False))
265-
for evaluator in non_transparent:
266-
tokens.extend(_model_tokens(evaluator))
267-
return _combine_tokens(tokens)
247+
return compute_cache_token(
248+
data_values=[
249+
{**flow_obj.model_dump(mode="python"), "fn": fn},
250+
*(evaluator.model_dump(mode="python") for evaluator in non_transparent),
251+
],
252+
behavior_classes=[type(flow_obj.model), *(type(evaluator) for evaluator in non_transparent)],
253+
).encode("utf-8")
268254
elif isinstance(flow_obj, (ContextBase, CallableModel)):
269-
return _combine_tokens(_model_tokens(flow_obj))
255+
return compute_cache_token(
256+
data_values=[flow_obj.model_dump(mode="python")],
257+
behavior_classes=[type(flow_obj)],
258+
).encode("utf-8")
270259
else:
271260
raise TypeError(f"object of type {type(flow_obj)} cannot be serialized by this function!")
272261

ccflow/tests/utils/test_behavior_hash.py

Lines changed: 103 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
"""Tests for tokenize helpers used by cache_key()."""
22

3+
import pytest
4+
35
from ccflow.callable import CallableModel, ContextBase, EvaluatorBase, ModelEvaluationContext
46
from ccflow.context import NullContext
57
from ccflow.evaluators.common import cache_key
68
from ccflow.result import GenericResult
7-
from ccflow.utils.tokenize import compute_behavior_token, compute_data_token
9+
from ccflow.utils.tokenize import compute_behavior_token, compute_cache_token, compute_data_token
810

911
# ---------------------------------------------------------------------------
1012
# Data token
@@ -21,6 +23,39 @@ def test_different_values_different_tokens(self):
2123
assert compute_data_token({"x": 1}) != compute_data_token({"x": 2})
2224

2325

26+
class TestComputeCacheToken:
27+
def test_deterministic(self):
28+
class Helper:
29+
def f(self):
30+
return 1
31+
32+
token1 = compute_cache_token(data_values=[{"x": 1}], behavior_classes=[Helper])
33+
token2 = compute_cache_token(data_values=[{"x": 1}], behavior_classes=[Helper])
34+
assert token1 == token2
35+
36+
def test_data_changes_token(self):
37+
class Helper:
38+
def f(self):
39+
return 1
40+
41+
token1 = compute_cache_token(data_values=[{"x": 1}], behavior_classes=[Helper])
42+
token2 = compute_cache_token(data_values=[{"x": 2}], behavior_classes=[Helper])
43+
assert token1 != token2
44+
45+
def test_behavior_changes_token(self):
46+
class HelperA:
47+
def f(self):
48+
return 1
49+
50+
class HelperB:
51+
def f(self):
52+
return 2
53+
54+
token1 = compute_cache_token(data_values=[{"x": 1}], behavior_classes=[HelperA])
55+
token2 = compute_cache_token(data_values=[{"x": 1}], behavior_classes=[HelperB])
56+
assert token1 != token2
57+
58+
2459
# ---------------------------------------------------------------------------
2560
# Basic behavior
2661
# ---------------------------------------------------------------------------
@@ -77,7 +112,7 @@ def f(self):
77112
return 1
78113

79114
token = compute_behavior_token(M)
80-
assert M.__behavior_token_cache__ == token
115+
assert M.__ccflow_tokenizer_cache__ == token
81116
# Second call returns cached value
82117
assert compute_behavior_token(M) is token
83118

@@ -294,6 +329,29 @@ def f(self):
294329

295330
assert compute_behavior_token(A) != compute_behavior_token(B)
296331

332+
def test_class_dep_included(self):
333+
class HelperA:
334+
def f(self):
335+
return 1
336+
337+
class HelperB:
338+
def f(self):
339+
return 2
340+
341+
class A:
342+
__ccflow_tokenizer_deps__ = [HelperA]
343+
344+
def f(self):
345+
return 1
346+
347+
class B:
348+
__ccflow_tokenizer_deps__ = [HelperB]
349+
350+
def f(self):
351+
return 1
352+
353+
assert compute_behavior_token(A) != compute_behavior_token(B)
354+
297355
def test_subclass_deps_extend_inherited_deps(self):
298356
def base_a():
299357
return 1
@@ -324,6 +382,21 @@ class SubB(BaseB):
324382

325383
assert compute_behavior_token(SubA) != compute_behavior_token(SubB)
326384

385+
def test_recursive_class_deps_raise(self):
386+
class A:
387+
def f(self):
388+
return 1
389+
390+
class B:
391+
def g(self):
392+
return 2
393+
394+
A.__ccflow_tokenizer_deps__ = [B]
395+
B.__ccflow_tokenizer_deps__ = [A]
396+
397+
with pytest.raises(TypeError, match="Recursive __ccflow_tokenizer_deps__ class dependency"):
398+
compute_behavior_token(A)
399+
327400

328401
# ---------------------------------------------------------------------------
329402
# Integration with cache_key()
@@ -412,6 +485,33 @@ def helper(self, x=2):
412485

413486
assert cache_key(A()) != cache_key(B())
414487

488+
def test_class_dep_changes_key(self):
489+
from ccflow import Flow
490+
491+
class HelperA:
492+
def f(self):
493+
return 1
494+
495+
class HelperB:
496+
def f(self):
497+
return 2
498+
499+
class A(CallableModel):
500+
__ccflow_tokenizer_deps__ = [HelperA]
501+
502+
@Flow.call
503+
def __call__(self, context: NullContext) -> GenericResult:
504+
return GenericResult(value=1)
505+
506+
class B(CallableModel):
507+
__ccflow_tokenizer_deps__ = [HelperB]
508+
509+
@Flow.call
510+
def __call__(self, context: NullContext) -> GenericResult:
511+
return GenericResult(value=1)
512+
513+
assert cache_key(A()) != cache_key(B())
514+
415515
def test_opaque_evaluator_behavior_changes_key(self):
416516
from ccflow import Flow
417517

@@ -512,7 +612,7 @@ def __call__(self):
512612
assert compute_behavior_token(Sub) != compute_behavior_token(Base)
513613

514614
def test_subclass_cache_independent(self):
515-
"""Parent and subclass don't share __behavior_token_cache__."""
615+
"""Parent and subclass don't share __ccflow_tokenizer_cache__."""
516616

517617
class Base:
518618
def f(self):

ccflow/utils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
from .chunker import *
22
from .core import *
33
from .logging import *
4-
from .tokenize import compute_behavior_token, compute_data_token, normalize_token, tokenize
4+
from .tokenize import compute_behavior_token, compute_cache_token, compute_data_token, normalize_token, tokenize

0 commit comments

Comments
 (0)