|
15 | 15 | import functools |
16 | 16 | import contextlib |
17 | 17 | import threading |
18 | | -from typing import Dict, Optional, Any |
19 | 18 |
|
20 | 19 |
|
21 | 20 | def cached_property(fx): |
@@ -48,46 +47,88 @@ def inner(self): |
48 | 47 | return property(inner) |
49 | 48 |
|
50 | 49 |
|
51 | | -# Thread-local storage for the simple cache dictionary |
| 50 | +# Thread-local storage for the simple cache dictionary. |
| 51 | +# This ensures that parallel generation tasks (if any) do not corrupt each other's cache. |
52 | 52 | _thread_local = threading.local() |
53 | 53 |
|
54 | 54 |
|
55 | 55 | @contextlib.contextmanager |
56 | 56 | def generation_cache_context(): |
57 | | - """Context manager to explicitly manage the cache lifecycle.""" |
58 | | - # Initialize the cache as a standard dictionary |
| 57 | + """Context manager to explicitly manage the lifecycle of the generation cache. |
| 58 | +
|
| 59 | + This manager initializes a fresh dictionary in thread-local storage when entering |
| 60 | + the context and strictly deletes it when exiting. |
| 61 | +
|
| 62 | + **Memory Management:** |
| 63 | + The cache stores strong references to Proto objects to "pin" them in memory |
| 64 | + (see `cached_proto_context`). It is critical that this context manager deletes |
| 65 | + the dictionary in the `finally` block. Deleting the dictionary breaks the |
| 66 | + reference chain, allowing Python's Garbage Collector to finally free all the |
| 67 | + large Proto objects that were pinned during generation. |
| 68 | + """ |
| 69 | + # Initialize the cache as a standard dictionary. |
59 | 70 | _thread_local.cache = {} |
60 | 71 | try: |
61 | 72 | yield |
62 | 73 | finally: |
63 | | - # Delete the dictionary to free all memory and pinned objects |
| 74 | + # Delete the dictionary to free all memory and pinned objects. |
| 75 | + # This is essential to prevent memory leaks in long-running processes. |
64 | 76 | del _thread_local.cache |
65 | 77 |
|
66 | 78 |
|
def cached_proto_context(func):
    """Decorator to memoize `with_context` calls based on object identity and collisions.

    This mechanism provides a significant performance boost by preventing
    redundant recalculations of naming collisions during template rendering.

    Since the Proto wrapper objects are unhashable (mutable), we use `id(self)` as
    the primary cache key. Normally, this is dangerous: if the object is garbage
    collected, Python might reuse its memory address for a *new* object, leading to
    a cache collision (the "Zombie ID" bug).

    To prevent this, this decorator stores the value as a tuple: `(result, self)`.
    By keeping a reference to `self` in the cache value, we "pin" the object in
    memory. This forces the Garbage Collector to keep the object alive, guaranteeing
    that `id(self)` remains unique for the entire lifespan of the `generation_cache_context`.

    Args:
        func (Callable): The function to decorate (usually `with_context`).

    Returns:
        Callable: The wrapped function with caching and pinning logic.
    """

    @functools.wraps(func)
    def wrapper(self, *, collisions, **kwargs):

        # 1. Check for active cache (returns None if context is not active)
        context_cache = getattr(_thread_local, "cache", None)

        # If we are not inside a generation_cache_context (e.g. unit tests),
        # bypass the cache entirely.
        if context_cache is None:
            return func(self, collisions=collisions, **kwargs)

        # 2. Create the cache key
        # We use frozenset for collisions to make it hashable (and order-insensitive).
        # We use id(self) because 'self' is not hashable.
        # NOTE(review): **kwargs are NOT part of the key — this assumes the result
        # varies only with 'self' and 'collisions'; verify against with_context callers.
        collisions_key = frozenset(collisions) if collisions else None
        key = (id(self), collisions_key)

        # 3. Check Cache
        if key in context_cache:
            # The cache stores (result, pinned_object). We return just the result.
            return context_cache[key][0]

        # 4. Execute the actual function
        # Recursive calls made inside func() find the same cache through
        # _thread_local, so nothing needs to be passed down explicitly.
        result = func(self, collisions=collisions, **kwargs)

        # 5. Update Cache & Pin Object
        # We store (result, self). The reference to 'self' prevents garbage collection,
        # ensuring that 'id(self)' cannot be reused for a new object while this
        # cache entry exists.
        context_cache[key] = (result, self)
        return result
93 | 134 |
|
|
0 commit comments