|
15 | 15 | import functools |
16 | 16 | import contextlib |
17 | 17 | import threading |
18 | | -from typing import Dict, Optional, Any |
19 | 18 |
|
20 | 19 |
|
21 | 20 | def cached_property(fx): |
@@ -48,46 +47,88 @@ def inner(self): |
48 | 47 | return property(inner) |
49 | 48 |
|
50 | 49 |
|
51 | | -# Thread-local storage for the simple cache dictionary |
| 50 | +# Thread-local storage for the simple cache dictionary. |
| 51 | +# This ensures that parallel generation tasks (if any) do not corrupt each other's cache. |
52 | 52 | _thread_local = threading.local() |
53 | 53 |
|
54 | 54 |
|
55 | 55 | @contextlib.contextmanager |
56 | 56 | def generation_cache_context(): |
57 | | - """Context manager to explicitly manage the cache lifecycle.""" |
58 | | - # Initialize the cache as a standard dictionary |
| 57 | + """Context manager to explicitly manage the lifecycle of the generation cache. |
| 58 | +
|
| 59 | + This manager initializes a fresh dictionary in thread-local storage when entering |
| 60 | + the context and strictly deletes it when exiting. |
| 61 | +
|
| 62 | + **Memory Management:** |
| 63 | + The cache stores strong references to Proto objects to "pin" them in memory |
| 64 | + (see `cached_proto_context`). It is critical that this context manager deletes |
| 65 | + the dictionary in the `finally` block. Deleting the dictionary breaks the |
| 66 | + reference chain, allowing Python's Garbage Collector to finally free all the |
| 67 | + large Proto objects that were pinned during generation. |
| 68 | + """ |
| 69 | + # Initialize the cache as a standard dictionary. |
59 | 70 | _thread_local.cache = {} |
60 | 71 | try: |
61 | 72 | yield |
62 | 73 | finally: |
63 | | - # Delete the dictionary to free all memory and pinned objects |
| 74 | + # Delete the dictionary to free all memory and pinned objects. |
| 75 | + # This is essential to prevent memory leaks in long-running processes. |
64 | 76 | del _thread_local.cache |
65 | 77 |
|
66 | 78 |
|
def cached_proto_context(func):
    """Decorator to memoize `with_context` calls based on object identity and collisions.

    This mechanism provides a significant performance boost by preventing
    redundant recalculations of naming collisions during template rendering.

    Since the Proto wrapper objects are unhashable (mutable), we use `id(self)` as
    the primary cache key. Normally, this is dangerous: if the object is garbage
    collected, Python might reuse its memory address for a *new* object, leading to
    a cache collision (the "Zombie ID" bug).

    To prevent this, this decorator stores the value as a tuple: `(result, self)`.
    By keeping a reference to `self` in the cache value, we "pin" the object in
    memory. This forces the Garbage Collector to keep the object alive, guaranteeing
    that `id(self)` remains unique for the entire lifespan of the `generation_cache_context`.

    Args:
        func (Callable): The function to decorate (usually `with_context`).

    Returns:
        Callable: The wrapped function with caching and pinning logic.
    """

    @functools.wraps(func)
    def wrapper(self, *, collisions, **kwargs):

        # 1. Check for active cache (returns None if context is not active)
        context_cache = getattr(_thread_local, "cache", None)

        # If we are not inside a generation_cache_context (e.g. unit tests),
        # bypass the cache entirely.
        if context_cache is None:
            return func(self, collisions=collisions, **kwargs)

        # 2. Create the cache key
        # We use frozenset for collisions to make it hashable (and order-insensitive).
        # We use id(self) because 'self' is not hashable.
        # NOTE(review): **kwargs are NOT part of the key — this assumes the result
        # varies only with 'self' and 'collisions'; verify against with_context callers.
        collisions_key = frozenset(collisions) if collisions else None
        key = (id(self), collisions_key)

        # 3. Check Cache
        if key in context_cache:
            # The cache stores (result, pinned_object). We return just the result.
            return context_cache[key][0]

        # 4. Execute the actual function
        # Recursive calls made inside func() find the same cache through
        # _thread_local, so nothing needs to be passed down explicitly.
        result = func(self, collisions=collisions, **kwargs)

        # 5. Update Cache & Pin Object
        # We store (result, self). The reference to 'self' prevents garbage collection,
        # ensuring that 'id(self)' cannot be reused for a new object while this
        # cache entry exists.
        context_cache[key] = (result, self)
        return result
93 | 134 |
|
|
0 commit comments