docs: improve docstrings, inline comments, and add source references to SECURITY.md

Ismael Marchi · Ismael Marchi · commit e406993c6a6a · 2026-04-07T13:29:44.000Z
- core.py: clarify TQ scoring logic and in-memory store comments
- sanitizer.py: rename 'Production-Grade' to 'Semantic Privacy Guard™',
  expand docstrings for validate_sanitization() and batch_sanitize()
- privacy.py: expand docstrings for batch_apply() and get_config()
- validator.py: remove 'Production-grade' label, expand _score_content()
  docstring with normalization details
- handover.py: expand docstrings for _sign_token(), _verify_token(),
  _get_package(), _fail_handover(), and _generate_summary() with
  implementation details
- SECURITY.md: add Source References section linking all main modules

Only readability and documentation were improved; no functionality was altered.
diff --git a/SECURITY.md b/SECURITY.md
@@ -404,6 +404,22 @@ If you discover a security vulnerability:
 
 ---
 
+## Source References
+
+The security architecture described in this document is implemented across the following modules in the public SDK:
+
+| Module | Component | Path |
+|--------|-----------|------|
+| **Memory Core** | Zero-Knowledge Memory Layer (orchestration) | [`synapse_memory/core.py`](synapse_memory/core.py) |
+| **Semantic Privacy Guard™** | PII detection, removal & forensic hashing | [`synapse_memory/sanitizer.py`](synapse_memory/sanitizer.py) |
+| **Differential Privacy** | Gaussian noise injection on embeddings | [`synapse_memory/privacy.py`](synapse_memory/privacy.py) |
+| **Intelligent Intent Validation™** | Two-step classification + self-healing | [`synapse_memory/engine/validator.py`](synapse_memory/engine/validator.py) |
+| **Neural Handover™** | HMAC-SHA256-signed (JWT-style) vault-first cross-agent transfer | [`synapse_memory/engine/handover.py`](synapse_memory/engine/handover.py) |
+
+Each module includes inline tests that can be executed individually (e.g., `python -m synapse_memory.core`).
+
+---
+
 ## Security Contact
 
 **Email:** security@synapselayer.org  
diff --git a/synapse_memory/core.py b/synapse_memory/core.py
@@ -135,7 +135,8 @@ def __init__(
             validate=True,
         )
 
-        # In-memory store (production uses pgvector + AES-256-GCM)
+        # In-memory store — SDK demo only.
+        # Production deployments persist to pgvector + AES-256-GCM.
         self._memories: List[Dict[str, Any]] = []
 
         logger.info(
@@ -213,11 +214,13 @@ async def store(
         ).hexdigest()
         memory_id = content_hash[:32]
 
-        # Trust Quotient = merged_confidence * validation_score
+        # Trust Quotient (TQ) = merged_confidence × validation_score.
+        # TQ is the primary ranking signal for recall operations.
         trust_quotient = round(
             validation.confidence * validation.validation_score, 4
         )
-        # Apply confidence_boost for CRITICAL memories
+        # Additive boost for CRITICAL memories (confidence_boost × 0.1,
+        # TQ capped at 1.0) so they rank higher in recall results.
         if validation.confidence_boost > 0:
             trust_quotient = min(
                 trust_quotient + validation.confidence_boost * 0.1, 1.0
@@ -307,7 +310,8 @@ async def recall(
         """
         query_lower = query.lower()
 
-        # Simple relevance scoring for SDK demo
+        # Simple relevance scoring (SDK demo — substring matching).
+        # Production: pgvector cosine similarity with ANN index.
         scored: List[tuple] = []
         for mem in self._memories:
             content_lower = mem['content'].lower()
diff --git a/synapse_memory/engine/handover.py b/synapse_memory/engine/handover.py
@@ -621,7 +621,24 @@ def _sign_token(
         issued_at: float,
         expires_at: float,
     ) -> HandoverToken:
-        """Create and sign a JWT-style handover token."""
+        """Create and sign a JWT-style handover token (SHT format).
+
+        Constructs a three-part ``header.payload.signature`` string
+        using HMAC-SHA256 with the instance signing key. The token
+        is base64url-encoded (no padding) for safe URL transport.
+
+        Args:
+            token_id:      Unique identifier for this handover.
+            origin_agent:  Source agent ID.
+            target_agent:  Destination agent ID.
+            user_id:       Owning user ID.
+            scope:         Access scope (e.g., "full", "read_only").
+            issued_at:     Unix timestamp of issuance.
+            expires_at:    Unix timestamp of expiration.
+
+        Returns:
+            Fully populated ``HandoverToken`` with the encoded SHT string.
+        """
         header = {
             'alg': 'HS256',
             'typ': 'SHT',  # Synapse Handover Token
@@ -665,7 +682,14 @@ def _sign_token(
         )
 
     def _verify_token(self, token: HandoverToken) -> bool:
-        """Verify HMAC-SHA256 signature of a handover token."""
+        """Verify HMAC-SHA256 signature of a handover token.
+
+        Uses constant-time comparison (``hmac.compare_digest``) to
+        prevent timing-based side-channel attacks.
+
+        Returns:
+            True if signature is valid, False otherwise.
+        """
         try:
             parts = token.encoded_token.split('.')
             if len(parts) != 3:
@@ -685,7 +709,7 @@ def _verify_token(self, token: HandoverToken) -> bool:
             return False
 
     def _get_package(self, handover_id: str) -> HandoverPackage:
-        """Retrieve a package or raise KeyError."""
+        """Retrieve a package from the Status Ledger or raise KeyError."""
         package = self._ledger.get(handover_id)
         if package is None:
             raise KeyError(f"Handover '{handover_id}' not found in ledger.")
@@ -696,7 +720,11 @@ def _fail_handover(
         package: HandoverPackage,
         reason: str,
     ) -> HandoverPackage:
-        """Transition to FAILED and create emergency checkpoint."""
+        """Transition to FAILED and create an Emergency Checkpoint.
+
+        The checkpoint preserves a full snapshot of ``context_data`` so
+        that recovery (manual or automated) is always possible.
+        """
         now = time.time()
         package.status = HandoverStatus.FAILED
         package.failed_at = now
@@ -725,10 +753,12 @@ def _fail_handover(
 
     @staticmethod
     def _generate_summary(package: HandoverPackage) -> str:
-        """Generate a compact summary from expired handover context.
+        """Generate a compact summary from an expired handover context.
 
-        Used during grace period to return useful information
-        without exposing raw memory data.
+        Called during the grace period to provide the target agent with
+        useful high-level information without exposing full memory content.
+        Includes handover metadata, intent distribution, and truncated
+        content snippets (first 80 chars of up to 5 memories).
         """
         parts: List[str] = [
             f"Handover Summary (expired): {package.handover_id}",
diff --git a/synapse_memory/engine/validator.py b/synapse_memory/engine/validator.py
@@ -1,8 +1,8 @@
 """
 SynapseValidator — Intelligent Intent Validation™ with Self-Healing
 
-Production-grade cognitive security layer that classifies, validates,
-and auto-corrects memory intent in a two-step pipeline:
+Cognitive security layer that classifies, validates, and auto-corrects
+memory intent in a two-step pipeline:
 
     Step 1 (Agent Suggestion):  Keyword heuristics + scoring → proposed intent
     Step 2 (Synapse Validation): Confidence gate, critical promotion, self-healing
@@ -488,8 +488,18 @@ def batch_validate(
     def _score_content(self, content: str) -> Dict[str, float]:
         """Score content against all keyword dictionaries.
 
+        Used by ``heal_conflicts()`` to re-evaluate evidence strength
+        for both memories involved in a category conflict.
+
+        Normalization: ``min(hits / 3.0, 1.0)`` — three or more keyword
+        matches saturate the score for a given category.
+
+        Args:
+            content: Text to score (will be lowercased internally).
+
         Returns:
-            Dict mapping category value strings to normalized scores.
+            Dict mapping category value strings to normalized scores
+            in the range [0.0, 1.0].
         """
         content_lower = content.lower()
         scores: Dict[str, float] = {}
diff --git a/synapse_memory/privacy.py b/synapse_memory/privacy.py
@@ -186,11 +186,27 @@ def apply(self, embedding: List[float]) -> PrivacyResult:
     def batch_apply(
         self, embeddings: List[List[float]]
     ) -> List[PrivacyResult]:
-        """Apply differential privacy to a batch of embeddings."""
+        """Apply differential privacy to a batch of embeddings.
+
+        Each embedding receives independent noise calibrated to the
+        same (ε, δ) budget. Noise instances are non-correlated.
+
+        Args:
+            embeddings: List of dense float vectors (all same dimension).
+
+        Returns:
+            List of ``PrivacyResult`` in the same order as input.
+        """
         return [self.apply(emb) for emb in embeddings]
 
     def get_config(self) -> Dict[str, Any]:
-        """Return current privacy configuration for audit logging."""
+        """Return current privacy configuration for audit logging.
+
+        Useful for compliance reports and automated privacy audits.
+
+        Returns:
+            Dict with ``epsilon``, ``delta``, ``normalize``, and ``mechanism``.
+        """
         return {
             'epsilon': self.epsilon,
             'delta': self.delta,
diff --git a/synapse_memory/sanitizer.py b/synapse_memory/sanitizer.py
@@ -1,5 +1,5 @@
 """
-SynapseSanitizer — Production-Grade Content Sanitization Engine
+SynapseSanitizer — Semantic Privacy Guard™ Content Sanitization Engine
 
 High-performance PII detection, removal, and content hardening pipeline.
 Supports standard and aggressive modes for maximum semantic privacy.
@@ -42,7 +42,7 @@ class SanitizationResult:
 
 class SynapseSanitizer:
     """
-    Production-grade content sanitizer with:
+    Semantic Privacy Guard™ content sanitizer with:
     - 12 precompiled regex patterns for PII/sensitive data detection
     - Aggressive mode: removes proper nouns (capitalized words) for
       maximum semantic privacy against embedding-based inference attacks
@@ -289,6 +289,17 @@ def validate_sanitization(
     ) -> Dict[str, Any]:
         """
         Compute effectiveness metrics comparing original vs. sanitized content.
+
+        Measures character-level reduction as a rough proxy for how much PII
+        was removed. Useful for compliance dashboards and audit reporting.
+
+        Args:
+            original: The raw, unsanitized text.
+            sanitized: The output of ``sanitize_content()``.
+
+        Returns:
+            Dict with ``original_length``, ``sanitized_length``,
+            ``reduction_pct``, and ``effectiveness`` ("high" or "low").
         """
         orig_len = len(original)
         san_len = len(sanitized)
@@ -305,7 +316,17 @@ def validate_sanitization(
     def batch_sanitize(
         self, contents: List[str]
     ) -> List[SanitizationResult]:
-        """Sanitize a list of texts in batch."""
+        """Sanitize a list of texts in batch.
+
+        Convenience wrapper that applies ``sanitize_content()`` to each
+        element in *contents* and returns results in the same order.
+
+        Args:
+            contents: List of raw text strings to sanitize.
+
+        Returns:
+            List of ``SanitizationResult`` in input order.
+        """
         return [self.sanitize_content(c) for c in contents]