param20h
diff --git a/backend/app/cache.py b/backend/app/cache.py
Lines changed: 18 additions & 13 deletions
diff --git a/backend/app/database.py b/backend/app/database.py
Lines changed: 79 additions & 0 deletions
diff --git a/backend/app/models.py b/backend/app/models.py
Lines changed: 2 additions & 2 deletions
diff --git a/backend/app/observability.py b/backend/app/observability.py
Lines changed: 27 additions & 1 deletion
diff --git a/backend/app/rag/bm25.py b/backend/app/rag/bm25.py
Lines changed: 3 additions & 0 deletions
diff --git a/backend/app/rag/retriever.py b/backend/app/rag/retriever.py
Lines changed: 25 additions & 7 deletions
diff --git a/backend/app/rag/tracing.py b/backend/app/rag/tracing.py
Lines changed: 11 additions & 1 deletion
diff --git a/backend/app/rag/vectorstore.py b/backend/app/rag/vectorstore.py
Lines changed: 3 additions & 1 deletion
@@ -5,8 +5,9 @@
 - Redis (preferred, for production)
 - LRU in-memory cache (fallback for development or when Redis is unavailable)
 
-Cache key is a SHA-256 hash of (document_id, question) to ensure keys are
-short, stable, and unique across all question/document combinations.
+Cache key is a SHA-256 hash of (user_id, document_id, question) to ensure
+keys are short, stable, and unique across all user/question/document
+combinations — never shared between different users.
 """
 
 import hashlib
@@ -101,25 +102,28 @@ def _lru_delete(key: str) -> None:
 # ---------------------------------------------------------------------------
 
 
-def make_cache_key(document_id: str, question: str) -> str:
+def make_cache_key(user_id: str, document_id: str, question: str) -> str:
     """
-    Generate a stable, short cache key from document_id + question.
+    Generate a stable, short cache key from user_id + document_id + question.
 
     SHA-256 gives us a 64-char hex string that is:
     - Always the same length regardless of question length
-    - Unique per (document_id, question) pair
+    - Unique per (user_id, document_id, question) triple — user_id is
+      required so two different users asking the identical question never
+      collide on the same cache entry, even when document_id is empty
+      (cross-document queries against a user's own private knowledge base)
     - Safe for Redis keys and dict keys
     """
-    raw = f"{document_id}:{question.strip().lower()}"
+    raw = f"{user_id}:{document_id}:{question.strip().lower()}"  # question is trimmed + lower-cased; NOTE(review): assumes IDs never contain ":" — confirm
     return hashlib.sha256(raw.encode("utf-8")).hexdigest()
 
 
-def get_cached_response(document_id: str, question: str) -> Optional[str]:
+def get_cached_response(user_id: str, document_id: str, question: str) -> Optional[str]:
     """
-    Look up a cached answer for a (document_id, question) pair.
+    Look up a cached answer for a (user_id, document_id, question) triple.
     Returns the answer string on hit, None on miss.
     """
-    key = make_cache_key(document_id, question)
+    key = make_cache_key(user_id, document_id, question)
     r = _get_redis()
 
     if r is not None:
@@ -140,12 +144,13 @@ def get_cached_response(document_id: str, question: str) -> Optional[str]:
     return None
 
 
-def set_cached_response(document_id: str, question: str, answer: str) -> None:
+
+def set_cached_response(user_id: str, document_id: str, question: str, answer: str) -> None:
     """
     Store an answer. Tries Redis first; falls back to LRU.
     TTL is controlled by the CACHE_TTL environment variable.
     """
-    key = make_cache_key(document_id, question)
+    key = make_cache_key(user_id, document_id, question)
     serialised = json.dumps(answer)
     r = _get_redis()
 
@@ -161,9 +166,9 @@ def set_cached_response(document_id: str, question: str, answer: str) -> None:
     logger.debug("Cache SET (LRU) key %s", key[:12])
 
 
-def invalidate_cache(document_id: str, question: str) -> None:
+def invalidate_cache(user_id: str, document_id: str, question: str) -> None:
     """Remove one cache entry — useful when a document is re-indexed."""
-    key = make_cache_key(document_id, question)
+    key = make_cache_key(user_id, document_id, question)
     r = _get_redis()
     if r is not None:
         try:
 
@@ -230,6 +230,85 @@ def _migrate_schema():
                 )
 
 
+    # ── Workspace tables ──────────────────────────────────────────────────
+    existing_tables = set(inspector.get_table_names())
+
+    if "workspaces" not in existing_tables:
+        try:
+            with engine.begin() as conn:
+                conn.execute(text("""
+                    CREATE TABLE workspaces (
+                        id         CHAR(36)     PRIMARY KEY,
+                        name       VARCHAR(255) NOT NULL,
+                        created_by CHAR(36)     NOT NULL REFERENCES users(id),
+                        created_at TIMESTAMP
+                    )
+                """))
+                conn.execute(text(
+                    "CREATE INDEX IF NOT EXISTS ix_workspaces_created_by "
+                    "ON workspaces (created_by)"
+                ))
+            logger.info("Migration: created table workspaces")
+        except Exception:  # best-effort: record the real failure, then move on to the next table
+            logger.warning("Migration skipped (may already exist): workspaces", exc_info=True)
+
+    if "workspace_members" not in existing_tables:
+        try:
+            with engine.begin() as conn:
+                conn.execute(text("""
+                    CREATE TABLE workspace_members (
+                        id           CHAR(36)    PRIMARY KEY,
+                        workspace_id CHAR(36)    NOT NULL REFERENCES workspaces(id),
+                        user_id      CHAR(36)    NOT NULL REFERENCES users(id),
+                        role         VARCHAR(20) NOT NULL DEFAULT 'viewer',
+                        joined_at    TIMESTAMP,
+                        CONSTRAINT uq_workspace_member UNIQUE (workspace_id, user_id)
+                    )
+                """))
+                conn.execute(text(
+                    "CREATE INDEX IF NOT EXISTS ix_workspace_members_workspace_id "
+                    "ON workspace_members (workspace_id)"
+                ))
+                conn.execute(text(
+                    "CREATE INDEX IF NOT EXISTS ix_workspace_members_user_id "
+                    "ON workspace_members (user_id)"
+                ))
+            logger.info("Migration: created table workspace_members")
+        except Exception:  # best-effort: record the real failure, then move on to the next table
+            logger.warning("Migration skipped (may already exist): workspace_members", exc_info=True)
+
+    if "workspace_invitations" not in existing_tables:
+        try:
+            with engine.begin() as conn:
+                conn.execute(text("""
+                    CREATE TABLE workspace_invitations (
+                        id             CHAR(36)     PRIMARY KEY,
+                        email          VARCHAR(120) NOT NULL,
+                        token_hash     VARCHAR(255) NOT NULL UNIQUE,
+                        inviter_id     CHAR(36)     NOT NULL REFERENCES users(id),
+                        workspace_name VARCHAR(255) NOT NULL,
+                        created_at     TIMESTAMP,
+                        expires_at     TIMESTAMP    NOT NULL,
+                        accepted_at    TIMESTAMP
+                    )
+                """))
+                conn.execute(text(
+                    "CREATE INDEX IF NOT EXISTS ix_workspace_invitations_email "
+                    "ON workspace_invitations (email)"
+                ))
+                conn.execute(text(
+                    "CREATE INDEX IF NOT EXISTS ix_workspace_invitations_token_hash "
+                    "ON workspace_invitations (token_hash)"
+                ))
+                conn.execute(text(
+                    "CREATE INDEX IF NOT EXISTS ix_workspace_invitations_inviter_id "
+                    "ON workspace_invitations (inviter_id)"
+                ))
+            logger.info("Migration: created table workspace_invitations")
+        except Exception:  # best-effort: record the real failure so it is not mistaken for "already exists"
+            logger.warning("Migration skipped (may already exist): workspace_invitations", exc_info=True)
+
+
 def advisory_lock(lock_id: int):
     """Context manager that acquires a PostgreSQL advisory lock (xact scope).
 
 
@@ -200,11 +200,11 @@ class ApiKey(Base):
 class WorkspaceInvitation(Base):
     __tablename__ = "workspace_invitations"
 
-    id = Column(String, primary_key=True, default=generate_uuid)
+    id = Column(GUID, primary_key=True, default=uuid.uuid4)
     email = Column(String(120), nullable=False, index=True)
     token_hash = Column(String(255), nullable=False, unique=True, index=True)
     inviter_id = Column(
-        String,
+        GUID,
         ForeignKey("users.id"),
         nullable=False,
         index=True,
 
@@ -9,7 +9,33 @@
 
 from fastapi import FastAPI
 from prometheus_client import Gauge
-from prometheus_fastapi_instrumentator import Instrumentator
+from prometheus_fastapi_instrumentator import Instrumentator, routing
+from starlette.routing import Match
+
+# ── Workaround for FastAPI 0.135+ and prometheus-fastapi-instrumentator 8.0.0 ──
+# Newer FastAPI versions include _IncludedRouter objects in app.routes which
+# lack a '.path' attribute, causing AttributeErrors during instrumentation.
+def _patched_get_route_name(scope, routes, route_name=None):
+    """Resolve the matched route's path for ``scope``, skipping routes that lack a ``.path`` attribute."""
+    for route in routes:
+        try:
+            match, child_scope = route.matches(scope)
+        except Exception:  # a route whose matches() raises is skipped instead of failing the lookup
+            continue
+
+        if match == Match.FULL:
+            # A full match ends the search only when the route exposes ``.path``;
+            # a fully matching route without one falls through to the next route.
+            if hasattr(route, "path"):
+                return route.path
+        elif match == Match.PARTIAL and hasattr(route, "routes"):
+            # Descend into child routes with the child scope. NOTE(review): confirm which router types report PARTIAL here.
+            route_name = _patched_get_route_name(child_scope, route.routes, route_name)
+            if route_name:
+                return route_name
+    return route_name
+
+routing._get_route_name = _patched_get_route_name
 
 APP_PROCESS_RSS_BYTES = Gauge(
     "app_process_resident_memory_bytes",
 
@@ -52,7 +52,10 @@ def store_bm25_index(chunks: List[Dict[str, Any]], document_id: str, filename: s
     # Format chunks to match vectorstore output
     formatted_chunks = []
     for chunk in chunks:
+        chunk_idx = chunk.get("chunk_index")
+        chunk_id = f"{document_id}_{chunk_idx}" if chunk_idx is not None else None
         formatted_chunks.append({
+            "id": chunk_id,
             "text": chunk["text"],
             "filename": filename,
             "document_id": document_id,
 
@@ -90,36 +90,54 @@ def transform_query(query: str) -> List[str]:
 
 
 def _generate_query_variants(query: str) -> List[str]:
-    """Use the configured LLM to split/rewrite a user query for semantic search."""
+    """Use the configured LLM to rewrite a user query into up to 3 semantic variations.
+
+    Variations rephrase the original so BM25 and ChromaDB retrieve a broader set of
+    chunks; the caller (transform_query) prepends the original query itself.
+    Returns [] when settings.HF_TOKEN is unset or the reply has no choices,
+    otherwise the parsed variants truncated to at most 3.
+    """
     if not settings.HF_TOKEN:
         return []
 
     from huggingface_hub import InferenceClient
 
     client = InferenceClient(token=settings.HF_TOKEN)
+
     prompt = (
-        "Rewrite the user question into concise semantic search queries for document retrieval. "
-        "Split independent topics into separate queries. Return a JSON array of strings only. "
-        f"User question: {query}"
+        "Generate exactly 3 semantic variations of the user question below. "
+        "Each variation must preserve the original meaning but use different "
+        "vocabulary, phrasing, or sentence structure to improve document retrieval coverage. "
+        "Do NOT add new topics or change the intent. "
+        "Return ONLY a JSON array of 3 strings, with no extra text, markdown, or explanation.\n\n"
+        f"User question: {query}\n\n"
+        'Example output: ["variation one", "variation two", "variation three"]'
     )
+
     response = client.chat_completion(
         messages=[
             {
                 "role": "system",
-                "content": "You create optimized search queries for a RAG retriever.",
+                "content": (
+                    "You are a query rewriter for a RAG retrieval system. "
+                    "You output only valid JSON arrays of strings, nothing else."
+                ),
             },
             {"role": "user", "content": prompt},
         ],
         model=settings.LLM_MODEL,
         max_tokens=256,
-        temperature=0.2,
+        temperature=0.3,
     )
 
     if not response.choices:
         return []
 
     content = response.choices[0].message.content or ""
-    return _parse_query_variants(content)
+    variants = _parse_query_variants(content)  # the raw model reply is parsed by the sibling helper
+
+    # Cap at 3 variants as requested — the original is added by transform_query
+    return variants[:3]
 
 
 def _parse_query_variants(content: str) -> List[str]:
 
@@ -87,7 +87,17 @@ def trace_function(
     def decorator(fn: Callable[..., Any]) -> Callable[..., Any]:
         @wraps(fn)
         def wrapped(*args: Any, **kwargs: Any) -> Any:
-            metadata = metadata_factory(*args, **kwargs) if metadata_factory else None
+            metadata = None
+            if metadata_factory:
+                try:
+                    metadata = metadata_factory(*args, **kwargs)
+                except Exception:
+                    logger.warning(
+                        "Metadata factory failed for trace %r; continuing without metadata.",
+                        name,
+                        exc_info=True,
+                    )
+                    metadata = {}
             return trace_call(
                 name,
                 fn,
 
@@ -161,7 +161,7 @@ def query_chunks(
         query_embeddings=[query_embedding],
         n_results=top_k,
         where=where_filter,
-        include=["documents", "metadatas", "distances"],
+        include=["documents", "metadatas", "distances"],  # Chroma always returns ids; "ids" is not a valid include value and makes query() raise
     )
 
     # ── Format results ───────────────────────────────
@@ -170,11 +170,13 @@ def query_chunks(
         for i, doc in enumerate(results["documents"][0]):
             metadata = results["metadatas"][0][i] if results["metadatas"] else {}
             distance = results["distances"][0][i] if results["distances"] else 0
+            chunk_id = results["ids"][0][i] if results.get("ids") else None  # ids come back on every query result
 
             # Convert cosine distance to similarity score (0-1)
             similarity = 1 - distance
 
             chunks.append({
+                "id": chunk_id,
                 "text": doc,
                 "filename": metadata.get("filename", ""),
                 "document_id": metadata.get("document_id", ""),