Skip to content

Commit fbccd93

Browse files
authored
Merge branch 'dev' into fix/caption-failure-logging-visibility
2 parents 55b923f + edfb85b commit fbccd93

38 files changed

Lines changed: 4686 additions & 1078 deletions

backend/app/cache.py

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
- Redis (preferred, for production)
66
- LRU in-memory cache (fallback for development or when Redis is unavailable)
77
8-
Cache key is a SHA-256 hash of (document_id, question) to ensure keys are
9-
short, stable, and unique across all question/document combinations.
8+
Cache key is a SHA-256 hash of (user_id, document_id, question) to ensure
9+
keys are short, stable, and unique across all user/question/document
10+
combinations — never shared between different users.
1011
"""
1112

1213
import hashlib
@@ -101,25 +102,28 @@ def _lru_delete(key: str) -> None:
101102
# ---------------------------------------------------------------------------
102103

103104

104-
def make_cache_key(document_id: str, question: str) -> str:
105+
def make_cache_key(user_id: str, document_id: str, question: str) -> str:
105106
"""
106-
Generate a stable, short cache key from document_id + question.
107+
Generate a stable, short cache key from user_id + document_id + question.
107108
108109
SHA-256 gives us a 64-char hex string that is:
109110
- Always the same length regardless of question length
110-
- Unique per (document_id, question) pair
111+
- Unique per (user_id, document_id, question) triple — user_id is
112+
required so two different users asking the identical question never
113+
collide on the same cache entry, even when document_id is empty
114+
(cross-document queries against a user's own private knowledge base)
111115
- Safe for Redis keys and dict keys
112116
"""
113-
raw = f"{document_id}:{question.strip().lower()}"
117+
raw = f"{user_id}:{document_id}:{question.strip().lower()}"
114118
return hashlib.sha256(raw.encode("utf-8")).hexdigest()
115119

116120

117-
def get_cached_response(document_id: str, question: str) -> Optional[str]:
121+
def get_cached_response(user_id: str, document_id: str, question: str) -> Optional[str]:
118122
"""
119-
Look up a cached answer for a (document_id, question) pair.
123+
Look up a cached answer for a (user_id, document_id, question) triple.
120124
Returns the answer string on hit, None on miss.
121125
"""
122-
key = make_cache_key(document_id, question)
126+
key = make_cache_key(user_id, document_id, question)
123127
r = _get_redis()
124128

125129
if r is not None:
@@ -140,12 +144,13 @@ def get_cached_response(document_id: str, question: str) -> Optional[str]:
140144
return None
141145

142146

143-
def set_cached_response(document_id: str, question: str, answer: str) -> None:
147+
148+
def set_cached_response(user_id: str, document_id: str, question: str, answer: str) -> None:
144149
"""
145150
Store an answer. Tries Redis first; falls back to LRU.
146151
TTL is controlled by the CACHE_TTL environment variable.
147152
"""
148-
key = make_cache_key(document_id, question)
153+
key = make_cache_key(user_id, document_id, question)
149154
serialised = json.dumps(answer)
150155
r = _get_redis()
151156

@@ -161,9 +166,9 @@ def set_cached_response(document_id: str, question: str, answer: str) -> None:
161166
logger.debug("Cache SET (LRU) key %s", key[:12])
162167

163168

164-
def invalidate_cache(document_id: str, question: str) -> None:
169+
def invalidate_cache(user_id: str, document_id: str, question: str) -> None:
165170
"""Remove one cache entry — useful when a document is re-indexed."""
166-
key = make_cache_key(document_id, question)
171+
key = make_cache_key(user_id, document_id, question)
167172
r = _get_redis()
168173
if r is not None:
169174
try:

backend/app/database.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,85 @@ def _migrate_schema():
230230
)
231231

232232

233+
# ── Workspace tables ──────────────────────────────────────────────────
234+
existing_tables = set(inspector.get_table_names())
235+
236+
if "workspaces" not in existing_tables:
237+
try:
238+
with engine.begin() as conn:
239+
conn.execute(text("""
240+
CREATE TABLE workspaces (
241+
id CHAR(36) PRIMARY KEY,
242+
name VARCHAR(255) NOT NULL,
243+
created_by CHAR(36) NOT NULL REFERENCES users(id),
244+
created_at TIMESTAMP
245+
)
246+
"""))
247+
conn.execute(text(
248+
"CREATE INDEX IF NOT EXISTS ix_workspaces_created_by "
249+
"ON workspaces (created_by)"
250+
))
251+
logger.info("Migration: created table workspaces")
252+
except Exception:
253+
logger.warning("Migration skipped (may already exist): workspaces")
254+
255+
if "workspace_members" not in existing_tables:
256+
try:
257+
with engine.begin() as conn:
258+
conn.execute(text("""
259+
CREATE TABLE workspace_members (
260+
id CHAR(36) PRIMARY KEY,
261+
workspace_id CHAR(36) NOT NULL REFERENCES workspaces(id),
262+
user_id CHAR(36) NOT NULL REFERENCES users(id),
263+
role VARCHAR(20) NOT NULL DEFAULT 'viewer',
264+
joined_at TIMESTAMP,
265+
CONSTRAINT uq_workspace_member UNIQUE (workspace_id, user_id)
266+
)
267+
"""))
268+
conn.execute(text(
269+
"CREATE INDEX IF NOT EXISTS ix_workspace_members_workspace_id "
270+
"ON workspace_members (workspace_id)"
271+
))
272+
conn.execute(text(
273+
"CREATE INDEX IF NOT EXISTS ix_workspace_members_user_id "
274+
"ON workspace_members (user_id)"
275+
))
276+
logger.info("Migration: created table workspace_members")
277+
except Exception:
278+
logger.warning("Migration skipped (may already exist): workspace_members")
279+
280+
if "workspace_invitations" not in existing_tables:
281+
try:
282+
with engine.begin() as conn:
283+
conn.execute(text("""
284+
CREATE TABLE workspace_invitations (
285+
id CHAR(36) PRIMARY KEY,
286+
email VARCHAR(120) NOT NULL,
287+
token_hash VARCHAR(255) NOT NULL UNIQUE,
288+
inviter_id CHAR(36) NOT NULL REFERENCES users(id),
289+
workspace_name VARCHAR(255) NOT NULL,
290+
created_at TIMESTAMP,
291+
expires_at TIMESTAMP NOT NULL,
292+
accepted_at TIMESTAMP
293+
)
294+
"""))
295+
conn.execute(text(
296+
"CREATE INDEX IF NOT EXISTS ix_workspace_invitations_email "
297+
"ON workspace_invitations (email)"
298+
))
299+
conn.execute(text(
300+
"CREATE INDEX IF NOT EXISTS ix_workspace_invitations_token_hash "
301+
"ON workspace_invitations (token_hash)"
302+
))
303+
conn.execute(text(
304+
"CREATE INDEX IF NOT EXISTS ix_workspace_invitations_inviter_id "
305+
"ON workspace_invitations (inviter_id)"
306+
))
307+
logger.info("Migration: created table workspace_invitations")
308+
except Exception:
309+
logger.warning("Migration skipped (may already exist): workspace_invitations")
310+
311+
233312
def advisory_lock(lock_id: int):
234313
"""Context manager that acquires a PostgreSQL advisory lock (xact scope).
235314

backend/app/models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,11 +200,11 @@ class ApiKey(Base):
200200
class WorkspaceInvitation(Base):
201201
__tablename__ = "workspace_invitations"
202202

203-
id = Column(String, primary_key=True, default=generate_uuid)
203+
id = Column(GUID, primary_key=True, default=uuid.uuid4)
204204
email = Column(String(120), nullable=False, index=True)
205205
token_hash = Column(String(255), nullable=False, unique=True, index=True)
206206
inviter_id = Column(
207-
String,
207+
GUID,
208208
ForeignKey("users.id"),
209209
nullable=False,
210210
index=True,

backend/app/observability.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,33 @@
99

1010
from fastapi import FastAPI
1111
from prometheus_client import Gauge
12-
from prometheus_fastapi_instrumentator import Instrumentator
12+
from prometheus_fastapi_instrumentator import Instrumentator, routing
13+
from starlette.routing import Match
14+
15+
# ── Workaround for FastAPI 0.135+ and prometheus-fastapi-instrumentator 8.0.0 ──
16+
# Newer FastAPI versions include _IncludedRouter objects in app.routes which
17+
# lack a '.path' attribute, causing AttributeErrors during instrumentation.
18+
def _patched_get_route_name(scope, routes, route_name=None):
19+
"""Safe version of _get_route_name that handles routes without a .path attribute."""
20+
for route in routes:
21+
try:
22+
match, child_scope = route.matches(scope)
23+
except Exception:
24+
continue
25+
26+
if match == Match.FULL:
27+
# If we have a full match and the route has a path, use it and return early.
28+
# This matches Starlette's behavior where the first matching route wins.
29+
if hasattr(route, "path"):
30+
return route.path
31+
elif match == Match.PARTIAL and hasattr(route, "routes"):
32+
# Recursive call for nested routes (e.g. Mounts)
33+
route_name = _patched_get_route_name(child_scope, route.routes, route_name)
34+
if route_name:
35+
return route_name
36+
return route_name
37+
38+
routing._get_route_name = _patched_get_route_name
1339

1440
APP_PROCESS_RSS_BYTES = Gauge(
1541
"app_process_resident_memory_bytes",

backend/app/rag/bm25.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,10 @@ def store_bm25_index(chunks: List[Dict[str, Any]], document_id: str, filename: s
5252
# Format chunks to match vectorstore output
5353
formatted_chunks = []
5454
for chunk in chunks:
55+
chunk_idx = chunk.get("chunk_index")
56+
chunk_id = f"{document_id}_{chunk_idx}" if chunk_idx is not None else None
5557
formatted_chunks.append({
58+
"id": chunk_id,
5659
"text": chunk["text"],
5760
"filename": filename,
5861
"document_id": document_id,

backend/app/rag/retriever.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -90,36 +90,54 @@ def transform_query(query: str) -> List[str]:
9090

9191

9292
def _generate_query_variants(query: str) -> List[str]:
93-
"""Use the configured LLM to split/rewrite a user query for semantic search."""
93+
"""Use the configured LLM to rewrite a user query into 3 semantic variations.
94+
95+
Each variation rephrases the original from a different angle so that
96+
BM25 and ChromaDB retrieve a broader, complementary set of chunks.
97+
The original query is always prepended by the caller (transform_query),
98+
so we ask for exactly 3 *additional* variants here.
99+
"""
94100
if not settings.HF_TOKEN:
95101
return []
96102

97103
from huggingface_hub import InferenceClient
98104

99105
client = InferenceClient(token=settings.HF_TOKEN)
106+
100107
prompt = (
101-
"Rewrite the user question into concise semantic search queries for document retrieval. "
102-
"Split independent topics into separate queries. Return a JSON array of strings only. "
103-
f"User question: {query}"
108+
"Generate exactly 3 semantic variations of the user question below. "
109+
"Each variation must preserve the original meaning but use different "
110+
"vocabulary, phrasing, or sentence structure to improve document retrieval coverage. "
111+
"Do NOT add new topics or change the intent. "
112+
"Return ONLY a JSON array of 3 strings, with no extra text, markdown, or explanation.\n\n"
113+
f"User question: {query}\n\n"
114+
'Example output: ["variation one", "variation two", "variation three"]'
104115
)
116+
105117
response = client.chat_completion(
106118
messages=[
107119
{
108120
"role": "system",
109-
"content": "You create optimized search queries for a RAG retriever.",
121+
"content": (
122+
"You are a query rewriter for a RAG retrieval system. "
123+
"You output only valid JSON arrays of strings, nothing else."
124+
),
110125
},
111126
{"role": "user", "content": prompt},
112127
],
113128
model=settings.LLM_MODEL,
114129
max_tokens=256,
115-
temperature=0.2,
130+
temperature=0.3,
116131
)
117132

118133
if not response.choices:
119134
return []
120135

121136
content = response.choices[0].message.content or ""
122-
return _parse_query_variants(content)
137+
variants = _parse_query_variants(content)
138+
139+
# Cap at 3 variants as requested — the original is added by transform_query
140+
return variants[:3]
123141

124142

125143
def _parse_query_variants(content: str) -> List[str]:

backend/app/rag/tracing.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,17 @@ def trace_function(
8787
def decorator(fn: Callable[..., Any]) -> Callable[..., Any]:
8888
@wraps(fn)
8989
def wrapped(*args: Any, **kwargs: Any) -> Any:
90-
metadata = metadata_factory(*args, **kwargs) if metadata_factory else None
90+
metadata = None
91+
if metadata_factory:
92+
try:
93+
metadata = metadata_factory(*args, **kwargs)
94+
except Exception:
95+
logger.warning(
96+
"Metadata factory failed for trace %r; continuing without metadata.",
97+
name,
98+
exc_info=True,
99+
)
100+
metadata = {}
91101
return trace_call(
92102
name,
93103
fn,

backend/app/rag/vectorstore.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ def query_chunks(
161161
query_embeddings=[query_embedding],
162162
n_results=top_k,
163163
where=where_filter,
164-
include=["documents", "metadatas", "distances"],
164+
include=["documents", "metadatas", "distances", "ids"],
165165
)
166166

167167
# ── Format results ───────────────────────────────
@@ -170,11 +170,13 @@ def query_chunks(
170170
for i, doc in enumerate(results["documents"][0]):
171171
metadata = results["metadatas"][0][i] if results["metadatas"] else {}
172172
distance = results["distances"][0][i] if results["distances"] else 0
173+
chunk_id = results["ids"][0][i] if results.get("ids") and len(results["ids"]) > 0 else None
173174

174175
# Convert cosine distance to similarity score (0-1)
175176
similarity = 1 - distance
176177

177178
chunks.append({
179+
"id": chunk_id,
178180
"text": doc,
179181
"filename": metadata.get("filename", ""),
180182
"document_id": metadata.get("document_id", ""),

0 commit comments

Comments
 (0)