Skip to content

Commit bf3d0eb

Browse files
fix: health check — schema migrations, import crash, error handling, performance
- Add lightweight schema migration system using PRAGMA user_version
- Fix dashboard import crash by routing through MCP server
- Replace silent contextlib.suppress(Exception) with logged warnings
- Add column allowlist in ContentStore.update() to prevent injection
- Surface graph/SQLite sync failures via SyncError instead of swallowing
- Fix XSS in dashboard search with html.escape
- Fix O(n) entity name scan with SPARQL LCASE() filter
- Fix semantic search loading all embeddings without error handling
- Add count_entities() to avoid materializing full list for counts
- Align Dockerfile to Python 3.13 (matches CI), remove error suppression
- Remove unused aiosqlite dependency
- Fix stale REST API version (now reads __version__)
- Bump version to 0.3.2
1 parent e3286fa · commit bf3d0eb

18 files changed

Lines changed: 194 additions & 68 deletions

File tree

Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
FROM python:3.12-slim AS base
1+
FROM python:3.13-slim AS base
22

33
WORKDIR /app
44

@@ -17,7 +17,7 @@ COPY ontology/ ontology/
1717
COPY llms.txt ./
1818

1919
# Install dependencies (including embeddings for semantic search)
20-
RUN uv sync --no-dev --frozen --extra embeddings 2>/dev/null || uv sync --no-dev --extra embeddings
20+
RUN uv sync --no-dev --frozen --extra embeddings
2121

2222
# Create data directory and initialize
2323
RUN mkdir -p /data
@@ -33,4 +33,4 @@ HEALTHCHECK --interval=30s --timeout=5s --start-period=30s \
3333
CMD python -c "import urllib.request,json; r=urllib.request.Request('http://localhost:1314/mcp',data=json.dumps({'jsonrpc':'2.0','id':1,'method':'ping'}).encode(),headers={'Content-Type':'application/json','Accept':'application/json, text/event-stream'},method='POST'); urllib.request.urlopen(r)" || exit 1
3434

3535
# Setup (auto mode: writes .env from env vars, inits stores), then start server
36-
CMD ["sh", "-c", "uv run cortex setup --auto 2>/dev/null; uv run cortex serve --transport mcp-http --host 0.0.0.0 --port 1314"]
36+
CMD ["sh", "-c", "uv run cortex setup --auto && uv run cortex serve --transport mcp-http --host 0.0.0.0 --port 1314"]

pyproject.toml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "abbacus-cortex"
3-
version = "0.3.1"
3+
version = "0.3.2"
44
description = "Cognitive knowledge system with formal ontology, reasoning, and intelligence serving"
55
readme = "README.md"
66
authors = [
@@ -26,8 +26,6 @@ classifiers = [
2626
dependencies = [
2727
# Graph / Ontology
2828
"pyoxigraph>=0.4",
29-
# SQLite (async)
30-
"aiosqlite>=0.20",
3129
# Web framework
3230
"fastapi>=0.115",
3331
"uvicorn[standard]>=0.34",

src/cortex/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Cortex — Cognitive knowledge system."""
22

3-
__version__ = "0.3.1"
3+
__version__ = "0.3.2"

src/cortex/dashboard/server.py

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
from __future__ import annotations
1515

16+
import html as html_mod
1617
import secrets
1718
from pathlib import Path
1819
from typing import Any
@@ -482,14 +483,7 @@ async def settings_import(request: Request, vault_path: str = Form(...)):
482483
)
483484

484485
try:
485-
import cortex.db.store as _store_mod
486-
import cortex.pipeline.importer as _importer_mod
487-
488-
store = _store_mod.Store(config.data_dir)
489-
importer = _importer_mod.ObsidianImporter(store, pipeline=None)
490-
result = importer.run(vault)
491-
store.close()
492-
486+
result = await mcp_client.import_obsidian(str(vault))
493487
imported = result.get("imported", 0)
494488
skipped = result.get("skipped", 0)
495489
failed = result.get("failed", 0)
@@ -923,21 +917,22 @@ async def api_search(request: Request, q: str = ""):
923917
return HTMLResponse("")
924918
results = await mcp_client.search(q, limit=8)
925919
# Return HTML fragment for HTMX
926-
html = ""
920+
html_out = ""
927921
for doc in results:
928-
obj_id = doc.get("id", "")
929-
title = doc.get("title", obj_id[:12])
930-
doc_type = doc.get("type", "")
931-
html += (
922+
obj_id = html_mod.escape(doc.get("id", ""), quote=True)
923+
title = html_mod.escape(doc.get("title", obj_id[:12]), quote=True)
924+
doc_type = html_mod.escape(doc.get("type", ""), quote=True)
925+
html_out += (
932926
f'<div class="search-item" '
933-
f"onclick=\"document.getElementById('target_id').value='{obj_id}'; "
927+
f'data-id="{obj_id}" '
928+
f"onclick=\"document.getElementById('target_id').value=this.dataset.id; "
934929
f"document.getElementById('search-results').innerHTML='';\">"
935930
f'<span class="badge badge-{doc_type}" style="font-size:0.7rem;">{doc_type}</span> '
936931
f"{title[:60]}"
937932
f'<small style="color:var(--text-muted);"> {obj_id[:8]}</small>'
938933
f"</div>"
939934
)
940-
return HTMLResponse(html)
935+
return HTMLResponse(html_out)
941936

942937
@app.post("/api/feedback")
943938
async def api_feedback(request: Request):

src/cortex/db/content_store.py

Lines changed: 85 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
"""SQLite-backed content store for Cortex.
22
33
Handles content storage, FTS5 full-text search, config, and query logging.
4-
Uses synchronous sqlite3 (async wrapper can be added later via aiosqlite).
4+
Uses synchronous sqlite3.
55
"""
66

77
from __future__ import annotations
88

99
import json
1010
import sqlite3
11+
from collections.abc import Callable
1112
from datetime import UTC, datetime
1213
from pathlib import Path
1314
from typing import Any
@@ -99,6 +100,33 @@
99100
CREATE INDEX IF NOT EXISTS idx_query_log_timestamp ON query_log(timestamp);
100101
"""
101102

103+
# Columns that callers may update via ContentStore.update().
104+
# When a migration adds a new column, add it here too.
105+
UPDATABLE_COLUMNS: frozenset[str] = frozenset({
106+
"title",
107+
"content",
108+
"raw_markdown",
109+
"type",
110+
"project",
111+
"tags",
112+
"summary",
113+
"tier",
114+
"pipeline_stage",
115+
"confidence",
116+
"captured_by",
117+
"updated_at",
118+
})
119+
120+
_IMMUTABLE_COLUMNS: frozenset[str] = frozenset({"id", "created_at"})
121+
122+
# Schema versioning — bump SCHEMA_VERSION and add a migration function
123+
# when the schema changes. See MIGRATIONS below.
124+
SCHEMA_VERSION = 1
125+
126+
# List of (target_version, migration_function) tuples.
127+
# Each function receives a sqlite3.Connection and mutates the schema.
128+
MIGRATIONS: list[tuple[int, Callable[[sqlite3.Connection], None]]] = []
129+
102130

103131
class ContentStore:
104132
"""SQLite store for document content, FTS5 search, config, and query logs."""
@@ -121,8 +149,32 @@ def __init__(self, path: Path | None = None):
121149
self._init_schema()
122150

123151
def _init_schema(self) -> None:
124-
self._db.executescript(SCHEMA_SQL)
125-
self._db.commit()
152+
current_version = self._db.execute("PRAGMA user_version").fetchone()[0]
153+
154+
if current_version == 0:
155+
# Check if tables already exist (pre-migration install)
156+
has_tables = self._db.execute(
157+
"SELECT name FROM sqlite_master WHERE type='table' AND name='documents'"
158+
).fetchone() is not None
159+
160+
if not has_tables:
161+
# Brand new database: create full schema
162+
self._db.executescript(SCHEMA_SQL)
163+
self._db.commit()
164+
165+
# Stamp as version 1 (baseline)
166+
self._db.execute(f"PRAGMA user_version = {SCHEMA_VERSION}")
167+
self._db.commit()
168+
current_version = SCHEMA_VERSION
169+
170+
# Run any pending migrations
171+
for target_version, migrate_fn in MIGRATIONS:
172+
if current_version < target_version:
173+
migrate_fn(self._db)
174+
self._db.execute(f"PRAGMA user_version = {target_version}")
175+
self._db.commit()
176+
current_version = target_version
177+
logger.info("Migrated schema to version %d", target_version)
126178

127179
def close(self) -> None:
128180
self._db.close()
@@ -213,6 +265,21 @@ def update(self, doc_id: str, **updates: Any) -> bool:
213265
return True
214266

215267
updates["updated_at"] = datetime.now(UTC).isoformat()
268+
269+
# Validate column names against allowlist
270+
invalid_keys = set(updates.keys()) - UPDATABLE_COLUMNS
271+
if invalid_keys:
272+
immutable = invalid_keys & _IMMUTABLE_COLUMNS
273+
if immutable:
274+
raise StoreError(
275+
f"Cannot update immutable column(s): {', '.join(sorted(immutable))}",
276+
context={"columns": sorted(immutable)},
277+
)
278+
raise StoreError(
279+
f"Unknown column(s): {', '.join(sorted(invalid_keys))}",
280+
context={"columns": sorted(invalid_keys)},
281+
)
282+
216283
set_clause = ", ".join(f"{k} = ?" for k in updates)
217284
values = [*updates.values(), doc_id]
218285

@@ -361,6 +428,21 @@ def get_embedding(self, doc_id: str) -> bytes | None:
361428
).fetchone()
362429
return row["embedding"] if row else None
363430

431+
def get_all_embeddings(self, *, limit: int = 10000) -> list[dict[str, Any]]:
432+
"""Return all embeddings for similarity search.
433+
434+
Args:
435+
limit: Maximum number of embeddings to return (safety cap).
436+
437+
Returns:
438+
List of dicts with keys: doc_id, embedding (bytes), dimensions (int).
439+
"""
440+
rows = self._db.execute(
441+
"SELECT doc_id, embedding, dimensions FROM embeddings LIMIT ?",
442+
(limit,),
443+
).fetchall()
444+
return [dict(r) for r in rows]
445+
364446
# -------------------------------------------------------------------------
365447
# Config
366448
# -------------------------------------------------------------------------

src/cortex/db/graph_store.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -897,17 +897,19 @@ def create_entity(
897897

898898
def _find_entity_by_name(self, name: str) -> str | None:
899899
"""Find entity by name (case-insensitive)."""
900+
safe_name = name.replace("\\", "\\\\").replace('"', '\\"')
900901
query = f"""
901902
{SPARQL_PREFIXES}
902-
SELECT ?s ?name WHERE {{
903+
SELECT ?s WHERE {{
903904
?s a cortex:Entity .
904905
?s cortex:entityName ?name .
906+
FILTER(LCASE(?name) = LCASE("{safe_name}"))
905907
}}
908+
LIMIT 1
906909
"""
907910
for row in self._store.query(query):
908-
if row["name"].value.lower() == name.lower():
909-
subj = str(row["s"].value)
910-
return subj.split("/")[-1] if "entity/" in subj else subj
911+
subj = str(row["s"].value)
912+
return subj.split("/")[-1] if "entity/" in subj else subj
911913
return None
912914

913915
def add_mention(self, *, obj_id: str, entity_id: str) -> None:
@@ -956,6 +958,18 @@ def list_entities(self, entity_type: str | None = None) -> list[dict[str, str]]:
956958
)
957959
return entities
958960

961+
def count_entities(self) -> int:
962+
"""Return the total number of entities without materializing the full list."""
963+
query = f"""
964+
{SPARQL_PREFIXES}
965+
SELECT (COUNT(DISTINCT ?s) AS ?count) WHERE {{
966+
?s a cortex:Entity .
967+
}}
968+
"""
969+
for row in self._store.query(query):
970+
return int(row["count"].value)
971+
return 0
972+
959973
# -------------------------------------------------------------------------
960974
# SPARQL Query
961975
# -------------------------------------------------------------------------

src/cortex/db/store.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,14 @@ def update(self, obj_id: str, **updates: Any) -> bool:
151151
except NotFoundError:
152152
pass # Graph might not have all properties — OK
153153
except Exception as e:
154-
logger.warning("Graph update failed for %s: %s", obj_id, e)
154+
logger.warning(
155+
"Graph update failed for %s (SQLite update succeeded): %s",
156+
obj_id, e,
157+
)
158+
raise SyncError(
159+
f"SQLite updated but graph update failed for {obj_id}",
160+
cause=e,
161+
) from e
155162

156163
return True
157164

@@ -239,5 +246,5 @@ def status(self) -> dict[str, Any]:
239246
"graph_triples": self.graph.triple_count,
240247
"counts_by_type": content_counts,
241248
"graph_counts_by_type": graph_counts,
242-
"entities": len(self.graph.list_entities()),
249+
"entities": self.graph.count_entities(),
243250
}

src/cortex/pipeline/enrich.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,13 @@ def run(self, obj_id: str) -> dict[str, Any]:
4747
}
4848
try:
4949
self.store.content.update(obj_id, **updates)
50+
except Exception as e:
51+
logger.warning("Content update failed during enrichment for %s: %s", obj_id, e)
52+
53+
try:
5054
self.store.graph.update_object(obj_id, tier=tier)
5155
except Exception as e:
52-
logger.warning("Failed to update enrichment for %s: %s", obj_id, e)
56+
logger.warning("Graph tier update failed during enrichment for %s: %s", obj_id, e)
5357

5458
return {
5559
"status": "enriched",
@@ -101,16 +105,24 @@ def promote_to_reflex(self, obj_id: str) -> bool:
101105
"""Explicitly promote an object to the reflex tier."""
102106
try:
103107
self.store.content.update(obj_id, tier="reflex")
104-
self.store.graph.update_object(obj_id, tier="reflex")
105-
return True
106-
except Exception:
108+
except Exception as e:
109+
logger.warning("Failed to promote %s to reflex in content store: %s", obj_id, e)
107110
return False
111+
try:
112+
self.store.graph.update_object(obj_id, tier="reflex")
113+
except Exception as e:
114+
logger.warning("Failed to promote %s to reflex in graph store: %s", obj_id, e)
115+
return True
108116

109117
def demote_from_reflex(self, obj_id: str) -> bool:
110118
"""Demote an object from reflex back to recall."""
111119
try:
112120
self.store.content.update(obj_id, tier="recall")
113-
self.store.graph.update_object(obj_id, tier="recall")
114-
return True
115-
except Exception:
121+
except Exception as e:
122+
logger.warning("Failed to demote %s from reflex in content store: %s", obj_id, e)
116123
return False
124+
try:
125+
self.store.graph.update_object(obj_id, tier="recall")
126+
except Exception as e:
127+
logger.warning("Failed to demote %s from reflex in graph store: %s", obj_id, e)
128+
return True

src/cortex/pipeline/link.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
from __future__ import annotations
77

8-
import contextlib
98
from typing import Any
109

1110
from cortex.core.constants import RELATIONSHIP_TYPES
@@ -44,8 +43,10 @@ def run(
4443
relationships = self._discover_relationships(obj_id)
4544

4645
# Step 3: Update pipeline stage
47-
with contextlib.suppress(Exception):
46+
try:
4847
self.store.content.update(obj_id, pipeline_stage="linked")
48+
except Exception as e:
49+
logger.warning("Failed to update pipeline stage to 'linked' for %s: %s", obj_id, e)
4950

5051
return {
5152
"status": "linked",

src/cortex/pipeline/normalize.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99

1010
from __future__ import annotations
1111

12-
import contextlib
1312
import struct
1413
from typing import Any
1514

@@ -99,8 +98,10 @@ def run(self, obj_id: str) -> dict[str, Any]:
9998
graph_updates.update(
10099
{k: v for k, v in classification["properties"].items() if isinstance(v, str)}
101100
)
102-
with contextlib.suppress(Exception):
101+
try:
103102
self.store.graph.update_object(obj_id, **graph_updates)
103+
except Exception as e:
104+
logger.warning("Graph update failed during normalization for %s: %s", obj_id, e)
104105

105106
# Step 3: Generate embedding
106107
self._generate_embedding(obj_id, title, content)

0 commit comments

Comments
 (0)