Skip to content

Commit fe15fd5

Browse files
DavidsonGomesclaude
andcommitted
fix(knowledge): port api_keys to SQLAlchemy and hard-delete on revoke
api_keys.py opened a raw sqlite3 connection by stripping "sqlite:///" off SQLALCHEMY_DATABASE_URI, which is a no-op in PG mode — every API-keys endpoint 500'd with "unable to open database file". Same root cause as 3792854; this module was missed in that pass. Port the module to SQLAlchemy text() with named placeholders, splitting the SQLite-only executescript into separate CREATE TABLE / CREATE INDEX calls, computing "now" in Python instead of datetime('now'), and dropping the PRAGMA lines. Engine is resolved via models.db.engine inside Flask and falls back to create_engine(SQLALCHEMY_DATABASE_URI) for CLI/workers. revoke_api_key now hard-deletes the row. The previous soft-delete left revoked keys visible in the UI (list_api_keys didn't filter expired), so the trash icon appeared to do nothing on first click and returned 404 on second click. Hard delete matches the UX of the icon. Migration 001 also corrected: knowledge_api_keys does not belong in the connection-scoped Postgres (that's where pgvector lives), only the counter table knowledge_api_usage does. Drop the table from the migration and the FK on api_key_id — the keys are now host-DB only. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 8b8e797 commit fe15fd5

2 files changed

Lines changed: 128 additions & 121 deletions

File tree

dashboard/backend/knowledge/api_keys.py

Lines changed: 122 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -5,36 +5,64 @@
55
The prefix is stored as plain-text for O(1) lookup; only the full token is
66
bcrypt-hashed (rounds=12). The plain token is returned exactly once, at
77
creation time.
8+
9+
Backend-portable: uses SQLAlchemy with named placeholders so the same code
10+
path works on the dashboard's SQLite backend and on the Postgres backend
11+
selected via ``SQLALCHEMY_DATABASE_URI`` / ``DATABASE_URL``.
812
"""
913

1014
from __future__ import annotations
1115

16+
import json
1217
import os
1318
import secrets
14-
import sqlite3
19+
import threading
1520
import uuid
1621
from base64 import urlsafe_b64encode
1722
from datetime import datetime, timezone
1823
from typing import Any
1924

2025
import bcrypt
26+
from sqlalchemy import create_engine, text
27+
from sqlalchemy.engine import Engine
2128

2229
# ---------------------------------------------------------------------------
23-
# DB path helpers — mirrors app.py's resolution so both use the same file.
30+
# Engine resolution — prefer Flask's shared engine, fall back to env URI.
2431
# ---------------------------------------------------------------------------
2532

26-
def _db_path() -> str:
27-
# Reuse the single source of truth so we never drift from app.py / get_dsn.
28-
from knowledge.connection_pool import _resolve_sqlite_db_path
29-
return _resolve_sqlite_db_path()
33+
_fallback_engine: Engine | None = None
34+
_fallback_uri: str | None = None
35+
_engine_lock = threading.Lock()
36+
37+
38+
def _get_engine() -> Engine:
39+
"""Return a SQLAlchemy Engine for the host DB.
3040
41+
Order of resolution:
42+
1. Flask app's ``models.db.engine`` when inside an app context.
43+
2. A process-wide engine built from ``SQLALCHEMY_DATABASE_URI`` env var
44+
(used by CLI/worker/test contexts).
45+
"""
46+
try:
47+
from flask import current_app # noqa: F401 — only used to check ctx
48+
from models import db
49+
return db.engine
50+
except Exception:
51+
pass
52+
53+
uri = os.environ.get("SQLALCHEMY_DATABASE_URI", "").strip()
54+
if not uri:
55+
raise RuntimeError(
56+
"No SQLAlchemy engine available: outside Flask app context and "
57+
"SQLALCHEMY_DATABASE_URI is unset."
58+
)
3159

32-
def _connect() -> sqlite3.Connection:
33-
conn = sqlite3.connect(_db_path())
34-
conn.row_factory = sqlite3.Row
35-
conn.execute("PRAGMA journal_mode=WAL")
36-
conn.execute("PRAGMA foreign_keys=ON")
37-
return conn
60+
global _fallback_engine, _fallback_uri
61+
with _engine_lock:
62+
if _fallback_engine is None or _fallback_uri != uri:
63+
_fallback_engine = create_engine(uri, future=True)
64+
_fallback_uri = uri
65+
return _fallback_engine
3866

3967

4068
# ---------------------------------------------------------------------------
@@ -60,10 +88,10 @@ def _generate_token() -> tuple[str, str, str]:
6088

6189

6290
# ---------------------------------------------------------------------------
63-
# Ensure table exists
91+
# Ensure table exists (idempotent, portable across SQLite + Postgres).
6492
# ---------------------------------------------------------------------------
6593

66-
_CREATE_TABLE = """
94+
_CREATE_TABLE_SQL = """
6795
CREATE TABLE IF NOT EXISTS knowledge_api_keys (
6896
id TEXT PRIMARY KEY,
6997
name TEXT,
@@ -77,26 +105,37 @@ def _generate_token() -> tuple[str, str, str]:
77105
created_at TEXT NOT NULL,
78106
last_used_at TEXT,
79107
expires_at TEXT
80-
);
81-
CREATE INDEX IF NOT EXISTS idx_kak_prefix ON knowledge_api_keys(prefix);
108+
)
82109
"""
83110

111+
_CREATE_INDEX_SQL = "CREATE INDEX IF NOT EXISTS idx_kak_prefix ON knowledge_api_keys(prefix)"
112+
84113

85114
def ensure_table() -> None:
86-
"""Idempotent — safe to call multiple times."""
87-
with _connect() as conn:
88-
conn.executescript(_CREATE_TABLE)
115+
"""Idempotent — safe to call multiple times. Creates table on first use
116+
when the Alembic/host-DB migration hasn't been applied yet."""
117+
engine = _get_engine()
118+
with engine.begin() as conn:
119+
conn.execute(text(_CREATE_TABLE_SQL))
120+
conn.execute(text(_CREATE_INDEX_SQL))
89121

90122

91123
# ---------------------------------------------------------------------------
92124
# CRUD
93125
# ---------------------------------------------------------------------------
94126

95127
def _now() -> str:
96-
# Use SQLite-compatible format for datetime comparisons (datetime('now') returns this format)
97128
return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S.%f")
98129

99130

131+
def _row_to_dict(row) -> dict[str, Any]:
132+
"""Convert a SQLAlchemy Row to a plain dict and decode JSON fields."""
133+
d = dict(row._mapping)
134+
d["space_ids"] = json.loads(d["space_ids"]) if d.get("space_ids") else []
135+
d["scopes"] = json.loads(d["scopes"]) if d.get("scopes") else []
136+
return d
137+
138+
100139
def create_api_key(
101140
*,
102141
name: str | None,
@@ -111,96 +150,84 @@ def create_api_key(
111150
112151
Returns ``(row_dict, plain_token)``. ``plain_token`` is shown once only.
113152
"""
114-
import json
115-
116153
ensure_table()
117154
full_token, prefix, token_hash = _generate_token()
118155
key_id = str(uuid.uuid4())
119156
now = _now()
120157
space_ids = space_ids or []
121158
scopes = scopes or ["read"]
122159

123-
with _connect() as conn:
160+
engine = _get_engine()
161+
with engine.begin() as conn:
124162
conn.execute(
125-
"""
126-
INSERT INTO knowledge_api_keys
127-
(id, name, prefix, token_hash, connection_id, space_ids, scopes,
128-
rate_limit_per_min, rate_limit_per_day, created_at, expires_at)
129-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
130-
""",
131-
(
132-
key_id,
133-
name,
134-
prefix,
135-
token_hash,
136-
connection_id,
137-
json.dumps(space_ids),
138-
json.dumps(scopes),
139-
rate_limit_per_min,
140-
rate_limit_per_day,
141-
now,
142-
expires_at,
163+
text(
164+
"""
165+
INSERT INTO knowledge_api_keys
166+
(id, name, prefix, token_hash, connection_id, space_ids, scopes,
167+
rate_limit_per_min, rate_limit_per_day, created_at, expires_at)
168+
VALUES (:id, :name, :prefix, :token_hash, :connection_id, :space_ids, :scopes,
169+
:rate_limit_per_min, :rate_limit_per_day, :created_at, :expires_at)
170+
"""
143171
),
172+
{
173+
"id": key_id,
174+
"name": name,
175+
"prefix": prefix,
176+
"token_hash": token_hash,
177+
"connection_id": connection_id,
178+
"space_ids": json.dumps(space_ids),
179+
"scopes": json.dumps(scopes),
180+
"rate_limit_per_min": rate_limit_per_min,
181+
"rate_limit_per_day": rate_limit_per_day,
182+
"created_at": now,
183+
"expires_at": expires_at,
184+
},
144185
)
145186

146187
row = get_api_key(key_id)
147188
return row, full_token # type: ignore[return-value]
148189

149190

150191
def get_api_key(key_id: str) -> dict[str, Any] | None:
151-
import json
152-
153192
ensure_table()
154-
with _connect() as conn:
193+
engine = _get_engine()
194+
with engine.connect() as conn:
155195
row = conn.execute(
156-
"SELECT * FROM knowledge_api_keys WHERE id = ?", (key_id,)
196+
text("SELECT * FROM knowledge_api_keys WHERE id = :id"),
197+
{"id": key_id},
157198
).fetchone()
158-
if row is None:
159-
return None
160-
d = dict(row)
161-
d["space_ids"] = json.loads(d["space_ids"])
162-
d["scopes"] = json.loads(d["scopes"])
163-
return d
199+
return _row_to_dict(row) if row else None
164200

165201

166202
def list_api_keys(connection_id: str | None = None) -> list[dict[str, Any]]:
167-
import json
168-
169203
ensure_table()
170-
with _connect() as conn:
204+
engine = _get_engine()
205+
with engine.connect() as conn:
171206
if connection_id:
172207
rows = conn.execute(
173-
"SELECT * FROM knowledge_api_keys WHERE connection_id = ? ORDER BY created_at DESC",
174-
(connection_id,),
208+
text(
209+
"SELECT * FROM knowledge_api_keys "
210+
"WHERE connection_id = :cid ORDER BY created_at DESC"
211+
),
212+
{"cid": connection_id},
175213
).fetchall()
176214
else:
177215
rows = conn.execute(
178-
"SELECT * FROM knowledge_api_keys ORDER BY created_at DESC"
216+
text("SELECT * FROM knowledge_api_keys ORDER BY created_at DESC")
179217
).fetchall()
180-
result = []
181-
for row in rows:
182-
d = dict(row)
183-
d["space_ids"] = json.loads(d["space_ids"])
184-
d["scopes"] = json.loads(d["scopes"])
185-
result.append(d)
186-
return result
218+
return [_row_to_dict(r) for r in rows]
187219

188220

189221
def revoke_api_key(key_id: str) -> bool:
190-
"""Soft-delete by setting ``expires_at`` to 1 second in the past. Returns True if the key existed."""
191-
from datetime import timedelta as _td
192-
222+
"""Hard-delete the API key row. Returns True if a row was deleted."""
193223
ensure_table()
194-
# Set expires_at 1 second in the past so SQLite's datetime('now') comparison reliably excludes it.
195-
past = (datetime.now(timezone.utc).replace(microsecond=0) - _td(seconds=1)).strftime(
196-
"%Y-%m-%d %H:%M:%S"
197-
)
198-
with _connect() as conn:
199-
cur = conn.execute(
200-
"UPDATE knowledge_api_keys SET expires_at = ? WHERE id = ? AND (expires_at IS NULL OR expires_at > ?)",
201-
(past, key_id, past),
224+
engine = _get_engine()
225+
with engine.begin() as conn:
226+
result = conn.execute(
227+
text("DELETE FROM knowledge_api_keys WHERE id = :id"),
228+
{"id": key_id},
202229
)
203-
return cur.rowcount > 0
230+
return result.rowcount > 0
204231

205232

206233
def verify_token(bearer: str) -> dict[str, Any] | None:
@@ -209,8 +236,6 @@ def verify_token(bearer: str) -> dict[str, Any] | None:
209236
Returns the api_key row if valid and not expired, else None.
210237
Uses prefix-first lookup (O(1)) then a single bcrypt.checkpw call.
211238
"""
212-
import json
213-
214239
if not bearer.startswith("evo_k_"):
215240
return None
216241
rest = bearer[len("evo_k_"):]
@@ -220,31 +245,35 @@ def verify_token(bearer: str) -> dict[str, Any] | None:
220245
prefix = parts[0]
221246

222247
ensure_table()
223-
with _connect() as conn:
248+
# Compute "now" in Python so the WHERE clause is portable across SQLite + PG.
249+
now_str = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S.%f")
250+
251+
engine = _get_engine()
252+
with engine.connect() as conn:
224253
rows = conn.execute(
225-
"""
226-
SELECT * FROM knowledge_api_keys
227-
WHERE prefix = ?
228-
AND (expires_at IS NULL OR expires_at > datetime('now'))
229-
""",
230-
(prefix,),
254+
text(
255+
"SELECT * FROM knowledge_api_keys "
256+
"WHERE prefix = :prefix "
257+
" AND (expires_at IS NULL OR expires_at > :now)"
258+
),
259+
{"prefix": prefix, "now": now_str},
231260
).fetchall()
232261

233262
for row in rows:
234-
d = dict(row)
263+
d = _row_to_dict(row)
235264
try:
236265
if bcrypt.checkpw(bearer.encode(), d["token_hash"].encode()):
237-
# Update last_used_at (fire and forget, best-effort)
238266
try:
239-
with _connect() as conn:
240-
conn.execute(
241-
"UPDATE knowledge_api_keys SET last_used_at = ? WHERE id = ?",
242-
(_now(), d["id"]),
267+
with engine.begin() as wconn:
268+
wconn.execute(
269+
text(
270+
"UPDATE knowledge_api_keys SET last_used_at = :now "
271+
"WHERE id = :id"
272+
),
273+
{"now": _now(), "id": d["id"]},
243274
)
244275
except Exception:
245276
pass
246-
d["space_ids"] = json.loads(d["space_ids"])
247-
d["scopes"] = json.loads(d["scopes"])
248277
return d
249278
except Exception:
250279
continue

dashboard/backend/knowledge/migrations/versions/001_initial_schema.py

Lines changed: 6 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -270,41 +270,20 @@ def upgrade() -> None:
270270
sa.text("CREATE INDEX idx_chunks_unit ON knowledge_chunks(unit_id)")
271271
)
272272

273-
# ------------------------------------------------------------------
274-
# knowledge_api_keys
275-
# ------------------------------------------------------------------
276-
op.create_table(
277-
"knowledge_api_keys",
278-
sa.Column(
279-
"id",
280-
postgresql.UUID(as_uuid=True),
281-
primary_key=True,
282-
server_default=sa.text("gen_random_uuid()"),
283-
),
284-
sa.Column("token_hash", sa.Text(), nullable=False),
285-
sa.Column("name", sa.Text()),
286-
sa.Column("space_ids", postgresql.ARRAY(postgresql.UUID(as_uuid=True))),
287-
sa.Column("scopes", postgresql.ARRAY(sa.Text())),
288-
sa.Column("rate_limit_per_min", sa.Integer(), server_default="60"),
289-
sa.Column("rate_limit_per_day", sa.Integer(), server_default="10000"),
290-
sa.Column(
291-
"created_at",
292-
sa.TIMESTAMP(timezone=True),
293-
server_default=sa.text("now()"),
294-
),
295-
sa.Column("last_used_at", sa.TIMESTAMP(timezone=True)),
296-
sa.Column("expires_at", sa.TIMESTAMP(timezone=True)),
297-
)
298-
299273
# ------------------------------------------------------------------
300274
# knowledge_api_usage — fixed window rate limiting counter
275+
#
276+
# NOTE: knowledge_api_keys does NOT live in this database. The keys are
277+
# the dashboard's source of truth and are managed in the host DB (SQLite
278+
# or Postgres reached via DATABASE_URL) by knowledge.api_keys.
279+
# This connection-scoped Postgres only stores the per-window counter;
280+
# api_key_id is therefore a free UUID without a foreign key.
301281
# ------------------------------------------------------------------
302282
op.create_table(
303283
"knowledge_api_usage",
304284
sa.Column(
305285
"api_key_id",
306286
postgresql.UUID(as_uuid=True),
307-
sa.ForeignKey("knowledge_api_keys.id", ondelete="CASCADE"),
308287
nullable=False,
309288
),
310289
sa.Column("window_start", sa.TIMESTAMP(timezone=True), nullable=False),
@@ -365,7 +344,6 @@ def upgrade() -> None:
365344
def downgrade() -> None:
366345
op.drop_table("knowledge_classify_queue")
367346
op.drop_table("knowledge_api_usage")
368-
op.drop_table("knowledge_api_keys")
369347
op.get_bind().execute(sa.text("DROP TABLE IF EXISTS knowledge_chunks"))
370348
op.drop_table("knowledge_documents")
371349
op.drop_table("knowledge_units")

0 commit comments

Comments
 (0)