Skip to content

Commit 816ee85

Browse files
phernandezclaude
andauthored
fix(core): prevent asyncpg engine-dispose crash on Postgres backend (#902)
Signed-off-by: phernandez <paul@basicmachines.co> Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 3ba3a95 commit 816ee85

11 files changed

Lines changed: 280 additions & 12 deletions

File tree

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ dependencies = [
5151
"litellm>=1.60.0,<2.0.0",
5252
"logfire>=4.19.0",
5353
"psutil>=5.9.0",
54+
# uvloop's C event loop has no self._ready.popleft() codepath, so the
55+
# asyncpg engine-dispose race ("IndexError: pop from an empty deque") that
56+
# crashes the Postgres backend cannot fire under it. Not available on Windows.
57+
"uvloop>=0.21.0; sys_platform != 'win32'",
5458
]
5559

5660
[project.urls]

src/basic_memory/alembic/env.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@
22

33
import asyncio
44
import os
5+
from contextlib import suppress
56
from logging.config import fileConfig
67

8+
from loguru import logger
9+
710
# Allow nested event loops (needed for pytest-asyncio and other async contexts)
811
# Note: nest_asyncio doesn't work with uvloop or Python 3.14+, so we handle those cases separately
912
import sys
@@ -13,9 +16,15 @@
1316
import nest_asyncio
1417

1518
nest_asyncio.apply()
16-
except (ImportError, ValueError):
17-
# nest_asyncio not available or can't patch this loop type (e.g., uvloop)
18-
pass
19+
except (ImportError, ValueError) as exc:
20+
# Trigger: nest_asyncio is absent (ImportError) or refuses to patch the
21+
# running loop (ValueError, e.g. the uvloop policy installed for the
22+
# Postgres backend - #831/#877).
23+
# Why: the uvloop ValueError is now an *expected* path on every Postgres
24+
# startup, so swallowing it silently hides a routine branch.
25+
# Outcome: log at DEBUG (observable, not noisy) and fall through to the
26+
# thread-based migration fallback.
27+
logger.debug(f"nest_asyncio not applied ({exc!r}); using thread-based migration fallback")
1928
# For Python 3.14+, we rely on the thread-based fallback in run_migrations_online()
2029

2130
from sqlalchemy import engine_from_config, pool
@@ -115,7 +124,15 @@ async def run_async_migrations(connectable):
115124
"""Run migrations asynchronously with AsyncEngine."""
116125
async with connectable.connect() as connection:
117126
await connection.run_sync(do_run_migrations)
118-
await connectable.dispose()
127+
# Trigger: startup migrations on the asyncpg backend dispose the engine
128+
# while the event loop may be tearing down around them.
129+
# Why: that race surfaces "IndexError: pop from an empty deque" from
130+
# base_events._run_once (#831/#877); shielding lets dispose finish atomically
131+
# and suppressing CancelledError keeps a cancelled teardown from re-raising it.
132+
# Outcome: the migration engine always disposes cleanly. (uvloop is the
133+
# structural fix for the race; this hardens the teardown path.)
134+
with suppress(asyncio.CancelledError):
135+
await asyncio.shield(connectable.dispose())
119136

120137

121138
def _run_async_migrations_with_asyncio_run(connectable) -> None:

src/basic_memory/cli/app.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,14 @@ def app_callback(
5757
container = CliContainer.create()
5858
set_container(container)
5959

60+
# Trigger: Postgres backend resolved at CLI startup, before any asyncio.run().
61+
# Why: uvloop must own the event-loop policy before the loop is created so the
62+
# asyncpg engine-dispose race (#831/#877) cannot fire. No-op for SQLite.
63+
# Outcome: subsequent asyncio.run() calls in CLI commands use uvloop on Postgres.
64+
from basic_memory.db import maybe_install_uvloop
65+
66+
maybe_install_uvloop(container.config)
67+
6068
# Trigger: first-run init confirmation before command output.
6169
# Why: informational "initialized" message belongs above command results, not in the upsell panel.
6270
# Outcome: one-time plain line printed before the subcommand runs.

src/basic_memory/cli/commands/mcp.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,18 @@ def _run_background_auto_update() -> None:
9898
if transport == "stdio":
9999
threading.Thread(target=_run_background_auto_update, daemon=True).start()
100100

101+
# Trigger: MCP server startup on the Postgres backend, before the transport
102+
# creates its event loop.
103+
# Why: the watcher/lifespan path runs startup migrations + engine.dispose() on
104+
# asyncpg, which races stdlib asyncio teardown and crashes the container loop
105+
# (#831/#877). uvloop's C scheduler structurally avoids that race and must own
106+
# the loop policy before the loop is created. No-op for SQLite.
107+
# Outcome: `basic-memory mcp` on Postgres runs on uvloop. (The CLI callback also
108+
# installs it; this keeps the server startup seam explicit and self-contained.)
109+
from basic_memory.db import maybe_install_uvloop
110+
111+
maybe_install_uvloop(ConfigManager().config)
112+
101113
# Run the MCP server (blocks)
102114
# Lifespan handles: initialization, migrations, file sync, cleanup
103115
logger.info(f"Starting MCP server with {transport.upper()} transport")

src/basic_memory/db.py

Lines changed: 63 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import asyncio
22
import os
33
import sys
4-
from contextlib import asynccontextmanager
4+
from contextlib import asynccontextmanager, suppress
55
from enum import Enum, auto
66
from pathlib import Path
77
from typing import AsyncGenerator, Optional
@@ -39,6 +39,44 @@
3939
if sys.platform == "win32": # pragma: no cover
4040
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
4141

42+
43+
def maybe_install_uvloop(config: BasicMemoryConfig) -> bool:
44+
"""Install the uvloop event-loop policy for the Postgres backend.
45+
46+
Trigger: process entrypoint starting with database_backend == postgres,
47+
uvloop importable, and a non-Windows platform.
48+
Why: asyncpg engine teardown (engine.dispose()) races the stdlib asyncio
49+
loop shutdown and surfaces "IndexError: pop from an empty deque" from
50+
base_events._run_once (see #831/#877). uvloop's C scheduler has no
51+
self._ready.popleft() codepath, so that class of crash cannot fire under it.
52+
Outcome: Postgres deployments run on uvloop; SQLite users keep the default
53+
loop (no behavior change, smaller blast radius). Must run before the event
54+
loop is created, i.e. before asyncio.run().
55+
56+
Returns:
57+
True if the uvloop policy was installed, False otherwise.
58+
"""
59+
# uvloop is not available on Windows; the default loop already differs there.
60+
if sys.platform == "win32": # pragma: no cover
61+
return False
62+
63+
# Limit the change to the backend that actually hits the asyncpg dispose race.
64+
if config.database_backend != DatabaseBackend.POSTGRES:
65+
return False
66+
67+
# Deferred import: uvloop is an optional, platform-gated dependency and the
68+
# default (SQLite) path must not require it to be installed.
69+
try:
70+
import uvloop
71+
except ImportError: # pragma: no cover
72+
logger.warning("uvloop not available - using default event loop for Postgres backend")
73+
return False
74+
75+
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
76+
logger.info("Installed uvloop event-loop policy for Postgres backend")
77+
return True
78+
79+
4280
# Module level state
4381
_engine: Optional[AsyncEngine] = None
4482
_session_maker: Optional[async_sessionmaker[AsyncSession]] = None
@@ -320,7 +358,15 @@ async def shutdown_db() -> None: # pragma: no cover
320358
global _engine, _session_maker
321359

322360
if _engine:
323-
await _engine.dispose()
361+
# Trigger: teardown can run while the surrounding task is being cancelled
362+
# (e.g. lifespan shutdown, unshielded CLI cleanup).
363+
# Why: a cancellation landing mid-dispose surfaces the asyncpg
364+
# "IndexError: pop from an empty deque" race (#831/#877); shielding lets
365+
# dispose finish atomically, and suppressing CancelledError keeps a
366+
# cancelled shutdown from re-raising the underlying race.
367+
# Outcome: connections always close cleanly even under cancellation.
368+
with suppress(asyncio.CancelledError):
369+
await asyncio.shield(_engine.dispose())
324370
_engine = None
325371
_session_maker = None
326372

@@ -364,7 +410,14 @@ async def engine_session_factory(
364410

365411
yield created_engine, created_session_maker
366412
finally:
367-
await created_engine.dispose()
413+
# Trigger: context-manager teardown can run while the surrounding task is
414+
# being cancelled (e.g. a test aborting mid-fixture).
415+
# Why: on the asyncpg backend a cancellation landing mid-dispose surfaces
416+
# the "IndexError: pop from an empty deque" race (#831/#877); shield the
417+
# dispose and suppress CancelledError to match the other dispose seams.
418+
# Outcome: the per-context engine always disposes cleanly under cancellation.
419+
with suppress(asyncio.CancelledError):
420+
await asyncio.shield(created_engine.dispose())
368421

369422
# Only clear module-level globals if they still point to this context's
370423
# engine/session. This avoids clobbering newer globals from other callers.
@@ -433,7 +486,11 @@ async def run_migrations(
433486
# Trigger: run_migrations() created a temporary engine while module-level
434487
# session maker was not initialized.
435488
# Why: temporary aiosqlite worker threads can outlive CLI command execution
436-
# and block process shutdown if the engine is not disposed.
437-
# Outcome: always dispose temporary engines after migration work completes.
489+
# and block process shutdown if the engine is not disposed. On the asyncpg
490+
# backend a cancellation landing mid-dispose surfaces the same "IndexError:
491+
# pop from an empty deque" race as the other dispose seams (#831/#877), so
492+
# shield the dispose and suppress CancelledError to match them.
493+
# Outcome: always dispose temporary engines cleanly, even under cancellation.
438494
if temp_engine is not None:
439-
await temp_engine.dispose()
495+
with suppress(asyncio.CancelledError):
496+
await asyncio.shield(temp_engine.dispose())
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
"""Integration test for the asyncpg engine-dispose race (issue #831 / #877).
2+
3+
On the Postgres backend (``postgresql+asyncpg``), the "open async engine -> work
4+
-> await engine.dispose()" cycle could crash with
5+
``IndexError: pop from an empty deque`` raised from SQLAlchemy's async dispose
6+
machinery as it races event-loop teardown. This made Postgres + file watcher
7+
unusable (container restart loop).
8+
9+
These tests exercise the real create-engine -> query -> dispose cycle against a
10+
live Postgres instance (testcontainers) for many iterations. The race is
11+
timing-dependent, so we loop enough to be meaningful and assert clean
12+
completion. They are skipped on SQLite, which is unaffected.
13+
14+
Run with:
15+
BASIC_MEMORY_TEST_POSTGRES=1 uv run pytest \
16+
test-int/test_postgres_dispose_cycle.py -q
17+
"""
18+
19+
import pytest
20+
from sqlalchemy import text
21+
22+
from basic_memory import db
23+
from basic_memory.config import DatabaseBackend
24+
from basic_memory.db import DatabaseType, _create_postgres_engine
25+
26+
27+
@pytest.mark.asyncio
28+
async def test_create_query_dispose_cycle_is_clean(app_config, db_backend):
29+
"""Repeatedly create -> query -> dispose an asyncpg engine without crashing."""
30+
if db_backend != "postgres":
31+
pytest.skip("Postgres-specific test - asyncpg dispose race does not affect SQLite")
32+
33+
db_url = DatabaseType.get_db_url(app_config.database_path, DatabaseType.POSTGRES, app_config)
34+
35+
# Loop enough iterations to make the timing-dependent dispose race observable.
36+
for _ in range(50):
37+
engine = _create_postgres_engine(db_url, app_config)
38+
async with engine.connect() as conn:
39+
result = await conn.execute(text("SELECT 1"))
40+
assert result.scalar() == 1
41+
# This dispose is the call site that raced loop teardown in #831/#877.
42+
await engine.dispose()
43+
44+
45+
@pytest.mark.asyncio
46+
async def test_shutdown_db_dispose_is_clean(app_config, config_manager, db_backend):
47+
"""Repeated get_or_create_db -> query -> shutdown_db cycles complete cleanly.
48+
49+
Exercises the shielded ``shutdown_db`` teardown path against asyncpg. The
50+
autouse ``cleanup_global_db_after_test`` fixture in conftest restores module
51+
state afterwards.
52+
"""
53+
if db_backend != "postgres":
54+
pytest.skip("Postgres-specific test - asyncpg dispose race does not affect SQLite")
55+
56+
assert app_config.database_backend == DatabaseBackend.POSTGRES
57+
58+
for _ in range(25):
59+
engine, session_maker = await db.get_or_create_db(
60+
app_config.database_path,
61+
db_type=DatabaseType.POSTGRES,
62+
ensure_migrations=False,
63+
config=app_config,
64+
)
65+
async with db.scoped_session(session_maker) as session:
66+
result = await session.execute(text("SELECT 1"))
67+
assert result.scalar() == 1
68+
# Shielded dispose - must not surface the asyncpg deque race.
69+
await db.shutdown_db()

tests/cli/test_cli_exit.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,11 @@ def test_bm_version_exits_cleanly():
1515
["uv", "run", "bm", "--version"],
1616
capture_output=True,
1717
text=True,
18-
timeout=10,
18+
# `uv run` startup under full-suite load (especially on Windows runners)
19+
# can exceed a tight 10s budget even though --version short-circuits
20+
# before any heavy work. This test guards against hangs, not a startup
21+
# performance budget, so match the looser --help timeout below.
22+
timeout=20,
1923
cwd=Path(__file__).parent.parent.parent, # Project root
2024
)
2125
assert result.returncode == 0

tests/cli/test_cli_telemetry.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
from types import SimpleNamespace
56
from typing import Any, cast
67

78
import logfire
@@ -29,8 +30,16 @@ def test_app_callback_registers_command_operation(monkeypatch) -> None:
2930
resource = object()
3031

3132
monkeypatch.setattr(cli_app, "init_cli_logging", lambda: None)
32-
monkeypatch.setattr(cli_app.CliContainer, "create", staticmethod(lambda: object()))
33+
# Container needs a `config` attribute because app_callback passes it to the
34+
# uvloop policy installer; the helper itself is stubbed below for this
35+
# telemetry-focused test.
36+
monkeypatch.setattr(
37+
cli_app.CliContainer, "create", staticmethod(lambda: SimpleNamespace(config=object()))
38+
)
3339
monkeypatch.setattr(cli_app, "set_container", lambda container: None)
40+
# app_callback installs the uvloop policy for the Postgres backend; stub the
41+
# helper so this telemetry test does not depend on event-loop policy state.
42+
monkeypatch.setattr("basic_memory.db.maybe_install_uvloop", lambda config: False)
3443
monkeypatch.setattr(cli_app, "maybe_show_init_line", lambda command_name: None)
3544
monkeypatch.setattr(cli_app, "maybe_show_cloud_promo", lambda command_name: None)
3645
monkeypatch.setattr(cli_app, "maybe_run_periodic_auto_update", lambda command_name: None)

tests/cli/test_db_reindex.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from typer.testing import CliRunner
1010

1111
from basic_memory.cli.app import app
12+
from basic_memory.config import DatabaseBackend
1213
import basic_memory.cli.commands.db as db_cmd # noqa: F401
1314

1415

@@ -21,6 +22,8 @@ def _stub_app_config(*, semantic_search_enabled: bool = True) -> SimpleNamespace
2122
semantic_search_enabled=semantic_search_enabled,
2223
database_path=Path("/tmp/basic-memory.db"),
2324
get_project_mode=lambda project_name: None,
25+
# app_callback reads this to decide whether to install the uvloop policy.
26+
database_backend=DatabaseBackend.SQLITE,
2427
)
2528

2629

0 commit comments

Comments
 (0)