Skip to content

Commit 651563a

Browse files
committed
perf(core): speed up postgres test resets
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent c1b0de5 commit 651563a

3 files changed

Lines changed: 141 additions & 68 deletions

File tree

.github/workflows/test.yml

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,6 @@ jobs:
5252
test-sqlite-unit:
5353
name: Test SQLite Unit (${{ matrix.os }}, Python ${{ matrix.python-version }})
5454
timeout-minutes: 30
55-
needs: [static-checks]
5655
strategy:
5756
fail-fast: false
5857
matrix:
@@ -99,7 +98,6 @@ jobs:
9998
test-sqlite-integration:
10099
name: Test SQLite Integration (${{ matrix.os }}, Python ${{ matrix.python-version }})
101100
timeout-minutes: 45
102-
needs: [static-checks]
103101
strategy:
104102
fail-fast: false
105103
matrix:
@@ -146,7 +144,7 @@ jobs:
146144
test-postgres-unit:
147145
name: Test Postgres Unit (Python ${{ matrix.python-version }})
148146
timeout-minutes: 30
149-
needs: [static-checks]
147+
# NOTE(review): this is a job-level `if`, but it reads the `matrix` context.
# GitHub's context-availability table lists only github, needs, vars and inputs
# for jobs.<job_id>.if, so this expression may be rejected at workflow
# validation time -- confirm. If so, restrict the Python versions in the matrix
# definition itself or move the condition to step-level `if`.
if: github.event_name != 'pull_request' || matrix.python-version == '3.12'
150148
strategy:
151149
fail-fast: false
152150
matrix:
@@ -190,7 +188,7 @@ jobs:
190188
test-postgres-integration:
191189
name: Test Postgres Integration (Python ${{ matrix.python-version }})
192190
timeout-minutes: 45
193-
needs: [static-checks]
191+
# NOTE(review): this is a job-level `if`, but it reads the `matrix` context.
# GitHub's context-availability table lists only github, needs, vars and inputs
# for jobs.<job_id>.if, so this expression may be rejected at workflow
# validation time -- confirm. If so, restrict the Python versions in the matrix
# definition itself or move the condition to step-level `if`.
if: github.event_name != 'pull_request' || matrix.python-version == '3.12'
194192
strategy:
195193
fail-fast: false
196194
matrix:
@@ -234,7 +232,6 @@ jobs:
234232
test-semantic:
235233
name: Test Semantic (Python 3.12)
236234
timeout-minutes: 45
237-
needs: [static-checks]
238235
runs-on: ubuntu-latest
239236

240237
steps:

test-int/conftest.py

Lines changed: 57 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ async def test_my_mcp_tool(mcp_server, app):
5151
"""
5252

5353
import os
54-
from typing import AsyncGenerator, Literal
54+
from typing import AsyncGenerator, Generator, Literal
5555

5656
import pytest
5757
import pytest_asyncio
@@ -63,7 +63,13 @@ async def test_my_mcp_tool(mcp_server, app):
6363

6464
from httpx import AsyncClient, ASGITransport
6565

66-
from basic_memory.config import BasicMemoryConfig, ProjectConfig, ConfigManager, DatabaseBackend
66+
from basic_memory.config import (
67+
BasicMemoryConfig,
68+
ProjectConfig,
69+
ProjectEntry,
70+
ConfigManager,
71+
DatabaseBackend,
72+
)
6773
from basic_memory.db import engine_session_factory, DatabaseType
6874
from basic_memory.models import Project
6975
from basic_memory.models.base import Base
@@ -112,6 +118,49 @@ def postgres_container(db_backend):
112118
yield postgres
113119

114120

121+
POSTGRES_EPHEMERAL_TABLES = [
122+
"search_vector_embeddings",
123+
"search_vector_chunks",
124+
"search_vector_index",
125+
]
126+
127+
128+
def _postgres_reset_tables() -> list[str]:
    """Return the names of the tables the integration reset truncates.

    The list is rebuilt from ``Base.metadata.sorted_tables`` on every call, so
    it reflects whatever models are registered at reset time. ``search_index``
    is appended by name rather than read from the ORM metadata.
    """
    table_names = [orm_table.name for orm_table in Base.metadata.sorted_tables]
    table_names.append("search_index")
    return table_names
131+
132+
133+
async def _reset_postgres_integration_schema(engine) -> None:
    """Return the shared Postgres integration database to an empty baseline.

    Everything runs on one connection inside a single ``engine.begin()`` block:

    1. ``Base.metadata.create_all`` and then the four ``search_index``
       statements imported from ``basic_memory.models.search`` are issued.
    2. Each table named in ``POSTGRES_EPHEMERAL_TABLES`` is dropped if present.
    3. Every table returned by ``_postgres_reset_tables()`` is emptied with one
       ``TRUNCATE ... RESTART IDENTITY CASCADE`` statement.

    Args:
        engine: Engine for the shared integration database; only its
            ``begin()`` async context manager is used here.
    """
    from basic_memory.models.search import (
        CREATE_POSTGRES_SEARCH_INDEX_FTS,
        CREATE_POSTGRES_SEARCH_INDEX_METADATA,
        CREATE_POSTGRES_SEARCH_INDEX_PERMALINK,
        CREATE_POSTGRES_SEARCH_INDEX_TABLE,
    )

    # Each statement gets its own execute() call, in exactly this order.
    search_index_ddl = (
        CREATE_POSTGRES_SEARCH_INDEX_TABLE,
        CREATE_POSTGRES_SEARCH_INDEX_FTS,
        CREATE_POSTGRES_SEARCH_INDEX_METADATA,
        CREATE_POSTGRES_SEARCH_INDEX_PERMALINK,
    )

    async with engine.begin() as conn:
        # Trigger: integration tests can leave temporary search/vector tables
        #   behind while exercising full-stack recovery paths.
        # Why: re-issuing the schema creation is much cheaper than dropping
        #   every table and rebuilding it for each test.
        # Outcome: every integration test starts from the same baseline without
        #   paying the full DDL cost again.
        await conn.run_sync(Base.metadata.create_all)
        for ddl_statement in search_index_ddl:
            await conn.execute(ddl_statement)

        # Remove the tables that are not part of the baseline.
        for ephemeral_table in POSTGRES_EPHEMERAL_TABLES:
            await conn.execute(text(f"DROP TABLE IF EXISTS {ephemeral_table} CASCADE"))

        # Clear all rows in one statement.
        truncate_targets = ", ".join(_postgres_reset_tables())
        await conn.execute(
            text(f"TRUNCATE TABLE {truncate_targets} RESTART IDENTITY CASCADE")
        )
162+
163+
115164
@pytest_asyncio.fixture
116165
async def engine_factory(
117166
app_config,
@@ -121,13 +170,7 @@ async def engine_factory(
121170
tmp_path,
122171
) -> AsyncGenerator[tuple, None]:
123172
"""Create engine and session factory for the configured database backend."""
124-
from basic_memory.models.search import (
125-
CREATE_SEARCH_INDEX,
126-
CREATE_POSTGRES_SEARCH_INDEX_TABLE,
127-
CREATE_POSTGRES_SEARCH_INDEX_FTS,
128-
CREATE_POSTGRES_SEARCH_INDEX_METADATA,
129-
CREATE_POSTGRES_SEARCH_INDEX_PERMALINK,
130-
)
173+
from basic_memory.models.search import CREATE_SEARCH_INDEX
131174
from basic_memory import db
132175

133176
if db_backend == "postgres":
@@ -153,16 +196,7 @@ async def engine_factory(
153196
db._engine = engine
154197
db._session_maker = session_maker
155198

156-
# Drop and recreate all tables for test isolation
157-
async with engine.begin() as conn:
158-
await conn.execute(text("DROP TABLE IF EXISTS search_index CASCADE"))
159-
await conn.run_sync(Base.metadata.drop_all)
160-
await conn.run_sync(Base.metadata.create_all)
161-
# asyncpg requires separate execute calls for each statement
162-
await conn.execute(CREATE_POSTGRES_SEARCH_INDEX_TABLE)
163-
await conn.execute(CREATE_POSTGRES_SEARCH_INDEX_FTS)
164-
await conn.execute(CREATE_POSTGRES_SEARCH_INDEX_METADATA)
165-
await conn.execute(CREATE_POSTGRES_SEARCH_INDEX_PERMALINK)
199+
await _reset_postgres_integration_schema(engine)
166200

167201
yield engine, session_maker
168202

@@ -228,7 +262,7 @@ def app_config(
228262
monkeypatch.setenv("BASIC_MEMORY_CLOUD_MODE", "false")
229263

230264
# Create a basic config with test-project like unit tests do
231-
projects = {"test-project": str(config_home)}
265+
projects = {"test-project": ProjectEntry(path=str(config_home))}
232266

233267
# Configure database backend based on env var
234268
if db_backend == "postgres":
@@ -285,7 +319,9 @@ def project_config(test_project):
285319

286320

287321
@pytest.fixture
288-
def app(app_config, project_config, engine_factory, test_project, config_manager) -> FastAPI:
322+
def app(
323+
app_config, project_config, engine_factory, test_project, config_manager
324+
) -> Generator[FastAPI, None, None]:
289325
"""Create test FastAPI application with single project."""
290326

291327
# Import the FastAPI app AFTER the config_manager has written the test config to disk

tests/conftest.py

Lines changed: 82 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,13 @@
2222
from testcontainers.postgres import PostgresContainer
2323

2424
from basic_memory import db
25-
from basic_memory.config import ProjectConfig, BasicMemoryConfig, ConfigManager, DatabaseBackend
25+
from basic_memory.config import (
26+
ProjectConfig,
27+
ProjectEntry,
28+
BasicMemoryConfig,
29+
ConfigManager,
30+
DatabaseBackend,
31+
)
2632
from basic_memory.db import DatabaseType
2733
from basic_memory.markdown import EntityParser
2834
from basic_memory.markdown.markdown_processor import MarkdownProcessor
@@ -83,6 +89,79 @@ def postgres_container(db_backend):
8389
yield postgres
8490

8591

92+
POSTGRES_EPHEMERAL_TABLES = [
93+
"search_vector_embeddings",
94+
"search_vector_index",
95+
]
96+
97+
98+
def _postgres_alembic_config(async_url: str) -> Config:
    """Build the Alembic ``Config`` used to stamp the shared Postgres test schema.

    Args:
        async_url: Database URL stored under the ``sqlalchemy.url`` main option.

    Returns:
        A fresh ``Config`` whose script location is the ``alembic`` directory
        next to the ``basic_memory.db`` module file.
    """
    # Insertion order is the order in which the options are applied below.
    main_options = {
        "script_location": str(Path(db.__file__).parent / "alembic"),
        "file_template": (
            "%%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s"
        ),
        "timezone": "UTC",
        "revision_environment": "false",
        "sqlalchemy.url": async_url,
    }

    alembic_cfg = Config()
    for option_name, option_value in main_options.items():
        alembic_cfg.set_main_option(option_name, option_value)
    return alembic_cfg
111+
112+
113+
def _postgres_reset_tables() -> list[str]:
    """Return the names of the tables the per-test reset truncates.

    The ORM portion is read from ``Base.metadata.sorted_tables`` on every call
    because some tests declare models after conftest has been imported, so a
    list computed once at import time could miss them. ``search_index`` and
    ``search_vector_chunks`` are appended by name.
    """
    orm_table_names = [orm_table.name for orm_table in Base.metadata.sorted_tables]
    return [*orm_table_names, "search_index", "search_vector_chunks"]
122+
123+
124+
async def _reset_postgres_test_schema(engine: AsyncEngine, async_url: str) -> None:
    """Return the shared Postgres schema to a clean baseline before a test.

    Inside one ``engine.begin()`` block this issues ``Base.metadata.create_all``
    and the ``search_index`` / ``search_vector_chunks`` statements imported
    from ``basic_memory.models.search``. It then drops every table in
    ``POSTGRES_EPHEMERAL_TABLES``, truncates the tables returned by
    ``_postgres_reset_tables()``, and checks whether ``public.alembic_version``
    exists. If it does not, the schema is stamped at Alembic ``head``.

    Args:
        engine: Engine for the shared test database.
        async_url: Database URL handed to ``_postgres_alembic_config`` when the
            schema has to be stamped.
    """
    from basic_memory.models.search import (
        CREATE_POSTGRES_SEARCH_INDEX_FTS,
        CREATE_POSTGRES_SEARCH_INDEX_METADATA,
        CREATE_POSTGRES_SEARCH_INDEX_PERMALINK,
        CREATE_POSTGRES_SEARCH_INDEX_TABLE,
        CREATE_POSTGRES_SEARCH_VECTOR_CHUNKS_INDEX,
        CREATE_POSTGRES_SEARCH_VECTOR_CHUNKS_TABLE,
    )

    # Each statement gets its own execute() call, in exactly this order.
    schema_ddl = (
        CREATE_POSTGRES_SEARCH_INDEX_TABLE,
        CREATE_POSTGRES_SEARCH_INDEX_FTS,
        CREATE_POSTGRES_SEARCH_INDEX_METADATA,
        CREATE_POSTGRES_SEARCH_INDEX_PERMALINK,
        CREATE_POSTGRES_SEARCH_VECTOR_CHUNKS_TABLE,
        CREATE_POSTGRES_SEARCH_VECTOR_CHUNKS_INDEX,
    )

    async with engine.begin() as conn:
        # Trigger: several tests intentionally drop or stub search tables to
        #   exercise recovery code.
        # Why: TRUNCATE is much cheaper than drop_all/create_all, but it only
        #   works when the schema exists.
        # Outcome: the schema creation is issued first, then rows are cleared
        #   for deterministic test setup.
        await conn.run_sync(Base.metadata.create_all)
        for ddl_statement in schema_ddl:
            await conn.execute(ddl_statement)

        # Remove the tables that are not part of the baseline.
        for ephemeral_table in POSTGRES_EPHEMERAL_TABLES:
            await conn.execute(text(f"DROP TABLE IF EXISTS {ephemeral_table} CASCADE"))

        # Clear all rows in one statement.
        truncate_targets = ", ".join(_postgres_reset_tables())
        await conn.execute(
            text(f"TRUNCATE TABLE {truncate_targets} RESTART IDENTITY CASCADE")
        )

        # to_regclass yields NULL when the relation does not exist.
        version_lookup = await conn.execute(
            text("SELECT to_regclass('public.alembic_version')")
        )
        needs_stamp = version_lookup.scalar() is None

    # Some code paths (e.g. ensure_initialization()) invoke Alembic migrations.
    # When the tables were created directly by the ORM, alembic_version is
    # missing and the migrations would try to create them again.
    # NOTE(review): the original indentation was not recoverable; the stamp is
    # placed after the transaction block -- confirm against the real file.
    if needs_stamp:
        command.stamp(_postgres_alembic_config(async_url), "head")
163+
164+
86165
@pytest.fixture
87166
def anyio_backend():
88167
return "asyncio"
@@ -114,7 +193,7 @@ def config_home(tmp_path, monkeypatch) -> Path:
114193
@pytest.fixture(scope="function")
115194
def app_config(config_home, db_backend, postgres_container, monkeypatch) -> BasicMemoryConfig:
116195
"""Create test app configuration for the appropriate backend."""
117-
projects = {"test-project": str(config_home)}
196+
projects = {"test-project": ProjectEntry(path=str(config_home))}
118197

119198
# Set backend based on parameterized db_backend fixture
120199
if db_backend == "postgres":
@@ -229,46 +308,7 @@ async def engine_factory(
229308
db._engine = engine
230309
db._session_maker = session_maker
231310

232-
from basic_memory.models.search import (
233-
CREATE_POSTGRES_SEARCH_INDEX_TABLE,
234-
CREATE_POSTGRES_SEARCH_INDEX_FTS,
235-
CREATE_POSTGRES_SEARCH_INDEX_METADATA,
236-
CREATE_POSTGRES_SEARCH_INDEX_PERMALINK,
237-
CREATE_POSTGRES_SEARCH_VECTOR_CHUNKS_TABLE,
238-
CREATE_POSTGRES_SEARCH_VECTOR_CHUNKS_INDEX,
239-
)
240-
241-
# Drop and recreate all tables for test isolation
242-
async with engine.begin() as conn:
243-
# Must drop search_index first (has FK to project, blocks drop_all)
244-
await conn.execute(text("DROP TABLE IF EXISTS search_index CASCADE"))
245-
await conn.run_sync(Base.metadata.drop_all)
246-
await conn.run_sync(Base.metadata.create_all)
247-
# Create search_index via DDL (not ORM - uses composite PK + tsvector)
248-
# asyncpg requires separate execute calls for each statement
249-
await conn.execute(CREATE_POSTGRES_SEARCH_INDEX_TABLE)
250-
await conn.execute(CREATE_POSTGRES_SEARCH_INDEX_FTS)
251-
await conn.execute(CREATE_POSTGRES_SEARCH_INDEX_METADATA)
252-
await conn.execute(CREATE_POSTGRES_SEARCH_INDEX_PERMALINK)
253-
await conn.execute(CREATE_POSTGRES_SEARCH_VECTOR_CHUNKS_TABLE)
254-
await conn.execute(CREATE_POSTGRES_SEARCH_VECTOR_CHUNKS_INDEX)
255-
256-
# Mark migrations as already applied for this test-created schema.
257-
#
258-
# Some codepaths (e.g. ensure_initialization()) invoke Alembic migrations.
259-
# If we create tables via ORM directly, alembic_version is missing and migrations
260-
# will try to create tables again, causing DuplicateTableError.
261-
alembic_dir = Path(db.__file__).parent / "alembic"
262-
cfg = Config()
263-
cfg.set_main_option("script_location", str(alembic_dir))
264-
cfg.set_main_option(
265-
"file_template",
266-
"%%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s",
267-
)
268-
cfg.set_main_option("timezone", "UTC")
269-
cfg.set_main_option("revision_environment", "false")
270-
cfg.set_main_option("sqlalchemy.url", async_url)
271-
command.stamp(cfg, "head")
311+
await _reset_postgres_test_schema(engine, async_url)
272312

273313
yield engine, session_maker
274314

0 commit comments

Comments
 (0)