Skip to content

Commit eacefe1

Browse files
committed
Merge branch 'main' into fix/integration-test-failures
Signed-off-by: Joe P <joe@basicmemory.com>
2 parents 938330b + a920a9f commit eacefe1

42 files changed

Lines changed: 1507 additions & 71 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ dependencies = [
3434
"python-dotenv>=1.1.0",
3535
"pytest-aio>=1.9.0",
3636
"aiofiles>=24.1.0", # Async file I/O
37-
"logfire>=0.73.0", # Optional observability (disabled by default via config)
37+
"logfire[fastapi]>=0.73.0", # Optional observability (disabled by default via config)
3838
"asyncpg>=0.30.0",
3939
"nest-asyncio>=1.6.0", # For Alembic migrations with Postgres
4040
]
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
"""Add project_id to relation/observation and pg_trgm for fuzzy link resolution
2+
3+
Revision ID: f8a9b2c3d4e5
4+
Revises: 314f1ea54dc4
5+
Create Date: 2025-12-01 12:00:00.000000
6+
7+
"""
8+
9+
from typing import Sequence, Union
10+
11+
import sqlalchemy as sa
12+
from alembic import op
13+
14+
15+
# revision identifiers, used by Alembic.
16+
revision: str = "f8a9b2c3d4e5"
17+
down_revision: Union[str, None] = "314f1ea54dc4"
18+
branch_labels: Union[str, Sequence[str], None] = None
19+
depends_on: Union[str, Sequence[str], None] = None
20+
21+
22+
def upgrade() -> None:
    """Add project_id to relation and observation tables, plus pg_trgm indexes.

    This migration:
    1. Adds project_id column to relation and observation tables (denormalization)
    2. Backfills project_id from the associated entity
    3. Enables pg_trgm extension for trigram-based fuzzy matching (Postgres only)
    4. Creates GIN indexes on entity title and permalink for fast similarity searches
    5. Creates partial index on unresolved relations for efficient bulk resolution
    """
    connection = op.get_bind()
    dialect = connection.dialect.name

    # -------------------------------------------------------------------------
    # Add project_id to relation table
    # -------------------------------------------------------------------------

    # Step 1: Add project_id column as nullable first so existing rows are valid
    # until the backfill below populates them.
    op.add_column("relation", sa.Column("project_id", sa.Integer(), nullable=True))

    # Step 2: Backfill project_id from entity.project_id via from_id
    if dialect == "postgresql":
        op.execute("""
            UPDATE relation
            SET project_id = entity.project_id
            FROM entity
            WHERE relation.from_id = entity.id
        """)
    else:
        # SQLite has no UPDATE ... FROM; use a correlated subquery instead
        op.execute("""
            UPDATE relation
            SET project_id = (
                SELECT entity.project_id
                FROM entity
                WHERE entity.id = relation.from_id
            )
        """)

    # Step 3: Make project_id NOT NULL and add the foreign key.
    # Use batch_alter_table so this also runs on SQLite, which cannot ALTER a
    # column's nullability or add a constraint in place (batch mode rebuilds
    # the table there; on other backends it degrades to plain ALTER statements).
    with op.batch_alter_table("relation") as batch_op:
        batch_op.alter_column("project_id", existing_type=sa.Integer(), nullable=False)
        batch_op.create_foreign_key(
            "fk_relation_project_id",
            "project",
            ["project_id"],
            ["id"],
        )

    # Step 4: Create index on relation.project_id
    op.create_index("ix_relation_project_id", "relation", ["project_id"])

    # -------------------------------------------------------------------------
    # Add project_id to observation table
    # -------------------------------------------------------------------------

    # Step 1: Add project_id column as nullable first
    op.add_column("observation", sa.Column("project_id", sa.Integer(), nullable=True))

    # Step 2: Backfill project_id from entity.project_id via entity_id
    if dialect == "postgresql":
        op.execute("""
            UPDATE observation
            SET project_id = entity.project_id
            FROM entity
            WHERE observation.entity_id = entity.id
        """)
    else:
        # SQLite has no UPDATE ... FROM; use a correlated subquery instead
        op.execute("""
            UPDATE observation
            SET project_id = (
                SELECT entity.project_id
                FROM entity
                WHERE entity.id = observation.entity_id
            )
        """)

    # Step 3: Make project_id NOT NULL and add the foreign key (batch mode for
    # SQLite compatibility, as above).
    with op.batch_alter_table("observation") as batch_op:
        batch_op.alter_column("project_id", existing_type=sa.Integer(), nullable=False)
        batch_op.create_foreign_key(
            "fk_observation_project_id",
            "observation",
            "project",
            ["project_id"],
            ["id"],
        ) if False else None  # placeholder removed below
144+
145+
def downgrade() -> None:
    """Remove project_id from relation/observation and pg_trgm indexes."""
    connection = op.get_bind()
    dialect = connection.dialect.name

    if dialect == "postgresql":
        # Drop Postgres-specific indexes (created with raw SQL in upgrade)
        op.execute("DROP INDEX IF EXISTS idx_relation_to_name")
        op.execute("DROP INDEX IF EXISTS idx_relation_unresolved")
        op.execute("DROP INDEX IF EXISTS idx_entity_permalink_trgm")
        op.execute("DROP INDEX IF EXISTS idx_entity_title_trgm")
        # Note: We don't drop the pg_trgm extension as other code may depend on it

    # Drop project_id from observation.
    # batch_alter_table is required for the constraint/column drops to work on
    # SQLite (which cannot DROP a constraint in place; batch mode rebuilds the
    # table); on Postgres it degrades to plain ALTER statements.
    op.drop_index("ix_observation_project_id", table_name="observation")
    with op.batch_alter_table("observation") as batch_op:
        batch_op.drop_constraint("fk_observation_project_id", type_="foreignkey")
        batch_op.drop_column("project_id")

    # Drop project_id from relation (same batch-mode rationale)
    op.drop_index("ix_relation_project_id", table_name="relation")
    with op.batch_alter_table("relation") as batch_op:
        batch_op.drop_constraint("fk_relation_project_id", type_="foreignkey")
        batch_op.drop_column("project_id")

src/basic_memory/api/app.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,6 @@ async def lifespan(app: FastAPI): # pragma: no cover
100100
app.include_router(project.project_resource_router)
101101
app.include_router(management.router)
102102

103-
# Auth routes are handled by FastMCP automatically when auth is enabled
104-
105103

106104
@app.exception_handler(Exception)
107105
async def exception_handler(request, exc): # pragma: no cover

src/basic_memory/api/routers/resource_router.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,17 @@
2525
router = APIRouter(prefix="/resource", tags=["resources"])
2626

2727

28+
def _mtime_to_datetime(entity: EntityModel) -> datetime:
    """Resolve the modification timestamp for *entity*.

    Prefers the file's on-disk mtime, converted to a timezone-aware local
    datetime; when no mtime is recorded (falsy), falls back to the database
    ``updated_at`` value.
    """
    mtime = entity.mtime
    if not mtime:
        return entity.updated_at
    return datetime.fromtimestamp(mtime).astimezone()
37+
38+
2839
def get_entity_ids(item: SearchIndexRow) -> set[int]:
2940
match item.type:
3041
case SearchItemType.ENTITY:
@@ -97,7 +108,7 @@ async def get_resource_content(
97108
# Read content for each entity
98109
content = await file_service.read_entity_content(result)
99110
memory_url = normalize_memory_url(result.permalink)
100-
modified_date = result.updated_at.isoformat()
111+
modified_date = _mtime_to_datetime(result).isoformat()
101112
checksum = result.checksum[:8] if result.checksum else ""
102113

103114
# Prepare the delimited content

src/basic_memory/config.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,23 @@ class BasicMemoryConfig(BaseSettings):
100100
description="Database connection URL. For Postgres, use postgresql+asyncpg://user:pass@host:port/db. If not set, SQLite will use default path.",
101101
)
102102

103+
# Database connection pool configuration (Postgres only)
104+
db_pool_size: int = Field(
105+
default=20,
106+
description="Number of connections to keep in the pool (Postgres only)",
107+
gt=0,
108+
)
109+
db_pool_overflow: int = Field(
110+
default=40,
111+
description="Max additional connections beyond pool_size under load (Postgres only)",
112+
gt=0,
113+
)
114+
db_pool_recycle: int = Field(
115+
default=180,
116+
description="Recycle connections after N seconds to prevent stale connections. Default 180s works well with Neon's ~5 minute scale-to-zero (Postgres only)",
117+
gt=0,
118+
)
119+
103120
# Watch service configuration
104121
sync_delay: int = Field(
105122
default=1000, description="Milliseconds to wait after changes before syncing", gt=0

src/basic_memory/db.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -190,21 +190,37 @@ def enable_wal_mode(dbapi_conn, connection_record):
190190
return engine
191191

192192

193-
def _create_postgres_engine(db_url: str) -> AsyncEngine:
193+
def _create_postgres_engine(db_url: str, config: BasicMemoryConfig) -> AsyncEngine:
    """Create Postgres async engine with appropriate configuration.

    Args:
        db_url: Postgres connection URL (postgresql+asyncpg://...)
        config: BasicMemoryConfig with pool settings

    Returns:
        Configured async engine for Postgres
    """
    # Use NullPool to avoid stale-connection issues: an external pooler such as
    # PgBouncer is assumed to handle connection pooling, so SQLAlchemy opens a
    # fresh connection per request instead of reusing possibly-dead ones.
    # NOTE(review): `config` (db_pool_size/db_pool_overflow/db_pool_recycle) is
    # currently unused here because NullPool bypasses SQLAlchemy's pooling —
    # confirm whether those settings should still apply.
    engine = create_async_engine(
        db_url,
        echo=False,
        poolclass=NullPool,  # No pooling - fresh connection per request
        connect_args={
            # Disable statement cache to avoid issues with prepared statements on reconnect
            "statement_cache_size": 0,
            # Allow 30s for commands (Neon cold start can take 2-5s, sometimes longer)
            "command_timeout": 30,
            # Allow 30s for initial connection (Neon wake-up time)
            "timeout": 30,
            "server_settings": {
                "application_name": "basic-memory",
                # Statement timeout for queries (30s to allow for cold start)
                "statement_timeout": "30s",
            },
        },
    )
    logger.debug("Created Postgres engine with NullPool (no connection pooling)")

    return engine
210226

@@ -228,7 +244,7 @@ def _create_engine_and_session(
228244
# Delegate to backend-specific engine creation
229245
# Check explicit POSTGRES type first, then config setting
230246
if db_type == DatabaseType.POSTGRES or config.database_backend == DatabaseBackend.POSTGRES:
231-
engine = _create_postgres_engine(db_url)
247+
engine = _create_postgres_engine(db_url, config)
232248
else:
233249
engine = _create_sqlite_engine(db_url, db_type)
234250

src/basic_memory/deps.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -368,11 +368,10 @@ async def get_markdown_processor_v2(entity_parser: EntityParserV2Dep) -> Markdow
368368
async def get_file_service(
    project_config: ProjectConfigDep, markdown_processor: MarkdownProcessorDep
) -> FileService:
    """Build a FileService rooted at the project's home directory."""
    file_service = FileService(project_config.home, markdown_processor)
    # Log message kept consistent with get_file_service_v2 (trailing space removed)
    logger.debug(
        f"Created FileService for project: {project_config.name}, base_path: {project_config.home}"
    )
    return file_service
377376

378377

@@ -382,11 +381,10 @@ async def get_file_service(
382381
async def get_file_service_v2(
    project_config: ProjectConfigV2Dep, markdown_processor: MarkdownProcessorV2Dep
) -> FileService:
    """Build a FileService rooted at the project's home directory (v2 dependency chain)."""
    file_service = FileService(project_config.home, markdown_processor)
    logger.debug(
        f"Created FileService for project: {project_config.name}, base_path: {project_config.home}"
    )
    return file_service
391389

392390

src/basic_memory/markdown/entity_parser.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,12 @@
2222
Relation,
2323
)
2424
from basic_memory.utils import parse_tags
25+
import logfire
2526

2627
md = MarkdownIt().use(observation_plugin).use(relation_plugin)
2728

2829

30+
@logfire.instrument()
2931
def normalize_frontmatter_value(value: Any) -> Any:
3032
"""Normalize frontmatter values to safe types for processing.
3133
@@ -87,6 +89,7 @@ def normalize_frontmatter_value(value: Any) -> Any:
8789
return value
8890

8991

92+
@logfire.instrument()
9093
def normalize_frontmatter_metadata(metadata: dict) -> dict:
9194
"""Normalize all values in frontmatter metadata dict.
9295
@@ -109,6 +112,7 @@ class EntityContent:
109112
relations: list[Relation] = field(default_factory=list)
110113

111114

115+
@logfire.instrument()
112116
def parse(content: str) -> EntityContent:
113117
"""Parse markdown content into EntityMarkdown."""
114118

@@ -167,6 +171,7 @@ def parse_date(self, value: Any) -> Optional[datetime]:
167171
return parsed
168172
return None
169173

174+
@logfire.instrument()
170175
async def parse_file(self, path: Path | str) -> EntityMarkdown:
171176
"""Parse markdown file into EntityMarkdown."""
172177

@@ -188,6 +193,7 @@ def get_file_path(self, path):
188193
"""Get absolute path for a file using the base path for the project."""
189194
return self.base_path / path
190195

196+
@logfire.instrument()
191197
async def parse_file_content(self, absolute_path, file_content):
192198
"""Parse markdown content from file stats.
193199
@@ -205,6 +211,7 @@ async def parse_file_content(self, absolute_path, file_content):
205211
ctime=file_stats.st_ctime,
206212
)
207213

214+
@logfire.instrument()
208215
async def parse_markdown_content(
209216
self,
210217
file_path: Path,

0 commit comments

Comments
 (0)