Skip to content

Commit a920a9f

Browse files
phernandez and claude committed
feat: Add project_id to Relation and Observation for efficient project-scoped queries
Denormalizes project_id onto Relation and Observation tables to enable efficient project-scoped queries without joins. Migration backfills from associated entity and adds pg_trgm extension with GIN indexes for fuzzy link resolution on PostgreSQL. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> Signed-off-by: phernandez <paul@basicmachines.co>
1 parent 05efe87 commit a920a9f

12 files changed

Lines changed: 216 additions & 20 deletions
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
"""Add project_id to relation/observation and pg_trgm for fuzzy link resolution
2+
3+
Revision ID: f8a9b2c3d4e5
4+
Revises: 314f1ea54dc4
5+
Create Date: 2025-12-01 12:00:00.000000
6+
7+
"""
8+
9+
from typing import Sequence, Union
10+
11+
import sqlalchemy as sa
12+
from alembic import op
13+
14+
15+
# Revision identifiers, used by Alembic.
# `revision` is this migration's ID; `down_revision` is the migration it
# revises (its parent in the chain). No branch labels or cross-branch
# dependencies are declared for this migration.
16+
revision: str = "f8a9b2c3d4e5"
17+
down_revision: Union[str, None] = "314f1ea54dc4"
18+
branch_labels: Union[str, Sequence[str], None] = None
19+
depends_on: Union[str, Sequence[str], None] = None
20+
21+
22+
def upgrade() -> None:
    """Add project_id to relation and observation tables, plus pg_trgm indexes.

    This migration:
    1. Adds a project_id column to the relation and observation tables
       (denormalization, so project-scoped queries need no join to entity).
    2. Backfills project_id from the associated entity.
    3. Makes project_id NOT NULL, adds a foreign key to project.id and an
       index on the new column.
    4. On PostgreSQL only: enables the pg_trgm extension and creates GIN
       trigram indexes on entity.title and entity.permalink.
    5. On PostgreSQL only: creates a partial index on unresolved relations
       and an index on relation.to_name for bulk link resolution.
    """
    connection = op.get_bind()
    dialect = connection.dialect.name

    # A relation inherits the project of its source entity (from_id); an
    # observation inherits the project of its owning entity (entity_id).
    _add_project_id("relation", "from_id", dialect)
    _add_project_id("observation", "entity_id", dialect)

    # Everything below is PostgreSQL-specific.
    if dialect != "postgresql":
        return

    # Enable pg_trgm extension for fuzzy string matching
    op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm")

    # Create trigram indexes on entity table for fuzzy matching.
    # GIN indexes with gin_trgm_ops support similarity searches.
    op.execute("""
        CREATE INDEX IF NOT EXISTS idx_entity_title_trgm
        ON entity USING gin (title gin_trgm_ops)
    """)

    op.execute("""
        CREATE INDEX IF NOT EXISTS idx_entity_permalink_trgm
        ON entity USING gin (permalink gin_trgm_ops)
    """)

    # Create partial index on unresolved relations for efficient bulk resolution.
    # This targets "WHERE to_id IS NULL AND project_id = X" queries.
    op.execute("""
        CREATE INDEX IF NOT EXISTS idx_relation_unresolved
        ON relation (project_id, to_name)
        WHERE to_id IS NULL
    """)

    # Create index on relation.to_name for join performance in bulk resolution
    op.execute("""
        CREATE INDEX IF NOT EXISTS idx_relation_to_name
        ON relation (to_name)
    """)


def _add_project_id(table: str, entity_fk: str, dialect: str) -> None:
    """Add, backfill, constrain and index ``<table>.project_id``.

    Args:
        table: Table receiving the column ("relation" or "observation").
            Interpolated into SQL, so it must be a trusted constant.
        entity_fk: Column on ``table`` that references entity.id and is used
            to look up the project. Also a trusted constant.
        dialect: SQLAlchemy dialect name of the current connection.
    """
    # Step 1: Add the column as nullable so existing rows remain valid.
    op.add_column(table, sa.Column("project_id", sa.Integer(), nullable=True))

    # Step 2: Backfill from entity.project_id.
    if dialect == "postgresql":
        op.execute(f"""
            UPDATE {table}
            SET project_id = entity.project_id
            FROM entity
            WHERE {table}.{entity_fk} = entity.id
        """)
    else:
        # SQLite syntax: correlated subquery instead of UPDATE ... FROM
        op.execute(f"""
            UPDATE {table}
            SET project_id = (
                SELECT entity.project_id
                FROM entity
                WHERE entity.id = {table}.{entity_fk}
            )
        """)

    # NOTE(review): a row whose entity no longer exists keeps a NULL
    # project_id and will make the NOT NULL step below fail - confirm no
    # orphaned rows exist before running this on databases without enforced
    # foreign keys.

    # Step 3: Make project_id NOT NULL and add the foreign key.
    # SQLite cannot ALTER COLUMN or ADD CONSTRAINT in place; batch mode
    # rebuilds the table there and emits ordinary ALTER statements on
    # PostgreSQL, so one code path serves both dialects.
    with op.batch_alter_table(table) as batch_op:
        batch_op.alter_column(
            "project_id", existing_type=sa.Integer(), nullable=False
        )
        batch_op.create_foreign_key(
            f"fk_{table}_project_id",
            "project",
            ["project_id"],
            ["id"],
        )

    # Step 4: Index the new column for project-scoped lookups.
    op.create_index(f"ix_{table}_project_id", table, ["project_id"])
143+
144+
145+
def downgrade() -> None:
    """Remove project_id from relation/observation and the pg_trgm indexes.

    The pg_trgm extension itself is left installed because other code may
    depend on it.
    """
    connection = op.get_bind()
    dialect = connection.dialect.name

    if dialect == "postgresql":
        # Drop Postgres-specific indexes, in reverse order of creation.
        op.execute("DROP INDEX IF EXISTS idx_relation_to_name")
        op.execute("DROP INDEX IF EXISTS idx_relation_unresolved")
        op.execute("DROP INDEX IF EXISTS idx_entity_permalink_trgm")
        op.execute("DROP INDEX IF EXISTS idx_entity_title_trgm")
        # Note: We don't drop the pg_trgm extension as other code may depend on it

    # Drop project_id from observation first, then relation (reverse of upgrade).
    for table in ("observation", "relation"):
        op.drop_index(f"ix_{table}_project_id", table_name=table)

        # SQLite cannot drop a constraint or a foreign-key column in place;
        # batch mode rebuilds the table there and emits ordinary ALTER
        # statements on PostgreSQL.
        with op.batch_alter_table(table) as batch_op:
            batch_op.drop_constraint(f"fk_{table}_project_id", type_="foreignkey")
            batch_op.drop_column("project_id")

src/basic_memory/markdown/utils.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,10 @@
1414

1515
@logfire.instrument()
1616
def entity_model_from_markdown(
17-
file_path: Path, markdown: EntityMarkdown, entity: Optional[Entity] = None
17+
file_path: Path,
18+
markdown: EntityMarkdown,
19+
entity: Optional[Entity] = None,
20+
project_id: Optional[int] = None,
1821
) -> Entity:
1922
"""
2023
Convert markdown entity to model. Does not include relations.
@@ -23,6 +26,7 @@ def entity_model_from_markdown(
2326
file_path: Path to the markdown file
2427
markdown: Parsed markdown entity
2528
entity: Optional existing entity to update
29+
project_id: Project ID for new observations (uses entity.project_id if not provided)
2630
2731
Returns:
2832
Entity model populated from markdown
@@ -52,9 +56,13 @@ def entity_model_from_markdown(
5256
metadata = markdown.frontmatter.metadata or {}
5357
model.entity_metadata = {k: str(v) for k, v in metadata.items() if v is not None}
5458

59+
# Get project_id from entity if not provided
60+
obs_project_id = project_id or (model.project_id if hasattr(model, "project_id") else None)
61+
5562
# Convert observations
5663
model.observations = [
5764
ObservationModel(
65+
project_id=obs_project_id,
5866
content=obs.content,
5967
category=obs.category,
6068
context=obs.context,

src/basic_memory/models/knowledge.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ class Observation(Base):
145145
)
146146

147147
id: Mapped[int] = mapped_column(Integer, primary_key=True)
148+
project_id: Mapped[int] = mapped_column(Integer, ForeignKey("project.id"), index=True)
148149
entity_id: Mapped[int] = mapped_column(Integer, ForeignKey("entity.id", ondelete="CASCADE"))
149150
content: Mapped[str] = mapped_column(Text)
150151
category: Mapped[str] = mapped_column(String, nullable=False, default="note")
@@ -191,6 +192,7 @@ class Relation(Base):
191192
)
192193

193194
id: Mapped[int] = mapped_column(Integer, primary_key=True)
195+
project_id: Mapped[int] = mapped_column(Integer, ForeignKey("project.id"), index=True)
194196
from_id: Mapped[int] = mapped_column(Integer, ForeignKey("entity.id", ondelete="CASCADE"))
195197
to_id: Mapped[Optional[int]] = mapped_column(
196198
Integer, ForeignKey("entity.id", ondelete="CASCADE"), nullable=True

src/basic_memory/repository/relation_repository.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ async def add_all_ignore_duplicates(self, relations: List[Relation]) -> int:
117117
# Convert Relation objects to dicts for insert
118118
values = [
119119
{
120+
"project_id": r.project_id if r.project_id else self.project_id,
120121
"from_id": r.from_id,
121122
"to_id": r.to_id,
122123
"to_name": r.to_name,

src/basic_memory/services/entity_service.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,9 @@ async def create_entity_from_markdown(
398398
Uses UPSERT approach to handle permalink/file_path conflicts cleanly.
399399
"""
400400
logger.debug(f"Creating entity: {markdown.frontmatter.title} file_path: {file_path}")
401-
model = entity_model_from_markdown(file_path, markdown)
401+
model = entity_model_from_markdown(
402+
file_path, markdown, project_id=self.repository.project_id
403+
)
402404

403405
# Mark as incomplete because we still need to add relations
404406
model.checksum = None
@@ -429,6 +431,7 @@ async def update_entity_and_observations(
429431
# add new observations
430432
observations = [
431433
Observation(
434+
project_id=self.observation_repository.project_id,
432435
entity_id=db_entity.id,
433436
content=obs.content,
434437
category=obs.category,
@@ -496,6 +499,7 @@ async def update_entity_relations(
496499

497500
# Create the relation
498501
relation = Relation(
502+
project_id=self.relation_repository.project_id,
499503
from_id=db_entity.id,
500504
to_id=target_id,
501505
to_name=target_name,

test-int/test_db_wal_mode.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -142,21 +142,6 @@ async def test_null_pool_on_windows(tmp_path, monkeypatch):
142142
assert isinstance(engine.pool, NullPool)
143143

144144

145-
@pytest.mark.asyncio
146-
@pytest.mark.skipif(
147-
__import__("os").name == "nt", reason="Non-Windows test - cannot mock POSIX paths on Windows"
148-
)
149-
async def test_regular_pool_on_non_windows(tmp_path):
150-
"""Test that regular pooling is used on non-Windows platforms."""
151-
from basic_memory.db import engine_session_factory, DatabaseType
152-
from sqlalchemy.pool import NullPool
153-
154-
db_path = tmp_path / "test_posix_pool.db"
155-
156-
with patch("basic_memory.db.os.name", "posix"):
157-
async with engine_session_factory(db_path, DatabaseType.FILESYSTEM) as (engine, _):
158-
# Engine should NOT be using NullPool on non-Windows
159-
assert not isinstance(engine.pool, NullPool)
160145

161146

162147
@pytest.mark.asyncio

tests/markdown/test_entity_parser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,11 @@ async def test_parse_complete_file(project_config, entity_parser, valid_entity_c
8585
), "missing [[Auth API Spec]]"
8686

8787
# inline links in content
88-
assert Relation(type="links to", target="Random Link", context=None) in entity.relations, (
88+
assert Relation(type="links_to", target="Random Link", context=None) in entity.relations, (
8989
"missing [[Random Link]]"
9090
)
9191
assert (
92-
Relation(type="links to", target="Random Link with Title|Titled Link", context=None)
92+
Relation(type="links_to", target="Random Link with Title|Titled Link", context=None)
9393
in entity.relations
9494
), "missing [[Random Link with Title|Titled Link]]"
9595

tests/repository/test_entity_repository.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@ async def entity_with_observations(session_maker, sample_entity):
1818
async with db.scoped_session(session_maker) as session:
1919
observations = [
2020
Observation(
21+
project_id=sample_entity.project_id,
2122
entity_id=sample_entity.id,
2223
content="First observation",
2324
),
2425
Observation(
26+
project_id=sample_entity.project_id,
2527
entity_id=sample_entity.id,
2628
content="Second observation",
2729
),
@@ -59,6 +61,7 @@ async def related_results(session_maker, test_project: Project):
5961
await session.flush()
6062

6163
relation = Relation(
64+
project_id=test_project.id,
6265
from_id=source.id,
6366
to_id=target.id,
6467
to_name=target.title,
@@ -199,6 +202,7 @@ async def test_update_entity_returns_with_relations_and_observations(
199202
await session.flush()
200203

201204
relation = Relation(
205+
project_id=test_project.id,
202206
from_id=entity.id,
203207
to_id=target.id,
204208
to_name=target.title,
@@ -785,6 +789,7 @@ async def test_get_all_file_paths_performance(entity_repository: EntityRepositor
785789

786790
# Add observations to entity1
787791
observation = Observation(
792+
project_id=entity_repository.project_id,
788793
entity_id=entity1.id,
789794
content="Test observation",
790795
category="note",
@@ -793,6 +798,7 @@ async def test_get_all_file_paths_performance(entity_repository: EntityRepositor
793798

794799
# Add relation between entities
795800
relation = Relation(
801+
project_id=entity_repository.project_id,
796802
from_id=entity1.id,
797803
to_id=entity2.id,
798804
to_name=entity2.title,

tests/repository/test_entity_upsert_issue_187.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ async def test_upsert_entity_with_observations_conflict(entity_repository: Entit
2828

2929
# Add observations to the entity
3030
obs1 = Observation(
31+
project_id=entity_repository.project_id,
3132
content="This is a test observation",
3233
category="testing",
3334
tags=["test"],
@@ -56,11 +57,13 @@ async def test_upsert_entity_with_observations_conflict(entity_repository: Entit
5657

5758
# Add different observations
5859
obs2 = Observation(
60+
project_id=entity_repository.project_id,
5961
content="This is an updated observation",
6062
category="updated",
6163
tags=["updated"],
6264
)
6365
obs3 = Observation(
66+
project_id=entity_repository.project_id,
6467
content="This is a second observation",
6568
category="second",
6669
tags=["second"],

0 commit comments

Comments
 (0)