Skip to content

Commit de22b60

Browse files
authored
feat(memory): reversible curation — edit/invalidate/revert memory units (#1976)
Edit (text/context/dates/fact_type/entities), invalidate (move to a separate invalidated_memory_units archive, reversible), and revert raw memory units via PATCH /memories/{id}. Tracks user edits with edited_at. Control-plane UI, docs (Memories API page), and multi-language examples included. RFC #1951.
1 parent 51d25d8 commit de22b60

55 files changed

Lines changed: 5445 additions & 218 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ node_modules/
1515

1616
# Environment variables and local config
1717
.env
18+
.env.bak*
19+
.env.*.bak
1820
docker-compose.yml
1921
docker-compose.override.yml
2022

hindsight-api-slim/hindsight_api/admin/cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
"entities",
5050
"chunks",
5151
"memory_units",
52+
"invalidated_memory_units",
5253
"unit_entities",
5354
"entity_cooccurrences",
5455
"memory_links",
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
"""Add invalidated_memory_units table for curation (edit/invalidate).
2+
3+
Curation keeps the recall hot-path (``memory_units``) clean by *moving*
4+
invalidated facts into a sibling archive table rather than flagging them in
5+
place. If a row is in ``memory_units`` it is live; if it is in
6+
``invalidated_memory_units`` it has been retired. Recall/consolidation/graph
7+
queries never need a state predicate — the rows simply aren't there.
8+
9+
The archive mirrors ``memory_units`` column-for-column (so a row round-trips
10+
losslessly on revert) plus:
11+
- ``invalidation_reason`` optional free text recorded on invalidate
12+
- ``invalidated_at`` when it was retired
13+
- ``entity_ids`` snapshot of the unit's entity associations, so revert
14+
can restore them (``unit_entities`` is cascade-deleted
15+
when the live row is removed)
16+
17+
This migration also adds ``edited_at`` to ``memory_units``: set whenever a user
18+
edits a memory's fields (text, context, dates, fact_type, entities) via curation.
19+
NULL means never manually modified; a non-NULL value answers "has the user ever
20+
changed this?" with the time of the last edit (distinct from ``updated_at``,
21+
which background operations also bump). It is added to ``memory_units`` *before*
22+
the archive is cloned below, so the archive inherits the column and the marker
23+
travels with a fact when it is invalidated.
24+
25+
Revision ID: c9a1b2d3e4f5
26+
Revises: b2d4f6a8c1e3
27+
Create Date: 2026-06-03
28+
"""
29+
30+
from collections.abc import Sequence
31+
32+
from alembic import context, op
33+
34+
from hindsight_api.alembic._dialect import run_for_dialect
35+
36+
revision: str = "c9a1b2d3e4f5"
37+
down_revision: str | Sequence[str] | None = "b2d4f6a8c1e3"
38+
branch_labels: str | Sequence[str] | None = None
39+
depends_on: str | Sequence[str] | None = None
40+
41+
42+
def _pg_schema_prefix() -> str:
    """Return the double-quoted ``"<schema>".`` qualifier for table names.

    The schema is read from the ``target_schema`` main option of the Alembic
    config. When that option is unset or empty, an empty string is returned
    so the SQL statements reference unqualified table names.
    """
    target = context.config.get_main_option("target_schema")
    if not target:
        return ""
    return f'"{target}".'
45+
46+
47+
def _pg_upgrade() -> None:
    """Add ``memory_units.edited_at`` and create the ``invalidated_memory_units`` archive.

    Statements run in this order:

    1. add ``edited_at`` to ``memory_units``;
    2. clone ``memory_units`` into ``invalidated_memory_units`` via ``LIKE``;
    3. add the archive-only columns (``invalidation_reason``,
       ``invalidated_at``, ``entity_ids``);
    4. create the archive's lookup indexes;
    5. add the ``(document_id, bank_id)`` foreign key to ``documents``.

    Each statement is guarded (``IF NOT EXISTS`` or a catalog lookup), and all
    table names are qualified with the prefix from ``_pg_schema_prefix()``.
    """
    schema = _pg_schema_prefix()
    # Add edited_at to the live table FIRST so the archive's LIKE clone below
    # inherits it (keeps the two tables column-for-column identical for round-trip).
    op.execute(f"ALTER TABLE {schema}memory_units ADD COLUMN IF NOT EXISTS edited_at TIMESTAMPTZ")
    # LIKE ... INCLUDING DEFAULTS clones every memory_units column (incl. the
    # embedding vector and edited_at) so an invalidated row can move back verbatim.
    # We deliberately omit indexes/constraints — the archive is cold storage, not a
    # recall surface; only the lookups below need indexing.
    op.execute(
        f"CREATE TABLE IF NOT EXISTS {schema}invalidated_memory_units (LIKE {schema}memory_units INCLUDING DEFAULTS)"
    )
    op.execute(
        f"ALTER TABLE {schema}invalidated_memory_units "
        f"ADD COLUMN IF NOT EXISTS invalidation_reason TEXT, "
        f"ADD COLUMN IF NOT EXISTS invalidated_at TIMESTAMPTZ DEFAULT now(), "
        f"ADD COLUMN IF NOT EXISTS entity_ids UUID[]"
    )
    op.execute(f"CREATE UNIQUE INDEX IF NOT EXISTS idx_invalidated_mu_id ON {schema}invalidated_memory_units (id)")
    op.execute(
        f"CREATE INDEX IF NOT EXISTS idx_invalidated_mu_bank "
        f"ON {schema}invalidated_memory_units (bank_id, invalidated_at)"
    )
    # Deleting a document (or bank) should clear its archived facts too, mirroring
    # the memory_units → documents cascade.
    #
    # The existence check is scoped to THIS schema's archive table via conrelid:
    # constraint names in pg_constraint are not unique across tables/schemas, so
    # matching on conname alone would skip the FK here whenever another schema
    # had already been migrated.
    op.execute(
        f"""
        DO $$ BEGIN
            IF NOT EXISTS (
                SELECT 1 FROM pg_constraint
                WHERE conname = 'invalidated_mu_document_fkey'
                  AND conrelid = '{schema}invalidated_memory_units'::regclass
            ) THEN
                ALTER TABLE {schema}invalidated_memory_units
                    ADD CONSTRAINT invalidated_mu_document_fkey
                    FOREIGN KEY (document_id, bank_id)
                    REFERENCES {schema}documents(id, bank_id) ON DELETE CASCADE;
            END IF; END $$;
        """
    )
83+
84+
85+
def _pg_downgrade() -> None:
    """Undo the PostgreSQL upgrade: drop the archive table, then ``edited_at``.

    Dropping ``invalidated_memory_units`` removes the archive wholesale
    (including its own ``edited_at`` column); the column is then removed from
    the live ``memory_units`` table. Both statements use ``IF EXISTS``.
    """
    prefix = _pg_schema_prefix()
    statements = (
        f"DROP TABLE IF EXISTS {prefix}invalidated_memory_units",
        f"ALTER TABLE {prefix}memory_units DROP COLUMN IF EXISTS edited_at",
    )
    for statement in statements:
        op.execute(statement)
91+
92+
93+
def upgrade() -> None:
    """Apply this revision through the PostgreSQL-specific upgrade routine.

    Only a ``pg`` handler is registered: Oracle gets the table from the
    baseline snapshot, matching the convention used by sibling column/index
    migrations in this tree.
    """
    run_for_dialect(pg=_pg_upgrade)
97+
98+
99+
def downgrade() -> None:
    """Revert this revision through the PostgreSQL-specific downgrade routine.

    Only a ``pg`` handler is passed to ``run_for_dialect``.
    """
    run_for_dialect(pg=_pg_downgrade)

hindsight-api-slim/hindsight_api/alembic/versions/o1a2b3c4d5e6_oracle_baseline.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@
122122
text_signals CLOB,
123123
consolidation_failed_at TIMESTAMP WITH TIME ZONE,
124124
search_vector CLOB,
125+
edited_at TIMESTAMP WITH TIME ZONE,
125126
created_at TIMESTAMP WITH TIME ZONE DEFAULT SYSTIMESTAMP NOT NULL,
126127
updated_at TIMESTAMP WITH TIME ZONE DEFAULT SYSTIMESTAMP NOT NULL,
127128
CONSTRAINT pk_memory_units PRIMARY KEY (id),
@@ -138,6 +139,48 @@
138139
PARTITION BY LIST (bank_id) AUTOMATIC
139140
(PARTITION p_default VALUES ('__default__'))
140141
""",
142+
# Cold archive for curation: invalidated facts are MOVED here out of
143+
# memory_units so the recall hot-path never sees them. Mirrors memory_units
144+
# plus invalidation bookkeeping and an entity-id snapshot for lossless revert.
145+
"""
146+
CREATE TABLE IF NOT EXISTS invalidated_memory_units (
147+
id RAW(16) NOT NULL,
148+
bank_id VARCHAR2(256) NOT NULL,
149+
document_id VARCHAR2(512),
150+
chunk_id VARCHAR2(512),
151+
text CLOB NOT NULL,
152+
embedding VECTOR(384, FLOAT32),
153+
context CLOB,
154+
event_date TIMESTAMP WITH TIME ZONE NOT NULL,
155+
occurred_start TIMESTAMP WITH TIME ZONE,
156+
occurred_end TIMESTAMP WITH TIME ZONE,
157+
mentioned_at TIMESTAMP WITH TIME ZONE,
158+
fact_type VARCHAR2(64) DEFAULT 'world' NOT NULL,
159+
confidence_score BINARY_DOUBLE,
160+
access_count NUMBER(10) DEFAULT 0 NOT NULL,
161+
consolidated_at TIMESTAMP WITH TIME ZONE,
162+
observation_scopes CLOB CONSTRAINT imu_obs_scopes_json CHECK (observation_scopes IS JSON OR observation_scopes IS NULL),
163+
tags CLOB DEFAULT '[]' NOT NULL,
164+
metadata CLOB DEFAULT '{}' NOT NULL
165+
CONSTRAINT imu_metadata_json CHECK (metadata IS JSON),
166+
proof_count NUMBER(10) DEFAULT 1,
167+
source_memory_ids CLOB,
168+
history CLOB DEFAULT '[]'
169+
CONSTRAINT imu_history_json CHECK (history IS JSON OR history IS NULL),
170+
text_signals CLOB,
171+
consolidation_failed_at TIMESTAMP WITH TIME ZONE,
172+
search_vector CLOB,
173+
edited_at TIMESTAMP WITH TIME ZONE,
174+
created_at TIMESTAMP WITH TIME ZONE DEFAULT SYSTIMESTAMP NOT NULL,
175+
updated_at TIMESTAMP WITH TIME ZONE DEFAULT SYSTIMESTAMP NOT NULL,
176+
invalidation_reason CLOB,
177+
invalidated_at TIMESTAMP WITH TIME ZONE DEFAULT SYSTIMESTAMP,
178+
entity_ids CLOB CONSTRAINT imu_entity_ids_json CHECK (entity_ids IS JSON OR entity_ids IS NULL),
179+
CONSTRAINT pk_invalidated_memory_units PRIMARY KEY (id),
180+
CONSTRAINT fk_imu_document FOREIGN KEY (document_id, bank_id)
181+
REFERENCES documents(id, bank_id) ON DELETE CASCADE
182+
)
183+
""",
141184
"""
142185
CREATE TABLE IF NOT EXISTS entities (
143186
id RAW(16) DEFAULT SYS_GUID() NOT NULL,

hindsight-api-slim/hindsight_api/api/http.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1390,6 +1390,83 @@ class UpdateDocumentResponse(BaseModel):
13901390
success: bool = True
13911391

13921392

1393+
class UpdateMemoryRequest(BaseModel):
    """Request model for curating a single memory unit (edit / invalidate / revert).

    Set any of the editable fields (``text``, ``context``, ``occurred_start``,
    ``occurred_end``, ``fact_type``, ``entities``) to correct the fact, and/or
    ``state`` to invalidate ('invalidated') or revert ('valid') it. At least
    one of those fields must be set. ``reason`` is optional free text recorded
    when invalidating; supplying ``reason`` alone does not count as an update.
    Only world/experience facts can be curated; observations are derived.
    """

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "state": "invalidated",
                "reason": "superseded: server decommissioned 2026-06-01",
            }
        }
    )

    text: str | None = Field(
        default=None,
        description="New fact text. Re-embeds the memory, drops its derived "
        "observations and links, and triggers re-consolidation.",
    )
    context: str | None = Field(
        default=None,
        description="New context for the fact. '' clears it; omit to leave unchanged.",
    )
    occurred_start: str | None = Field(
        default=None,
        description="New occurred-range start (ISO 8601). '' clears it; omit to leave unchanged.",
    )
    occurred_end: str | None = Field(
        default=None,
        description="New occurred-range end (ISO 8601). '' clears it; omit to leave unchanged.",
    )
    fact_type: str | None = Field(
        default=None,
        description="Reclassify the fact: 'world' or 'experience'. Omit to leave unchanged.",
    )
    entities: list[str] | None = Field(
        default=None,
        description="Replace the fact's entities. Names are resolved/find-or-created "
        "the same way retain does; '[]' detaches all entities. Omit to leave unchanged.",
    )
    state: str | None = Field(
        default=None,
        description="Curation state: 'invalidated' to soft-retire the memory "
        "(excluded from recall/consolidation, links and derived observations "
        "pruned, moved to the archive) or 'valid' to revert. Reversible.",
    )
    reason: str | None = Field(
        default=None,
        description="Optional free-text reason recorded when invalidating.",
    )

    @model_validator(mode="after")
    def _require_an_edit(self) -> "UpdateMemoryRequest":
        """Reject requests that change nothing or carry an unknown enum value.

        Raises:
            ValueError: if every updatable field is ``None``, if ``state`` is
                not 'valid'/'invalidated', or if ``fact_type`` is not
                'world'/'experience'.
        """
        # ``reason`` is deliberately absent: on its own it is not an update.
        updatable = (
            self.text,
            self.context,
            self.occurred_start,
            self.occurred_end,
            self.fact_type,
            self.entities,
            self.state,
        )
        # Compare against None (not truthiness): '' and [] are meaningful
        # "clear this field" values per the field descriptions.
        if all(value is None for value in updatable):
            raise ValueError("Provide at least one field to update.")
        if self.state is not None and self.state not in ("valid", "invalidated"):
            raise ValueError("state must be 'valid' or 'invalidated'.")
        if self.fact_type is not None and self.fact_type not in ("world", "experience"):
            raise ValueError("fact_type must be 'world' or 'experience'.")
        return self
1468+
1469+
13931470
class DeleteDocumentResponse(BaseModel):
13941471
"""Response model for delete document endpoint."""
13951472

@@ -3211,6 +3288,8 @@ async def api_list(
32113288
type: str | None = None,
32123289
q: str | None = None,
32133290
consolidation_state: str | None = None,
3291+
state: str | None = None,
3292+
document_id: str | None = None,
32143293
limit: int = 100,
32153294
offset: int = 0,
32163295
request_context: RequestContext = Depends(get_request_context),
@@ -3236,6 +3315,8 @@ async def api_list(
32363315
fact_type=type,
32373316
search_query=q,
32383317
consolidation_state=consolidation_state,
3318+
state=state,
3319+
document_id=document_id,
32393320
limit=limit,
32403321
offset=offset,
32413322
request_context=request_context,
@@ -3289,6 +3370,53 @@ async def api_get_memory(
32893370
logger.error(f"Error in /v1/default/banks/{bank_id}/memories/{memory_id}: {error_detail}")
32903371
raise HTTPException(status_code=500, detail=str(e))
32913372

3373+
@app.patch(
    "/v1/default/banks/{bank_id}/memories/{memory_id}",
    summary="Curate memory unit",
    description="Edit a memory's text and/or change its curation state "
    "(invalidate / revert). Invalidated memories are excluded from recall, "
    "consolidation, and graph maintenance but kept for audit (reversible). "
    "Only world/experience facts can be curated; observations are derived.",
    operation_id="update_memory",
    tags=["Memory"],
)
async def api_update_memory(
    bank_id: str,
    memory_id: str,
    request: UpdateMemoryRequest,
    request_context: RequestContext = Depends(get_request_context),
):
    """Forward a curation request to the memory engine and return its result.

    Error mapping:
      * ``OperationValidationError`` -> its own ``status_code`` / ``reason``
      * ``ValueError``               -> 400
      * ``AuthenticationError`` / ``HTTPException`` -> propagated unchanged
      * any other exception          -> logged with traceback, then 500
      * engine returned ``None``     -> 404
    """
    try:
        updated = await app.state.memory.update_memory_unit(
            bank_id=bank_id,
            memory_id=memory_id,
            text=request.text,
            context=request.context,
            occurred_start=request.occurred_start,
            occurred_end=request.occurred_end,
            new_fact_type=request.fact_type,
            entities=request.entities,
            state=request.state,
            reason=request.reason,
            request_context=request_context,
        )
    except OperationValidationError as exc:
        raise HTTPException(status_code=exc.status_code, detail=exc.reason)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    except (AuthenticationError, HTTPException):
        raise
    except Exception as exc:
        import traceback

        error_detail = f"{str(exc)}\n\nTraceback:\n{traceback.format_exc()}"
        logger.error(f"Error in PATCH /v1/default/banks/{bank_id}/memories/{memory_id}: {error_detail}")
        raise HTTPException(status_code=500, detail=str(exc))

    # A None result means the engine found no such memory unit.
    if updated is None:
        raise HTTPException(status_code=404, detail=f"Memory unit '{memory_id}' not found")
    return updated
3419+
32923420
@app.get(
32933421
"/v1/default/banks/{bank_id}/memories/{memory_id}/history",
32943422
summary="Get observation history",

hindsight-api-slim/hindsight_api/api/mcp.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ def create_mcp_server(memory: MemoryEngine, multi_bank: bool = True) -> FastMCP:
113113
"delete_directive",
114114
"list_memories",
115115
"get_memory",
116+
"update_memory",
117+
"invalidate_memory",
116118
"list_documents",
117119
"get_document",
118120
"delete_document",

0 commit comments

Comments
 (0)