microsoft
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 12 additions & 0 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎.pyrit_conf_example‎
Lines changed: 8 additions & 0 deletions b/‎.pyrit_conf_example‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎build_scripts/enforce_alembic_revision_immutability.py‎
Lines changed: 39 additions & 0 deletions b/‎build_scripts/enforce_alembic_revision_immutability.py‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎build_scripts/memory_migrations.py‎
Lines changed: 90 additions & 0 deletions b/‎build_scripts/memory_migrations.py‎
Lines changed: 90 additions & 0 deletions
diff --git a/‎doc/contributing/11_memory_models.md‎
Lines changed: 103 additions & 0 deletions b/‎doc/contributing/11_memory_models.md‎
Lines changed: 103 additions & 0 deletions
diff --git a/‎doc/myst.yml‎
Lines changed: 1 addition & 0 deletions b/‎doc/myst.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 3 additions & 0 deletions b/‎pyproject.toml‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎pyrit/memory/alembic/env.py‎
Lines changed: 24 additions & 0 deletions b/‎pyrit/memory/alembic/env.py‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎pyrit/memory/alembic/script.py.mako‎
Lines changed: 32 additions & 0 deletions b/‎pyrit/memory/alembic/script.py.mako‎
Lines changed: 32 additions & 0 deletions
@@ -30,6 +30,18 @@ repos:
         files: ^(doc/.*\.(py|ipynb|md)|doc/myst\.yml)$
         pass_filenames: false
         additional_dependencies: ['pyyaml']
+      - id: enforce_alembic_revision_immutability
+        name: Enforce Alembic Revision Immutability
+        entry: python ./build_scripts/enforce_alembic_revision_immutability.py
+        language: python
+        files: ^pyrit/memory/alembic/versions/.*\.py$
+        pass_filenames: false
+      - id: memory-migrations-check
+        name: Check Memory Migrations
+        entry: python ./build_scripts/memory_migrations.py check
+        language: system
+        pass_filenames: false
+        files: ^pyrit/memory/(memory_models\.py|alembic/.*|migration\.py)$
 
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v5.0.0
 
@@ -95,6 +95,14 @@ operation: op_trash_panda
 #   - /path/to/.env
 #   - /path/to/.env.local
 
+# Schema Migration Check
+# ---------------------
+# If true, runs database schema migration on startup to ensure the database
+# is up to date with the latest PyRIT version.
+# Set to false to skip the check (e.g., for read-only access, testing, or
+# when managing migrations externally).
+check_schema: true
+
 # Silent Mode
 # -----------
 # If true, suppresses print statements during initialization.
 
@@ -0,0 +1,39 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""
+Migration history must be immutable. This hook enforces that by preventing the modification or deletion of existing migration scripts.
+
+Checks both staged changes (local pre-commit) and the full branch diff against origin/main (CI).
+"""
+
+import subprocess
+import sys
+
+_VERSIONS_PATH = "pyrit/memory/alembic/versions/"
+
+
+def _git(*args: str) -> str:
+    result = subprocess.run(["git", *args], capture_output=True, text=True)
+    return result.stdout.strip()
+
+
+def _has_non_add_changes(diff_spec: list[str]) -> bool:
+    output = _git("diff", "--name-status", *diff_spec, "--", _VERSIONS_PATH)
+    return any(line and not line.startswith("A") for line in output.splitlines())
+
+
+def has_revision_violations() -> bool:
+    # Local pre-commit: check staged changes
+    if _has_non_add_changes(["--cached"]):
+        return True
+
+    # CI: check full branch diff against origin/main
+    merge_base = _git("merge-base", "origin/main", "HEAD")
+    return bool(merge_base and _has_non_add_changes([f"{merge_base}...HEAD"]))
+
+
+if __name__ == "__main__":
+    if has_revision_violations():
+        print("[ERROR] Migration scripts can only be added, not modified or deleted.")
+        sys.exit(1)
@@ -0,0 +1,90 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import argparse
+import sys
+import tempfile
+from pathlib import Path
+
+from alembic.util.exc import AutogenerateDiffsDetected
+from sqlalchemy import create_engine
+from sqlalchemy.engine import Engine
+
+from pyrit.memory.migration import check_schema_migrations, generate_schema_migration, run_schema_migrations
+
+# ANSI color codes
+_RED = "\033[91m"
+_RESET = "\033[0m"
+
+
+def _print_error(message: str) -> None:
+    """Print an error message in red to stderr."""
+    print(f"{_RED}{message}{_RESET}", file=sys.stderr)
+
+
+def _create_temp_engine() -> tuple[Engine, Path]:
+    """Create a temp SQLite database upgraded to head and return engine and path."""
+    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
+        tmp_path = Path(tmp.name)
+    engine = create_engine(f"sqlite:///{tmp_path}")
+    run_schema_migrations(engine=engine)
+    return engine, tmp_path
+
+
+def _cmd_generate(*, message: str, force: bool = False) -> None:
+    """Generate a new Alembic revision from model changes."""
+    engine, tmp_path = _create_temp_engine()
+    try:
+        generate_schema_migration(engine=engine, message=message, force=force)
+        print("Migration file generated. Review it carefully before committing.")
+    except RuntimeError as e:
+        _print_error(str(e))
+        raise SystemExit(1) from e
+    finally:
+        engine.dispose()
+        tmp_path.unlink(missing_ok=True)
+
+
+def _cmd_check() -> None:
+    """Verify all migrations apply cleanly and schema matches models."""
+    engine, tmp_path = _create_temp_engine()
+    try:
+        check_schema_migrations(engine=engine)
+    except AutogenerateDiffsDetected as e:
+        _print_error(f"Migration check failed. Run 'generate' to create a migration. Error: {e}")
+        raise SystemExit(1) from e
+    finally:
+        engine.dispose()
+        tmp_path.unlink(missing_ok=True)
+
+
+def _build_parser() -> argparse.ArgumentParser:
+    """Build the CLI argument parser."""
+    parser = argparse.ArgumentParser(
+        description="PyRIT memory migration tool. Generate and validate migrations based on the current memory models."
+    )
+    sub = parser.add_subparsers(dest="command", required=True)
+
+    gen = sub.add_parser("generate", help="Generate a new migration from model changes.")
+    gen.add_argument("-m", "--message", required=True, help="Migration message.")
+    gen.add_argument("--force", action="store_true", help="Generate migration even if no changes detected.")
+
+    sub.add_parser("check", help="Verify all migrations apply cleanly and add up to the current memory models.")
+
+    return parser
+
+
+def main() -> int:
+    """Dispatch the selected migration command."""
+    args = _build_parser().parse_args()
+
+    if args.command == "generate":
+        _cmd_generate(message=args.message, force=args.force)
+    elif args.command == "check":
+        _cmd_check()
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -0,0 +1,103 @@
+# Memory Models & Migrations
+
+This guide covers how to work with PyRIT's memory models — where they live, how to add or update them, and how the migration system works.
+
+## Where Things Live
+
+| What | Path |
+|---|---|
+| ORM models (SQLAlchemy) | `pyrit/memory/memory_models.py` |
+| Domain objects they map to | `pyrit/models/` (e.g. `MessagePiece`, `Score`, `Seed`, `AttackResult`, `ScenarioResult`) |
+| Alembic migration environment | `pyrit/memory/alembic/env.py` |
+| Migration revisions | `pyrit/memory/alembic/versions/` |
+| Migration helpers | `pyrit/memory/migration.py` |
+| CLI migration tool | `build_scripts/memory_migrations.py` |
+| Schema diagram | `doc/code/memory/10_schema_diagram.md` |
+
+## Current Models
+
+All models inherit from the SQLAlchemy `Base` declarative class and live in `memory_models.py`:
+
+- **`PromptMemoryEntry`** — prompt/response data (`PromptMemoryEntries` table)
+- **`ScoreEntry`** — evaluation results (`ScoreEntries` table)
+- **`EmbeddingDataEntry`** — embeddings for semantic search (`EmbeddingData` table)
+- **`SeedEntry`** — dataset prompts/templates (`SeedPromptEntries` table)
+- **`AttackResultEntry`** — attack execution results (`AttackResultEntries` table)
+- **`ScenarioResultEntry`** — scenario execution metadata (`ScenarioResultEntries` table)
+
+Each entry model has a corresponding domain object and conversion methods (e.g. `PromptMemoryEntry.__init__(entry: MessagePiece)` and `get_message_piece()`).
+
+## Adding or Updating a Model
+
+### 1. Edit the model
+
+Make your changes in `pyrit/memory/memory_models.py`. Follow these conventions:
+
+- Use `mapped_column()` with explicit types.
+- Use `CustomUUID` for all UUID columns (handles cross-database compatibility).
+- Add foreign keys where relationships exist.
+- Include `pyrit_version` on new entry models.
+
+### 2. Generate a migration
+
+```bash
+python build_scripts/memory_migrations.py generate -m "short description of change"
+```
+
+This creates a new revision file under `pyrit/memory/alembic/versions/`. **Review the generated file carefully** — auto-generated migrations may need manual adjustments (e.g. for data migrations or default values).
+
+### 3. Validate the migration
+
+```bash
+python build_scripts/memory_migrations.py check
+```
+
+This verifies the schema produced by running all migrations matches the current models. The same check runs in the `memory-migrations-check` pre-commit hook (see below) and in CI.
+
+### 4. Update the schema diagram
+
+If you changed the schema in a meaningful way (added a table, added a foreign key, etc.), update the Mermaid diagram in `doc/code/memory/10_schema_diagram.md`.
+
+## How Migrations Run at Startup
+
+Schema migrations are triggered inside each memory class constructor (`SQLiteMemory.__init__` and `AzureSQLMemory.__init__`). When `skip_schema_migration=False` (the default), the inherited `_run_schema_migration()` method on `MemoryInterface` runs:
+
+```
+SQLiteMemory.__init__() / AzureSQLMemory.__init__()
+  → _run_schema_migration()                      # pyrit/memory/memory_interface.py
+      → run_schema_migrations(engine=...)         # pyrit/memory/migration.py
+          → alembic upgrade head
+      → check_schema_migrations(engine=...)       # pyrit/memory/migration.py
+          → alembic check
+```
+
+Both SQLite and AzureSQL follow the same migration path: first `run_schema_migrations` applies any pending Alembic revisions (`alembic upgrade head`), then `check_schema_migrations` verifies the resulting schema matches the current models (`alembic check`). The behavior depends on database state:
+
+| Database state | What happens |
+|---|---|
+| **Fresh (no tables)** | All migrations apply from scratch |
+| **Already versioned** | Only unapplied migrations run (idempotent) |
+| **Legacy (tables exist, no version tracking)** | Validates schema matches models, stamps current version, then upgrades. Raises `RuntimeError` on mismatch to prevent data corruption |
+
+Migrations run inside a transaction (`engine.begin()`), so a failed migration rolls back cleanly. The version tracking table is `pyrit_memory_alembic_version`.
+
+Users can skip migrations by passing `skip_schema_migration=True` to the memory class constructor. When using `initialize_pyrit_async()`, this can be forwarded via `**memory_instance_kwargs`:
+
+```python
+await initialize_pyrit_async("SQLite", skip_schema_migration=True)
+```
+
+## Important Rules
+
+### Migration revisions are immutable
+
+Once a migration revision is committed, it **must not be modified or deleted**. This is enforced by a pre-commit hook (`enforce_alembic_revision_immutability`). If you need to fix a migration, create a new revision instead.
+
+### Pre-commit hooks
+
+Two hooks run automatically when you touch memory-related files:
+
+1. **`enforce_alembic_revision_immutability`** — blocks modifications/deletions to existing revision files.
+2. **`memory-migrations-check`** — runs `memory_migrations.py check` to verify the schema is in sync.
+
+The first hook triggers on changes to Python files under `pyrit/memory/alembic/versions/`. The second triggers on changes to `pyrit/memory/memory_models.py`, `pyrit/memory/migration.py`, and files under `pyrit/memory/alembic/`.
@@ -53,6 +53,7 @@ project:
         - file: contributing/8_pre_commit.md
         - file: contributing/9_exception.md
         - file: contributing/10_release_process.md
+        - file: contributing/11_memory_models.md
     - file: gui/0_gui.md
     - file: scanner/0_scanner.md
       children:
 
@@ -28,6 +28,7 @@ classifiers = [
 requires-python = ">=3.10, <3.15"
 dependencies = [
     "aiofiles>=24,<25",
+    "alembic>=1.16.0",
     "appdirs>=1.4.0",
     "art>=6.5.0",
     "av>=14.0.0",
@@ -201,6 +202,8 @@ include = ["pyrit", "pyrit.*"]
 [tool.setuptools.package-data]
 pyrit = [
     "backend/frontend/**/*",
+    "memory/alembic/**/*",
+    "memory/alembic.ini",
     "py.typed"
 ]
 
 
@@ -0,0 +1,24 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from alembic import context
+from sqlalchemy.engine import Connection
+
+from pyrit.memory.memory_models import Base
+from pyrit.memory.migration import PYRIT_MEMORY_ALEMBIC_VERSION_TABLE
+
+config = context.config
+connection: Connection | None = config.attributes.get("connection")
+target_metadata = Base.metadata
+
+if connection is None:
+    raise RuntimeError("No connection found for Alembic migration")
+
+context.configure(
+    connection=connection,
+    target_metadata=target_metadata,
+    compare_type=True,
+    version_table=PYRIT_MEMORY_ALEMBIC_VERSION_TABLE,
+)
+with context.begin_transaction():
+    context.run_migrations()
@@ -0,0 +1,32 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""
+${message}.
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from alembic import op
+${imports if imports else ""}
+
+## repr(...).replace("'", '"') renders the values with double quotes instead of repr's single quotes.
+## down_revision is a tuple of parent ids for merge revisions, hence Sequence[str] in its annotation.
+# revision identifiers, used by Alembic.
+revision: str = "${up_revision}"
+down_revision: str | Sequence[str] | None = ${repr(down_revision).replace("'", '"')}
+branch_labels: str | Sequence[str] | None = ${repr(branch_labels).replace("'", '"')}
+depends_on: str | Sequence[str] | None = ${repr(depends_on).replace("'", '"')}
+
+
+def upgrade() -> None:
+    """Apply this schema upgrade."""
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+    """Revert this schema upgrade."""
+    ${downgrades if downgrades else "pass"}