feat(bot): add per-user conversation memory with summary and sarcastic emoji style in system prompt

TheMeinerLP · TheMeinerLP · commit 0a462e1617a5 · 2025-11-30T12:17:31.000+01:00
diff --git a/README.md b/README.md
@@ -3,6 +3,7 @@ OneLiteFeather Discord RAG Bot (pgvector + LlamaIndex)
 
 Overview
 - Discord bot + queue-based worker ecosystem that talks directly to Postgres/pgvector for retrieval-based answers (no REST intermediary).
+- Conversation memory: per-user context plus a compact summary is stored in Postgres and fed into the answers.
 - Queue jobs are delivered via RabbitMQ with Postgres metadata so multiple workers can scale horizontally; an indexing CLI is still available for ad-hoc runs.
 - Modular architecture: commands/listeners, DI services, provider abstraction (OpenAI, Ollama, vLLM) and built-in Prometheus metrics for Discord, RAG, and jobs.
 - docker-compose includes Postgres/pgvector and optional Ollama for local end-to-end testing.
@@ -19,6 +20,7 @@ Scaling
 - Deploying to Kubernetes: use the provided bot and worker deployments plus the dedicated HPAs (`k8s/bot-hpa.yaml`, `k8s/worker-hpa.yaml`).
 - The bot exposes `/metrics`, `/healthz`, `/readyz` on `APP_HEALTH_HTTP_PORT` so k8s liveness/readiness and Prometheus scraping work.
 - Workers will auto-scale through RabbitMQ and the `rag-run-queue` HPA; configure RabbitMQ + Postgres as a shared queue reference.
+- Style: answers are helpful, with dry sarcasm and fitting Discord emojis; emoji/style guides can be indexed via RAG.
 
 
 
diff --git a/src/discord_rag_bot/bot/startup.py b/src/discord_rag_bot/bot/startup.py
@@ -10,6 +10,7 @@
 from rag_core.tools.registry import ToolsRegistry
 from ..infrastructure.config_store import ensure_store as ensure_config_store
 from ..infrastructure.config_store import migrate_prompts_files_to_db
+from ..infrastructure.memory import ensure_store as ensure_memory_store
 
 
 def build_services() -> BotServices:
@@ -43,6 +44,11 @@ def build_services() -> BotServices:
             migrate_prompts_files_to_db(delete_files=True)
         except Exception:
             pass
+    # Ensure memory store
+    try:
+        ensure_memory_store()
+    except Exception:
+        pass
     tools = ToolsRegistry()
     return BotServices(rag=rag, job_repo_factory=job_repo_factory, job_repo_default=default_job_repo, tools=tools)
 
diff --git a/src/discord_rag_bot/infrastructure/memory.py b/src/discord_rag_bot/infrastructure/memory.py
@@ -0,0 +1,148 @@
+from __future__ import annotations
+
+import asyncio
+from dataclasses import dataclass
+from typing import Optional, Sequence
+
+import asyncpg
+
+from ..config import settings
+
+
+def _dsn() -> str:
+    """Build the PostgreSQL connection URI from ``settings.db``.
+
+    User name and password are percent-encoded so that credentials
+    containing URI delimiters (``@``, ``:``, ``/``, ``#``, ``?``) do not
+    corrupt the DSN; the driver decodes them again when parsing the URI.
+
+    Returns:
+        A ``postgresql://user:password@host:port/database`` URI.
+    """
+    # Local import keeps this block self-contained (no file-level change).
+    from urllib.parse import quote
+
+    db = settings.db
+    user = quote(str(db.user), safe="")
+    password = quote(str(db.password), safe="")
+    return f"postgresql://{user}:{password}@{db.host}:{db.port}/{db.database}"
+
+
+async def _ensure_async(conn: asyncpg.Connection) -> None:
+    """Create the ``bot_memory`` table and its two lookup indexes if missing.
+
+    All statements use ``IF NOT EXISTS``, so calling this repeatedly on an
+    already initialised database does not fail.
+
+    Args:
+        conn: Open asyncpg connection on which the DDL is executed.
+    """
+    schema_ddl = """
+        CREATE TABLE IF NOT EXISTS bot_memory (
+            id BIGSERIAL PRIMARY KEY,
+            user_id BIGINT NOT NULL,
+            guild_id BIGINT,
+            channel_id BIGINT,
+            role TEXT NOT NULL,          -- 'user' | 'assistant' | 'system' | 'summary'
+            kind TEXT NOT NULL DEFAULT 'message',
+            content TEXT NOT NULL,
+            created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+        );
+        CREATE INDEX IF NOT EXISTS idx_bot_memory_user_chan_time ON bot_memory(user_id, channel_id, created_at DESC);
+        CREATE INDEX IF NOT EXISTS idx_bot_memory_user_time ON bot_memory(user_id, created_at DESC);
+        """
+    await conn.execute(schema_ddl)
+
+
+def ensure_store() -> None:
+    """Synchronously make sure the memory schema exists.
+
+    Opens a dedicated connection, runs the schema DDL and closes the
+    connection again. Because it relies on ``asyncio.run``, it has to be
+    called from a thread that has no running event loop.
+    """
+
+    async def _bootstrap():
+        connection = await asyncpg.connect(_dsn())
+        try:
+            await _ensure_async(connection)
+        finally:
+            # Always release the connection, even if the DDL failed.
+            await connection.close()
+
+    asyncio.run(_bootstrap())
+
+
+def save_message(*, user_id: int, guild_id: Optional[int], channel_id: Optional[int], role: str, content: str, kind: str = "message") -> None:
+    """Persist one memory row (chat message or summary) for a user.
+
+    The call is synchronous. It may be invoked from plain threads as well as
+    from inside a coroutine: ``asyncio.run`` refuses to start while a loop is
+    already running in the current thread, so in that case the database work
+    is executed on a short-lived helper thread and awaited by blocking.
+
+    Args:
+        user_id: Discord user the row belongs to.
+        guild_id: Guild the message was seen in, or ``None``.
+        channel_id: Channel the message was seen in, or ``None``.
+        role: Value for the ``role`` column (e.g. ``'user'``, ``'assistant'``, ``'summary'``).
+        content: Text to store.
+        kind: Value for the ``kind`` column; defaults to ``"message"``.
+
+    Raises:
+        Exception: Connection or query errors from asyncpg are propagated.
+    """
+    # Local import keeps this block self-contained (no file-level change).
+    import concurrent.futures
+
+    async def run() -> None:
+        conn = await asyncpg.connect(_dsn())
+        try:
+            await _ensure_async(conn)
+            await conn.execute(
+                """
+                INSERT INTO bot_memory(user_id, guild_id, channel_id, role, kind, content)
+                VALUES ($1, $2, $3, $4, $5, $6)
+                """,
+                int(user_id),
+                int(guild_id) if guild_id is not None else None,
+                int(channel_id) if channel_id is not None else None,
+                role,
+                kind,
+                content,
+            )
+        finally:
+            await conn.close()
+
+    try:
+        asyncio.get_running_loop()
+    except RuntimeError:
+        # No event loop in this thread: asyncio.run() is safe to use directly.
+        asyncio.run(run())
+        return
+
+    # A loop is already running here (e.g. called from a Discord listener).
+    # asyncio.run() would raise RuntimeError, so give the coroutine its own
+    # loop on a helper thread; .result() re-raises any failure.
+    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+        pool.submit(lambda: asyncio.run(run())).result()
+
+
+@dataclass
+class MemorySlice:
+    """What is remembered about one user: a summary plus recent messages."""
+
+    # Content of the newest 'summary' row for the user, or None if there is none.
+    summary: Optional[str]
+    # Recent (role, content) pairs; load_slice fills this oldest-first.
+    recent: list[tuple[str, str]]  # list of (role, content)
+
+
+def load_slice(*, user_id: int, channel_id: Optional[int], limit: int = 8) -> MemorySlice:
+    """Load the stored summary and the most recent messages for a user.
+
+    The call is synchronous. It may be invoked from plain threads as well as
+    from inside a coroutine: ``asyncio.run`` refuses to start while a loop is
+    already running in the current thread, so in that case the database work
+    is executed on a short-lived helper thread and awaited by blocking.
+
+    Args:
+        user_id: Discord user whose memory is loaded.
+        channel_id: When given, recent messages are restricted to this
+            channel; when ``None``, messages from all channels are used.
+        limit: Maximum number of recent messages; negative values are
+            treated as ``0``.
+
+    Returns:
+        A ``MemorySlice`` with the newest summary (or ``None``) and the
+        recent ``(role, content)`` pairs in chronological order.
+
+    Raises:
+        Exception: Connection or query errors from asyncpg are propagated.
+    """
+    # Local import keeps this block self-contained (no file-level change).
+    import concurrent.futures
+
+    # PostgreSQL rejects a negative LIMIT; clamp instead of failing the query.
+    row_limit = max(0, int(limit))
+
+    async def run() -> MemorySlice:
+        conn = await asyncpg.connect(_dsn())
+        try:
+            await _ensure_async(conn)
+            # summary (latest)
+            row = await conn.fetchrow(
+                """
+                SELECT content FROM bot_memory
+                WHERE user_id=$1 AND role='summary'
+                ORDER BY created_at DESC
+                LIMIT 1
+                """,
+                int(user_id),
+            )
+            summary = str(row["content"]) if row and row["content"] else None
+
+            # recent conversation in channel (user/assistant roles)
+            if channel_id is not None:
+                rows: Sequence[asyncpg.Record] = await conn.fetch(
+                    """
+                    SELECT role, content FROM bot_memory
+                    WHERE user_id=$1 AND channel_id=$2 AND role IN ('user','assistant')
+                    ORDER BY created_at DESC
+                    LIMIT $3
+                    """,
+                    int(user_id),
+                    int(channel_id),
+                    row_limit,
+                )
+            else:
+                rows = await conn.fetch(
+                    """
+                    SELECT role, content FROM bot_memory
+                    WHERE user_id=$1 AND role IN ('user','assistant')
+                    ORDER BY created_at DESC
+                    LIMIT $2
+                    """,
+                    int(user_id),
+                    row_limit,
+                )
+            # Rows arrive newest-first; flip them into chronological order.
+            recent = [(str(r["role"]), str(r["content"])) for r in reversed(rows)]
+            return MemorySlice(summary=summary, recent=recent)
+        finally:
+            await conn.close()
+
+    try:
+        asyncio.get_running_loop()
+    except RuntimeError:
+        # No event loop in this thread: asyncio.run() is safe to use directly.
+        return asyncio.run(run())
+
+    # A loop is already running here; asyncio.run() would raise RuntimeError,
+    # so give the coroutine its own loop on a helper thread.
+    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+        return pool.submit(lambda: asyncio.run(run())).result()
+
+
+def update_summary_with_ai(*, current_summary: Optional[str], user_text: str, bot_answer: str, answer_llm: callable) -> Optional[str]:
+    """Ask the LLM for a refreshed, concise long-term summary of the user.
+
+    Args:
+        current_summary: Existing summary text; ``None`` or empty is sent to
+            the model as ``"(leer)"``.
+        user_text: The user's latest message.
+        bot_answer: The bot's reply to that message.
+        answer_llm: ``callable(question: str, system_prompt: Optional[str]) -> str``;
+            it is invoked with ``system_prompt`` as a keyword argument.
+
+    Returns:
+        The stripped summary text, or ``None`` when the LLM call raises or
+        yields no usable (non-blank string) output.
+    """
+    # Local import keeps this block self-contained (no file-level change).
+    import logging
+
+    sys_prompt = (
+        "Du bist ein Assistent, der eine kurze, stichpunktartige Nutzer-Zusammenfassung pflegt.\n"
+        "Extrahiere nur langlebige Fakten, Präferenzen, Schreibstil/Emoji-Vorlieben, Sprache, wichtige Kontexte.\n"
+        "Halte es knapp (max. ~6 Stichpunkte), keine PII, nichts Sensibles. Aktualisiere konsistent.\n"
+    )
+    base = current_summary or "(leer)"
+    question = (
+        "Aktualisiere diese Nutzer-Zusammenfassung auf Basis der neuen Interaktion.\n\n"
+        f"Bisherige Zusammenfassung:\n{base}\n\n"
+        f"Neue Nachricht des Nutzers:\n{user_text}\n\n"
+        f"Antwort des Bots:\n{bot_answer}"
+    )
+    try:
+        updated = answer_llm(question, system_prompt=sys_prompt)
+    except Exception:
+        # Best-effort feature: never propagate, but leave a trace for operators.
+        logging.getLogger(__name__).warning("Memory summary update failed", exc_info=True)
+        return None
+    if not isinstance(updated, str):
+        return None
+    # Blank output becomes None so callers never store an empty summary.
+    return updated.strip() or None
+
diff --git a/src/discord_rag_bot/listeners/chat.py b/src/discord_rag_bot/listeners/chat.py
@@ -9,6 +9,7 @@
 from ..util.text import clip_discord_message
 from rag_core import RagResult
 from ..infrastructure.config_store import load_prompt_effective
+from ..infrastructure.memory import save_message, load_slice, update_summary_with_ai
 from ..infrastructure.gating import should_use_rag
 from ..infrastructure.language import get_language_hint
 from rag_core.metrics import discord_messages_processed_total, rag_queries_total
@@ -80,8 +81,30 @@ async def on_message(self, message: discord.Message):
             # nothing to ask
             return
 
+        def _style_prompt(base: str | None, mem_summary: str | None, recent: list[tuple[str, str]]) -> str:
+            """Compose the system prompt from base prompt, style rules and user memory.
+
+            The result is ``base``, a blank line, the fixed style rules, then
+            (when present) the user summary and up to the last six recent
+            turns, each clipped to 300 characters; outer whitespace is stripped.
+            """
+            persona = (
+                "Du antwortest hilfreich, prägnant und mit trockenem Sarkasmus, ohne unhöflich zu sein.\n"
+                "Nutze passende Discord-Emojis (z. B. 😅, 🤔, ✅, ❌, 🧠, 🔧, 📎), aber nicht übermäßig.\n"
+                "Wenn Daten fehlen, sag es ehrlich. Antworte in der Sprache des Nutzers.\n"
+            )
+            sections = []
+            if mem_summary:
+                sections.append(f"\nNutzerprofil (Zusammenfassung):\n{mem_summary}\n")
+            if recent:
+                # Short context: any role other than "user" is labelled "Bot".
+                turns = "\n".join(
+                    f"- {'User' if who == 'user' else 'Bot'}: {body[:300]}"
+                    for who, body in recent[-6:]
+                )
+                sections.append("\nLetzte Unterhaltungsschritte:\n" + turns + "\n")
+            return ((base or "") + "\n\n" + persona + "".join(sections)).strip()
+
         def run_query() -> tuple[str, list[str]]:
-            prompt = load_prompt_effective(message.guild.id if message.guild else None, message.channel.id)
+            base_prompt = load_prompt_effective(message.guild.id if message.guild else None, message.channel.id)
+            # Load user memory slice (summary + recent channel messages)
+            mem = load_slice(user_id=message.author.id, channel_id=message.channel.id)
+            prompt = _style_prompt(base_prompt, mem.summary, mem.recent)
             lang_hint = get_language_hint(question)
             if lang_hint:
                 prompt = f"{prompt}\n\nAntwortsprache: {lang_hint}"
@@ -123,6 +146,17 @@ def run_query() -> tuple[str, list[str]]:
 
         # Send friendly placeholder reply and then edit when ready
         placeholder_msg = await message.reply("🧠 Einen kleinen Moment – ich suche passende Informationen und schreibe die Antwort …")
+        # Save the incoming user message into memory (best-effort)
+        try:
+            save_message(
+                user_id=message.author.id,
+                guild_id=message.guild.id if message.guild else None,
+                channel_id=message.channel.id if hasattr(message.channel, "id") else None,
+                role="user",
+                content=message.content or "",
+            )
+        except Exception:
+            pass
         answer, sources = await asyncio.to_thread(run_query)
         if sources:
             try:
@@ -138,6 +172,43 @@ def run_query() -> tuple[str, list[str]]:
         except Exception:
             # Fallback: send a fresh reply if edit fails
             await message.reply(clip_discord_message(text))
+        # Save bot answer and update summary in background (best-effort)
+        try:
+            save_message(
+                user_id=message.author.id,
+                guild_id=message.guild.id if message.guild else None,
+                channel_id=message.channel.id if hasattr(message.channel, "id") else None,
+                role="assistant",
+                content=text,
+            )
+        except Exception:
+            pass
+        # Summarize/update user memory asynchronously
+        async def _update_summary_bg():
+            """Refresh the user's stored summary without blocking the event loop.
+
+            ``load_slice`` and ``save_message`` use ``asyncio.run`` internally and
+            the LLM call is synchronous, so each step is pushed to a worker
+            thread with ``asyncio.to_thread``. Failures are logged and otherwise
+            ignored because the summary is a best-effort feature.
+            """
+            import logging
+
+            try:
+                mem_now = await asyncio.to_thread(
+                    load_slice,
+                    user_id=message.author.id,
+                    channel_id=message.channel.id,
+                )
+                updated = await asyncio.to_thread(
+                    update_summary_with_ai,
+                    current_summary=mem_now.summary,
+                    user_text=message.content or "",
+                    bot_answer=text,
+                    answer_llm=lambda q, system_prompt: self.bot.services.rag.answer_llm(q, system_prompt=system_prompt),  # type: ignore[attr-defined]
+                )
+                if updated and updated.strip():
+                    # Summaries are stored per user, not per channel (channel_id=None).
+                    await asyncio.to_thread(
+                        save_message,
+                        user_id=message.author.id,
+                        guild_id=message.guild.id if message.guild else None,
+                        channel_id=None,
+                        role="summary",
+                        content=updated.strip(),
+                        kind="summary",
+                    )
+            except Exception:
+                logging.getLogger(__name__).warning("Background memory summary update failed", exc_info=True)
+
+        try:
+            asyncio.create_task(_update_summary_bg())
+        except Exception:
+            pass
 
 
 async def setup(bot: commands.Bot):