Prompt engineering

Robert Weber · Robert Weber · commit e61192300cb4 · 2026-04-12T07:38:06.000-04:00
diff --git a/novelforge/agents/chapter/__init__.py b/novelforge/agents/chapter/__init__.py
@@ -23,11 +23,16 @@
     _PATTERN_THRESHOLD,
     _SOFT_LIMIT_PER_CHAPTER,
     _SOFT_LIMITED_WORDS,
+    _LEGAL_ADJACENT_GENRES,
+    _LEGAL_TERMS,
     _call_with_content_retry,
     _draft_with_content_retry,
     _format_anti_repetition_rules,
     _log_pass_failure,
     _sanitize_for_content_policy,
+    format_vocabulary_rules,
+    get_forbidden_words,
+    get_soft_limited_words,
     scan_vocabulary_overuse,
 )
 
diff --git a/novelforge/agents/chapter/_helpers.py b/novelforge/agents/chapter/_helpers.py
@@ -230,8 +230,22 @@ def _draft_with_content_retry(
     # Bookkeeping / accounting metaphors used as emotional shorthand
     "ledger", "tally", "inventory", "audit", "balance sheet",
     "debit", "dividend",
+    # Institutional / formal register words LLMs overuse
+    "mandate", "decree", "edict",
+    "apparatus", "machinations",
 ]
 
+# Legal terms: hard-banned in every genre except the legal-adjacent ones listed below.
+# When a legal-adjacent genre is detected these become soft-limited instead.
+_LEGAL_TERMS = [
+    "verdict", "indictment", "tribunal", "acquittal", "exonerate",
+    "adjudicate", "clemency", "arbitrate", "testimony", "jurisprudence",
+    "litigate", "prosecution", "prosecute",
+]
+
+# Genres where legal terminology is contextually appropriate (soft-limited, not banned); matched by exact, case-sensitive name.
+_LEGAL_ADJACENT_GENRES = {"Crime", "Mystery", "Noir", "Thriller"}
+
 # Soft-limited words: OK once or twice per novel, but the LLM wildly overuses them
 _SOFT_LIMITED_WORDS = [
     "brittle", "tighten", "tightened", "tightening",
@@ -264,10 +278,40 @@ def _compile_word_pattern(words: list[str]) -> re.Pattern[str]:
 
 # Pre-compiled patterns for the vocabulary scanner (built once at import time)
 _FORBIDDEN_RE = _compile_word_pattern(_FORBIDDEN_WORDS)
+_LEGAL_TERMS_RE = _compile_word_pattern(_LEGAL_TERMS)
 _SOFT_LIMITED_RE = _compile_word_pattern(_SOFT_LIMITED_WORDS)
 _OVERUSED_PATTERN_RE = _compile_word_pattern(_OVERUSED_PATTERNS)
 
 
+def get_forbidden_words(genre: str = "") -> list[str]:
+    """Return a fresh list of hard-banned words; legal terms are appended unless *genre* is legal-adjacent."""
+    banned = list(_FORBIDDEN_WORDS)
+    legal_extra = [] if genre in _LEGAL_ADJACENT_GENRES else list(_LEGAL_TERMS)
+    return banned + legal_extra
+
+
+def get_soft_limited_words(genre: str = "") -> list[str]:
+    """Return a fresh list of soft-limited words; legal terms are appended only when *genre* is legal-adjacent."""
+    limited = list(_SOFT_LIMITED_WORDS)
+    legal_extra = list(_LEGAL_TERMS) if genre in _LEGAL_ADJACENT_GENRES else []
+    return limited + legal_extra
+
+
+def format_vocabulary_rules(genre: str = "") -> str:
+    """Return the genre-aware vocabulary-constraint block that is injected into agent system prompts."""
+    cap = _SOFT_LIMIT_PER_CHAPTER  # same per-chapter threshold scan_vocabulary_overuse enforces
+    plural = "" if cap == 1 else "s"  # keeps the sentence grammatical if the cap is ever raised
+    return (
+        "VOCABULARY CONSTRAINTS (strict — apply to every word you write):\n"
+        f"NEVER use these words: {', '.join(get_forbidden_words(genre))}.\n"
+        f"Limit these to at most {cap} occurrence{plural} per chapter: {', '.join(get_soft_limited_words(genre))}.\n"
+        "Avoid: accounting/legal metaphors for emotions, "
+        '"small [mercy/victory/repair]" constructions, emotions lodged in '
+        "ribs/sternum/throat, metallic taste as distress, "
+        '"jaw tightened," "the economy of someone who."'
+    )
+
+
 def _format_anti_repetition_rules() -> str:
     """Format the soft-limited words and overused patterns for prompt injection."""
     lines = []
@@ -295,16 +339,20 @@ def _count_word_matches(pattern: re.Pattern[str], text: str) -> dict[str, int]:
     return counts
 
 
-def scan_vocabulary_overuse(chapter_text: str) -> list[str]:
+def scan_vocabulary_overuse(chapter_text: str, genre: str = "") -> list[str]:
     """
     Scan a chapter for overused vocabulary from the watchlists.
 
     Returns a list of human-readable warnings for each violation found.
     Pure Python — no LLM call.  Uses pre-compiled word-boundary regexes
     so that ``"audit"`` does **not** match inside ``"auditor"`` or
     ``"ledger"`` inside ``"sledgehammer"``.
+
+    When *genre* is a legal-adjacent genre (Crime, Mystery, Noir, Thriller),
+    legal terms are soft-limited instead of hard-banned.
     """
     warnings: list[str] = []
+    is_legal_adjacent = genre in _LEGAL_ADJACENT_GENRES
 
     # Check hard-banned words
     for word, count in _count_word_matches(_FORBIDDEN_RE, chapter_text).items():
@@ -313,6 +361,22 @@ def scan_vocabulary_overuse(chapter_text: str) -> list[str]:
                 f'BANNED WORD "{word}" appears {count}x — must be removed entirely'
             )
 
+    # Check legal terms — hard-banned unless genre is legal-adjacent
+    for word, count in _count_word_matches(_LEGAL_TERMS_RE, chapter_text).items():
+        if is_legal_adjacent:
+            if count > _SOFT_LIMIT_PER_CHAPTER:
+                warnings.append(
+                    f'OVERUSED LEGAL TERM "{word}" appears {count}x in this chapter '
+                    f'(limit: {_SOFT_LIMIT_PER_CHAPTER}) — replace most occurrences '
+                    f'with varied alternatives'
+                )
+        else:
+            if count > _HARD_BAN_THRESHOLD:
+                warnings.append(
+                    f'BANNED LEGAL TERM "{word}" appears {count}x — must be removed '
+                    f'entirely (not a legal-themed novel)'
+                )
+
     # Check soft-limited words
     for word, count in _count_word_matches(_SOFT_LIMITED_RE, chapter_text).items():
         if count > _SOFT_LIMIT_PER_CHAPTER:
diff --git a/novelforge/agents/chapter/pipeline.py b/novelforge/agents/chapter/pipeline.py
@@ -10,6 +10,7 @@
     PASS_FAILURE_KEY,
     _call_with_content_retry,
     _log_pass_failure,
+    format_vocabulary_rules,
     scan_vocabulary_overuse,
 )
 from novelforge.agents.chapter.context import ChapterContext
@@ -233,18 +234,32 @@ def _run_all_chapter_agents(
     """
     if ctx is None:
         ctx = ChapterContext()
+
+    # Build the vocabulary-constraint block once for the whole pipeline.
+    # _safe appends it to the system message of every agent call it wraps,
+    # so every agent in the chain is told which words to avoid.
+    vocab_rules = format_vocabulary_rules(genre)
+
     def _check_deadline() -> None:
         """Raise ChapterTimeoutError if the per-chapter deadline has passed."""
         if deadline and time.monotonic() > deadline:
             raise ChapterTimeoutError(
                 f"Chapter {chapter_num} exceeded the {PER_CHAPTER_TIMEOUT // 60}-minute time limit."
             )
 
-    # Local shorthand: every agent call goes through the content-retry wrapper
+    # Local shorthand: every agent call goes through the content-retry wrapper.
+    # The wrapper also injects vocabulary constraints into the system message
+    # so that every prose-rewriting agent is told about forbidden words.
     def _safe(build_msgs: Callable[[str], list[dict]], txt: str, *, action: str, json_mode: bool = False) -> str:
-        """Call the LLM via the content-retry wrapper."""
+        """Call the LLM via the content-retry wrapper with vocabulary rules injected."""
+        def _build_with_vocab_rules(t: str) -> list[dict]:
+            messages = build_msgs(t)
+            if vocab_rules and messages and messages[0].get("role") == "system":
+                head = {**messages[0], "content": messages[0]["content"] + f"\n\n{vocab_rules}"}
+                messages = [head, *messages[1:]]  # new list AND new head dict: a cached prompt list is never modified
+            return messages
         return _call_with_content_retry(
-            build_msgs, txt, action=action,
+            _build_with_vocab_rules, txt, action=action,
             chapter_num=chapter_num, title=title, json_mode=json_mode,
         )
 
@@ -381,7 +396,7 @@ def _safe(build_msgs: Callable[[str], list[dict]], txt: str, *, action: str, jso
     if step_callback:
         step_callback(f"Chapter {chapter_num}: anti-LLM pass")
     text = _safe(
-        lambda t: build_anti_llm_agent_prompt(t, chapter_num, title),
+        lambda t: build_anti_llm_agent_prompt(t, chapter_num, title, genre),
         text, action=f"Chapter {chapter_num}: anti-LLM pass",
     )
 
@@ -393,18 +408,6 @@ def _safe(build_msgs: Callable[[str], list[dict]], txt: str, *, action: str, jso
         text, action=f"Chapter {chapter_num}: metaphor reduction",
     )
 
-    # Vocabulary diversity scan — pure Python, no LLM call
-    _check_deadline()
-    violations = scan_vocabulary_overuse(text)
-    if violations:
-        if step_callback:
-            step_callback(f"Chapter {chapter_num}: fixing {len(violations)} vocabulary issues")
-        logger.info("Chapter %d: vocabulary scan found %d violations", chapter_num, len(violations))
-        text = _safe(
-            lambda t: build_vocabulary_fix_prompt(t, chapter_num, title, violations),
-            text, action=f"Chapter {chapter_num}: vocabulary fix-up",
-        )
-
     _check_deadline()
     if step_callback:
         step_callback(f"Chapter {chapter_num}: quality control")
@@ -421,6 +424,20 @@ def _safe(build_msgs: Callable[[str], list[dict]], txt: str, *, action: str, jso
         text, action=f"Chapter {chapter_num}: copy edit",
     )
 
+    # Vocabulary diversity scan — pure Python, no LLM call.
+    # Runs AFTER copy edit (the last prose-rewriting agent) so that no
+    # subsequent agent can reintroduce forbidden words.
+    _check_deadline()
+    violations = scan_vocabulary_overuse(text, genre=genre)
+    if violations:
+        if step_callback:
+            step_callback(f"Chapter {chapter_num}: fixing {len(violations)} vocabulary issues")
+        logger.info("Chapter %d: vocabulary scan found %d violations", chapter_num, len(violations))
+        text = _safe(
+            lambda t: build_vocabulary_fix_prompt(t, chapter_num, title, violations),
+            text, action=f"Chapter {chapter_num}: vocabulary fix-up",
+        )
+
     _check_deadline()
     if step_callback:
         step_callback(f"Chapter {chapter_num}: summarising")
diff --git a/novelforge/agents/chapter/prompts.py b/novelforge/agents/chapter/prompts.py
@@ -7,7 +7,10 @@
 from novelforge.llm.prompts import render_prompt
 from novelforge.names import format_name_pool_for_prompt
 
-from novelforge.agents.chapter._helpers import _FORBIDDEN_WORDS, _SOFT_LIMITED_WORDS
+from novelforge.agents.chapter._helpers import (
+    get_forbidden_words,
+    get_soft_limited_words,
+)
 
 
 # ---------------------------------------------------------------------------
@@ -90,8 +93,8 @@ def build_chapter_draft_prompt(
         compression_guidance=compression_guidance or "",
         chapter_rhythm_shape=chapter_rhythm_shape or "",
         chapter_rhythm_reason=chapter_rhythm_reason or "",
-        forbidden_words=", ".join(_FORBIDDEN_WORDS),
-        soft_limited_words=", ".join(_SOFT_LIMITED_WORDS),
+        forbidden_words=", ".join(get_forbidden_words(genre)),
+        soft_limited_words=", ".join(get_soft_limited_words(genre)),
         voice_prompt=voice_prompt or "",
         perspective_prompt=perspective_prompt or "",
     )
@@ -246,12 +249,13 @@ def build_polish_agent_prompt(chapter_text: str, chapter_num: int, title: str, g
     return render_prompt("polish_agent", title=title, genre=genre, chapter_num=chapter_num, chapter_text=chapter_text)
 
 
-def build_anti_llm_agent_prompt(chapter_text: str, chapter_num: int, title: str) -> list[dict[str, str]]:
+def build_anti_llm_agent_prompt(chapter_text: str, chapter_num: int, title: str,
+                                genre: str = "") -> list[dict[str, str]]:
     """Build the anti-LLM pattern removal prompt with forbidden word lists."""
     return render_prompt(
         "anti_llm_agent", title=title, chapter_num=chapter_num,
-        chapter_text=chapter_text, forbidden_words=", ".join(_FORBIDDEN_WORDS),
-        soft_limited_words=", ".join(_SOFT_LIMITED_WORDS),
+        chapter_text=chapter_text, forbidden_words=", ".join(get_forbidden_words(genre)),
+        soft_limited_words=", ".join(get_soft_limited_words(genre)),
     )