Skip to content

Commit e611923

Browse files
Robert Weber
authored and committed
Prompt engineering
1 parent f0c35dc commit e611923

4 files changed

Lines changed: 113 additions & 23 deletions

File tree

novelforge/agents/chapter/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,16 @@
2323
_PATTERN_THRESHOLD,
2424
_SOFT_LIMIT_PER_CHAPTER,
2525
_SOFT_LIMITED_WORDS,
26+
_LEGAL_ADJACENT_GENRES,
27+
_LEGAL_TERMS,
2628
_call_with_content_retry,
2729
_draft_with_content_retry,
2830
_format_anti_repetition_rules,
2931
_log_pass_failure,
3032
_sanitize_for_content_policy,
33+
format_vocabulary_rules,
34+
get_forbidden_words,
35+
get_soft_limited_words,
3136
scan_vocabulary_overuse,
3237
)
3338

novelforge/agents/chapter/_helpers.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,8 +230,22 @@ def _draft_with_content_retry(
230230
# Bookkeeping / accounting metaphors used as emotional shorthand
231231
"ledger", "tally", "inventory", "audit", "balance sheet",
232232
"debit", "dividend",
233+
# Institutional / formal register words LLMs overuse
234+
"mandate", "decree", "edict",
235+
"apparatus", "machinations",
233236
]
234237

238+
# Courtroom / legal vocabulary. These are hard-banned in every genre except
# the crime-adjacent ones listed in _LEGAL_ADJACENT_GENRES, where they are
# treated as soft-limited words instead of forbidden ones.
_LEGAL_TERMS = [
    "verdict",
    "indictment",
    "tribunal",
    "acquittal",
    "exonerate",
    "adjudicate",
    "clemency",
    "arbitrate",
    "testimony",
    "jurisprudence",
    "litigate",
    "prosecution",
    "prosecute",
]

# Genres in which legal terminology is contextually appropriate, so the legal
# terms above are soft-limited rather than banned.
# NOTE: lookups against this set are plain membership tests, so a genre must
# match one of these strings exactly (including capitalisation).
_LEGAL_ADJACENT_GENRES = {
    "Crime",
    "Mystery",
    "Noir",
    "Thriller",
}
248+
235249
# Soft-limited words: OK once or twice per novel, but the LLM wildly overuses them
236250
_SOFT_LIMITED_WORDS = [
237251
"brittle", "tighten", "tightened", "tightening",
@@ -264,10 +278,40 @@ def _compile_word_pattern(words: list[str]) -> re.Pattern[str]:
264278

265279
# Pre-compiled patterns for the vocabulary scanner (built once at import time)
266280
_FORBIDDEN_RE = _compile_word_pattern(_FORBIDDEN_WORDS)
281+
_LEGAL_TERMS_RE = _compile_word_pattern(_LEGAL_TERMS)
267282
_SOFT_LIMITED_RE = _compile_word_pattern(_SOFT_LIMITED_WORDS)
268283
_OVERUSED_PATTERN_RE = _compile_word_pattern(_OVERUSED_PATTERNS)
269284

270285

286+
def get_forbidden_words(genre: str = "") -> list[str]:
    """Build the hard-banned word list that applies to *genre*.

    The base ``_FORBIDDEN_WORDS`` are always included. The legal terms in
    ``_LEGAL_TERMS`` are appended as well, unless *genre* is a member of
    ``_LEGAL_ADJACENT_GENRES``.

    The genre check is an exact, case-sensitive membership test, so the
    default empty string (and any unrecognised genre) gets the legal terms
    added to the ban list.

    Returns:
        A freshly built list; the module-level word lists are not modified
        and the caller may mutate the result.
    """
    banned = [*_FORBIDDEN_WORDS]
    if genre not in _LEGAL_ADJACENT_GENRES:
        banned.extend(_LEGAL_TERMS)
    return banned
291+
292+
293+
def get_soft_limited_words(genre: str = "") -> list[str]:
    """Build the soft-limited word list that applies to *genre*.

    The base ``_SOFT_LIMITED_WORDS`` are always included. When *genre* is a
    member of ``_LEGAL_ADJACENT_GENRES`` (exact, case-sensitive match) the
    legal terms in ``_LEGAL_TERMS`` are appended after them.

    Returns:
        A freshly built list; the module-level word lists are not modified
        and the caller may mutate the result.
    """
    legal_extras = _LEGAL_TERMS if genre in _LEGAL_ADJACENT_GENRES else ()
    return [*_SOFT_LIMITED_WORDS, *legal_extras]
298+
299+
300+
def format_vocabulary_rules(genre: str = "") -> str:
    """Render the vocabulary-constraint block injected into agent system prompts.

    The block has four newline-separated parts: a header, the comma-separated
    forbidden words for *genre*, the comma-separated soft-limited words for
    *genre*, and a fixed list of phrasings to avoid.

    Args:
        genre: Forwarded unchanged to ``get_forbidden_words`` and
            ``get_soft_limited_words`` to select the genre-specific lists.

    Returns:
        The constraint text as a single string (no trailing newline).
    """
    banned_csv = ", ".join(get_forbidden_words(genre))
    limited_csv = ", ".join(get_soft_limited_words(genre))

    # Fixed stylistic guidance; identical for every genre.
    avoid_clause = (
        "Avoid: accounting/legal metaphors for emotions, "
        '"small [mercy/victory/repair]" constructions, emotions lodged in '
        "ribs/sternum/throat, metallic taste as distress, "
        '"jaw tightened," "the economy of someone who."'
    )

    # NOTE(review): the "at most 1 occurrence" wording is hard-coded here,
    # whereas the scanner compares counts against _SOFT_LIMIT_PER_CHAPTER.
    # Confirm the two are meant to stay independent.
    rule_lines = [
        "VOCABULARY CONSTRAINTS (strict — apply to every word you write):",
        f"NEVER use these words: {banned_csv}.",
        f"Limit these to at most 1 occurrence per chapter: {limited_csv}.",
        avoid_clause,
    ]
    return "\n".join(rule_lines)
313+
314+
271315
def _format_anti_repetition_rules() -> str:
272316
"""Format the soft-limited words and overused patterns for prompt injection."""
273317
lines = []
@@ -295,16 +339,20 @@ def _count_word_matches(pattern: re.Pattern[str], text: str) -> dict[str, int]:
295339
return counts
296340

297341

298-
def scan_vocabulary_overuse(chapter_text: str) -> list[str]:
342+
def scan_vocabulary_overuse(chapter_text: str, genre: str = "") -> list[str]:
299343
"""
300344
Scan a chapter for overused vocabulary from the watchlists.
301345
302346
Returns a list of human-readable warnings for each violation found.
303347
Pure Python — no LLM call. Uses pre-compiled word-boundary regexes
304348
so that ``"audit"`` does **not** match inside ``"auditor"`` or
305349
``"ledger"`` inside ``"sledgehammer"``.
350+
351+
When *genre* is a legal-adjacent genre (Crime, Mystery, Noir, Thriller),
352+
legal terms are soft-limited instead of hard-banned.
306353
"""
307354
warnings: list[str] = []
355+
is_legal_adjacent = genre in _LEGAL_ADJACENT_GENRES
308356

309357
# Check hard-banned words
310358
for word, count in _count_word_matches(_FORBIDDEN_RE, chapter_text).items():
@@ -313,6 +361,22 @@ def scan_vocabulary_overuse(chapter_text: str) -> list[str]:
313361
f'BANNED WORD "{word}" appears {count}x — must be removed entirely'
314362
)
315363

364+
# Check legal terms — hard-banned unless genre is legal-adjacent
365+
for word, count in _count_word_matches(_LEGAL_TERMS_RE, chapter_text).items():
366+
if is_legal_adjacent:
367+
if count > _SOFT_LIMIT_PER_CHAPTER:
368+
warnings.append(
369+
f'OVERUSED LEGAL TERM "{word}" appears {count}x in this chapter '
370+
f'(limit: {_SOFT_LIMIT_PER_CHAPTER}) — replace most occurrences '
371+
f'with varied alternatives'
372+
)
373+
else:
374+
if count > _HARD_BAN_THRESHOLD:
375+
warnings.append(
376+
f'BANNED LEGAL TERM "{word}" appears {count}x — must be removed '
377+
f'entirely (not a legal-themed novel)'
378+
)
379+
316380
# Check soft-limited words
317381
for word, count in _count_word_matches(_SOFT_LIMITED_RE, chapter_text).items():
318382
if count > _SOFT_LIMIT_PER_CHAPTER:

novelforge/agents/chapter/pipeline.py

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
PASS_FAILURE_KEY,
1111
_call_with_content_retry,
1212
_log_pass_failure,
13+
format_vocabulary_rules,
1314
scan_vocabulary_overuse,
1415
)
1516
from novelforge.agents.chapter.context import ChapterContext
@@ -233,18 +234,32 @@ def _run_all_chapter_agents(
233234
"""
234235
if ctx is None:
235236
ctx = ChapterContext()
237+
238+
# Build the vocabulary-constraint block once for the whole pipeline.
239+
# Every prose-rewriting agent gets this injected into its system prompt
240+
# so forbidden words are never introduced by any agent in the chain.
241+
vocab_rules = format_vocabulary_rules(genre)
242+
236243
def _check_deadline() -> None:
237244
"""Raise ChapterTimeoutError if the per-chapter deadline has passed."""
238245
if deadline and time.monotonic() > deadline:
239246
raise ChapterTimeoutError(
240247
f"Chapter {chapter_num} exceeded the {PER_CHAPTER_TIMEOUT // 60}-minute time limit."
241248
)
242249

243-
# Local shorthand: every agent call goes through the content-retry wrapper
250+
# Local shorthand: every agent call goes through the content-retry wrapper.
251+
# The wrapper also injects vocabulary constraints into the system message
252+
# so that every prose-rewriting agent is told about forbidden words.
244253
def _safe(build_msgs: Callable[[str], list[dict]], txt: str, *, action: str, json_mode: bool = False) -> str:
    """Call the LLM via the content-retry wrapper with vocabulary rules injected.

    Args:
        build_msgs: Builds the chat message list for a given chapter text.
        txt: The chapter text handed to the retry wrapper.
        action: Label for this step, forwarded to the retry wrapper.
        json_mode: Forwarded unchanged to the retry wrapper.

    Returns:
        Whatever ``_call_with_content_retry`` returns for this call.
    """
    def _build_with_vocab_rules(t: str) -> list[dict]:
        """Build the messages and append the vocabulary rules to a leading system message."""
        messages = build_msgs(t)
        if vocab_rules and messages and messages[0].get("role") == "system":
            # Copy-on-write for BOTH the system message and the list holding
            # it. Assigning into ``messages[0]`` would still modify the list
            # object returned by ``build_msgs``; if that list is cached or
            # reused, the rules block would be appended again on every call.
            system_msg = dict(messages[0])
            system_msg["content"] += f"\n\n{vocab_rules}"
            messages = [system_msg, *messages[1:]]
        return messages

    return _call_with_content_retry(
        _build_with_vocab_rules, txt, action=action,
        chapter_num=chapter_num, title=title, json_mode=json_mode,
    )
250265

@@ -381,7 +396,7 @@ def _safe(build_msgs: Callable[[str], list[dict]], txt: str, *, action: str, jso
381396
if step_callback:
382397
step_callback(f"Chapter {chapter_num}: anti-LLM pass")
383398
text = _safe(
384-
lambda t: build_anti_llm_agent_prompt(t, chapter_num, title),
399+
lambda t: build_anti_llm_agent_prompt(t, chapter_num, title, genre),
385400
text, action=f"Chapter {chapter_num}: anti-LLM pass",
386401
)
387402

@@ -393,18 +408,6 @@ def _safe(build_msgs: Callable[[str], list[dict]], txt: str, *, action: str, jso
393408
text, action=f"Chapter {chapter_num}: metaphor reduction",
394409
)
395410

396-
# Vocabulary diversity scan — pure Python, no LLM call
397-
_check_deadline()
398-
violations = scan_vocabulary_overuse(text)
399-
if violations:
400-
if step_callback:
401-
step_callback(f"Chapter {chapter_num}: fixing {len(violations)} vocabulary issues")
402-
logger.info("Chapter %d: vocabulary scan found %d violations", chapter_num, len(violations))
403-
text = _safe(
404-
lambda t: build_vocabulary_fix_prompt(t, chapter_num, title, violations),
405-
text, action=f"Chapter {chapter_num}: vocabulary fix-up",
406-
)
407-
408411
_check_deadline()
409412
if step_callback:
410413
step_callback(f"Chapter {chapter_num}: quality control")
@@ -421,6 +424,20 @@ def _safe(build_msgs: Callable[[str], list[dict]], txt: str, *, action: str, jso
421424
text, action=f"Chapter {chapter_num}: copy edit",
422425
)
423426

427+
# Vocabulary diversity scan — pure Python, no LLM call.
428+
# Runs AFTER copy edit (the last prose-rewriting agent) so that no
429+
# subsequent agent can reintroduce forbidden words.
430+
_check_deadline()
431+
violations = scan_vocabulary_overuse(text, genre=genre)
432+
if violations:
433+
if step_callback:
434+
step_callback(f"Chapter {chapter_num}: fixing {len(violations)} vocabulary issues")
435+
logger.info("Chapter %d: vocabulary scan found %d violations", chapter_num, len(violations))
436+
text = _safe(
437+
lambda t: build_vocabulary_fix_prompt(t, chapter_num, title, violations),
438+
text, action=f"Chapter {chapter_num}: vocabulary fix-up",
439+
)
440+
424441
_check_deadline()
425442
if step_callback:
426443
step_callback(f"Chapter {chapter_num}: summarising")

novelforge/agents/chapter/prompts.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77
from novelforge.llm.prompts import render_prompt
88
from novelforge.names import format_name_pool_for_prompt
99

10-
from novelforge.agents.chapter._helpers import _FORBIDDEN_WORDS, _SOFT_LIMITED_WORDS
10+
from novelforge.agents.chapter._helpers import (
11+
get_forbidden_words,
12+
get_soft_limited_words,
13+
)
1114

1215

1316
# ---------------------------------------------------------------------------
@@ -90,8 +93,8 @@ def build_chapter_draft_prompt(
9093
compression_guidance=compression_guidance or "",
9194
chapter_rhythm_shape=chapter_rhythm_shape or "",
9295
chapter_rhythm_reason=chapter_rhythm_reason or "",
93-
forbidden_words=", ".join(_FORBIDDEN_WORDS),
94-
soft_limited_words=", ".join(_SOFT_LIMITED_WORDS),
96+
forbidden_words=", ".join(get_forbidden_words(genre)),
97+
soft_limited_words=", ".join(get_soft_limited_words(genre)),
9598
voice_prompt=voice_prompt or "",
9699
perspective_prompt=perspective_prompt or "",
97100
)
@@ -246,12 +249,13 @@ def build_polish_agent_prompt(chapter_text: str, chapter_num: int, title: str, g
246249
return render_prompt("polish_agent", title=title, genre=genre, chapter_num=chapter_num, chapter_text=chapter_text)
247250

248251

249-
def build_anti_llm_agent_prompt(chapter_text: str, chapter_num: int, title: str,
                                genre: str = "") -> list[dict[str, str]]:
    """Build the anti-LLM pattern removal prompt with forbidden word lists.

    Args:
        chapter_text: Chapter prose passed to the template.
        chapter_num: Chapter number passed to the template.
        title: Title passed to the template.
        genre: Selects the genre-specific forbidden and soft-limited word
            lists; it is not itself passed to the template.

    Returns:
        The message list produced by rendering the ``anti_llm_agent`` prompt.
    """
    banned_csv = ", ".join(get_forbidden_words(genre))
    limited_csv = ", ".join(get_soft_limited_words(genre))
    return render_prompt(
        "anti_llm_agent",
        title=title,
        chapter_num=chapter_num,
        chapter_text=chapter_text,
        forbidden_words=banned_csv,
        soft_limited_words=limited_csv,
    )
256260

257261

0 commit comments

Comments
 (0)