@@ -230,8 +230,22 @@ def _draft_with_content_retry(
230230 # Bookkeeping / accounting metaphors used as emotional shorthand
231231 "ledger" , "tally" , "inventory" , "audit" , "balance sheet" ,
232232 "debit" , "dividend" ,
233+ # Institutional / formal register words LLMs overuse
234+ "mandate" , "decree" , "edict" ,
235+ "apparatus" , "machinations" ,
233236]
234237
# Legal/courtroom vocabulary. Outside legal-adjacent genres these words are
# treated as forbidden; for a legal-adjacent genre (see
# _LEGAL_ADJACENT_GENRES just below) they are soft-limited instead.
_LEGAL_TERMS: list[str] = [
    "verdict",
    "indictment",
    "tribunal",
    "acquittal",
    "exonerate",
    "adjudicate",
    "clemency",
    "arbitrate",
    "testimony",
    "jurisprudence",
    "litigate",
    "prosecution",
    "prosecute",
]

# Genres in which legal terminology fits the story, so it is soft-limited
# rather than banned. Lookups are plain ``genre in ...`` membership tests,
# i.e. the genre string must match one of these entries exactly
# (case-sensitive).
_LEGAL_ADJACENT_GENRES: set[str] = {"Crime", "Mystery", "Noir", "Thriller"}
248+
235249# Soft-limited words: OK once or twice per novel, but the LLM wildly overuses them
236250_SOFT_LIMITED_WORDS = [
237251 "brittle" , "tighten" , "tightened" , "tightening" ,
@@ -264,10 +278,40 @@ def _compile_word_pattern(words: list[str]) -> re.Pattern[str]:
264278
# Regexes used by the vocabulary scanner, one per watchlist. Each is compiled
# a single time, when this module is imported, and then reused for every
# chapter that gets scanned.
_FORBIDDEN_RE: re.Pattern[str] = _compile_word_pattern(_FORBIDDEN_WORDS)
_LEGAL_TERMS_RE: re.Pattern[str] = _compile_word_pattern(_LEGAL_TERMS)
_SOFT_LIMITED_RE: re.Pattern[str] = _compile_word_pattern(_SOFT_LIMITED_WORDS)
_OVERUSED_PATTERN_RE: re.Pattern[str] = _compile_word_pattern(_OVERUSED_PATTERNS)
269284
270285
def get_forbidden_words(genre: str = "") -> list[str]:
    """Build the list of words that must never appear for *genre*.

    The base forbidden list is always included. The legal terms are appended
    after it unless *genre* is one of the legal-adjacent genres (exact,
    case-sensitive match), in which case only the base list is returned.

    A new list is returned on every call, so callers may mutate the result
    without affecting the module-level watchlists.
    """
    words = [*_FORBIDDEN_WORDS]
    if genre not in _LEGAL_ADJACENT_GENRES:
        # Not a legal-themed novel: legal vocabulary is banned outright.
        words.extend(_LEGAL_TERMS)
    return words
291+
292+
def get_soft_limited_words(genre: str = "") -> list[str]:
    """Build the list of soft-limited words that applies to *genre*.

    The base soft-limited list is always included. When *genre* is one of the
    legal-adjacent genres (exact, case-sensitive match) the legal terms are
    appended after it; for any other genre they are left out.

    A new list is returned on every call, so callers may mutate the result
    without affecting the module-level watchlists.
    """
    words = [*_SOFT_LIMITED_WORDS]
    if genre in _LEGAL_ADJACENT_GENRES:
        # Legal-themed novel: legal vocabulary is allowed, but rationed.
        words.extend(_LEGAL_TERMS)
    return words
298+
299+
def format_vocabulary_rules(genre: str = "") -> str:
    """Render the vocabulary constraints for *genre* as a prompt-ready text block.

    The block has four parts, in order: a header line, the comma-separated
    forbidden words, the comma-separated soft-limited words, and a fixed
    sentence listing phrasing patterns to avoid. The first three parts each
    end with a newline; the final sentence does not.

    Which list the legal terms land in depends on *genre*; that decision is
    delegated to ``get_forbidden_words`` and ``get_soft_limited_words``.
    """
    banned_csv = ", ".join(get_forbidden_words(genre))
    limited_csv = ", ".join(get_soft_limited_words(genre))

    header = "VOCABULARY CONSTRAINTS (strict — apply to every word you write):\n"
    never_line = f"NEVER use these words: {banned_csv}.\n"
    limit_line = f"Limit these to at most 1 occurrence per chapter: {limited_csv}.\n"
    # Genre-independent guidance about constructions rather than single words.
    avoid_line = (
        "Avoid: accounting/legal metaphors for emotions, "
        '"small [mercy/victory/repair]" constructions, emotions lodged in '
        "ribs/sternum/throat, metallic taste as distress, "
        '"jaw tightened," "the economy of someone who."'
    )

    return header + never_line + limit_line + avoid_line
313+
314+
271315def _format_anti_repetition_rules () -> str :
272316 """Format the soft-limited words and overused patterns for prompt injection."""
273317 lines = []
@@ -295,16 +339,20 @@ def _count_word_matches(pattern: re.Pattern[str], text: str) -> dict[str, int]:
295339 return counts
296340
297341
298- def scan_vocabulary_overuse (chapter_text : str ) -> list [str ]:
342+ def scan_vocabulary_overuse (chapter_text : str , genre : str = "" ) -> list [str ]:
299343 """
300344 Scan a chapter for overused vocabulary from the watchlists.
301345
302346 Returns a list of human-readable warnings for each violation found.
303347 Pure Python — no LLM call. Uses pre-compiled word-boundary regexes
304348 so that ``"audit"`` does **not** match inside ``"auditor"`` or
305349 ``"ledger"`` inside ``"sledgehammer"``.
350+
351+ When *genre* is a legal-adjacent genre (Crime, Mystery, Noir, Thriller),
352+ legal terms are soft-limited instead of hard-banned.
306353 """
307354 warnings : list [str ] = []
355+ is_legal_adjacent = genre in _LEGAL_ADJACENT_GENRES
308356
309357 # Check hard-banned words
310358 for word , count in _count_word_matches (_FORBIDDEN_RE , chapter_text ).items ():
@@ -313,6 +361,22 @@ def scan_vocabulary_overuse(chapter_text: str) -> list[str]:
313361 f'BANNED WORD "{ word } " appears { count } x — must be removed entirely'
314362 )
315363
364+ # Check legal terms — hard-banned unless genre is legal-adjacent
365+ for word , count in _count_word_matches (_LEGAL_TERMS_RE , chapter_text ).items ():
366+ if is_legal_adjacent :
367+ if count > _SOFT_LIMIT_PER_CHAPTER :
368+ warnings .append (
369+ f'OVERUSED LEGAL TERM "{ word } " appears { count } x in this chapter '
370+ f'(limit: { _SOFT_LIMIT_PER_CHAPTER } ) — replace most occurrences '
371+ f'with varied alternatives'
372+ )
373+ else :
374+ if count > _HARD_BAN_THRESHOLD :
375+ warnings .append (
376+ f'BANNED LEGAL TERM "{ word } " appears { count } x — must be removed '
377+ f'entirely (not a legal-themed novel)'
378+ )
379+
316380 # Check soft-limited words
317381 for word , count in _count_word_matches (_SOFT_LIMITED_RE , chapter_text ).items ():
318382 if count > _SOFT_LIMIT_PER_CHAPTER :
0 commit comments