Skip to content

Commit 542cc19

Browse files
neoneye and claude committed
napkin-math(compress): preserve acronym casing in the deterministic if/then rewrite
Review feedback on PR #750 (third round): the rewrite naively lowercased the first character of the subject and consequence, which damages acronyms — 'API job queue latency exceeds 100ms, ...' was being rewritten to 'If aPI job queue latency...' which is visibly broken. Fix: only lowercase the first character when it is followed by a lowercase letter (a regular capitalised word like 'Middleware'). Acronyms like 'API' / 'OPC UA' / digit prefixes like '5G' all have a non-lowercase second character, so they are left unchanged. Two new regression tests: (1) 'API job queue latency exceeds 100ms, ...' rewrites to 'If API job queue latency exceeds 100ms, then ...' with the acronym intact, (2) 'Middleware development bid exceeds $75,000, ...' still rewrites to 'If middleware development bid exceeds $75,000, then ...' (regular capitalisation still adjusted). 44 unit tests pass (42 prior + 2 new). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent b495818 commit 542cc19

2 files changed

Lines changed: 57 additions & 6 deletions

File tree

worker_plan/worker_plan_internal/parameter_extraction/compress_report_section.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,6 +1105,21 @@ def format_scored_item_line(item: PublicScoredItem) -> str:
11051105
)
11061106

11071107

1108+
def _lowercase_first_preserving_acronyms(text: str) -> str:
    """Lowercase the first character of ``text`` unless that would damage
    an acronym or a mixed-case name.

    The first character is lowercased only when all of the following hold:

    * it is an uppercase letter,
    * the second character is a lowercase letter, and
    * the rest of the first whitespace-delimited word contains no further
      uppercase letter.

    A regular capitalised word such as ``Middleware`` therefore becomes
    ``middleware``, while ``API`` / ``OPC UA`` / ``5G`` (second character
    is not a lowercase letter) and mixed-case tokens such as ``SaaS`` /
    ``IoT`` / ``OpenAI`` (an uppercase letter later in the word) are
    returned unchanged.

    Strings shorter than two characters are returned unchanged.
    """
    if len(text) < 2:
        return text
    if not (text[0].isupper() and text[1].islower()):
        return text
    # text[0] is an uppercase letter here, so split() yields at least one word.
    first_word = text.split(maxsplit=1)[0]
    if any(ch.isupper() for ch in first_word[1:]):
        # Mixed-case token: lowercasing the head would give e.g. ``saaS``.
        return text
    return text[0].lower() + text[1:]
1121+
1122+
11081123
def gate_shape_promotion(line: str) -> Optional[str]:
11091124
"""Return the if/then form of ``line`` if it has any recognised gate
11101125
structural shape, else ``None``.
@@ -1126,6 +1141,12 @@ def gate_shape_promotion(line: str) -> Optional[str]:
11261141
``If middleware development bid exceeds $75,000, then consuming
11271142
budget...`` so the downstream consumer reads a uniformly-shaped gate.
11281143
1144+
Casing in the rewrite is preserved for acronyms and proper nouns —
1145+
``API job queue latency...`` becomes ``If API job queue latency...``,
1146+
not ``If aPI...``. The case adjustment only fires on regular
1147+
capitalised words where the first letter being lowercased reads
1148+
naturally mid-sentence.
1149+
11291150
Returns ``None`` when the line matches neither shape — the promoter
11301151
must not steal qualitative risks (triggers with a non-numeric source
11311152
side, or risks framed without a comparison verb) into the gates pool.
@@ -1140,14 +1161,10 @@ def gate_shape_promotion(line: str) -> Optional[str]:
11401161
m = DECLARATIVE_GATE_SHAPE_PATTERN.match(s)
11411162
if m is None:
11421163
return None
1143-
subject = m.group("subject").strip()
1164+
subject = _lowercase_first_preserving_acronyms(m.group("subject").strip())
11441165
verb = " ".join(m.group("verb").split())
11451166
threshold = m.group("threshold").strip()
1146-
consequence = m.group("consequence").strip()
1147-
if subject and subject[0].isupper():
1148-
subject = subject[0].lower() + subject[1:]
1149-
if consequence and consequence[0].isupper():
1150-
consequence = consequence[0].lower() + consequence[1:]
1167+
consequence = _lowercase_first_preserving_acronyms(m.group("consequence").strip())
11511168
return f"If {subject} {verb} {threshold}, then {consequence}"
11521169

11531170

worker_plan/worker_plan_internal/parameter_extraction/tests/test_compress_report_section.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -662,6 +662,40 @@ def test_gate_shape_promotion_passes_through_canonical_if_then() -> None:
662662
assert gate_shape_promotion(line) == line
663663

664664

665+
def test_gate_shape_promotion_preserves_acronyms_in_subject() -> None:
    """An all-caps acronym at the start of the subject (here ``API``) must
    survive the if/then rewrite untouched; only ordinary capitalised words
    are meant to have their first letter lowercased."""
    from worker_plan_internal.parameter_extraction.compress_report_section import (
        gate_shape_promotion,
    )

    declarative = (
        "API job queue latency exceeds 100ms, "
        "requiring control board upgrades to meet the responsiveness target."
    )
    promoted = gate_shape_promotion(declarative)

    # The line must be recognised as a gate at all before casing is checked.
    assert promoted is not None
    # The acronym keeps its casing right after the "If " prefix ...
    assert promoted.startswith("If API "), promoted
    # ... and the mangled form never appears anywhere in the output.
    assert "aPI" not in promoted
683+
684+
685+
def test_gate_shape_promotion_lowercases_regular_capitalised_subject() -> None:
    """A subject that starts with an ordinary capitalised word is lowercased
    so it reads naturally after the ``If`` prefix: ``Middleware`` must come
    out as ``middleware``."""
    from worker_plan_internal.parameter_extraction.compress_report_section import (
        gate_shape_promotion,
    )

    declarative = "Middleware development bid exceeds $75,000, scope is cut."
    promoted = gate_shape_promotion(declarative)

    # The line must be recognised as a gate before the casing is inspected.
    assert promoted is not None
    assert promoted.startswith("If middleware development bid "), promoted
697+
698+
665699
def test_promote_gate_shaped_risks_moves_misfiled_gate() -> None:
666700
"""Focal v53c scenario: gates emitted some items but missed a tripwire;
667701
risks emitted the tripwire with if/then numeric shape. Promotion

0 commit comments

Comments
 (0)