Skip to content

Commit 55dcb47

Browse files
sbryngelson and claude committed
Fix Fortran % accessor and AST artifacts in physics docs formatting
Doxygen consumes % in %<word> even inside backtick code spans, causing tokens like bc_y%beg to render as bc_ybeg. Escape with \% to preserve the Fortran struct accessor in rendered output. Also clean up AST extraction artifacts: istr/jstr → i/j, remove (got varname) noise, and rewrite f-string value placeholders into readable prose. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 978f922 commit 55dcb47

1 file changed

Lines changed: 37 additions & 3 deletions

File tree

toolchain/mfc/gen_physics_docs.py

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,50 @@
5757
# Matches code-like tokens in a validation message so they can be wrapped in
# backticks.  A token immediately preceded by a backtick is skipped, so text
# that is already wrapped is not wrapped twice.  Alternatives, tried in order:
#   1. Fortran derived-type accessors, optionally indexed and chained:
#      bc_y%beg, patch_icpp(i)%alpha_rho(1), a%b%c
#   2. A fixed list of bare parameter names, optionally indexed: alpha, Re(1)
#   3. Lowercase-initial identifiers containing an underscore, optionally
#      indexed: num_fluids, alpha_rho(1)
#   4. The single-letter names m, n and p
# The trailing (?!\w) keeps a match from ending in the middle of a word.
_CODE_RE = re.compile(
    r"(?<!`)\b("
    # One or more %component groups: a single group would wrap only "a%b" of
    # "a%b%c" and leave "%c" outside the code span.
    r"\w+(?:\([^)]*\))?(?:%\w+(?:\([^)]*\))?)+"
    r"|(?:alpha|vel|Re|dt|nb|sigma|mhd|igr|Bx0|viscous|thermal|polytropic|relativity"
    r"|rhoref|pref|var|wavelength|npulse|hypoelasticity)(?:\([^)]*\))?"
    r"|[a-z]\w*_\w+(?:\([^)]*\))?"
    r"|[mnp]"
    r")(?!\w)"
)
6566

6667

6768
# Clean-ups for artifacts left in messages by AST extraction, applied in the
# order listed.  Each entry is (compiled pattern, replacement).
_MESSAGE_CLEANUPS = (
    # f-string index variables: istr -> i, jstr -> j
    (re.compile(r"\bistr\b"), "i"),
    (re.compile(r"\bjstr\b"), "j"),
    # "(got <varname>)" names a Python variable, which is not useful in docs
    (re.compile(r"\s*\(got \w+\)"), ""),
    # f-string value placeholders that surface as bare variable names,
    # e.g. "for pulse = pulse" -> "for the given pulse"
    (re.compile(r"= \w+ implies physical (\w+) = \w+"), r"implies physical \1"),
    (re.compile(r"= (?:vel2|vel3) but"), "is nonzero but"),
    (re.compile(r"for (support|pulse) = \1\b"), r"for the given \1"),
)

# A complete markdown code span: an opening run of backticks, the content, and
# a closing run of the same length.  Matching whole spans (rather than any
# text that happens to sit between two backticks) keeps the prose *between*
# two spans from being mistaken for a span.
_CODE_SPAN_RE = re.compile(r"(?<!`)(`+)(?!`)(.+?)(?<!`)\1(?!`)", re.DOTALL)


def _escape_percent_in_code_spans(text: str) -> str:
    r"""Return *text* with every ``%`` inside a code span rewritten as ``\%``."""

    def _escape(match):
        fence, content = match.group(1), match.group(2)
        return fence + content.replace("%", "\\%") + fence

    return _CODE_SPAN_RE.sub(_escape, text)


def _format_message(msg: str) -> str:
    r"""Format a validation message for Doxygen-compatible markdown.

    Steps, in order:

    1. Clean AST extraction artifacts: ``istr``/``jstr`` become ``i``/``j``,
       ``(got varname)`` is removed, and f-string value placeholders are
       rewritten into readable prose.
    2. Wrap code-like parameter references in backticks (``_CODE_RE``).
    3. Escape the Fortran ``%`` accessor inside code spans as ``\%``.
       Doxygen treats ``%<word>`` as "suppress auto-link" and consumes the
       ``%``, even inside a backtick code span; ``\%`` yields a literal ``%``.

    Args:
        msg: The raw validation message.

    Returns:
        The formatted message.
    """
    for pattern, replacement in _MESSAGE_CLEANUPS:
        msg = pattern.sub(replacement, msg)

    # Wrap code-like tokens in backticks
    msg = _CODE_RE.sub(r"`\1`", msg)

    # Escape only inside properly paired spans, so a literal % in the prose
    # between two spans cannot shift the pairing and leave a real accessor
    # unescaped.
    return _escape_percent_in_code_spans(msg)
70104

71105

72106
def _stages_str(stages: Set[str]) -> str:

0 commit comments

Comments
 (0)