|
| 1 | +"""Static analysis for Fortran/Fypp source code. |
| 2 | +
|
| 3 | +Checks for patterns that indicate copy-paste bugs, non-standard constructs, |
| 4 | +and hardcoded assumptions that break under different build configurations. |
| 5 | +""" |
| 6 | + |
| 7 | +import re |
| 8 | +import sys |
| 9 | +from pathlib import Path |
| 10 | + |
# Source directory to scan (relative to repo root).
# Each check joins this onto the repo root it is given.
SRC_DIR = "src"

# Minimum stripped line length to consider for duplicate detection.
# Lines shorter than this (e.g. "end if", "end do") are ignored.
MIN_DUP_LINE_LEN = 40
| 17 | + |
| 18 | + |
def _is_comment_or_blank(stripped: str) -> bool:
    """Return True if a stripped line holds no Fortran code to analyse.

    Args:
        stripped: A source line with surrounding whitespace already removed.

    Returns:
        True when the line is empty, a Fortran comment (``!``), a Fypp
        control directive (``#:``), or a Fypp comment (``#!``).
    """
    # "#!" is Fypp's own comment syntax: such lines are dropped by the
    # preprocessor, so they must be skipped just like "!" comments.
    return not stripped or stripped.startswith(("!", "#:", "#!"))
| 22 | + |
| 23 | + |
def _fortran_fpp_files(src_dir: Path):
    """Yield the Fortran and Fypp sources found anywhere below *src_dir*.

    All ``*.f90`` paths are produced first, in sorted order, followed by all
    ``*.fpp`` paths, also in sorted order.
    """
    for pattern in ("*.f90", "*.fpp"):
        for path in sorted(src_dir.rglob(pattern)):
            yield path
| 28 | + |
| 29 | + |
def _fypp_list_entries(list_content: str) -> list:
    """Return the top-level entries of the text found between ``[`` and ``]``.

    Fypp evaluates the list as a Python expression, so it is parsed as a
    Python literal first. That keeps tuple entries such as ``('a', 'z')``
    intact instead of flattening them into their quoted components. When the
    text is not a pure literal (names, calls, slicing, ...), the quoted
    tokens are extracted with a regex instead.
    """
    import ast  # function-scope import: only this helper needs it

    try:
        return list(ast.literal_eval(f"[{list_content}]"))
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Extract single- or double-quoted entries
        return re.findall(r"['\"]([^'\"]*)['\"]", list_content)


def check_fypp_list_duplicates(repo_root: Path) -> list[str]:
    """Check for duplicate entries in Fypp ``#:for VAR in [...]`` lists.

    Copy-paste errors in broadcast lists or loop variable lists can silently
    skip a variable while broadcasting another one twice.

    Entries are compared as whole top-level list items, so a list of tuples
    is only flagged when an entire tuple is repeated, not when two distinct
    tuples happen to share one component.

    Args:
        repo_root: Repository root; ``*.fpp`` files are searched recursively
            under ``repo_root / SRC_DIR``.

    Returns:
        One formatted message per duplicate entry, each citing the 1-indexed
        line on which the ``#:for`` directive starts. Empty if none found.
    """
    errors: list[str] = []
    src_dir = repo_root / SRC_DIR

    for fpp in sorted(src_dir.rglob("*.fpp")):
        lines = fpp.read_text(encoding="utf-8").splitlines()
        rel = fpp.relative_to(repo_root)

        i = 0
        while i < len(lines):
            line = lines[i].strip()
            if line.startswith("#:for"):
                start_line = i + 1  # 1-indexed for display

                # Join '&' continuation lines *before* looking for the list,
                # so a '[' that only appears on a continuation line is seen.
                full = line
                while full.rstrip().endswith("&") and i + 1 < len(lines):
                    i += 1
                    full += " " + lines[i].strip()

                bracket_start = full.find("[")
                bracket_end = full.rfind("]")
                if " in " in full and 0 <= bracket_start < bracket_end:
                    list_content = full[bracket_start + 1:bracket_end]
                    # Drop the continuation markers left over from the join.
                    list_content = list_content.replace("&", "")

                    seen: dict[str, int] = {}
                    for pos, item in enumerate(_fypp_list_entries(list_content), 1):
                        # repr() gives a hashable key even for nested lists
                        # and keeps the string '1' distinct from the int 1.
                        key = repr(item)
                        if key in seen:
                            entry = item if isinstance(item, str) else key
                            errors.append(
                                f" {rel}:{start_line} Fypp list has duplicate"
                                f" entry '{entry}' (positions {seen[key]}"
                                f" and {pos})."
                                " Fix: one copy is likely a typo for a"
                                " different variable"
                            )
                        else:
                            seen[key] = pos
            i += 1

    return errors
| 79 | + |
| 80 | + |
def check_duplicate_lines(repo_root: Path) -> list[str]:
    """Report source lines that exactly repeat the line directly above them.

    Two identical consecutive lines are almost always a copy-paste slip, such
    as an accumulation applied twice or a subroutine argument listed twice.
    Lines are compared after stripping surrounding whitespace. Matches that
    are shorter than ``MIN_DUP_LINE_LEN``, blank, or comments/directives are
    not reported.
    """
    findings: list[str] = []

    for path in _fortran_fpp_files(repo_root / SRC_DIR):
        text = path.read_text(encoding="utf-8")
        shown_path = path.relative_to(repo_root)

        stripped_lines = [raw.strip() for raw in text.splitlines()]
        # Pair each line with its predecessor; the first line is paired with
        # an empty string, which can never satisfy the length threshold.
        predecessors = [""] + stripped_lines

        for lineno, (before, current) in enumerate(
            zip(predecessors, stripped_lines), 1
        ):
            if current != before:
                continue
            if len(current) < MIN_DUP_LINE_LEN:
                continue
            if _is_comment_or_blank(current):
                continue

            # Keep the report readable: show at most 72 characters.
            shown = current if len(current) <= 72 else current[:72] + "..."
            findings.append(
                f" {shown_path}:{lineno} identical to previous line:"
                f" '{shown}'."
                " Fix: check for accidental copy-paste"
            )

    return findings
| 113 | + |
| 114 | + |
def check_hardcoded_byte_size(repo_root: Path) -> list[str]:
    """Flag ``int(8._wp, ...)`` patterns that assume 8-byte reals.

    When MFC is built in single precision (``wp = real32``), reals are
    4 bytes. Hard-coding 8 makes MPI I/O read/write the wrong amount.
    Use ``storage_size(0._wp)/8`` instead.

    The equivalent spellings with trailing zeros (``8.0_wp``, ``8.00_wp``)
    are flagged as well, and matching is case-insensitive.

    Args:
        repo_root: Repository root; ``.f90`` and ``.fpp`` files under
            ``repo_root / SRC_DIR`` are scanned.

    Returns:
        One formatted message per offending line (1-indexed line numbers).
        Whole-line comments, blank lines and Fypp directives are skipped.
    """
    errors: list[str] = []
    src_dir = repo_root / SRC_DIR
    # "0*" accepts 8._wp and 8.0_wp alike; the leading \b keeps intrinsics
    # such as nint(...) from matching.
    byte_re = re.compile(r"\bint\s*\(\s*8\.0*_wp\b", re.IGNORECASE)

    for src in _fortran_fpp_files(src_dir):
        lines = src.read_text(encoding="utf-8").splitlines()
        rel = src.relative_to(repo_root)

        for lineno, line in enumerate(lines, 1):
            stripped = line.strip()
            if _is_comment_or_blank(stripped):
                continue
            if byte_re.search(stripped):
                errors.append(
                    f" {rel}:{lineno} hard-codes 8-byte real size."
                    " Fix: use 'storage_size(0._wp)/8' instead of"
                    " '8._wp'"
                )

    return errors
| 142 | + |
| 143 | + |
def main():
    """Run every source check and exit with status 1 if any reports a finding.

    The repository root is taken to be two directories above the directory
    containing this file. Findings are printed under a single header line,
    in the order the checks ran. Nothing is printed when all checks pass.
    """
    repo_root = Path(__file__).resolve().parents[2]

    checks = (
        check_fypp_list_duplicates,
        check_duplicate_lines,
        check_hardcoded_byte_size,
    )
    findings = [message for check in checks for message in check(repo_root)]

    if not findings:
        return

    print("Fortran/Fypp source analysis failed:")
    for message in findings:
        print(message)
    sys.exit(1)


if __name__ == "__main__":
    main()
0 commit comments