|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""Validate reference.conf comment coverage. |
| 3 | +
|
| 4 | +Rules enforced: |
| 5 | + 1. Every user-defined key line must have a comment. |
| 6 | + 2. A key is documented by either an inline comment on the same line or a |
| 7 | + comment on the immediately preceding line. Blank lines do not count. |
| 8 | + 3. Object fields repeated across array elements are checked only on their |
| 9 | + first occurrence within that array. |
| 10 | +
|
| 11 | +Design scope — basic coverage gate, not a full HOCON parser |
| 12 | +----------------------------------------------------------- |
| 13 | +This script is deliberately line-oriented. pyhocon is not used because it |
| 14 | +discards comments, and this gate only needs enough structure to track braces |
| 15 | +and arrays. |
| 16 | +
|
| 17 | +As a consequence, several HOCON constructs are handled in a simplified way. |
| 18 | +Each known limitation is listed below together with its practical risk level |
| 19 | +for reference.conf. The gate is intentionally kept simple: reference.conf |
| 20 | +uses a small, stable subset of HOCON syntax, and the constructs below are |
| 21 | +either forbidden by the project's config conventions or have never appeared |
| 22 | +in the file. |
| 23 | +
|
| 24 | +Known limitations (all rated LOW risk for reference.conf): |
| 25 | +
|
| 26 | + A. Silent miss — keys matched by none of the patterns below are neither |
| 27 | + checked nor flagged; they pass silently: |
| 28 | +
|
| 29 | + * Quoted keys: "my-key" = value |
| 30 | + KEY_LINE requires [A-Za-z_] at the start; a leading '"' never matches. |
| 31 | + reference.conf uses only plain lowerCamelCase keys — risk: none. |
| 32 | +
|
| 33 | + * Hyphenated keys: my-key = value |
| 34 | + KEY_LINE allows only [A-Za-z0-9_]; '-' is excluded. |
| 35 | + reference.conf has no hyphenated keys — risk: none. |
| 36 | +
|
| 37 | + * Append operator: foo += bar |
| 38 | + KEY_LINE ends with [:={]; '+' before '=' is not in that set. |
| 39 | + reference.conf does not use '+=' — risk: none. |
| 40 | +
|
| 41 | + * Inline-object sub-keys: outer = {inner = 1} |
| 42 | + KEY_LINE.match() anchors to the line start, so only the first key on |
| 43 | + each line ('outer') is detected; 'inner' inside the braces is missed. |
| 44 | + reference.conf expands every block across multiple lines — risk: none. |
| 45 | +
|
| 46 | + * Second key on a bare-value line: a = 1, b = 2 |
| 47 | + re.match() matches only at the start; 'b' is invisible to KEY_LINE. |
| 48 | + reference.conf never puts two assignments on one line — risk: none. |
| 49 | +
|
| 50 | + B. False positive — non-key content incorrectly flagged as a missing key: |
| 51 | +
|
      * Triple-quoted multi-line strings (key = \"\"\" ... \"\"\")
        strip_quoted() is line-oriented and does not track triple-quote spans
        across lines. Lines inside the string body that look like 'word = ...'
        are matched by KEY_LINE and reported as keys lacking comments.
        reference.conf contains no triple-quoted strings — risk: none.
        If triple-quoted strings are ever introduced, add a triple-quote span
        tracker at the top of the per-line loop in collect_keys() so that
        lines inside a string body are skipped before KEY_LINE is applied.
| 59 | +
|
| 60 | + C. False pass — a key with no real comment is incorrectly classified as |
| 61 | + documented: |
| 62 | +
|
      * Block opened on the next line: key =\n{
        opening_after_key() only scans the current line for '{' or '['.
        If the opening brace appears on the next line, no named frame is
        pushed for the key; scan_structure() pushes an anonymous frame
        (name=None) when it reaches the brace on the following line.
        Nesting and array-element deduplication look only at the frame
        type, so the only effect is that the frame does not record the
        name of the key that opened it.
        reference.conf always opens blocks on the same line as the key
        (e.g. "genesis.block = {") — risk: none.
| 70 | +
|
| 71 | + * Bare URL value: key = http://example.com |
| 72 | + has_inline_comment() sees '//' in the URL and returns True, treating |
| 73 | + the URL as an inline comment. Quoting the URL ("http://...") avoids |
| 74 | + this because strip_quoted() removes the string contents before the |
| 75 | + comment scan. reference.conf contains no bare (unquoted) URLs and all |
| 76 | + such values are either quoted or absent — risk: none. |
| 77 | +""" |
| 78 | +import re |
| 79 | +import sys |
| 80 | +from pathlib import Path |
| 81 | + |
# Matches a line that begins (after optional indentation) with a plain,
# optionally dot-separated key such as "node" or "genesis.block", followed by
# optional whitespace and one of ':', '=' or '{'.  Group 1 is the key path.
# Quoted keys, hyphenated keys and the '+=' operator are intentionally not
# matched.
KEY_LINE = re.compile(r"^\s*([A-Za-z_][A-Za-z0-9_]*(?:\.[A-Za-z_][A-Za-z0-9_]*)*)\s*[:={]")
# Matches a line whose first non-whitespace characters start a '#' or '//'
# comment.
COMMENT_LINE = re.compile(r"^\s*(#|//)")
| 84 | + |
| 85 | + |
def strip_quoted(line):
    """Return *line* with the contents of double-quoted strings removed.

    The quote characters themselves are kept, so ``a = "x # y" # c`` becomes
    ``a = "" # c``.  Everything outside a string (comment markers, braces,
    brackets, separators) is preserved unchanged, which lets callers look
    for ``#``, ``//`` and bracket characters without being fooled by string
    contents.

    Only ``"`` delimits a string.  HOCON has no single-quoted strings, so an
    apostrophe is an ordinary character; treating it as a delimiter would
    swallow the rest of a line such as ``name = O'Brien # owner`` and hide
    its inline comment and brackets.

    A backslash inside a string escapes the next character, so ``\\"`` does
    not close the string.  An unterminated string swallows the remainder of
    the line.  Triple-quoted strings are not tracked.
    """
    out = []
    in_string = False
    escaped = False
    for ch in line:
        if not in_string:
            # Outside a string every character is kept; a double quote
            # additionally opens a string.
            out.append(ch)
            in_string = ch == '"'
        elif escaped:
            # Character following a backslash: dropped, never closes.
            escaped = False
        elif ch == "\\":
            escaped = True
        elif ch == '"':
            # Closing quote is kept so the delimiters stay balanced.
            in_string = False
            out.append(ch)
        # Any other character inside the string is dropped.
    return "".join(out)
| 112 | + |
| 113 | + |
def strip_comments(line):
    """Return the part of *line* that precedes the first comment marker.

    The line is first passed through strip_quoted(), so a ``#`` or ``//``
    inside a quoted string is not mistaken for a comment.  Note that the
    returned text therefore has quoted contents removed as well; it is meant
    for structural scanning, not for reading values.
    """
    text = strip_quoted(line)
    # Positions of each marker kind, ignoring the ones that do not occur.
    marker_positions = [
        pos for pos in (text.find("#"), text.find("//")) if pos != -1
    ]
    if not marker_positions:
        return text
    return text[:min(marker_positions)]
| 126 | + |
| 127 | + |
def has_inline_comment(line):
    """Return True if *line* contains a ``#`` or ``//`` outside quoted strings.

    Quoted contents are removed with strip_quoted() before searching, so
    markers inside strings do not count.
    """
    text = strip_quoted(line)
    return "#" in text or "//" in text
| 138 | + |
| 139 | + |
def has_prevline_comment(lines, index):
    """Return True if the line directly before ``lines[index]`` is a comment.

    Only the immediately preceding line is inspected; a blank line there
    yields False because COMMENT_LINE cannot match it.  The first line of the
    file (index 0) has no predecessor and always yields False.
    """
    if index == 0:
        return False
    return COMMENT_LINE.match(lines[index - 1]) is not None
| 145 | + |
| 146 | + |
def opening_after_key(code, match):
    """Find the bracket that a key opens on its own line, if any.

    *match* is a match of the key pattern against *code*; its last matched
    character is the separator following the key.  If that character is
    itself a bracket it is the opener.  If it is ``:`` or ``=``, the first
    non-whitespace character after it is the opener when it is ``{`` or
    ``[``.

    Returns ``(bracket, position_in_code)`` or ``(None, None)`` when the key
    does not open a block on this line.
    """
    sep_pos = match.end() - 1
    sep = code[sep_pos]
    if sep in "{[":
        return sep, sep_pos
    if sep not in ":=":
        return None, None

    # Skip whitespace between the separator and the value.
    rest = code[sep_pos + 1:]
    skipped = len(rest) - len(rest.lstrip())
    first = rest[skipped:skipped + 1]
    if first and first in "{[":
        return first, sep_pos + 1 + skipped
    return None, None
| 159 | + |
| 160 | + |
def nearest_array_frame(stack):
    """Return the innermost frame of type "array" on *stack*, or None.

    The stack is searched from the top (most recently pushed frame) down.
    """
    return next(
        (frame for frame in reversed(stack) if frame["type"] == "array"),
        None,
    )
| 166 | + |
| 167 | + |
def pop_frame(stack, closer):
    """Pop frames from *stack* up to and including the one *closer* closes.

    ``"}"`` closes an "object" frame; any other closer is treated as closing
    an "array" frame.  Frames of the other type sitting above the target are
    discarded along the way.  If no frame of the wanted type exists, the
    stack ends up empty.
    """
    wanted = "array" if closer != "}" else "object"
    while stack:
        if stack.pop()["type"] == wanted:
            break
| 174 | + |
| 175 | + |
def scan_structure(code, stack, key_open_pos=None):
    """Update *stack* for every bracket character found in *code*.

    Each ``{`` or ``[`` pushes an anonymous frame (``name`` is None) of type
    "object" or "array"; each ``}`` or ``]`` is handed to pop_frame().  The
    character at index *key_open_pos*, when given, is skipped because the
    caller has already pushed a frame for it.
    """
    frame_type_for = {"{": "object", "[": "array"}
    for pos, ch in enumerate(code):
        if pos == key_open_pos:
            # Already accounted for by the caller.
            continue
        if ch in frame_type_for:
            stack.append(
                {"type": frame_type_for[ch], "name": None, "seen": set()}
            )
        elif ch in ("}", "]"):
            pop_frame(stack, ch)
| 192 | + |
| 193 | + |
def collect_keys(path, list_all=False):
    """Scan the file at *path* line by line and classify every key line.

    A key line is a non-comment line whose comment-stripped text matches
    KEY_LINE.  Each key receives one status:

    * "dedup"     - the key name was already seen inside the innermost
                    enclosing array, so it is not checked again;
    * "commented" - the line has an inline comment, or the immediately
                    preceding line is a comment;
    * "missing"   - neither of the above.

    Returns
    -------
    missing : list of (line_no, key)
        Keys classified as "missing", in file order.  Empty means the file
        passes the gate.
    seen_rows : list of (line_no, key, status)
        One entry per key line, in file order.  Filled only when *list_all*
        is true; empty otherwise.
    """
    lines = path.read_text(encoding="utf-8").splitlines()

    # One frame per currently open '{' or '['.  Each frame is a dict with
    #   "type": "object" | "array"
    #   "name": the key that opened the block, or None for anonymous brackets
    #   "seen": key names already met inside this frame (used on array
    #           frames for deduplication across array elements)
    stack = []
    missing = []
    seen_rows = []

    for index, raw in enumerate(lines):
        line_no = index + 1

        # Comment text is removed so that brackets or key-like text inside
        # comments and strings never reach the structural logic.
        code = strip_comments(raw)

        # Pure comment lines are never key lines.
        if raw.lstrip().startswith(("#", "//")):
            match = None
        else:
            match = KEY_LINE.match(code)

        if match is None:
            # Not a key: only keep the bracket stack in sync.
            scan_structure(code, stack)
            continue

        key = match.group(1)

        # Bracket opened by this key on the same line (e.g. "node {" or
        # "active = ["), plus its index in `code` so scan_structure() does
        # not count it a second time.
        opener, key_open_pos = opening_after_key(code, match)

        # Array deduplication: within the innermost enclosing array only
        # the first occurrence of a key name is checked.
        array_frame = nearest_array_frame(stack)
        if array_frame is None:
            repeated = False
        else:
            repeated = key in array_frame["seen"]
            array_frame["seen"].add(key)

        # Status in priority order: dedup, then commented, then missing.
        if repeated:
            status = "dedup"
        elif has_inline_comment(raw) or has_prevline_comment(lines, index):
            status = "commented"
        else:
            status = "missing"
            missing.append((line_no, key))

        # Push the key's own frame only after the key has been classified,
        # so the key is judged in its parent scope rather than inside itself.
        if opener:
            stack.append({
                "type": "object" if opener == "{" else "array",
                "name": key,
                "seen": set(),
            })

        # Handle every other bracket on the line (e.g. a closing brace
        # after the value).
        scan_structure(code, stack, key_open_pos)

        if list_all:
            seen_rows.append((line_no, key, status))

    return missing, seen_rows
| 307 | + |
| 308 | + |
def main(argv):
    """Command-line entry point; returns the process exit status.

    Usage: ``<script> [--list] <path/to/reference.conf>``

    Exit status:
      0 - every checked key has a comment;
      1 - at least one key lacks a comment (a report is printed to stdout,
          including a ``::error`` workflow-command line, and a summary to
          stderr);
      2 - wrong arguments, or the path is not an existing file.

    With ``--list`` every key line is printed with its status before the
    result.
    """
    args = list(argv[1:])
    list_all = "--list" in args
    if list_all:
        args.remove("--list")

    if len(args) != 1:
        print(f"usage: {argv[0]} [--list] <path/to/reference.conf>", file=sys.stderr)
        return 2

    path = Path(args[0])
    if not path.is_file():
        print(f"error: file not found: {path}", file=sys.stderr)
        return 2

    missing, seen_rows = collect_keys(path, list_all)

    if list_all:
        for line_no, key, status in seen_rows:
            print(f"{line_no}: {key} [{status}]")
        print()

    if not missing:
        print(f"OK: {path} — all keys have comments")
        return 0

    count = len(missing)

    # Human-readable report.
    print(
        f"Comment coverage violations ({count}) — each key "
        "needs an inline or immediately preceding comment:"
    )
    for line_no, key in missing:
        print(f" comment: line {line_no}: {key}")
    print()

    # Single-line annotation; "%0A" stands in for a newline in the message.
    annotation = "%0A".join(
        [f"reference.conf has {count} comment coverage violation(s):"]
        + [f"line {line_no}: {key}" for line_no, key in missing]
    )
    print(f"::error file={path},title=reference.conf::{annotation}")
    print(
        f"FAIL: {count} comment coverage violation(s) in {path}",
        file=sys.stderr,
    )
    return 1
| 355 | + |
| 356 | + |
# Run the gate when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))
0 commit comments