|
35 | 35 | # Issue body parser |
36 | 36 | # --------------------------------------------------------------------------- |
37 | 37 |
|
38 | | -def parse_issue_body(body: str) -> dict[str, str]: |
| 38 | +def parse_issue_body(body: str, known_labels: set[str] | None = None) -> dict[str, str]: |
39 | 39 | """Parse a GitHub issue form body into {label: value} pairs. |
40 | 40 |
|
41 | 41 | GitHub issue forms render as markdown with ``### Label`` headers |
42 | 42 | followed by the user's input. Checkbox groups render as lists of |
43 | 43 | ``- [X]`` / ``- [ ]`` items. |
| 44 | +
|
| 45 | + When *known_labels* is provided, only ``### Label`` lines whose text |
| 46 | + matches a known label start a new field. Other ``###`` headings |
| 47 | + inside textarea content are preserved as-is. |
44 | 48 | """ |
45 | 49 | fields: dict[str, str] = {} |
46 | 50 | current_label: str | None = None |
47 | 51 | current_lines: list[str] = [] |
48 | 52 |
|
49 | 53 | for line in body.splitlines(): |
50 | 54 | if line.startswith("### "): |
51 | | - # Store previous field |
52 | | - if current_label is not None: |
53 | | - fields[current_label] = "\n".join(current_lines).strip() |
54 | | - current_label = line[4:].strip() |
55 | | - current_lines = [] |
56 | | - else: |
57 | | - current_lines.append(line) |
| 55 | + heading = line[4:].strip() |
| 56 | + # Only split on known form labels (if provided) |
| 57 | + if known_labels is None or heading in known_labels: |
| 58 | + # Store previous field |
| 59 | + if current_label is not None: |
| 60 | + fields[current_label] = "\n".join(current_lines).strip() |
| 61 | + current_label = heading |
| 62 | + current_lines = [] |
| 63 | + continue |
| 64 | + current_lines.append(line) |
58 | 65 |
|
59 | 66 | # Don't forget the last field |
60 | 67 | if current_label is not None: |
@@ -295,6 +302,17 @@ def check_url_reachable( |
295 | 302 | hostname = parsed.hostname |
296 | 303 | if not hostname: |
297 | 304 | return False, f"{field_name} URL has no hostname." |
| 305 | + |
| 306 | + # Restrict to known hosts to mitigate DNS-rebinding TOCTOU risks |
| 307 | + _allowed_hosts = { |
| 308 | + "github.com", "www.github.com", "codeload.github.com", |
| 309 | + "raw.githubusercontent.com", "objects.githubusercontent.com", |
| 310 | + } |
| 311 | + if hostname not in _allowed_hosts: |
| 312 | + return False, ( |
| 313 | + f"{field_name} URL must be on a GitHub domain " |
| 314 | + f"(got `{hostname}`)." |
| 315 | + ) |
298 | 316 | try: |
299 | 317 | addr_info = socket.getaddrinfo(hostname, None) |
300 | 318 | for _family, _type, _proto, _canonname, sockaddr in addr_info: |
@@ -764,9 +782,11 @@ def _build_preset_entry( |
764 | 782 | for line in extensions_raw.splitlines(): |
765 | 783 | line = line.strip().lstrip("-*").strip() |
766 | 784 | for part in line.split(","): |
767 | | - part = part.strip() |
768 | | - if part: |
| 785 | + part = part.strip().lower() |
| 786 | + if part and _ID_RE.match(part): |
769 | 787 | ext_list.append(part) |
| 788 | + # Deduplicate and sort for stable catalog output |
| 789 | + ext_list = sorted(set(ext_list)) |
770 | 790 | if ext_list: |
771 | 791 | requires["extensions"] = ext_list |
772 | 792 | elif is_update and "extensions" in existing.get("requires", {}): |
@@ -936,7 +956,8 @@ def main() -> None: |
936 | 956 | catalog = json.load(f) |
937 | 957 |
|
938 | 958 | # Parse and normalize |
939 | | - raw_fields = parse_issue_body(issue_body) |
| 959 | + known_labels = set(LABEL_MAPS[args.type].keys()) |
| 960 | + raw_fields = parse_issue_body(issue_body, known_labels=known_labels) |
940 | 961 | fields = normalize_fields(raw_fields, args.type) |
941 | 962 |
|
942 | 963 | if not fields: |
|
0 commit comments