|
10 | 10 |
|
11 | 11 | Dependencies: pip install requests |
12 | 12 | """ |
13 | | -import sys |
14 | | -import requests |
15 | 13 | import json |
| 14 | +import sys |
16 | 15 | from collections import OrderedDict |
| 16 | +from typing import Dict, List, Optional, Tuple |
| 17 | + |
| 18 | +import requests |
17 | 19 |
|
# Define templates
# URL template of a rendered cheat sheet page; ``%s`` is filled with the
# cheat sheet name after its spaces have been replaced by underscores.
CS_BASE_URL = "https://cheatsheetseries.owasp.org/cheatsheets/%s.html"
# Raw Markdown source of the cheat sheet index; used as the default URL
# of ``fetch_index_text``.
INDEX_URL = (
    "https://raw.githubusercontent.com/OWASP/CheatSheetSeries/master/Index.md"
)
| 25 | + |
| 26 | + |
def parse_index_line(line: str) -> Optional[Tuple[str, List[str]]]:
    """Parse a single line from ``Index.md``.

    Index lines that reference technology icons have the shape::

        [Cheatsheet Name](cheatsheets/Filename.md) ![Tech](assets/Index_Tech...) ...

    The cheat sheet name is the text of the first ``[...]`` pair on the
    line. Every ``![...]`` image reference contributes one technology
    name (its alt text, uppercased).

    Args:
        line: One raw line of the index, with or without surrounding
            whitespace / trailing newline.

    Returns:
        A tuple of the cheatsheet display name and the list of
        uppercased technology names, or ``None`` if the line has no
        technology icon references or no bracketed name to extract.
    """
    if "(assets/Index_" not in line:
        return None
    work = line.strip()

    # Locate the first "[...]" pair instead of assuming "[" sits at column 0,
    # so a leading list marker does not end up inside the extracted name.
    name_start = work.find("[")
    name_end = work.find("]", name_start + 1)
    if name_start == -1 or name_end == -1:
        # Malformed line: nothing that looks like a link label.
        return None
    cs_name = work[name_start + 1:name_end]

    tech_names: List[str] = []
    for fragment in work.split("!")[1:]:
        # Only "![alt]..." is an image reference. A stray "!" (for example
        # inside the cheat sheet title) must not yield a bogus technology
        # entry or raise ValueError.
        if not fragment.startswith("["):
            continue
        alt_end = fragment.find("]")
        if alt_end == -1:
            continue
        tech_names.append(fragment[1:alt_end].upper())
    return cs_name, tech_names
20 | 50 |
|
21 | | -# Grab the index MD source from the GitHub repository |
22 | | -response = requests.get( |
23 | | - "https://raw.githubusercontent.com/OWASP/CheatSheetSeries/master/Index.md") |
24 | | -if response.status_code != 200: |
25 | | - print("Cannot load the INDEX content: HTTP %s received!" % |
26 | | - response.status_code) |
27 | | - sys.exit(1) |
28 | | -else: |
29 | | - data = OrderedDict({}) |
30 | | - for line in response.text.split("\n"): |
31 | | - if "(assets/Index_" in line: |
32 | | - work = line.strip() |
33 | | - # Extract the name of the CS |
34 | | - cs_name = work[1:work.index("]")] |
35 | | - # Extract technologies and map the CS to them |
36 | | - technologies = work.split("!")[1:] |
37 | | - for technology in technologies: |
38 | | - technology_name = technology[1:technology.index("]")].upper() |
39 | | - if technology_name not in data: |
40 | | - data[technology_name] = [] |
41 | | - data[technology_name].append( |
42 | | - {"CS_NAME": cs_name, "CS_URL": CS_BASE_URL % cs_name.replace(" ", "_")}) |
43 | | - # Display the built structure and formatted JSON |
| 51 | + |
def build_technologies_dict(
    index_text: str,
) -> "OrderedDict[str, List[Dict[str, str]]]":
    """Group the cheat sheets listed in ``Index.md`` by technology.

    Each line of ``index_text`` is handed to ``parse_index_line``; lines it
    rejects (``None``) are ignored. For every technology named on a line,
    a ``{"CS_NAME": ..., "CS_URL": ...}`` record is appended to that
    technology's list, where the URL is ``CS_BASE_URL`` filled with the
    cheat sheet name (spaces replaced by underscores).

    Returns:
        An ``OrderedDict`` keyed by technology name, in order of first
        appearance in the index.
    """
    mapping: "OrderedDict[str, List[Dict[str, str]]]" = OrderedDict()
    parsed_lines = (parse_index_line(raw) for raw in index_text.split("\n"))
    for entry in parsed_lines:
        if entry is not None:
            sheet_name, technologies = entry
            sheet_url = CS_BASE_URL % sheet_name.replace(" ", "_")
            for technology in technologies:
                if technology not in mapping:
                    mapping[technology] = []
                # A fresh record per technology so the lists never share
                # dict objects.
                mapping[technology].append(
                    {"CS_NAME": sheet_name, "CS_URL": sheet_url}
                )
    return mapping
| 74 | + |
| 75 | + |
def fetch_index_text(
    url: str = INDEX_URL, timeout: float = 30.0
) -> Tuple[int, str]:
    """Fetch the ``Index.md`` content from the given URL.

    Args:
        url: Location of the index; defaults to ``INDEX_URL``.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on an
            unresponsive host.

    Returns:
        A ``(status_code, body)`` tuple. Callers are expected to check
        the status code and emit a user-facing error if it is not 200.

    Raises:
        requests.RequestException: On transport-level failures such as a
            connection error or an expired timeout.
    """
    response = requests.get(url, timeout=timeout)
    return response.status_code, response.text
| 85 | + |
| 86 | + |
def main() -> int:
    """Fetch the index and print the technologies JSON to stdout.

    The JSON is emitted with sorted keys and an indent of 1.

    Returns 0 on success and 1 if the upstream index cannot be fetched,
    whether because of a non-200 HTTP status or a transport-level failure
    (DNS error, refused connection, timeout, ...).
    """
    try:
        status, text = fetch_index_text()
    except requests.RequestException as exc:
        # Report network failures the same way as a bad HTTP status
        # instead of letting a traceback escape.
        print("Cannot load the INDEX content: %s" % exc)
        return 1
    if status != 200:
        print(
            "Cannot load the INDEX content: HTTP %s received!" % status
        )
        return 1
    data = build_technologies_dict(text)
    print(json.dumps(data, sort_keys=True, indent=1))
    return 0
| 101 | + |
| 102 | + |
# Run only when executed as a script (not on import); the integer returned
# by main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
0 commit comments