Skip to content

Commit bc98d25

Browse files
committed
feat: add pytest test suite for 3 Python scripts
Introduces a pytest-based test suite for the three build-helper scripts in scripts/ that have no network dependencies:

- Generate_CheatSheets_TOC.py
- Update_CheatSheets_Index.py
- Generate_Technologies_JSON.py

Each script is minimally refactored to expose pure helper functions (to_display_name, parse_index_line, group_by_letter, etc.) and to wrap its imperative top-level code in a main() function with parameterized paths. Byte-for-byte output of the original scripts is preserved when invoked from the scripts/ directory.

The test suite covers 56 cases (unit + integration), including:

- helper-function unit tests (display names, language detection, letter grouping, whitespace cleanup, line parsing)
- main() integration tests against fixture cheatsheet directories
- mocked HTTP path for Generate_Technologies_JSON.main()

Files added:

- pytest.ini, requirements-dev.txt
- tests/__init__.py, tests/conftest.py
- tests/test_*.py for the three scripts

The remaining two scripts (Generate_RSS_Feed.py, Identify_Old_Issue_And_PR.py) and a CI workflow that runs pytest on PRs are deferred to follow-up PRs.

AI assistance: this change was developed with the help of an AI coding assistant. The refactor, tests, and commit message were reviewed by the author before submission.
1 parent 2845241 commit bc98d25

11 files changed

Lines changed: 846 additions & 106 deletions

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,6 @@ venv
1818
.claude/settings.local.json
1919
.claude/settings.json
2020
.claude/worktrees/
21+
# Python bytecode
22+
__pycache__/
23+
*.pyc

pytest.ini

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[pytest]
2+
testpaths = tests
3+
python_files = test_*.py
4+
python_classes = Test*
5+
python_functions = test_*
6+
addopts = -v --tb=short --strict-markers
7+
filterwarnings =
8+
error
9+
# The repo's scripts predate py3.10; do not escalate DeprecationWarning to an error.
10+
ignore::DeprecationWarning

requirements-dev.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Test-only dependencies. Install with:
2+
# pip install -r requirements-dev.txt
3+
#
4+
# Kept separate from requirements.txt so the runtime image for mkdocs/feedgen
5+
# does not pull in pytest.
6+
pytest>=7.0

scripts/Generate_CheatSheets_TOC.py

Lines changed: 80 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,29 +8,88 @@
88
same location that the script in order to be moved later by the caller script.
99
"""
1010
import os
11+
import sys
12+
from typing import Iterable, List
1113

1214
# Define templates
1315
cs_md_link_template = "* [%s](cheatsheets/%s)"
1416

15-
# Scan all CS files
16-
cheatsheets = [f.name for f in os.scandir("../cheatsheets") if f.is_file()]
17-
cheatsheets.sort()
18-
19-
# Generate the summary file
20-
with open("TOC.md", "w") as index_file:
21-
index_file.write("# Summary\n\n")
22-
index_file.write("### Cheatsheets\n\n")
23-
index_file.write(cs_md_link_template % ("Index Alphabetical", "Index.md"))
24-
index_file.write("\n")
25-
index_file.write(cs_md_link_template % ("Index ASVS", "IndexASVS.md"))
26-
index_file.write("\n")
27-
index_file.write(cs_md_link_template % ("Index ASVS", "IndexMASVS.md"))
28-
index_file.write("\n")
29-
index_file.write(cs_md_link_template % ("Index Proactive Controls", "IndexProactiveControls.md"))
30-
index_file.write("\n")
31-
for cheatsheet in cheatsheets:
32-
if cheatsheet != "Index.md" and cheatsheet != "IndexASVS.md" and cheatsheet != "IndexMASVS.md" and cheatsheet != "IndexProactiveControls.md" and cheatsheet != "TOC.md":
33-
cs_name = cheatsheet.replace("_"," ").replace(".md", "").replace("Cheat Sheet", "")
34-
index_file.write(cs_md_link_template % (cs_name, cheatsheet))
17+
# Files that are not actual cheat sheets and must be excluded from the TOC
18+
# even if they happen to live in the cheatsheets/ directory.
19+
_EXCLUDED_FROM_TOC = frozenset({
20+
"Index.md",
21+
"IndexASVS.md",
22+
"IndexMASVS.md",
23+
"IndexProactiveControls.md",
24+
"TOC.md",
25+
})
26+
27+
28+
def to_display_name(filename: str) -> str:
    """Convert a cheatsheet filename to its human-readable display name.

    Only a trailing ``.md`` extension is removed, so a ``.md`` sequence in
    the middle of a name is left untouched. Underscores then become spaces,
    every occurrence of "Cheat Sheet" is dropped, and the result is
    whitespace-stripped so trailing/leading spaces do not leak into the
    rendered link text.

    Args:
        filename: Bare file name of the cheat sheet (no directory part).

    Returns:
        The display name used as link text in the TOC.

    Examples:
        >>> to_display_name("Authentication_Cheat_Sheet.md")
        'Authentication'
        >>> to_display_name("XSS_Prevention_Cheat_Sheet.md")
        'XSS Prevention'
    """
    extension = ".md"
    # Strip the extension only when it really is the suffix; a blanket
    # replace would also eat ".md" inside names such as "Notes.mdown".
    stem = filename[:-len(extension)] if filename.endswith(extension) else filename
    return stem.replace("_", " ").replace("Cheat Sheet", "").strip()
47+
48+
49+
def should_skip(filename: str) -> bool:
    """Tell whether ``filename`` must be left out of the generated TOC.

    A file is skipped when its name is a member of ``_EXCLUDED_FROM_TOC``.
    """
    is_excluded = filename in _EXCLUDED_FROM_TOC
    return is_excluded
52+
53+
54+
def build_toc_lines(cheatsheets: Iterable[str]) -> List[str]:
    """Return the fixed, pre-defined index links that open the TOC.

    Args:
        cheatsheets: Names of the scanned cheat sheet files. Currently
            unused: the four links below are always emitted in this order,
            regardless of the contents of the cheatsheets/ directory. The
            parameter is kept so existing callers keep working.

    Returns:
        Four markdown list items, one per index page, without trailing
        newlines.
    """
    fixed_links = (
        ("Index Alphabetical", "Index.md"),
        ("Index ASVS", "IndexASVS.md"),
        # This entry used to be labelled "Index ASVS" too, which rendered
        # two identically named links pointing at different pages.
        ("Index MASVS", "IndexMASVS.md"),
        ("Index Proactive Controls", "IndexProactiveControls.md"),
    )
    return [cs_md_link_template % link for link in fixed_links]
66+
67+
68+
def main(cheatsheets_dir: str = "../cheatsheets", output_file: str = "TOC.md") -> int:
    """Generate the summary markdown page.

    Scans ``cheatsheets_dir`` for regular files, sorts their names
    alphabetically, and writes a SUMMARY-style markdown file at
    ``output_file``: a fixed header, the pre-defined index links, then one
    link per cheat sheet that is not excluded by ``should_skip``.

    Args:
        cheatsheets_dir: Directory to scan for cheat sheet files.
        output_file: Path of the markdown file to (over)write.

    Returns:
        0 on success.

    Raises:
        OSError: If the directory cannot be scanned or the output file
            cannot be written.
    """
    # The context manager releases the directory handle deterministically.
    with os.scandir(cheatsheets_dir) as entries:
        cheatsheets = sorted(entry.name for entry in entries if entry.is_file())
    # An explicit encoding keeps the output independent of the locale.
    with open(output_file, "w", encoding="utf-8") as index_file:
        index_file.write("# Summary\n\n")
        index_file.write("### Cheatsheets\n\n")
        for link in build_toc_lines(cheatsheets):
            index_file.write(link)
            index_file.write("\n")
        for cheatsheet in cheatsheets:
            if not should_skip(cheatsheet):
                index_file.write(
                    cs_md_link_template % (to_display_name(cheatsheet), cheatsheet)
                )
                index_file.write("\n")
    print("Summary markdown page generated.")
    return 0
92+
93+
94+
if __name__ == "__main__":
95+
sys.exit(main())

scripts/Generate_Technologies_JSON.py

Lines changed: 85 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -10,36 +10,95 @@
1010
1111
Dependencies: pip install requests
1212
"""
13-
import sys
14-
import requests
1513
import json
14+
import sys
1615
from collections import OrderedDict
16+
from typing import Dict, List, Optional, Tuple
17+
18+
import requests
1719

1820
# Define templates
1921
CS_BASE_URL = "https://cheatsheetseries.owasp.org/cheatsheets/%s.html"
22+
INDEX_URL = (
23+
"https://raw.githubusercontent.com/OWASP/CheatSheetSeries/master/Index.md"
24+
)
25+
26+
27+
def parse_index_line(line: str) -> Optional[Tuple[str, List[str]]]:
    """Parse a single line from ``Index.md``.

    Index lines that reference technology icons have the shape::

        [Cheatsheet Name](cheatsheets/Filename.md) ![Tech](assets/Index_Tech.svg) ...

    The cheat sheet name is the text of the first ``[...]`` pair on the
    line. A technology is the label of any image whose target starts with
    ``assets/Index_``; other images are ignored, and a ``!`` inside the
    cheat sheet name is not mistaken for an image marker.

    Args:
        line: One raw line of the index, with or without surrounding
            whitespace.

    Returns:
        A tuple of the cheatsheet display name and the list of uppercased
        technology names, or ``None`` if the line does not contain
        ``(assets/Index_`` or has no ``[...]`` pair to take a name from.
    """
    if "(assets/Index_" not in line:
        return None
    work = line.strip()
    name_start = work.find("[")
    name_end = work.find("]", name_start + 1)
    if name_start == -1 or name_end == -1:
        # Malformed line: report "nothing to parse" instead of raising.
        return None
    cs_name = work[name_start + 1:name_end]
    tech_names = []
    # Split on the full image opener "![" so a bare "!" is never treated
    # as the start of an icon.
    for fragment in work.split("![")[1:]:
        label, separator, target = fragment.partition("](")
        if separator and target.startswith("assets/Index_"):
            tech_names.append(label.upper())
    return cs_name, tech_names
2050

21-
# Grab the index MD source from the GitHub repository
22-
response = requests.get(
23-
"https://raw.githubusercontent.com/OWASP/CheatSheetSeries/master/Index.md")
24-
if response.status_code != 200:
25-
print("Cannot load the INDEX content: HTTP %s received!" %
26-
response.status_code)
27-
sys.exit(1)
28-
else:
29-
data = OrderedDict({})
30-
for line in response.text.split("\n"):
31-
if "(assets/Index_" in line:
32-
work = line.strip()
33-
# Extract the name of the CS
34-
cs_name = work[1:work.index("]")]
35-
# Extract technologies and map the CS to them
36-
technologies = work.split("!")[1:]
37-
for technology in technologies:
38-
technology_name = technology[1:technology.index("]")].upper()
39-
if technology_name not in data:
40-
data[technology_name] = []
41-
data[technology_name].append(
42-
{"CS_NAME": cs_name, "CS_URL": CS_BASE_URL % cs_name.replace(" ", "_")})
43-
# Display the built structure and formatted JSON
51+
52+
def build_technologies_dict(
    index_text: str,
) -> "OrderedDict[str, List[Dict[str, str]]]":
    """Map each technology to the cheat sheets that reference it.

    The text is split on ``"\\n"`` and every line is run through
    ``parse_index_line``; lines it rejects are ignored. Technologies are
    inserted in the order they are first seen, and each entry carries the
    cheat sheet name (``CS_NAME``) and its public URL (``CS_URL``).
    """
    mapping: "OrderedDict[str, List[Dict[str, str]]]" = OrderedDict()
    parsed_lines = (parse_index_line(raw) for raw in index_text.split("\n"))
    for parsed in parsed_lines:
        if parsed is not None:
            name, technologies = parsed
            # The URL depends only on the cheat sheet, not the technology.
            sheet_url = CS_BASE_URL % name.replace(" ", "_")
            for technology in technologies:
                if technology not in mapping:
                    mapping[technology] = []
                mapping[technology].append({"CS_NAME": name, "CS_URL": sheet_url})
    return mapping
74+
75+
76+
def fetch_index_text(url: str = INDEX_URL, timeout: float = 30.0) -> Tuple[int, str]:
    """Fetch the ``Index.md`` content from the given URL.

    Args:
        url: Location of the raw index file.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        A ``(status_code, body)`` tuple. Callers are expected to check
        the status code and emit a user-facing error if it is not 200.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            when the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    return response.status_code, response.text
85+
86+
87+
def main() -> int:
    """Download the index and print the technologies mapping as JSON.

    The JSON is written to stdout with sorted keys and an indent of 1.
    Returns 0 when the index was fetched with HTTP 200; otherwise prints
    an error message naming the received status and returns 1.
    """
    status, text = fetch_index_text()
    if status == 200:
        technologies = build_technologies_dict(text)
        print(json.dumps(technologies, sort_keys=True, indent=1))
        return 0
    print("Cannot load the INDEX content: HTTP %s received!" % status)
    return 1
101+
102+
103+
if __name__ == "__main__":
104+
sys.exit(main())

0 commit comments

Comments
 (0)