Skip to content

Commit c8b74dd

Browse files
committed
test: Add unit, integration, and e2e tests for scanner components and configure CI workflow.
1 parent 305f4ec commit c8b74dd

8 files changed

Lines changed: 805 additions & 0 deletions

File tree

.github/workflows/test.yml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# CI workflow: installs the project with uv and runs the pytest suite
# on pushes to main and on pull requests targeting main.
name: Test

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# The job only needs to read the repository, so restrict the token accordingly.
permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          # Pinned so dependency resolution is reproducible across runs.
          version: "0.5.11"

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Take the interpreter version from the project metadata.
          python-version-file: "pyproject.toml"

      - name: Install dependencies
        run: uv sync --all-extras --dev

      - name: Run tests
        run: uv run pytest

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ dependencies = [
1818
dev = [
1919
"ruff>=0.9.0",
2020
"pre-commit>=4.0.1",
21+
"pytest>=8.0.0",
22+
"pytest-asyncio>=0.23.0",
23+
"pytest-httpx>=0.30.0",
24+
"pytest-cov>=7.0.0",
2125
]
2226

2327
[build-system]

tests/test_analyzer.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import json
2+
import tempfile
3+
4+
import pytest
5+
6+
from scanner.analyzer import detect_api_prefix
7+
8+
9+
@pytest.fixture
def har_data():
    """Return a small HAR-shaped dict with four recorded request/response entries.

    The entries cover two JSON calls to ``api.example.com/v1``, one JSON call
    to ``google.com`` and one HTML response from ``legacy-app.com``.
    """

    def _entry(url, method, mime_type):
        # Every sample response is a 200; only URL, method and MIME type vary.
        return {
            "request": {"url": url, "method": method},
            "response": {"status": 200, "content": {"mimeType": mime_type}},
        }

    samples = (
        ("https://api.example.com/v1/users", "GET", "application/json"),
        ("https://api.example.com/v1/posts", "GET", "application/json"),
        ("https://google.com/analytics", "POST", "application/json"),
        ("https://legacy-app.com/login.php", "POST", "text/html"),
    )
    return {"log": {"entries": [_entry(*sample) for sample in samples]}}
33+
34+
35+
def create_har_file(data):
    """Write *data* as JSON to a temporary ``.har`` file and return its path.

    The file is created with ``delete=False`` so it still exists after this
    function returns and can be opened by path. To avoid leaving stray files
    behind, its removal is scheduled for interpreter exit.

    Args:
        data: JSON-serializable object to store in the file.

    Returns:
        The filesystem path of the created file, as a string.
    """
    import atexit
    import contextlib
    import os

    # Explicit UTF-8 so the file content does not depend on the locale.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".har", delete=False, encoding="utf-8") as f:
        json.dump(data, f)
        path = f.name

    def _remove_quietly(target):
        # The file may already be gone (e.g. removed by the code under test).
        with contextlib.suppress(OSError):
            os.unlink(target)

    atexit.register(_remove_quietly, path)
    return path
39+
40+
41+
def test_detect_api_prefix_json(har_data):
    """Targeting api.example.com must yield the ``/v1`` prefix of that host."""
    har_path = create_har_file(har_data)
    # Target the main API host.
    detected = detect_api_prefix(har_path, target_url="https://api.example.com")
    expected = "https://api.example.com/v1"
    assert detected == expected
46+
47+
48+
def test_detect_api_prefix_filter_third_party(har_data):
    """The detected prefix must not point at the third-party google.com host."""
    har_path = create_har_file(har_data)
    # google.com also returns JSON in the fixture, but it is not the target.
    detected = detect_api_prefix(har_path, target_url="https://api.example.com")
    assert "google.com" not in detected
53+
54+
55+
def test_detect_api_prefix_html_legacy():
    """Non-JSON responses (HTML / form-encoded) must still produce a prefix."""
    # Regression check for the text/html content-type handling.
    login_entry = {
        "request": {"url": "https://legacy-app.com/auth/login", "method": "POST"},
        "response": {"status": 200, "content": {"mimeType": "text/html"}},
    }
    register_entry = {
        "request": {"url": "https://legacy-app.com/auth/register", "method": "POST"},
        "response": {"status": 200, "content": {"mimeType": "application/x-www-form-urlencoded"}},
    }
    har_path = create_har_file({"log": {"entries": [login_entry, register_entry]}})

    detected = detect_api_prefix(har_path, target_url="https://legacy-app.com")

    assert detected == "https://legacy-app.com/auth"
74+
75+
76+
def test_fallback_behavior(har_data):
    """A target host absent from the HAR falls back to the overall top prefix."""
    # When the target filter matches nothing, the current implementation is
    # expected to consider all recorded traffic instead of returning None.
    har_path = create_har_file(har_data)

    # missing.com does not appear anywhere in the fixture.
    detected = detect_api_prefix(har_path, target_url="https://missing.com")

    # The most common prefix across *all* traffic belongs to api.example.com.
    assert detected == "https://api.example.com/v1"

tests/test_crawler.py

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
from unittest.mock import AsyncMock, MagicMock, patch
2+
3+
import pytest
4+
5+
from scanner.crawler import AsyncCrawler
6+
7+
8+
@pytest.fixture
def mock_page():
    """Provide an AsyncMock page that reports no forms and no links."""
    fake_page = AsyncMock()
    fake_page.url = "http://test-site.com"
    # Link extraction via eval_on_selector_all finds nothing by default.
    fake_page.eval_on_selector_all.return_value = []
    # Form lookup via locator(...).all() finds nothing by default.
    fake_page.locator.return_value.all.return_value = []
    return fake_page
17+
18+
19+
@pytest.fixture
def mock_context(mock_page):
    """Provide an AsyncMock browser context whose new_page() yields mock_page."""
    fake_context = AsyncMock()
    fake_context.new_page.return_value = mock_page
    return fake_context
24+
25+
26+
@pytest.fixture
def mock_browser(mock_context):
    """Provide an AsyncMock browser whose new_context() yields mock_context."""
    fake_browser = AsyncMock()
    fake_browser.new_context.return_value = mock_context
    return fake_browser
31+
32+
33+
@pytest.fixture
def mock_playwright(mock_browser):
    """Provide an AsyncMock Playwright whose chromium.launch() yields mock_browser."""
    fake_playwright = AsyncMock()
    fake_playwright.chromium.launch.return_value = mock_browser
    return fake_playwright
38+
39+
40+
@pytest.mark.asyncio
async def test_crawler_initialization():
    """A new crawler stores its settings and queues the start URL."""
    start = "http://test.com"
    crawler = AsyncCrawler(start, max_depth=3)

    assert crawler.start_url == start
    assert crawler.max_depth == 3
    assert crawler.domain == "test.com"
    # Queue items are indexable; the URL is the first element of each item.
    assert any(item[0] == start for item in crawler.queue)
47+
48+
49+
@pytest.mark.asyncio
async def test_is_valid_url():
    """is_valid_url accepts same-domain pages and rejects everything else tested."""
    crawler = AsyncCrawler("http://test.com")

    # Same-domain page: accepted.
    assert crawler.is_valid_url("http://test.com/path")
    # External domain: rejected.
    assert not crawler.is_valid_url("http://google.com")
    # Static asset: rejected.
    assert not crawler.is_valid_url("http://test.com/image.png")

    # A URL already recorded as visited: rejected.
    seen_url = "http://test.com/visited"
    crawler.visited.add(seen_url)
    assert not crawler.is_valid_url(seen_url)
59+
60+
61+
@patch("scanner.crawler.async_playwright")
@pytest.mark.asyncio
async def test_crawl_flow(mock_pw_func, mock_playwright, mock_page):
    """Crawl a mocked two-page site and check which URLs end up visited.

    Scenario:
      1. http://test.com exposes one in-scope link, one external link and one
         static asset.
      2. http://test.com/page2 exposes no links.
    """
    # Make the patched async_playwright() return an async context manager
    # that yields the mocked Playwright object.
    pw_manager = AsyncMock()
    pw_manager.__aenter__.return_value = mock_playwright
    pw_manager.__aexit__.return_value = None
    mock_pw_func.return_value = pw_manager

    root_links = ("http://test.com/page2", "http://google.com", "http://test.com/image.png")

    async def fake_eval(selector, fn):
        # Only anchor queries on the root page return links.
        if selector == "a" and mock_page.url == "http://test.com":
            return list(root_links)
        return []

    async def fake_goto(url, **kwargs):
        # Simulate navigation so fake_eval can tell which page is current.
        mock_page.url = url

    mock_page.eval_on_selector_all.side_effect = fake_eval
    mock_page.goto.side_effect = fake_goto

    crawler = AsyncCrawler("http://test.com", max_depth=2, output_har="test.har")
    await crawler.crawl()

    # Both in-scope pages were visited.
    for expected_url in ("http://test.com", "http://test.com/page2"):
        assert expected_url in crawler.visited

    # The external link was ignored.
    assert "http://google.com" not in crawler.visited

    # Exactly one navigation per visited page.
    assert mock_page.goto.call_count == 2
102+
103+
104+
@patch("scanner.crawler.async_playwright")
@pytest.mark.asyncio
async def test_crawl_form_detection(mock_pw_func, mock_playwright, mock_page):
    """Run process_page on a mocked login form and check it is filled and submitted."""
    # Make the patched async_playwright() return an async context manager
    # that yields the mocked Playwright object.
    pw_manager = AsyncMock()
    pw_manager.__aenter__.return_value = mock_playwright
    pw_manager.__aexit__.return_value = None
    mock_pw_func.return_value = pw_manager

    mock_page.url = "http://test.com/login"

    # One visible text input named "username"; any other attribute reads as "".
    input_attributes = {"type": "text", "name": "username"}

    async def fake_get_attribute(attr):
        return input_attributes.get(attr, "")

    mock_form = AsyncMock()
    mock_input = AsyncMock()
    mock_input.is_visible.return_value = True
    mock_input.get_attribute.side_effect = fake_get_attribute

    # Visible submit button.
    mock_submit = AsyncMock()
    mock_submit.count.return_value = 1
    mock_submit.is_visible.return_value = True

    # Locator.all() is awaited, so the locators themselves are AsyncMocks.
    input_locator = AsyncMock()
    input_locator.all.return_value = [mock_input]

    submit_locator = AsyncMock()
    submit_locator.first = mock_submit
    submit_locator.count.return_value = 1

    def pick_locator(selector):
        # Route by selector text: submit-like selectors first, then inputs.
        if "submit" in selector or "Login" in selector:
            return submit_locator
        if "input" in selector or "textarea" in selector:
            return input_locator
        # Anything else gets a throwaway mock.
        return MagicMock()

    # form.locator(...) is a synchronous call returning a locator, hence MagicMock.
    mock_form.locator = MagicMock(side_effect=pick_locator)

    # page.locator("form") is synchronous as well; its .all() is awaited.
    form_locator = AsyncMock()
    form_locator.all.return_value = [mock_form]
    mock_page.locator = MagicMock(return_value=form_locator)

    login_url = "http://test.com/login"
    crawler = AsyncCrawler(login_url, max_depth=1)
    await crawler.process_page(mock_page, login_url, 0)

    # The input was filled with the expected value.
    mock_input.fill.assert_called_with("testuser")

    # The submit button was clicked.
    mock_submit.click.assert_called()

tests/test_e2e_mock.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
from unittest.mock import MagicMock, patch
2+
3+
import pytest
4+
5+
from scanner.main import async_main
6+
7+
8+
@patch("scanner.main.run_crawler")
@patch("scanner.main.run_prober")
@patch("scanner.main.detect_api_prefix")
@patch("scanner.main.process_to_spec")
@patch("scanner.main.subprocess.Popen")
@patch("scanner.main.argparse.ArgumentParser.parse_args")
@pytest.mark.asyncio
async def test_main_e2e_flow(
    mock_parse_args, mock_popen, mock_process_to_spec, mock_detect_prefix, mock_run_prober, mock_run_crawler
):
    """Drive async_main with every external step patched and check the hand-offs.

    Patches are injected bottom-up, so the first parameter corresponds to the
    decorator closest to the function (``parse_args``).
    """
    api_prefix = "http://target.com/api"

    # Parsed command-line arguments returned by the patched parse_args().
    mock_parse_args.return_value = MagicMock(
        url="http://target.com",
        depth=2,
        header=["Authorization: Bearer test"],
        cookie=["session=123"],
        proxy_port=8080,
        har_file="traffic.har",
        initial_spec="init.yaml",
        fuzzing_dump="fuzz.mitm",
        final_spec="final.yaml",
        resume=False,
        state_file="state.json",
    )

    mock_detect_prefix.return_value = api_prefix

    # Popen stands in for the mitmdump process; poll() -> None means "still running".
    proxy_process = MagicMock()
    proxy_process.poll.return_value = None
    mock_popen.return_value = proxy_process

    await async_main()

    # Step 1: the crawler got the URL plus parsed headers and cookies.
    mock_run_crawler.assert_called_once()
    crawler_call = mock_run_crawler.call_args
    assert crawler_call.args[0] == "http://target.com"
    assert crawler_call.kwargs["headers"] == {"Authorization": "Bearer test"}
    assert crawler_call.kwargs["cookies"][0]["name"] == "session"

    # Step 2: the proxy was started through mitmdump.
    mock_popen.assert_called_once()
    assert "mitmdump" in mock_popen.call_args.args[0]

    # Step 3: the prober received the detected prefix as its second positional
    # argument, i.e. run_prober(spec_file, api_prefix, proxy, ...).
    mock_run_prober.assert_called_once()
    assert mock_run_prober.call_args.args[1] == api_prefix

    # Step 4: spec generation used the same prefix and the final output path.
    mock_process_to_spec.assert_called_once()
    spec_kwargs = mock_process_to_spec.call_args.kwargs
    assert spec_kwargs["api_prefix"] == api_prefix
    assert spec_kwargs["output_file"] == "final.yaml"

0 commit comments

Comments
 (0)