Skip to content

Commit 6df64d0

Browse files
authored
Merge pull request #380 from Sid0004/issue-300-tests
tests: add comprehensive unit tests for linktree module (fixes #300)
2 parents b170678 + 792f9b6 commit 6df64d0

File tree

4 files changed

+571
-0
lines changed

4 files changed

+571
-0
lines changed

run_tests.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
r"""Helper to run pytest with the `src` layout on systems where the package isn't installed.
2+
3+
Usage:
4+
.venv\Scripts\python.exe run_tests.py -q
5+
"""
6+
import sys
7+
8+
# Ensure the src directory is first on sys.path so `import torbot` works
9+
sys.path.insert(0, "src")
10+
11+
import pytest
12+
13+
14+
if __name__ == "__main__":
15+
args = sys.argv[1:] or ["-q"]
16+
raise SystemExit(pytest.main(args))

tests/conftest.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
"""pytest configuration and test-time shims for TorBot tests.
2+
3+
This module provides a lightweight stub for the NLP classifier so tests can
4+
run without installing the full scientific stack (numpy, scikit-learn, etc.).
5+
The stub is only active during test runs and does not affect production code.
6+
"""
7+
import sys
8+
import types
9+
10+
11+
def _install_nlp_stub():
12+
"""Install a minimal stub for torbot.modules.nlp.main during tests."""
13+
mod_name = "torbot.modules.nlp.main"
14+
if mod_name in sys.modules:
15+
return
16+
17+
# Create a minimal module with classify function
18+
stub = types.ModuleType(mod_name)
19+
20+
def classify(data):
21+
"""Lightweight test-only classifier.
22+
23+
Returns a deterministic classification without requiring ML libraries.
24+
Real implementation uses sklearn pipeline with training data.
25+
"""
26+
_ = data # unused in stub
27+
return ["unknown", 0.0]
28+
29+
# Use setattr to avoid linter complaints about dynamic attributes
30+
setattr(stub, "classify", classify)
31+
sys.modules[mod_name] = stub
32+
33+
34+
# Install stub before any test imports
35+
_install_nlp_stub()

tests/test_linktree_extra.py

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
"""Additional edge-case tests for linktree parsing functions.
2+
3+
These tests cover corner cases and error conditions for the parsing helpers.
4+
"""
5+
from bs4 import BeautifulSoup
6+
import pytest
7+
8+
from torbot.modules.linktree import (
9+
parse_hostname,
10+
parse_links,
11+
parse_emails,
12+
parse_phone_numbers,
13+
)
14+
15+
16+
def test_parse_hostname_raises_on_invalid_url() -> None:
    """parse_hostname must reject strings that carry no hostname."""
    bad_url = "not-a-valid-url"
    with pytest.raises(Exception, match="unable to parse hostname"):
        parse_hostname(bad_url)
20+
21+
22+
def test_parse_hostname_handles_various_schemes() -> None:
    """parse_hostname extracts the host for http, https and onion URLs."""
    cases = {
        "https://www.example.com/path": "www.example.com",
        "http://test.onion": "test.onion",
        "https://sub.domain.co.uk:8080/": "sub.domain.co.uk",
    }
    for url, expected_host in cases.items():
        assert parse_hostname(url) == expected_host
27+
28+
29+
def test_parse_links_filters_only_valid_full_urls() -> None:
    """parse_links keeps absolute http(s) URLs only, preserving duplicates."""
    page = """
    <html>
    <a href="/relative/path">relative</a>
    <a href="https://valid.example/path">valid</a>
    <a href="http://also.valid.test/">valid2</a>
    <a href="javascript:void(0)">js</a>
    <a href="https://valid.example/path">valid-duplicate</a>
    </html>
    """

    # Relative paths and javascript: pseudo-links are dropped; absolute
    # http(s) URLs survive in order, duplicates included.
    expected = [
        "https://valid.example/path",
        "http://also.valid.test/",
        "https://valid.example/path",
    ]
    assert parse_links(page) == expected
48+
49+
50+
def test_parse_links_empty_html() -> None:
    """A page with no anchor tags yields an empty link list."""
    page = "<html><body><p>No links here</p></body></html>"
    assert parse_links(page) == []
55+
56+
57+
def test_parse_links_anchor_without_href() -> None:
    """Anchors lacking an href attribute are skipped without error."""
    page = """
    <html>
    <a>No href</a>
    <a name="anchor">Named anchor</a>
    <a href="https://valid.com">Valid</a>
    </html>
    """
    assert parse_links(page) == ["https://valid.com"]
68+
69+
70+
def test_parse_emails_ignores_invalid_and_returns_unique() -> None:
    """parse_emails rejects invalid addresses and deduplicates valid ones."""
    markup = """
    <html>
    <a href="mailto:good@example.com">good</a>
    <a href="mailto:good@example.com">good-dup</a>
    <a href="mailto:bad-email@invalid@">bad</a>
    <a href="mailto:withparams@example.com?subject=hi">withparams</a>
    <a href="#">not-mailto</a>
    </html>
    """
    soup = BeautifulSoup(markup, "html.parser")

    emails = parse_emails(soup)
    # Note: current impl splits on 'mailto:' so params might be included;
    # we pin the observable behavior: the valid address appears exactly
    # once (invalid rejected, duplicate removed).
    assert "good@example.com" in emails
    assert emails.count("good@example.com") == 1  # no duplicates
91+
92+
93+
def test_parse_emails_empty_page() -> None:
    """A page without mailto links produces an empty email list."""
    soup = BeautifulSoup("<html><body><p>No emails</p></body></html>", "html.parser")
    assert parse_emails(soup) == []
98+
99+
100+
def test_parse_emails_malformed_mailto() -> None:
    """Empty or non-address mailto targets are filtered out."""
    markup = """
    <html>
    <a href="mailto:">empty</a>
    <a href="mailto:not-an-email">invalid</a>
    <a href="mailto:valid@test.com">valid</a>
    </html>
    """
    soup = BeautifulSoup(markup, "html.parser")

    # Only the well-formed address survives the validation step.
    assert parse_emails(soup) == ["valid@test.com"]
115+
116+
117+
def test_parse_phone_numbers_only_accepts_possible_international_numbers() -> None:
    """Only tel: links in international (+country) format are accepted."""
    markup = """
    <html>
    <a href="tel:+14155552671">us</a>
    <a href="tel:4155552671">no-plus</a>
    <a href="tel:+442071838750">uk</a>
    <a href="tel:invalid_phone">invalid</a>
    </html>
    """
    soup = BeautifulSoup(markup, "html.parser")

    numbers = parse_phone_numbers(soup)
    # Numbers missing the leading '+' or containing letters are rejected.
    assert sorted(numbers) == ["+14155552671", "+442071838750"]
134+
135+
136+
def test_parse_phone_numbers_empty_page() -> None:
    """A page without tel: links produces an empty number list."""
    soup = BeautifulSoup("<html><body><p>No phones</p></body></html>", "html.parser")
    assert parse_phone_numbers(soup) == []
141+
142+
143+
def test_parse_phone_numbers_removes_duplicates() -> None:
    """Repeated tel: targets appear only once in the result."""
    markup = """
    <html>
    <a href="tel:+14155551234">call</a>
    <a href="tel:+14155551234">call again</a>
    <a href="tel:+14155559999">other</a>
    </html>
    """
    soup = BeautifulSoup(markup, "html.parser")

    numbers = parse_phone_numbers(soup)
    assert len(numbers) == 2
    assert "+14155551234" in numbers
    assert "+14155559999" in numbers

0 commit comments

Comments
 (0)