Skip to content

Commit 9fc9f30

Browse files
Merge pull request #309 from kaifcodec/fix/email-osint-modules
fix(email_scan/): update the nytimes and polarsteps modules, as both sites changed their logic
2 parents 416cf9d + 4817817 commit 9fc9f30

2 files changed

Lines changed: 68 additions & 38 deletions

File tree

user_scanner/email_scan/news/nytimes.py

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,25 +7,37 @@
77

88
async def _check(email: str) -> Result:
99
show_url = "https://nytimes.com"
10-
10+
# hit this first to wake up the session and grab the token
1111
login_url = "https://myaccount.nytimes.com/auth/enter-email?response_type=cookie&client_id=vi&redirect_uri=https%3A%2F%2Fwww.nytimes.com"
1212
check_url = "https://myaccount.nytimes.com/svc/lire_ui/authorize-email/check"
1313

1414
headers = {
15-
'User-Agent': "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Mobile Safari/537.36",
16-
'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
15+
'User-Agent': "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Mobile Safari/537.36",
16+
'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
1717
'Accept-Language': "en-US,en;q=0.9",
18+
'Accept-Encoding': "identity",
19+
'sec-ch-ua-platform': '"Android"',
20+
'sec-ch-ua': '"Chromium";v="146", "Not-A.Brand";v="24", "Google Chrome";v="146"',
21+
'sec-ch-ua-mobile': "?1"
1822
}
1923

2024
try:
21-
async with httpx.AsyncClient(timeout=7.0, follow_redirects=True) as client:
25+
# NYT likes HTTP/2, helps avoid getting flagged as a bot
26+
async with httpx.AsyncClient(timeout=12.0, follow_redirects=True, http2=True) as client:
27+
2228
init_res = await client.get(login_url, headers=headers)
2329

30+
if init_res.status_code == 403:
31+
return Result.error("NYT blocked the initial hit (403)")
32+
33+
# Digging out the auth_token from the mess of HTML/JS
2434
token_match = re.search(
25-
r'authToken(?:"|"):(?:"|")([^&"]+)', init_res.text)
35+
r'authToken(?:"|"|\\")\s*:\s*(?:"|"|\\")([^&"\\]+)',
36+
init_res.text
37+
)
2638

2739
if not token_match:
28-
return Result.error("Could not extract NYT auth_token")
40+
return Result.error("Couldn't find the auth_token in the page")
2941

3042
auth_token = html.unescape(token_match.group(1))
3143

@@ -37,32 +49,44 @@ async def _check(email: str) -> Result:
3749
"environment": "production"
3850
}
3951

40-
# Update headers for the API call
52+
# The critical tracking/origin headers
4153
api_headers = headers.copy()
4254
api_headers.update({
4355
'Content-Type': "application/json",
4456
'Accept': "application/json",
4557
'req-details': "[[it:lui]]",
4658
'Origin': "https://myaccount.nytimes.com",
47-
'Referer': login_url
59+
'Referer': login_url,
60+
'sec-fetch-site': "same-origin",
61+
'sec-fetch-mode': "cors",
62+
'sec-fetch-dest': "empty"
4863
})
4964

50-
response = await client.post(check_url, content=json.dumps(payload), headers=api_headers)
51-
data = response.json()
65+
response = await client.post(
66+
check_url,
67+
content=json.dumps(payload),
68+
headers=api_headers
69+
)
70+
71+
if response.status_code == 403:
72+
return Result.error("Bot detection triggered on the check (403)")
73+
74+
if response.status_code != 200:
75+
return Result.error(f"API acted up: {response.status_code}")
5276

53-
further_action = data.get("data", {}).get("further_action", "")
77+
res_data = response.json()
78+
further_action = res_data.get("data", {}).get("further_action", "")
5479

80+
# If it says show-login, they have an account. If show-register, they don't.
5581
if further_action == "show-login":
5682
return Result.taken(url=show_url)
5783
elif further_action == "show-register":
5884
return Result.available(url=show_url)
5985

60-
return Result.error("Unexpected response body, report it on github")
86+
return Result.error(f"Got an weird action: {further_action}")
6187

62-
except httpx.ConnectTimeout:
63-
return Result.error("Connection timed out!")
6488
except httpx.ReadTimeout:
65-
return Result.error("Server took too long to respond (Read Timeout)")
89+
return Result.error("NYT took too long to answer")
6690
except Exception as e:
6791
return Result.error(e)
6892

user_scanner/email_scan/travel/polarsteps.py

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,56 @@
11
import httpx
2+
import json
23
from user_scanner.core.result import Result
34

45

56
async def _check(email: str) -> Result:
67
show_url = "https://polarsteps.com"
7-
url = "https://www.polarsteps.com/send_password_reset"
8+
# Switching to the login endpoint to leverage 401 vs 404 status codes
9+
url = "https://www.polarsteps.com/api/login"
810

911
payload = {
10-
'email': email
12+
"username": email,
13+
"password": "nic3_guys_finish_last" # Dummy password for existence check
1114
}
1215

1316
headers = {
14-
'User-Agent': "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Mobile Safari/537.36",
17+
'User-Agent': "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Mobile Safari/537.36",
18+
'Accept': "application/json, text/plain, */*",
1519
'Accept-Encoding': "identity",
20+
'Content-Type': "application/json",
1621
'sec-ch-ua-platform': '"Android"',
17-
'sec-ch-ua': '"Not:A-Brand";v="99", "Google Chrome";v="145", "Chromium";v="145"',
22+
'polarsteps-api-version': "69",
23+
'sec-ch-ua': '"Chromium";v="146", "Not-A.Brand";v="24", "Google Chrome";v="146"',
1824
'sec-ch-ua-mobile': "?1",
19-
'origin': "https://www.polarsteps.com",
20-
'sec-fetch-site': "same-origin",
21-
'sec-fetch-mode': "cors",
22-
'sec-fetch-dest': "empty",
23-
'referer': "https://www.polarsteps.com/forgot_password",
24-
'accept-language': "en-US,en;q=0.9",
25-
'priority': "u=1, i"
25+
'Origin': "https://www.polarsteps.com",
26+
'Referer': "https://www.polarsteps.com/login",
27+
'Accept-Language': "en-US,en;q=0.9,ru;q=0.8",
28+
'Priority': "u=1, i"
2629
}
2730

2831
try:
2932
async with httpx.AsyncClient(timeout=10.0) as client:
30-
response = await client.post(url, data=payload, headers=headers)
31-
status = response.status_code
33+
response = await client.post(
34+
url,
35+
content=json.dumps(payload),
36+
headers=headers
37+
)
3238

33-
if status == 403:
34-
return Result.error("Caught by WAF or IP Block (403)")
39+
status = response.status_code
3540

36-
if status == 200:
37-
data = response.json()
41+
# 401 means the account exists but the password (dummy) was wrong
42+
if status == 401:
43+
return Result.taken(url=show_url)
3844

39-
if data.get("success") == "OK":
40-
return Result.taken(url=show_url)
45+
# 404 means the username/email is not registered in their system
46+
if status == 404:
47+
return Result.available(url=show_url)
4148

42-
error_msg = data.get("error", {}).get("email", "")
43-
if "don't have any user" in error_msg:
44-
return Result.available(url=show_url)
49+
if status == 403:
50+
return Result.error("Caught by WAF or IP Block (403)")
4551

4652
if status == 429:
47-
return Result.error("Rate limited by Polarsteps")
53+
return Result.error("Rate limited by Polarsteps (429)")
4854

4955
return Result.error(f"Unexpected status code: {status}")
5056

0 commit comments

Comments
 (0)