77
88async def _check (email : str ) -> Result :
99 show_url = "https://nytimes.com"
10-
10+ # hit this first to wake up the session and grab the token
1111 login_url = "https://myaccount.nytimes.com/auth/enter-email?response_type=cookie&client_id=vi&redirect_uri=https%3A%2F%2Fwww.nytimes.com"
1212 check_url = "https://myaccount.nytimes.com/svc/lire_ui/authorize-email/check"
1313
1414 headers = {
15- 'User-Agent' : "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144 .0.0.0 Mobile Safari/537.36" ,
16- 'Accept' : "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" ,
15+ 'User-Agent' : "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146 .0.0.0 Mobile Safari/537.36" ,
16+ 'Accept' : "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng, */*;q=0.8" ,
1717 'Accept-Language' : "en-US,en;q=0.9" ,
18+ 'Accept-Encoding' : "identity" ,
19+ 'sec-ch-ua-platform' : '"Android"' ,
20+ 'sec-ch-ua' : '"Chromium";v="146", "Not-A.Brand";v="24", "Google Chrome";v="146"' ,
21+ 'sec-ch-ua-mobile' : "?1"
1822 }
1923
2024 try :
21- async with httpx .AsyncClient (timeout = 7.0 , follow_redirects = True ) as client :
25+ # NYT likes HTTP/2, helps avoid getting flagged as a bot
26+ async with httpx .AsyncClient (timeout = 12.0 , follow_redirects = True , http2 = True ) as client :
27+
2228 init_res = await client .get (login_url , headers = headers )
2329
30+ if init_res .status_code == 403 :
31+ return Result .error ("NYT blocked the initial hit (403)" )
32+
33+ # Digging out the auth_token from the mess of HTML/JS
2434 token_match = re .search (
25- r'authToken(?:"|"):(?:"|")([^&"]+)' , init_res .text )
35+ r'authToken(?:"|"|\\")\s*:\s*(?:"|"|\\")([^&"\\]+)' ,
36+ init_res .text
37+ )
2638
2739 if not token_match :
28- return Result .error ("Could not extract NYT auth_token" )
40+ return Result .error ("Couldn't find the auth_token in the page " )
2941
3042 auth_token = html .unescape (token_match .group (1 ))
3143
@@ -37,32 +49,44 @@ async def _check(email: str) -> Result:
3749 "environment" : "production"
3850 }
3951
40- # Update headers for the API call
52+ # The critical tracking/origin headers
4153 api_headers = headers .copy ()
4254 api_headers .update ({
4355 'Content-Type' : "application/json" ,
4456 'Accept' : "application/json" ,
4557 'req-details' : "[[it:lui]]" ,
4658 'Origin' : "https://myaccount.nytimes.com" ,
47- 'Referer' : login_url
59+ 'Referer' : login_url ,
60+ 'sec-fetch-site' : "same-origin" ,
61+ 'sec-fetch-mode' : "cors" ,
62+ 'sec-fetch-dest' : "empty"
4863 })
4964
50- response = await client .post (check_url , content = json .dumps (payload ), headers = api_headers )
51- data = response .json ()
65+ response = await client .post (
66+ check_url ,
67+ content = json .dumps (payload ),
68+ headers = api_headers
69+ )
70+
71+ if response .status_code == 403 :
72+ return Result .error ("Bot detection triggered on the check (403)" )
73+
74+ if response .status_code != 200 :
75+ return Result .error (f"API acted up: { response .status_code } " )
5276
53- further_action = data .get ("data" , {}).get ("further_action" , "" )
77+ res_data = response .json ()
78+ further_action = res_data .get ("data" , {}).get ("further_action" , "" )
5479
80+ # If it says show-login, they have an account. If show-register, they don't.
5581 if further_action == "show-login" :
5682 return Result .taken (url = show_url )
5783 elif further_action == "show-register" :
5884 return Result .available (url = show_url )
5985
60- return Result .error ("Unexpected response body, report it on github " )
86+ return Result .error (f"Got an weird action: { further_action } " )
6187
62- except httpx .ConnectTimeout :
63- return Result .error ("Connection timed out!" )
6488 except httpx .ReadTimeout :
65- return Result .error ("Server took too long to respond (Read Timeout) " )
89+ return Result .error ("NYT took too long to answer " )
6690 except Exception as e :
6791 return Result .error (e )
6892
0 commit comments