@@ -108,17 +108,34 @@ def _error(request: Request, detail: str, code: int, elapsed_ms: float = 0) -> J
108108 return JSONResponse (content = body .model_dump (), status_code = code )
109109
110110
111- def _detect_chat_provider (url : str ) -> str :
112- lowered = url .lower ()
113- if "chatgpt.com" in lowered or "chat.openai.com" in lowered or "openai.com" in lowered :
114- return "chatgpt"
115- if "claude.ai" in lowered :
116- return "claude"
117- if "gemini.google.com" in lowered or "g.co/gemini" in lowered :
118- return "gemini"
111+ def _detect_chat_provider (* urls : str ) -> str :
112+ for url in urls :
113+ lowered = (url or "" ).lower ()
114+ if not lowered :
115+ continue
116+ if "chatgpt.com" in lowered or "chat.openai.com" in lowered or "openai.com" in lowered :
117+ return "chatgpt"
118+ if "claude.ai" in lowered or "claude.com" in lowered :
119+ return "claude"
120+ if "gemini.google.com" in lowered or "g.co/gemini" in lowered :
121+ return "gemini"
119122 return "unknown"
120123
121124
125+ def _chat_share_error_message (result : Dict [str , Any ]) -> str :
126+ provider = result .get ("provider" ) or "unknown"
127+ if provider == "unknown" :
128+ return (
129+ "Failed to extract messages from the provided link. "
130+ "Please provide a public ChatGPT, Claude, or Gemini share link."
131+ )
132+
133+ return (
134+ f"Failed to extract messages from the provided { provider } share link. "
135+ "Please confirm the link is public, exists, and is not redirecting to a login or deleted-chat page."
136+ )
137+
138+
async def _render_chat_share(url: str) -> tuple[str, str]:
    """Run ``_render_chat_share_sync`` for *url* in a worker thread.

    The synchronous renderer is handed to ``asyncio.to_thread`` so awaiting
    this coroutine does not block the event loop.

    Args:
        url: The share link to render.

    Returns:
        Whatever ``_render_chat_share_sync`` returns; the caller in this file
        unpacks it as ``(html, final_url)``.
    """
    rendered = await asyncio.to_thread(_render_chat_share_sync, url)
    return rendered
124141
@@ -206,7 +223,7 @@ def _block_heavy_assets(route):
206223 except Exception as exc :
207224 logger .warning ("Timeout or error during navigation: %s" , exc )
208225
209- provider = _detect_chat_provider (page .url or url )
226+ provider = _detect_chat_provider (page .url , url )
210227 selector = {
211228 "chatgpt" : "div[data-message-author-role]" ,
212229 "claude" : "script" ,
@@ -230,8 +247,12 @@ def _block_heavy_assets(route):
230247 return html , final_url
231248
232249
233- def _extract_chat_pairs (url : str , html : str ) -> tuple [str , str , List [MessagePair ]]:
234- provider = _detect_chat_provider (url )
250+ def _extract_chat_pairs (
251+ url : str ,
252+ html : str ,
253+ source_url : str = "" ,
254+ ) -> tuple [str , str , List [MessagePair ]]:
255+ provider = _detect_chat_provider (url , source_url )
235256 soup = BeautifulSoup (html , "html.parser" )
236257 pairs : List [MessagePair ] = []
237258 extraction_method = "none"
@@ -512,7 +533,7 @@ def _parse_transcript_text(text: str) -> tuple[str, List[MessagePair]]:
512533
513534async def _scrape_chat_share (url : str ) -> Dict [str , Any ]:
514535 html , final_url = await _render_chat_share (url )
515- provider , extraction_method , pairs = _extract_chat_pairs (final_url or url , html )
536+ provider , extraction_method , pairs = _extract_chat_pairs (final_url or url , html , url )
516537
517538 return {
518539 "provider" : provider ,
@@ -757,7 +778,8 @@ async def scrape_chat_link(req: ScrapeRequest, request: Request):
757778 pairs = result ["pairs" ]
758779
759780 if not pairs :
760- return _error (request , "Failed to extract messages from the provided link." , 400 )
781+ elapsed = round ((time .perf_counter () - start ) * 1000 , 2 )
782+ return _error (request , _chat_share_error_message (result ), 400 , elapsed )
761783
762784 data = ScrapeResponse (pairs = pairs )
763785 elapsed = round ((time .perf_counter () - start ) * 1000 , 2 )
0 commit comments