Skip to content
This repository was archived by the owner on Jun 3, 2026. It is now read-only.

Commit d02aba5

Browse files
authored
Merge pull request #167 from Draconna/improve-context-share-errors
Improve context share link error handling
2 parents ab9c205 + 1d3485b commit d02aba5

2 files changed

Lines changed: 69 additions & 26 deletions

File tree

server.py

Lines changed: 34 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -196,7 +196,7 @@ async def scrape_chat_link(req: ScrapeRequest):
196196
{
197197
"status": "error",
198198
"data": None,
199-
"error": "Failed to extract messages from the provided link.",
199+
"error": _chat_share_error_message(result),
200200
"elapsed_ms": elapsed,
201201
},
202202
status_code=400,
@@ -532,17 +532,34 @@ def _build_memory_domain(judge: Any, weaver: Any) -> dict[str, Any] | None:
532532
}
533533

534534

535-
def _detect_chat_provider(url: str) -> str:
536-
lowered = url.lower()
537-
if "chatgpt.com" in lowered or "chat.openai.com" in lowered or "openai.com" in lowered:
538-
return "chatgpt"
539-
if "claude.ai" in lowered:
540-
return "claude"
541-
if "gemini.google.com" in lowered or "g.co/gemini" in lowered:
542-
return "gemini"
535+
def _detect_chat_provider(*urls: str) -> str:
536+
for url in urls:
537+
lowered = (url or "").lower()
538+
if not lowered:
539+
continue
540+
if "chatgpt.com" in lowered or "chat.openai.com" in lowered or "openai.com" in lowered:
541+
return "chatgpt"
542+
if "claude.ai" in lowered or "claude.com" in lowered:
543+
return "claude"
544+
if "gemini.google.com" in lowered or "g.co/gemini" in lowered:
545+
return "gemini"
543546
return "unknown"
544547

545548

549+
def _chat_share_error_message(result: dict[str, Any]) -> str:
550+
provider = result.get("provider") or "unknown"
551+
if provider == "unknown":
552+
return (
553+
"Failed to extract messages from the provided link. "
554+
"Please provide a public ChatGPT, Claude, or Gemini share link."
555+
)
556+
557+
return (
558+
f"Failed to extract messages from the provided {provider} share link. "
559+
"Please confirm the link is public, exists, and is not redirecting to a login or deleted-chat page."
560+
)
561+
562+
546563
async def _render_chat_share(url: str) -> tuple[str, str]:
547564
return await asyncio.to_thread(_render_chat_share_sync, url)
548565

@@ -595,7 +612,7 @@ def _block_heavy_assets(route):
595612
except Exception as exc:
596613
print(f"[scrape] navigation warning: {exc}", flush=True)
597614

598-
provider = _detect_chat_provider(page.url or url)
615+
provider = _detect_chat_provider(page.url, url)
599616
selector = {
600617
"chatgpt": "div[data-message-author-role]",
601618
"claude": "script",
@@ -617,8 +634,12 @@ def _block_heavy_assets(route):
617634
return html, final_url
618635

619636

620-
def _extract_chat_pairs(url: str, html: str) -> tuple[str, str, list[dict[str, str]]]:
621-
provider = _detect_chat_provider(url)
637+
def _extract_chat_pairs(
638+
url: str,
639+
html: str,
640+
source_url: str = "",
641+
) -> tuple[str, str, list[dict[str, str]]]:
642+
provider = _detect_chat_provider(url, source_url)
622643
soup = BeautifulSoup(html, "html.parser")
623644
pairs: list[dict[str, str]] = []
624645
extraction_method = "none"
@@ -859,7 +880,7 @@ def _parse_transcript_text(text: str) -> tuple[str, list[dict[str, str]]]:
859880

860881
async def _scrape_chat_share(url: str) -> dict[str, Any]:
861882
html, final_url = await _render_chat_share(url)
862-
provider, extraction_method, pairs = _extract_chat_pairs(final_url or url, html)
883+
provider, extraction_method, pairs = _extract_chat_pairs(final_url or url, html, url)
863884

864885
return {
865886
"provider": provider,

src/api/routes/memory.py

Lines changed: 35 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -108,17 +108,34 @@ def _error(request: Request, detail: str, code: int, elapsed_ms: float = 0) -> J
108108
return JSONResponse(content=body.model_dump(), status_code=code)
109109

110110

111-
def _detect_chat_provider(url: str) -> str:
112-
lowered = url.lower()
113-
if "chatgpt.com" in lowered or "chat.openai.com" in lowered or "openai.com" in lowered:
114-
return "chatgpt"
115-
if "claude.ai" in lowered:
116-
return "claude"
117-
if "gemini.google.com" in lowered or "g.co/gemini" in lowered:
118-
return "gemini"
111+
def _detect_chat_provider(*urls: str) -> str:
112+
for url in urls:
113+
lowered = (url or "").lower()
114+
if not lowered:
115+
continue
116+
if "chatgpt.com" in lowered or "chat.openai.com" in lowered or "openai.com" in lowered:
117+
return "chatgpt"
118+
if "claude.ai" in lowered or "claude.com" in lowered:
119+
return "claude"
120+
if "gemini.google.com" in lowered or "g.co/gemini" in lowered:
121+
return "gemini"
119122
return "unknown"
120123

121124

125+
def _chat_share_error_message(result: Dict[str, Any]) -> str:
126+
provider = result.get("provider") or "unknown"
127+
if provider == "unknown":
128+
return (
129+
"Failed to extract messages from the provided link. "
130+
"Please provide a public ChatGPT, Claude, or Gemini share link."
131+
)
132+
133+
return (
134+
f"Failed to extract messages from the provided {provider} share link. "
135+
"Please confirm the link is public, exists, and is not redirecting to a login or deleted-chat page."
136+
)
137+
138+
122139
async def _render_chat_share(url: str) -> tuple[str, str]:
123140
return await asyncio.to_thread(_render_chat_share_sync, url)
124141

@@ -206,7 +223,7 @@ def _block_heavy_assets(route):
206223
except Exception as exc:
207224
logger.warning("Timeout or error during navigation: %s", exc)
208225

209-
provider = _detect_chat_provider(page.url or url)
226+
provider = _detect_chat_provider(page.url, url)
210227
selector = {
211228
"chatgpt": "div[data-message-author-role]",
212229
"claude": "script",
@@ -230,8 +247,12 @@ def _block_heavy_assets(route):
230247
return html, final_url
231248

232249

233-
def _extract_chat_pairs(url: str, html: str) -> tuple[str, str, List[MessagePair]]:
234-
provider = _detect_chat_provider(url)
250+
def _extract_chat_pairs(
251+
url: str,
252+
html: str,
253+
source_url: str = "",
254+
) -> tuple[str, str, List[MessagePair]]:
255+
provider = _detect_chat_provider(url, source_url)
235256
soup = BeautifulSoup(html, "html.parser")
236257
pairs: List[MessagePair] = []
237258
extraction_method = "none"
@@ -512,7 +533,7 @@ def _parse_transcript_text(text: str) -> tuple[str, List[MessagePair]]:
512533

513534
async def _scrape_chat_share(url: str) -> Dict[str, Any]:
514535
html, final_url = await _render_chat_share(url)
515-
provider, extraction_method, pairs = _extract_chat_pairs(final_url or url, html)
536+
provider, extraction_method, pairs = _extract_chat_pairs(final_url or url, html, url)
516537

517538
return {
518539
"provider": provider,
@@ -757,7 +778,8 @@ async def scrape_chat_link(req: ScrapeRequest, request: Request):
757778
pairs = result["pairs"]
758779

759780
if not pairs:
760-
return _error(request, "Failed to extract messages from the provided link.", 400)
781+
elapsed = round((time.perf_counter() - start) * 1000, 2)
782+
return _error(request, _chat_share_error_message(result), 400, elapsed)
761783

762784
data = ScrapeResponse(pairs=pairs)
763785
elapsed = round((time.perf_counter() - start) * 1000, 2)

0 commit comments

Comments
 (0)