Skip to content

Commit 181ff43

Browse files
committed
Preserve partial results when browser closes
1 parent 65eed21 commit 181ff43

3 files changed

Lines changed: 49 additions & 3 deletions

File tree

diagnostics.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
"timeline_empty": "No tweet articles were detected in the loaded timeline.",
2828
"timeline_stalled": "The timeline stopped advancing before the requested amount was collected.",
2929
"partial_target_not_met": "The scrape ended with fewer items than requested.",
30+
"browser_window_closed": "The browser window closed or ChromeDriver lost the active web view during scraping.",
3031
"tweet_parse_failed": "A tweet article was detected but required fields could not be parsed.",
3132
"full_text_failed": "A long tweet was detected but full text extraction returned no content.",
3233
"article_extraction_failed": "An X Article was detected but article content extraction returned no content.",

main.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,16 @@ def main():
582582
import traceback
583583
traceback.print_exc()
584584

585+
if scraper and hasattr(scraper, 'tweets_collected') and scraper.tweets_collected and not tweets:
586+
tweets = scraper.tweets_collected
587+
588+
error_text = str(e).lower()
589+
error_reason = (
590+
"browser_window_closed"
591+
if "no such window" in error_text or "web view not found" in error_text
592+
else "unknown_error"
593+
)
594+
585595
# Hata durumunda da kaydetmeyi dene
586596
if tweets and config:
587597
print(f"\nHataya rağmen {len(tweets)} tweet kaydediliyor...")
@@ -609,10 +619,11 @@ def main():
609619
"export_saving",
610620
"warning",
611621
"Error export saved after exception",
622+
reason=error_reason,
612623
path=output_path,
613624
total_tweets=len(tweets),
614625
)
615-
save_cli_run_log(run_log, "failed")
626+
save_cli_run_log(run_log, "partial")
616627
except:
617628
if run_log:
618629
record_event(
@@ -630,7 +641,7 @@ def main():
630641
"unknown_error",
631642
"error",
632643
f"Unhandled exception: {e}",
633-
reason="unknown_error",
644+
reason=error_reason,
634645
)
635646
save_cli_run_log(run_log, "failed")
636647

scraper.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
from selenium.common.exceptions import (
2020
TimeoutException,
2121
NoSuchElementException,
22+
NoSuchWindowException,
2223
StaleElementReferenceException,
24+
WebDriverException,
2325
)
2426
from webdriver_manager.chrome import ChromeDriverManager
2527

@@ -983,6 +985,7 @@ def scrape_by_count(self, count: int) -> List[Tweet]:
983985
max_recovery_attempts = 3
984986
scan_cycles = 0
985987
max_scan_cycles = max(60, count * 8)
988+
browser_lost = False
986989

987990
try:
988991
while len(self.tweets_collected) < count:
@@ -1093,9 +1096,40 @@ def scrape_by_count(self, count: int) -> List[Tweet]:
10931096
except KeyboardInterrupt:
10941097
print(f"\n\nDurduruldu! {len(self.tweets_collected)} tweet toplandı.")
10951098
raise # Ana programa ilet
1099+
except NoSuchWindowException as e:
1100+
browser_lost = True
1101+
print(f"\nBrowser penceresi kapandı veya Chrome bağlantısı koptu. {len(self.tweets_collected)} tweet kısmi sonuç olarak kullanılacak.")
1102+
record_event(
1103+
self.run_log,
1104+
"browser",
1105+
"error" if not self.tweets_collected else "warning",
1106+
f"Browser window closed during count scrape: {e}",
1107+
reason="browser_window_closed",
1108+
collected=len(self.tweets_collected),
1109+
target=count,
1110+
scan_cycles=scan_cycles,
1111+
)
1112+
except WebDriverException as e:
1113+
if "no such window" not in str(e).lower() and "web view not found" not in str(e).lower():
1114+
raise
1115+
browser_lost = True
1116+
print(f"\nChrome webview kayboldu. {len(self.tweets_collected)} tweet kısmi sonuç olarak kullanılacak.")
1117+
record_event(
1118+
self.run_log,
1119+
"browser",
1120+
"error" if not self.tweets_collected else "warning",
1121+
f"Chrome webview was lost during count scrape: {e}",
1122+
reason="browser_window_closed",
1123+
collected=len(self.tweets_collected),
1124+
target=count,
1125+
scan_cycles=scan_cycles,
1126+
)
10961127

10971128
# Scroll bitti, şimdi show more olan tweetlerin tam metnini al
1098-
self._process_show_more_tweets()
1129+
if browser_lost:
1130+
print("Browser kapandığı için show more/article tam metin alma adımı atlandı.")
1131+
else:
1132+
self._process_show_more_tweets()
10991133

11001134
print(f"Toplam {len(self.tweets_collected)} tweet toplandı.")
11011135
if not self.tweets_collected:

0 commit comments

Comments
 (0)