Skip to content

Commit 65eed21

Browse files
committed
Use native wheel scrolling for timelines
1 parent 6088121 commit 65eed21

1 file changed

Lines changed: 61 additions & 34 deletions

File tree

scraper.py

Lines changed: 61 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
from selenium.webdriver.chrome.options import Options
1414
from selenium.webdriver.common.by import By
1515
from selenium.webdriver.common.keys import Keys
16+
from selenium.webdriver.common.action_chains import ActionChains
1617
from selenium.webdriver.support.ui import WebDriverWait
1718
from selenium.webdriver.support import expected_conditions as EC
1819
from selenium.common.exceptions import (
@@ -760,43 +761,75 @@ def _get_article_content(self, tweet_url: str) -> str:
760761
def _scroll_down(self):
761762
"""Sayfayı aşağı kaydır ve X'in sanal timeline DOM'unu tetikle."""
762763
before = self._timeline_snapshot()
763-
old_articles = before["articles"]
764764

765-
# X aynı sayıda article tutup içerikleri değiştirebildiği için sadece
766-
# article sayısına veya scroll height'a bakmak erken "sayfa sonu" üretir.
765+
# X timeline React/virtualized bir liste. Bazı profillerde window.scrollBy
766+
# tek başına hiçbir şeyi tetiklemiyor; gerçek wheel input daha güvenilir.
767+
for attempt in range(4):
768+
self._perform_timeline_scroll(attempt + 1)
769+
time.sleep(random.uniform(SCROLL_PAUSE_MIN, SCROLL_PAUSE_MAX))
770+
771+
for _ in range(10):
772+
after = self._timeline_snapshot()
773+
if self._timeline_advanced(before, after):
774+
return True
775+
time.sleep(0.3)
776+
777+
return False
778+
779+
def _perform_timeline_scroll(self, intensity: int = 1) -> None:
780+
"""Birden fazla scroll yöntemi dene; X her yönteme aynı cevap vermiyor."""
781+
delta = 900 * max(1, intensity)
782+
767783
try:
768-
if old_articles:
769-
self.driver.execute_script(
770-
"arguments[0].scrollIntoView({block: 'end', behavior: 'instant'});",
771-
old_articles[-1],
772-
)
773-
time.sleep(0.25)
784+
self.driver.execute_script("window.focus();")
774785
except Exception:
775786
pass
776787

777788
try:
778-
for _ in range(3):
779-
self.driver.execute_script("window.scrollBy(0, window.innerHeight);")
780-
time.sleep(0.2)
789+
ActionChains(self.driver).scroll_by_amount(0, delta).perform()
790+
time.sleep(0.15)
781791
except Exception:
782-
self.driver.execute_script("window.scrollBy(0, 1400);")
792+
pass
783793

784794
try:
785-
body = self.driver.find_element(By.TAG_NAME, "body")
786-
body.send_keys(Keys.PAGE_DOWN)
787-
time.sleep(0.2)
795+
self.driver.execute_cdp_cmd(
796+
"Input.dispatchMouseEvent",
797+
{
798+
"type": "mouseWheel",
799+
"x": 600,
800+
"y": 600,
801+
"deltaX": 0,
802+
"deltaY": delta,
803+
},
804+
)
805+
time.sleep(0.15)
788806
except Exception:
789807
pass
790808

791-
time.sleep(random.uniform(SCROLL_PAUSE_MIN, SCROLL_PAUSE_MAX))
792-
793-
for _ in range(24):
794-
after = self._timeline_snapshot()
795-
if self._timeline_advanced(before, after):
796-
return True
797-
time.sleep(0.35)
809+
try:
810+
self.driver.execute_script(
811+
"""
812+
const delta = arguments[0];
813+
window.dispatchEvent(new WheelEvent('wheel', {
814+
deltaY: delta,
815+
bubbles: true,
816+
cancelable: true
817+
}));
818+
const scroller = document.scrollingElement || document.documentElement || document.body;
819+
scroller.scrollBy(0, delta);
820+
""",
821+
delta,
822+
)
823+
time.sleep(0.15)
824+
except Exception:
825+
pass
798826

799-
return False
827+
try:
828+
body = self.driver.find_element(By.TAG_NAME, "body")
829+
body.send_keys(Keys.PAGE_DOWN)
830+
time.sleep(0.15)
831+
except Exception:
832+
pass
800833

801834
def _timeline_snapshot(self) -> Dict:
802835
"""DOM ve scroll durumunu tek yerde ölç."""
@@ -916,19 +949,13 @@ def _scroll_recovery(self):
916949
try:
917950
body = self.driver.find_element(By.TAG_NAME, "body")
918951
body.click()
919-
for _ in range(6):
920-
body.send_keys(Keys.PAGE_DOWN)
921-
time.sleep(0.35)
922-
body.send_keys(Keys.END)
923-
time.sleep(1.0)
952+
time.sleep(0.2)
924953
except Exception:
925954
pass
926955

927-
try:
928-
self.driver.execute_script("window.scrollBy(0, document.documentElement.clientHeight * 4);")
929-
time.sleep(1.0)
930-
except Exception:
931-
pass
956+
for intensity in (2, 3, 4, 5):
957+
self._perform_timeline_scroll(intensity)
958+
time.sleep(0.4)
932959

933960
def _scroll_to_bottom(self):
934961
"""Sayfanın en altına git"""

0 commit comments

Comments
 (0)