Skip to content

Commit e337f26

Browse files
committed
Add examples of scraping Hacker News
1 parent 72bdaf1 commit e337f26

File tree

4 files changed

+54 -0 lines changed
Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,24 @@
"""List Hacker News submission titles using async Playwright over CDP.

A SeleniumBase CDP driver is started first; Playwright then attaches to
that driver's endpoint URL instead of launching its own browser.
"""
import asyncio
from playwright.async_api import async_playwright
from seleniumbase import cdp_driver


async def main():
    """Open the submissions page and print each title link's text.

    Starts a SeleniumBase CDP driver, connects Playwright's Chromium
    client to the driver's endpoint, navigates the first page of the
    first context to the submissions URL, and prints every element
    matching ``span.titleline > a`` as a ``* ``-prefixed line.
    """
    cdp = await cdp_driver.start_async()
    cdp_endpoint = cdp.get_endpoint_url()

    async with async_playwright() as playwright:
        attached = await playwright.chromium.connect_over_cdp(cdp_endpoint)
        # Reuse the first context/page already exposed by the connection
        # rather than creating new ones.
        first_context = attached.contexts[0]
        tab = first_context.pages[0]
        target_url = "https://news.ycombinator.com/submitted?id=seleniumbase"
        await tab.goto(target_url)
        title_links = tab.locator("span.titleline > a")
        # The match count is read once, before iterating by index.
        total = await title_links.count()
        for index in range(total):
            link = title_links.nth(index)
            title = await link.inner_text()
            print("* " + title)


if __name__ == "__main__":
    event_loop = asyncio.new_event_loop()
    event_loop.run_until_complete(main())
Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,15 @@
"""List Hacker News submission titles using sync Playwright over CDP.

A SeleniumBase ``sb_cdp.Chrome`` instance is created first; Playwright
then attaches to that instance's endpoint URL.
"""
from playwright.sync_api import sync_playwright
from seleniumbase import sb_cdp

sb = sb_cdp.Chrome()
cdp_endpoint = sb.get_endpoint_url()

with sync_playwright() as playwright:
    attached = playwright.chromium.connect_over_cdp(cdp_endpoint)
    # Reuse the first context/page already exposed by the connection.
    first_context = attached.contexts[0]
    tab = first_context.pages[0]
    tab.goto("https://news.ycombinator.com/submitted?id=seleniumbase")
    title_links = tab.locator("span.titleline > a")
    # The match count is read once, before iterating by index.
    total = title_links.count()
    for index in range(total):
        title = title_links.nth(index).inner_text()
        print("* " + title)
Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,7 @@
"""List Hacker News submission titles using SeleniumBase ``sb_cdp`` only."""
from seleniumbase import sb_cdp

# The target URL is handed straight to the Chrome constructor.
sb = sb_cdp.Chrome("https://news.ycombinator.com/submitted?id=seleniumbase")
title_links = sb.find_elements("span.titleline > a")
for link in title_links:
    title = link.text
    print("* " + title)
Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
"""List Hacker News submission titles using ``SB(uc=True)`` in CDP mode."""
from seleniumbase import SB

with SB(uc=True) as sb:
    # CDP mode is activated with the target URL as its argument.
    sb.activate_cdp_mode(
        "https://news.ycombinator.com/submitted?id=seleniumbase"
    )
    title_links = sb.find_elements("span.titleline > a")
    for link in title_links:
        title = link.text
        print("* " + title)

0 commit comments

Comments
 (0)