-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path00_getting_started.py
More file actions
67 lines (45 loc) · 1.42 KB
/
00_getting_started.py
File metadata and controls
67 lines (45 loc) · 1.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env python3
"""Getting started with EasyScrape.
Run: python 00_getting_started.py
"""
import easyscrape as es
def basic_fetch():
    """Scrape the httpbin HTML sample page and print a short summary.

    Prints the status code, the ``content-type`` header (``unknown`` when
    the header is absent) and the length of the response text.

    Returns:
        The object returned by ``es.scrape`` for the fetched page.
    """
    page = es.scrape("https://httpbin.org/html")

    status = page.status_code
    print(f"Status: {status}")

    content_type = page.headers.get("content-type", "unknown")
    print(f"Content-Type: {content_type}")

    # NOTE(review): this is len() of ``page.text``; if ``text`` is a str the
    # figure is a character count rather than a byte count -- confirm.
    size = len(page.text)
    print(f"Size: {size:,} bytes")

    return page
def css_selectors(result):
    """Query *result* with CSS selectors and print what was found.

    Args:
        result: Scrape result exposing ``css`` and ``css_list``.

    Prints the value of ``result.css("h1")`` and the number of items
    returned by ``result.css_list("p")``.
    """
    first_heading = result.css("h1")
    print(f"Heading: {first_heading}")

    paragraph_count = len(result.css_list("p"))
    print(f"Paragraphs: {paragraph_count}")
def builtin_helpers(result):
    """Show the convenience helpers available on a scrape result.

    Args:
        result: Scrape result exposing ``title``, ``main_text`` and
            ``safe_links``.

    Prints the title, the first 100 characters of the main text (newlines
    replaced by spaces, always followed by ``...``) and the number of
    items returned by ``safe_links``.
    """
    page_title = result.title()
    print(f"Title: {page_title}")

    # Slice first, then flatten newlines so the preview stays on one line.
    preview = result.main_text()[:100]
    preview = preview.replace("\n", " ")
    print(f"Main text: {preview}...")

    link_count = len(result.safe_links())
    print(f"Safe links: {link_count}")
def json_api():
    """Scrape the httpbin JSON sample and print its top-level keys."""
    response = es.scrape("https://httpbin.org/json")
    payload = response.json()

    top_level_keys = list(payload.keys())
    print(f"Keys: {top_level_keys}")
def main():
    """Run the four demo steps in order, printing a header before each.

    The result returned by the first step is passed to the CSS-selector
    and helper steps; the JSON step performs its own fetch.
    """
    rule = "-" * 40

    def announce(title):
        # Section header: the title line followed by a 40-dash rule.
        print(title)
        print(rule)

    announce("1. Basic Fetch")
    page = basic_fetch()

    announce("\n2. CSS Selectors")
    css_selectors(page)

    announce("\n3. Built-in Helpers")
    builtin_helpers(page)

    announce("\n4. JSON API")
    json_api()


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()