-
Notifications
You must be signed in to change notification settings - Fork 753
Expand file tree
/
Copy pathset_headers.py
More file actions
33 lines (24 loc) · 1.14 KB
/
set_headers.py
File metadata and controls
33 lines (24 loc) · 1.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import asyncio
from crawlee import HttpHeaders, Request
from crawlee.crawlers import HttpCrawler, HttpCrawlingContext
from crawlee.http_clients import ImpitHttpClient
async def main() -> None:
    """Request the header-echo endpoint twice and log each response body.

    The first request carries only the client-level default headers; the
    second one additionally carries a header set on the request itself.
    """
    target_url = 'https://httpbin.org/headers'

    # Client-level defaults: these headers accompany every request the
    # client sends.
    default_headers = {'X-Api-Key': 'secret'}
    crawler = HttpCrawler(http_client=ImpitHttpClient(headers=default_headers))

    @crawler.router.default_handler
    async def request_handler(context: HttpCrawlingContext) -> None:
        # The endpoint replies with the request headers it received, so
        # logging the decoded body shows what was actually sent.
        raw_body = await context.http_response.read()
        context.log.info(raw_body.decode())

    # Request-level header: applies to this request only and is merged with
    # the client defaults.
    extra_headers = HttpHeaders({'Accept': 'application/json'})
    # This request points at the same URL as the plain one, so it needs its
    # own `unique_key`; otherwise deduplication would discard it.
    request_with_header = Request.from_url(
        target_url,
        headers=extra_headers,
        unique_key='set-headers-example',
    )

    await crawler.run([target_url, request_with_header])
# Start the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    asyncio.run(main())