-
Notifications
You must be signed in to change notification settings - Fork 23
Expand file tree
/
Copy pathspider_custom_pipeline.py
More file actions
34 lines (24 loc) · 1.13 KB
/
spider_custom_pipeline.py
File metadata and controls
34 lines (24 loc) · 1.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from __future__ import annotations
from typing import TYPE_CHECKING, Any
from scrapy import Request, Spider
if TYPE_CHECKING:
from collections.abc import Generator
from scrapy.http.response import Response
class CustomPipelineSpider(Spider):
    """Spider that follows product links from its start pages and scrapes them.

    Every start URL is requested and handed to :meth:`parse`, which follows
    each link whose ``href`` contains ``/products/``. Every followed page is
    handed to :meth:`parse_product`, which yields one item dict per page.
    """

    name = 'custom_pipeline_spider'

    def __init__(self, start_urls: list[str] | str, *args: Any, **kwargs: Any) -> None:
        """Initialize the spider with the URLs the crawl starts from.

        Args:
            start_urls: URLs to request first. A single string is also
                accepted and is treated as exactly one URL.
            *args: Forwarded unchanged to the base ``Spider`` initializer.
            **kwargs: Forwarded unchanged to the base ``Spider`` initializer.
        """
        super().__init__(*args, **kwargs)
        # Scrapy delivers command-line spider arguments as plain strings.
        # Iterating a string in ``start_requests`` would produce one bogus
        # request per character, so wrap a lone string into a one-item list.
        if isinstance(start_urls, str):
            start_urls = [start_urls]
        self.start_urls = start_urls

    def start_requests(self) -> Generator[Request, None, None]:
        """Yield one request per start URL, each handled by :meth:`parse`."""
        for url in self.start_urls:
            yield Request(url, callback=self.parse)

    def parse(self, response: Response) -> Generator[Request, None, None]:
        """Follow every link on the page whose ``href`` contains ``/products/``.

        Args:
            response: The downloaded page to extract links from.

        Yields:
            A follow-up request per matching link, handled by
            :meth:`parse_product`.
        """
        for link in response.css('a[href*="/products/"]::attr(href)').getall():
            yield response.follow(link, callback=self.parse_product)

    def parse_product(self, response: Response) -> Generator[dict[str, str], None, None]:
        """Extract a single item from a product page.

        Args:
            response: The downloaded product page.

        Yields:
            A dict with the keys ``url``, ``name``, ``price`` and
            ``description``. The three scraped fields take the first matching
            text node and fall back to an empty string when nothing matches.
        """
        yield {
            'url': response.url,
            'name': response.css('h1::text').get(''),
            'price': response.css('span.price::text').get(''),
            'description': response.css('p.description::text').get(''),
        }