-
Notifications
You must be signed in to change notification settings - Fork 23
Expand file tree
/
Copy pathspider_crawl.py
More file actions
29 lines (20 loc) · 925 Bytes
/
spider_crawl.py
File metadata and controls
29 lines (20 loc) · 925 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from __future__ import annotations
from typing import TYPE_CHECKING, Any
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
if TYPE_CHECKING:
from collections.abc import Generator
from scrapy.http.response import Response
class CrawlProductSpider(CrawlSpider):
    """Crawl spider that emits one item dict per product response.

    The single crawl rule uses a ``LinkExtractor`` restricted to the
    ``/products/`` pattern and names :meth:`parse_product` as its callback.
    """

    name = 'crawl_product_spider'
    rules = (
        Rule(
            LinkExtractor(allow=r'/products/'),
            callback='parse_product',
        ),
    )

    def __init__(self, start_urls: list[str], *args: Any, **kwargs: Any) -> None:
        """Initialise the base spider, then store the starting URLs.

        Args:
            start_urls: Value assigned to ``self.start_urls``.
            *args: Positional arguments forwarded to the base initialiser.
            **kwargs: Keyword arguments forwarded to the base initialiser.
        """
        super().__init__(*args, **kwargs)
        # Assigned after the base initialiser has run, so the caller's
        # value is what ends up on the instance.
        self.start_urls = start_urls

    def parse_product(self, response: Response) -> Generator[dict, None, None]:
        """Yield a single item describing the page behind ``response``.

        The item holds the response URL plus the first text match of each
        CSS query below; a query with no match contributes an empty string.

        Args:
            response: Response handed to this callback.

        Yields:
            A dict with the keys ``url``, ``name``, ``price`` and
            ``description``, in that order.
        """
        # (item key, CSS query) pairs, listed in the order the keys appear.
        css_by_field = (
            ('name', 'h1::text'),
            ('price', 'span.price::text'),
            ('description', 'p.description::text'),
        )

        item = {'url': response.url}
        for field, css_query in css_by_field:
            item[field] = response.css(css_query).get('')
        yield item