-
Notifications
You must be signed in to change notification settings - Fork 2.2k
Expand file tree
/
Copy pathxiladaili.py
More file actions
32 lines (25 loc) · 805 Bytes
/
xiladaili.py
File metadata and controls
32 lines (25 loc) · 805 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from proxypool.schemas.proxy import Proxy
from proxypool.crawlers.base import BaseCrawler
from lxml import etree
# Root address of the xiladaili proxy listing site.
BASE_URL: str = "http://www.xiladaili.com/"

# Page-count limit; not referenced by any code visible in this file.
MAX_PAGE: int = 5
class XiladailiCrawler(BaseCrawler):
    """
    xiladaili crawler, http://www.xiladaili.com/

    Reads the text of the first cell of every table-body row on the
    listing page, expects it to look like ``host:port`` and turns each
    well-formed entry into a ``Proxy``.
    """
    urls = ["http://www.xiladaili.com/"]

    def parse(self, html):
        """
        parse html file to get proxies

        Malformed cells (no host, or a port that is not purely numeric)
        are skipped instead of being turned into unusable proxies.

        :param html: HTML text of a listing page; empty/None yields nothing
        :return: generator of Proxy objects, one per valid ``host:port`` cell
        """
        if not html:
            # Nothing was fetched, so there is nothing to parse.
            return
        etree_html = etree.HTML(html)
        if etree_html is None:
            # NOTE(review): lxml may hand back None when the document has no
            # parseable content; guard so .xpath() is never called on None.
            return
        ip_ports = etree_html.xpath("//tbody/tr/td[1]/text()")
        for ip_port in ip_ports:
            # Cell text can carry surrounding whitespace/newlines from the
            # markup; strip it before splitting on the first colon.
            host, _, port = ip_port.strip().partition(":")
            if not host or not port.isdigit():
                continue
            # The port is passed through as text, exactly as before.
            yield Proxy(host=host, port=port)
if __name__ == '__main__':
    # Manual check when executed as a script: print every item that the
    # crawler's run() produces.
    for found in XiladailiCrawler().run():
        print(found)