Skip to content

Commit e1da680

Browse files
author
linzeen
committed
[modify] Fix BUG and add some annotations.
1 parent 7349020 commit e1da680

File tree

1 file changed

+10
-8
lines changed

1 file changed

+10
-8
lines changed

simple_proxy_pool/proxy_pool.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,23 +33,25 @@ def __init__(self, total: int = 10, proxy_timeout: int = 3, crawl_interval: int
3333
set_logger_config(log_config)
3434
self._process = None
3535

36-
async def _filter_urls(self, urls: list) -> list:
36+
async def _filter_urls(self, urls: list, is_https: bool = False) -> list:
3737
"""
3838
Filter out proxy connections that cannot be used.
3939
:param urls: The proxy URLs list. Example: ["http://1234:9090","https://1234:9090"]
40+
:param is_https: True or False, default False.
41+
This will determine whether to test the connection using the HTTPS protocol.
4042
:return: Filtered URLs.
4143
"""
4244

4345
async def verify_proxy(proxy_url: str):
4446
"""
4547
Verify proxy connection can be connected.
46-
:param proxy_url: If url starts with "https://" will access self.test_https_web,
47-
otherwise access self..test_http_web.
48+
:param proxy_url: If protocol is "https" will access self.test_https_web,
49+
otherwise access self.test_http_web.
4850
:return: Only test website response status equal 200 will return url, otherwise return None.
4951
"""
5052
try:
5153
async with aiohttp.ClientSession(headers=REQUEST_HEADERS, timeout=self.proxy_timeout) as session:
52-
if proxy_url.startswith("https://"):
54+
if is_https:
5355
async with session.get(self.test_https_web, proxy=proxy_url) as resp:
5456
return proxy_url if resp.status == 200 else None
5557
else:
@@ -69,11 +71,11 @@ async def acquire_url_list(self):
6971
for spider in self._spiders:
7072
if len(self._http_list) < self.total:
7173
all_urls = await spider.get_http_urls()
72-
urls = await self._filter_urls(all_urls)
74+
urls = await self._filter_urls(all_urls, is_https=False)
7375
self._http_list.extend(urls)
7476
if len(self._https_list) < self.total:
7577
all_urls = await spider.get_https_urls()
76-
urls = await self._filter_urls(all_urls)
78+
urls = await self._filter_urls(all_urls, is_https=True)
7779
self._https_list.extend(urls)
7880

7981
async def main(self):
@@ -101,7 +103,7 @@ def set_spiders(self, spiders: list):
101103
def get_http_urls(self, nums: int = 0) -> list:
102104
"""
103105
Choice a number of http urls.
104-
:param nums: The number of urls what you want to use.
106+
:param nums: The number of urls that you want to use. If nums is 0, all proxies will be returned.
105107
Note: If nums greater than current list size then maybe get repeat url.
106108
:return: Http proxy list.
107109
"""
@@ -124,7 +126,7 @@ def get_https_urls(self, nums: int = 0) -> list:
124126
"""
125127
Choice a number of https urls.
126128
:param nums: The number of urls that you want to use.
127-
Note: If nums greater than current list size then maybe get repeat url.
129+
Note: If nums is greater than the current list size, repeated urls may be returned. If nums is 0, all proxies will be returned.
128130
:return: Https proxy list.
129131
"""
130132
temp_list = self._https_list[:]

0 commit comments

Comments
 (0)