@@ -33,23 +33,25 @@ def __init__(self, total: int = 10, proxy_timeout: int = 3, crawl_interval: int
3333 set_logger_config (log_config )
3434 self ._process = None
3535
36- async def _filter_urls (self , urls : list ) -> list :
36+ async def _filter_urls (self , urls : list , is_https : bool = False ) -> list :
3737 """
3838 Filter can't use proxy connections.
3939 :param urls: The proxy URLs list. Example: ["http://1234:9090","https://1234:9090"]
40+ :param is_https: Whether to test the proxy connection using the HTTPS protocol.
41+ Defaults to False, in which case the HTTP test site is used instead.
4042 :return: Filtered URLs.
4143 """
4244
4345 async def verify_proxy (proxy_url : str ):
4446 """
4547 Verify proxy connection can be connected.
46- :param proxy_url: If url starts with "https:// " will access self.test_https_web,
47- otherwise access self.. test_http_web.
48+ :param proxy_url: The proxy URL to verify. If is_https is True, self.test_https_web
49+ is accessed through the proxy; otherwise self.test_http_web is accessed.
4850 :return: Only test website response status equal 200 will return url, otherwise return None.
4951 """
5052 try :
5153 async with aiohttp .ClientSession (headers = REQUEST_HEADERS , timeout = self .proxy_timeout ) as session :
52- if proxy_url . startswith ( "https://" ) :
54+ if is_https :
5355 async with session .get (self .test_https_web , proxy = proxy_url ) as resp :
5456 return proxy_url if resp .status == 200 else None
5557 else :
@@ -69,11 +71,11 @@ async def acquire_url_list(self):
6971 for spider in self ._spiders :
7072 if len (self ._http_list ) < self .total :
7173 all_urls = await spider .get_http_urls ()
72- urls = await self ._filter_urls (all_urls )
74+ urls = await self ._filter_urls (all_urls , is_https = False )
7375 self ._http_list .extend (urls )
7476 if len (self ._https_list ) < self .total :
7577 all_urls = await spider .get_https_urls ()
76- urls = await self ._filter_urls (all_urls )
78+ urls = await self ._filter_urls (all_urls , is_https = True )
7779 self ._https_list .extend (urls )
7880
7981 async def main (self ):
@@ -101,7 +103,7 @@ def set_spiders(self, spiders: list):
101103 def get_http_urls (self , nums : int = 0 ) -> list :
102104 """
103105 Choice a number of http urls.
104- :param nums: The number of urls what you want to use.
106+ :param nums: The number of urls that you want to use. If nums is 0, all proxies will be returned.
105107 Note: If nums greater than current list size then maybe get repeat url.
106108 :return: Http proxy list.
107109 """
@@ -124,7 +126,7 @@ def get_https_urls(self, nums: int = 0) -> list:
124126 """
125127 Choice a number of https urls.
126128 :param nums: The number of urls what you want to use.
127- Note: If nums greater than current list size then maybe get repeat url.
129+ Note: If nums is greater than the current list size then you may get repeated urls. If nums is 0, all proxies will be returned.
128130 :return: Https proxy list.
129131 """
130132 temp_list = self ._https_list [:]
0 commit comments