Skip to content

Commit ea09958

Browse files
Added a --format option that tells ProxyEater how each proxy should be formatted in the text output file, e.g. {ip}:{port}
1 parent 8cf43d2 commit ea09958

5 files changed

Lines changed: 105 additions & 44 deletions

File tree

ProxyEater/Proxy.py

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ class Proxy:
5959
def __init__(self, ip: str, port: int, type_: ProxyType) -> None:
6060
self.ip: str = ip
6161
self.port: int = port
62+
if isinstance(type_, str):
63+
type_ = ProxyType.from_name(type_)
6264
self.type: ProxyType = type_
6365

6466
def check_status(self, timeout: int = 10, url: str = 'http://icanhazip.com/', on_success_callback: _Callable = None,
@@ -179,9 +181,20 @@ def __dict__(self) -> dict:
179181
'port': self.port,
180182
'type': self.type.name,
181183
'status': self.status.name,
184+
'scheme': self.type.name.lower(),
182185
'geolocation_info': self.geolocation_info
183186
}
184187

188+
def __iter__(self):
189+
return iter((
190+
('ip', self.ip),
191+
('port', self.port),
192+
('type', self.type.name),
193+
('status', self.status.name),
194+
('scheme', self.type.name.lower()),
195+
('geolocation_info', self.geolocation_info)
196+
))
197+
185198

186199
class ProxyList(set):
187200
def __init__(self, proxies: _Iterable[Proxy] = None):
@@ -256,38 +269,50 @@ def check_proxy(proxy_: Proxy):
256269

257270
on_progress_callback(self, 100)
258271

259-
def to_text(self, separator: str = "\n") -> str:
272+
def to_text(self, separator: str = "\n", format_: str = '{scheme}://{ip}:{port}') -> str:
260273
"""
261274
This method is used to convert the list to a text string.
275+
276+
:param separator: The separator between proxies.
277+
:param format_: The format of each proxy.
278+
:return: The text string.
262279
"""
263-
return separator.join(str(proxy) for proxy in self)
280+
return separator.join(format_.format(**dict(proxy)) for proxy in self)
264281

265282
def batch_collect_geolocations(self, fields: str = 'status,message,continent,continentCode,country,countryCode,'
266283
'region,regionName,city,zip,lat,lon,timezone,isp,org,as,asname,'
267-
'query', on_progress_callback: _Callable = None):
284+
'query', on_progress_callback: _Callable = None,
285+
on_error_callback: _Callable = None) -> None:
268286
"""
269287
This method is used to collect the geolocation of all proxies in the list.
270288
271289
:param fields: The fields to be returned.
272290
:param on_progress_callback: A callback function to be called on each progress.
291+
:param on_error_callback: A callback function to be called on each error.
273292
"""
274293
if on_progress_callback is not None:
275294
if not callable(on_progress_callback):
276-
raise TypeError(
277-
"ProxyList.batch_collect_geolocations() argument on_progress_callback must be a callable.")
295+
raise TypeError("ProxyList.batch_collect_geolocations() argument on_progress_callback must be a"
296+
" callable.")
278297
else:
279298
on_progress_callback = lambda proxy_list, progress: None
299+
if on_error_callback is not None:
300+
if not callable(on_error_callback):
301+
raise TypeError("ProxyList.batch_collect_geolocations() argument on_error_callback must be a callable.")
302+
else:
303+
on_error_callback = lambda proxy_list, error: None
280304
all_proxies = list(self)
281305
for start_index in range(0, len(self), 100):
282-
end_index = start_index + 100
306+
end_index = start_index + 100 if start_index + 100 < len(self) else len(self)
283307
proxies = all_proxies[start_index:end_index]
284308
try:
285-
response = requests.post(url=f"http://ip-api.com/batch?fields={fields}", data=proxies)
309+
response = requests.post(url=f"http://ip-api.com/batch?fields={fields}",
310+
json=[proxy.ip for proxy in proxies]).json()
286311
for index, proxy in enumerate(proxies):
287-
proxy.geolocation_info = response.json()[index]
312+
proxy.geolocation_info = response[index]
313+
on_progress_callback(self, end_index / len(self) * 100)
288314
except Exception as e:
289-
raise Exception("Failed to collect geolocation information.", e)
290-
on_progress_callback(self, (end_index + 1) / len(self) * 100)
315+
on_error_callback(self, Exception("Failed to collect geolocation information.", e))
291316

292317
def to_json(self, indent: int = 4, include_status: bool = True, include_geolocation: bool = True) -> str:
293318
"""
@@ -315,15 +340,17 @@ def to_json(self, indent: int = 4, include_status: bool = True, include_geolocat
315340

316341
return json.dumps(proxies, indent=indent)
317342

318-
def to_text_file(self, filename: _Union[str, os.PathLike], separator: str = "\n") -> None:
343+
def to_text_file(self, filename: _Union[str, os.PathLike], separator: str = "\n",
344+
format_: str = '{scheme}://{ip}:{port}') -> None:
319345
"""
320346
This method is used to write the list to a text file.
321347
322348
:param filename: The name of the text file.
323349
:param separator: The separator of the text file.
350+
:param format_: The format of each proxy.
324351
"""
325352
with open(filename, 'w') as f:
326-
f.write(self.to_text(separator))
353+
f.write(self.to_text(separator, format_))
327354

328355
def to_json_file(self, filename: _Union[str, os.PathLike], indent: int = 4, include_status: bool = True,
329356
include_geolocation: bool = True) -> None:

ProxyEater/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# ProxyEater
22
# CodeWriter21
33

4-
__version__ = "1.4.5"
4+
__version__ = "1.5.0"
55
__author__ = "CodeWriter21"
66
__email__ = "CodeWriter21@gmail.com"
77
__license__ = "Apache-2.0"

ProxyEater/__main__.py

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -102,15 +102,28 @@ def checking_callback(proxy_list: ProxyList, progress: float):
102102
logger.info(f'Scraped {len(proxies)} proxies.')
103103

104104
if proxies.count > 0:
105+
if args.include_geolocation:
106+
on_progress_callback = on_error_callback = None
107+
if args.verbose:
108+
logger.progress_bar = log21.ProgressBar()
109+
110+
def on_progress_callback(proxy_list: ProxyList, progress: float):
111+
logger.progress_bar(progress, 100)
112+
113+
def on_error_callback(proxy_list: ProxyList, error: Exception):
114+
logger.error(f'{error.__class__.__name__}: {error}')
115+
logger.info('Getting the geolocation info of the proxies...')
116+
proxies.batch_collect_geolocations(on_progress_callback=on_progress_callback,
117+
on_error_callback=on_error_callback)
105118
if args.verbose:
106119
logger.info(f'Writing {proxies.count} proxies to {args.output}...')
107120
# Write to file
108-
if args.format == 'text':
109-
proxies.to_text_file(args.output, '\n')
110-
elif args.format == 'json':
121+
if args.file_format == 'text':
122+
proxies.to_text_file(args.output, '\n', format_=args.format)
123+
elif args.file_format == 'json':
111124
proxies.to_json_file(args.output, include_status=args.include_status,
112125
include_geolocation=args.include_geolocation)
113-
elif args.format == 'csv':
126+
elif args.file_format == 'csv':
114127
proxies.to_csv_file(args.output, include_status=args.include_status,
115128
include_geolocation=args.include_geolocation)
116129
if proxies.count > 0:
@@ -159,13 +172,26 @@ def checking_callback(proxy_list: ProxyList, progress: float):
159172
logger.info(f'Alive proxies: {proxies.count}')
160173

161174
if proxies.count > 0:
175+
if args.include_geolocation:
176+
on_progress_callback = on_error_callback = None
177+
if args.verbose:
178+
logger.progress_bar = log21.ProgressBar()
179+
180+
def on_progress_callback(proxy_list: ProxyList, progress: float):
181+
logger.progress_bar(progress, 100)
182+
183+
def on_error_callback(proxy_list: ProxyList, error: Exception):
184+
logger.error(f'{error.__class__.__name__}: {error}')
185+
logger.info('Getting the geolocation info of the proxies...')
186+
proxies.batch_collect_geolocations(on_progress_callback=on_progress_callback,
187+
on_error_callback=on_error_callback)
162188
# Write to file
163-
if args.format == 'text':
164-
proxies.to_text_file(args.output, '\n')
165-
elif args.format == 'json':
189+
if args.file_format == 'text':
190+
proxies.to_text_file(args.output, '\n', format_=args.format)
191+
elif args.file_format == 'json':
166192
proxies.to_json_file(args.output, include_status=args.include_status,
167193
include_geolocation=args.include_geolocation)
168-
elif args.format == 'csv':
194+
elif args.file_format == 'csv':
169195
proxies.to_csv_file(args.output, include_status=args.include_status,
170196
include_geolocation=args.include_geolocation)
171197
logger.info(f'Wrote {proxies.count} proxies to {args.output}.')
@@ -177,8 +203,12 @@ def main():
177203
parser.add_argument('mode', help='Modes: Scrape, Check')
178204
parser.add_argument('--source', '-s', help=f'The source of the proxies(default:{path / "sources.json"}).')
179205
parser.add_argument('--output', '-o', help=f'The output file.')
180-
parser.add_argument('--format', '-f', help=f'The format of the output file(default:text).', default='text',
206+
parser.add_argument('--file-format', '-ff', help=f'The format of the output file(default:text).',
207+
default='text',
181208
choices=['text', 'json', 'csv'])
209+
parser.add_argument('--format', '-f', help='The format for saving the proxies in text file(default:'
210+
'"{scheme}://{ip}:{port}").',
211+
default='{scheme}://{ip}:{port}')
182212
parser.add_argument('--include-status', '-is', help=f'Include the status of the proxies in the output file.',
183213
action='store_true')
184214
parser.add_argument('--threads', '-t', help=f'The number of threads to use for scraping(default:25).', type=int,
@@ -228,26 +258,21 @@ def main():
228258
if args.output:
229259
args.output = pathlib.Path(args.output)
230260
else:
231-
if args.format == 'text':
261+
if args.file_format == 'text':
232262
ext = 'txt'
233-
elif args.format == 'json':
263+
elif args.file_format == 'json':
234264
ext = 'json'
235-
elif args.format == 'csv':
265+
elif args.file_format == 'csv':
236266
ext = 'csv'
237267
else:
238-
parser.error(f'The format {args.format} is not supported.')
268+
parser.error(f'The format {args.file_format} is not supported.')
239269
return
240270
args.output = pathlib.Path('.') / ('proxies.' + ext)
241271
i = 2
242272
while args.output.exists():
243273
args.output = pathlib.Path('.') / f'proxies-{i}.{ext}'
244274
i += 1
245275

246-
# Output Format
247-
if args.format == 'text' and (args.include_status or args.include_geolocation):
248-
parser.error(f'The format {args.format} does not support the include-status or include-geolocation.')
249-
return
250-
251276
args.mode = args.mode.lower()
252277
if args.mode == 'scrape':
253278
scrape(args)

README.md

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
ProxyEater\[1.4.5\]
1+
ProxyEater\[1.5.0\]
22
===================
33

44
![version](https://img.shields.io/pypi/v/ProxyEater)
@@ -34,11 +34,12 @@ Usage
3434
-----
3535

3636
```
37-
usage: ProxyEater [-h] [--source SOURCE] [--output OUTPUT] [--format { text, json, csv }]
38-
[--threads THREADS] [--include-status] [--verbose] [--quiet] [--version]
39-
[--timeout TIMEOUT] [--proxy PROXY] [--proxy-type PROXY_TYPE] [--useragent
40-
USERAGENT] [--include-geolocation] [--source-format { text, json, csv }]
41-
[--default-type { http, https, socks4, socks5 }]
37+
usage: ProxyEater [-h] [--source SOURCE] [--output OUTPUT] [--file-format { text, json, csv }]
38+
[--format FORMAT] [--include-status] [--threads THREADS] [--timeout TIMEOUT]
39+
[--url URL] [--verbose] [--quiet] [--version] [--proxy PROXY] [--proxy-type
40+
PROXY_TYPE] [--useragent USERAGENT] [--include-geolocation] [--no-check]
41+
[--source-format { text, json, csv }] [--default-type { http, https, socks4,
42+
socks5 }]
4243
mode
4344
4445
positional arguments:
@@ -48,23 +49,29 @@ options:
4849
-h, --help
4950
show this help message and exit
5051
--source SOURCE, -s SOURCE
51-
The source of the proxies(default:*\Python\Python310\lib\site-packages\ProxyEater\sources.json).
52+
The source of the proxies(default:C:\Users\Morteza\AppData\Local\Programs\
53+
Python\Python310\lib\site-packages\ProxyEater\sources.json).
5254
--output OUTPUT, -o OUTPUT
5355
The output file.
54-
--format { text, json, csv }, -f { text, json, csv }
56+
--file-format { text, json, csv }, -ff { text, json, csv }
5557
The format of the output file(default:text).
56-
--threads THREADS, -t THREADS
57-
The number of threads to use for scraping(default:25).
58+
--format FORMAT, -f FORMAT
59+
The format for saving the proxies in text
60+
file(default:"{scheme}://{ip}:{port}").
5861
--include-status, -is
5962
Include the status of the proxies in the output file.
63+
--threads THREADS, -t THREADS
64+
The number of threads to use for scraping(default:25).
65+
--timeout TIMEOUT, -to TIMEOUT
66+
The timeout of the requests(default:15).
67+
--url URL, -u URL
68+
The url to use for checking the proxies(default:http://icanhazip.com).
6069
--verbose, -v
6170
The verbose of the program(default:False).
6271
--quiet, -q
6372
The quiet of the program(default:False).
6473
--version, -V
6574
The version of the program.
66-
--timeout TIMEOUT, -to TIMEOUT
67-
The timeout of the requests(default:15).
6875
6976
Scrape:
7077
Scrape mode arguments
@@ -77,6 +84,8 @@ Scrape:
7784
The useragent of the requests(default:random).
7885
--include-geolocation, -ig
7986
Include the geolocation info of the proxies in the output file.
87+
--no-check, -nc
88+
Use this option to skip the checking of the proxies after scraping.
8089
8190
Check:
8291
Check mode arguments

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
setup(
99
name='ProxyEater',
10-
version='1.4.5',
10+
version='1.5.0',
1111
author='CodeWriter21',
1212
author_email='CodeWriter21@gmail.com',
1313
description='A Python Proxy Scraper for gathering fresh proxies.',

0 commit comments

Comments
 (0)