Skip to content

Commit 1c9e7d7

Browse files
Minor improvements
1 parent 6154fd0 commit 1c9e7d7

7 files changed

Lines changed: 73 additions & 35 deletions

File tree

.github/workflows/automatic-release.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,12 @@ jobs:
2323
- name: Build
2424
run: |
2525
python setup.py sdist bdist_wheel
26+
pip install dist/*.whl
27+
echo "PACKAGE_VERSION=$(python -c 'import ProxyEater; print(ProxyEater.__version__)')" >> $GITHUB_ENV
2628
- uses: "marvinpinto/action-automatic-releases@latest"
2729
with:
2830
repo_token: "${{ secrets.GITHUB_TOKEN }}"
29-
automatic_release_tag: "latest"
31+
automatic_release_tag: "${{ env.PACKAGE_VERSION }}"
3032
title: "Auto Build"
3133
files: |
3234
dist/*

ProxyEater/Proxy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,13 +243,13 @@ def check_proxy(proxy_: Proxy):
243243
thread = threading.Thread(target=check_proxy, args=(proxy,))
244244
threads.append(thread)
245245
thread.start()
246-
on_progress_callback(self, (i + 1) / length * 100)
247246
while len(threads) >= threads_no:
248247
for thread in threads:
249248
if not thread.is_alive():
250249
threads.remove(thread)
251250
break
252251
time.sleep(0.1)
252+
on_progress_callback(self, (i + 1) / length * 100)
253253

254254
for thread in threads:
255255
thread.join()

ProxyEater/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# ProxyEater
22
# CodeWriter21
33

4-
__version__ = "1.4.0"
4+
__version__ = "1.4.1"
55
__author__ = "CodeWriter21"
66
__email__ = "CodeWriter21@gmail.com"
77
__license__ = "Apache-2.0"

ProxyEater/__main__.py

Lines changed: 39 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# ProxyEater.__main__.py
22
# CodeWriter21
3-
3+
import os
44
import sys
55
import json
66
import pathlib
@@ -60,7 +60,8 @@ def scrape(args):
6060
for config in source_data:
6161
progress_callback = finish_callback = error_callback = checking_callback = None
6262
if args.verbose:
63-
logger.progress_bar = log21.ProgressBar(format_='{prefix}{bar}{suffix} {percentage}%', style='{')
63+
logger.progress_bar = log21.ProgressBar(format_='Proxies: {count} {prefix}{bar}{suffix} {percentage}%',
64+
style='{', additional_variables={'count': 0})
6465

6566
def progress_callback(scraper_: Scraper, progress: float, page: int):
6667
logger.info(f'{scraper_.name}: Collected: {scraper_.proxies.count}; Page: {page}, {progress:.2f}%',
@@ -74,7 +75,7 @@ def error_callback(scraper_: Scraper, error: Exception):
7475
logger.error(f'{scraper_.name}: {error.__class__.__name__}: {error}')
7576

7677
def checking_callback(proxy_list: ProxyList, progress: float):
77-
logger.progress_bar(progress, 100)
78+
logger.progress_bar(progress, 100, count=proxy_list.count)
7879

7980
logger.info(f'Scraping {config.get("id")}...')
8081
scraper = Scraper(config.get('url'), config.get('parser'), method=config.get('method'),
@@ -100,18 +101,20 @@ def checking_callback(proxy_list: ProxyList, progress: float):
100101
proxies.update(proxies_)
101102
logger.info(f'Scraped {len(proxies)} proxies.')
102103

103-
if args.verbose:
104-
logger.info(f'Writing {proxies.count} proxies to {args.output}...')
105-
# Write to file
106-
if args.format == 'text':
107-
proxies.to_text_file(args.output, '\n')
108-
elif args.format == 'json':
109-
proxies.to_json_file(args.output, include_status=args.include_status,
110-
include_geolocation=args.include_geolocation)
111-
elif args.format == 'csv':
112-
proxies.to_csv_file(args.output, include_status=args.include_status,
113-
include_geolocation=args.include_geolocation)
114-
logger.info(f'Wrote {proxies.count} proxies to {args.output}.')
104+
if proxies.count > 0:
105+
if args.verbose:
106+
logger.info(f'Writing {proxies.count} proxies to {args.output}...')
107+
# Write to file
108+
if args.format == 'text':
109+
proxies.to_text_file(args.output, '\n')
110+
elif args.format == 'json':
111+
proxies.to_json_file(args.output, include_status=args.include_status,
112+
include_geolocation=args.include_geolocation)
113+
elif args.format == 'csv':
114+
proxies.to_csv_file(args.output, include_status=args.include_status,
115+
include_geolocation=args.include_geolocation)
116+
if proxies.count > 0:
117+
logger.info(f'Wrote {proxies.count} proxies to {args.output}.')
115118

116119

117120
def check(args):
@@ -138,10 +141,11 @@ def check(args):
138141
logger.error(f'The source format {args.source_format} is not valid.')
139142
return
140143

141-
logger.progress_bar = log21.ProgressBar(format_='{prefix}{bar}{suffix} {percentage}%', style='{')
144+
logger.progress_bar = log21.ProgressBar(format_='Proxies: {count} {prefix}{bar}{suffix} {percentage}%', style='{',
145+
additional_variables={'count': 0})
142146

143147
def checking_callback(proxy_list: ProxyList, progress: float):
144-
logger.progress_bar(progress, 100)
148+
logger.progress_bar(progress, 100, count=proxy_list.count)
145149

146150
# Check the proxies
147151
count = proxies.count
@@ -154,16 +158,17 @@ def checking_callback(proxy_list: ProxyList, progress: float):
154158
logger.info(f'Removed {count - proxies.count} dead proxies.')
155159
logger.info(f'Alive proxies: {proxies.count}')
156160

157-
# Write to file
158-
if args.format == 'text':
159-
proxies.to_text_file(args.output, '\n')
160-
elif args.format == 'json':
161-
proxies.to_json_file(args.output, include_status=args.include_status,
162-
include_geolocation=args.include_geolocation)
163-
elif args.format == 'csv':
164-
proxies.to_csv_file(args.output, include_status=args.include_status,
165-
include_geolocation=args.include_geolocation)
166-
logger.info(f'Wrote {proxies.count} proxies to {args.output}.')
161+
if proxies.count > 0:
162+
# Write to file
163+
if args.format == 'text':
164+
proxies.to_text_file(args.output, '\n')
165+
elif args.format == 'json':
166+
proxies.to_json_file(args.output, include_status=args.include_status,
167+
include_geolocation=args.include_geolocation)
168+
elif args.format == 'csv':
169+
proxies.to_csv_file(args.output, include_status=args.include_status,
170+
include_geolocation=args.include_geolocation)
171+
logger.info(f'Wrote {proxies.count} proxies to {args.output}.')
167172

168173

169174
def main():
@@ -249,9 +254,15 @@ def main():
249254
elif args.mode == 'check':
250255
check(args)
251256
except KeyboardInterrupt:
257+
try:
258+
terminal_size = os.get_terminal_size()[0] - 1
259+
except OSError:
260+
terminal_size = 50
261+
if not terminal_size:
262+
terminal_size = 50
263+
logger.clear_line(terminal_size)
252264
logger.error('KeyboardInterrupt: Exiting...')
253265
sys.exit()
254-
return
255266

256267

257268
if __name__ == '__main__':

ProxyEater/sources.json

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@
237237
}
238238
},
239239
{
240-
"id": "github.com/TheSpeedX/PROXY-List",
240+
"id": "github.com/TheSpeedX/PROXY-List/http.txt",
241241
"url": "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt",
242242
"method": "GET",
243243
"parser": {
@@ -248,6 +248,30 @@
248248
}
249249
}
250250
},
251+
{
252+
"id": "github.com/TheSpeedX/PROXY-List/socks5.txt",
253+
"url": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt",
254+
"method": "GET",
255+
"parser": {
256+
"text": {
257+
"type": {
258+
"default": "SOCKS5"
259+
}
260+
}
261+
}
262+
},
263+
{
264+
"id": "github.com/TheSpeedX/PROXY-List/socks4.txt",
265+
"url": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks4.txt",
266+
"method": "GET",
267+
"parser": {
268+
"text": {
269+
"type": {
270+
"default": "SOCKS4"
271+
}
272+
}
273+
}
274+
},
251275
{
252276
"id": "https://github.com/mertguvencli/http-proxy-list",
253277
"url": "https://raw.githubusercontent.com/mertguvencli/http-proxy-list/main/proxy-list/data.txt",

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
ProxyEater\[1.4.0\]
1+
ProxyEater\[1.4.1\]
22
===================
33

44
![version](https://img.shields.io/pypi/v/ProxyEater)

setup.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
setup(
99
name='ProxyEater',
10-
version='1.4.0',
10+
version='1.4.1',
1111
author='CodeWriter21',
1212
author_email='CodeWriter21@gmail.com',
1313
description='A Python Proxy Scraper for gathering fresh proxies.',
@@ -20,7 +20,8 @@
2020
'ProxyEater=ProxyEater.__main__:main'
2121
]
2222
},
23-
install_requires=['requests', 'beautifulsoup4', 'lxml', 'pandas', 'html5lib', 'log21', 'importlib_resources'],
23+
install_requires=['requests', 'beautifulsoup4', 'lxml', 'pandas', 'html5lib', 'log21>=2.3.3',
24+
'importlib_resources', 'random_user_agent'],
2425
classifiers=[
2526
'Programming Language :: Python :: 3',
2627
],

0 commit comments

Comments
 (0)