Skip to content

Commit d12e30f

Browse files
author
Kazuki Suzuki Przyborowski
committed
Update pywwwgetmini.py
1 parent 9b12a86 commit d12e30f

1 file changed

Lines changed: 50 additions & 32 deletions

File tree

pywwwgetmini.py

Lines changed: 50 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,14 @@
6464
except ImportError:
6565
pass
6666

67+
# Mechanize support (optional; havemechanize records whether the import succeeded)
68+
havemechanize = False
69+
try:
70+
import mechanize
71+
havemechanize = True
72+
except ImportError:
73+
pass
74+
6775
# Requests support
6876
haverequests = False
6977
try:
@@ -137,17 +145,6 @@
137145
__version__ = str(
138146
__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2])
139147

140-
__use_pysftp__ = False
141-
if(not havepysftp):
142-
__use_pysftp__ = False
143-
__use_http_lib__ = "httpx"
144-
if(__use_http_lib__ == "httpx" and haverequests and not havehttpx):
145-
__use_http_lib__ = "requests"
146-
if(__use_http_lib__ == "requests" and havehttpx and not haverequests):
147-
__use_http_lib__ = "httpx"
148-
if((__use_http_lib__ == "httpx" or __use_http_lib__ == "requests") and not havehttpx and not haverequests):
149-
__use_http_lib__ = "urllib"
150-
151148
PyBitness = platform.architecture()
152149
if(PyBitness == "32bit" or PyBitness == "32"):
153150
PyBitness = "32"
@@ -308,65 +305,86 @@ def read(self, size=-1):
308305
def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__):
309306
if headers is None:
310307
headers = {}
311-
# Parse the URL to extract username and password if present
312308
urlparts = urlparse(url)
313309
username = urlparts.username
314310
password = urlparts.password
315-
# Rebuild the URL without the username and password
316-
netloc = urlparts.hostname
311+
312+
# Rebuild URL without username and password
313+
netloc = urlparts.hostname or ''
314+
if urlparts.port:
315+
netloc += ':' + str(urlparts.port)
316+
rebuilt_url = urlunparse((urlparts.scheme, netloc, urlparts.path,
317+
urlparts.params, urlparts.query, urlparts.fragment))
318+
319+
# Handle SFTP/FTP
317320
if urlparts.scheme == "sftp":
318321
if __use_pysftp__:
319322
return download_file_from_pysftp_file(url)
320323
else:
321324
return download_file_from_sftp_file(url)
322325
elif urlparts.scheme == "ftp" or urlparts.scheme == "ftps":
323326
return download_file_from_ftp_file(url)
324-
if urlparts.port:
325-
netloc += ':' + str(urlparts.port)
326-
rebuilt_url = urlunparse((urlparts.scheme, netloc, urlparts.path,
327-
urlparts.params, urlparts.query, urlparts.fragment))
327+
328328
# Create a temporary file object
329329
httpfile = BytesIO()
330+
331+
# 1) Requests branch
330332
if usehttp == 'requests' and haverequests:
331-
# Use the requests library if selected and available
332333
if username and password:
333-
response = requests.get(rebuilt_url, headers=headers, auth=(
334-
username, password), stream=True)
334+
response = requests.get(
335+
rebuilt_url, headers=headers, auth=(username, password), stream=True
336+
)
335337
else:
336338
response = requests.get(rebuilt_url, headers=headers, stream=True)
337339
response.raw.decode_content = True
338340
shutil.copyfileobj(response.raw, httpfile)
341+
342+
# 2) HTTPX branch
339343
elif usehttp == 'httpx' and havehttpx:
340-
# Use httpx if selected and available
341344
with httpx.Client(follow_redirects=True) as client:
342345
if username and password:
343346
response = client.get(
344-
rebuilt_url, headers=headers, auth=(username, password))
347+
rebuilt_url, headers=headers, auth=(username, password)
348+
)
345349
else:
346350
response = client.get(rebuilt_url, headers=headers)
347351
raw_wrapper = RawIteratorWrapper(response.iter_bytes())
348352
shutil.copyfileobj(raw_wrapper, httpfile)
353+
354+
# 3) Mechanize branch
355+
elif usehttp == 'mechanize' and havemechanize:
356+
# Create a mechanize browser
357+
br = mechanize.Browser()
358+
# Disable mechanize's robots.txt handling so the request is not refused by robots rules
359+
br.set_handle_robots(False)
360+
# mechanize expects custom headers as a list of (header_name, header_value) tuples
361+
if headers:
362+
br.addheaders = list(headers.items())
363+
364+
# If you need to handle basic auth:
365+
if username and password:
366+
# Mechanize has its own password manager; this is one way to do it:
367+
br.add_password(rebuilt_url, username, password)
368+
369+
# Open the URL and copy the response to httpfile
370+
response = br.open(rebuilt_url)
371+
shutil.copyfileobj(response, httpfile)
372+
373+
# 4) Fallback to urllib
349374
else:
350-
# Use urllib as a fallback
351-
# Build a Request object for urllib
352375
request = Request(rebuilt_url, headers=headers)
353-
# Create an opener object for handling URLs
354376
if username and password:
355-
# Create a password manager
356377
password_mgr = HTTPPasswordMgrWithDefaultRealm()
357-
# Add the username and password
358378
password_mgr.add_password(None, rebuilt_url, username, password)
359-
# Create an authentication handler using the password manager
360379
auth_handler = HTTPBasicAuthHandler(password_mgr)
361-
# Build the opener with the authentication handler
362380
opener = build_opener(auth_handler)
363381
else:
364382
opener = build_opener()
365383
response = opener.open(request)
366384
shutil.copyfileobj(response, httpfile)
367-
# Reset file pointer to the start
385+
386+
# Reset file pointer to the start before returning
368387
httpfile.seek(0, 0)
369-
# Return the temporary file object
370388
return httpfile
371389

372390

0 commit comments

Comments
 (0)