|
64 | 64 | except ImportError: |
65 | 65 | pass |
66 | 66 |
|
| 67 | +# Mechanize support |
| 68 | +havemechanize = False |
| 69 | +try: |
| 70 | + import mechanize |
| 71 | + havemechanize = True |
| 72 | +except ImportError: |
| 73 | + pass |
| 74 | + |
67 | 75 | # Requests support |
68 | 76 | haverequests = False |
69 | 77 | try: |
|
137 | 145 | __version__ = str( |
138 | 146 | __version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2]) |
139 | 147 |
|
140 | | -__use_pysftp__ = False |
141 | | -if(not havepysftp): |
142 | | - __use_pysftp__ = False |
143 | | -__use_http_lib__ = "httpx" |
144 | | -if(__use_http_lib__ == "httpx" and haverequests and not havehttpx): |
145 | | - __use_http_lib__ = "requests" |
146 | | -if(__use_http_lib__ == "requests" and havehttpx and not haverequests): |
147 | | - __use_http_lib__ = "httpx" |
148 | | -if((__use_http_lib__ == "httpx" or __use_http_lib__ == "requests") and not havehttpx and not haverequests): |
149 | | - __use_http_lib__ = "urllib" |
150 | | - |
151 | 148 | PyBitness = platform.architecture() |
152 | 149 | if(PyBitness == "32bit" or PyBitness == "32"): |
153 | 150 | PyBitness = "32" |
@@ -308,65 +305,86 @@ def read(self, size=-1): |
def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__):
    """Download ``url`` into memory and return it as a rewound ``BytesIO``.

    ``sftp`` URLs are delegated to ``download_file_from_pysftp_file`` or
    ``download_file_from_sftp_file`` (depending on ``__use_pysftp__``) and
    ``ftp``/``ftps`` URLs to ``download_file_from_ftp_file``; in those cases
    the original ``url`` is passed through unchanged and that helper's return
    value is returned as-is.

    For every other scheme the credentials embedded in the URL
    (``user:password@host``) are stripped from the URL and, when both a
    username and a password are present, supplied to the selected HTTP
    backend as authentication instead.

    Args:
        url: The URL to fetch.
        headers: Optional mapping of request headers. ``None`` means no
            extra headers.
        usehttp: Preferred backend: ``'requests'``, ``'httpx'`` or
            ``'mechanize'``. A backend is used only when its ``have*`` flag
            is set; any other value, or an unavailable backend, falls back
            to urllib.

    Returns:
        A ``BytesIO`` holding the response body, positioned at offset 0
        (for the HTTP backends).
    """
    if headers is None:
        headers = {}
    urlparts = urlparse(url)

    # Hand off non-HTTP schemes first: they receive the original URL, so
    # there is no point rebuilding it (or touching ``urlparts.port``, which
    # can raise on a malformed port) before dispatching.
    if urlparts.scheme == "sftp":
        if __use_pysftp__:
            return download_file_from_pysftp_file(url)
        return download_file_from_sftp_file(url)
    if urlparts.scheme == "ftp" or urlparts.scheme == "ftps":
        return download_file_from_ftp_file(url)

    username = urlparts.username
    password = urlparts.password

    # Rebuild the URL without the username and password.
    host = urlparts.hostname or ''
    if ':' in host:
        # ``hostname`` drops the brackets around IPv6 literals; they must be
        # restored or the ':' separators collide with the port separator.
        host = '[' + host + ']'
    netloc = host
    if urlparts.port:
        netloc += ':' + str(urlparts.port)
    rebuilt_url = urlunparse((urlparts.scheme, netloc, urlparts.path,
                              urlparts.params, urlparts.query, urlparts.fragment))

    # Keyword arguments shared by the requests and httpx calls; ``auth`` is
    # only passed when credentials were present in the URL.
    request_kwargs = {'headers': headers}
    if username and password:
        request_kwargs['auth'] = (username, password)

    # In-memory buffer that receives the response body.
    httpfile = BytesIO()

    if usehttp == 'requests' and haverequests:
        response = requests.get(rebuilt_url, stream=True, **request_kwargs)
        try:
            # Let urllib3 undo any Content-Encoding while the raw stream is read.
            response.raw.decode_content = True
            shutil.copyfileobj(response.raw, httpfile)
        finally:
            # A streamed response keeps its connection until it is closed.
            response.close()

    elif usehttp == 'httpx' and havehttpx:
        with httpx.Client(follow_redirects=True) as client:
            response = client.get(rebuilt_url, **request_kwargs)
            raw_wrapper = RawIteratorWrapper(response.iter_bytes())
            shutil.copyfileobj(raw_wrapper, httpfile)

    elif usehttp == 'mechanize' and havemechanize:
        br = mechanize.Browser()
        # Do not fetch or obey robots.txt for a direct file download.
        br.set_handle_robots(False)
        if headers:
            # mechanize expects a list of (name, value) pairs.
            br.addheaders = list(headers.items())
        if username and password:
            br.add_password(rebuilt_url, username, password)
        response = br.open(rebuilt_url)
        try:
            shutil.copyfileobj(response, httpfile)
        finally:
            response.close()

    else:
        # Fallback: plain urllib.
        request = Request(rebuilt_url, headers=headers)
        if username and password:
            password_mgr = HTTPPasswordMgrWithDefaultRealm()
            password_mgr.add_password(None, rebuilt_url, username, password)
            opener = build_opener(HTTPBasicAuthHandler(password_mgr))
        else:
            opener = build_opener()
        response = opener.open(request)
        try:
            shutil.copyfileobj(response, httpfile)
        finally:
            # try/finally rather than ``with`` so this also works where the
            # response object is not a context manager.
            response.close()

    # Rewind so callers can read the body from the beginning.
    httpfile.seek(0, 0)
    return httpfile
371 | 389 |
|
372 | 390 |
|
|
0 commit comments