|
28 | 28 | # Timeout) |
29 | 29 | import time |
30 | 30 | from typing import Generator, Optional |
31 | | -from urllib.parse import urljoin, urlparse |
| 31 | +from urllib.parse import urlencode, urljoin, urlparse |
32 | 32 | from urllib3 import PoolManager, HTTPResponse, Timeout as Urllib3Timeout |
33 | 33 | from urllib3.connectionpool import HTTPConnectionPool |
34 | | -from urllib3.exceptions import (ClosedPoolError, |
35 | | - ConnectTimeoutError, |
| 34 | +from urllib3.exceptions import (ConnectTimeoutError, |
36 | 35 | DecodeError, |
37 | 36 | MaxRetryError, |
38 | 37 | ProtocolError, |
39 | 38 | ReadTimeoutError, |
40 | 39 | ProxyError, |
41 | | - TimeoutError, |
42 | | - ProtocolError) |
| 40 | + TimeoutError) |
43 | 41 | from warnings import warn |
44 | 42 | from . import _utils, __version__ |
45 | 43 | from ._models import CdxRecord, Memento |
@@ -350,7 +348,8 @@ def iter_byte_slices(data: bytes, size: int) -> Generator[bytes, None, None]: |
350 | 348 | def parse_header_links(value): |
351 | 349 | """Return a list of parsed link headers. |
352 | 350 |
|
353 | | - i.e. Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg" |
| 351 | + i.e. Link: <http://.../front.jpeg>; rel=front; type="image/jpeg", |
| 352 | + <http://.../back.jpeg>; rel=back;type="image/jpeg" |
354 | 353 |
|
355 | 354 | :rtype: list |
356 | 355 | """ |
@@ -384,7 +383,6 @@ def parse_header_links(value): |
384 | 383 | return links |
385 | 384 |
|
386 | 385 |
|
387 | | -from urllib.parse import urlencode |
388 | 386 | # XXX: pretty much wholesale taken from requests. May need adjustment. |
389 | 387 | # https://github.com/psf/requests/blob/147c8511ddbfa5e8f71bbf5c18ede0c4ceb3bba4/requests/models.py#L107-L134 |
390 | 388 | def serialize_querystring(data): |
@@ -441,7 +439,7 @@ def _parse_content_type_header(header): |
441 | 439 | index_of_equals = param.find("=") |
442 | 440 | if index_of_equals != -1: |
443 | 441 | key = param[:index_of_equals].strip(items_to_strip) |
444 | | - value = param[index_of_equals + 1 :].strip(items_to_strip) |
| 442 | + value = param[index_of_equals + 1:].strip(items_to_strip) |
445 | 443 | params_dict[key.lower()] = value |
446 | 444 | return content_type, params_dict |
447 | 445 |
|
@@ -503,7 +501,7 @@ def __init__(self, raw: HTTPResponse, request_url: str) -> None: |
503 | 501 | self.raw = raw |
504 | 502 | self.status_code = raw.status |
505 | 503 | self.headers = raw.headers |
506 | | - self.url = getattr(raw, 'url', request_url) |
| 504 | + self.url = urljoin(request_url, getattr(raw, 'url', '')) |
507 | 505 | self.encoding = get_encoding_from_headers(self.headers) |
508 | 506 |
|
509 | 507 | # XXX: shortcut to essentially what requests does in `iter_content()`. |
@@ -539,9 +537,7 @@ def stream(self, chunk_size: int = 10 * 1024) -> Generator[bytes, None, None]: |
539 | 537 | @property |
540 | 538 | def content(self) -> bytes: |
541 | 539 | if self._content is None: |
542 | | - logger.warning(f'Getting content!!!') |
543 | 540 | self._content = b"".join(self.stream()) or b"" |
544 | | - logger.warning(f'Getting content DONE: "{self._content}"') |
545 | 541 |
|
546 | 542 | return self._content |
547 | 543 |
|
@@ -612,7 +608,7 @@ def close(self, cache: bool = True) -> None: |
612 | 608 | if self.raw: |
613 | 609 | try: |
614 | 610 | if cache: |
615 | | - # Inspired by requests: https://github.com/psf/requests/blob/eedd67462819f8dbf8c1c32e77f9070606605231/requests/sessions.py#L160-L163 |
| 611 | + # Inspired by requests: https://github.com/psf/requests/blob/eedd67462819f8dbf8c1c32e77f9070606605231/requests/sessions.py#L160-L163 # noqa |
616 | 612 | try: |
617 | 613 | self.content |
618 | 614 | except (DecodeError, ProtocolError, RuntimeError): |
@@ -838,15 +834,17 @@ def send(self, method, url, *, params=None, allow_redirects=True, timeout=-1) -> |
838 | 834 | response.close(cache=False) |
839 | 835 | # XXX: urllib3's MaxRetryError can wrap all the other errors, so |
840 | 836 | # we should probably be checking `error.reason` on it. See how |
841 | | - # requests handles this: https://github.com/psf/requests/blob/a25fde6989f8df5c3d823bc9f2e2fc24aa71f375/src/requests/adapters.py#L502-L537 |
| 837 | + # requests handles this: |
| 838 | + # https://github.com/psf/requests/blob/a25fde6989f8df5c3d823bc9f2e2fc24aa71f375/src/requests/adapters.py#L502-L537 |
842 | 839 | # |
843 | 840 | # XXX: requests.RetryError used to be in our list of handleable |
844 | 841 | # errors; it gets raised when urllib3 raises a MaxRetryError with a |
845 | 842 | # ResponseError for its `reason` attribute. Need to test the |
846 | 843 | # situation here... |
847 | 844 | # |
848 | 845 | # XXX: Consider how read-related exceptions need to be handled (or |
849 | | - # not). In requests: https://github.com/psf/requests/blob/a25fde6989f8df5c3d823bc9f2e2fc24aa71f375/src/requests/models.py#L794-L839 |
| 846 | + # not). In requests: |
| 847 | + # https://github.com/psf/requests/blob/a25fde6989f8df5c3d823bc9f2e2fc24aa71f375/src/requests/models.py#L794-L839 |
850 | 848 | except WaybackSession.handleable_errors as error: |
851 | 849 | response = getattr(error, 'response', None) |
852 | 850 | if response is not None: |
|
0 commit comments