Skip to content

Commit f733e8d

Browse files
authored
Add files via upload
1 parent 41e9547 commit f733e8d

4 files changed

Lines changed: 760 additions & 104 deletions

File tree

pywwwget_chatgpt.py

Lines changed: 190 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,11 @@ def randbits(k):
111111
except ImportError:
112112
import http.cookiejar as cookielib
113113

114+
try:
115+
from Cookie import SimpleCookie # Py2
116+
except ImportError:
117+
from http.cookies import SimpleCookie # Py3
118+
114119
try:
115120
from io import BytesIO
116121
except ImportError:
@@ -122,13 +127,13 @@ def randbits(k):
122127
try:
123128
# Py3
124129
from urllib.parse import quote_from_bytes, unquote_to_bytes, urlencode
125-
from urllib.request import install_opener
130+
from urllib.request import install_opener, build_opener
126131
except ImportError:
127132
# Py2
128133
from urllib import urlencode
129134
from urllib import quote as _quote
130135
from urllib import unquote as _unquote
131-
from urllib2 import install_opener
136+
from urllib2 import install_opener, build_opener
132137

133138
def quote_from_bytes(b, safe=''):
134139
# Py2 urllib.quote expects "str" (bytes)
@@ -362,12 +367,12 @@ def data_url_decode(data_url):
362367

363368
try:
364369
from urllib.parse import urlparse, urlunparse, parse_qs, unquote
365-
from urllib.request import Request, build_opener, HTTPBasicAuthHandler
370+
from urllib.request import Request, build_opener, HTTPBasicAuthHandler, HTTPCookieProcessor, HTTPSHandler
366371
from urllib.error import URLError, HTTPError
367372
from urllib.request import HTTPPasswordMgrWithDefaultRealm
368373
except Exception:
369374
from urlparse import urlparse, urlunparse, parse_qs # type: ignore
370-
from urllib2 import Request, build_opener, HTTPBasicAuthHandler, URLError, HTTPError # type: ignore
375+
from urllib2 import Request, build_opener, HTTPBasicAuthHandler, HTTPCookieProcessor, HTTPSHandler, URLError, HTTPError # type: ignore
371376
from urllib2 import HTTPPasswordMgrWithDefaultRealm # type: ignore
372377
try:
373378
from urllib import unquote # py2
@@ -2313,6 +2318,149 @@ def read(self, n=-1):
23132318
except StopIteration:
23142319
return b""
23152320

2321+
def fix_localhost_cookies(jar):
    """
    Rewrite cookies stored under "localhost.local" as host-only "localhost" cookies.

    Every cookie in ``jar`` whose domain is exactly "localhost.local" is removed
    and re-added with domain "localhost", domain_specified=False and
    domain_initial_dot=False. All other attributes (name, value, port, path,
    secure, expires, discard, comments, non-standard attributes, rfc2109) are
    copied from the original cookie. Cookies for any other domain are left
    untouched.

    jar: a cookielib.CookieJar (or subclass) instance; it is modified in place.
    Returns None.
    """
    # NOTE: the signature deliberately carries no annotations. This module keeps
    # Python 2 import fallbacks, and function annotations are a SyntaxError there.
    replacements = []
    stale_keys = []

    # Collect first and mutate afterwards, so the jar is never changed while it
    # is being iterated.
    for c in jar:
        if getattr(c, "domain", None) != "localhost.local":
            continue

        stale_keys.append((c.domain, c.path, c.name))

        # Non-standard attributes normally live in _rest; tolerate objects that
        # expose them as "rest" or not at all.
        rest = getattr(c, "rest", None)
        if rest is None:
            rest = getattr(c, "_rest", {}) or {}
        if not isinstance(rest, dict):
            rest = {}

        replacements.append(cookielib.Cookie(
            version=getattr(c, "version", 0),
            name=c.name,
            value=c.value,
            port=getattr(c, "port", None),
            port_specified=getattr(c, "port_specified", False),
            domain="localhost",
            domain_specified=False,  # host-only
            domain_initial_dot=False,
            path=getattr(c, "path", "/"),
            path_specified=getattr(c, "path_specified", True),
            secure=getattr(c, "secure", False),
            expires=getattr(c, "expires", None),
            discard=getattr(c, "discard", True),
            comment=getattr(c, "comment", None),
            comment_url=getattr(c, "comment_url", None),
            # Copy so the replacement does not share a dict with the cookie
            # that is about to be dropped.
            rest=dict(rest),
            rfc2109=getattr(c, "rfc2109", False),
        ))

    for dom, path, name in stale_keys:
        jar.clear(domain=dom, path=path, name=name)

    for c in replacements:
        jar.set_cookie(c)
2372+
2373+
def _cookie_header_from_jar(jar, url):
    """
    Build the value of a Cookie request header for ``url`` from a CookieJar.

    A cookie from ``jar`` is included when all of the following hold:
      - it has not expired (an ``expires`` of None or 0 never expires here);
      - it is not marked secure, or the URL scheme is "https";
      - its domain (leading dots ignored) equals the URL host or is a parent
        domain of it; a cookie with an empty domain matches any host;
      - its path path-matches the URL path on a "/" boundary, so a cookie for
        "/foo" applies to "/foo" and "/foo/bar" but not to "/foobar".

    jar: iterable of cookielib.Cookie objects (typically a CookieJar).
    url: absolute URL of the request about to be sent.
    Returns the "name=value; name=value" string, or "" when nothing matches.
    """
    parts = urlparse(url)
    host = (parts.hostname or "").lower()
    req_path = parts.path or "/"
    is_https = (parts.scheme == "https")
    now = int(time.time())

    pairs = []
    for c in jar:
        # Expired cookies are never sent.
        if c.expires is not None and c.expires != 0 and c.expires < now:
            continue

        # Secure cookies only travel over https.
        if c.secure and not is_https:
            continue

        # Domain match: exact host, or host is a subdomain of the cookie domain.
        cookie_domain = (c.domain or "").lstrip(".").lower()
        if cookie_domain and host != cookie_domain and not host.endswith("." + cookie_domain):
            continue

        # Path match. A bare prefix test would leak a "/foo" cookie to
        # "/foobar"; the prefix must end at a "/" boundary unless the paths
        # are identical.
        cookie_path = c.path or "/"
        if req_path != cookie_path:
            if not req_path.startswith(cookie_path):
                continue
            boundary = req_path[len(cookie_path):len(cookie_path) + 1]
            if not cookie_path.endswith("/") and boundary != "/":
                continue

        pairs.append("{}={}".format(c.name, c.value))

    return "; ".join(pairs)
2405+
2406+
def _update_jar_from_set_cookie(jar, url, set_cookie_values):
    """
    Parse Set-Cookie header values received from ``url`` and update ``jar``.

    jar: a cookielib.CookieJar (or subclass); it is modified in place.
    url: URL of the response that carried the headers. Its host is the default
         cookie domain and its directory is the default cookie path.
    set_cookie_values: a single header string, a list of header strings, or an
         empty/None value (in which case nothing happens).

    Handling of each cookie:
      - A Domain attribute is honoured only when the response host equals it or
        is a subdomain of it; otherwise the cookie is ignored, so a server
        cannot plant cookies for an unrelated domain. An accepted Domain is
        stored with a leading dot; without one the cookie is stored for the
        response host with domain_specified=False.
      - A Path attribute that does not start with "/" is replaced by the
        default path.
      - Max-Age (preferred) or Expires sets the expiry time. A cookie that is
        already expired is removed from the jar instead of being stored.
      - A cookie without any expiry is a session cookie (discard=True).
      - A header that SimpleCookie rejects is skipped.

    Returns None.
    """
    if not set_cookie_values:
        return

    # Imported locally: only SimpleCookie is imported at module level, and the
    # Py2/Py3 module names differ.
    try:
        from http.cookies import CookieError  # Py3
    except ImportError:
        from Cookie import CookieError  # Py2
    from email.utils import mktime_tz, parsedate_tz

    parts = urlparse(url)
    host = (parts.hostname or "").lower()

    # Default path: the request path up to, but not including, its last "/".
    default_path = parts.path or "/"
    if default_path.startswith("/"):
        default_path = default_path.rsplit("/", 1)[0] or "/"
    else:
        default_path = "/"

    # Py2: basestring; Py3: str
    try:
        string_types = (basestring,)
    except NameError:
        string_types = (str,)

    if isinstance(set_cookie_values, string_types):
        set_cookie_values = [set_cookie_values]

    now = int(time.time())

    for hdr in set_cookie_values:
        parsed = SimpleCookie()
        try:
            parsed.load(hdr)
        except CookieError:
            # Best effort: one malformed header must not abort the transfer.
            continue

        for name, morsel in parsed.items():
            # --- domain -------------------------------------------------
            domain_attr = (morsel["domain"] or "").strip().lower()
            bare_domain = domain_attr.lstrip(".")
            domain_specified = bool(bare_domain)
            if domain_specified:
                if host != bare_domain and not host.endswith("." + bare_domain):
                    # The server tried to set a cookie for a foreign domain.
                    continue
                domain = "." + bare_domain
            else:
                domain = host

            # --- path ---------------------------------------------------
            path_attr = morsel["path"] or ""
            path_specified = path_attr.startswith("/")
            path = path_attr if path_specified else default_path

            # --- expiry: Max-Age wins over Expires ----------------------
            expires = None
            max_age = morsel["max-age"]
            if max_age != "":
                try:
                    expires = now + int(max_age)
                except (TypeError, ValueError):
                    expires = None
            if expires is None and morsel["expires"]:
                try:
                    when = parsedate_tz(morsel["expires"])
                    if when is not None:
                        expires = int(mktime_tz(when))
                except (TypeError, ValueError, OverflowError):
                    expires = None

            if expires is not None and expires <= now:
                # An already-expired cookie is the server's way of deleting it.
                try:
                    jar.clear(domain, path, name)
                except KeyError:
                    pass  # nothing stored under that key; nothing to delete
                continue

            jar.set_cookie(cookielib.Cookie(
                version=0,
                name=name,
                value=morsel.value,
                port=None,
                port_specified=False,
                domain=domain,
                domain_specified=domain_specified,
                domain_initial_dot=domain_attr.startswith("."),
                path=path,
                path_specified=path_specified,
                secure=bool(morsel["secure"]),
                expires=expires,
                discard=(expires is None),  # no expiry means session cookie
                comment=None,
                comment_url=None,
                rest={},  # could add HttpOnly/SameSite if desired
                rfc2109=False,
            ))
2463+
23162464
def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, usesslcert=defcert, resumefile=None, keepsession=False, insessionvar=None, httpuseragent=None, httpreferer=None, httpcookie=None, httpmethod="GET", postdata=None, jsonpost=False, sendfiles=None, putfile=None, timeout=60, returnstats=False):
23172465
if headers is None:
23182466
headers = {}
@@ -2326,11 +2474,11 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, us
23262474
if(usehttp!="pycurl" or not havepycurl):
23272475
if(cookie_ext == ".lwp"):
23282476
policy = cookielib.DefaultCookiePolicy(netscape=True, rfc2965=False, hide_cookie2=True)
2329-
httpcookie = cookielib.LWPCookieJar(httpcookie, policy=policy)
2477+
httpcookie = cookielib.LWPCookieJar(cookiefile, policy=policy)
23302478
else:
23312479
policy = cookielib.DefaultCookiePolicy(netscape=True, rfc2965=False, hide_cookie2=True)
2332-
httpcookie = cookielib.MozillaCookieJar(httpcookie, policy=policy)
2333-
if os.path.exists(cookie_ext):
2480+
httpcookie = cookielib.MozillaCookieJar(cookiefile, policy=policy)
2481+
if os.path.exists(cookiefile):
23342482
httpcookie.load(ignore_discard=True, ignore_expires=True)
23352483
if(usehttp=="httpcore" or usehttp=="urllib3"):
23362484
openeralt = build_opener(HTTPCookieProcessor(httpcookie))
@@ -2375,7 +2523,7 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, us
23752523
# Requests
23762524
if usehttp == "requests" and haverequests:
23772525
auth = (username, password) if (username and password) else None
2378-
extendargs.update({'url': rebuilt_url, 'method': httpmethod, 'headers': headers, 'auth': auth, 'cookies': httpcookie, 'stream': True, 'allow_redirects': True, 'timeout': (float(timeout), float(timeout))})
2526+
extendargs.update({'url': rebuilt_url, 'method': httpmethod, 'headers': headers, 'auth': auth, 'stream': True, 'allow_redirects': True, 'timeout': (float(timeout), float(timeout))})
23792527
if(insessionvar is not None):
23802528
session = insessionvar
23812529
else:
@@ -2439,6 +2587,7 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, us
24392587
for chunk in r.iter_content(chunk_size=1024 * 1024):
24402588
if chunk:
24412589
httpfile.write(chunk)
2590+
fix_localhost_cookies(httpcookie)
24422591
session.cookies.save(ignore_discard=True, ignore_expires=True)
24432592
httpcodeout = r.status_code
24442593
httpcodereason = r.reason
@@ -2529,6 +2678,7 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, us
25292678
for chunk in r.iter_bytes(chunk_size=1024 * 1024):
25302679
if chunk:
25312680
httpfile.write(chunk)
2681+
fix_localhost_cookies(httpcookie)
25322682
httpcookie.save(cookiefile, ignore_discard=True, ignore_expires=True)
25332683
httpcodeout = r.status_code
25342684
try:
@@ -2610,6 +2760,7 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, us
26102760
shutil.copyfileobj(ResponseStream(r.iter_stream()), httpfile, length=1024 * 1024)
26112761
except (socket.timeout, socket.gaierror, httpcore.ConnectError):
26122762
return False
2763+
fix_localhost_cookies(httpcookie)
26132764
httpcookie.save(cookiefile, ignore_discard=True, ignore_expires=True)
26142765
httpcodeout = r.status
26152766
httpcodereason = http_status_to_reason(r.status)
@@ -2651,14 +2802,18 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, us
26512802
br.addheaders = list(headers.items())
26522803
resp = br.open(rebuilt_url, timeout=timeout)
26532804
elif(httpmethod == "POST"):
2805+
extendargs.update({'timeout': float(timeout)})
26542806
if(jsonpost and postdata is not None):
26552807
if('Content-Type' in headers):
26562808
headers['Content-Type'] = "application/json"
26572809
else:
26582810
headers.update({'Content-Type': "application/json"})
2811+
extendargs.update({'data': json.dumps(postdata)})
2812+
else:
2813+
extendargs.update({'data': urlencode(postdata).encode("ascii")})
26592814
if headers:
26602815
br.addheaders = list(headers.items())
2661-
resp = br.open(rebuilt_url, data=postdata, timeout=float(timeout))
2816+
resp = br.open(rebuilt_url, **extendargs)
26622817
else:
26632818
if headers:
26642819
br.addheaders = list(headers.items())
@@ -2674,6 +2829,7 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, us
26742829
httpfile.truncate(0)
26752830
httpfile.seek(0, 0)
26762831
shutil.copyfileobj(resp, httpfile, length=1024 * 1024)
2832+
fix_localhost_cookies(httpcookie)
26772833
httpcookie.save(cookiefile, ignore_discard=True, ignore_expires=True)
26782834
httpcodeout = resp.code
26792835
httpcodereason = resp.msg
@@ -2752,7 +2908,13 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, us
27522908
extendargs['fields'].update({postdata})
27532909
else:
27542910
extendargs.update({'fields': postdata})
2911+
cookie_hdr = _cookie_header_from_jar(httpcookie, rebuilt_url)
2912+
if cookie_hdr:
2913+
headers["Cookie"] = cookie_hdr
27552914
resp = http.request(**extendargs)
2915+
# urllib3 stores headers in resp.headers (HTTPHeaderDict)
2916+
set_cookie_vals = resp.headers.getlist("Set-Cookie") # returns [] if none
2917+
_update_jar_from_set_cookie(httpcookie, rebuilt_url, set_cookie_vals)
27562918
except (socket.timeout, socket.gaierror, urllib3.exceptions.MaxRetryError):
27572919
return False
27582920
if(resumefile is not None and hasattr(resumefile, "write")):
@@ -2762,6 +2924,7 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, us
27622924
httpfile.truncate(0)
27632925
httpfile.seek(0, 0)
27642926
shutil.copyfileobj(resp, httpfile, length=1024 * 1024)
2927+
fix_localhost_cookies(httpcookie)
27652928
httpcookie.save(cookiefile, ignore_discard=True, ignore_expires=True)
27662929
httpcodeout = resp.status
27672930
httpcodereason = resp.reason
@@ -2929,28 +3092,28 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, us
29293092
headers['Content-Type'] = "application/json"
29303093
else:
29313094
headers.update({'Content-Type': "application/json"})
2932-
extendargs.update({'data': postdata})
3095+
extendargs.update({'data': json.dumps(postdata)})
29333096
elif(not jsonpost and postdata is not None and putfile is None):
2934-
extendargs.update({'data': postdata})
3097+
extendargs.update({'data': urlencode(postdata).encode("ascii")})
29353098
extendargs.update({'headers': headers})
2936-
if(insessionvar is not None):
2937-
req = insessionvar
2938-
else:
2939-
req = Request(**extendargs)
3099+
req = Request(**extendargs)
3100+
handlers = [HTTPCookieProcessor(httpcookie)]
29403101
if username and password:
29413102
mgr = HTTPPasswordMgrWithDefaultRealm()
29423103
mgr.add_password(None, rebuilt_url, username, password)
2943-
opener = build_opener(HTTPBasicAuthHandler(mgr), HTTPCookieProcessor(httpcookie))
3104+
handlers.insert(0, HTTPBasicAuthHandler(mgr))
3105+
if(usesslcert is None):
3106+
pass
29443107
else:
2945-
opener = build_opener(HTTPCookieProcessor(httpcookie))
2946-
install_opener(opener)
3108+
ssl_context = ssl.create_default_context()
3109+
ssl_context.load_verify_locations(usesslcert)
3110+
handlers.append(HTTPSHandler(context=ssl_context))
3111+
if(insessionvar is not None):
3112+
opener = insessionvar
3113+
else:
3114+
opener = build_opener(*handlers)
29473115
try:
2948-
if(usesslcert is None):
2949-
resp = opener.open(req, timeout=timeout)
2950-
else:
2951-
myssl = ssl.create_default_context()
2952-
myssl.load_verify_locations(usesslcert)
2953-
resp = opener.open(req, timeout=timeout, context=myssl)
3116+
resp = opener.open(req, timeout=timeout)
29543117
except HTTPError as e:
29553118
resp = e;
29563119
except (socket.timeout, socket.gaierror, URLError):
@@ -2963,7 +3126,8 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, us
29633126
httpfile.truncate(0)
29643127
httpfile.seek(0, 0)
29653128
shutil.copyfileobj(resp2, httpfile, length=1024 * 1024)
2966-
httpcookie.save(cookiefile, ignore_discard=True, ignore_expires=True)
3129+
fix_localhost_cookies(httpcookie)
3130+
httpcookie.save(ignore_discard=True, ignore_expires=True)
29673131
httpcodeout = resp.getcode()
29683132
try:
29693133
httpcodereason = resp.reason
@@ -2987,7 +3151,7 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, us
29873151
httpheadersentout = req.unredirected_hdrs | req.headers
29883152
except AttributeError:
29893153
httpheadersentout = req.header_items()
2990-
httpsession = req
3154+
httpsession = opener
29913155
if((not keepsession and not returnstats) or not keepsession or httpmethod == "HEAD"):
29923156
httpsession = None
29933157

0 commit comments

Comments
 (0)