Skip to content

Commit 5e6c1c8

Browse files
authored
Add files via upload
1 parent 13c6be2 commit 5e6c1c8

8 files changed

Lines changed: 1088 additions & 472 deletions

PyWWW/pywwwget_chatgpt.py

Lines changed: 136 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -61,10 +61,7 @@
6161
import re
6262
import sys
6363
import platform
64-
try:
65-
import secrets
66-
except ImportError:
67-
secrets = False
64+
import secrets
6865
import socket
6966
import shutil
7067
import time
@@ -393,6 +390,13 @@ def unquote(x): # very small fallback
393390
except Exception:
394391
pass
395392

393+
havepycurl = False
394+
try:
395+
import pycurl
396+
havepycurl = True
397+
except ImportError:
398+
pass
399+
396400
haveparamiko = False
397401
try:
398402
import paramiko # noqa
@@ -1355,6 +1359,10 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, ht
13551359
else:
13561360
headers.update({'Referer': httpreferer})
13571361

1362+
if(usehttp == "pycurl"):
1363+
if(isinstance(headers, dict)):
1364+
headers = make_http_headers_from_dict_to_pycurl(headers)
1365+
13581366
# Requests
13591367
if usehttp == "requests" and haverequests:
13601368
auth = (username, password) if (username and password) else None
@@ -1459,6 +1467,96 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, ht
14591467
httpheadersentout = headers
14601468
resp.release_conn()
14611469

1470+
elif(usehttp == "pycurl"):
1471+
retrieved_body = MkTempFile()
1472+
retrieved_headers = MkTempFile()
1473+
try:
1474+
if(httpmethod == "GET"):
1475+
geturls_text = pycurl.Curl()
1476+
if(hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
1477+
usehttpver = geturls_text.CURL_HTTP_VERSION_3_0
1478+
elif(hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
1479+
usehttpver = geturls_text.CURL_HTTP_VERSION_2_0
1480+
else:
1481+
usehttpver = geturls_text.CURL_HTTP_VERSION_1_1
1482+
geturls_text.setopt(geturls_text.URL, url)
1483+
geturls_text.setopt(geturls_text.HTTP_VERSION,
1484+
geturls_text.CURL_HTTP_VERSION_1_1)
1485+
geturls_text.setopt(
1486+
geturls_text.WRITEFUNCTION, retrieved_body.write)
1487+
geturls_text.setopt(geturls_text.HTTPHEADER, headers)
1488+
geturls_text.setopt(
1489+
geturls_text.HEADERFUNCTION, retrieved_headers.write)
1490+
geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
1491+
geturls_text.setopt(geturls_text.TIMEOUT, 60)
1492+
geturls_text.perform()
1493+
elif(httpmethod == "POST"):
1494+
geturls_text = pycurl.Curl()
1495+
if(hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
1496+
usehttpver = geturls_text.CURL_HTTP_VERSION_3_0
1497+
elif(hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
1498+
usehttpver = geturls_text.CURL_HTTP_VERSION_2_0
1499+
else:
1500+
usehttpver = geturls_text.CURL_HTTP_VERSION_1_1
1501+
geturls_text.setopt(geturls_text.URL, url)
1502+
geturls_text.setopt(geturls_text.HTTP_VERSION,
1503+
geturls_text.CURL_HTTP_VERSION_1_1)
1504+
geturls_text.setopt(
1505+
geturls_text.WRITEFUNCTION, retrieved_body.write)
1506+
geturls_text.setopt(geturls_text.HTTPHEADER, headers)
1507+
geturls_text.setopt(
1508+
geturls_text.HEADERFUNCTION, retrieved_headers.write)
1509+
geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
1510+
geturls_text.setopt(geturls_text.TIMEOUT, 60)
1511+
geturls_text.setopt(geturls_text.POST, True)
1512+
geturls_text.setopt(geturls_text.POSTFIELDS, postdata)
1513+
geturls_text.perform()
1514+
else:
1515+
geturls_text = pycurl.Curl()
1516+
if(hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
1517+
usehttpver = geturls_text.CURL_HTTP_VERSION_3_0
1518+
elif(hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
1519+
usehttpver = geturls_text.CURL_HTTP_VERSION_2_0
1520+
else:
1521+
usehttpver = geturls_text.CURL_HTTP_VERSION_1_1
1522+
geturls_text.setopt(geturls_text.URL, url)
1523+
geturls_text.setopt(geturls_text.HTTP_VERSION,
1524+
geturls_text.CURL_HTTP_VERSION_1_1)
1525+
geturls_text.setopt(
1526+
geturls_text.WRITEFUNCTION, retrieved_body.write)
1527+
geturls_text.setopt(geturls_text.HTTPHEADER, headers)
1528+
geturls_text.setopt(
1529+
geturls_text.HEADERFUNCTION, retrieved_headers.write)
1530+
geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
1531+
geturls_text.setopt(geturls_text.TIMEOUT, 60)
1532+
geturls_text.perform()
1533+
retrieved_headers.seek(0, 0)
1534+
if(sys.version[0] == "2"):
1535+
pycurlhead = retrieved_headers.read()
1536+
if(sys.version[0] >= "3"):
1537+
pycurlhead = retrieved_headers.read().decode('UTF-8')
1538+
pyhttpverinfo = re.findall(
1539+
r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0]
1540+
pycurlheadersout = make_http_headers_from_pycurl_to_dict(
1541+
pycurlhead)
1542+
retrieved_body.seek(0, 0)
1543+
httpfile = retrieved_body
1544+
retrieved_headers.close()
1545+
except socket.timeout:
1546+
return False
1547+
except socket.gaierror:
1548+
return False
1549+
except ValueError:
1550+
return False
1551+
httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE)
1552+
httpcodereason = http_status_to_reason(
1553+
geturls_text.getinfo(geturls_text.HTTP_CODE))
1554+
httpversionout = pyhttpverinfo[0]
1555+
httpmethodout = httpmethod
1556+
httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL)
1557+
httpheaderout = pycurlheadersout
1558+
httpheadersentout = headers
1559+
14621560
# urllib fallback
14631561
else:
14641562
req = Request(rebuilt_url, headers=headers)
@@ -1481,27 +1579,19 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, ht
14811579
try:
14821580
httpcodereason = resp.reason
14831581
except AttributeError:
1484-
httpcodereason = http_status_to_reason(resp.getcode())
1582+
httpcodereason = http_status_to_reason(resp.getcode())
14851583
try:
14861584
httpversionout = resp.version
14871585
except AttributeError:
14881586
httpversionout = "1.1"
14891587
try:
14901588
httpmethodout = resp.get_method()
14911589
except AttributeError:
1492-
try:
1493-
httpmethodout = resp._method
1494-
except AttributeError:
1495-
httpmethodout = httpmethod
1590+
httpmethodout = resp._method
14961591
httpurlout = resp.geturl()
1497-
httpheaderout = dict(resp.info())
1498-
#httpheaderout = dict(resp.headers)
1592+
httpheaderout = resp.info()
14991593
try:
1500-
try:
1501-
httpheadersentout = req.unredirected_hdrs | req.headers
1502-
except TypeError:
1503-
httpheadersentout = req.unredirected_hdrs
1504-
httpheadersentout.update(req.headers)
1594+
httpheadersentout = req.unredirected_hdrs | req.headers
15051595
except AttributeError:
15061596
httpheadersentout = req.header_items()
15071597
fulldatasize = httpfile.tell()
@@ -3499,10 +3589,7 @@ def _udp_seq_send(fileobj, host, port, resume=False, path_text=None, **kwargs):
34993589

35003590
# Start with base timeout; will adapt
35013591
timeout = max(min_to, min(max_to, base_timeout))
3502-
try:
3503-
sock.settimeout(timeout)
3504-
except Exception:
3505-
pass
3592+
sock.settimeout(timeout)
35063593

35073594
chunk = int(kwargs.get("chunk", 1200))
35083595
max_window = int(kwargs.get("window", 32)) # cap
@@ -3520,11 +3607,7 @@ def _udp_seq_send(fileobj, host, port, resume=False, path_text=None, **kwargs):
35203607

35213608
tid = int(kwargs.get("tid", 0) or 0)
35223609
if tid == 0:
3523-
# py2/py3 compatible random 64b
3524-
try:
3525-
tid = secrets.randbits(64) # py3.6+
3526-
except Exception:
3527-
tid = struct.unpack("!Q", os.urandom(8))[0]
3610+
tid = secrets.randbits(64)
35283611

35293612
# stats
35303613
stats = {
@@ -3592,49 +3675,42 @@ def _udp_seq_send(fileobj, host, port, resume=False, path_text=None, **kwargs):
35923675
srtt = None
35933676
rttvar = None
35943677

3595-
# PY2/3 replacement for "nonlocal": store mutable state in dict
3596-
_st = {
3597-
"srtt": srtt,
3598-
"rttvar": rttvar,
3599-
"timeout": timeout,
3600-
"cwnd": cwnd,
3601-
"cwnd_float": cwnd_float,
3602-
}
3603-
36043678
def _update_rtt(sample):
3679+
nonlocal srtt, rttvar, timeout
36053680
if sample <= 0:
36063681
return
3607-
if _st["srtt"] is None:
3608-
_st["srtt"] = sample
3609-
_st["rttvar"] = sample / 2.0
3682+
if srtt is None:
3683+
srtt = sample
3684+
rttvar = sample / 2.0
36103685
else:
36113686
# RFC6298-ish
3612-
alpha = 1.0 / 8.0
3613-
beta = 1.0 / 4.0
3614-
_st["rttvar"] = (1 - beta) * _st["rttvar"] + beta * abs(_st["srtt"] - sample)
3615-
_st["srtt"] = (1 - alpha) * _st["srtt"] + alpha * sample
3616-
3617-
_st["timeout"] = _st["srtt"] + 4.0 * _st["rttvar"]
3618-
_st["timeout"] = max(min_to, min(max_to, _st["timeout"]))
3687+
alpha = 1 / 8
3688+
beta = 1 / 4
3689+
rttvar = (1 - beta) * rttvar + beta * abs(srtt - sample)
3690+
srtt = (1 - alpha) * srtt + alpha * sample
3691+
timeout = srtt + 4.0 * rttvar
3692+
timeout = max(min_to, min(max_to, timeout))
36193693
try:
3620-
sock.settimeout(_st["timeout"])
3694+
sock.settimeout(timeout)
36213695
except Exception:
36223696
pass
36233697

36243698
def _loss_event():
3699+
nonlocal cwnd, cwnd_float
36253700
stats["loss_events"] += 1
3626-
_st["cwnd"] = max(1, int(_st["cwnd"]) // 2)
3627-
_st["cwnd_float"] = float(_st["cwnd"])
3701+
cwnd = max(1, cwnd // 2)
3702+
cwnd_float = float(cwnd)
36283703

36293704
def _ai_increase(acked_count):
36303705
# additive increase: cwnd += acked/cwnd (smoothed)
3706+
nonlocal cwnd, cwnd_float
36313707
if acked_count <= 0:
36323708
return
3633-
_st["cwnd_float"] += float(acked_count) / max(1.0, float(_st["cwnd"]))
3634-
new_cwnd = int(_st["cwnd_float"])
3635-
if new_cwnd > _st["cwnd"]:
3636-
_st["cwnd"] = min(max_window, new_cwnd)
3637-
_st["cwnd_float"] = float(_st["cwnd"])
3709+
cwnd_float += float(acked_count) / max(1.0, float(cwnd))
3710+
new_cwnd = int(cwnd_float)
3711+
if new_cwnd > cwnd:
3712+
cwnd = min(max_window, new_cwnd)
3713+
cwnd_float = float(cwnd)
36383714

36393715
def _send_pkt(seq, wire_payload, flags):
36403716
sock.sendto(_u_pack(flags, seq, total, tid) + wire_payload, addr)
@@ -3654,7 +3730,7 @@ def _read_chunk():
36543730

36553731
# Prime window
36563732
eof = False
3657-
while not eof and len(in_flight) < _st["cwnd"]:
3733+
while not eof and len(in_flight) < cwnd:
36583734
data = _read_chunk()
36593735
if data is None:
36603736
eof = True
@@ -3691,6 +3767,7 @@ def _read_chunk():
36913767
# Cumulative ACK: drop all <= ack_upto
36923768
for s in [s for s in list(in_flight.keys()) if s <= ack_upto]:
36933769
wire, ts, _tries, _dlen = in_flight[s]
3770+
# RTT sample from original send timestamp (works best if not retransmitted)
36943771
sample = now - ts
36953772
_update_rtt(sample)
36963773
del in_flight[s]
@@ -3732,7 +3809,7 @@ def _read_chunk():
37323809
now = time.time()
37333810
for seq in list(in_flight.keys()):
37343811
wire, ts, tries, dlen = in_flight[seq]
3735-
if (now - ts) >= _st["timeout"]:
3812+
if (now - ts) >= timeout:
37363813
if tries >= retries:
37373814
failed = True
37383815
in_flight.clear()
@@ -3746,7 +3823,7 @@ def _read_chunk():
37463823
break
37473824

37483825
# Fill window based on cwnd
3749-
while not eof and len(in_flight) < _st["cwnd"]:
3826+
while not eof and len(in_flight) < cwnd:
37503827
data = _read_chunk()
37513828
if data is None:
37523829
eof = True
@@ -3762,10 +3839,10 @@ def _read_chunk():
37623839
dur = max(1e-9, time.time() - t_start)
37633840
stats["duration_s"] = dur
37643841
stats["throughput_Bps"] = float(stats["bytes_sent_payload"]) / dur
3765-
stats["timeout"] = _st["timeout"]
3766-
stats["srtt"] = _st["srtt"]
3767-
stats["rttvar"] = _st["rttvar"]
3768-
stats["cwnd_end"] = _st["cwnd"]
3842+
stats["timeout"] = timeout
3843+
stats["srtt"] = srtt
3844+
stats["rttvar"] = rttvar
3845+
stats["cwnd_end"] = cwnd
37693846

37703847
if failed:
37713848
try:

0 commit comments

Comments
 (0)