@@ -1317,6 +1317,254 @@ def sftp_status_to_reason(code):
13171317 }
13181318 return reasons .get (code , 'Unknown Status Code' )
13191319
def read_all(fileobj, encoding='utf-8', errors='replace'):
    """Read the entire contents of *fileobj* and return it as text.

    Bytes payloads are decoded with *encoding* (default UTF-8) under the
    given *errors* policy; text payloads are returned untouched, and a
    None read result is normalized to an empty string.
    """
    payload = fileobj.read()
    if payload is None:
        # Nothing readable; normalize to empty text for both Python branches.
        return u'' if PY2 else ''
    if not isinstance(payload, bytes):
        # Already text (unicode on py2 / str on py3).
        return payload
    return payload.decode(encoding, errors)
1327+
# ---------------- Parsing primitives ----------------

# Request line, e.g. "GET /index.html HTTP/1.1" (any 1.x version).
_req_line_http1 = re.compile(r'^(?P<method>[A-Z]+)\s+(?P<path>\S+)\s+HTTP/(?P<version>\d+\.\d)\s*$')
# Request line for HTTP/2, e.g. "GET / HTTP/2" or "GET / HTTP/2.0".
_req_line_h2 = re.compile(r'^(?P<method>[A-Z]+)\s+(?P<path>\S+)\s+HTTP/(?P<version>2(?:\.0)?)\s*$')
# Status line, e.g. "HTTP/1.1 200 OK"; the reason phrase is optional.
_status_line_v1 = re.compile(r'^HTTP/(?P<version>\d+\.\d)\s+(?P<code>\d{3})(?:\s+(?P<reason>.*))?$')
# Status line for HTTP/2, e.g. "HTTP/2 200" (reason phrase optional).
_status_line_h2 = re.compile(r'^HTTP/(?P<version>2(?:\.0)?)\s+(?P<code>\d{3})(?:\s+(?P<reason>.*))?$')
1334+
1335+ def _normalize (text ):
1336+ return text .replace ('\r \n ' , '\n ' ).replace ('\r ' , '\n ' )
1337+
1338+ def _split_header_block (block_text ):
1339+ block_text = _normalize (block_text )
1340+ lines = block_text .split ('\n ' )
1341+ while lines and lines [- 1 ] == '' :
1342+ lines .pop ()
1343+
1344+ # unfold obs-fold (space/tab continuation)
1345+ out = []
1346+ for line in lines :
1347+ if out and (line .startswith (' ' ) or line .startswith ('\t ' )):
1348+ out [- 1 ] += ' ' + line .lstrip ()
1349+ else :
1350+ out .append (line )
1351+ return out
1352+
1353+ def _parse_headers (lines ):
1354+ headers = {}
1355+ for line in lines :
1356+ if not line or ':' not in line :
1357+ continue
1358+ name , value = line .split (':' , 1 )
1359+ name = name .strip ()
1360+ value = value .strip ()
1361+ key = name .lower ()
1362+
1363+ if key in headers :
1364+ if isinstance (headers [key ], list ):
1365+ headers [key ].append (value )
1366+ else :
1367+ headers [key ] = [headers [key ], value ]
1368+ else :
1369+ headers [key ] = value
1370+ return headers
1371+
def parse_request_block(block_text):
    """Parse an HTTP request head (request line plus headers).

    Returns a dict with 'method', 'path', 'version' and parsed 'headers',
    or None when *block_text* is empty or its first line is not a valid
    HTTP/1.x or HTTP/2 request line.
    """
    if not block_text:
        return None
    header_lines = _split_header_block(block_text)
    if not header_lines:
        return None

    first_line = header_lines[0]
    match = _req_line_http1.match(first_line) or _req_line_h2.match(first_line)
    if match is None:
        return None

    return {
        'method': match.group('method'),
        'path': match.group('path'),
        'version': match.group('version'),
        'headers': _parse_headers(header_lines[1:]),
    }
1389+
def parse_response_block(block_text):
    """Parse an HTTP response head (status line plus headers).

    Returns a dict with 'version', integer 'status_code', 'reason'
    (empty string when absent) and parsed 'headers', or None when
    *block_text* is empty or its first line is not a valid HTTP/1.x or
    HTTP/2 status line.
    """
    if not block_text:
        return None
    header_lines = _split_header_block(block_text)
    if not header_lines:
        return None

    first_line = header_lines[0]
    match = _status_line_v1.match(first_line) or _status_line_h2.match(first_line)
    if match is None:
        return None

    reason_text = match.group('reason') or ''
    return {
        'version': match.group('version'),
        'status_code': int(match.group('code')),
        'reason': reason_text.strip(),
        'headers': _parse_headers(header_lines[1:]),
    }
1409+
# ---------------- Extraction from verbose output ----------------

# HTTP/1.x request block: "GET / HTTP/1.1" ... up to the first blank line.
_HTTP1_REQ_BLOCK = re.compile(
    r'(?ms)^(?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS|TRACE|CONNECT)\s+\S+\s+HTTP/\d\.\d\s*\n'
    r'(?:.*?\n)*?\n'
)

# HTTP/2 synthesized request block: "GET / HTTP/2" ... up to the first blank line.
_HTTP2_SYN_REQ_BLOCK = re.compile(
    r'(?ms)^(?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS|TRACE|CONNECT)\s+\S+\s+HTTP/2(?:\.0)?\s*\n'
    r'(?:.*?\n)*?\n'
)

# HTTP/2 bracket pseudo-headers, one per line:
#   [HTTP/2] [1] [:method: GET]
#   [HTTP/2] [1] [:path: /]
#   [HTTP/2] [1] [user-agent: ...]
# Captures the numeric stream id and the raw "key: value" payload.
_HTTP2_BRACKET_LINE = re.compile(
    r'^\[HTTP/2\]\s*\[(?P<stream>\d+)\]\s*\[(?P<kv>.+?)\]\s*$'
)
1431+
1432+ def _extract_http2_bracket_request (text ):
1433+ """
1434+ Build a synthetic request block from the [HTTP/2] [stream] [key: value] lines.
1435+ Returns (block_text, used_stream) or (None, None).
1436+ """
1437+ t = _normalize (text )
1438+ lines = t .split ('\n ' )
1439+
1440+ # Collect per stream
1441+ per_stream = {}
1442+ order = [] # stream appearance order
1443+ for line in lines :
1444+ m = _HTTP2_BRACKET_LINE .match (line )
1445+ if not m :
1446+ continue
1447+ stream = m .group ('stream' )
1448+ kv = m .group ('kv' )
1449+ if stream not in per_stream :
1450+ per_stream [stream ] = []
1451+ order .append (stream )
1452+ per_stream [stream ].append (kv )
1453+
1454+ if not order :
1455+ return (None , None )
1456+
1457+ # pick first stream that has :method and :path
1458+ for stream in order :
1459+ kvs = per_stream [stream ]
1460+ pseudo = {}
1461+ normal = []
1462+ for kv in kvs :
1463+ # kv is like ":method: GET" or "user-agent: blah"
1464+ if ':' not in kv :
1465+ continue
1466+ name , value = kv .split (':' , 1 )
1467+ name = name .strip ()
1468+ value = value .strip ()
1469+
1470+ # special case: pseudo headers start with empty name because kv starts ":method..."
1471+ # our split gives name="" and value="method: GET" if we split at first ':'
1472+ if name == '' and value :
1473+ # now split "method: GET"
1474+ if ':' in value :
1475+ n2 , v2 = value .split (':' , 1 )
1476+ pseudo [':' + n2 .strip ()] = v2 .strip ()
1477+ continue
1478+
1479+ # regular "host: github.com"
1480+ normal .append ((name , value ))
1481+
1482+ # also handle case where pseudo lines came as "[:method: GET]" (already handled)
1483+ if ':method' in pseudo and ':path' in pseudo :
1484+ method = pseudo [':method' ]
1485+ path = pseudo [':path' ]
1486+ # prefer :authority for Host if present
1487+ authority = pseudo .get (':authority' )
1488+
1489+ block_lines = []
1490+ block_lines .append ('%s %s HTTP/2' % (method , path ))
1491+ if authority :
1492+ block_lines .append ('Host: %s' % authority )
1493+
1494+ # add other pseudo? scheme isn't a header line usually; skip it.
1495+ # add bracketed normal headers
1496+ for (name , value ) in normal :
1497+ block_lines .append ('%s: %s' % (name , value ))
1498+ block_lines .append ('' ) # blank line terminator
1499+
1500+ return ('\n ' .join (block_lines ), stream )
1501+
1502+ return (None , None )
1503+
# Response blocks: status line ("HTTP/x.y NNN ...") through the first blank line.
_HTTP1_RESP_BLOCK = re.compile(
    r'(?ms)^HTTP/\d\.\d\s+\d{3}.*\n(?:.*?\n)*?\n'
)
# Same, for "HTTP/2" / "HTTP/2.0" status lines.
_HTTP2_RESP_BLOCK = re.compile(
    r'(?ms)^HTTP/2(?:\.0)?\s+\d{3}.*\n(?:.*?\n)*?\n'
)
1511+
def extract_request_and_response(debug_text):
    """Locate the request and response header blocks in verbose curl output.

    Returns (request_block_text, response_block_text); either element may
    be None.  Requests are searched in priority order: HTTP/1 block, then
    HTTP/2 synthesized block, then HTTP/2 bracket pseudo-header lines
    (converted into a synthetic block).  For responses, when both HTTP/1
    and HTTP/2 status blocks are present, whichever appears first wins.
    """
    text = _normalize(debug_text)

    # Request extraction, in priority order.
    req_block = None
    hit = _HTTP1_REQ_BLOCK.search(text)
    if hit is not None:
        req_block = hit.group(0)
    else:
        hit = _HTTP2_SYN_REQ_BLOCK.search(text)
        if hit is not None:
            req_block = hit.group(0)
        else:
            req_block, _stream = _extract_http2_bracket_request(text)

    # Response extraction: earliest of the HTTP/2 and HTTP/1 matches.
    resp_block = None
    hit2 = _HTTP2_RESP_BLOCK.search(text)
    hit1 = _HTTP1_RESP_BLOCK.search(text)
    if hit2 and hit1:
        resp_block = hit2.group(0) if hit2.start() < hit1.start() else hit1.group(0)
    elif hit2:
        resp_block = hit2.group(0)
    elif hit1:
        resp_block = hit1.group(0)

    return req_block, resp_block
1551+
def parse_pycurl_verbose(fileobj_or_text):
    """Parse pycurl VERBOSE/DEBUGFUNCTION output into structured form.

    Accepts a file-like object, a bytes payload, or text.  Returns a dict
    with the raw extracted blocks under 'raw' plus parsed 'request' and
    'response' dicts (None for whichever block was not found).
    """
    if hasattr(fileobj_or_text, 'read'):
        text = read_all(fileobj_or_text)
    elif isinstance(fileobj_or_text, bytes):
        text = fileobj_or_text.decode('utf-8', 'replace')
    else:
        text = fileobj_or_text

    req_block, resp_block = extract_request_and_response(text)
    parsed_request = parse_request_block(req_block) if req_block else None
    parsed_response = parse_response_block(resp_block) if resp_block else None
    return {
        'raw': {'request': req_block, 'response': resp_block},
        'request': parsed_request,
        'response': parsed_response,
    }
1567+
13201568def download_file_from_http_file (url , headers = None , usehttp = __use_http_lib__ , httpuseragent = None , httpreferer = None , httpcookie = geturls_cj , httpmethod = "GET" , returnstat = False ):
13211569 if headers is None :
13221570 headers = {}
@@ -1447,7 +1695,7 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, ht
14471695 httpcodereason = http_status_to_reason (r .status )
14481696 httpversionout = "1.1"
14491697 httpmethodout = httpmethod
1450- httpurlout = str (httpurl )
1698+ httpurlout = str (rebuilt_url )
14511699 httpheaderout = r .headers
14521700 httpheadersentout = headers
14531701
@@ -1507,6 +1755,7 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, ht
15071755 elif (usehttp == "pycurl" ):
15081756 retrieved_body = MkTempFile ()
15091757 retrieved_headers = MkTempFile ()
1758+ sentout_headers = MkTempFile ()
15101759 try :
15111760 if (httpmethod == "GET" ):
15121761 geturls_text = pycurl .Curl ()
@@ -1517,13 +1766,12 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, ht
15171766 else :
15181767 usehttpver = geturls_text .CURL_HTTP_VERSION_1_1
15191768 geturls_text .setopt (geturls_text .URL , rebuilt_url )
1520- geturls_text .setopt (geturls_text .HTTP_VERSION ,
1521- geturls_text .CURL_HTTP_VERSION_1_1 )
1522- geturls_text .setopt (
1523- geturls_text .WRITEFUNCTION , retrieved_body .write )
1769+ geturls_text .setopt (geturls_text .HTTP_VERSION , usehttpver )
1770+ geturls_text .setopt (geturls_text .WRITEFUNCTION , retrieved_body .write )
15241771 geturls_text .setopt (geturls_text .HTTPHEADER , headers )
1525- geturls_text .setopt (
1526- geturls_text .HEADERFUNCTION , retrieved_headers .write )
1772+ geturls_text .setopt (geturls_text .HEADERFUNCTION , retrieved_headers .write )
1773+ geturls_text .setopt (pycurl .VERBOSE , 1 )
1774+ geturls_text .setopt (pycurl .DEBUGFUNCTION , lambda t , m : sentout_headers .write (m ))
15271775 geturls_text .setopt (geturls_text .FOLLOWLOCATION , True )
15281776 geturls_text .setopt (geturls_text .TIMEOUT , 60 )
15291777 geturls_text .perform ()
@@ -1536,13 +1784,12 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, ht
15361784 else :
15371785 usehttpver = geturls_text .CURL_HTTP_VERSION_1_1
15381786 geturls_text .setopt (geturls_text .URL , rebuilt_url )
1539- geturls_text .setopt (geturls_text .HTTP_VERSION ,
1540- geturls_text .CURL_HTTP_VERSION_1_1 )
1541- geturls_text .setopt (
1542- geturls_text .WRITEFUNCTION , retrieved_body .write )
1787+ geturls_text .setopt (geturls_text .HTTP_VERSION , usehttpver )
1788+ geturls_text .setopt (geturls_text .WRITEFUNCTION , retrieved_body .write )
15431789 geturls_text .setopt (geturls_text .HTTPHEADER , headers )
1544- geturls_text .setopt (
1545- geturls_text .HEADERFUNCTION , retrieved_headers .write )
1790+ geturls_text .setopt (geturls_text .HEADERFUNCTION , retrieved_headers .write )
1791+ geturls_text .setopt (pycurl .VERBOSE , 1 )
1792+ geturls_text .setopt (pycurl .DEBUGFUNCTION , lambda t , m : sentout_headers .write (m ))
15461793 geturls_text .setopt (geturls_text .FOLLOWLOCATION , True )
15471794 geturls_text .setopt (geturls_text .TIMEOUT , 60 )
15481795 geturls_text .setopt (geturls_text .POST , True )
@@ -1557,25 +1804,25 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, ht
15571804 else :
15581805 usehttpver = geturls_text .CURL_HTTP_VERSION_1_1
15591806 geturls_text .setopt (geturls_text .URL , rebuilt_url )
1560- geturls_text .setopt (geturls_text .HTTP_VERSION ,
1561- geturls_text .CURL_HTTP_VERSION_1_1 )
1562- geturls_text .setopt (
1563- geturls_text . WRITEFUNCTION , retrieved_body . write )
1807+ geturls_text .setopt (geturls_text .HTTP_VERSION , usehttpver )
1808+ geturls_text .setopt ( geturls_text . WRITEFUNCTION , retrieved_body . write )
1809+ geturls_text .setopt (pycurl . VERBOSE , 1 )
1810+ geturls_text . setopt ( pycurl . DEBUGFUNCTION , lambda t , m : sentout_headers . write ( m ) )
15641811 geturls_text .setopt (geturls_text .HTTPHEADER , headers )
1565- geturls_text .setopt (
1566- geturls_text .HEADERFUNCTION , retrieved_headers .write )
1812+ geturls_text .setopt (geturls_text .HEADERFUNCTION , retrieved_headers .write )
15671813 geturls_text .setopt (geturls_text .FOLLOWLOCATION , True )
15681814 geturls_text .setopt (geturls_text .TIMEOUT , 60 )
15691815 geturls_text .perform ()
15701816 retrieved_headers .seek (0 , 0 )
1817+ sentout_headers .seek (0 , 0 )
1818+ httpheadersentpre = parse_pycurl_verbose (sentout_headers )
1819+ sentout_headers .close ()
15711820 if (sys .version [0 ] == "2" ):
15721821 pycurlhead = retrieved_headers .read ()
15731822 if (sys .version [0 ] >= "3" ):
15741823 pycurlhead = retrieved_headers .read ().decode ('UTF-8' )
1575- pyhttpverinfo = re .findall (
1576- r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$' , pycurlhead .splitlines ()[0 ].strip ().rstrip ('\r \n ' ))[0 ]
1577- pycurlheadersout = make_http_headers_from_pycurl_to_dict (
1578- pycurlhead )
1824+ pyhttpverinfo = re .findall (r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$' , pycurlhead .splitlines ()[0 ].strip ().rstrip ('\r \n ' ))[0 ]
1825+ pycurlheadersout = make_http_headers_from_pycurl_to_dict (pycurlhead )
15791826 retrieved_body .seek (0 , 0 )
15801827 httpfile = retrieved_body
15811828 retrieved_headers .close ()
@@ -1586,13 +1833,15 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__, ht
15861833 except ValueError :
15871834 return False
15881835 httpcodeout = geturls_text .getinfo (geturls_text .HTTP_CODE )
1589- httpcodereason = http_status_to_reason (
1590- geturls_text .getinfo (geturls_text .HTTP_CODE ))
1836+ httpcodereason = http_status_to_reason (geturls_text .getinfo (geturls_text .HTTP_CODE ))
15911837 httpversionout = pyhttpverinfo [0 ]
15921838 httpmethodout = httpmethod
15931839 httpurlout = geturls_text .getinfo (geturls_text .EFFECTIVE_URL )
15941840 httpheaderout = pycurlheadersout
1595- httpheadersentout = headers
1841+ try :
1842+ httpheadersentout = httpheadersentpre ['request' ]['headers' ]
1843+ except TypeError :
1844+ httpheadersentout = headers
15961845
15971846 # urllib fallback
15981847 else :
0 commit comments