Skip to content

Commit 3301103

Browse files
author
Kazuki Suzuki Przyborowski
committed
Update pyarchivefile.py
1 parent cba078d commit 3301103

1 file changed

Lines changed: 195 additions & 0 deletions

File tree

pyarchivefile.py

Lines changed: 195 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -864,6 +864,23 @@ def _parse_net_url(url):
864864
)
865865
return parts, opts
866866

867+
def _rewrite_url_without_auth(url):
868+
u = urlparse(url)
869+
netloc = u.hostname or ''
870+
if u.port:
871+
netloc += ':' + str(u.port)
872+
rebuilt = urlunparse((u.scheme, netloc, u.path, u.params, u.query, u.fragment))
873+
# username/password may be percent-encoded in URL; unquote them
874+
usr = unquote(u.username) if u.username else ''
875+
pwd = unquote(u.password) if u.password else ''
876+
return rebuilt, usr, pwd
877+
878+
def _guess_filename(url, filename):
879+
if filename:
880+
return filename
881+
path = urlparse(url).path or ''
882+
base = os.path.basename(path)
883+
return base or 'OutFile.'+__file_format_extension__
867884

868885
def DetectTarBombArchiveFileArray(listarrayfiles,
869886
top_file_ratio_threshold=0.6,
@@ -9554,6 +9571,184 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__):
95549571
return httpfile
95559572

95569573

9574+
def upload_file_to_http_file(
    fileobj,
    url,
    method="POST",       # "POST" or "PUT"
    headers=None,
    form=None,           # dict of extra form fields -> triggers multipart/form-data
    field_name="file",   # form field name for the file content
    filename=None,       # defaults to basename of URL path
    content_type="application/octet-stream",
    usehttp=__use_http_lib__,  # 'requests' | 'httpx' | 'mechanize' | anything -> urllib fallback
):
    """
    Py2+Py3 compatible HTTP/HTTPS upload.

    - If `form` is provided (dict, may be empty), uses multipart/form-data:
        * text fields from `form`
        * file part named by `field_name` with given `filename` and `content_type`
    - If `form` is None, uploads raw body as POST/PUT with Content-Type.
    - Credentials embedded in `url` (user:password@host) are stripped from
      the request URL and sent as HTTP basic auth instead.
    - `usehttp` selects the backend: 'requests', 'httpx' (Py3 only) or
      'mechanize' (delegates to requests when it can be imported); any
      other value, or an unavailable backend, uses the urllib fallback.
    - Entries in `headers` override the Content-Type this function sets.
    - Returns True on HTTP 2xx, else False. Transport-level errors
      (connection failures, timeouts, ...) still propagate from the backend.
    """
    if headers is None:
        headers = {}
    method = (method or "POST").upper()

    rebuilt_url, username, password = _rewrite_url_without_auth(url)
    filename = _guess_filename(url, filename)
    auth = (username, password) if (username or password) else None

    # rewind if possible
    try:
        fileobj.seek(0)
    except Exception:
        pass

    # ========== 1) requests (Py2+Py3) ==========
    if usehttp == 'requests' and haverequests:
        import requests

        if form is not None:
            # multipart/form-data
            files = {field_name: (filename, fileobj, content_type)}
            data = form or {}
            resp = requests.request(method, rebuilt_url, headers=headers, auth=auth,
                                    files=files, data=data, timeout=(5, 120))
        else:
            # raw body
            hdrs = {'Content-Type': content_type}
            hdrs.update(headers)
            # best-effort content-length (helps some servers)
            if hasattr(fileobj, 'seek') and hasattr(fileobj, 'tell'):
                try:
                    cur = fileobj.tell()
                    fileobj.seek(0, io.SEEK_END if hasattr(io, 'SEEK_END') else 2)
                    size = fileobj.tell() - cur
                    fileobj.seek(cur)
                    hdrs.setdefault('Content-Length', str(size))
                except Exception:
                    pass
            resp = requests.request(method, rebuilt_url, headers=hdrs, auth=auth,
                                    data=fileobj, timeout=(5, 300))

        return (200 <= resp.status_code < 300)

    # ========== 2) httpx (Py3 only) ==========
    if usehttp == 'httpx' and havehttpx and not PY2:
        import httpx

        with httpx.Client(follow_redirects=True, timeout=60) as client:
            if form is not None:
                files = {field_name: (filename, fileobj, content_type)}
                data = form or {}
                resp = client.request(method, rebuilt_url, headers=headers, auth=auth,
                                      files=files, data=data)
            else:
                hdrs = {'Content-Type': content_type}
                hdrs.update(headers)
                resp = client.request(method, rebuilt_url, headers=hdrs, auth=auth,
                                      content=fileobj)
        return (200 <= resp.status_code < 300)

    # ========== 3) mechanize (forms) -> prefer requests if available ==========
    if usehttp == 'mechanize' and havemechanize:
        # mechanize is great for HTML forms, but file upload requires form discovery.
        # For a generic upload helper, prefer requests. If not available, fall through.
        try:
            import requests  # noqa
        except ImportError:
            pass  # fall through to urllib
        else:
            # Pass `form` through untouched: coercing None to {} would turn a
            # raw-body upload into a multipart one, contradicting the contract.
            return upload_file_to_http_file(
                fileobj, url, method=method, headers=headers,
                form=form, field_name=field_name,
                filename=filename, content_type=content_type,
                usehttp='requests'
            )

    # ========== 4) urllib fallback (Py2+Py3) ==========
    # Imported locally so the block does not depend on the module's import list.
    try:
        from urllib.error import HTTPError
    except ImportError:  # Python 2
        from urllib2 import HTTPError

    if form is not None:
        # multipart builder (no f-strings)
        boundary = ('----pyuploader-%s' % uuid.uuid4().hex)

        # Build multipart body in a temp file-like object
        buf = MkTempFile()
        try:
            def _w(s):
                buf.write(_to_bytes(s))

            # text fields
            if form:
                for k, v in form.items():
                    _w('--' + boundary + '\r\n')
                    _w('Content-Disposition: form-data; name="%s"\r\n\r\n' % k)
                    _w('' if v is None else (v if isinstance(v, (str, bytes)) else str(v)))
                    _w('\r\n')

            # file field
            _w('--' + boundary + '\r\n')
            _w('Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % (field_name, filename))
            _w('Content-Type: %s\r\n\r\n' % content_type)

            try:
                fileobj.seek(0)
            except Exception:
                pass
            shutil.copyfileobj(fileobj, buf)

            _w('\r\n')
            _w('--' + boundary + '--\r\n')

            buf.seek(0)
            data = buf.read()
        finally:
            # the body now lives in `data`; release the temp buffer
            try:
                buf.close()
            except Exception:
                pass
        hdrs = {'Content-Type': 'multipart/form-data; boundary=%s' % boundary}
    else:
        # raw body
        try:
            fileobj.seek(0)
        except Exception:
            pass
        data = fileobj.read()
        hdrs = {'Content-Type': content_type}
    hdrs.update(headers)

    req = Request(rebuilt_url, data=data)
    # Py2's Request has no `method` attribute/keyword, so override get_method():
    # both Py2 and Py3 handlers call it to obtain the verb (makes PUT work on Py2).
    req.get_method = lambda: method

    for k, v in hdrs.items():
        req.add_header(k, v)

    # Basic auth if present
    if username or password:
        pwd_mgr = HTTPPasswordMgrWithDefaultRealm()
        pwd_mgr.add_password(None, rebuilt_url, username, password)
        opener = build_opener(HTTPBasicAuthHandler(pwd_mgr))
    else:
        opener = build_opener()

    try:
        # Py2 OpenerDirector.open takes timeout since 2.6; to be safe, avoid passing if it explodes
        try:
            resp = opener.open(req, timeout=60)
        except TypeError:
            resp = opener.open(req)
    except HTTPError as err:
        # urllib raises for non-2xx responses; HTTPError is itself a
        # response-like object carrying the status code, so treat it as one
        # and report failure via the return value instead of an exception.
        resp = err

    # Status code compat
    code = getattr(resp, 'status', None) or getattr(resp, 'code', None) or 0
    try:
        resp.close()
    except Exception:
        pass
    return (200 <= int(code) < 300)
9750+
9751+
95579752
def download_file_from_http_string(url, headers=geturls_headers_pyfile_python_alt, usehttp=__use_http_lib__):
95589753
httpfile = download_file_from_http_file(url, headers, usehttp)
95599754
httpout = httpfile.read()

0 commit comments

Comments
 (0)