Skip to content

Commit cc22646

Browse files
author
Kazuki Suzuki Przyborowski
committed
Update pycatfile.py
1 parent afbfa5b commit cc22646

1 file changed

Lines changed: 196 additions & 0 deletions

File tree

pycatfile.py

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -877,6 +877,24 @@ def _parse_net_url(url):
877877
)
878878
return parts, opts
879879

880+
def _rewrite_url_without_auth(url):
881+
u = urlparse(url)
882+
netloc = u.hostname or ''
883+
if u.port:
884+
netloc += ':' + str(u.port)
885+
rebuilt = urlunparse((u.scheme, netloc, u.path, u.params, u.query, u.fragment))
886+
# username/password may be percent-encoded in URL; unquote them
887+
usr = unquote(u.username) if u.username else ''
888+
pwd = unquote(u.password) if u.password else ''
889+
return rebuilt, usr, pwd
890+
891+
def _guess_filename(url, filename):
892+
if filename:
893+
return filename
894+
path = urlparse(url).path or ''
895+
base = os.path.basename(path)
896+
return base or 'OutFile.'+__file_format_extension__
897+
880898
def DetectTarBombCatFileArray(listarrayfiles,
881899
top_file_ratio_threshold=0.6,
882900
min_members_for_ratio=4,
@@ -9563,6 +9581,184 @@ def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__):
95639581
return httpfile
95649582

95659583

9584+
def upload_file_to_http_file(
    fileobj,
    url,
    method="POST",                 # "POST" or "PUT"
    headers=None,
    form=None,                     # dict of extra form fields -> triggers multipart/form-data
    field_name="file",             # form field name for the file content
    filename=None,                 # defaults to basename of URL path
    content_type="application/octet-stream",
    usehttp=__use_http_lib__,      # 'requests' | 'httpx' | 'mechanize' | anything -> urllib fallback
):
    """
    Py2+Py3 compatible HTTP/HTTPS upload.

    - If `form` is provided (dict), uses multipart/form-data:
        * text fields from `form`
        * file part named by `field_name` with given `filename` and `content_type`
    - If `form` is None, uploads raw body as POST/PUT with Content-Type.
    - Credentials embedded in `url` (user:password@host) are stripped from
      the request URL and sent as HTTP basic auth instead.
    - `usehttp` selects the backend: 'requests' and 'httpx' are used when the
      matching library flag is set; 'mechanize' delegates to requests when it
      can be imported; everything else (or a missing library) uses urllib.
    - Returns True on HTTP 2xx, else False. Transport-level failures
      (connection errors, timeouts) still propagate as exceptions.
    """
    if headers is None:
        headers = {}
    method = (method or "POST").upper()

    rebuilt_url, username, password = _rewrite_url_without_auth(url)
    filename = _guess_filename(url, filename)

    # rewind if possible (best effort: non-seekable streams are sent as-is)
    try:
        fileobj.seek(0)
    except Exception:
        pass

    # ========== 1) requests (Py2+Py3) ==========
    if usehttp == 'requests' and haverequests:
        import requests

        auth = (username, password) if (username or password) else None

        if form is not None:
            # multipart/form-data
            files = {field_name: (filename, fileobj, content_type)}
            data = form or {}
            resp = requests.request(method, rebuilt_url, headers=headers, auth=auth,
                                    files=files, data=data, timeout=(5, 120))
        else:
            # raw body
            hdrs = {'Content-Type': content_type}
            hdrs.update(headers)
            # best-effort content-length (helps some servers)
            if hasattr(fileobj, 'seek') and hasattr(fileobj, 'tell'):
                try:
                    cur = fileobj.tell()
                    fileobj.seek(0, io.SEEK_END if hasattr(io, 'SEEK_END') else 2)
                    size = fileobj.tell() - cur
                    fileobj.seek(cur)
                    hdrs.setdefault('Content-Length', str(size))
                except Exception:
                    pass
            resp = requests.request(method, rebuilt_url, headers=hdrs, auth=auth,
                                    data=fileobj, timeout=(5, 300))

        return (200 <= resp.status_code < 300)

    # ========== 2) httpx (Py3 only) ==========
    if usehttp == 'httpx' and havehttpx and not PY2:
        import httpx
        auth = (username, password) if (username or password) else None

        with httpx.Client(follow_redirects=True, timeout=60) as client:
            if form is not None:
                files = {field_name: (filename, fileobj, content_type)}
                data = form or {}
                resp = client.request(method, rebuilt_url, headers=headers, auth=auth,
                                      files=files, data=data)
            else:
                hdrs = {'Content-Type': content_type}
                hdrs.update(headers)
                resp = client.request(method, rebuilt_url, headers=hdrs, auth=auth,
                                      content=fileobj)
        return (200 <= resp.status_code < 300)

    # ========== 3) mechanize (forms) -> prefer requests if available ==========
    if usehttp == 'mechanize' and havemechanize:
        # mechanize is great for HTML forms, but file upload requires form discovery.
        # For a generic upload helper, prefer requests. If not available, fall through.
        try:
            import requests  # noqa
        except ImportError:
            pass  # fall through to urllib
        else:
            # Only the import is guarded: an error raised by the upload itself
            # must propagate instead of triggering a second upload via urllib.
            # `form` is forwarded untouched so form=None keeps meaning
            # "raw body", exactly as on every other backend.
            return upload_file_to_http_file(
                fileobj, url, method=method, headers=headers,
                form=form, field_name=field_name,
                filename=filename, content_type=content_type,
                usehttp='requests'
            )

    # ========== 4) urllib fallback (Py2+Py3) ==========
    # Local import so the fallback does not depend on which urllib error
    # names the module happens to import at file level.
    try:
        from urllib.error import HTTPError as _HTTPError
    except ImportError:
        from urllib2 import HTTPError as _HTTPError

    if form is not None:
        # multipart builder (no f-strings)
        boundary = ('----pyuploader-%s' % uuid.uuid4().hex)

        # Build multipart body in a temp file-like object. MkTempFile() is
        # defined elsewhere in this file; it is only relied on for
        # write/seek/read/close here.
        buf = MkTempFile()
        try:
            def _w(s):
                buf.write(_to_bytes(s))

            # text fields
            if form:
                for k, v in form.items():
                    _w('--' + boundary + '\r\n')
                    _w('Content-Disposition: form-data; name="%s"\r\n\r\n' % k)
                    _w('' if v is None else (v if isinstance(v, (str, bytes)) else str(v)))
                    _w('\r\n')

            # file field
            _w('--' + boundary + '\r\n')
            _w('Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % (field_name, filename))
            _w('Content-Type: %s\r\n\r\n' % content_type)

            try:
                fileobj.seek(0)
            except Exception:
                pass
            shutil.copyfileobj(fileobj, buf)

            _w('\r\n')
            _w('--' + boundary + '--\r\n')

            buf.seek(0)
            data = buf.read()
        finally:
            # The body now lives in `data`; release the temp buffer
            # (best effort - a failing close must not mask the real error).
            try:
                buf.close()
            except Exception:
                pass
        hdrs = {'Content-Type': 'multipart/form-data; boundary=%s' % boundary}
    else:
        # raw body
        try:
            fileobj.seek(0)
        except Exception:
            pass
        data = fileobj.read()
        hdrs = {'Content-Type': content_type}

    hdrs.update(headers)
    req = Request(rebuilt_url, data=data)
    # Override get_method() on the instance: this is honoured by both
    # urllib2 (Py2) and urllib.request (Py3), so PUT is no longer silently
    # downgraded to POST on Py2.
    req.get_method = lambda: method

    for k, v in hdrs.items():
        req.add_header(k, v)

    # Basic auth if present
    if username or password:
        pwd_mgr = HTTPPasswordMgrWithDefaultRealm()
        pwd_mgr.add_password(None, rebuilt_url, username, password)
        opener = build_opener(HTTPBasicAuthHandler(pwd_mgr))
    else:
        opener = build_opener()

    try:
        # Py2 OpenerDirector.open takes timeout since 2.6; to be safe, avoid passing if it explodes
        try:
            resp = opener.open(req, timeout=60)
        except TypeError:
            resp = opener.open(req)
    except _HTTPError as err:
        # urllib raises for non-2xx responses; report them as a failed
        # upload, like the requests/httpx branches do.
        try:
            err.close()
        except Exception:
            pass
        return False

    # Status code compat
    code = getattr(resp, 'status', None) or getattr(resp, 'code', None) or 0
    try:
        resp.close()
    except Exception:
        pass
    return (200 <= int(code) < 300)
9760+
9761+
95669762
def download_file_from_http_string(url, headers=geturls_headers_pyfile_python_alt, usehttp=__use_http_lib__):
95679763
httpfile = download_file_from_http_file(url, headers, usehttp)
95689764
httpout = httpfile.read()

0 commit comments

Comments
 (0)