Skip to content

Commit b3befe7

Browse files
authored
Add files via upload
1 parent 1791e94 commit b3befe7

4 files changed

Lines changed: 60 additions & 84 deletions

File tree

pywwwget_chatgpt.py

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -300,19 +300,18 @@ def data_url_encode(fileobj,
300300
_DATA_URL_RE = re.compile(r'^data:(?P<meta>[^,]*?),(?P<data>.*)$', re.DOTALL)
301301

302302

303-
def data_url_decode(data_url):
304-
"""
305-
Parse a data: URL and return (bytes_io, mime, is_base64).
303+
def _normalize_b64(s):
304+
# Remove whitespace and newlines
305+
s = ''.join(s.split())
306+
# Normalize URL-safe base64 just in case
307+
s = s.replace('-', '+').replace('_', '/')
308+
# Fix missing padding
309+
s = s + '=' * (-len(s) % 4)
310+
return s
306311

307-
Returns:
308-
(MkTempFile(data_bytes), mime_string_or_None, is_base64_bool)
309312

310-
Notes:
311-
- If no MIME is provided in the URL, mime will be None (per RFC 2397 default is text/plain;charset=US-ASCII).
312-
- This function does not attempt charset transcoding; it returns raw bytes.
313-
"""
313+
def data_url_decode(data_url):
314314
if not isinstance(data_url, text_type):
315-
# Accept bytes input too
316315
try:
317316
data_url = data_url.decode('utf-8')
318317
except Exception:
@@ -330,7 +329,6 @@ def data_url_decode(data_url):
330329
mime = None
331330

332331
if meta_parts:
333-
# First part may be mime if it contains '/' or looks like type/subtype
334332
if '/' in meta_parts[0]:
335333
mime = meta_parts[0]
336334
rest = meta_parts[1:]
@@ -341,25 +339,21 @@ def data_url_decode(data_url):
341339
if p.lower() == 'base64':
342340
is_base64 = True
343341
else:
344-
# keep parameters on mime if present (e.g. charset)
345342
if mime is None:
346343
mime = p
347344
else:
348345
mime = mime + ';' + p
349346

350347
if is_base64:
351-
# data_part is base64 ascii text
352348
try:
353-
decoded_bytes = base64.b64decode(data_part.encode('ascii'))
354-
except Exception:
355-
# some inputs may include whitespace/newlines
356-
cleaned = ''.join(data_part.split())
357-
decoded_bytes = base64.b64decode(cleaned.encode('ascii'))
349+
cleaned = _normalize_b64(data_part)
350+
decoded_bytes = base64.b64decode(cleaned.encode('ascii'), validate=False)
351+
except (binascii.Error, ValueError) as e:
352+
raise ValueError(
353+
"Invalid base64 data URL payload: {0}".format(e)
354+
)
358355
else:
359-
# Percent-decoding; must operate on str, returns bytes in both py2/py3 wrapper
360356
decoded_bytes = unquote_to_bytes(data_part)
361-
362-
# Py3 wrapper returns bytes; Py2 returns "str" bytes already.
363357
if isinstance(decoded_bytes, text_type):
364358
decoded_bytes = decoded_bytes.encode('latin-1')
365359

pywwwget_deepseek.py

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -270,19 +270,18 @@ def data_url_encode(fileobj,
270270
_DATA_URL_RE = re.compile(r'^data:(?P<meta>[^,]*?),(?P<data>.*)$', re.DOTALL)
271271

272272

273-
def data_url_decode(data_url):
274-
"""
275-
Parse a data: URL and return (bytes_io, mime, is_base64).
273+
def _normalize_b64(s):
274+
# Remove whitespace and newlines
275+
s = ''.join(s.split())
276+
# Normalize URL-safe base64 just in case
277+
s = s.replace('-', '+').replace('_', '/')
278+
# Fix missing padding
279+
s = s + '=' * (-len(s) % 4)
280+
return s
276281

277-
Returns:
278-
(MkTempFile(data_bytes), mime_string_or_None, is_base64_bool)
279282

280-
Notes:
281-
- If no MIME is provided in the URL, mime will be None (per RFC 2397 default is text/plain;charset=US-ASCII).
282-
- This function does not attempt charset transcoding; it returns raw bytes.
283-
"""
283+
def data_url_decode(data_url):
284284
if not isinstance(data_url, text_type):
285-
# Accept bytes input too
286285
try:
287286
data_url = data_url.decode('utf-8')
288287
except Exception:
@@ -300,7 +299,6 @@ def data_url_decode(data_url):
300299
mime = None
301300

302301
if meta_parts:
303-
# First part may be mime if it contains '/' or looks like type/subtype
304302
if '/' in meta_parts[0]:
305303
mime = meta_parts[0]
306304
rest = meta_parts[1:]
@@ -311,25 +309,21 @@ def data_url_decode(data_url):
311309
if p.lower() == 'base64':
312310
is_base64 = True
313311
else:
314-
# keep parameters on mime if present (e.g. charset)
315312
if mime is None:
316313
mime = p
317314
else:
318315
mime = mime + ';' + p
319316

320317
if is_base64:
321-
# data_part is base64 ascii text
322318
try:
323-
decoded_bytes = base64.b64decode(data_part.encode('ascii'))
324-
except Exception:
325-
# some inputs may include whitespace/newlines
326-
cleaned = ''.join(data_part.split())
327-
decoded_bytes = base64.b64decode(cleaned.encode('ascii'))
319+
cleaned = _normalize_b64(data_part)
320+
decoded_bytes = base64.b64decode(cleaned.encode('ascii'), validate=False)
321+
except (binascii.Error, ValueError) as e:
322+
raise ValueError(
323+
"Invalid base64 data URL payload: {0}".format(e)
324+
)
328325
else:
329-
# Percent-decoding; must operate on str, returns bytes in both py2/py3 wrapper
330326
decoded_bytes = unquote_to_bytes(data_part)
331-
332-
# Py3 wrapper returns bytes; Py2 returns "str" bytes already.
333327
if isinstance(decoded_bytes, text_type):
334328
decoded_bytes = decoded_bytes.encode('latin-1')
335329

pywwwget_merged.py

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -304,19 +304,18 @@ def data_url_encode(fileobj,
304304
_DATA_URL_RE = re.compile(r'^data:(?P<meta>[^,]*?),(?P<data>.*)$', re.DOTALL)
305305

306306

307-
def data_url_decode(data_url):
308-
"""
309-
Parse a data: URL and return (bytes_io, mime, is_base64).
307+
def _normalize_b64(s):
308+
# Remove whitespace and newlines
309+
s = ''.join(s.split())
310+
# Normalize URL-safe base64 just in case
311+
s = s.replace('-', '+').replace('_', '/')
312+
# Fix missing padding
313+
s = s + '=' * (-len(s) % 4)
314+
return s
310315

311-
Returns:
312-
(MkTempFile(data_bytes), mime_string_or_None, is_base64_bool)
313316

314-
Notes:
315-
- If no MIME is provided in the URL, mime will be None (per RFC 2397 default is text/plain;charset=US-ASCII).
316-
- This function does not attempt charset transcoding; it returns raw bytes.
317-
"""
317+
def data_url_decode(data_url):
318318
if not isinstance(data_url, text_type):
319-
# Accept bytes input too
320319
try:
321320
data_url = data_url.decode('utf-8')
322321
except Exception:
@@ -334,7 +333,6 @@ def data_url_decode(data_url):
334333
mime = None
335334

336335
if meta_parts:
337-
# First part may be mime if it contains '/' or looks like type/subtype
338336
if '/' in meta_parts[0]:
339337
mime = meta_parts[0]
340338
rest = meta_parts[1:]
@@ -345,25 +343,21 @@ def data_url_decode(data_url):
345343
if p.lower() == 'base64':
346344
is_base64 = True
347345
else:
348-
# keep parameters on mime if present (e.g. charset)
349346
if mime is None:
350347
mime = p
351348
else:
352349
mime = mime + ';' + p
353350

354351
if is_base64:
355-
# data_part is base64 ascii text
356352
try:
357-
decoded_bytes = base64.b64decode(data_part.encode('ascii'))
358-
except Exception:
359-
# some inputs may include whitespace/newlines
360-
cleaned = ''.join(data_part.split())
361-
decoded_bytes = base64.b64decode(cleaned.encode('ascii'))
353+
cleaned = _normalize_b64(data_part)
354+
decoded_bytes = base64.b64decode(cleaned.encode('ascii'), validate=False)
355+
except (binascii.Error, ValueError) as e:
356+
raise ValueError(
357+
"Invalid base64 data URL payload: {0}".format(e)
358+
)
362359
else:
363-
# Percent-decoding; must operate on str, returns bytes in both py2/py3 wrapper
364360
decoded_bytes = unquote_to_bytes(data_part)
365-
366-
# Py3 wrapper returns bytes; Py2 returns "str" bytes already.
367361
if isinstance(decoded_bytes, text_type):
368362
decoded_bytes = decoded_bytes.encode('latin-1')
369363

pywwwget_nextver.py

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -304,19 +304,18 @@ def data_url_encode(fileobj,
304304
_DATA_URL_RE = re.compile(r'^data:(?P<meta>[^,]*?),(?P<data>.*)$', re.DOTALL)
305305

306306

307-
def data_url_decode(data_url):
308-
"""
309-
Parse a data: URL and return (bytes_io, mime, is_base64).
307+
def _normalize_b64(s):
308+
# Remove whitespace and newlines
309+
s = ''.join(s.split())
310+
# Normalize URL-safe base64 just in case
311+
s = s.replace('-', '+').replace('_', '/')
312+
# Fix missing padding
313+
s = s + '=' * (-len(s) % 4)
314+
return s
310315

311-
Returns:
312-
(MkTempFile(data_bytes), mime_string_or_None, is_base64_bool)
313316

314-
Notes:
315-
- If no MIME is provided in the URL, mime will be None (per RFC 2397 default is text/plain;charset=US-ASCII).
316-
- This function does not attempt charset transcoding; it returns raw bytes.
317-
"""
317+
def data_url_decode(data_url):
318318
if not isinstance(data_url, text_type):
319-
# Accept bytes input too
320319
try:
321320
data_url = data_url.decode('utf-8')
322321
except Exception:
@@ -334,7 +333,6 @@ def data_url_decode(data_url):
334333
mime = None
335334

336335
if meta_parts:
337-
# First part may be mime if it contains '/' or looks like type/subtype
338336
if '/' in meta_parts[0]:
339337
mime = meta_parts[0]
340338
rest = meta_parts[1:]
@@ -345,25 +343,21 @@ def data_url_decode(data_url):
345343
if p.lower() == 'base64':
346344
is_base64 = True
347345
else:
348-
# keep parameters on mime if present (e.g. charset)
349346
if mime is None:
350347
mime = p
351348
else:
352349
mime = mime + ';' + p
353350

354351
if is_base64:
355-
# data_part is base64 ascii text
356352
try:
357-
decoded_bytes = base64.b64decode(data_part.encode('ascii'))
358-
except Exception:
359-
# some inputs may include whitespace/newlines
360-
cleaned = ''.join(data_part.split())
361-
decoded_bytes = base64.b64decode(cleaned.encode('ascii'))
353+
cleaned = _normalize_b64(data_part)
354+
decoded_bytes = base64.b64decode(cleaned.encode('ascii'), validate=False)
355+
except (binascii.Error, ValueError) as e:
356+
raise ValueError(
357+
"Invalid base64 data URL payload: {0}".format(e)
358+
)
362359
else:
363-
# Percent-decoding; must operate on str, returns bytes in both py2/py3 wrapper
364360
decoded_bytes = unquote_to_bytes(data_part)
365-
366-
# Py3 wrapper returns bytes; Py2 returns "str" bytes already.
367361
if isinstance(decoded_bytes, text_type):
368362
decoded_bytes = decoded_bytes.encode('latin-1')
369363

0 commit comments

Comments
 (0)