Skip to content

Commit 5679d02

Browse files
author
Kazuki Suzuki Przyborowski
committed
Update pyfoxfile.py
1 parent 7674cf0 commit 5679d02

1 file changed

Lines changed: 59 additions & 29 deletions

File tree

pyfoxfile.py

Lines changed: 59 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -91,16 +91,19 @@
9191
from urlparse import urlparse, urlunparse
9292

9393
# Windows-specific setup
94-
if os.name == 'nt':
95-
if sys.version_info[0] == 2:
96-
import codecs
97-
sys.stdout = codecs.getwriter('UTF-8')(sys.stdout)
98-
sys.stderr = codecs.getwriter('UTF-8')(sys.stderr)
99-
else:
100-
sys.stdout = io.TextIOWrapper(
101-
sys.stdout.buffer, encoding='UTF-8', errors='replace', line_buffering=True)
102-
sys.stderr = io.TextIOWrapper(
103-
sys.stderr.buffer, encoding='UTF-8', errors='replace', line_buffering=True)
94+
if os.name == "nt":
    import io

    def _wrap(stream):
        """Return a UTF-8 text wrapper around *stream*, or *stream* itself.

        A new wrapper is built only when the stream exposes a ``buffer``
        attribute and reports itself as a TTY; in every other case (and when
        building the wrapper fails) the stream is handed back unchanged.
        """
        raw = getattr(stream, "buffer", None)
        interactive = getattr(stream, "isatty", lambda: False)()
        # Guard clause: nothing to wrap, or output is not a terminal.
        if raw is None or not interactive:
            return stream
        try:
            wrapped = io.TextIOWrapper(
                raw, encoding="UTF-8", errors="replace", line_buffering=True)
        except Exception:
            # Best effort only: keep the existing stream if wrapping fails.
            return stream
        return wrapped

    sys.stdout = _wrap(sys.stdout)
    sys.stderr = _wrap(sys.stderr)
104107

105108
hashlib_guaranteed = False
106109
# Environment setup
@@ -286,7 +289,28 @@ def get_default_threads():
286289
# Define a function to check if var contains only non-printable chars
287290
all_np_chars = [chr(i) for i in range(128)]
288291
def is_only_nonprintable(var):
    """Return True if *var* contains no printable character at all.

    ``None`` and empty input count as "only non-printable" and give True.
    Any other value is first passed through ``to_text`` (defined elsewhere
    in this file; presumably it decodes bytes to text -- confirm there).

    "Printable" follows ``str.isprintable``: a character is non-printable
    when its Unicode category is Other (``C*``) or Separator (``Z*``), with
    the ASCII space being the one printable exception. Tabs and newlines are
    therefore non-printable on every interpreter version.
    """
    if var is None:
        return True
    s = to_text(var)
    try:
        # Python 3: text characters provide isprintable().
        return not any(ch.isprintable() for ch in s)
    except AttributeError:
        # Python 2: unicode has no isprintable(); apply the same rule by
        # hand so the answer does not depend on the interpreter version.
        import unicodedata
        return not any(
            ch == u" " or unicodedata.category(ch)[0] not in "CZ"
            for ch in s
        )
290314
__file_format_multi_dict__ = {}
291315
__file_format_default__ = "FoxFile"
292316
__include_defaults__ = True
@@ -360,13 +384,15 @@ def decode_unicode_escape(value):
360384
if(__version_info__[3] is None):
361385
__version__ = str(__version_info__[0]) + "." + str(__version_info__[1]) + "." + str(__version_info__[2])
362386

363-
PyBitness = platform.architecture()
364-
if(PyBitness == "32bit" or PyBitness == "32"):
365-
PyBitness = "32"
366-
elif(PyBitness == "64bit" or PyBitness == "64"):
367-
PyBitness = "64"
368-
else:
369-
PyBitness = "32"
387+
# Work out whether this interpreter is a 32- or 64-bit build.
# The primary signal is the size of a C pointer as reported by struct;
# the machine name is consulted only if that lookup fails.
try:
    import struct
    if struct.calcsize("P") * 8 == 64:
        PyBitness = "64"
    else:
        PyBitness = "32"
except Exception:
    # Fallback: anything whose machine name does not end in "64" is
    # reported as 32-bit.
    m = platform.machine() or ""
    if m.endswith("64"):
        PyBitness = "64"
    else:
        PyBitness = "32"
370396

371397
geturls_ua_pyfile_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(
372398
proname=__project__, prover=__version__, prourl=__project_url__)
@@ -379,9 +405,9 @@ def decode_unicode_escape(value):
379405
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
380406
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)"
381407
# Request headers that pair the geturls_ua_pyfile_python User-Agent with
# matching SEC-CH-UA client-hint fields.
# Every key is listed exactly once: in a dict display a repeated key keeps
# only its last value, so a duplicated 'SEC-CH-UA-PLATFORM' entry would
# silently replace the platform name.
geturls_headers_pyfile_python = {
    'Referer': "http://google.com/",
    'User-Agent': geturls_ua_pyfile_python,
    'Accept-Encoding': "none",
    'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6",
    'Accept-Charset': "ISO-8859-1,ISO-8859-15,UTF-8;q=0.7,*;q=0.7",
    'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    'Connection': "close",
    'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"",
    'SEC-CH-UA-FULL-VERSION': str(__version__),
    'SEC-CH-UA-PLATFORM': ""+py_implementation+"",
    'SEC-CH-UA-ARCH': ""+platform.machine()+"",
    # NOTE(review): this carries the project version string, not an OS
    # version -- confirm that is intended.
    'SEC-CH-UA-PLATFORM-VERSION': str(__version__),
    'SEC-CH-UA-BITNESS': str(PyBitness),
}
383409
# Request headers that pair the geturls_ua_pyfile_python_alt User-Agent with
# matching SEC-CH-UA client-hint fields.
# Every key is listed exactly once: in a dict display a repeated key keeps
# only its last value, so a duplicated 'SEC-CH-UA-PLATFORM' entry would
# silently replace the platform name.
geturls_headers_pyfile_python_alt = {
    'Referer': "http://google.com/",
    'User-Agent': geturls_ua_pyfile_python_alt,
    'Accept-Encoding': "none",
    'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6",
    'Accept-Charset': "ISO-8859-1,ISO-8859-15,UTF-8;q=0.7,*;q=0.7",
    'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    'Connection': "close",
    'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"",
    'SEC-CH-UA-FULL-VERSION': str(__version__),
    'SEC-CH-UA-PLATFORM': ""+py_implementation+"",
    'SEC-CH-UA-ARCH': ""+platform.machine()+"",
    # NOTE(review): this carries the project version string, not an OS
    # version -- confirm that is intended.
    'SEC-CH-UA-PLATFORM-VERSION': str(__version__),
    'SEC-CH-UA-BITNESS': str(PyBitness),
}
385411
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6",
386412
'Accept-Charset': "ISO-8859-1,ISO-8859-15,UTF-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}
387413
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6",
@@ -538,16 +564,20 @@ def VerbosePrintOutReturn(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
538564

539565
def RemoveWindowsPath(dpath):
    """
    Normalize a path to forward slashes with no trailing slash.

    Backslashes are converted to forward slashes. Runs of slashes are
    collapsed to a single slash unless the path contains "://" (for example
    "s3://bucket/key"), in which case the slashes are left as they are.
    Trailing slashes are removed, but never the ones that belong to the
    first "://" separator.

    dpath: a text path, or bytes/bytearray, which are decoded as UTF-8 with
        undecodable bytes dropped.
    Returns the normalized text path, or "" when dpath is None or empty.
    A path made up only of slashes (such as "/") also normalizes to "".
    """
    if not dpath:
        return ""
    # Accept bytes and decode safely
    if isinstance(dpath, (bytes, bytearray)):
        dpath = dpath.decode("utf-8", "ignore")
    dpath = dpath.replace("\\", "/")
    if "://" in dpath:
        # Strip trailing slashes only from what follows the scheme
        # separator, so a bare prefix like "s3://" is not cut down to "s3:".
        head, sep, tail = dpath.partition("://")
        return head + sep + tail.rstrip("/")
    # Plain path: collapse every run of slashes to one.
    while "//" in dpath:
        dpath = dpath.replace("//", "/")
    return dpath.rstrip("/")
551581

552582

553583
def NormalizeRelativePath(inpath):

0 commit comments

Comments
 (0)