9191 from urlparse import urlparse , urlunparse
9292
# Windows-specific setup: make console output UTF-8 so non-ASCII text does not
# raise UnicodeEncodeError when printed.
if os.name == "nt":
    import io

    def _wrap(stream):
        """Return *stream* re-wrapped as a UTF-8 text stream when possible.

        The stream is only re-wrapped when it exposes a binary ``buffer``
        attribute and reports itself as a TTY; in every other case (no
        ``buffer``, not a TTY, or any failure while wrapping) the original
        object is returned unchanged, so this is strictly best-effort.
        """
        buf = getattr(stream, "buffer", None)
        if buf is None:
            # No underlying binary buffer to wrap (e.g. Py2 file objects,
            # a replaced stream, or None).
            return stream
        try:
            # isatty() can raise ValueError on a closed stream, so it is
            # called inside the try to keep module import from failing.
            is_tty = getattr(stream, "isatty", lambda: False)()
            if not is_tty:
                return stream
            # Push out anything still pending in the old wrapper before it
            # is replaced, so earlier output is not lost or reordered.
            stream.flush()
            return io.TextIOWrapper(
                buf, encoding="UTF-8", errors="replace", line_buffering=True)
        except Exception:
            # Best-effort only: keep the original stream on any failure.
            return stream

    sys.stdout = _wrap(sys.stdout)
    sys.stderr = _wrap(sys.stderr)
104107
105108hashlib_guaranteed = False
106109# Environment setup
@@ -286,7 +289,28 @@ def get_default_threads():
286289# Define a function to check if var contains only non-printable chars
287290all_np_chars = [chr (i ) for i in range (128 )]
def is_only_nonprintable(var):
    """Return True if every character of *var* is non-printable.

    ``None`` and empty input are treated as "only non-printable" (True).
    Any other value is first passed through ``to_text`` (defined elsewhere
    in this module; presumably it converts bytes/other values to text --
    verify against its definition).

    On interpreters where strings provide ``isprintable`` (Python 3) that
    method is used directly.  Where it is missing (Python 2) an equivalent
    check based on ``unicodedata`` is used instead.
    """
    if var is None:
        return True
    s = to_text(var)
    try:
        # Py3 fast path
        return all(not ch.isprintable() for ch in s)
    except AttributeError:
        # Py2 path: str.isprintable does not exist there.
        import unicodedata

        def _is_printable(ch):
            # Mirror str.isprintable(): characters in the Unicode "Other"
            # (C*) and "Separator" (Z*) categories are non-printable, with
            # the single exception of the ASCII space.
            if ch == u" ":
                return True
            return unicodedata.category(ch)[0] not in ("C", "Z")

        return all(not _is_printable(ch) for ch in s)
290314__file_format_multi_dict__ = {}
291315__file_format_default__ = "FoxFile"
292316__include_defaults__ = True
@@ -360,13 +384,15 @@ def decode_unicode_escape(value):
360384if (__version_info__ [3 ] is None ):
361385 __version__ = str (__version_info__ [0 ]) + "." + str (__version_info__ [1 ]) + "." + str (__version_info__ [2 ])
362386
# Bitness of the running interpreter, as the string "32" or "64".
# Works on Py2 & Py3, all platforms.
try:
    import struct
    # "P" is a C pointer (void *); its size in bytes times 8 is the
    # interpreter's pointer width.
    PyBitness = "64" if struct.calcsize("P") * 8 == 64 else "32"
except Exception:
    # Fallback that still describes the interpreter rather than the host
    # hardware: sys.maxsize exceeds 2**32 only on 64-bit builds.
    PyBitness = "64" if sys.maxsize > 2 ** 32 else "32"
370396
371397geturls_ua_pyfile_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})" .format (
372398 proname = __project__ , prover = __version__ , prourl = __project_url__ )
@@ -379,9 +405,9 @@ def decode_unicode_escape(value):
379405geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
380406geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)"
381407geturls_headers_pyfile_python = {'Referer' : "http://google.com/" , 'User-Agent' : geturls_ua_pyfile_python , 'Accept-Encoding' : "none" , 'Accept-Language' : "en-US,en;q=0.8,en-CA,en-GB;q=0.6" , 'Accept-Charset' : "ISO-8859-1,ISO-8859-15,UTF-8;q=0.7,*;q=0.7" , 'Accept' : "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" , 'Connection' : "close" ,
382- 'SEC-CH-UA' : "\" " + __project__ + "\" ;v=\" " + str (__version__ )+ "\" , \" Not;A=Brand\" ;v=\" 8\" , \" " + py_implementation + "\" ;v=\" " + str (platform .release ())+ "\" " , 'SEC-CH-UA-FULL-VERSION' : str (__version__ ), 'SEC-CH-UA-PLATFORM' : "" + py_implementation + "" , 'SEC-CH-UA-ARCH' : "" + platform .machine ()+ "" , 'SEC-CH-UA-PLATFORM' : str (__version__ ), 'SEC-CH-UA-BITNESS' : str (PyBitness )}
408+ 'SEC-CH-UA' : "\" " + __project__ + "\" ;v=\" " + str (__version__ )+ "\" , \" Not;A=Brand\" ;v=\" 8\" , \" " + py_implementation + "\" ;v=\" " + str (platform .release ())+ "\" " , 'SEC-CH-UA-FULL-VERSION' : str (__version__ ), 'SEC-CH-UA-PLATFORM' : "" + py_implementation + "" , 'SEC-CH-UA-ARCH' : "" + platform .machine ()+ "" , 'SEC-CH-UA-PLATFORM-VERSION ' : str (__version__ ), 'SEC-CH-UA-BITNESS' : str (PyBitness )}
383409geturls_headers_pyfile_python_alt = {'Referer' : "http://google.com/" , 'User-Agent' : geturls_ua_pyfile_python_alt , 'Accept-Encoding' : "none" , 'Accept-Language' : "en-US,en;q=0.8,en-CA,en-GB;q=0.6" , 'Accept-Charset' : "ISO-8859-1,ISO-8859-15,UTF-8;q=0.7,*;q=0.7" , 'Accept' : "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" , 'Connection' : "close" ,
384- 'SEC-CH-UA' : "\" " + __project__ + "\" ;v=\" " + str (__version__ )+ "\" , \" Not;A=Brand\" ;v=\" 8\" , \" " + py_implementation + "\" ;v=\" " + str (platform .release ())+ "\" " , 'SEC-CH-UA-FULL-VERSION' : str (__version__ ), 'SEC-CH-UA-PLATFORM' : "" + py_implementation + "" , 'SEC-CH-UA-ARCH' : "" + platform .machine ()+ "" , 'SEC-CH-UA-PLATFORM' : str (__version__ ), 'SEC-CH-UA-BITNESS' : str (PyBitness )}
410+ 'SEC-CH-UA' : "\" " + __project__ + "\" ;v=\" " + str (__version__ )+ "\" , \" Not;A=Brand\" ;v=\" 8\" , \" " + py_implementation + "\" ;v=\" " + str (platform .release ())+ "\" " , 'SEC-CH-UA-FULL-VERSION' : str (__version__ ), 'SEC-CH-UA-PLATFORM' : "" + py_implementation + "" , 'SEC-CH-UA-ARCH' : "" + platform .machine ()+ "" , 'SEC-CH-UA-PLATFORM-VERSION ' : str (__version__ ), 'SEC-CH-UA-BITNESS' : str (PyBitness )}
385411geturls_headers_googlebot_google = {'Referer' : "http://google.com/" , 'User-Agent' : geturls_ua_googlebot_google , 'Accept-Encoding' : "none" , 'Accept-Language' : "en-US,en;q=0.8,en-CA,en-GB;q=0.6" ,
386412 'Accept-Charset' : "ISO-8859-1,ISO-8859-15,UTF-8;q=0.7,*;q=0.7" , 'Accept' : "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" , 'Connection' : "close" }
387413geturls_headers_googlebot_google_old = {'Referer' : "http://google.com/" , 'User-Agent' : geturls_ua_googlebot_google_old , 'Accept-Encoding' : "none" , 'Accept-Language' : "en-US,en;q=0.8,en-CA,en-GB;q=0.6" ,
@@ -538,16 +564,20 @@ def VerbosePrintOutReturn(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
538564
def RemoveWindowsPath(dpath):
    """
    Normalize a path by converting backslashes to forward slashes,
    collapsing repeated slashes and stripping trailing slashes.

    Falsy input (None, "", b"") yields "".  bytes/bytearray input is decoded
    as UTF-8, silently dropping undecodable bytes.  When the path contains
    "://", everything up to and including the first "://" is kept verbatim
    and only the remainder is normalized, so "s3://bucket//key/" becomes
    "s3://bucket/key" and a bare "s3://" stays "s3://".
    """
    if not dpath:
        return ""
    # Accept bytes and decode safely
    if isinstance(dpath, (bytes, bytearray)):
        dpath = dpath.decode("utf-8", "ignore")
    dpath = dpath.replace("\\", "/")
    # Split off a protocol prefix like "s3://" so neither the slash
    # collapsing nor the trailing-slash strip can damage it.
    scheme, sep, rest = dpath.partition("://")
    if not sep:
        # No protocol prefix: the whole string is the path.
        scheme, rest = "", dpath
    while "//" in rest:
        rest = rest.replace("//", "/")
    return scheme + sep + rest.rstrip("/")
551581
552582
553583def NormalizeRelativePath (inpath ):
0 commit comments