9191 from urlparse import urlparse , urlunparse
9292
9393# Windows-specific setup
if os.name == "nt":
    import io

    def _wrap(stream):
        """Return a UTF-8 text wrapper around ``stream`` when it is a terminal.

        The stream is returned unchanged when it exposes no ``buffer``
        attribute, when it does not report itself as a TTY, or when building
        the wrapper fails for any reason (best effort by design).
        """
        raw = getattr(stream, "buffer", None)
        tty_probe = getattr(stream, "isatty", lambda: False)
        interactive = tty_probe()
        if raw is None or not interactive:
            return stream
        try:
            return io.TextIOWrapper(
                raw, encoding="UTF-8", errors="replace", line_buffering=True)
        except Exception:
            # Keep the original stream rather than break console output.
            return stream

    # Re-wrap stdout first, then stderr, so console output is UTF-8 encoded.
    for _stream_name in ("stdout", "stderr"):
        setattr(sys, _stream_name, _wrap(getattr(sys, _stream_name)))
    del _stream_name
104107
105108hashlib_guaranteed = False
106109# Environment setup
@@ -286,7 +289,28 @@ def get_default_threads():
286289# Define a function to check if var contains only non-printable chars
287290all_np_chars = [chr (i ) for i in range (128 )]
def is_only_nonprintable(var):
    """Return True if every character of ``var`` is non-printable.

    ``None`` and empty input yield True. Any other value is first passed
    through ``to_text`` (defined elsewhere in this module; presumably it
    decodes bytes to text -- confirm against its definition).

    On Python 3 the check is ``str.isprintable()``. Python 2 has no such
    method, so the fallback applies the same rule by hand: a character is
    non-printable when its Unicode general category is "Other" (C*) or
    "Separator" (Z*), with the ASCII space as the only exception.
    """
    if var is None:
        return True
    s = to_text(var)
    try:
        # Py3 fast path
        return not any(ch.isprintable() for ch in s)
    except AttributeError:
        # Py2 path: text objects have no isprintable()
        import unicodedata

        def _is_printable(ch):
            # ASCII space is the one separator that counts as printable.
            if ch == u" ":
                return True
            # Tab, newline and other controls are category Cc, so they are
            # non-printable here exactly as they are on Python 3.
            return unicodedata.category(ch)[0] not in ("C", "Z")

        return not any(_is_printable(ch) for ch in s)
290314__file_format_multi_dict__ = {}
291315__file_format_default__ = "ArchiveFile"
292316__include_defaults__ = True
@@ -358,13 +382,15 @@ def decode_unicode_escape(value):
358382if (__version_info__ [3 ] is None ):
359383 __version__ = str (__version_info__ [0 ]) + "." + str (__version_info__ [1 ]) + "." + str (__version_info__ [2 ])
360384
# Interpreter bitness as the string "32" or "64" (Py2 & Py3, all platforms).
try:
    import struct
    # "P" is the native pointer format: 8 bytes wide means a 64-bit build,
    # any other width is reported as "32".
    PyBitness = {8: "64"}.get(struct.calcsize("P"), "32")
except Exception:
    # Conservative fallback: guess from the machine name suffix.
    m = platform.machine() or ""
    PyBitness = "32" if not m.endswith("64") else "64"
368394
369395geturls_ua_pyfile_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})" .format (
370396 proname = __project__ , prover = __version__ , prourl = __project_url__ )
@@ -377,9 +403,9 @@ def decode_unicode_escape(value):
377403geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
378404geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)"
# Default request headers sent with the project's own user agent. The
# SEC-CH-UA-* entries are client-hint style values built from the project
# name/version and the running interpreter/platform.
# NOTE(review): 'SEC-CH-UA-PLATFORM-VERSION' is filled with the project
# version rather than an OS version -- confirm this is intended.
geturls_headers_pyfile_python = {
    'Referer': "http://google.com/",
    'User-Agent': geturls_ua_pyfile_python,
    'Accept-Encoding': "none",
    'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6",
    'Accept-Charset': "ISO-8859-1,ISO-8859-15,UTF-8;q=0.7,*;q=0.7",
    'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    'Connection': "close",
    'SEC-CH-UA': '"{proname}";v="{prover}", "Not;A=Brand";v="8", "{pyimp}";v="{osrel}"'.format(
        proname=__project__, prover=str(__version__),
        pyimp=py_implementation, osrel=str(platform.release())),
    'SEC-CH-UA-FULL-VERSION': str(__version__),
    'SEC-CH-UA-PLATFORM': py_implementation,
    'SEC-CH-UA-ARCH': platform.machine(),
    # Each key is quoted separately so the bitness entry stays its own key.
    'SEC-CH-UA-PLATFORM-VERSION': str(__version__),
    'SEC-CH-UA-BITNESS': str(PyBitness),
}
# Same header set as the default project headers, but with the alternate
# project user-agent string.
# NOTE(review): 'SEC-CH-UA-PLATFORM-VERSION' is filled with the project
# version rather than an OS version -- confirm this is intended.
geturls_headers_pyfile_python_alt = {
    'Referer': "http://google.com/",
    'User-Agent': geturls_ua_pyfile_python_alt,
    'Accept-Encoding': "none",
    'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6",
    'Accept-Charset': "ISO-8859-1,ISO-8859-15,UTF-8;q=0.7,*;q=0.7",
    'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    'Connection': "close",
    'SEC-CH-UA': '"{proname}";v="{prover}", "Not;A=Brand";v="8", "{pyimp}";v="{osrel}"'.format(
        proname=__project__, prover=str(__version__),
        pyimp=py_implementation, osrel=str(platform.release())),
    'SEC-CH-UA-FULL-VERSION': str(__version__),
    'SEC-CH-UA-PLATFORM': py_implementation,
    'SEC-CH-UA-ARCH': platform.machine(),
    # Each key is quoted separately so the bitness entry stays its own key.
    'SEC-CH-UA-PLATFORM-VERSION': str(__version__),
    'SEC-CH-UA-BITNESS': str(PyBitness),
}
383409geturls_headers_googlebot_google = {'Referer' : "http://google.com/" , 'User-Agent' : geturls_ua_googlebot_google , 'Accept-Encoding' : "none" , 'Accept-Language' : "en-US,en;q=0.8,en-CA,en-GB;q=0.6" ,
384410 'Accept-Charset' : "ISO-8859-1,ISO-8859-15,UTF-8;q=0.7,*;q=0.7" , 'Accept' : "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" , 'Connection' : "close" }
385411geturls_headers_googlebot_google_old = {'Referer' : "http://google.com/" , 'User-Agent' : geturls_ua_googlebot_google_old , 'Accept-Encoding' : "none" , 'Accept-Language' : "en-US,en;q=0.8,en-CA,en-GB;q=0.6" ,
@@ -536,16 +562,20 @@ def VerbosePrintOutReturn(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
536562
def RemoveWindowsPath(dpath):
    """
    Return ``dpath`` using forward slashes only, with all trailing
    slashes removed.

    Falsy input (None, empty string, empty bytes) yields "". Bytes and
    bytearray input is decoded as UTF-8, dropping undecodable bytes.
    Runs of slashes are collapsed to one unless the path contains "://"
    (e.g. "s3://bucket"), in which case only trailing slashes are removed.
    """
    if not dpath:
        return ""
    # Accept bytes and decode safely
    if isinstance(dpath, (bytes, bytearray)):
        dpath = dpath.decode("utf-8", "ignore")
    unix_path = "/".join(dpath.split("\\"))
    if "://" in unix_path:
        # Protocol-style prefix: leave interior double slashes untouched.
        return unix_path.rstrip("/")
    # Dropping empty segments collapses every run of slashes; a leading
    # slash is re-attached so absolute paths stay absolute.
    segments = [seg for seg in unix_path.split("/") if seg]
    prefix = "/" if unix_path.startswith("/") else ""
    return (prefix + "/".join(segments)).rstrip("/")
549579
550580
551581def NormalizeRelativePath (inpath ):
0 commit comments