Handle unicode differently when running on Python 2

bgr · bgr · commit 98ead88a9ab3 · 2017-11-30T15:16:32.000+01:00
diff --git a/omdbtool.py b/omdbtool.py
@@ -8,10 +8,17 @@
 import json
 
 try:
+    # Python 3
     from urllib.request import urlopen
     from urllib.parse import urlencode
+    to_unicode = lambda s: s
+    mk_trans = str.maketrans
 except ImportError:
-    from urllib import urlopen, urlencode
+    # Python 2
+    from urllib2 import urlopen
+    from urllib import urlencode
+    to_unicode = lambda s: unicode(s)  # noqa
+    mk_trans = lambda a, b: {ord(ca): ord(cb) for ca, cb in zip(a, b)}
 
 
 parser = argparse.ArgumentParser(description='Get OMDb data for a movie')
@@ -135,17 +142,17 @@
 
 
 # known problematic characters to replace
-char_map = str.maketrans(
-    '–',
-    '-'
+char_map = mk_trans(
+    u'–',
+    u'-'
 )
 
 
 def fmt(s):
     # get rid of weird characters in output, which also cause errors on Windows
     # first use the preferred character mapping for known characters, then fall
     # back to encode + decode for unexpected ones
-    return (s
+    return (to_unicode(s)
             .translate(char_map)
             .encode('ascii', errors='replace')
             .decode('utf-8'))