from typing import Optional
from typing import Sequence

import six

# Pick the tokenizer entry point that matches the running interpreter:
# * PY2's `generate_tokens` works on text-mode readline callables.
# * PY3's `tokenize` works on bytes-mode readline callables and emits an
#   extra ENCODING token as the very first token of the stream.
if six.PY2:  # pragma: no cover (PY2)
    from tokenize import generate_tokens as tokenize_tokenize
    OTHER_NON_CODE = ()
else:  # pragma: no cover (PY3)
    from tokenize import tokenize as tokenize_tokenize
    OTHER_NON_CODE = (tokenize.ENCODING,)

# Token types that may legally appear before a module docstring without
# counting as "code" (comments, blank-line/newline tokens, end-of-stream,
# and on PY3 the leading ENCODING token).
NON_CODE_TOKENS = frozenset(
    (tokenize.COMMENT, tokenize.ENDMARKER, tokenize.NEWLINE, tokenize.NL) +
    OTHER_NON_CODE,
)
1524
1625
1726def check_docstring_first (src , filename = '<unknown>' ):
18- # type: (str , str) -> int
27+ # type: (bytes , str) -> int
1928 """Returns nonzero if the source has what looks like a docstring that is
2029 not at the beginning of the source.
2130
@@ -25,7 +34,7 @@ def check_docstring_first(src, filename='<unknown>'):
2534 found_docstring_line = None
2635 found_code_line = None
2736
28- tok_gen = tokenize . generate_tokens (io .StringIO (src ).readline )
37+ tok_gen = tokenize_tokenize (io .BytesIO (src ).readline )
2938 for tok_type , _ , (sline , scol ), _ , _ in tok_gen :
3039 # Looks like a docstring!
3140 if tok_type == tokenize .STRING and scol == 0 :
@@ -61,7 +70,7 @@ def main(argv=None): # type: (Optional[Sequence[str]]) -> int
6170 retv = 0
6271
6372 for filename in args .filenames :
64- with io . open (filename , encoding = 'UTF-8 ' ) as f :
73+ with open (filename , 'rb ' ) as f :
6574 contents = f .read ()
6675 retv |= check_docstring_first (contents , filename = filename )
6776
0 commit comments