@@ -107,13 +107,31 @@ def get_branch():
107107 return _get_output ('git branch' ).split ()[- 1 ]
108108
109109
def get_file_content_as_binary(filename):
    '''Get content of a file, read as bytes and decoded as UTF-8

    On a github event, or when running under pytest, the file in the
    working directory is read.
    Locally (ie. non-github event) we return the content of the staged file,
    not the file in the working directory.

    :param filename: Path of the file to read.
    :returns: The decoded content of the file, or None if the file has been
              skipped because it is not UTF-8 encoded.
    '''
    try:
        if _is_github_event() or 'pytest' in sys.modules:
            # Read raw bytes and decode explicitly so that a non-UTF-8 file
            # raises UnicodeDecodeError instead of being silently altered.
            with open(filename, 'rb') as fileobj:
                return fileobj.read().decode()
        # ':<path>' asks git for the staged (index) version of the file.
        # NOTE(review): _get_output presumably decodes the command output;
        # if it raises UnicodeDecodeError we skip the file the same way as
        # in the working directory case - confirm against _get_output.
        return _get_output(f'git show :{filename}')
    except UnicodeDecodeError:
        _skip(filename, 'File is not UTF-8 encoded')
        return None
126+
127+
110128def get_text_file_content (filename ):
111129 '''Get content of a text file
112130
113131 Locally (ie. non-github event) we return the content of the staged file,
114132 not the file in the working directory.
115133 '''
116- if _is_github_event ():
134+ if _is_github_event () or 'pytest' in sys . modules :
117135 data = Path (filename ).read_text ()
118136 else :
119137 data = _get_output (f'git show :{ filename } ' )
@@ -292,11 +310,8 @@ def check_eol(files):
292310 # As the client environment is not configured with autocrlf
293311 # we need to ensure that every text file does not contain CRLF.
294312 for filename in files :
295- try :
296- with open (filename , 'rb' ) as fileobj :
297- data = fileobj .read ().decode ()
298- except UnicodeDecodeError :
299- _skip (filename , 'File is not UTF-8 encoded' )
313+ data = get_file_content_as_binary (filename )
314+ if data is None :
300315 continue
301316
302317 # Skip binary file
@@ -311,12 +326,11 @@ def check_eol(files):
311326
312327def check_do_not_merge_in_file (filename , new_file = False ):
313328 '''Check for "do not merge" in a filename'''
314- try :
315- with open (filename , 'rb' ) as fileobj :
316- lines = fileobj .read ().decode ().splitlines (True )
317- except UnicodeDecodeError :
318- _skip (filename , 'File is not UTF-8 encoded' )
329+ data = get_file_content_as_binary (filename )
330+ if data is None :
319331 return 0
332+ else :
333+ lines = data .splitlines (True )
320334
321335 if new_file :
322336 line_nums = [f'1-{ len (lines )} ' ]
@@ -398,12 +412,11 @@ def trim_trailing_whitespace_in_file(filename, new_file, dry_run,
398412 :returns: If dry_run=True, 0 if no trailing whitespace is found, 1 if
399413 trailing whitespace is found.
400414 '''
401- try :
402- with open (filename , 'rb' ) as fileobj :
403- lines = fileobj .read ().decode ().splitlines (True )
404- except UnicodeDecodeError :
405- _skip (filename , 'File is not UTF-8 encoded' )
415+ data = get_file_content_as_binary (filename )
416+ if data is None :
406417 return 0
418+ else :
419+ lines = data .splitlines (True )
407420
408421 if new_file :
409422 line_nums = [f'1-{ len (lines )} ' ]
@@ -732,6 +745,24 @@ def test_match_word_boundaries(self):
732745 cpp_throw_std_exception_pattern .search ('rethrow exception' ))
733746
734747
class TestCheckFileContent(unittest.TestCase):
    '''Run check_file_content() on good and bad file contents'''

    def test_various_files(self):
        '''A return value of 0 is expected only for good content'''
        test_dir = Path(__file__).parent
        # (filename, is_good, data); when data is None the content is
        # loaded from the matching file of the ../test directory.
        cases = [
            # NOTE(review): the phrase is presumably split so that this
            # source file does not contain it verbatim - confirm.
            ('do_not_commit.py', False, 'do not ' + 'commit'),
            ('tab.py', False, 'field\t field'),
            ('no_newline.cpp', False, 'No terminating newline'),
            ('good_file.cpp', True, None),
        ]
        for filename, is_good, data in cases:
            # One sub-test per file: a failure names the offending file
            # and does not prevent the remaining files from being checked.
            with self.subTest(filename=filename):
                if data is None:
                    test_file = test_dir / f'../test/{filename}'
                    data = get_file_content(str(test_file))
                retval = check_file_content(filename, data)
                self.assertEqual(
                    retval == 0, is_good,
                    msg=f'{filename}: check_file_content returned {retval}')
765+
735766def get_file_content (filename ):
736767 '''Return the content of a file.
737768
0 commit comments