@@ -227,12 +227,14 @@ def init_chardet(self) -> None:
227227
228228 self .encdetector = UniversalDetector ()
229229
def open(self, filename: str) -> tuple[list[tuple[bool, int, list[str]]], str]:
    """Read *filename* with whichever opener this instance is configured for.

    Delegates to ``open_with_chardet`` when ``self.use_chardet`` is truthy
    and to ``open_with_internal`` otherwise, returning the delegate's
    result unchanged: the file's fragments and the encoding used.
    """
    # Pick the bound method first so only one call site remains.
    opener = self.open_with_chardet if self.use_chardet else self.open_with_internal
    return opener(filename)
234234
235- def open_with_chardet (self , filename : str ) -> tuple [list [str ], str ]:
235+ def open_with_chardet (
236+ self , filename : str
237+ ) -> tuple [list [tuple [bool , int , list [str ]]], str ]:
236238 self .encdetector .reset ()
237239 with open (filename , "rb" ) as fb :
238240 for line in fb :
@@ -259,7 +261,9 @@ def open_with_chardet(self, filename: str) -> tuple[list[str], str]:
259261
260262 return lines , f .encoding
261263
262- def open_with_internal (self , filename : str ) -> tuple [list [str ], str ]:
264+ def open_with_internal (
265+ self , filename : str
266+ ) -> tuple [list [tuple [bool , int , list [str ]]], str ]:
263267 encoding = None
264268 first_try = True
265269 for encoding in ("utf-8" , "iso-8859-1" ):
@@ -286,21 +290,25 @@ def open_with_internal(self, filename: str) -> tuple[list[str], str]:
286290
287291 return lines , encoding
288292
def get_lines(self, f: TextIO) -> list[tuple[bool, int, list[str]]]:
    """Split the contents of *f* into checked and ignored fragments.

    Returns a list of ``(ignored, first_line, lines)`` tuples in file
    order. ``ignored`` is True for text matched by
    ``self.ignore_multiline_regex``; ``first_line`` is the index, counted
    from 0, of the file line on which the fragment starts; ``lines`` holds
    the fragment's text split with line endings kept. Joining every
    ``lines`` list in order reproduces the input exactly.

    Without a multiline regex the whole file is one unignored fragment.
    """
    if not self.ignore_multiline_regex:
        return [(False, 0, f.readlines())]

    text = f.read()
    fragments: list[tuple[bool, int, list[str]]] = []
    line_number = 0
    pos = 0

    def add_fragment(ignored: bool, end: int) -> None:
        """Record text[pos:end] and advance the position and line counters."""
        nonlocal line_number, pos
        segment = text[pos:end]
        lines = segment.splitlines(True)
        fragments.append((ignored, line_number, lines))

        # Only line terminators actually consumed move us to a new file
        # line. A trailing piece without a terminator is the first part of
        # a line that the next fragment continues, so it must not count.
        completed = len(lines)
        if lines and lines[-1].splitlines()[0] == lines[-1]:
            completed -= 1
        elif segment.endswith("\r") and text.startswith("\n", end):
            # A CRLF pair straddles the boundary: the "\n" that opens the
            # next fragment belongs to the same terminator as this "\r".
            completed -= 1
        line_number += completed
        pos = end

    for m in re.finditer(self.ignore_multiline_regex, text):
        add_fragment(False, m.start())
        add_fragment(True, m.end())
    add_fragment(False, len(text))
    return fragments
304312
305313
306314# -.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-
@@ -870,7 +878,7 @@ def apply_uri_ignore_words(
870878
871879
872880def parse_lines (
873- lines : list [str ],
881+ fragment : tuple [ bool , int , list [str ] ],
874882 filename : str ,
875883 colors : TermColors ,
876884 summary : Optional [Summary ],
@@ -887,10 +895,13 @@ def parse_lines(
887895 bad_count = 0
888896 changed = False
889897
898+ _ , fragment_line_number , lines = fragment
899+
890900 for i , line in enumerate (lines ):
891901 line = line .rstrip ()
892902 if not line or line in exclude_lines :
893903 continue
904+ line_number = fragment_line_number + i
894905
895906 extra_words_to_ignore = set ()
896907 match = inline_ignore_regex .search (line )
@@ -977,7 +988,7 @@ def parse_lines(
977988 continue
978989
979990 cfilename = f"{ colors .FILE } { filename } { colors .DISABLE } "
980- cline = f"{ colors .FILE } { i + 1 } { colors .DISABLE } "
991+ cline = f"{ colors .FILE } { line_number + 1 } { colors .DISABLE } "
981992 cwrongword = f"{ colors .WWORD } { word } { colors .DISABLE } "
982993 crightword = f"{ colors .FWORD } { fixword } { colors .DISABLE } "
983994
@@ -1028,13 +1039,13 @@ def parse_file(
10281039 options : argparse .Namespace ,
10291040) -> int :
10301041 bad_count = 0
1031- lines = None
1042+ fragments = None
10321043
10331044 # Read lines.
10341045 if filename == "-" :
10351046 f = sys .stdin
10361047 encoding = "utf-8"
1037- lines = file_opener .get_lines (f )
1048+ fragments = file_opener .get_lines (f )
10381049 else :
10391050 if options .check_filenames :
10401051 for word in extract_words (filename , word_regex , ignore_word_regex ):
@@ -1084,42 +1095,51 @@ def parse_file(
10841095 print (f"WARNING: Binary file: { filename } " , file = sys .stderr )
10851096 return bad_count
10861097 try :
1087- lines , encoding = file_opener .open (filename )
1098+ fragments , encoding = file_opener .open (filename )
10881099 except OSError :
10891100 return bad_count
10901101
10911102 # Parse lines.
1092- bad_count_update , changed = parse_lines (
1093- lines ,
1094- filename ,
1095- colors ,
1096- summary ,
1097- misspellings ,
1098- ignore_words_cased ,
1099- exclude_lines ,
1100- word_regex ,
1101- ignore_word_regex ,
1102- uri_regex ,
1103- uri_ignore_words ,
1104- context ,
1105- options ,
1106- )
1107- bad_count += bad_count_update
1103+ changed = False
1104+ for fragment in fragments :
1105+ ignore , _ , _ = fragment
1106+ if ignore :
1107+ continue
1108+
1109+ bad_count_update , changed_update = parse_lines (
1110+ fragment ,
1111+ filename ,
1112+ colors ,
1113+ summary ,
1114+ misspellings ,
1115+ ignore_words_cased ,
1116+ exclude_lines ,
1117+ word_regex ,
1118+ ignore_word_regex ,
1119+ uri_regex ,
1120+ uri_ignore_words ,
1121+ context ,
1122+ options ,
1123+ )
1124+ bad_count += bad_count_update
1125+ changed = changed or changed_update
11081126
11091127 # Write out lines, if changed.
11101128 if changed :
11111129 if filename == "-" :
11121130 print ("---" )
1113- for line in lines :
1114- print (line , end = "" )
1131+ for _ , _ , lines in fragments :
1132+ for line in lines :
1133+ print (line , end = "" )
11151134 else :
11161135 if not options .quiet_level & QuietLevels .FIXES :
11171136 print (
11181137 f"{ colors .FWORD } FIXED:{ colors .DISABLE } { filename } " ,
11191138 file = sys .stderr ,
11201139 )
11211140 with open (filename , "w" , encoding = encoding , newline = "" ) as f :
1122- f .writelines (lines )
1141+ for _ , _ , lines in fragments :
1142+ f .writelines (lines )
11231143
11241144 return bad_count
11251145
0 commit comments