1212//!
1313//! Some of the post-processing is outside of Topiary's capabilities, while other
1414//! rules have too much performance overhead when applied through Topiary.
15- use std:: { borrow :: Cow , io:: BufWriter } ;
15+ use std:: io:: BufWriter ;
1616
17- use regex:: RegexBuilder ;
17+ use regex:: { Regex , RegexBuilder } ;
1818use topiary_core:: { Language , Operation , TopiaryQuery , formatter_tree} ;
1919use tree_sitter:: { Parser , Point , Query , QueryCursor , StreamingIterator , Tree } ;
2020
@@ -132,8 +132,7 @@ impl Formatter {
132132 /// to clean up/balance out the output.
133133 #[ inline( always) ]
134134 fn postprocess ( & mut self ) -> & mut Self {
135- self . clean_up_lines_with_only_whitespace ( )
136- . fix_dangling_semicolons ( )
135+ self . fix_dangling_semicolons ( )
137136 . fix_dangling_commas ( )
138137 . remove_trailing_commas_from_preload ( )
139138 . postprocess_tree_sitter ( )
@@ -169,58 +168,7 @@ impl Formatter {
169168 . build ( )
170169 . expect ( "regex should compile" ) ;
171170
172- let mut locations = re. capture_locations ( ) ;
173-
174- // We manually remove new lines to inform the tree which lines were changed
175- if let Some ( _) = re. captures_read ( & mut locations, & self . content ) {
176- let new_lines_bounds = locations. get ( 4 ) . unwrap ( ) ;
177-
178- fn find_position ( s : & str , end_byte : usize ) -> Point {
179- let mut position = Point :: new ( 0 , 0 ) ;
180- for b in & s. as_bytes ( ) [ ..end_byte] {
181- if * b == b'\n' {
182- position. column = 0 ;
183- position. row += 1 ;
184- } else {
185- position. column += 1 ;
186- }
187- }
188- position
189- }
190-
191- let start_byte = new_lines_bounds. 0 ;
192- let end_byte = new_lines_bounds. 1 ;
193- let start_position = find_position ( & self . content , start_byte) ;
194- let old_end_position = find_position ( & self . content , end_byte) ;
195-
196- self . content . replace_range ( start_byte..end_byte, "" ) ;
197-
198- self . tree . edit ( & tree_sitter:: InputEdit {
199- start_byte,
200- old_end_byte : end_byte,
201- new_end_byte : start_byte,
202- start_position,
203- old_end_position,
204- new_end_position : start_position,
205- } ) ;
206-
207- self . tree = self . parser . parse ( & self . content , Some ( & self . tree ) ) . unwrap ( ) ;
208- }
209- self
210- }
211-
212- /// This function cleans up lines that contain only whitespace characters
213- /// (spaces, tabs) and a newline character. It only keeps a single newline
214- /// character.
215- #[ inline( always) ]
216- fn clean_up_lines_with_only_whitespace ( & mut self ) -> & mut Self {
217- let re = RegexBuilder :: new ( r"^\s+\n$" )
218- . multi_line ( true )
219- . build ( )
220- . expect ( "empty line regex should compile" ) ;
221- if let Cow :: Owned ( replaced) = re. replace_all ( & self . content , "\n " ) {
222- self . content = replaced;
223- }
171+ self . regex_replace_all_outside_strings ( re, "$extends_line$extends_name\n " ) ;
224172 self
225173 }
226174
@@ -235,9 +183,8 @@ impl Formatter {
235183 . multi_line ( true )
236184 . build ( )
237185 . expect ( "semicolon regex should compile" ) ;
238- if let Cow :: Owned ( replaced) = re_trailing. replace_all ( & self . content , "" ) {
239- self . content = replaced;
240- }
186+
187+ self . regex_replace_all_outside_strings ( re_trailing, "" ) ;
241188 self
242189 }
243190
@@ -254,9 +201,8 @@ impl Formatter {
254201 . multi_line ( true )
255202 . build ( )
256203 . expect ( "dangling comma regex should compile" ) ;
257- if let Cow :: Owned ( replaced) = re. replace_all ( & self . content , "$1," ) {
258- self . content = replaced;
259- }
204+
205+ self . regex_replace_all_outside_strings ( re, "$1," ) ;
260206 self
261207 }
262208
@@ -269,9 +215,7 @@ impl Formatter {
269215 . build ( )
270216 . expect ( "preload regex should compile" ) ;
271217
272- if let Cow :: Owned ( replaced) = re. replace_all ( & self . content , "preload($1$2)" ) {
273- self . content = replaced;
274- }
218+ self . regex_replace_all_outside_strings ( re, "preload($1$2)" ) ;
275219 self
276220 }
277221
@@ -283,6 +227,71 @@ impl Formatter {
283227 self . handle_two_blank_line ( )
284228 }
285229
230+ /// Replaces every match of regex `re` with `rep`, but only if the match is
231+ /// outside of strings (simple or multiline).
232+ /// Use this to make post-processing changes needed for formatting but that
233+ /// shouldn't affect strings in the source code.
234+ fn regex_replace_all_outside_strings ( & mut self , re : Regex , rep : & str ) {
235+ let mut iter = re. captures_iter ( & self . content ) . peekable ( ) ;
236+ if iter. peek ( ) . is_none ( ) {
237+ return ;
238+ }
239+
240+ let mut new = String :: new ( ) ;
241+ let mut last_match = 0 ;
242+ let mut start_position = Point :: new ( 0 , 0 ) ;
243+
244+ // We first collect tree edits and then apply them, because regex returns positions from unmodified content
245+ let mut edits = Vec :: new ( ) ;
246+
247+ for capture in iter {
248+ let m = capture. get ( 0 ) . unwrap ( ) ;
249+ let start_byte = m. start ( ) ;
250+ let old_end_byte = m. end ( ) ;
251+ let node = self
252+ . tree
253+ . root_node ( )
254+ . descendant_for_byte_range ( start_byte, start_byte)
255+ . unwrap ( ) ;
256+ if node. kind ( ) == "string" {
257+ continue ;
258+ }
259+
260+ let mut replacement = String :: new ( ) ;
261+ capture. expand ( rep, & mut replacement) ;
262+
263+ let new_end_byte = start_byte + replacement. len ( ) ;
264+
265+ let slice = & self . content [ last_match..start_byte] ;
266+ start_position = calculate_end_position ( start_position, slice) ;
267+ let old_end_position =
268+ calculate_end_position ( start_position, & self . content [ start_byte..old_end_byte] ) ;
269+ let new_end_position = calculate_end_position ( start_position, & replacement) ;
270+ new. push_str ( slice) ;
271+ new. push_str ( & replacement) ;
272+ last_match = old_end_byte;
273+
274+ edits. push ( tree_sitter:: InputEdit {
275+ start_byte,
276+ old_end_byte,
277+ new_end_byte,
278+ start_position,
279+ old_end_position,
280+ new_end_position,
281+ } ) ;
282+
283+ start_position = old_end_position;
284+ }
285+
286+ new. push_str ( & self . content [ last_match..] ) ;
287+ self . content = new;
288+
289+ for edit in edits {
290+ self . tree . edit ( & edit) ;
291+ }
292+ self . tree = self . parser . parse ( & self . content , Some ( & self . tree ) ) . unwrap ( ) ;
293+ }
294+
286295 /// This function makes sure we have the correct vertical spacing between important definitions:
287296 /// Two blank lines between function definitions, inner classes, etc. Taking any
288297 /// comments or docstrings into account.
@@ -388,6 +397,19 @@ impl Formatter {
388397 }
389398}
390399
400+ /// Calculates end position of the `slice` counting from `start`
401+ fn calculate_end_position ( mut start : Point , slice : & str ) -> Point {
402+ for b in slice. as_bytes ( ) {
403+ if * b == b'\n' {
404+ start. row += 1 ;
405+ start. column = 0 ;
406+ } else {
407+ start. column += 1 ;
408+ }
409+ }
410+ start
411+ }
412+
391413/// Returns true if both trees have the same structure.
392414fn compare_trees ( left_tree : Tree , right_tree : Tree ) -> bool {
393415 let mut left_cursor = left_tree. walk ( ) ;
0 commit comments