Skip to content

Commit 563ddcc

Browse files
authored
Merge pull request #80 from shadr/fix-postprocessing
Don't apply pre/postprocessing inside strings
2 parents be7592d + 6999fa7 commit 563ddcc

3 files changed

Lines changed: 127 additions & 65 deletions

File tree

src/formatter.rs

Lines changed: 87 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212
//!
1313
//! Some of the post-processing is outside of Topiary's capabilities, while other
1414
//! rules have too much performance overhead when applied through Topiary.
15-
use std::{borrow::Cow, io::BufWriter};
15+
use std::io::BufWriter;
1616

17-
use regex::RegexBuilder;
17+
use regex::{Regex, RegexBuilder};
1818
use topiary_core::{Language, Operation, TopiaryQuery, formatter_tree};
1919
use tree_sitter::{Parser, Point, Query, QueryCursor, StreamingIterator, Tree};
2020

@@ -132,8 +132,7 @@ impl Formatter {
132132
/// to clean up/balance out the output.
133133
#[inline(always)]
134134
fn postprocess(&mut self) -> &mut Self {
135-
self.clean_up_lines_with_only_whitespace()
136-
.fix_dangling_semicolons()
135+
self.fix_dangling_semicolons()
137136
.fix_dangling_commas()
138137
.remove_trailing_commas_from_preload()
139138
.postprocess_tree_sitter()
@@ -169,58 +168,7 @@ impl Formatter {
169168
.build()
170169
.expect("regex should compile");
171170

172-
let mut locations = re.capture_locations();
173-
174-
// We manually remove new lines to inform the tree which lines were changed
175-
if let Some(_) = re.captures_read(&mut locations, &self.content) {
176-
let new_lines_bounds = locations.get(4).unwrap();
177-
178-
fn find_position(s: &str, end_byte: usize) -> Point {
179-
let mut position = Point::new(0, 0);
180-
for b in &s.as_bytes()[..end_byte] {
181-
if *b == b'\n' {
182-
position.column = 0;
183-
position.row += 1;
184-
} else {
185-
position.column += 1;
186-
}
187-
}
188-
position
189-
}
190-
191-
let start_byte = new_lines_bounds.0;
192-
let end_byte = new_lines_bounds.1;
193-
let start_position = find_position(&self.content, start_byte);
194-
let old_end_position = find_position(&self.content, end_byte);
195-
196-
self.content.replace_range(start_byte..end_byte, "");
197-
198-
self.tree.edit(&tree_sitter::InputEdit {
199-
start_byte,
200-
old_end_byte: end_byte,
201-
new_end_byte: start_byte,
202-
start_position,
203-
old_end_position,
204-
new_end_position: start_position,
205-
});
206-
207-
self.tree = self.parser.parse(&self.content, Some(&self.tree)).unwrap();
208-
}
209-
self
210-
}
211-
212-
/// This function cleans up lines that contain only whitespace characters
213-
/// (spaces, tabs) and a newline character. It only keeps a single newline
214-
/// character.
215-
#[inline(always)]
216-
fn clean_up_lines_with_only_whitespace(&mut self) -> &mut Self {
217-
let re = RegexBuilder::new(r"^\s+\n$")
218-
.multi_line(true)
219-
.build()
220-
.expect("empty line regex should compile");
221-
if let Cow::Owned(replaced) = re.replace_all(&self.content, "\n") {
222-
self.content = replaced;
223-
}
171+
self.regex_replace_all_outside_strings(re, "$extends_line$extends_name\n");
224172
self
225173
}
226174

@@ -235,9 +183,8 @@ impl Formatter {
235183
.multi_line(true)
236184
.build()
237185
.expect("semicolon regex should compile");
238-
if let Cow::Owned(replaced) = re_trailing.replace_all(&self.content, "") {
239-
self.content = replaced;
240-
}
186+
187+
self.regex_replace_all_outside_strings(re_trailing, "");
241188
self
242189
}
243190

@@ -254,9 +201,8 @@ impl Formatter {
254201
.multi_line(true)
255202
.build()
256203
.expect("dangling comma regex should compile");
257-
if let Cow::Owned(replaced) = re.replace_all(&self.content, "$1,") {
258-
self.content = replaced;
259-
}
204+
205+
self.regex_replace_all_outside_strings(re, "$1,");
260206
self
261207
}
262208

@@ -269,9 +215,7 @@ impl Formatter {
269215
.build()
270216
.expect("preload regex should compile");
271217

272-
if let Cow::Owned(replaced) = re.replace_all(&self.content, "preload($1$2)") {
273-
self.content = replaced;
274-
}
218+
self.regex_replace_all_outside_strings(re, "preload($1$2)");
275219
self
276220
}
277221

@@ -283,6 +227,71 @@ impl Formatter {
283227
self.handle_two_blank_line()
284228
}
285229

230+
/// Replaces every match of regex `re` with `rep`, but only if the match is
231+
/// outside of strings (simple or multiline).
232+
/// Use this to make post-processing changes needed for formatting but that
233+
/// shouldn't affect strings in the source code.
234+
fn regex_replace_all_outside_strings(&mut self, re: Regex, rep: &str) {
235+
let mut iter = re.captures_iter(&self.content).peekable();
236+
if iter.peek().is_none() {
237+
return;
238+
}
239+
240+
let mut new = String::new();
241+
let mut last_match = 0;
242+
let mut start_position = Point::new(0, 0);
243+
244+
// We first collect tree edits and then apply them, because regex returns positions from unmodified content
245+
let mut edits = Vec::new();
246+
247+
for capture in iter {
248+
let m = capture.get(0).unwrap();
249+
let start_byte = m.start();
250+
let old_end_byte = m.end();
251+
let node = self
252+
.tree
253+
.root_node()
254+
.descendant_for_byte_range(start_byte, start_byte)
255+
.unwrap();
256+
if node.kind() == "string" {
257+
continue;
258+
}
259+
260+
let mut replacement = String::new();
261+
capture.expand(rep, &mut replacement);
262+
263+
let new_end_byte = start_byte + replacement.len();
264+
265+
let slice = &self.content[last_match..start_byte];
266+
start_position = calculate_end_position(start_position, slice);
267+
let old_end_position =
268+
calculate_end_position(start_position, &self.content[start_byte..old_end_byte]);
269+
let new_end_position = calculate_end_position(start_position, &replacement);
270+
new.push_str(slice);
271+
new.push_str(&replacement);
272+
last_match = old_end_byte;
273+
274+
edits.push(tree_sitter::InputEdit {
275+
start_byte,
276+
old_end_byte,
277+
new_end_byte,
278+
start_position,
279+
old_end_position,
280+
new_end_position,
281+
});
282+
283+
start_position = old_end_position;
284+
}
285+
286+
new.push_str(&self.content[last_match..]);
287+
self.content = new;
288+
289+
for edit in edits {
290+
self.tree.edit(&edit);
291+
}
292+
self.tree = self.parser.parse(&self.content, Some(&self.tree)).unwrap();
293+
}
294+
286295
/// This function makes sure we have the correct vertical spacing between important definitions:
287296
/// Two blank lines between function definitions, inner classes, etc. Taking any
288297
/// comments or docstrings into account.
@@ -388,6 +397,19 @@ impl Formatter {
388397
}
389398
}
390399

400+
/// Calculates end position of the `slice` counting from `start`
401+
fn calculate_end_position(mut start: Point, slice: &str) -> Point {
402+
for b in slice.as_bytes() {
403+
if *b == b'\n' {
404+
start.row += 1;
405+
start.column = 0;
406+
} else {
407+
start.column += 1;
408+
}
409+
}
410+
start
411+
}
412+
391413
/// Returns true if both trees have the same structure.
392414
fn compare_trees(left_tree: Tree, right_tree: Tree) -> bool {
393415
let mut left_cursor = left_tree.walk();
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
func _ready() -> void:
2+
var dangling_comma = """
3+
first line
4+
,
5+
second line
6+
"""
7+
var new_line_after_extends = """
8+
extends Node
9+
10+
something
11+
"""
12+
var dangling_semicolon = """
13+
asdasd;
14+
"""
15+
var trailing_comma_in_preload = """
16+
preload("",)
17+
"""
18+
var trailing_whitespaces_in_multiline_strings = """
19+
20+
"""
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
func _ready() -> void:
2+
var dangling_comma = """
3+
first line
4+
,
5+
second line
6+
"""
7+
var new_line_after_extends = """
8+
extends Node
9+
10+
something
11+
"""
12+
var dangling_semicolon = """
13+
asdasd;
14+
"""
15+
var trailing_comma_in_preload = """
16+
preload("",)
17+
"""
18+
var trailing_whitespaces_in_multiline_strings = """
19+
20+
"""

0 commit comments

Comments
 (0)