Skip to content

Commit e1b4602

Browse files
authored
Merge pull request #165 from GDQuest/nathan/safe-mode-reorder
Implement safe mode for reorder code
2 parents 6d84cca + 6f5c089 commit e1b4602

8 files changed

Lines changed: 279 additions & 22 deletions

File tree

src/formatter.rs

Lines changed: 223 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,12 @@ use regex::{Regex, RegexBuilder, Replacer};
1818
use topiary_core::{Language, Operation, TopiaryQuery, formatter_tree};
1919
use tree_sitter::{Parser, Point, Query, QueryCursor, StreamingIterator, Tree};
2020

21-
use crate::FormatterConfig;
21+
use crate::{
22+
FormatterConfig,
23+
reorder::{
24+
GDScriptTokenKind, GDScriptTokensWithComments, MethodType, collect_top_level_tokens,
25+
},
26+
};
2227

2328
static QUERY: &str = include_str!("../queries/gdscript.scm");
2429

@@ -32,7 +37,12 @@ pub fn format_gdscript_with_config(
3237
) -> Result<String, Box<dyn std::error::Error>> {
3338
let mut formatter = Formatter::new(content.to_owned(), config.clone());
3439

35-
formatter.preprocess().format()?.postprocess().reorder();
40+
formatter
41+
.preprocess()
42+
.format()?
43+
.postprocess()
44+
.validate_formatting()?
45+
.reorder()?;
3646
formatter.finish()
3747
}
3848

@@ -42,6 +52,7 @@ struct Formatter {
4252
parser: Parser,
4353
input_tree: GdTree,
4454
tree: Tree,
55+
original_source: Option<String>,
4556
}
4657

4758
impl Formatter {
@@ -53,8 +64,17 @@ impl Formatter {
5364
.unwrap();
5465
let tree = parser.parse(&content, None).unwrap();
5566
let input_tree = GdTree::from_ts_tree(&tree, content.as_bytes());
67+
let original_source = if config.safe && config.reorder_code {
68+
// When both safe mode and reordering are enabled we keep an
69+
// untouched copy of the original source code so we can later verify
70+
// that the top-level declarations all survive the formatting pass.
71+
Some(content.clone())
72+
} else {
73+
None
74+
};
5675

5776
Self {
77+
original_source,
5878
content,
5979
config,
6080
tree,
@@ -102,9 +122,9 @@ impl Formatter {
102122
}
103123

104124
#[inline(always)]
105-
fn reorder(&mut self) -> &mut Self {
125+
fn reorder(&mut self) -> Result<&mut Self, Box<dyn std::error::Error>> {
106126
if !self.config.reorder_code {
107-
return self;
127+
return Ok(self);
108128
}
109129

110130
self.tree = self.parser.parse(&self.content, Some(&self.tree)).unwrap();
@@ -116,9 +136,15 @@ impl Formatter {
116136
eprintln!(
117137
"Warning: Code reordering failed: {e}. Returning formatted code without reordering."
118138
);
139+
return Ok(self);
119140
}
120141
};
121-
self
142+
143+
if self.config.safe {
144+
self.ensure_safe_reorder()?;
145+
}
146+
147+
Ok(self)
122148
}
123149

124150
/// This function runs over the content before going through topiary.
@@ -141,19 +167,41 @@ impl Formatter {
141167
.postprocess_tree_sitter()
142168
}
143169

144-
/// Finishes formatting and returns the resulting file content.
145170
#[inline(always)]
146-
fn finish(mut self) -> Result<String, Box<dyn std::error::Error>> {
147-
if self.config.safe {
148-
self.input_tree.postprocess();
149-
self.tree = self.parser.parse(&self.content, None).unwrap();
171+
fn validate_formatting(&mut self) -> Result<&mut Self, Box<dyn std::error::Error>> {
172+
if !self.config.safe {
173+
return Ok(self);
174+
}
150175

151-
let output_tree = GdTree::from_ts_tree(&self.tree, self.content.as_bytes());
152-
if self.input_tree != output_tree {
153-
return Err("Code structure has changed after formatting".into());
154-
}
176+
self.input_tree.postprocess();
177+
self.tree = self.parser.parse(&self.content, None).unwrap();
178+
179+
let formatted_tree = GdTree::from_ts_tree(&self.tree, self.content.as_bytes());
180+
if self.input_tree != formatted_tree {
181+
return Err("Code structure has changed after formatting".into());
155182
}
156183

184+
Ok(self)
185+
}
186+
187+
#[inline(always)]
188+
fn ensure_safe_reorder(&mut self) -> Result<(), Box<dyn std::error::Error>> {
189+
let original_source = self
190+
.original_source
191+
.as_deref()
192+
.ok_or_else(|| {
193+
"Safe mode requires the original source to verify reordered code".to_string()
194+
})?;
195+
196+
self.tree = self.parser.parse(&self.content, None).unwrap();
197+
ensure_top_level_tokens_match(original_source, &self.tree, &self.content)?;
198+
199+
Ok(())
200+
}
201+
202+
/// Finishes formatting and returns the resulting file content.
203+
#[inline(always)]
204+
fn finish(self) -> Result<String, Box<dyn std::error::Error>> {
157205
Ok(self.content)
158206
}
159207

@@ -456,6 +504,167 @@ impl Formatter {
456504
}
457505
}
458506

507+
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
508+
struct TopLevelTokenSignature {
509+
kind: String,
510+
attached_comments: Vec<String>,
511+
trailing_comments: Vec<String>,
512+
}
513+
514+
/// Ensures that the top-level tokens (child nodes of (source) in the
515+
/// tree-sitter AST) in the original source match those in the current tree
516+
/// after formatting and reordering. We compare their structural “signatures”
517+
/// (kind, relevant identifiers, and attached comments). This checks that we did
518+
/// not lose any top-level declaration.
519+
fn ensure_top_level_tokens_match(
520+
original_source: &str,
521+
current_tree: &Tree,
522+
current_source: &str,
523+
) -> Result<(), Box<dyn std::error::Error>> {
524+
// Safe mode only cares that we did not lose or duplicate any top-level declaration.
525+
// We accumulate signed counts per signature; a non-zero delta means something changed.
526+
let mut diff = std::collections::HashMap::<TopLevelTokenSignature, i32>::new();
527+
528+
for signature in parse_top_level_token_signatures(original_source)? {
529+
*diff.entry(signature).or_insert(0) += 1;
530+
}
531+
532+
for signature in top_level_token_signatures_from_tree(current_tree, current_source)? {
533+
*diff.entry(signature).or_insert(0) -= 1;
534+
}
535+
536+
let mismatched: Vec<_> = diff.iter().filter(|(_, count)| **count != 0).collect();
537+
538+
if !mismatched.is_empty() {
539+
eprintln!("Safe mode mismatch detected at top level:");
540+
for (signature, count) in mismatched {
541+
eprintln!(" {:?}: delta={}", signature, count);
542+
}
543+
return Err("Safe mode detected mismatched top-level declarations after reordering".into());
544+
}
545+
546+
Ok(())
547+
}
548+
549+
fn parse_top_level_token_signatures(
550+
source: &str,
551+
) -> Result<Vec<TopLevelTokenSignature>, Box<dyn std::error::Error>> {
552+
// We re-parse the original content with tree-sitter instead of reusing `input_tree`
553+
// because the reorder module already knows how to classify the raw syntax tree into
554+
// the token structures we want to compare. I'm not 100% sure it's needed
555+
// but it's not very costly.
556+
let mut parser = Parser::new();
557+
parser
558+
.set_language(&tree_sitter_gdscript::LANGUAGE.into())
559+
.unwrap();
560+
let tree = parser
561+
.parse(source, None)
562+
.ok_or_else(|| "Failed to parse GDScript source in safe mode")?;
563+
564+
top_level_token_signatures_from_tree(&tree, source)
565+
}
566+
567+
fn top_level_token_signatures_from_tree(
568+
tree: &Tree,
569+
content: &str,
570+
) -> Result<Vec<TopLevelTokenSignature>, Box<dyn std::error::Error>> {
571+
let tokens = collect_top_level_tokens(tree, content)?;
572+
let mut signatures = Vec::with_capacity(tokens.len());
573+
574+
for token in tokens {
575+
let GDScriptTokensWithComments {
576+
token_kind,
577+
attached_comments,
578+
trailing_comments,
579+
start_byte: _,
580+
end_byte: _,
581+
original_text,
582+
} = token;
583+
584+
signatures.push(TopLevelTokenSignature {
585+
kind: token_kind_key(&token_kind),
586+
attached_comments,
587+
trailing_comments,
588+
});
589+
590+
if let Some(extends_key) = inline_extends_signature(&token_kind, original_text.as_str()) {
591+
signatures.push(TopLevelTokenSignature {
592+
kind: extends_key,
593+
attached_comments: Vec::new(),
594+
trailing_comments: Vec::new(),
595+
});
596+
}
597+
}
598+
599+
Ok(signatures)
600+
}
601+
602+
fn token_kind_key(kind: &GDScriptTokenKind) -> String {
603+
match kind {
604+
GDScriptTokenKind::ClassAnnotation(text) => format!("ClassAnnotation::{text}"),
605+
GDScriptTokenKind::ClassName(text) => format!("ClassName::{text}"),
606+
GDScriptTokenKind::Extends(text) => format!("Extends::{text}"),
607+
GDScriptTokenKind::Docstring(text) => format!("Docstring::{text}"),
608+
GDScriptTokenKind::Signal(name, is_private) => {
609+
format!("Signal::{name}::{is_private}")
610+
}
611+
GDScriptTokenKind::Enum(name, is_private) => format!("Enum::{name}::{is_private}"),
612+
GDScriptTokenKind::Constant(name, is_private) => {
613+
format!("Constant::{name}::{is_private}")
614+
}
615+
GDScriptTokenKind::StaticVariable(name, is_private) => {
616+
format!("StaticVariable::{name}::{is_private}")
617+
}
618+
GDScriptTokenKind::ExportVariable(name, is_private) => {
619+
format!("ExportVariable::{name}::{is_private}")
620+
}
621+
GDScriptTokenKind::RegularVariable(name, is_private) => {
622+
format!("RegularVariable::{name}::{is_private}")
623+
}
624+
GDScriptTokenKind::OnReadyVariable(name, is_private) => {
625+
format!("OnReadyVariable::{name}::{is_private}")
626+
}
627+
GDScriptTokenKind::Method(name, method_type, is_private) => format!(
628+
"Method::{name}::{}::{is_private}",
629+
method_type_key(method_type)
630+
),
631+
GDScriptTokenKind::InnerClass(name, is_private) => {
632+
format!("InnerClass::{name}::{is_private}")
633+
}
634+
GDScriptTokenKind::Unknown(text) => format!("Unknown::{text}"),
635+
}
636+
}
637+
638+
fn method_type_key(method_type: &MethodType) -> String {
639+
match method_type {
640+
MethodType::StaticInit => "StaticInit".to_string(),
641+
MethodType::StaticFunction => "StaticFunction".to_string(),
642+
MethodType::BuiltinVirtual(priority) => format!("BuiltinVirtual({priority})"),
643+
MethodType::Custom => "Custom".to_string(),
644+
}
645+
}
646+
647+
fn inline_extends_signature(token_kind: &GDScriptTokenKind, original_text: &str) -> Option<String> {
648+
match token_kind {
649+
GDScriptTokenKind::ClassName(_) => {
650+
let extends_part = extract_inline_extends(original_text)?;
651+
Some(format!("Extends::{extends_part}"))
652+
}
653+
_ => None,
654+
}
655+
}
656+
657+
fn extract_inline_extends(original_text: &str) -> Option<String> {
658+
let extends_index = original_text.find("extends")?;
659+
let extends_slice = &original_text[extends_index..];
660+
let trimmed = extends_slice.trim();
661+
if trimmed.is_empty() {
662+
None
663+
} else {
664+
Some(trimmed.to_string())
665+
}
666+
}
667+
459668
/// A syntax tree of the source code.
460669
struct GdTree {
461670
nodes: Vec<GdTreeNode>,

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
pub mod formatter;
2-
pub mod reorder;
32
pub mod linter;
3+
pub mod reorder;
44

55
#[derive(Clone)]
66
pub struct FormatterConfig {

src/main.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,10 @@ struct Args {
9999
///
100100
/// This offers a good amount protection against the formatter failing
101101
/// on new syntax at the cost of a small little extra running time.
102-
/// Currently incompatible with --reorder-code.
103102
///
104103
/// WARNING: this is not a perfect solution. Some rare edge cases may still
105104
/// lead to syntax changes.
106-
#[arg(short, long, conflicts_with = "reorder_code")]
105+
#[arg(short, long)]
107106
safe: bool,
108107
}
109108

src/reorder.rs

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,21 @@ pub fn reorder_gdscript_elements(
1515
tree: &Tree,
1616
content: &str,
1717
) -> Result<String, Box<dyn std::error::Error>> {
18-
let tokens = extract_tokens_to_reorder(tree, content)?;
18+
let tokens = collect_top_level_tokens(tree, content)?;
1919
let ordered_elements = sort_gdscript_tokens(tokens);
2020
let reordered_content = build_reordered_code(ordered_elements, content);
2121

2222
Ok(reordered_content)
2323
}
2424

25+
/// Collects all top-level tokens (direct children of the `source` node) without reordering them.
26+
pub fn collect_top_level_tokens(
27+
tree: &Tree,
28+
content: &str,
29+
) -> Result<Vec<GDScriptTokensWithComments>, Box<dyn std::error::Error>> {
30+
extract_tokens_to_reorder(tree, content)
31+
}
32+
2533
/// This struct is used to hold an element along with its associated comments
2634
/// and original text so we can precisely reconstruct it, and also when we move
2735
/// functions etc. their docstrings and comments come along.
@@ -35,7 +43,7 @@ pub struct GDScriptTokensWithComments {
3543
pub end_byte: usize,
3644
}
3745

38-
#[derive(Debug, Clone, PartialEq)]
46+
#[derive(Debug, Clone, PartialEq, Eq)]
3947
pub enum GDScriptTokenKind {
4048
ClassAnnotation(String), // Annotations that go at the top of the file like @tool and @icon
4149
ClassName(String), // This is the class_name declaration
@@ -226,7 +234,7 @@ fn extract_tokens_to_reorder(
226234
content: &str,
227235
) -> Result<Vec<GDScriptTokensWithComments>, Box<dyn std::error::Error>> {
228236
let root = tree.root_node();
229-
let mut elements = Vec::new();
237+
let mut elements: Vec<GDScriptTokensWithComments> = Vec::new();
230238

231239
// This query covers all top-level elements (direct children of source)
232240
// We need to capture everything so nothing gets lost
@@ -345,7 +353,33 @@ fn extract_tokens_to_reorder(
345353
// This may look inefficient but in practice it should not have much impact
346354
if class_docstring_comments_rows.contains(&node.start_position().row) {
347355
continue;
348-
} else {
356+
}
357+
358+
// Here we look for inline comments after declarations, and if
359+
// so, we attach them as inline to the declaration. For
360+
// example:
361+
//
362+
// var test = 1 # inline comment
363+
//
364+
// Without this code, the comment would wrap to the next line.
365+
let mut handled_inline = false;
366+
if let Some(last_element) = elements.last_mut() {
367+
let last_end = last_element.end_byte;
368+
let comment_start = node.start_byte();
369+
if last_end <= comment_start && comment_start <= content.len() {
370+
if let Some(spacing) = content.get(last_end..comment_start) {
371+
let has_newline = spacing.contains('\n') || spacing.contains('\r');
372+
if !has_newline {
373+
last_element.original_text.push_str(spacing);
374+
last_element.original_text.push_str(&text);
375+
last_element.end_byte = node.end_byte();
376+
handled_inline = true;
377+
}
378+
}
379+
}
380+
}
381+
382+
if !handled_inline {
349383
pending_comments.push(PendingAttachment {
350384
start_byte: node.start_byte(),
351385
text: text.clone(),

0 commit comments

Comments
 (0)