Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
237 changes: 223 additions & 14 deletions src/formatter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@ use regex::{Regex, RegexBuilder, Replacer};
use topiary_core::{Language, Operation, TopiaryQuery, formatter_tree};
use tree_sitter::{Parser, Point, Query, QueryCursor, StreamingIterator, Tree};

use crate::FormatterConfig;
use crate::{
FormatterConfig,
reorder::{
GDScriptTokenKind, GDScriptTokensWithComments, MethodType, collect_top_level_tokens,
},
};

static QUERY: &str = include_str!("../queries/gdscript.scm");

Expand All @@ -32,7 +37,12 @@ pub fn format_gdscript_with_config(
) -> Result<String, Box<dyn std::error::Error>> {
let mut formatter = Formatter::new(content.to_owned(), config.clone());

formatter.preprocess().format()?.postprocess().reorder();
formatter
.preprocess()
.format()?
.postprocess()
.validate_formatting()?
.reorder()?;
formatter.finish()
}

Expand All @@ -42,6 +52,7 @@ struct Formatter {
parser: Parser,
input_tree: GdTree,
tree: Tree,
original_source: Option<String>,
}

impl Formatter {
Expand All @@ -53,8 +64,17 @@ impl Formatter {
.unwrap();
let tree = parser.parse(&content, None).unwrap();
let input_tree = GdTree::from_ts_tree(&tree, content.as_bytes());
let original_source = if config.safe && config.reorder_code {
// When both safe mode and reordering are enabled we keep an
// untouched copy of the original source code so we can later verify
// that the top-level declarations all survive the formatting pass.
Some(content.clone())
} else {
None
};

Self {
original_source,
content,
config,
tree,
Expand Down Expand Up @@ -102,9 +122,9 @@ impl Formatter {
}

#[inline(always)]
fn reorder(&mut self) -> &mut Self {
fn reorder(&mut self) -> Result<&mut Self, Box<dyn std::error::Error>> {
if !self.config.reorder_code {
return self;
return Ok(self);
}

self.tree = self.parser.parse(&self.content, Some(&self.tree)).unwrap();
Expand All @@ -116,9 +136,15 @@ impl Formatter {
eprintln!(
"Warning: Code reordering failed: {e}. Returning formatted code without reordering."
);
return Ok(self);
}
};
self

if self.config.safe {
self.ensure_safe_reorder()?;
}

Ok(self)
}

/// This function runs over the content before going through topiary.
Expand All @@ -141,19 +167,41 @@ impl Formatter {
.postprocess_tree_sitter()
}

/// Finishes formatting and returns the resulting file content.
#[inline(always)]
fn finish(mut self) -> Result<String, Box<dyn std::error::Error>> {
if self.config.safe {
self.input_tree.postprocess();
self.tree = self.parser.parse(&self.content, None).unwrap();
fn validate_formatting(&mut self) -> Result<&mut Self, Box<dyn std::error::Error>> {
if !self.config.safe {
return Ok(self);
}

let output_tree = GdTree::from_ts_tree(&self.tree, self.content.as_bytes());
if self.input_tree != output_tree {
return Err("Code structure has changed after formatting".into());
}
self.input_tree.postprocess();
self.tree = self.parser.parse(&self.content, None).unwrap();

let formatted_tree = GdTree::from_ts_tree(&self.tree, self.content.as_bytes());
if self.input_tree != formatted_tree {
return Err("Code structure has changed after formatting".into());
}

Ok(self)
}

#[inline(always)]
fn ensure_safe_reorder(&mut self) -> Result<(), Box<dyn std::error::Error>> {
let original_source = self
.original_source
.as_deref()
.ok_or_else(|| {
"Safe mode requires the original source to verify reordered code".to_string()
})?;

self.tree = self.parser.parse(&self.content, None).unwrap();
ensure_top_level_tokens_match(original_source, &self.tree, &self.content)?;

Ok(())
}

/// Finishes formatting and returns the resulting file content.
#[inline(always)]
fn finish(self) -> Result<String, Box<dyn std::error::Error>> {
Ok(self.content)
}

Expand Down Expand Up @@ -456,6 +504,167 @@ impl Formatter {
}
}

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct TopLevelTokenSignature {
kind: String,
attached_comments: Vec<String>,
trailing_comments: Vec<String>,
}

/// Ensures that the top-level tokens (child nodes of (source) in the
/// tree-sitter AST) in the original source match those in the current tree
/// after formatting and reordering. We compare their structural “signatures”
/// (kind, relevant identifiers, and attached comments). This checks that we did
/// not lose any top-level declaration.
fn ensure_top_level_tokens_match(
original_source: &str,
current_tree: &Tree,
current_source: &str,
) -> Result<(), Box<dyn std::error::Error>> {
// Safe mode only cares that we did not lose or duplicate any top-level declaration.
// We accumulate signed counts per signature; a non-zero delta means something changed.
let mut diff = std::collections::HashMap::<TopLevelTokenSignature, i32>::new();

for signature in parse_top_level_token_signatures(original_source)? {
*diff.entry(signature).or_insert(0) += 1;
}

for signature in top_level_token_signatures_from_tree(current_tree, current_source)? {
*diff.entry(signature).or_insert(0) -= 1;
}

let mismatched: Vec<_> = diff.iter().filter(|(_, count)| **count != 0).collect();

if !mismatched.is_empty() {
eprintln!("Safe mode mismatch detected at top level:");
for (signature, count) in mismatched {
eprintln!(" {:?}: delta={}", signature, count);
}
return Err("Safe mode detected mismatched top-level declarations after reordering".into());
}

Ok(())
}

fn parse_top_level_token_signatures(
source: &str,
) -> Result<Vec<TopLevelTokenSignature>, Box<dyn std::error::Error>> {
// We re-parse the original content with tree-sitter instead of reusing `input_tree`
// because the reorder module already knows how to classify the raw syntax tree into
// the token structures we want to compare. I'm not 100% sure it's needed
// but it's not very costly.
let mut parser = Parser::new();
parser
.set_language(&tree_sitter_gdscript::LANGUAGE.into())
.unwrap();
let tree = parser
.parse(source, None)
.ok_or_else(|| "Failed to parse GDScript source in safe mode")?;

top_level_token_signatures_from_tree(&tree, source)
}

fn top_level_token_signatures_from_tree(
tree: &Tree,
content: &str,
) -> Result<Vec<TopLevelTokenSignature>, Box<dyn std::error::Error>> {
let tokens = collect_top_level_tokens(tree, content)?;
let mut signatures = Vec::with_capacity(tokens.len());

for token in tokens {
let GDScriptTokensWithComments {
token_kind,
attached_comments,
trailing_comments,
start_byte: _,
end_byte: _,
original_text,
} = token;

signatures.push(TopLevelTokenSignature {
kind: token_kind_key(&token_kind),
attached_comments,
trailing_comments,
});

if let Some(extends_key) = inline_extends_signature(&token_kind, original_text.as_str()) {
signatures.push(TopLevelTokenSignature {
kind: extends_key,
attached_comments: Vec::new(),
trailing_comments: Vec::new(),
});
}
}

Ok(signatures)
}

fn token_kind_key(kind: &GDScriptTokenKind) -> String {
match kind {
GDScriptTokenKind::ClassAnnotation(text) => format!("ClassAnnotation::{text}"),
GDScriptTokenKind::ClassName(text) => format!("ClassName::{text}"),
GDScriptTokenKind::Extends(text) => format!("Extends::{text}"),
GDScriptTokenKind::Docstring(text) => format!("Docstring::{text}"),
GDScriptTokenKind::Signal(name, is_private) => {
format!("Signal::{name}::{is_private}")
}
GDScriptTokenKind::Enum(name, is_private) => format!("Enum::{name}::{is_private}"),
GDScriptTokenKind::Constant(name, is_private) => {
format!("Constant::{name}::{is_private}")
}
GDScriptTokenKind::StaticVariable(name, is_private) => {
format!("StaticVariable::{name}::{is_private}")
}
GDScriptTokenKind::ExportVariable(name, is_private) => {
format!("ExportVariable::{name}::{is_private}")
}
GDScriptTokenKind::RegularVariable(name, is_private) => {
format!("RegularVariable::{name}::{is_private}")
}
GDScriptTokenKind::OnReadyVariable(name, is_private) => {
format!("OnReadyVariable::{name}::{is_private}")
}
GDScriptTokenKind::Method(name, method_type, is_private) => format!(
"Method::{name}::{}::{is_private}",
method_type_key(method_type)
),
GDScriptTokenKind::InnerClass(name, is_private) => {
format!("InnerClass::{name}::{is_private}")
}
GDScriptTokenKind::Unknown(text) => format!("Unknown::{text}"),
}
}

fn method_type_key(method_type: &MethodType) -> String {
match method_type {
MethodType::StaticInit => "StaticInit".to_string(),
MethodType::StaticFunction => "StaticFunction".to_string(),
MethodType::BuiltinVirtual(priority) => format!("BuiltinVirtual({priority})"),
MethodType::Custom => "Custom".to_string(),
}
}

fn inline_extends_signature(token_kind: &GDScriptTokenKind, original_text: &str) -> Option<String> {
match token_kind {
GDScriptTokenKind::ClassName(_) => {
let extends_part = extract_inline_extends(original_text)?;
Some(format!("Extends::{extends_part}"))
}
_ => None,
}
}

fn extract_inline_extends(original_text: &str) -> Option<String> {
let extends_index = original_text.find("extends")?;
let extends_slice = &original_text[extends_index..];
let trimmed = extends_slice.trim();
if trimmed.is_empty() {
None
} else {
Some(trimmed.to_string())
}
}

/// A syntax tree of the source code.
struct GdTree {
nodes: Vec<GdTreeNode>,
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
pub mod formatter;
pub mod reorder;
pub mod linter;
pub mod reorder;

#[derive(Clone)]
pub struct FormatterConfig {
Expand Down
3 changes: 1 addition & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,10 @@ struct Args {
///
/// This offers a good amount protection against the formatter failing
/// on new syntax at the cost of a small little extra running time.
/// Currently incompatible with --reorder-code.
///
/// WARNING: this is not a perfect solution. Some rare edge cases may still
/// lead to syntax changes.
#[arg(short, long, conflicts_with = "reorder_code")]
#[arg(short, long)]
safe: bool,
}

Expand Down
42 changes: 38 additions & 4 deletions src/reorder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,21 @@ pub fn reorder_gdscript_elements(
tree: &Tree,
content: &str,
) -> Result<String, Box<dyn std::error::Error>> {
let tokens = extract_tokens_to_reorder(tree, content)?;
let tokens = collect_top_level_tokens(tree, content)?;
let ordered_elements = sort_gdscript_tokens(tokens);
let reordered_content = build_reordered_code(ordered_elements, content);

Ok(reordered_content)
}

/// Collects all top-level tokens (direct children of the `source` node) without reordering them.
pub fn collect_top_level_tokens(
tree: &Tree,
content: &str,
) -> Result<Vec<GDScriptTokensWithComments>, Box<dyn std::error::Error>> {
extract_tokens_to_reorder(tree, content)
}

/// This struct is used to hold an element along with its associated comments
/// and original text so we can precisely reconstruct it, and also when we move
/// functions etc. their docstrings and comments come along.
Expand All @@ -35,7 +43,7 @@ pub struct GDScriptTokensWithComments {
pub end_byte: usize,
}

#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GDScriptTokenKind {
ClassAnnotation(String), // Annotations that go at the top of the file like @tool and @icon
ClassName(String), // This is the class_name declaration
Expand Down Expand Up @@ -226,7 +234,7 @@ fn extract_tokens_to_reorder(
content: &str,
) -> Result<Vec<GDScriptTokensWithComments>, Box<dyn std::error::Error>> {
let root = tree.root_node();
let mut elements = Vec::new();
let mut elements: Vec<GDScriptTokensWithComments> = Vec::new();

// This query covers all top-level elements (direct children of source)
// We need to capture everything so nothing gets lost
Expand Down Expand Up @@ -345,7 +353,33 @@ fn extract_tokens_to_reorder(
// This may look inefficient but in practice it should not have much impact
if class_docstring_comments_rows.contains(&node.start_position().row) {
continue;
} else {
}

// Here we look for inline comments after declarations, and if
// so, we attach them as inline to the declaration. For
// example:
//
// var test = 1 # inline comment
//
// Without this code, the comment would wrap to the next line.
let mut handled_inline = false;
if let Some(last_element) = elements.last_mut() {
let last_end = last_element.end_byte;
let comment_start = node.start_byte();
if last_end <= comment_start && comment_start <= content.len() {
if let Some(spacing) = content.get(last_end..comment_start) {
let has_newline = spacing.contains('\n') || spacing.contains('\r');
if !has_newline {
last_element.original_text.push_str(spacing);
last_element.original_text.push_str(&text);
last_element.end_byte = node.end_byte();
handled_inline = true;
}
}
}
}

if !handled_inline {
pending_comments.push(PendingAttachment {
start_byte: node.start_byte(),
text: text.clone(),
Expand Down
Loading