Skip to content

Commit 411b524

Browse files
authored
[vhdl_syntax] Add remove to rewrite action, and fix source text offset calculation
1 parent 1535b3b commit 411b524

15 files changed

Lines changed: 427 additions & 223 deletions

vhdl-dump-ast/src/main.rs

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,6 @@ struct Args {
2525
#[arg(short, long, default_value = "false")]
2626
trivia: bool,
2727

28-
/// Include source-code location into the dumped AST
29-
#[arg(short, long, default_value = "false")]
30-
loc: bool,
31-
3228
/// Specify the encoding to use for comments
3329
#[arg(short, long, default_value = "None")]
3430
comment_encoding: Option<String>,
@@ -46,13 +42,11 @@ fn serialize(
4642
format: OutputFormat,
4743
pretty: bool,
4844
trivia: bool,
49-
loc: bool,
5045
comment_encoding: String,
5146
) -> Result<String, Box<dyn Error>> {
5247
let serde_flags = SerdeFlags::default()
5348
.with_comment_encoding(comment_encoding)
54-
.include_trivia(trivia)
55-
.include_loc(loc);
49+
.include_trivia(trivia);
5650
let serializable_node = node.serialize_with(serde_flags);
5751
Ok(match (format, pretty) {
5852
(OutputFormat::Json, false) => serde_json::to_string(&serializable_node)?,
@@ -80,7 +74,6 @@ fn main() {
8074
args.format,
8175
!args.no_pretty,
8276
args.trivia,
83-
args.loc,
8477
args.comment_encoding
8578
.unwrap_or(DEFAULT_COMMENT_ENCODING.to_string()),
8679
) {
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
//! Alphabetically sorts and deduplicates `use` clauses inside a design unit's context clause using
2+
//! the [`Rewriter`](vhdl_syntax::syntax::rewrite) API.
3+
// This Source Code Form is subject to the terms of the Mozilla Public
4+
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
5+
// You can obtain one at http://mozilla.org/MPL/2.0/.
6+
//
7+
// Copyright (c) 2026, Lukas Scheller lukasscheller@icloud.com
8+
use std::collections::HashMap;
9+
use vhdl_syntax::parser;
10+
use vhdl_syntax::syntax::node::{SyntaxElement, SyntaxNode};
11+
use vhdl_syntax::syntax::rewrite::RewriteAction;
12+
use vhdl_syntax::syntax::AstNode;
13+
use vhdl_syntax::syntax::{ContextItemSyntax, UseClauseContextItemSyntax};
14+
15+
fn main() {
16+
// Out-of-order imports with one duplicate (`std_logic_1164` appears twice).
17+
let vhdl = "\
18+
library ieee;
19+
use ieee.math_real.all;
20+
use ieee.std_logic_1164.all;
21+
use ieee.numeric_std.all;
22+
use ieee.std_logic_1164.all;
23+
24+
entity foo is
25+
end foo;
26+
";
27+
let (file, diagnostics) = parser::parse(vhdl);
28+
assert!(diagnostics.is_empty());
29+
30+
// 1) Collect every `use`-clause context item from each design unit, in document order.
31+
let originals: Vec<UseClauseContextItemSyntax> = file
32+
.design_units()
33+
.filter_map(|du| du.context_clause())
34+
.flat_map(|cc| cc.context_items().collect::<Vec<_>>())
35+
.filter_map(|ci| match ci {
36+
ContextItemSyntax::UseClauseContextItem(u) => Some(u),
37+
_ => None,
38+
})
39+
.collect();
40+
41+
// 2) Build the sorted, deduplicated target list.
42+
let mut sorted: Vec<UseClauseContextItemSyntax> = originals.clone();
43+
sorted.sort_by_key(sort_key);
44+
sorted.dedup_by_key(|u| sort_key(u));
45+
46+
// 3) Map each original slot (by its offset in the source) to either the next
47+
// sorted replacement or `None` (= remove this slot, it was a duplicate).
48+
let mut plan: HashMap<usize, Option<SyntaxNode>> = HashMap::new();
49+
let mut replacements = sorted.into_iter().map(|u| u.raw());
50+
for orig in &originals {
51+
plan.insert(orig.raw().offset(), replacements.next());
52+
}
53+
54+
// 4) Single rewrite pass: every visited UseClauseContextItem is either swapped to
55+
// its sorted replacement (`Change`) or dropped entirely (`Remove`).
56+
let new_file = file.raw().rewrite(|el| match el {
57+
SyntaxElement::Node(n) => match plan.get(&n.offset()) {
58+
Some(Some(replacement)) => {
59+
RewriteAction::Change(SyntaxElement::Node(replacement.clone()))
60+
}
61+
Some(None) => RewriteAction::Remove,
62+
None => RewriteAction::Leave,
63+
},
64+
SyntaxElement::Token(_) => RewriteAction::Leave,
65+
});
66+
67+
assert_eq!(
68+
format!("{}", new_file),
69+
"\
70+
library ieee;
71+
use ieee.math_real.all;
72+
use ieee.numeric_std.all;
73+
use ieee.std_logic_1164.all;
74+
75+
entity foo is
76+
end foo;
77+
"
78+
);
79+
}
80+
81+
/// Alphabetical sort key — just the displayed text without surrounding whitespace.
82+
/// Good enough for this example; a real tool would compare the parsed name segments.
83+
fn sort_key(item: &UseClauseContextItemSyntax) -> String {
84+
item.raw().to_string().trim().to_lowercase()
85+
}

vhdl_syntax/src/fmt/latin1.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,8 @@ impl ToLatin1 for SyntaxToken {
9090
impl ToLatin1 for GreenChild {
9191
fn to_latin1(&self) -> Latin1String {
9292
match self {
93-
Child::Node((_, node)) => node.to_latin1(),
94-
Child::Token((_, token)) => token.to_latin1(),
93+
Child::Node(node) => node.to_latin1(),
94+
Child::Token(token) => token.to_latin1(),
9595
}
9696
}
9797
}

vhdl_syntax/src/fmt/utf8.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,8 @@ impl fmt::Display for SyntaxToken {
8989
impl fmt::Display for GreenChild {
9090
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
9191
match self {
92-
Child::Node((_, node)) => write!(f, "{}", node),
93-
Child::Token((_, token)) => write!(f, "{}", token),
92+
Child::Node(node) => write!(f, "{}", node),
93+
Child::Token(token) => write!(f, "{}", token),
9494
}
9595
}
9696
}

vhdl_syntax/src/parser/builder.rs

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ use crate::tokens::Token;
1010

1111
/// Internal builder used to create nodes when parsing.
1212
pub(crate) struct NodeBuilder {
13-
rel_offset: usize,
1413
text_len: usize,
1514
token_index: usize,
1615
parents: Vec<(NodeKind, usize)>,
@@ -22,7 +21,6 @@ pub(crate) type Checkpoint = usize;
2221
impl NodeBuilder {
2322
pub fn new() -> NodeBuilder {
2423
NodeBuilder {
25-
rel_offset: 0,
2624
text_len: 0,
2725
token_index: 0,
2826
parents: Vec::new(),
@@ -32,10 +30,8 @@ impl NodeBuilder {
3230

3331
pub fn push(&mut self, token: Token) {
3432
let tok_text_len = token.byte_len();
35-
let offset = self.rel_offset;
3633
self.children
37-
.push(GreenChild::Token((offset, GreenToken::new(token))));
38-
self.rel_offset += tok_text_len;
34+
.push(GreenChild::Token(GreenToken::new(token)));
3935
self.token_index += 1;
4036
self.text_len += tok_text_len;
4137
}
@@ -53,15 +49,14 @@ impl NodeBuilder {
5349
// TODO: This is a required invariant, but enforcing it here is brittle.
5450
// Instead, we should move to an event-based API for parser <-> builder
5551
if !data.is_empty() {
56-
self.children
57-
.push(GreenChild::Node((0, GreenNode::new(data))));
52+
self.children.push(GreenChild::Node(GreenNode::new(data)));
5853
}
5954
}
6055

6156
pub fn end(mut self) -> GreenNode {
6257
assert_eq!(self.children.len(), 1);
6358
match self.children.pop().unwrap() {
64-
GreenChild::Node((_, node)) => node,
59+
GreenChild::Node(node) => node,
6560
GreenChild::Token(_) => panic!(),
6661
}
6762
}
@@ -92,9 +87,7 @@ impl NodeBuilder {
9287

9388
pub(crate) fn push_node(&mut self, node: GreenNode) {
9489
let node_len = node.byte_len();
95-
let offset = self.rel_offset;
96-
self.children.push(GreenChild::Node((offset, node)));
97-
self.rel_offset += node_len;
90+
self.children.push(GreenChild::Node(node));
9891
self.text_len += node_len;
9992
}
10093

vhdl_syntax/src/parser/productions/context.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@ impl Parser {
3232
self.start_node(NodeKind::ContextClause);
3333
loop {
3434
match self.peek_token() {
35-
Keyword(Kw::Use) => self.use_clause(),
35+
Keyword(Kw::Use) => {
36+
self.start_node(NodeKind::UseClauseContextItem);
37+
self.use_clause();
38+
self.end_node();
39+
}
3640
Keyword(Kw::Library) => self.library_clause(),
3741
Keyword(Kw::Context) => {
3842
if !self.next_nth_is(Keyword(Kw::Is), 2) {

vhdl_syntax/src/parser/productions/snapshots/vhdl_syntax__parser__productions__design__tests__context_clause_associated_with_design_units.snap

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,16 @@ DesignFile
1010
IdentifierList
1111
Identifier 'lib'
1212
SemiColon
13-
UseClause
14-
Keyword(Use)
15-
NameList
16-
Name
17-
Identifier 'lib'
18-
SelectedName
19-
Dot
20-
Identifier 'foo'
21-
SemiColon
13+
UseClauseContextItem
14+
UseClause
15+
Keyword(Use)
16+
NameList
17+
Name
18+
Identifier 'lib'
19+
SelectedName
20+
Dot
21+
Identifier 'foo'
22+
SemiColon
2223
EntityDeclaration
2324
EntityDeclarationPreamble
2425
Keyword(Entity)

vhdl_syntax/src/parser/productions/snapshots/vhdl_syntax__parser__productions__design__tests__context_clause_items.snap

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,16 @@ ContextDeclaration
1313
IdentifierList
1414
Identifier 'foo'
1515
SemiColon
16-
UseClause
17-
Keyword(Use)
18-
NameList
19-
Name
20-
Identifier 'foo'
21-
SelectedName
22-
Dot
23-
Identifier 'bar'
24-
SemiColon
16+
UseClauseContextItem
17+
UseClause
18+
Keyword(Use)
19+
NameList
20+
Name
21+
Identifier 'foo'
22+
SelectedName
23+
Dot
24+
Identifier 'bar'
25+
SemiColon
2526
ContextReference
2627
Keyword(Context)
2728
NameList

vhdl_syntax/src/parser/productions/snapshots/vhdl_syntax__parser__productions__design__tests__parse_entity_with_context_clause.snap

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,19 @@ DesignFile
1010
IdentifierList
1111
Identifier 'ieee'
1212
SemiColon
13-
UseClause
14-
Keyword(Use)
15-
NameList
16-
Name
17-
Identifier 'ieee'
18-
SelectedName
19-
Dot
20-
Identifier 'std_logic_1164'
21-
SelectedName
22-
Dot
23-
Keyword(All)
24-
SemiColon
13+
UseClauseContextItem
14+
UseClause
15+
Keyword(Use)
16+
NameList
17+
Name
18+
Identifier 'ieee'
19+
SelectedName
20+
Dot
21+
Identifier 'std_logic_1164'
22+
SelectedName
23+
Dot
24+
Keyword(All)
25+
SemiColon
2526
EntityDeclaration
2627
EntityDeclarationPreamble
2728
Keyword(Entity)

vhdl_syntax/src/serde/flags.rs

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,30 @@
11
//! Configuration flags for controlling serialization behavior of nodes and tokens.
22
//!
33
//! `SerdeFlags` allows fine-grained control over what gets included when serializing
4-
//! VHDL syntax trees, such as trivia (whitespace, comments) and source location data.
4+
//! VHDL syntax trees, such as trivia (whitespace, comments) and the comment encoding.
55
//!
66
//! # Example
77
//!
88
//! ```
99
//! # use vhdl_syntax::serde::SerdeFlags;
1010
//! let flags = SerdeFlags::default()
11-
//! .include_trivia(false)
12-
//! .include_loc(true);
11+
//! .include_trivia(false);
1312
//!
1413
//! // Serialized data will not include trivia information
1514
//! assert!(!flags.includes_trivia());
16-
//!
17-
//! // Serialized data will include source location
18-
//! assert!(flags.includes_loc());
1915
//! ```
2016
2117
/// Controls how the syntax nodes are being serialized.
2218
#[derive(Debug, Clone)]
2319
pub struct SerdeFlags {
2420
include_trivia: bool,
25-
include_loc: bool,
2621
comment_encoding: String,
2722
}
2823

2924
impl Default for SerdeFlags {
3025
fn default() -> Self {
3126
Self {
3227
include_trivia: true,
33-
include_loc: true,
3428
comment_encoding: "utf-8".into(),
3529
}
3630
}
@@ -48,17 +42,6 @@ impl SerdeFlags {
4842
self
4943
}
5044

51-
/// Whether to include source location
52-
pub fn includes_loc(&self) -> bool {
53-
self.include_loc
54-
}
55-
56-
/// Specifies whether location information should be included in the serialized output
57-
pub fn include_loc(mut self, include: bool) -> Self {
58-
self.include_loc = include;
59-
self
60-
}
61-
6245
/// Comments in VHDL can have arbitrary encoding. This flag allows serializers to specify an
6346
/// encoding that is attached to individual comments in the serialized AST.
6447
///

0 commit comments

Comments
 (0)