Skip to content

Commit e8eeba2

Browse files
authored
Remove raw tokens (#468)
Removes all instances of `RawTokens` in favour of a widened AST that can represent not only any valid VHDL (as defined by the LRM) but also invalid VHDL. Invalid constructs must be rejected in a subsequent analysis stage.
1 parent e142ca8 commit e8eeba2

197 files changed

Lines changed: 3615 additions & 3833 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

vhdl_syntax/src/builder.rs

Lines changed: 1 addition & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,7 @@
2323
//! ```
2424
2525
use crate::latin_1::{char_to_latin1, Latin1Str, Latin1String, NonLatin1CharError};
26-
use crate::parser::builder::NodeBuilder;
27-
use crate::syntax::node::SyntaxNode;
28-
use crate::syntax::node_kind::NodeKind;
29-
use crate::tokens::{Keyword, Token, TokenKind, Tokenize, Trivia, TriviaPiece};
26+
use crate::tokens::{Keyword, Token, TokenKind, Trivia, TriviaPiece};
3027

3128
fn default_trivia() -> Trivia {
3229
Trivia::from([TriviaPiece::Spaces(1)])
@@ -451,69 +448,6 @@ fn bit_string_literal_helpers_produce_correct_prefixes_prefixes() {
451448
assert_eq!(tok.text().to_string(), r#"X"FF""#);
452449
}
453450

454-
// MARK: RawNodeBuilder
455-
456-
/// Shared builder for `!RawTokens` AST nodes (e.g. `ActualPartSyntax`, `RawTokensSyntax`).
457-
/// Use the generated per-node wrappers rather than this type directly.
458-
pub(crate) struct RawNodeBuilder {
459-
kind: NodeKind,
460-
tokens: Vec<Token>,
461-
}
462-
463-
impl RawNodeBuilder {
464-
pub(crate) fn new(kind: NodeKind) -> Self {
465-
Self {
466-
kind,
467-
tokens: vec![],
468-
}
469-
}
470-
471-
pub(crate) fn token(mut self, t: impl Into<Token>) -> Self {
472-
self.tokens.push(t.into());
473-
self
474-
}
475-
476-
/// Tokenizes `vhdl` and stores the resulting tokens (EOF token excluded).
477-
/// Adds one leading space to the first token when it carries no trivia,
478-
/// matching the default-trivia convention of all other builders.
479-
pub(crate) fn from_vhdl(kind: NodeKind, vhdl: impl Tokenize) -> Self {
480-
let mut tokens: Vec<Token> = vhdl
481-
.tokenize()
482-
.filter(|t| t.kind() != TokenKind::Eof)
483-
.collect();
484-
if let Some(first) = tokens.first_mut() {
485-
if first.leading_trivia().is_empty() {
486-
first.set_leading_trivia(Trivia::from([TriviaPiece::Spaces(1)]));
487-
}
488-
}
489-
Self { kind, tokens }
490-
}
491-
492-
pub(crate) fn build(self) -> SyntaxNode {
493-
let mut b = NodeBuilder::new();
494-
b.start_node(self.kind);
495-
for token in self.tokens {
496-
b.push(token);
497-
}
498-
b.end_node();
499-
SyntaxNode::new_root(b.end())
500-
}
501-
}
502-
503-
#[test]
504-
fn raw_node_builder_from_vhdl_adds_leading_space() {
505-
let syntax = RawNodeBuilder::from_vhdl(NodeKind::ActualPart, Latin1Str::new(b"clk")).build();
506-
assert_eq!(syntax.to_string(), " clk");
507-
}
508-
509-
/// A programmatically assembled `RawNodeBuilder` holds exactly the pushed tokens.
510-
#[test]
511-
fn raw_node_builder_programmatic_token_chain() {
512-
let tok = Token::new(TokenKind::Identifier, b"foo".as_slice(), Trivia::default());
513-
let syntax = RawNodeBuilder::new(NodeKind::ActualPart).token(tok).build();
514-
assert_eq!(syntax.tokens().count(), 1);
515-
}
516-
517451
// MARK: Canonical tokens
518452

519453
impl TokenKind {

vhdl_syntax/src/parser/productions/composite_types.rs

Lines changed: 2 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
//
55
// Copyright (c) 2025, Lukas Scheller lukasscheller@icloud.com
66
/// Parsing of composite types (LRM §5.3)
7-
use crate::parser::{util::LookaheadError, Parser};
7+
use crate::parser::Parser;
88
use crate::syntax::node_kind::NodeKind::*;
99
use crate::tokens::token_kind::Keyword as Kw;
1010
use crate::tokens::TokenKind::*;
@@ -69,42 +69,10 @@ impl Parser {
6969
pub fn index_constraint(&mut self) {
7070
self.start_node(IndexConstraint);
7171
self.expect_token(LeftPar);
72-
self.separated_list(Parser::discrete_range, Comma);
72+
self.separated_list(Parser::expression, Comma);
7373
self.expect_token(RightPar);
7474
self.end_node();
7575
}
76-
77-
pub fn discrete_range(&mut self) {
78-
// One of the following tokens must follow after a `discrete_range`.
79-
// FOLLOW(discrete_range) := "," | ")" | "|" | "=>" | "generate" | "loop" | ";"
80-
let end_of_range = match self.lookahead([
81-
Comma,
82-
RightPar,
83-
RightArrow,
84-
Bar,
85-
Keyword(Kw::Generate),
86-
Keyword(Kw::Loop),
87-
SemiColon,
88-
]) {
89-
Ok((tok, end_index)) => Some((tok, end_index)),
90-
// If EOF is reached, the range cannot be parsed correctly
91-
Err((LookaheadError::Eof, _)) => {
92-
self.eof_err();
93-
None
94-
}
95-
// Since we use `usize::MAX` as a maximum index, this error is not possible!
96-
Err((LookaheadError::MaxIndexReached, _)) => unreachable!(),
97-
// This error is only possible, when a `RightPar` is found before any token in `kinds`.
98-
// Since `RightPar` is in `kinds` that's not possible!
99-
Err((LookaheadError::TokenKindNotFound, _)) => unreachable!(),
100-
};
101-
102-
if let Some((_, end_index)) = end_of_range {
103-
self.start_node(RawTokens);
104-
self.skip_to(end_index);
105-
self.end_node();
106-
}
107-
}
10876
}
10977

11078
#[cfg(test)]

vhdl_syntax/src/parser/productions/concurrent_statement.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ impl Parser {
402402
self.start_node(ParameterSpecification);
403403
self.identifier();
404404
self.expect_kw(Kw::In);
405-
self.discrete_range();
405+
self.expression();
406406
self.end_node();
407407
}
408408

vhdl_syntax/src/parser/productions/expression.rs

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,14 @@ use std::num::NonZeroU8;
1414

1515
fn binary_precedence(token: TokenKind) -> Option<NonZeroU8> {
1616
Some(match token {
17+
// `to`/`downto` were a separate `range` production in the LRM; folding
18+
// them into the operator table lets `range_constraint` accept any
19+
// expression and reuse precedence climbing.
20+
Keyword(Kw::To | Kw::Downto) => nonzero!(1u8),
1721
Keyword(Kw::And | Kw::Or | Kw::Nand | Kw::Nor | Kw::Xor | Kw::Xnor) => nonzero!(2u8),
18-
EQ | NE | LT | LTE | GT | GTE | QueEQ | QueNE | QueLT | QueGT | QueGTE => nonzero!(3u8),
22+
EQ | NE | LT | LTE | GT | GTE | QueEQ | QueNE | QueLT | QueLTE | QueGT | QueGTE => {
23+
nonzero!(3u8)
24+
}
1925
Keyword(Kw::Sll | Kw::Srl | Kw::Sla | Kw::Sra | Kw::Rol | Kw::Ror) => nonzero!(4u8),
2026
Plus | Minus | Concat => nonzero!(5u8),
2127
Times | Div | Keyword(Kw::Mod | Kw::Rem) => nonzero!(7u8),
@@ -39,9 +45,9 @@ impl Parser {
3945
pub fn primary(&mut self) {
4046
match_next_token!(self,
4147
Identifier, LtLt => {
42-
self.start_node(NameExpression);
48+
let checkpoint = self.checkpoint();
4349
self.name();
44-
self.end_node();
50+
self.continue_primary_after_name(checkpoint);
4551
},
4652
BitStringLiteral, CharacterLiteral, StringLiteral, Keyword(Kw::Null) => self.skip_into_node(LiteralExpression),
4753
AbstractLiteral => {
@@ -56,20 +62,43 @@ impl Parser {
5662
self.end_node();
5763
},
5864
LeftPar => {
59-
self.start_node(ParenthesizedExpressionOrAggregate);
60-
self.aggregate_inner();
61-
self.end_node();
65+
self.parenthesized_expression_or_aggregate();
6266
},
6367
Keyword(Kw::New) => {
6468
self.allocator();
6569
}
6670
);
6771
}
6872

73+
pub(crate) fn parenthesized_expression_or_aggregate(&mut self) {
74+
self.start_node(ParenthesizedExpressionOrAggregate);
75+
self.aggregate_inner();
76+
self.end_node();
77+
}
78+
79+
/// Finalize a primary whose leading `Name` was already parsed and starts
80+
/// at `checkpoint`. If a `Tick` follows, the name is the type mark of a
81+
/// `QualifiedExpression` and the `'(…)` is consumed here; otherwise the
82+
/// name is wrapped in `NameExpression`. Callers that need to continue
83+
/// with binary operators should follow up with `expression_from_primary`.
84+
pub(crate) fn continue_primary_after_name(
85+
&mut self,
86+
checkpoint: crate::parser::builder::Checkpoint,
87+
) {
88+
if self.next_is(Tick) {
89+
self.start_node_at(checkpoint, QualifiedExpression);
90+
self.skip();
91+
self.parenthesized_expression_or_aggregate();
92+
} else {
93+
self.start_node_at(checkpoint, NameExpression);
94+
}
95+
self.end_node();
96+
}
97+
6998
pub fn allocator(&mut self) {
70-
self.start_node(ExpressionAllocator);
99+
self.start_node(Allocator);
71100
self.expect_kw(Kw::New);
72-
self.subtype_indication();
101+
self.expression();
73102
self.end_node();
74103
}
75104

@@ -105,6 +134,22 @@ impl Parser {
105134
self.expression_inner(0);
106135
}
107136

137+
/// Continue an expression parse from an already-emitted primary located at
138+
/// `checkpoint`. The caller is responsible for having emitted the leading
139+
/// primary node (e.g. `NameExpression`) starting at `checkpoint`.
140+
pub(crate) fn expression_from_primary(
141+
&mut self,
142+
checkpoint: crate::parser::builder::Checkpoint,
143+
) {
144+
while let Some(precedence) = binary_precedence(self.peek_token()) {
145+
let precedence: u8 = precedence.into();
146+
self.start_node_at(checkpoint, BinaryExpression);
147+
self.skip();
148+
self.expression_inner(precedence);
149+
self.end_node();
150+
}
151+
}
152+
108153
pub fn condition(&mut self) {
109154
self.expression()
110155
}

0 commit comments

Comments
 (0)