Skip to content

Commit ba9c443

Browse files
committed
parser, lexer: indirect call support
Required a small refactoring.
1 parent 8993ad2 commit ba9c443

8 files changed

Lines changed: 210 additions & 93 deletions

File tree

lexer/src/lib.rs

Lines changed: 28 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -47,14 +47,14 @@ pub enum Token<'a> {
4747
BeginFilePattern,
4848
#[token("ENDFILE", |lex| parse_non_posix_keyword(lex, Token::EndFilePattern))]
4949
EndFilePattern,
50-
#[token("@load \"", parse_directive)]
51-
LoadDirective(Slice<'a>),
52-
#[token("@include \"", parse_directive)]
53-
IncludeDirective(Slice<'a>),
54-
#[token("@nsinclude \"", parse_non_posix_directive)]
55-
NsIncludeDirective(Slice<'a>),
56-
#[regex("@namespace \"(?&identifier)\"", parse_namespace_directive)]
57-
NamespaceDirective(&'a str),
50+
#[token("@load")]
51+
LoadDirective,
52+
#[token("@include")]
53+
IncludeDirective,
54+
#[token("@nsinclude", parse_non_posix_operator)]
55+
NsIncludeDirective,
56+
#[regex("@namespace", parse_non_posix_operator)]
57+
NamespaceDirective,
5858
#[token("@concurrent", parse_non_gnu_operator)]
5959
ConcurrentDirective,
6060
#[token("if", accept_expression)]
@@ -130,9 +130,12 @@ pub enum Token<'a> {
130130
RlengthVariable,
131131
#[token("ENVIRON", accept_expression)]
132132
EnvironVariable,
133-
#[regex("(?&identifier)", Identifier::without_namespace)]
134-
#[regex(r"(?&identifier)::(?&identifier)", Identifier::with_namespace)]
133+
#[regex("(?&identifier)", Identifier::without_namespace::<0>)]
134+
#[regex(r"(?&identifier)::(?&identifier)", Identifier::with_namespace::<0>)]
135135
Identifier(Identifier<'a>),
136+
#[regex("@(?&identifier)", parse_indirect_call::<false>)]
137+
#[regex(r"@(?&identifier)::(?&identifier)", parse_indirect_call::<true>)]
138+
IndirectCall(Identifier<'a>),
136139
#[token("+", accept_expression)]
137140
Plus,
138141
#[token("-", accept_expression)]
@@ -333,29 +336,6 @@ fn parse_regex_or_op<'a>(lex: &mut logos::Lexer<'a, Token<'a>>) -> Result<Token<
333336
}
334337
}
335338

336-
fn parse_directive<'a>(lex: &mut Lexer<'a>) -> Result<Slice<'a>> {
337-
accept_expression(lex);
338-
parse_content::<false, '"'>(lex)
339-
}
340-
341-
fn parse_non_posix_directive<'a>(lex: &mut Lexer<'a>) -> Result<Slice<'a>> {
342-
if lex.extras.posix_strict {
343-
Err(LexingError::non_posix(lex))
344-
} else {
345-
parse_directive(lex)
346-
}
347-
}
348-
349-
fn parse_namespace_directive<'a>(lex: &mut Lexer<'a>) -> Result<&'a str> {
350-
if lex.extras.posix_strict {
351-
Err(LexingError::non_posix(lex))
352-
} else {
353-
accept_expression(lex);
354-
let offset = "@namespace \"".len();
355-
Ok(parse_ident(lex, offset..lex.slice().len() - 1))
356-
}
357-
}
358-
359339
fn parse_content<'a, const REGEX: bool, const DELIMITER: char>(
360340
lex: &mut Lexer<'a>,
361341
) -> Result<Slice<'a>> {
@@ -456,7 +436,7 @@ fn parse_float(lex: &mut Lexer<'_>) -> f64 {
456436

457437
fn parse_non_posix_keyword<'a>(lex: &mut Lexer<'a>, other: Token<'a>) -> Token<'a> {
458438
if lex.extras.posix_strict {
459-
Token::Identifier(Identifier::without_namespace(lex))
439+
Token::Identifier(Identifier::without_namespace::<0>(lex))
460440
} else {
461441
accept_expression(lex);
462442
other
@@ -481,22 +461,32 @@ fn parse_non_gnu_operator(lex: &mut Lexer<'_>) -> Result<()> {
481461
}
482462
}
483463

464+
fn parse_indirect_call<'a, const QUALIFIED: bool>(lex: &mut Lexer<'a>) -> Result<Identifier<'a>> {
465+
if lex.extras.posix_strict {
466+
Err(LexingError::non_posix(lex))
467+
} else if QUALIFIED {
468+
Identifier::with_namespace::<1>(lex)
469+
} else {
470+
Ok(Identifier::without_namespace::<1>(lex))
471+
}
472+
}
473+
484474
impl<'a> Identifier<'a> {
485-
fn without_namespace(lex: &mut Lexer<'a>) -> Self {
475+
fn without_namespace<const SKIP: usize>(lex: &mut Lexer<'a>) -> Self {
486476
Self {
487477
namespace: None,
488-
literal: parse_ident(lex, ..),
478+
literal: parse_ident(lex, SKIP..),
489479
}
490480
}
491481

492-
fn with_namespace(lex: &mut Lexer<'a>) -> Result<Self> {
482+
fn with_namespace<const SKIP: usize>(lex: &mut Lexer<'a>) -> Result<Self> {
493483
if lex.extras.posix_strict {
494484
Err(LexingError::non_posix(lex))
495485
} else {
496486
// SAFETY: The regex matching ensures it is present and well-formed.
497487
let separator = unsafe { memchr(b':', lex.slice()).unwrap_unchecked() };
498488
Ok(Self {
499-
namespace: Some(parse_ident(lex, ..separator)),
489+
namespace: Some(parse_ident(lex, SKIP..separator)),
500490
literal: parse_ident(lex, separator + 2..),
501491
})
502492
}

lexer/src/tests.rs

Lines changed: 31 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -153,17 +153,42 @@ fn lexer_test_directive_escaping() {
153153
assert_eq!(
154154
&lex(str, &arena, false, false),
155155
&[
156-
Token::IncludeDirective(b"aa\"a\ta".into()),
157-
Token::NsIncludeDirective(b"b\"\nb".into())
156+
Token::IncludeDirective,
157+
Token::String(b"aa\"a\ta".into()),
158+
Token::NsIncludeDirective,
159+
Token::String(b"b\"\nb".into())
158160
]
159161
);
160162
}
161163

162164
#[test]
163-
#[should_panic]
164165
fn lexer_test_ident_rules_non_posix() {
165166
let arena = Bump::new();
166-
lex(b"@namespace \"1a\"; a::1a", &arena, false, false);
167+
assert_eq!(
168+
&lex(b"1a::a a::1a _a", &arena, false, false),
169+
&[
170+
Token::Number(1.),
171+
Token::Identifier(Identifier {
172+
namespace: Some("a"),
173+
literal: "a"
174+
}),
175+
Token::Identifier(Identifier {
176+
namespace: None,
177+
literal: "a"
178+
}),
179+
Token::Colon,
180+
Token::Colon,
181+
Token::Number(1.),
182+
Token::Identifier(Identifier {
183+
namespace: None,
184+
literal: "a"
185+
}),
186+
Token::Identifier(Identifier {
187+
namespace: None,
188+
literal: "_a"
189+
})
190+
]
191+
);
167192
}
168193

169194
#[test]
@@ -186,7 +211,8 @@ fn lexer_test_general_tokens() {
186211
&lex(str, &arena, false, false),
187212
&[
188213
Token::Newline,
189-
Token::LoadDirective(b"lib1.so.1".into()),
214+
Token::LoadDirective,
215+
Token::String(b"lib1.so.1".into()),
190216
Token::Newline,
191217
Token::BeginPattern,
192218
Token::OpenBrace,

parser/src/ast.rs

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -93,6 +93,7 @@ pub type Pattern<'a> = Either<RulePattern<'a>, SpecialPattern>;
9393
#[derive(Debug)]
9494
pub enum ExprNode<'a> {
9595
FunctionCall(Identifier<'a>, Vec<'a, Expr<'a>>),
96+
IndirectCall(Variable<'a>, Vec<'a, Expr<'a>>),
9697
UnaryOperation(UnaryOperator, Expr<'a>),
9798
BinaryOperation(BinaryOperator, Expr<'a>, Expr<'a>),
9899
UnaryPlaceOperation(UnaryPlaceOperator, Place<'a>),
@@ -556,6 +557,13 @@ impl Debug for Expr<'_> {
556557
}
557558
write!(f, ")")
558559
}
560+
ExprNode::IndirectCall(ident, args) => {
561+
write!(f, "(@{ident:?}")?;
562+
for arg in args {
563+
write!(f, " {arg:?}")?;
564+
}
565+
write!(f, ")")
566+
}
559567
ExprNode::UnaryOperation(op, a) => write!(f, "({op:?} {a:?})"),
560568
ExprNode::BinaryOperation(op, a, b) => write!(f, "({op:?} {a:?} {b:?})"),
561569
ExprNode::BinaryPlaceOperation(op, a, b) => write!(f, "({op:?} {a:?} {b:?})"),

parser/src/diagnostics.rs

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -53,8 +53,8 @@ pub enum ParsingError {
5353
InvalidExpression(Span, String),
5454
#[error("Missing alternate branch in ternary expression.")]
5555
MissingTernaryOr(Span),
56-
#[error("Missing closing parenthesis in function call to `{}`.", .1)]
57-
FunctionCallMissingParenthesis(Span, String),
56+
#[error("Missing closing parenthesis in function call.")]
57+
FunctionCallMissingParenthesis(Span),
5858
#[error("Functions calls must have their name yuxtaposed to the parenthesis `(`.")]
5959
FunctionCallSeparatedIdent(Span),
6060
#[error("Missing closing parenthesis `(` in function call to `{}`.", .1)]
@@ -69,6 +69,10 @@ pub enum ParsingError {
6969
ExpectedPlaceOperator(Span),
7070
#[error("Typed regular expressions not accepted in this position.")]
7171
UnexpectedTypedRegex(Span),
72+
#[error("Can't call non-function, special variable `{}`.", .1)]
73+
SpecialVariableCall(Span, String),
74+
#[error("Can't use special variable `{}` for indirect function call.", .1)]
75+
SpecialVariableIndirectCall(Span, String),
7276
}
7377

7478
impl ParsingError {
@@ -104,14 +108,16 @@ impl ParsingError {
104108
Self::OperatorExpectsVariable(span) => Some(span.clone()),
105109
Self::InvalidExpression(span, _) => Some(span.clone()),
106110
Self::MissingTernaryOr(span) => Some(span.clone()),
107-
Self::FunctionCallMissingParenthesis(span, _) => Some(span.clone()),
111+
Self::FunctionCallMissingParenthesis(span) => Some(span.clone()),
108112
Self::FunctionCallSeparatedIdent(span) => Some(span.clone()),
109113
Self::FunctionCallUnclosed(span, _) => Some(span.clone()),
110114
Self::ExpectedIdentifier(span) => Some(span.clone()),
111115
Self::ExpectedUnaryOperator(span) => Some(span.clone()),
112116
Self::ExpectedBinaryOperator(span) => Some(span.clone()),
113117
Self::ExpectedPlaceOperator(span) => Some(span.clone()),
114118
Self::UnexpectedTypedRegex(span) => Some(span.clone()),
119+
Self::SpecialVariableCall(span, _) => Some(span.clone()),
120+
Self::SpecialVariableIndirectCall(span, _) => Some(span.clone()),
115121
}
116122
}
117123
fn hint(&self) -> Option<&'static str> {

parser/src/idempotency.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -277,6 +277,11 @@ impl Display for ExprNode<'_> {
277277
write_args(f, args, indent)?;
278278
write!(f, ")")
279279
}
280+
Self::IndirectCall(var, args) => {
281+
write!(f, "@{var}(")?;
282+
write_args(f, args, indent)?;
283+
write!(f, ")")
284+
}
280285
Self::UnaryOperation(op, x) => {
281286
let bp = op.binding_power();
282287
let child_w = encode(indent, bp.saturating_add(1));

parser/src/lex.rs

Lines changed: 36 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -6,13 +6,15 @@
66
use std::{fmt::Debug, iter::Peekable};
77

88
use bumpalo::Bump;
9-
use lexer::{LexingError, Span, SpannedIter, Token};
9+
use lexer::{Identifier, LexingError, Slice, Span, SpannedIter, Token};
1010

1111
use crate::{
1212
ParsingError,
1313
ast::{Command, SpecialPattern},
1414
};
1515

16+
use super::Result;
17+
1618
pub struct Lexer<'a> {
1719
inner: Peekable<SpannedIter<'a, Token<'a>>>,
1820
span: Span,
@@ -48,7 +50,7 @@ impl<'a> Lexer<'a> {
4850
&mut self,
4951
expected: &Token,
5052
err: impl FnOnce(Span) -> ParsingError,
51-
) -> super::Result<Token<'a>> {
53+
) -> Result<Token<'a>> {
5254
match self.next() {
5355
Some(Ok(tok)) if expected == &tok => Ok(tok),
5456
Some(Ok(_)) => Err(err(self.span())),
@@ -61,7 +63,7 @@ impl<'a> Lexer<'a> {
6163
&mut self,
6264
expected: impl FnOnce(&Token<'a>) -> bool,
6365
msg: String,
64-
) -> super::Result<Token<'a>> {
66+
) -> Result<Token<'a>> {
6567
match self.next() {
6668
Some(Ok(tok)) if expected(&tok) => Ok(tok),
6769
Some(Ok(_)) => Err(ParsingError::UnexpectedToken(self.span(), msg)),
@@ -70,7 +72,7 @@ impl<'a> Lexer<'a> {
7072
}
7173
}
7274

73-
pub fn expect_identifier(&mut self) -> super::Result<lexer::Identifier<'a>> {
75+
pub fn expect_identifier(&mut self) -> Result<Identifier<'a>> {
7476
if let Some(Token::Identifier(ident)) =
7577
self.next_if(|t| matches!(t, Token::Identifier(_)))?
7678
{
@@ -83,6 +85,28 @@ impl<'a> Lexer<'a> {
8385
}
8486
}
8587

88+
pub fn expect_string(&mut self) -> Result<Slice<'a>> {
89+
if let Some(Token::String(string)) = self.next_if(|t| matches!(t, Token::String(_)))? {
90+
Ok(string)
91+
} else {
92+
Err(ParsingError::UnexpectedToken(
93+
self.peeked_span().unwrap_or(self.span()),
94+
"expected a string".into(),
95+
))
96+
}
97+
}
98+
99+
pub fn lex_ident(&self, source: &[u8], arena: &'a Bump) -> Result<&'a str> {
100+
let Some(Ok(Token::Identifier(ident))) = Token::lex(source, arena, false, true).next()
101+
else {
102+
return Err(ParsingError::UnexpectedToken(
103+
self.span().start + 1..self.span().end - 1,
104+
"expected a valid, non-qualified identifier.".into(),
105+
));
106+
};
107+
Ok(arena.alloc_str(ident.literal))
108+
}
109+
86110
pub fn consume(&mut self, token: &Token) -> bool {
87111
if let Some(Ok(next)) = self.peek()
88112
&& next == token
@@ -113,15 +137,15 @@ impl<'a> Lexer<'a> {
113137
self.advance_span(next).transpose()
114138
}
115139

116-
pub fn expect_next(&mut self) -> super::Result<Token<'a>> {
140+
pub fn expect_next(&mut self) -> Result<Token<'a>> {
117141
match self.next() {
118142
None => Err(ParsingError::LexingError(LexingError::UnexpectedEof)),
119143
Some(Ok(tok)) => Ok(tok),
120144
Some(Err(err)) => Err(ParsingError::LexingError(err)),
121145
}
122146
}
123147

124-
pub fn expect_peek(&mut self) -> super::Result<&Token<'a>> {
148+
pub fn expect_peek(&mut self) -> Result<&Token<'a>> {
125149
match self.peek() {
126150
None => Err(ParsingError::LexingError(LexingError::UnexpectedEof)),
127151
Some(Ok(tok)) => Ok(tok),
@@ -133,14 +157,14 @@ impl<'a> Lexer<'a> {
133157
self.span.clone()
134158
}
135159

136-
pub fn peeked_span(&mut self) -> super::Result<Span> {
160+
pub fn peeked_span(&mut self) -> Result<Span> {
137161
self.inner
138162
.peek()
139163
.map(|(_, s)| s.clone())
140164
.ok_or(ParsingError::LexingError(LexingError::UnexpectedEof))
141165
}
142166

143-
pub fn peek_with_span(&mut self) -> Option<(super::Result<&Token<'a>>, Span)> {
167+
pub fn peek_with_span(&mut self) -> Option<(Result<&Token<'a>>, Span)> {
144168
self.inner.peek().map(|(a, b)| {
145169
(
146170
a.as_ref().map_err(|e| ParsingError::LexingError(e.clone())),
@@ -202,8 +226,10 @@ impl TokenExt for Token<'_> {
202226
fn is_expr_start(&self) -> bool {
203227
self.is_atom()
204228
|| self.is_prefix_op()
205-
|| self == &Token::OpenParent
206-
|| self == &Token::Getline
229+
|| matches!(
230+
self,
231+
Token::IndirectCall(_) | Token::Getline | Token::OpenParent
232+
)
207233
}
208234
fn is_place(&self) -> bool {
209235
matches!(

0 commit comments

Comments
 (0)