Skip to content

Commit 1b165d2

Browse files
committed
parser, lexer: typed regexes support
1 parent ec6e4a6 commit 1b165d2

8 files changed

Lines changed: 121 additions & 48 deletions

File tree

lexer/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -143,6 +143,8 @@ pub enum Token<'a> {
143143
Slash,
144144
/// Not generated by Logos directly.
145145
Regex(Slice<'a>),
146+
#[token("@/", |lex| { accept_operator(lex); parse_content::<true, '/'>(lex) })]
147+
TypedRegex(Slice<'a>),
146148
#[token("%", accept_expression)]
147149
Percent,
148150
#[token("^", accept_expression)]

parser/src/ast.rs

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@ pub enum Atom<'a> {
3838
BigInt(),
3939
BigFloat(),
4040
Regex(Slice<'a>),
41+
TypedRegex(Slice<'a>),
4142
}
4243

4344
#[derive(Debug)]
@@ -119,8 +120,8 @@ pub enum BinaryOperator {
119120
GtE,
120121
And,
121122
Or,
122-
MatchRegex,
123-
NotMatchRegex,
123+
Matches,
124+
MatchesNot,
124125
Add,
125126
Subtract,
126127
Multiply,
@@ -338,8 +339,8 @@ impl<'a> BinaryOperator {
338339
Token::LesserThan => Ok(Self::Lt),
339340
Token::GreaterOrEqualThan => Ok(Self::GtE),
340341
Token::LesserOrEqualThan => Ok(Self::LtE),
341-
Token::Matching => Ok(Self::MatchRegex),
342-
Token::NotMatching => Ok(Self::NotMatchRegex),
342+
Token::Matching => Ok(Self::Matches),
343+
Token::NotMatching => Ok(Self::MatchesNot),
343344
Token::Plus => Ok(Self::Add),
344345
Token::Minus => Ok(Self::Subtract),
345346
Token::Star => Ok(Self::Multiply),
@@ -486,7 +487,7 @@ impl BindingPower for BinaryOperator {
486487
}
487488
Self::And => binding_powers::BP_AND,
488489
Self::Or => binding_powers::BP_OR,
489-
Self::MatchRegex | Self::NotMatchRegex => binding_powers::BP_MATCH,
490+
Self::Matches | Self::MatchesNot => binding_powers::BP_MATCH,
490491
Self::Add => binding_powers::BP_ADDITION,
491492
Self::Subtract => binding_powers::BP_ADDITION,
492493
Self::Multiply => binding_powers::BP_MULTI,

parser/src/diagnostics.rs

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -49,8 +49,8 @@ pub enum ParsingError {
4949
UnclosedArrayAccess(Span),
5050
#[error("Expected operand to be a variable.")]
5151
OperatorExpectsVariable(Span),
52-
#[error("Malformed expression.")]
53-
InvalidExpression(Span),
52+
#[error("Malformed expression: {}", .1)]
53+
InvalidExpression(Span, String),
5454
#[error("Missing alternate branch in ternary expression.")]
5555
MissingTernaryOr(Span),
5656
#[error("Missing closing parenthesis in function call to `{}`.", .1)]
@@ -67,6 +67,8 @@ pub enum ParsingError {
6767
ExpectedBinaryOperator(Span),
6868
#[error("Expected a placing operation.")]
6969
ExpectedPlaceOperator(Span),
70+
#[error("Typed regular expressions not accepted in this position.")]
71+
UnexpectedTypedRegex(Span),
7072
}
7173

7274
impl ParsingError {
@@ -100,7 +102,7 @@ impl ParsingError {
100102
Self::UnclosedParenthesisExpression(span) => Some(span.clone()),
101103
Self::UnclosedArrayAccess(span) => Some(span.clone()),
102104
Self::OperatorExpectsVariable(span) => Some(span.clone()),
103-
Self::InvalidExpression(span) => Some(span.clone()),
105+
Self::InvalidExpression(span, _) => Some(span.clone()),
104106
Self::MissingTernaryOr(span) => Some(span.clone()),
105107
Self::FunctionCallMissingParenthesis(span, _) => Some(span.clone()),
106108
Self::FunctionCallSeparatedIdent(span) => Some(span.clone()),
@@ -109,6 +111,7 @@ impl ParsingError {
109111
Self::ExpectedUnaryOperator(span) => Some(span.clone()),
110112
Self::ExpectedBinaryOperator(span) => Some(span.clone()),
111113
Self::ExpectedPlaceOperator(span) => Some(span.clone()),
114+
Self::UnexpectedTypedRegex(span) => Some(span.clone()),
112115
}
113116
}
114117
fn hint(&self) -> Option<&'static str> {
@@ -139,6 +142,9 @@ impl ParsingError {
139142
Self::LexingError(LexingError::UnavailableOnGnu(_, _)) => {
140143
Some("This item is not available in GNU-strict mode.")
141144
}
145+
Self::UnexpectedTypedRegex(_) => Some(
146+
"This is only valid in some contexts, like a right-hand assignment or a function argument.",
147+
),
142148
_ => None,
143149
}
144150
}

parser/src/idempotency.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -372,8 +372,8 @@ impl Display for BinaryOperator {
372372
Self::GtE => write!(f, " >= "),
373373
Self::And => write!(f, " && "),
374374
Self::Or => write!(f, " || "),
375-
Self::MatchRegex => write!(f, " ~ "),
376-
Self::NotMatchRegex => write!(f, " !~ "),
375+
Self::Matches => write!(f, " ~ "),
376+
Self::MatchesNot => write!(f, " !~ "),
377377
Self::Add => write!(f, " + "),
378378
Self::Subtract => write!(f, " - "),
379379
Self::Multiply => write!(f, " * "),

parser/src/lex.rs

Lines changed: 16 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -71,14 +71,16 @@ impl<'a> Lexer<'a> {
7171
}
7272

7373
pub fn expect_identifier(&mut self) -> super::Result<lexer::Identifier<'a>> {
74-
let Token::Identifier(name) = self.expect_with(
75-
|t| matches!(t, Token::Identifier(_)),
76-
"expected an identifier.".into(),
77-
)?
78-
else {
79-
unreachable!()
80-
};
81-
Ok(name)
74+
if let Some(Token::Identifier(ident)) =
75+
self.next_if(|t| matches!(t, Token::Identifier(_)))?
76+
{
77+
Ok(ident)
78+
} else {
79+
Err(ParsingError::UnexpectedToken(
80+
self.peeked_span().unwrap_or(self.span()),
81+
"expected an identifier.".into(),
82+
))
83+
}
8284
}
8385

8486
pub fn consume(&mut self, token: &Token) -> bool {
@@ -103,9 +105,12 @@ impl<'a> Lexer<'a> {
103105
}
104106
}
105107

106-
pub fn next_if(&mut self, f: impl FnOnce(&LexItem<'a>) -> bool) -> Option<LexItem<'a>> {
107-
let next = self.inner.next_if(|(tok, _)| f(tok));
108-
self.advance_span(next)
108+
pub fn next_if(
109+
&mut self,
110+
f: impl FnOnce(&Token<'a>) -> bool,
111+
) -> lexer::Result<Option<Token<'a>>> {
112+
let next = self.inner.next_if(|(tok, _)| tok.as_ref().is_ok_and(f));
113+
self.advance_span(next).transpose()
109114
}
110115

111116
pub fn expect_next(&mut self) -> super::Result<Token<'a>> {

parser/src/lib.rs

Lines changed: 26 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -178,9 +178,9 @@ impl<'a> Parser<'a> {
178178
Token::BeginFilePattern => Ok(Right(SpecialPattern::BeginFile)),
179179
Token::EndFilePattern => Ok(Right(SpecialPattern::EndFile)),
180180
_ => {
181-
let expr = self.parse_expression(lex)?;
181+
let expr = self.parse_expression(lex, false)?;
182182
Ok(Left(if lex.consume(&Token::Comma) {
183-
let expr_end = self.parse_expression(lex)?;
183+
let expr_end = self.parse_expression(lex, false)?;
184184
RulePattern::Range(expr, expr_end)
185185
} else {
186186
RulePattern::Expression(expr)
@@ -226,7 +226,10 @@ impl<'a> Parser<'a> {
226226
) -> Option<Result<SimpleStatement<'a>>> {
227227
let peek = lex.expect_peek().ok()?;
228228
if peek.is_expr_start() {
229-
Some(self.parse_expression(lex).map(SimpleStatement::Expression))
229+
Some(
230+
self.parse_expression(lex, false)
231+
.map(SimpleStatement::Expression),
232+
)
230233
} else {
231234
match peek {
232235
token if let Some(name) = token.maps_to_command() => {
@@ -357,14 +360,14 @@ impl<'a> Parser<'a> {
357360
Token::Continue => Statement::Continue,
358361
Token::Return => Statement::Return(
359362
(!lex.peek_with(Token::is_stmnt_or_block_end))
360-
.then(|| self.parse_expression(lex))
363+
.then(|| self.parse_expression(lex, true))
361364
.transpose()?,
362365
),
363366
Token::Next => Statement::Next,
364367
Token::NextFile => Statement::NextFile,
365368
Token::Exit => Statement::Exit(
366369
(!lex.peek_with(Token::is_stmnt_or_block_end))
367-
.then(|| self.parse_expression(lex))
370+
.then(|| self.parse_expression(lex, false))
368371
.transpose()?,
369372
),
370373
_ => {
@@ -386,7 +389,7 @@ impl<'a> Parser<'a> {
386389
&Token::OpenParent,
387390
ParsingError::MissingParenthesisInStatement,
388391
)?;
389-
let expr = self.parse_expression(lex)?;
392+
let expr = self.parse_expression(lex, false)?;
390393
lex.expect(
391394
&Token::ClosedParent,
392395
ParsingError::UnclosedParenthesisInStatement,
@@ -402,7 +405,7 @@ impl<'a> Parser<'a> {
402405
) -> Result<Statement<'a>> {
403406
lex.consume(&Token::Newline);
404407
let condition = (!lex.peek_is(&Token::Semicolon))
405-
.then(|| self.parse_expression(lex))
408+
.then(|| self.parse_expression(lex, false))
406409
.transpose()?;
407410
lex.expect(&Token::Semicolon, ParsingError::InvalidForLoop)?;
408411

@@ -469,7 +472,7 @@ impl<'a> Parser<'a> {
469472
fn parse_case(&mut self, lex: &mut Lexer<'a>) -> Result<Atom<'a>> {
470473
lex.expect(&Token::Case, ParsingError::MissingSwitchBranch)?;
471474
let next = lex.expect_next()?;
472-
let value = self.parse_atom(lex, next)?;
475+
let value = self.parse_atom(lex, next, true)?;
473476
lex.expect(&Token::Colon, ParsingError::ColonMustFollowCase)?;
474477
match value {
475478
Atom::Variable(_) => Err(ParsingError::InvalidCaseValue(lex.span())),
@@ -506,16 +509,16 @@ impl<'a> Parser<'a> {
506509
return Ok(arguments);
507510
}
508511

509-
arguments.push(self.parse_expression(lex)?);
512+
arguments.push(self.parse_expression(lex, true)?);
510513
while lex.consume(&Token::Comma) {
511-
arguments.push(self.parse_expression(lex)?);
514+
arguments.push(self.parse_expression(lex, true)?);
512515
}
513516
Ok(arguments)
514517
}
515518

516519
fn parse_command_args(&mut self, lex: &mut Lexer<'a>) -> Result<Vec<'a, Expr<'a>>> {
517520
let mut arguments = Vec::new_in(self.arena);
518-
let mut pratt = Pratt::new(self);
521+
let mut pratt = Pratt::new(self, false);
519522
if !lex.peek_with(Token::is_expr_start) {
520523
return Ok(arguments);
521524
}
@@ -537,7 +540,7 @@ impl<'a> Parser<'a> {
537540
lex.next();
538541
Ok(Some((
539542
redirection,
540-
Pratt::new(self).parse_redirection(lex)?,
543+
Pratt::new(self, false).parse_redirection(lex)?,
541544
)))
542545
} else {
543546
Ok(None)
@@ -550,7 +553,7 @@ impl<'a> Parser<'a> {
550553
return Err(ParsingError::OperatorExpectsVariable(lex.span()));
551554
};
552555
let index = if lex.consume(&Token::OpenBracket) {
553-
let mut pratt = Pratt::new(self);
556+
let mut pratt = Pratt::new(self, false);
554557
let first = pratt.parse(lex)?;
555558
Some(pratt.parse_array_index(lex, first)?)
556559
} else {
@@ -608,8 +611,8 @@ impl<'a> Parser<'a> {
608611
}
609612

610613
#[tracing::instrument]
611-
fn parse_expression(&mut self, lex: &mut Lexer<'a>) -> Result<Expr<'a>> {
612-
Pratt::new(self).parse(lex)
614+
fn parse_expression(&mut self, lex: &mut Lexer<'a>, typed_regex: bool) -> Result<Expr<'a>> {
615+
Pratt::new(self, typed_regex).parse(lex)
613616
}
614617

615618
#[tracing::instrument]
@@ -642,11 +645,18 @@ impl<'a> Parser<'a> {
642645
}
643646

644647
#[tracing::instrument]
645-
fn parse_atom(&self, lex: &mut Lexer<'a>, token: Token<'a>) -> Result<Atom<'a>> {
648+
fn parse_atom(
649+
&self,
650+
lex: &mut Lexer<'a>,
651+
token: Token<'a>,
652+
typed_regex: bool,
653+
) -> Result<Atom<'a>> {
646654
match token {
647655
Token::Number(n) => Ok(Atom::Number(n)),
648656
Token::String(s) => Ok(Atom::String(s)),
649657
Token::Regex(r) => Ok(Atom::Regex(r)),
658+
Token::TypedRegex(r) if typed_regex => Ok(Atom::TypedRegex(r)),
659+
Token::TypedRegex(_) => Err(ParsingError::UnexpectedTypedRegex(lex.span())),
650660
token => match self.get_place(lex, token) {
651661
Some(var) => Ok(Atom::Variable(var)),
652662
None => Err(ParsingError::UnexpectedToken(

0 commit comments

Comments
 (0)