Skip to content

Commit 8993ad2

Browse files
committed
parser: add comments
1 parent 1b165d2 commit 8993ad2

3 files changed

Lines changed: 27 additions & 11 deletions

File tree

parser/src/ast.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ pub enum BinaryPlaceOperator {
151151
InArray,
152152
}
153153

154+
/// Essentially lvalues. To the interpreter, these do not produce a value, but
155+
/// instead have their own value modified. Places are a subset of all expressions.
154156
pub enum Place<'a> {
155157
Record(Expr<'a>),
156158
Variable(Variable<'a>),
@@ -396,7 +398,8 @@ impl<'a> BinaryPlaceOperator {
396398
}
397399

398400
impl<'a> Place<'a> {
399-
pub fn promote_from(expr: Expr<'a>, span: Span) -> Result<Self, (Expr<'a>, ParsingError)> {
401+
/// Attempts to lower an expression into a place; on failure, the original
/// expression is handed back to the caller together with the parsing error.
402+
pub fn lower_from(expr: Expr<'a>, span: Span) -> Result<Self, (Expr<'a>, ParsingError)> {
400403
match expr {
401404
Expr::Leaf(Atom::Variable(var)) => Ok(Self::Variable(var)),
402405
Expr::Node(node)

parser/src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ impl<'a> Parser<'a> {
8787
// * Pattern (Expression)
8888
// * Expects brackets afterwards (body) or a newline (default).
8989
// * Action (Statement)
90-
// * Expects a newline afterwards; inserts default pattern.
90+
// * Expects a newline afterwards.
9191
while let Some(tok) = lex.peek() {
9292
if tok.as_ref().is_ok_and(Token::is_pattern_start) {
9393
match self.parse_pattern(lex)? {
@@ -219,6 +219,8 @@ impl<'a> Parser<'a> {
219219
}
220220
}
221221
}
222+
223+
/// Simple statements are a subset of statements, usable in positions such as
/// for-loop definitions.
222224
#[tracing::instrument]
223225
fn parse_simple_statement(
224226
&mut self,

parser/src/pratt.rs

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ impl<'a, 'b> Pratt<'a, 'b> {
6363
) -> Result<Expr<'a>> {
6464
while let Some((next, span)) = lex.peek_with_span() {
6565
let next = next?;
66+
// Short circuits if requested. Useful for returning early when a
67+
// token may also match a known operator.
6668
if delimiter(next) {
6769
break;
6870
}
@@ -73,13 +75,13 @@ impl<'a, 'b> Pratt<'a, 'b> {
7375
break;
7476
}
7577
lex.next();
76-
let place = Place::promote_from(lhs.take(), lex.span())?;
78+
let place = Place::lower_from(lhs.take(), lex.span())?;
7779
Expr::node(op.expr(place), self.parser.arena)
7880
} else if let Ok(op) = BinaryPlaceOperator::parse(next, &span) {
7981
if op.binding_power().0 < min_bp {
8082
break;
8183
}
82-
let place = Place::promote_from(lhs.take(), lex.span())?;
84+
let place = Place::lower_from(lhs.take(), lex.span())?;
8385
self.parse_place_op(lex, op, place)?
8486
} else if let Ok(op) = BinaryOperator::parse(next, &span)
8587
&& !matches!(next, Token::Increment | Token::Decrement)
@@ -123,7 +125,7 @@ impl<'a, 'b> Pratt<'a, 'b> {
123125
if let Ok(op) = UnaryPlaceOperator::parse_prefix(&next, &lex.span()) {
124126
let rhs = self.parse_expression(lex, op.binding_power())?;
125127
Ok(Expr::node(
126-
op.expr(Place::promote_from(rhs, lex.span())?),
128+
op.expr(Place::lower_from(rhs, lex.span())?),
127129
self.parser.arena,
128130
))
129131
} else if let Ok(op) = UnaryOperator::parse(&next, &lex.peeked_span()?) {
@@ -142,17 +144,15 @@ impl<'a, 'b> Pratt<'a, 'b> {
142144
// redirection reading from file. Does not accept typed regexes.
143145
self.typed_regex = false;
144146
let place = if lex.peek_with(Token::is_place) {
145-
Some(Place::promote_from(
146-
self.parse_redirection(lex)?,
147-
lex.span(),
148-
))
147+
Some(Place::lower_from(self.parse_redirection(lex)?, lex.span()))
149148
} else {
150149
None
151150
}
152-
.transpose();
151+
.transpose(); // trick to simplify checks.
153152

154153
let getline = |gl| Expr::node(ExprNode::Getline(gl), self.parser.arena);
155154
match place {
155+
// Nonsensical expression; gawk just assumes concatenation.
156156
Err((expr, _)) => Ok(Expr::node(
157157
BinaryOperator::Concat.expr(getline(Getline::FromInput(None)), expr),
158158
self.parser.arena,
@@ -170,6 +170,9 @@ impl<'a, 'b> Pratt<'a, 'b> {
170170

171171
fn parse_atom_or_call(&mut self, lex: &mut Lexer<'a>) -> Result<Expr<'a>> {
172172
let next = lex.expect_next()?;
173+
// Only accepts calls if the function name is next to the parenthesis.
174+
// If there is a space, we interpret it as a concatenation and let the
175+
// interpreter error if necessary; elsewhere we can't concat with vars.
173176
if let Token::Identifier(name) = next
174177
&& lex.peek_is(&Token::OpenParent)
175178
&& lex.is_yuxtaposed()
@@ -179,6 +182,7 @@ impl<'a, 'b> Pratt<'a, 'b> {
179182
} else {
180183
match self.parser.parse_atom(lex, next, self.typed_regex) {
181184
Ok(atom) => Ok(Expr::leaf(atom)),
185+
// Add detail to this error.
182186
Err(ParsingError::UnexpectedToken(_, str)) => {
183187
Err(ParsingError::InvalidExpression(lex.span(), str))
184188
}
@@ -193,7 +197,9 @@ impl<'a, 'b> Pratt<'a, 'b> {
193197
op: BinaryOperator,
194198
lhs: Expr<'a>,
195199
) -> Result<Expr<'a>> {
200+
// Ensures it's not a typed regex; rejects cases like `x = @/a/ + 1`.
196201
self.typecheck(lex, &lhs)?;
202+
// Concatenation is just a parsing construct with no token of its own, so
// we only consume the next token when the operator is a real token.
197203
lex.consume_with(|_| op != BinaryOperator::Concat);
198204
self.typed_regex = matches!(op, BinaryOperator::Matches | BinaryOperator::MatchesNot);
199205

@@ -228,14 +234,18 @@ impl<'a, 'b> Pratt<'a, 'b> {
228234
self.parse_expression(lex, op.binding_power().1)?
229235
};
230236
if op == BinaryPlaceOperator::ArrayAccess {
237+
// We can only index on variables.
231238
if !matches!(place, Place::Variable(_)) {
232239
return Err(ParsingError::OperatorExpectsVariable(lex.span()));
233240
}
241+
// Concatenates each dimension with `SUBSEP`.
242+
// FIXME: undo when pretty-printing or defer to the interpreter.
234243
rhs = self.parse_array_index(lex, rhs)?;
235244
}
236245
Ok(Expr::node(op.expr(place, rhs), self.parser.arena))
237246
}
238247

248+
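/// Continuously consumes comma-separated subscripts, folding each further
/// dimension into a single index expression joined with `SUBSEP`.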
239249
pub fn parse_array_index(&mut self, lex: &mut Lexer<'a>, lhs: Expr<'a>) -> Result<Expr<'a>> {
240250
let mut rhs = lhs;
241251
while lex.consume(&Token::Comma) {
@@ -286,7 +296,7 @@ impl<'a, 'b> Pratt<'a, 'b> {
286296
let pipe = |place| Expr::node(op.expr_getline(place, lhs), self.parser.arena);
287297
if lex.peek_with(Token::is_place) {
288298
let expr = self.parse_redirection(lex)?;
289-
match Place::promote_from(expr, lex.span()) {
299+
match Place::lower_from(expr, lex.span()) {
290300
Ok(place) => Ok(pipe(Some(place))),
291301
Err((expr, _)) => Ok(Expr::node(
292302
BinaryOperator::Concat.expr(pipe(None), expr),
@@ -302,6 +312,7 @@ impl<'a, 'b> Pratt<'a, 'b> {
302312
self.parse_expression(lex, BinaryOperator::Concat.binding_power().1 - 1)
303313
}
304314

315+
/// Errors if `expr` is a typed regex.
305316
fn typecheck(&self, lex: &mut Lexer<'a>, expr: &Expr<'a>) -> Result<()> {
306317
if matches!(expr, Expr::Leaf(Atom::TypedRegex(_))) {
307318
Err(ParsingError::UnexpectedTypedRegex(lex.span()))

0 commit comments

Comments
 (0)