Skip to content

Commit 48c2c6a

Browse files
committed
parser: refactor lvalues & ast ownership; add getline pipes
1 parent e63ff81 commit 48c2c6a

7 files changed

Lines changed: 278 additions & 167 deletions

File tree

parser/src/ast.rs

Lines changed: 120 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@
55

66
use std::fmt::Debug;
77

8-
use bumpalo::{Bump, collections::Vec};
8+
use bumpalo::{Bump, boxed::Box, collections::Vec};
99
use either::Either;
1010
use hashbrown::{DefaultHashBuilder, HashMap};
1111
use lexer::{Slice, Span, Token};
1212

1313
use crate::{ParsingError, Result, lex::TokenExt};
1414

15-
#[derive(Debug, Clone)]
15+
#[derive(Debug)]
1616
pub struct Ast<'a> {
1717
pub loads: Vec<'a, Slice<'a>>,
1818
pub begin: Vec<'a, Body<'a>>,
@@ -24,7 +24,7 @@ pub struct Ast<'a> {
2424
pub functions: HashMap<Identifier<'a>, Function<'a>, DefaultHashBuilder, &'a Bump>,
2525
}
2626

27-
#[derive(Debug, Clone)]
27+
#[derive(Debug)]
2828
pub struct Rule<'a> {
2929
pub pattern: Option<RulePattern<'a>>,
3030
pub actions: Option<Body<'a>>,
@@ -40,7 +40,7 @@ pub enum Atom<'a> {
4040
Regex(Slice<'a>),
4141
}
4242

43-
#[derive(Debug, Clone)]
43+
#[derive(Debug)]
4444
pub enum RulePattern<'a> {
4545
Expression(Expr<'a>),
4646
Range(Expr<'a>, Expr<'a>),
@@ -81,22 +81,21 @@ pub enum Variable<'a> {
8181
Environ,
8282
}
8383

84-
#[derive(Clone)]
8584
pub enum Expr<'a> {
8685
Leaf(Atom<'a>),
87-
Node(&'a ExprNode<'a>),
86+
Node(Box<'a, ExprNode<'a>>),
8887
}
8988

90-
#[derive(Clone)]
9189
pub struct Body<'a>(pub Vec<'a, Statement<'a>>);
9290
pub type Pattern<'a> = Either<RulePattern<'a>, SpecialPattern>;
9391

94-
#[derive(Debug, Clone)]
92+
#[derive(Debug)]
9593
pub enum ExprNode<'a> {
9694
FunctionCall(Identifier<'a>, Vec<'a, Expr<'a>>),
9795
UnaryOperation(UnaryOperator, Expr<'a>),
9896
BinaryOperation(BinaryOperator, Expr<'a>, Expr<'a>),
99-
PlaceOperation(PlaceOperator, Variable<'a>, Expr<'a>),
97+
UnaryPlaceOperation(UnaryPlaceOperator, Place<'a>),
98+
BinaryPlaceOperation(BinaryPlaceOperator, Place<'a>, Expr<'a>),
10099
Ternary(Expr<'a>, Expr<'a>, Expr<'a>),
101100
Getline(Getline<'a>),
102101
}
@@ -131,12 +130,32 @@ pub enum BinaryOperator {
131130
}
132131

133132
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
134-
pub enum PlaceOperator {
133+
pub enum UnaryPlaceOperator {
134+
IncrementL,
135+
DecrementL,
136+
IncrementR,
137+
DecrementR,
138+
}
139+
140+
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
141+
pub enum BinaryPlaceOperator {
135142
Assignment,
143+
AddAssign,
144+
SubAssign,
145+
MulAssign,
146+
DivAssign,
147+
PowAssign,
148+
ModAssign,
136149
ArrayAccess,
137150
InArray,
138151
}
139152

153+
pub enum Place<'a> {
154+
Record(Expr<'a>),
155+
Variable(Variable<'a>),
156+
ArrayElement(Variable<'a>, Expr<'a>),
157+
}
158+
140159
/// GNU docs: https://www.gnu.org/software/gawk/manual/html_node/Redirection.html
141160
#[derive(Debug, Clone)]
142161
pub enum Redirection<'a> {
@@ -152,19 +171,18 @@ pub enum WriteKind {
152171
Coprocess,
153172
}
154173

155-
#[derive(Debug, Clone)]
174+
#[derive(Debug)]
156175
pub enum Getline<'a> {
157176
// getline (var)?
158-
FromInput(Option<Variable<'a>>),
177+
FromInput(Option<Place<'a>>),
159178
// getline (var)? < (file)
160-
FromFile(Option<Variable<'a>>, Expr<'a>),
179+
FromFile(Option<Place<'a>>, Expr<'a>),
161180
// (expr) | getline (var)?
162-
PipeOut(Option<Variable<'a>>, Expr<'a>),
181+
PipeOut(Option<Place<'a>>, Expr<'a>),
163182
// (expr) |& getline (var)?
164-
CoprocessOut(Option<Variable<'a>>, Expr<'a>),
183+
CoprocessOut(Option<Place<'a>>, Expr<'a>),
165184
}
166185

167-
#[derive(Clone)]
168186
pub enum Statement<'a> {
169187
Expression(Expr<'a>),
170188
Command {
@@ -194,7 +212,7 @@ pub enum Statement<'a> {
194212
body: Body<'a>,
195213
},
196214
ForEach {
197-
place: Variable<'a>,
215+
variable: Variable<'a>,
198216
array: Variable<'a>,
199217
body: Body<'a>,
200218
},
@@ -211,13 +229,13 @@ pub enum Statement<'a> {
211229
Exit(Option<Expr<'a>>),
212230
}
213231

214-
#[derive(Debug, Clone)]
232+
#[derive(Debug)]
215233
pub struct Function<'a> {
216234
pub args: Vec<'a, Identifier<'a>>,
217235
pub body: Body<'a>,
218236
}
219237

220-
#[derive(Debug, Clone)]
238+
#[derive(Debug, Clone, Copy)]
221239
pub enum Command {
222240
Print,
223241
Printf,
@@ -229,7 +247,7 @@ impl<'a> Expr<'a> {
229247
}
230248

231249
pub fn node(op: impl Into<ExprNode<'a>>, arena: &'a Bump) -> Self {
232-
Self::Node(arena.alloc(op.into()))
250+
Self::Node(Box::new_in(op.into(), arena))
233251
}
234252
}
235253

@@ -245,9 +263,15 @@ impl BinaryOperator {
245263
}
246264
}
247265

248-
impl PlaceOperator {
249-
pub fn expr<'a>(self, a: Variable<'a>, b: Expr<'a>) -> ExprNode<'a> {
250-
ExprNode::PlaceOperation(self, a, b)
266+
impl UnaryPlaceOperator {
267+
pub fn expr(self, a: Place<'_>) -> ExprNode<'_> {
268+
ExprNode::UnaryPlaceOperation(self, a)
269+
}
270+
}
271+
272+
impl BinaryPlaceOperator {
273+
pub fn expr<'a>(self, a: Place<'a>, b: Expr<'a>) -> ExprNode<'a> {
274+
ExprNode::BinaryPlaceOperation(self, a, b)
251275
}
252276
}
253277

@@ -275,6 +299,12 @@ impl<'a> From<Variable<'a>> for Atom<'a> {
275299
}
276300
}
277301

302+
impl<'a> From<Variable<'a>> for Place<'a> {
303+
fn from(value: Variable<'a>) -> Self {
304+
Self::Variable(value)
305+
}
306+
}
307+
278308
impl From<f64> for Atom<'_> {
279309
fn from(value: f64) -> Self {
280310
Self::Number(value)
@@ -323,16 +353,34 @@ impl<'a> BinaryOperator {
323353
}
324354
}
325355

326-
impl<'a> PlaceOperator {
356+
impl UnaryPlaceOperator {
357+
pub fn parse_prefix(value: &Token<'_>, span: &Span) -> Result<Self> {
358+
match value {
359+
Token::Increment => Ok(Self::IncrementL),
360+
Token::Decrement => Ok(Self::DecrementL),
361+
_ => Err(ParsingError::OperatorExpectsVariable(span.clone())),
362+
}
363+
}
364+
365+
pub fn parse_suffix(value: &Token<'_>, span: &Span) -> Result<Self> {
366+
match value {
367+
Token::Increment => Ok(Self::IncrementR),
368+
Token::Decrement => Ok(Self::DecrementR),
369+
_ => Err(ParsingError::OperatorExpectsVariable(span.clone())),
370+
}
371+
}
372+
}
373+
374+
impl<'a> BinaryPlaceOperator {
327375
pub fn parse(value: &Token<'a>, span: &Span) -> Result<Self> {
328376
match value {
329-
Token::Assignment
330-
| Token::PlusAssign
331-
| Token::MinusAssign
332-
| Token::StarAssign
333-
| Token::SlashAssign
334-
| Token::CaretAssign
335-
| Token::PercentAssign => Ok(Self::Assignment),
377+
Token::Assignment => Ok(Self::Assignment),
378+
Token::PlusAssign => Ok(Self::AddAssign),
379+
Token::MinusAssign => Ok(Self::SubAssign),
380+
Token::StarAssign => Ok(Self::MulAssign),
381+
Token::SlashAssign => Ok(Self::DivAssign),
382+
Token::CaretAssign => Ok(Self::PowAssign),
383+
Token::PercentAssign => Ok(Self::ModAssign),
336384
Token::OpenBracket => Ok(Self::ArrayAccess),
337385
Token::In => Ok(Self::InArray),
338386
_ => Err(ParsingError::UnexpectedToken(
@@ -343,6 +391,34 @@ impl<'a> PlaceOperator {
343391
}
344392
}
345393

394+
impl<'a> Place<'a> {
395+
pub fn promote_from(expr: Expr<'a>, span: Span) -> Result<Self, (Expr<'a>, ParsingError)> {
396+
match expr {
397+
Expr::Leaf(Atom::Variable(var)) => Ok(Self::Variable(var)),
398+
Expr::Node(node)
399+
if matches!(
400+
&*node,
401+
&ExprNode::UnaryOperation(UnaryOperator::Record, _)
402+
| &ExprNode::BinaryPlaceOperation(
403+
BinaryPlaceOperator::ArrayAccess,
404+
Place::Variable(_),
405+
_
406+
)
407+
) =>
408+
{
409+
match Box::into_inner(node) {
410+
ExprNode::UnaryOperation(_, index) => Ok(Self::Record(index)),
411+
ExprNode::BinaryPlaceOperation(_, Place::Variable(var), index) => {
412+
Ok(Self::ArrayElement(var, index))
413+
}
414+
_ => unreachable!("Box is magic; handled awkwardly in the match guard."),
415+
}
416+
}
417+
_ => Err((expr, ParsingError::OperatorExpectsVariable(span))),
418+
}
419+
}
420+
}
421+
346422
impl WriteKind {
347423
pub fn parse(value: &Token) -> Option<Self> {
348424
match value {
@@ -352,43 +428,14 @@ impl WriteKind {
352428
}
353429
}
354430

355-
pub fn expr_getline<'a>(self, var: Option<Variable<'a>>, expr: Expr<'a>) -> Getline<'a> {
431+
pub fn expr_getline<'a>(self, var: Option<Place<'a>>, expr: Expr<'a>) -> Getline<'a> {
356432
match self {
357433
Self::Pipe => Getline::PipeOut(var, expr),
358434
Self::Coprocess => Getline::CoprocessOut(var, expr),
359435
}
360436
}
361437
}
362438

363-
impl BinaryOperator {
364-
pub fn unfold(token: &Token) -> Option<Self> {
365-
match token {
366-
Token::PlusAssign => Some(Self::Add),
367-
Token::MinusAssign => Some(Self::Subtract),
368-
Token::StarAssign => Some(Self::Multiply),
369-
Token::SlashAssign => Some(Self::Divide),
370-
Token::PercentAssign => Some(Self::Modulo),
371-
Token::CaretAssign => Some(Self::Raise),
372-
_ => None,
373-
}
374-
}
375-
376-
pub fn unfold_prefix(token: &Token<'_>) -> Option<(Self, u8)> {
377-
match token {
378-
Token::Increment => Some((Self::Add, binding_powers::BP_INC_DEC)),
379-
Token::Decrement => Some((Self::Subtract, binding_powers::BP_INC_DEC)),
380-
_ => None,
381-
}
382-
}
383-
pub fn unfold_suffix(token: &Token<'_>) -> Option<(Self, Self, u8)> {
384-
match token {
385-
Token::Increment => Some((Self::Add, Self::Subtract, binding_powers::BP_INC_DEC)),
386-
Token::Decrement => Some((Self::Subtract, Self::Add, binding_powers::BP_INC_DEC)),
387-
_ => None,
388-
}
389-
}
390-
}
391-
392439
pub struct Ternary;
393440

394441
mod binding_powers {
@@ -435,13 +482,20 @@ impl BindingPower for BinaryOperator {
435482
}
436483
}
437484

438-
impl BindingPower for PlaceOperator {
485+
impl BindingPower for UnaryPlaceOperator {
486+
type Bp = u8;
487+
fn binding_power(&self) -> Self::Bp {
488+
binding_powers::BP_INC_DEC
489+
}
490+
}
491+
492+
impl BindingPower for BinaryPlaceOperator {
439493
type Bp = (u8, u8);
440494
fn binding_power(&self) -> Self::Bp {
441495
match self {
442-
Self::Assignment => binding_powers::BP_ASSIGN,
443496
Self::ArrayAccess => (binding_powers::BP_GROUPING, 0),
444497
Self::InArray => binding_powers::BP_IN,
498+
_ => binding_powers::BP_ASSIGN,
445499
}
446500
}
447501
}
@@ -460,7 +514,6 @@ impl BindingPower for UnaryOperator {
460514

461515
impl BindingPower for Ternary {
462516
type Bp = (u8, u8);
463-
464517
fn binding_power(&self) -> Self::Bp {
465518
binding_powers::BP_TERNARY
466519
}
@@ -476,7 +529,7 @@ impl Debug for Expr<'_> {
476529
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
477530
match self {
478531
Self::Leaf(atom) => write!(f, "{atom:?}"),
479-
Self::Node(expr) => match expr {
532+
Self::Node(expr) => match expr.as_ref() {
480533
ExprNode::FunctionCall(ident, args) => {
481534
write!(f, "({ident:?}")?;
482535
for arg in args {
@@ -486,7 +539,8 @@ impl Debug for Expr<'_> {
486539
}
487540
ExprNode::UnaryOperation(op, a) => write!(f, "({op:?} {a:?})"),
488541
ExprNode::BinaryOperation(op, a, b) => write!(f, "({op:?} {a:?} {b:?})"),
489-
ExprNode::PlaceOperation(op, a, b) => write!(f, "({op:?} {a:?} {b:?})"),
542+
ExprNode::BinaryPlaceOperation(op, a, b) => write!(f, "({op:?} {a:?} {b:?})"),
543+
ExprNode::UnaryPlaceOperation(op, a) => write!(f, "({op:?} {a:?})"),
490544
ExprNode::Ternary(a, b, c) => write!(f, "(?: {a:?} {b:?} {c:?})"),
491545
ExprNode::Getline(getline) => match getline {
492546
Getline::FromInput(Some(a)) => write!(f, "(getline {a:?})"),

parser/src/diagnostics.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,3 +150,9 @@ pub fn report_error<'a>(
150150
}
151151
(Box::new(report.finish()), Source::from(source))
152152
}
153+
154+
impl<T> From<(T, Self)> for ParsingError {
155+
fn from(value: (T, Self)) -> Self {
156+
value.1
157+
}
158+
}

0 commit comments

Comments
 (0)