Skip to content

Commit 9d5b76a

Browse files
committed
lexer: add new tests
1 parent 65ada71 commit 9d5b76a

1 file changed

Lines changed: 218 additions & 1 deletion

File tree

lexer/src/tests.rs

Lines changed: 218 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,218 @@
1-
// Placeholder: real tests to be added.
1+
// This file is part of the uutils awk package.
2+
//
3+
// For the full copyright and license information, please view the LICENSE
4+
// file that was distributed with this source code.
5+
6+
use std::io::Write;
7+
8+
use crate::{Context, Extra, Identifier, Lexer, Token};
9+
use bumpalo::{
10+
Bump,
11+
collections::{CollectIn, Vec},
12+
};
13+
14+
fn lex<'a>(
15+
src: &'a [u8],
16+
arena: &'a Bump,
17+
posix_strict: bool,
18+
gnu_strict: bool,
19+
) -> Vec<'a, Token<'a>> {
20+
Lexer::with_extras(
21+
src,
22+
Extra {
23+
ctx: Context::AcceptExpression,
24+
arena,
25+
posix_strict,
26+
gnu_strict,
27+
},
28+
)
29+
.collect_in::<Result<Vec<_>, _>>(arena)
30+
.unwrap()
31+
}
32+
33+
/// With both strictness flags off, the whitespace/newline/continuation filler
/// placed after `BEGIN` is expected to produce two `Newline` tokens, while the
/// same filler after `{`, `else`, `do`, `&&`, `||`, `?`, `:` and `,` is
/// expected to produce none.
#[test]
fn lexer_test_newlines_non_posix() {
    // Blanks, tabs, plain newlines and one backslash-newline continuation.
    let filler = " \t \n \t\n\n \\\n \t";
    let arena = Bump::new();

    // Source text: every token followed by the filler, closed by a `}`.
    let mut input = Vec::new_in(&arena);
    for tok in ["BEGIN", "{", "else", "do", "&&", "||", "?", ":", ","] {
        write!(input, "{tok}{filler}").unwrap();
    }
    input.push(b'}');

    let tokens = lex(&input, &arena, false, false);
    let expected = [
        Token::BeginPattern,
        Token::Newline,
        Token::Newline,
        Token::OpenBrace,
        Token::Else,
        Token::Do,
        Token::BooleanAnd,
        Token::BooleanOr,
        Token::QuestionMark,
        Token::Colon,
        Token::Comma,
        Token::ClosedBrace,
    ];
    assert_eq!(&tokens, &expected);
}
60+
61+
/// Same input as the non-POSIX newline test, but lexed with
/// `posix_strict = true`; the test passes only if `lex` panics.
#[test]
#[should_panic]
fn lexer_test_newlines_posix() {
    // Blanks, tabs, plain newlines and one backslash-newline continuation.
    let filler = " \t \n \t\n\n \\\n \t";
    let arena = Bump::new();

    // Source text: every token followed by the filler, closed by a `}`.
    let mut input = Vec::new_in(&arena);
    for tok in ["BEGIN", "{", "else", "do", "&&", "||", "?", ":", ","] {
        write!(input, "{tok}{filler}").unwrap();
    }
    input.push(b'}');

    // Only the panic matters here; the token list is discarded.
    let _tokens = lex(&input, &arena, true, false);
}
73+
74+
/// Runs of consecutive `;` and runs of consecutive newlines are each expected
/// to collapse into a single token, while a backslash-newline continuation
/// between two runs keeps them as separate tokens.
#[test]
fn lexer_test_collapsible_delimiters() {
    let arena = Bump::new();
    let source = b";\\\n;;;\n\n\n\n;;;\n\\\n\n";

    let tokens = lex(source, &arena, false, false);
    let expected = [
        Token::Semicolon,
        Token::Semicolon,
        Token::Newline,
        Token::Semicolon,
        Token::Newline,
        Token::Newline,
    ];
    assert_eq!(&tokens, &expected);
}
90+
91+
/// A backslash-newline inside a string literal or a regex literal is expected
/// to be removed from the resulting token's payload.
#[test]
fn lexer_test_multiline() {
    let arena = Bump::new();
    // A string and a regex, each split across two lines by a continuation.
    let source = b"\"aaaa\\\nbbbb\", /ccc\\\nd/";

    let tokens = lex(source, &arena, false, false);
    let expected = [
        Token::String(b"aaaabbbb".into()),
        Token::Comma,
        Token::Regex(b"cccd".into()),
    ];
    assert_eq!(&tokens, &expected);
}
104+
105+
/// Lexing `@concurrent` with `gnu_strict = true` must panic.
///
/// NOTE(review): judging by the test name, `@concurrent` is presumably a
/// uutils-specific extension that GNU-strict mode rejects — confirm against
/// the lexer.
#[test]
#[should_panic]
fn lexer_test_uu_extensions() {
    let arena = Bump::new();
    let source = b"@concurrent";
    // Only the panic matters here; the token list is discarded.
    let _tokens = lex(source, &arena, false, true);
}
111+
112+
/// With `posix_strict = true`, `BEGINFILE` and `ENDFILE` are expected to lex
/// as ordinary identifiers without a namespace.
#[test]
fn lexer_test_gnu_pattern() {
    let arena = Bump::new();
    let source = b"BEGINFILE ENDFILE";

    let tokens = lex(source, &arena, true, false);
    let expected = [
        Token::Identifier(Identifier {
            namespace: None,
            literal: "BEGINFILE",
        }),
        Token::Identifier(Identifier {
            namespace: None,
            literal: "ENDFILE",
        }),
    ];
    assert_eq!(&tokens, &expected);
}
129+
130+
/// Checks the numeric value produced for several literal spellings: plain
/// integers, a trailing or leading decimal point, exponents with and without
/// a sign, and a literal with a leading minus.
#[test]
fn lexer_test_floats() {
    let arena = Bump::new();
    let source = b"1 20. 0. .3 2e4 -3.e2 5e+1 2.1e-3";

    let tokens = lex(source, &arena, false, false);
    // One expected `Number` per whitespace-separated literal, in order.
    let expected = [
        Token::Number(1.),
        Token::Number(20.),
        Token::Number(0.),
        Token::Number(0.3),
        Token::Number(2e4),
        Token::Number(-3e2),
        Token::Number(5e1),
        Token::Number(2.1e-3),
    ];
    assert_eq!(&tokens, &expected);
}
148+
149+
/// The quoted arguments of `@include` and `@nsinclude` are expected to have
/// their escape sequences (`\"`, `\t`, `\n`) decoded into the actual bytes.
#[test]
fn lexer_test_directive_escaping() {
    let arena = Bump::new();
    // Raw string: the backslashes reach the lexer as literal bytes.
    let source = br#" @include "aa\"a\ta" @nsinclude "b\"\nb" "#;

    let tokens = lex(source, &arena, false, false);
    let expected = [
        Token::IncludeDirective(b"aa\"a\ta".into()),
        Token::NsIncludeDirective(b"b\"\nb".into()),
    ];
    assert_eq!(&tokens, &expected);
}
161+
162+
/// Lexing a namespace directive and a qualified name that both use `1a`
/// must panic even with both strictness flags off.
///
/// NOTE(review): presumably the panic comes from `1a` starting with a digit
/// and so not being a valid name — confirm against the lexer.
#[test]
#[should_panic]
fn lexer_test_ident_rules_non_posix() {
    let arena = Bump::new();
    let source = b"@namespace \"1a\"; a::1a";
    // Only the panic matters here; the token list is discarded.
    let _tokens = lex(source, &arena, false, false);
}
168+
169+
/// Lexing a namespace directive followed by a namespace-qualified identifier
/// must panic when `posix_strict = true`.
///
/// NOTE(review): presumably namespaces are rejected in POSIX-strict mode —
/// confirm against the lexer.
#[test]
#[should_panic]
fn lexer_test_ident_rules_posix() {
    let arena = Bump::new();
    let source = b"@namespace \"foo\"; foo::a";
    // Only the panic matters here; the token list is discarded.
    let _tokens = lex(source, &arena, true, false);
}
175+
176+
/// End-to-end check on a small program: a `@load` directive, a `BEGIN`
/// action, a regex pattern terminated by `;`, and an `END` action that
/// compares `$1` with a namespace-qualified identifier.
#[test]
fn lexer_test_general_tokens() {
    let arena = Bump::new();
    // The raw-string lines are kept flush-left so that no extra bytes are
    // added to the lexer input.
    let source = br#"
@load "lib1.so.1"
BEGIN { print a + 1 }
/2\..*/;
END { $1 == foo::bar }
"#;

    let tokens = lex(source, &arena, false, false);
    let expected = [
        // Leading newline of the raw string.
        Token::Newline,
        // @load "lib1.so.1"
        Token::LoadDirective(b"lib1.so.1".into()),
        Token::Newline,
        // BEGIN { print a + 1 }
        Token::BeginPattern,
        Token::OpenBrace,
        Token::Print,
        Token::Identifier(Identifier {
            namespace: None,
            literal: "a",
        }),
        Token::Plus,
        Token::Number(1.),
        Token::ClosedBrace,
        Token::Newline,
        // /2\..*/;
        Token::Regex(b"2\\..*".into()),
        Token::Semicolon,
        Token::Newline,
        // END { $1 == foo::bar }
        Token::EndPattern,
        Token::OpenBrace,
        Token::Record,
        Token::Number(1.),
        Token::EqualTo,
        Token::Identifier(Identifier {
            namespace: Some("foo"),
            literal: "bar",
        }),
        Token::ClosedBrace,
        Token::Newline,
    ];
    assert_eq!(&tokens, &expected);
}

0 commit comments

Comments
 (0)