Skip to content

Commit c22de26

Browse files
committed
lexer: arena integration for slices
1 parent aa9e1ad commit c22de26

5 files changed

Lines changed: 75 additions & 22 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lexer/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ logos = "0.16.1"
1010
memchr.workspace = true
1111
thiserror.workspace = true
1212
tracing.workspace = true
13+
bumpalo.workspace = true
1314

1415
[lints]
1516
workspace = true

lexer/src/lib.rs

Lines changed: 66 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -8,13 +8,14 @@ mod tests;
88

99
use core::str;
1010
use std::{
11-
borrow::Cow,
1211
fmt::{Debug, Display},
1312
slice::SliceIndex,
1413
};
1514

15+
use bumpalo::{Bump, collections::Vec};
16+
use logos::Logos;
1617
use logos::Skip;
17-
pub use logos::{Logos, Span, SpannedIter};
18+
pub use logos::{Span, SpannedIter};
1819
use memchr::{memchr, memchr3};
1920
use thiserror::Error;
2021

@@ -27,7 +28,7 @@ pub type Result<T, E = LexingError> = std::result::Result<T, E>;
2728
#[logos(skip("[ \t]+"))]
2829
#[logos(skip(r"(\\\n)+"))]
2930
#[logos(skip("#", skip_line))]
30-
#[logos(extras = Context)]
31+
#[logos(extras = Extra)]
3132
#[logos(subpattern identifier = r"[a-zA-Z_][a-zA-Z0-9_]*")]
3233
#[logos(error(LexingError, callback = |lex| LexingError::from(lex)))]
3334
pub enum Token<'a> {
@@ -215,6 +216,9 @@ pub enum Token<'a> {
215216
Semicolon,
216217
}
217218

219+
#[derive(Debug, Default, PartialEq, Eq)]
220+
pub struct Extra(Context, *const Bump);
221+
218222
#[derive(Debug, Default, PartialEq, Eq)]
219223
pub enum Context {
220224
#[default]
@@ -243,6 +247,12 @@ pub struct Identifier<'a> {
243247
pub literal: &'a str,
244248
}
245249

250+
impl<'a> Token<'a> {
251+
pub fn lex(source: &'a [u8], arena: &'a Bump) -> logos::Lexer<'a, Self> {
252+
Lexer::with_extras(source, Extra(Context::AcceptExpression, arena))
253+
}
254+
}
255+
246256
impl From<&mut Lexer<'_>> for LexingError {
247257
fn from(lex: &mut Lexer<'_>) -> Self {
248258
Self::Unexpected(lex.span(), String::from_utf8_lossy(lex.slice()).to_string())
@@ -259,7 +269,7 @@ fn parse_string<'a>(lex: &mut logos::Lexer<'a, Token<'a>>) -> Result<Slice<'a>>
259269
}
260270

261271
fn parse_regex_or_slash<'a>(lex: &mut logos::Lexer<'a, Token<'a>>) -> Result<Token<'a>> {
262-
match lex.extras {
272+
match lex.extras.0 {
263273
Context::AcceptExpression => {
264274
accept_operator(lex);
265275
parse_content::<false, true, '/'>(lex).map(Token::Regex)
@@ -280,7 +290,7 @@ fn parse_content<'a, const MINIMAL: bool, const REGEX: bool, const DELIMITER: ch
280290
) -> Result<Slice<'a>> {
281291
let rest = lex.remainder();
282292
let mut start = 0;
283-
let mut out: Cow<'a, [u8]> = Cow::Borrowed(&[]);
293+
let mut out = Slice::Borrowed(&[]);
284294

285295
while let Some(rel_i) = memchr3(b'\n', b'\\', DELIMITER as u8, &rest[start..]) {
286296
let i = start + rel_i;
@@ -290,15 +300,18 @@ fn parse_content<'a, const MINIMAL: bool, const REGEX: bool, const DELIMITER: ch
290300
// push remaining segment
291301
lex.bump(i + 1);
292302
if start == 0 {
293-
out = Cow::Borrowed(&rest[..i]);
303+
out = Slice::Borrowed(&rest[..i]);
294304
} else {
295-
out.to_mut().extend_from_slice(&rest[start..i]);
305+
out.to_mut(lex.extras.arena())
306+
.extend_from_slice(&rest[start..i]);
296307
}
297-
return Ok(Slice(out));
308+
return Ok(out);
298309
}
299310
b'\\' => {
300-
out.to_mut().extend_from_slice(&rest[start..i]);
301-
let consumed = parse_escape::<MINIMAL, REGEX>(&rest[i..], out.to_mut())?;
311+
out.to_mut(lex.extras.arena())
312+
.extend_from_slice(&rest[start..i]);
313+
let consumed =
314+
parse_escape::<MINIMAL, REGEX>(&rest[i..], out.to_mut(lex.extras.arena()))?;
302315
start = i + consumed;
303316
}
304317
_ => break,
@@ -394,20 +407,22 @@ impl<'a> Identifier<'a> {
394407
}
395408

396409
fn accept_expression(lex: &mut Lexer<'_>) {
397-
lex.extras = Context::AcceptExpression;
410+
lex.extras.0 = Context::AcceptExpression;
398411
}
399412

400413
fn accept_operator(lex: &mut Lexer<'_>) {
401-
lex.extras = Context::AcceptOperator;
414+
lex.extras.0 = Context::AcceptOperator;
402415
}
403416

404-
#[repr(transparent)]
405417
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone)]
406-
pub struct Slice<'a>(Cow<'a, [u8]>);
418+
pub enum Slice<'a> {
419+
Borrowed(&'a [u8]),
420+
Owned(Vec<'a, u8>),
421+
}
407422

408423
impl Display for Slice<'_> {
409424
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
410-
write!(f, "{}", String::from_utf8_lossy(&self.0).as_ref())
425+
write!(f, "{}", String::from_utf8_lossy(self.as_ref()).as_ref())
411426
}
412427
}
413428

@@ -419,6 +434,41 @@ impl Debug for Slice<'_> {
419434

420435
impl AsRef<[u8]> for Slice<'_> {
421436
fn as_ref(&self) -> &[u8] {
422-
&self.0
437+
match self {
438+
Self::Borrowed(x) => x,
439+
Self::Owned(x) => x,
440+
}
441+
}
442+
}
443+
444+
impl<'a> Slice<'a> {
445+
pub fn to_mut(&mut self, arena: &'a Bump) -> &mut Vec<'a, u8> {
446+
if let Self::Borrowed(x) = self {
447+
let mut vec = Vec::new_in(arena);
448+
vec.extend_from_slice_copy(x);
449+
*self = Self::Owned(vec);
450+
}
451+
let Self::Owned(x) = self else { unreachable!() };
452+
x
453+
}
454+
}
455+
456+
impl<'a> From<&'a [u8]> for Slice<'a> {
457+
fn from(value: &'a [u8]) -> Self {
458+
Self::Borrowed(value)
459+
}
460+
}
461+
462+
impl<'a> From<Vec<'a, u8>> for Slice<'a> {
463+
fn from(value: Vec<'a, u8>) -> Self {
464+
Self::Owned(value)
465+
}
466+
}
467+
468+
impl Extra {
469+
fn arena<'a>(&self) -> &'a Bump {
470+
// SAFETY: the arena lives for as long as self because it has the same lifetime as
471+
// the source being lexed; Logos just can't take lifetimes on extras.
472+
unsafe { &*self.1 }
423473
}
424474
}

parser/src/lex.rs

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,8 @@
55

66
use std::{fmt::Debug, iter::Peekable};
77

8-
use lexer::{LexingError, Logos, Span, SpannedIter, Token};
8+
use bumpalo::Bump;
9+
use lexer::{LexingError, Span, SpannedIter, Token};
910

1011
use crate::{
1112
ParsingError,
@@ -21,9 +22,9 @@ pub struct Lexer<'a> {
2122
type LexItem<'a> = <Lexer<'a> as Iterator>::Item;
2223

2324
impl<'a> Lexer<'a> {
24-
pub fn new(source: &'a [u8]) -> Self {
25+
pub fn new(source: &'a [u8], arena: &'a Bump) -> Self {
2526
Self {
26-
inner: Token::lexer(source).spanned().peekable(),
27+
inner: Token::lex(source, arena).spanned().peekable(),
2728
span: Span::default(),
2829
// source,
2930
}

parser/src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ impl<'a> Parser<'a> {
7070
pub fn parse(&mut self, name: &'a str, source: &'a [u8]) -> Result<&Ast<'a>, AriadneErr<'a>> {
7171
let source = self.arena.alloc_slice_copy(source);
7272
self.current_file = name;
73-
let mut lex = Lexer::new(source);
73+
let mut lex = Lexer::new(source, self.arena);
7474
let parsed = self.parse_top(&mut lex, true);
7575
parsed.map_err(|error| report_error(error, name, source))
7676
}
@@ -124,14 +124,14 @@ impl<'a> Parser<'a> {
124124
Token::IncludeDirective(path) => {
125125
let old_namespace = self.namespace;
126126
let content = self.preprocessor.include_in(path.as_ref(), self.arena);
127-
self.parse_top(&mut Lexer::new(content), true)?;
127+
self.parse_top(&mut Lexer::new(content, self.arena), true)?;
128128
lex.expect_with(Token::is_stmnt_end, "expected statement end.".into())?;
129129
self.namespace = old_namespace;
130130
}
131131
Token::NsIncludeDirective(path) => {
132132
let old_namespace = self.namespace;
133133
let content = self.preprocessor.include_in(path.as_ref(), self.arena);
134-
self.parse_top(&mut Lexer::new(content), false)?;
134+
self.parse_top(&mut Lexer::new(content, self.arena), false)?;
135135
lex.expect_with(Token::is_stmnt_end, "expected statement end.".into())?;
136136
self.namespace = old_namespace;
137137
}

0 commit comments

Comments
 (0)